1 #####################################################################
3 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
4 # line breaks to the token stream
6 # WARNING: This is not a real class for speed reasons. Only one
7 # Formatter may be used.
9 #####################################################################
11 package Perl::Tidy::Formatter;
15 our $VERSION = '20181120';
17 # The Tokenizer will be loaded with the Formatter
18 ##use Perl::Tidy::Tokenizer; # for is_keyword()
22 Perl::Tidy::Die($msg);
23 croak "unexpected return from Perl::Tidy::Die";
28 Perl::Tidy::Warn($msg);
34 Perl::Tidy::Exit($msg);
35 croak "unexpected return from Perl::Tidy::Exit";
40 # Caution: these debug flags produce a lot of output
41 # They should all be 0 except when debugging small scripts
42 use constant FORMATTER_DEBUG_FLAG_RECOMBINE => 0;
43 use constant FORMATTER_DEBUG_FLAG_BOND_TABLES => 0;
44 use constant FORMATTER_DEBUG_FLAG_BOND => 0;
45 use constant FORMATTER_DEBUG_FLAG_BREAK => 0;
46 use constant FORMATTER_DEBUG_FLAG_CI => 0;
47 use constant FORMATTER_DEBUG_FLAG_FLUSH => 0;
48 use constant FORMATTER_DEBUG_FLAG_FORCE => 0;
49 use constant FORMATTER_DEBUG_FLAG_LIST => 0;
50 use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
51 use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0;
52 use constant FORMATTER_DEBUG_FLAG_SPARSE => 0;
53 use constant FORMATTER_DEBUG_FLAG_STORE => 0;
54 use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0;
55 use constant FORMATTER_DEBUG_FLAG_WHITE => 0;
57 my $debug_warning = sub {
58 print STDOUT "FORMATTER_DEBUGGING with key $_[0]\n";
61 FORMATTER_DEBUG_FLAG_RECOMBINE && $debug_warning->('RECOMBINE');
62 FORMATTER_DEBUG_FLAG_BOND_TABLES && $debug_warning->('BOND_TABLES');
63 FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND');
64 FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK');
65 FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI');
66 FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH');
67 FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE');
68 FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST');
69 FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
70 FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT');
71 FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE');
72 FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE');
73 FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP');
74 FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE');
81 $gnu_position_predictor
82 $line_start_index_to_go
83 $last_indentation_written
84 $last_unadjusted_indentation
86 $last_output_short_opening_token
89 $saw_VERSION_in_this_file
95 $last_output_indentation
102 @container_environment_to_go
104 @forced_breakpoint_to_go
106 @summed_lengths_to_go
108 @leading_spaces_to_go
109 @reduced_spaces_to_go
110 @matching_token_to_go
115 @old_breakpoint_to_go
122 %saved_opening_indentation
125 $comma_count_in_batch
126 $last_nonblank_index_to_go
127 $last_nonblank_type_to_go
128 $last_nonblank_token_to_go
129 $last_last_nonblank_index_to_go
130 $last_last_nonblank_type_to_go
131 $last_last_nonblank_token_to_go
132 @nonblank_lines_at_depth
135 @whitespace_level_stack
136 $whitespace_last_level
138 $format_skipping_pattern_begin
139 $format_skipping_pattern_end
141 $forced_breakpoint_count
142 $forced_breakpoint_undo_count
143 @forced_breakpoint_undo_stack
144 %postponed_breakpoint
148 $first_embedded_tab_at
149 $last_embedded_tab_at
150 $deleted_semicolon_count
151 $first_deleted_semicolon_at
152 $last_deleted_semicolon_at
153 $added_semicolon_count
154 $first_added_semicolon_at
155 $last_added_semicolon_at
156 $first_tabbing_disagreement
157 $last_tabbing_disagreement
158 $in_tabbing_disagreement
159 $tabbing_disagreement_count
162 $last_line_leading_type
163 $last_line_leading_level
164 $last_last_line_leading_level
167 %block_opening_line_number
168 $csc_new_statement_ok
171 $accumulating_text_for_block
173 $rleading_block_if_elsif_text
174 $leading_block_text_level
175 $leading_block_text_length_exceeded
176 $leading_block_text_line_length
177 $leading_block_text_line_number
178 $closing_side_comment_prefix_pattern
179 $closing_side_comment_list_pattern
181 $blank_lines_after_opening_block_pattern
182 $blank_lines_before_closing_block_pattern
186 $last_last_nonblank_token
187 $last_last_nonblank_type
188 $last_nonblank_block_type
191 %is_if_brace_follower
195 %is_last_next_redo_return
196 %is_other_brace_follower
197 %is_else_brace_follower
198 %is_anon_sub_brace_follower
199 %is_anon_sub_1_brace_follower
201 %is_sort_map_grep_eval
202 %is_sort_map_grep_eval_do
203 %is_block_without_semicolon
208 %is_if_unless_and_or_last_next_redo_return
209 %ok_to_add_semicolon_for_block_type
215 $is_static_block_comment
216 $index_start_one_line_block
217 $semicolons_before_block_self_destruct
218 $index_max_forced_break
221 $vertical_aligner_object
228 $static_block_comment_pattern
229 $static_side_comment_pattern
230 %opening_vertical_tightness
231 %closing_vertical_tightness
232 %closing_token_indentation
233 $some_closing_token_indentation
239 $block_brace_vertical_tightness_pattern
242 $rOpts_add_whitespace
243 $rOpts_block_brace_tightness
244 $rOpts_block_brace_vertical_tightness
245 $rOpts_brace_left_and_indent
246 $rOpts_comma_arrow_breakpoints
247 $rOpts_break_at_old_keyword_breakpoints
248 $rOpts_break_at_old_comma_breakpoints
249 $rOpts_break_at_old_logical_breakpoints
250 $rOpts_break_at_old_ternary_breakpoints
251 $rOpts_break_at_old_attribute_breakpoints
252 $rOpts_closing_side_comment_else_flag
253 $rOpts_closing_side_comment_maximum_text
254 $rOpts_continuation_indentation
255 $rOpts_delete_old_whitespace
256 $rOpts_fuzzy_line_length
257 $rOpts_indent_columns
258 $rOpts_line_up_parentheses
259 $rOpts_maximum_fields_per_table
260 $rOpts_maximum_line_length
261 $rOpts_variable_maximum_line_length
262 $rOpts_short_concatenation_item_length
263 $rOpts_keep_old_blank_lines
264 $rOpts_ignore_old_breakpoints
265 $rOpts_format_skipping
266 $rOpts_space_function_paren
267 $rOpts_space_keyword_paren
268 $rOpts_keep_interior_semicolons
269 $rOpts_ignore_side_comment_lengths
270 $rOpts_stack_closing_block_brace
271 $rOpts_space_backslash_quote
272 $rOpts_whitespace_cycle
276 %is_keyword_returning_list
294 %weld_len_left_closing
295 %weld_len_right_closing
296 %weld_len_left_opening
297 %weld_len_right_opening
299 $rcuddled_block_types
310 # Array index names for token variables
313 _BLOCK_TYPE_ => $i++,
315 _CONTAINER_ENVIRONMENT_ => $i++,
316 _CONTAINER_TYPE_ => $i++,
317 _CUMULATIVE_LENGTH_ => $i++,
318 _LINE_INDEX_ => $i++,
319 _KNEXT_SEQ_ITEM_ => $i++,
321 _LEVEL_TRUE_ => $i++,
325 _TYPE_SEQUENCE_ => $i++,
327 $NVARS = 1 + _TYPE_SEQUENCE_;
329 # default list of block types for which -bli would apply
330 $bli_list_string = 'if else elsif unless while for foreach do : sub';
335 .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
336 <= >= == =~ !~ != ++ -- /= x=
338 @is_digraph{@q} = (1) x scalar(@q);
340 @q = qw( ... **= <<= >>= &&= ||= //= <=> <<~ );
341 @is_trigraph{@q} = (1) x scalar(@q);
344 = **= += *= &= <<= &&=
349 @is_assignment{@q} = (1) x scalar(@q);
359 @is_keyword_returning_list{@q} = (1) x scalar(@q);
361 @q = qw(is if unless and or err last next redo return);
362 @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
364 @q = qw(last next redo return);
365 @is_last_next_redo_return{@q} = (1) x scalar(@q);
367 @q = qw(sort map grep);
368 @is_sort_map_grep{@q} = (1) x scalar(@q);
370 @q = qw(sort map grep eval);
371 @is_sort_map_grep_eval{@q} = (1) x scalar(@q);
373 @q = qw(sort map grep eval do);
374 @is_sort_map_grep_eval_do{@q} = (1) x scalar(@q);
377 @is_if_unless{@q} = (1) x scalar(@q);
380 @is_and_or{@q} = (1) x scalar(@q);
382 # Identify certain operators which often occur in chains.
383 # Note: the minus (-) causes a side effect of padding of the first line in
384 # something like this (by sub set_logical_padding):
385 # Checkbutton => 'Transmission checked',
386 # -variable => \$TRANS
387 # This usually improves appearance so it seems ok.
388 @q = qw(&& || and or : ? . + - * /);
389 @is_chain_operator{@q} = (1) x scalar(@q);
391 # We can remove semicolons after blocks preceded by these keywords
393 qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
394 unless while until for foreach given when default);
395 @is_block_without_semicolon{@q} = (1) x scalar(@q);
397 # We will allow semicolons to be added within these block types
398 # as well as sub and package blocks.
400 # 1. Note that these keywords are omitted:
401 # switch case given when default sort map grep
402 # 2. It is also ok to add for sub and package blocks and a labeled block
403 # 3. But not okay for other perltidy types including:
405 # 4. Test files: blktype.t, blktype1.t, semicolon.t
407 qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
408 unless do while until eval for foreach );
409 @ok_to_add_semicolon_for_block_type{@q} = (1) x scalar(@q);
411 # 'L' is token for opening { at hash key
413 @is_opening_type{@q} = (1) x scalar(@q);
415 # 'R' is token for closing } at hash key
417 @is_closing_type{@q} = (1) x scalar(@q);
420 @is_opening_token{@q} = (1) x scalar(@q);
423 @is_closing_token{@q} = (1) x scalar(@q);
425 # Patterns for standardizing matches to block types for regular subs and
426 # anonymous subs. Examples
427 # 'sub process' is a named sub
428 # 'sub ::m' is a named sub
429 # 'sub' is an anonymous sub
430 # 'sub:' is a label, not a sub
431 # 'substr' is a keyword
432 $SUB_PATTERN = '^sub\s+(::|\w)';
433 $ASUB_PATTERN = '^sub$';
437 use constant WS_YES => 1;
438 use constant WS_OPTIONAL => 0;
439 use constant WS_NO => -1;
441 # Token bond strengths.
442 use constant NO_BREAK => 10000;
443 use constant VERY_STRONG => 100;
444 use constant STRONG => 2.1;
445 use constant NOMINAL => 1.1;
446 use constant WEAK => 0.8;
447 use constant VERY_WEAK => 0.55;
449 # values for testing indexes in output array
450 use constant UNDEFINED_INDEX => -1;
452 # Maximum number of little messages; probably need not be changed.
453 use constant MAX_NAG_MESSAGES => 6;
455 # increment between sequence numbers for each type
456 # For example, ?: pairs might have numbers 7,11,15,...
457 use constant TYPE_SEQUENCE_INCREMENT => 4;
461 # methods to count instances
463 sub get_count { return $_count; }
464 sub _increment_count { return ++$_count }
465 sub _decrement_count { return --$_count }
470 # trim leading and trailing whitespace from a string
479 my $max = shift @vals;
480 foreach my $val (@vals) {
481 $max = ( $max < $val ) ? $val : $max;
488 my $min = shift @vals;
489 foreach my $val (@vals) {
490 $min = ( $min > $val ) ? $val : $min;
497 # given a string containing words separated by whitespace,
498 # return the list of words
503 return split( /\s+/, $str );
507 my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
509 # Check the keys of a hash:
510 # $rtest = ref to hash to test
511 # $rvalid = ref to hash with valid keys
513 # $msg = a message to write in case of error
514 # $exact_match defines the type of check:
515 # = false: test hash must not have unknown key
516 # = true: test hash must have exactly same keys as known hash
518 grep { !exists $rvalid->{$_} } keys %{$rtest};
520 grep { !exists $rtest->{$_} } keys %{$rvalid};
521 my $error = @unknown_keys;
522 if ($exact_match) { $error ||= @missing_keys }
525 my @expected_keys = sort keys %{$rvalid};
526 @unknown_keys = sort @unknown_keys;
528 ------------------------------------------------------------------------
529 Program error detected checking hash keys
531 Expected keys: (@expected_keys)
532 Unknown key(s): (@unknown_keys)
533 Missing key(s): (@missing_keys)
534 ------------------------------------------------------------------------
540 # interface to Perl::Tidy::Logger routines
543 if ($logger_object) { $logger_object->warning($msg); }
549 if ($logger_object) {
550 $logger_object->complain($msg);
555 sub write_logfile_entry {
557 if ($logger_object) {
558 $logger_object->write_logfile_entry(@msg);
565 if ($logger_object) { $logger_object->black_box(@msg); }
569 sub report_definite_bug {
570 if ($logger_object) {
571 $logger_object->report_definite_bug();
576 sub get_saw_brace_error {
577 if ($logger_object) {
578 return $logger_object->get_saw_brace_error();
583 sub we_are_at_the_last_line {
584 if ($logger_object) {
585 $logger_object->we_are_at_the_last_line();
590 # interface to Perl::Tidy::Diagnostics routine
591 sub write_diagnostics {
593 if ($diagnostics_object) { $diagnostics_object->write_diagnostics($msg); }
597 sub get_added_semicolon_count {
599 return $added_semicolon_count;
604 $self->_decrement_count();
608 sub get_output_line_number {
609 return $vertical_aligner_object->get_output_line_number();
614 my ( $class, @args ) = @_;
616 # we are given an object with a write_line() method to take lines
618 sink_object => undef,
619 diagnostics_object => undef,
620 logger_object => undef,
622 my %args = ( %defaults, @args );
624 $logger_object = $args{logger_object};
625 $diagnostics_object = $args{diagnostics_object};
627 # we create another object with a get_line() and peek_ahead() method
628 my $sink_object = $args{sink_object};
629 $file_writer_object =
630 Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
632 # initialize the leading whitespace stack to negative levels
633 # so that we can never run off the end of the stack
634 $peak_batch_size = 0; # flag to determine if we have output code
635 $gnu_position_predictor = 0; # where the current token is predicted to be
636 $max_gnu_stack_index = 0;
637 $max_gnu_item_index = -1;
638 $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
640 $last_output_indentation = 0;
641 $last_indentation_written = 0;
642 $last_unadjusted_indentation = 0;
643 $last_leading_token = "";
644 $last_output_short_opening_token = 0;
646 $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
647 $saw_END_or_DATA_ = 0;
649 @block_type_to_go = ();
650 @type_sequence_to_go = ();
651 @container_environment_to_go = ();
652 @bond_strength_to_go = ();
653 @forced_breakpoint_to_go = ();
654 @summed_lengths_to_go = (); # line length to start of ith token
655 @token_lengths_to_go = ();
657 @matching_token_to_go = ();
658 @mate_index_to_go = ();
659 @ci_levels_to_go = ();
660 @nesting_depth_to_go = (0);
662 @old_breakpoint_to_go = ();
666 @leading_spaces_to_go = ();
667 @reduced_spaces_to_go = ();
671 @whitespace_level_stack = ();
672 $whitespace_last_level = -1;
675 @has_broken_sublist = ();
676 @want_comma_break = ();
679 $first_tabbing_disagreement = 0;
680 $last_tabbing_disagreement = 0;
681 $tabbing_disagreement_count = 0;
682 $in_tabbing_disagreement = 0;
683 $input_line_tabbing = undef;
685 $last_last_line_leading_level = 0;
686 $last_line_leading_level = 0;
687 $last_line_leading_type = '#';
689 $last_nonblank_token = ';';
690 $last_nonblank_type = ';';
691 $last_last_nonblank_token = ';';
692 $last_last_nonblank_type = ';';
693 $last_nonblank_block_type = "";
694 $last_output_level = 0;
695 $looking_for_else = 0;
696 $embedded_tab_count = 0;
697 $first_embedded_tab_at = 0;
698 $last_embedded_tab_at = 0;
699 $deleted_semicolon_count = 0;
700 $first_deleted_semicolon_at = 0;
701 $last_deleted_semicolon_at = 0;
702 $added_semicolon_count = 0;
703 $first_added_semicolon_at = 0;
704 $last_added_semicolon_at = 0;
705 $is_static_block_comment = 0;
706 %postponed_breakpoint = ();
708 # variables for adding side comments
709 %block_leading_text = ();
710 %block_opening_line_number = ();
711 $csc_new_statement_ok = 1;
712 %csc_block_label = ();
714 %saved_opening_indentation = ();
716 reset_block_text_accumulator();
718 prepare_for_new_input_lines();
720 $vertical_aligner_object =
721 Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
722 $logger_object, $diagnostics_object );
724 if ( $rOpts->{'entab-leading-whitespace'} ) {
726 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
729 elsif ( $rOpts->{'tabs'} ) {
730 write_logfile_entry("Indentation will be with a tab character\n");
734 "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
737 # This hash holds the main data structures for formatting
738 # All hash keys must be defined here.
740 rlines => [], # = ref to array of lines of the file
741 rlines_new => [], # = ref to array of output lines
742 # (FOR FUTURE DEVELOPMENT)
743 rLL => [], # = ref to array with all tokens
744 # in the file. LL originally meant
745 # 'Linked List'. Linked lists were a
746 # bad idea but LL is easy to type.
747 Klimit => undef, # = maximum K index for rLL. This is
748 # needed to catch any autovivification
750 rnested_pairs => [], # for welding decisions
751 K_opening_container => {}, # for quickly traversing structure
752 K_closing_container => {}, # for quickly traversing structure
753 K_opening_ternary => {}, # for quickly traversing structure
754 K_closing_ternary => {}, # for quickly traversing structure
755 rK_phantom_semicolons =>
756 undef, # for undoing phantom semicolons if iterating
757 rpaired_to_inner_container => {},
758 rbreak_container => {}, # prevent one-line blocks
759 rvalid_self_keys => [], # for checking
760 valign_batch_count => 0,
762 my @valid_keys = keys %{$formatter_self};
763 $formatter_self->{rvalid_self_keys} = \@valid_keys;
765 bless $formatter_self, $class;
767 # Safety check..this is not a class yet
768 if ( _increment_count() > 1 ) {
770 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
772 return $formatter_self;
775 # Future routines for storing new lines
777 my ( $self, $rline ) = @_;
779 # my $rline = $rlines->[$index_old];
780 # push @{$rlines_new}, $rline;
785 my ( $self, $index_old ) = @_;
787 # TODO: This will copy line with index $index_old to the new line array
788 # my $rlines = $self->{rlines};
789 # my $rline = $rlines->[$index_old];
790 # $self->push_line($rline);
794 sub push_blank_line {
798 # $self->push_line($rline);
803 my ( $self, $Kmin, $Kmax ) = @_;
805 # TODO: This will store the values for one new line of CODE
806 # CHECK TOKEN RANGE HERE
807 # $self->push_line($rline);
811 sub increment_valign_batch_count {
813 return ++$self->{valign_batch_count};
816 sub get_valign_batch_count {
818 return $self->{valign_batch_count};
824 # "I've just picked up a fault in the AE35 unit" - 2001: A Space Odyssey ...
826 # This routine is called for errors that really should not occur
827 # except if there has been a bug introduced by a recent program change
828 my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
829 my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
830 my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
833 ==============================================================================
834 Fault detected at line $line0 of sub '$subroutine1'
836 which was called from line $line1 of sub '$subroutine2'
838 This is probably an error introduced by a recent programming change.
839 ==============================================================================
842 # This is for Perl-Critic
846 sub check_self_hash {
848 my @valid_self_keys = @{ $self->{rvalid_self_keys} };
850 @valid_self_hash{@valid_self_keys} = (1) x scalar(@valid_self_keys);
851 check_keys( $self, \%valid_self_hash, "Checkpoint: self error", 1 );
855 sub check_token_array {
858 # Check for errors in the array of tokens
859 # Uses package variable $NVARS
860 $self->check_self_hash();
861 my $rLL = $self->{rLL};
862 for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
863 my $nvars = @{ $rLL->[$KK] };
864 if ( $nvars != $NVARS ) {
865 my $type = $rLL->[$KK]->[_TYPE_];
866 $type = '*' unless defined($type);
868 "number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
871 foreach my $var ( _TOKEN_, _TYPE_ ) {
872 if ( !defined( $rLL->[$KK]->[$var] ) ) {
873 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
874 Fault("Undefined variable $var for K=$KK, line=$iline\n");
881 sub set_rLL_max_index {
884 # Set the limit of the rLL array, assuming that it is correct.
885 # This should only be called by routines after they make changes
887 my $rLL = $self->{rLL};
888 if ( !defined($rLL) ) {
890 # Shouldn't happen because rLL was initialized to be an array ref
891 Fault("Undefined Memory rLL");
893 my $Klimit_old = $self->{Klimit};
896 if ( $num > 0 ) { $Klimit = $num - 1 }
897 $self->{Klimit} = $Klimit;
901 sub get_rLL_max_index {
904 # the memory location $rLL and number of tokens should be obtained
905 # from this routine so that any autovivication can be immediately caught.
906 my $rLL = $self->{rLL};
907 my $Klimit = $self->{Klimit};
908 if ( !defined($rLL) ) {
910 # Shouldn't happen because rLL was initialized to be an array ref
911 Fault("Undefined Memory rLL");
914 if ( $num == 0 && defined($Klimit)
915 || $num > 0 && !defined($Klimit)
916 || $num > 0 && $Klimit != $num - 1 )
919 # Possible autovivification problem...
920 if ( !defined($Klimit) ) { $Klimit = '*' }
921 Fault("Error getting rLL: Memory items=$num and Klimit=$Klimit");
926 sub prepare_for_new_input_lines {
928 # Remember the largest batch size processed. This is needed
929 # by the pad routine to avoid padding the first nonblank token
930 if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) {
931 $peak_batch_size = $max_index_to_go;
934 $gnu_sequence_number++; # increment output batch counter
935 %last_gnu_equals = ();
936 %gnu_comma_count = ();
937 %gnu_arrow_count = ();
938 $line_start_index_to_go = 0;
939 $max_gnu_item_index = UNDEFINED_INDEX;
940 $index_max_forced_break = UNDEFINED_INDEX;
941 $max_index_to_go = UNDEFINED_INDEX;
942 $last_nonblank_index_to_go = UNDEFINED_INDEX;
943 $last_nonblank_type_to_go = '';
944 $last_nonblank_token_to_go = '';
945 $last_last_nonblank_index_to_go = UNDEFINED_INDEX;
946 $last_last_nonblank_type_to_go = '';
947 $last_last_nonblank_token_to_go = '';
948 $forced_breakpoint_count = 0;
949 $forced_breakpoint_undo_count = 0;
950 $rbrace_follower = undef;
951 $summed_lengths_to_go[0] = 0;
952 $comma_count_in_batch = 0;
953 $starting_in_quote = 0;
955 destroy_one_line_block();
961 # Loop over old lines to set new line break points
964 my $rlines = $self->{rlines};
966 # Flag to prevent blank lines when POD occurs in a format skipping sect.
967 my $in_format_skipping_section;
970 foreach my $line_of_tokens ( @{$rlines} ) {
972 my $last_line_type = $line_type;
973 $line_type = $line_of_tokens->{_line_type};
974 my $input_line = $line_of_tokens->{_line_text};
976 # _line_type codes are:
977 # SYSTEM - system-specific code before hash-bang line
978 # CODE - line of perl code (including comments)
979 # POD_START - line starting pod, such as '=head'
980 # POD - pod documentation text
981 # POD_END - last line of pod section, '=cut'
982 # HERE - text of here-document
983 # HERE_END - last line of here-doc (target word)
984 # FORMAT - format section
985 # FORMAT_END - last line of format section, '.'
986 # DATA_START - __DATA__ line
987 # DATA - unidentified text following __DATA__
988 # END_START - __END__ line
989 # END - unidentified text following __END__
990 # ERROR - we are in big trouble, probably not a perl script
992 # put a blank line after an =cut which comes before __END__ and __DATA__
993 # (required by podchecker)
994 if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
995 $file_writer_object->reset_consecutive_blank_lines();
996 if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
997 $self->want_blank_line();
1001 # handle line of code..
1002 if ( $line_type eq 'CODE' ) {
1004 my $CODE_type = $line_of_tokens->{_code_type};
1005 $in_format_skipping_section = $CODE_type eq 'FS';
1007 # Handle blank lines
1008 if ( $CODE_type eq 'BL' ) {
1010 # If keep-old-blank-lines is zero, we delete all
1011 # old blank lines and let the blank line rules generate any
1013 if ($rOpts_keep_old_blank_lines) {
1015 $file_writer_object->write_blank_code_line(
1016 $rOpts_keep_old_blank_lines == 2 );
1017 $last_line_leading_type = 'b';
1023 # let logger see all non-blank lines of code
1024 my $output_line_number = get_output_line_number();
1025 ##$vertical_aligner_object->get_output_line_number();
1026 black_box( $line_of_tokens, $output_line_number );
1029 # Handle Format Skipping (FS) and Verbatim (VB) Lines
1030 if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
1031 $self->write_unindented_line("$input_line");
1032 $file_writer_object->reset_consecutive_blank_lines();
1036 # Handle all other lines of code
1037 $self->print_line_of_tokens($line_of_tokens);
1040 # handle line of non-code..
1046 if ( $line_type =~ /^POD/ ) {
1048 # Pod docs should have a preceding blank line. But stay
1049 # out of __END__ and __DATA__ sections, because
1050 # the user may be using this section for any purpose whatsoever
1051 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
1052 if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; }
1053 if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
1055 && !$in_format_skipping_section
1056 && $line_type eq 'POD_START'
1057 && !$saw_END_or_DATA_ )
1059 $self->want_blank_line();
1063 # leave the blank counters in a predictable state
1064 # after __END__ or __DATA__
1065 elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
1066 $file_writer_object->reset_consecutive_blank_lines();
1067 $saw_END_or_DATA_ = 1;
1070 # write unindented non-code line
1071 if ( !$skip_line ) {
1072 if ($tee_line) { $file_writer_object->tee_on() }
1073 $self->write_unindented_line($input_line);
1074 if ($tee_line) { $file_writer_object->tee_off() }
1081 { ## Beginning of routine to check line hashes
1083 my %valid_line_hash;
1087 # These keys are defined for each line in the formatter
1088 # Each line must have exactly these quantities
1089 my @valid_line_keys = qw(
1092 _guessed_indentation_level
1099 _square_bracket_depth
1101 _ended_in_blank_token
1110 @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys);
1113 sub check_line_hashes {
1115 $self->check_self_hash();
1116 my $rlines = $self->{rlines};
1117 foreach my $rline ( @{$rlines} ) {
1118 my $iline = $rline->{_line_number};
1119 my $line_type = $rline->{_line_type};
1120 check_keys( $rline, \%valid_line_hash,
1121 "Checkpoint: line number =$iline, line_type=$line_type", 1 );
1126 } ## End check line hashes
1130 # We are caching tokenized lines as they arrive and converting them to the
1131 # format needed for the final formatting.
1132 my ( $self, $line_of_tokens_old ) = @_;
1133 my $rLL = $self->{rLL};
1134 my $Klimit = $self->{Klimit};
1135 my $rlines_new = $self->{rlines};
1138 my $line_of_tokens = {};
1143 _guessed_indentation_level
1149 _square_bracket_depth
1154 $line_of_tokens->{$key} = $line_of_tokens_old->{$key};
1157 # Data needed by Logger
1158 $line_of_tokens->{_level_0} = 0;
1159 $line_of_tokens->{_ci_level_0} = 0;
1160 $line_of_tokens->{_nesting_blocks_0} = "";
1161 $line_of_tokens->{_nesting_tokens_0} = "";
1163 # Needed to avoid trimming quotes
1164 $line_of_tokens->{_ended_in_blank_token} = undef;
1166 my $line_type = $line_of_tokens_old->{_line_type};
1167 my $input_line_no = $line_of_tokens_old->{_line_number} - 1;
1168 if ( $line_type eq 'CODE' ) {
1170 my $rtokens = $line_of_tokens_old->{_rtokens};
1171 my $rtoken_type = $line_of_tokens_old->{_rtoken_type};
1172 my $rblock_type = $line_of_tokens_old->{_rblock_type};
1173 my $rcontainer_type = $line_of_tokens_old->{_rcontainer_type};
1174 my $rcontainer_environment =
1175 $line_of_tokens_old->{_rcontainer_environment};
1176 my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence};
1177 my $rlevels = $line_of_tokens_old->{_rlevels};
1178 my $rslevels = $line_of_tokens_old->{_rslevels};
1179 my $rci_levels = $line_of_tokens_old->{_rci_levels};
1180 my $rnesting_blocks = $line_of_tokens_old->{_rnesting_blocks};
1181 my $rnesting_tokens = $line_of_tokens_old->{_rnesting_tokens};
1183 my $jmax = @{$rtokens} - 1;
1185 $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
1186 foreach my $j ( 0 .. $jmax ) {
1190 _BLOCK_TYPE_, _CONTAINER_TYPE_,
1191 _CONTAINER_ENVIRONMENT_, _TYPE_SEQUENCE_,
1192 _LEVEL_, _LEVEL_TRUE_,
1193 _SLEVEL_, _CI_LEVEL_,
1197 $rtokens->[$j], $rtoken_type->[$j],
1198 $rblock_type->[$j], $rcontainer_type->[$j],
1199 $rcontainer_environment->[$j], $rtype_sequence->[$j],
1200 $rlevels->[$j], $rlevels->[$j],
1201 $rslevels->[$j], $rci_levels->[$j],
1204 push @{$rLL}, \@tokary;
1207 $Klimit = @{$rLL} - 1;
1209 # Need to remember if we can trim the input line
1210 $line_of_tokens->{_ended_in_blank_token} =
1211 $rtoken_type->[$jmax] eq 'b';
1213 $line_of_tokens->{_level_0} = $rlevels->[0];
1214 $line_of_tokens->{_ci_level_0} = $rci_levels->[0];
1215 $line_of_tokens->{_nesting_blocks_0} = $rnesting_blocks->[0];
1216 $line_of_tokens->{_nesting_tokens_0} = $rnesting_tokens->[0];
1220 $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ];
1221 $line_of_tokens->{_code_type} = "";
1222 $self->{Klimit} = $Klimit;
1224 push @{$rlines_new}, $line_of_tokens;
1228 sub initialize_whitespace_hashes {
1230 # initialize these global hashes, which control the use of
1231 # whitespace around tokens:
1236 # %space_after_keyword
1238 # Many token types are identical to the tokens themselves.
1239 # See the tokenizer for a complete list. Here are some special types:
1241 # f = semicolon in for statement
1244 # Note that :: is excluded since it should be contained in an identifier
1245 # Note that '->' is excluded because it never gets space
1246 # parentheses and brackets are excluded since they are handled specially
1247 # curly braces are included but may be overridden by logic, such as
1250 # NEW_TOKENS: create a whitespace rule here. This can be as
1251 # simple as adding your new letter to @spaces_both_sides, for
1254 my @opening_type = qw< L { ( [ >;
1255 @is_opening_type{@opening_type} = (1) x scalar(@opening_type);
1257 my @closing_type = qw< R } ) ] >;
1258 @is_closing_type{@closing_type} = (1) x scalar(@closing_type);
1260 my @spaces_both_sides = qw#
1261 + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
1262 .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
1263 &&= ||= //= <=> A k f w F n C Y U G v
1266 my @spaces_left_side = qw<
1267 t ! ~ m p { \ h pp mm Z j
1269 push( @spaces_left_side, '#' ); # avoids warning message
1271 my @spaces_right_side = qw<
1272 ; } ) ] R J ++ -- **=
1274 push( @spaces_right_side, ',' ); # avoids warning message
1276 # Note that we are in a BEGIN block here. Later in processing
1277 # the values of %want_left_space and %want_right_space
1278 # may be overridden by any user settings specified by the
1279 # -wls and -wrs parameters. However the binary_whitespace_rules
1280 # are hardwired and have priority.
1281 @want_left_space{@spaces_both_sides} =
1282 (1) x scalar(@spaces_both_sides);
1283 @want_right_space{@spaces_both_sides} =
1284 (1) x scalar(@spaces_both_sides);
1285 @want_left_space{@spaces_left_side} =
1286 (1) x scalar(@spaces_left_side);
1287 @want_right_space{@spaces_left_side} =
1288 (-1) x scalar(@spaces_left_side);
1289 @want_left_space{@spaces_right_side} =
1290 (-1) x scalar(@spaces_right_side);
1291 @want_right_space{@spaces_right_side} =
1292 (1) x scalar(@spaces_right_side);
1293 $want_left_space{'->'} = WS_NO;
1294 $want_right_space{'->'} = WS_NO;
1295 $want_left_space{'**'} = WS_NO;
1296 $want_right_space{'**'} = WS_NO;
1297 $want_right_space{'CORE::'} = WS_NO;
1299 # These binary_ws_rules are hardwired and have priority over the above
1300 # settings. It would be nice to allow adjustment by the user,
1301 # but it would be complicated to specify.
1303 # hash type information must stay tightly bound
1305 $binary_ws_rules{'i'}{'L'} = WS_NO;
1306 $binary_ws_rules{'i'}{'{'} = WS_YES;
1307 $binary_ws_rules{'k'}{'{'} = WS_YES;
1308 $binary_ws_rules{'U'}{'{'} = WS_YES;
1309 $binary_ws_rules{'i'}{'['} = WS_NO;
1310 $binary_ws_rules{'R'}{'L'} = WS_NO;
1311 $binary_ws_rules{'R'}{'{'} = WS_NO;
1312 $binary_ws_rules{'t'}{'L'} = WS_NO;
1313 $binary_ws_rules{'t'}{'{'} = WS_NO;
1314 $binary_ws_rules{'}'}{'L'} = WS_NO;
1315 $binary_ws_rules{'}'}{'{'} = WS_NO;
1316 $binary_ws_rules{'$'}{'L'} = WS_NO;
1317 $binary_ws_rules{'$'}{'{'} = WS_NO;
1318 $binary_ws_rules{'@'}{'L'} = WS_NO;
1319 $binary_ws_rules{'@'}{'{'} = WS_NO;
1320 $binary_ws_rules{'='}{'L'} = WS_YES;
1321 $binary_ws_rules{'J'}{'J'} = WS_YES;
1323 # the following includes ') {'
1324 # as in : if ( xxx ) { yyy }
1325 $binary_ws_rules{']'}{'L'} = WS_NO;
1326 $binary_ws_rules{']'}{'{'} = WS_NO;
1327 $binary_ws_rules{')'}{'{'} = WS_YES;
1328 $binary_ws_rules{')'}{'['} = WS_NO;
1329 $binary_ws_rules{']'}{'['} = WS_NO;
1330 $binary_ws_rules{']'}{'{'} = WS_NO;
1331 $binary_ws_rules{'}'}{'['} = WS_NO;
1332 $binary_ws_rules{'R'}{'['} = WS_NO;
1334 $binary_ws_rules{']'}{'++'} = WS_NO;
1335 $binary_ws_rules{']'}{'--'} = WS_NO;
1336 $binary_ws_rules{')'}{'++'} = WS_NO;
1337 $binary_ws_rules{')'}{'--'} = WS_NO;
1339 $binary_ws_rules{'R'}{'++'} = WS_NO;
1340 $binary_ws_rules{'R'}{'--'} = WS_NO;
1342 $binary_ws_rules{'i'}{'Q'} = WS_YES;
1343 $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
1345 # FIXME: we could to split 'i' into variables and functions
1346 # and have no space for functions but space for variables. For now,
1347 # I have a special patch in the special rules below
1348 $binary_ws_rules{'i'}{'('} = WS_NO;
1350 $binary_ws_rules{'w'}{'('} = WS_NO;
1351 $binary_ws_rules{'w'}{'{'} = WS_YES;
1354 } ## end initialize_whitespace_hashes
1356 sub set_whitespace_flags {
1358 # This routine examines each pair of nonblank tokens and
1359 # sets a flag indicating if white space is needed.
1361 # $rwhitespace_flags->[$j] is a flag indicating whether a white space
1362 # BEFORE token $j is needed, with the following values:
1364 # WS_NO = -1 do not want a space before token $j
1365 # WS_OPTIONAL= 0 optional space or $j is a whitespace
1366 # WS_YES = 1 want a space before token $j
1370 my $rLL = $self->{rLL};
1372 my $rwhitespace_flags = [];
1374 my ( $last_token, $last_type, $last_block_type, $last_input_line_no,
1375 $token, $type, $block_type, $input_line_no );
1376 my $j_tight_closing_paren = -1;
1384 $last_block_type = '';
1385 $last_input_line_no = 0;
1387 my $jmax = @{$rLL} - 1;
1391 # This is some logic moved to a sub to avoid deep nesting of if stmts
1392 my $ws_in_container = sub {
1396 if ( $j + 1 > $jmax ) { return (WS_NO) }
1398 # Patch to count '-foo' as single token so that
1399 # each of $a{-foo} and $a{foo} and $a{'foo'} do
1400 # not get spaces with default formatting.
1404 && $last_token eq '{'
1405 && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
1407 # $j_next is where a closing token should be if
1408 # the container has a single token
1409 if ( $j_here + 1 > $jmax ) { return (WS_NO) }
1411 ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
1415 if ( $j_next > $jmax ) { return WS_NO }
1416 my $tok_next = $rLL->[$j_next]->[_TOKEN_];
1417 my $type_next = $rLL->[$j_next]->[_TYPE_];
1419 # for tightness = 1, if there is just one token
1420 # within the matching pair, we will keep it tight
1422 $tok_next eq $matching_token{$last_token}
1424 # but watch out for this: [ [ ] (misc.t)
1425 && $last_token ne $token
1427 # double diamond is usually spaced
1433 # remember where to put the space for the closing paren
1434 $j_tight_closing_paren = $j_next;
1440 # main loop over all tokens to define the whitespace flags
1441 for ( my $j = 0 ; $j <= $jmax ; $j++ ) {
1443 my $rtokh = $rLL->[$j];
1446 $rwhitespace_flags->[$j] = WS_OPTIONAL;
1448 if ( $rtokh->[_TYPE_] eq 'b' ) {
1452 # set a default value, to be changed as needed
1454 $last_token = $token;
1456 $last_block_type = $block_type;
1457 $last_input_line_no = $input_line_no;
1458 $token = $rtokh->[_TOKEN_];
1459 $type = $rtokh->[_TYPE_];
1460 $block_type = $rtokh->[_BLOCK_TYPE_];
1461 $input_line_no = $rtokh->[_LINE_INDEX_];
1463 #---------------------------------------------------------------
1464 # Whitespace Rules Section 1:
1465 # Handle space on the inside of opening braces.
1466 #---------------------------------------------------------------
1469 if ( $is_opening_type{$last_type} ) {
1471 $j_tight_closing_paren = -1;
1473 # let us keep empty matched braces together: () {} []
1475 if ( $token eq $matching_token{$last_token} ) {
1485 # we're considering the right of an opening brace
1486 # tightness = 0 means always pad inside with space
1487 # tightness = 1 means pad inside if "complex"
1488 # tightness = 2 means never pad inside with space
1491 if ( $last_type eq '{'
1492 && $last_token eq '{'
1493 && $last_block_type )
1495 $tightness = $rOpts_block_brace_tightness;
1497 else { $tightness = $tightness{$last_token} }
1499 #=============================================================
1500 # Patch for test problem <<snippets/fabrice_bug.in>>
1501 # We must always avoid spaces around a bare word beginning
1503 # my $before = ${^PREMATCH};
1504 # Because all of the following cause an error in perl:
1505 # my $before = ${ ^PREMATCH };
1506 # my $before = ${ ^PREMATCH};
1507 # my $before = ${^PREMATCH };
1508 # So if brace tightness flag is -bt=0 we must temporarily reset
1509 # to bt=1. Note that here we must set tightness=1 and not 2 so
1510 # that the closing space
1511 # is also avoided (via the $j_tight_closing_paren flag in coding)
1512 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
1514 #=============================================================
1516 if ( $tightness <= 0 ) {
1519 elsif ( $tightness > 1 ) {
1523 $ws = $ws_in_container->($j);
1526 } # end setting space flag inside opening tokens
1529 if FORMATTER_DEBUG_FLAG_WHITE;
1531 #---------------------------------------------------------------
1532 # Whitespace Rules Section 2:
1533 # Handle space on inside of closing brace pairs.
1534 #---------------------------------------------------------------
1537 if ( $is_closing_type{$type} ) {
1539 if ( $j == $j_tight_closing_paren ) {
1541 $j_tight_closing_paren = -1;
1546 if ( !defined($ws) ) {
1549 if ( $type eq '}' && $token eq '}' && $block_type ) {
1550 $tightness = $rOpts_block_brace_tightness;
1552 else { $tightness = $tightness{$token} }
1554 $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
1557 } # end setting space flag inside closing tokens
1561 if FORMATTER_DEBUG_FLAG_WHITE;
1563 #---------------------------------------------------------------
1564 # Whitespace Rules Section 3:
1565 # Use the binary rule table.
1566 #---------------------------------------------------------------
1567 if ( !defined($ws) ) {
1568 $ws = $binary_ws_rules{$last_type}{$type};
1572 if FORMATTER_DEBUG_FLAG_WHITE;
1574 #---------------------------------------------------------------
1575 # Whitespace Rules Section 4:
1576 # Handle some special cases.
1577 #---------------------------------------------------------------
1578 if ( $token eq '(' ) {
1580 # This will have to be tweaked as tokenization changes.
1581 # We usually want a space at '} (', for example:
1582 # <<snippets/space1.in>>
1583 # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
1586 # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
1587 # At present, the above & block is marked as type L/R so this case
1588 # won't go through here.
1589 if ( $last_type eq '}' ) { $ws = WS_YES }
1591 # NOTE: some older versions of Perl had occasional problems if
1592 # spaces are introduced between keywords or functions and opening
1593 # parens. So the default is not to do this except is certain
1594 # cases. The current Perl seems to tolerate spaces.
1596 # Space between keyword and '('
1597 elsif ( $last_type eq 'k' ) {
1599 unless ( $rOpts_space_keyword_paren
1600 || $space_after_keyword{$last_token} );
1603 # Space between function and '('
1604 # -----------------------------------------------------
1605 # 'w' and 'i' checks for something like:
1606 # myfun( &myfun( ->myfun(
1607 # -----------------------------------------------------
1608 elsif (( $last_type =~ /^[wUG]$/ )
1609 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
1611 $ws = WS_NO unless ($rOpts_space_function_paren);
1614 # space between something like $i and ( in <<snippets/space2.in>>
1615 # for $i ( 0 .. 20 ) {
1616 # FIXME: eventually, type 'i' needs to be split into multiple
1617 # token types so this can be a hardwired rule.
1618 elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
1622 # allow constant function followed by '()' to retain no space
1623 elsif ($last_type eq 'C'
1624 && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
1630 # patch for SWITCH/CASE: make space at ']{' optional
1631 # since the '{' might begin a case or when block
1632 elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
1636 # keep space between 'sub' and '{' for anonymous sub definition
1637 if ( $type eq '{' ) {
1638 if ( $last_token eq 'sub' ) {
1642 # this is needed to avoid no space in '){'
1643 if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
1645 # avoid any space before the brace or bracket in something like
1646 # @opts{'a','b',...}
1647 if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
1652 elsif ( $type eq 'i' ) {
1654 # never a space before ->
1655 if ( $token =~ /^\-\>/ ) {
1660 # retain any space between '-' and bare word
1661 elsif ( $type eq 'w' || $type eq 'C' ) {
1662 $ws = WS_OPTIONAL if $last_type eq '-';
1664 # never a space before ->
1665 if ( $token =~ /^\-\>/ ) {
1670 # retain any space between '-' and bare word; for example
1671 # avoid space between 'USER' and '-' here: <<snippets/space2.in>>
1672 # $myhash{USER-NAME}='steve';
1673 elsif ( $type eq 'm' || $type eq '-' ) {
1674 $ws = WS_OPTIONAL if ( $last_type eq 'w' );
1677 # always space before side comment
1678 elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
1680 # always preserver whatever space was used after a possible
1681 # filehandle (except _) or here doc operator
1684 && ( ( $last_type eq 'Z' && $last_token ne '_' )
1685 || $last_type eq 'h' )
1691 # space_backslash_quote; RT #123774 <<snippets/rt123774.in>>
1692 # allow a space between a backslash and single or double quote
1693 # to avoid fooling html formatters
1694 elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ ) {
1695 if ($rOpts_space_backslash_quote) {
1696 if ( $rOpts_space_backslash_quote == 1 ) {
1699 elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES }
1700 else { } # shouldnt happen
1709 if FORMATTER_DEBUG_FLAG_WHITE;
1711 #---------------------------------------------------------------
1712 # Whitespace Rules Section 5:
1713 # Apply default rules not covered above.
1714 #---------------------------------------------------------------
1716 # If we fall through to here, look at the pre-defined hash tables for
1717 # the two tokens, and:
1718 # if (they are equal) use the common value
1719 # if (either is zero or undef) use the other
1720 # if (either is -1) use it
1734 if ( !defined($ws) ) {
1735 my $wl = $want_left_space{$type};
1736 my $wr = $want_right_space{$last_type};
1737 if ( !defined($wl) ) { $wl = 0 }
1738 if ( !defined($wr) ) { $wr = 0 }
1739 $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
1742 if ( !defined($ws) ) {
1745 "WS flag is undefined for tokens $last_token $token\n");
1748 # Treat newline as a whitespace. Otherwise, we might combine
1749 # 'Send' and '-recipients' here according to the above rules:
1750 # <<snippets/space3.in>>
1751 # my $msg = new Fax::Send
1752 # -recipients => $to,
1754 if ( $ws == 0 && $input_line_no != $last_input_line_no ) { $ws = 1 }
1759 && ( $last_type !~ /^[Zh]$/ ) )
1762 # If this happens, we have a non-fatal but undesirable
1763 # hole in the above rules which should be patched.
1765 "WS flag is zero for tokens $last_token $token\n");
1768 $rwhitespace_flags->[$j] = $ws;
1770 FORMATTER_DEBUG_FLAG_WHITE && do {
1771 my $str = substr( $last_token, 0, 15 );
1772 $str .= ' ' x ( 16 - length($str) );
1773 if ( !defined($ws_1) ) { $ws_1 = "*" }
1774 if ( !defined($ws_2) ) { $ws_2 = "*" }
1775 if ( !defined($ws_3) ) { $ws_3 = "*" }
1776 if ( !defined($ws_4) ) { $ws_4 = "*" }
1778 "NEW WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
1782 if ( $rOpts->{'tight-secret-operators'} ) {
1783 new_secret_operator_whitespace( $rLL, $rwhitespace_flags );
1785 return $rwhitespace_flags;
1786 } ## end sub set_whitespace_flags
1788 sub respace_tokens {
1791 return if $rOpts->{'indent-only'};
1793 # This routine makes all necessary changes to the tokenization after the
1794 # file has been read. This consists mostly of inserting and deleting spaces
1795 # according to the selected parameters. In a few cases non-space characters
1796 # are added, deleted or modified.
1798 # The old tokens are copied one-by-one, with changes, from the old
1799 # linear storage array to a new array.
1801 my $rLL = $self->{rLL};
1802 my $Klimit_old = $self->{Klimit};
1803 my $rlines = $self->{rlines};
1804 my $rpaired_to_inner_container = $self->{rpaired_to_inner_container};
1806 my $rLL_new = []; # This is the new array
1809 my $Kmax = @{$rLL} - 1;
1811 # Set the whitespace flags, which indicate the token spacing preference.
1812 my $rwhitespace_flags = $self->set_whitespace_flags();
1814 # we will be setting token lengths as we go
1815 my $cumulative_length = 0;
1817 # We also define these hash indexes giving container token array indexes
1818 # as a function of the container sequence numbers. For example,
1819 my $K_opening_container = {}; # opening [ { or (
1820 my $K_closing_container = {}; # closing ] } or )
1821 my $K_opening_ternary = {}; # opening ? of ternary
1822 my $K_closing_ternary = {}; # closing : of ternary
1824 # List of new K indexes of phantom semicolons
1825 # This will be needed if we want to undo them for iterations
1826 my $rK_phantom_semicolons = [];
1828 # Temporary hashes for adding semicolons
1829 ##my $rKfirst_new = {};
1831 # a sub to link preceding nodes forward to a new node type
1832 my $link_back = sub {
1833 my ( $Ktop, $key ) = @_;
1835 my $Kprev = $Ktop - 1;
1837 && !defined( $rLL_new->[$Kprev]->[$key] ) )
1839 $rLL_new->[$Kprev]->[$key] = $Ktop;
1844 # A sub to store one token in the new array
1845 # All new tokens must be stored by this sub so that it can update
1846 # all data structures on the fly.
1847 my $last_nonblank_type = ';';
1848 my $store_token = sub {
1851 # This will be the index of this item in the new array
1852 my $KK_new = @{$rLL_new};
1854 # check for a sequenced item (i.e., container or ?/:)
1855 my $type_sequence = $item->[_TYPE_SEQUENCE_];
1856 if ($type_sequence) {
1858 $link_back->( $KK_new, _KNEXT_SEQ_ITEM_ );
1860 my $token = $item->[_TOKEN_];
1861 if ( $is_opening_token{$token} ) {
1863 $K_opening_container->{$type_sequence} = $KK_new;
1865 elsif ( $is_closing_token{$token} ) {
1867 $K_closing_container->{$type_sequence} = $KK_new;
1870 # These are not yet used but could be useful
1872 if ( $token eq '?' ) {
1873 $K_opening_ternary->{$type_sequence} = $KK;
1875 elsif ( $token eq ':' ) {
1876 $K_closing_ternary->{$type_sequence} = $KK;
1880 print STDERR "Ugh: shouldn't happen\n";
1885 # find the length of this token
1886 my $token_length = length( $item->[_TOKEN_] );
1888 # and update the cumulative length
1889 $cumulative_length += $token_length;
1891 # Save the length sum to just AFTER this token
1892 $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
1894 my $type = $item->[_TYPE_];
1895 if ( $type ne 'b' ) { $last_nonblank_type = $type }
1897 # and finally, add this item to the new array
1898 push @{$rLL_new}, $item;
1901 my $store_token_and_space = sub {
1902 my ( $item, $want_space ) = @_;
1904 # store a token with preceding space if requested and needed
1906 # First store the space
1909 && $rLL_new->[-1]->[_TYPE_] ne 'b'
1910 && $rOpts_add_whitespace )
1912 my $rcopy = copy_token_as_type( $item, 'b', ' ' );
1913 $rcopy->[_LINE_INDEX_] =
1914 $rLL_new->[-1]->[_LINE_INDEX_];
1915 $store_token->($rcopy);
1919 $store_token->($item);
1925 my $Kn = $self->K_next_nonblank($KK);
1926 while ( defined($Kn) && $rLL->[$Kn]->[_TYPE_] eq 'q' ) {
1928 $Kn = $self->K_next_nonblank($Kn);
1933 my $add_phantom_semicolon = sub {
1937 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
1938 return unless ( defined($Kp) );
1940 # we are only adding semicolons for certain block types
1941 my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
1943 unless ( $ok_to_add_semicolon_for_block_type{$block_type}
1944 || $block_type =~ /^(sub|package)/
1945 || $block_type =~ /^\w+\:$/ );
1947 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
1949 my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
1950 my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
1952 # Do not add a semicolon if...
1956 # it would follow a comment (and be isolated)
1957 $previous_nonblank_type eq '#'
1959 # it follows a code block ( because they are not always wanted
1960 # there and may add clutter)
1961 || $rLL_new->[$Kp]->[_BLOCK_TYPE_]
1963 # it would follow a label
1964 || $previous_nonblank_type eq 'J'
1966 # it would be inside a 'format' statement (and cause syntax error)
1967 || ( $previous_nonblank_type eq 'k'
1968 && $previous_nonblank_token =~ /format/ )
1970 # if it would prevent welding two containers
1971 || $rpaired_to_inner_container->{$type_sequence}
1975 # We will insert an empty semicolon here as a placeholder. Later, if
1976 # it becomes the last token on a line, we will bring it to life. The
1977 # advantage of doing this is that (1) we just have to check line
1978 # endings, and (2) the phantom semicolon has zero width and therefore
1979 # won't cause needless breaks of one-line blocks.
1981 if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
1982 && $want_left_space{';'} == WS_NO )
1985 # convert the blank into a semicolon..
1986 # be careful: we are working on the new stack top
1987 # on a token which has been stored.
1988 my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', ' ' );
1990 # Convert the existing blank to a semicolon
1991 $rLL_new->[$Ktop]->[_TOKEN_] = ''; # zero length
1992 $rLL_new->[$Ktop]->[_TYPE_] = ';';
1993 $rLL_new->[$Ktop]->[_SLEVEL_] =
1994 $rLL->[$KK]->[_SLEVEL_];
1996 push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
1998 # Then store a new blank
1999 $store_token->($rcopy);
2003 # insert a new token
2004 my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', '' );
2005 $rcopy->[_SLEVEL_] = $rLL->[$KK]->[_SLEVEL_];
2006 $store_token->($rcopy);
2007 push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
2013 # Check that a quote looks okay
2014 # This sub works but needs to by sync'd with the log file output
2015 # before it can be used.
2016 my ( $KK, $Kfirst ) = @_;
2017 my $token = $rLL->[$KK]->[_TOKEN_];
2018 note_embedded_tab() if ( $token =~ "\t" );
2020 my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
2021 return unless ( defined($Kp) );
2022 my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
2023 my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
2025 my $previous_nonblank_type_2 = 'b';
2026 my $previous_nonblank_token_2 = "";
2027 my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
2028 if ( defined($Kpp) ) {
2029 $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_];
2030 $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
2033 my $Kn = $self->K_next_nonblank($KK);
2034 my $next_nonblank_token = "";
2035 if ( defined($Kn) ) {
2036 $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
2039 my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
2040 my $type_0 = $rLL->[$Kfirst]->[_TYPE_];
2042 # make note of something like '$var = s/xxx/yyy/;'
2043 # in case it should have been '$var =~ s/xxx/yyy/;'
2045 $token =~ /^(s|tr|y|m|\/)/
2046 && $previous_nonblank_token =~ /^(=|==|!=)$/
2048 # preceded by simple scalar
2049 && $previous_nonblank_type_2 eq 'i'
2050 && $previous_nonblank_token_2 =~ /^\$/
2052 # followed by some kind of termination
2053 # (but give complaint if we can not see far enough ahead)
2054 && $next_nonblank_token =~ /^[; \)\}]$/
2056 # scalar is not declared
2057 && !( $type_0 eq 'k' && $token_0 =~ /^(my|our|local)$/ )
2060 my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
2062 "Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
2067 # Main loop over all lines of the file
2072 # Testing option to break qw. Do not use; it can make a mess.
2073 my $ALLOW_BREAK_MULTILINE_QW = 0;
2074 my $in_multiline_qw;
2075 foreach my $line_of_tokens ( @{$rlines} ) {
2077 $input_line_number = $line_of_tokens->{_line_number};
2078 my $last_line_type = $line_type;
2079 $line_type = $line_of_tokens->{_line_type};
2080 next unless ( $line_type eq 'CODE' );
2081 my $last_CODE_type = $CODE_type;
2082 $CODE_type = $line_of_tokens->{_code_type};
2083 my $rK_range = $line_of_tokens->{_rK_range};
2084 my ( $Kfirst, $Klast ) = @{$rK_range};
2085 next unless defined($Kfirst);
2087 # Check for correct sequence of token indexes...
2088 # An error here means that sub write_line() did not correctly
2089 # package the tokenized lines as it received them.
2090 if ( defined($last_K_out) ) {
2091 if ( $Kfirst != $last_K_out + 1 ) {
2093 "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
2098 if ( $Kfirst != 0 ) {
2099 Fault("Program Bug: first K is $Kfirst but should be 0");
2102 $last_K_out = $Klast;
2104 # Handle special lines of code
2105 if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
2107 # CODE_types are as follows.
2109 # 'VB' = Verbatim - line goes out verbatim
2110 # 'FS' = Format Skipping - line goes out verbatim, no blanks
2111 # 'IO' = Indent Only - only indentation may be changed
2112 # 'NIN' = No Internal Newlines - line does not get broken
2113 # 'HSC'=Hanging Side Comment - fix this hanging side comment
2114 # 'BC'=Block Comment - an ordinary full line comment
2115 # 'SBC'=Static Block Comment - a block comment which does not get
2117 # 'SBCX'=Static Block Comment Without Leading Space
2118 # 'DEL'=Delete this line
2119 # 'VER'=VERSION statement
2120 # '' or (undefined) - no restructions
2122 # For a hanging side comment we insert an empty quote before
2123 # the comment so that it becomes a normal side comment and
2124 # will be aligned by the vertical aligner
2125 if ( $CODE_type eq 'HSC' ) {
2127 # Safety Check: This must be a line with one token (a comment)
2128 my $rtoken_vars = $rLL->[$Kfirst];
2129 if ( $Kfirst == $Klast && $rtoken_vars->[_TYPE_] eq '#' ) {
2131 # Note that even if the flag 'noadd-whitespace' is set, we
2132 # will make an exception here and allow a blank to be
2133 # inserted to push the comment to the right. We can think
2134 # of this as an adjustment of indentation rather than
2135 # whitespace between tokens. This will also prevent the
2136 # hanging side comment from getting converted to a block
2137 # comment if whitespace gets deleted, as for example with
2138 # the -extrude and -mangle options.
2139 my $rcopy = copy_token_as_type( $rtoken_vars, 'q', '' );
2140 $store_token->($rcopy);
2141 $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' );
2142 $store_token->($rcopy);
2143 $store_token->($rtoken_vars);
2148 # This line was mis-marked by sub scan_comment
2150 "Program bug. A hanging side comment has been mismarked"
2155 # Copy tokens unchanged
2156 foreach my $KK ( $Kfirst .. $Klast ) {
2157 $store_token->( $rLL->[$KK] );
2162 # Handle normal line..
2164 # Insert any essential whitespace between lines
2165 # if last line was normal CODE.
2166 # Patch for rt #125012: use K_previous_code rather than '_nonblank'
2167 # because comments may disappear.
2168 my $type_next = $rLL->[$Kfirst]->[_TYPE_];
2169 my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
2170 my $Kp = $self->K_previous_code( undef, $rLL_new );
2171 if ( $last_line_type eq 'CODE'
2172 && $type_next ne 'b'
2175 my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
2176 my $type_p = $rLL_new->[$Kp]->[_TYPE_];
2178 my ( $token_pp, $type_pp );
2179 my $Kpp = $self->K_previous_code( $Kp, $rLL_new );
2180 if ( defined($Kpp) ) {
2181 $token_pp = $rLL_new->[$Kpp]->[_TOKEN_];
2182 $type_pp = $rLL_new->[$Kpp]->[_TYPE_];
2190 is_essential_whitespace(
2191 $token_pp, $type_pp, $token_p,
2192 $type_p, $token_next, $type_next,
2197 # Copy this first token as blank, but use previous line number
2198 my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', ' ' );
2199 $rcopy->[_LINE_INDEX_] =
2200 $rLL_new->[-1]->[_LINE_INDEX_];
2201 $store_token->($rcopy);
2205 # loop to copy all tokens on this line, with any changes
2207 for ( my $KK = $Kfirst ; $KK <= $Klast ; $KK++ ) {
2208 $rtoken_vars = $rLL->[$KK];
2209 my $token = $rtoken_vars->[_TOKEN_];
2210 my $type = $rtoken_vars->[_TYPE_];
2211 my $last_type_sequence = $type_sequence;
2212 $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
2214 # Handle a blank space ...
2215 if ( $type eq 'b' ) {
2217 # Delete it if not wanted by whitespace rules
2218 # or we are deleting all whitespace
2219 # Note that whitespace flag is a flag indicating whether a
2220 # white space BEFORE the token is needed
2221 next if ( $KK >= $Kmax ); # skip terminal blank
2222 my $Knext = $KK + 1;
2223 my $ws = $rwhitespace_flags->[$Knext];
2225 || $rOpts_delete_old_whitespace )
2228 # FIXME: maybe switch to using _new
2229 my $Kp = $self->K_previous_nonblank($KK);
2230 next unless defined($Kp);
2231 my $token_p = $rLL->[$Kp]->[_TOKEN_];
2232 my $type_p = $rLL->[$Kp]->[_TYPE_];
2234 my ( $token_pp, $type_pp );
2236 #my $Kpp = $K_previous_nonblank->($Kp);
2237 my $Kpp = $self->K_previous_nonblank($Kp);
2238 if ( defined($Kpp) ) {
2239 $token_pp = $rLL->[$Kpp]->[_TOKEN_];
2240 $type_pp = $rLL->[$Kpp]->[_TYPE_];
2246 my $token_next = $rLL->[$Knext]->[_TOKEN_];
2247 my $type_next = $rLL->[$Knext]->[_TYPE_];
2249 my $do_not_delete = is_essential_whitespace(
2250 $token_pp, $type_pp, $token_p,
2251 $type_p, $token_next, $type_next,
2254 next unless ($do_not_delete);
2257 # make it just one character if allowed
2258 if ($rOpts_add_whitespace) {
2259 $rtoken_vars->[_TOKEN_] = ' ';
2261 $store_token->($rtoken_vars);
2265 # Handle a nonblank token...
2267 # check for a qw quote
2268 if ( $type eq 'q' ) {
2270 # trim blanks from right of qw quotes
2271 # (To avoid trimming qw quotes use -ntqw; the tokenizer handles
2274 $rtoken_vars->[_TOKEN_] = $token;
2275 note_embedded_tab() if ( $token =~ "\t" );
2277 if ($in_multiline_qw) {
2279 # If we are at the end of a multiline qw ..
2280 if ( $in_multiline_qw == $KK ) {
2282 # Split off the closing delimiter character
2283 # so that the formatter can put a line break there if necessary
2285 my $part2 = substr( $part1, -1, 1, "" );
2289 copy_token_as_type( $rtoken_vars, 'q', $part1 );
2290 $store_token->($rcopy);
2292 $rtoken_vars->[_TOKEN_] = $token;
2295 $in_multiline_qw = undef;
2297 # store without preceding blank
2298 $store_token->($rtoken_vars);
2302 # continuing a multiline qw
2303 $store_token->($rtoken_vars);
2310 # we are encountered new qw token...see if multiline
2311 my $K_end = $K_end_q->($KK);
2312 if ( $ALLOW_BREAK_MULTILINE_QW && $K_end != $KK ) {
2314 # Starting multiline qw...
2315 # set flag equal to the ending K
2316 $in_multiline_qw = $K_end;
2318 # Split off the leading part
2319 # so that the formatter can put a line break there if necessary
2320 if ( $token =~ /^(qw\s*.)(.*)$/ ) {
2325 copy_token_as_type( $rtoken_vars, 'q',
2327 $store_token_and_space->(
2328 $rcopy, $rwhitespace_flags->[$KK] == WS_YES
2331 $rtoken_vars->[_TOKEN_] = $token;
2333 # Second part goes without intermediate blank
2334 $store_token->($rtoken_vars);
2341 # this is a new single token qw -
2342 # store with possible preceding blank
2343 $store_token_and_space->(
2344 $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
2349 } ## end if ( $type eq 'q' )
2351 # Modify certain tokens here for whitespace
2352 # The following is not yet done, but could be:
2354 elsif ( $type =~ /^[wit]$/ ) {
2356 # Examples: <<snippets/space1.in>>
2357 # change '$ var' to '$var' etc
2358 # '-> new' to '->new'
2359 if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
2361 $rtoken_vars->[_TOKEN_] = $token;
2364 # Split identifiers with leading arrows, inserting blanks if
2365 # necessary. It is easier and safer here than in the
2366 # tokenizer. For example '->new' becomes two tokens, '->' and
2367 # 'new' with a possible blank between.
2369 # Note: there is a related patch in sub set_whitespace_flags
2370 if ( $token =~ /^\-\>(.*)$/ && $1 ) {
2371 my $token_save = $1;
2372 my $type_save = $type;
2374 # store a blank to left of arrow if necessary
2375 my $Kprev = $self->K_previous_nonblank($KK);
2376 if ( defined($Kprev)
2377 && $rLL->[$Kprev]->[_TYPE_] ne 'b'
2378 && $rOpts_add_whitespace
2379 && $want_left_space{'->'} == WS_YES )
2382 copy_token_as_type( $rtoken_vars, 'b', ' ' );
2383 $store_token->($rcopy);
2386 # then store the arrow
2387 my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' );
2388 $store_token->($rcopy);
2390 # then reset the current token to be the remainder,
2391 # and reset the whitespace flag according to the arrow
2392 $token = $rtoken_vars->[_TOKEN_] = $token_save;
2393 $type = $rtoken_vars->[_TYPE_] = $type_save;
2394 $store_token->($rtoken_vars);
2398 if ( $token =~ /$SUB_PATTERN/ ) {
2399 $token =~ s/\s+/ /g;
2400 $rtoken_vars->[_TOKEN_] = $token;
2403 # trim identifiers of trailing blanks which can occur
2404 # under some unusual circumstances, such as if the
2405 # identifier 'witch' has trailing blanks on input here:
2409 # () # prototype may be on new line ...
2411 if ( $type eq 'i' ) {
2412 $token =~ s/\s+$//g;
2413 $rtoken_vars->[_TOKEN_] = $token;
2417 # change 'LABEL :' to 'LABEL:'
2418 elsif ( $type eq 'J' ) {
2420 $rtoken_vars->[_TOKEN_] = $token;
2423 # patch to add space to something like "x10"
2424 # This avoids having to split this token in the pre-tokenizer
2425 elsif ( $type eq 'n' ) {
2426 if ( $token =~ /^x\d+/ ) {
2428 $rtoken_vars->[_TOKEN_] = $token;
2432 # check a quote for problems
2433 elsif ( $type eq 'Q' ) {
2435 # This is ready to go but is commented out because there is
2436 # still identical logic in sub break_lines.
2437 # $check_Q->($KK, $Kfirst);
2440 elsif ($type_sequence) {
2442 # if ( $is_opening_token{$token} ) {
2445 if ( $is_closing_token{$token} ) {
2447 # Insert a tentative missing semicolon if the next token is
2448 # a closing block brace
2453 # not preceded by a ';'
2454 && $last_nonblank_type ne ';'
2456 # and this is not a VERSION stmt (is all one line, we are not
2457 # inserting semicolons on one-line blocks)
2458 && $CODE_type ne 'VER'
2460 # and we are allowed to add semicolons
2461 && $rOpts->{'add-semicolons'}
2464 $add_phantom_semicolon->($KK);
2469 # Store this token with possible previous blank
2470 $store_token_and_space->(
2471 $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
2477 # Reset memory to be the new array
2478 $self->{rLL} = $rLL_new;
2479 $self->set_rLL_max_index();
2480 $self->{K_opening_container} = $K_opening_container;
2481 $self->{K_closing_container} = $K_closing_container;
2482 $self->{K_opening_ternary} = $K_opening_ternary;
2483 $self->{K_closing_ternary} = $K_closing_ternary;
2484 $self->{rK_phantom_semicolons} = $rK_phantom_semicolons;
2486 # make sure the new array looks okay
2487 $self->check_token_array();
2489 # reset the token limits of each line
2490 $self->resync_lines_and_tokens();
2497 my $Last_line_had_side_comment;
2498 my $In_format_skipping_section;
2499 my $Saw_VERSION_in_this_file;
2503 my $rlines = $self->{rlines};
2505 $Last_line_had_side_comment = undef;
2506 $In_format_skipping_section = undef;
2507 $Saw_VERSION_in_this_file = undef;
2509 # Loop over all lines
2510 foreach my $line_of_tokens ( @{$rlines} ) {
2511 my $line_type = $line_of_tokens->{_line_type};
2512 next unless ( $line_type eq 'CODE' );
2513 my $CODE_type = $self->get_CODE_type($line_of_tokens);
2514 $line_of_tokens->{_code_type} = $CODE_type;
2520 my ( $self, $line_of_tokens ) = @_;
2522 # We are looking at a line of code and setting a flag to
2523 # describe any special processing that it requires
2525 # Possible CODE_types are as follows.
2527 # 'VB' = Verbatim - line goes out verbatim
2528 # 'IO' = Indent Only - line goes out unchanged except for indentation
2529 # 'NIN' = No Internal Newlines - line does not get broken
2530 # 'HSC'=Hanging Side Comment - fix this hanging side comment
2531 # 'BC'=Block Comment - an ordinary full line comment
2532 # 'SBC'=Static Block Comment - a block comment which does not get
2534 # 'SBCX'=Static Block Comment Without Leading Space
2535 # 'DEL'=Delete this line
2536 # 'VER'=VERSION statement
2537 # '' or (undefined) - no restructions
2539 my $rLL = $self->{rLL};
2540 my $Klimit = $self->{Klimit};
2542 my $CODE_type = $rOpts->{'indent-only'} ? 'IO' : "";
2543 my $no_internal_newlines = 1 - $rOpts_add_newlines;
2544 if ( !$CODE_type && $no_internal_newlines ) { $CODE_type = 'NIN' }
2546 # extract what we need for this line..
2548 # Global value for error messages:
2549 $input_line_number = $line_of_tokens->{_line_number};
2551 my $rK_range = $line_of_tokens->{_rK_range};
2552 my ( $Kfirst, $Klast ) = @{$rK_range};
2554 if ( defined($Kfirst) ) { $jmax = $Klast - $Kfirst }
2555 my $input_line = $line_of_tokens->{_line_text};
2556 my $in_continued_quote = my $starting_in_quote =
2557 $line_of_tokens->{_starting_in_quote};
2558 my $in_quote = $line_of_tokens->{_ending_in_quote};
2559 my $ending_in_quote = $in_quote;
2560 my $guessed_indentation_level =
2561 $line_of_tokens->{_guessed_indentation_level};
2563 my $is_static_block_comment = 0;
2565 # Handle a continued quote..
2566 if ($in_continued_quote) {
2568 # A line which is entirely a quote or pattern must go out
2569 # verbatim. Note: the \n is contained in $input_line.
2571 if ( ( $input_line =~ "\t" ) ) {
2572 note_embedded_tab();
2574 $Last_line_had_side_comment = 0;
2579 my $is_block_comment =
2580 ( $jmax == 0 && $rLL->[$Kfirst]->[_TYPE_] eq '#' );
2582 # Write line verbatim if we are in a formatting skip section
2583 if ($In_format_skipping_section) {
2584 $Last_line_had_side_comment = 0;
2586 # Note: extra space appended to comment simplifies pattern matching
2587 if ( $is_block_comment
2588 && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
2589 /$format_skipping_pattern_end/o )
2591 $In_format_skipping_section = 0;
2592 write_logfile_entry("Exiting formatting skip section\n");
2597 # See if we are entering a formatting skip section
2598 if ( $rOpts_format_skipping
2599 && $is_block_comment
2600 && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
2601 /$format_skipping_pattern_begin/o )
2603 $In_format_skipping_section = 1;
2604 write_logfile_entry("Entering formatting skip section\n");
2605 $Last_line_had_side_comment = 0;
2609 # ignore trailing blank tokens (they will get deleted later)
2610 if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
2614 # Handle a blank line..
2616 $Last_line_had_side_comment = 0;
2620 # see if this is a static block comment (starts with ## by default)
2621 my $is_static_block_comment_without_leading_space = 0;
2622 if ( $is_block_comment
2623 && $rOpts->{'static-block-comments'}
2624 && $input_line =~ /$static_block_comment_pattern/o )
2626 $is_static_block_comment = 1;
2627 $is_static_block_comment_without_leading_space =
2628 substr( $input_line, 0, 1 ) eq '#';
2631 # Check for comments which are line directives
2632 # Treat exactly as static block comments without leading space
2633 # reference: perlsyn, near end, section Plain Old Comments (Not!)
2634 # example: '# line 42 "new_filename.plx"'
2637 && $input_line =~ /^\# \s*
2639 (?:\s("?)([^"]+)\2)? \s*
2643 $is_static_block_comment = 1;
2644 $is_static_block_comment_without_leading_space = 1;
2647 # look for hanging side comment
2650 && $Last_line_had_side_comment # last line had side comment
2651 && $input_line =~ /^\s/ # there is some leading space
2652 && !$is_static_block_comment # do not make static comment hanging
2653 && $rOpts->{'hanging-side-comments'} # user is allowing
2654 # hanging side comments
2658 $Last_line_had_side_comment = 1;
2662 # remember if this line has a side comment
2663 $Last_line_had_side_comment =
2664 ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq '#' );
2666 # Handle a block (full-line) comment..
2667 if ($is_block_comment) {
2669 if ( $rOpts->{'delete-block-comments'} ) { return 'DEL' }
2671 # TRIM COMMENTS -- This could be turned off as a option
2672 $rLL->[$Kfirst]->[_TOKEN_] =~ s/\s*$//; # trim right end
2674 if ($is_static_block_comment_without_leading_space) {
2677 elsif ($is_static_block_comment) {
2686 # NOTE: This does not work yet. Version in print-line-of-tokens
2687 # is Still used until fixed
2689 # compare input/output indentation except for continuation lines
2690 # (because they have an unknown amount of initial blank space)
2691 # and lines which are quotes (because they may have been outdented)
2692 # Note: this test is placed here because we know the continuation flag
2693 # at this point, which allows us to avoid non-meaningful checks.
2694 my $structural_indentation_level = $rLL->[$Kfirst]->[_LEVEL_];
2695 compare_indentation_levels( $guessed_indentation_level,
2696 $structural_indentation_level )
2697 unless ( $rLL->[$Kfirst]->[_CI_LEVEL_] > 0
2698 || $guessed_indentation_level == 0
2699 && $rLL->[$Kfirst]->[_TYPE_] eq 'Q' );
2702 # Patch needed for MakeMaker. Do not break a statement
2703 # in which $VERSION may be calculated. See MakeMaker.pm;
2704 # this is based on the coding in it.
2705 # The first line of a file that matches this will be eval'd:
2706 # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
2708 # *VERSION = \'1.01';
2709 # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
2710 # We will pass such a line straight through without breaking
2711 # it unless -npvl is used.
2713 # Patch for problem reported in RT #81866, where files
2714 # had been flattened into a single line and couldn't be
2715 # tidied without -npvl. There are two parts to this patch:
2716 # First, it is not done for a really long line (80 tokens for now).
2717 # Second, we will only allow up to one semicolon
2718 # before the VERSION. We need to allow at least one semicolon
2719 # for statements like this:
2720 # require Exporter; our $VERSION = $Exporter::VERSION;
2721 # where both statements must be on a single line for MakeMaker
2723 my $is_VERSION_statement = 0;
2724 if ( !$Saw_VERSION_in_this_file
2727 /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
2729 $Saw_VERSION_in_this_file = 1;
2730 write_logfile_entry("passing VERSION line; -npvl deactivates\n");
2737 sub find_nested_pairs {
2740 my $rLL = $self->{rLL};
2741 return unless ( defined($rLL) && @{$rLL} );
2743 # We define an array of pairs of nested containers
2746 # We also set the following hash values to identify container pairs for
2747 # which the opening and closing tokens are adjacent in the token stream:
2748 # $rpaired_to_inner_container->{$seqno_out}=$seqno_in where $seqno_out and
2749 # $seqno_in are the seqence numbers of the outer and inner containers of
2750 # the pair We need these later to decide if we can insert a missing
2752 my $rpaired_to_inner_container = {};
2754 # This local hash remembers if an outer container has a close following
2756 # The key is the outer sequence number
2757 # The value is the token_hash of the inner container
2759 my %has_close_following_opening;
2761 # Names of calling routines can either be marked as 'i' or 'w',
2762 # and they may invoke a sub call with an '->'. We will consider
2763 # any consecutive string of such types as a single unit when making
2764 # weld decisions. We also allow a leading !
2765 my $is_name_type = {
2775 return $type && $is_name_type->{$type};
2779 my $last_last_container;
2780 my $last_nonblank_token_vars;
2783 my $nonblank_token_count = 0;
2785 # loop over all tokens
2786 foreach my $rtoken_vars ( @{$rLL} ) {
2788 my $type = $rtoken_vars->[_TYPE_];
2790 next if ( $type eq 'b' );
2792 # long identifier-like items are counted as a single item
2793 $nonblank_token_count++
2794 unless ( $is_name->($type)
2795 && $is_name->( $last_nonblank_token_vars->[_TYPE_] ) );
2797 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
2798 if ($type_sequence) {
2800 my $token = $rtoken_vars->[_TOKEN_];
2802 if ( $is_opening_token{$token} ) {
2804 # following previous opening token ...
2805 if ( $last_container
2806 && $is_opening_token{ $last_container->[_TOKEN_] } )
2809 # adjacent to this one
2810 my $tok_diff = $nonblank_token_count - $last_count;
2812 my $last_tok = $last_nonblank_token_vars->[_TOKEN_];
2815 || $tok_diff == 2 && $last_container->[_TOKEN_] eq '(' )
2818 # remember this pair...
2819 my $outer_seqno = $last_container->[_TYPE_SEQUENCE_];
2820 my $inner_seqno = $type_sequence;
2821 $has_close_following_opening{$outer_seqno} =
2827 elsif ( $is_closing_token{$token} ) {
2829 # if the corresponding opening token had an adjacent opening
2830 if ( $has_close_following_opening{$type_sequence}
2831 && $is_closing_token{ $last_container->[_TOKEN_] }
2832 && $has_close_following_opening{$type_sequence}
2833 ->[_TYPE_SEQUENCE_] == $last_container->[_TYPE_SEQUENCE_] )
2836 # The closing weld tokens must be adjacent
2837 # NOTE: so intermediate commas and semicolons
2838 # can currently block a weld. This is something
2839 # that could be fixed in the future by including
2840 # a flag to delete un-necessary commas and semicolons.
2841 my $tok_diff = $nonblank_token_count - $last_count;
2843 if ( $tok_diff == 1 ) {
2845 # This is a closely nested pair ..
2846 my $inner_seqno = $last_container->[_TYPE_SEQUENCE_];
2847 my $outer_seqno = $type_sequence;
2848 $rpaired_to_inner_container->{$outer_seqno} =
2851 push @nested_pairs, [ $inner_seqno, $outer_seqno ];
2856 $last_last_container = $last_container;
2857 $last_container = $rtoken_vars;
2858 $last_count = $nonblank_token_count;
2860 $last_nonblank_token_vars = $rtoken_vars;
2862 $self->{rnested_pairs} = \@nested_pairs;
2863 $self->{rpaired_to_inner_container} = $rpaired_to_inner_container;
2869 # a debug routine, not normally used
2870 my ( $self, $msg ) = @_;
2871 my $rLL = $self->{rLL};
2872 my $nvars = @{$rLL};
2873 print STDERR "$msg\n";
2874 print STDERR "ntokens=$nvars\n";
2875 print STDERR "K\t_TOKEN_\t_TYPE_\n";
2877 foreach my $item ( @{$rLL} ) {
2878 print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n";
2884 sub get_old_line_index {
2885 my ( $self, $K ) = @_;
2886 my $rLL = $self->{rLL};
2887 return 0 unless defined($K);
2888 return $rLL->[$K]->[_LINE_INDEX_];
2891 sub get_old_line_count {
2892 my ( $self, $Kbeg, $Kend ) = @_;
2893 my $rLL = $self->{rLL};
2894 return 0 unless defined($Kbeg);
2895 return 0 unless defined($Kend);
2896 return $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_] + 1;
2900 my ( $self, $KK, $rLL ) = @_;
2902 # return the index K of the next nonblank, non-comment token
2903 return unless ( defined($KK) && $KK >= 0 );
2905 # use the standard array unless given otherwise
2906 $rLL = $self->{rLL} unless ( defined($rLL) );
2909 while ( $Knnb < $Num ) {
2910 if ( !defined( $rLL->[$Knnb] ) ) {
2911 Fault("Undefined entry for k=$Knnb");
2913 if ( $rLL->[$Knnb]->[_TYPE_] ne 'b'
2914 && $rLL->[$Knnb]->[_TYPE_] ne '#' )
2923 sub K_next_nonblank {
2924 my ( $self, $KK, $rLL ) = @_;
2926 # return the index K of the next nonblank token
2927 return unless ( defined($KK) && $KK >= 0 );
2929 # use the standard array unless given otherwise
2930 $rLL = $self->{rLL} unless ( defined($rLL) );
2933 while ( $Knnb < $Num ) {
2934 if ( !defined( $rLL->[$Knnb] ) ) {
2935 Fault("Undefined entry for k=$Knnb");
2937 if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
2943 sub K_previous_code {
2945 # return the index K of the previous nonblank, non-comment token
2946 # Call with $KK=undef to start search at the top of the array
2947 my ( $self, $KK, $rLL ) = @_;
2949 # use the standard array unless given otherwise
2950 $rLL = $self->{rLL} unless ( defined($rLL) );
2952 if ( !defined($KK) ) { $KK = $Num }
2953 elsif ( $KK > $Num ) {
2955 # The caller should make the first call with KK_new=undef to
2958 "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
2962 while ( $Kpnb >= 0 ) {
2963 if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b'
2964 && $rLL->[$Kpnb]->[_TYPE_] ne '#' )
2973 sub K_previous_nonblank {
2975 # return index of previous nonblank token before item K;
2976 # Call with $KK=undef to start search at the top of the array
2977 my ( $self, $KK, $rLL ) = @_;
2979 # use the standard array unless given otherwise
2980 $rLL = $self->{rLL} unless ( defined($rLL) );
2982 if ( !defined($KK) ) { $KK = $Num }
2983 elsif ( $KK > $Num ) {
2985 # The caller should make the first call with KK_new=undef to
2988 "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
2992 while ( $Kpnb >= 0 ) {
2993 if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
2999 sub weld_containers {
3001 # do any welding operations
3004 # initialize weld length hashes needed later for checking line lengths
3005 # TODO: These should eventually be stored in $self rather than be package vars
3006 %weld_len_left_closing = ();
3007 %weld_len_right_closing = ();
3008 %weld_len_left_opening = ();
3009 %weld_len_right_opening = ();
3011 return if ( $rOpts->{'indent-only'} );
3012 return unless ($rOpts_add_newlines);
3014 if ( $rOpts->{'weld-nested-containers'} ) {
3016 # if called, weld_nested_containers must be called before other weld
3017 # operations. # This is because weld_nested_containers could overwrite
3018 # hash values written by weld_cuddled_blocks and weld_nested_quotes.
3019 $self->weld_nested_containers();
3021 $self->weld_nested_quotes();
3024 # Note that weld_nested_containers() changes the _LEVEL_ values, so
3025 # weld_cuddled_blocks must use the _TRUE_LEVEL_ values instead.
3027 # Here is a good test case to Be sure that both cuddling and welding
3028 # are working and not interfering with each other: <<snippets/ce_wn1.in>>
3032 # if ($BOLD_MATH) { (
3033 # $labels, $comment,
3034 # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
3036 # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
3040 $self->weld_cuddled_blocks();
3045 sub cumulative_length_before_K {
3046 my ( $self, $KK ) = @_;
3047 my $rLL = $self->{rLL};
3048 return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
3051 sub cumulative_length_after_K {
3052 my ( $self, $KK ) = @_;
3053 my $rLL = $self->{rLL};
3054 return $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
3057 sub weld_cuddled_blocks {
3060 # This routine implements the -cb flag by finding the appropriate
3061 # closing and opening block braces and welding them together.
3062 return unless ( %{$rcuddled_block_types} );
3064 my $rLL = $self->{rLL};
3065 return unless ( defined($rLL) && @{$rLL} );
3066 my $rbreak_container = $self->{rbreak_container};
3068 my $K_opening_container = $self->{K_opening_container};
3069 my $K_closing_container = $self->{K_closing_container};
3071 my $length_to_opening_seqno = sub {
3073 my $KK = $K_opening_container->{$seqno};
3074 my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
3077 my $length_to_closing_seqno = sub {
3079 my $KK = $K_closing_container->{$seqno};
3080 my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
3084 my $is_broken_block = sub {
3086 # a block is broken if the input line numbers of the braces differ
3087 # we can only cuddle between broken blocks
3089 my $K_opening = $K_opening_container->{$seqno};
3090 return unless ( defined($K_opening) );
3091 my $K_closing = $K_closing_container->{$seqno};
3092 return unless ( defined($K_closing) );
3093 return $rbreak_container->{$seqno}
3094 || $rLL->[$K_closing]->[_LINE_INDEX_] !=
3095 $rLL->[$K_opening]->[_LINE_INDEX_];
3098 # A stack to remember open chains at all levels:
3099 # $in_chain[$level] = [$chain_type, $type_sequence];
3101 my $CBO = $rOpts->{'cuddled-break-option'};
3103 # loop over structure items to find cuddled pairs
3106 while ( defined( $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] ) ) {
3107 my $rtoken_vars = $rLL->[$KK];
3108 my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
3109 if ( !$type_sequence ) {
3110 Fault("sequence = $type_sequence not defined");
3113 # We use the original levels because they get changed by sub
3114 # 'weld_nested_containers'. So if this were to be called before that
3115 # routine, the levels would be wrong and things would go bad.
3116 my $last_level = $level;
3117 $level = $rtoken_vars->[_LEVEL_TRUE_];
3119 if ( $level < $last_level ) { $in_chain[$last_level] = undef }
3120 elsif ( $level > $last_level ) { $in_chain[$level] = undef }
3122 # We are only looking at code blocks
3123 my $token = $rtoken_vars->[_TOKEN_];
3124 my $type = $rtoken_vars->[_TYPE_];
3125 next unless ( $type eq $token );
3127 if ( $token eq '{' ) {
3129 my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
3130 if ( !$block_type ) {
3132 # patch for unrecognized block types which may not be labeled
3133 my $Kp = $self->K_previous_nonblank($KK);
3134 while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
3135 $Kp = $self->K_previous_nonblank($Kp);
3138 $block_type = $rLL->[$Kp]->[_TOKEN_];
3140 if ( $in_chain[$level] ) {
3142 # we are in a chain and are at an opening block brace.
3143 # See if we are welding this opening brace with the previous
3144 # block brace. Get their identification numbers:
3145 my $closing_seqno = $in_chain[$level]->[1];
3146 my $opening_seqno = $type_sequence;
3148 # The preceding block must be on multiple lines so that its
3149 # closing brace will start a new line.
3150 if ( !$is_broken_block->($closing_seqno) ) {
3151 next unless ( $CBO == 2 );
3152 $rbreak_container->{$closing_seqno} = 1;
3155 # we will let the trailing block be either broken or intact
3156 ## && $is_broken_block->($opening_seqno);
3158 # We can weld the closing brace to its following word ..
3159 my $Ko = $K_closing_container->{$closing_seqno};
3160 my $Kon = $self->K_next_nonblank($Ko);
3162 # ..unless it is a comment
3163 if ( $rLL->[$Kon]->[_TYPE_] ne '#' ) {
3165 $rLL->[$Kon]->[_CUMULATIVE_LENGTH_] -
3166 $rLL->[ $Ko - 1 ]->[_CUMULATIVE_LENGTH_];
3167 $weld_len_right_closing{$closing_seqno} = $dlen;
3169 # Set flag that we want to break the next container
3170 # so that the cuddled line is balanced.
3171 $rbreak_container->{$opening_seqno} = 1
3178 # We are not in a chain. Start a new chain if we see the
3179 # starting block type.
3180 if ( $rcuddled_block_types->{$block_type} ) {
3181 $in_chain[$level] = [ $block_type, $type_sequence ];
3185 $in_chain[$level] = [ $block_type, $type_sequence ];
3189 elsif ( $token eq '}' ) {
3190 if ( $in_chain[$level] ) {
3192 # We are in a chain at a closing brace. See if this chain
3194 my $Knn = $self->K_next_code($KK);
3197 my $chain_type = $in_chain[$level]->[0];
3198 my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
3200 $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
3204 # Note that we do not weld yet because we must wait until
3205 # we we are sure that an opening brace for this follows.
3206 $in_chain[$level]->[1] = $type_sequence;
3208 else { $in_chain[$level] = undef }
3216 sub weld_nested_containers {
3219 # This routine implements the -wn flag by "welding together"
3220 # the nested closing and opening tokens which were previously
3221 # identified by sub 'find_nested_pairs'. "welding" simply
3222 # involves setting certain hash values which will be checked
3223 # later during formatting.
3225 my $rLL = $self->{rLL};
3226 my $Klimit = $self->get_rLL_max_index();
3227 my $rnested_pairs = $self->{rnested_pairs};
3228 my $rlines = $self->{rlines};
3229 my $K_opening_container = $self->{K_opening_container};
3230 my $K_closing_container = $self->{K_closing_container};
3232 # Return unless there are nested pairs to weld
3233 return unless defined($rnested_pairs) && @{$rnested_pairs};
3235 # This array will hold the sequence numbers of the tokens to be welded.
3238 # Variables needed for estimating line lengths
3239 my $starting_indent;
3240 my $starting_lentot;
3242 # A tolerance to the length for length estimates. In some rare cases
3243 # this can avoid problems where a final weld slightly exceeds the
3244 # line length and gets broken in a bad spot.
3247 my $excess_length_to_K = sub {
3250 # Estimate the length from the line start to a given token
3251 my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
3253 $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
3254 return ($excess_length);
3257 my $length_to_opening_seqno = sub {
3259 my $KK = $K_opening_container->{$seqno};
3260 my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
3264 my $length_to_closing_seqno = sub {
3266 my $KK = $K_closing_container->{$seqno};
3267 my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
3268 ##my $lentot = $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
3273 # _oo=outer opening, i.e. first of { {
3274 # _io=inner opening, i.e. second of { {
3275 # _oc=outer closing, i.e. second of } {
3276 # _ic=inner closing, i.e. first of } }
3280 # We are working from outermost to innermost pairs so that
3281 # level changes will be complete when we arrive at the inner pairs.
3283 while ( my $item = pop( @{$rnested_pairs} ) ) {
3284 my ( $inner_seqno, $outer_seqno ) = @{$item};
3286 my $Kouter_opening = $K_opening_container->{$outer_seqno};
3287 my $Kinner_opening = $K_opening_container->{$inner_seqno};
3288 my $Kouter_closing = $K_closing_container->{$outer_seqno};
3289 my $Kinner_closing = $K_closing_container->{$inner_seqno};
3291 my $outer_opening = $rLL->[$Kouter_opening];
3292 my $inner_opening = $rLL->[$Kinner_opening];
3293 my $outer_closing = $rLL->[$Kouter_closing];
3294 my $inner_closing = $rLL->[$Kinner_closing];
3296 my $iline_oo = $outer_opening->[_LINE_INDEX_];
3297 my $iline_io = $inner_opening->[_LINE_INDEX_];
3299 # Set flag saying if this pair starts a new weld
3300 my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
3302 # Set flag saying if this pair is adjacent to the previous nesting pair
3303 # (even if previous pair was rejected as a weld)
3304 my $touch_previous_pair =
3305 defined($previous_pair) && $outer_seqno == $previous_pair->[0];
3306 $previous_pair = $item;
3308 # Set a flag if we should not weld. It sometimes looks best not to weld
3309 # when the opening and closing tokens are very close. However, there
3310 # is a danger that we will create a "blinker", which oscillates between
3311 # two semi-stable states, if we do not weld. So the rules for
3312 # not welding have to be carefully defined and tested.
3314 if ( !$touch_previous_pair ) {
3316 # If this pair is not adjacent to the previous pair (skipped or
3317 # not), then measure lengths from the start of line of oo
3319 my $rK_range = $rlines->[$iline_oo]->{_rK_range};
3320 my ( $Kfirst, $Klast ) = @{$rK_range};
3322 $Kfirst <= 0 ? 0 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
3323 $starting_indent = 0;
3324 if ( !$rOpts_variable_maximum_line_length ) {
3325 my $level = $rLL->[$Kfirst]->[_LEVEL_];
3326 $starting_indent = $rOpts_indent_columns * $level;
3329 # DO-NOT-WELD RULE 1:
3330 # Do not weld something that looks like the start of a two-line
3331 # function call, like this: <<snippets/wn6.in>>
3332 # $trans->add_transformation(
3333 # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
3334 # We will look for a semicolon after the closing paren.
3336 # We want to weld something complex, like this though
3337 # my $compass = uc( opposite_direction( line_to_canvas_direction(
3338 # @{ $coords[0] }, @{ $coords[1] } ) ) );
3339 # Otherwise we will get a 'blinker'
3341 my $iline_oc = $outer_closing->[_LINE_INDEX_];
3342 if ( $iline_oc <= $iline_oo + 1 ) {
3344 # Look for following semicolon...
3345 my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
3346 my $next_nonblank_type =
3347 defined($Knext_nonblank)
3348 ? $rLL->[$Knext_nonblank]->[_TYPE_]
3350 if ( $next_nonblank_type eq ';' ) {
3352 # Then do not weld if no other containers between inner
3353 # opening and closing.
3354 my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
3355 if ( $Knext_seq_item == $Kinner_closing ) {
3362 my $iline_ic = $inner_closing->[_LINE_INDEX_];
3364 # DO-NOT-WELD RULE 2:
3365 # Do not weld an opening paren to an inner one line brace block
3366 # We will just use old line numbers for this test and require
3367 # iterations if necessary for convergence
3369 # For example, otherwise we could cause the opening paren
3370 # in the following example to separate from the caller name
3373 # $_[0]->code_handler
3374 # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
3376 # Here is another example where we do not want to weld:
3377 # $wrapped->add_around_modifier(
3378 # sub { push @tracelog => 'around 1'; $_[0]->(); } );
3380 # If the one line sub block gets broken due to length or by the
3381 # user, then we can weld. The result will then be:
3382 # $wrapped->add_around_modifier( sub {
3383 # push @tracelog => 'around 1';
3387 if ( $iline_ic == $iline_io ) {
3389 my $token_oo = $outer_opening->[_TOKEN_];
3390 my $block_type_io = $inner_opening->[_BLOCK_TYPE_];
3391 my $token_io = $inner_opening->[_TOKEN_];
3392 $do_not_weld ||= $token_oo eq '(' && $token_io eq '{';
3395 # DO-NOT-WELD RULE 3:
3396 # Do not weld if this makes our line too long
3397 $do_not_weld ||= $excess_length_to_K->($Kinner_opening) > 0;
3401 # After neglecting a pair, we start measuring from start of point io
3403 $self->cumulative_length_before_K($Kinner_opening);
3404 $starting_indent = 0;
3405 if ( !$rOpts_variable_maximum_line_length ) {
3406 my $level = $inner_opening->[_LEVEL_];
3407 $starting_indent = $rOpts_indent_columns * $level;
3410 # Normally, a broken pair should not decrease indentation of
3411 # intermediate tokens:
3412 ## if ( $last_pair_broken ) { next }
3413 # However, for long strings of welded tokens, such as '{{{{{{...'
3414 # we will allow broken pairs to also remove indentation.
3415 # This will keep very long strings of opening and closing
3416 # braces from marching off to the right. We will do this if the
3417 # number of tokens in a weld before the broken weld is 4 or more.
3418 # This rule will mainly be needed for test scripts, since typical
3419 # welds have fewer than about 4 welded tokens.
3420 if ( !@welds || @{ $welds[-1] } < 4 ) { next }
3423 # otherwise start new weld ...
3424 elsif ($starting_new_weld) {
3428 # ... or extend current weld
3430 unshift @{ $welds[-1] }, $inner_seqno;
3433 # After welding, reduce the indentation level if all intermediate tokens
3434 my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
3435 if ( $dlevel != 0 ) {
3436 my $Kstart = $Kinner_opening;
3437 my $Kstop = $Kinner_closing;
3438 for ( my $KK = $Kstart ; $KK <= $Kstop ; $KK++ ) {
3439 $rLL->[$KK]->[_LEVEL_] += $dlevel;
3444 # Define weld lengths needed later to set line breaks
3445 foreach my $item (@welds) {
3447 # sweep from inner to outer
3452 foreach my $outer_seqno ( @{$item} ) {
3456 $length_to_opening_seqno->($inner_seqno) -
3457 $length_to_opening_seqno->($outer_seqno);
3460 $length_to_closing_seqno->($outer_seqno) -
3461 $length_to_closing_seqno->($inner_seqno);
3463 $len_open += $dlen_opening;
3464 $len_close += $dlen_closing;
3468 $weld_len_left_closing{$outer_seqno} = $len_close;
3469 $weld_len_right_opening{$outer_seqno} = $len_open;
3471 $inner_seqno = $outer_seqno;
3474 # sweep from outer to inner
3475 foreach my $seqno ( reverse @{$item} ) {
3476 $weld_len_right_closing{$seqno} =
3477 $len_close - $weld_len_left_closing{$seqno};
3478 $weld_len_left_opening{$seqno} =
3479 $len_open - $weld_len_right_opening{$seqno};
3483 #####################################
3485 #####################################
3489 foreach my $weld (@welds) {
3490 print "\nWeld number $count has seq: (@{$weld})\n";
3491 foreach my $seq ( @{$weld} ) {
3494 left_opening=$weld_len_left_opening{$seq};
3495 right_opening=$weld_len_right_opening{$seq};
3496 left_closing=$weld_len_left_closing{$seq};
3497 right_closing=$weld_len_right_closing{$seq};
3507 sub weld_nested_quotes {
3510 my $rLL = $self->{rLL};
3511 return unless ( defined($rLL) && @{$rLL} );
3513 my $K_opening_container = $self->{K_opening_container};
3514 my $K_closing_container = $self->{K_closing_container};
3515 my $rlines = $self->{rlines};
3517 my $is_single_quote = sub {
3518 my ( $Kbeg, $Kend, $quote_type ) = @_;
3519 foreach my $K ( $Kbeg .. $Kend ) {
3520 my $test_type = $rLL->[$K]->[_TYPE_];
3521 next if ( $test_type eq 'b' );
3522 return if ( $test_type ne $quote_type );
3527 my $excess_line_length = sub {
3528 my ( $KK, $Ktest ) = @_;
3530 # what is the excess length if we add token $Ktest to the line with $KK?
3531 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
3532 my $rK_range = $rlines->[$iline]->{_rK_range};
3533 my ( $Kfirst, $Klast ) = @{$rK_range};
3534 my $starting_lentot =
3535 $Kfirst <= 0 ? 0 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
3536 my $starting_indent = 0;
3538 if ( !$rOpts_variable_maximum_line_length ) {
3539 my $level = $rLL->[$Kfirst]->[_LEVEL_];
3540 $starting_indent = $rOpts_indent_columns * $level;
3543 my $length = $rLL->[$Ktest]->[_CUMULATIVE_LENGTH_] - $starting_lentot;
3545 $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
3546 return $excess_length;
3549 # look for single qw quotes nested in containers
3551 while ( defined( $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] ) ) {
3552 my $rtoken_vars = $rLL->[$KK];
3553 my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
3554 if ( !$outer_seqno ) {
3555 Fault("sequence = $outer_seqno not defined");
3558 my $token = $rtoken_vars->[_TOKEN_];
3559 if ( $is_opening_token{$token} ) {
3561 # see if the next token is a quote of some type
3562 my $Kn = $self->K_next_nonblank($KK);
3564 my $next_token = $rLL->[$Kn]->[_TOKEN_];
3565 my $next_type = $rLL->[$Kn]->[_TYPE_];
3567 unless ( ( $next_type eq 'q' || $next_type eq 'Q' )
3568 && $next_token =~ /^q/ );
3570 # The token before the closing container must also be a quote
3571 my $K_closing = $K_closing_container->{$outer_seqno};
3572 my $Kt_end = $self->K_previous_nonblank($K_closing);
3573 next unless $rLL->[$Kt_end]->[_TYPE_] eq $next_type;
3575 # Do not weld to single-line quotes. Nothing is gained, and it may
3577 next if ( $Kt_end == $Kn );
3579 # Only weld to quotes delimited with container tokens. This is
3580 # because welding to arbitrary quote delimiters can produce code
3581 # which is less readable than without welding.
3582 my $closing_delimiter = substr( $rLL->[$Kt_end]->[_TOKEN_], -1, 1 );
3584 unless ( $is_closing_token{$closing_delimiter}
3585 || $closing_delimiter eq '>' );
3587 # Now make sure that there is just a single quote in the container
3589 unless ( $is_single_quote->( $Kn + 1, $Kt_end - 1, $next_type ) );
3591 # If welded, the line must not exceed allowed line length
3592 # Assume old line breaks for this estimate.
3593 next if ( $excess_line_length->( $KK, $Kn ) > 0 );
3596 # FIXME: Are these always correct?
3597 $weld_len_left_closing{$outer_seqno} = 1;
3598 $weld_len_right_opening{$outer_seqno} = 2;
3606 my ( $seqno, $type_or_tok ) = @_;
3608 # Given the sequence number of a token, and the token or its type,
3609 # return the length of any weld to its left
3613 if ( $is_closing_type{$type_or_tok} ) {
3614 $weld_len = $weld_len_left_closing{$seqno};
3616 elsif ( $is_opening_type{$type_or_tok} ) {
3617 $weld_len = $weld_len_left_opening{$seqno};
3620 if ( !defined($weld_len) ) { $weld_len = 0 }
3624 sub weld_len_right {
3626 my ( $seqno, $type_or_tok ) = @_;
3628 # Given the sequence number of a token, and the token or its type,
3629 # return the length of any weld to its right
3633 if ( $is_closing_type{$type_or_tok} ) {
3634 $weld_len = $weld_len_right_closing{$seqno};
3636 elsif ( $is_opening_type{$type_or_tok} ) {
3637 $weld_len = $weld_len_right_opening{$seqno};
3640 if ( !defined($weld_len) ) { $weld_len = 0 }
3644 sub weld_len_left_to_go {
3647 # Given the index of a token in the 'to_go' array
3648 # return the length of any weld to its left
3649 return if ( $i < 0 );
3651 weld_len_left( $type_sequence_to_go[$i], $types_to_go[$i] );
3655 sub weld_len_right_to_go {
3658 # Given the index of a token in the 'to_go' array
3659 # return the length of any weld to its right
3660 return if ( $i < 0 );
3661 if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
3663 weld_len_right( $type_sequence_to_go[$i], $types_to_go[$i] );
3667 sub link_sequence_items {
3669 # This has been merged into 'respace_tokens' but retained for reference
3671 my $rlines = $self->{rlines};
3672 my $rLL = $self->{rLL};
3674 # We walk the token list and make links to the next sequence item.
3675 # We also define these hashes to container tokens using sequence number as
3677 my $K_opening_container = {}; # opening [ { or (
3678 my $K_closing_container = {}; # closing ] } or )
3679 my $K_opening_ternary = {}; # opening ? of ternary
3680 my $K_closing_ternary = {}; # closing : of ternary
3682 # sub to link preceding nodes forward to a new node type
3683 my $link_back = sub {
3684 my ( $Ktop, $key ) = @_;
3686 my $Kprev = $Ktop - 1;
3688 && !defined( $rLL->[$Kprev]->[$key] ) )
3690 $rLL->[$Kprev]->[$key] = $Ktop;
3695 for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
3697 $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] = undef;
3699 my $type = $rLL->[$KK]->[_TYPE_];
3701 next if ( $type eq 'b' );
3703 my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
3704 if ($type_sequence) {
3706 $link_back->( $KK, _KNEXT_SEQ_ITEM_ );
3708 my $token = $rLL->[$KK]->[_TOKEN_];
3709 if ( $is_opening_token{$token} ) {
3711 $K_opening_container->{$type_sequence} = $KK;
3713 elsif ( $is_closing_token{$token} ) {
3715 $K_closing_container->{$type_sequence} = $KK;
3718 # These are not yet used but could be useful
3720 if ( $token eq '?' ) {
3721 $K_opening_ternary->{$type_sequence} = $KK;
3723 elsif ( $token eq ':' ) {
3724 $K_closing_ternary->{$type_sequence} = $KK;
3728 Unknown sequenced token type '$type'. Expecting one of '{[(?:)]}'
3735 $self->{K_opening_container} = $K_opening_container;
3736 $self->{K_closing_container} = $K_closing_container;
3737 $self->{K_opening_ternary} = $K_opening_ternary;
3738 $self->{K_closing_ternary} = $K_closing_ternary;
3742 sub sum_token_lengths {
3745 # This has been merged into 'respace_tokens' but retained for reference
3746 my $rLL = $self->{rLL};
3747 my $cumulative_length = 0;
3748 for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
3750 # now set the length of this token
3751 my $token_length = length( $rLL->[$KK]->[_TOKEN_] );
3753 $cumulative_length += $token_length;
3755 # Save the length sum to just AFTER this token
3756 $rLL->[$KK]->[_CUMULATIVE_LENGTH_] = $cumulative_length;
3762 sub resync_lines_and_tokens {
3765 my $rLL = $self->{rLL};
3766 my $Klimit = $self->{Klimit};
3767 my $rlines = $self->{rlines};
3769 # Re-construct the arrays of tokens associated with the original input lines
3770 # since they have probably changed due to inserting and deleting blanks
3771 # and a few other tokens.
3775 # This is the next token and its line index:
3778 if ( defined($rLL) && @{$rLL} ) {
3779 $Kmax = @{$rLL} - 1;
3780 $inext = $rLL->[$Knext]->[_LINE_INDEX_];
3783 my $get_inext = sub {
3784 if ( $Knext < 0 || $Knext > $Kmax ) { $inext = undef }
3786 $inext = $rLL->[$Knext]->[_LINE_INDEX_];
3791 # Remember the most recently output token index
3795 foreach my $line_of_tokens ( @{$rlines} ) {
3797 my $line_type = $line_of_tokens->{_line_type};
3798 if ( $line_type eq 'CODE' ) {
3802 $inext = $get_inext->();
3803 while ( defined($inext) && $inext <= $iline ) {
3804 push @{K_array}, $Knext;
3806 $inext = $get_inext->();
3809 # Delete any terminal blank token
3811 if ( $rLL->[ $K_array[-1] ]->[_TYPE_] eq 'b' ) {
3816 # Define the range of K indexes for the line:
3817 # $Kfirst = index of first token on line
3818 # $Klast_out = index of last token on line
3819 my ( $Kfirst, $Klast );
3821 $Kfirst = $K_array[0];
3822 $Klast = $K_array[-1];
3823 $Klast_out = $Klast;
3826 # It is only safe to trim the actual line text if the input
3827 # line had a terminal blank token. Otherwise, we may be
3829 if ( $line_of_tokens->{_ended_in_blank_token} ) {
3830 $line_of_tokens->{_line_text} =~ s/\s+$//;
3832 $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
3836 # There shouldn't be any nodes beyond the last one unless we start
3837 # allowing 'link_after' calls
3838 if ( defined($inext) ) {
3840 Fault("unexpected tokens at end of file when reconstructing lines");
3848 my $rlines = $self->{rlines};
3849 foreach my $line ( @{$rlines} ) {
3850 my $input_line = $line->{_line_text};
3851 $self->write_unindented_line($input_line);
3856 sub finish_formatting {
3858 my ( $self, $severe_error ) = @_;
3860 # The file has been tokenized and is ready to be formatted.
3861 # All of the relevant data is stored in $self, ready to go.
3863 # output file verbatim if severe error or no formatting requested
3864 if ( $severe_error || $rOpts->{notidy} ) {
3865 $self->dump_verbatim();
3870 # Make a pass through the lines, looking at lines of CODE and identifying
3871 # special processing needs, such format skipping sections marked by
3873 $self->scan_comments();
3875 # Find nested pairs of container tokens for any welding. This information
3876 # is also needed for adding semicolons, so it is split apart from the
3878 $self->find_nested_pairs();
3880 # Make sure everything looks good
3881 $self->check_line_hashes();
3883 # Future: Place to Begin future Iteration Loop
3884 # foreach my $it_count(1..$maxit) {
3886 # Future: We must reset some things after the first iteration.
3888 # - resetting levels if there was any welding
3889 # - resetting any phantom semicolons
3890 # - dealing with any line numbering issues so we can relate final lines
3891 # line numbers with input line numbers.
3893 # If ($it_count>1) {
3894 # Copy {level_raw} to [_LEVEL_] if ($it_count>1)
3898 # Make a pass through all tokens, adding or deleting any whitespace as
3899 # required. Also make any other changes, such as adding semicolons.
3900 # All token changes must be made here so that the token data structure
3901 # remains fixed for the rest of this iteration.
3902 $self->respace_tokens();
3904 # Implement any welding needed for the -wn or -cb options
3905 $self->weld_containers();
3907 # Finishes formatting and write the result to the line sink.
3908 # Eventually this call should just change the 'rlines' data according to the
3909 # new line breaks and then return so that we can do an internal iteration
3910 # before continuing with the next stages of formatting.
3911 $self->break_lines();
3913 ############################################################
3914 # A possible future decomposition of 'break_lines()' follows.
3916 # - allow perltidy to do an internal iteration which eliminates
3917 # many unnecessary steps, such as re-parsing and vertical alignment.
3918 # This will allow iterations to be automatic.
3919 # - consolidate all length calculations to allow utf8 alignment
3920 ############################################################
3922 # Future: Check for convergence of beginning tokens on CODE lines
3924 # Future: End of Iteration Loop
3926 # Future: add_padding($rargs);
3928 # Future: add_closing_side_comments($rargs);
3930 # Future: vertical_alignment($rargs);
3932 # Future: output results
3934 # A final routine to tie up any loose ends
3939 sub create_one_line_block {
3940 ( $index_start_one_line_block, $semicolons_before_block_self_destruct ) =
3945 sub destroy_one_line_block {
3946 $index_start_one_line_block = UNDEFINED_INDEX;
3947 $semicolons_before_block_self_destruct = 0;
3951 sub leading_spaces_to_go {
3953 # return the number of indentation spaces for a token in the output stream;
3954 # these were previously stored by 'set_leading_whitespace'.
3957 if ( $ii < 0 ) { $ii = 0 }
3958 return get_spaces( $leading_spaces_to_go[$ii] );
3964 # return the number of leading spaces associated with an indentation
3965 # variable $indentation is either a constant number of spaces or an object
3966 # with a get_spaces method.
3967 my $indentation = shift;
3968 return ref($indentation) ? $indentation->get_spaces() : $indentation;
3971 sub get_recoverable_spaces {
3973 # return the number of spaces (+ means shift right, - means shift left)
3974 # that we would like to shift a group of lines with the same indentation
3975 # to get them to line up with their opening parens
3976 my $indentation = shift;
3977 return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
3980 sub get_available_spaces_to_go {
3983 my $item = $leading_spaces_to_go[$ii];
3985 # return the number of available leading spaces associated with an
3986 # indentation variable. $indentation is either a constant number of
3987 # spaces or an object with a get_available_spaces method.
3988 return ref($item) ? $item->get_available_spaces() : 0;
3991 sub new_lp_indentation_item {
3993 # this is an interface to the IndentationItem class
3994 my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
3996 # A negative level implies not to store the item in the item_list
3998 if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
4000 my $item = Perl::Tidy::IndentationItem->new(
4002 $ci_level, $available_spaces,
4003 $index, $gnu_sequence_number,
4004 $align_paren, $max_gnu_stack_index,
4005 $line_start_index_to_go,
4008 if ( $level >= 0 ) {
4009 $gnu_item_list[$max_gnu_item_index] = $item;
4015 sub set_leading_whitespace {
4017 # This routine defines leading whitespace
4018 # given: the level and continuation_level of a token,
4019 # define: space count of leading string which would apply if it
4020 # were the first token of a new line.
4022 my ( $level_abs, $ci_level, $in_continued_quote ) = @_;
4024 # Adjust levels if necessary to recycle whitespace:
4025 # given $level_abs, the absolute level
4026 # define $level, a possibly reduced level for whitespace
4027 my $level = $level_abs;
4028 if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) {
4029 if ( $level_abs < $whitespace_last_level ) {
4030 pop(@whitespace_level_stack);
4032 if ( !@whitespace_level_stack ) {
4033 push @whitespace_level_stack, $level_abs;
4035 elsif ( $level_abs > $whitespace_last_level ) {
4036 $level = $whitespace_level_stack[-1] +
4037 ( $level_abs - $whitespace_last_level );
4040 # 1 Try to break at a block brace
4042 $level > $rOpts_whitespace_cycle
4043 && $last_nonblank_type eq '{'
4044 && $last_nonblank_token eq '{'
4047 # 2 Then either a brace or bracket
4048 || ( $level > $rOpts_whitespace_cycle + 1
4049 && $last_nonblank_token =~ /^[\{\[]$/ )
4051 # 3 Then a paren too
4052 || $level > $rOpts_whitespace_cycle + 2
4057 push @whitespace_level_stack, $level;
4059 $level = $whitespace_level_stack[-1];
4061 $whitespace_last_level = $level_abs;
4063 # modify for -bli, which adds one continuation indentation for
4065 if ( $rOpts_brace_left_and_indent
4066 && $max_index_to_go == 0
4067 && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
4072 # patch to avoid trouble when input file has negative indentation.
4073 # other logic should catch this error.
4074 if ( $level < 0 ) { $level = 0 }
4076 #-------------------------------------------
4077 # handle the standard indentation scheme
4078 #-------------------------------------------
4079 unless ($rOpts_line_up_parentheses) {
4081 $ci_level * $rOpts_continuation_indentation +
4082 $level * $rOpts_indent_columns;
4084 ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
4086 if ($in_continued_quote) {
4090 $leading_spaces_to_go[$max_index_to_go] = $space_count;
4091 $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
4095 #-------------------------------------------------------------
4096 # handle case of -lp indentation..
4097 #-------------------------------------------------------------
4099 # The continued_quote flag means that this is the first token of a
4100 # line, and it is the continuation of some kind of multi-line quote
4101 # or pattern. It requires special treatment because it must have no
4102 # added leading whitespace. So we create a special indentation item
4103 # which is not in the stack.
4104 if ($in_continued_quote) {
4105 my $space_count = 0;
4106 my $available_space = 0;
4107 $level = -1; # flag to prevent storing in item_list
4108 $leading_spaces_to_go[$max_index_to_go] =
4109 $reduced_spaces_to_go[$max_index_to_go] =
4110 new_lp_indentation_item( $space_count, $level, $ci_level,
4111 $available_space, 0 );
4115 # get the top state from the stack
4116 my $space_count = $gnu_stack[$max_gnu_stack_index]->get_spaces();
4117 my $current_level = $gnu_stack[$max_gnu_stack_index]->get_level();
4118 my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
4120 my $type = $types_to_go[$max_index_to_go];
4121 my $token = $tokens_to_go[$max_index_to_go];
4122 my $total_depth = $nesting_depth_to_go[$max_index_to_go];
4124 if ( $type eq '{' || $type eq '(' ) {
4126 $gnu_comma_count{ $total_depth + 1 } = 0;
4127 $gnu_arrow_count{ $total_depth + 1 } = 0;
4129 # If we come to an opening token after an '=' token of some type,
4130 # see if it would be helpful to 'break' after the '=' to save space
4131 my $last_equals = $last_gnu_equals{$total_depth};
4132 if ( $last_equals && $last_equals > $line_start_index_to_go ) {
4134 # find the position if we break at the '='
4135 my $i_test = $last_equals;
4136 if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
4139 ##my $too_close = ($i_test==$max_index_to_go-1);
4141 my $test_position = total_line_length( $i_test, $max_index_to_go );
4142 my $mll = maximum_line_length($i_test);
4146 # the equals is not just before an open paren (testing)
4149 # if we are beyond the midpoint
4150 $gnu_position_predictor > $mll - $rOpts_maximum_line_length / 2
4152 # or we are beyond the 1/4 point and there was an old
4153 # break at the equals
4155 $gnu_position_predictor >
4156 $mll - $rOpts_maximum_line_length * 3 / 4
4158 $old_breakpoint_to_go[$last_equals]
4159 || ( $last_equals > 0
4160 && $old_breakpoint_to_go[ $last_equals - 1 ] )
4161 || ( $last_equals > 1
4162 && $types_to_go[ $last_equals - 1 ] eq 'b'
4163 && $old_breakpoint_to_go[ $last_equals - 2 ] )
4169 # then make the switch -- note that we do not set a real
4170 # breakpoint here because we may not really need one; sub
4171 # scan_list will do that if necessary
4172 $line_start_index_to_go = $i_test + 1;
4173 $gnu_position_predictor = $test_position;
4179 maximum_line_length_for_level($level) - $rOpts_maximum_line_length / 2;
4181 # Check for decreasing depth ..
4182 # Note that one token may have both decreasing and then increasing
4183 # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
4184 # in this example we would first go back to (1,0) then up to (2,0)
4186 if ( $level < $current_level || $ci_level < $current_ci_level ) {
4188 # loop to find the first entry at or completely below this level
4189 my ( $lev, $ci_lev );
4191 if ($max_gnu_stack_index) {
4193 # save index of token which closes this level
4194 $gnu_stack[$max_gnu_stack_index]->set_closed($max_index_to_go);
4196 # Undo any extra indentation if we saw no commas
4197 my $available_spaces =
4198 $gnu_stack[$max_gnu_stack_index]->get_available_spaces();
4200 my $comma_count = 0;
4201 my $arrow_count = 0;
4202 if ( $type eq '}' || $type eq ')' ) {
4203 $comma_count = $gnu_comma_count{$total_depth};
4204 $arrow_count = $gnu_arrow_count{$total_depth};
4205 $comma_count = 0 unless $comma_count;
4206 $arrow_count = 0 unless $arrow_count;
4208 $gnu_stack[$max_gnu_stack_index]->set_comma_count($comma_count);
4209 $gnu_stack[$max_gnu_stack_index]->set_arrow_count($arrow_count);
4211 if ( $available_spaces > 0 ) {
4213 if ( $comma_count <= 0 || $arrow_count > 0 ) {
4215 my $i = $gnu_stack[$max_gnu_stack_index]->get_index();
4217 $gnu_stack[$max_gnu_stack_index]
4218 ->get_sequence_number();
4220 # Be sure this item was created in this batch. This
4221 # should be true because we delete any available
4222 # space from open items at the end of each batch.
4223 if ( $gnu_sequence_number != $seqno
4224 || $i > $max_gnu_item_index )
4227 "Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
4229 report_definite_bug();
4233 if ( $arrow_count == 0 ) {
4235 ->permanently_decrease_available_spaces(
4240 ->tentatively_decrease_available_spaces(
4243 foreach my $j ( $i + 1 .. $max_gnu_item_index ) {
4245 ->decrease_SPACES($available_spaces);
4252 --$max_gnu_stack_index;
4253 $lev = $gnu_stack[$max_gnu_stack_index]->get_level();
4254 $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
4256 # stop when we reach a level at or below the current level
4257 if ( $lev <= $level && $ci_lev <= $ci_level ) {
4259 $gnu_stack[$max_gnu_stack_index]->get_spaces();
4260 $current_level = $lev;
4261 $current_ci_level = $ci_lev;
4266 # reached bottom of stack .. should never happen because
4267 # only negative levels can get here, and $level was forced
4268 # to be positive above.
4271 "program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
4273 report_definite_bug();
4279 # handle increasing depth
4280 if ( $level > $current_level || $ci_level > $current_ci_level ) {
4282 # Compute the standard incremental whitespace. This will be
4283 # the minimum incremental whitespace that will be used. This
4284 # choice results in a smooth transition between the gnu-style
4285 # and the standard style.
4286 my $standard_increment =
4287 ( $level - $current_level ) * $rOpts_indent_columns +
4288 ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
4290 # Now we have to define how much extra incremental space
4291 # ("$available_space") we want. This extra space will be
4292 # reduced as necessary when long lines are encountered or when
4293 # it becomes clear that we do not have a good list.
4294 my $available_space = 0;
4295 my $align_paren = 0;
4298 # initialization on empty stack..
4299 if ( $max_gnu_stack_index == 0 ) {
4300 $space_count = $level * $rOpts_indent_columns;
4303 # if this is a BLOCK, add the standard increment
4304 elsif ($last_nonblank_block_type) {
4305 $space_count += $standard_increment;
4308 # if last nonblank token was not structural indentation,
4309 # just use standard increment
4310 elsif ( $last_nonblank_type ne '{' ) {
4311 $space_count += $standard_increment;
4314 # otherwise use the space to the first non-blank level change token
4317 $space_count = $gnu_position_predictor;
4319 my $min_gnu_indentation =
4320 $gnu_stack[$max_gnu_stack_index]->get_spaces();
4322 $available_space = $space_count - $min_gnu_indentation;
4323 if ( $available_space >= $standard_increment ) {
4324 $min_gnu_indentation += $standard_increment;
4326 elsif ( $available_space > 1 ) {
4327 $min_gnu_indentation += $available_space + 1;
4329 elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
4330 if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
4331 $min_gnu_indentation += 2;
4334 $min_gnu_indentation += 1;
4338 $min_gnu_indentation += $standard_increment;
4340 $available_space = $space_count - $min_gnu_indentation;
4342 if ( $available_space < 0 ) {
4343 $space_count = $min_gnu_indentation;
4344 $available_space = 0;
4349 # update state, but not on a blank token
4350 if ( $types_to_go[$max_index_to_go] ne 'b' ) {
4352 $gnu_stack[$max_gnu_stack_index]->set_have_child(1);
4354 ++$max_gnu_stack_index;
4355 $gnu_stack[$max_gnu_stack_index] =
4356 new_lp_indentation_item( $space_count, $level, $ci_level,
4357 $available_space, $align_paren );
4359 # If the opening paren is beyond the half-line length, then
4360 # we will use the minimum (standard) indentation. This will
4361 # help avoid problems associated with running out of space
4362 # near the end of a line. As a result, in deeply nested
4363 # lists, there will be some indentations which are limited
4364 # to this minimum standard indentation. But the most deeply
4365 # nested container will still probably be able to shift its
4366 # parameters to the right for proper alignment, so in most
4367 # cases this will not be noticeable.
4368 if ( $available_space > 0 && $space_count > $halfway ) {
4369 $gnu_stack[$max_gnu_stack_index]
4370 ->tentatively_decrease_available_spaces($available_space);
4375 # Count commas and look for non-list characters. Once we see a
4376 # non-list character, we give up and don't look for any more commas.
4377 if ( $type eq '=>' ) {
4378 $gnu_arrow_count{$total_depth}++;
4380 # tentatively treating '=>' like '=' for estimating breaks
4381 # TODO: this could use some experimentation
4382 $last_gnu_equals{$total_depth} = $max_index_to_go;
4385 elsif ( $type eq ',' ) {
4386 $gnu_comma_count{$total_depth}++;
4389 elsif ( $is_assignment{$type} ) {
4390 $last_gnu_equals{$total_depth} = $max_index_to_go;
4393 # this token might start a new line
4394 # if this is a non-blank..
4395 if ( $type ne 'b' ) {
4400 # this is the first nonblank token of the line
4401 $max_index_to_go == 1 && $types_to_go[0] eq 'b'
4403 # or previous character was one of these:
4404 || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/
4406 # or previous character was opening and this does not close it
4407 || ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
4408 || ( $last_nonblank_type_to_go eq '(' and $type ne ')' )
4410 # or this token is one of these:
4411 || $type =~ /^([\.]|\|\||\&\&)$/
4413 # or this is a closing structure
4414 || ( $last_nonblank_type_to_go eq '}'
4415 && $last_nonblank_token_to_go eq $last_nonblank_type_to_go )
4417 # or previous token was keyword 'return'
4418 || ( $last_nonblank_type_to_go eq 'k'
4419 && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )
4421 # or starting a new line at certain keywords is fine
4423 && $is_if_unless_and_or_last_next_redo_return{$token} )
4425 # or this is after an assignment after a closing structure
4427 $is_assignment{$last_nonblank_type_to_go}
4429 $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
4431 # and it is significantly to the right
4432 || $gnu_position_predictor > $halfway
4437 check_for_long_gnu_style_lines();
4438 $line_start_index_to_go = $max_index_to_go;
4440 # back up 1 token if we want to break before that type
4441 # otherwise, we may strand tokens like '?' or ':' on a line
4442 if ( $line_start_index_to_go > 0 ) {
4443 if ( $last_nonblank_type_to_go eq 'k' ) {
4445 if ( $want_break_before{$last_nonblank_token_to_go} ) {
4446 $line_start_index_to_go--;
4449 elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
4450 $line_start_index_to_go--;
4456 # remember the predicted position of this token on the output line
4457 if ( $max_index_to_go > $line_start_index_to_go ) {
4458 $gnu_position_predictor =
4459 total_line_length( $line_start_index_to_go, $max_index_to_go );
4462 $gnu_position_predictor =
4463 $space_count + $token_lengths_to_go[$max_index_to_go];
4466 # store the indentation object for this token
4467 # this allows us to manipulate the leading whitespace
4468 # (in case we have to reduce indentation to fit a line) without
4469 # having to change any token values
4470 $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
4471 $reduced_spaces_to_go[$max_index_to_go] =
4472 ( $max_gnu_stack_index > 0 && $ci_level )
4473 ? $gnu_stack[ $max_gnu_stack_index - 1 ]
4474 : $gnu_stack[$max_gnu_stack_index];
4478 sub check_for_long_gnu_style_lines {
4480 # look at the current estimated maximum line length, and
4481 # remove some whitespace if it exceeds the desired maximum
4483 # this is only for the '-lp' style
4484 return unless ($rOpts_line_up_parentheses);
4486 # nothing can be done if no stack items defined for this line
4487 return if ( $max_gnu_item_index == UNDEFINED_INDEX );
4489 # see if we have exceeded the maximum desired line length
4490 # keep 2 extra free because they are needed in some cases
4491 # (result of trial-and-error testing)
4493 $gnu_position_predictor - maximum_line_length($max_index_to_go) + 2;
4495 return if ( $spaces_needed <= 0 );
4497 # We are over the limit, so try to remove a requested number of
4498 # spaces from leading whitespace. We are only allowed to remove
4499 # from whitespace items created on this batch, since others have
4500 # already been used and cannot be undone.
4501 my @candidates = ();
4504 # loop over all whitespace items created for the current batch
4505 for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
4506 my $item = $gnu_item_list[$i];
4508 # item must still be open to be a candidate (otherwise it
4509 # cannot influence the current token)
4510 next if ( $item->get_closed() >= 0 );
4512 my $available_spaces = $item->get_available_spaces();
4514 if ( $available_spaces > 0 ) {
4515 push( @candidates, [ $i, $available_spaces ] );
4519 return unless (@candidates);
4521 # sort by available whitespace so that we can remove whitespace
4522 # from the maximum available first
4523 @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
4525 # keep removing whitespace until we are done or have no more
4526 foreach my $candidate (@candidates) {
4527 my ( $i, $available_spaces ) = @{$candidate};
4528 my $deleted_spaces =
4529 ( $available_spaces > $spaces_needed )
4531 : $available_spaces;
4533 # remove the incremental space from this item
4534 $gnu_item_list[$i]->decrease_available_spaces($deleted_spaces);
4538 # update the leading whitespace of this item and all items
4539 # that came after it
4540 for ( ; $i <= $max_gnu_item_index ; $i++ ) {
4542 my $old_spaces = $gnu_item_list[$i]->get_spaces();
4543 if ( $old_spaces >= $deleted_spaces ) {
4544 $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
4547 # shouldn't happen except for code bug:
4549 my $level = $gnu_item_list[$i_debug]->get_level();
4550 my $ci_level = $gnu_item_list[$i_debug]->get_ci_level();
4551 my $old_level = $gnu_item_list[$i]->get_level();
4552 my $old_ci_level = $gnu_item_list[$i]->get_ci_level();
4554 "program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n"
4556 report_definite_bug();
4559 $gnu_position_predictor -= $deleted_spaces;
4560 $spaces_needed -= $deleted_spaces;
4561 last unless ( $spaces_needed > 0 );
4566 sub finish_lp_batch {
4568 # This routine is called once after each output stream batch is
4569 # finished to undo indentation for all incomplete -lp
4570 # indentation levels. It is too risky to leave a level open,
4571 # because then we can't backtrack in case of a long line to follow.
4572 # This means that comments and blank lines will disrupt this
4573 # indentation style. But the vertical aligner may be able to
4574 # get the space back if there are side comments.
4576 # this is only for the 'lp' style
4577 return unless ($rOpts_line_up_parentheses);
4579 # nothing can be done if no stack items defined for this line
4580 return if ( $max_gnu_item_index == UNDEFINED_INDEX );
4582 # loop over all whitespace items created for the current batch
4583 foreach my $i ( 0 .. $max_gnu_item_index ) {
4584 my $item = $gnu_item_list[$i];
4586 # only look for open items
4587 next if ( $item->get_closed() >= 0 );
4589 # Tentatively remove all of the available space
4590 # (The vertical aligner will try to get it back later)
4591 my $available_spaces = $item->get_available_spaces();
4592 if ( $available_spaces > 0 ) {
4594 # delete incremental space for this item
4596 ->tentatively_decrease_available_spaces($available_spaces);
4598 # Reduce the total indentation space of any nodes that follow
4599 # Note that any such nodes must necessarily be dependents
4601 foreach ( $i + 1 .. $max_gnu_item_index ) {
4602 $gnu_item_list[$_]->decrease_SPACES($available_spaces);
4609 sub reduce_lp_indentation {
4611 # reduce the leading whitespace at token $i if possible by $spaces_needed
4612 # (a large value of $spaces_needed will remove all excess space)
4613 # NOTE: to be called from scan_list only for a sequence of tokens
4614 # contained between opening and closing parens/braces/brackets
4616 my ( $i, $spaces_wanted ) = @_;
4617 my $deleted_spaces = 0;
4619 my $item = $leading_spaces_to_go[$i];
4620 my $available_spaces = $item->get_available_spaces();
4623 $available_spaces > 0
4624 && ( ( $spaces_wanted <= $available_spaces )
4625 || !$item->get_have_child() )
4629 # we'll remove these spaces, but mark them as recoverable
4631 $item->tentatively_decrease_available_spaces($spaces_wanted);
4634 return $deleted_spaces;
4637 sub token_sequence_length {
4639 # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
4640 # returns 0 if $ibeg > $iend (shouldn't happen)
4641 my ( $ibeg, $iend ) = @_;
4642 return 0 if ( $iend < 0 || $ibeg > $iend );
4643 return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 );
4644 return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
4647 sub total_line_length {
4649 # return length of a line of tokens ($ibeg .. $iend)
4650 my ( $ibeg, $iend ) = @_;
4651 return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
4654 sub maximum_line_length_for_level {
4656 # return maximum line length for line starting with a given level
4657 my $maximum_line_length = $rOpts_maximum_line_length;
4659 # Modify if -vmll option is selected
4660 if ($rOpts_variable_maximum_line_length) {
4662 if ( $level < 0 ) { $level = 0 }
4663 $maximum_line_length += $level * $rOpts_indent_columns;
4665 return $maximum_line_length;
4668 sub maximum_line_length {
4670 # return maximum line length for line starting with the token at given index
4672 return maximum_line_length_for_level( $levels_to_go[$ii] );
4675 sub excess_line_length {
4677 # return number of characters by which a line of tokens ($ibeg..$iend)
4678 # exceeds the allowable line length.
4679 my ( $ibeg, $iend, $ignore_left_weld, $ignore_right_weld ) = @_;
4681 # Include left and right weld lengths unless requested not to
4682 my $wl = $ignore_left_weld ? 0 : weld_len_left_to_go($iend);
4683 my $wr = $ignore_right_weld ? 0 : weld_len_right_to_go($iend);
4685 return total_line_length( $ibeg, $iend ) + $wl + $wr -
4686 maximum_line_length($ibeg);
4691 # flush buffer and write any informative messages
4695 $file_writer_object->decrement_output_line_number()
4696 ; # fix up line number since it was incremented
4697 we_are_at_the_last_line();
4698 if ( $added_semicolon_count > 0 ) {
4699 my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
4701 ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
4702 write_logfile_entry("$added_semicolon_count $what added:\n");
4703 write_logfile_entry(
4704 " $first at input line $first_added_semicolon_at\n");
4706 if ( $added_semicolon_count > 1 ) {
4707 write_logfile_entry(
4708 " Last at input line $last_added_semicolon_at\n");
4710 write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
4711 write_logfile_entry("\n");
4714 if ( $deleted_semicolon_count > 0 ) {
4715 my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
4717 ( $deleted_semicolon_count > 1 )
4720 write_logfile_entry(
4721 "$deleted_semicolon_count unnecessary $what deleted:\n");
4722 write_logfile_entry(
4723 " $first at input line $first_deleted_semicolon_at\n");
4725 if ( $deleted_semicolon_count > 1 ) {
4726 write_logfile_entry(
4727 " Last at input line $last_deleted_semicolon_at\n");
4729 write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n");
4730 write_logfile_entry("\n");
4733 if ( $embedded_tab_count > 0 ) {
4734 my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
4736 ( $embedded_tab_count > 1 )
4737 ? "quotes or patterns"
4738 : "quote or pattern";
4739 write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
4740 write_logfile_entry(
4741 "This means the display of this script could vary with device or software\n"
4743 write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
4745 if ( $embedded_tab_count > 1 ) {
4746 write_logfile_entry(
4747 " Last at input line $last_embedded_tab_at\n");
4749 write_logfile_entry("\n");
4752 if ($first_tabbing_disagreement) {
4753 write_logfile_entry(
4754 "First indentation disagreement seen at input line $first_tabbing_disagreement\n"
4758 if ($in_tabbing_disagreement) {
4759 write_logfile_entry(
4760 "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
4765 if ($last_tabbing_disagreement) {
4767 write_logfile_entry(
4768 "Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
4772 write_logfile_entry("No indentation disagreement seen\n");
4775 if ($first_tabbing_disagreement) {
4776 write_logfile_entry(
4777 "Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
4780 write_logfile_entry("\n");
4782 $vertical_aligner_object->report_anything_unusual();
4784 $file_writer_object->report_line_length_errors();
4791 # This routine is called to check the Opts hash after it is defined
4794 initialize_whitespace_hashes();
4795 initialize_bond_strength_hashes();
4797 make_static_block_comment_pattern();
4798 make_static_side_comment_pattern();
4799 make_closing_side_comment_prefix();
4800 make_closing_side_comment_list_pattern();
4801 $format_skipping_pattern_begin =
4802 make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
4803 $format_skipping_pattern_end =
4804 make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
4806 # If closing side comments ARE selected, then we can safely
4807 # delete old closing side comments unless closing side comment
4808 # warnings are requested. This is a good idea because it will
4809 # eliminate any old csc's which fall below the line count threshold.
4810 # We cannot do this if warnings are turned on, though, because we
4811 # might delete some text which has been added. So that must
4812 # be handled when comments are created.
4813 if ( $rOpts->{'closing-side-comments'} ) {
4814 if ( !$rOpts->{'closing-side-comment-warnings'} ) {
4815 $rOpts->{'delete-closing-side-comments'} = 1;
4819 # If closing side comments ARE NOT selected, but warnings ARE
4820 # selected and we ARE DELETING csc's, then we will pretend to be
4821 # adding with a huge interval. This will force the comments to be
4822 # generated for comparison with the old comments, but not added.
4823 elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
4824 if ( $rOpts->{'delete-closing-side-comments'} ) {
4825 $rOpts->{'delete-closing-side-comments'} = 0;
4826 $rOpts->{'closing-side-comments'} = 1;
4827 $rOpts->{'closing-side-comment-interval'} = 100000000;
4832 make_block_brace_vertical_tightness_pattern();
4833 make_blank_line_pattern();
4835 prepare_cuddled_block_types();
4836 if ( $rOpts->{'dump-cuddled-block-list'} ) {
4837 dump_cuddled_block_list(*STDOUT);
4841 if ( $rOpts->{'line-up-parentheses'} ) {
4843 if ( $rOpts->{'indent-only'}
4844 || !$rOpts->{'add-newlines'}
4845 || !$rOpts->{'delete-old-newlines'} )
4848 -----------------------------------------------------------------------
4849 Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
4851 The -lp indentation logic requires that perltidy be able to coordinate
4852 arbitrarily large numbers of line breakpoints. This isn't possible
4853 with these flags. Sometimes an acceptable workaround is to use -wocb=3
4854 -----------------------------------------------------------------------
4856 $rOpts->{'line-up-parentheses'} = 0;
4860 # At present, tabs are not compatible with the line-up-parentheses style
4861 # (it would be possible to entab the total leading whitespace
4862 # just prior to writing the line, if desired).
4863 if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
4865 Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
4867 $rOpts->{'tabs'} = 0;
4870 # Likewise, tabs are not compatible with outdenting..
4871 if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
4873 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
4875 $rOpts->{'tabs'} = 0;
4878 if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
4880 Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
4882 $rOpts->{'tabs'} = 0;
4885 if ( !$rOpts->{'space-for-semicolon'} ) {
4886 $want_left_space{'f'} = -1;
4889 if ( $rOpts->{'space-terminal-semicolon'} ) {
4890 $want_left_space{';'} = 1;
4893 # implement outdenting preferences for keywords
4894 %outdent_keyword = ();
4895 my @okw = split_words( $rOpts->{'outdent-keyword-okl'} );
4897 @okw = qw(next last redo goto return); # defaults
4900 # FUTURE: if not a keyword, assume that it is an identifier
4902 if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
4903 $outdent_keyword{$_} = 1;
4906 Warn("ignoring '$_' in -okwl list; not a perl keyword");
4910 # implement user whitespace preferences
4911 if ( my @q = split_words( $rOpts->{'want-left-space'} ) ) {
4912 @want_left_space{@q} = (1) x scalar(@q);
4915 if ( my @q = split_words( $rOpts->{'want-right-space'} ) ) {
4916 @want_right_space{@q} = (1) x scalar(@q);
4919 if ( my @q = split_words( $rOpts->{'nowant-left-space'} ) ) {
4920 @want_left_space{@q} = (-1) x scalar(@q);
4923 if ( my @q = split_words( $rOpts->{'nowant-right-space'} ) ) {
4924 @want_right_space{@q} = (-1) x scalar(@q);
4926 if ( $rOpts->{'dump-want-left-space'} ) {
4927 dump_want_left_space(*STDOUT);
4931 if ( $rOpts->{'dump-want-right-space'} ) {
4932 dump_want_right_space(*STDOUT);
4936 # default keywords for which space is introduced before an opening paren
4937 # (at present, including them messes up vertical alignment)
4938 my @sak = qw(my local our and or err eq ne if else elsif until
4939 unless while for foreach return switch case given when catch);
4940 @space_after_keyword{@sak} = (1) x scalar(@sak);
4942 # first remove any or all of these if desired
4943 if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
4945 # -nsak='*' selects all the above keywords
4946 if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) }
4947 @space_after_keyword{@q} = (0) x scalar(@q);
4950 # then allow user to add to these defaults
4951 if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) {
4952 @space_after_keyword{@q} = (1) x scalar(@q);
4955 # implement user break preferences
4956 my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
4957 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
4958 . : ? && || and or err xor
4961 my $break_after = sub {
4963 foreach my $tok (@toks) {
4964 if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
4965 my $lbs = $left_bond_strength{$tok};
4966 my $rbs = $right_bond_strength{$tok};
4967 if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
4968 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
4974 my $break_before = sub {
4976 foreach my $tok (@toks) {
4977 my $lbs = $left_bond_strength{$tok};
4978 my $rbs = $right_bond_strength{$tok};
4979 if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
4980 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
4986 $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
4987 $break_before->(@all_operators)
4988 if ( $rOpts->{'break-before-all-operators'} );
4990 $break_after->( split_words( $rOpts->{'want-break-after'} ) );
4991 $break_before->( split_words( $rOpts->{'want-break-before'} ) );
4993 # make note if breaks are before certain key types
4994 %want_break_before = ();
4995 foreach my $tok ( @all_operators, ',' ) {
4996 $want_break_before{$tok} =
4997 $left_bond_strength{$tok} < $right_bond_strength{$tok};
5000 # Coordinate ?/: breaks, which must be similar
5001 if ( !$want_break_before{':'} ) {
5002 $want_break_before{'?'} = $want_break_before{':'};
5003 $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
5004 $left_bond_strength{'?'} = NO_BREAK;
5007 # Define here tokens which may follow the closing brace of a do statement
5008 # on the same line, as in:
5009 # } while ( $something);
5010 my @dof = qw(until while unless if ; : );
5012 @is_do_follower{@dof} = (1) x scalar(@dof);
5014 # What tokens may follow the closing brace of an if or elsif block?
5015 # Not used. Previously used for cuddled else, but no longer needed.
5016 %is_if_brace_follower = ();
5018 # nothing can follow the closing curly of an else { } block:
5019 %is_else_brace_follower = ();
5021 # what can follow a multi-line anonymous sub definition closing curly:
5022 my @asf = qw# ; : => or and && || ~~ !~~ ) #;
5024 @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf);
5026 # what can follow a one-line anonymous sub closing curly:
5027 # one-line anonymous subs also have ']' here...
5028 # see tk3.t and PP.pm
5029 my @asf1 = qw# ; : => or and && || ) ] ~~ !~~ #;
5031 @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1);
5033 # What can follow a closing curly of a block
5034 # which is not an if/elsif/else/do/sort/map/grep/eval/sub
5035 # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
5036 my @obf = qw# ; : => or and && || ) #;
5038 @is_other_brace_follower{@obf} = (1) x scalar(@obf);
5040 $right_bond_strength{'{'} = WEAK;
5041 $left_bond_strength{'{'} = VERY_STRONG;
5043 # make -l=0 equal to -l=infinite
5044 if ( !$rOpts->{'maximum-line-length'} ) {
5045 $rOpts->{'maximum-line-length'} = 1000000;
5048 # make -lbl=0 equal to -lbl=infinite
5049 if ( !$rOpts->{'long-block-line-count'} ) {
5050 $rOpts->{'long-block-line-count'} = 1000000;
5053 my $enc = $rOpts->{'character-encoding'};
5054 if ( $enc && $enc !~ /^(none|utf8)$/i ) {
5056 Unrecognized character-encoding '$enc'; expecting one of: (none, utf8)
5060 my $ole = $rOpts->{'output-line-ending'};
5069 # Patch for RT #99514, a memoization issue.
5070 # Normally, the user enters one of 'dos', 'win', etc, and we change the
5071 # value in the options parameter to be the corresponding line ending
5072 # character. But, if we are using memoization, on later passes through
5073 # here the option parameter will already have the desired ending
5074 # character rather than the keyword 'dos', 'win', etc. So
5075 # we must check to see if conversion has already been done and, if so,
5076 # bypass the conversion step.
5077 my %endings_inverted = (
5078 "\015\012" => 'dos',
5079 "\015\012" => 'win',
5084 if ( defined( $endings_inverted{$ole} ) ) {
5086 # we already have valid line ending, nothing more to do
5090 unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
5091 my $str = join " ", keys %endings;
5093 Unrecognized line ending '$ole'; expecting one of: $str
5096 if ( $rOpts->{'preserve-line-endings'} ) {
5097 Warn("Ignoring -ple; conflicts with -ole\n");
5098 $rOpts->{'preserve-line-endings'} = undef;
5103 # hashes used to simplify setting whitespace
5105 '{' => $rOpts->{'brace-tightness'},
5106 '}' => $rOpts->{'brace-tightness'},
5107 '(' => $rOpts->{'paren-tightness'},
5108 ')' => $rOpts->{'paren-tightness'},
5109 '[' => $rOpts->{'square-bracket-tightness'},
5110 ']' => $rOpts->{'square-bracket-tightness'},
5119 # frequently used parameters
5120 $rOpts_add_newlines = $rOpts->{'add-newlines'};
5121 $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
5122 $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
5123 $rOpts_block_brace_vertical_tightness =
5124 $rOpts->{'block-brace-vertical-tightness'};
5125 $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'};
5126 $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
5127 $rOpts_break_at_old_ternary_breakpoints =
5128 $rOpts->{'break-at-old-ternary-breakpoints'};
5129 $rOpts_break_at_old_attribute_breakpoints =
5130 $rOpts->{'break-at-old-attribute-breakpoints'};
5131 $rOpts_break_at_old_comma_breakpoints =
5132 $rOpts->{'break-at-old-comma-breakpoints'};
5133 $rOpts_break_at_old_keyword_breakpoints =
5134 $rOpts->{'break-at-old-keyword-breakpoints'};
5135 $rOpts_break_at_old_logical_breakpoints =
5136 $rOpts->{'break-at-old-logical-breakpoints'};
5137 $rOpts_closing_side_comment_else_flag =
5138 $rOpts->{'closing-side-comment-else-flag'};
5139 $rOpts_closing_side_comment_maximum_text =
5140 $rOpts->{'closing-side-comment-maximum-text'};
5141 $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
5142 $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
5143 $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
5144 $rOpts_indent_columns = $rOpts->{'indent-columns'};
5145 $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
5146 $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
5147 $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
5148 $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
5150 $rOpts_variable_maximum_line_length =
5151 $rOpts->{'variable-maximum-line-length'};
5152 $rOpts_short_concatenation_item_length =
5153 $rOpts->{'short-concatenation-item-length'};
5155 $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
5156 $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
5157 $rOpts_format_skipping = $rOpts->{'format-skipping'};
5158 $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
5159 $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
5160 $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
5161 $rOpts_ignore_side_comment_lengths =
5162 $rOpts->{'ignore-side-comment-lengths'};
5164 # Note that both opening and closing tokens can access the opening
5165 # and closing flags of their container types.
5166 %opening_vertical_tightness = (
5167 '(' => $rOpts->{'paren-vertical-tightness'},
5168 '{' => $rOpts->{'brace-vertical-tightness'},
5169 '[' => $rOpts->{'square-bracket-vertical-tightness'},
5170 ')' => $rOpts->{'paren-vertical-tightness'},
5171 '}' => $rOpts->{'brace-vertical-tightness'},
5172 ']' => $rOpts->{'square-bracket-vertical-tightness'},
5175 %closing_vertical_tightness = (
5176 '(' => $rOpts->{'paren-vertical-tightness-closing'},
5177 '{' => $rOpts->{'brace-vertical-tightness-closing'},
5178 '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
5179 ')' => $rOpts->{'paren-vertical-tightness-closing'},
5180 '}' => $rOpts->{'brace-vertical-tightness-closing'},
5181 ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
5184 # assume flag for '>' same as ')' for closing qw quotes
5185 %closing_token_indentation = (
5186 ')' => $rOpts->{'closing-paren-indentation'},
5187 '}' => $rOpts->{'closing-brace-indentation'},
5188 ']' => $rOpts->{'closing-square-bracket-indentation'},
5189 '>' => $rOpts->{'closing-paren-indentation'},
5192 # flag indicating if any closing tokens are indented
5193 $some_closing_token_indentation =
5194 $rOpts->{'closing-paren-indentation'}
5195 || $rOpts->{'closing-brace-indentation'}
5196 || $rOpts->{'closing-square-bracket-indentation'}
5197 || $rOpts->{'indent-closing-brace'};
5199 %opening_token_right = (
5200 '(' => $rOpts->{'opening-paren-right'},
5201 '{' => $rOpts->{'opening-hash-brace-right'},
5202 '[' => $rOpts->{'opening-square-bracket-right'},
5205 %stack_opening_token = (
5206 '(' => $rOpts->{'stack-opening-paren'},
5207 '{' => $rOpts->{'stack-opening-hash-brace'},
5208 '[' => $rOpts->{'stack-opening-square-bracket'},
5211 %stack_closing_token = (
5212 ')' => $rOpts->{'stack-closing-paren'},
5213 '}' => $rOpts->{'stack-closing-hash-brace'},
5214 ']' => $rOpts->{'stack-closing-square-bracket'},
5216 $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
5217 $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
5223 # See if a pattern will compile. We have to use a string eval here,
5224 # but it should be safe because the pattern has been constructed
5227 eval "'##'=~/$pattern/";
5234 # Add keywords here which really should not be cuddled
5236 my @q = qw(if unless for foreach while);
5237 @no_cuddle{@q} = (1) x scalar(@q);
5240 sub prepare_cuddled_block_types {
5242 # the cuddled-else style, if used, is controlled by a hash that
5245 # Include keywords here which should not be cuddled
5247 my $cuddled_string = "";
5248 if ( $rOpts->{'cuddled-else'} ) {
5251 $cuddled_string = 'elsif else continue catch finally'
5252 unless ( $rOpts->{'cuddled-block-list-exclusive'} );
5254 # This is the old equivalent but more complex version
5255 # $cuddled_string = 'if-elsif-else unless-elsif-else -continue ';
5257 # Add users other blocks to be cuddled
5258 my $cuddled_block_list = $rOpts->{'cuddled-block-list'};
5259 if ($cuddled_block_list) {
5260 $cuddled_string .= " " . $cuddled_block_list;
5265 # If we have a cuddled string of the form
5266 # 'try-catch-finally'
5268 # we want to prepare a hash of the form
5270 # $rcuddled_block_types = {
5277 # use -dcbl to dump this hash
5279 # Multiple such strings are input as a space or comma separated list
5281 # If we get two lists with the same leading type, such as
5282 # -cbl = "-try-catch-finally -try-catch-otherwise"
5283 # then they will get merged as follows:
5284 # $rcuddled_block_types = {
5291 # This will allow either type of chain to be followed.
5293 $cuddled_string =~ s/,/ /g; # allow space or comma separated lists
5294 my @cuddled_strings = split /\s+/, $cuddled_string;
5296 $rcuddled_block_types = {};
5298 # process each dash-separated string...
5299 my $string_count = 0;
5300 foreach my $string (@cuddled_strings) {
5301 next unless $string;
5302 my @words = split /-+/, $string; # allow multiple dashes
5304 # we could look for and report possible errors here...
5305 next unless ( @words > 0 );
5307 # allow either '-continue' or *-continue' for arbitrary starting type
5310 # a single word without dashes is a secondary block type
5312 $start = shift @words;
5315 # always make an entry for the leading word. If none follow, this
5316 # will still prevent a wildcard from matching this word.
5317 if ( !defined( $rcuddled_block_types->{$start} ) ) {
5318 $rcuddled_block_types->{$start} = {};
5321 # The count gives the original word order in case we ever want it.
5324 foreach my $word (@words) {
5326 if ( $no_cuddle{$word} ) {
5328 "## Ignoring keyword '$word' in -cbl; does not seem right\n"
5333 $rcuddled_block_types->{$start}->{$word} =
5334 1; #"$string_count.$word_count";
5341 sub dump_cuddled_block_list {
5344 # ORIGINAL METHOD: Here is the format of the cuddled block type hash
5345 # which controls this routine
5346 # my $rcuddled_block_types = {
5357 # SIMPLFIED METHOD: the simplified method uses a wildcard for
5358 # the starting block type and puts all cuddled blocks together:
5359 # my $rcuddled_block_types = {
5368 # Both methods work, but the simplified method has proven to be adequate and
5371 my $cuddled_string = $rOpts->{'cuddled-block-list'};
5372 $cuddled_string = '' unless $cuddled_string;
5375 $flags .= "-ce" if ( $rOpts->{'cuddled-else'} );
5376 $flags .= " -cbl='$cuddled_string'";
5378 unless ( $rOpts->{'cuddled-else'} ) {
5379 $flags .= "\nNote: You must specify -ce to generate a cuddled hash";
5383 ------------------------------------------------------------------------
5384 Hash of cuddled block types prepared for a run with these parameters:
5386 ------------------------------------------------------------------------
5390 $fh->print( Dumper($rcuddled_block_types) );
5393 ------------------------------------------------------------------------
5398 sub make_static_block_comment_pattern {
5400 # create the pattern used to identify static block comments
5401 $static_block_comment_pattern = '^\s*##';
5403 # allow the user to change it
5404 if ( $rOpts->{'static-block-comment-prefix'} ) {
5405 my $prefix = $rOpts->{'static-block-comment-prefix'};
5406 $prefix =~ s/^\s*//;
5407 my $pattern = $prefix;
5409 # user may give leading caret to force matching left comments only
5410 if ( $prefix !~ /^\^#/ ) {
5411 if ( $prefix !~ /^#/ ) {
5413 "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"
5416 $pattern = '^\s*' . $prefix;
5418 if ( bad_pattern($pattern) ) {
5420 "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"
5423 $static_block_comment_pattern = $pattern;
5428 sub make_format_skipping_pattern {
5429 my ( $opt_name, $default ) = @_;
5430 my $param = $rOpts->{$opt_name};
5431 unless ($param) { $param = $default }
5433 if ( $param !~ /^#/ ) {
5434 Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
5436 my $pattern = '^' . $param . '\s';
5437 if ( bad_pattern($pattern) ) {
5439 "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
5445 sub make_closing_side_comment_list_pattern {
5447 # turn any input list into a regex for recognizing selected block types
5448 $closing_side_comment_list_pattern = '^\w+';
5449 if ( defined( $rOpts->{'closing-side-comment-list'} )
5450 && $rOpts->{'closing-side-comment-list'} )
5452 $closing_side_comment_list_pattern =
5453 make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
5458 sub make_bli_pattern {
5460 if ( defined( $rOpts->{'brace-left-and-indent-list'} )
5461 && $rOpts->{'brace-left-and-indent-list'} )
5463 $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
5466 $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
5470 sub make_block_brace_vertical_tightness_pattern {
5472 # turn any input list into a regex for recognizing selected block types
5473 $block_brace_vertical_tightness_pattern =
5474 '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
5475 if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
5476 && $rOpts->{'block-brace-vertical-tightness-list'} )
5478 $block_brace_vertical_tightness_pattern =
5479 make_block_pattern( '-bbvtl',
5480 $rOpts->{'block-brace-vertical-tightness-list'} );
5485 sub make_blank_line_pattern {
5487 $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
5488 my $key = 'blank-lines-before-closing-block-list';
5489 if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
5490 $blank_lines_before_closing_block_pattern =
5491 make_block_pattern( '-blbcl', $rOpts->{$key} );
5494 $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
5495 $key = 'blank-lines-after-opening-block-list';
5496 if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
5497 $blank_lines_after_opening_block_pattern =
5498 make_block_pattern( '-blaol', $rOpts->{$key} );
5503 sub make_block_pattern {
5505 # given a string of block-type keywords, return a regex to match them
5506 # The only tricky part is that labels are indicated with a single ':'
5507 # and the 'sub' token text may have additional text after it (name of
5512 # input string: "if else elsif unless while for foreach do : sub";
5513 # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
5517 # To distinguish between anonymous subs and named subs, use 'sub' to
5518 # indicate a named sub, and 'asub' to indicate an anonymous sub
5520 my ( $abbrev, $string ) = @_;
5521 my @list = split_words($string);
5525 if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
5528 if ( $i eq 'sub' ) {
5530 elsif ( $i eq 'asub' ) {
5532 elsif ( $i eq ';' ) {
5535 elsif ( $i eq '{' ) {
5538 elsif ( $i eq ':' ) {
5539 push @words, '\w+:';
5541 elsif ( $i =~ /^\w/ ) {
5545 Warn("unrecognized block type $i after $abbrev, ignoring\n");
5548 my $pattern = '(' . join( '|', @words ) . ')$';
5549 my $sub_patterns = "";
5550 if ( $seen{'sub'} ) {
5551 $sub_patterns .= '|' . $SUB_PATTERN;
5553 if ( $seen{'asub'} ) {
5554 $sub_patterns .= '|' . $ASUB_PATTERN;
5556 if ($sub_patterns) {
5557 $pattern = '(' . $pattern . $sub_patterns . ')';
5559 $pattern = '^' . $pattern;
5563 sub make_static_side_comment_pattern {
5565 # create the pattern used to identify static side comments
5566 $static_side_comment_pattern = '^##';
5568 # allow the user to change it
5569 if ( $rOpts->{'static-side-comment-prefix'} ) {
5570 my $prefix = $rOpts->{'static-side-comment-prefix'};
5571 $prefix =~ s/^\s*//;
5572 my $pattern = '^' . $prefix;
5573 if ( bad_pattern($pattern) ) {
5575 "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"
5578 $static_side_comment_pattern = $pattern;
5583 sub make_closing_side_comment_prefix {
5585 # Be sure we have a valid closing side comment prefix
5586 my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
5587 my $csc_prefix_pattern;
5588 if ( !defined($csc_prefix) ) {
5589 $csc_prefix = '## end';
5590 $csc_prefix_pattern = '^##\s+end';
5593 my $test_csc_prefix = $csc_prefix;
5594 if ( $test_csc_prefix !~ /^#/ ) {
5595 $test_csc_prefix = '#' . $test_csc_prefix;
5598 # make a regex to recognize the prefix
5599 my $test_csc_prefix_pattern = $test_csc_prefix;
5601 # escape any special characters
5602 $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
5604 $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
5606 # allow exact number of intermediate spaces to vary
5607 $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
5609 # make sure we have a good pattern
5610 # if we fail this we probably have an error in escaping
5613 if ( bad_pattern($test_csc_prefix_pattern) ) {
5615 # shouldn't happen..must have screwed up escaping, above
5616 report_definite_bug();
5618 "Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"
5621 # just warn and keep going with defaults
5622 Warn("Please consider using a simpler -cscp prefix\n");
5623 Warn("Using default -cscp instead; please check output\n");
5626 $csc_prefix = $test_csc_prefix;
5627 $csc_prefix_pattern = $test_csc_prefix_pattern;
5630 $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
5631 $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
5635 sub dump_want_left_space {
5639 These values are the main control of whitespace to the left of a token type;
5640 They may be altered with the -wls parameter.
5641 For a list of token types, use perltidy --dump-token-types (-dtt)
5642 1 means the token wants a space to its left
5643 -1 means the token does not want a space to its left
5644 ------------------------------------------------------------------------
5646 foreach my $key ( sort keys %want_left_space ) {
5647 print $fh "$key\t$want_left_space{$key}\n";
5652 sub dump_want_right_space {
5656 These values are the main control of whitespace to the right of a token type;
5657 They may be altered with the -wrs parameter.
5658 For a list of token types, use perltidy --dump-token-types (-dtt)
5659 1 means the token wants a space to its right
5660 -1 means the token does not want a space to its right
5661 ------------------------------------------------------------------------
5663 foreach my $key ( sort keys %want_right_space ) {
5664 print $fh "$key\t$want_right_space{$key}\n";
5669 { # begin is_essential_whitespace
5671 my %is_sort_grep_map;
5677 @q = qw(sort grep map);
5678 @is_sort_grep_map{@q} = (1) x scalar(@q);
5680 @q = qw(for foreach);
5681 @is_for_foreach{@q} = (1) x scalar(@q);
5685 sub is_essential_whitespace {
5687 # Essential whitespace means whitespace which cannot be safely deleted
5688 # without risking the introduction of a syntax error.
5689 # We are given three tokens and their types:
5690 # ($tokenl, $typel) is the token to the left of the space in question
5691 # ($tokenr, $typer) is the token to the right of the space in question
5692 # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
5694 # This is a slow routine but is not needed too often except when -mangle
5697 # Note: This routine should almost never need to be changed. It is
5698 # for avoiding syntax problems rather than for formatting.
5699 my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
5703 # never combine two bare words or numbers
5704 # examples: and ::ok(1)
5706 # for bla::bla:: abc
5707 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
5708 # $input eq"quit" to make $inputeq"quit"
5709 # my $size=-s::SINK if $file; <==OK but we won't do it
5710 # don't join something like: for bla::bla:: abc
5711 # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
5712 ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
5713 && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
5715 # do not combine a number with a concatenation dot
5716 # example: pom.caputo:
5717 # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
5718 || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
5719 || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )
5721 # do not join a minus with a bare word, because you might form
5722 # a file test operator. Example from Complex.pm:
5723 # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
5724 || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )
5726 # do not join a bare word with a minus, like between 'Send' and
5727 # '-recipients' here <<snippets/space3.in>>
5728 # my $msg = new Fax::Send
5729 # -recipients => $to,
5731 # This is the safest thing to do. If we had the token to the right of
5732 # the minus we could do a better check.
5733 || ( ( $tokenr eq '-' ) && ( $typel eq 'w' ) )
5735 # and something like this could become ambiguous without space
5737 # use constant III=>1;
5741 || ( ( $tokenl eq '-' )
5742 && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )
5744 # '= -' should not become =- or you will get a warning
5746 # || ($tokenr eq '-')
5748 # keep a space between a quote and a bareword to prevent the
5749 # bareword from becoming a quote modifier.
5750 || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
5752 # keep a space between a token ending in '$' and any word;
5753 # this caused trouble: "die @$ if $@"
5754 || ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
5755 && ( $tokenr =~ /^[a-zA-Z_]/ ) )
5757 # perl is very fussy about spaces before <<
5758 || ( $tokenr =~ /^\<\</ )
5760 # avoid combining tokens to create new meanings. Example:
5761 # $a+ +$b must not become $a++$b
5762 || ( $is_digraph{ $tokenl . $tokenr } )
5763 || ( $is_trigraph{ $tokenl . $tokenr } )
5765 # another example: do not combine these two &'s:
5766 # allow_options & &OPT_EXECCGI
5767 || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )
5769 # don't combine $$ or $# with any alphanumeric
5770 # (testfile mangle.t with --mangle)
5771 || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )
5773 # retain any space after possible filehandle
5774 # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
5775 || ( $typel eq 'Z' )
5777 # Perl is sensitive to whitespace after the + here:
5778 # $b = xvals $a + 0.1 * yvals $a;
5779 || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )
5781 # keep paren separate in 'use Foo::Bar ()'
5785 && $tokenll eq 'use' )
5787 # keep any space between filehandle and paren:
5788 # file mangle.t with --mangle:
5789 || ( $typel eq 'Y' && $tokenr eq '(' )
5791 # retain any space after here doc operator ( hereerr.t)
5792 || ( $typel eq 'h' )
5794 # be careful with a space around ++ and --, to avoid ambiguity as to
5795 # which token it applies
5796 || ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) )
5797 || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
5799 # need space after foreach my; for example, this will fail in
5800 # older versions of Perl:
5801 # foreach my$ft(@filetypes)...
5806 && $is_for_foreach{$tokenll}
5810 # must have space between grep and left paren; "grep(" will fail
5811 || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
5813 # don't stick numbers next to left parens, as in:
5814 #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
5815 || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
5817 # We must be sure that a space between a ? and a quoted string
5818 # remains if the space before the ? remains. [Loca.pm, lockarea]
5820 # $b=join $comma ? ',' : ':', @_; # ok
5821 # $b=join $comma?',' : ':', @_; # ok!
5822 # $b=join $comma ?',' : ':', @_; # error!
5823 # Not really required:
5824 ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
5826 # do not remove space between an '&' and a bare word because
5827 # it may turn into a function evaluation, like here
5828 # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
5829 # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
5830 || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
5832 # space stacked labels (TODO: check if really necessary)
5833 || ( $typel eq 'J' && $typer eq 'J' )
5835 ; # the value of this long logic sequence is the result we want
5836 ##if ($typel eq 'j') {print STDERR "typel=$typel typer=$typer result='$result'\n"}
5842 my %secret_operators;
5843 my %is_leading_secret_token;
5847 # token lists for perl secret operators as compiled by Philippe Bruhat
5848 # at: https://metacpan.org/module/perlsecret
5849 %secret_operators = (
5850 'Goatse' => [qw#= ( ) =#], #=( )=
5851 'Venus1' => [qw#0 +#], # 0+
5852 'Venus2' => [qw#+ 0#], # +0
5853 'Enterprise' => [qw#) x ! !#], # ()x!!
5854 'Kite1' => [qw#~ ~ <>#], # ~~<>
5855 'Kite2' => [qw#~~ <>#], # ~~<>
5856 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
5857 'Bang bang ' => [qw#! !#], # !!
5860 # The following operators and constants are not included because they
5861 # are normally kept tight by perltidy:
5865 # Make a lookup table indexed by the first token of each operator:
5866 # first token => [list, list, ...]
5867 foreach my $value ( values(%secret_operators) ) {
5868 my $tok = $value->[0];
5869 push @{ $is_leading_secret_token{$tok} }, $value;
5873 sub new_secret_operator_whitespace {
5875 my ( $rlong_array, $rwhitespace_flags ) = @_;
5877 # Loop over all tokens in this line
5878 my ( $token, $type );
5879 my $jmax = @{$rlong_array} - 1;
5880 foreach my $j ( 0 .. $jmax ) {
5882 $token = $rlong_array->[$j]->[_TOKEN_];
5883 $type = $rlong_array->[$j]->[_TYPE_];
5885 # Skip unless this token might start a secret operator
5886 next if ( $type eq 'b' );
5887 next unless ( $is_leading_secret_token{$token} );
5889 # Loop over all secret operators with this leading token
5890 foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
5892 foreach my $tok ( @{$rpattern} ) {
5897 && $rlong_array->[$jend]->[_TYPE_] eq 'b' );
5899 || $tok ne $rlong_array->[$jend]->[_TOKEN_] )
5908 # set flags to prevent spaces within this operator
5909 foreach my $jj ( $j + 1 .. $jend ) {
5910 $rwhitespace_flags->[$jj] = WS_NO;
5915 } ## End Loop over all operators
5916 } ## End loop over all tokens
5921 { # begin print_line_of_tokens
5923 my $rinput_token_array; # Current working array
5924 my $rinput_K_array; # Future working array
5927 my $guessed_indentation_level;
5929 # This should be a return variable from extract_token
5930 # These local token variables are stored by store_token_to_go:
5934 my $container_environment;
5936 my $in_continued_quote;
5938 my $no_internal_newlines;
5944 # routine to pull the jth token from the line of tokens
5946 my ( $self, $j ) = @_;
5948 my $rLL = $self->{rLL};
5949 $Ktoken_vars = $rinput_K_array->[$j];
5950 if ( !defined($Ktoken_vars) ) {
5952 # Shouldn't happen: an error here would be due to a recent program change
5953 Fault("undefined index K for j=$j");
5955 my $rtoken_vars = $rLL->[$Ktoken_vars];
5957 if ( $rtoken_vars->[_TOKEN_] ne $rLL->[$Ktoken_vars]->[_TOKEN_] ) {
5959 # Shouldn't happen: an error here would be due to a recent program change
5961 j=$j, K=$Ktoken_vars, '$rtoken_vars->[_TOKEN_]' ne '$rLL->[$Ktoken_vars]'
5965 #########################################################
5966 # these are now redundant and can eventually be eliminated
5968 $token = $rtoken_vars->[_TOKEN_];
5969 $type = $rtoken_vars->[_TYPE_];
5970 $block_type = $rtoken_vars->[_BLOCK_TYPE_];
5971 $container_type = $rtoken_vars->[_CONTAINER_TYPE_];
5972 $container_environment = $rtoken_vars->[_CONTAINER_ENVIRONMENT_];
5973 $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
5974 $level = $rtoken_vars->[_LEVEL_];
5975 $slevel = $rtoken_vars->[_SLEVEL_];
5976 $ci_level = $rtoken_vars->[_CI_LEVEL_];
5977 #########################################################
5985 sub save_current_token {
5988 $block_type, $ci_level,
5989 $container_environment, $container_type,
5990 $in_continued_quote, $level,
5991 $no_internal_newlines, $slevel,
5993 $type_sequence, $Ktoken_vars,
5998 sub restore_current_token {
6000 $block_type, $ci_level,
6001 $container_environment, $container_type,
6002 $in_continued_quote, $level,
6003 $no_internal_newlines, $slevel,
6005 $type_sequence, $Ktoken_vars,
6013 # Returns the length of a token, given:
6014 # $token=text of the token
6016 # $not_first_token = should be TRUE if this is not the first token of
6017 # the line. It might the index of this token in an array. It is
6018 # used to test for a side comment vs a block comment.
6019 # Note: Eventually this should be the only routine determining the
6020 # length of a token in this package.
6021 my ( $token, $type, $not_first_token ) = @_;
6022 my $token_length = length($token);
6024 # We mark lengths of side comments as just 1 if we are
6025 # ignoring their lengths when setting line breaks.
6027 if ( $rOpts_ignore_side_comment_lengths
6030 return $token_length;
6035 # return length of ith token in @{$rtokens}
6037 return token_length( $rinput_token_array->[$i]->[_TOKEN_],
6038 $rinput_token_array->[$i]->[_TYPE_], $i );
6041 # Routine to place the current token into the output stream.
6042 # Called once per output token.
6043 sub store_token_to_go {
6045 my ( $self, $side_comment_follows ) = @_;
6047 my $flag = $side_comment_follows ? 1 : $no_internal_newlines;
6050 $K_to_go[$max_index_to_go] = $Ktoken_vars;
6051 $tokens_to_go[$max_index_to_go] = $token;
6052 $types_to_go[$max_index_to_go] = $type;
6053 $nobreak_to_go[$max_index_to_go] = $flag;
6054 $old_breakpoint_to_go[$max_index_to_go] = 0;
6055 $forced_breakpoint_to_go[$max_index_to_go] = 0;
6056 $block_type_to_go[$max_index_to_go] = $block_type;
6057 $type_sequence_to_go[$max_index_to_go] = $type_sequence;
6058 $container_environment_to_go[$max_index_to_go] = $container_environment;
6059 $ci_levels_to_go[$max_index_to_go] = $ci_level;
6060 $mate_index_to_go[$max_index_to_go] = -1;
6061 $matching_token_to_go[$max_index_to_go] = '';
6062 $bond_strength_to_go[$max_index_to_go] = 0;
6064 # Note: negative levels are currently retained as a diagnostic so that
6065 # the 'final indentation level' is correctly reported for bad scripts.
6066 # But this means that every use of $level as an index must be checked.
6067 # If this becomes too much of a problem, we might give up and just clip
6069 ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
6070 $levels_to_go[$max_index_to_go] = $level;
6071 $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
6073 # link the non-blank tokens
6074 my $iprev = $max_index_to_go - 1;
6075 $iprev-- if ( $iprev >= 0 && $types_to_go[$iprev] eq 'b' );
6076 $iprev_to_go[$max_index_to_go] = $iprev;
6077 $inext_to_go[$iprev] = $max_index_to_go
6078 if ( $iprev >= 0 && $type ne 'b' );
6079 $inext_to_go[$max_index_to_go] = $max_index_to_go + 1;
6081 $token_lengths_to_go[$max_index_to_go] =
6082 token_length( $token, $type, $max_index_to_go );
6084 # We keep a running sum of token lengths from the start of this batch:
6085 # summed_lengths_to_go[$i] = total length to just before token $i
6086 # summed_lengths_to_go[$i+1] = total length to just after token $i
6087 $summed_lengths_to_go[ $max_index_to_go + 1 ] =
6088 $summed_lengths_to_go[$max_index_to_go] +
6089 $token_lengths_to_go[$max_index_to_go];
6091 # Define the indentation that this token would have if it started
6092 # a new line. We have to do this now because we need to know this
6093 # when considering one-line blocks.
6094 set_leading_whitespace( $level, $ci_level, $in_continued_quote );
6096 # remember previous nonblank tokens seen
6097 if ( $type ne 'b' ) {
6098 $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
6099 $last_last_nonblank_type_to_go = $last_nonblank_type_to_go;
6100 $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
6101 $last_nonblank_index_to_go = $max_index_to_go;
6102 $last_nonblank_type_to_go = $type;
6103 $last_nonblank_token_to_go = $token;
6104 if ( $type eq ',' ) {
6105 $comma_count_in_batch++;
6109 FORMATTER_DEBUG_FLAG_STORE && do {
6110 my ( $a, $b, $c ) = caller();
6112 "STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
6117 sub insert_new_token_to_go {
6119 # insert a new token into the output stream. use same level as
6120 # previous token; assumes a character at max_index_to_go.
6121 my ( $self, @args ) = @_;
6122 save_current_token();
6123 ( $token, $type, $slevel, $no_internal_newlines ) = @args;
6125 if ( $max_index_to_go == UNDEFINED_INDEX ) {
6126 warning("code bug: bad call to insert_new_token_to_go\n");
6128 $level = $levels_to_go[$max_index_to_go];
6130 # FIXME: it seems to be necessary to use the next, rather than
6131 # previous, value of this variable when creating a new blank (align.t)
6132 #my $slevel = $nesting_depth_to_go[$max_index_to_go];
6133 $ci_level = $ci_levels_to_go[$max_index_to_go];
6134 $container_environment = $container_environment_to_go[$max_index_to_go];
6135 $in_continued_quote = 0;
6137 $type_sequence = "";
6139 # store an undef for the K value to catch unexpected usage
6140 # This routine is only called by add_closing_side_comments, and
6141 # eventually that call will be eliminated.
6142 $Ktoken_vars = undef;
6144 $self->store_token_to_go();
6145 restore_current_token();
6150 my ($rold_token_hash) = @_;
6151 my %new_token_hash =
6152 map { ( $_, $rold_token_hash->{$_} ) } keys %{$rold_token_hash};
6153 return \%new_token_hash;
6158 my @new = map { $_ } @{$rold};
6162 sub copy_token_as_type {
6163 my ( $rold_token, $type, $token ) = @_;
6164 if ( $type eq 'b' ) {
6165 $token = " " unless defined($token);
6167 elsif ( $type eq 'q' ) {
6168 $token = '' unless defined($token);
6170 elsif ( $type eq '->' ) {
6171 $token = '->' unless defined($token);
6173 elsif ( $type eq ';' ) {
6174 $token = ';' unless defined($token);
6178 "Programming error: copy_token_as has type $type but should be 'b' or 'q'"
6181 my $rnew_token = copy_array($rold_token);
6182 $rnew_token->[_TYPE_] = $type;
6183 $rnew_token->[_TOKEN_] = $token;
6184 $rnew_token->[_BLOCK_TYPE_] = '';
6185 $rnew_token->[_CONTAINER_TYPE_] = '';
6186 $rnew_token->[_CONTAINER_ENVIRONMENT_] = '';
6187 $rnew_token->[_TYPE_SEQUENCE_] = '';
6191 sub boolean_equals {
6192 my ( $val1, $val2 ) = @_;
6193 return ( $val1 && $val2 || !$val1 && !$val2 );
6196 sub print_line_of_tokens {
6198 my ( $self, $line_of_tokens ) = @_;
6200 # This routine is called once per input line to process all of
6201 # the tokens on that line. This is the first stage of
6204 # Full-line comments and blank lines may be processed immediately.
6206 # For normal lines of code, the tokens are stored one-by-one,
6207 # via calls to 'sub store_token_to_go', until a known line break
6208 # point is reached. Then, the batch of collected tokens is
6209 # passed along to 'sub output_line_to_go' for further
6210 # processing. This routine decides if there should be
6211 # whitespace between each pair of non-white tokens, so later
6212 # routines only need to decide on any additional line breaks.
6213 # Any whitespace is initially a single space character. Later,
6214 # the vertical aligner may expand that to be multiple space
6215 # characters if necessary for alignment.
6217 $input_line_number = $line_of_tokens->{_line_number};
6218 my $input_line = $line_of_tokens->{_line_text};
6219 my $CODE_type = $line_of_tokens->{_code_type};
6221 my $rK_range = $line_of_tokens->{_rK_range};
6222 my ( $K_first, $K_last ) = @{$rK_range};
6224 my $rLL = $self->{rLL};
6225 my $rbreak_container = $self->{rbreak_container};
6227 if ( !defined($K_first) ) {
6229 # Unexpected blank line..
6230 # Calling routine was supposed to handle this
6232 "Programming Error: Unexpected Blank Line in print_line_of_tokens. Ignoring"
6237 $no_internal_newlines = 1 - $rOpts_add_newlines;
6239 ( $K_first == $K_last && $rLL->[$K_first]->[_TYPE_] eq '#' );
6240 my $is_static_block_comment_without_leading_space =
6241 $CODE_type eq 'SBCX';
6242 $is_static_block_comment =
6243 $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space;
6244 my $is_hanging_side_comment = $CODE_type eq 'HSC';
6245 my $is_VERSION_statement = $CODE_type eq 'VER';
6246 if ($is_VERSION_statement) {
6247 $saw_VERSION_in_this_file = 1;
6248 $no_internal_newlines = 1;
6251 # Add interline blank if any
6252 my $last_old_nonblank_type = "b";
6253 my $first_new_nonblank_type = "b";
6254 my $first_new_nonblank_token = " ";
6255 if ( $max_index_to_go >= 0 ) {
6256 $last_old_nonblank_type = $types_to_go[$max_index_to_go];
6257 $first_new_nonblank_type = $rLL->[$K_first]->[_TYPE_];
6258 $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
6260 && $types_to_go[$max_index_to_go] ne 'b'
6262 && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' )
6268 # Copy the tokens into local arrays
6269 $rinput_token_array = [];
6270 $rinput_K_array = [];
6271 $rinput_K_array = [ ( $K_first .. $K_last ) ];
6272 $rinput_token_array = [ map { $rLL->[$_] } @{$rinput_K_array} ];
6273 my $jmax = @{$rinput_K_array} - 1;
6275 $in_continued_quote = $starting_in_quote =
6276 $line_of_tokens->{_starting_in_quote};
6277 $in_quote = $line_of_tokens->{_ending_in_quote};
6278 $ending_in_quote = $in_quote;
6279 $guessed_indentation_level =
6280 $line_of_tokens->{_guessed_indentation_level};
6283 my $next_nonblank_token;
6284 my $next_nonblank_token_type;
6287 $container_type = "";
6288 $container_environment = "";
6289 $type_sequence = "";
6291 ######################################
6292 # Handle a block (full-line) comment..
6293 ######################################
6296 if ( $rOpts->{'delete-block-comments'} ) { return }
6298 if ( $rOpts->{'tee-block-comments'} ) {
6299 $file_writer_object->tee_on();
6302 destroy_one_line_block();
6303 $self->output_line_to_go();
6305 # output a blank line before block comments
6307 # unless we follow a blank or comment line
6308 $last_line_leading_type !~ /^[#b]$/
6311 && $rOpts->{'blanks-before-comments'}
6313 # if this is NOT an empty comment line
6314 && $rinput_token_array->[0]->[_TOKEN_] ne '#'
6316 # not after a short line ending in an opening token
6317 # because we already have space above this comment.
6318 # Note that the first comment in this if block, after
6319 # the 'if (', does not get a blank line because of this.
6320 && !$last_output_short_opening_token
6322 # never before static block comments
6323 && !$is_static_block_comment
6326 $self->flush(); # switching to new output stream
6327 $file_writer_object->write_blank_code_line();
6328 $last_line_leading_type = 'b';
6331 # TRIM COMMENTS -- This could be turned off as a option
6332 $rinput_token_array->[0]->[_TOKEN_] =~ s/\s*$//; # trim right end
6335 $rOpts->{'indent-block-comments'}
6336 && ( !$rOpts->{'indent-spaced-block-comments'}
6337 || $input_line =~ /^\s+/ )
6338 && !$is_static_block_comment_without_leading_space
6341 $self->extract_token(0);
6342 $self->store_token_to_go();
6343 $self->output_line_to_go();
6346 $self->flush(); # switching to new output stream
6347 $file_writer_object->write_code_line(
6348 $rinput_token_array->[0]->[_TOKEN_] . "\n" );
6349 $last_line_leading_type = '#';
6351 if ( $rOpts->{'tee-block-comments'} ) {
6352 $file_writer_object->tee_off();
6357 # TODO: Move to sub scan_comments
6358 # compare input/output indentation except for continuation lines
6359 # (because they have an unknown amount of initial blank space)
6360 # and lines which are quotes (because they may have been outdented)
6361 # Note: this test is placed here because we know the continuation flag
6362 # at this point, which allows us to avoid non-meaningful checks.
6363 my $structural_indentation_level = $rinput_token_array->[0]->[_LEVEL_];
6364 compare_indentation_levels( $guessed_indentation_level,
6365 $structural_indentation_level )
6366 unless ( $is_hanging_side_comment
6367 || $rinput_token_array->[0]->[_CI_LEVEL_] > 0
6368 || $guessed_indentation_level == 0
6369 && $rinput_token_array->[0]->[_TYPE_] eq 'Q' );
6371 ##########################
6372 # Handle indentation-only
6373 ##########################
6375 # NOTE: In previous versions we sent all qw lines out immediately here.
6376 # No longer doing this: also write a line which is entirely a 'qw' list
6377 # to allow stacking of opening and closing tokens. Note that interior
6378 # qw lines will still go out at the end of this routine.
6379 ##if ( $rOpts->{'indent-only'} ) {
6380 if ( $CODE_type eq 'IO' ) {
6382 my $line = $input_line;
6384 # delete side comments if requested with -io, but
6385 # we will not allow deleting of closing side comments with -io
6386 # because the coding would be more complex
6387 if ( $rOpts->{'delete-side-comments'}
6388 && $rinput_token_array->[$jmax]->[_TYPE_] eq '#' )
6392 foreach my $jj ( 0 .. $jmax - 1 ) {
6393 $line .= $rinput_token_array->[$jj]->[_TOKEN_];
6397 # Fix for rt #125506 Unexpected string formating
6398 # in which leading space of a terminal quote was removed
6400 $line =~ s/^\s+// unless ($in_continued_quote);
6402 $self->extract_token(0);
6406 $container_type = "";
6407 $container_environment = "";
6408 $type_sequence = "";
6409 $self->store_token_to_go();
6410 $self->output_line_to_go();
6414 ############################
6415 # Handle all other lines ...
6416 ############################
6418 #######################################################
6419 # FIXME: this should become unnecessary
6420 # making $j+2 valid simplifies coding
6422 copy_token_as_type( $rinput_token_array->[$jmax], 'b' );
6423 push @{$rinput_token_array}, $rnew_blank;
6424 push @{$rinput_token_array}, $rnew_blank;
6425 #######################################################
6427 # If we just saw the end of an elsif block, write nag message
6428 # if we do not see another elseif or an else.
6429 if ($looking_for_else) {
6431 unless ( $rinput_token_array->[0]->[_TOKEN_] =~ /^(elsif|else)$/ ) {
6432 write_logfile_entry("(No else block)\n");
6434 $looking_for_else = 0;
6437 # This is a good place to kill incomplete one-line blocks
6440 ( $semicolons_before_block_self_destruct == 0 )
6441 && ( $max_index_to_go >= 0 )
6442 && ( $last_old_nonblank_type eq ';' )
6443 && ( $first_new_nonblank_token ne '}' )
6446 # Patch for RT #98902. Honor request to break at old commas.
6447 || ( $rOpts_break_at_old_comma_breakpoints
6448 && $max_index_to_go >= 0
6449 && $last_old_nonblank_type eq ',' )
6452 $forced_breakpoint_to_go[$max_index_to_go] = 1
6453 if ($rOpts_break_at_old_comma_breakpoints);
6454 destroy_one_line_block();
6455 $self->output_line_to_go();
6458 # loop to process the tokens one-by-one
6462 # We do not want a leading blank if the previous batch just got output
6464 if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
6468 foreach my $j ( $jmin .. $jmax ) {
6470 # pull out the local values for this token
6471 $self->extract_token($j);
6473 if ( $type eq '#' ) {
6475 # trim trailing whitespace
6476 # (there is no option at present to prevent this)
6480 $rOpts->{'delete-side-comments'}
6482 # delete closing side comments if necessary
6483 || ( $rOpts->{'delete-closing-side-comments'}
6484 && $token =~ /$closing_side_comment_prefix_pattern/o
6485 && $last_nonblank_block_type =~
6486 /$closing_side_comment_list_pattern/o )
6489 if ( $types_to_go[$max_index_to_go] eq 'b' ) {
6490 unstore_token_to_go();
6496 # If we are continuing after seeing a right curly brace, flush
6497 # buffer unless we see what we are looking for, as in
6499 if ( $rbrace_follower && $type ne 'b' ) {
6501 unless ( $rbrace_follower->{$token} ) {
6502 $self->output_line_to_go();
6504 $rbrace_follower = undef;
6508 ( $rinput_token_array->[ $j + 1 ]->[_TYPE_] eq 'b' )
6511 $next_nonblank_token = $rinput_token_array->[$j_next]->[_TOKEN_];
6512 $next_nonblank_token_type =
6513 $rinput_token_array->[$j_next]->[_TYPE_];
6515 ######################
6516 # MAYBE MOVE ELSEWHERE?
6517 ######################
6518 if ( $type eq 'Q' ) {
6519 note_embedded_tab() if ( $token =~ "\t" );
6521 # make note of something like '$var = s/xxx/yyy/;'
6522 # in case it should have been '$var =~ s/xxx/yyy/;'
6524 $token =~ /^(s|tr|y|m|\/)/
6525 && $last_nonblank_token =~ /^(=|==|!=)$/
6527 # preceded by simple scalar
6528 && $last_last_nonblank_type eq 'i'
6529 && $last_last_nonblank_token =~ /^\$/
6531 # followed by some kind of termination
6532 # (but give complaint if we can's see far enough ahead)
6533 && $next_nonblank_token =~ /^[; \)\}]$/
6535 # scalar is not declared
6537 $types_to_go[0] eq 'k'
6538 && $tokens_to_go[0] =~ /^(my|our|local)$/
6542 my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
6544 "Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n"
6549 # Do not allow breaks which would promote a side comment to a
6550 # block comment. In order to allow a break before an opening
6551 # or closing BLOCK, followed by a side comment, those sections
6552 # of code will handle this flag separately.
6553 my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
6554 my $is_opening_BLOCK =
6558 && $block_type ne 't' );
6559 my $is_closing_BLOCK =
6563 && $block_type ne 't' );
6565 if ( $side_comment_follows
6566 && !$is_opening_BLOCK
6567 && !$is_closing_BLOCK )
6569 $no_internal_newlines = 1;
6572 # We're only going to handle breaking for code BLOCKS at this
6573 # (top) level. Other indentation breaks will be handled by
6574 # sub scan_list, which is better suited to dealing with them.
6575 if ($is_opening_BLOCK) {
6577 # Tentatively output this token. This is required before
6578 # calling starting_one_line_block. We may have to unstore
6579 # it, though, if we have to break before it.
6580 $self->store_token_to_go($side_comment_follows);
6582 # Look ahead to see if we might form a one-line block..
6584 $self->starting_one_line_block( $j, $jmax, $level, $slevel,
6585 $ci_level, $rinput_token_array );
6586 clear_breakpoint_undo_stack();
6588 # to simplify the logic below, set a flag to indicate if
6589 # this opening brace is far from the keyword which introduces it
6590 my $keyword_on_same_line = 1;
6591 if ( ( $max_index_to_go >= 0 )
6592 && ( $last_nonblank_type eq ')' )
6593 && ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long ) )
6595 $keyword_on_same_line = 0;
6598 # decide if user requested break before '{'
6601 # use -bl flag if not a sub block of any type
6602 $block_type !~ /^sub\b/
6603 ? $rOpts->{'opening-brace-on-new-line'}
6605 # use -sbl flag for a named sub block
6606 : $block_type !~ /$ASUB_PATTERN/
6607 ? $rOpts->{'opening-sub-brace-on-new-line'}
6609 # use -asbl flag for an anonymous sub block
6610 : $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
6612 # Do not break if this token is welded to the left
6613 if ( weld_len_left( $type_sequence, $token ) ) {
6617 # Break before an opening '{' ...
6623 # and we were unable to start looking for a block,
6624 && $index_start_one_line_block == UNDEFINED_INDEX
6626 # or if it will not be on same line as its keyword, so that
6627 # it will be outdented (eval.t, overload.t), and the user
6628 # has not insisted on keeping it on the right
6629 || ( !$keyword_on_same_line
6630 && !$rOpts->{'opening-brace-always-on-right'} )
6635 # but only if allowed
6636 unless ($no_internal_newlines) {
6638 # since we already stored this token, we must unstore it
6639 $self->unstore_token_to_go();
6641 # then output the line
6642 $self->output_line_to_go();
6644 # and now store this token at the start of a new line
6645 $self->store_token_to_go($side_comment_follows);
6649 # Now update for side comment
6650 if ($side_comment_follows) { $no_internal_newlines = 1 }
6652 # now output this line
6653 unless ($no_internal_newlines) {
6654 $self->output_line_to_go();
6658 elsif ($is_closing_BLOCK) {
6660 # If there is a pending one-line block ..
6661 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
6663 # we have to terminate it if..
6666 # it is too long (final length may be different from
6667 # initial estimate). note: must allow 1 space for this
6669 excess_line_length( $index_start_one_line_block,
6670 $max_index_to_go ) >= 0
6672 # or if it has too many semicolons
6673 || ( $semicolons_before_block_self_destruct == 0
6674 && $last_nonblank_type ne ';' )
6677 destroy_one_line_block();
6681 # put a break before this closing curly brace if appropriate
6682 unless ( $no_internal_newlines
6683 || $index_start_one_line_block != UNDEFINED_INDEX )
6686 # write out everything before this closing curly brace
6687 $self->output_line_to_go();
6690 # Now update for side comment
6691 if ($side_comment_follows) { $no_internal_newlines = 1 }
6693 # store the closing curly brace
6694 $self->store_token_to_go();
6696 # ok, we just stored a closing curly brace. Often, but
6697 # not always, we want to end the line immediately.
6698 # So now we have to check for special cases.
6700 # if this '}' successfully ends a one-line block..
6701 my $is_one_line_block = 0;
6703 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
6705 # Remember the type of token just before the
6706 # opening brace. It would be more general to use
6707 # a stack, but this will work for one-line blocks.
6708 $is_one_line_block =
6709 $types_to_go[$index_start_one_line_block];
6711 # we have to actually make it by removing tentative
6712 # breaks that were set within it
6713 undo_forced_breakpoint_stack(0);
6714 set_nobreaks( $index_start_one_line_block,
6715 $max_index_to_go - 1 );
6717 # then re-initialize for the next one-line block
6718 destroy_one_line_block();
6720 # then decide if we want to break after the '}' ..
6721 # We will keep going to allow certain brace followers as in:
6722 # do { $ifclosed = 1; last } unless $losing;
6724 # But make a line break if the curly ends a
6725 # significant block:
6728 $is_block_without_semicolon{$block_type}
6730 # Follow users break point for
6731 # one line block types U & G, such as a 'try' block
6732 || $is_one_line_block =~ /^[UG]$/ && $j == $jmax
6735 # if needless semicolon follows we handle it later
6736 && $next_nonblank_token ne ';'
6739 $self->output_line_to_go()
6740 unless ($no_internal_newlines);
6744 # set string indicating what we need to look for brace follower
6746 if ( $block_type eq 'do' ) {
6747 $rbrace_follower = \%is_do_follower;
6749 elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
6750 $rbrace_follower = \%is_if_brace_follower;
6752 elsif ( $block_type eq 'else' ) {
6753 $rbrace_follower = \%is_else_brace_follower;
6756 # added eval for borris.t
6757 elsif ($is_sort_map_grep_eval{$block_type}
6758 || $is_one_line_block eq 'G' )
6760 $rbrace_follower = undef;
6765 elsif ( $block_type =~ /$ASUB_PATTERN/ ) {
6767 if ($is_one_line_block) {
6768 $rbrace_follower = \%is_anon_sub_1_brace_follower;
6771 $rbrace_follower = \%is_anon_sub_brace_follower;
6775 # None of the above: specify what can follow a closing
6776 # brace of a block which is not an
6777 # if/elsif/else/do/sort/map/grep/eval
6779 # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
6781 $rbrace_follower = \%is_other_brace_follower;
6784 # See if an elsif block is followed by another elsif or else;
6786 if ( $block_type eq 'elsif' ) {
6788 if ( $next_nonblank_token_type eq 'b' ) { # end of line?
6789 $looking_for_else = 1; # ok, check on next line
6793 unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
6794 write_logfile_entry("No else block :(\n");
6799 # keep going after certain block types (map,sort,grep,eval)
6800 # added eval for borris.t
6806 # if no more tokens, postpone decision until re-entring
6807 elsif ( ( $next_nonblank_token_type eq 'b' )
6808 && $rOpts_add_newlines )
6810 unless ($rbrace_follower) {
6811 $self->output_line_to_go()
6812 unless ($no_internal_newlines);
6816 elsif ($rbrace_follower) {
6818 unless ( $rbrace_follower->{$next_nonblank_token} ) {
6819 $self->output_line_to_go()
6820 unless ($no_internal_newlines);
6822 $rbrace_follower = undef;
6826 $self->output_line_to_go() unless ($no_internal_newlines);
6829 } # end treatment of closing block token
6832 elsif ( $type eq ';' ) {
6834 # kill one-line blocks with too many semicolons
6835 $semicolons_before_block_self_destruct--;
6837 ( $semicolons_before_block_self_destruct < 0 )
6838 || ( $semicolons_before_block_self_destruct == 0
6839 && $next_nonblank_token_type !~ /^[b\}]$/ )
6842 destroy_one_line_block();
6845 # Remove unnecessary semicolons, but not after bare
6846 # blocks, where it could be unsafe if the brace is
6850 $last_nonblank_token eq '}'
6852 $is_block_without_semicolon{
6853 $last_nonblank_block_type}
6854 || $last_nonblank_block_type =~ /$SUB_PATTERN/
6855 || $last_nonblank_block_type =~ /^\w+:$/ )
6857 || $last_nonblank_type eq ';'
6862 $rOpts->{'delete-semicolons'}
6864 # don't delete ; before a # because it would promote it
6865 # to a block comment
6866 && ( $next_nonblank_token_type ne '#' )
6869 note_deleted_semicolon();
6870 $self->output_line_to_go()
6871 unless ( $no_internal_newlines
6872 || $index_start_one_line_block != UNDEFINED_INDEX );
6876 write_logfile_entry("Extra ';'\n");
6879 $self->store_token_to_go();
6881 $self->output_line_to_go()
6882 unless ( $no_internal_newlines
6883 || ( $rOpts_keep_interior_semicolons && $j < $jmax )
6884 || ( $next_nonblank_token eq '}' ) );
6888 # handle here_doc target string
6889 elsif ( $type eq 'h' ) {
6891 # no newlines after seeing here-target
6892 $no_internal_newlines = 1;
6893 destroy_one_line_block();
6894 $self->store_token_to_go();
6897 # handle all other token types
6900 $self->store_token_to_go();
6903 # remember two previous nonblank OUTPUT tokens
6904 if ( $type ne '#' && $type ne 'b' ) {
6905 $last_last_nonblank_token = $last_nonblank_token;
6906 $last_last_nonblank_type = $last_nonblank_type;
6907 $last_nonblank_token = $token;
6908 $last_nonblank_type = $type;
6909 $last_nonblank_block_type = $block_type;
6912 # unset the continued-quote flag since it only applies to the
6913 # first token, and we want to resume normal formatting if
6914 # there are additional tokens on the line
6915 $in_continued_quote = 0;
6917 } # end of loop over all tokens in this 'line_of_tokens'
6919 # we have to flush ..
6922 # if there is a side comment
6923 ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
6925 # if this line ends in a quote
6926 # NOTE: This is critically important for insuring that quoted lines
6927 # do not get processed by things like -sot and -sct
6930 # if this is a VERSION statement
6931 || $is_VERSION_statement
6933 # to keep a label at the end of a line
6936 # if we are instructed to keep all old line breaks
6937 || !$rOpts->{'delete-old-newlines'}
6940 destroy_one_line_block();
6941 $self->output_line_to_go();
6944 # mark old line breakpoints in current output stream
6945 if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
6946 my $jobp = $max_index_to_go;
6947 if ( $types_to_go[$max_index_to_go] eq 'b' && $max_index_to_go > 0 )
6951 $old_breakpoint_to_go[$jobp] = 1;
6954 } ## end sub print_line_of_tokens
6955 } ## end block print_line_of_tokens
6957 # sub output_line_to_go sends one logical line of tokens on down the
6958 # pipeline to the VerticalAligner package, breaking the line into continuation
6959 # lines as necessary. The line of tokens is ready to go in the "to_go"
6961 sub output_line_to_go {
6964 my $rLL = $self->{rLL};
6966 # debug stuff; this routine can be called from many points
6967 FORMATTER_DEBUG_FLAG_OUTPUT && do {
6968 my ( $a, $b, $c ) = caller;
6970 "OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
6972 my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
6973 write_diagnostics("$output_str\n");
6976 # Do not end line in a weld
6977 # TODO: Move this fix into the routine?
6978 #my $jnb = $max_index_to_go;
6979 #if ( $jnb > 0 && $types_to_go[$jnb] eq 'b' ) { $jnb-- }
6980 return if ( weld_len_right_to_go($max_index_to_go) );
6982 # just set a tentative breakpoint if we might be in a one-line block
6983 if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
6984 set_forced_breakpoint($max_index_to_go);
6988 ## my $cscw_block_comment;
6989 ## $cscw_block_comment = $self->add_closing_side_comment()
6990 ## if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
6992 my $comma_arrow_count_contained = match_opening_and_closing_tokens();
6994 # tell the -lp option we are outputting a batch so it can close
6995 # any unfinished items in its stack
6998 # If this line ends in a code block brace, set breaks at any
6999 # previous closing code block braces to breakup a chain of code
7000 # blocks on one line. This is very rare but can happen for
7001 # user-defined subs. For example we might be looking at this:
7002 # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
7003 my $saw_good_break = 0; # flag to force breaks even if short line
7006 # looking for opening or closing block brace
7007 $block_type_to_go[$max_index_to_go]
7009 # but not one of these which are never duplicated on a line:
7010 # until|while|for|if|elsif|else
7011 && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
7014 my $lev = $nesting_depth_to_go[$max_index_to_go];
7016 # Walk backwards from the end and
7017 # set break at any closing block braces at the same level.
7018 # But quit if we are not in a chain of blocks.
7019 for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
7020 last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
7021 next if ( $levels_to_go[$i] > $lev ); # skip past higher level
7023 if ( $block_type_to_go[$i] ) {
7024 if ( $tokens_to_go[$i] eq '}' ) {
7025 set_forced_breakpoint($i);
7026 $saw_good_break = 1;
7030 # quit if we see anything besides words, function, blanks
7032 elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
7037 my $imax = $max_index_to_go;
7039 # trim any blank tokens
7040 if ( $max_index_to_go >= 0 ) {
7041 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
7042 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
7045 # anything left to write?
7046 if ( $imin <= $imax ) {
7048 # add a blank line before certain key types but not after a comment
7049 if ( $last_line_leading_type !~ /^[#]/ ) {
7051 my $leading_token = $tokens_to_go[$imin];
7052 my $leading_type = $types_to_go[$imin];
7054 # blank lines before subs except declarations and one-liners
7055 # MCONVERSION LOCATION - for sub tokenization change
7056 if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
7057 $want_blank = $rOpts->{'blank-lines-before-subs'}
7059 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
7060 $imax ) !~ /^[\;\}]$/
7064 # break before all package declarations
7065 # MCONVERSION LOCATION - for tokenizaton change
7066 elsif ($leading_token =~ /^(package\s)/
7067 && $leading_type eq 'i' )
7069 $want_blank = $rOpts->{'blank-lines-before-packages'};
7072 # break before certain key blocks except one-liners
7073 if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
7074 $want_blank = $rOpts->{'blank-lines-before-subs'}
7076 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
7081 # Break before certain block types if we haven't had a
7082 # break at this level for a while. This is the
7083 # difficult decision..
7084 elsif ($leading_type eq 'k'
7085 && $last_line_leading_type ne 'b'
7086 && $leading_token =~ /^(unless|if|while|until|for|foreach)$/ )
7088 my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
7089 if ( !defined($lc) ) { $lc = 0 }
7092 $rOpts->{'blanks-before-blocks'}
7093 && $lc >= $rOpts->{'long-block-line-count'}
7094 && $file_writer_object->get_consecutive_nonblank_lines() >=
7095 $rOpts->{'long-block-line-count'}
7097 terminal_type( \@types_to_go, \@block_type_to_go, $imin,
7102 # Check for blank lines wanted before a closing brace
7103 if ( $leading_token eq '}' ) {
7104 if ( $rOpts->{'blank-lines-before-closing-block'}
7105 && $block_type_to_go[$imin]
7106 && $block_type_to_go[$imin] =~
7107 /$blank_lines_before_closing_block_pattern/ )
7109 my $nblanks = $rOpts->{'blank-lines-before-closing-block'};
7110 if ( $nblanks > $want_blank ) {
7111 $want_blank = $nblanks;
7118 # future: send blank line down normal path to VerticalAligner
7119 Perl::Tidy::VerticalAligner::flush();
7120 $file_writer_object->require_blank_code_lines($want_blank);
7124 # update blank line variables and count number of consecutive
7125 # non-blank, non-comment lines at this level
7126 $last_last_line_leading_level = $last_line_leading_level;
7127 $last_line_leading_level = $levels_to_go[$imin];
7128 if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
7129 $last_line_leading_type = $types_to_go[$imin];
7130 if ( $last_line_leading_level == $last_last_line_leading_level
7131 && $last_line_leading_type ne 'b'
7132 && $last_line_leading_type ne '#'
7133 && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
7135 $nonblank_lines_at_depth[$last_line_leading_level]++;
7138 $nonblank_lines_at_depth[$last_line_leading_level] = 1;
7141 FORMATTER_DEBUG_FLAG_FLUSH && do {
7142 my ( $package, $file, $line ) = caller;
7144 "FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
7147 # add a couple of extra terminal blank tokens
7150 # set all forced breakpoints for good list formatting
7151 my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
7153 my $old_line_count_in_batch =
7154 $self->get_old_line_count( $K_to_go[0], $K_to_go[$max_index_to_go] );
7158 || $old_line_count_in_batch > 1
7160 # must always call scan_list() with unbalanced batches because it
7161 # is maintaining some stacks
7162 || is_unbalanced_batch()
7164 # call scan_list if we might want to break at commas
7166 $comma_count_in_batch
7167 && ( $rOpts_maximum_fields_per_table > 0
7168 || $rOpts_comma_arrow_breakpoints == 0 )
7171 # call scan_list if user may want to break open some one-line
7173 || ( $comma_arrow_count_contained
7174 && $rOpts_comma_arrow_breakpoints != 3 )
7177 ## This caused problems in one version of perl for unknown reasons:
7178 ## $saw_good_break ||= scan_list();
7179 my $sgb = scan_list();
7180 $saw_good_break ||= $sgb;
7183 # let $ri_first and $ri_last be references to lists of
7184 # first and last tokens of line fragments to output..
7185 my ( $ri_first, $ri_last );
7187 # write a single line if..
7190 # we aren't allowed to add any newlines
7191 !$rOpts_add_newlines
7193 # or, we don't already have an interior breakpoint
7194 # and we didn't see a good breakpoint
7196 !$forced_breakpoint_count
7199 # and this line is 'short'
7204 @{$ri_first} = ($imin);
7205 @{$ri_last} = ($imax);
7208 # otherwise use multiple lines
7211 ( $ri_first, $ri_last, my $colon_count ) =
7212 set_continuation_breaks($saw_good_break);
7214 break_all_chain_tokens( $ri_first, $ri_last );
7216 break_equals( $ri_first, $ri_last );
7218 # now we do a correction step to clean this up a bit
7219 # (The only time we would not do this is for debugging)
7220 if ( $rOpts->{'recombine'} ) {
7221 ( $ri_first, $ri_last ) =
7222 recombine_breakpoints( $ri_first, $ri_last );
7225 insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
7228 # do corrector step if -lp option is used
7230 if ($rOpts_line_up_parentheses) {
7231 $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
7233 $self->unmask_phantom_semicolons( $ri_first, $ri_last );
7234 $self->send_lines_to_vertical_aligner( $ri_first, $ri_last,
7237 # Insert any requested blank lines after an opening brace. We have to
7238 # skip back before any side comment to find the terminal token
7240 for ( $iterm = $imax ; $iterm >= $imin ; $iterm-- ) {
7241 next if $types_to_go[$iterm] eq '#';
7242 next if $types_to_go[$iterm] eq 'b';
7246 # write requested number of blank lines after an opening block brace
7247 if ( $iterm >= $imin && $types_to_go[$iterm] eq '{' ) {
7248 if ( $rOpts->{'blank-lines-after-opening-block'}
7249 && $block_type_to_go[$iterm]
7250 && $block_type_to_go[$iterm] =~
7251 /$blank_lines_after_opening_block_pattern/ )
7253 my $nblanks = $rOpts->{'blank-lines-after-opening-block'};
7254 Perl::Tidy::VerticalAligner::flush();
7255 $file_writer_object->require_blank_code_lines($nblanks);
7260 prepare_for_new_input_lines();
7262 ## # output any new -cscw block comment
7263 ## if ($cscw_block_comment) {
7265 ## $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
7270 sub note_added_semicolon {
7271 my ($line_number) = @_;
7272 $last_added_semicolon_at = $line_number;
7273 if ( $added_semicolon_count == 0 ) {
7274 $first_added_semicolon_at = $last_added_semicolon_at;
7276 $added_semicolon_count++;
7277 write_logfile_entry("Added ';' here\n");
7281 sub note_deleted_semicolon {
7282 $last_deleted_semicolon_at = $input_line_number;
7283 if ( $deleted_semicolon_count == 0 ) {
7284 $first_deleted_semicolon_at = $last_deleted_semicolon_at;
7286 $deleted_semicolon_count++;
7287 write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;)
7291 sub note_embedded_tab {
7292 $embedded_tab_count++;
7293 $last_embedded_tab_at = $input_line_number;
7294 if ( !$first_embedded_tab_at ) {
7295 $first_embedded_tab_at = $last_embedded_tab_at;
7298 if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
7299 write_logfile_entry("Embedded tabs in quote or pattern\n");
7304 sub starting_one_line_block {
7306 # after seeing an opening curly brace, look for the closing brace
7307 # and see if the entire block will fit on a line. This routine is
7308 # not always right because it uses the old whitespace, so a check
7309 # is made later (at the closing brace) to make sure we really
7310 # have a one-line block. We have to do this preliminary check,
7311 # though, because otherwise we would always break at a semicolon
7312 # within a one-line block if the block contains multiple statements.
7314 my ( $self, $j, $jmax, $level, $slevel, $ci_level, $rtoken_array ) = @_;
7315 my $rbreak_container = $self->{rbreak_container};
7317 my $jmax_check = @{$rtoken_array};
7318 if ( $jmax_check < $jmax ) {
7319 print STDERR "jmax=$jmax > $jmax_check\n";
7322 # kill any current block - we can only go 1 deep
7323 destroy_one_line_block();
7326 # 1=distance from start of block to opening brace exceeds line length
7331 # shouldn't happen: there must have been a prior call to
7332 # store_token_to_go to put the opening brace in the output stream
7333 if ( $max_index_to_go < 0 ) {
7334 Fault("program bug: store_token_to_go called incorrectly\n");
7336 #warning("program bug: store_token_to_go called incorrectly\n");
7337 ##report_definite_bug();
7340 # return if block should be broken
7341 my $type_sequence = $rtoken_array->[$j]->[_TYPE_SEQUENCE_];
7342 if ( $rbreak_container->{$type_sequence} ) {
7346 my $block_type = $rtoken_array->[$j]->[_BLOCK_TYPE_];
7348 # find the starting keyword for this block (such as 'if', 'else', ...)
7350 if ( $block_type =~ /^[\{\}\;\:]$/ || $block_type =~ /^package/ ) {
7351 $i_start = $max_index_to_go;
7354 # the previous nonblank token should start these block types
7355 elsif (( $last_last_nonblank_token_to_go eq $block_type )
7356 || ( $block_type =~ /^sub\b/ )
7357 || $block_type =~ /\(\)/ )
7359 $i_start = $last_last_nonblank_index_to_go;
7361 # For signatures and extended syntax ...
7362 # If this brace follows a parenthesized list, we should look back to
7363 # find the keyword before the opening paren because otherwise we might
7364 # form a one line block which stays intack, and cause the parenthesized
7365 # expression to break open. That looks bad. However, actually
7366 # searching for the opening paren is slow and tedius.
7367 # The actual keyword is often at the start of a line, but might not be.
7368 # For example, we might have an anonymous sub with signature list
7369 # following a =>. It is safe to mark the start anywhere before the
7370 # opening paren, so we just go back to the prevoious break (or start of
7371 # the line) if that is before the opening paren. The minor downside is
7372 # that we may very occasionally break open a block unnecessarily.
7373 if ( $tokens_to_go[$i_start] eq ')' ) {
7374 $i_start = $index_max_forced_break + 1;
7375 if ( $types_to_go[$i_start] eq 'b' ) { $i_start++; }
7376 my $lev = $levels_to_go[$i_start];
7377 if ( $lev > $level ) { return 0 }
7381 elsif ( $last_last_nonblank_token_to_go eq ')' ) {
7383 # For something like "if (xxx) {", the keyword "if" will be
7384 # just after the most recent break. This will be 0 unless
7385 # we have just killed a one-line block and are starting another.
7387 # Note: cannot use inext_index_to_go[] here because that array
7388 # is still being constructed.
7389 $i_start = $index_max_forced_break + 1;
7390 if ( $types_to_go[$i_start] eq 'b' ) {
7394 # Patch to avoid breaking short blocks defined with extended_syntax:
7395 # Strip off any trailing () which was added in the parser to mark
7396 # the opening keyword. For example, in the following
7397 # create( TypeFoo $e) {$bubba}
7398 # the blocktype would be marked as create()
7399 my $stripped_block_type = $block_type;
7400 $stripped_block_type =~ s/\(\)$//;
7402 unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
7407 # patch for SWITCH/CASE to retain one-line case/when blocks
7408 elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
7410 # Note: cannot use inext_index_to_go[] here because that array
7411 # is still being constructed.
7412 $i_start = $index_max_forced_break + 1;
7413 if ( $types_to_go[$i_start] eq 'b' ) {
7416 unless ( $tokens_to_go[$i_start] eq $block_type ) {
7425 my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
7427 # see if length is too long to even start
7428 if ( $pos > maximum_line_length($i_start) ) {
7432 foreach my $i ( $j + 1 .. $jmax ) {
7434 # old whitespace could be arbitrarily large, so don't use it
7435 if ( $rtoken_array->[$i]->[_TYPE_] eq 'b' ) { $pos += 1 }
7436 else { $pos += rtoken_length($i) }
7438 # Return false result if we exceed the maximum line length,
7439 if ( $pos > maximum_line_length($i_start) ) {
7443 # or encounter another opening brace before finding the closing brace.
7444 elsif ($rtoken_array->[$i]->[_TOKEN_] eq '{'
7445 && $rtoken_array->[$i]->[_TYPE_] eq '{'
7446 && $rtoken_array->[$i]->[_BLOCK_TYPE_] )
7451 # if we find our closing brace..
7452 elsif ($rtoken_array->[$i]->[_TOKEN_] eq '}'
7453 && $rtoken_array->[$i]->[_TYPE_] eq '}'
7454 && $rtoken_array->[$i]->[_BLOCK_TYPE_] )
7457 # be sure any trailing comment also fits on the line
7459 ( $rtoken_array->[ $i + 1 ]->[_TYPE_] eq 'b' ) ? $i + 2 : $i + 1;
7461 # Patch for one-line sort/map/grep/eval blocks with side comments:
7462 # We will ignore the side comment length for sort/map/grep/eval
7463 # because this can lead to statements which change every time
7464 # perltidy is run. Here is an example from Denis Moskowitz which
7465 # oscillates between these two states without this patch:
7468 ## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
7473 ## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
7477 # When the first line is input it gets broken apart by the main
7478 # line break logic in sub print_line_of_tokens.
7479 # When the second line is input it gets recombined by
7480 # print_line_of_tokens and passed to the output routines. The
7481 # output routines (set_continuation_breaks) do not break it apart
7482 # because the bond strengths are set to the highest possible value
7483 # for grep/map/eval/sort blocks, so the first version gets output.
7484 # It would be possible to fix this by changing bond strengths,
7485 # but they are high to prevent errors in older versions of perl.
7487 if ( $rtoken_array->[$i_nonblank]->[_TYPE_] eq '#'
7488 && !$is_sort_map_grep{$block_type} )
7491 $pos += rtoken_length($i_nonblank);
7493 if ( $i_nonblank > $i + 1 ) {
7495 # source whitespace could be anything, assume
7496 # at least one space before the hash on output
7497 if ( $rtoken_array->[ $i + 1 ]->[_TYPE_] eq 'b' ) {
7500 else { $pos += rtoken_length( $i + 1 ) }
7503 if ( $pos >= maximum_line_length($i_start) ) {
7508 # ok, it's a one-line block
7509 create_one_line_block( $i_start, 20 );
7513 # just keep going for other characters
7518 # Allow certain types of new one-line blocks to form by joining
7519 # input lines. These can be safely done, but for other block types,
7520 # we keep old one-line blocks but do not form new ones. It is not
7521 # always a good idea to make as many one-line blocks as possible,
7522 # so other types are not done. The user can always use -mangle.
7523 if ( $is_sort_map_grep_eval{$block_type} ) {
7524 create_one_line_block( $i_start, 1 );
7529 sub unstore_token_to_go {
7531 # remove most recent token from output stream
7533 if ( $max_index_to_go > 0 ) {
7537 $max_index_to_go = UNDEFINED_INDEX;
7542 sub want_blank_line {
7545 $file_writer_object->want_blank_line();
7549 sub write_unindented_line {
7550 my ( $self, $line ) = @_;
7552 $file_writer_object->write_line($line);
7558 # Undo continuation indentation in certain sequences
7559 # For example, we can undo continuation indentation in sort/map/grep chains
7560 # my $dat1 = pack( "n*",
7561 # map { $_, $lookup->{$_} }
7562 # sort { $a <=> $b }
7563 # grep { $lookup->{$_} ne $default } keys %$lookup );
7564 # To align the map/sort/grep keywords like this:
7565 # my $dat1 = pack( "n*",
7566 # map { $_, $lookup->{$_} }
7567 # sort { $a <=> $b }
7568 # grep { $lookup->{$_} ne $default } keys %$lookup );
7569 my ( $ri_first, $ri_last ) = @_;
7570 my ( $line_1, $line_2, $lev_last );
7571 my $this_line_is_semicolon_terminated;
7572 my $max_line = @{$ri_first} - 1;
7574 # looking at each line of this batch..
7575 # We are looking at leading tokens and looking for a sequence
7576 # all at the same level and higher level than enclosing lines.
7577 foreach my $line ( 0 .. $max_line ) {
7579 my $ibeg = $ri_first->[$line];
7580 my $lev = $levels_to_go[$ibeg];
7583 # if we have started a chain..
7586 # see if it continues..
7587 if ( $lev == $lev_last ) {
7588 if ( $types_to_go[$ibeg] eq 'k'
7589 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
7592 # chain continues...
7593 # check for chain ending at end of a statement
7594 if ( $line == $max_line ) {
7596 # see of this line ends a statement
7597 my $iend = $ri_last->[$line];
7598 $this_line_is_semicolon_terminated =
7599 $types_to_go[$iend] eq ';'
7601 # with possible side comment
7602 || ( $types_to_go[$iend] eq '#'
7603 && $iend - $ibeg >= 2
7604 && $types_to_go[ $iend - 2 ] eq ';'
7605 && $types_to_go[ $iend - 1 ] eq 'b' );
7607 $line_2 = $line if ($this_line_is_semicolon_terminated);
7615 elsif ( $lev < $lev_last ) {
7617 # chain ends with previous line
7618 $line_2 = $line - 1;
7620 elsif ( $lev > $lev_last ) {
7626 # undo the continuation indentation if a chain ends
7627 if ( defined($line_2) && defined($line_1) ) {
7628 my $continuation_line_count = $line_2 - $line_1 + 1;
7629 @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ] =
7630 (0) x ($continuation_line_count)
7631 if ( $continuation_line_count >= 0 );
7632 @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ]
7633 = @reduced_spaces_to_go[ @{$ri_first}
7634 [ $line_1 .. $line_2 ] ];
7639 # not in a chain yet..
7642 # look for start of a new sort/map/grep chain
7643 if ( $lev > $lev_last ) {
7644 if ( $types_to_go[$ibeg] eq 'k'
7645 && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
7659 # If there is a single, long parameter within parens, like this:
7661 # $self->command( "/msg "
7663 # . " You said $1, but did you know that it's square was "
7664 # . $1 * $1 . " ?" );
7666 # we can remove the continuation indentation of the 2nd and higher lines
7667 # to achieve this effect, which is more pleasing:
7669 # $self->command("/msg "
7671 # . " You said $1, but did you know that it's square was "
7672 # . $1 * $1 . " ?");
7674 my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
7675 my $max_line = @{$ri_first} - 1;
7677 # must be multiple lines
7678 return unless $max_line > $line_open;
7680 my $lev_start = $levels_to_go[$i_start];
7681 my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
7683 # see if all additional lines in this container have continuation
7686 my $line_1 = 1 + $line_open;
7687 for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
7688 my $ibeg = $ri_first->[$n];
7689 my $iend = $ri_last->[$n];
7690 if ( $ibeg eq $closing_index ) { $n--; last }
7691 return if ( $lev_start != $levels_to_go[$ibeg] );
7692 return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
7693 last if ( $closing_index <= $iend );
7696 # we can reduce the indentation of all continuation lines
7697 my $continuation_line_count = $n - $line_open;
7698 @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
7699 (0) x ($continuation_line_count);
7700 @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
7701 @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
7707 # insert $pad_spaces before token number $ipad
7708 my ( $ipad, $pad_spaces ) = @_;
7709 if ( $pad_spaces > 0 ) {
7710 $tokens_to_go[$ipad] = ' ' x $pad_spaces . $tokens_to_go[$ipad];
7712 elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) {
7713 $tokens_to_go[$ipad] = "";
7721 $token_lengths_to_go[$ipad] += $pad_spaces;
7722 foreach my $i ( $ipad .. $max_index_to_go ) {
7723 $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
7733 my @q = qw( + - * / );
7734 @is_math_op{@q} = (1) x scalar(@q);
7737 sub set_logical_padding {
7739 # Look at a batch of lines and see if extra padding can improve the
7740 # alignment when there are certain leading operators. Here is an
7741 # example, in which some extra space is introduced before
7742 # '( $year' to make it line up with the subsequent lines:
7744 # if ( ( $Year < 1601 )
7745 # || ( $Year > 2899 )
7746 # || ( $EndYear < 1601 )
7747 # || ( $EndYear > 2899 ) )
7749 # &Error_OutOfRange;
7752 my ( $ri_first, $ri_last ) = @_;
7753 my $max_line = @{$ri_first} - 1;
7755 # FIXME: move these declarations below
7756 my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces,
7757 $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
7759 # looking at each line of this batch..
7760 foreach my $line ( 0 .. $max_line - 1 ) {
7762 # see if the next line begins with a logical operator
7763 $ibeg = $ri_first->[$line];
7764 $iend = $ri_last->[$line];
7765 $ibeg_next = $ri_first->[ $line + 1 ];
7766 $tok_next = $tokens_to_go[$ibeg_next];
7767 $type_next = $types_to_go[$ibeg_next];
7769 $has_leading_op_next = ( $tok_next =~ /^\w/ )
7770 ? $is_chain_operator{$tok_next} # + - * / : ? && ||
7771 : $is_chain_operator{$type_next}; # and, or
7773 next unless ($has_leading_op_next);
7775 # next line must not be at lesser depth
7777 if ( $nesting_depth_to_go[$ibeg] >
7778 $nesting_depth_to_go[$ibeg_next] );
7780 # identify the token in this line to be padded on the left
7783 # handle lines at same depth...
7784 if ( $nesting_depth_to_go[$ibeg] ==
7785 $nesting_depth_to_go[$ibeg_next] )
7788 # if this is not first line of the batch ...
7791 # and we have leading operator..
7792 next if $has_leading_op;
7794 # Introduce padding if..
7795 # 1. the previous line is at lesser depth, or
7796 # 2. the previous line ends in an assignment
7797 # 3. the previous line ends in a 'return'
7798 # 4. the previous line ends in a comma
7799 # Example 1: previous line at lesser depth
7800 # if ( ( $Year < 1601 ) # <- we are here but
7801 # || ( $Year > 2899 ) # list has not yet
7802 # || ( $EndYear < 1601 ) # collapsed vertically
7803 # || ( $EndYear > 2899 ) )
7806 # Example 2: previous line ending in assignment:
7808 # $year % 4 ? 0 # <- We are here
7813 # Example 3: previous line ending in comma:
7820 # be sure levels agree (do not indent after an indented 'if')
7822 if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
7824 # allow padding on first line after a comma but only if:
7825 # (1) this is line 2 and
7826 # (2) there are at more than three lines and
7827 # (3) lines 3 and 4 have the same leading operator
7828 # These rules try to prevent padding within a long
7829 # comma-separated list.
7831 if ( $types_to_go[$iendm] eq ','
7835 my $ibeg_next_next = $ri_first->[ $line + 2 ];
7836 my $tok_next_next = $tokens_to_go[$ibeg_next_next];
7837 $ok_comma = $tok_next_next eq $tok_next;
7842 $is_assignment{ $types_to_go[$iendm] }
7844 || ( $nesting_depth_to_go[$ibegm] <
7845 $nesting_depth_to_go[$ibeg] )
7846 || ( $types_to_go[$iendm] eq 'k'
7847 && $tokens_to_go[$iendm] eq 'return' )
7850 # we will add padding before the first token
7854 # for first line of the batch..
7857 # WARNING: Never indent if first line is starting in a
7858 # continued quote, which would change the quote.
7859 next if $starting_in_quote;
7861 # if this is text after closing '}'
7862 # then look for an interior token to pad
7863 if ( $types_to_go[$ibeg] eq '}' ) {
7867 # otherwise, we might pad if it looks really good
7870 # we might pad token $ibeg, so be sure that it
7871 # is at the same depth as the next line.
7873 if ( $nesting_depth_to_go[$ibeg] !=
7874 $nesting_depth_to_go[$ibeg_next] );
7876 # We can pad on line 1 of a statement if at least 3
7877 # lines will be aligned. Otherwise, it
7878 # can look very confusing.
7880 # We have to be careful not to pad if there are too few
7881 # lines. The current rule is:
7882 # (1) in general we require at least 3 consecutive lines
7883 # with the same leading chain operator token,
7884 # (2) but an exception is that we only require two lines
7885 # with leading colons if there are no more lines. For example,
7886 # the first $i in the following snippet would get padding
7887 # by the second rule:
7889 # $i == 1 ? ( "First", "Color" )
7890 # : $i == 2 ? ( "Then", "Rarity" )
7891 # : ( "Then", "Name" );
7893 if ( $max_line > 1 ) {
7894 my $leading_token = $tokens_to_go[$ibeg_next];
7897 # never indent line 1 of a '.' series because
7898 # previous line is most likely at same level.
7899 # TODO: we should also look at the leasing_spaces
7900 # of the last output line and skip if it is same
7902 next if ( $leading_token eq '.' );
7905 foreach my $l ( 2 .. 3 ) {
7906 last if ( $line + $l > $max_line );
7907 my $ibeg_next_next = $ri_first->[ $line + $l ];
7908 if ( $tokens_to_go[$ibeg_next_next] ne
7916 next if ($tokens_differ);
7917 next if ( $count < 3 && $leading_token ne ':' );
7927 # find interior token to pad if necessary
7928 if ( !defined($ipad) ) {
7930 for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
7932 # find any unclosed container
7934 unless ( $type_sequence_to_go[$i]
7935 && $mate_index_to_go[$i] > $iend );
7937 # find next nonblank token to pad
7938 $ipad = $inext_to_go[$i];
7939 last if ( $ipad > $iend );
7944 # We cannot pad the first leading token of a file because
7945 # it could cause a bug in which the starting indentation
7946 # level is guessed incorrectly each time the code is run
7947 # though perltidy, thus causing the code to march off to
7948 # the right. For example, the following snippet would have
7951 ## ov_method mycan( $package, '(""' ), $package
7952 ## or ov_method mycan( $package, '(0+' ), $package
7953 ## or ov_method mycan( $package, '(bool' ), $package
7954 ## or ov_method mycan( $package, '(nomethod' ), $package;
7956 # If this snippet is within a block this won't happen
7957 # unless the user just processes the snippet alone within
7958 # an editor. In that case either the user will see and
7959 # fix the problem or it will be corrected next time the
7960 # entire file is processed with perltidy.
7961 ##next if ( $ipad == 0 && $levels_to_go[$ipad] == 0 );
7962 next if ( $ipad == 0 && $peak_batch_size <= 1 );
7964 ## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
7965 ## IT DID MORE HARM THAN GOOD
7967 ## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
7970 ##? # do not put leading padding for just 2 lines of math
7971 ##? if ( $ipad == $ibeg
7973 ##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
7974 ##? && $is_math_op{$type_next}
7975 ##? && $line + 2 <= $max_line )
7977 ##? my $ibeg_next_next = $ri_first->[ $line + 2 ];
7978 ##? my $type_next_next = $types_to_go[$ibeg_next_next];
7979 ##? next if !$is_math_op{$type_next_next};
7982 # next line must not be at greater depth
7983 my $iend_next = $ri_last->[ $line + 1 ];
7985 if ( $nesting_depth_to_go[ $iend_next + 1 ] >
7986 $nesting_depth_to_go[$ipad] );
7988 # lines must be somewhat similar to be padded..
7989 my $inext_next = $inext_to_go[$ibeg_next];
7990 my $type = $types_to_go[$ipad];
7991 my $type_next = $types_to_go[ $ipad + 1 ];
7993 # see if there are multiple continuation lines
7994 my $logical_continuation_lines = 1;
7995 if ( $line + 2 <= $max_line ) {
7996 my $leading_token = $tokens_to_go[$ibeg_next];
7997 my $ibeg_next_next = $ri_first->[ $line + 2 ];
7998 if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
7999 && $nesting_depth_to_go[$ibeg_next] eq
8000 $nesting_depth_to_go[$ibeg_next_next] )
8002 $logical_continuation_lines++;
8006 # see if leading types match
8007 my $types_match = $types_to_go[$inext_next] eq $type;
8008 my $matches_without_bang;
8010 # if first line has leading ! then compare the following token
8011 if ( !$types_match && $type eq '!' ) {
8012 $types_match = $matches_without_bang =
8013 $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
8018 # either we have multiple continuation lines to follow
8019 # and we are not padding the first token
8020 ( $logical_continuation_lines > 1 && $ipad > 0 )
8028 # and keywords must match if keyword
8031 && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
8037 #----------------------begin special checks--------------
8040 # A check is needed before we can make the pad.
8041 # If we are in a list with some long items, we want each
8042 # item to stand out. So in the following example, the
8043 # first line beginning with '$casefold->' would look good
8044 # padded to align with the next line, but then it
8045 # would be indented more than the last line, so we
8049 # $casefold->{code} eq '0041'
8050 # && $casefold->{status} eq 'C'
8051 # && $casefold->{mapping} eq '0061',
8056 # It would be faster, and almost as good, to use a comma
8057 # count, and not pad if comma_count > 1 and the previous
8058 # line did not end with a comma.
8062 my $ibg = $ri_first->[ $line + 1 ];
8063 my $depth = $nesting_depth_to_go[ $ibg + 1 ];
8065 # just use simplified formula for leading spaces to avoid
8066 # needless sub calls
8067 my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
8069 # look at each line beyond the next ..
8071 foreach my $ltest ( $line + 2 .. $max_line ) {
8073 my $ibg = $ri_first->[$l];
8075 # quit looking at the end of this container
8077 if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
8078 || ( $nesting_depth_to_go[$ibg] < $depth );
8080 # cannot do the pad if a later line would be
8082 if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
8088 # don't pad if we end in a broken list
8089 if ( $l == $max_line ) {
8090 my $i2 = $ri_last->[$l];
8091 if ( $types_to_go[$i2] eq '#' ) {
8092 my $i1 = $ri_first->[$l];
8095 terminal_type( \@types_to_go, \@block_type_to_go,
8102 # a minus may introduce a quoted variable, and we will
8103 # add the pad only if this line begins with a bare word,
8104 # such as for the word 'Button' here:
8106 # Button => "Print letter \"~$_\"",
8107 # -command => [ sub { print "$_[0]\n" }, $_ ],
8108 # -accelerator => "Meta+$_"
8111 # On the other hand, if 'Button' is quoted, it looks best
8114 # 'Button' => "Print letter \"~$_\"",
8115 # -command => [ sub { print "$_[0]\n" }, $_ ],
8116 # -accelerator => "Meta+$_"
8118 if ( $types_to_go[$ibeg_next] eq 'm' ) {
8119 $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
8122 next unless $ok_to_pad;
8124 #----------------------end special check---------------
8126 my $length_1 = total_line_length( $ibeg, $ipad - 1 );
8127 my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
8128 $pad_spaces = $length_2 - $length_1;
8130 # If the first line has a leading ! and the second does
8131 # not, then remove one space to try to align the next
8132 # leading characters, which are often the same. For example:
8134 # || $ts == $self->Holder
8135 # || $self->Holder->Type eq "Arena" )
8137 # This usually helps readability, but if there are subsequent
8138 # ! operators things will still get messed up. For example:
8140 # if ( !exists $Net::DNS::typesbyname{$qtype}
8141 # && exists $Net::DNS::classesbyname{$qtype}
8142 # && !exists $Net::DNS::classesbyname{$qclass}
8143 # && exists $Net::DNS::typesbyname{$qclass} )
8144 # We can't fix that.
8145 if ($matches_without_bang) { $pad_spaces-- }
8147 # make sure this won't change if -lp is used
8148 my $indentation_1 = $leading_spaces_to_go[$ibeg];
8149 if ( ref($indentation_1) ) {
8150 if ( $indentation_1->get_recoverable_spaces() == 0 ) {
8151 my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
8152 unless ( $indentation_2->get_recoverable_spaces() == 0 )
8159 # we might be able to handle a pad of -1 by removing a blank
8161 if ( $pad_spaces < 0 ) {
8163 if ( $pad_spaces == -1 ) {
8164 if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' )
8166 pad_token( $ipad - 1, $pad_spaces );
8172 # now apply any padding for alignment
8173 if ( $ipad >= 0 && $pad_spaces ) {
8175 my $length_t = total_line_length( $ibeg, $iend );
8176 if ( $pad_spaces + $length_t <= maximum_line_length($ibeg) )
8178 pad_token( $ipad, $pad_spaces );
8186 $has_leading_op = $has_leading_op_next;
8187 } # end of loop over lines
8192 sub correct_lp_indentation {
8194 # When the -lp option is used, we need to make a last pass through
8195 # each line to correct the indentation positions in case they differ
8196 # from the predictions. This is necessary because perltidy uses a
8197 # predictor/corrector method for aligning with opening parens. The
8198 # predictor is usually good, but sometimes stumbles. The corrector
8199 # tries to patch things up once the actual opening paren locations
8201 my ( $ri_first, $ri_last ) = @_;
8204 # Note on flag '$do_not_pad':
8205 # We want to avoid a situation like this, where the aligner inserts
8206 # whitespace before the '=' to align it with a previous '=', because
8207 # otherwise the parens might become mis-aligned in a situation like
8208 # this, where the '=' has become aligned with the previous line,
8209 # pushing the opening '(' forward beyond where we want it.
8211 # $mkFloor::currentRoom = '';
8212 # $mkFloor::c_entry = $c->Entry(
8214 # -relief => 'sunken',
8218 # We leave it to the aligner to decide how to do this.
8220 # first remove continuation indentation if appropriate
8221 my $max_line = @{$ri_first} - 1;
8223 # looking at each line of this batch..
8224 my ( $ibeg, $iend );
8225 foreach my $line ( 0 .. $max_line ) {
8226 $ibeg = $ri_first->[$line];
8227 $iend = $ri_last->[$line];
8229 # looking at each token in this output line..
8230 foreach my $i ( $ibeg .. $iend ) {
8232 # How many space characters to place before this token
8233 # for special alignment. Actual padding is done in the
8236 # looking for next unvisited indentation item
8237 my $indentation = $leading_spaces_to_go[$i];
8238 if ( !$indentation->get_marked() ) {
8239 $indentation->set_marked(1);
8241 # looking for indentation item for which we are aligning
8242 # with parens, braces, and brackets
8243 next unless ( $indentation->get_align_paren() );
8245 # skip closed container on this line
8247 my $im = max( $ibeg, $iprev_to_go[$i] );
8248 if ( $type_sequence_to_go[$im]
8249 && $mate_index_to_go[$im] <= $iend )
8255 if ( $line == 1 && $i == $ibeg ) {
8259 # Ok, let's see what the error is and try to fix it
8261 my $predicted_pos = $indentation->get_spaces();
8264 # token is mid-line - use length to previous token
8265 $actual_pos = total_line_length( $ibeg, $i - 1 );
8267 # for mid-line token, we must check to see if all
8268 # additional lines have continuation indentation,
8269 # and remove it if so. Otherwise, we do not get
8271 my $closing_index = $indentation->get_closed();
8272 if ( $closing_index > $iend ) {
8273 my $ibeg_next = $ri_first->[ $line + 1 ];
8274 if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
8275 undo_lp_ci( $line, $i, $closing_index, $ri_first,
8280 elsif ( $line > 0 ) {
8282 # handle case where token starts a new line;
8283 # use length of previous line
8284 my $ibegm = $ri_first->[ $line - 1 ];
8285 my $iendm = $ri_last->[ $line - 1 ];
8286 $actual_pos = total_line_length( $ibegm, $iendm );
8290 if ( $types_to_go[ $iendm + 1 ] eq 'b' );
8294 # token is first character of first line of batch
8295 $actual_pos = $predicted_pos;
8298 my $move_right = $actual_pos - $predicted_pos;
8300 # done if no error to correct (gnu2.t)
8301 if ( $move_right == 0 ) {
8302 $indentation->set_recoverable_spaces($move_right);
8306 # if we have not seen closure for this indentation in
8307 # this batch, we can only pass on a request to the
8309 my $closing_index = $indentation->get_closed();
8311 if ( $closing_index < 0 ) {
8312 $indentation->set_recoverable_spaces($move_right);
8316 # If necessary, look ahead to see if there is really any
8317 # leading whitespace dependent on this whitespace, and
8318 # also find the longest line using this whitespace.
8319 # Since it is always safe to move left if there are no
8320 # dependents, we only need to do this if we may have
8321 # dependent nodes or need to move right.
8323 my $right_margin = 0;
8324 my $have_child = $indentation->get_have_child();
8326 my %saw_indentation;
8328 $saw_indentation{$indentation} = $indentation;
8330 if ( $have_child || $move_right > 0 ) {
8333 if ( $i == $ibeg ) {
8334 $max_length = total_line_length( $ibeg, $iend );
8337 # look ahead at the rest of the lines of this batch..
8338 foreach my $line_t ( $line + 1 .. $max_line ) {
8339 my $ibeg_t = $ri_first->[$line_t];
8340 my $iend_t = $ri_last->[$line_t];
8341 last if ( $closing_index <= $ibeg_t );
8343 # remember all different indentation objects
8344 my $indentation_t = $leading_spaces_to_go[$ibeg_t];
8345 $saw_indentation{$indentation_t} = $indentation_t;
8348 # remember longest line in the group
8349 my $length_t = total_line_length( $ibeg_t, $iend_t );
8350 if ( $length_t > $max_length ) {
8351 $max_length = $length_t;
8354 $right_margin = maximum_line_length($ibeg) - $max_length;
8355 if ( $right_margin < 0 ) { $right_margin = 0 }
8358 my $first_line_comma_count =
8359 grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
8360 my $comma_count = $indentation->get_comma_count();
8361 my $arrow_count = $indentation->get_arrow_count();
8363 # This is a simple approximate test for vertical alignment:
8364 # if we broke just after an opening paren, brace, bracket,
8365 # and there are 2 or more commas in the first line,
8366 # and there are no '=>'s,
8367 # then we are probably vertically aligned. We could set
8368 # an exact flag in sub scan_list, but this is good
8370 my $indentation_count = keys %saw_indentation;
8371 my $is_vertically_aligned =
8373 && $first_line_comma_count > 1
8374 && $indentation_count == 1
8375 && ( $arrow_count == 0 || $arrow_count == $line_count ) );
8377 # Make the move if possible ..
8380 # we can always move left
8383 # but we should only move right if we are sure it will
8384 # not spoil vertical alignment
8385 || ( $comma_count == 0 )
8386 || ( $comma_count > 0 && !$is_vertically_aligned )
8390 ( $move_right <= $right_margin )
8394 foreach ( keys %saw_indentation ) {
8395 $saw_indentation{$_}
8396 ->permanently_decrease_available_spaces( -$move );
8400 # Otherwise, record what we want and the vertical aligner
8401 # will try to recover it.
8403 $indentation->set_recoverable_spaces($move_right);
8411 # flush is called to output any tokens in the pipeline, so that
8412 # an alternate source of lines can be written in the correct order
8416 destroy_one_line_block();
8417 $self->output_line_to_go();
8418 Perl::Tidy::VerticalAligner::flush();
8422 sub reset_block_text_accumulator {
8424 # save text after 'if' and 'elsif' to append after 'else'
8425 if ($accumulating_text_for_block) {
8427 if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
8428 push @{$rleading_block_if_elsif_text}, $leading_block_text;
8431 $accumulating_text_for_block = "";
8432 $leading_block_text = "";
8433 $leading_block_text_level = 0;
8434 $leading_block_text_length_exceeded = 0;
8435 $leading_block_text_line_number = 0;
8436 $leading_block_text_line_length = 0;
8440 sub set_block_text_accumulator {
8442 $accumulating_text_for_block = $tokens_to_go[$i];
8443 if ( $accumulating_text_for_block !~ /^els/ ) {
8444 $rleading_block_if_elsif_text = [];
8446 $leading_block_text = "";
8447 $leading_block_text_level = $levels_to_go[$i];
8448 $leading_block_text_line_number = get_output_line_number();
8449 ##$vertical_aligner_object->get_output_line_number();
8450 $leading_block_text_length_exceeded = 0;
8452 # this will contain the column number of the last character
8453 # of the closing side comment
8454 $leading_block_text_line_length =
8455 length($csc_last_label) +
8456 length($accumulating_text_for_block) +
8457 length( $rOpts->{'closing-side-comment-prefix'} ) +
8458 $leading_block_text_level * $rOpts_indent_columns + 3;
8462 sub accumulate_block_text {
8465 # accumulate leading text for -csc, ignoring any side comments
8466 if ( $accumulating_text_for_block
8467 && !$leading_block_text_length_exceeded
8468 && $types_to_go[$i] ne '#' )
8471 my $added_length = $token_lengths_to_go[$i];
8472 $added_length += 1 if $i == 0;
8473 my $new_line_length = $leading_block_text_line_length + $added_length;
8475 # we can add this text if we don't exceed some limits..
8478 # we must not have already exceeded the text length limit
8479 length($leading_block_text) <
8480 $rOpts_closing_side_comment_maximum_text
8483 # the new total line length must be below the line length limit
8484 # or the new length must be below the text length limit
8485 # (ie, we may allow one token to exceed the text length limit)
8488 maximum_line_length_for_level($leading_block_text_level)
8490 || length($leading_block_text) + $added_length <
8491 $rOpts_closing_side_comment_maximum_text
8494 # UNLESS: we are adding a closing paren before the brace we seek.
8495 # This is an attempt to avoid situations where the ... to be
8496 # added are longer than the omitted right paren, as in:
8498 # foreach my $item (@a_rather_long_variable_name_here) {
8500 # } ## end foreach my $item (@a_rather_long_variable_name_here...
8503 $tokens_to_go[$i] eq ')'
8506 $i + 1 <= $max_index_to_go
8507 && $block_type_to_go[ $i + 1 ] eq
8508 $accumulating_text_for_block
8510 || ( $i + 2 <= $max_index_to_go
8511 && $block_type_to_go[ $i + 2 ] eq
8512 $accumulating_text_for_block )
8518 # add an extra space at each newline
8519 if ( $i == 0 ) { $leading_block_text .= ' ' }
8521 # add the token text
8522 $leading_block_text .= $tokens_to_go[$i];
8523 $leading_block_text_line_length = $new_line_length;
8526 # show that text was truncated if necessary
8527 elsif ( $types_to_go[$i] ne 'b' ) {
8528 $leading_block_text_length_exceeded = 1;
8529 $leading_block_text .= '...';
8536 my %is_if_elsif_else_unless_while_until_for_foreach;
8540 # These block types may have text between the keyword and opening
8541 # curly. Note: 'else' does not, but must be included to allow trailing
8542 # if/elsif text to be appended.
8543 # patch for SWITCH/CASE: added 'case' and 'when'
8545 qw(if elsif else unless while until for foreach case when catch);
8546 @is_if_elsif_else_unless_while_until_for_foreach{@q} =
8550 sub accumulate_csc_text {
8552 # called once per output buffer when -csc is used. Accumulates
8553 # the text placed after certain closing block braces.
8554 # Defines and returns the following for this buffer:
8556 my $block_leading_text = ""; # the leading text of the last '}'
8557 my $rblock_leading_if_elsif_text;
8558 my $i_block_leading_text =
8559 -1; # index of token owning block_leading_text
8560 my $block_line_count = 100; # how many lines the block spans
8561 my $terminal_type = 'b'; # type of last nonblank token
8562 my $i_terminal = 0; # index of last nonblank token
8563 my $terminal_block_type = "";
8565 # update most recent statement label
8566 $csc_last_label = "" unless ($csc_last_label);
8567 if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
8568 my $block_label = $csc_last_label;
8570 # Loop over all tokens of this batch
8571 for my $i ( 0 .. $max_index_to_go ) {
8572 my $type = $types_to_go[$i];
8573 my $block_type = $block_type_to_go[$i];
8574 my $token = $tokens_to_go[$i];
8576 # remember last nonblank token type
8577 if ( $type ne '#' && $type ne 'b' ) {
8578 $terminal_type = $type;
8579 $terminal_block_type = $block_type;
8583 my $type_sequence = $type_sequence_to_go[$i];
8584 if ( $block_type && $type_sequence ) {
8586 if ( $token eq '}' ) {
8588 # restore any leading text saved when we entered this block
8589 if ( defined( $block_leading_text{$type_sequence} ) ) {
8590 ( $block_leading_text, $rblock_leading_if_elsif_text )
8591 = @{ $block_leading_text{$type_sequence} };
8592 $i_block_leading_text = $i;
8593 delete $block_leading_text{$type_sequence};
8594 $rleading_block_if_elsif_text =
8595 $rblock_leading_if_elsif_text;
8598 if ( defined( $csc_block_label{$type_sequence} ) ) {
8599 $block_label = $csc_block_label{$type_sequence};
8600 delete $csc_block_label{$type_sequence};
8603 # if we run into a '}' then we probably started accumulating
8604 # at something like a trailing 'if' clause..no harm done.
8605 if ( $accumulating_text_for_block
8606 && $levels_to_go[$i] <= $leading_block_text_level )
8608 my $lev = $levels_to_go[$i];
8609 reset_block_text_accumulator();
8612 if ( defined( $block_opening_line_number{$type_sequence} ) )
8614 my $output_line_number = get_output_line_number();
8615 ##$vertical_aligner_object->get_output_line_number();
8617 $output_line_number -
8618 $block_opening_line_number{$type_sequence} + 1;
8619 delete $block_opening_line_number{$type_sequence};
8623 # Error: block opening line undefined for this line..
8624 # This shouldn't be possible, but it is not a
8625 # significant problem.
8629 elsif ( $token eq '{' ) {
8631 my $line_number = get_output_line_number();
8632 ##$vertical_aligner_object->get_output_line_number();
8633 $block_opening_line_number{$type_sequence} = $line_number;
8635 # set a label for this block, except for
8636 # a bare block which already has the label
8637 # A label can only be used on the next {
8638 if ( $block_type =~ /:$/ ) { $csc_last_label = "" }
8639 $csc_block_label{$type_sequence} = $csc_last_label;
8640 $csc_last_label = "";
8642 if ( $accumulating_text_for_block
8643 && $levels_to_go[$i] == $leading_block_text_level )
8646 if ( $accumulating_text_for_block eq $block_type ) {
8648 # save any leading text before we enter this block
8649 $block_leading_text{$type_sequence} = [
8650 $leading_block_text,
8651 $rleading_block_if_elsif_text
8653 $block_opening_line_number{$type_sequence} =
8654 $leading_block_text_line_number;
8655 reset_block_text_accumulator();
8659 # shouldn't happen, but not a serious error.
8660 # We were accumulating -csc text for block type
8661 # $accumulating_text_for_block and unexpectedly
8662 # encountered a '{' for block type $block_type.
8669 && $csc_new_statement_ok
8670 && $is_if_elsif_else_unless_while_until_for_foreach{$token}
8671 && $token =~ /$closing_side_comment_list_pattern/o )
8673 set_block_text_accumulator($i);
8677 # note: ignoring type 'q' because of tricks being played
8678 # with 'q' for hanging side comments
8679 if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
8680 $csc_new_statement_ok =
8681 ( $block_type || $type eq 'J' || $type eq ';' );
8684 && $accumulating_text_for_block
8685 && $levels_to_go[$i] == $leading_block_text_level )
8687 reset_block_text_accumulator();
8690 accumulate_block_text($i);
8695 # Treat an 'else' block specially by adding preceding 'if' and
8696 # 'elsif' text. Otherwise, the 'end else' is not helpful,
8697 # especially for cuddled-else formatting.
8698 if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
8699 $block_leading_text =
8700 make_else_csc_text( $i_terminal, $terminal_block_type,
8701 $block_leading_text, $rblock_leading_if_elsif_text );
8704 # if this line ends in a label then remember it for the next pass
8705 $csc_last_label = "";
8706 if ( $terminal_type eq 'J' ) {
8707 $csc_last_label = $tokens_to_go[$i_terminal];
8710 return ( $terminal_type, $i_terminal, $i_block_leading_text,
8711 $block_leading_text, $block_line_count, $block_label );
8715 sub make_else_csc_text {
8717 # create additional -csc text for an 'else' and optionally 'elsif',
8718 # depending on the value of switch
8719 # $rOpts_closing_side_comment_else_flag:
8721 # = 0 add 'if' text to trailing else
8722 # = 1 same as 0 plus:
8723 # add 'if' to 'elsif's if can fit in line length
8724 # add last 'elsif' to trailing else if can fit in one line
8725 # = 2 same as 1 but do not check if exceed line length
8727 # $rif_elsif_text = a reference to a list of all previous closing
8728 # side comments created for this if block
8730 my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
8731 my $csc_text = $block_leading_text;
8733 if ( $block_type eq 'elsif'
8734 && $rOpts_closing_side_comment_else_flag == 0 )
8739 my $count = @{$rif_elsif_text};
8740 return $csc_text unless ($count);
8742 my $if_text = '[ if' . $rif_elsif_text->[0];
8744 # always show the leading 'if' text on 'else'
8745 if ( $block_type eq 'else' ) {
8746 $csc_text .= $if_text;
8750 if ( $rOpts_closing_side_comment_else_flag == 0 ) {
8754 my $last_elsif_text = "";
8756 $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
8757 if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
8760 # tentatively append one more item
8761 my $saved_text = $csc_text;
8762 if ( $block_type eq 'else' ) {
8763 $csc_text .= $last_elsif_text;
8766 $csc_text .= ' ' . $if_text;
8769 # all done if no length checks requested
8770 if ( $rOpts_closing_side_comment_else_flag == 2 ) {
8774 # undo it if line length exceeded
8777 length($block_type) +
8778 length( $rOpts->{'closing-side-comment-prefix'} ) +
8779 $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
8780 if ( $length > maximum_line_length_for_level($leading_block_text_level) ) {
8781 $csc_text = $saved_text;
8786 { # sub balance_csc_text
8801 sub balance_csc_text {
8803 # Append characters to balance a closing side comment so that editors
8804 # such as vim can correctly jump through code.
8806 # input = ## end foreach my $foo ( sort { $b ...
8807 # output = ## end foreach my $foo ( sort { $b ...})
8809 # NOTE: This routine does not currently filter out structures within
8810 # quoted text because the bounce algorithms in text editors do not
8811 # necessarily do this either (a version of vim was checked and
8814 # Some complex examples which will cause trouble for some editors:
8815 # while ( $mask_string =~ /\{[^{]*?\}/g ) {
8816 # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
8817 # if ( $1 eq '{' ) {
8818 # test file test1/braces.pl has many such examples.
8822 # loop to examine characters one-by-one, RIGHT to LEFT and
8823 # build a balancing ending, LEFT to RIGHT.
8824 for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
8826 my $char = substr( $csc, $pos, 1 );
8828 # ignore everything except structural characters
8829 next unless ( $matching_char{$char} );
8831 # pop most recently appended character
8832 my $top = chop($csc);
8834 # push it back plus the mate to the newest character
8835 # unless they balance each other.
8836 $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
8839 # return the balanced string
8844 sub add_closing_side_comment {
8848 # add closing side comments after closing block braces if -csc used
8849 my $cscw_block_comment;
8851 #---------------------------------------------------------------
8852 # Step 1: loop through all tokens of this line to accumulate
8853 # the text needed to create the closing side comments. Also see
8854 # how the line ends.
8855 #---------------------------------------------------------------
8857 my ( $terminal_type, $i_terminal, $i_block_leading_text,
8858 $block_leading_text, $block_line_count, $block_label )
8859 = accumulate_csc_text();
8861 #---------------------------------------------------------------
8862 # Step 2: make the closing side comment if this ends a block
8863 #---------------------------------------------------------------
8864 ##my $have_side_comment = $i_terminal != $max_index_to_go;
8865 my $have_side_comment = $types_to_go[$max_index_to_go] eq '#';
8867 # if this line might end in a block closure..
8869 $terminal_type eq '}'
8874 # the block is long enough
8875 ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
8877 # or there is an existing comment to check
8878 || ( $have_side_comment
8879 && $rOpts->{'closing-side-comment-warnings'} )
8882 # .. and if this is one of the types of interest
8883 && $block_type_to_go[$i_terminal] =~
8884 /$closing_side_comment_list_pattern/o
8886 # .. but not an anonymous sub
8887 # These are not normally of interest, and their closing braces are
8888 # often followed by commas or semicolons anyway. This also avoids
8889 # possible erratic output due to line numbering inconsistencies
8890 # in the cases where their closing braces terminate a line.
8891 && $block_type_to_go[$i_terminal] ne 'sub'
8893 # ..and the corresponding opening brace must is not in this batch
8894 # (because we do not need to tag one-line blocks, although this
8895 # should also be caught with a positive -csci value)
8896 && $mate_index_to_go[$i_terminal] < 0
8901 # this is the last token (line doesn't have a side comment)
8904 # or the old side comment is a closing side comment
8905 || $tokens_to_go[$max_index_to_go] =~
8906 /$closing_side_comment_prefix_pattern/o
8911 # then make the closing side comment text
8912 if ($block_label) { $block_label .= " " }
8914 "$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
8916 # append any extra descriptive text collected above
8917 if ( $i_block_leading_text == $i_terminal ) {
8918 $token .= $block_leading_text;
8921 $token = balance_csc_text($token)
8922 if $rOpts->{'closing-side-comments-balanced'};
8924 $token =~ s/\s*$//; # trim any trailing whitespace
8926 # handle case of existing closing side comment
8927 if ($have_side_comment) {
8929 # warn if requested and tokens differ significantly
8930 if ( $rOpts->{'closing-side-comment-warnings'} ) {
8931 my $old_csc = $tokens_to_go[$max_index_to_go];
8932 my $new_csc = $token;
8933 $new_csc =~ s/\s+//g; # trim all whitespace
8934 $old_csc =~ s/\s+//g; # trim all whitespace
8935 $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
8936 $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
8937 $new_csc =~ s/(\.\.\.)$//; # trim trailing '...'
8938 my $new_trailing_dots = $1;
8939 $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
8941 # Patch to handle multiple closing side comments at
8942 # else and elsif's. These have become too complicated
8943 # to check, so if we see an indication of
8944 # '[ if' or '[ # elsif', then assume they were made
8946 if ( $block_type_to_go[$i_terminal] eq 'else' ) {
8947 if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
8949 elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
8950 if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
8953 # if old comment is contained in new comment,
8954 # only compare the common part.
8955 if ( length($new_csc) > length($old_csc) ) {
8956 $new_csc = substr( $new_csc, 0, length($old_csc) );
8959 # if the new comment is shorter and has been limited,
8960 # only compare the common part.
8961 if ( length($new_csc) < length($old_csc)
8962 && $new_trailing_dots )
8964 $old_csc = substr( $old_csc, 0, length($new_csc) );
8967 # any remaining difference?
8968 if ( $new_csc ne $old_csc ) {
8970 # just leave the old comment if we are below the threshold
8971 # for creating side comments
8972 if ( $block_line_count <
8973 $rOpts->{'closing-side-comment-interval'} )
8978 # otherwise we'll make a note of it
8982 "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
8985 # save the old side comment in a new trailing block
8988 if ( $rOpts->{'timestamp'} ) {
8989 my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
8992 $timestamp = "$year-$month-$day";
8994 $cscw_block_comment =
8995 "## perltidy -cscw $timestamp: $tokens_to_go[$max_index_to_go]";
8996 ## "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
9001 # No differences.. we can safely delete old comment if we
9002 # are below the threshold
9003 if ( $block_line_count <
9004 $rOpts->{'closing-side-comment-interval'} )
9007 $self->unstore_token_to_go()
9008 if ( $types_to_go[$max_index_to_go] eq '#' );
9009 $self->unstore_token_to_go()
9010 if ( $types_to_go[$max_index_to_go] eq 'b' );
9015 # switch to the new csc (unless we deleted it!)
9016 $tokens_to_go[$max_index_to_go] = $token if $token;
9019 # handle case of NO existing closing side comment
9022 # Remove any existing blank and add another below.
9023 # This is a tricky point. A side comment needs to have the same level
9024 # as the preceding closing brace or else the line will not get the right
9025 # indentation. So even if we have a blank, we are going to replace it.
9026 if ( $types_to_go[$max_index_to_go] eq 'b' ) {
9027 unstore_token_to_go();
9030 # insert the new side comment into the output token stream
9032 my $block_type = '';
9033 my $type_sequence = '';
9034 my $container_environment =
9035 $container_environment_to_go[$max_index_to_go];
9036 my $level = $levels_to_go[$max_index_to_go];
9037 my $slevel = $nesting_depth_to_go[$max_index_to_go];
9038 my $no_internal_newlines = 0;
9040 my $ci_level = $ci_levels_to_go[$max_index_to_go];
9041 my $in_continued_quote = 0;
9043 # insert a blank token
9044 $self->insert_new_token_to_go( ' ', 'b', $slevel,
9045 $no_internal_newlines );
9047 # then the side comment
9048 $self->insert_new_token_to_go( $token, $type, $slevel,
9049 $no_internal_newlines );
9052 return $cscw_block_comment;
9055 sub previous_nonblank_token {
9059 return "" if ( $im < 0 );
9060 if ( $types_to_go[$im] eq 'b' ) { $im--; }
9061 return "" if ( $im < 0 );
9062 $name = $tokens_to_go[$im];
9064 # prepend any sub name to an isolated -> to avoid unwanted alignments
9065 # [test case is test8/penco.pl]
9066 if ( $name eq '->' ) {
9068 if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
9069 $name = $tokens_to_go[$im] . $name;
9075 sub send_lines_to_vertical_aligner {
9077 my ( $self, $ri_first, $ri_last, $do_not_pad ) = @_;
9079 my $valign_batch_number = $self->increment_valign_batch_count();
9081 my $cscw_block_comment;
9082 if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 ) {
9083 $cscw_block_comment = $self->add_closing_side_comment();
9085 # Add or update any closing side comment
9086 if ( $types_to_go[$max_index_to_go] eq '#' ) {
9087 $ri_last->[-1] = $max_index_to_go;
9091 my $rindentation_list = [0]; # ref to indentations for each line
9093 # define the array @matching_token_to_go for the output tokens
9094 # which will be non-blank for each special token (such as =>)
9095 # for which alignment is required.
9096 set_vertical_alignment_markers( $ri_first, $ri_last );
9098 # flush if necessary to avoid unwanted alignment
9100 if ( @{$ri_first} > 1 ) {
9102 # flush before a long if statement
9103 if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
9108 Perl::Tidy::VerticalAligner::flush();
9111 undo_ci( $ri_first, $ri_last );
9113 set_logical_padding( $ri_first, $ri_last );
9115 # loop to prepare each line for shipment
9116 my $n_last_line = @{$ri_first} - 1;
9118 for my $n ( 0 .. $n_last_line ) {
9119 my $ibeg = $ri_first->[$n];
9120 my $iend = $ri_last->[$n];
9122 my ( $rtokens, $rfields, $rpatterns ) =
9123 make_alignment_patterns( $ibeg, $iend );
9125 # Set flag to show how much level changes between this line
9126 # and the next line, if we have it.
9128 if ( $n < $n_last_line ) {
9129 my $ibegp = $ri_first->[ $n + 1 ];
9130 $ljump = $levels_to_go[$ibegp] - $levels_to_go[$iend];
9133 my ( $indentation, $lev, $level_end, $terminal_type,
9134 $is_semicolon_terminated, $is_outdented_line )
9135 = $self->set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
9136 $ri_first, $ri_last, $rindentation_list, $ljump );
9138 # we will allow outdenting of long lines..
9139 my $outdent_long_lines = (
9141 # which are long quotes, if allowed
9142 ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
9144 # which are long block comments, if allowed
9146 $types_to_go[$ibeg] eq '#'
9147 && $rOpts->{'outdent-long-comments'}
9149 # but not if this is a static block comment
9150 && !$is_static_block_comment
9155 $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
9157 my $rvertical_tightness_flags =
9158 set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
9159 $ri_first, $ri_last );
9161 # flush an outdented line to avoid any unwanted vertical alignment
9162 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
9164 # Set a flag at the final ':' of a ternary chain to request
9165 # vertical alignment of the final term. Here is a
9166 # slightly complex example:
9168 # $self->{_text} = (
9170 # : $type eq 'item' ? "the $section entry"
9171 # : "the section on $section"
9175 # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
9176 # : ' elsewhere in this document'
9179 my $is_terminal_ternary = 0;
9180 if ( $tokens_to_go[$ibeg] eq ':'
9181 || $n > 0 && $tokens_to_go[ $ri_last->[ $n - 1 ] ] eq ':' )
9183 my $last_leading_type = ":";
9185 my $iprev = $ri_first->[ $n - 1 ];
9186 $last_leading_type = $types_to_go[$iprev];
9188 if ( $terminal_type ne ';'
9189 && $n_last_line > $n
9190 && $level_end == $lev )
9192 my $inext = $ri_first->[ $n + 1 ];
9193 $level_end = $levels_to_go[$inext];
9194 $terminal_type = $types_to_go[$inext];
9197 $is_terminal_ternary = $last_leading_type eq ':'
9198 && ( ( $terminal_type eq ';' && $level_end <= $lev )
9199 || ( $terminal_type ne ':' && $level_end < $lev ) )
9201 # the terminal term must not contain any ternary terms, as in
9203 # $Is_MSWin32 ? ".\\echo$$"
9204 # : $Is_MacOS ? ":echo$$"
9205 # : ( $Is_NetWare ? "echo$$" : "./echo$$" )
9207 && !grep { /^[\?\:]$/ } @types_to_go[ $ibeg + 1 .. $iend ];
9210 # send this new line down the pipe
9211 my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
9213 my $rvalign_hash = {};
9214 $rvalign_hash->{level} = $lev;
9215 $rvalign_hash->{level_end} = $level_end;
9216 $rvalign_hash->{indentation} = $indentation;
9217 $rvalign_hash->{is_forced_break} =
9218 $forced_breakpoint_to_go[$iend] || $in_comma_list;
9219 $rvalign_hash->{outdent_long_lines} = $outdent_long_lines;
9220 $rvalign_hash->{is_terminal_ternary} = $is_terminal_ternary;
9221 $rvalign_hash->{is_terminal_statement} = $is_semicolon_terminated;
9222 $rvalign_hash->{do_not_pad} = $do_not_pad;
9223 $rvalign_hash->{rvertical_tightness_flags} = $rvertical_tightness_flags;
9224 $rvalign_hash->{level_jump} = $level_jump;
9226 $rvalign_hash->{valign_batch_number} = $valign_batch_number;
9228 Perl::Tidy::VerticalAligner::valign_input( $rvalign_hash, $rfields,
9229 $rtokens, $rpatterns );
9232 $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
9234 # flush an outdented line to avoid any unwanted vertical alignment
9235 Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
9239 # Set flag indicating if this line ends in an opening
9240 # token and is very short, so that a blank line is not
9241 # needed if the subsequent line is a comment.
9242 # Examples of what we are looking for:
9248 $last_output_short_opening_token
9250 # line ends in opening token
9251 = $types_to_go[$iend] =~ /^[\{\(\[L]$/
9255 # line has either single opening token
9258 # or is a single token followed by opening token.
9259 # Note that sub identifiers have blanks like 'sub doit'
9260 || ( $iend - $ibeg <= 2 && $tokens_to_go[$ibeg] !~ /\s+/ )
9263 # and limit total to 10 character widths
9264 && token_sequence_length( $ibeg, $iend ) <= 10;
9266 } # end of loop to output each line
9268 # remember indentation of lines containing opening containers for
9269 # later use by sub set_adjusted_indentation
9270 save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
9272 # output any new -cscw block comment
9273 if ($cscw_block_comment) {
9274 Perl::Tidy::VerticalAligner::flush();
9275 $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
9280 { # begin make_alignment_patterns
9287 # map related block names into a common name to
9300 # map certain keywords to the same 'if' class to align
9301 # long if/elsif sequences. [elsif.pl]
9307 'default' => 'given',
9310 # treat an 'undef' similar to numbers and quotes
9315 sub make_alignment_patterns {
9317 # Here we do some important preliminary work for the
9318 # vertical aligner. We create three arrays for one
9319 # output line. These arrays contain strings that can
9320 # be tested by the vertical aligner to see if
9321 # consecutive lines can be aligned vertically.
9323 # The three arrays are indexed on the vertical
9324 # alignment fields and are:
9325 # @tokens - a list of any vertical alignment tokens for this line.
9326 # These are tokens, such as '=' '&&' '#' etc which
9327 # we want to might align vertically. These are
9328 # decorated with various information such as
9329 # nesting depth to prevent unwanted vertical
9330 # alignment matches.
9331 # @fields - the actual text of the line between the vertical alignment
9333 # @patterns - a modified list of token types, one for each alignment
9334 # field. These should normally each match before alignment is
9335 # allowed, even when the alignment tokens match.
9336 my ( $ibeg, $iend ) = @_;
9340 my $i_start = $ibeg;
9343 my @container_name = ("");
9344 my @multiple_comma_arrows = (undef);
9346 my $j = 0; # field index
9349 for my $i ( $ibeg .. $iend ) {
9351 # Keep track of containers balanced on this line only.
9352 # These are used below to prevent unwanted cross-line alignments.
9353 # Unbalanced containers already avoid aligning across
9354 # container boundaries.
9355 if ( $tokens_to_go[$i] eq '(' ) {
9357 # if container is balanced on this line...
9358 my $i_mate = $mate_index_to_go[$i];
9359 if ( $i_mate > $i && $i_mate <= $iend ) {
9361 my $seqno = $type_sequence_to_go[$i];
9362 my $count = comma_arrow_count($seqno);
9363 $multiple_comma_arrows[$depth] = $count && $count > 1;
9365 # Append the previous token name to make the container name
9366 # more unique. This name will also be given to any commas
9367 # within this container, and it helps avoid undesirable
9368 # alignments of different types of containers.
9369 my $name = previous_nonblank_token($i);
9371 $container_name[$depth] = "+" . $name;
9373 # Make the container name even more unique if necessary.
9374 # If we are not vertically aligning this opening paren,
9375 # append a character count to avoid bad alignment because
9376 # it usually looks bad to align commas within containers
9377 # for which the opening parens do not align. Here
9378 # is an example very BAD alignment of commas (because
9379 # the atan2 functions are not all aligned):
9381 # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
9382 # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
9383 # $X * atan2( $X, 1 ) -
9384 # $Y * atan2( $Y, 1 );
9386 # On the other hand, it is usually okay to align commas if
9387 # opening parens align, such as:
9388 # glVertex3d( $cx + $s * $xs, $cy, $z );
9389 # glVertex3d( $cx, $cy + $s * $ys, $z );
9390 # glVertex3d( $cx - $s * $xs, $cy, $z );
9391 # glVertex3d( $cx, $cy - $s * $ys, $z );
9393 # To distinguish between these situations, we will
9394 # append the length of the line from the previous matching
9395 # token, or beginning of line, to the function name. This
9396 # will allow the vertical aligner to reject undesirable
9399 # if we are not aligning on this paren...
9400 if ( $matching_token_to_go[$i] eq '' ) {
9402 # Sum length from previous alignment, or start of line.
9404 ( $i_start == $ibeg )
9405 ? total_line_length( $i_start, $i - 1 )
9406 : token_sequence_length( $i_start, $i - 1 );
9408 # tack length onto the container name to make unique
9409 $container_name[$depth] .= "-" . $len;
9413 elsif ( $tokens_to_go[$i] eq ')' ) {
9414 $depth-- if $depth > 0;
9417 # if we find a new synchronization token, we are done with
9419 if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
9421 my $tok = my $raw_tok = $matching_token_to_go[$i];
9424 if ( $tok eq '!~' ) { $tok = '=~' }
9426 # make separators in different nesting depths unique
9427 # by appending the nesting depth digit.
9428 if ( $raw_tok ne '#' ) {
9429 $tok .= "$nesting_depth_to_go[$i]";
9432 # also decorate commas with any container name to avoid
9433 # unwanted cross-line alignments.
9434 if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
9435 if ( $container_name[$depth] ) {
9436 $tok .= $container_name[$depth];
9440 # Patch to avoid aligning leading and trailing if, unless.
9441 # Mark trailing if, unless statements with container names.
9442 # This makes them different from leading if, unless which
9443 # are not so marked at present. If we ever need to name
9444 # them too, we could use ci to distinguish them.
9445 # Example problem to avoid:
9446 # return ( 2, "DBERROR" )
9447 # if ( $retval == 2 );
9448 # if ( scalar @_ ) {
9449 # my ( $a, $b, $c, $d, $e, $f ) = @_;
9451 if ( $raw_tok eq '(' ) {
9452 my $ci = $ci_levels_to_go[$ibeg];
9453 if ( $container_name[$depth] =~ /^\+(if|unless)/
9456 $tok .= $container_name[$depth];
9460 # Decorate block braces with block types to avoid
9461 # unwanted alignments such as the following:
9462 # foreach ( @{$routput_array} ) { $fh->print($_) }
9463 # eval { $fh->close() };
9464 if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
9465 my $block_type = $block_type_to_go[$i];
9467 # map certain related block types to allow
9468 # else blocks to align
9469 $block_type = $block_type_map{$block_type}
9470 if ( defined( $block_type_map{$block_type} ) );
9472 # remove sub names to allow one-line sub braces to align
9473 # regardless of name
9474 #if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
9475 if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
9477 # allow all control-type blocks to align
9478 if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
9480 $tok .= $block_type;
9483 # concatenate the text of the consecutive tokens to form
9486 join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
9488 # store the alignment token for this field
9489 push( @tokens, $tok );
9491 # get ready for the next batch
9497 # continue accumulating tokens
9498 # handle non-keywords..
9499 if ( $types_to_go[$i] ne 'k' ) {
9500 my $type = $types_to_go[$i];
9502 # Mark most things before arrows as a quote to
9503 # get them to line up. Testfile: mixed.pl.
9504 if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
9505 my $next_type = $types_to_go[ $i + 1 ];
9506 my $i_next_nonblank =
9507 ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
9509 if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
9512 # Patch to ignore leading minus before words,
9513 # by changing pattern 'mQ' into just 'Q',
9514 # so that we can align things like this:
9515 # Button => "Print letter \"~$_\"",
9516 # -command => [ sub { print "$_[0]\n" }, $_ ],
9517 if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
9521 # Convert a bareword within braces into a quote for matching. This will
9522 # allow alignment of expressions like this:
9523 # local ( $SIG{'INT'} ) = IGNORE;
9524 # local ( $SIG{ALRM} ) = 'POSTMAN';
9528 && $types_to_go[ $i - 1 ] eq 'L'
9529 && $types_to_go[ $i + 1 ] eq 'R' )
9534 # patch to make numbers and quotes align
9535 if ( $type eq 'n' ) { $type = 'Q' }
9537 # patch to ignore any ! in patterns
9538 if ( $type eq '!' ) { $type = '' }
9540 $patterns[$j] .= $type;
9543 # for keywords we have to use the actual text
9546 my $tok = $tokens_to_go[$i];
9548 # but map certain keywords to a common string to allow
9550 $tok = $keyword_map{$tok}
9551 if ( defined( $keyword_map{$tok} ) );
9552 $patterns[$j] .= $tok;
9556 # done with this line .. join text of tokens to make the last field
9557 push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
9558 return ( \@tokens, \@fields, \@patterns );
9561 } # end make_alignment_patterns
9563 { # begin unmatched_indexes
9565 # closure to keep track of unbalanced containers.
9566 # arrays shared by the routines in this block:
9567 my @unmatched_opening_indexes_in_this_batch;
9568 my @unmatched_closing_indexes_in_this_batch;
9569 my %comma_arrow_count;
9571 sub is_unbalanced_batch {
9572 return @unmatched_opening_indexes_in_this_batch +
9573 @unmatched_closing_indexes_in_this_batch;
9576 sub comma_arrow_count {
9578 return $comma_arrow_count{$seqno};
9581 sub match_opening_and_closing_tokens {
9583 # Match up indexes of opening and closing braces, etc, in this batch.
9584 # This has to be done after all tokens are stored because unstoring
9585 # of tokens would otherwise cause trouble.
9587 @unmatched_opening_indexes_in_this_batch = ();
9588 @unmatched_closing_indexes_in_this_batch = ();
9589 %comma_arrow_count = ();
9590 my $comma_arrow_count_contained = 0;
9592 foreach my $i ( 0 .. $max_index_to_go ) {
9593 if ( $type_sequence_to_go[$i] ) {
9594 my $token = $tokens_to_go[$i];
9595 if ( $token =~ /^[\(\[\{\?]$/ ) {
9596 push @unmatched_opening_indexes_in_this_batch, $i;
9598 elsif ( $token =~ /^[\)\]\}\:]$/ ) {
9600 my $i_mate = pop @unmatched_opening_indexes_in_this_batch;
9601 if ( defined($i_mate) && $i_mate >= 0 ) {
9602 if ( $type_sequence_to_go[$i_mate] ==
9603 $type_sequence_to_go[$i] )
9605 $mate_index_to_go[$i] = $i_mate;
9606 $mate_index_to_go[$i_mate] = $i;
9607 my $seqno = $type_sequence_to_go[$i];
9608 if ( $comma_arrow_count{$seqno} ) {
9609 $comma_arrow_count_contained +=
9610 $comma_arrow_count{$seqno};
9614 push @unmatched_opening_indexes_in_this_batch,
9616 push @unmatched_closing_indexes_in_this_batch, $i;
9620 push @unmatched_closing_indexes_in_this_batch, $i;
9624 elsif ( $tokens_to_go[$i] eq '=>' ) {
9625 if (@unmatched_opening_indexes_in_this_batch) {
9626 my $j = $unmatched_opening_indexes_in_this_batch[-1];
9627 my $seqno = $type_sequence_to_go[$j];
9628 $comma_arrow_count{$seqno}++;
9632 return $comma_arrow_count_contained;
9635 sub save_opening_indentation {
9637 # This should be called after each batch of tokens is output. It
9638 # saves indentations of lines of all unmatched opening tokens.
9639 # These will be used by sub get_opening_indentation.
9641 my ( $ri_first, $ri_last, $rindentation_list ) = @_;
9643 # we no longer need indentations of any saved indentations which
9644 # are unmatched closing tokens in this batch, because we will
9645 # never encounter them again. So we can delete them to keep
9646 # the hash size down.
9647 foreach (@unmatched_closing_indexes_in_this_batch) {
9648 my $seqno = $type_sequence_to_go[$_];
9649 delete $saved_opening_indentation{$seqno};
9652 # we need to save indentations of any unmatched opening tokens
9653 # in this batch because we may need them in a subsequent batch.
9654 foreach (@unmatched_opening_indexes_in_this_batch) {
9655 my $seqno = $type_sequence_to_go[$_];
9656 $saved_opening_indentation{$seqno} = [
9657 lookup_opening_indentation(
9658 $_, $ri_first, $ri_last, $rindentation_list
9664 } # end unmatched_indexes
9666 sub get_opening_indentation {
9668 # get the indentation of the line which output the opening token
9669 # corresponding to a given closing token in the current output batch.
9672 # $i_closing - index in this line of a closing token ')' '}' or ']'
9674 # $ri_first - reference to list of the first index $i for each output
9675 # line in this batch
9676 # $ri_last - reference to list of the last index $i for each output line
9678 # $rindentation_list - reference to a list containing the indentation
9679 # used for each line.
9682 # -the indentation of the line which contained the opening token
9683 # which matches the token at index $i_opening
9684 # -and its offset (number of columns) from the start of the line
9686 my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
9688 # first, see if the opening token is in the current batch
9689 my $i_opening = $mate_index_to_go[$i_closing];
9690 my ( $indent, $offset, $is_leading, $exists );
9692 if ( $i_opening >= 0 ) {
9694 # it is..look up the indentation
9695 ( $indent, $offset, $is_leading ) =
9696 lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
9697 $rindentation_list );
9700 # if not, it should have been stored in the hash by a previous batch
9702 my $seqno = $type_sequence_to_go[$i_closing];
9704 if ( $saved_opening_indentation{$seqno} ) {
9705 ( $indent, $offset, $is_leading ) =
9706 @{ $saved_opening_indentation{$seqno} };
9709 # some kind of serious error
9710 # (example is badfile.t)
9719 # if no sequence number it must be an unbalanced container
9727 return ( $indent, $offset, $is_leading, $exists );
9730 sub lookup_opening_indentation {
9732 # get the indentation of the line in the current output batch
9733 # which output a selected opening token
9736 # $i_opening - index of an opening token in the current output batch
9737 # whose line indentation we need
9738 # $ri_first - reference to list of the first index $i for each output
9739 # line in this batch
9740 # $ri_last - reference to list of the last index $i for each output line
9742 # $rindentation_list - reference to a list containing the indentation
9743 # used for each line. (NOTE: the first slot in
9744 # this list is the last returned line number, and this is
9745 # followed by the list of indentations).
9748 # -the indentation of the line which contained token $i_opening
9749 # -and its offset (number of columns) from the start of the line
9751 my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
9753 my $nline = $rindentation_list->[0]; # line number of previous lookup
9755 # reset line location if necessary
9756 $nline = 0 if ( $i_opening < $ri_start->[$nline] );
9758 # find the correct line
9759 unless ( $i_opening > $ri_last->[-1] ) {
9760 while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
9763 # error - token index is out of bounds - shouldn't happen
9766 "non-fatal program bug in lookup_opening_indentation - index out of range\n"
9768 report_definite_bug();
9769 $nline = $#{$ri_last};
9772 $rindentation_list->[0] =
9773 $nline; # save line number to start looking next call
9774 my $ibeg = $ri_start->[$nline];
9775 my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
9776 my $is_leading = ( $ibeg == $i_opening );
9777 return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
9781 my %is_if_elsif_else_unless_while_until_for_foreach;
9785 # These block types may have text between the keyword and opening
9786 # curly. Note: 'else' does not, but must be included to allow trailing
9787 # if/elsif text to be appended.
9788 # patch for SWITCH/CASE: added 'case' and 'when'
9789 my @q = qw(if elsif else unless while until for foreach case when);
9790 @is_if_elsif_else_unless_while_until_for_foreach{@q} =
9794 sub set_adjusted_indentation {
9796 # This routine has the final say regarding the actual indentation of
9797 # a line. It starts with the basic indentation which has been
9798 # defined for the leading token, and then takes into account any
9799 # options that the user has set regarding special indenting and
9803 $self, $ibeg, $iend,
9804 $rfields, $rpatterns, $ri_first,
9805 $ri_last, $rindentation_list, $level_jump
9808 my $rLL = $self->{rLL};
9810 # we need to know the last token of this line
9811 my ( $terminal_type, $i_terminal ) =
9812 terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
9814 my $is_outdented_line = 0;
9816 my $is_semicolon_terminated = $terminal_type eq ';'
9817 && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
9819 # NOTE: A future improvement would be to make it semicolon terminated
9820 # even if it does not have a semicolon but is followed by a closing
9821 # block brace. This would undo ci even for something like the
9822 # following, in which the final paren does not have a semicolon because
9823 # it is a possible weld location:
9827 # $labels, $comment,
9828 # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
9833 # MOJO: Set a flag if this lines begins with ')->'
9834 my $leading_paren_arrow = (
9835 $types_to_go[$ibeg] eq '}'
9836 && $tokens_to_go[$ibeg] eq ')'
9838 ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' )
9839 || ( $ibeg < $i_terminal - 1
9840 && $types_to_go[ $ibeg + 1 ] eq 'b'
9841 && $types_to_go[ $ibeg + 2 ] eq '->' )
9845 ##########################################################
9846 # Section 1: set a flag and a default indentation
9848 # Most lines are indented according to the initial token.
9849 # But it is common to outdent to the level just after the
9850 # terminal token in certain cases...
9851 # adjust_indentation flag:
9854 # 2 - vertically align with opening token
9856 ##########################################################
9857 my $adjust_indentation = 0;
9858 my $default_adjust_indentation = $adjust_indentation;
9861 $opening_indentation, $opening_offset,
9862 $is_leading, $opening_exists
9865 # if we are at a closing token of some type..
9866 if ( $types_to_go[$ibeg] =~ /^[\)\}\]R]$/ ) {
9868 # get the indentation of the line containing the corresponding
9871 $opening_indentation, $opening_offset,
9872 $is_leading, $opening_exists
9874 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
9875 $rindentation_list );
9877 # First set the default behavior:
9880 # default behavior is to outdent closing lines
9881 # of the form: "); }; ]; )->xxx;"
9882 $is_semicolon_terminated
9884 # and 'cuddled parens' of the form: ")->pack("
9885 # Bug fix for RT #123749]: the types here were
9886 # incorrectly '(' and ')'. Corrected to be '{' and '}'
9888 $terminal_type eq '{'
9889 && $types_to_go[$ibeg] eq '}'
9890 && ( $nesting_depth_to_go[$iend] + 1 ==
9891 $nesting_depth_to_go[$ibeg] )
9894 # remove continuation indentation for any line like
9896 # or without ending '{' and unbalanced, such as
9897 # such as '}->{$operator}'
9899 $types_to_go[$ibeg] eq '}'
9901 && ( $types_to_go[$iend] eq '{'
9902 || $levels_to_go[$iend] < $levels_to_go[$ibeg] )
9905 # and when the next line is at a lower indentation level
9906 # PATCH: and only if the style allows undoing continuation
9907 # for all closing token types. We should really wait until
9908 # the indentation of the next line is known and then make
9909 # a decision, but that would require another pass.
9910 || ( $level_jump < 0 && !$some_closing_token_indentation )
9912 # Patch for -wn=2, multiple welded closing tokens
9913 || ( $i_terminal > $ibeg
9914 && $types_to_go[$iend] =~ /^[\)\}\]R]$/ )
9918 $adjust_indentation = 1;
9921 # outdent something like '),'
9923 $terminal_type eq ','
9925 # Removed this constraint for -wn
9926 # OLD: allow just one character before the comma
9927 # && $i_terminal == $ibeg + 1
9929 # require LIST environment; otherwise, we may outdent too much -
9930 # this can happen in calls without parentheses (overload.t);
9931 && $container_environment_to_go[$i_terminal] eq 'LIST'
9934 $adjust_indentation = 1;
9937 # undo continuation indentation of a terminal closing token if
9938 # it is the last token before a level decrease. This will allow
9939 # a closing token to line up with its opening counterpart, and
9940 # avoids a indentation jump larger than 1 level.
9941 my $K_beg = $K_to_go[$ibeg];
9942 if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
9943 && $i_terminal == $ibeg
9944 && defined($K_beg) )
9946 my $K_next_nonblank = $self->K_next_code($K_beg);
9947 if ( defined($K_next_nonblank) ) {
9948 my $lev = $rLL->[$K_beg]->[_LEVEL_];
9949 my $level_next = $rLL->[$K_next_nonblank]->[_LEVEL_];
9950 $adjust_indentation = 1 if ( $level_next < $lev );
9953 # Patch for RT #96101, in which closing brace of anonymous subs
9954 # was not outdented. We should look ahead and see if there is
9955 # a level decrease at the next token (i.e., a closing token),
9956 # but right now we do not have that information. For now
9957 # we see if we are in a list, and this works well.
9958 # See test files 'sub*.t' for good test cases.
9959 if ( $block_type_to_go[$ibeg] =~ /$ASUB_PATTERN/
9960 && $container_environment_to_go[$i_terminal] eq 'LIST'
9961 && !$rOpts->{'indent-closing-brace'} )
9964 $opening_indentation, $opening_offset,
9965 $is_leading, $opening_exists
9967 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
9968 $rindentation_list );
9969 my $indentation = $leading_spaces_to_go[$ibeg];
9970 if ( defined($opening_indentation)
9971 && get_spaces($indentation) >
9972 get_spaces($opening_indentation) )
9974 $adjust_indentation = 1;
9979 # YVES patch 1 of 2:
9980 # Undo ci of line with leading closing eval brace,
9981 # but not beyond the indention of the line with
9982 # the opening brace.
9983 if ( $block_type_to_go[$ibeg] eq 'eval'
9984 && !$rOpts->{'line-up-parentheses'}
9985 && !$rOpts->{'indent-closing-brace'} )
9988 $opening_indentation, $opening_offset,
9989 $is_leading, $opening_exists
9991 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
9992 $rindentation_list );
9993 my $indentation = $leading_spaces_to_go[$ibeg];
9994 if ( defined($opening_indentation)
9995 && get_spaces($indentation) >
9996 get_spaces($opening_indentation) )
9998 $adjust_indentation = 1;
10002 $default_adjust_indentation = $adjust_indentation;
10004 # Now modify default behavior according to user request:
10005 # handle option to indent non-blocks of the form ); }; ];
10006 # But don't do special indentation to something like ')->pack('
10007 if ( !$block_type_to_go[$ibeg] ) {
10008 my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
10010 if ( $i_terminal <= $ibeg + 1
10011 || $is_semicolon_terminated )
10013 $adjust_indentation = 2;
10016 $adjust_indentation = 0;
10019 elsif ( $cti == 2 ) {
10020 if ($is_semicolon_terminated) {
10021 $adjust_indentation = 3;
10024 $adjust_indentation = 0;
10027 elsif ( $cti == 3 ) {
10028 $adjust_indentation = 3;
10032 # handle option to indent blocks
10035 $rOpts->{'indent-closing-brace'}
10037 $i_terminal == $ibeg # isolated terminal '}'
10038 || $is_semicolon_terminated
10042 $adjust_indentation = 3;
10047 # if at ');', '};', '>;', and '];' of a terminal qw quote
10048 elsif ($rpatterns->[0] =~ /^qb*;$/
10049 && $rfields->[0] =~ /^([\)\}\]\>]);$/ )
10051 if ( $closing_token_indentation{$1} == 0 ) {
10052 $adjust_indentation = 1;
10055 $adjust_indentation = 3;
10059 # if line begins with a ':', align it with any
10060 # previous line leading with corresponding ?
10061 elsif ( $types_to_go[$ibeg] eq ':' ) {
10063 $opening_indentation, $opening_offset,
10064 $is_leading, $opening_exists
10066 = get_opening_indentation( $ibeg, $ri_first, $ri_last,
10067 $rindentation_list );
10068 if ($is_leading) { $adjust_indentation = 2; }
10071 ##########################################################
10072 # Section 2: set indentation according to flag set above
10074 # Select the indentation object to define leading
10075 # whitespace. If we are outdenting something like '} } );'
10076 # then we want to use one level below the last token
10077 # ($i_terminal) in order to get it to fully outdent through
10079 ##########################################################
10082 my $level_end = $levels_to_go[$iend];
10084 if ( $adjust_indentation == 0 ) {
10085 $indentation = $leading_spaces_to_go[$ibeg];
10086 $lev = $levels_to_go[$ibeg];
10088 elsif ( $adjust_indentation == 1 ) {
10090 # Change the indentation to be that of a different token on the line
10091 # Previously, the indentation of the terminal token was used:
10093 # $indentation = $reduced_spaces_to_go[$i_terminal];
10094 # $lev = $levels_to_go[$i_terminal];
10096 # Generalization for MOJO:
10097 # Use the lowest level indentation of the tokens on the line.
10098 # For example, here we can use the indentation of the ending ';':
10099 # } until ($selection > 0 and $selection < 10); # ok to use ';'
10100 # But this will not outdent if we use the terminal indentation:
10101 # )->then( sub { # use indentation of the ->, not the {
10102 # Warning: reduced_spaces_to_go[] may be a reference, do not
10103 # do numerical checks with it
10106 $indentation = $reduced_spaces_to_go[$i_ind];
10107 $lev = $levels_to_go[$i_ind];
10108 while ( $i_ind < $i_terminal ) {
10110 if ( $levels_to_go[$i_ind] < $lev ) {
10111 $indentation = $reduced_spaces_to_go[$i_ind];
10112 $lev = $levels_to_go[$i_ind];
10117 # handle indented closing token which aligns with opening token
10118 elsif ( $adjust_indentation == 2 ) {
10120 # handle option to align closing token with opening token
10121 $lev = $levels_to_go[$ibeg];
10123 # calculate spaces needed to align with opening token
10125 get_spaces($opening_indentation) + $opening_offset;
10127 # Indent less than the previous line.
10129 # Problem: For -lp we don't exactly know what it was if there
10130 # were recoverable spaces sent to the aligner. A good solution
10131 # would be to force a flush of the vertical alignment buffer, so
10132 # that we would know. For now, this rule is used for -lp:
10134 # When the last line did not start with a closing token we will
10135 # be optimistic that the aligner will recover everything wanted.
10137 # This rule will prevent us from breaking a hierarchy of closing
10138 # tokens, and in a worst case will leave a closing paren too far
10139 # indented, but this is better than frequently leaving it not
10141 my $last_spaces = get_spaces($last_indentation_written);
10142 if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
10144 get_recoverable_spaces($last_indentation_written);
10147 # reset the indentation to the new space count if it works
10148 # only options are all or none: nothing in-between looks good
10149 $lev = $levels_to_go[$ibeg];
10150 if ( $space_count < $last_spaces ) {
10151 if ($rOpts_line_up_parentheses) {
10152 my $lev = $levels_to_go[$ibeg];
10154 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
10157 $indentation = $space_count;
10161 # revert to default if it doesn't work
10163 $space_count = leading_spaces_to_go($ibeg);
10164 if ( $default_adjust_indentation == 0 ) {
10165 $indentation = $leading_spaces_to_go[$ibeg];
10167 elsif ( $default_adjust_indentation == 1 ) {
10168 $indentation = $reduced_spaces_to_go[$i_terminal];
10169 $lev = $levels_to_go[$i_terminal];
10174 # Full indentaion of closing tokens (-icb and -icp or -cti=2)
10177 # handle -icb (indented closing code block braces)
10178 # Updated method for indented block braces: indent one full level if
10179 # there is no continuation indentation. This will occur for major
10180 # structures such as sub, if, else, but not for things like map
10183 # Note: only code blocks without continuation indentation are
10184 # handled here (if, else, unless, ..). In the following snippet,
10185 # the terminal brace of the sort block will have continuation
10186 # indentation as shown so it will not be handled by the coding
10187 # here. We would have to undo the continuation indentation to do
10188 # this, but it probably looks ok as is. This is a possible future
10189 # update for semicolon terminated lines.
10191 # if ($sortby eq 'date' or $sortby eq 'size') {
10193 # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
10198 if ( $block_type_to_go[$ibeg]
10199 && $ci_levels_to_go[$i_terminal] == 0 )
10201 my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
10202 $indentation = $spaces + $rOpts_indent_columns;
10204 # NOTE: for -lp we could create a new indentation object, but
10205 # there is probably no need to do it
10208 # handle -icp and any -icb block braces which fall through above
10209 # test such as the 'sort' block mentioned above.
10212 # There are currently two ways to handle -icp...
10213 # One way is to use the indentation of the previous line:
10214 # $indentation = $last_indentation_written;
10216 # The other way is to use the indentation that the previous line
10217 # would have had if it hadn't been adjusted:
10218 $indentation = $last_unadjusted_indentation;
10220 # Current method: use the minimum of the two. This avoids
10221 # inconsistent indentation.
10222 if ( get_spaces($last_indentation_written) <
10223 get_spaces($indentation) )
10225 $indentation = $last_indentation_written;
10229 # use previous indentation but use own level
10230 # to cause list to be flushed properly
10231 $lev = $levels_to_go[$ibeg];
10234 # remember indentation except for multi-line quotes, which get
10236 unless ( $ibeg == 0 && $starting_in_quote ) {
10237 $last_indentation_written = $indentation;
10238 $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
10239 $last_leading_token = $tokens_to_go[$ibeg];
10242 # be sure lines with leading closing tokens are not outdented more
10243 # than the line which contained the corresponding opening token.
10245 #############################################################
10246 # updated per bug report in alex_bug.pl: we must not
10247 # mess with the indentation of closing logical braces so
10248 # we must treat something like '} else {' as if it were
10249 # an isolated brace my $is_isolated_block_brace = (
10250 # $iend == $ibeg ) && $block_type_to_go[$ibeg];
10251 #############################################################
10252 my $is_isolated_block_brace = $block_type_to_go[$ibeg]
10253 && ( $iend == $ibeg
10254 || $is_if_elsif_else_unless_while_until_for_foreach{
10255 $block_type_to_go[$ibeg]
10258 # only do this for a ':; which is aligned with its leading '?'
10259 my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
10262 defined($opening_indentation)
10263 && !$leading_paren_arrow # MOJO
10264 && !$is_isolated_block_brace
10265 && !$is_unaligned_colon
10268 if ( get_spaces($opening_indentation) > get_spaces($indentation) ) {
10269 $indentation = $opening_indentation;
10273 # remember the indentation of each line of this batch
10274 push @{$rindentation_list}, $indentation;
10276 # outdent lines with certain leading tokens...
10279 # must be first word of this batch
10285 # certain leading keywords if requested
10287 $rOpts->{'outdent-keywords'}
10288 && $types_to_go[$ibeg] eq 'k'
10289 && $outdent_keyword{ $tokens_to_go[$ibeg] }
10292 # or labels if requested
10293 || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
10295 # or static block comments if requested
10296 || ( $types_to_go[$ibeg] eq '#'
10297 && $rOpts->{'outdent-static-block-comments'}
10298 && $is_static_block_comment )
10303 my $space_count = leading_spaces_to_go($ibeg);
10304 if ( $space_count > 0 ) {
10305 $space_count -= $rOpts_continuation_indentation;
10306 $is_outdented_line = 1;
10307 if ( $space_count < 0 ) { $space_count = 0 }
10309 # do not promote a spaced static block comment to non-spaced;
10310 # this is not normally necessary but could be for some
10311 # unusual user inputs (such as -ci = -i)
10312 if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
10316 if ($rOpts_line_up_parentheses) {
10318 new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
10321 $indentation = $space_count;
10326 return ( $indentation, $lev, $level_end, $terminal_type,
10327 $is_semicolon_terminated, $is_outdented_line );
10331 sub set_vertical_tightness_flags {
10333 my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
10335 # Define vertical tightness controls for the nth line of a batch.
10336 # We create an array of parameters which tell the vertical aligner
10337 # if we should combine this line with the next line to achieve the
10338 # desired vertical tightness. The array of parameters contains:
10340 # [0] type: 1=opening non-block 2=closing non-block
10341 # 3=opening block brace 4=closing block brace
10343 # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
10344 # if closing: spaces of padding to use
10345 # [2] sequence number of container
10346 # [3] valid flag: do not append if this flag is false. Will be
10347 # true if appropriate -vt flag is set. Otherwise, Will be
10348 # made true only for 2 line container in parens with -lp
10350 # These flags are used by sub set_leading_whitespace in
10351 # the vertical aligner
10353 my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
10355 #--------------------------------------------------------------
10356 # Vertical Tightness Flags Section 1:
10357 # Handle Lines 1 .. n-1 but not the last line
10358 # For non-BLOCK tokens, we will need to examine the next line
10359 # too, so we won't consider the last line.
10360 #--------------------------------------------------------------
10361 if ( $n < $n_last_line ) {
10363 #--------------------------------------------------------------
10364 # Vertical Tightness Flags Section 1a:
10365 # Look for Type 1, last token of this line is a non-block opening token
10366 #--------------------------------------------------------------
10367 my $ibeg_next = $ri_first->[ $n + 1 ];
10368 my $token_end = $tokens_to_go[$iend];
10369 my $iend_next = $ri_last->[ $n + 1 ];
10371 $type_sequence_to_go[$iend]
10372 && !$block_type_to_go[$iend]
10373 && $is_opening_token{$token_end}
10375 $opening_vertical_tightness{$token_end} > 0
10377 # allow 2-line method call to be closed up
10378 || ( $rOpts_line_up_parentheses
10379 && $token_end eq '('
10381 && $types_to_go[ $iend - 1 ] ne 'b' )
10386 # avoid multiple jumps in nesting depth in one line if
10388 my $ovt = $opening_vertical_tightness{$token_end};
10389 my $iend_next = $ri_last->[ $n + 1 ];
10392 && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
10393 $nesting_depth_to_go[$ibeg_next] )
10397 # If -vt flag has not been set, mark this as invalid
10398 # and aligner will validate it if it sees the closing paren
10400 my $valid_flag = $ovt;
10401 @{$rvertical_tightness_flags} =
10402 ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
10406 #--------------------------------------------------------------
10407 # Vertical Tightness Flags Section 1b:
10408 # Look for Type 2, first token of next line is a non-block closing
10409 # token .. and be sure this line does not have a side comment
10410 #--------------------------------------------------------------
10411 my $token_next = $tokens_to_go[$ibeg_next];
10412 if ( $type_sequence_to_go[$ibeg_next]
10413 && !$block_type_to_go[$ibeg_next]
10414 && $is_closing_token{$token_next}
10415 && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen!
10417 my $ovt = $opening_vertical_tightness{$token_next};
10418 my $cvt = $closing_vertical_tightness{$token_next};
10421 # never append a trailing line like )->pack(
10422 # because it will throw off later alignment
10424 $nesting_depth_to_go[$ibeg_next] ==
10425 $nesting_depth_to_go[ $iend_next + 1 ] + 1
10430 $container_environment_to_go[$ibeg_next] ne 'LIST'
10434 # allow closing up 2-line method calls
10435 || ( $rOpts_line_up_parentheses
10436 && $token_next eq ')' )
10443 # decide which trailing closing tokens to append..
10445 if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
10447 my $str = join( '',
10448 @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
10450 # append closing token if followed by comment or ';'
10451 if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
10455 my $valid_flag = $cvt;
10456 @{$rvertical_tightness_flags} = (
10458 $tightness{$token_next} == 2 ? 0 : 1,
10459 $type_sequence_to_go[$ibeg_next], $valid_flag,
10465 #--------------------------------------------------------------
10466 # Vertical Tightness Flags Section 1c:
10467 # Implement the Opening Token Right flag (Type 2)..
10468 # If requested, move an isolated trailing opening token to the end of
10469 # the previous line which ended in a comma. We could do this
10470 # in sub recombine_breakpoints but that would cause problems
10471 # with -lp formatting. The problem is that indentation will
10472 # quickly move far to the right in nested expressions. By
10473 # doing it after indentation has been set, we avoid changes
10474 # to the indentation. Actual movement of the token takes place
10475 # in sub valign_output_step_B.
10476 #--------------------------------------------------------------
10478 $opening_token_right{ $tokens_to_go[$ibeg_next] }
10480 # previous line is not opening
10481 # (use -sot to combine with it)
10482 && !$is_opening_token{$token_end}
10484 # previous line ended in one of these
10485 # (add other cases if necessary; '=>' and '.' are not necessary
10486 && !$block_type_to_go[$ibeg_next]
10488 # this is a line with just an opening token
10489 && ( $iend_next == $ibeg_next
10490 || $iend_next == $ibeg_next + 2
10491 && $types_to_go[$iend_next] eq '#' )
10493 # looks bad if we align vertically with the wrong container
10494 && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
10497 my $valid_flag = 1;
10498 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
10499 @{$rvertical_tightness_flags} =
10500 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
10503 #--------------------------------------------------------------
10504 # Vertical Tightness Flags Section 1d:
10505 # Stacking of opening and closing tokens (Type 2)
10506 #--------------------------------------------------------------
10508 my $token_beg_next = $tokens_to_go[$ibeg_next];
10510 # patch to make something like 'qw(' behave like an opening paren
10512 if ( $types_to_go[$ibeg_next] eq 'q' ) {
10513 if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
10514 $token_beg_next = $1;
10518 if ( $is_closing_token{$token_end}
10519 && $is_closing_token{$token_beg_next} )
10521 $stackable = $stack_closing_token{$token_beg_next}
10522 unless ( $block_type_to_go[$ibeg_next] )
10523 ; # shouldn't happen; just checking
10525 elsif ($is_opening_token{$token_end}
10526 && $is_opening_token{$token_beg_next} )
10528 $stackable = $stack_opening_token{$token_beg_next}
10529 unless ( $block_type_to_go[$ibeg_next] )
10530 ; # shouldn't happen; just checking
10535 my $is_semicolon_terminated;
10536 if ( $n + 1 == $n_last_line ) {
10537 my ( $terminal_type, $i_terminal ) = terminal_type(
10538 \@types_to_go, \@block_type_to_go,
10539 $ibeg_next, $iend_next
10541 $is_semicolon_terminated = $terminal_type eq ';'
10542 && $nesting_depth_to_go[$iend_next] <
10543 $nesting_depth_to_go[$ibeg_next];
10546 # this must be a line with just an opening token
10547 # or end in a semicolon
10549 $is_semicolon_terminated
10550 || ( $iend_next == $ibeg_next
10551 || $iend_next == $ibeg_next + 2
10552 && $types_to_go[$iend_next] eq '#' )
10555 my $valid_flag = 1;
10556 my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
10557 @{$rvertical_tightness_flags} =
10558 ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
10564 #--------------------------------------------------------------
10565 # Vertical Tightness Flags Section 2:
10566 # Handle type 3, opening block braces on last line of the batch
10567 # Check for a last line with isolated opening BLOCK curly
10568 #--------------------------------------------------------------
10569 elsif ($rOpts_block_brace_vertical_tightness
10571 && $types_to_go[$iend] eq '{'
10572 && $block_type_to_go[$iend] =~
10573 /$block_brace_vertical_tightness_pattern/o )
10575 @{$rvertical_tightness_flags} =
10576 ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
10579 #--------------------------------------------------------------
10580 # Vertical Tightness Flags Section 3:
10581 # Handle type 4, a closing block brace on the last line of the batch Check
10582 # for a last line with isolated closing BLOCK curly
10583 #--------------------------------------------------------------
10584 elsif ($rOpts_stack_closing_block_brace
10586 && $block_type_to_go[$iend]
10587 && $types_to_go[$iend] eq '}' )
10589 my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
10590 @{$rvertical_tightness_flags} =
10591 ( 4, $spaces, $type_sequence_to_go[$iend], 1 );
10594 # pack in the sequence numbers of the ends of this line
10595 $rvertical_tightness_flags->[4] = get_seqno($ibeg);
10596 $rvertical_tightness_flags->[5] = get_seqno($iend);
10597 return $rvertical_tightness_flags;
10602 # get opening and closing sequence numbers of a token for the vertical
10603 # aligner. Assign qw quotes a value to allow qw opening and closing tokens
10604 # to be treated somewhat like opening and closing tokens for stacking
10605 # tokens by the vertical aligner.
10607 my $seqno = $type_sequence_to_go[$ii];
10608 if ( $types_to_go[$ii] eq 'q' ) {
10611 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
10614 if ( !$ending_in_quote ) {
10615 $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
10623 my %is_vertical_alignment_type;
10624 my %is_vertical_alignment_keyword;
10625 my %is_terminal_alignment_type;
10631 # Replaced =~ and // in the list. // had been removed in RT 119588
10633 = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
10634 { ? : => && || ~~ !~~ =~ !~ //
10636 @is_vertical_alignment_type{@q} = (1) x scalar(@q);
10638 # only align these at end of line
10640 @is_terminal_alignment_type{@q} = (1) x scalar(@q);
10642 # eq and ne were removed from this list to improve alignment chances
10643 @q = qw(if unless and or err for foreach while until);
10644 @is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
10647 sub set_vertical_alignment_markers {
10649 # This routine takes the first step toward vertical alignment of the
10650 # lines of output text. It looks for certain tokens which can serve as
10651 # vertical alignment markers (such as an '=').
10653 # Method: We look at each token $i in this output batch and set
10654 # $matching_token_to_go[$i] equal to those tokens at which we would
10655 # accept vertical alignment.
10657 my ( $ri_first, $ri_last ) = @_;
10659 # nothing to do if we aren't allowed to change whitespace
10660 if ( !$rOpts_add_whitespace ) {
10661 for my $i ( 0 .. $max_index_to_go ) {
10662 $matching_token_to_go[$i] = '';
10667 # remember the index of last nonblank token before any sidecomment
10668 my $i_terminal = $max_index_to_go;
10669 if ( $types_to_go[$i_terminal] eq '#' ) {
10670 if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
10671 if ( $i_terminal > 0 ) { --$i_terminal }
10675 # look at each line of this batch..
10676 my $last_vertical_alignment_before_index;
10677 my $vert_last_nonblank_type;
10678 my $vert_last_nonblank_token;
10679 my $vert_last_nonblank_block_type;
10680 my $max_line = @{$ri_first} - 1;
10682 foreach my $line ( 0 .. $max_line ) {
10683 my $ibeg = $ri_first->[$line];
10684 my $iend = $ri_last->[$line];
10685 $last_vertical_alignment_before_index = -1;
10686 $vert_last_nonblank_type = '';
10687 $vert_last_nonblank_token = '';
10688 $vert_last_nonblank_block_type = '';
10690 # look at each token in this output line..
10692 foreach my $i ( $ibeg .. $iend ) {
10693 my $alignment_type = '';
10694 my $type = $types_to_go[$i];
10695 my $block_type = $block_type_to_go[$i];
10696 my $token = $tokens_to_go[$i];
10698 # check for flag indicating that we should not align
10700 if ( $matching_token_to_go[$i] ) {
10701 $matching_token_to_go[$i] = '';
10705 #--------------------------------------------------------
10706 # First see if we want to align BEFORE this token
10707 #--------------------------------------------------------
10709 # The first possible token that we can align before
10710 # is index 2 because: 1) it doesn't normally make sense to
10711 # align before the first token and 2) the second
10712 # token must be a blank if we are to align before
10714 if ( $i < $ibeg + 2 ) { }
10716 # must follow a blank token
10717 elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
10719 # align a side comment --
10720 elsif ( $type eq '#' ) {
10724 # it is a static side comment
10726 $rOpts->{'static-side-comments'}
10727 && $token =~ /$static_side_comment_pattern/o
10730 # or a closing side comment
10731 || ( $vert_last_nonblank_block_type
10733 /$closing_side_comment_prefix_pattern/o )
10736 $alignment_type = $type;
10737 } ## Example of a static side comment
10740 # otherwise, do not align two in a row to create a
10742 elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
10744 # align before one of these keywords
10745 # (within a line, since $i>1)
10746 elsif ( $type eq 'k' ) {
10748 # /^(if|unless|and|or|eq|ne)$/
10749 if ( $is_vertical_alignment_keyword{$token} ) {
10750 $alignment_type = $token;
10754 # align before one of these types..
10755 # Note: add '.' after new vertical aligner is operational
10756 elsif ( $is_vertical_alignment_type{$type} ) {
10757 $alignment_type = $token;
10759 # Do not align a terminal token. Although it might
10760 # occasionally look ok to do this, this has been found to be
10761 # a good general rule. The main problems are:
10762 # (1) that the terminal token (such as an = or :) might get
10763 # moved far to the right where it is hard to see because
10764 # nothing follows it, and
10765 # (2) doing so may prevent other good alignments.
10766 # Current exceptions are && and ||
10767 if ( $i == $iend || $i >= $i_terminal ) {
10768 $alignment_type = ""
10769 unless ( $is_terminal_alignment_type{$type} );
10772 # Do not align leading ': (' or '. ('. This would prevent
10773 # alignment in something like the following:
10775 # ( $input_line_number < 10 ) ? " "
10776 # : ( $input_line_number < 100 ) ? " "
10780 # ( $case_matters ? $accessor : " lc($accessor) " )
10781 # . ( $yesno ? " eq " : " ne " )
10782 if ( $i == $ibeg + 2
10783 && $types_to_go[$ibeg] =~ /^[\.\:]$/
10784 && $types_to_go[ $i - 1 ] eq 'b' )
10786 $alignment_type = "";
10789 # For a paren after keyword, only align something like this:
10791 # elsif ( $b ) { &b }
10792 if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
10793 $alignment_type = ""
10794 unless $vert_last_nonblank_token =~
10795 /^(if|unless|elsif)$/;
10798 # be sure the alignment tokens are unique
10799 # This didn't work well: reason not determined
10800 # if ($token ne $type) {$alignment_type .= $type}
10803 # NOTE: This is deactivated because it causes the previous
10804 # if/elsif alignment to fail
10805 #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
10806 #{ $alignment_type = $type; }
10808 if ($alignment_type) {
10809 $last_vertical_alignment_before_index = $i;
10812 #--------------------------------------------------------
10813 # Next see if we want to align AFTER the previous nonblank
10814 #--------------------------------------------------------
10816 # We want to line up ',' and interior ';' tokens, with the added
10817 # space AFTER these tokens. (Note: interior ';' is included
10818 # because it may occur in short blocks).
10821 # we haven't already set it
10824 # and its not the first token of the line
10827 # and it follows a blank
10828 && $types_to_go[ $i - 1 ] eq 'b'
10830 # and previous token IS one of these:
10831 && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
10833 # and it's NOT one of these
10834 && ( $type !~ /^[b\#\)\]\}]$/ )
10836 # then go ahead and align
10840 $alignment_type = $vert_last_nonblank_type;
10843 #--------------------------------------------------------
10844 # patch for =~ operator. We only align this if it
10845 # is the first operator in a line, and the line is a simple
10846 # statement. Aligning them within a statement causes
10847 # interferes with other good alignments.
10848 #--------------------------------------------------------
10849 if ( $alignment_type eq '=~' ) {
10850 my $terminal_type = $types_to_go[$i_terminal];
10851 if ( $count > 0 || $max_line > 0 || $terminal_type ne ';' )
10853 $alignment_type = "";
10857 #--------------------------------------------------------
10858 # then store the value
10859 #--------------------------------------------------------
10860 $matching_token_to_go[$i] = $alignment_type;
10861 $count++ if ($alignment_type);
10862 if ( $type ne 'b' ) {
10863 $vert_last_nonblank_type = $type;
10864 $vert_last_nonblank_token = $token;
10865 $vert_last_nonblank_block_type = $block_type;
10873 sub terminal_type {
10875 # returns type of last token on this line (terminal token), as follows:
10876 # returns # for a full-line comment
10877 # returns ' ' for a blank line
10878 # otherwise returns final token type
10880 my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;
10882 # check for full-line comment..
10883 if ( $rtype->[$ibeg] eq '#' ) {
10884 return wantarray ? ( $rtype->[$ibeg], $ibeg ) : $rtype->[$ibeg];
10888 # start at end and walk backwards..
10889 for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {
10891 # skip past any side comment and blanks
10892 next if ( $rtype->[$i] eq 'b' );
10893 next if ( $rtype->[$i] eq '#' );
10895 # found it..make sure it is a BLOCK termination,
10896 # but hide a terminal } after sort/grep/map because it is not
10897 # necessarily the end of the line. (terminal.t)
10898 my $terminal_type = $rtype->[$i];
10900 $terminal_type eq '}'
10901 && ( !$rblock_type->[$i]
10902 || ( $is_sort_map_grep_eval_do{ $rblock_type->[$i] } ) )
10905 $terminal_type = 'b';
10907 return wantarray ? ( $terminal_type, $i ) : $terminal_type;
10911 return wantarray ? ( ' ', $ibeg ) : ' ';
10915 { # set_bond_strengths
10917 my %is_good_keyword_breakpoint;
10918 my %is_lt_gt_le_ge;
10920 my %binary_bond_strength;
10927 sub bias_table_key {
10928 my ( $type, $token ) = @_;
10929 my $bias_table_key = $type;
10930 if ( $type eq 'k' ) {
10931 $bias_table_key = $token;
10932 if ( $token eq 'err' ) { $bias_table_key = 'or' }
10934 return $bias_table_key;
10937 sub initialize_bond_strength_hashes {
10940 @q = qw(if unless while until for foreach);
10941 @is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
10943 @q = qw(lt gt le ge);
10944 @is_lt_gt_le_ge{@q} = (1) x scalar(@q);
10946 # The decision about where to break a line depends upon a "bond
10947 # strength" between tokens. The LOWER the bond strength, the MORE
10948 # likely a break. A bond strength may be any value but to simplify
10949 # things there are several pre-defined strength levels:
10951 # NO_BREAK => 10000;
10952 # VERY_STRONG => 100;
10956 # VERY_WEAK => 0.55;
10958 # The strength values are based on trial-and-error, and need to be
10959 # tweaked occasionally to get desired results. Some comments:
10961 # 1. Only relative strengths are important. small differences
10962 # in strengths can make big formatting differences.
10963 # 2. Each indentation level adds one unit of bond strength.
10964 # 3. A value of NO_BREAK makes an unbreakable bond
10965 # 4. A value of VERY_WEAK is the strength of a ','
10966 # 5. Values below NOMINAL are considered ok break points.
10967 # 6. Values above NOMINAL are considered poor break points.
10969 # The bond strengths should roughly follow precedence order where
10970 # possible. If you make changes, please check the results very
10971 # carefully on a variety of scripts. Testing with the -extrude
10972 # options is particularly helpful in exercising all of the rules.
10974 # Wherever possible, bond strengths are defined in the following
10975 # tables. There are two main stages to setting bond strengths and
10976 # two types of tables:
10978 # The first stage involves looking at each token individually and
10979 # defining left and right bond strengths, according to if we want
10980 # to break to the left or right side, and how good a break point it
10981 # is. For example tokens like =, ||, && make good break points and
10982 # will have low strengths, but one might want to break on either
10983 # side to put them at the end of one line or beginning of the next.
10985 # The second stage involves looking at certain pairs of tokens and
10986 # defining a bond strength for that particular pair. This second
10987 # stage has priority.
10989 #---------------------------------------------------------------
10990 # Bond Strength BEGIN Section 1.
10991 # Set left and right bond strengths of individual tokens.
10992 #---------------------------------------------------------------
10994 # NOTE: NO_BREAK's set in this section first are HINTS which will
10995 # probably not be honored. Essential NO_BREAKS's should be set in
10996 # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
10997 # of this subroutine.
10999 # Note that we are setting defaults in this section. The user
11000 # cannot change bond strengths but can cause the left and right
11001 # bond strengths of any token type to be swapped through the use of
11002 # the -wba and -wbb flags. In this way the user can determine if a
11003 # breakpoint token should appear at the end of one line or the
11004 # beginning of the next line.
11006 # The hash keys in this section are token types, plus the text of
11007 # certain keywords like 'or', 'and'.
11009 # no break around possible filehandle
11010 $left_bond_strength{'Z'} = NO_BREAK;
11011 $right_bond_strength{'Z'} = NO_BREAK;
11013 # never put a bare word on a new line:
11014 # example print (STDERR, "bla"); will fail with break after (
11015 $left_bond_strength{'w'} = NO_BREAK;
11017 # blanks always have infinite strength to force breaks after
11019 $right_bond_strength{'b'} = NO_BREAK;
11021 # try not to break on exponentation
11022 @q = qw# ** .. ... <=> #;
11023 @left_bond_strength{@q} = (STRONG) x scalar(@q);
11024 @right_bond_strength{@q} = (STRONG) x scalar(@q);
11026 # The comma-arrow has very low precedence but not a good break point
11027 $left_bond_strength{'=>'} = NO_BREAK;
11028 $right_bond_strength{'=>'} = NOMINAL;
11030 # ok to break after label
11031 $left_bond_strength{'J'} = NO_BREAK;
11032 $right_bond_strength{'J'} = NOMINAL;
11033 $left_bond_strength{'j'} = STRONG;
11034 $right_bond_strength{'j'} = STRONG;
11035 $left_bond_strength{'A'} = STRONG;
11036 $right_bond_strength{'A'} = STRONG;
11038 $left_bond_strength{'->'} = STRONG;
11039 $right_bond_strength{'->'} = VERY_STRONG;
11041 $left_bond_strength{'CORE::'} = NOMINAL;
11042 $right_bond_strength{'CORE::'} = NO_BREAK;
11044 # breaking AFTER modulus operator is ok:
11046 @left_bond_strength{@q} = (STRONG) x scalar(@q);
11047 @right_bond_strength{@q} =
11048 ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
11050 # Break AFTER math operators * and /
11052 @left_bond_strength{@q} = (STRONG) x scalar(@q);
11053 @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
11055 # Break AFTER weakest math operators + and -
11056 # Make them weaker than * but a bit stronger than '.'
11058 @left_bond_strength{@q} = (STRONG) x scalar(@q);
11059 @right_bond_strength{@q} =
11060 ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
11062 # breaking BEFORE these is just ok:
11064 @right_bond_strength{@q} = (STRONG) x scalar(@q);
11065 @left_bond_strength{@q} = (NOMINAL) x scalar(@q);
11067 # breaking before the string concatenation operator seems best
11068 # because it can be hard to see at the end of a line
11069 $right_bond_strength{'.'} = STRONG;
11070 $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
11072 @q = qw< } ] ) R >;
11073 @left_bond_strength{@q} = (STRONG) x scalar(@q);
11074 @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
11076 # make these a little weaker than nominal so that they get
11077 # favored for end-of-line characters
11078 @q = qw< != == =~ !~ ~~ !~~ >;
11079 @left_bond_strength{@q} = (STRONG) x scalar(@q);
11080 @right_bond_strength{@q} =
11081 ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
11083 # break AFTER these
11084 @q = qw# < > | & >= <= #;
11085 @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
11086 @right_bond_strength{@q} =
11087 ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
11089 # breaking either before or after a quote is ok
11090 # but bias for breaking before a quote
11091 $left_bond_strength{'Q'} = NOMINAL;
11092 $right_bond_strength{'Q'} = NOMINAL + 0.02;
11093 $left_bond_strength{'q'} = NOMINAL;
11094 $right_bond_strength{'q'} = NOMINAL;
11096 # starting a line with a keyword is usually ok
11097 $left_bond_strength{'k'} = NOMINAL;
11099 # we usually want to bond a keyword strongly to what immediately
11100 # follows, rather than leaving it stranded at the end of a line
11101 $right_bond_strength{'k'} = STRONG;
11103 $left_bond_strength{'G'} = NOMINAL;
11104 $right_bond_strength{'G'} = STRONG;
11106 # assignment operators
11108 = **= += *= &= <<= &&=
11109 -= /= |= >>= ||= //=
11114 # Default is to break AFTER various assignment operators
11115 @left_bond_strength{@q} = (STRONG) x scalar(@q);
11116 @right_bond_strength{@q} =
11117 ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
11119 # Default is to break BEFORE '&&' and '||' and '//'
11120 # set strength of '||' to same as '=' so that chains like
11121 # $a = $b || $c || $d will break before the first '||'
11122 $right_bond_strength{'||'} = NOMINAL;
11123 $left_bond_strength{'||'} = $right_bond_strength{'='};
11125 # same thing for '//'
11126 $right_bond_strength{'//'} = NOMINAL;
11127 $left_bond_strength{'//'} = $right_bond_strength{'='};
11129 # set strength of && a little higher than ||
11130 $right_bond_strength{'&&'} = NOMINAL;
11131 $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
11133 $left_bond_strength{';'} = VERY_STRONG;
11134 $right_bond_strength{';'} = VERY_WEAK;
11135 $left_bond_strength{'f'} = VERY_STRONG;
11137 # make right strength of for ';' a little less than '='
11138 # to make for contents break after the ';' to avoid this:
11139 # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
11140 # $number_of_fields )
11141 # and make it weaker than ',' and 'and' too
11142 $right_bond_strength{'f'} = VERY_WEAK - 0.03;
11144 # The strengths of ?/: should be somewhere between
11145 # an '=' and a quote (NOMINAL),
11146 # make strength of ':' slightly less than '?' to help
11147 # break long chains of ? : after the colons
11148 $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
11149 $right_bond_strength{':'} = NO_BREAK;
11150 $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
11151 $right_bond_strength{'?'} = NO_BREAK;
11153 $left_bond_strength{','} = VERY_STRONG;
11154 $right_bond_strength{','} = VERY_WEAK;
11156 # remaining digraphs and trigraphs not defined above
11157 @q = qw( :: <> ++ --);
11158 @left_bond_strength{@q} = (WEAK) x scalar(@q);
11159 @right_bond_strength{@q} = (STRONG) x scalar(@q);
11161 # Set bond strengths of certain keywords
11162 # make 'or', 'err', 'and' slightly weaker than a ','
11163 $left_bond_strength{'and'} = VERY_WEAK - 0.01;
11164 $left_bond_strength{'or'} = VERY_WEAK - 0.02;
11165 $left_bond_strength{'err'} = VERY_WEAK - 0.02;
11166 $left_bond_strength{'xor'} = NOMINAL;
11167 $right_bond_strength{'and'} = NOMINAL;
11168 $right_bond_strength{'or'} = NOMINAL;
11169 $right_bond_strength{'err'} = NOMINAL;
11170 $right_bond_strength{'xor'} = STRONG;
11172 #---------------------------------------------------------------
11173 # Bond Strength BEGIN Section 2.
11174 # Set binary rules for bond strengths between certain token types.
11175 #---------------------------------------------------------------
11177 # We have a little problem making tables which apply to the
11178 # container tokens. Here is a list of container tokens and
11181 # type tokens // meaning
11182 # { {, [, ( // indent
11183 # } }, ], ) // outdent
11184 # [ [ // left non-structural [ (enclosing an array index)
11185 # ] ] // right non-structural square bracket
11186 # ( ( // left non-structural paren
11187 # ) ) // right non-structural paren
11188 # L { // left non-structural curly brace (enclosing a key)
11189 # R } // right non-structural curly brace
11191 # Some rules apply to token types and some to just the token
11192 # itself. We solve the problem by combining type and token into a
11193 # new hash key for the container types.
11195 # If a rule applies to a token 'type' then we need to make rules
11196 # for each of these 'type.token' combinations:
11207 # If a rule applies to a token then we need to make rules for
11208 # these 'type.token' combinations:
11217 # allow long lines before final { in an if statement, as in:
11222 # Otherwise, the line before the { tends to be too short.
11224 $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
11225 $binary_bond_strength{'(('}{'{{'} = NOMINAL;
11227 # break on something like '} (', but keep this stronger than a ','
11228 # example is in 'howe.pl'
11229 $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
11230 $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
11232 # keep matrix and hash indices together
11233 # but make them a little below STRONG to allow breaking open
11234 # something like {'some-word'}{'some-very-long-word'} at the }{
11236 $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
11237 $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
11238 $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
11239 $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
11241 # increase strength to the point where a break in the following
11242 # will be after the opening paren rather than at the arrow:
11244 $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
11246 $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
11247 $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
11248 $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
11249 $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
11250 $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
11251 $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
11253 $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
11254 $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
11255 $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
11256 $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
11258 #---------------------------------------------------------------
11259 # Binary NO_BREAK rules
11260 #---------------------------------------------------------------
11262 # use strict requires that bare word and => not be separated
11263 $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
11264 $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
11266 # Never break between a bareword and a following paren because
11267 # perl may give an error. For example, if a break is placed
11268 # between 'to_filehandle' and its '(' the following line will
11269 # give a syntax error [Carp.pm]: my( $no) =fileno(
11270 # to_filehandle( $in)) ;
11271 $binary_bond_strength{'C'}{'(('} = NO_BREAK;
11272 $binary_bond_strength{'C'}{'{('} = NO_BREAK;
11273 $binary_bond_strength{'U'}{'(('} = NO_BREAK;
11274 $binary_bond_strength{'U'}{'{('} = NO_BREAK;
11276 # use strict requires that bare word within braces not start new
11278 $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
11280 $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
11282 # use strict requires that bare word and => not be separated
11283 $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
11285 # use strict does not allow separating type info from trailing { }
11286 # testfile is readmail.pl
11287 $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
11288 $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
11290 # As a defensive measure, do not break between a '(' and a
11291 # filehandle. In some cases, this can cause an error. For
11292 # example, the following program works:
11299 # But this program fails:
11307 # This is normally only a problem with the 'extrude' option
11308 $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
11309 $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
11311 # never break between sub name and opening paren
11312 $binary_bond_strength{'w'}{'(('} = NO_BREAK;
11313 $binary_bond_strength{'w'}{'{('} = NO_BREAK;
11315 # keep '}' together with ';'
11316 $binary_bond_strength{'}}'}{';'} = NO_BREAK;
11318 # Breaking before a ++ can cause perl to guess wrong. For
11319 # example the following line will cause a syntax error
11320 # with -extrude if we break between '$i' and '++' [fixstyle2]
11321 # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
11322 $nobreak_lhs{'++'} = NO_BREAK;
11324 # Do not break before a possible file handle
11325 $nobreak_lhs{'Z'} = NO_BREAK;
11327 # use strict hates bare words on any new line. For
11328 # example, a break before the underscore here provokes the
11329 # wrath of use strict:
11330 # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
11331 $nobreak_rhs{'F'} = NO_BREAK;
11332 $nobreak_rhs{'CORE::'} = NO_BREAK;
11334 #---------------------------------------------------------------
11335 # Bond Strength BEGIN Section 3.
11336 # Define tables and values for applying a small bias to the above
11338 #---------------------------------------------------------------
11339 # Adding a small 'bias' to strengths is a simple way to make a line
11340 # break at the first of a sequence of identical terms. For
11341 # example, to force long string of conditional operators to break
11342 # with each line ending in a ':', we can add a small number to the
11343 # bond strength of each ':' (colon.t)
11344 @bias_tokens = qw( : && || f and or . ); # tokens which get bias
11345 $delta_bias = 0.0001; # a very small strength level
11348 } ## end sub initialize_bond_strength_hashes
11350 sub set_bond_strengths {
11352 # patch-its always ok to break at end of line
11353 $nobreak_to_go[$max_index_to_go] = 0;
11355 # we start a new set of bias values for each line
11357 @bias{@bias_tokens} = (0) x scalar(@bias_tokens);
11358 my $code_bias = -.01; # bias for closing block braces
11363 my $last_nonblank_type = $type;
11364 my $last_nonblank_token = $token;
11365 my $list_str = $left_bond_strength{'?'};
11367 my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
11368 $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
11371 # main loop to compute bond strengths between each pair of tokens
11372 foreach my $i ( 0 .. $max_index_to_go ) {
11373 $last_type = $type;
11374 if ( $type ne 'b' ) {
11375 $last_nonblank_type = $type;
11376 $last_nonblank_token = $token;
11378 $type = $types_to_go[$i];
11380 # strength on both sides of a blank is the same
11381 if ( $type eq 'b' && $last_type ne 'b' ) {
11382 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
11386 $token = $tokens_to_go[$i];
11387 $block_type = $block_type_to_go[$i];
11389 $next_type = $types_to_go[$i_next];
11390 $next_token = $tokens_to_go[$i_next];
11391 $total_nesting_depth = $nesting_depth_to_go[$i_next];
11392 $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
11393 $next_nonblank_type = $types_to_go[$i_next_nonblank];
11394 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
11396 # We are computing the strength of the bond between the current
11397 # token and the NEXT token.
11399 #---------------------------------------------------------------
11400 # Bond Strength Section 1:
11401 # First Approximation.
11402 # Use minimum of individual left and right tabulated bond
11404 #---------------------------------------------------------------
11405 my $bsr = $right_bond_strength{$type};
11406 my $bsl = $left_bond_strength{$next_nonblank_type};
11408 # define right bond strengths of certain keywords
11409 if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
11410 $bsr = $right_bond_strength{$token};
11412 elsif ( $token eq 'ne' or $token eq 'eq' ) {
11416 # set terminal bond strength to the nominal value
11417 # this will cause good preceding breaks to be retained
11418 if ( $i_next_nonblank > $max_index_to_go ) {
11422 # define right bond strengths of certain keywords
11423 if ( $next_nonblank_type eq 'k'
11424 && defined( $left_bond_strength{$next_nonblank_token} ) )
11426 $bsl = $left_bond_strength{$next_nonblank_token};
11428 elsif ($next_nonblank_token eq 'ne'
11429 or $next_nonblank_token eq 'eq' )
11433 elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
11434 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
11437 # Use the minimum of the left and right strengths. Note: it might
11438 # seem that we would want to keep a NO_BREAK if either token has
11439 # this value. This didn't work, for example because in an arrow
11440 # list, it prevents the comma from separating from the following
11441 # bare word (which is probably quoted by its arrow). So necessary
11442 # NO_BREAK's have to be handled as special cases in the final
11444 if ( !defined($bsr) ) { $bsr = VERY_STRONG }
11445 if ( !defined($bsl) ) { $bsl = VERY_STRONG }
11446 my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
11447 my $bond_str_1 = $bond_str;
11449 #---------------------------------------------------------------
11450 # Bond Strength Section 2:
11451 # Apply hardwired rules..
11452 #---------------------------------------------------------------
11454 # Patch to put terminal or clauses on a new line: Weaken the bond
11455 # at an || followed by die or similar keyword to make the terminal
11456 # or clause fall on a new line, like this:
11458 # my $class = shift
11459 # || die "Cannot add broadcast: No class identifier found";
11461 # Otherwise the break will be at the previous '=' since the || and
11462 # = have the same starting strength and the or is biased, like
11466 # shift || die "Cannot add broadcast: No class identifier found";
11468 # In any case if the user places a break at either the = or the ||
11469 # it should remain there.
11470 if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
11471 if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) {
11472 if ( $want_break_before{$token} && $i > 0 ) {
11473 $bond_strength_to_go[ $i - 1 ] -= $delta_bias;
11476 $bond_str -= $delta_bias;
11481 # good to break after end of code blocks
11482 if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
11484 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
11485 $code_bias += $delta_bias;
11488 if ( $type eq 'k' ) {
11490 # allow certain control keywords to stand out
11491 if ( $next_nonblank_type eq 'k'
11492 && $is_last_next_redo_return{$token} )
11494 $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
11497 # Don't break after keyword my. This is a quick fix for a
11498 # rare problem with perl. An example is this line from file
11501 # foreach my $question( Debian::DebConf::ConfigDb::gettree(
11502 # $this->{'question'} ) )
11504 if ( $token eq 'my' ) {
11505 $bond_str = NO_BREAK;
11510 # good to break before 'if', 'unless', etc
11511 if ( $is_if_brace_follower{$next_nonblank_token} ) {
11512 $bond_str = VERY_WEAK;
11515 if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
11517 # FIXME: needs more testing
11518 if ( $is_keyword_returning_list{$next_nonblank_token} ) {
11519 $bond_str = $list_str if ( $bond_str > $list_str );
11522 # keywords like 'unless', 'if', etc, within statements
11524 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
11525 $bond_str = VERY_WEAK / 1.05;
11529 # try not to break before a comma-arrow
11530 elsif ( $next_nonblank_type eq '=>' ) {
11531 if ( $bond_str < STRONG ) { $bond_str = STRONG }
11534 #---------------------------------------------------------------
11535 # Additional hardwired NOBREAK rules
11536 #---------------------------------------------------------------
11538 # map1.t -- correct for a quirk in perl
11540 && $next_nonblank_type eq 'i'
11541 && $last_nonblank_type eq 'k'
11542 && $is_sort_map_grep{$last_nonblank_token} )
11544 # /^(sort|map|grep)$/ )
11546 $bond_str = NO_BREAK;
11549 # extrude.t: do not break before paren at:
11551 if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
11552 $bond_str = NO_BREAK;
11555 # in older version of perl, use strict can cause problems with
11556 # breaks before bare words following opening parens. For example,
11557 # this will fail under older versions if a break is made between
11558 # '(' and 'MAIL': use strict; open( MAIL, "a long filename or
11559 # command"); close MAIL;
11560 if ( $type eq '{' ) {
11562 if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
11564 # but it's fine to break if the word is followed by a '=>'
11565 # or if it is obviously a sub call
11566 my $i_next_next_nonblank = $i_next_nonblank + 1;
11567 my $next_next_type = $types_to_go[$i_next_next_nonblank];
11568 if ( $next_next_type eq 'b'
11569 && $i_next_nonblank < $max_index_to_go )
11571 $i_next_next_nonblank++;
11572 $next_next_type = $types_to_go[$i_next_next_nonblank];
11575 # We'll check for an old breakpoint and keep a leading
11576 # bareword if it was that way in the input file.
11577 # Presumably it was ok that way. For example, the
11578 # following would remain unchanged:
11581 # January, February, March, April,
11582 # May, June, July, August,
11583 # September, October, November, December,
11586 # This should be sufficient:
11588 !$old_breakpoint_to_go[$i]
11589 && ( $next_next_type eq ','
11590 || $next_next_type eq '}' )
11593 $bond_str = NO_BREAK;
11598 # Do not break between a possible filehandle and a ? or / and do
11599 # not introduce a break after it if there is no blank
11601 elsif ( $type eq 'Z' ) {
11606 # if there is no blank and we do not want one. Examples:
11607 # print $x++ # do not break after $x
11608 # print HTML"HELLO" # break ok after HTML
11611 && defined( $want_left_space{$next_type} )
11612 && $want_left_space{$next_type} == WS_NO
11615 # or we might be followed by the start of a quote
11616 || $next_nonblank_type =~ /^[\/\?]$/
11619 $bond_str = NO_BREAK;
11623 # Breaking before a ? before a quote can cause trouble if
11624 # they are not separated by a blank.
11625 # Example: a syntax error occurs if you break before the ? here
11626 # my$logic=join$all?' && ':' || ',@regexps;
11627 # From: Professional_Perl_Programming_Code/multifind.pl
11628 if ( $next_nonblank_type eq '?' ) {
11629 $bond_str = NO_BREAK
11630 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
11633 # Breaking before a . followed by a number
11634 # can cause trouble if there is no intervening space
11635 # Example: a syntax error occurs if you break before the .2 here
11636 # $str .= pack($endian.2, ensurrogate($ord));
11637 # From: perl58/Unicode.pm
11638 elsif ( $next_nonblank_type eq '.' ) {
11639 $bond_str = NO_BREAK
11640 if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
11643 my $bond_str_2 = $bond_str;
11645 #---------------------------------------------------------------
11646 # End of hardwired rules
11647 #---------------------------------------------------------------
11649 #---------------------------------------------------------------
11650 # Bond Strength Section 3:
11651 # Apply table rules. These have priority over the above
11653 #---------------------------------------------------------------
11655 my $tabulated_bond_str;
11657 my $rtype = $next_nonblank_type;
11658 if ( $token =~ /^[\(\[\{\)\]\}]/ ) { $ltype = $type . $token }
11659 if ( $next_nonblank_token =~ /^[\(\[\{\)\]\}]/ ) {
11660 $rtype = $next_nonblank_type . $next_nonblank_token;
11663 if ( $binary_bond_strength{$ltype}{$rtype} ) {
11664 $bond_str = $binary_bond_strength{$ltype}{$rtype};
11665 $tabulated_bond_str = $bond_str;
11668 if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
11669 $bond_str = NO_BREAK;
11670 $tabulated_bond_str = $bond_str;
11672 my $bond_str_3 = $bond_str;
11674 # If the hardwired rules conflict with the tabulated bond
11675 # strength then there is an inconsistency that should be fixed
11676 FORMATTER_DEBUG_FLAG_BOND_TABLES
11677 && $tabulated_bond_str
11679 && $bond_str_1 != $bond_str_2
11680 && $bond_str_2 != $tabulated_bond_str
11683 "BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
11686 #-----------------------------------------------------------------
11687 # Bond Strength Section 4:
11688 # Modify strengths of certain tokens which often occur in sequence
11689 # by adding a small bias to each one in turn so that the breaks
11690 # occur from left to right.
11692 # Note that we only changing strengths by small amounts here,
11693 # and usually increasing, so we should not be altering any NO_BREAKs.
11694 # Other routines which check for NO_BREAKs will use a tolerance
11695 # of one to avoid any problem.
11696 #-----------------------------------------------------------------
11698 # The bias tables use special keys
11699 my $left_key = bias_table_key( $type, $token );
11701 bias_table_key( $next_nonblank_type, $next_nonblank_token );
11703 # add any bias set by sub scan_list at old comma break points.
11704 if ( $type eq ',' ) { $bond_str += $bond_strength_to_go[$i] }
11707 elsif ( defined( $bias{$left_key} ) ) {
11708 if ( !$want_break_before{$left_key} ) {
11709 $bias{$left_key} += $delta_bias;
11710 $bond_str += $bias{$left_key};
11715 if ( defined( $bias{$right_key} ) ) {
11716 if ( $want_break_before{$right_key} ) {
11718 # for leading '.' align all but 'short' quotes; the idea
11719 # is to not place something like "\n" on a single line.
11720 if ( $right_key eq '.' ) {
11722 $last_nonblank_type eq '.'
11725 $rOpts_short_concatenation_item_length )
11726 && ( !$is_closing_token{$token} )
11729 $bias{$right_key} += $delta_bias;
11733 $bias{$right_key} += $delta_bias;
11735 $bond_str += $bias{$right_key};
11738 my $bond_str_4 = $bond_str;
11740 #---------------------------------------------------------------
11741 # Bond Strength Section 5:
11742 # Fifth Approximation.
11743 # Take nesting depth into account by adding the nesting depth
11744 # to the bond strength.
11745 #---------------------------------------------------------------
11748 if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
11749 if ( $total_nesting_depth > 0 ) {
11750 $strength = $bond_str + $total_nesting_depth;
11753 $strength = $bond_str;
11757 $strength = NO_BREAK;
11760 #---------------------------------------------------------------
11761 # Bond Strength Section 6:
11762 # Sixth Approximation. Welds.
11763 #---------------------------------------------------------------
11765 # Do not allow a break within welds,
11766 if ( weld_len_right_to_go($i) ) { $strength = NO_BREAK }
11768 # But encourage breaking after opening welded tokens
11769 elsif ( weld_len_left_to_go($i) && $is_opening_token{$token} ) {
11773 # always break after side comment
11774 if ( $type eq '#' ) { $strength = 0 }
11776 $bond_strength_to_go[$i] = $strength;
11778 FORMATTER_DEBUG_FLAG_BOND && do {
11779 my $str = substr( $token, 0, 15 );
11780 $str .= ' ' x ( 16 - length($str) );
11782 "BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
11786 } ## end sub set_bond_strengths
11789 sub pad_array_to_go {
11791 # to simplify coding in scan_list and set_bond_strengths, it helps
11792 # to create some extra blank tokens at the end of the arrays
11793 $tokens_to_go[ $max_index_to_go + 1 ] = '';
11794 $tokens_to_go[ $max_index_to_go + 2 ] = '';
11795 $types_to_go[ $max_index_to_go + 1 ] = 'b';
11796 $types_to_go[ $max_index_to_go + 2 ] = 'b';
11797 $nesting_depth_to_go[ $max_index_to_go + 1 ] =
11798 $nesting_depth_to_go[$max_index_to_go];
11801 if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
11802 if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
11804 # shouldn't happen:
11805 unless ( get_saw_brace_error() ) {
11807 "Program bug in scan_list: hit nesting error which should have been caught\n"
11809 report_definite_bug();
11813 $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
11818 elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
11819 $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
11824 { # begin scan_list
11827 $block_type, $current_depth,
11829 $i_last_nonblank_token, $last_colon_sequence_number,
11830 $last_nonblank_token, $last_nonblank_type,
11831 $last_nonblank_block_type, $last_old_breakpoint_count,
11832 $minimum_depth, $next_nonblank_block_type,
11833 $next_nonblank_token, $next_nonblank_type,
11834 $old_breakpoint_count, $starting_breakpoint_count,
11835 $starting_depth, $token,
11836 $type, $type_sequence,
11840 @breakpoint_stack, @breakpoint_undo_stack,
11841 @comma_index, @container_type,
11842 @identifier_count_stack, @index_before_arrow,
11843 @interrupted_list, @item_count_stack,
11844 @last_comma_index, @last_dot_index,
11845 @last_nonblank_type, @old_breakpoint_count_stack,
11846 @opening_structure_index_stack, @rfor_semicolon_list,
11847 @has_old_logical_breakpoints, @rand_or_list,
11851 # routine to define essential variables when we go 'up' to
11853 sub check_for_new_minimum_depth {
11855 if ( $depth < $minimum_depth ) {
11857 $minimum_depth = $depth;
11859 # these arrays need not retain values between calls
11860 $breakpoint_stack[$depth] = $starting_breakpoint_count;
11861 $container_type[$depth] = "";
11862 $identifier_count_stack[$depth] = 0;
11863 $index_before_arrow[$depth] = -1;
11864 $interrupted_list[$depth] = 1;
11865 $item_count_stack[$depth] = 0;
11866 $last_nonblank_type[$depth] = "";
11867 $opening_structure_index_stack[$depth] = -1;
11869 $breakpoint_undo_stack[$depth] = undef;
11870 $comma_index[$depth] = undef;
11871 $last_comma_index[$depth] = undef;
11872 $last_dot_index[$depth] = undef;
11873 $old_breakpoint_count_stack[$depth] = undef;
11874 $has_old_logical_breakpoints[$depth] = 0;
11875 $rand_or_list[$depth] = [];
11876 $rfor_semicolon_list[$depth] = [];
11877 $i_equals[$depth] = -1;
11879 # these arrays must retain values between calls
11880 if ( !defined( $has_broken_sublist[$depth] ) ) {
11881 $dont_align[$depth] = 0;
11882 $has_broken_sublist[$depth] = 0;
11883 $want_comma_break[$depth] = 0;
11889 # routine to decide which commas to break at within a container;
11891 # $bp_count = number of comma breakpoints set
11892 # $do_not_break_apart = a flag indicating if container need not
11894 sub set_comma_breakpoints {
11898 my $do_not_break_apart = 0;
11901 if ( $item_count_stack[$dd] ) {
11903 # handle commas not in containers...
11904 if ( $dont_align[$dd] ) {
11905 do_uncontained_comma_breaks($dd);
11908 # handle commas within containers...
11910 my $fbc = $forced_breakpoint_count;
11912 # always open comma lists not preceded by keywords,
11913 # barewords, identifiers (that is, anything that doesn't
11914 # look like a function call)
11915 my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
11917 set_comma_breakpoints_do(
11919 $opening_structure_index_stack[$dd],
11921 $item_count_stack[$dd],
11922 $identifier_count_stack[$dd],
11924 $next_nonblank_type,
11925 $container_type[$dd],
11926 $interrupted_list[$dd],
11927 \$do_not_break_apart,
11930 $bp_count = $forced_breakpoint_count - $fbc;
11931 $do_not_break_apart = 0 if $must_break_open;
11934 return ( $bp_count, $do_not_break_apart );
11937 sub do_uncontained_comma_breaks {
11939 # Handle commas not in containers...
11940 # This is a catch-all routine for commas that we
11941 # don't know what to do with because the don't fall
11942 # within containers. We will bias the bond strength
11943 # to break at commas which ended lines in the input
11944 # file. This usually works better than just trying
11945 # to put as many items on a line as possible. A
11946 # downside is that if the input file is garbage it
11947 # won't work very well. However, the user can always
11948 # prevent following the old breakpoints with the
11952 my $old_comma_break_count = 0;
11953 foreach my $ii ( @{ $comma_index[$dd] } ) {
11954 if ( $old_breakpoint_to_go[$ii] ) {
11955 $old_comma_break_count++;
11956 $bond_strength_to_go[$ii] = $bias;
11958 # reduce bias magnitude to force breaks in order
11963 # Also put a break before the first comma if
11964 # (1) there was a break there in the input, and
11965 # (2) there was exactly one old break before the first comma break
11966 # (3) OLD: there are multiple old comma breaks
11967 # (3) NEW: there are one or more old comma breaks (see return example)
11969 # For example, we will follow the user and break after
11970 # 'print' in this snippet:
11972 # "conformability (Not the same dimension)\n",
11973 # "\t", $have, " is ", text_unit($hu), "\n",
11974 # "\t", $want, " is ", text_unit($wu), "\n",
11977 # Another example, just one comma, where we will break after
11980 # $x * cos($a) - $y * sin($a),
11981 # $x * sin($a) + $y * cos($a);
11983 # Breaking a print statement:
11985 # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
11986 # ( $? & 128 ) ? " -- core dumped" : "", "\n";
11988 # But we will not force a break after the opening paren here
11989 # (causes a blinker):
11990 # $heap->{stream}->set_output_filter(
11991 # poe::filter::reference->new('myotherfreezer') ),
11994 my $i_first_comma = $comma_index[$dd]->[0];
11995 if ( $old_breakpoint_to_go[$i_first_comma] ) {
11996 my $level_comma = $levels_to_go[$i_first_comma];
11999 for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
12000 if ( $old_breakpoint_to_go[$ii] ) {
12002 last if ( $obp_count > 1 );
12004 if ( $levels_to_go[$ii] == $level_comma );
12008 # Changed rule from multiple old commas to just one here:
12009 if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
12011 # Do not to break before an opening token because
12012 # it can lead to "blinkers".
12013 my $ibreakm = $ibreak;
12014 $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' );
12015 if ( $ibreakm >= 0 && $types_to_go[$ibreakm] !~ /^[\(\{\[L]$/ )
12017 set_forced_breakpoint($ibreak);
12024 my %is_logical_container;
12027 my @q = qw# if elsif unless while and or err not && | || ? : ! #;
12028 @is_logical_container{@q} = (1) x scalar(@q);
12031 sub set_for_semicolon_breakpoints {
12033 foreach ( @{ $rfor_semicolon_list[$dd] } ) {
12034 set_forced_breakpoint($_);
12039 sub set_logical_breakpoints {
12042 $item_count_stack[$dd] == 0
12043 && $is_logical_container{ $container_type[$dd] }
12045 || $has_old_logical_breakpoints[$dd]
12049 # Look for breaks in this order:
12052 foreach my $i ( 0 .. 3 ) {
12053 if ( $rand_or_list[$dd][$i] ) {
12054 foreach ( @{ $rand_or_list[$dd][$i] } ) {
12055 set_forced_breakpoint($_);
12058 # break at any 'if' and 'unless' too
12059 foreach ( @{ $rand_or_list[$dd][4] } ) {
12060 set_forced_breakpoint($_);
12062 $rand_or_list[$dd] = [];
12070 sub is_unbreakable_container {
12072 # never break a container of one of these types
12073 # because bad things can happen (map1.t)
12075 return $is_sort_map_grep{ $container_type[$dd] };
12080 # This routine is responsible for setting line breaks for all lists,
12081 # so that hierarchical structure can be displayed and so that list
12082 # items can be vertically aligned. The output of this routine is
12083 # stored in the array @forced_breakpoint_to_go, which is used to set
12084 # final breakpoints.
12086 $starting_depth = $nesting_depth_to_go[0];
12089 $current_depth = $starting_depth;
12091 $last_colon_sequence_number = -1;
12092 $last_nonblank_token = ';';
12093 $last_nonblank_type = ';';
12094 $last_nonblank_block_type = ' ';
12095 $last_old_breakpoint_count = 0;
12096 $minimum_depth = $current_depth + 1; # forces update in check below
12097 $old_breakpoint_count = 0;
12098 $starting_breakpoint_count = $forced_breakpoint_count;
12101 $type_sequence = '';
12103 my $total_depth_variation = 0;
12104 my $i_old_assignment_break;
12105 my $depth_last = $starting_depth;
12107 check_for_new_minimum_depth($current_depth);
12109 my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
12110 my $want_previous_breakpoint = -1;
12112 my $saw_good_breakpoint;
12113 my $i_line_end = -1;
12114 my $i_line_start = -1;
12116 # loop over all tokens in this batch
12117 while ( ++$i <= $max_index_to_go ) {
12118 if ( $type ne 'b' ) {
12119 $i_last_nonblank_token = $i - 1;
12120 $last_nonblank_type = $type;
12121 $last_nonblank_token = $token;
12122 $last_nonblank_block_type = $block_type;
12123 } ## end if ( $type ne 'b' )
12124 $type = $types_to_go[$i];
12125 $block_type = $block_type_to_go[$i];
12126 $token = $tokens_to_go[$i];
12127 $type_sequence = $type_sequence_to_go[$i];
12128 my $next_type = $types_to_go[ $i + 1 ];
12129 my $next_token = $tokens_to_go[ $i + 1 ];
12130 my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
12131 $next_nonblank_type = $types_to_go[$i_next_nonblank];
12132 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
12133 $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
12135 # set break if flag was set
12136 if ( $want_previous_breakpoint >= 0 ) {
12137 set_forced_breakpoint($want_previous_breakpoint);
12138 $want_previous_breakpoint = -1;
12141 $last_old_breakpoint_count = $old_breakpoint_count;
12142 if ( $old_breakpoint_to_go[$i] ) {
12144 $i_line_start = $i_next_nonblank;
12146 $old_breakpoint_count++;
12148 # Break before certain keywords if user broke there and
12149 # this is a 'safe' break point. The idea is to retain
12150 # any preferred breaks for sequential list operations,
12151 # like a schwartzian transform.
12152 if ($rOpts_break_at_old_keyword_breakpoints) {
12154 $next_nonblank_type eq 'k'
12155 && $is_keyword_returning_list{$next_nonblank_token}
12156 && ( $type =~ /^[=\)\]\}Riw]$/
12158 && $is_keyword_returning_list{$token} )
12162 # we actually have to set this break next time through
12163 # the loop because if we are at a closing token (such
12164 # as '}') which forms a one-line block, this break might
12166 $want_previous_breakpoint = $i;
12167 } ## end if ( $next_nonblank_type...)
12168 } ## end if ($rOpts_break_at_old_keyword_breakpoints)
12170 # Break before attributes if user broke there
12171 if ($rOpts_break_at_old_attribute_breakpoints) {
12172 if ( $next_nonblank_type eq 'A' ) {
12173 $want_previous_breakpoint = $i;
12177 # remember an = break as possible good break point
12178 if ( $is_assignment{$type} ) {
12179 $i_old_assignment_break = $i;
12181 elsif ( $is_assignment{$next_nonblank_type} ) {
12182 $i_old_assignment_break = $i_next_nonblank;
12184 } ## end if ( $old_breakpoint_to_go...)
12186 next if ( $type eq 'b' );
12187 $depth = $nesting_depth_to_go[ $i + 1 ];
12189 $total_depth_variation += abs( $depth - $depth_last );
12190 $depth_last = $depth;
12192 # safety check - be sure we always break after a comment
12193 # Shouldn't happen .. an error here probably means that the
12194 # nobreak flag did not get turned off correctly during
12196 if ( $type eq '#' ) {
12197 if ( $i != $max_index_to_go ) {
12199 "Non-fatal program bug: backup logic needed to break after a comment\n"
12201 report_definite_bug();
12202 $nobreak_to_go[$i] = 0;
12203 set_forced_breakpoint($i);
12204 } ## end if ( $i != $max_index_to_go)
12205 } ## end if ( $type eq '#' )
12207 # Force breakpoints at certain tokens in long lines.
12208 # Note that such breakpoints will be undone later if these tokens
12209 # are fully contained within parens on a line.
12212 # break before a keyword within a line
12216 # if one of these keywords:
12217 && $token =~ /^(if|unless|while|until|for)$/
12219 # but do not break at something like '1 while'
12220 && ( $last_nonblank_type ne 'n' || $i > 2 )
12222 # and let keywords follow a closing 'do' brace
12223 && $last_nonblank_block_type ne 'do'
12228 # or container is broken (by side-comment, etc)
12229 || ( $next_nonblank_token eq '('
12230 && $mate_index_to_go[$i_next_nonblank] < $i )
12234 set_forced_breakpoint( $i - 1 );
12235 } ## end if ( $type eq 'k' && $i...)
12237 # remember locations of '||' and '&&' for possible breaks if we
12238 # decide this is a long logical expression.
12239 if ( $type eq '||' ) {
12240 push @{ $rand_or_list[$depth][2] }, $i;
12241 ++$has_old_logical_breakpoints[$depth]
12242 if ( ( $i == $i_line_start || $i == $i_line_end )
12243 && $rOpts_break_at_old_logical_breakpoints );
12244 } ## end if ( $type eq '||' )
12245 elsif ( $type eq '&&' ) {
12246 push @{ $rand_or_list[$depth][3] }, $i;
12247 ++$has_old_logical_breakpoints[$depth]
12248 if ( ( $i == $i_line_start || $i == $i_line_end )
12249 && $rOpts_break_at_old_logical_breakpoints );
12250 } ## end elsif ( $type eq '&&' )
12251 elsif ( $type eq 'f' ) {
12252 push @{ $rfor_semicolon_list[$depth] }, $i;
12254 elsif ( $type eq 'k' ) {
12255 if ( $token eq 'and' ) {
12256 push @{ $rand_or_list[$depth][1] }, $i;
12257 ++$has_old_logical_breakpoints[$depth]
12258 if ( ( $i == $i_line_start || $i == $i_line_end )
12259 && $rOpts_break_at_old_logical_breakpoints );
12260 } ## end if ( $token eq 'and' )
12262 # break immediately at 'or's which are probably not in a logical
12263 # block -- but we will break in logical breaks below so that
12264 # they do not add to the forced_breakpoint_count
12265 elsif ( $token eq 'or' ) {
12266 push @{ $rand_or_list[$depth][0] }, $i;
12267 ++$has_old_logical_breakpoints[$depth]
12268 if ( ( $i == $i_line_start || $i == $i_line_end )
12269 && $rOpts_break_at_old_logical_breakpoints );
12270 if ( $is_logical_container{ $container_type[$depth] } ) {
12273 if ($is_long_line) { set_forced_breakpoint($i) }
12274 elsif ( ( $i == $i_line_start || $i == $i_line_end )
12275 && $rOpts_break_at_old_logical_breakpoints )
12277 $saw_good_breakpoint = 1;
12279 } ## end else [ if ( $is_logical_container...)]
12280 } ## end elsif ( $token eq 'or' )
12281 elsif ( $token eq 'if' || $token eq 'unless' ) {
12282 push @{ $rand_or_list[$depth][4] }, $i;
12283 if ( ( $i == $i_line_start || $i == $i_line_end )
12284 && $rOpts_break_at_old_logical_breakpoints )
12286 set_forced_breakpoint($i);
12288 } ## end elsif ( $token eq 'if' ||...)
12289 } ## end elsif ( $type eq 'k' )
12290 elsif ( $is_assignment{$type} ) {
12291 $i_equals[$depth] = $i;
12294 if ($type_sequence) {
12296 # handle any postponed closing breakpoints
12297 if ( $token =~ /^[\)\]\}\:]$/ ) {
12298 if ( $type eq ':' ) {
12299 $last_colon_sequence_number = $type_sequence;
12301 # retain break at a ':' line break
12302 if ( ( $i == $i_line_start || $i == $i_line_end )
12303 && $rOpts_break_at_old_ternary_breakpoints )
12306 set_forced_breakpoint($i);
12308 # break at previous '='
12309 if ( $i_equals[$depth] > 0 ) {
12310 set_forced_breakpoint( $i_equals[$depth] );
12311 $i_equals[$depth] = -1;
12313 } ## end if ( ( $i == $i_line_start...))
12314 } ## end if ( $type eq ':' )
12315 if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
12316 my $inc = ( $type eq ':' ) ? 0 : 1;
12317 set_forced_breakpoint( $i - $inc );
12318 delete $postponed_breakpoint{$type_sequence};
12320 } ## end if ( $token =~ /^[\)\]\}\:]$/[{[(])
12322 # set breaks at ?/: if they will get separated (and are
12323 # not a ?/: chain), or if the '?' is at the end of the
12325 elsif ( $token eq '?' ) {
12326 my $i_colon = $mate_index_to_go[$i];
12328 $i_colon <= 0 # the ':' is not in this batch
12329 || $i == 0 # this '?' is the first token of the line
12331 $max_index_to_go # or this '?' is the last token
12335 # don't break at a '?' if preceded by ':' on
12336 # this line of previous ?/: pair on this line.
12337 # This is an attempt to preserve a chain of ?/:
12338 # expressions (elsif2.t). And don't break if
12339 # this has a side comment.
12340 set_forced_breakpoint($i)
12342 $type_sequence == (
12343 $last_colon_sequence_number +
12344 TYPE_SEQUENCE_INCREMENT
12346 || $tokens_to_go[$max_index_to_go] eq '#'
12348 set_closing_breakpoint($i);
12349 } ## end if ( $i_colon <= 0 ||...)
12350 } ## end elsif ( $token eq '?' )
12351 } ## end if ($type_sequence)
12353 #print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
12355 #------------------------------------------------------------
12356 # Handle Increasing Depth..
12358 # prepare for a new list when depth increases
12359 # token $i is a '(','{', or '['
12360 #------------------------------------------------------------
12361 if ( $depth > $current_depth ) {
12363 $breakpoint_stack[$depth] = $forced_breakpoint_count;
12364 $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
12365 $has_broken_sublist[$depth] = 0;
12366 $identifier_count_stack[$depth] = 0;
12367 $index_before_arrow[$depth] = -1;
12368 $interrupted_list[$depth] = 0;
12369 $item_count_stack[$depth] = 0;
12370 $last_comma_index[$depth] = undef;
12371 $last_dot_index[$depth] = undef;
12372 $last_nonblank_type[$depth] = $last_nonblank_type;
12373 $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
12374 $opening_structure_index_stack[$depth] = $i;
12375 $rand_or_list[$depth] = [];
12376 $rfor_semicolon_list[$depth] = [];
12377 $i_equals[$depth] = -1;
12378 $want_comma_break[$depth] = 0;
12379 $container_type[$depth] =
12380 ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
12381 ? $last_nonblank_token
12383 $has_old_logical_breakpoints[$depth] = 0;
12385 # if line ends here then signal closing token to break
12386 if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
12388 set_closing_breakpoint($i);
12391 # Not all lists of values should be vertically aligned..
12392 $dont_align[$depth] =
12394 # code BLOCKS are handled at a higher level
12395 ( $block_type ne "" )
12397 # certain paren lists
12398 || ( $type eq '(' ) && (
12400 # it does not usually look good to align a list of
12401 # identifiers in a parameter list, as in:
12402 # my($var1, $var2, ...)
12403 # (This test should probably be refined, for now I'm just
12404 # testing for any keyword)
12405 ( $last_nonblank_type eq 'k' )
12407 # a trailing '(' usually indicates a non-list
12408 || ( $next_nonblank_type eq '(' )
12411 # patch to outdent opening brace of long if/for/..
12412 # statements (like this one). See similar coding in
12413 # set_continuation breaks. We have also catch it here for
12414 # short line fragments which otherwise will not go through
12415 # set_continuation_breaks.
12419 # if we have the ')' but not its '(' in this batch..
12420 && ( $last_nonblank_token eq ')' )
12421 && $mate_index_to_go[$i_last_nonblank_token] < 0
12423 # and user wants brace to left
12424 && !$rOpts->{'opening-brace-always-on-right'}
12426 && ( $type eq '{' ) # should be true
12427 && ( $token eq '{' ) # should be true
12430 set_forced_breakpoint( $i - 1 );
12431 } ## end if ( $block_type && ( ...))
12432 } ## end if ( $depth > $current_depth)
12434 #------------------------------------------------------------
12435 # Handle Decreasing Depth..
12437 # finish off any old list when depth decreases
12438 # token $i is a ')','}', or ']'
12439 #------------------------------------------------------------
12440 elsif ( $depth < $current_depth ) {
12442 check_for_new_minimum_depth($depth);
12444 # force all outer logical containers to break after we see on
12446 $has_old_logical_breakpoints[$depth] ||=
12447 $has_old_logical_breakpoints[$current_depth];
12449 # Patch to break between ') {' if the paren list is broken.
12450 # There is similar logic in set_continuation_breaks for
12451 # non-broken lists.
12453 && $next_nonblank_block_type
12454 && $interrupted_list[$current_depth]
12455 && $next_nonblank_type eq '{'
12456 && !$rOpts->{'opening-brace-always-on-right'} )
12458 set_forced_breakpoint($i);
12459 } ## end if ( $token eq ')' && ...
12461 #print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
12463 # set breaks at commas if necessary
12464 my ( $bp_count, $do_not_break_apart ) =
12465 set_comma_breakpoints($current_depth);
12467 my $i_opening = $opening_structure_index_stack[$current_depth];
12468 my $saw_opening_structure = ( $i_opening >= 0 );
12470 # this term is long if we had to break at interior commas..
12471 my $is_long_term = $bp_count > 0;
12473 # If this is a short container with one or more comma arrows,
12474 # then we will mark it as a long term to open it if requested.
12475 # $rOpts_comma_arrow_breakpoints =
12476 # 0 - open only if comma precedes closing brace
12477 # 1 - stable: except for one line blocks
12478 # 2 - try to form 1 line blocks
12480 # 4 - always open up if vt=0
12481 # 5 - stable: even for one line blocks if vt=0
12482 if ( !$is_long_term
12483 && $tokens_to_go[$i_opening] =~ /^[\(\{\[]$/
12484 && $index_before_arrow[ $depth + 1 ] > 0
12485 && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
12488 $is_long_term = $rOpts_comma_arrow_breakpoints == 4
12489 || ( $rOpts_comma_arrow_breakpoints == 0
12490 && $last_nonblank_token eq ',' )
12491 || ( $rOpts_comma_arrow_breakpoints == 5
12492 && $old_breakpoint_to_go[$i_opening] );
12493 } ## end if ( !$is_long_term &&...)
12495 # mark term as long if the length between opening and closing
12496 # parens exceeds allowed line length
12497 if ( !$is_long_term && $saw_opening_structure ) {
12498 my $i_opening_minus = find_token_starting_list($i_opening);
12500 # Note: we have to allow for one extra space after a
12501 # closing token so that we do not strand a comma or
12502 # semicolon, hence the '>=' here (oneline.t)
12503 # Note: we ignore left weld lengths here for best results
12505 excess_line_length( $i_opening_minus, $i, 1 ) >= 0;
12506 } ## end if ( !$is_long_term &&...)
12508 # We've set breaks after all comma-arrows. Now we have to
12509 # undo them if this can be a one-line block
12510 # (the only breakpoints set will be due to comma-arrows)
12513 # user doesn't require breaking after all comma-arrows
12514 ( $rOpts_comma_arrow_breakpoints != 0 )
12515 && ( $rOpts_comma_arrow_breakpoints != 4 )
12517 # and if the opening structure is in this batch
12518 && $saw_opening_structure
12520 # and either on the same old line
12522 $old_breakpoint_count_stack[$current_depth] ==
12523 $last_old_breakpoint_count
12525 # or user wants to form long blocks with arrows
12526 || $rOpts_comma_arrow_breakpoints == 2
12529 # and we made some breakpoints between the opening and closing
12530 && ( $breakpoint_undo_stack[$current_depth] <
12531 $forced_breakpoint_undo_count )
12533 # and this block is short enough to fit on one line
12534 # Note: use < because need 1 more space for possible comma
12539 undo_forced_breakpoint_stack(
12540 $breakpoint_undo_stack[$current_depth] );
12541 } ## end if ( ( $rOpts_comma_arrow_breakpoints...))
12543 # now see if we have any comma breakpoints left
12544 my $has_comma_breakpoints =
12545 ( $breakpoint_stack[$current_depth] !=
12546 $forced_breakpoint_count );
12548 # update broken-sublist flag of the outer container
12549 $has_broken_sublist[$depth] =
12550 $has_broken_sublist[$depth]
12551 || $has_broken_sublist[$current_depth]
12553 || $has_comma_breakpoints;
12555 # Having come to the closing ')', '}', or ']', now we have to decide if we
12556 # should 'open up' the structure by placing breaks at the opening and
12557 # closing containers. This is a tricky decision. Here are some of the
12558 # basic considerations:
12560 # -If this is a BLOCK container, then any breakpoints will have already
12561 # been set (and according to user preferences), so we need do nothing here.
12563 # -If we have a comma-separated list for which we can align the list items,
12564 # then we need to do so because otherwise the vertical aligner cannot
12565 # currently do the alignment.
12567 # -If this container does itself contain a container which has been broken
12568 # open, then it should be broken open to properly show the structure.
12570 # -If there is nothing to align, and no other reason to break apart,
12571 # then do not do it.
12573 # We will not break open the parens of a long but 'simple' logical expression.
12576 # This is an example of a simple logical expression and its formatting:
12578 # if ( $bigwasteofspace1 && $bigwasteofspace2
12579 # || $bigwasteofspace3 && $bigwasteofspace4 )
12581 # Most people would prefer this than the 'spacey' version:
12584 # $bigwasteofspace1 && $bigwasteofspace2
12585 # || $bigwasteofspace3 && $bigwasteofspace4
12588 # To illustrate the rules for breaking logical expressions, consider:
12592 # and ( exists $ids_excl_uc{$id_uc}
12593 # or grep $id_uc =~ /$_/, @ids_excl_uc ))
12595 # This is on the verge of being difficult to read. The current default is to
12596 # open it up like this:
12601 # and ( exists $ids_excl_uc{$id_uc}
12602 # or grep $id_uc =~ /$_/, @ids_excl_uc )
12605 # This is a compromise which tries to avoid being too dense and to spacey.
12606 # A more spaced version would be:
12612 # exists $ids_excl_uc{$id_uc}
12613 # or grep $id_uc =~ /$_/, @ids_excl_uc
12617 # Some people might prefer the spacey version -- an option could be added. The
12618 # innermost expression contains a long block '( exists $ids_... ')'.
12620 # Here is how the logic goes: We will force a break at the 'or' that the
12621 # innermost expression contains, but we will not break apart its opening and
12622 # closing containers because (1) it contains no multi-line sub-containers itself,
12623 # and (2) there is no alignment to be gained by breaking it open like this
12626 # exists $ids_excl_uc{$id_uc}
12627 # or grep $id_uc =~ /$_/, @ids_excl_uc
12630 # (although this looks perfectly ok and might be good for long expressions). The
12631 # outer 'if' container, though, contains a broken sub-container, so it will be
12632 # broken open to avoid too much density. Also, since it contains no 'or's, there
12633 # will be a forced break at its 'and'.
12635 # set some flags telling something about this container..
12636 my $is_simple_logical_expression = 0;
12637 if ( $item_count_stack[$current_depth] == 0
12638 && $saw_opening_structure
12639 && $tokens_to_go[$i_opening] eq '('
12640 && $is_logical_container{ $container_type[$current_depth] }
12644 # This seems to be a simple logical expression with
12645 # no existing breakpoints. Set a flag to prevent
12647 if ( !$has_comma_breakpoints ) {
12648 $is_simple_logical_expression = 1;
12651 # This seems to be a simple logical expression with
12652 # breakpoints (broken sublists, for example). Break
12653 # at all 'or's and '||'s.
12655 set_logical_breakpoints($current_depth);
12657 } ## end if ( $item_count_stack...)
12660 && @{ $rfor_semicolon_list[$current_depth] } )
12662 set_for_semicolon_breakpoints($current_depth);
12664 # open up a long 'for' or 'foreach' container to allow
12665 # leading term alignment unless -lp is used.
12666 $has_comma_breakpoints = 1
12667 unless $rOpts_line_up_parentheses;
12668 } ## end if ( $is_long_term && ...)
12672 # breaks for code BLOCKS are handled at a higher level
12675 # we do not need to break at the top level of an 'if'
12677 && !$is_simple_logical_expression
12679 ## modification to keep ': (' containers vertically tight;
12680 ## but probably better to let user set -vt=1 to avoid
12681 ## inconsistency with other paren types
12682 ## && ($container_type[$current_depth] ne ':')
12684 # otherwise, we require one of these reasons for breaking:
12687 # - this term has forced line breaks
12688 $has_comma_breakpoints
12690 # - the opening container is separated from this batch
12691 # for some reason (comment, blank line, code block)
12692 # - this is a non-paren container spanning multiple lines
12693 || !$saw_opening_structure
12695 # - this is a long block contained in another breakable
12698 && $container_environment_to_go[$i_opening] ne
12704 # For -lp option, we must put a breakpoint before
12705 # the token which has been identified as starting
12706 # this indentation level. This is necessary for
12707 # proper alignment.
12708 if ( $rOpts_line_up_parentheses && $saw_opening_structure )
12710 my $item = $leading_spaces_to_go[ $i_opening + 1 ];
12711 if ( $i_opening + 1 < $max_index_to_go
12712 && $types_to_go[ $i_opening + 1 ] eq 'b' )
12714 $item = $leading_spaces_to_go[ $i_opening + 2 ];
12716 if ( defined($item) ) {
12717 my $i_start_2 = $item->get_starting_index();
12719 defined($i_start_2)
12721 # we are breaking after an opening brace, paren,
12722 # so don't break before it too
12723 && $i_start_2 ne $i_opening
12727 # Only break for breakpoints at the same
12728 # indentation level as the opening paren
12729 my $test1 = $nesting_depth_to_go[$i_opening];
12730 my $test2 = $nesting_depth_to_go[$i_start_2];
12731 if ( $test2 == $test1 ) {
12732 set_forced_breakpoint( $i_start_2 - 1 );
12734 } ## end if ( defined($i_start_2...))
12735 } ## end if ( defined($item) )
12736 } ## end if ( $rOpts_line_up_parentheses...)
12738 # break after opening structure.
12739 # note: break before closing structure will be automatic
12740 if ( $minimum_depth <= $current_depth ) {
12742 set_forced_breakpoint($i_opening)
12743 unless ( $do_not_break_apart
12744 || is_unbreakable_container($current_depth) );
12746 # break at ',' of lower depth level before opening token
12747 if ( $last_comma_index[$depth] ) {
12748 set_forced_breakpoint( $last_comma_index[$depth] );
12751 # break at '.' of lower depth level before opening token
12752 if ( $last_dot_index[$depth] ) {
12753 set_forced_breakpoint( $last_dot_index[$depth] );
12756 # break before opening structure if preceded by another
12757 # closing structure and a comma. This is normally
12758 # done by the previous closing brace, but not
12759 # if it was a one-line block.
12760 if ( $i_opening > 2 ) {
12762 ( $types_to_go[ $i_opening - 1 ] eq 'b' )
12766 if ( $types_to_go[$i_prev] eq ','
12767 && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
12769 set_forced_breakpoint($i_prev);
12772 # also break before something like ':(' or '?('
12775 $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
12777 my $token_prev = $tokens_to_go[$i_prev];
12778 if ( $want_break_before{$token_prev} ) {
12779 set_forced_breakpoint($i_prev);
12781 } ## end elsif ( $types_to_go[$i_prev...])
12782 } ## end if ( $i_opening > 2 )
12783 } ## end if ( $minimum_depth <=...)
12785 # break after comma following closing structure
12786 if ( $next_type eq ',' ) {
12787 set_forced_breakpoint( $i + 1 );
12790 # break before an '=' following closing structure
12792 $is_assignment{$next_nonblank_type}
12793 && ( $breakpoint_stack[$current_depth] !=
12794 $forced_breakpoint_count )
12797 set_forced_breakpoint($i);
12798 } ## end if ( $is_assignment{$next_nonblank_type...})
12800 # break at any comma before the opening structure Added
12801 # for -lp, but seems to be good in general. It isn't
12802 # obvious how far back to look; the '5' below seems to
12803 # work well and will catch the comma in something like
12804 # push @list, myfunc( $param, $param, ..
12806 my $icomma = $last_comma_index[$depth];
12807 if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
12808 unless ( $forced_breakpoint_to_go[$icomma] ) {
12809 set_forced_breakpoint($icomma);
12812 } # end logic to open up a container
12814 # Break open a logical container open if it was already open
12815 elsif ($is_simple_logical_expression
12816 && $has_old_logical_breakpoints[$current_depth] )
12818 set_logical_breakpoints($current_depth);
12821 # Handle long container which does not get opened up
12822 elsif ($is_long_term) {
12824 # must set fake breakpoint to alert outer containers that
12826 set_fake_breakpoint();
12827 } ## end elsif ($is_long_term)
12829 } ## end elsif ( $depth < $current_depth)
12831 #------------------------------------------------------------
12832 # Handle this token
12833 #------------------------------------------------------------
12835 $current_depth = $depth;
12837 # handle comma-arrow
12838 if ( $type eq '=>' ) {
12839 next if ( $last_nonblank_type eq '=>' );
12840 next if $rOpts_break_at_old_comma_breakpoints;
12841 next if $rOpts_comma_arrow_breakpoints == 3;
12842 $want_comma_break[$depth] = 1;
12843 $index_before_arrow[$depth] = $i_last_nonblank_token;
12845 } ## end if ( $type eq '=>' )
12847 elsif ( $type eq '.' ) {
12848 $last_dot_index[$depth] = $i;
12851 # Turn off alignment if we are sure that this is not a list
12852 # environment. To be safe, we will do this if we see certain
12853 # non-list tokens, such as ';', and also the environment is
12854 # not a list. Note that '=' could be in any of the = operators
12855 # (lextest.t). We can't just use the reported environment
12856 # because it can be incorrect in some cases.
12857 elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
12858 && $container_environment_to_go[$i] ne 'LIST' )
12860 $dont_align[$depth] = 1;
12861 $want_comma_break[$depth] = 0;
12862 $index_before_arrow[$depth] = -1;
12863 } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
12865 # now just handle any commas
12866 next unless ( $type eq ',' );
12868 $last_dot_index[$depth] = undef;
12869 $last_comma_index[$depth] = $i;
12871 # break here if this comma follows a '=>'
12872 # but not if there is a side comment after the comma
12873 if ( $want_comma_break[$depth] ) {
12875 if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
12876 if ($rOpts_comma_arrow_breakpoints) {
12877 $want_comma_break[$depth] = 0;
12878 ##$index_before_arrow[$depth] = -1;
12883 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
12885 # break before the previous token if it looks safe
12886 # Example of something that we will not try to break before:
12887 # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
12888 # Also we don't want to break at a binary operator (like +):
12892 # $y - $R, -fill => 'black',
12894 my $ibreak = $index_before_arrow[$depth] - 1;
12896 && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
12898 if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
12899 if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
12900 if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
12902 # don't break pointer calls, such as the following:
12903 # File::Spec->curdir => 1,
12904 # (This is tokenized as adjacent 'w' tokens)
12905 ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
12907 # And don't break before a comma, as in the following:
12908 # ( LONGER_THAN,=> 1,
12909 # EIGHTY_CHARACTERS,=> 2,
12910 # CAUSES_FORMATTING,=> 3,
12913 # This example is for -tso but should be general rule
12914 if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
12915 && $tokens_to_go[ $ibreak + 1 ] ne ',' )
12917 set_forced_breakpoint($ibreak);
12919 } ## end if ( $types_to_go[$ibreak...])
12920 } ## end if ( $ibreak > 0 && $tokens_to_go...)
12922 $want_comma_break[$depth] = 0;
12923 $index_before_arrow[$depth] = -1;
12925 # handle list which mixes '=>'s and ','s:
12926 # treat any list items so far as an interrupted list
12927 $interrupted_list[$depth] = 1;
12929 } ## end if ( $want_comma_break...)
12931 # break after all commas above starting depth
12932 if ( $depth < $starting_depth && !$dont_align[$depth] ) {
12933 set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
12937 # add this comma to the list..
12938 my $item_count = $item_count_stack[$depth];
12939 if ( $item_count == 0 ) {
12941 # but do not form a list with no opening structure
12944 # open INFILE_COPY, ">$input_file_copy"
12945 # or die ("very long message");
12947 if ( ( $opening_structure_index_stack[$depth] < 0 )
12948 && $container_environment_to_go[$i] eq 'BLOCK' )
12950 $dont_align[$depth] = 1;
12952 } ## end if ( $item_count == 0 )
12954 $comma_index[$depth][$item_count] = $i;
12955 ++$item_count_stack[$depth];
12956 if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
12957 $identifier_count_stack[$depth]++;
12959 } ## end while ( ++$i <= $max_index_to_go)
12961 #-------------------------------------------
12962 # end of loop over all tokens in this batch
12963 #-------------------------------------------
12965 # set breaks for any unfinished lists ..
12966 for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
12968 $interrupted_list[$dd] = 1;
12969 $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
12970 set_comma_breakpoints($dd);
12971 set_logical_breakpoints($dd)
12972 if ( $has_old_logical_breakpoints[$dd] );
12973 set_for_semicolon_breakpoints($dd);
12975 # break open container...
12976 my $i_opening = $opening_structure_index_stack[$dd];
12977 set_forced_breakpoint($i_opening)
12979 is_unbreakable_container($dd)
12981 # Avoid a break which would place an isolated ' or "
12984 && $i_opening >= $max_index_to_go - 2
12985 && $token =~ /^['"]$/ )
12987 } ## end for ( my $dd = $current_depth...)
12989 # Return a flag indicating if the input file had some good breakpoints.
12990 # This flag will be used to force a break in a line shorter than the
12991 # allowed line length.
12992 if ( $has_old_logical_breakpoints[$current_depth] ) {
12993 $saw_good_breakpoint = 1;
12996 # A complex line with one break at an = has a good breakpoint.
12997 # This is not complex ($total_depth_variation=0):
13001 # This is complex ($total_depth_variation=6):
13003 # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
13004 elsif ($i_old_assignment_break
13005 && $total_depth_variation > 4
13006 && $old_breakpoint_count == 1 )
13008 $saw_good_breakpoint = 1;
13009 } ## end elsif ( $i_old_assignment_break...)
13011 return $saw_good_breakpoint;
13012 } ## end sub scan_list
13015 sub find_token_starting_list {
13017 # When testing to see if a block will fit on one line, some
13018 # previous token(s) may also need to be on the line; particularly
13019 # if this is a sub call. So we will look back at least one
13020 # token. NOTE: This isn't perfect, but not critical, because
13021 # if we mis-identify a block, it will be wrapped and therefore
13022 # fixed the next time it is formatted.
13023 my $i_opening_paren = shift;
13024 my $i_opening_minus = $i_opening_paren;
13025 my $im1 = $i_opening_paren - 1;
13026 my $im2 = $i_opening_paren - 2;
13027 my $im3 = $i_opening_paren - 3;
13028 my $typem1 = $types_to_go[$im1];
13029 my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b';
13030 if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
13031 $i_opening_minus = $i_opening_paren;
13033 elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
13034 $i_opening_minus = $im1 if $im1 >= 0;
13036 # walk back to improve length estimate
13037 for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
13038 last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
13039 $i_opening_minus = $j;
13041 if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
13043 elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
13044 elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
13045 $i_opening_minus = $im2;
13047 return $i_opening_minus;
13050 { # begin set_comma_breakpoints_do
13052 my %is_keyword_with_special_leading_term;
13056 # These keywords have prototypes which allow a special leading item
13057 # followed by a list
13059 qw(formline grep kill map printf sprintf push chmod join pack unshift);
13060 @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
13063 sub set_comma_breakpoints_do {
13065 # Given a list with some commas, set breakpoints at some of the
13066 # commas, if necessary, to make it easy to read. This list is
13069 $depth, $i_opening_paren, $i_closing_paren,
13070 $item_count, $identifier_count, $rcomma_index,
13071 $next_nonblank_type, $list_type, $interrupted,
13072 $rdo_not_break_apart, $must_break_open,
13075 # nothing to do if no commas seen
13076 return if ( $item_count < 1 );
13077 my $i_first_comma = $rcomma_index->[0];
13078 my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
13079 my $i_last_comma = $i_true_last_comma;
13080 if ( $i_last_comma >= $max_index_to_go ) {
13081 $i_last_comma = $rcomma_index->[ --$item_count - 1 ];
13082 return if ( $item_count < 1 );
13085 #---------------------------------------------------------------
13086 # find lengths of all items in the list to calculate page layout
13087 #---------------------------------------------------------------
13088 my $comma_count = $item_count;
13094 my @max_length = ( 0, 0 );
13095 my $first_term_length;
13096 my $i = $i_opening_paren;
13099 foreach my $j ( 0 .. $comma_count - 1 ) {
13100 $is_odd = 1 - $is_odd;
13101 $i_prev_plus = $i + 1;
13102 $i = $rcomma_index->[$j];
13105 ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
13107 ( $types_to_go[$i_prev_plus] eq 'b' )
13110 push @i_term_begin, $i_term_begin;
13111 push @i_term_end, $i_term_end;
13112 push @i_term_comma, $i;
13114 # note: currently adding 2 to all lengths (for comma and space)
13116 2 + token_sequence_length( $i_term_begin, $i_term_end );
13117 push @item_lengths, $length;
13120 $first_term_length = $length;
13124 if ( $length > $max_length[$is_odd] ) {
13125 $max_length[$is_odd] = $length;
13130 # now we have to make a distinction between the comma count and item
13131 # count, because the item count will be one greater than the comma
13132 # count if the last item is not terminated with a comma
13134 ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
13135 ? $i_last_comma + 1
13138 ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
13139 ? $i_closing_paren - 2
13140 : $i_closing_paren - 1;
13141 my $i_effective_last_comma = $i_last_comma;
13143 my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
13145 if ( $last_item_length > 0 ) {
13147 # add 2 to length because other lengths include a comma and a blank
13148 $last_item_length += 2;
13149 push @item_lengths, $last_item_length;
13150 push @i_term_begin, $i_b + 1;
13151 push @i_term_end, $i_e;
13152 push @i_term_comma, undef;
13154 my $i_odd = $item_count % 2;
13156 if ( $last_item_length > $max_length[$i_odd] ) {
13157 $max_length[$i_odd] = $last_item_length;
13161 $i_effective_last_comma = $i_e + 1;
13163 if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
13164 $identifier_count++;
13168 #---------------------------------------------------------------
13169 # End of length calculations
13170 #---------------------------------------------------------------
13172 #---------------------------------------------------------------
13173 # Compound List Rule 1:
13174 # Break at (almost) every comma for a list containing a broken
13175 # sublist. This has higher priority than the Interrupted List
13177 #---------------------------------------------------------------
13178 if ( $has_broken_sublist[$depth] ) {
13180 # Break at every comma except for a comma between two
13181 # simple, small terms. This prevents long vertical
13182 # columns of, say, just 0's.
13183 my $small_length = 10; # 2 + actual maximum length wanted
13185 # We'll insert a break in long runs of small terms to
13186 # allow alignment in uniform tables.
13187 my $skipped_count = 0;
13188 my $columns = table_columns_available($i_first_comma);
13189 my $fields = int( $columns / $small_length );
13190 if ( $rOpts_maximum_fields_per_table
13191 && $fields > $rOpts_maximum_fields_per_table )
13193 $fields = $rOpts_maximum_fields_per_table;
13195 my $max_skipped_count = $fields - 1;
13197 my $is_simple_last_term = 0;
13198 my $is_simple_next_term = 0;
13199 foreach my $j ( 0 .. $item_count ) {
13200 $is_simple_last_term = $is_simple_next_term;
13201 $is_simple_next_term = 0;
13202 if ( $j < $item_count
13203 && $i_term_end[$j] == $i_term_begin[$j]
13204 && $item_lengths[$j] <= $small_length )
13206 $is_simple_next_term = 1;
13209 if ( $is_simple_last_term
13210 && $is_simple_next_term
13211 && $skipped_count < $max_skipped_count )
13216 $skipped_count = 0;
13217 my $i = $i_term_comma[ $j - 1 ];
13218 last unless defined $i;
13219 set_forced_breakpoint($i);
13223 # always break at the last comma if this list is
13224 # interrupted; we wouldn't want to leave a terminal '{', for
13226 if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
13230 #my ( $a, $b, $c ) = caller();
13231 #print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
13232 #i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
13233 #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
13235 #---------------------------------------------------------------
13236 # Interrupted List Rule:
13237 # A list is forced to use old breakpoints if it was interrupted
13238 # by side comments or blank lines, or requested by user.
13239 #---------------------------------------------------------------
13240 if ( $rOpts_break_at_old_comma_breakpoints
13242 || $i_opening_paren < 0 )
13244 copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
13248 #---------------------------------------------------------------
13249 # Looks like a list of items. We have to look at it and size it up.
13250 #---------------------------------------------------------------
13252 my $opening_token = $tokens_to_go[$i_opening_paren];
13253 my $opening_environment =
13254 $container_environment_to_go[$i_opening_paren];
13256 #-------------------------------------------------------------------
13257 # Return if this will fit on one line
13258 #-------------------------------------------------------------------
13260 my $i_opening_minus = find_token_starting_list($i_opening_paren);
13262 unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
13264 #-------------------------------------------------------------------
13265 # Now we know that this block spans multiple lines; we have to set
13266 # at least one breakpoint -- real or fake -- as a signal to break
13267 # open any outer containers.
13268 #-------------------------------------------------------------------
13269 set_fake_breakpoint();
13271 # be sure we do not extend beyond the current list length
13272 if ( $i_effective_last_comma >= $max_index_to_go ) {
13273 $i_effective_last_comma = $max_index_to_go - 1;
13276 # Set a flag indicating if we need to break open to keep -lp
13277 # items aligned. This is necessary if any of the list terms
13278 # exceeds the available space after the '('.
13279 my $need_lp_break_open = $must_break_open;
13280 if ( $rOpts_line_up_parentheses && !$must_break_open ) {
13281 my $columns_if_unbroken =
13282 maximum_line_length($i_opening_minus) -
13283 total_line_length( $i_opening_minus, $i_opening_paren );
13284 $need_lp_break_open =
13285 ( $max_length[0] > $columns_if_unbroken )
13286 || ( $max_length[1] > $columns_if_unbroken )
13287 || ( $first_term_length > $columns_if_unbroken );
13290 # Specify if the list must have an even number of fields or not.
13291 # It is generally safest to assume an even number, because the
13292 # list items might be a hash list. But if we can be sure that
13293 # it is not a hash, then we can allow an odd number for more
13295 my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count
13297 if ( $identifier_count >= $item_count - 1
13298 || $is_assignment{$next_nonblank_type}
13299 || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
13305 # do we have a long first term which should be
13306 # left on a line by itself?
13307 my $use_separate_first_term = (
13308 $odd_or_even == 1 # only if we can use 1 field/line
13309 && $item_count > 3 # need several items
13310 && $first_term_length >
13311 2 * $max_length[0] - 2 # need long first term
13312 && $first_term_length >
13313 2 * $max_length[1] - 2 # need long first term
13316 # or do we know from the type of list that the first term should
13318 if ( !$use_separate_first_term ) {
13319 if ( $is_keyword_with_special_leading_term{$list_type} ) {
13320 $use_separate_first_term = 1;
13322 # should the container be broken open?
13323 if ( $item_count < 3 ) {
13324 if ( $i_first_comma - $i_opening_paren < 4 ) {
13325 ${$rdo_not_break_apart} = 1;
13328 elsif ($first_term_length < 20
13329 && $i_first_comma - $i_opening_paren < 4 )
13331 my $columns = table_columns_available($i_first_comma);
13332 if ( $first_term_length < $columns ) {
13333 ${$rdo_not_break_apart} = 1;
13340 if ($use_separate_first_term) {
13342 # ..set a break and update starting values
13343 $use_separate_first_term = 1;
13344 set_forced_breakpoint($i_first_comma);
13345 $i_opening_paren = $i_first_comma;
13346 $i_first_comma = $rcomma_index->[1];
13348 return if $comma_count == 1;
13349 shift @item_lengths;
13350 shift @i_term_begin;
13352 shift @i_term_comma;
13355 # if not, update the metrics to include the first term
13357 if ( $first_term_length > $max_length[0] ) {
13358 $max_length[0] = $first_term_length;
13362 # Field width parameters
13363 my $pair_width = ( $max_length[0] + $max_length[1] );
13365 ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
13367 # Number of free columns across the page width for laying out tables
13368 my $columns = table_columns_available($i_first_comma);
13370 # Estimated maximum number of fields which fit this space
13371 # This will be our first guess
13372 my $number_of_fields_max =
13373 maximum_number_of_fields( $columns, $odd_or_even, $max_width,
13375 my $number_of_fields = $number_of_fields_max;
13377 # Find the best-looking number of fields
13378 # and make this our second guess if possible
13379 my ( $number_of_fields_best, $ri_ragged_break_list,
13380 $new_identifier_count )
13381 = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
13384 if ( $number_of_fields_best != 0
13385 && $number_of_fields_best < $number_of_fields_max )
13387 $number_of_fields = $number_of_fields_best;
13390 # ----------------------------------------------------------------------
13391 # If we are crowded and the -lp option is being used, try to
13392 # undo some indentation
13393 # ----------------------------------------------------------------------
13395 $rOpts_line_up_parentheses
13397 $number_of_fields == 0
13398 || ( $number_of_fields == 1
13399 && $number_of_fields != $number_of_fields_best )
13403 my $available_spaces = get_available_spaces_to_go($i_first_comma);
13404 if ( $available_spaces > 0 ) {
13406 my $spaces_wanted = $max_width - $columns; # for 1 field
13408 if ( $number_of_fields_best == 0 ) {
13409 $number_of_fields_best =
13410 get_maximum_fields_wanted( \@item_lengths );
13413 if ( $number_of_fields_best != 1 ) {
13414 my $spaces_wanted_2 =
13415 1 + $pair_width - $columns; # for 2 fields
13416 if ( $available_spaces > $spaces_wanted_2 ) {
13417 $spaces_wanted = $spaces_wanted_2;
13421 if ( $spaces_wanted > 0 ) {
13422 my $deleted_spaces =
13423 reduce_lp_indentation( $i_first_comma, $spaces_wanted );
13426 if ( $deleted_spaces > 0 ) {
13427 $columns = table_columns_available($i_first_comma);
13428 $number_of_fields_max =
13429 maximum_number_of_fields( $columns, $odd_or_even,
13430 $max_width, $pair_width );
13431 $number_of_fields = $number_of_fields_max;
13433 if ( $number_of_fields_best == 1
13434 && $number_of_fields >= 1 )
13436 $number_of_fields = $number_of_fields_best;
13443 # try for one column if two won't work
13444 if ( $number_of_fields <= 0 ) {
13445 $number_of_fields = int( $columns / $max_width );
13448 # The user can place an upper bound on the number of fields,
13449 # which can be useful for doing maintenance on tables
13450 if ( $rOpts_maximum_fields_per_table
13451 && $number_of_fields > $rOpts_maximum_fields_per_table )
13453 $number_of_fields = $rOpts_maximum_fields_per_table;
13456 # How many columns (characters) and lines would this container take
13457 # if no additional whitespace were added?
13458 my $packed_columns = token_sequence_length( $i_opening_paren + 1,
13459 $i_effective_last_comma + 1 );
13460 if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
13461 my $packed_lines = 1 + int( $packed_columns / $columns );
13463 # are we an item contained in an outer list?
13464 my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
13466 if ( $number_of_fields <= 0 ) {
13468 # #---------------------------------------------------------------
13469 # # We're in trouble. We can't find a single field width that works.
13470 # # There is no simple answer here; we may have a single long list
13472 # #---------------------------------------------------------------
13474 # In many cases, it may be best to not force a break if there is just one
13475 # comma, because the standard continuation break logic will do a better
13478 # In the common case that all but one of the terms can fit
13479 # on a single line, it may look better not to break open the
13480 # containing parens. Consider, for example
13484 # sort { $color_value{$::a} <=> $color_value{$::b}; }
13487 # which will look like this with the container broken:
13491 # sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
13494 # Here is an example of this rule for a long last term:
13496 # log_message( 0, 256, 128,
13497 # "Number of routes in adj-RIB-in to be considered: $peercount" );
13499 # And here is an example with a long first term:
13502 # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
13503 # $r, $pu, $ps, $cu, $cs, $tt
13505 # if $style eq 'all';
13507 my $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
13508 my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
13509 my $long_first_term =
13510 excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
13512 # break at every comma ...
13515 # if requested by user or is best looking
13516 $number_of_fields_best == 1
13518 # or if this is a sublist of a larger list
13519 || $in_hierarchical_list
13521 # or if multiple commas and we don't have a long first or last
13523 || ( $comma_count > 1
13524 && !( $long_last_term || $long_first_term ) )
13527 foreach ( 0 .. $comma_count - 1 ) {
13528 set_forced_breakpoint( $rcomma_index->[$_] );
13531 elsif ($long_last_term) {
13533 set_forced_breakpoint($i_last_comma);
13534 ${$rdo_not_break_apart} = 1 unless $must_break_open;
13536 elsif ($long_first_term) {
13538 set_forced_breakpoint($i_first_comma);
13542 # let breaks be defined by default bond strength logic
13547 # --------------------------------------------------------
13548 # We have a tentative field count that seems to work.
13549 # How many lines will this require?
13550 # --------------------------------------------------------
13551 my $formatted_lines = $item_count / ($number_of_fields);
13552 if ( $formatted_lines != int $formatted_lines ) {
13553 $formatted_lines = 1 + int $formatted_lines;
13556 # So far we've been trying to fill out to the right margin. But
13557 # compact tables are easier to read, so let's see if we can use fewer
13558 # fields without increasing the number of lines.
13559 $number_of_fields =
13560 compactify_table( $item_count, $number_of_fields, $formatted_lines,
13563 # How many spaces across the page will we fill?
13564 my $columns_per_line =
13565 ( int $number_of_fields / 2 ) * $pair_width +
13566 ( $number_of_fields % 2 ) * $max_width;
13568 my $formatted_columns;
13570 if ( $number_of_fields > 1 ) {
13571 $formatted_columns =
13572 ( $pair_width * ( int( $item_count / 2 ) ) +
13573 ( $item_count % 2 ) * $max_width );
13576 $formatted_columns = $max_width * $item_count;
13578 if ( $formatted_columns < $packed_columns ) {
13579 $formatted_columns = $packed_columns;
13582 my $unused_columns = $formatted_columns - $packed_columns;
13584 # set some empirical parameters to help decide if we should try to
13585 # align; high sparsity does not look good, especially with few lines
13586 my $sparsity = ($unused_columns) / ($formatted_columns);
13587 my $max_allowed_sparsity =
13588 ( $item_count < 3 ) ? 0.1
13589 : ( $packed_lines == 1 ) ? 0.15
13590 : ( $packed_lines == 2 ) ? 0.4
13593 # Begin check for shortcut methods, which avoid treating a list
13594 # as a table for relatively small parenthesized lists. These
13595 # are usually easier to read if not formatted as tables.
13597 $packed_lines <= 2 # probably can fit in 2 lines
13598 && $item_count < 9 # doesn't have too many items
13599 && $opening_environment eq 'BLOCK' # not a sub-container
13600 && $opening_token eq '(' # is paren list
13604 # Shortcut method 1: for -lp and just one comma:
13605 # This is a no-brainer, just break at the comma.
13607 $rOpts_line_up_parentheses # -lp
13608 && $item_count == 2 # two items, one comma
13609 && !$must_break_open
13612 my $i_break = $rcomma_index->[0];
13613 set_forced_breakpoint($i_break);
13614 ${$rdo_not_break_apart} = 1;
13615 set_non_alignment_flags( $comma_count, $rcomma_index );
13620 # method 2 is for most small ragged lists which might look
13621 # best if not displayed as a table.
13623 ( $number_of_fields == 2 && $item_count == 3 )
13625 $new_identifier_count > 0 # isn't all quotes
13626 && $sparsity > 0.15
13627 ) # would be fairly spaced gaps if aligned
13631 my $break_count = set_ragged_breakpoints( \@i_term_comma,
13632 $ri_ragged_break_list );
13633 ++$break_count if ($use_separate_first_term);
13635 # NOTE: we should really use the true break count here,
13636 # which can be greater if there are large terms and
13637 # little space, but usually this will work well enough.
13638 unless ($must_break_open) {
13640 if ( $break_count <= 1 ) {
13641 ${$rdo_not_break_apart} = 1;
13643 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
13645 ${$rdo_not_break_apart} = 1;
13648 set_non_alignment_flags( $comma_count, $rcomma_index );
13652 } # end shortcut methods
13656 FORMATTER_DEBUG_FLAG_SPARSE && do {
13658 "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
13662 #---------------------------------------------------------------
13663 # Compound List Rule 2:
13664 # If this list is too long for one line, and it is an item of a
13665 # larger list, then we must format it, regardless of sparsity
13666 # (ian.t). One reason that we have to do this is to trigger
13667 # Compound List Rule 1, above, which causes breaks at all commas of
13668 # all outer lists. In this way, the structure will be properly
13670 #---------------------------------------------------------------
13672 # Decide if this list is too long for one line unless broken
13673 my $total_columns = table_columns_available($i_opening_paren);
13674 my $too_long = $packed_columns > $total_columns;
13676 # For a paren list, include the length of the token just before the
13677 # '(' because this is likely a sub call, and we would have to
13678 # include the sub name on the same line as the list. This is still
13679 # imprecise, but not too bad. (steve.t)
13680 if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
13682 $too_long = excess_line_length( $i_opening_minus,
13683 $i_effective_last_comma + 1 ) > 0;
13686 # FIXME: For an item after a '=>', try to include the length of the
13687 # thing before the '=>'. This is crude and should be improved by
13688 # actually looking back token by token.
13689 if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
13690 my $i_opening_minus = $i_opening_paren - 4;
13691 if ( $i_opening_minus >= 0 ) {
13692 $too_long = excess_line_length( $i_opening_minus,
13693 $i_effective_last_comma + 1 ) > 0;
13697 # Always break lists contained in '[' and '{' if too long for 1 line,
13698 # and always break lists which are too long and part of a more complex
13700 my $must_break_open_container = $must_break_open
13702 && ( $in_hierarchical_list || $opening_token ne '(' ) );
13704 #print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
13706 #---------------------------------------------------------------
13707 # The main decision:
13708 # Now decide if we will align the data into aligned columns. Do not
13709 # attempt to align columns if this is a tiny table or it would be
13710 # too spaced. It seems that the more packed lines we have, the
13711 # sparser the list that can be allowed and still look ok.
13712 #---------------------------------------------------------------
13714 if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
13715 || ( $formatted_lines < 2 )
13716 || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
13720 #---------------------------------------------------------------
13721 # too sparse: would look ugly if aligned in a table;
13722 #---------------------------------------------------------------
13724 # use old breakpoints if this is a 'big' list
13725 # FIXME: goal is to improve set_ragged_breakpoints so that
13726 # this is not necessary.
13727 if ( $packed_lines > 2 && $item_count > 10 ) {
13728 write_logfile_entry("List sparse: using old breakpoints\n");
13729 copy_old_breakpoints( $i_first_comma, $i_last_comma );
13732 # let the continuation logic handle it if 2 lines
13735 my $break_count = set_ragged_breakpoints( \@i_term_comma,
13736 $ri_ragged_break_list );
13737 ++$break_count if ($use_separate_first_term);
13739 unless ($must_break_open_container) {
13740 if ( $break_count <= 1 ) {
13741 ${$rdo_not_break_apart} = 1;
13743 elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
13745 ${$rdo_not_break_apart} = 1;
13748 set_non_alignment_flags( $comma_count, $rcomma_index );
13753 #---------------------------------------------------------------
13754 # go ahead and format as a table
13755 #---------------------------------------------------------------
13756 write_logfile_entry(
13757 "List: auto formatting with $number_of_fields fields/row\n");
13759 my $j_first_break =
13760 $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
13763 my $j = $j_first_break ;
13764 $j < $comma_count ;
13765 $j += $number_of_fields
13768 my $i = $rcomma_index->[$j];
13769 set_forced_breakpoint($i);
13775 sub set_non_alignment_flags {
13777 # set flag which indicates that these commas should not be
13779 my ( $comma_count, $rcomma_index ) = @_;
13780 foreach ( 0 .. $comma_count - 1 ) {
13781 $matching_token_to_go[ $rcomma_index->[$_] ] = 1;
13786 sub study_list_complexity {
13788 # Look for complex tables which should be formatted with one term per line.
13789 # Returns the following:
13791 # \@i_ragged_break_list = list of good breakpoints to avoid lines
13792 # which are hard to read
13793 # $number_of_fields_best = suggested number of fields based on
13794 # complexity; = 0 if any number may be used.
13796 my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
13797 my $item_count = @{$ri_term_begin};
13798 my $complex_item_count = 0;
13799 my $number_of_fields_best = $rOpts_maximum_fields_per_table;
13800 my $i_max = @{$ritem_lengths} - 1;
13801 ##my @item_complexity;
13803 my $i_last_last_break = -3;
13804 my $i_last_break = -2;
13805 my @i_ragged_break_list;
13807 my $definitely_complex = 30;
13808 my $definitely_simple = 12;
13809 my $quote_count = 0;
13811 for my $i ( 0 .. $i_max ) {
13812 my $ib = $ri_term_begin->[$i];
13813 my $ie = $ri_term_end->[$i];
13815 # define complexity: start with the actual term length
13816 my $weighted_length = ( $ritem_lengths->[$i] - 2 );
13818 ##TBD: join types here and check for variations
13819 ##my $str=join "", @tokens_to_go[$ib..$ie];
13822 if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
13826 elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
13830 if ( $ib eq $ie ) {
13831 if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
13832 $complex_item_count++;
13833 $weighted_length *= 2;
13839 if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
13840 $complex_item_count++;
13841 $weighted_length *= 2;
13843 if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
13844 $weighted_length += 4;
13848 # add weight for extra tokens.
13849 $weighted_length += 2 * ( $ie - $ib );
13851 ## my $BUB = join '', @tokens_to_go[$ib..$ie];
13852 ## print "# COMPLEXITY:$weighted_length $BUB\n";
13854 ##push @item_complexity, $weighted_length;
13856 # now mark a ragged break after this item it if it is 'long and
13858 if ( $weighted_length >= $definitely_complex ) {
13860 # if we broke after the previous term
13861 # then break before it too
13862 if ( $i_last_break == $i - 1
13864 && $i_last_last_break != $i - 2 )
13867 ## FIXME: don't strand a small term
13868 pop @i_ragged_break_list;
13869 push @i_ragged_break_list, $i - 2;
13870 push @i_ragged_break_list, $i - 1;
13873 push @i_ragged_break_list, $i;
13874 $i_last_last_break = $i_last_break;
13875 $i_last_break = $i;
13878 # don't break before a small last term -- it will
13879 # not look good on a line by itself.
13880 elsif ($i == $i_max
13881 && $i_last_break == $i - 1
13882 && $weighted_length <= $definitely_simple )
13884 pop @i_ragged_break_list;
13888 my $identifier_count = $i_max + 1 - $quote_count;
13890 # Need more tuning here..
13891 if ( $max_width > 12
13892 && $complex_item_count > $item_count / 2
13893 && $number_of_fields_best != 2 )
13895 $number_of_fields_best = 1;
13898 return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
13901 sub get_maximum_fields_wanted {
13903 # Not all tables look good with more than one field of items.
13904 # This routine looks at a table and decides if it should be
13905 # formatted with just one field or not.
13906 # This coding is still under development.
13907 my ($ritem_lengths) = @_;
13909 my $number_of_fields_best = 0;
13911 # For just a few items, we tentatively assume just 1 field.
13912 my $item_count = @{$ritem_lengths};
13913 if ( $item_count <= 5 ) {
13914 $number_of_fields_best = 1;
13917 # For larger tables, look at it both ways and see what looks best
13921 my @max_length = ( 0, 0 );
13922 my @last_length_2 = ( undef, undef );
13923 my @first_length_2 = ( undef, undef );
13924 my $last_length = undef;
13925 my $total_variation_1 = 0;
13926 my $total_variation_2 = 0;
13927 my @total_variation_2 = ( 0, 0 );
13928 foreach my $j ( 0 .. $item_count - 1 ) {
13930 $is_odd = 1 - $is_odd;
13931 my $length = $ritem_lengths->[$j];
13932 if ( $length > $max_length[$is_odd] ) {
13933 $max_length[$is_odd] = $length;
13936 if ( defined($last_length) ) {
13937 my $dl = abs( $length - $last_length );
13938 $total_variation_1 += $dl;
13940 $last_length = $length;
13942 my $ll = $last_length_2[$is_odd];
13943 if ( defined($ll) ) {
13944 my $dl = abs( $length - $ll );
13945 $total_variation_2[$is_odd] += $dl;
13948 $first_length_2[$is_odd] = $length;
13950 $last_length_2[$is_odd] = $length;
13952 $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
13954 my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
13955 unless ( $total_variation_2 < $factor * $total_variation_1 ) {
13956 $number_of_fields_best = 1;
13959 return ($number_of_fields_best);
13962 sub table_columns_available {
13963 my $i_first_comma = shift;
13965 maximum_line_length($i_first_comma) -
13966 leading_spaces_to_go($i_first_comma);
13968 # Patch: the vertical formatter does not line up lines whose lengths
13969 # exactly equal the available line length because of allowances
13970 # that must be made for side comments. Therefore, the number of
13971 # available columns is reduced by 1 character.
13976 sub maximum_number_of_fields {
13978 # how many fields will fit in the available space?
13979 my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
13980 my $max_pairs = int( $columns / $pair_width );
13981 my $number_of_fields = $max_pairs * 2;
13982 if ( $odd_or_even == 1
13983 && $max_pairs * $pair_width + $max_width <= $columns )
13985 $number_of_fields++;
13987 return $number_of_fields;
13990 sub compactify_table {
13992 # given a table with a certain number of fields and a certain number
13993 # of lines, see if reducing the number of fields will make it look
13995 my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
13996 if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
14000 $min_fields = $number_of_fields ;
14001 $min_fields >= $odd_or_even
14002 && $min_fields * $formatted_lines >= $item_count ;
14003 $min_fields -= $odd_or_even
14006 $number_of_fields = $min_fields;
14009 return $number_of_fields;
14012 sub set_ragged_breakpoints {
14014 # Set breakpoints in a list that cannot be formatted nicely as a
14016 my ( $ri_term_comma, $ri_ragged_break_list ) = @_;
14018 my $break_count = 0;
14019 foreach ( @{$ri_ragged_break_list} ) {
14020 my $j = $ri_term_comma->[$_];
14022 set_forced_breakpoint($j);
14026 return $break_count;
14029 sub copy_old_breakpoints {
14030 my ( $i_first_comma, $i_last_comma ) = @_;
14031 for my $i ( $i_first_comma .. $i_last_comma ) {
14032 if ( $old_breakpoint_to_go[$i] ) {
14033 set_forced_breakpoint($i);
14040 my ( $i, $j ) = @_;
14041 if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
14043 FORMATTER_DEBUG_FLAG_NOBREAK && do {
14044 my ( $a, $b, $c ) = caller();
14046 "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
14049 @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
14052 # shouldn't happen; non-critical error
14054 FORMATTER_DEBUG_FLAG_NOBREAK && do {
14055 my ( $a, $b, $c ) = caller();
14057 "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
14063 sub set_fake_breakpoint {
14065 # Just bump up the breakpoint count as a signal that there are breaks.
14066 # This is useful if we have breaks but may want to postpone deciding where
14068 $forced_breakpoint_count++;
14072 sub set_forced_breakpoint {
14075 return unless defined $i && $i >= 0;
14077 # no breaks between welded tokens
14078 return if ( weld_len_right_to_go($i) );
14080 # when called with certain tokens, use bond strengths to decide
14081 # if we break before or after it
14082 my $token = $tokens_to_go[$i];
14084 if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
14085 if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
14088 # breaks are forced before 'if' and 'unless'
14089 elsif ( $is_if_unless{$token} ) { $i-- }
14091 if ( $i >= 0 && $i <= $max_index_to_go ) {
14092 my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
14094 FORMATTER_DEBUG_FLAG_FORCE && do {
14095 my ( $a, $b, $c ) = caller();
14097 "FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
14100 if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
14101 $forced_breakpoint_to_go[$i_nonblank] = 1;
14103 if ( $i_nonblank > $index_max_forced_break ) {
14104 $index_max_forced_break = $i_nonblank;
14106 $forced_breakpoint_count++;
14107 $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
14110 # if we break at an opening container..break at the closing
14111 if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
14112 set_closing_breakpoint($i_nonblank);
14119 sub clear_breakpoint_undo_stack {
14120 $forced_breakpoint_undo_count = 0;
14124 sub undo_forced_breakpoint_stack {
14126 my $i_start = shift;
14127 if ( $i_start < 0 ) {
14129 my ( $a, $b, $c ) = caller();
14131 "Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
14135 while ( $forced_breakpoint_undo_count > $i_start ) {
14137 $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
14138 if ( $i >= 0 && $i <= $max_index_to_go ) {
14139 $forced_breakpoint_to_go[$i] = 0;
14140 $forced_breakpoint_count--;
14142 FORMATTER_DEBUG_FLAG_UNDOBP && do {
14143 my ( $a, $b, $c ) = caller();
14145 "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
14149 # shouldn't happen, but not a critical error
14151 FORMATTER_DEBUG_FLAG_UNDOBP && do {
14152 my ( $a, $b, $c ) = caller();
14154 "Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
14161 { # begin recombine_breakpoints
14173 @is_amp_amp{@q} = (1) x scalar(@q);
14176 @is_ternary{@q} = (1) x scalar(@q);
14178 @q = qw( + - * / );
14179 @is_math_op{@q} = (1) x scalar(@q);
14182 @is_plus_minus{@q} = (1) x scalar(@q);
14185 @is_mult_div{@q} = (1) x scalar(@q);
14188 sub DUMP_BREAKPOINTS {
14190 # Debug routine to dump current breakpoints...not normally called
14191 # We are given indexes to the current lines:
14192 # $ri_beg = ref to array of BEGinning indexes of each line
14193 # $ri_end = ref to array of ENDing indexes of each line
14194 my ( $ri_beg, $ri_end, $msg ) = @_;
14195 print STDERR "----Dumping breakpoints from: $msg----\n";
14196 for my $n ( 0 .. @{$ri_end} - 1 ) {
14197 my $ibeg = $ri_beg->[$n];
14198 my $iend = $ri_end->[$n];
14200 foreach my $i ( $ibeg .. $iend ) {
14201 $text .= $tokens_to_go[$i];
14203 print STDERR "$n ($ibeg:$iend) $text\n";
14205 print STDERR "----\n";
14209 sub unmask_phantom_semicolons {
14211 my ( $self, $ri_beg, $ri_end ) = @_;
14213 # Walk down the lines of this batch and unmask any invisible line-ending
14214 # semicolons. They were placed by sub respace_tokens but we only now
14215 # know if we actually need them.
14217 my $nmax = @{$ri_end} - 1;
14218 foreach my $n ( 0 .. $nmax ) {
14220 my $i = $ri_end->[$n];
14221 if ( $types_to_go[$i] eq ';' && $tokens_to_go[$i] eq '' ) {
14223 $tokens_to_go[$i] = $want_left_space{';'} == WS_NO ? ';' : ' ;';
14225 my $line_number = 1 + $self->get_old_line_index( $K_to_go[$i] );
14226 note_added_semicolon($line_number);
14232 sub recombine_breakpoints {
14234 # sub set_continuation_breaks is very liberal in setting line breaks
14235 # for long lines, always setting breaks at good breakpoints, even
14236 # when that creates small lines. Sometimes small line fragments
14237 # are produced which would look better if they were combined.
14238 # That's the task of this routine.
14240 # We are given indexes to the current lines:
14241 # $ri_beg = ref to array of BEGinning indexes of each line
14242 # $ri_end = ref to array of ENDing indexes of each line
14243 my ( $ri_beg, $ri_end ) = @_;
14245 # Make a list of all good joining tokens between the lines
14248 my $nmax = @{$ri_end} - 1;
14249 for my $n ( 1 .. $nmax ) {
14250 my $ibeg_1 = $ri_beg->[ $n - 1 ];
14251 my $iend_1 = $ri_end->[ $n - 1 ];
14252 my $iend_2 = $ri_end->[$n];
14253 my $ibeg_2 = $ri_beg->[$n];
14255 my ( $itok, $itokp, $itokm );
14257 foreach my $itest ( $iend_1, $ibeg_2 ) {
14258 my $type = $types_to_go[$itest];
14259 if ( $is_math_op{$type}
14260 || $is_amp_amp{$type}
14261 || $is_assignment{$type}
14267 $joint[$n] = [$itok];
14270 my $more_to_do = 1;
14272 # We keep looping over all of the lines of this batch
14273 # until there are no more possible recombinations
14274 my $nmax_last = @{$ri_end};
14276 while ($more_to_do) {
14279 my $nmax = @{$ri_end} - 1;
14281 # Safety check for infinite loop
14282 unless ( $nmax < $nmax_last ) {
14284 # Shouldn't happen because splice below decreases nmax on each
14286 Fault("Program bug-infinite loop in recombine breakpoints\n");
14288 $nmax_last = $nmax;
14290 my $skip_Section_3;
14291 my $leading_amp_count = 0;
14292 my $this_line_is_semicolon_terminated;
14294 # loop over all remaining lines in this batch
14295 for my $iter ( 1 .. $nmax ) {
14297 # alternating sweep direction gives symmetric results
14298 # for recombining lines which exceed the line length
14299 # such as eval {{{{.... }}}}
14301 if ($reverse) { $n = 1 + $nmax - $iter; }
14302 else { $n = $iter }
14304 #----------------------------------------------------------
14305 # If we join the current pair of lines,
14306 # line $n-1 will become the left part of the joined line
14307 # line $n will become the right part of the joined line
14309 # Here are Indexes of the endpoint tokens of the two lines:
14311 # -----line $n-1--- | -----line $n-----
14312 # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
14315 # We want to decide if we should remove the line break
14316 # between the tokens at $iend_1 and $ibeg_2
14318 # We will apply a number of ad-hoc tests to see if joining
14319 # here will look ok. The code will just issue a 'next'
14320 # command if the join doesn't look good. If we get through
14321 # the gauntlet of tests, the lines will be recombined.
14322 #----------------------------------------------------------
14324 # beginning and ending tokens of the lines we are working on
14325 my $ibeg_1 = $ri_beg->[ $n - 1 ];
14326 my $iend_1 = $ri_end->[ $n - 1 ];
14327 my $iend_2 = $ri_end->[$n];
14328 my $ibeg_2 = $ri_beg->[$n];
14329 my $ibeg_nmax = $ri_beg->[$nmax];
14331 # combined line cannot be too long
14332 my $excess = excess_line_length( $ibeg_1, $iend_2, 1, 1 );
14333 next if ( $excess > 0 );
14335 my $type_iend_1 = $types_to_go[$iend_1];
14336 my $type_iend_2 = $types_to_go[$iend_2];
14337 my $type_ibeg_1 = $types_to_go[$ibeg_1];
14338 my $type_ibeg_2 = $types_to_go[$ibeg_2];
14340 # terminal token of line 2 if any side comment is ignored:
14341 my $iend_2t = $iend_2;
14342 my $type_iend_2t = $type_iend_2;
14344 # some beginning indexes of other lines, which may not exist
14345 my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1;
14346 my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
14347 my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
14351 #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
14352 # $nesting_depth_to_go[$ibeg_1] );
14354 FORMATTER_DEBUG_FLAG_RECOMBINE && do {
14356 "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
14359 # If line $n is the last line, we set some flags and
14360 # do any special checks for it
14361 if ( $n == $nmax ) {
14363 # a terminal '{' should stay where it is
14364 # unless preceded by a fat comma
14365 next if ( $type_ibeg_2 eq '{' && $type_iend_1 ne '=>' );
14367 if ( $type_iend_2 eq '#'
14368 && $iend_2 - $ibeg_2 >= 2
14369 && $types_to_go[ $iend_2 - 1 ] eq 'b' )
14371 $iend_2t = $iend_2 - 2;
14372 $type_iend_2t = $types_to_go[$iend_2t];
14375 $this_line_is_semicolon_terminated = $type_iend_2t eq ';';
14378 #----------------------------------------------------------
14379 # Recombine Section 0:
14380 # Examine the special token joining this line pair, if any.
14381 # Put as many tests in this section to avoid duplicate code and
14382 # to make formatting independent of whether breaks are to the
14383 # left or right of an operator.
14384 #----------------------------------------------------------
14386 my ($itok) = @{ $joint[$n] };
14389 # FIXME: Patch - may not be necessary
14391 $type_iend_1 eq 'b'
14396 $type_iend_2 eq 'b'
14401 my $type = $types_to_go[$itok];
14403 if ( $type eq ':' ) {
14405 # do not join at a colon unless it disobeys the break request
14406 if ( $itok eq $iend_1 ) {
14407 next unless $want_break_before{$type};
14410 $leading_amp_count++;
14411 next if $want_break_before{$type};
14415 # handle math operators + - * /
14416 elsif ( $is_math_op{$type} ) {
14418 # Combine these lines if this line is a single
14419 # number, or if it is a short term with same
14420 # operator as the previous line. For example, in
14421 # the following code we will combine all of the
14422 # short terms $A, $B, $C, $D, $E, $F, together
14423 # instead of leaving them one per line:
14425 # $A * $B * $C * $D * $E * $F *
14426 # ( 2. * $eps * $sigma * $area ) *
14427 # ( 1. / $tcold**3 - 1. / $thot**3 );
14429 # This can be important in math-intensive code.
14433 my $itokp = min( $inext_to_go[$itok], $iend_2 );
14434 my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
14435 my $itokm = max( $iprev_to_go[$itok], $ibeg_1 );
14436 my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
14438 # check for a number on the right
14439 if ( $types_to_go[$itokp] eq 'n' ) {
14441 # ok if nothing else on right
14442 if ( $itokp == $iend_2 ) {
14447 # look one more token to right..
14448 # okay if math operator or some termination
14450 ( ( $itokpp == $iend_2 )
14451 && $is_math_op{ $types_to_go[$itokpp] } )
14452 || $types_to_go[$itokpp] =~ /^[#,;]$/;
14456 # check for a number on the left
14457 if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
14459 # okay if nothing else to left
14460 if ( $itokm == $ibeg_1 ) {
14464 # otherwise look one more token to left
14467 # okay if math operator, comma, or assignment
14468 $good_combo = ( $itokmm == $ibeg_1 )
14469 && ( $is_math_op{ $types_to_go[$itokmm] }
14470 || $types_to_go[$itokmm] =~ /^[,]$/
14471 || $is_assignment{ $types_to_go[$itokmm] }
14476 # look for a single short token either side of the
14478 if ( !$good_combo ) {
14480 # Slight adjustment factor to make results
14481 # independent of break before or after operator in
14482 # long summed lists. (An operator and a space make
14484 my $two = ( $itok eq $iend_1 ) ? 2 : 0;
14488 # numbers or id's on both sides of this joint
14489 $types_to_go[$itokp] =~ /^[in]$/
14490 && $types_to_go[$itokm] =~ /^[in]$/
14492 # one of the two lines must be short:
14495 # no more than 2 nonblank tokens right of
14500 && token_sequence_length( $itokp, $iend_2 )
14502 $rOpts_short_concatenation_item_length
14505 # no more than 2 nonblank tokens left of
14510 && token_sequence_length( $ibeg_1, $itokm )
14512 $rOpts_short_concatenation_item_length
14517 # keep pure terms; don't mix +- with */
14519 $is_plus_minus{$type}
14520 && ( $is_mult_div{ $types_to_go[$itokmm] }
14521 || $is_mult_div{ $types_to_go[$itokpp] } )
14524 $is_mult_div{$type}
14525 && ( $is_plus_minus{ $types_to_go[$itokmm] }
14526 || $is_plus_minus{ $types_to_go[$itokpp] } )
14532 # it is also good to combine if we can reduce to 2 lines
14533 if ( !$good_combo ) {
14535 # index on other line where same token would be in a
14538 ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
14543 && $types_to_go[$iother] ne $type;
14546 next unless ($good_combo);
14550 elsif ( $is_amp_amp{$type} ) {
14554 elsif ( $is_assignment{$type} ) {
14556 } ## end assignment
14559 #----------------------------------------------------------
14560 # Recombine Section 1:
14561 # Join welded nested containers immediately
14562 #----------------------------------------------------------
14563 if ( weld_len_right_to_go($iend_1)
14564 || weld_len_left_to_go($ibeg_2) )
14568 # Old coding alternated sweep direction: no longer needed
14569 # $reverse = 1 - $reverse;
14574 #----------------------------------------------------------
14575 # Recombine Section 2:
14576 # Examine token at $iend_1 (right end of first line of pair)
14577 #----------------------------------------------------------
14579 # an isolated '}' may join with a ';' terminated segment
14580 if ( $type_iend_1 eq '}' ) {
14582 # Check for cases where combining a semicolon terminated
14583 # statement with a previous isolated closing paren will
14584 # allow the combined line to be outdented. This is
14585 # generally a good move. For example, we can join up
14586 # the last two lines here:
14588 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
14589 # $size, $atime, $mtime, $ctime, $blksize, $blocks
14595 # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
14596 # $size, $atime, $mtime, $ctime, $blksize, $blocks
14599 # which makes the parens line up.
14601 # Another example, from Joe Matarazzo, probably looks best
14602 # with the 'or' clause appended to the trailing paren:
14603 # $self->some_method(
14606 # ) or die "Some_method didn't work";
14608 # But we do not want to do this for something like the -lp
14609 # option where the paren is not outdentable because the
14610 # trailing clause will be far to the right.
14612 # The logic here is synchronized with the logic in sub
14613 # sub set_adjusted_indentation, which actually does
14616 $skip_Section_3 ||= $this_line_is_semicolon_terminated
14618 # only one token on last line
14619 && $ibeg_1 == $iend_1
14621 # must be structural paren
14622 && $tokens_to_go[$iend_1] eq ')'
14624 # style must allow outdenting,
14625 && !$closing_token_indentation{')'}
14627 # only leading '&&', '||', and ':' if no others seen
14628 # (but note: our count made below could be wrong
14629 # due to intervening comments)
14630 && ( $leading_amp_count == 0
14631 || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
14633 # but leading colons probably line up with a
14634 # previous colon or question (count could be wrong).
14635 && $type_ibeg_2 ne ':'
14637 # only one step in depth allowed. this line must not
14638 # begin with a ')' itself.
14639 && ( $nesting_depth_to_go[$iend_1] ==
14640 $nesting_depth_to_go[$iend_2] + 1 );
14642 # YVES patch 2 of 2:
14643 # Allow cuddled eval chains, like this:
14650 # This patch works together with a patch in
14651 # setting adjusted indentation (where the closing eval
14652 # brace is outdented if possible).
14653 # The problem is that an 'eval' block has continuation
14654 # indentation and it looks better to undo it in some
14655 # cases. If we do not use this patch we would get:
14663 # The alternative, for uncuddled style, is to create
14664 # a patch in set_adjusted_indentation which undoes
14665 # the indentation of a leading line like 'or do {'.
14666 # This doesn't work well with -icb through
14668 $block_type_to_go[$iend_1] eq 'eval'
14669 && !$rOpts->{'line-up-parentheses'}
14670 && !$rOpts->{'indent-closing-brace'}
14671 && $tokens_to_go[$iend_2] eq '{'
14673 ( $type_ibeg_2 =~ /^(|\&\&|\|\|)$/ )
14674 || ( $type_ibeg_2 eq 'k'
14675 && $is_and_or{ $tokens_to_go[$ibeg_2] } )
14676 || $is_if_unless{ $tokens_to_go[$ibeg_2] }
14680 $skip_Section_3 ||= 1;
14687 # handle '.' and '?' specially below
14688 || ( $type_ibeg_2 =~ /^[\.\?]$/ )
14692 elsif ( $type_iend_1 eq '{' ) {
14695 # honor breaks at opening brace
14696 # Added to prevent recombining something like this:
14697 # } || eval { package main;
14698 next if $forced_breakpoint_to_go[$iend_1];
14701 # do not recombine lines with ending &&, ||,
14702 elsif ( $is_amp_amp{$type_iend_1} ) {
14703 next unless $want_break_before{$type_iend_1};
14706 # Identify and recombine a broken ?/: chain
14707 elsif ( $type_iend_1 eq '?' ) {
14709 # Do not recombine different levels
14711 if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
14713 # do not recombine unless next line ends in :
14714 next unless $type_iend_2 eq ':';
14717 # for lines ending in a comma...
14718 elsif ( $type_iend_1 eq ',' ) {
14720 # Do not recombine at comma which is following the
14722 # TODO: might be best to make a special flag
14723 next if ( $old_breakpoint_to_go[$iend_1] );
14725 # an isolated '},' may join with an identifier + ';'
14726 # this is useful for the class of a 'bless' statement (bless.t)
14727 if ( $type_ibeg_1 eq '}'
14728 && $type_ibeg_2 eq 'i' )
14731 unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
14732 && ( $iend_2 == ( $ibeg_2 + 1 ) )
14733 && $this_line_is_semicolon_terminated );
14735 # override breakpoint
14736 $forced_breakpoint_to_go[$iend_1] = 0;
14742 # do not recombine after a comma unless this will leave
14744 next unless ( $n + 1 >= $nmax );
14746 # do not recombine if there is a change in indentation depth
14749 $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
14751 # do not recombine a "complex expression" after a
14752 # comma. "complex" means no parens.
14754 foreach my $ii ( $ibeg_2 .. $iend_2 ) {
14755 if ( $tokens_to_go[$ii] eq '(' ) {
14760 next if $saw_paren;
14765 elsif ( $type_iend_1 eq '(' ) {
14767 # No longer doing this
14770 elsif ( $type_iend_1 eq ')' ) {
14772 # No longer doing this
14775 # keep a terminal for-semicolon
14776 elsif ( $type_iend_1 eq 'f' ) {
14780 # if '=' at end of line ...
14781 elsif ( $is_assignment{$type_iend_1} ) {
14783 # keep break after = if it was in input stream
14784 # this helps prevent 'blinkers'
14785 next if $old_breakpoint_to_go[$iend_1]
14787 # don't strand an isolated '='
14788 && $iend_1 != $ibeg_1;
14790 my $is_short_quote =
14791 ( $type_ibeg_2 eq 'Q'
14792 && $ibeg_2 == $iend_2
14793 && token_sequence_length( $ibeg_2, $ibeg_2 ) <
14794 $rOpts_short_concatenation_item_length );
14796 ( $type_ibeg_1 eq '?'
14797 && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
14799 # always join an isolated '=', a short quote, or if this
14800 # will put ?/: at start of adjacent lines
14801 if ( $ibeg_1 != $iend_1
14802 && !$is_short_quote
14809 # unless we can reduce this to two lines
14812 # or three lines, the last with a leading semicolon
14813 || ( $nmax == $n + 2
14814 && $types_to_go[$ibeg_nmax] eq ';' )
14816 # or the next line ends with a here doc
14817 || $type_iend_2 eq 'h'
14819 # or the next line ends in an open paren or brace
14820 # and the break hasn't been forced [dima.t]
14821 || ( !$forced_breakpoint_to_go[$iend_1]
14822 && $type_iend_2 eq '{' )
14825 # do not recombine if the two lines might align well
14826 # this is a very approximate test for this
14829 # RT#127633 - the leading tokens are not operators
14830 ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] )
14832 # or they are different
14834 && $type_ibeg_2 ne $types_to_go[$ibeg_3] )
14840 # Recombine if we can make two lines
14843 # -lp users often prefer this:
14844 # my $title = function($env, $env, $sysarea,
14845 # "bubba Borrower Entry");
14846 # so we will recombine if -lp is used we have
14848 && ( !$rOpts_line_up_parentheses
14849 || $type_iend_2 ne ',' )
14853 # otherwise, scan the rhs line up to last token for
14854 # complexity. Note that we are not counting the last
14855 # token in case it is an opening paren.
14857 my $depth = $nesting_depth_to_go[$ibeg_2];
14858 foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
14859 if ( $nesting_depth_to_go[$i] != $depth ) {
14861 last if ( $tv > 1 );
14863 $depth = $nesting_depth_to_go[$i];
14866 # ok to recombine if no level changes before last token
14869 # otherwise, do not recombine if more than two
14871 next if ( $tv > 1 );
14873 # check total complexity of the two adjacent lines
14874 # that will occur if we do this join
14877 ? $ri_end->[ $n + 1 ]
14879 foreach my $i ( $iend_2 .. $istop ) {
14880 if ( $nesting_depth_to_go[$i] != $depth ) {
14882 last if ( $tv > 2 );
14884 $depth = $nesting_depth_to_go[$i];
14887 # do not recombine if total is more than 2 level changes
14888 next if ( $tv > 2 );
14893 unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
14894 $forced_breakpoint_to_go[$iend_1] = 0;
14899 elsif ( $type_iend_1 eq 'k' ) {
14901 # make major control keywords stand out
14906 #/^(last|next|redo|return)$/
14907 $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
14909 # but only if followed by multiple lines
14913 if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
14915 unless $want_break_before{ $tokens_to_go[$iend_1] };
14919 #----------------------------------------------------------
14920 # Recombine Section 3:
14921 # Examine token at $ibeg_2 (left end of second line of pair)
14922 #----------------------------------------------------------
14924 # join lines identified above as capable of
14925 # causing an outdented line with leading closing paren
14926 # Note that we are skipping the rest of this section
14927 # and the rest of the loop to do the join
14928 if ($skip_Section_3) {
14929 $forced_breakpoint_to_go[$iend_1] = 0;
14934 # handle lines with leading &&, ||
14935 elsif ( $is_amp_amp{$type_ibeg_2} ) {
14937 $leading_amp_count++;
14939 # ok to recombine if it follows a ? or :
14940 # and is followed by an open paren..
14942 ( $is_ternary{$type_ibeg_1}
14943 && $tokens_to_go[$iend_2] eq '(' )
14945 # or is followed by a ? or : at same depth
14947 # We are looking for something like this. We can
14948 # recombine the && line with the line above to make the
14949 # structure more clear:
14951 # exists $G->{Attr}->{V}
14952 # && exists $G->{Attr}->{V}->{$u}
14953 # ? %{ $G->{Attr}->{V}->{$u} }
14956 # We should probably leave something like this alone:
14958 # exists $G->{Attr}->{E}
14959 # && exists $G->{Attr}->{E}->{$u}
14960 # && exists $G->{Attr}->{E}->{$u}->{$v}
14961 # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
14963 # so that we either have all of the &&'s (or ||'s)
14964 # on one line, as in the first example, or break at
14965 # each one as in the second example. However, it
14966 # sometimes makes things worse to check for this because
14967 # it prevents multiple recombinations. So this is not done.
14969 && $is_ternary{ $types_to_go[$ibeg_3] }
14970 && $nesting_depth_to_go[$ibeg_3] ==
14971 $nesting_depth_to_go[$ibeg_2] );
14973 next if !$ok && $want_break_before{$type_ibeg_2};
14974 $forced_breakpoint_to_go[$iend_1] = 0;
14976 # tweak the bond strength to give this joint priority
14981 # Identify and recombine a broken ?/: chain
14982 elsif ( $type_ibeg_2 eq '?' ) {
14984 # Do not recombine different levels
14985 my $lev = $levels_to_go[$ibeg_2];
14986 next if ( $lev ne $levels_to_go[$ibeg_1] );
14988 # Do not recombine a '?' if either next line or
14989 # previous line does not start with a ':'. The reasons
14990 # are that (1) no alignment of the ? will be possible
14991 # and (2) the expression is somewhat complex, so the
14992 # '?' is harder to see in the interior of the line.
14993 my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
14994 my $precedes_colon =
14995 $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
14996 next unless ( $follows_colon || $precedes_colon );
14998 # we will always combining a ? line following a : line
14999 if ( !$follows_colon ) {
15001 # ...otherwise recombine only if it looks like a chain.
15002 # we will just look at a few nearby lines to see if
15003 # this looks like a chain.
15004 my $local_count = 0;
15005 foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
15008 && $types_to_go[$ii] eq ':'
15009 && $levels_to_go[$ii] == $lev;
15011 next unless ( $local_count > 1 );
15013 $forced_breakpoint_to_go[$iend_1] = 0;
15016 # do not recombine lines with leading '.'
15017 elsif ( $type_ibeg_2 eq '.' ) {
15018 my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
15022 # ... unless there is just one and we can reduce
15023 # this to two lines if we do. For example, this
15027 # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
15029 # looks better than this:
15030 # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
15031 # . '$args .= $pat;'
15036 && $type_ibeg_1 ne $type_ibeg_2
15039 # ... or this would strand a short quote , like this
15040 # . "some long quote"
15043 || ( $types_to_go[$i_next_nonblank] eq 'Q'
15044 && $i_next_nonblank >= $iend_2 - 1
15045 && $token_lengths_to_go[$i_next_nonblank] <
15046 $rOpts_short_concatenation_item_length )
15050 # handle leading keyword..
15051 elsif ( $type_ibeg_2 eq 'k' ) {
15053 # handle leading "or"
15054 if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
15057 $this_line_is_semicolon_terminated
15060 # following 'if' or 'unless' or 'or'
15061 $type_ibeg_1 eq 'k'
15062 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
15064 # important: only combine a very simple or
15065 # statement because the step below may have
15066 # combined a trailing 'and' with this or,
15067 # and we do not want to then combine
15068 # everything together
15069 && ( $iend_2 - $ibeg_2 <= 7 )
15074 $forced_breakpoint_to_go[$iend_1] = 0
15075 unless $old_breakpoint_to_go[$iend_1];
15078 # handle leading 'and'
15079 elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) {
15081 # Decide if we will combine a single terminal 'and'
15082 # after an 'if' or 'unless'.
15084 # This looks best with the 'and' on the same
15085 # line as the 'if':
15088 # if $seconds and $nu < 2;
15090 # But this looks better as shown:
15093 # if !$this->{Parents}{$_}
15094 # or $this->{Parents}{$_} eq $_;
15098 $this_line_is_semicolon_terminated
15101 # following 'if' or 'unless' or 'or'
15102 $type_ibeg_1 eq 'k'
15103 && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
15104 || $tokens_to_go[$ibeg_1] eq 'or' )
15109 # handle leading "if" and "unless"
15110 elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
15112 # FIXME: This is still experimental..may not be too useful
15115 $this_line_is_semicolon_terminated
15117 # previous line begins with 'and' or 'or'
15118 && $type_ibeg_1 eq 'k'
15119 && $is_and_or{ $tokens_to_go[$ibeg_1] }
15124 # handle all other leading keywords
15127 # keywords look best at start of lines,
15128 # but combine things like "1 while"
15129 unless ( $is_assignment{$type_iend_1} ) {
15131 if ( ( $type_iend_1 ne 'k' )
15132 && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
15137 # similar treatment of && and || as above for 'and' and 'or':
15138 # NOTE: This block of code is currently bypassed because
15139 # of a previous block but is retained for possible future use.
15140 elsif ( $is_amp_amp{$type_ibeg_2} ) {
15142 # maybe looking at something like:
15143 # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
15147 $this_line_is_semicolon_terminated
15149 # previous line begins with an 'if' or 'unless' keyword
15150 && $type_ibeg_1 eq 'k'
15151 && $is_if_unless{ $tokens_to_go[$ibeg_1] }
15156 # handle line with leading = or similar
15157 elsif ( $is_assignment{$type_ibeg_2} ) {
15158 next unless ( $n == 1 || $n == $nmax );
15159 next if $old_breakpoint_to_go[$iend_1];
15163 # unless we can reduce this to two lines
15166 # or three lines, the last with a leading semicolon
15167 || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
15169 # or the next line ends with a here doc
15170 || $type_iend_2 eq 'h'
15172 # or this is a short line ending in ;
15173 || ( $n == $nmax && $this_line_is_semicolon_terminated )
15175 $forced_breakpoint_to_go[$iend_1] = 0;
15178 #----------------------------------------------------------
15179 # Recombine Section 4:
15180 # Combine the lines if we arrive here and it is possible
15181 #----------------------------------------------------------
15183 # honor hard breakpoints
15184 next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
15186 my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
15188 # Require a few extra spaces before recombining lines if we are
15189 # at an old breakpoint unless this is a simple list or terminal
15190 # line. The goal is to avoid oscillating between two
15191 # quasi-stable end states. For example this snippet caused
15195 ## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
15199 if ( $old_breakpoint_to_go[$iend_1]
15200 && !$this_line_is_semicolon_terminated
15203 && $type_iend_2 ne ',' );
15205 # do not recombine if we would skip in indentation levels
15206 if ( $n < $nmax ) {
15207 my $if_next = $ri_beg->[ $n + 1 ];
15210 $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
15211 && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
15213 # but an isolated 'if (' is undesirable
15216 && $iend_1 - $ibeg_1 <= 2
15217 && $type_ibeg_1 eq 'k'
15218 && $tokens_to_go[$ibeg_1] eq 'if'
15219 && $tokens_to_go[$iend_1] ne '('
15225 next if ( $bs >= NO_BREAK - 1 );
15227 # remember the pair with the greatest bond strength
15234 if ( $bs > $bs_best ) {
15241 # recombine the pair with the greatest bond strength
15243 splice @{$ri_beg}, $n_best, 1;
15244 splice @{$ri_end}, $n_best - 1, 1;
15245 splice @joint, $n_best, 1;
15247 # keep going if we are still making progress
15251 return ( $ri_beg, $ri_end );
15253 } # end recombine_breakpoints
15255 sub break_all_chain_tokens {
15257 # scan the current breakpoints looking for breaks at certain "chain
15258 # operators" (. : && || + etc) which often occur repeatedly in a long
15259 # statement. If we see a break at any one, break at all similar tokens
15260 # within the same container.
15262 my ( $ri_left, $ri_right ) = @_;
15264 my %saw_chain_type;
15265 my %left_chain_type;
15266 my %right_chain_type;
15267 my %interior_chain_type;
15268 my $nmax = @{$ri_right} - 1;
15270 # scan the left and right end tokens of all lines
15272 for my $n ( 0 .. $nmax ) {
15273 my $il = $ri_left->[$n];
15274 my $ir = $ri_right->[$n];
15275 my $typel = $types_to_go[$il];
15276 my $typer = $types_to_go[$ir];
15277 $typel = '+' if ( $typel eq '-' ); # treat + and - the same
15278 $typer = '+' if ( $typer eq '-' );
15279 $typel = '*' if ( $typel eq '/' ); # treat * and / the same
15280 $typer = '*' if ( $typer eq '/' );
15281 my $tokenl = $tokens_to_go[$il];
15282 my $tokenr = $tokens_to_go[$ir];
15284 if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
15285 next if ( $typel eq '?' );
15286 push @{ $left_chain_type{$typel} }, $il;
15287 $saw_chain_type{$typel} = 1;
15290 if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
15291 next if ( $typer eq '?' );
15292 push @{ $right_chain_type{$typer} }, $ir;
15293 $saw_chain_type{$typer} = 1;
15297 return unless $count;
15299 # now look for any interior tokens of the same types
15301 for my $n ( 0 .. $nmax ) {
15302 my $il = $ri_left->[$n];
15303 my $ir = $ri_right->[$n];
15304 foreach my $i ( $il + 1 .. $ir - 1 ) {
15305 my $type = $types_to_go[$i];
15306 $type = '+' if ( $type eq '-' );
15307 $type = '*' if ( $type eq '/' );
15308 if ( $saw_chain_type{$type} ) {
15309 push @{ $interior_chain_type{$type} }, $i;
15314 return unless $count;
15316 # now make a list of all new break points
15319 # loop over all chain types
15320 foreach my $type ( keys %saw_chain_type ) {
15322 # quit if just ONE continuation line with leading . For example--
15323 # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
15325 last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
15327 # loop over all interior chain tokens
15328 foreach my $itest ( @{ $interior_chain_type{$type} } ) {
15330 # loop over all left end tokens of same type
15331 if ( $left_chain_type{$type} ) {
15332 next if $nobreak_to_go[ $itest - 1 ];
15333 foreach my $i ( @{ $left_chain_type{$type} } ) {
15334 next unless in_same_container( $i, $itest );
15335 push @insert_list, $itest - 1;
15337 # Break at matching ? if this : is at a different level.
15338 # For example, the ? before $THRf_DEAD in the following
15339 # should get a break if its : gets a break.
15342 # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
15343 # : ( $_ & 4 ) ? $THRf_R_DETACHED
15344 # : $THRf_R_JOINABLE;
15346 && $levels_to_go[$i] != $levels_to_go[$itest] )
15348 my $i_question = $mate_index_to_go[$itest];
15349 if ( $i_question > 0 ) {
15350 push @insert_list, $i_question - 1;
15357 # loop over all right end tokens of same type
15358 if ( $right_chain_type{$type} ) {
15359 next if $nobreak_to_go[$itest];
15360 foreach my $i ( @{ $right_chain_type{$type} } ) {
15361 next unless in_same_container( $i, $itest );
15362 push @insert_list, $itest;
15364 # break at matching ? if this : is at a different level
15366 && $levels_to_go[$i] != $levels_to_go[$itest] )
15368 my $i_question = $mate_index_to_go[$itest];
15369 if ( $i_question >= 0 ) {
15370 push @insert_list, $i_question;
15379 # insert any new break points
15380 if (@insert_list) {
15381 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
15388 # Look for assignment operators that could use a breakpoint.
15389 # For example, in the following snippet
15391 # $HOME = $ENV{HOME}
15394 # || die "no home directory for user $<";
15396 # we could break at the = to get this, which is a little nicer:
15401 # || die "no home directory for user $<";
15403 # The logic here follows the logic in set_logical_padding, which
15404 # will add the padding in the second line to improve alignment.
15406 my ( $ri_left, $ri_right ) = @_;
15407 my $nmax = @{$ri_right} - 1;
15408 return unless ( $nmax >= 2 );
15410 # scan the left ends of first two lines
15413 for my $n ( 1 .. 2 ) {
15414 my $il = $ri_left->[$n];
15415 my $typel = $types_to_go[$il];
15416 my $tokenl = $tokens_to_go[$il];
15418 my $has_leading_op = ( $tokenl =~ /^\w/ )
15419 ? $is_chain_operator{$tokenl} # + - * / : ? && ||
15420 : $is_chain_operator{$typel}; # and, or
15421 return unless ($has_leading_op);
15424 unless ( $tokenl eq $tokbeg
15425 && $nesting_depth_to_go[$il] eq $depth_beg );
15428 $depth_beg = $nesting_depth_to_go[$il];
15431 # now look for any interior tokens of the same types
15432 my $il = $ri_left->[0];
15433 my $ir = $ri_right->[0];
15435 # now make a list of all new break points
15437 for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
15438 my $type = $types_to_go[$i];
15439 if ( $is_assignment{$type}
15440 && $nesting_depth_to_go[$i] eq $depth_beg )
15442 if ( $want_break_before{$type} ) {
15443 push @insert_list, $i - 1;
15446 push @insert_list, $i;
15451 # Break after a 'return' followed by a chain of operators
15452 # return ( $^O !~ /win32|dos/i )
15453 # && ( $^O ne 'VMS' )
15454 # && ( $^O ne 'OS2' )
15455 # && ( $^O ne 'MacOS' );
15458 # ( $^O !~ /win32|dos/i )
15459 # && ( $^O ne 'VMS' )
15460 # && ( $^O ne 'OS2' )
15461 # && ( $^O ne 'MacOS' );
15463 if ( $types_to_go[$i] eq 'k'
15464 && $tokens_to_go[$i] eq 'return'
15466 && $nesting_depth_to_go[$i] eq $depth_beg )
15468 push @insert_list, $i;
15471 return unless (@insert_list);
15473 # One final check...
15474 # scan second and third lines and be sure there are no assignments
15475 # we want to avoid breaking at an = to make something like this:
15477 # $html_icons{"$type-$state"}
15478 # or $icon = $html_icons{$type}
15479 # or $icon = $html_icons{$state} )
15480 for my $n ( 1 .. 2 ) {
15481 my $il = $ri_left->[$n];
15482 my $ir = $ri_right->[$n];
15483 foreach my $i ( $il + 1 .. $ir ) {
15484 my $type = $types_to_go[$i];
15486 if ( $is_assignment{$type}
15487 && $nesting_depth_to_go[$i] eq $depth_beg );
15491 # ok, insert any new break point
15492 if (@insert_list) {
15493 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
15498 sub insert_final_breaks {
15500 my ( $ri_left, $ri_right ) = @_;
15502 my $nmax = @{$ri_right} - 1;
15504 # scan the left and right end tokens of all lines
15506 my $i_first_colon = -1;
15507 for my $n ( 0 .. $nmax ) {
15508 my $il = $ri_left->[$n];
15509 my $ir = $ri_right->[$n];
15510 my $typel = $types_to_go[$il];
15511 my $typer = $types_to_go[$ir];
15512 return if ( $typel eq '?' );
15513 return if ( $typer eq '?' );
15514 if ( $typel eq ':' ) { $i_first_colon = $il; last; }
15515 elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
15518 # For long ternary chains,
15519 # if the first : we see has its # ? is in the interior
15520 # of a preceding line, then see if there are any good
15521 # breakpoints before the ?.
15522 if ( $i_first_colon > 0 ) {
15523 my $i_question = $mate_index_to_go[$i_first_colon];
15524 if ( $i_question > 0 ) {
15526 for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
15527 my $token = $tokens_to_go[$ii];
15528 my $type = $types_to_go[$ii];
15530 # For now, a good break is either a comma or,
15531 # in a long chain, a 'return'.
15532 # Patch for RT #126633: added the $nmax>1 check to avoid
15533 # breaking after a return for a simple ternary. For longer
15534 # chains the break after return allows vertical alignment, so
15535 # it is still done. So perltidy -wba='?' will not break
15536 # immediately after the return in the following statement:
15538 # return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' :
15539 # 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb';
15544 || $type eq 'k' && ( $nmax > 1 && $token eq 'return' )
15546 && in_same_container( $ii, $i_question )
15549 push @insert_list, $ii;
15553 ## # For now, a good break is either a comma or a 'return'.
15554 ## if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' )
15555 ## && in_same_container( $ii, $i_question ) )
15557 ## push @insert_list, $ii;
15562 # insert any new break points
15563 if (@insert_list) {
15564 insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
15571 sub in_same_container {
15573 # check to see if tokens at i1 and i2 are in the
15574 # same container, and not separated by a comma, ? or :
15575 # FIXME: this can be written more efficiently now
15576 my ( $i1, $i2 ) = @_;
15577 my $type = $types_to_go[$i1];
15578 my $depth = $nesting_depth_to_go[$i1];
15579 return unless ( $nesting_depth_to_go[$i2] == $depth );
15580 if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
15582 ###########################################################
15583 # This is potentially a very slow routine and not critical.
15584 # For safety just give up for large differences.
15585 # See test file 'infinite_loop.txt'
15586 # TODO: replace this loop with a data structure
15587 ###########################################################
15588 return if ( $i2 - $i1 > 200 );
15590 foreach my $i ( $i1 + 1 .. $i2 - 1 ) {
15591 next if ( $nesting_depth_to_go[$i] > $depth );
15592 return if ( $nesting_depth_to_go[$i] < $depth );
15594 my $tok = $tokens_to_go[$i];
15595 $tok = ',' if $tok eq '=>'; # treat => same as ,
15597 # Example: we would not want to break at any of these .'s
15598 # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
15599 if ( $type ne ':' ) {
15600 return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or';
15603 return if ( $tok =~ /^[\,]$/ );
15609 sub set_continuation_breaks {
15611 # Define an array of indexes for inserting newline characters to
15612 # keep the line lengths below the maximum desired length. There is
15613 # an implied break after the last token, so it need not be included.
15616 # This routine is part of series of routines which adjust line
15617 # lengths. It is only called if a statement is longer than the
15618 # maximum line length, or if a preliminary scanning located
15619 # desirable break points. Sub scan_list has already looked at
15620 # these tokens and set breakpoints (in array
15621 # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
15622 # after commas, after opening parens, and before closing parens).
15623 # This routine will honor these breakpoints and also add additional
15624 # breakpoints as necessary to keep the line length below the maximum
15625 # requested. It bases its decision on where the 'bond strength' is
15628 # Output: returns references to the arrays:
15631 # which contain the indexes $i of the first and last tokens on each
15634 # In addition, the array:
15635 # $forced_breakpoint_to_go[$i]
15636 # may be updated to be =1 for any index $i after which there must be
15637 # a break. This signals later routines not to undo the breakpoint.
15639 my $saw_good_break = shift;
15640 my @i_first = (); # the first index to output
15641 my @i_last = (); # the last index to output
15642 my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
15643 if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
15645 set_bond_strengths();
15648 my $imax = $max_index_to_go;
15649 if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
15650 if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
15651 my $i_begin = $imin; # index for starting next iteration
15653 my $leading_spaces = leading_spaces_to_go($imin);
15654 my $line_count = 0;
15655 my $last_break_strength = NO_BREAK;
15656 my $i_last_break = -1;
15657 my $max_bias = 0.001;
15658 my $tiny_bias = 0.0001;
15659 my $leading_alignment_token = "";
15660 my $leading_alignment_type = "";
15662 # see if any ?/:'s are in order
15663 my $colons_in_order = 1;
15665 my @colon_list = grep { /^[\?\:]$/ } @types_to_go[ 0 .. $max_index_to_go ];
15666 my $colon_count = @colon_list;
15667 foreach (@colon_list) {
15668 if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
15672 # This is a sufficient but not necessary condition for colon chain
15673 my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
15675 #-------------------------------------------------------
15676 # BEGINNING of main loop to set continuation breakpoints
15677 # Keep iterating until we reach the end
15678 #-------------------------------------------------------
15679 while ( $i_begin <= $imax ) {
15680 my $lowest_strength = NO_BREAK;
15681 my $starting_sum = $summed_lengths_to_go[$i_begin];
15684 my $lowest_next_token = '';
15685 my $lowest_next_type = 'b';
15686 my $i_lowest_next_nonblank = -1;
15688 #-------------------------------------------------------
15689 # BEGINNING of inner loop to find the best next breakpoint
15690 #-------------------------------------------------------
15691 for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
15692 my $type = $types_to_go[$i_test];
15693 my $token = $tokens_to_go[$i_test];
15694 my $next_type = $types_to_go[ $i_test + 1 ];
15695 my $next_token = $tokens_to_go[ $i_test + 1 ];
15696 my $i_next_nonblank = $inext_to_go[$i_test];
15697 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
15698 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
15699 my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
15700 my $strength = $bond_strength_to_go[$i_test];
15701 my $maximum_line_length = maximum_line_length($i_begin);
15703 # use old breaks as a tie-breaker. For example to
15704 # prevent blinkers with -pbp in this code:
15707 ## qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/}
15710 # At the same time try to prevent a leading * in this code
15711 # with the default formatting:
15714 ## factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 )
15715 ## * ( $x**( $a - 1 ) )
15716 ## * ( ( 1 - $x )**( $b - 1 ) );
15718 # reduce strength a bit to break ties at an old breakpoint ...
15720 $old_breakpoint_to_go[$i_test]
15722 # which is a 'good' breakpoint, meaning ...
15723 # we don't want to break before it
15724 && !$want_break_before{$type}
15726 # and either we want to break before the next token
15727 # or the next token is not short (i.e. not a '*', '/' etc.)
15728 && $i_next_nonblank <= $imax
15729 && ( $want_break_before{$next_nonblank_type}
15730 || $token_lengths_to_go[$i_next_nonblank] > 2
15731 || $next_nonblank_type =~ /^[\,\(\[\{L]$/ )
15734 $strength -= $tiny_bias;
15737 # otherwise increase strength a bit if this token would be at the
15738 # maximum line length. This is necessary to avoid blinking
15739 # in the above example when the -iob flag is added.
15743 $summed_lengths_to_go[ $i_test + 1 ] -
15745 if ( $len >= $maximum_line_length ) {
15746 $strength += $tiny_bias;
15750 my $must_break = 0;
15752 # Force an immediate break at certain operators
15753 # with lower level than the start of the line,
15754 # unless we've already seen a better break.
15756 ##############################################
15757 # Note on an issue with a preceding ?
15758 ##############################################
15759 # We don't include a ? in the above list, but there may
15760 # be a break at a previous ? if the line is long.
15761 # Because of this we do not want to force a break if
15762 # there is a previous ? on this line. For now the best way
15763 # to do this is to not break if we have seen a lower strength
15764 # point, which is probably a ?.
15766 # Example of unwanted breaks we are avoiding at a '.' following a ?
15767 # from pod2html using perltidy -gnu:
15769 # ? "\n<A NAME=\""
15771 # . "\">\n$text</A>\n"
15772 # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
15775 $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
15776 || ( $next_nonblank_type eq 'k'
15777 && $next_nonblank_token =~ /^(and|or)$/ )
15779 && ( $nesting_depth_to_go[$i_begin] >
15780 $nesting_depth_to_go[$i_next_nonblank] )
15781 && ( $strength <= $lowest_strength )
15784 set_forced_breakpoint($i_next_nonblank);
15789 # Try to put a break where requested by scan_list
15790 $forced_breakpoint_to_go[$i_test]
15792 # break between ) { in a continued line so that the '{' can
15794 # See similar logic in scan_list which catches instances
15795 # where a line is just something like ') {'. We have to
15796 # be careful because the corresponding block keyword might
15797 # not be on the first line, such as 'for' here:
15801 # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
15807 && ( $token eq ')' )
15808 && ( $next_nonblank_type eq '{' )
15809 && ($next_nonblank_block_type)
15810 && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
15812 # RT #104427: Dont break before opening sub brace because
15813 # sub block breaks handled at higher level, unless
15814 # it looks like the preceeding list is long and broken
15816 $next_nonblank_block_type =~ /^sub\b/
15817 && ( $nesting_depth_to_go[$i_begin] ==
15818 $nesting_depth_to_go[$i_next_nonblank] )
15821 && !$rOpts->{'opening-brace-always-on-right'}
15824 # There is an implied forced break at a terminal opening brace
15825 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
15829 # Forced breakpoints must sometimes be overridden, for example
15830 # because of a side comment causing a NO_BREAK. It is easier
15831 # to catch this here than when they are set.
15832 if ( $strength < NO_BREAK - 1 ) {
15833 $strength = $lowest_strength - $tiny_bias;
15838 # quit if a break here would put a good terminal token on
15839 # the next line and we already have a possible break
15842 && ( $next_nonblank_type =~ /^[\;\,]$/ )
15846 $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
15848 ) > $maximum_line_length
15852 last if ( $i_lowest >= 0 );
15855 # Avoid a break which would strand a single punctuation
15856 # token. For example, we do not want to strand a leading
15857 # '.' which is followed by a long quoted string.
15858 # But note that we do want to do this with -extrude (l=1)
15859 # so please test any changes to this code on -extrude.
15862 && ( $i_test == $i_begin )
15863 && ( $i_test < $imax )
15864 && ( $token eq $type )
15868 $summed_lengths_to_go[ $i_test + 1 ] -
15870 ) < $maximum_line_length
15874 $i_test = min( $imax, $inext_to_go[$i_test] );
15878 if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
15881 # break at previous best break if it would have produced
15882 # a leading alignment of certain common tokens, and it
15883 # is different from the latest candidate break
15885 if ($leading_alignment_type);
15887 # Force at least one breakpoint if old code had good
15888 # break It is only called if a breakpoint is required or
15889 # desired. This will probably need some adjustments
15890 # over time. A goal is to try to be sure that, if a new
15891 # side comment is introduced into formatted text, then
15892 # the same breakpoints will occur. scbreak.t
15895 $i_test == $imax # we are at the end
15896 && !$forced_breakpoint_count #
15897 && $saw_good_break # old line had good break
15898 && $type =~ /^[#;\{]$/ # and this line ends in
15899 # ';' or side comment
15900 && $i_last_break < 0 # and we haven't made a break
15901 && $i_lowest >= 0 # and we saw a possible break
15902 && $i_lowest < $imax - 1 # (but not just before this ;)
15903 && $strength - $lowest_strength < 0.5 * WEAK # and it's good
15906 # Do not skip past an important break point in a short final
15907 # segment. For example, without this check we would miss the
15908 # break at the final / in the following code:
15911 # ( $tau * $mass_pellet * $q_0 *
15912 # ( 1. - exp( -$t_stop / $tau ) ) -
15913 # 4. * $pi * $factor * $k_ice *
15914 # ( $t_melt - $t_ice ) *
15917 # ( $rho_ice * $Qs * $pi * $r_pellet**2 );
15919 if ( $line_count > 2
15920 && $i_lowest < $i_test
15921 && $i_test > $imax - 2
15922 && $nesting_depth_to_go[$i_begin] >
15923 $nesting_depth_to_go[$i_lowest]
15924 && $lowest_strength < $last_break_strength - .5 * WEAK )
15926 # Make this break for math operators for now
15927 my $ir = $inext_to_go[$i_lowest];
15928 my $il = $iprev_to_go[$ir];
15930 if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
15931 || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ );
15934 # Update the minimum bond strength location
15935 $lowest_strength = $strength;
15936 $i_lowest = $i_test;
15937 $lowest_next_token = $next_nonblank_token;
15938 $lowest_next_type = $next_nonblank_type;
15939 $i_lowest_next_nonblank = $i_next_nonblank;
15940 last if $must_break;
15942 # set flags to remember if a break here will produce a
15943 # leading alignment of certain common tokens
15944 if ( $line_count > 0
15946 && ( $lowest_strength - $last_break_strength <= $max_bias )
15949 my $i_last_end = $iprev_to_go[$i_begin];
15950 my $tok_beg = $tokens_to_go[$i_begin];
15951 my $type_beg = $types_to_go[$i_begin];
15954 # check for leading alignment of certain tokens
15956 $tok_beg eq $next_nonblank_token
15957 && $is_chain_operator{$tok_beg}
15958 && ( $type_beg eq 'k'
15959 || $type_beg eq $tok_beg )
15960 && $nesting_depth_to_go[$i_begin] >=
15961 $nesting_depth_to_go[$i_next_nonblank]
15964 || ( $tokens_to_go[$i_last_end] eq $token
15965 && $is_chain_operator{$token}
15966 && ( $type eq 'k' || $type eq $token )
15967 && $nesting_depth_to_go[$i_last_end] >=
15968 $nesting_depth_to_go[$i_test] )
15971 $leading_alignment_token = $next_nonblank_token;
15972 $leading_alignment_type = $next_nonblank_type;
15977 my $too_long = ( $i_test >= $imax );
15978 if ( !$too_long ) {
15981 $summed_lengths_to_go[ $i_test + 2 ] -
15983 $too_long = $next_length > $maximum_line_length;
15985 # To prevent blinkers we will avoid leaving a token exactly at
15986 # the line length limit unless it is the last token or one of
15987 # several "good" types.
15989 # The following code was a blinker with -pbp before this
15991 ## $last_nonblank_token eq '('
15992 ## && $is_indirect_object_taker{ $paren_type
15993 ## [$paren_depth] }
15994 # The issue causing the problem is that if the
15995 # term [$paren_depth] gets broken across a line then
15996 # the whitespace routine doesn't see both opening and closing
15997 # brackets and will format like '[ $paren_depth ]'. This
15998 # leads to an oscillation in length depending if we break
15999 # before the closing bracket or not.
16001 && $i_test + 1 < $imax
16002 && $next_nonblank_type !~ /^[,\}\]\)R]$/ )
16004 $too_long = $next_length >= $maximum_line_length;
16008 FORMATTER_DEBUG_FLAG_BREAK
16011 my $rtok = $next_nonblank_token ? $next_nonblank_token : "";
16012 my $i_testp2 = $i_test + 2;
16013 if ( $i_testp2 > $max_index_to_go + 1 ) {
16014 $i_testp2 = $max_index_to_go + 1;
16016 if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
16017 if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
16019 "BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n";
16022 # allow one extra terminal token after exceeding line length
16023 # if it would strand this token.
16024 if ( $rOpts_fuzzy_line_length
16026 && $i_lowest == $i_test
16027 && $token_lengths_to_go[$i_test] > 1
16028 && $next_nonblank_type =~ /^[\;\,]$/ )
16035 ( $i_test == $imax ) # we're done if no more tokens,
16037 ( $i_lowest >= 0 ) # or no more space and we have a break
16043 #-------------------------------------------------------
16044 # END of inner loop to find the best next breakpoint
16045 # Now decide exactly where to put the breakpoint
16046 #-------------------------------------------------------
16048 # it's always ok to break at imax if no other break was found
16049 if ( $i_lowest < 0 ) { $i_lowest = $imax }
16051 # semi-final index calculation
16052 my $i_next_nonblank = $inext_to_go[$i_lowest];
16053 my $next_nonblank_type = $types_to_go[$i_next_nonblank];
16054 my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
16056 #-------------------------------------------------------
16057 # ?/: rule 1 : if a break here will separate a '?' on this
16058 # line from its closing ':', then break at the '?' instead.
16059 #-------------------------------------------------------
16060 foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
16061 next unless ( $tokens_to_go[$i] eq '?' );
16063 # do not break if probable sequence of ?/: statements
16064 next if ($is_colon_chain);
16066 # do not break if statement is broken by side comment
16069 $tokens_to_go[$max_index_to_go] eq '#'
16070 && terminal_type( \@types_to_go, \@block_type_to_go, 0,
16071 $max_index_to_go ) !~ /^[\;\}]$/
16074 # no break needed if matching : is also on the line
16076 if ( $mate_index_to_go[$i] >= 0
16077 && $mate_index_to_go[$i] <= $i_next_nonblank );
16080 if ( $want_break_before{'?'} ) { $i_lowest-- }
16084 #-------------------------------------------------------
16085 # END of inner loop to find the best next breakpoint:
16086 # Break the line after the token with index i=$i_lowest
16087 #-------------------------------------------------------
16089 # final index calculation
16090 $i_next_nonblank = $inext_to_go[$i_lowest];
16091 $next_nonblank_type = $types_to_go[$i_next_nonblank];
16092 $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
16094 FORMATTER_DEBUG_FLAG_BREAK
16096 "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
16098 #-------------------------------------------------------
16099 # ?/: rule 2 : if we break at a '?', then break at its ':'
16101 # Note: this rule is also in sub scan_list to handle a break
16102 # at the start and end of a line (in case breaks are dictated
16103 # by side comments).
16104 #-------------------------------------------------------
16105 if ( $next_nonblank_type eq '?' ) {
16106 set_closing_breakpoint($i_next_nonblank);
16108 elsif ( $types_to_go[$i_lowest] eq '?' ) {
16109 set_closing_breakpoint($i_lowest);
16112 #-------------------------------------------------------
16113 # ?/: rule 3 : if we break at a ':' then we save
16114 # its location for further work below. We may need to go
16115 # back and break at its '?'.
16116 #-------------------------------------------------------
16117 if ( $next_nonblank_type eq ':' ) {
16118 push @i_colon_breaks, $i_next_nonblank;
16120 elsif ( $types_to_go[$i_lowest] eq ':' ) {
16121 push @i_colon_breaks, $i_lowest;
16124 # here we should set breaks for all '?'/':' pairs which are
16125 # separated by this line
16129 # save this line segment, after trimming blanks at the ends
16131 ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
16133 ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
16135 # set a forced breakpoint at a container opening, if necessary, to
16136 # signal a break at a closing container. Excepting '(' for now.
16137 if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
16138 && !$forced_breakpoint_to_go[$i_lowest] )
16140 set_closing_breakpoint($i_lowest);
16143 # get ready to go again
16144 $i_begin = $i_lowest + 1;
16145 $last_break_strength = $lowest_strength;
16146 $i_last_break = $i_lowest;
16147 $leading_alignment_token = "";
16148 $leading_alignment_type = "";
16149 $lowest_next_token = '';
16150 $lowest_next_type = 'b';
16152 if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
16156 # update indentation size
16157 if ( $i_begin <= $imax ) {
16158 $leading_spaces = leading_spaces_to_go($i_begin);
16162 #-------------------------------------------------------
16163 # END of main loop to set continuation breakpoints
16164 # Now go back and make any necessary corrections
16165 #-------------------------------------------------------
16167 #-------------------------------------------------------
16168 # ?/: rule 4 -- if we broke at a ':', then break at
16169 # corresponding '?' unless this is a chain of ?: expressions
16170 #-------------------------------------------------------
16171 if (@i_colon_breaks) {
16173 # using a simple method for deciding if we are in a ?/: chain --
16174 # this is a chain if it has multiple ?/: pairs all in order;
16176 # Note that if line starts in a ':' we count that above as a break
16177 my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
16179 unless ($is_chain) {
16180 my @insert_list = ();
16181 foreach (@i_colon_breaks) {
16182 my $i_question = $mate_index_to_go[$_];
16183 if ( $i_question >= 0 ) {
16184 if ( $want_break_before{'?'} ) {
16185 $i_question = $iprev_to_go[$i_question];
16188 if ( $i_question >= 0 ) {
16189 push @insert_list, $i_question;
16192 insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
16196 return ( \@i_first, \@i_last, $colon_count );
16199 sub insert_additional_breaks {
16201 # this routine will add line breaks at requested locations after
16202 # sub set_continuation_breaks has made preliminary breaks.
16204 my ( $ri_break_list, $ri_first, $ri_last ) = @_;
16207 my $line_number = 0;
16208 foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
16210 $i_f = $ri_first->[$line_number];
16211 $i_l = $ri_last->[$line_number];
16212 while ( $i_break_left >= $i_l ) {
16215 # shouldn't happen unless caller passes bad indexes
16216 if ( $line_number >= @{$ri_last} ) {
16218 "Non-fatal program bug: couldn't set break at $i_break_left\n"
16220 report_definite_bug();
16223 $i_f = $ri_first->[$line_number];
16224 $i_l = $ri_last->[$line_number];
16227 # Do not leave a blank at the end of a line; back up if necessary
16228 if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
16230 my $i_break_right = $inext_to_go[$i_break_left];
16231 if ( $i_break_left >= $i_f
16232 && $i_break_left < $i_l
16233 && $i_break_right > $i_f
16234 && $i_break_right <= $i_l )
16236 splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
16237 splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
16243 sub set_closing_breakpoint {
16245 # set a breakpoint at a matching closing token
16246 # at present, this is only used to break at a ':' which matches a '?'
16247 my $i_break = shift;
16249 if ( $mate_index_to_go[$i_break] >= 0 ) {
16251 # CAUTION: infinite recursion possible here:
16252 # set_closing_breakpoint calls set_forced_breakpoint, and
16253 # set_forced_breakpoint call set_closing_breakpoint
16254 # ( test files attrib.t, BasicLyx.pm.html).
16255 # Don't reduce the '2' in the statement below
16256 if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
16258 # break before } ] and ), but sub set_forced_breakpoint will decide
16259 # to break before or after a ? and :
16260 my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
16261 set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
16265 my $type_sequence = $type_sequence_to_go[$i_break];
16266 if ($type_sequence) {
16267 my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
16268 $postponed_breakpoint{$type_sequence} = 1;
16274 sub compare_indentation_levels {
16276 # check to see if output line tabbing agrees with input line
16277 # this can be very useful for debugging a script which has an extra
16279 my ( $guessed_indentation_level, $structural_indentation_level ) = @_;
16280 if ( $guessed_indentation_level ne $structural_indentation_level ) {
16281 $last_tabbing_disagreement = $input_line_number;
16283 if ($in_tabbing_disagreement) {
16286 $tabbing_disagreement_count++;
16288 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
16289 write_logfile_entry(
16290 "Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
16293 $in_tabbing_disagreement = $input_line_number;
16294 $first_tabbing_disagreement = $in_tabbing_disagreement
16295 unless ($first_tabbing_disagreement);
16300 if ($in_tabbing_disagreement) {
16302 if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
16303 write_logfile_entry(
16304 "End indentation disagreement from input line $in_tabbing_disagreement\n"
16307 if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
16308 write_logfile_entry(
16309 "No further tabbing disagreements will be noted\n");
16312 $in_tabbing_disagreement = 0;