-#####################################################################
+####################################################################
#
# The Perl::Tidy::Formatter package adds indentation, whitespace, and
# line breaks to the token stream
{ #<<< A non-indenting brace to contain all lexical variables
use Carp;
-use English qw( -no_match_vars );
-our $VERSION = '20220613';
+use English qw( -no_match_vars );
+use List::Util qw( min max ); # min, max are in Perl 5.8
+our $VERSION = '20230309';
# The Tokenizer will be loaded with the Formatter
##use Perl::Tidy::Tokenizer; # for is_keyword()
my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
+ my $pkg = __PACKAGE__;
+
my $input_stream_name = get_input_stream_name();
Die(<<EOM);
which was called from line $line1 of sub '$subroutine2'
Message: '$msg'
This is probably an error introduced by a recent programming change.
-Perl::Tidy::Formatter.pm reports VERSION='$VERSION'.
+$pkg reports VERSION='$VERSION'.
==============================================================================
EOM
return;
} ## end sub Fault
+sub Fault_Warn {
+ my ($msg) = @_;
+
+ # This is the same as Fault except that it calls Warn instead of Die
+ # and returns.
+ my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
+ my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
+ my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
+ my $input_stream_name = get_input_stream_name();
+
+ Warn(<<EOM);
+==============================================================================
+While operating on input stream with name: '$input_stream_name'
+A fault was detected at line $line0 of sub '$subroutine1'
+in file '$filename1'
+which was called from line $line1 of sub '$subroutine2'
+Message: '$msg'
+This is probably an error introduced by a recent programming change.
+Perl::Tidy::Formatter.pm reports VERSION='$VERSION'.
+==============================================================================
+EOM
+
+ return;
+} ## end sub Fault_Warn
+
sub Exit {
my ($msg) = @_;
Perl::Tidy::Exit($msg);
# parameters. They remain constant as a file is being processed.
#-----------------------------------------------------------------
- # user parameters and shortcuts
+ # INITIALIZER: sub check_options
$rOpts,
+
+ # short-cut option variables
+ # INITIALIZER: sub initialize_global_option_vars
$rOpts_add_newlines,
$rOpts_add_whitespace,
+ $rOpts_add_trailing_commas,
$rOpts_blank_lines_after_opening_block,
$rOpts_block_brace_tightness,
$rOpts_block_brace_vertical_tightness,
+ $rOpts_brace_follower_vertical_tightness,
$rOpts_break_after_labels,
$rOpts_break_at_old_attribute_breakpoints,
$rOpts_break_at_old_comma_breakpoints,
$rOpts_closing_side_comment_maximum_text,
$rOpts_comma_arrow_breakpoints,
$rOpts_continuation_indentation,
+ $rOpts_cuddled_paren_brace,
$rOpts_delete_closing_side_comments,
$rOpts_delete_old_whitespace,
$rOpts_delete_side_comments,
+ $rOpts_delete_trailing_commas,
+ $rOpts_delete_weld_interfering_commas,
$rOpts_extended_continuation_indentation,
$rOpts_format_skipping,
$rOpts_freeze_whitespace,
$rOpts_outdent_static_block_comments,
$rOpts_recombine,
$rOpts_short_concatenation_item_length,
+ $rOpts_space_prototype_paren,
$rOpts_stack_closing_block_brace,
$rOpts_static_block_comments,
- $rOpts_sub_alias_list,
$rOpts_tee_block_comments,
$rOpts_tee_pod,
$rOpts_tee_side_comments,
$rOpts_variable_maximum_line_length,
- $rOpts_valign,
$rOpts_valign_code,
$rOpts_valign_side_comments,
$rOpts_whitespace_cycle,
$rOpts_extended_line_up_parentheses,
- # Static hashes initialized in a BEGIN block
+ # Static hashes
+ # INITIALIZER: BEGIN block
%is_assignment,
+ %is_non_list_type,
%is_if_unless_and_or_last_next_redo_return,
%is_if_elsif_else_unless_while_until_for_foreach,
%is_if_unless_while_until_for_foreach,
%is_counted_type,
%is_opening_sequence_token,
%is_closing_sequence_token,
+ %matching_token,
%is_container_label_type,
%is_die_confess_croak_warn,
%is_my_our_local,
-
+ %is_soft_keep_break_type,
+ %is_indirect_object_taker,
@all_operators,
-
- # Initialized in check_options. These are constants and could
- # just as well be initialized in a BEGIN block.
%is_do_follower,
%is_anon_sub_brace_follower,
%is_anon_sub_1_brace_follower,
%is_other_brace_follower,
- # Initialized and re-initialized in sub initialize_grep_and_friends;
+ # INITIALIZER: sub check_options
+ $controlled_comma_style,
+ %keep_break_before_type,
+ %keep_break_after_type,
+ %outdent_keyword,
+ %keyword_paren_inner_tightness,
+ %container_indentation_options,
+ %tightness,
+ %line_up_parentheses_control_hash,
+ $line_up_parentheses_control_is_lxpl,
+
# These can be modified by grep-alias-list
+ # INITIALIZER: sub initialize_grep_and_friends
%is_sort_map_grep,
%is_sort_map_grep_eval,
%is_sort_map_grep_eval_do,
%is_block_with_ci,
%is_keyword_returning_list,
- %block_type_map,
+ %block_type_map, # initialized in BEGIN, but may be changed
+ %want_one_line_block, # may be changed in prepare_cuddled_block_types
- # Initialized in sub initialize_whitespace_hashes;
- # Some can be modified according to user parameters.
+ # INITIALIZER: sub prepare_cuddled_block_types
+ $rcuddled_block_types,
+
+ # INITIALIZER: sub initialize_whitespace_hashes
%binary_ws_rules,
%want_left_space,
%want_right_space,
- # Configured in sub initialize_bond_strength_hashes
+ # INITIALIZER: sub initialize_bond_strength_hashes
%right_bond_strength,
%left_bond_strength,
- # Hashes for -kbb=s and -kba=s
- %keep_break_before_type,
- %keep_break_after_type,
-
- # Initialized in check_options, modified by prepare_cuddled_block_types:
- %want_one_line_block,
-
- # Initialized in sub prepare_cuddled_block_types
- $rcuddled_block_types,
-
- # Initialized and configured in check_options
- %outdent_keyword,
- %keyword_paren_inner_tightness,
-
+ # INITIALIZER: sub initialize_token_break_preferences
%want_break_before,
-
%break_before_container_types,
- %container_indentation_options,
+ # INITIALIZER: sub initialize_space_after_keyword
%space_after_keyword,
- %tightness,
- %matching_token,
-
+ # INITIALIZED BY initialize_global_option_vars
%opening_vertical_tightness,
%closing_vertical_tightness,
%closing_token_indentation,
$some_closing_token_indentation,
-
%opening_token_right,
%stack_opening_token,
%stack_closing_token,
+ # INITIALIZER: sub initialize_weld_nested_exclusion_rules
%weld_nested_exclusion_rules,
- %line_up_parentheses_control_hash,
- $line_up_parentheses_control_is_lxpl,
+
+ # INITIALIZER: sub initialize_weld_fat_comma_rules
+ %weld_fat_comma_rules,
+
+ # INITIALIZER: sub initialize_trailing_comma_rules
+ %trailing_comma_rules,
# regex patterns for text identification.
- # Most are initialized in a sub make_**_pattern during configuration.
# Most can be configured by user parameters.
+ # Most are initialized in a sub make_**_pattern during configuration.
+
+ # INITIALIZER: sub make_sub_matching_pattern
$SUB_PATTERN,
$ASUB_PATTERN,
+
+ # INITIALIZER: make_static_block_comment_pattern
$static_block_comment_pattern,
+
+ # INITIALIZER: sub make_static_side_comment_pattern
$static_side_comment_pattern,
+
+ # INITIALIZER: make_format_skipping_pattern
$format_skipping_pattern_begin,
$format_skipping_pattern_end,
+
+ # INITIALIZER: sub make_non_indenting_brace_pattern
$non_indenting_brace_pattern,
+
+ # INITIALIZER: sub make_bl_pattern
$bl_exclusion_pattern,
+
+ # INITIALIZER: make_bl_pattern
$bl_pattern,
+
+ # INITIALIZER: sub make_bli_pattern
$bli_exclusion_pattern,
+
+ # INITIALIZER: sub make_bli_pattern
$bli_pattern,
+
+ # INITIALIZER: sub make_block_brace_vertical_tightness_pattern
$block_brace_vertical_tightness_pattern,
+
+ # INITIALIZER: sub make_blank_line_pattern
$blank_lines_after_opening_block_pattern,
$blank_lines_before_closing_block_pattern,
+
+ # INITIALIZER: sub make_keyword_group_list_pattern
$keyword_group_list_pattern,
$keyword_group_list_comment_pattern,
+
+ # INITIALIZER: sub make_closing_side_comment_prefix
$closing_side_comment_prefix_pattern,
+
+ # INITIALIZER: sub make_closing_side_comment_list_pattern
$closing_side_comment_list_pattern,
# Table to efficiently find indentation and max line length
# from level.
+ # INITIALIZER: sub initialize_line_length_vars
@maximum_line_length_at_level,
@maximum_text_length_at_level,
$stress_level_alpha,
$stress_level_beta,
+ $high_stress_level,
# Total number of sequence items in a weld, for quick checks
+ # INITIALIZER: weld_containers
$total_weld_count,
#--------------------------------------------------------
#--------------------------------------------------------
# These are re-initialized for each batch of code
- # in sub initialize_batch_variables.
+ # INITIALIZER: sub initialize_batch_variables
$max_index_to_go,
@block_type_to_go,
@type_sequence_to_go,
@K_to_go,
@types_to_go,
@inext_to_go,
- @iprev_to_go,
@parent_seqno_to_go,
# forced breakpoint variables associated with each batch of code
# Number of token variables; must be last in list:
_NVARS => $i++,
};
-}
+} ## end BEGIN
BEGIN {
my $i = 0;
use constant {
_rlines_ => $i++,
- _rlines_new_ => $i++,
_rLL_ => $i++,
_Klimit_ => $i++,
_rdepth_of_opening_seqno_ => $i++,
_K_opening_ternary_ => $i++,
_K_closing_ternary_ => $i++,
_K_first_seq_item_ => $i++,
- _rK_phantom_semicolons_ => $i++,
_rtype_count_by_seqno_ => $i++,
_ris_function_call_paren_ => $i++,
_rlec_count_by_seqno_ => $i++,
_ris_broken_container_ => $i++,
_ris_permanently_broken_ => $i++,
+ _rblank_and_comment_count_ => $i++,
_rhas_list_ => $i++,
_rhas_broken_list_ => $i++,
_rhas_broken_list_with_lec_ => $i++,
+ _rfirst_comma_line_index_ => $i++,
_rhas_code_block_ => $i++,
_rhas_broken_code_block_ => $i++,
_rhas_ternary_ => $i++,
_rparent_of_seqno_ => $i++,
_rchildren_of_seqno_ => $i++,
_ris_list_by_seqno_ => $i++,
+ _ris_cuddled_closing_brace_ => $i++,
_rbreak_container_ => $i++,
_rshort_nested_ => $i++,
_length_function_ => $i++,
_radjusted_levels_ => $i++,
_this_batch_ => $i++,
+ _ris_special_identifier_token_ => $i++,
_last_output_short_opening_token_ => $i++,
- _last_line_leading_type_ => $i++,
- _last_line_leading_level_ => $i++,
- _last_last_line_leading_level_ => $i++,
+ _last_line_leading_type_ => $i++,
+ _last_line_leading_level_ => $i++,
_added_semicolon_count_ => $i++,
_first_added_semicolon_at_ => $i++,
_rKrange_code_without_comments_ => $i++,
_rbreak_before_Kfirst_ => $i++,
_rbreak_after_Klast_ => $i++,
- _rwant_container_open_ => $i++,
_converged_ => $i++,
_rstarting_multiline_qw_seqno_by_K_ => $i++,
_rcollapsed_length_by_seqno_ => $i++,
_rbreak_before_container_by_seqno_ => $i++,
- _ris_essential_old_breakpoint_ => $i++,
_roverride_cab3_ => $i++,
_ris_assigned_structure_ => $i++,
+ _ris_short_broken_eval_block_ => $i++,
+ _ris_bare_trailing_comma_by_seqno_ => $i++,
- _rseqno_non_indenting_brace_by_ix_ => $i++,
- _rreduce_vertical_tightness_by_seqno_ => $i++,
+ _rseqno_non_indenting_brace_by_ix_ => $i++,
+ _rmax_vertical_tightness_ => $i++,
+
+ _no_vertical_tightness_flags_ => $i++,
_LAST_SELF_INDEX_ => $i - 1,
};
-}
+} ## end BEGIN
BEGIN {
_rix_seqno_controlling_ci_ => $i++,
_batch_CODE_type_ => $i++,
_ri_starting_one_line_block_ => $i++,
+ _runmatched_opening_indexes_ => $i++,
};
-}
+} ## end BEGIN
BEGIN {
);
@is_assignment{@q} = (1) x scalar(@q);
+ # a hash needed by break_lists for efficiency:
+ push @q, qw{ ; < > ~ f };
+ @is_non_list_type{@q} = (1) x scalar(@q);
+
@q = qw(is if unless and or err last next redo return);
@is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
@q = qw< } ) ] : >;
@is_closing_sequence_token{@q} = (1) x scalar(@q);
+ %matching_token = (
+ '{' => '}',
+ '(' => ')',
+ '[' => ']',
+ '?' => ':',
+
+ '}' => '{',
+ ')' => '(',
+ ']' => '[',
+ ':' => '?',
+ );
+
# a hash needed by sub break_lists for labeling containers
@q = qw( k => && || ? : . );
@is_container_label_type{@q} = (1) x scalar(@q);
push @q, ',';
@is_counted_type{@q} = (1) x scalar(@q);
-}
+ # Tokens where --keep-old-break-xxx flags make soft breaks instead
+ # of hard breaks. See b1433 and b1436.
+ # NOTE: $type is used as the hash key for now; if other container tokens
+ # are added it might be necessary to use a token/type mixture.
+ @q = qw# -> ? : && || + - / * #;
+ @is_soft_keep_break_type{@q} = (1) x scalar(@q);
+
+ # these functions allow an identifier in the indirect object slot
+ @q = qw( print printf sort exec system say);
+ @is_indirect_object_taker{@q} = (1) x scalar(@q);
+
+ # Define here tokens which may follow the closing brace of a do statement
+ # on the same line, as in:
+ # } while ( $something);
+ my @dof = qw(until while unless if ; : );
+ push @dof, ',';
+ @is_do_follower{@dof} = (1) x scalar(@dof);
+
+ # what can follow a multi-line anonymous sub definition closing curly:
+ my @asf = qw# ; : => or and && || ~~ !~~ ) #;
+ push @asf, ',';
+ @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf);
+
+ # what can follow a one-line anonymous sub closing curly:
+ # one-line anonymous subs also have ']' here...
+ # see tk3.t and PP.pm
+ my @asf1 = qw# ; : => or and && || ) ] ~~ !~~ #;
+ push @asf1, ',';
+ @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1);
+
+ # What can follow a closing curly of a block
+ # which is not an if/elsif/else/do/sort/map/grep/eval/sub
+ # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
+ my @obf = qw# ; : => or and && || ) #;
+ push @obf, ',';
+ @is_other_brace_follower{@obf} = (1) x scalar(@obf);
+
+} ## end BEGIN
{ ## begin closure to count instances
initialize_undo_ci();
initialize_process_line_of_CODE();
initialize_grind_batch_of_CODE();
- initialize_final_indentation_adjustment();
+ initialize_get_final_indentation();
initialize_postponed_breakpoint();
initialize_batch_variables();
initialize_write_line();
file_writer_object => $file_writer_object,
logger_object => $logger_object,
diagnostics_object => $diagnostics_object,
- length_function => $length_function
+ length_function => $length_function,
);
write_logfile_entry("\nStarting tokenization pass...\n");
my $self = [];
# Basic data structures...
- $self->[_rlines_] = []; # = ref to array of lines of the file
- $self->[_rlines_new_] = []; # = ref to array of output lines
+ $self->[_rlines_] = []; # = ref to array of lines of the file
# 'rLL' = reference to the continuous liner array of all tokens in a file.
# 'LL' stands for 'Linked List'. Using a linked list was a disaster, but
$self->[_K_closing_ternary_] = {};
$self->[_K_first_seq_item_] = undef; # K of first token with a sequence #
- # Array of phantom semicolons, in case we ever need to undo them
- $self->[_rK_phantom_semicolons_] = undef;
-
# 'rSS' is the 'Signed Sequence' list, a continuous list of all sequence
# numbers with + or - indicating opening or closing. This list represents
# the entire container tree and is invariant under reformatting. It can be
$self->[_rlec_count_by_seqno_] = {};
$self->[_ris_broken_container_] = {};
$self->[_ris_permanently_broken_] = {};
+ $self->[_rblank_and_comment_count_] = {};
$self->[_rhas_list_] = {};
$self->[_rhas_broken_list_] = {};
$self->[_rhas_broken_list_with_lec_] = {};
+ $self->[_rfirst_comma_line_index_] = {};
$self->[_rhas_code_block_] = {};
$self->[_rhas_broken_code_block_] = {};
$self->[_rhas_ternary_] = {};
$self->[_rparent_of_seqno_] = {};
$self->[_rchildren_of_seqno_] = {};
$self->[_ris_list_by_seqno_] = {};
+ $self->[_ris_cuddled_closing_brace_] = {};
$self->[_rbreak_container_] = {}; # prevent one-line blocks
$self->[_rshort_nested_] = {}; # blocks not forced open
$self->[_this_batch_] = [];
# Memory of processed text...
- $self->[_last_last_line_leading_level_] = 0;
+ $self->[_ris_special_identifier_token_] = {};
$self->[_last_line_leading_level_] = 0;
$self->[_last_line_leading_type_] = '#';
$self->[_last_output_short_opening_token_] = 0;
$self->[_rKrange_code_without_comments_] = [];
$self->[_rbreak_before_Kfirst_] = {};
$self->[_rbreak_after_Klast_] = {};
- $self->[_rwant_container_open_] = {};
$self->[_converged_] = 0;
# qw stuff
$self->[_rcollapsed_length_by_seqno_] = {};
$self->[_rbreak_before_container_by_seqno_] = {};
- $self->[_ris_essential_old_breakpoint_] = {};
$self->[_roverride_cab3_] = {};
$self->[_ris_assigned_structure_] = {};
+ $self->[_ris_short_broken_eval_block_] = {};
+ $self->[_ris_bare_trailing_comma_by_seqno_] = {};
- $self->[_rseqno_non_indenting_brace_by_ix_] = {};
- $self->[_rreduce_vertical_tightness_by_seqno_] = {};
+ $self->[_rseqno_non_indenting_brace_by_ix_] = {};
+ $self->[_rmax_vertical_tightness_] = {};
+
+ $self->[_no_vertical_tightness_flags_] = 0;
# This flag will be updated later by a call to get_save_logfile()
$self->[_save_logfile_] = defined($logger_object);
);
@valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys);
- }
+ } ## end BEGIN
sub check_line_hashes {
my $self = shift;
$input_stream_name = $logger_object->get_input_stream_name();
}
return $input_stream_name;
- }
+ } ## end sub get_input_stream_name
# interface to Perl::Tidy::Logger routines
sub warning {
$logger_object->complain($msg);
}
return;
- }
+ } ## end sub complain
sub write_logfile_entry {
my @msg = @_;
$logger_object->write_logfile_entry(@msg);
}
return;
- }
+ } ## end sub write_logfile_entry
sub get_saw_brace_error {
if ($logger_object) {
return $logger_object->get_saw_brace_error();
}
return;
- }
+ } ## end sub get_saw_brace_error
sub we_are_at_the_last_line {
if ($logger_object) {
$logger_object->we_are_at_the_last_line();
}
return;
- }
+ } ## end sub we_are_at_the_last_line
} ## end closure for logger routines
$diagnostics_object->write_diagnostics($msg);
}
return;
- }
+ } ## end sub write_diagnostics
} ## end closure for diagnostics routines
sub get_convergence_check {
return $self->[_converged_];
}
-sub get_added_semicolon_count {
- my $self = shift;
- return $self->[_added_semicolon_count_];
-}
-
sub get_output_line_number {
my ($self) = @_;
my $vao = $self->[_vertical_aligner_object_];
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->want_blank_line();
return;
-}
+} ## end sub want_blank_line
sub write_unindented_line {
my ( $self, $line ) = @_;
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->write_line($line);
return;
-}
+} ## end sub write_unindented_line
sub consecutive_nonblank_lines {
my ($self) = @_;
my $vao = $self->[_vertical_aligner_object_];
return $file_writer_object->get_consecutive_nonblank_lines() +
$vao->get_cached_line_count();
-}
-
-sub max {
- my (@vals) = @_;
- my $max = shift @vals;
- for (@vals) { $max = $_ > $max ? $_ : $max }
- return $max;
-}
-
-sub min {
- my (@vals) = @_;
- my $min = shift @vals;
- for (@vals) { $min = $_ < $min ? $_ : $min }
- return $min;
-}
+} ## end sub consecutive_nonblank_lines
sub split_words {
# and to configure the control hashes to them.
$rOpts = shift;
+ $controlled_comma_style = 0;
+
initialize_whitespace_hashes();
initialize_bond_strength_hashes();
# This function must be called early to get hashes with grep initialized
- initialize_grep_and_friends( $rOpts->{'grep-alias-list'} );
+ initialize_grep_and_friends();
# Make needed regex patterns for matching text.
# NOTE: sub_matching_patterns must be made first because later patterns use
# them; see RT #133130.
- make_sub_matching_pattern();
+ make_sub_matching_pattern(); # must be first pattern made
make_static_block_comment_pattern();
make_static_side_comment_pattern();
make_closing_side_comment_prefix();
}
make_bli_pattern();
+
make_bl_pattern();
+
make_block_brace_vertical_tightness_pattern();
+
make_blank_line_pattern();
- make_keyword_group_list_pattern();
- # Make initial list of desired one line block types
- # They will be modified by 'prepare_cuddled_block_types'
- # NOTE: this line must come after is_sort_map_grep_eval is
- # initialized in sub 'initialize_grep_and_friends'
- %want_one_line_block = %is_sort_map_grep_eval;
+ make_keyword_group_list_pattern();
prepare_cuddled_block_types();
+
if ( $rOpts->{'dump-cuddled-block-list'} ) {
dump_cuddled_block_list(*STDOUT);
Exit(0);
Exit(0);
}
- # default keywords for which space is introduced before an opening paren
- # (at present, including them messes up vertical alignment)
- my @sak = qw(my local our and or xor err eq ne if else elsif until
- unless while for foreach return switch case given when catch);
- %space_after_keyword = map { $_ => 1 } @sak;
-
- # first remove any or all of these if desired
- if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
-
- # -nsak='*' selects all the above keywords
- if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) }
- @space_after_keyword{@q} = (0) x scalar(@q);
- }
-
- # then allow user to add to these defaults
- if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) {
- @space_after_keyword{@q} = (1) x scalar(@q);
- }
-
- # implement user break preferences
- my $break_after = sub {
- my @toks = @_;
- foreach my $tok (@toks) {
- if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
- my $lbs = $left_bond_strength{$tok};
- my $rbs = $right_bond_strength{$tok};
- if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
- ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
- ( $lbs, $rbs );
- }
- }
- return;
- };
-
- my $break_before = sub {
- my @toks = @_;
- foreach my $tok (@toks) {
- my $lbs = $left_bond_strength{$tok};
- my $rbs = $right_bond_strength{$tok};
- if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
- ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
- ( $lbs, $rbs );
- }
- }
- return;
- };
-
- $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
- $break_before->(@all_operators)
- if ( $rOpts->{'break-before-all-operators'} );
-
- $break_after->( split_words( $rOpts->{'want-break-after'} ) );
- $break_before->( split_words( $rOpts->{'want-break-before'} ) );
-
- # make note if breaks are before certain key types
- %want_break_before = ();
- foreach my $tok ( @all_operators, ',' ) {
- $want_break_before{$tok} =
- $left_bond_strength{$tok} < $right_bond_strength{$tok};
- }
-
- # Coordinate ?/: breaks, which must be similar
- # The small strength 0.01 which is added is 1% of the strength of one
- # indentation level and seems to work okay.
- if ( !$want_break_before{':'} ) {
- $want_break_before{'?'} = $want_break_before{':'};
- $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
- $left_bond_strength{'?'} = NO_BREAK;
- }
+ initialize_space_after_keyword();
- # Only make a hash entry for the next parameters if values are defined.
- # That allows a quick check to be made later.
- %break_before_container_types = ();
- for ( $rOpts->{'break-before-hash-brace'} ) {
- $break_before_container_types{'{'} = $_ if $_ && $_ > 0;
- }
- for ( $rOpts->{'break-before-square-bracket'} ) {
- $break_before_container_types{'['} = $_ if $_ && $_ > 0;
- }
- for ( $rOpts->{'break-before-paren'} ) {
- $break_before_container_types{'('} = $_ if $_ && $_ > 0;
- }
+ initialize_token_break_preferences();
#--------------------------------------------------------------
# The combination -lp -iob -vmll -bbx=2 can be unstable (b1266)
##Warn("Increased -ci=n to n=2 for stability with -lp and -vmll\n");
}
+ #-----------------------------------------------------------
+ # The combination -lp -vmll -atc -dtc can be unstable
+ #-----------------------------------------------------------
+ # This fixes b1386 b1387 b1388 which had -wtc='b'
+ # Updated to to include any -wtc to fix b1426
+ if ( $rOpts->{'variable-maximum-line-length'}
+ && $rOpts->{'line-up-parentheses'}
+ && $rOpts->{'add-trailing-commas'}
+ && $rOpts->{'delete-trailing-commas'}
+ && $rOpts->{'want-trailing-commas'} )
+ {
+ $rOpts->{'delete-trailing-commas'} = 0;
+## Issuing a warning message causes trouble with test cases, and this combo is
+## so rare that it is unlikely to not occur in practice. So skip warning.
+## Warn(
+##"The combination -vmll -lp -atc -dtc can be unstable; turning off -dtc\n"
+## );
+ }
+
%container_indentation_options = ();
foreach my $pair (
[ 'break-before-hash-brace-and-indent', '{' ],
# (1) -lp is not compatible with opt=2, silently set to opt=0
# (2) opt=0 and 2 give same result if -i=-ci; but opt=0 is faster
+ # (3) set opt=0 if -i < -ci (can be unstable, case b1355)
if ( $opt == 2 ) {
- if ( $rOpts->{'line-up-parentheses'}
- || $rOpts->{'indent-columns'} ==
- $rOpts->{'continuation-indentation'} )
+ if (
+ $rOpts->{'line-up-parentheses'}
+ || ( $rOpts->{'indent-columns'} <=
+ $rOpts->{'continuation-indentation'} )
+ )
{
$opt = 0;
}
}
}
- # Define here tokens which may follow the closing brace of a do statement
- # on the same line, as in:
- # } while ( $something);
- my @dof = qw(until while unless if ; : );
- push @dof, ',';
- @is_do_follower{@dof} = (1) x scalar(@dof);
-
- # what can follow a multi-line anonymous sub definition closing curly:
- my @asf = qw# ; : => or and && || ~~ !~~ ) #;
- push @asf, ',';
- @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf);
+ $right_bond_strength{'{'} = WEAK;
+ $left_bond_strength{'{'} = VERY_STRONG;
- # what can follow a one-line anonymous sub closing curly:
- # one-line anonymous subs also have ']' here...
- # see tk3.t and PP.pm
- my @asf1 = qw# ; : => or and && || ) ] ~~ !~~ #;
- push @asf1, ',';
- @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1);
-
- # What can follow a closing curly of a block
- # which is not an if/elsif/else/do/sort/map/grep/eval/sub
- # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
- my @obf = qw# ; : => or and && || ) #;
- push @obf, ',';
- @is_other_brace_follower{@obf} = (1) x scalar(@obf);
-
- $right_bond_strength{'{'} = WEAK;
- $left_bond_strength{'{'} = VERY_STRONG;
-
- # make -l=0 equal to -l=infinite
- if ( !$rOpts->{'maximum-line-length'} ) {
- $rOpts->{'maximum-line-length'} = 1_000_000;
- }
+ # make -l=0 equal to -l=infinite
+ if ( !$rOpts->{'maximum-line-length'} ) {
+ $rOpts->{'maximum-line-length'} = 1_000_000;
+ }
# make -lbl=0 equal to -lbl=infinite
if ( !$rOpts->{'long-block-line-count'} ) {
$rOpts->{'long-block-line-count'} = 1_000_000;
}
- my $ole = $rOpts->{'output-line-ending'};
- if ($ole) {
- my %endings = (
- dos => "\015\012",
- win => "\015\012",
- mac => "\015",
- unix => "\012",
- );
-
- # Patch for RT #99514, a memoization issue.
- # Normally, the user enters one of 'dos', 'win', etc, and we change the
- # value in the options parameter to be the corresponding line ending
- # character. But, if we are using memoization, on later passes through
- # here the option parameter will already have the desired ending
- # character rather than the keyword 'dos', 'win', etc. So
- # we must check to see if conversion has already been done and, if so,
- # bypass the conversion step.
- my %endings_inverted = (
- "\015\012" => 'dos',
- "\015\012" => 'win',
- "\015" => 'mac',
- "\012" => 'unix',
- );
-
- if ( defined( $endings_inverted{$ole} ) ) {
-
- # we already have valid line ending, nothing more to do
- }
- else {
- $ole = lc $ole;
- unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
- my $str = join SPACE, keys %endings;
- Die(<<EOM);
-Unrecognized line ending '$ole'; expecting one of: $str
-EOM
- }
- if ( $rOpts->{'preserve-line-endings'} ) {
- Warn("Ignoring -ple; conflicts with -ole\n");
- $rOpts->{'preserve-line-endings'} = undef;
- }
- }
- }
-
# hashes used to simplify setting whitespace
%tightness = (
'{' => $rOpts->{'brace-tightness'},
'[' => $rOpts->{'square-bracket-tightness'},
']' => $rOpts->{'square-bracket-tightness'},
);
- %matching_token = (
- '{' => '}',
- '(' => ')',
- '[' => ']',
- '?' => ':',
- );
if ( $rOpts->{'ignore-old-breakpoints'} ) {
initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-after'},
'kba', \%keep_break_after_type );
- #------------------------------------------------------------
- # Make global vars for frequently used options for efficiency
- #------------------------------------------------------------
-
- $rOpts_add_newlines = $rOpts->{'add-newlines'};
- $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
- $rOpts_blank_lines_after_opening_block =
- $rOpts->{'blank-lines-after-opening-block'};
- $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
- $rOpts_block_brace_vertical_tightness =
- $rOpts->{'block-brace-vertical-tightness'};
- $rOpts_break_after_labels = $rOpts->{'break-after-labels'};
- $rOpts_break_at_old_attribute_breakpoints =
- $rOpts->{'break-at-old-attribute-breakpoints'};
- $rOpts_break_at_old_comma_breakpoints =
- $rOpts->{'break-at-old-comma-breakpoints'};
- $rOpts_break_at_old_keyword_breakpoints =
- $rOpts->{'break-at-old-keyword-breakpoints'};
- $rOpts_break_at_old_logical_breakpoints =
- $rOpts->{'break-at-old-logical-breakpoints'};
- $rOpts_break_at_old_semicolon_breakpoints =
- $rOpts->{'break-at-old-semicolon-breakpoints'};
- $rOpts_break_at_old_ternary_breakpoints =
- $rOpts->{'break-at-old-ternary-breakpoints'};
- $rOpts_break_open_compact_parens = $rOpts->{'break-open-compact-parens'};
- $rOpts_closing_side_comments = $rOpts->{'closing-side-comments'};
- $rOpts_closing_side_comment_else_flag =
- $rOpts->{'closing-side-comment-else-flag'};
- $rOpts_closing_side_comment_maximum_text =
- $rOpts->{'closing-side-comment-maximum-text'};
- $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
- $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
- $rOpts_delete_closing_side_comments =
- $rOpts->{'delete-closing-side-comments'};
- $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
- $rOpts_extended_continuation_indentation =
- $rOpts->{'extended-continuation-indentation'};
- $rOpts_delete_side_comments = $rOpts->{'delete-side-comments'};
- $rOpts_format_skipping = $rOpts->{'format-skipping'};
- $rOpts_freeze_whitespace = $rOpts->{'freeze-whitespace'};
- $rOpts_function_paren_vertical_alignment =
- $rOpts->{'function-paren-vertical-alignment'};
- $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
- $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
- $rOpts_ignore_side_comment_lengths =
- $rOpts->{'ignore-side-comment-lengths'};
- $rOpts_indent_closing_brace = $rOpts->{'indent-closing-brace'};
- $rOpts_indent_columns = $rOpts->{'indent-columns'};
- $rOpts_indent_only = $rOpts->{'indent-only'};
- $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
- $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
- $rOpts_extended_line_up_parentheses =
- $rOpts->{'extended-line-up-parentheses'};
- $rOpts_logical_padding = $rOpts->{'logical-padding'};
- $rOpts_maximum_consecutive_blank_lines =
- $rOpts->{'maximum-consecutive-blank-lines'};
- $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
- $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
- $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
- $rOpts_opening_brace_always_on_right =
- $rOpts->{'opening-brace-always-on-right'};
- $rOpts_outdent_keywords = $rOpts->{'outdent-keywords'};
- $rOpts_outdent_labels = $rOpts->{'outdent-labels'};
- $rOpts_outdent_long_comments = $rOpts->{'outdent-long-comments'};
- $rOpts_outdent_long_quotes = $rOpts->{'outdent-long-quotes'};
- $rOpts_outdent_static_block_comments =
- $rOpts->{'outdent-static-block-comments'};
- $rOpts_recombine = $rOpts->{'recombine'};
- $rOpts_short_concatenation_item_length =
- $rOpts->{'short-concatenation-item-length'};
- $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
- $rOpts_static_block_comments = $rOpts->{'static-block-comments'};
- $rOpts_sub_alias_list = $rOpts->{'sub-alias-list'};
- $rOpts_tee_block_comments = $rOpts->{'tee-block-comments'};
- $rOpts_tee_pod = $rOpts->{'tee-pod'};
- $rOpts_tee_side_comments = $rOpts->{'tee-side-comments'};
- $rOpts_valign = $rOpts->{'valign'};
- $rOpts_valign_code = $rOpts->{'valign-code'};
- $rOpts_valign_side_comments = $rOpts->{'valign-side-comments'};
- $rOpts_variable_maximum_line_length =
- $rOpts->{'variable-maximum-line-length'};
-
- # Note that both opening and closing tokens can access the opening
- # and closing flags of their container types.
- %opening_vertical_tightness = (
- '(' => $rOpts->{'paren-vertical-tightness'},
- '{' => $rOpts->{'brace-vertical-tightness'},
- '[' => $rOpts->{'square-bracket-vertical-tightness'},
- ')' => $rOpts->{'paren-vertical-tightness'},
- '}' => $rOpts->{'brace-vertical-tightness'},
- ']' => $rOpts->{'square-bracket-vertical-tightness'},
- );
-
- %closing_vertical_tightness = (
- '(' => $rOpts->{'paren-vertical-tightness-closing'},
- '{' => $rOpts->{'brace-vertical-tightness-closing'},
- '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
- ')' => $rOpts->{'paren-vertical-tightness-closing'},
- '}' => $rOpts->{'brace-vertical-tightness-closing'},
- ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
- );
-
- # assume flag for '>' same as ')' for closing qw quotes
- %closing_token_indentation = (
- ')' => $rOpts->{'closing-paren-indentation'},
- '}' => $rOpts->{'closing-brace-indentation'},
- ']' => $rOpts->{'closing-square-bracket-indentation'},
- '>' => $rOpts->{'closing-paren-indentation'},
- );
-
- # flag indicating if any closing tokens are indented
- $some_closing_token_indentation =
- $rOpts->{'closing-paren-indentation'}
- || $rOpts->{'closing-brace-indentation'}
- || $rOpts->{'closing-square-bracket-indentation'}
- || $rOpts->{'indent-closing-brace'};
-
- %opening_token_right = (
- '(' => $rOpts->{'opening-paren-right'},
- '{' => $rOpts->{'opening-hash-brace-right'},
- '[' => $rOpts->{'opening-square-bracket-right'},
- );
-
- %stack_opening_token = (
- '(' => $rOpts->{'stack-opening-paren'},
- '{' => $rOpts->{'stack-opening-hash-brace'},
- '[' => $rOpts->{'stack-opening-square-bracket'},
- );
-
- %stack_closing_token = (
- ')' => $rOpts->{'stack-closing-paren'},
- '}' => $rOpts->{'stack-closing-hash-brace'},
- ']' => $rOpts->{'stack-closing-square-bracket'},
- );
-
- # Create a table of maximum line length vs level for later efficient use.
- # We will make the tables very long to be sure it will not be exceeded.
- # But we have to choose a fixed length. A check will be made at the start
- # of sub 'finish_formatting' to be sure it is not exceeded. Note, some of
- # my standard test problems have indentation levels of about 150, so this
- # should be fairly large. If the choice of a maximum level ever becomes
- # an issue then these table values could be returned in a sub with a simple
- # memoization scheme.
-
- # Also create a table of the maximum spaces available for text due to the
- # level only. If a line has continuation indentation, then that space must
- # be subtracted from the table value. This table is used for preliminary
- # estimates in welding, extended_ci, BBX, and marking short blocks.
- use constant LEVEL_TABLE_MAX => 1000;
-
- # The basic scheme:
- foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
- my $indent = $level * $rOpts_indent_columns;
- $maximum_line_length_at_level[$level] = $rOpts_maximum_line_length;
- $maximum_text_length_at_level[$level] =
- $rOpts_maximum_line_length - $indent;
- }
-
- # Correct the maximum_text_length table if the -wc=n flag is used
- $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
- if ($rOpts_whitespace_cycle) {
- if ( $rOpts_whitespace_cycle > 0 ) {
- foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
- my $level_mod = $level % $rOpts_whitespace_cycle;
- my $indent = $level_mod * $rOpts_indent_columns;
- $maximum_text_length_at_level[$level] =
- $rOpts_maximum_line_length - $indent;
- }
- }
- else {
- $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'} = 0;
+ # Modify %keep_break_before and %keep_break_after to avoid conflicts
+ # with %want_break_before; fixes b1436.
+ # This became necessary after breaks for some tokens were converted
+ # from hard to soft (see b1433).
+ # We could do this for all tokens, but to minimize changes to existing
+ # code we currently only do this for the soft break tokens.
+ foreach my $key ( keys %keep_break_before_type ) {
+ if ( defined( $want_break_before{$key} )
+ && !$want_break_before{$key}
+ && $is_soft_keep_break_type{$key} )
+ {
+ $keep_break_after_type{$key} = $keep_break_before_type{$key};
+ delete $keep_break_before_type{$key};
}
}
-
- # Correct the tables if the -vmll flag is used. These values override the
- # previous values.
- if ($rOpts_variable_maximum_line_length) {
- foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
- $maximum_text_length_at_level[$level] = $rOpts_maximum_line_length;
- $maximum_line_length_at_level[$level] =
- $rOpts_maximum_line_length + $level * $rOpts_indent_columns;
+ foreach my $key ( keys %keep_break_after_type ) {
+ if ( defined( $want_break_before{$key} )
+ && $want_break_before{$key}
+ && $is_soft_keep_break_type{$key} )
+ {
+ $keep_break_before_type{$key} = $keep_break_after_type{$key};
+ delete $keep_break_after_type{$key};
}
}
- # Define two measures of indentation level, alpha and beta, at which some
- # formatting features come under stress and need to start shutting down.
- # Some combination of the two will be used to shut down different
- # formatting features.
- # Put a reasonable upper limit on stress level (say 100) in case the
- # whitespace-cycle variable is used.
- my $stress_level_limit = min( 100, LEVEL_TABLE_MAX );
+ $controlled_comma_style ||= $keep_break_before_type{','};
+ $controlled_comma_style ||= $keep_break_after_type{','};
- # Find stress_level_alpha, targeted at very short maximum line lengths.
- $stress_level_alpha = $stress_level_limit + 1;
- foreach my $level_test ( 0 .. $stress_level_limit ) {
- my $max_len = $maximum_text_length_at_level[ $level_test + 1 ];
- my $excess_inside_space =
- $max_len -
- $rOpts_continuation_indentation -
- $rOpts_indent_columns - 8;
- if ( $excess_inside_space <= 0 ) {
- $stress_level_alpha = $level_test;
- last;
- }
- }
+ initialize_global_option_vars();
- # Find stress level beta, a stress level targeted at formatting
- # at deep levels near the maximum line length. We start increasing
- # from zero and stop at the first level which shows no more space.
+ initialize_line_length_vars(); # after 'initialize_global_option_vars'
- # 'const' is a fixed number of spaces for a typical variable.
- # Cases b1197-b1204 work ok with const=12 but not with const=8
- my $const = 16;
- my $denom = max( 1, $rOpts_indent_columns );
- $stress_level_beta = 0;
- foreach my $level ( 0 .. $stress_level_limit ) {
- my $remaining_cycles = max(
- 0,
- (
- $maximum_text_length_at_level[$level] -
- $rOpts_continuation_indentation - $const
- ) / $denom
- );
- last if ( $remaining_cycles <= 3 ); # 2 does not work
- $stress_level_beta = $level;
- }
+ initialize_trailing_comma_rules(); # after 'initialize_line_length_vars'
initialize_weld_nested_exclusion_rules();
+ initialize_weld_fat_comma_rules();
+
%line_up_parentheses_control_hash = ();
$line_up_parentheses_control_is_lxpl = 1;
my $lpxl = $rOpts->{'line-up-parentheses-exclusion-list'};
use constant ALIGN_GREP_ALIASES => 0;
sub initialize_grep_and_friends {
- my ($str) = @_;
# Initialize or re-initialize hashes with 'grep' and grep aliases. This
# must be done after each set of options because new grep aliases may be
# used.
- # re-initialize the hash ... this is critical!
+ # re-initialize the hashes ... this is critical!
%is_sort_map_grep = ();
my @q = qw(sort map grep);
@is_sort_map_grep{@q} = (1) x scalar(@q);
+ my $olbxl = $rOpts->{'one-line-block-exclusion-list'};
+ my %is_olb_exclusion_word;
+ if ( defined($olbxl) ) {
+ my @list = split_words($olbxl);
+ if (@list) {
+ @is_olb_exclusion_word{@list} = (1) x scalar(@list);
+ }
+ }
+
+ # Make the list of block types which may be re-formed into one line.
+ # They will be modified with the grep-alias-list below and
+ # by sub 'prepare_cuddled_block_types'.
+ # Note that it is essential to always re-initialize the hash here:
+ %want_one_line_block = ();
+ if ( !$is_olb_exclusion_word{'*'} ) {
+ foreach (qw(sort map grep eval)) {
+ if ( !$is_olb_exclusion_word{$_} ) { $want_one_line_block{$_} = 1 }
+ }
+ }
+
# Note that any 'grep-alias-list' string has been preprocessed to be a
# trimmed, space-separated list.
+ my $str = $rOpts->{'grep-alias-list'};
my @grep_aliases = split /\s+/, $str;
- @{is_sort_map_grep}{@grep_aliases} = (1) x scalar(@grep_aliases);
- ##@q = qw(sort map grep eval);
- %is_sort_map_grep_eval = %is_sort_map_grep;
- $is_sort_map_grep_eval{'eval'} = 1;
+ if (@grep_aliases) {
+
+ @{is_sort_map_grep}{@grep_aliases} = (1) x scalar(@grep_aliases);
+
+ if ( $want_one_line_block{'grep'} ) {
+ @{want_one_line_block}{@grep_aliases} = (1) x scalar(@grep_aliases);
+ }
+ }
+
+ ##@q = qw(sort map grep eval);
+ %is_sort_map_grep_eval = %is_sort_map_grep;
+ $is_sort_map_grep_eval{'eval'} = 1;
##@q = qw(sort map grep eval do);
%is_sort_map_grep_eval_do = %is_sort_map_grep_eval;
return;
} ## end sub initialize_weld_nested_exclusion_rules
+sub initialize_weld_fat_comma_rules {
+
+ # Initialize a hash controlling which opening token types can be
+ # welded around a fat comma
+ %weld_fat_comma_rules = ();
+
+ # The -wfc flag turns on welding of '=>' after an opening paren
+ if ( $rOpts->{'weld-fat-comma'} ) { $weld_fat_comma_rules{'('} = 1 }
+
+ # This could be generalized in the future by introducing a parameter
+ # -weld-fat-comma-after=str (-wfca=str), where str contains any of:
+ # * { [ (
+ # to indicate which opening parens may weld to a subsequent '=>'
+
+ # The flag -wfc would then be equivalent to -wfca='('
+
+ # This has not been done because it is not yet clear how useful
+ # this generalization would be.
+ return;
+} ## end sub initialize_weld_fat_comma_rules
+
sub initialize_line_up_parentheses_control_hash {
my ( $str, $opt_name ) = @_;
return unless ($str);
return;
} ## end sub initialize_line_up_parentheses_control_hash
+sub initialize_space_after_keyword {
+
+ # default keywords for which space is introduced before an opening paren
+ # (at present, including them messes up vertical alignment)
+ my @sak = qw(my local our and or xor err eq ne if else elsif until
+ unless while for foreach return switch case given when catch);
+ %space_after_keyword = map { $_ => 1 } @sak;
+
+ # first remove any or all of these if desired
+ if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
+
+ # -nsak='*' selects all the above keywords
+ if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) }
+ @space_after_keyword{@q} = (0) x scalar(@q);
+ }
+
+ # then allow user to add to these defaults
+ if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) {
+ @space_after_keyword{@q} = (1) x scalar(@q);
+ }
+
+ return;
+} ## end sub initialize_space_after_keyword
+
+sub initialize_token_break_preferences {
+
+ # implement user break preferences
+ my $break_after = sub {
+ my @toks = @_;
+ foreach my $tok (@toks) {
+ if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
+ if ( $tok eq ',' ) { $controlled_comma_style = 1 }
+ my $lbs = $left_bond_strength{$tok};
+ my $rbs = $right_bond_strength{$tok};
+ if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
+ ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
+ ( $lbs, $rbs );
+ }
+ }
+ return;
+ };
+
+ my $break_before = sub {
+ my @toks = @_;
+ foreach my $tok (@toks) {
+ if ( $tok eq ',' ) { $controlled_comma_style = 1 }
+ my $lbs = $left_bond_strength{$tok};
+ my $rbs = $right_bond_strength{$tok};
+ if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
+ ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
+ ( $lbs, $rbs );
+ }
+ }
+ return;
+ };
+
+ $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
+ $break_before->(@all_operators)
+ if ( $rOpts->{'break-before-all-operators'} );
+
+ $break_after->( split_words( $rOpts->{'want-break-after'} ) );
+ $break_before->( split_words( $rOpts->{'want-break-before'} ) );
+
+ # make note if breaks are before certain key types
+ %want_break_before = ();
+ foreach my $tok ( @all_operators, ',' ) {
+ $want_break_before{$tok} =
+ $left_bond_strength{$tok} < $right_bond_strength{$tok};
+ }
+
+ # Coordinate ?/: breaks, which must be similar
+ # The small strength 0.01 which is added is 1% of the strength of one
+ # indentation level and seems to work okay.
+ if ( !$want_break_before{':'} ) {
+ $want_break_before{'?'} = $want_break_before{':'};
+ $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
+ $left_bond_strength{'?'} = NO_BREAK;
+ }
+
+ # Only make a hash entry for the next parameters if values are defined.
+ # That allows a quick check to be made later.
+ %break_before_container_types = ();
+ for ( $rOpts->{'break-before-hash-brace'} ) {
+ $break_before_container_types{'{'} = $_ if $_ && $_ > 0;
+ }
+ for ( $rOpts->{'break-before-square-bracket'} ) {
+ $break_before_container_types{'['} = $_ if $_ && $_ > 0;
+ }
+ for ( $rOpts->{'break-before-paren'} ) {
+ $break_before_container_types{'('} = $_ if $_ && $_ > 0;
+ }
+ return;
+} ## end sub initialize_token_break_preferences
+
use constant DEBUG_KB => 0;
sub initialize_keep_old_breakpoints {
# Ignore kbb='(' and '[' and '{': can cause unstable math formatting
# (issues b1346, b1347, b1348) and likewise ignore kba=')' and ']' and '}'
+ # Also always ignore ? and : (b1440 and b1433-b1439)
if ( $short_name eq 'kbb' ) {
- @list = grep { !m/[\(\[\{]/ } @list;
+ @list = grep { !m/[\(\[\{\?\:]/ } @list;
}
elsif ( $short_name eq 'kba' ) {
- @list = grep { !m/[\)\]\}]/ } @list;
+ @list = grep { !m/[\)\]\}\?\:]/ } @list;
}
# pull out any any leading container code, like f( or *{
} ## end sub initialize_keep_old_breakpoints
-sub initialize_whitespace_hashes {
+sub initialize_global_option_vars {
- # This is called once before formatting begins to initialize these global
- # hashes, which control the use of whitespace around tokens:
- #
- # %binary_ws_rules
- # %want_left_space
- # %want_right_space
- # %space_after_keyword
- #
- # Many token types are identical to the tokens themselves.
- # See the tokenizer for a complete list. Here are some special types:
- # k = perl keyword
- # f = semicolon in for statement
- # m = unary minus
- # p = unary plus
- # Note that :: is excluded since it should be contained in an identifier
- # Note that '->' is excluded because it never gets space
- # parentheses and brackets are excluded since they are handled specially
- # curly braces are included but may be overridden by logic, such as
- # newline logic.
+ #------------------------------------------------------------
+ # Make global vars for frequently used options for efficiency
+ #------------------------------------------------------------
- # NEW_TOKENS: create a whitespace rule here. This can be as
- # simple as adding your new letter to @spaces_both_sides, for
- # example.
+ $rOpts_add_newlines = $rOpts->{'add-newlines'};
+ $rOpts_add_trailing_commas = $rOpts->{'add-trailing-commas'};
+ $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
+ $rOpts_blank_lines_after_opening_block =
+ $rOpts->{'blank-lines-after-opening-block'};
+ $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
+ $rOpts_block_brace_vertical_tightness =
+ $rOpts->{'block-brace-vertical-tightness'};
+ $rOpts_brace_follower_vertical_tightness =
+ $rOpts->{'brace-follower-vertical-tightness'};
+ $rOpts_break_after_labels = $rOpts->{'break-after-labels'};
+ $rOpts_break_at_old_attribute_breakpoints =
+ $rOpts->{'break-at-old-attribute-breakpoints'};
+ $rOpts_break_at_old_comma_breakpoints =
+ $rOpts->{'break-at-old-comma-breakpoints'};
+ $rOpts_break_at_old_keyword_breakpoints =
+ $rOpts->{'break-at-old-keyword-breakpoints'};
+ $rOpts_break_at_old_logical_breakpoints =
+ $rOpts->{'break-at-old-logical-breakpoints'};
+ $rOpts_break_at_old_semicolon_breakpoints =
+ $rOpts->{'break-at-old-semicolon-breakpoints'};
+ $rOpts_break_at_old_ternary_breakpoints =
+ $rOpts->{'break-at-old-ternary-breakpoints'};
+ $rOpts_break_open_compact_parens = $rOpts->{'break-open-compact-parens'};
+ $rOpts_closing_side_comments = $rOpts->{'closing-side-comments'};
+ $rOpts_closing_side_comment_else_flag =
+ $rOpts->{'closing-side-comment-else-flag'};
+ $rOpts_closing_side_comment_maximum_text =
+ $rOpts->{'closing-side-comment-maximum-text'};
+ $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
+ $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
+ $rOpts_cuddled_paren_brace = $rOpts->{'cuddled-paren-brace'};
+ $rOpts_delete_closing_side_comments =
+ $rOpts->{'delete-closing-side-comments'};
+ $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
+ $rOpts_extended_continuation_indentation =
+ $rOpts->{'extended-continuation-indentation'};
+ $rOpts_delete_side_comments = $rOpts->{'delete-side-comments'};
+ $rOpts_delete_trailing_commas = $rOpts->{'delete-trailing-commas'};
+ $rOpts_delete_weld_interfering_commas =
+ $rOpts->{'delete-weld-interfering-commas'};
+ $rOpts_format_skipping = $rOpts->{'format-skipping'};
+ $rOpts_freeze_whitespace = $rOpts->{'freeze-whitespace'};
+ $rOpts_function_paren_vertical_alignment =
+ $rOpts->{'function-paren-vertical-alignment'};
+ $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
+ $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
+ $rOpts_ignore_side_comment_lengths =
+ $rOpts->{'ignore-side-comment-lengths'};
+ $rOpts_indent_closing_brace = $rOpts->{'indent-closing-brace'};
+ $rOpts_indent_columns = $rOpts->{'indent-columns'};
+ $rOpts_indent_only = $rOpts->{'indent-only'};
+ $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
+ $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
+ $rOpts_extended_line_up_parentheses =
+ $rOpts->{'extended-line-up-parentheses'};
+ $rOpts_logical_padding = $rOpts->{'logical-padding'};
+ $rOpts_maximum_consecutive_blank_lines =
+ $rOpts->{'maximum-consecutive-blank-lines'};
+ $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
+ $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
+ $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
+ $rOpts_opening_brace_always_on_right =
+ $rOpts->{'opening-brace-always-on-right'};
+ $rOpts_outdent_keywords = $rOpts->{'outdent-keywords'};
+ $rOpts_outdent_labels = $rOpts->{'outdent-labels'};
+ $rOpts_outdent_long_comments = $rOpts->{'outdent-long-comments'};
+ $rOpts_outdent_long_quotes = $rOpts->{'outdent-long-quotes'};
+ $rOpts_outdent_static_block_comments =
+ $rOpts->{'outdent-static-block-comments'};
+ $rOpts_recombine = $rOpts->{'recombine'};
+ $rOpts_short_concatenation_item_length =
+ $rOpts->{'short-concatenation-item-length'};
+ $rOpts_space_prototype_paren = $rOpts->{'space-prototype-paren'};
+ $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
+ $rOpts_static_block_comments = $rOpts->{'static-block-comments'};
+ $rOpts_tee_block_comments = $rOpts->{'tee-block-comments'};
+ $rOpts_tee_pod = $rOpts->{'tee-pod'};
+ $rOpts_tee_side_comments = $rOpts->{'tee-side-comments'};
+ $rOpts_valign_code = $rOpts->{'valign-code'};
+ $rOpts_valign_side_comments = $rOpts->{'valign-side-comments'};
+ $rOpts_variable_maximum_line_length =
+ $rOpts->{'variable-maximum-line-length'};
- my @opening_type = qw< L { ( [ >;
- @is_opening_type{@opening_type} = (1) x scalar(@opening_type);
+ # Note that both opening and closing tokens can access the opening
+ # and closing flags of their container types.
+ %opening_vertical_tightness = (
+ '(' => $rOpts->{'paren-vertical-tightness'},
+ '{' => $rOpts->{'brace-vertical-tightness'},
+ '[' => $rOpts->{'square-bracket-vertical-tightness'},
+ ')' => $rOpts->{'paren-vertical-tightness'},
+ '}' => $rOpts->{'brace-vertical-tightness'},
+ ']' => $rOpts->{'square-bracket-vertical-tightness'},
+ );
- my @closing_type = qw< R } ) ] >;
- @is_closing_type{@closing_type} = (1) x scalar(@closing_type);
+ %closing_vertical_tightness = (
+ '(' => $rOpts->{'paren-vertical-tightness-closing'},
+ '{' => $rOpts->{'brace-vertical-tightness-closing'},
+ '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
+ ')' => $rOpts->{'paren-vertical-tightness-closing'},
+ '}' => $rOpts->{'brace-vertical-tightness-closing'},
+ ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
+ );
- my @spaces_both_sides = qw#
- + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
- .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
- &&= ||= //= <=> A k f w F n C Y U G v
- #;
+ # assume flag for '>' same as ')' for closing qw quotes
+ %closing_token_indentation = (
+ ')' => $rOpts->{'closing-paren-indentation'},
+ '}' => $rOpts->{'closing-brace-indentation'},
+ ']' => $rOpts->{'closing-square-bracket-indentation'},
+ '>' => $rOpts->{'closing-paren-indentation'},
+ );
- my @spaces_left_side = qw<
- t ! ~ m p { \ h pp mm Z j
- >;
- push( @spaces_left_side, '#' ); # avoids warning message
+ # flag indicating if any closing tokens are indented
+ $some_closing_token_indentation =
+ $rOpts->{'closing-paren-indentation'}
+ || $rOpts->{'closing-brace-indentation'}
+ || $rOpts->{'closing-square-bracket-indentation'}
+ || $rOpts->{'indent-closing-brace'};
- my @spaces_right_side = qw<
- ; } ) ] R J ++ -- **=
- >;
- push( @spaces_right_side, ',' ); # avoids warning message
+ %opening_token_right = (
+ '(' => $rOpts->{'opening-paren-right'},
+ '{' => $rOpts->{'opening-hash-brace-right'},
+ '[' => $rOpts->{'opening-square-bracket-right'},
+ );
- %want_left_space = ();
- %want_right_space = ();
- %binary_ws_rules = ();
+ %stack_opening_token = (
+ '(' => $rOpts->{'stack-opening-paren'},
+ '{' => $rOpts->{'stack-opening-hash-brace'},
+ '[' => $rOpts->{'stack-opening-square-bracket'},
+ );
+
+ %stack_closing_token = (
+ ')' => $rOpts->{'stack-closing-paren'},
+ '}' => $rOpts->{'stack-closing-hash-brace'},
+ ']' => $rOpts->{'stack-closing-square-bracket'},
+ );
+ return;
+} ## end sub initialize_global_option_vars
+
+sub initialize_line_length_vars {
+
+ # Create a table of maximum line length vs level for later efficient use.
+ # We will make the tables very long to be sure it will not be exceeded.
+ # But we have to choose a fixed length. A check will be made at the start
+ # of sub 'finish_formatting' to be sure it is not exceeded. Note, some of
+ # my standard test problems have indentation levels of about 150, so this
+ # should be fairly large. If the choice of a maximum level ever becomes
+ # an issue then these table values could be returned in a sub with a simple
+ # memoization scheme.
+
+ # Also create a table of the maximum spaces available for text due to the
+ # level only. If a line has continuation indentation, then that space must
+ # be subtracted from the table value. This table is used for preliminary
+ # estimates in welding, extended_ci, BBX, and marking short blocks.
+ use constant LEVEL_TABLE_MAX => 1000;
+
+ # The basic scheme:
+ foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
+ my $indent = $level * $rOpts_indent_columns;
+ $maximum_line_length_at_level[$level] = $rOpts_maximum_line_length;
+ $maximum_text_length_at_level[$level] =
+ $rOpts_maximum_line_length - $indent;
+ }
+
+ # Correct the maximum_text_length table if the -wc=n flag is used
+ $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
+ if ($rOpts_whitespace_cycle) {
+ if ( $rOpts_whitespace_cycle > 0 ) {
+ foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
+ my $level_mod = $level % $rOpts_whitespace_cycle;
+ my $indent = $level_mod * $rOpts_indent_columns;
+ $maximum_text_length_at_level[$level] =
+ $rOpts_maximum_line_length - $indent;
+ }
+ }
+ else {
+ $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'} = 0;
+ }
+ }
+
+ # Correct the tables if the -vmll flag is used. These values override the
+ # previous values.
+ if ($rOpts_variable_maximum_line_length) {
+ foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
+ $maximum_text_length_at_level[$level] = $rOpts_maximum_line_length;
+ $maximum_line_length_at_level[$level] =
+ $rOpts_maximum_line_length + $level * $rOpts_indent_columns;
+ }
+ }
+
+ # Define two measures of indentation level, alpha and beta, at which some
+ # formatting features come under stress and need to start shutting down.
+ # Some combination of the two will be used to shut down different
+ # formatting features.
+ # Put a reasonable upper limit on stress level (say 100) in case the
+ # whitespace-cycle variable is used.
+ my $stress_level_limit = min( 100, LEVEL_TABLE_MAX );
+
+ # Find stress_level_alpha, targeted at very short maximum line lengths.
+ $stress_level_alpha = $stress_level_limit + 1;
+ foreach my $level_test ( 0 .. $stress_level_limit ) {
+ my $max_len = $maximum_text_length_at_level[ $level_test + 1 ];
+ my $excess_inside_space =
+ $max_len -
+ $rOpts_continuation_indentation -
+ $rOpts_indent_columns - 8;
+ if ( $excess_inside_space <= 0 ) {
+ $stress_level_alpha = $level_test;
+ last;
+ }
+ }
+
+ # Find stress level beta, a stress level targeted at formatting
+ # at deep levels near the maximum line length. We start increasing
+ # from zero and stop at the first level which shows no more space.
+
+ # 'const' is a fixed number of spaces for a typical variable.
+ # Cases b1197-b1204 work ok with const=12 but not with const=8
+ my $const = 16;
+ my $denom = max( 1, $rOpts_indent_columns );
+ $stress_level_beta = 0;
+ foreach my $level ( 0 .. $stress_level_limit ) {
+ my $remaining_cycles = max(
+ 0,
+ (
+ $maximum_text_length_at_level[$level] -
+ $rOpts_continuation_indentation - $const
+ ) / $denom
+ );
+ last if ( $remaining_cycles <= 3 ); # 2 does not work
+ $stress_level_beta = $level;
+ }
+
+ # This is a combined level which works well for turning off formatting
+ # features in most cases:
+ $high_stress_level = min( $stress_level_alpha, $stress_level_beta + 2 );
+
+ return;
+} ## end sub initialize_line_length_vars
+
+sub initialize_trailing_comma_rules {
+
+ # Setup control hash for trailing commas
+
+ # -wtc=s defines desired trailing comma policy:
+ #
+ # =" " stable
+ # [ both -atc and -dtc ignored ]
+ # =0 : none
+ # [requires -dtc; -atc ignored]
+ # =1 or * : all
+ # [requires -atc; -dtc ignored]
+ # =m : multiline lists require trailing comma
+ # if -atc set => will add missing multiline trailing commas
+ # if -dtc set => will delete trailing single line commas
+ # =b or 'bare' (multiline) lists require trailing comma
+ # if -atc set => will add missing bare trailing commas
+ # if -dtc set => will delete non-bare trailing commas
+ # =h or 'hash': single column stable bare lists require trailing comma
+ # if -atc set will add these
+ # if -dtc set will delete other trailing commas
+
+ #-------------------------------------------------------------------
+ # This routine must be called after the alpha and beta stress levels
+ # have been defined in sub 'initialize_line_length_vars'.
+ #-------------------------------------------------------------------
+
+ %trailing_comma_rules = ();
+
+ my $rvalid_flags = [qw(0 1 * m b h i)];
+
+ my $option = $rOpts->{'want-trailing-commas'};
+
+ if ($option) {
+ $option =~ s/^\s+//;
+ $option =~ s/\s+$//;
+ }
+
+ # We need to use length() here because '0' is a possible option
+ if ( defined($option) && length($option) ) {
+ my $error_message;
+ my %rule_hash;
+ my @q = @{$rvalid_flags};
+ my %is_valid_flag;
+ @is_valid_flag{@q} = (1) x scalar(@q);
+
+ # handle single character control, such as -wtc='b'
+ if ( length($option) == 1 ) {
+ foreach (qw< ) ] } >) {
+ $rule_hash{$_} = [ $option, EMPTY_STRING ];
+ }
+ }
+
+ # handle multi-character control(s), such as -wtc='[m' or -wtc='k(m'
+ else {
+ my @parts = split /\s+/, $option;
+ foreach my $part (@parts) {
+ if ( length($part) >= 2 && length($part) <= 3 ) {
+ my $val = substr( $part, -1, 1 );
+ my $key_o = substr( $part, -2, 1 );
+ if ( $is_opening_token{$key_o} ) {
+ my $paren_flag = EMPTY_STRING;
+ if ( length($part) == 3 ) {
+ $paren_flag = substr( $part, 0, 1 );
+ }
+ my $key = $matching_token{$key_o};
+ $rule_hash{$key} = [ $val, $paren_flag ];
+ }
+ else {
+ $error_message .= "Unrecognized term: '$part'\n";
+ }
+ }
+ else {
+ $error_message .= "Unrecognized term: '$part'\n";
+ }
+ }
+ }
+
+ # check for valid control characters
+ if ( !$error_message ) {
+ foreach my $key ( keys %rule_hash ) {
+ my $item = $rule_hash{$key};
+ my ( $val, $paren_flag ) = @{$item};
+ if ( $val && !$is_valid_flag{$val} ) {
+ my $valid_str = join( SPACE, @{$rvalid_flags} );
+ $error_message .=
+ "Unexpected value '$val'; must be one of: $valid_str\n";
+ last;
+ }
+ if ($paren_flag) {
+ if ( $paren_flag !~ /^[kKfFwW]$/ ) {
+ $error_message .=
+"Unexpected paren flag '$paren_flag'; must be one of: k K f F w W\n";
+ last;
+ }
+ if ( $key ne ')' ) {
+ $error_message .=
+"paren flag '$paren_flag' is only allowed before a '('\n";
+ last;
+ }
+ }
+ }
+ }
+
+ if ($error_message) {
+ Warn(<<EOM);
+Error parsing --want-trailing-commas='$option':
+$error_message
+EOM
+ }
+
+ # Set the control hash if no errors
+ else {
+ %trailing_comma_rules = %rule_hash;
+ }
+ }
+
+ # Both adding and deleting commas can lead to instability in extreme cases
+ if ( $rOpts_add_trailing_commas && $rOpts_delete_trailing_commas ) {
+
+ # If the possible instability is significant, then we can turn off
+ # -dtc as a defensive measure to prevent it.
+
+ # We must turn off -dtc for very small values of --whitespace-cycle
+ # to avoid instability. A minimum value of -wc=3 fixes b1393, but a
+ # value of 4 is used here for safety. This parameter is seldom used,
+ # and much larger than this when used, so the cutoff value is not
+ # critical.
+ if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle <= 4 ) {
+ $rOpts_delete_trailing_commas = 0;
+ }
+ }
+
+ return;
+} ## end sub initialize_trailing_comma_rules
+
+sub initialize_whitespace_hashes {
+
+ # This is called once before formatting begins to initialize these global
+ # hashes, which control the use of whitespace around tokens:
+ #
+ # %binary_ws_rules
+ # %want_left_space
+ # %want_right_space
+ # %space_after_keyword
+ #
+ # Many token types are identical to the tokens themselves.
+ # See the tokenizer for a complete list. Here are some special types:
+ # k = perl keyword
+ # f = semicolon in for statement
+ # m = unary minus
+ # p = unary plus
+ # Note that :: is excluded since it should be contained in an identifier
+ # Note that '->' is excluded because it never gets space
+ # parentheses and brackets are excluded since they are handled specially
+ # curly braces are included but may be overridden by logic, such as
+ # newline logic.
+
+ # NEW_TOKENS: create a whitespace rule here. This can be as
+ # simple as adding your new letter to @spaces_both_sides, for
+ # example.
+
+ my @spaces_both_sides = qw#
+ + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
+ .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
+ &&= ||= //= <=> A k f w F n C Y U G v
+ #;
+
+ my @spaces_left_side = qw<
+ t ! ~ m p { \ h pp mm Z j
+ >;
+ push( @spaces_left_side, '#' ); # avoids warning message
+
+ my @spaces_right_side = qw<
+ ; } ) ] R J ++ -- **=
+ >;
+ push( @spaces_right_side, ',' ); # avoids warning message
+
+ %want_left_space = ();
+ %want_right_space = ();
+ %binary_ws_rules = ();
# Note that we setting defaults here. Later in processing
# the values of %want_left_space and %want_right_space
} ## end sub initialize_whitespace_hashes
+{ #<<< begin closure set_whitespace_flags
+
my %is_special_ws_type;
my %is_wCUG;
my %is_wi;
# The following hash is used to skip over needless if tests.
# Be sure to update it when adding new checks in its block.
- my @q = qw(k w i C m - Q);
+ my @q = qw(k w C m - Q);
push @q, '#';
@is_special_ws_type{@q} = (1) x scalar(@q);
@q = qw( w i );
@is_wi{@q} = (1) x scalar(@q);
-}
+} ## end BEGIN
use constant DEBUG_WHITE => 0;
+# Hashes to set spaces around container tokens according to their
+# sequence numbers. These are set as keywords are examined.
+# They are controlled by the -kpit and -kpitl flags.
+my %opening_container_inside_ws;
+my %closing_container_inside_ws;
+
sub set_whitespace_flags {
# This routine is called once per file to set whitespace flags for that
my $self = shift;
- my $rLL = $self->[_rLL_];
+ my $j_tight_closing_paren = -1;
+ my $rLL = $self->[_rLL_];
+ my $jmax = @{$rLL} - 1;
+
+ %opening_container_inside_ws = ();
+ %closing_container_inside_ws = ();
+
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
- my $jmax = @{$rLL} - 1;
my $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
my $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
my %is_for_foreach = ( 'for' => 1, 'foreach' => 1 );
- my ( $rtokh, $token, $type );
- my $rtokh_last = $rLL->[0];
- my $rtokh_last_last = $rtokh_last;
-
- my $last_type = EMPTY_STRING;
- my $last_token = EMPTY_STRING;
-
- my $j_tight_closing_paren = -1;
-
- $rtokh = [ @{ $rLL->[0] } ];
- $token = SPACE;
- $type = 'b';
-
- $rtokh->[_TOKEN_] = $token;
- $rtokh->[_TYPE_] = $type;
- $rtokh->[_TYPE_SEQUENCE_] = EMPTY_STRING;
- $rtokh->[_LINE_INDEX_] = 0;
-
- # This is some logic moved to a sub to avoid deep nesting of if stmts
- my $ws_in_container = sub {
-
- my ($j) = @_;
- my $ws = WS_YES;
- if ( $j + 1 > $jmax ) { return (WS_NO) }
-
- # Patch to count '-foo' as single token so that
- # each of $a{-foo} and $a{foo} and $a{'foo'} do
- # not get spaces with default formatting.
- my $j_here = $j;
- ++$j_here
- if ( $token eq '-'
- && $last_token eq '{'
- && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
-
- # Patch to count a sign separated from a number as a single token, as
- # in the following line. Otherwise, it takes two steps to converge:
- # deg2rad(- 0.5)
- if ( ( $type eq 'm' || $type eq 'p' )
- && $j < $jmax + 1
- && $rLL->[ $j + 1 ]->[_TYPE_] eq 'b'
- && $rLL->[ $j + 2 ]->[_TYPE_] eq 'n'
- && $rLL->[ $j + 2 ]->[_TOKEN_] =~ /^\d/ )
- {
- $j_here = $j + 2;
- }
-
- # $j_next is where a closing token should be if
- # the container has a single token
- if ( $j_here + 1 > $jmax ) { return (WS_NO) }
- my $j_next =
- ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
- ? $j_here + 2
- : $j_here + 1;
-
- if ( $j_next > $jmax ) { return WS_NO }
- my $tok_next = $rLL->[$j_next]->[_TOKEN_];
- my $type_next = $rLL->[$j_next]->[_TYPE_];
-
- # for tightness = 1, if there is just one token
- # within the matching pair, we will keep it tight
- if (
- $tok_next eq $matching_token{$last_token}
-
- # but watch out for this: [ [ ] (misc.t)
- && $last_token ne $token
-
- # double diamond is usually spaced
- && $token ne '<<>>'
-
- )
- {
-
- # remember where to put the space for the closing paren
- $j_tight_closing_paren = $j_next;
- return (WS_NO);
- }
- return (WS_YES);
- };
-
- # Local hashes to set spaces around container tokens according to their
- # sequence numbers. These are set as keywords are examined.
- # They are controlled by the -kpit and -kpitl flags.
- my %opening_container_inside_ws;
- my %closing_container_inside_ws;
- my $set_container_ws_by_keyword = sub {
+ my $last_token = SPACE;
+ my $last_type = 'b';
- return unless (%keyword_paren_inner_tightness);
+ my $rtokh_last = [ @{ $rLL->[0] } ];
+ $rtokh_last->[_TOKEN_] = $last_token;
+ $rtokh_last->[_TYPE_] = $last_type;
+ $rtokh_last->[_TYPE_SEQUENCE_] = EMPTY_STRING;
+ $rtokh_last->[_LINE_INDEX_] = 0;
- my ( $word, $sequence_number ) = @_;
-
- # We just saw a keyword (or other function name) followed by an opening
- # paren. Now check to see if the following paren should have special
- # treatment for its inside space. If so we set a hash value using the
- # sequence number as key.
- if ( $word && $sequence_number ) {
- my $tightness = $keyword_paren_inner_tightness{$word};
- if ( defined($tightness) && $tightness != 1 ) {
- my $ws_flag = $tightness == 0 ? WS_YES : WS_NO;
- $opening_container_inside_ws{$sequence_number} = $ws_flag;
- $closing_container_inside_ws{$sequence_number} = $ws_flag;
- }
- }
- return;
- };
+ my $rtokh_last_last = $rtokh_last;
my ( $ws_1, $ws_2, $ws_3, $ws_4 );
# main loop over all tokens to define the whitespace flags
- foreach my $j ( 0 .. $jmax ) {
+ my $last_type_is_opening;
+ my ( $token, $type );
+ my $j = -1;
+ foreach my $rtokh ( @{$rLL} ) {
- if ( $rLL->[$j]->[_TYPE_] eq 'b' ) {
+ $j++;
+
+ $type = $rtokh->[_TYPE_];
+ if ( $type eq 'b' ) {
$rwhitespace_flags->[$j] = WS_OPTIONAL;
next;
}
- $rtokh_last_last = $rtokh_last;
-
- $rtokh_last = $rtokh;
- $last_token = $token;
- $last_type = $type;
-
- $rtokh = $rLL->[$j];
$token = $rtokh->[_TOKEN_];
- $type = $rtokh->[_TYPE_];
my $ws;
#---------------------------------------------------------------
# /^[L\{\(\[]$/
- if ( $is_opening_type{$last_type} ) {
+ if ($last_type_is_opening) {
+
+ $last_type_is_opening = 0;
my $seqno = $rtokh->[_TYPE_SEQUENCE_];
my $block_type = $rblock_type_of_seqno->{$seqno};
$ws = WS_NO;
}
else {
- $ws = $ws_in_container->($j);
+
+ # find the index of the closing token
+ my $j_closing =
+ $self->[_K_closing_container_]->{$last_seqno};
+
+ # If the closing token is less than five characters ahead
+ # we must take a closer look
+ if ( defined($j_closing)
+ && $j_closing - $j < 5
+ && $rLL->[$j_closing]->[_TYPE_SEQUENCE_] eq
+ $last_seqno )
+ {
+ $ws =
+ ws_in_container( $j, $j_closing, $rLL, $type, $token,
+ $last_token );
+ if ( $ws == WS_NO ) {
+ $j_tight_closing_paren = $j_closing;
+ }
+ }
+ else {
+ $ws = WS_YES;
+ }
}
}
#---------------------------------------------------------------
# The hash '%is_special_ws_type' significantly speeds up this routine,
# but be sure to update it if a new check is added.
- # Currently has types: qw(k w i C m - Q #)
+ # Currently has types: qw(k w C m - Q #)
if ( $is_special_ws_type{$type} ) {
- if ( $type eq 'i' ) {
-
- # never a space before ->
- if ( substr( $token, 0, 2 ) eq '->' ) {
- $ws = WS_NO;
- }
- }
- elsif ( $type eq 'k' ) {
+ if ( $type eq 'k' ) {
# Keywords 'for', 'foreach' are special cases for -kpit since
# the opening paren does not always immediately follow the
last if ( $rLL->[$jp]->[_LEVEL_] != $level ); # b1236
next unless ( $rLL->[$jp]->[_TOKEN_] eq '(' );
my $seqno_p = $rLL->[$jp]->[_TYPE_SEQUENCE_];
- $set_container_ws_by_keyword->( $token, $seqno_p );
+ set_container_ws_by_keyword( $token, $seqno_p );
last;
}
}
}
- # retain any space between '-' and bare word
- elsif ( $type eq 'w' || $type eq 'C' ) {
- $ws = WS_OPTIONAL if $last_type eq '-';
+ # handle a comment
+ elsif ( $type eq '#' ) {
- # never a space before ->
- if ( substr( $token, 0, 2 ) eq '->' ) {
- $ws = WS_NO;
- }
+ # newline before block comment ($j==0), and
+ # space before side comment ($j>0), so ..
+ $ws = WS_YES;
+
+ #---------------------------------
+ # Nothing more to do for a comment
+ #---------------------------------
+ $rwhitespace_flags->[$j] = $ws;
+ next;
+ }
+
+ # retain any space between '-' and bare word
+ elsif ( $type eq 'w' || $type eq 'C' ) {
+ $ws = WS_OPTIONAL if $last_type eq '-';
}
# retain any space between '-' and bare word; for example
$ws = WS_OPTIONAL if ( $last_type eq 'w' );
}
- # always space before side comment
- elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
-
# space_backslash_quote; RT #123774 <<snippets/rt123774.in>>
# allow a space between a backslash and single or double quote
# to avoid fooling html formatters
# /^[L\{\(\[]$/
elsif ( $is_opening_type{$type} ) {
+ $last_type_is_opening = 1;
+
if ( $token eq '(' ) {
my $seqno = $rtokh->[_TYPE_SEQUENCE_];
|| $space_after_keyword{$last_token} );
# Set inside space flag if requested
- $set_container_ws_by_keyword->( $last_token, $seqno );
+ set_container_ws_by_keyword( $last_token, $seqno );
}
# Space between function and '('
# NOTE: this would be the place to allow spaces between
# repeated parens, like () () (), as in case c017, but I
# decided that would not be a good idea.
+
+ # Updated to allow detached '->' from tokenizer (issue c140)
elsif (
- ##$last_type =~ /^[wCUG]$/
+
+ # /^[wCUG]$/
$is_wCUG{$last_type}
+
|| (
- ##$last_type =~ /^[wi]$/
+
+ # /^[wi]$/
$is_wi{$last_type}
&& (
+
+ # with prefix '->' or '&'
$last_token =~ /^([\&]|->)/
- # or -> or & split from bareword by newline (b1337)
- || (
- $last_token =~ /^\w/
- && (
- $rtokh_last_last->[_TYPE_] eq '->'
- || ( $rtokh_last_last->[_TYPE_] eq 't'
- && $rtokh_last_last->[_TOKEN_] =~
- /^\&\s*$/ )
- )
- )
+ # or preceding token '->' (see b1337; c140)
+ || $rtokh_last_last->[_TYPE_] eq '->'
+
+ # or preceding sub call operator token '&'
+ || ( $rtokh_last_last->[_TYPE_] eq 't'
+ && $rtokh_last_last->[_TOKEN_] =~ /^\&\s*$/ )
)
)
)
{
- $ws = $rOpts_space_function_paren ? WS_YES : WS_NO;
- $set_container_ws_by_keyword->( $last_token, $seqno );
+ $ws =
+ $rOpts_space_function_paren
+ ? $self->ws_space_function_paren( $j, $rtokh_last_last )
+ : WS_NO;
+
+ set_container_ws_by_keyword( $last_token, $seqno );
$ris_function_call_paren->{$seqno} = 1;
}
# space between something like $i and ( in 'snippets/space2.in'
# for $i ( 0 .. 20 ) {
- # FIXME: eventually, type 'i' could be split into multiple
- # token types so this can be a hardwired rule.
elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
$ws = WS_YES;
}
$ws = WS_OPTIONAL;
}
- # keep space between 'sub' and '{' for anonymous sub definition
+ # keep space between 'sub' and '{' for anonymous sub definition,
+ # be sure type = 'k' (added for c140)
if ( $type eq '{' ) {
- if ( $last_token eq 'sub' ) {
+ if ( $last_token eq 'sub' && $last_type eq 'k' ) {
$ws = WS_YES;
}
}
} ## end if ( $is_opening_type{$type} ) {
- # always preserver whatever space was used after a possible
+ # always preserve whatever space was used after a possible
# filehandle (except _) or here doc operator
if (
- $type ne '#'
- && ( ( $last_type eq 'Z' && $last_token ne '_' )
- || $last_type eq 'h' )
+ (
+ ( $last_type eq 'Z' && $last_token ne '_' )
+ || $last_type eq 'h'
+ )
+ && $type ne '#' # no longer required due to early exit for '#' above
)
{
$ws = WS_OPTIONAL;
# Whitespace Rules Section 4:
# Use the binary rule table.
#---------------------------------------------------------------
- $ws = $binary_ws_rules{$last_type}{$type};
- $ws_4 = $ws if DEBUG_WHITE;
+ if ( defined( $binary_ws_rules{$last_type}{$type} ) ) {
+ $ws = $binary_ws_rules{$last_type}{$type};
+ $ws_4 = $ws if DEBUG_WHITE;
+ }
#---------------------------------------------------------------
# Whitespace Rules Section 5:
#
# -1 vs 1 --> -1
# 1 vs -1 --> -1
- if ( !defined($ws) ) {
+ else {
my $wl = $want_left_space{$type};
my $wr = $want_right_space{$last_type};
if ( !defined($wl) ) {
# my $msg = new Fax::Send
# -recipients => $to,
# -data => $data;
- if ( $ws == 0
+ if ( !$ws
&& $rtokh->[_LINE_INDEX_] != $rtokh_last->[_LINE_INDEX_] )
{
- $ws = 1;
+ $ws = WS_YES;
}
$rwhitespace_flags->[$j] = $ws;
- if (DEBUG_WHITE) {
- my $str = substr( $last_token, 0, 15 );
- $str .= SPACE x ( 16 - length($str) );
- if ( !defined($ws_1) ) { $ws_1 = "*" }
- if ( !defined($ws_2) ) { $ws_2 = "*" }
- if ( !defined($ws_3) ) { $ws_3 = "*" }
- if ( !defined($ws_4) ) { $ws_4 = "*" }
- print STDOUT
+ # remember non-blank, non-comment tokens
+ $last_token = $token;
+ $last_type = $type;
+ $rtokh_last_last = $rtokh_last;
+ $rtokh_last = $rtokh;
+
+ next if ( !DEBUG_WHITE );
+
+ my $str = substr( $last_token, 0, 15 );
+ $str .= SPACE x ( 16 - length($str) );
+ if ( !defined($ws_1) ) { $ws_1 = "*" }
+ if ( !defined($ws_2) ) { $ws_2 = "*" }
+ if ( !defined($ws_3) ) { $ws_3 = "*" }
+ if ( !defined($ws_4) ) { $ws_4 = "*" }
+ print STDOUT
"NEW WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
- # reset for next pass
- $ws_1 = $ws_2 = $ws_3 = $ws_4 = undef;
- }
+ # reset for next pass
+ $ws_1 = $ws_2 = $ws_3 = $ws_4 = undef;
+
} ## end main loop
if ( $rOpts->{'tight-secret-operators'} ) {
} ## end sub set_whitespace_flags
+sub set_container_ws_by_keyword {
+
+ my ( $word, $sequence_number ) = @_;
+ return unless (%keyword_paren_inner_tightness);
+
+ # We just saw a keyword (or other function name) followed by an opening
+ # paren. Now check to see if the following paren should have special
+ # treatment for its inside space. If so we set a hash value using the
+ # sequence number as key.
+ if ( $word && $sequence_number ) {
+ my $tightness = $keyword_paren_inner_tightness{$word};
+ if ( defined($tightness) && $tightness != 1 ) {
+ my $ws_flag = $tightness == 0 ? WS_YES : WS_NO;
+ $opening_container_inside_ws{$sequence_number} = $ws_flag;
+ $closing_container_inside_ws{$sequence_number} = $ws_flag;
+ }
+ }
+ return;
+} ## end sub set_container_ws_by_keyword
+
+sub ws_in_container {
+
+ my ( $j, $j_closing, $rLL, $type, $token, $last_token ) = @_;
+
+ # Given:
+ # $j = index of token following an opening container token
+ # $type, $token = the type and token at index $j
+ # $j_closing = closing token of the container
+ # $last_token = the opening token of the container
+ # Return:
+ # WS_NO if there is just one token in the container (with exceptions)
+ # WS_YES otherwise
+
+ #------------------------------------
+ # Look forward for the closing token;
+ #------------------------------------
+ if ( $j + 1 > $j_closing ) { return WS_NO }
+
+ # Patch to count '-foo' as single token so that
+ # each of $a{-foo} and $a{foo} and $a{'foo'} do
+ # not get spaces with default formatting.
+ my $j_here = $j;
+ ++$j_here
+ if ( $token eq '-'
+ && $last_token eq '{'
+ && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
+
+ # Patch to count a sign separated from a number as a single token, as
+ # in the following line. Otherwise, it takes two steps to converge:
+ # deg2rad(- 0.5)
+ if ( ( $type eq 'm' || $type eq 'p' )
+ && $j < $j_closing + 1
+ && $rLL->[ $j + 1 ]->[_TYPE_] eq 'b'
+ && $rLL->[ $j + 2 ]->[_TYPE_] eq 'n'
+ && $rLL->[ $j + 2 ]->[_TOKEN_] =~ /^\d/ )
+ {
+ $j_here = $j + 2;
+ }
+
+ # $j_next is where a closing token should be if the container has
+ # just a "single" token
+ if ( $j_here + 1 > $j_closing ) { return WS_NO }
+ my $j_next =
+ ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
+ ? $j_here + 2
+ : $j_here + 1;
+
+ #-----------------------------------------------------------------
+ # Now decide: if we get to the closing token we will keep it tight
+ #-----------------------------------------------------------------
+ if (
+ $j_next == $j_closing
+
+ # OLD PROBLEM: but watch out for this: [ [ ] (misc.t)
+ # No longer necessary because of the previous check on sequence numbers
+ ##&& $last_token ne $token
+
+ # double diamond is usually spaced
+ && $token ne '<<>>'
+
+ )
+ {
+ return WS_NO;
+ }
+
+ return WS_YES;
+
+} ## end sub ws_in_container
+
+sub ws_space_function_paren {
+
+ my ( $self, $j, $rtokh_last_last ) = @_;
+
+ # Called if --space-function-paren is set to see if it might cause
+ # a problem. The manual warns the user about potential problems with
+ # this flag. Here we just try to catch one common problem.
+
+ # Given:
+ # $j = index of '(' after function name
+ # Return:
+ # WS_NO if no space
+ # WS_YES otherwise
+
+ # This was added to fix for issue c166. Ignore -sfp at a possible indirect
+ # object location. For example, do not convert this:
+ # print header() ...
+ # to this:
+ # print header () ...
+ # because in this latter form, header may be taken to be a file handle
+ # instead of a function call.
+
+ # Start with the normal value for -sfp:
+ my $ws = WS_YES;
+
+ # now check to be sure we don't cause a problem:
+ my $type_ll = $rtokh_last_last->[_TYPE_];
+ my $tok_ll = $rtokh_last_last->[_TOKEN_];
+
+ # NOTE: this is just a minimal check. For example, we might also check
+ # for something like this:
+ # print ( header ( ..
+ if ( $type_ll eq 'k' && $is_indirect_object_taker{$tok_ll} ) {
+ $ws = WS_NO;
+ }
+
+ return $ws;
+
+} ## end sub ws_space_function_paren
+
+} ## end closure set_whitespace_flags
+
sub dump_want_left_space {
my $fh = shift;
local $LIST_SEPARATOR = "\n";
qw{ ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ };
@{is_special_variable_char}{@q} = (1) x scalar(@q);
- }
+ } ## end BEGIN
sub is_essential_whitespace {
my $tok = $value->[0];
push @{ $is_leading_secret_token{$tok} }, $value;
}
- }
+ } ## end BEGIN
sub new_secret_operator_whitespace {
} ## End Loop over all operators
} ## End loop over all tokens
return;
- } # End sub
+ } ## end sub new_secret_operator_whitespace
} ## end closure new_secret_operator_whitespace
{ ## begin closure set_bond_strengths
# $a->$b($c);
$binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
+ # Added for c140 to make 'w ->' and 'i ->' behave the same
+ $binary_bond_strength{'w'}{'->'} = 1.45 * STRONG;
+
# Note that the following alternative strength would make the break at the
# '->' rather than opening the '('. Both have advantages and disadvantages.
# $binary_bond_strength{'i'}{'->'} = 0.5*STRONG + 0.5 * NOMINAL; #
my ($self) = @_;
+ #-----------------------------------------------------------------
+ # Define a 'bond strength' for each token pair in an output batch.
+ # See comments above for definition of bond strength.
+ #-----------------------------------------------------------------
+
my $rbond_strength_to_go = [];
my $rLL = $self->[_rLL_];
elsif ( $type eq 'w' ) {
$bond_str = NO_BREAK
if ( !$old_breakpoint_to_go[$i]
- && substr( $next_nonblank_token, 0, 1 ) eq '/' );
+ && substr( $next_nonblank_token, 0, 1 ) eq '/'
+ && $next_nonblank_type ne '//' );
}
$bond_str_2 = $bond_str if (DEBUG_BOND);
# be absolutely sure that we do not allow a break. So for
# these the nobreak flag exceeds 1 as a signal. Otherwise we
# can run into trouble when small tolerances are added.
- $strength += 1 if ( $nobreak_to_go[$i] > 1 );
+ $strength += 1
+ if ( $nobreak_to_go[$i] && $nobreak_to_go[$i] > 1 );
}
#---------------------------------------------------------------
# but it should be safe because the pattern has been constructed
# by this program.
my ($pattern) = @_;
- eval "'##'=~/$pattern/";
- return $EVAL_ERROR;
-}
+ my $ok = eval "'##'=~/$pattern/";
+ return !defined($ok) || $EVAL_ERROR;
+} ## end sub bad_pattern
{ ## begin closure prepare_cuddled_block_types
return;
} ## end sub check_sequence_numbers
+ sub store_block_type {
+ my ( $self, $block_type, $seqno ) = @_;
+
+ return if ( !$block_type );
+
+ $self->[_rblock_type_of_seqno_]->{$seqno} = $block_type;
+
+ if ( $block_type =~ /$ASUB_PATTERN/ ) {
+ $self->[_ris_asub_block_]->{$seqno} = 1;
+ }
+ elsif ( $block_type =~ /$SUB_PATTERN/ ) {
+ $self->[_ris_sub_block_]->{$seqno} = 1;
+ }
+ return;
+ } ## end sub store_block_type
+
sub write_line {
# This routine receives lines one-by-one from the tokenizer and stores
# to do the actual formatting.
my ( $self, $line_of_tokens_old ) = @_;
- my $rLL = $self->[_rLL_];
- my $Klimit = $self->[_Klimit_];
- my $rlines_new = $self->[_rlines_];
- my $K_opening_container = $self->[_K_opening_container_];
- my $K_closing_container = $self->[_K_closing_container_];
- my $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
- my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
- my $rSS = $self->[_rSS_];
- my $Iss_opening = $self->[_Iss_opening_];
- my $Iss_closing = $self->[_Iss_closing_];
-
- my $Kfirst;
+ my $rLL = $self->[_rLL_];
my $line_of_tokens = {};
foreach (
qw(
$line_of_tokens->{$_} = $line_of_tokens_old->{$_};
}
- # Data needed by Logger
- $line_of_tokens->{_level_0} = 0;
- $line_of_tokens->{_ci_level_0} = 0;
- $line_of_tokens->{_nesting_blocks_0} = EMPTY_STRING;
- $line_of_tokens->{_nesting_tokens_0} = EMPTY_STRING;
-
- # Needed to avoid trimming quotes
- $line_of_tokens->{_ended_in_blank_token} = undef;
-
- my $line_type = $line_of_tokens_old->{_line_type};
- my $line_number = $line_of_tokens_old->{_line_number};
- my $CODE_type = EMPTY_STRING;
+ my $line_type = $line_of_tokens_old->{_line_type};
my $tee_output;
+ my $Klimit = $self->[_Klimit_];
+ my $Kfirst;
+
# Handle line of non-code
if ( $line_type ne 'CODE' ) {
$tee_output ||= $rOpts_tee_pod
&& substr( $line_type, 0, 3 ) eq 'POD';
+
+ $line_of_tokens->{_level_0} = 0;
+ $line_of_tokens->{_ci_level_0} = 0;
+ $line_of_tokens->{_nesting_blocks_0} = EMPTY_STRING;
+ $line_of_tokens->{_nesting_tokens_0} = EMPTY_STRING;
+ $line_of_tokens->{_ended_in_blank_token} = undef;
+
}
# Handle line of code
else {
- my $rtokens = $line_of_tokens_old->{_rtokens};
- my $rtoken_type = $line_of_tokens_old->{_rtoken_type};
- my $rblock_type = $line_of_tokens_old->{_rblock_type};
- my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence};
- my $rlevels = $line_of_tokens_old->{_rlevels};
- my $rci_levels = $line_of_tokens_old->{_rci_levels};
+ my $rtokens = $line_of_tokens_old->{_rtokens};
+ my $jmax = @{$rtokens} - 1;
- my $jmax = @{$rtokens} - 1;
if ( $jmax >= 0 ) {
- $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
- DEVEL_MODE
- && check_sequence_numbers( $rtokens, $rtoken_type,
- $rtype_sequence, $line_number );
-
- # Find the starting nesting depth ...
- # It must be the value of variable 'level' of the first token
- # because the nesting depth is used as a token tag in the
- # vertical aligner and is compared to actual levels.
- # So vertical alignment problems will occur with any other
- # starting value.
- if ( !defined($nesting_depth) ) {
- $nesting_depth = $rlevels->[0];
- $nesting_depth = 0 if ( $nesting_depth < 0 );
- $rdepth_of_opening_seqno->[SEQ_ROOT] = $nesting_depth - 1;
- }
-
- foreach my $j ( 0 .. $jmax ) {
-
- # Do not clip the 'level' variable yet. We will do this
- # later, in sub 'store_token_to_go'. The reason is that in
- # files with level errors, the logic in 'weld_cuddled_else'
- # uses a stack logic that will give bad welds if we clip
- # levels here.
- ## if ( $rlevels->[$j] < 0 ) { $rlevels->[$j] = 0 }
-
- # Handle tokens with sequence numbers ...
- my $seqno = $rtype_sequence->[$j];
- if ($seqno) {
- my $token = $rtokens->[$j];
- my $sign = 1;
- if ( $is_opening_token{$token} ) {
- $K_opening_container->{$seqno} = @{$rLL};
- $rdepth_of_opening_seqno->[$seqno] = $nesting_depth;
- $nesting_depth++;
-
- # Save a sequenced block type at its opening token.
- # Note that unsequenced block types can occur in
- # unbalanced code with errors but are ignored here.
- if ( $rblock_type->[$j] ) {
- my $block_type = $rblock_type->[$j];
- $rblock_type_of_seqno->{$seqno} = $block_type;
- if ( substr( $block_type, 0, 3 ) eq 'sub'
- || $rOpts_sub_alias_list )
- {
- if ( $block_type =~ /$ASUB_PATTERN/ ) {
- $self->[_ris_asub_block_]->{$seqno} = 1;
- }
- elsif ( $block_type =~ /$SUB_PATTERN/ ) {
- $self->[_ris_sub_block_]->{$seqno} = 1;
- }
- }
- }
- }
- elsif ( $is_closing_token{$token} ) {
-
- # The opening depth should always be defined, and
- # it should equal $nesting_depth-1. To protect
- # against unforseen error conditions, however, we
- # will check this and fix things if necessary. For
- # a test case see issue c055.
- my $opening_depth =
- $rdepth_of_opening_seqno->[$seqno];
- if ( !defined($opening_depth) ) {
- $opening_depth = $nesting_depth - 1;
- $opening_depth = 0 if ( $opening_depth < 0 );
- $rdepth_of_opening_seqno->[$seqno] =
- $opening_depth;
-
- # This is not fatal but should not happen. The
- # tokenizer generates sequence numbers
- # incrementally upon encountering each new
- # opening token, so every positive sequence
- # number should correspond to an opening token.
- if (DEVEL_MODE) {
- Fault(<<EOM);
-No opening token seen for closing token = '$token' at seq=$seqno at depth=$opening_depth
-EOM
- }
- }
- $K_closing_container->{$seqno} = @{$rLL};
- $nesting_depth = $opening_depth;
- $sign = -1;
- }
- elsif ( $token eq '?' ) {
- }
- elsif ( $token eq ':' ) {
- $sign = -1;
- }
-
- # The only sequenced types output by the tokenizer are
- # the opening & closing containers and the ternary
- # types. So we would only get here if the tokenizer has
- # been changed to mark some other tokens with sequence
- # numbers, or if an error has been introduced in a
- # hash such as %is_opening_container
- else {
- if (DEVEL_MODE) {
- Fault(<<EOM);
-Unexpected sequenced token '$token' of type '$rtoken_type->[$j]', sequence=$seqno arrived from tokenizer.
-Expecting only opening or closing container tokens or ternary tokens with sequence numbers.
-EOM
- }
- }
-
- if ( $sign > 0 ) {
- $Iss_opening->[$seqno] = @{$rSS};
-
- # For efficiency, we find the maximum level of
- # opening tokens of any type. The actual maximum
- # level will be that of their contents which is 1
- # greater. That will be fixed in sub
- # 'finish_formatting'.
- my $level = $rlevels->[$j];
- if ( $level > $self->[_maximum_level_] ) {
- $self->[_maximum_level_] = $level;
- $self->[_maximum_level_at_line_] = $line_number;
- }
- }
- else { $Iss_closing->[$seqno] = @{$rSS} }
- push @{$rSS}, $sign * $seqno;
-
- }
- else {
- $seqno = EMPTY_STRING unless ( defined($seqno) );
- }
+ $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
- my @tokary;
- @tokary[
- _TOKEN_, _TYPE_, _TYPE_SEQUENCE_,
- _LEVEL_, _CI_LEVEL_, _LINE_INDEX_,
- ]
- = (
- $rtokens->[$j], $rtoken_type->[$j],
- $seqno, $rlevels->[$j],
- $rci_levels->[$j], $line_number - 1,
- );
- push @{$rLL}, \@tokary;
- } ## end foreach my $j ( 0 .. $jmax )
+ #----------------------------
+ # get the tokens on this line
+ #----------------------------
+ $self->write_line_inner_loop( $line_of_tokens_old,
+ $line_of_tokens );
+ # update Klimit for added tokens
$Klimit = @{$rLL} - 1;
- # Need to remember if we can trim the input line
- $line_of_tokens->{_ended_in_blank_token} =
- $rtoken_type->[$jmax] eq 'b';
+ } ## end if ( $jmax >= 0 )
+ else {
- $line_of_tokens->{_level_0} = $rlevels->[0];
- $line_of_tokens->{_ci_level_0} = $rci_levels->[0];
- $line_of_tokens->{_nesting_blocks_0} =
- $line_of_tokens_old->{_nesting_blocks_0};
- $line_of_tokens->{_nesting_tokens_0} =
- $line_of_tokens_old->{_nesting_tokens_0};
+ # blank line
+ $line_of_tokens->{_level_0} = 0;
+ $line_of_tokens->{_ci_level_0} = 0;
+ $line_of_tokens->{_nesting_blocks_0} = EMPTY_STRING;
+ $line_of_tokens->{_nesting_tokens_0} = EMPTY_STRING;
+ $line_of_tokens->{_ended_in_blank_token} = undef;
- } ## end if ( $jmax >= 0 )
+ }
$tee_output ||=
$rOpts_tee_block_comments
} ## end if ( $line_type eq 'CODE')
# Finish storing line variables
+ $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ];
+ $self->[_Klimit_] = $Klimit;
+ my $rlines = $self->[_rlines_];
+ push @{$rlines}, $line_of_tokens;
+
if ($tee_output) {
my $fh_tee = $self->[_fh_tee_];
my $line_text = $line_of_tokens_old->{_line_text};
$fh_tee->print($line_text) if ($fh_tee);
}
- $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ];
- $line_of_tokens->{_code_type} = $CODE_type;
- $self->[_Klimit_] = $Klimit;
-
- push @{$rlines_new}, $line_of_tokens;
return;
} ## end sub write_line
+
+ sub write_line_inner_loop {
+ my ( $self, $line_of_tokens_old, $line_of_tokens ) = @_;
+
+ #---------------------------------------------------------------------
+ # Copy the tokens on one line received from the tokenizer to their new
+ # storage locations.
+ #---------------------------------------------------------------------
+
+ # Input parameters:
+ # $line_of_tokens_old = line received from tokenizer
+ # $line_of_tokens = line of tokens being formed for formatter
+
+ my $rtokens = $line_of_tokens_old->{_rtokens};
+ my $jmax = @{$rtokens} - 1;
+ if ( $jmax < 0 ) {
+
+ # safety check; shouldn't happen
+ DEVEL_MODE && Fault("unexpected jmax=$jmax\n");
+ return;
+ }
+
+ my $line_index = $line_of_tokens_old->{_line_number} - 1;
+ my $rtoken_type = $line_of_tokens_old->{_rtoken_type};
+ my $rblock_type = $line_of_tokens_old->{_rblock_type};
+ my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence};
+ my $rlevels = $line_of_tokens_old->{_rlevels};
+ my $rci_levels = $line_of_tokens_old->{_rci_levels};
+
+ my $rLL = $self->[_rLL_];
+ my $rSS = $self->[_rSS_];
+ my $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
+
+ DEVEL_MODE
+ && check_sequence_numbers( $rtokens, $rtoken_type,
+ $rtype_sequence, $line_index + 1 );
+
+ # Find the starting nesting depth ...
+ # It must be the value of variable 'level' of the first token
+ # because the nesting depth is used as a token tag in the
+ # vertical aligner and is compared to actual levels.
+ # So vertical alignment problems will occur with any other
+ # starting value.
+ if ( !defined($nesting_depth) ) {
+ $nesting_depth = $rlevels->[0];
+ $nesting_depth = 0 if ( $nesting_depth < 0 );
+ $rdepth_of_opening_seqno->[SEQ_ROOT] = $nesting_depth - 1;
+ }
+
+ my $j = -1;
+
+ # NOTE: coding efficiency is critical in this loop over all tokens
+ foreach my $token ( @{$rtokens} ) {
+
+ # Do not clip the 'level' variable yet. We will do this
+ # later, in sub 'store_token_to_go'. The reason is that in
+ # files with level errors, the logic in 'weld_cuddled_else'
+ # uses a stack logic that will give bad welds if we clip
+ # levels here.
+ ## $j++;
+ ## if ( $rlevels->[$j] < 0 ) { $rlevels->[$j] = 0 }
+
+ my $seqno = EMPTY_STRING;
+
+ # Handle tokens with sequence numbers ...
+ # note the ++ increment hidden here for efficiency
+ if ( $rtype_sequence->[ ++$j ] ) {
+ $seqno = $rtype_sequence->[$j];
+ my $sign = 1;
+ if ( $is_opening_token{$token} ) {
+ $self->[_K_opening_container_]->{$seqno} = @{$rLL};
+ $rdepth_of_opening_seqno->[$seqno] = $nesting_depth;
+ $nesting_depth++;
+
+ # Save a sequenced block type at its opening token.
+ # Note that unsequenced block types can occur in
+ # unbalanced code with errors but are ignored here.
+ $self->store_block_type( $rblock_type->[$j], $seqno )
+ if ( $rblock_type->[$j] );
+ }
+ elsif ( $is_closing_token{$token} ) {
+
+ # The opening depth should always be defined, and
+ # it should equal $nesting_depth-1. To protect
+ # against unforseen error conditions, however, we
+ # will check this and fix things if necessary. For
+ # a test case see issue c055.
+ my $opening_depth = $rdepth_of_opening_seqno->[$seqno];
+ if ( !defined($opening_depth) ) {
+ $opening_depth = $nesting_depth - 1;
+ $opening_depth = 0 if ( $opening_depth < 0 );
+ $rdepth_of_opening_seqno->[$seqno] = $opening_depth;
+
+ # This is not fatal but should not happen. The
+ # tokenizer generates sequence numbers
+ # incrementally upon encountering each new
+ # opening token, so every positive sequence
+ # number should correspond to an opening token.
+ DEVEL_MODE && Fault(<<EOM);
+No opening token seen for closing token = '$token' at seq=$seqno at depth=$opening_depth
+EOM
+ }
+ $self->[_K_closing_container_]->{$seqno} = @{$rLL};
+ $nesting_depth = $opening_depth;
+ $sign = -1;
+ }
+ elsif ( $token eq '?' ) {
+ }
+ elsif ( $token eq ':' ) {
+ $sign = -1;
+ }
+
+ # The only sequenced types output by the tokenizer are
+ # the opening & closing containers and the ternary
+ # types. So we would only get here if the tokenizer has
+ # been changed to mark some other tokens with sequence
+ # numbers, or if an error has been introduced in a
+ # hash such as %is_opening_container
+ else {
+ DEVEL_MODE && Fault(<<EOM);
+Unexpected sequenced token '$token' of type '$rtoken_type->[$j]', sequence=$seqno arrived from tokenizer.
+Expecting only opening or closing container tokens or ternary tokens with sequence numbers.
+EOM
+ }
+
+ if ( $sign > 0 ) {
+ $self->[_Iss_opening_]->[$seqno] = @{$rSS};
+
+ # For efficiency, we find the maximum level of
+ # opening tokens of any type. The actual maximum
+ # level will be that of their contents which is 1
+ # greater. That will be fixed in sub
+ # 'finish_formatting'.
+ my $level = $rlevels->[$j];
+ if ( $level > $self->[_maximum_level_] ) {
+ $self->[_maximum_level_] = $level;
+ $self->[_maximum_level_at_line_] = $line_index + 1;
+ }
+ }
+ else { $self->[_Iss_closing_]->[$seqno] = @{$rSS} }
+ push @{$rSS}, $sign * $seqno;
+
+ }
+
+ my @tokary;
+ @tokary[
+
+ _TOKEN_,
+ _TYPE_,
+ _TYPE_SEQUENCE_,
+ _LEVEL_,
+ _CI_LEVEL_,
+ _LINE_INDEX_,
+
+ ] = (
+
+ $token,
+ $rtoken_type->[$j],
+ $seqno,
+ $rlevels->[$j],
+ $rci_levels->[$j],
+ $line_index,
+
+ );
+ push @{$rLL}, \@tokary;
+ } ## end token loop
+
+ # Need to remember if we can trim the input line
+ $line_of_tokens->{_ended_in_blank_token} = $rtoken_type->[$jmax] eq 'b';
+
+ # Values needed by Logger
+ $line_of_tokens->{_level_0} = $rlevels->[0];
+ $line_of_tokens->{_ci_level_0} = $rci_levels->[0];
+ $line_of_tokens->{_nesting_blocks_0} =
+ $line_of_tokens_old->{_nesting_blocks_0};
+ $line_of_tokens->{_nesting_tokens_0} =
+ $line_of_tokens_old->{_nesting_tokens_0};
+
+ return;
+
+ } ## end sub write_line_inner_loop
+
} ## end closure write_line
#############################################
# The file has been tokenized and is ready to be formatted.
# All of the relevant data is stored in $self, ready to go.
+ # Returns:
+ # true if input file was copied verbatim due to errors
+ # false otherwise
+
+ # Some of the code in sub break_lists is not robust enough to process code
+ # with arbitrary brace errors. The simplest fix is to just return the file
+ # verbatim if there are brace errors. This fixes issue c160.
+ $severe_error ||= get_saw_brace_error();
+
# Check the maximum level. If it is extremely large we will give up and
# output the file verbatim. Note that the actual maximum level is 1
# greater than the saved value, so we fix that here.
EOM
}
+ # Dump any requested block summary data
+ if ( $rOpts->{'dump-block-summary'} ) {
+ if ($severe_error) { Exit(1) }
+ $self->dump_block_summary();
+ Exit(0);
+ }
+
# output file verbatim if severe error or no formatting requested
if ( $severe_error || $rOpts->{notidy} ) {
$self->dump_verbatim();
- $self->wrapup();
- return;
+ $self->wrapup($severe_error);
+ return 1;
}
# Update the 'save_logfile' flag based to include any tokenization errors.
# We can save time by skipping logfile calls if it is not going to be saved.
my $logger_object = $self->[_logger_object_];
if ($logger_object) {
- $self->[_save_logfile_] = $logger_object->get_save_logfile();
+ my $save_logfile = $logger_object->get_save_logfile();
+ $self->[_save_logfile_] = $save_logfile;
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->set_save_logfile($save_logfile);
}
- my $rix_side_comments = $self->set_CODE_type();
+ {
+ my $rix_side_comments = $self->set_CODE_type();
- $self->find_non_indenting_braces($rix_side_comments);
+ $self->find_non_indenting_braces($rix_side_comments);
- # Handle any requested side comment deletions. It is easier to get
- # this done here rather than farther down the pipeline because IO
- # lines take a different route, and because lines with deleted HSC
- # become BL lines. We have already handled any tee requests in sub
- # getline, so it is safe to delete side comments now.
- $self->delete_side_comments($rix_side_comments)
- if ( $rOpts_delete_side_comments
- || $rOpts_delete_closing_side_comments );
+ # Handle any requested side comment deletions. It is easier to get
+ # this done here rather than farther down the pipeline because IO
+ # lines take a different route, and because lines with deleted HSC
+ # become BL lines. We have already handled any tee requests in sub
+ # getline, so it is safe to delete side comments now.
+ $self->delete_side_comments($rix_side_comments)
+ if ( $rOpts_delete_side_comments
+ || $rOpts_delete_closing_side_comments );
+ }
# Verify that the line hash does not have any unknown keys.
$self->check_line_hashes() if (DEVEL_MODE);
- # Make a pass through all tokens, adding or deleting any whitespace as
- # required. Also make any other changes, such as adding semicolons.
- # All token changes must be made here so that the token data structure
- # remains fixed for the rest of this iteration.
- $self->respace_tokens();
+ {
+ # Make a pass through all tokens, adding or deleting any whitespace as
+ # required. Also make any other changes, such as adding semicolons.
+ # All token changes must be made here so that the token data structure
+ # remains fixed for the rest of this iteration.
+ my ( $error, $rqw_lines ) = $self->respace_tokens();
+ if ($error) {
+ $self->dump_verbatim();
+ $self->wrapup();
+ return 1;
+ }
+
+ $self->find_multiline_qw($rqw_lines);
+ }
+
+ $self->examine_vertical_tightness_flags();
$self->set_excluded_lp_containers();
- $self->find_multiline_qw();
-
$self->keep_old_line_breaks();
# Implement any welding needed for the -wn or -cb options
$self->weld_containers();
- $self->collapsed_lengths()
+ # Collect info needed to implement the -xlp style
+ $self->xlp_collapsed_lengths()
if ( $rOpts_line_up_parentheses && $rOpts_extended_line_up_parentheses );
# Locate small nested blocks which should not be broken
$self->mark_short_nested_blocks();
- $self->adjust_indentation_levels();
+ $self->special_indentation_adjustments();
# Verify that the main token array looks OK. If this ever causes a fault
# then place similar checks before the sub calls above to localize the
return;
} ## end sub finish_formatting
-sub set_CODE_type {
- my ($self) = @_;
-
- # Examine each line of code and set a flag '$CODE_type' to describe it.
- # Also return a list of lines with side comments.
+my %is_loop_type;
- my $rLL = $self->[_rLL_];
- my $Klimit = $self->[_Klimit_];
- my $rlines = $self->[_rlines_];
- my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+BEGIN {
+ my @q = qw( for foreach while do until );
+ @{is_loop_type}{@q} = (1) x scalar(@q);
+}
- my $rOpts_format_skipping_begin = $rOpts->{'format-skipping-begin'};
- my $rOpts_format_skipping_end = $rOpts->{'format-skipping-end'};
- my $rOpts_static_block_comment_prefix =
- $rOpts->{'static-block-comment-prefix'};
+sub find_level_info {
- # Remember indexes of lines with side comments
- my @ix_side_comments;
+ # Find level ranges and total variations of all code blocks in this file.
- my $In_format_skipping_section = 0;
- my $Saw_VERSION_in_this_file = 0;
- my $has_side_comment = 0;
- my ( $Kfirst, $Klast );
- my $CODE_type;
+ # Returns:
+ # ref to hash with block info, with seqno as key (see below)
- # Loop to set CODE_type
+ my ($self) = @_;
- # Possible CODE_types
- # 'VB' = Verbatim - line goes out verbatim (a quote)
- # 'FS' = Format Skipping - line goes out verbatim
- # 'BL' = Blank Line
- # 'HSC' = Hanging Side Comment - fix this hanging side comment
- # 'SBCX'= Static Block Comment Without Leading Space
- # 'SBC' = Static Block Comment
- # 'BC' = Block Comment - an ordinary full line comment
- # 'IO' = Indent Only - line goes out unchanged except for indentation
- # 'NIN' = No Internal Newlines - line does not get broken
- # 'VER' = VERSION statement
- # '' = ordinary line of code with no restrictions
+ # The array _rSS_ has the complete container tree for this file.
+ my $rSS = $self->[_rSS_];
- my $ix_line = -1;
- foreach my $line_of_tokens ( @{$rlines} ) {
- $ix_line++;
- my $input_line_no = $line_of_tokens->{_line_number};
- my $line_type = $line_of_tokens->{_line_type};
+ # We will be ignoring everything except code block containers
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
- my $Last_line_had_side_comment = $has_side_comment;
- if ($has_side_comment) {
- push @ix_side_comments, $ix_line - 1;
+ my @stack;
+ my %level_info;
+
+ # TREE_LOOP:
+ foreach my $sseq ( @{$rSS} ) {
+ my $stack_depth = @stack;
+ my $seq_next = $sseq > 0 ? $sseq : -$sseq;
+
+ next if ( !$rblock_type_of_seqno->{$seq_next} );
+ if ( $sseq > 0 ) {
+
+ # STACK_LOOP:
+ my $item;
+ foreach my $seq (@stack) {
+ $item = $level_info{$seq};
+ if ( $item->{maximum_depth} < $stack_depth ) {
+ $item->{maximum_depth} = $stack_depth;
+ }
+ $item->{block_count}++;
+ } ## end STACK LOOP
+
+ push @stack, $seq_next;
+ my $block_type = $rblock_type_of_seqno->{$seq_next};
+
+ # If this block is a loop nested within a loop, then we
+ # will mark it as an 'inner_loop'. This is a useful
+ # complexity measure.
+ my $is_inner_loop = 0;
+ if ( $is_loop_type{$block_type} && defined($item) ) {
+ $is_inner_loop = $is_loop_type{ $item->{block_type} };
+ }
+
+ $level_info{$seq_next} = {
+ starting_depth => $stack_depth,
+ maximum_depth => $stack_depth,
+ block_count => 1,
+ block_type => $block_type,
+ is_inner_loop => $is_inner_loop,
+ };
}
- $has_side_comment = 0;
+ else {
+ my $seq_test = pop @stack;
- next unless ( $line_type eq 'CODE' );
+ # error check
+ if ( $seq_test != $seq_next ) {
- my $Klast_prev = $Klast;
+ # Shouldn't happen - the $rSS array must have an error
+ DEVEL_MODE && Fault("stack error finding total depths\n");
- my $rK_range = $line_of_tokens->{_rK_range};
- ( $Kfirst, $Klast ) = @{$rK_range};
+ %level_info = ();
+ last;
+ }
+ }
+ } ## end TREE_LOOP
+ return \%level_info;
+} ## end sub find_level_info
- my $last_CODE_type = $CODE_type;
- $CODE_type = EMPTY_STRING;
+sub find_loop_label {
- my $input_line = $line_of_tokens->{_line_text};
- my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1;
+ my ( $self, $seqno ) = @_;
- my $is_block_comment = 0;
- if ( $jmax >= 0 && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
- if ( $jmax == 0 ) { $is_block_comment = 1; }
- else { $has_side_comment = 1 }
- }
+ # Given:
+ # $seqno = sequence number of a block of code for a loop
+ # Return:
+ # $label = the loop label text, if any, or an empty string
- # Write line verbatim if we are in a formatting skip section
- if ($In_format_skipping_section) {
+ my $rLL = $self->[_rLL_];
+ my $rlines = $self->[_rlines_];
+ my $K_opening_container = $self->[_K_opening_container_];
- # Note: extra space appended to comment simplifies pattern matching
- if (
- $is_block_comment
+ my $label = EMPTY_STRING;
+ my $K_opening = $K_opening_container->{$seqno};
- # optional fast pre-check
- && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#>>>'
- || $rOpts_format_skipping_end )
+ # backup to the line with the opening paren, if any, in case the
+ # keyword is on a different line
+ my $Kp = $self->K_previous_code($K_opening);
+ return $label unless ( defined($Kp) );
+ if ( $rLL->[$Kp]->[_TOKEN_] eq ')' ) {
+ $seqno = $rLL->[$Kp]->[_TYPE_SEQUENCE_];
+ $K_opening = $K_opening_container->{$seqno};
+ }
- && ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~
- /$format_skipping_pattern_end/
- )
- {
- $In_format_skipping_section = 0;
- write_logfile_entry(
- "Line $input_line_no: Exiting format-skipping section\n");
- }
- $CODE_type = 'FS';
- goto NEXT;
- }
+ return $label unless ( defined($K_opening) );
+ my $lx_open = $rLL->[$K_opening]->[_LINE_INDEX_];
- # Check for a continued quote..
- if ( $line_of_tokens->{_starting_in_quote} ) {
+ # look for a lable within a few lines; allow a couple of blank lines
+ foreach my $lx ( reverse( $lx_open - 3 .. $lx_open ) ) {
+ last if ( $lx < 0 );
+ my $line_of_tokens = $rlines->[$lx];
+ my $line_type = $line_of_tokens->{_line_type};
- # A line which is entirely a quote or pattern must go out
- # verbatim. Note: the \n is contained in $input_line.
- if ( $jmax <= 0 ) {
- if ( ( $input_line =~ "\t" ) ) {
- my $input_line_number = $line_of_tokens->{_line_number};
- $self->note_embedded_tab($input_line_number);
- }
- $CODE_type = 'VB';
- goto NEXT;
- }
- }
+ # stop search on a non-code line
+ last if ( $line_type ne 'CODE' );
- # See if we are entering a formatting skip section
- if (
- $is_block_comment
+ my $rK_range = $line_of_tokens->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
- # optional fast pre-check
- && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#<<<'
- || $rOpts_format_skipping_begin )
+ # skip a blank line
+ next if ( !defined($Kfirst) );
- && $rOpts_format_skipping
- && ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~
- /$format_skipping_pattern_begin/
- )
- {
- $In_format_skipping_section = 1;
- write_logfile_entry(
- "Line $input_line_no: Entering format-skipping section\n");
- $CODE_type = 'FS';
- goto NEXT;
+ # check for a lable
+ if ( $rLL->[$Kfirst]->[_TYPE_] eq 'J' ) {
+ $label = $rLL->[$Kfirst]->[_TOKEN_];
+ last;
}
- # ignore trailing blank tokens (they will get deleted later)
- if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
- $jmax--;
- }
+ # quit the search if we are above the starting line
+ last if ( $lx < $lx_open );
+ }
- # blank line..
- if ( $jmax < 0 ) {
- $CODE_type = 'BL';
- goto NEXT;
- }
+ return $label;
+} ## end sub find_loop_label
- # Handle comments
- if ($is_block_comment) {
+{ ## closure find_mccabe_count
+ my %is_mccabe_logic_keyword;
+ my %is_mccabe_logic_operator;
- # see if this is a static block comment (starts with ## by default)
- my $is_static_block_comment = 0;
- my $no_leading_space = substr( $input_line, 0, 1 ) eq '#';
- if (
+ BEGIN {
+ my @q = (qw( && || ||= &&= ? <<= >>= ));
+ @is_mccabe_logic_operator{@q} = (1) x scalar(@q);
- # optional fast pre-check
- (
- substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 2 ) eq '##'
- || $rOpts_static_block_comment_prefix
- )
+ @q = (qw( and or xor if else elsif unless until while for foreach ));
+ @is_mccabe_logic_keyword{@q} = (1) x scalar(@q);
+ } ## end BEGIN
- && $rOpts_static_block_comments
- && $input_line =~ /$static_block_comment_pattern/
- )
- {
- $is_static_block_comment = 1;
- }
+ sub find_mccabe_count {
+ my ($self) = @_;
- # Check for comments which are line directives
- # Treat exactly as static block comments without leading space
- # reference: perlsyn, near end, section Plain Old Comments (Not!)
- # example: '# line 42 "new_filename.plx"'
- if (
- $no_leading_space
- && $input_line =~ /^\# \s*
- line \s+ (\d+) \s*
- (?:\s("?)([^"]+)\2)? \s*
- $/x
- )
- {
- $is_static_block_comment = 1;
+ # Find the cumulative mccabe count to each token
+ # Return '$rmccabe_count_sum' = ref to array with cumulative
+ # mccabe count to each token $K
+
+ # NOTE: This sub currently follows the definitions in Perl::Critic
+
+ my $rmccabe_count_sum;
+ my $rLL = $self->[_rLL_];
+ my $count = 0;
+ my $Klimit = $self->[_Klimit_];
+ foreach my $KK ( 0 .. $Klimit ) {
+ $rmccabe_count_sum->{$KK} = $count;
+ my $type = $rLL->[$KK]->[_TYPE_];
+ if ( $type eq 'k' ) {
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ if ( $is_mccabe_logic_keyword{$token} ) { $count++ }
+ }
+ elsif ( $is_mccabe_logic_operator{$type} ) {
+ $count++;
}
+ }
+ $rmccabe_count_sum->{ $Klimit + 1 } = $count;
+ return $rmccabe_count_sum;
+ } ## end sub find_mccabe_count
+} ## end closure find_mccabe_count
- # look for hanging side comment ...
- if (
- $Last_line_had_side_comment # last line had side comment
- && !$no_leading_space # there is some leading space
- && !
- $is_static_block_comment # do not make static comment hanging
- )
- {
+sub find_code_line_count {
+ my ($self) = @_;
- # continuing an existing HSC chain?
- if ( $last_CODE_type eq 'HSC' ) {
- $has_side_comment = 1;
- $CODE_type = 'HSC';
- goto NEXT;
- }
+ # Find the cumulative number of lines of code, excluding blanks,
+ # comments and pod.
+ # Return '$rcode_line_count' = ref to array with cumulative
+ # code line count for each input line number.
- # starting a new HSC chain?
- elsif (
+ my $rcode_line_count;
+ my $rLL = $self->[_rLL_];
+ my $rlines = $self->[_rlines_];
+ my $ix_line = -1;
+ my $code_line_count = 0;
- $rOpts->{'hanging-side-comments'} # user is allowing
- # hanging side comments
- # like this
+ # loop over all lines
+ foreach my $line_of_tokens ( @{$rlines} ) {
+ $ix_line++;
- && ( defined($Klast_prev) && $Klast_prev > 1 )
+ # what type of line?
+ my $line_type = $line_of_tokens->{_line_type};
- # and the previous side comment was not static (issue c070)
- && !(
- $rOpts->{'static-side-comments'}
- && $rLL->[$Klast_prev]->[_TOKEN_] =~
- /$static_side_comment_pattern/
- )
+ # if 'CODE' it must be non-blank and non-comment
+ if ( $line_type eq 'CODE' ) {
+ my $rK_range = $line_of_tokens->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
- )
- {
+ if ( defined($Kfirst) ) {
- # and it is not a closing side comment (issue c070).
- my $K_penult = $Klast_prev - 1;
- $K_penult -= 1 if ( $rLL->[$K_penult]->[_TYPE_] eq 'b' );
- my $follows_csc =
- ( $rLL->[$K_penult]->[_TOKEN_] eq '}'
- && $rLL->[$K_penult]->[_TYPE_] eq '}'
- && $rLL->[$Klast_prev]->[_TOKEN_] =~
- /$closing_side_comment_prefix_pattern/ );
+ # it is non-blank
+ my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1;
+ if ( $jmax > 0 || $rLL->[$Klast]->[_TYPE_] ne '#' ) {
- if ( !$follows_csc ) {
- $has_side_comment = 1;
- $CODE_type = 'HSC';
- goto NEXT;
- }
+ # ok, it is a non-comment
+ $code_line_count++;
}
}
-
- if ($is_static_block_comment) {
- $CODE_type = $no_leading_space ? 'SBCX' : 'SBC';
- goto NEXT;
- }
- elsif ($Last_line_had_side_comment
- && !$rOpts_maximum_consecutive_blank_lines
- && $rLL->[$Kfirst]->[_LEVEL_] > 0 )
- {
- # Emergency fix to keep a block comment from becoming a hanging
- # side comment. This fix is for the case that blank lines
- # cannot be inserted. There is related code in sub
- # 'process_line_of_CODE'
- $CODE_type = 'SBCX';
- goto NEXT;
- }
- else {
- $CODE_type = 'BC';
- goto NEXT;
- }
}
- # End of comments. Handle a line of normal code:
+ # Count all other special line types except pod;
+ # For a list of line types see sub 'process_all_lines'
+ elsif ( $line_type !~ /^POD/ ) { $code_line_count++ }
- if ($rOpts_indent_only) {
- $CODE_type = 'IO';
- goto NEXT;
- }
+ # Store the cumulative count using the input line index
+ $rcode_line_count->[$ix_line] = $code_line_count;
+ }
+ return $rcode_line_count;
+} ## end sub find_code_line_count
- if ( !$rOpts_add_newlines ) {
- $CODE_type = 'NIN';
- goto NEXT;
- }
+sub find_selected_packages {
- # Patch needed for MakeMaker. Do not break a statement
- # in which $VERSION may be calculated. See MakeMaker.pm;
- # this is based on the coding in it.
- # The first line of a file that matches this will be eval'd:
- # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
- # Examples:
- # *VERSION = \'1.01';
- # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
- # We will pass such a line straight through without breaking
- # it unless -npvl is used.
+ my ( $self, $rdump_block_types ) = @_;
- # Patch for problem reported in RT #81866, where files
- # had been flattened into a single line and couldn't be
- # tidied without -npvl. There are two parts to this patch:
- # First, it is not done for a really long line (80 tokens for now).
- # Second, we will only allow up to one semicolon
- # before the VERSION. We need to allow at least one semicolon
- # for statements like this:
- # require Exporter; our $VERSION = $Exporter::VERSION;
- # where both statements must be on a single line for MakeMaker
+ # returns a list of all package statements in a file if requested
- if ( !$Saw_VERSION_in_this_file
- && $jmax < 80
- && $input_line =~
- /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
- {
- $Saw_VERSION_in_this_file = 1;
- write_logfile_entry("passing VERSION line; -npvl deactivates\n");
+ unless ( $rdump_block_types->{'*'}
+ || $rdump_block_types->{'package'}
+ || $rdump_block_types->{'class'} )
+ {
+ return;
+ }
- # This code type has lower priority than others
- $CODE_type = 'VER';
- goto NEXT;
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
+ my $rlines = $self->[_rlines_];
+
+ my $K_closing_container = $self->[_K_closing_container_];
+ my @package_list;
+ my @package_sweep;
+ foreach my $KK ( 0 .. $Klimit ) {
+ my $item = $rLL->[$KK];
+ my $type = $item->[_TYPE_];
+ if ( $type ne 'i' ) {
+ next;
}
+ my $token = $item->[_TOKEN_];
+ if ( substr( $token, 0, 7 ) eq 'package' && $token =~ /^package\s/
+ || substr( $token, 0, 5 ) eq 'class' && $token =~ /^class\s/ )
+ {
- NEXT:
- $line_of_tokens->{_code_type} = $CODE_type;
- }
+ $token =~ s/\s+/ /g;
+ my ( $keyword, $name ) = split /\s+/, $token, 2;
+
+ my $lx_start = $item->[_LINE_INDEX_];
+ my $level = $item->[_LEVEL_];
+ my $parent_seqno = $self->parent_seqno_by_K($KK);
+
+ # Skip a class BLOCK because it will be handled as a block
+ if ( $keyword eq 'class' ) {
+ my $line_of_tokens = $rlines->[$lx_start];
+ my $rK_range = $line_of_tokens->{_rK_range};
+ my ( $K_first, $K_last ) = @{$rK_range};
+ if ( $rLL->[$K_last]->[_TYPE_] eq '#' ) {
+ $K_last = $self->K_previous_code($K_last);
+ }
+ if ( defined($K_last) ) {
+ my $seqno_class = $rLL->[$K_last]->[_TYPE_SEQUENCE_];
+ my $block_type_next =
+ $self->[_rblock_type_of_seqno_]->{$seqno_class};
+
+ # these block types are currently marked 'package'
+ # but may be 'class' in the future, so allow both.
+ if ( defined($block_type_next)
+ && $block_type_next =~ /^(class|package)\b/ )
+ {
+ next;
+ }
+ }
+ }
- if ($has_side_comment) {
- push @ix_side_comments, $ix_line;
+ my $K_closing = $Klimit;
+ if ( $parent_seqno != SEQ_ROOT ) {
+ my $Kc = $K_closing_container->{$parent_seqno};
+ if ( defined($Kc) ) {
+ $K_closing = $Kc;
+ }
+ }
+
+ # This package ends any previous package at this level
+ if ( defined( my $ix = $package_sweep[$level] ) ) {
+ my $rpk = $package_list[$ix];
+ my $Kc = $rpk->{K_closing};
+ if ( $Kc > $KK ) {
+ $rpk->{K_closing} = $KK - 1;
+ }
+ }
+ $package_sweep[$level] = @package_list;
+
+ # max_change and block_count are not currently reported 'package'
+ push @package_list,
+ {
+ line_start => $lx_start + 1,
+ K_opening => $KK,
+ K_closing => $Klimit,
+ name => $name,
+ type => $keyword,
+ level => $level,
+ max_change => 0,
+ block_count => 0,
+ };
+ }
}
- return \@ix_side_comments;
-} ## end sub set_CODE_type
+ return \@package_list;
+} ## end sub find_selected_packages
-sub find_non_indenting_braces {
+sub find_selected_blocks {
- my ( $self, $rix_side_comments ) = @_;
- return unless ( $rOpts->{'non-indenting-braces'} );
- my $rLL = $self->[_rLL_];
- my $Klimit = $self->[_Klimit_];
- return unless ( defined($rLL) && @{$rLL} );
+ my ( $self, $rdump_block_types ) = @_;
+
+ # Find blocks needed for --dump-block-summary
+ # Returns:
+ # $rslected_blocks = ref to a list of information on the selected blocks
+
+ my $rLL = $self->[_rLL_];
my $rlines = $self->[_rlines_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
- my $rseqno_non_indenting_brace_by_ix =
- $self->[_rseqno_non_indenting_brace_by_ix_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $ris_asub_block = $self->[_ris_asub_block_];
+ my $ris_sub_block = $self->[_ris_sub_block_];
- foreach my $ix ( @{$rix_side_comments} ) {
- my $line_of_tokens = $rlines->[$ix];
- my $line_type = $line_of_tokens->{_line_type};
- if ( $line_type ne 'CODE' ) {
-
- # shouldn't happen
- next;
- }
- my $CODE_type = $line_of_tokens->{_code_type};
- my $rK_range = $line_of_tokens->{_rK_range};
- my ( $Kfirst, $Klast ) = @{$rK_range};
- unless ( defined($Kfirst) && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
-
- # shouldn't happen
- next;
- }
- next unless ( $Klast > $Kfirst ); # maybe HSC
- my $token_sc = $rLL->[$Klast]->[_TOKEN_];
- my $K_m = $Klast - 1;
- my $type_m = $rLL->[$K_m]->[_TYPE_];
- if ( $type_m eq 'b' && $K_m > $Kfirst ) {
- $K_m--;
- $type_m = $rLL->[$K_m]->[_TYPE_];
- }
- my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_];
- if ($seqno_m) {
- my $block_type_m = $rblock_type_of_seqno->{$seqno_m};
+ my $dump_all_types = $rdump_block_types->{'*'};
- # The pattern ends in \s but we have removed the newline, so
- # we added it back for the match. That way we require an exact
- # match to the special string and also allow additional text.
- $token_sc .= "\n";
- if ( $block_type_m
- && $is_opening_type{$type_m}
- && $token_sc =~ /$non_indenting_brace_pattern/ )
- {
- $rseqno_non_indenting_brace_by_ix->{$ix} = $seqno_m;
- }
- }
- }
- return;
-} ## end sub find_non_indenting_braces
+ # Get level variation info for code blocks
+ my $rlevel_info = $self->find_level_info();
-sub delete_side_comments {
- my ( $self, $rix_side_comments ) = @_;
+ my @selected_blocks;
- # Given a list of indexes of lines with side comments, handle any
- # requested side comment deletions.
+ #---------------------------------------------------
+ # BEGIN loop over all blocks to find selected blocks
+ #---------------------------------------------------
+ foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
- my $rLL = $self->[_rLL_];
- my $rlines = $self->[_rlines_];
- my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
- my $rseqno_non_indenting_brace_by_ix =
- $self->[_rseqno_non_indenting_brace_by_ix_];
+ my $type;
+ my $name = EMPTY_STRING;
+ my $block_type = $rblock_type_of_seqno->{$seqno};
+ my $K_opening = $K_opening_container->{$seqno};
+ my $K_closing = $K_closing_container->{$seqno};
+ my $level = $rLL->[$K_opening]->[_LEVEL_];
- foreach my $ix ( @{$rix_side_comments} ) {
- my $line_of_tokens = $rlines->[$ix];
- my $line_type = $line_of_tokens->{_line_type};
+ my $lx_open = $rLL->[$K_opening]->[_LINE_INDEX_];
+ my $line_of_tokens = $rlines->[$lx_open];
+ my $rK_range = $line_of_tokens->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ if ( !defined($Kfirst) || !defined($Klast) || $Kfirst > $K_opening ) {
+ my $line_type = $line_of_tokens->{_line_type};
- # This fault shouldn't happen because we only saved CODE lines with
- # side comments in the TASK 1 loop above.
- if ( $line_type ne 'CODE' ) {
- if (DEVEL_MODE) {
- my $lno = $ix + 1;
- Fault(<<EOM);
-Hit unexpected line_type = '$line_type' near line $lno while deleting side comments, should be 'CODE'
+ # shouldn't happen
+ my $CODE_type = $line_of_tokens->{_code_type};
+ DEVEL_MODE && Fault(<<EOM);
+unexpected line_type=$line_type at line $lx_open, code type=$CODE_type
EOM
- }
next;
}
- my $CODE_type = $line_of_tokens->{_code_type};
- my $rK_range = $line_of_tokens->{_rK_range};
- my ( $Kfirst, $Klast ) = @{$rK_range};
+ my ( $max_change, $block_count, $inner_loop_plus ) =
+ ( 0, 0, EMPTY_STRING );
+ my $item = $rlevel_info->{$seqno};
+ if ( defined($item) ) {
+ my $starting_depth = $item->{starting_depth};
+ my $maximum_depth = $item->{maximum_depth};
+ $block_count = $item->{block_count};
+ $max_change = $maximum_depth - $starting_depth + 1;
- unless ( defined($Kfirst) && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
- if (DEVEL_MODE) {
- my $lno = $ix + 1;
- Fault(<<EOM);
-Did not find side comment near line $lno while deleting side comments
-EOM
- }
- next;
+ # this is a '+' character if this block is an inner loops
+ $inner_loop_plus = $item->{is_inner_loop} ? '+' : EMPTY_STRING;
}
- my $delete_side_comment =
- $rOpts_delete_side_comments
- && ( $Klast > $Kfirst || $CODE_type eq 'HSC' )
- && (!$CODE_type
- || $CODE_type eq 'HSC'
- || $CODE_type eq 'IO'
- || $CODE_type eq 'NIN' );
-
- # Do not delete special control side comments
- if ( $rseqno_non_indenting_brace_by_ix->{$ix} ) {
- $delete_side_comment = 0;
+ # Skip closures unless type 'closure' is explicitely requested
+ if ( ( $block_type eq '}' || $block_type eq ';' )
+ && $rdump_block_types->{'closure'} )
+ {
+ $type = 'closure';
}
- if (
- $rOpts_delete_closing_side_comments
- && !$delete_side_comment
- && $Klast > $Kfirst
- && ( !$CODE_type
- || $CODE_type eq 'HSC'
- || $CODE_type eq 'IO'
- || $CODE_type eq 'NIN' )
+ # Both 'sub' and 'asub' select an anonymous sub.
+ # This allows anonymous subs to be explicitely selected
+ elsif (
+ $ris_asub_block->{$seqno}
+ && ( $dump_all_types
+ || $rdump_block_types->{'sub'}
+ || $rdump_block_types->{'asub'} )
)
{
- my $token = $rLL->[$Klast]->[_TOKEN_];
- my $K_m = $Klast - 1;
- my $type_m = $rLL->[$K_m]->[_TYPE_];
- if ( $type_m eq 'b' && $K_m > $Kfirst ) { $K_m-- }
- my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_];
- if ($seqno_m) {
- my $block_type_m = $rblock_type_of_seqno->{$seqno_m};
- if ( $block_type_m
- && $token =~ /$closing_side_comment_prefix_pattern/
- && $block_type_m =~ /$closing_side_comment_list_pattern/ )
+ $type = 'asub';
+
+ # Look back to try to find some kind of name, such as
+ # my $var = sub { - var is type 'i'
+ # var => sub { - var is type 'w'
+ # -var => sub { - var is type 'w'
+ # 'var' => sub { - var is type 'Q'
+ my ( $saw_equals, $saw_fat_comma, $blank_count );
+ foreach my $KK ( reverse( $Kfirst .. $K_opening - 1 ) ) {
+ my $token_type = $rLL->[$KK]->[_TYPE_];
+ if ( $token_type eq 'b' ) { $blank_count++; next }
+ if ( $token_type eq '=>' ) { $saw_fat_comma++; next }
+ if ( $token_type eq '=' ) { $saw_equals++; next }
+ if ( $token_type eq 'i' && $saw_equals
+ || ( $token_type eq 'w' || $token_type eq 'Q' )
+ && $saw_fat_comma )
{
- $delete_side_comment = 1;
+ $name = $rLL->[$KK]->[_TOKEN_];
+ last;
}
}
- } ## end if ( $rOpts_delete_closing_side_comments...)
-
- if ($delete_side_comment) {
-
- # We are actually just changing the side comment to a blank.
- # This may produce multiple blanks in a row, but sub respace_tokens
- # will check for this and fix it.
- $rLL->[$Klast]->[_TYPE_] = 'b';
- $rLL->[$Klast]->[_TOKEN_] = SPACE;
+ }
+ elsif ( $ris_sub_block->{$seqno}
+ && ( $dump_all_types || $rdump_block_types->{'sub'} ) )
+ {
+ $type = 'sub';
- # The -io option outputs the line text, so we have to update
- # the line text so that the comment does not reappear.
- if ( $CODE_type eq 'IO' ) {
- my $line = EMPTY_STRING;
- foreach my $KK ( $Kfirst .. $Klast - 1 ) {
- $line .= $rLL->[$KK]->[_TOKEN_];
- }
- $line =~ s/\s+$//;
- $line_of_tokens->{_line_text} = $line . "\n";
+ # what we want:
+ # $block_type $name
+ # 'sub setidentifier($)' => 'setidentifier'
+ # 'method setidentifier($)' => 'setidentifier'
+ my @parts = split /\s+/, $block_type;
+ $name = $parts[1];
+ $name =~ s/\(.*$//;
+ }
+ elsif (
+ $block_type =~ /^(package|class)\b/
+ && ( $dump_all_types
+ || $rdump_block_types->{'package'}
+ || $rdump_block_types->{'class'} )
+ )
+ {
+ $type = 'class';
+ my @parts = split /\s+/, $block_type;
+ $name = $parts[1];
+ $name =~ s/\(.*$//;
+ }
+ elsif (
+ $is_loop_type{$block_type}
+ && ( $dump_all_types
+ || $rdump_block_types->{$block_type}
+ || $rdump_block_types->{ $block_type . $inner_loop_plus }
+ || $rdump_block_types->{$inner_loop_plus} )
+ )
+ {
+ $type = $block_type . $inner_loop_plus;
+ }
+ elsif ( $dump_all_types || $rdump_block_types->{$block_type} ) {
+ if ( $is_loop_type{$block_type} ) {
+ $name = $self->find_loop_label($seqno);
}
-
- # If we delete a hanging side comment the line becomes blank.
- if ( $CODE_type eq 'HSC' ) { $line_of_tokens->{_code_type} = 'BL' }
+ $type = $block_type;
+ }
+ else {
+ next;
}
- }
- return;
-} ## end sub delete_side_comments
-
-sub dump_verbatim {
- my $self = shift;
- my $rlines = $self->[_rlines_];
- foreach my $line ( @{$rlines} ) {
- my $input_line = $line->{_line_text};
- $self->write_unindented_line($input_line);
- }
- return;
-}
-my %wU;
-my %wiq;
-my %is_wit;
-my %is_sigil;
-my %is_nonlist_keyword;
-my %is_nonlist_type;
-my %is_s_y_m_slash;
-my %is_unexpected_equals;
+ push @selected_blocks,
+ {
+ K_opening => $K_opening,
+ K_closing => $K_closing,
+ line_start => $lx_open + 1,
+ name => $name,
+ type => $type,
+ level => $level,
+ max_change => $max_change,
+ block_count => $block_count,
+ };
+ } ## END loop to get info for selected blocks
+ return \@selected_blocks;
+} ## end sub find_selected_blocks
+
+sub dump_block_summary {
+ my ($self) = @_;
-BEGIN {
+ # Dump information about selected code blocks to STDOUT
+ # This sub is called when
+ # --dump-block-summary (-dbs) is set.
- # added 'U' to fix cases b1125 b1126 b1127
- my @q = qw(w U);
- @{wU}{@q} = (1) x scalar(@q);
+ # The following controls are available:
+ # --dump-block-types=s (-dbt=s), where s is a list of block types
+ # (if else elsif for foreach while do ... sub) ; default is 'sub'
+ # --dump-block-minimum-lines=n (-dbml=n), where n is the minimum
+ # number of lines for a block to be included; default is 20.
- @q = qw(w i q Q G C Z);
- @{wiq}{@q} = (1) x scalar(@q);
+ my $rOpts_dump_block_types = $rOpts->{'dump-block-types'};
+ if ( !defined($rOpts_dump_block_types) ) { $rOpts_dump_block_types = 'sub' }
+ $rOpts_dump_block_types =~ s/^\s+//;
+ $rOpts_dump_block_types =~ s/\s+$//;
+ my @list = split /\s+/, $rOpts_dump_block_types;
+ my %dump_block_types;
+ @{dump_block_types}{@list} = (1) x scalar(@list);
- @q = qw(w i t);
- @{is_wit}{@q} = (1) x scalar(@q);
+ # Get block info
+ my $rselected_blocks = $self->find_selected_blocks( \%dump_block_types );
- @q = qw($ & % * @);
- @{is_sigil}{@q} = (1) x scalar(@q);
+ # Get package info
+ my $rpackage_list = $self->find_selected_packages( \%dump_block_types );
- # Parens following these keywords will not be marked as lists. Note that
- # 'for' is not included and is handled separately, by including 'f' in the
- # hash %is_counted_type, since it may or may not be a c-style for loop.
- @q = qw( if elsif unless and or );
- @is_nonlist_keyword{@q} = (1) x scalar(@q);
+ return if ( !@{$rselected_blocks} && !@{$rpackage_list} );
- # Parens following these types will not be marked as lists
- @q = qw( && || );
- @is_nonlist_type{@q} = (1) x scalar(@q);
+ my $input_stream_name = get_input_stream_name();
- @q = qw( s y m / );
- @is_s_y_m_slash{@q} = (1) x scalar(@q);
+ # Get code line count
+ my $rcode_line_count = $self->find_code_line_count();
- @q = qw( = == != );
- @is_unexpected_equals{@q} = (1) x scalar(@q);
+ # Get mccabe count
+ my $rmccabe_count_sum = $self->find_mccabe_count();
-}
+ my $rOpts_dump_block_minimum_lines = $rOpts->{'dump-block-minimum-lines'};
+ if ( !defined($rOpts_dump_block_minimum_lines) ) {
+ $rOpts_dump_block_minimum_lines = 20;
+ }
-sub respace_tokens {
+ my $rLL = $self->[_rLL_];
- my $self = shift;
- return if $rOpts->{'indent-only'};
+ # merge blocks and packages, add various counts, filter and print to STDOUT
+ my $routput_lines = [];
+ foreach my $item ( @{$rselected_blocks}, @{$rpackage_list} ) {
- # This routine is called once per file to do as much formatting as possible
- # before new line breaks are set.
+ my $K_opening = $item->{K_opening};
+ my $K_closing = $item->{K_closing};
- # This routine makes all necessary and possible changes to the tokenization
- # after the initial tokenization of the file. This is a tedious routine,
- # but basically it consists of inserting and deleting whitespace between
- # nonblank tokens according to the selected parameters. In a few cases
- # non-space characters are added, deleted or modified.
+ # define total number of lines
+ my $lx_open = $rLL->[$K_opening]->[_LINE_INDEX_];
+ my $lx_close = $rLL->[$K_closing]->[_LINE_INDEX_];
+ my $line_count = $lx_close - $lx_open + 1;
- # The goal of this routine is to create a new token array which only needs
- # the definition of new line breaks and padding to complete formatting. In
- # a few cases we have to cheat a little to achieve this goal. In
- # particular, we may not know if a semicolon will be needed, because it
- # depends on how the line breaks go. To handle this, we include the
- # semicolon as a 'phantom' which can be displayed as normal or as an empty
- # string.
+ # define total number of lines of code excluding blanks, comments, pod
+ my $code_lines_open = $rcode_line_count->[$lx_open];
+ my $code_lines_close = $rcode_line_count->[$lx_close];
+ my $code_lines = 0;
+ if ( defined($code_lines_open) && defined($code_lines_close) ) {
+ $code_lines = $code_lines_close - $code_lines_open + 1;
+ }
- # Method: The old tokens are copied one-by-one, with changes, from the old
- # linear storage array $rLL to a new array $rLL_new.
+ # filter out blocks below the selected code line limit
+ if ( $code_lines < $rOpts_dump_block_minimum_lines ) {
+ next;
+ }
- my $rLL = $self->[_rLL_];
- my $Klimit_old = $self->[_Klimit_];
- my $rlines = $self->[_rlines_];
- my $length_function = $self->[_length_function_];
- my $is_encoded_data = $self->[_is_encoded_data_];
+ # add mccabe_count for this block
+ my $mccabe_closing = $rmccabe_count_sum->{ $K_closing + 1 };
+ my $mccabe_opening = $rmccabe_count_sum->{$K_opening};
+ my $mccabe_count = 1; # add 1 to match Perl::Critic
+ if ( defined($mccabe_opening) && defined($mccabe_closing) ) {
+ $mccabe_count += $mccabe_closing - $mccabe_opening;
+ }
- my $rLL_new = []; # This is the new array
- my $rtoken_vars;
- my $Ktoken_vars; # the old K value of $rtoken_vars
- my ( $Kfirst_old, $Klast_old ); # Range of old line
- my $Klast_old_code; # K of last token if side comment
- my $Kmax = @{$rLL} - 1;
+ # Store the final set of print variables
+ push @{$routput_lines}, [
- my $CODE_type = EMPTY_STRING;
- my $line_type = EMPTY_STRING;
+ $input_stream_name,
+ $item->{line_start},
+ $line_count,
+ $code_lines,
+ $item->{type},
+ $item->{name},
+ $item->{level},
+ $item->{max_change},
+ $item->{block_count},
+ $mccabe_count,
- # Set the whitespace flags, which indicate the token spacing preference.
- my $rwhitespace_flags = $self->set_whitespace_flags();
+ ];
+ }
- # we will be setting token lengths as we go
- my $cumulative_length = 0;
+ return unless @{$routput_lines};
- my %seqno_stack;
- my %K_old_opening_by_seqno = (); # Note: old K index
- my $depth_next = 0;
- my $depth_next_max = 0;
+ # Sort blocks and packages on starting line number
+ my @sorted_lines = sort { $a->[1] <=> $b->[1] } @{$routput_lines};
- # Note that $K_opening_container and $K_closing_container have values
- # defined in sub get_line() for the previous K indexes. They were needed
- # in case option 'indent-only' was set, and we didn't get here. We no longer
- # need those and will eliminate them now to avoid any possible mixing of
- # old and new values.
- my $K_opening_container = $self->[_K_opening_container_] = {};
- my $K_closing_container = $self->[_K_closing_container_] = {};
-
- my $K_closing_ternary = $self->[_K_closing_ternary_];
- my $K_opening_ternary = $self->[_K_opening_ternary_];
- my $rK_phantom_semicolons = $self->[_rK_phantom_semicolons_];
- my $rchildren_of_seqno = $self->[_rchildren_of_seqno_];
- my $rhas_broken_code_block = $self->[_rhas_broken_code_block_];
- my $rhas_broken_list = $self->[_rhas_broken_list_];
- my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
- my $rhas_code_block = $self->[_rhas_code_block_];
- my $rhas_list = $self->[_rhas_list_];
- my $rhas_ternary = $self->[_rhas_ternary_];
- my $ris_assigned_structure = $self->[_ris_assigned_structure_];
- my $ris_broken_container = $self->[_ris_broken_container_];
- my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
- my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
- my $ris_permanently_broken = $self->[_ris_permanently_broken_];
- my $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
- my $roverride_cab3 = $self->[_roverride_cab3_];
- my $rparent_of_seqno = $self->[_rparent_of_seqno_];
- my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
- my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ print STDOUT
+"file,line,line_count,code_lines,type,name,level,max_change,block_count,mccabe_count\n";
- my $last_nonblank_code_type = ';';
- my $last_nonblank_code_token = ';';
- my $last_nonblank_block_type = EMPTY_STRING;
- my $last_last_nonblank_code_type = ';';
- my $last_last_nonblank_code_token = ';';
+ foreach my $rline_vars (@sorted_lines) {
+ my $line = join( ",", @{$rline_vars} ) . "\n";
+ print STDOUT $line;
+ }
+ return;
+} ## end sub dump_block_summary
- my %K_first_here_doc_by_seqno;
+sub set_CODE_type {
+ my ($self) = @_;
- my $set_permanently_broken = sub {
- my ($seqno) = @_;
- while ( defined($seqno) ) {
- $ris_permanently_broken->{$seqno} = 1;
- $seqno = $rparent_of_seqno->{$seqno};
- }
- return;
- };
- my $store_token = sub {
- my ($item) = @_;
+ # Examine each line of code and set a flag '$CODE_type' to describe it.
+ # Also return a list of lines with side comments.
- # This will be the index of this item in the new array
- my $KK_new = @{$rLL_new};
+ my $rLL = $self->[_rLL_];
+ my $rlines = $self->[_rlines_];
- #------------------------------------------------------------------
- # NOTE: called once per token so coding efficiency is critical here
- #------------------------------------------------------------------
+ my $rOpts_format_skipping_begin = $rOpts->{'format-skipping-begin'};
+ my $rOpts_format_skipping_end = $rOpts->{'format-skipping-end'};
+ my $rOpts_static_block_comment_prefix =
+ $rOpts->{'static-block-comment-prefix'};
- my $type = $item->[_TYPE_];
- my $is_blank = $type eq 'b';
- my $block_type = EMPTY_STRING;
+ # Remember indexes of lines with side comments
+ my @ix_side_comments;
- # Do not output consecutive blanks. This situation should have been
- # prevented earlier, but it is worth checking because later routines
- # make this assumption.
- if ( $is_blank && $KK_new && $rLL_new->[-1]->[_TYPE_] eq 'b' ) {
- return;
- }
-
- # check for a sequenced item (i.e., container or ?/:)
- my $type_sequence = $item->[_TYPE_SEQUENCE_];
- my $token = $item->[_TOKEN_];
- if ($type_sequence) {
-
- if ( $is_opening_token{$token} ) {
-
- $K_opening_container->{$type_sequence} = $KK_new;
- $block_type = $rblock_type_of_seqno->{$type_sequence};
+ my $In_format_skipping_section = 0;
+ my $Saw_VERSION_in_this_file = 0;
+ my $has_side_comment = 0;
+ my ( $Kfirst, $Klast );
+ my $CODE_type;
- # Fix for case b1100: Count a line ending in ', [' as having
- # a line-ending comma. Otherwise, these commas can be hidden
- # with something like --opening-square-bracket-right
- if ( $last_nonblank_code_type eq ','
- && $Ktoken_vars == $Klast_old_code
- && $Ktoken_vars > $Kfirst_old )
- {
- $rlec_count_by_seqno->{$type_sequence}++;
- }
+ # Loop to set CODE_type
- if ( $last_nonblank_code_type eq '='
- || $last_nonblank_code_type eq '=>' )
- {
- $ris_assigned_structure->{$type_sequence} =
- $last_nonblank_code_type;
- }
+ # Possible CODE_types
+ # 'VB' = Verbatim - line goes out verbatim (a quote)
+ # 'FS' = Format Skipping - line goes out verbatim
+ # 'BL' = Blank Line
+ # 'HSC' = Hanging Side Comment - fix this hanging side comment
+ # 'SBCX'= Static Block Comment Without Leading Space
+ # 'SBC' = Static Block Comment
+ # 'BC' = Block Comment - an ordinary full line comment
+ # 'IO' = Indent Only - line goes out unchanged except for indentation
+ # 'NIN' = No Internal Newlines - line does not get broken
+ # 'VER' = VERSION statement
+ # '' = ordinary line of code with no restrictions
- my $seqno_parent = $seqno_stack{ $depth_next - 1 };
- $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
- push @{ $rchildren_of_seqno->{$seqno_parent} }, $type_sequence;
- $rparent_of_seqno->{$type_sequence} = $seqno_parent;
- $seqno_stack{$depth_next} = $type_sequence;
- $K_old_opening_by_seqno{$type_sequence} = $Ktoken_vars;
- $depth_next++;
+ my $ix_line = -1;
+ foreach my $line_of_tokens ( @{$rlines} ) {
+ $ix_line++;
+ my $line_type = $line_of_tokens->{_line_type};
- if ( $depth_next > $depth_next_max ) {
- $depth_next_max = $depth_next;
- }
- }
- elsif ( $is_closing_token{$token} ) {
+ my $Last_line_had_side_comment = $has_side_comment;
+ if ($has_side_comment) {
+ push @ix_side_comments, $ix_line - 1;
+ $has_side_comment = 0;
+ }
- $K_closing_container->{$type_sequence} = $KK_new;
- $block_type = $rblock_type_of_seqno->{$type_sequence};
+ my $last_CODE_type = $CODE_type;
+ $CODE_type = EMPTY_STRING;
- # Do not include terminal commas in counts
- if ( $last_nonblank_code_type eq ','
- || $last_nonblank_code_type eq '=>' )
- {
- my $seqno = $seqno_stack{ $depth_next - 1 };
- if ($seqno) {
- $rtype_count_by_seqno->{$seqno}
- ->{$last_nonblank_code_type}--;
-
- if ( $Ktoken_vars == $Kfirst_old
- && $last_nonblank_code_type eq ','
- && $rlec_count_by_seqno->{$seqno} )
- {
- $rlec_count_by_seqno->{$seqno}--;
- }
- }
- }
+ if ( $line_type ne 'CODE' ) {
+ next;
+ }
- # Update the stack...
- $depth_next--;
- }
- else {
+ my $Klast_prev = $Klast;
- # For ternary, note parent but do not include as child
- my $seqno_parent = $seqno_stack{ $depth_next - 1 };
- $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
- $rparent_of_seqno->{$type_sequence} = $seqno_parent;
+ my $rK_range = $line_of_tokens->{_rK_range};
+ ( $Kfirst, $Klast ) = @{$rK_range};
- # These are not yet used but could be useful
- if ( $token eq '?' ) {
- $K_opening_ternary->{$type_sequence} = $KK_new;
- }
- elsif ( $token eq ':' ) {
- $K_closing_ternary->{$type_sequence} = $KK_new;
- }
- else {
+ my $input_line = $line_of_tokens->{_line_text};
+ my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1;
- # We really shouldn't arrive here, just being cautious:
- # The only sequenced types output by the tokenizer are the
- # opening & closing containers and the ternary types. Each
- # of those was checked above. So we would only get here
- # if the tokenizer has been changed to mark some other
- # tokens with sequence numbers.
- if (DEVEL_MODE) {
- Fault(
-"Unexpected token type with sequence number: type='$type', seqno='$type_sequence'"
- );
- }
- }
- }
+ my $is_block_comment = 0;
+ if ( $jmax >= 0 && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
+ if ( $jmax == 0 ) { $is_block_comment = 1; }
+ else { $has_side_comment = 1 }
}
- # Find the length of this token. Later it may be adjusted if phantom
- # or ignoring side comment lengths.
- my $token_length =
- $is_encoded_data
- ? $length_function->($token)
- : length($token);
-
- # handle comments
- my $is_comment = $type eq '#';
- if ($is_comment) {
+ # Write line verbatim if we are in a formatting skip section
+ if ($In_format_skipping_section) {
- # trim comments if necessary
- my $ord = ord( substr( $token, -1, 1 ) );
+ # Note: extra space appended to comment simplifies pattern matching
if (
- $ord > 0
- && ( $ord < ORD_PRINTABLE_MIN
- || $ord > ORD_PRINTABLE_MAX )
- && $token =~ s/\s+$//
- )
- {
- $token_length = $length_function->($token);
- $item->[_TOKEN_] = $token;
- }
+ $is_block_comment
- # Mark length of side comments as just 1 if sc lengths are ignored
- if ( $rOpts_ignore_side_comment_lengths
- && ( !$CODE_type || $CODE_type eq 'HSC' ) )
- {
- $token_length = 1;
- }
- my $seqno = $seqno_stack{ $depth_next - 1 };
- if ( defined($seqno)
- && !$ris_permanently_broken->{$seqno} )
+ # optional fast pre-check
+ && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#>>>'
+ || $rOpts_format_skipping_end )
+
+ && ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~
+ /$format_skipping_pattern_end/
+ )
{
- $set_permanently_broken->($seqno);
+ $In_format_skipping_section = 0;
+ my $input_line_no = $line_of_tokens->{_line_number};
+ write_logfile_entry(
+ "Line $input_line_no: Exiting format-skipping section\n");
}
+ $CODE_type = 'FS';
+ next;
}
- $item->[_TOKEN_LENGTH_] = $token_length;
-
- # and update the cumulative length
- $cumulative_length += $token_length;
-
- # Save the length sum to just AFTER this token
- $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
-
- if ( !$is_blank && !$is_comment ) {
-
- # Remember the most recent two non-blank, non-comment tokens.
- # NOTE: the phantom semicolon code may change the output stack
- # without updating these values. Phantom semicolons are considered
- # the same as blanks for now, but future needs might change that.
- # See the related note in sub '$add_phantom_semicolon'.
- $last_last_nonblank_code_type = $last_nonblank_code_type;
- $last_last_nonblank_code_token = $last_nonblank_code_token;
-
- $last_nonblank_code_type = $type;
- $last_nonblank_code_token = $token;
- $last_nonblank_block_type = $block_type;
-
- # count selected types
- if ( $is_counted_type{$type} ) {
- my $seqno = $seqno_stack{ $depth_next - 1 };
- if ( defined($seqno) ) {
- $rtype_count_by_seqno->{$seqno}->{$type}++;
-
- # Count line-ending commas for -bbx
- if ( $type eq ',' && $Ktoken_vars == $Klast_old_code ) {
- $rlec_count_by_seqno->{$seqno}++;
- }
+ # Check for a continued quote..
+ if ( $line_of_tokens->{_starting_in_quote} ) {
- # Remember index of first here doc target
- if ( $type eq 'h' && !$K_first_here_doc_by_seqno{$seqno} ) {
- $K_first_here_doc_by_seqno{$seqno} = $KK_new;
- }
+ # A line which is entirely a quote or pattern must go out
+ # verbatim. Note: the \n is contained in $input_line.
+ if ( $jmax <= 0 ) {
+ if ( $self->[_save_logfile_] && $input_line =~ /\t/ ) {
+ my $input_line_number = $line_of_tokens->{_line_number};
+ $self->note_embedded_tab($input_line_number);
}
+ $CODE_type = 'VB';
+ next;
}
}
- # For reference, here is how to get the parent sequence number.
- # This is not used because it is slower than finding it on the fly
- # in sub parent_seqno_by_K:
-
- # my $seqno_parent =
- # $type_sequence && $is_opening_token{$token}
- # ? $seqno_stack{ $depth_next - 2 }
- # : $seqno_stack{ $depth_next - 1 };
- # my $KK = @{$rLL_new};
- # $rseqno_of_parent_by_K->{$KK} = $seqno_parent;
-
- # and finally, add this item to the new array
- push @{$rLL_new}, $item;
- return;
- };
-
- my $store_token_and_space = sub {
- my ( $item, $want_space ) = @_;
+ # See if we are entering a formatting skip section
+ if (
+ $is_block_comment
- # store a token with preceding space if requested and needed
+ # optional fast pre-check
+ && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#<<<'
+ || $rOpts_format_skipping_begin )
- # First store the space
- if ( $want_space
- && @{$rLL_new}
- && $rLL_new->[-1]->[_TYPE_] ne 'b'
- && $rOpts_add_whitespace )
+ && $rOpts_format_skipping
+ && ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~
+ /$format_skipping_pattern_begin/
+ )
{
- my $rcopy = [ @{$item} ];
- $rcopy->[_TYPE_] = 'b';
- $rcopy->[_TOKEN_] = SPACE;
- $rcopy->[_TYPE_SEQUENCE_] = EMPTY_STRING;
-
- $rcopy->[_LINE_INDEX_] =
- $rLL_new->[-1]->[_LINE_INDEX_];
-
- # Patch 23-Jan-2021 to fix -lp blinkers:
- # The level and ci_level of newly created spaces should be the same
- # as the previous token. Otherwise the coding for the -lp option
- # can create a blinking state in some rare cases.
- $rcopy->[_LEVEL_] =
- $rLL_new->[-1]->[_LEVEL_];
- $rcopy->[_CI_LEVEL_] =
- $rLL_new->[-1]->[_CI_LEVEL_];
+ $In_format_skipping_section = 1;
+ my $input_line_no = $line_of_tokens->{_line_number};
+ write_logfile_entry(
+ "Line $input_line_no: Entering format-skipping section\n");
+ $CODE_type = 'FS';
+ next;
+ }
- $store_token->($rcopy);
+ # ignore trailing blank tokens (they will get deleted later)
+ if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
+ $jmax--;
}
- # then the token
- $store_token->($item);
- return;
- };
+ # blank line..
+ if ( $jmax < 0 ) {
+ $CODE_type = 'BL';
+ next;
+ }
- my $add_phantom_semicolon = sub {
+ # Handle comments
+ if ($is_block_comment) {
- my ($KK) = @_;
+ # see if this is a static block comment (starts with ## by default)
+ my $is_static_block_comment = 0;
+ my $no_leading_space = substr( $input_line, 0, 1 ) eq '#';
+ if (
- my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
- return unless ( defined($Kp) );
+ # optional fast pre-check
+ (
+ substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 2 ) eq '##'
+ || $rOpts_static_block_comment_prefix
+ )
- # we are only adding semicolons for certain block types
- my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
- return unless ($type_sequence);
- my $block_type = $rblock_type_of_seqno->{$type_sequence};
- return unless ($block_type);
- return
- unless ( $ok_to_add_semicolon_for_block_type{$block_type}
- || $block_type =~ /^(sub|package)/
- || $block_type =~ /^\w+\:$/ );
+ && $rOpts_static_block_comments
+ && $input_line =~ /$static_block_comment_pattern/
+ )
+ {
+ $is_static_block_comment = 1;
+ }
- my $type_p = $rLL_new->[$Kp]->[_TYPE_];
- my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
- my $type_sequence_p = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
+ # Check for comments which are line directives
+ # Treat exactly as static block comments without leading space
+ # reference: perlsyn, near end, section Plain Old Comments (Not!)
+ # example: '# line 42 "new_filename.plx"'
+ if (
+ $no_leading_space
+ && $input_line =~ /^\# \s*
+ line \s+ (\d+) \s*
+ (?:\s("?)([^"]+)\2)? \s*
+ $/x
+ )
+ {
+ $is_static_block_comment = 1;
+ }
- # Do not add a semicolon if...
- return
- if (
+ # look for hanging side comment ...
+ if (
+ $Last_line_had_side_comment # last line had side comment
+ && !$no_leading_space # there is some leading space
+ && !
+ $is_static_block_comment # do not make static comment hanging
+ )
+ {
- # it would follow a comment (and be isolated)
- $type_p eq '#'
+ # continuing an existing HSC chain?
+ if ( $last_CODE_type eq 'HSC' ) {
+ $has_side_comment = 1;
+ $CODE_type = 'HSC';
+ next;
+ }
- # it follows a code block ( because they are not always wanted
- # there and may add clutter)
- || $type_sequence_p && $rblock_type_of_seqno->{$type_sequence_p}
+ # starting a new HSC chain?
+ elsif (
- # it would follow a label
- || $type_p eq 'J'
+ $rOpts->{'hanging-side-comments'} # user is allowing
+ # hanging side comments
+ # like this
- # it would be inside a 'format' statement (and cause syntax error)
- || ( $type_p eq 'k'
- && $token_p =~ /format/ )
+ && ( defined($Klast_prev) && $Klast_prev > 1 )
- );
+ # and the previous side comment was not static (issue c070)
+ && !(
+ $rOpts->{'static-side-comments'}
+ && $rLL->[$Klast_prev]->[_TOKEN_] =~
+ /$static_side_comment_pattern/
+ )
- # Do not add a semicolon if it would impede a weld with an immediately
- # following closing token...like this
- # { ( some code ) }
- # ^--No semicolon can go here
-
- # look at the previous token... note use of the _NEW rLL array here,
- # but sequence numbers are invariant.
- my $seqno_inner = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
-
- # If it is also a CLOSING token we have to look closer...
- if (
- $seqno_inner
- && $is_closing_token{$token_p}
-
- # we only need to look if there is just one inner container..
- && defined( $rchildren_of_seqno->{$type_sequence} )
- && @{ $rchildren_of_seqno->{$type_sequence} } == 1
- )
- {
+ )
+ {
- # Go back and see if the corresponding two OPENING tokens are also
- # together. Note that we are using the OLD K indexing here:
- my $K_outer_opening = $K_old_opening_by_seqno{$type_sequence};
- if ( defined($K_outer_opening) ) {
- my $K_nxt = $self->K_next_nonblank($K_outer_opening);
- if ( defined($K_nxt) ) {
- my $seqno_nxt = $rLL->[$K_nxt]->[_TYPE_SEQUENCE_];
+ # and it is not a closing side comment (issue c070).
+ my $K_penult = $Klast_prev - 1;
+ $K_penult -= 1 if ( $rLL->[$K_penult]->[_TYPE_] eq 'b' );
+ my $follows_csc =
+ ( $rLL->[$K_penult]->[_TOKEN_] eq '}'
+ && $rLL->[$K_penult]->[_TYPE_] eq '}'
+ && $rLL->[$Klast_prev]->[_TOKEN_] =~
+ /$closing_side_comment_prefix_pattern/ );
- # Is the next token after the outer opening the same as
- # our inner closing (i.e. same sequence number)?
- # If so, do not insert a semicolon here.
- return if ( $seqno_nxt && $seqno_nxt == $seqno_inner );
+ if ( !$follows_csc ) {
+ $has_side_comment = 1;
+ $CODE_type = 'HSC';
+ next;
+ }
}
}
+
+ if ($is_static_block_comment) {
+ $CODE_type = $no_leading_space ? 'SBCX' : 'SBC';
+ next;
+ }
+ elsif ($Last_line_had_side_comment
+ && !$rOpts_maximum_consecutive_blank_lines
+ && $rLL->[$Kfirst]->[_LEVEL_] > 0 )
+ {
+ # Emergency fix to keep a block comment from becoming a hanging
+ # side comment. This fix is for the case that blank lines
+ # cannot be inserted. There is related code in sub
+ # 'process_line_of_CODE'
+ $CODE_type = 'SBCX';
+ next;
+ }
+ else {
+ $CODE_type = 'BC';
+ next;
+ }
}
- # We will insert an empty semicolon here as a placeholder. Later, if
- # it becomes the last token on a line, we will bring it to life. The
- # advantage of doing this is that (1) we just have to check line
- # endings, and (2) the phantom semicolon has zero width and therefore
- # won't cause needless breaks of one-line blocks.
- my $Ktop = -1;
- if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
- && $want_left_space{';'} == WS_NO )
- {
+ # End of comments. Handle a line of normal code:
- # convert the blank into a semicolon..
- # be careful: we are working on the new stack top
- # on a token which has been stored.
- my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', SPACE );
+ if ($rOpts_indent_only) {
+ $CODE_type = 'IO';
+ next;
+ }
- # Convert the existing blank to:
- # a phantom semicolon for one_line_block option = 0 or 1
- # a real semicolon for one_line_block option = 2
- my $tok = EMPTY_STRING;
- my $len_tok = 0;
- if ( $rOpts_one_line_block_semicolons == 2 ) {
- $tok = ';';
- $len_tok = 1;
- }
+ if ( !$rOpts_add_newlines ) {
+ $CODE_type = 'NIN';
+ next;
+ }
- $rLL_new->[$Ktop]->[_TOKEN_] = $tok;
- $rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = $len_tok;
- $rLL_new->[$Ktop]->[_TYPE_] = ';';
+ # Patch needed for MakeMaker. Do not break a statement
+ # in which $VERSION may be calculated. See MakeMaker.pm;
+ # this is based on the coding in it.
+ # The first line of a file that matches this will be eval'd:
+ # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
+ # Examples:
+ # *VERSION = \'1.01';
+ # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
+ # We will pass such a line straight through without breaking
+ # it unless -npvl is used.
- # NOTE: we are changing the output stack without updating variables
- # $last_nonblank_code_type, etc. Future needs might require that
- # those variables be updated here. For now, it seems ok to skip
- # this.
+ # Patch for problem reported in RT #81866, where files
+ # had been flattened into a single line and couldn't be
+ # tidied without -npvl. There are two parts to this patch:
+ # First, it is not done for a really long line (80 tokens for now).
+ # Second, we will only allow up to one semicolon
+ # before the VERSION. We need to allow at least one semicolon
+ # for statements like this:
+ # require Exporter; our $VERSION = $Exporter::VERSION;
+ # where both statements must be on a single line for MakeMaker
- # Save list of new K indexes of phantom semicolons.
- # This will be needed if we want to undo them for iterations in
- # future coding.
- push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
+ if ( !$Saw_VERSION_in_this_file
+ && $jmax < 80
+ && $input_line =~
+ /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
+ {
+ $Saw_VERSION_in_this_file = 1;
+ write_logfile_entry("passing VERSION line; -npvl deactivates\n");
- # Then store a new blank
- $store_token->($rcopy);
+ # This code type has lower priority than others
+ $CODE_type = 'VER';
+ next;
}
- else {
+ }
+ continue {
+ $line_of_tokens->{_code_type} = $CODE_type;
+ }
- # Patch for issue c078: keep line indexes in order. If the top
- # token is a space that we are keeping (due to '-wls=';') then
- # we have to check that old line indexes stay in order.
- # In very rare
- # instances in which side comments have been deleted and converted
- # into blanks, we may have filtered down multiple blanks into just
- # one. In that case the top blank may have a higher line number
- # than the previous nonblank token. Although the line indexes of
- # blanks are not really significant, we need to keep them in order
- # in order to pass error checks.
- if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' ) {
- my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_];
- my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_];
- if ( $new_top_ix < $old_top_ix ) {
- $rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix;
- }
- }
-
- my $rcopy =
- copy_token_as_type( $rLL_new->[$Kp], ';', EMPTY_STRING );
- $store_token->($rcopy);
- push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
- }
- return;
- };
+ if ($has_side_comment) {
+ push @ix_side_comments, $ix_line;
+ }
- my $check_Q = sub {
+ return \@ix_side_comments;
+} ## end sub set_CODE_type
- # Check that a quote looks okay
- # This sub works but needs to by sync'd with the log file output
- # before it can be used.
- my ( $KK, $Kfirst, $line_number ) = @_;
- my $token = $rLL->[$KK]->[_TOKEN_];
- $self->note_embedded_tab($line_number) if ( $token =~ "\t" );
+sub find_non_indenting_braces {
- # The remainder of this routine looks for something like
- # '$var = s/xxx/yyy/;'
- # in case it should have been '$var =~ s/xxx/yyy/;'
+ my ( $self, $rix_side_comments ) = @_;
+ return unless ( $rOpts->{'non-indenting-braces'} );
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
+ my $rlines = $self->[_rlines_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my $rseqno_non_indenting_brace_by_ix =
+ $self->[_rseqno_non_indenting_brace_by_ix_];
- # Start by looking for a token beginning with one of: s y m / tr
- return
- unless ( $is_s_y_m_slash{ substr( $token, 0, 1 ) }
- || substr( $token, 0, 2 ) eq 'tr' );
+ foreach my $ix ( @{$rix_side_comments} ) {
+ my $line_of_tokens = $rlines->[$ix];
+ my $line_type = $line_of_tokens->{_line_type};
+ if ( $line_type ne 'CODE' ) {
- # ... and preceded by one of: = == !=
- my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
- return unless ( defined($Kp) );
- my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
- return unless ( $is_unexpected_equals{$previous_nonblank_type} );
- my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
+ # shouldn't happen
+ DEVEL_MODE && Fault("unexpected line_type=$line_type\n");
+ next;
+ }
+ my $rK_range = $line_of_tokens->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ unless ( defined($Kfirst) && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
- my $previous_nonblank_type_2 = 'b';
- my $previous_nonblank_token_2 = EMPTY_STRING;
- my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
- if ( defined($Kpp) ) {
- $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_];
- $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
+ # shouldn't happen
+ DEVEL_MODE && Fault("did not get a comment\n");
+ next;
+ }
+ next unless ( $Klast > $Kfirst ); # maybe HSC
+ my $token_sc = $rLL->[$Klast]->[_TOKEN_];
+ my $K_m = $Klast - 1;
+ my $type_m = $rLL->[$K_m]->[_TYPE_];
+ if ( $type_m eq 'b' && $K_m > $Kfirst ) {
+ $K_m--;
+ $type_m = $rLL->[$K_m]->[_TYPE_];
}
+ my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_];
+ if ($seqno_m) {
+ my $block_type_m = $rblock_type_of_seqno->{$seqno_m};
- my $next_nonblank_token = EMPTY_STRING;
- my $Kn = $KK + 1;
- if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
- if ( $Kn <= $Kmax ) {
- $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
+ # The pattern ends in \s but we have removed the newline, so
+ # we added it back for the match. That way we require an exact
+ # match to the special string and also allow additional text.
+ $token_sc .= "\n";
+ if ( $block_type_m
+ && $is_opening_type{$type_m}
+ && $token_sc =~ /$non_indenting_brace_pattern/ )
+ {
+ $rseqno_non_indenting_brace_by_ix->{$ix} = $seqno_m;
+ }
}
+ }
+ return;
+} ## end sub find_non_indenting_braces
- my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
- my $type_0 = $rLL->[$Kfirst]->[_TYPE_];
+sub delete_side_comments {
+ my ( $self, $rix_side_comments ) = @_;
- if (
- ##$token =~ /^(s|tr|y|m|\/)/
- ##&& $previous_nonblank_token =~ /^(=|==|!=)$/
- 1
+ # Given a list of indexes of lines with side comments, handle any
+ # requested side comment deletions.
- # preceded by simple scalar
- && $previous_nonblank_type_2 eq 'i'
- && $previous_nonblank_token_2 =~ /^\$/
+ my $rLL = $self->[_rLL_];
+ my $rlines = $self->[_rlines_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my $rseqno_non_indenting_brace_by_ix =
+ $self->[_rseqno_non_indenting_brace_by_ix_];
- # followed by some kind of termination
- # (but give complaint if we can not see far enough ahead)
- && $next_nonblank_token =~ /^[; \)\}]$/
+ foreach my $ix ( @{$rix_side_comments} ) {
+ my $line_of_tokens = $rlines->[$ix];
+ my $line_type = $line_of_tokens->{_line_type};
- # scalar is not declared
- ## =~ /^(my|our|local)$/
- && !( $type_0 eq 'k' && $is_my_our_local{$token_0} )
- )
- {
- my $lno = 1 + $rLL_new->[$Kp]->[_LINE_INDEX_];
- my $guess = substr( $previous_nonblank_token, 0, 1 ) . '~';
- complain(
-"Line $lno: Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
- );
+ # This fault shouldn't happen because we only saved CODE lines with
+ # side comments in the TASK 1 loop above.
+ if ( $line_type ne 'CODE' ) {
+ if (DEVEL_MODE) {
+ my $lno = $ix + 1;
+ Fault(<<EOM);
+Hit unexpected line_type = '$line_type' near line $lno while deleting side comments, should be 'CODE'
+EOM
+ }
+ next;
}
- return;
- };
-
- #-------------------------------------------
- # Main loop to respace all lines of the file
- #-------------------------------------------
- my $last_K_out;
-
- foreach my $line_of_tokens ( @{$rlines} ) {
- my $input_line_number = $line_of_tokens->{_line_number};
- my $last_line_type = $line_type;
- $line_type = $line_of_tokens->{_line_type};
- next unless ( $line_type eq 'CODE' );
- my $last_CODE_type = $CODE_type;
- $CODE_type = $line_of_tokens->{_code_type};
- my $rK_range = $line_of_tokens->{_rK_range};
+ my $CODE_type = $line_of_tokens->{_code_type};
+ my $rK_range = $line_of_tokens->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
- next unless defined($Kfirst);
- ( $Kfirst_old, $Klast_old ) = ( $Kfirst, $Klast );
- $Klast_old_code = $Klast_old;
-
- # Be sure an old K value is defined for sub $store_token
- $Ktoken_vars = $Kfirst;
- # Check for correct sequence of token indexes...
- # An error here means that sub write_line() did not correctly
- # package the tokenized lines as it received them. If we
- # get a fault here it has not output a continuous sequence
- # of K values. Or a line of CODE may have been mis-marked as
- # something else. There is no good way to continue after such an
- # error.
- # FIXME: Calling Fault will produce zero output; it would be best to
- # find a way to dump the input file.
- if ( defined($last_K_out) ) {
- if ( $Kfirst != $last_K_out + 1 ) {
- Fault(
- "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
- );
+ unless ( defined($Kfirst) && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
+ if (DEVEL_MODE) {
+ my $lno = $ix + 1;
+ Fault(<<EOM);
+Did not find side comment near line $lno while deleting side comments
+EOM
}
+ next;
}
- else {
- # The first token should always have been given index 0 by sub
- # write_line()
- if ( $Kfirst != 0 ) {
- Fault("Program Bug: first K is $Kfirst but should be 0");
- }
+ my $delete_side_comment =
+ $rOpts_delete_side_comments
+ && ( $Klast > $Kfirst || $CODE_type eq 'HSC' )
+ && (!$CODE_type
+ || $CODE_type eq 'HSC'
+ || $CODE_type eq 'IO'
+ || $CODE_type eq 'NIN' );
+
+ # Do not delete special control side comments
+ if ( $rseqno_non_indenting_brace_by_ix->{$ix} ) {
+ $delete_side_comment = 0;
}
- $last_K_out = $Klast;
- # Handle special lines of code
- if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
+ if (
+ $rOpts_delete_closing_side_comments
+ && !$delete_side_comment
+ && $Klast > $Kfirst
+ && ( !$CODE_type
+ || $CODE_type eq 'HSC'
+ || $CODE_type eq 'IO'
+ || $CODE_type eq 'NIN' )
+ )
+ {
+ my $token = $rLL->[$Klast]->[_TOKEN_];
+ my $K_m = $Klast - 1;
+ my $type_m = $rLL->[$K_m]->[_TYPE_];
+ if ( $type_m eq 'b' && $K_m > $Kfirst ) { $K_m-- }
+ my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_];
+ if ($seqno_m) {
+ my $block_type_m = $rblock_type_of_seqno->{$seqno_m};
+ if ( $block_type_m
+ && $token =~ /$closing_side_comment_prefix_pattern/
+ && $block_type_m =~ /$closing_side_comment_list_pattern/ )
+ {
+ $delete_side_comment = 1;
+ }
+ }
+ } ## end if ( $rOpts_delete_closing_side_comments...)
- # CODE_types are as follows.
- # 'BL' = Blank Line
- # 'VB' = Verbatim - line goes out verbatim
- # 'FS' = Format Skipping - line goes out verbatim, no blanks
- # 'IO' = Indent Only - only indentation may be changed
+ if ($delete_side_comment) {
+
+ # We are actually just changing the side comment to a blank.
+ # This may produce multiple blanks in a row, but sub respace_tokens
+ # will check for this and fix it.
+ $rLL->[$Klast]->[_TYPE_] = 'b';
+ $rLL->[$Klast]->[_TOKEN_] = SPACE;
+
+ # The -io option outputs the line text, so we have to update
+ # the line text so that the comment does not reappear.
+ if ( $CODE_type eq 'IO' ) {
+ my $line = EMPTY_STRING;
+ foreach my $KK ( $Kfirst .. $Klast - 1 ) {
+ $line .= $rLL->[$KK]->[_TOKEN_];
+ }
+ $line =~ s/\s+$//;
+ $line_of_tokens->{_line_text} = $line . "\n";
+ }
+
+ # If we delete a hanging side comment the line becomes blank.
+ if ( $CODE_type eq 'HSC' ) { $line_of_tokens->{_code_type} = 'BL' }
+ }
+ }
+ return;
+} ## end sub delete_side_comments
+
+sub dump_verbatim {
+ my $self = shift;
+ my $rlines = $self->[_rlines_];
+ foreach my $line ( @{$rlines} ) {
+ my $input_line = $line->{_line_text};
+ $self->write_unindented_line($input_line);
+ }
+ return;
+} ## end sub dump_verbatim
+
+my %wU;
+my %wiq;
+my %is_wit;
+my %is_sigil;
+my %is_nonlist_keyword;
+my %is_nonlist_type;
+my %is_s_y_m_slash;
+my %is_unexpected_equals;
+
+BEGIN {
+
+ # added 'U' to fix cases b1125 b1126 b1127
+ my @q = qw(w U);
+ @{wU}{@q} = (1) x scalar(@q);
+
+ @q = qw(w i q Q G C Z);
+ @{wiq}{@q} = (1) x scalar(@q);
+
+ @q = qw(w i t);
+ @{is_wit}{@q} = (1) x scalar(@q);
+
+ @q = qw($ & % * @);
+ @{is_sigil}{@q} = (1) x scalar(@q);
+
+ # Parens following these keywords will not be marked as lists. Note that
+ # 'for' is not included and is handled separately, by including 'f' in the
+ # hash %is_counted_type, since it may or may not be a c-style for loop.
+ @q = qw( if elsif unless and or );
+ @is_nonlist_keyword{@q} = (1) x scalar(@q);
+
+ # Parens following these types will not be marked as lists
+ @q = qw( && || );
+ @is_nonlist_type{@q} = (1) x scalar(@q);
+
+ @q = qw( s y m / );
+ @is_s_y_m_slash{@q} = (1) x scalar(@q);
+
+ @q = qw( = == != );
+ @is_unexpected_equals{@q} = (1) x scalar(@q);
+
+} ## end BEGIN
+
+{ #<<< begin clousure respace_tokens
+
+my $rLL_new; # This will be the new array of tokens
+
+# These are variables in $self
+my $rLL;
+my $length_function;
+my $is_encoded_data;
+
+my $K_closing_ternary;
+my $K_opening_ternary;
+my $rchildren_of_seqno;
+my $rhas_broken_code_block;
+my $rhas_broken_list;
+my $rhas_broken_list_with_lec;
+my $rhas_code_block;
+my $rhas_list;
+my $rhas_ternary;
+my $ris_assigned_structure;
+my $ris_broken_container;
+my $ris_excluded_lp_container;
+my $ris_list_by_seqno;
+my $ris_permanently_broken;
+my $rlec_count_by_seqno;
+my $roverride_cab3;
+my $rparent_of_seqno;
+my $rtype_count_by_seqno;
+my $rblock_type_of_seqno;
+
+my $K_opening_container;
+my $K_closing_container;
+
+my %K_first_here_doc_by_seqno;
+
+my $last_nonblank_code_type;
+my $last_nonblank_code_token;
+my $last_nonblank_block_type;
+my $last_last_nonblank_code_type;
+my $last_last_nonblank_code_token;
+
+my %seqno_stack;
+my %K_old_opening_by_seqno;
+my $depth_next;
+my $depth_next_max;
+
+my $cumulative_length;
+
+# Variables holding the current line info
+my $Ktoken_vars;
+my $Kfirst_old;
+my $Klast_old;
+my $Klast_old_code;
+my $CODE_type;
+
+my $rwhitespace_flags;
+
+sub initialize_respace_tokens_closure {
+
+ my ($self) = @_;
+
+ $rLL_new = []; # This is the new array
+
+ $rLL = $self->[_rLL_];
+ $length_function = $self->[_length_function_];
+ $is_encoded_data = $self->[_is_encoded_data_];
+
+ $K_closing_ternary = $self->[_K_closing_ternary_];
+ $K_opening_ternary = $self->[_K_opening_ternary_];
+ $rchildren_of_seqno = $self->[_rchildren_of_seqno_];
+ $rhas_broken_code_block = $self->[_rhas_broken_code_block_];
+ $rhas_broken_list = $self->[_rhas_broken_list_];
+ $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
+ $rhas_code_block = $self->[_rhas_code_block_];
+ $rhas_list = $self->[_rhas_list_];
+ $rhas_ternary = $self->[_rhas_ternary_];
+ $ris_assigned_structure = $self->[_ris_assigned_structure_];
+ $ris_broken_container = $self->[_ris_broken_container_];
+ $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
+ $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
+ $ris_permanently_broken = $self->[_ris_permanently_broken_];
+ $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
+ $roverride_cab3 = $self->[_roverride_cab3_];
+ $rparent_of_seqno = $self->[_rparent_of_seqno_];
+ $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
+ $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+
+ %K_first_here_doc_by_seqno = ();
+
+ $last_nonblank_code_type = ';';
+ $last_nonblank_code_token = ';';
+ $last_nonblank_block_type = EMPTY_STRING;
+ $last_last_nonblank_code_type = ';';
+ $last_last_nonblank_code_token = ';';
+
+ %seqno_stack = ();
+ %K_old_opening_by_seqno = (); # Note: old K index
+ $depth_next = 0;
+ $depth_next_max = 0;
+
+ # we will be setting token lengths as we go
+ $cumulative_length = 0;
+
+ $Ktoken_vars = undef; # the old K value of $rtoken_vars
+ $Kfirst_old = undef; # min K of old line
+ $Klast_old = undef; # max K of old line
+ $Klast_old_code = undef; # K of last token if side comment
+ $CODE_type = EMPTY_STRING;
+
+ # Set the whitespace flags, which indicate the token spacing preference.
+ $rwhitespace_flags = $self->set_whitespace_flags();
+
+ # Note that $K_opening_container and $K_closing_container have values
+ # defined in sub get_line() for the previous K indexes. They were needed
+ # in case option 'indent-only' was set, and we didn't get here. We no
+ # longer need those and will eliminate them now to avoid any possible
+ # mixing of old and new values. This must be done AFTER the call to
+ # set_whitespace_flags, which needs these.
+ $K_opening_container = $self->[_K_opening_container_] = {};
+ $K_closing_container = $self->[_K_closing_container_] = {};
+
+ return;
+
+} ## end sub initialize_respace_tokens_closure
+
+sub respace_tokens {
+
+ my $self = shift;
+
+ #--------------------------------------------------------------------------
+ # This routine is called once per file to do as much formatting as possible
+ # before new line breaks are set.
+ #--------------------------------------------------------------------------
+
+ # Return parameters:
+ # Set $severe_error=true if processing must terminate immediately
+ my ( $severe_error, $rqw_lines );
+
+ # We change any spaces in --indent-only mode
+ if ( $rOpts->{'indent-only'} ) {
+
+ # We need to define lengths for -indent-only to avoid undefs, even
+ # though these values are not actually needed for option --indent-only.
+
+ $rLL = $self->[_rLL_];
+ $length_function = $self->[_length_function_];
+ $cumulative_length = 0;
+
+ foreach my $item ( @{$rLL} ) {
+ my $token = $item->[_TOKEN_];
+ my $token_length = $length_function->($token);
+ $cumulative_length += $token_length;
+ $item->[_TOKEN_LENGTH_] = $token_length;
+ $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
+ }
+
+ return ( $severe_error, $rqw_lines );
+ }
+
+ # This routine makes all necessary and possible changes to the tokenization
+ # after the initial tokenization of the file. This is a tedious routine,
+ # but basically it consists of inserting and deleting whitespace between
+ # nonblank tokens according to the selected parameters. In a few cases
+ # non-space characters are added, deleted or modified.
+
+ # The goal of this routine is to create a new token array which only needs
+ # the definition of new line breaks and padding to complete formatting. In
+ # a few cases we have to cheat a little to achieve this goal. In
+ # particular, we may not know if a semicolon will be needed, because it
+ # depends on how the line breaks go. To handle this, we include the
+ # semicolon as a 'phantom' which can be displayed as normal or as an empty
+ # string.
+
+ # Method: The old tokens are copied one-by-one, with changes, from the old
+ # linear storage array $rLL to a new array $rLL_new.
+
+ # (re-)initialize closure variables for this problem
+ $self->initialize_respace_tokens_closure();
+
+ #--------------------------------
+ # Main over all lines of the file
+ #--------------------------------
+ my $rlines = $self->[_rlines_];
+ my $line_type = EMPTY_STRING;
+ my $last_K_out;
+
+ foreach my $line_of_tokens ( @{$rlines} ) {
+
+ my $input_line_number = $line_of_tokens->{_line_number};
+ my $last_line_type = $line_type;
+ $line_type = $line_of_tokens->{_line_type};
+ next unless ( $line_type eq 'CODE' );
+ $CODE_type = $line_of_tokens->{_code_type};
+
+ if ( $CODE_type eq 'BL' ) {
+ my $seqno = $seqno_stack{ $depth_next - 1 };
+ if ( defined($seqno) ) {
+ $self->[_rblank_and_comment_count_]->{$seqno} += 1;
+ $self->set_permanently_broken($seqno)
+ if (!$ris_permanently_broken->{$seqno}
+ && $rOpts_maximum_consecutive_blank_lines );
+ }
+ }
+
+ my $rK_range = $line_of_tokens->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ next unless defined($Kfirst);
+ ( $Kfirst_old, $Klast_old ) = ( $Kfirst, $Klast );
+ $Klast_old_code = $Klast_old;
+
+ # Be sure an old K value is defined for sub store_token
+ $Ktoken_vars = $Kfirst;
+
+ # Check for correct sequence of token indexes...
+ # An error here means that sub write_line() did not correctly
+ # package the tokenized lines as it received them. If we
+ # get a fault here it has not output a continuous sequence
+ # of K values. Or a line of CODE may have been mis-marked as
+ # something else. There is no good way to continue after such an
+ # error.
+ if ( defined($last_K_out) ) {
+ if ( $Kfirst != $last_K_out + 1 ) {
+ Fault_Warn(
+ "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
+ );
+ $severe_error = 1;
+ return ( $severe_error, $rqw_lines );
+ }
+ }
+ else {
+
+ # The first token should always have been given index 0 by sub
+ # write_line()
+ if ( $Kfirst != 0 ) {
+ Fault("Program Bug: first K is $Kfirst but should be 0");
+ }
+ }
+ $last_K_out = $Klast;
+
+ # Handle special lines of code
+ if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
+
+ # CODE_types are as follows.
+ # 'BL' = Blank Line
+ # 'VB' = Verbatim - line goes out verbatim
+ # 'FS' = Format Skipping - line goes out verbatim, no blanks
+ # 'IO' = Indent Only - only indentation may be changed
# 'NIN' = No Internal Newlines - line does not get broken
# 'HSC'=Hanging Side Comment - fix this hanging side comment
# 'BC'=Block Comment - an ordinary full line comment
# the -extrude and -mangle options.
my $rcopy =
copy_token_as_type( $rvars_Kfirst, 'q', EMPTY_STRING );
- $store_token->($rcopy);
+ $self->store_token($rcopy);
$rcopy = copy_token_as_type( $rvars_Kfirst, 'b', SPACE );
- $store_token->($rcopy);
- $store_token->($rvars_Kfirst);
+ $self->store_token($rcopy);
+ $self->store_token($rvars_Kfirst);
next;
}
else {
}
}
- if ( $CODE_type eq 'BL' ) {
- my $seqno = $seqno_stack{ $depth_next - 1 };
- if ( defined($seqno)
- && !$ris_permanently_broken->{$seqno}
- && $rOpts_maximum_consecutive_blank_lines )
- {
- $set_permanently_broken->($seqno);
- }
- }
-
# Copy tokens unchanged
foreach my $KK ( $Kfirst .. $Klast ) {
$Ktoken_vars = $KK;
- $store_token->( $rLL->[$KK] );
+ $self->store_token( $rLL->[$KK] );
}
next;
}
# if last line was normal CODE.
# Patch for rt #125012: use K_previous_code rather than '_nonblank'
# because comments may disappear.
+ # Note that we must do this even if --noadd-whitespace is set
if ( $last_line_type eq 'CODE' ) {
my $type_next = $rLL->[$Kfirst]->[_TYPE_];
my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
)
)
{
-
- # Copy this first token as blank, but use previous line number
- my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', SPACE );
- $rcopy->[_LINE_INDEX_] =
- $rLL_new->[-1]->[_LINE_INDEX_];
-
- # The level and ci_level of newly created spaces should be the
- # same as the previous token. Otherwise blinking states can
- # be created if the -lp mode is used. See similar coding in
- # sub 'store_token_and_space'. Fixes cases b1109 b1110.
- $rcopy->[_LEVEL_] =
- $rLL_new->[-1]->[_LEVEL_];
- $rcopy->[_CI_LEVEL_] =
- $rLL_new->[-1]->[_CI_LEVEL_];
-
- $store_token->($rcopy);
+ $self->store_space();
}
}
- #-------------------------------------------------------
- # Loop to copy all tokens on this line, with any changes
- #-------------------------------------------------------
- my $type_sequence;
- foreach my $KK ( $Kfirst .. $Klast ) {
- $Ktoken_vars = $KK;
- $rtoken_vars = $rLL->[$KK];
- my $token = $rtoken_vars->[_TOKEN_];
- my $type = $rtoken_vars->[_TYPE_];
- my $last_type_sequence = $type_sequence;
- $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
-
- # Handle a blank space ...
- if ( $type eq 'b' ) {
+ #-----------------------------------------------
+ # Inner loop to respace tokens on a line of code
+ #-----------------------------------------------
- # Delete it if not wanted by whitespace rules
- # or we are deleting all whitespace
- # Note that whitespace flag is a flag indicating whether a
- # white space BEFORE the token is needed
- next if ( $KK >= $Klast ); # skip terminal blank
- my $Knext = $KK + 1;
+ # The inner loop is in a separate sub for clarity
+ $self->respace_tokens_inner_loop( $Kfirst, $Klast, $input_line_number );
- if ($rOpts_freeze_whitespace) {
- $store_token->($rtoken_vars);
- next;
- }
+ } # End line loop
- my $ws = $rwhitespace_flags->[$Knext];
- if ( $ws == -1
- || $rOpts_delete_old_whitespace )
- {
+ # finalize data structures
+ $self->respace_post_loop_ops();
- my $token_next = $rLL->[$Knext]->[_TOKEN_];
- my $type_next = $rLL->[$Knext]->[_TYPE_];
+ # Reset memory to be the new array
+ $self->[_rLL_] = $rLL_new;
+ my $Klimit;
+ if ( @{$rLL_new} ) { $Klimit = @{$rLL_new} - 1 }
+ $self->[_Klimit_] = $Klimit;
- my $do_not_delete = is_essential_whitespace(
- $last_last_nonblank_code_token,
- $last_last_nonblank_code_type,
- $last_nonblank_code_token,
- $last_nonblank_code_type,
- $token_next,
- $type_next,
- );
+ # During development, verify that the new array still looks okay.
+ DEVEL_MODE && $self->check_token_array();
- # Note that repeated blanks will get filtered out here
- next unless ($do_not_delete);
- }
+ # update the token limits of each line
+ ( $severe_error, $rqw_lines ) = $self->resync_lines_and_tokens();
- # make it just one character
- $rtoken_vars->[_TOKEN_] = SPACE;
- $store_token->($rtoken_vars);
- next;
- }
+ return ( $severe_error, $rqw_lines );
+} ## end sub respace_tokens
- # Handle a nonblank token...
+sub respace_tokens_inner_loop {
- if ($type_sequence) {
+ my ( $self, $Kfirst, $Klast, $input_line_number ) = @_;
- # Insert a tentative missing semicolon if the next token is
- # a closing block brace
- if (
- $type eq '}'
- && $token eq '}'
+ #-----------------------------------------------------------------
+ # Loop to copy all tokens on one line, making any spacing changes,
+ # while also collecting information needed by later subs.
+ #-----------------------------------------------------------------
+ foreach my $KK ( $Kfirst .. $Klast ) {
- # not preceded by a ';'
- && $last_nonblank_code_type ne ';'
+ # TODO: consider eliminating this closure var by passing directly to
+ # store_token following pattern of store_tokens_to_go.
+ $Ktoken_vars = $KK;
- # and this is not a VERSION stmt (is all one line, we
- # are not inserting semicolons on one-line blocks)
- && $CODE_type ne 'VER'
+ my $rtoken_vars = $rLL->[$KK];
+ my $type = $rtoken_vars->[_TYPE_];
- # and we are allowed to add semicolons
- && $rOpts->{'add-semicolons'}
- )
- {
- $add_phantom_semicolon->($KK);
- }
+ # Handle a blank space ...
+ if ( $type eq 'b' ) {
+
+ # Delete it if not wanted by whitespace rules
+ # or we are deleting all whitespace
+ # Note that whitespace flag is a flag indicating whether a
+ # white space BEFORE the token is needed
+ next if ( $KK >= $Klast ); # skip terminal blank
+ my $Knext = $KK + 1;
+
+ if ($rOpts_freeze_whitespace) {
+ $self->store_token($rtoken_vars);
+ next;
}
- # Modify certain tokens here for whitespace
- # The following is not yet done, but could be:
- # sub (x x x)
- # ( $type =~ /^[wit]$/ )
- elsif ( $is_wit{$type} ) {
+ my $ws = $rwhitespace_flags->[$Knext];
+ if ( $ws == -1
+ || $rOpts_delete_old_whitespace )
+ {
- # change '$ var' to '$var' etc
- # change '@ ' to '@'
- # Examples: <<snippets/space1.in>>
- my $ord = ord( substr( $token, 1, 1 ) );
- if (
+ my $token_next = $rLL->[$Knext]->[_TOKEN_];
+ my $type_next = $rLL->[$Knext]->[_TYPE_];
- # quick test for possible blank at second char
- $ord > 0 && ( $ord < ORD_PRINTABLE_MIN
- || $ord > ORD_PRINTABLE_MAX )
- )
- {
- my ( $sigil, $word ) = split /\s+/, $token, 2;
+ my $do_not_delete = is_essential_whitespace(
+ $last_last_nonblank_code_token,
+ $last_last_nonblank_code_type,
+ $last_nonblank_code_token,
+ $last_nonblank_code_type,
+ $token_next,
+ $type_next,
+ );
- # $sigil =~ /^[\$\&\%\*\@]$/ )
- if ( $is_sigil{$sigil} ) {
- $token = $sigil;
- $token .= $word if ( defined($word) ); # fix c104
- $rtoken_vars->[_TOKEN_] = $token;
- }
- }
+ # Note that repeated blanks will get filtered out here
+ next unless ($do_not_delete);
+ }
- # Split identifiers with leading arrows, inserting blanks
- # if necessary. It is easier and safer here than in the
- # tokenizer. For example '->new' becomes two tokens, '->'
- # and 'new' with a possible blank between.
- #
- # Note: there is a related patch in sub set_whitespace_flags
- elsif (length($token) > 2
- && substr( $token, 0, 2 ) eq '->'
- && $token =~ /^\-\>(.*)$/
- && $1 )
- {
+ # make it just one character
+ $rtoken_vars->[_TOKEN_] = SPACE;
+ $self->store_token($rtoken_vars);
+ next;
+ }
- my $token_save = $1;
- my $type_save = $type;
+ my $token = $rtoken_vars->[_TOKEN_];
- # Change '-> new' to '->new'
- $token_save =~ s/^\s+//g;
+ # Handle a sequenced token ... i.e. one of ( ) { } [ ] ? :
+ if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) {
- # store a blank to left of arrow if necessary
- my $Kprev = $self->K_previous_nonblank($KK);
- if ( defined($Kprev)
- && $rLL->[$Kprev]->[_TYPE_] ne 'b'
- && $rOpts_add_whitespace
- && $want_left_space{'->'} == WS_YES )
- {
- my $rcopy =
- copy_token_as_type( $rtoken_vars, 'b', SPACE );
- $store_token->($rcopy);
- }
+ # One of ) ] } ...
+ if ( $is_closing_token{$token} ) {
- # then store the arrow
- my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' );
- $store_token->($rcopy);
+ my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+ my $block_type = $rblock_type_of_seqno->{$type_sequence};
- # store a blank after the arrow if requested
- # added for issue git #33
- if ( $want_right_space{'->'} == WS_YES ) {
- my $rcopy_b =
- copy_token_as_type( $rtoken_vars, 'b', SPACE );
- $store_token->($rcopy_b);
- }
+ #---------------------------------------------
+ # check for semicolon addition in a code block
+ #---------------------------------------------
+ if ($block_type) {
- # then reset the current token to be the remainder,
- # and reset the whitespace flag according to the arrow
- $token = $rtoken_vars->[_TOKEN_] = $token_save;
- $type = $rtoken_vars->[_TYPE_] = $type_save;
- $store_token->($rtoken_vars);
- next;
+ # if not preceded by a ';' ..
+ if ( $last_nonblank_code_type ne ';' ) {
+
+ # tentatively insert a semicolon if appropriate
+ $self->add_phantom_semicolon($KK)
+ if $rOpts->{'add-semicolons'};
+ }
}
- # Trim certain spaces in identifiers
- if ( $type eq 'i' ) {
+ #----------------------------------------------------------
+ # check for addition/deletion of a trailing comma in a list
+ #----------------------------------------------------------
+ else {
- if (
- (
- substr( $token, 0, 3 ) eq 'sub'
- || $rOpts_sub_alias_list
- )
- && $token =~ /$SUB_PATTERN/
- )
+ # if this is a list ..
+ my $rtype_count = $rtype_count_by_seqno->{$type_sequence};
+ if ( $rtype_count
+ && $rtype_count->{','}
+ && !$rtype_count->{';'}
+ && !$rtype_count->{'f'} )
{
- # -spp = 0 : no space before opening prototype paren
- # -spp = 1 : stable (follow input spacing)
- # -spp = 2 : always space before opening prototype paren
- my $spp = $rOpts->{'space-prototype-paren'};
- if ( defined($spp) ) {
- if ( $spp == 0 ) { $token =~ s/\s+\(/\(/; }
- elsif ( $spp == 2 ) { $token =~ s/\(/ (/; }
+ # if NOT preceded by a comma..
+ if ( $last_nonblank_code_type ne ',' ) {
+
+ # insert a comma if requested
+ if ( $rOpts_add_trailing_commas
+ && %trailing_comma_rules )
+ {
+ $self->add_trailing_comma( $KK, $Kfirst,
+ $trailing_comma_rules{$token} );
+ }
}
- # one space max, and no tabs
- $token =~ s/\s+/ /g;
- $rtoken_vars->[_TOKEN_] = $token;
- }
+ # if preceded by a comma ..
+ else {
- # clean up spaces in package identifiers, like
- # "package Bob::Dog;"
- elsif ( substr( $token, 0, 7 ) eq 'package'
- && $token =~ /^package\s/ )
- {
- $token =~ s/\s+/ /g;
- $rtoken_vars->[_TOKEN_] = $token;
+ # delete a trailing comma if requested
+ my $deleted;
+ if ( $rOpts_delete_trailing_commas
+ && %trailing_comma_rules )
+ {
+ $deleted =
+ $self->delete_trailing_comma( $KK, $Kfirst,
+ $trailing_comma_rules{$token} );
+ }
+
+ # delete a weld-interfering comma if requested
+ if ( !$deleted
+ && $rOpts_delete_weld_interfering_commas
+ && $is_closing_type{
+ $last_last_nonblank_code_type} )
+ {
+ $self->delete_weld_interfering_comma($KK);
+ }
+ }
}
+ }
+ }
+ }
- # trim identifiers of trailing blanks which can occur
- # under some unusual circumstances, such as if the
- # identifier 'witch' has trailing blanks on input here:
- #
- # sub
- # witch
- # () # prototype may be on new line ...
- # ...
- my $ord_ch = ord( substr( $token, -1, 1 ) );
- if (
+ # Modify certain tokens here for whitespace
+ # The following is not yet done, but could be:
+ # sub (x x x)
+ # ( $type =~ /^[wit]$/ )
+ elsif ( $is_wit{$type} ) {
- # quick check for possible ending space
- $ord_ch > 0 && ( $ord_ch < ORD_PRINTABLE_MIN
- || $ord_ch > ORD_PRINTABLE_MAX )
- )
+ # change '$ var' to '$var' etc
+ # change '@ ' to '@'
+ # Examples: <<snippets/space1.in>>
+ my $ord = ord( substr( $token, 1, 1 ) );
+ if (
+
+ # quick test for possible blank at second char
+ $ord > 0 && ( $ord < ORD_PRINTABLE_MIN
+ || $ord > ORD_PRINTABLE_MAX )
+ )
+ {
+ my ( $sigil, $word ) = split /\s+/, $token, 2;
+
+ # $sigil =~ /^[\$\&\%\*\@]$/ )
+ if ( $is_sigil{$sigil} ) {
+ $token = $sigil;
+ $token .= $word if ( defined($word) ); # fix c104
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
+ }
+
+ # Trim certain spaces in identifiers
+ if ( $type eq 'i' ) {
+
+ if ( $token =~ /$SUB_PATTERN/ ) {
+
+ # -spp = 0 : no space before opening prototype paren
+ # -spp = 1 : stable (follow input spacing)
+ # -spp = 2 : always space before opening prototype paren
+ if ( !defined($rOpts_space_prototype_paren)
+ || $rOpts_space_prototype_paren == 1 )
{
- $token =~ s/\s+$//g;
- $rtoken_vars->[_TOKEN_] = $token;
+ ## default: stable
+ }
+ elsif ( $rOpts_space_prototype_paren == 0 ) {
+ $token =~ s/\s+\(/\(/;
}
+ elsif ( $rOpts_space_prototype_paren == 2 ) {
+ $token =~ s/\(/ (/;
+ }
+
+ # one space max, and no tabs
+ $token =~ s/\s+/ /g;
+ $rtoken_vars->[_TOKEN_] = $token;
+
+ $self->[_ris_special_identifier_token_]->{$token} = 'sub';
+
}
- }
- # handle semicolons
- elsif ( $type eq ';' ) {
+ # clean up spaces in package identifiers, like
+ # "package Bob::Dog;"
+ elsif ( substr( $token, 0, 7 ) eq 'package'
+ && $token =~ /^package\s/ )
+ {
+ $token =~ s/\s+/ /g;
+ $rtoken_vars->[_TOKEN_] = $token;
+
+ $self->[_ris_special_identifier_token_]->{$token} =
+ 'package';
+
+ }
- # Remove unnecessary semicolons, but not after bare
- # blocks, where it could be unsafe if the brace is
- # mis-tokenized.
+ # trim identifiers of trailing blanks which can occur
+ # under some unusual circumstances, such as if the
+ # identifier 'witch' has trailing blanks on input here:
+ #
+ # sub
+ # witch
+ # () # prototype may be on new line ...
+ # ...
+ my $ord_ch = ord( substr( $token, -1, 1 ) );
if (
- $rOpts->{'delete-semicolons'}
- && (
- (
- $last_nonblank_block_type
- && $last_nonblank_code_type eq '}'
- && (
- $is_block_without_semicolon{
- $last_nonblank_block_type}
- || $last_nonblank_block_type =~ /$SUB_PATTERN/
- || $last_nonblank_block_type =~ /^\w+:$/
- )
- )
- || $last_nonblank_code_type eq ';'
- )
+
+ # quick check for possible ending space
+ $ord_ch > 0 && ( $ord_ch < ORD_PRINTABLE_MIN
+ || $ord_ch > ORD_PRINTABLE_MAX )
)
{
+ $token =~ s/\s+$//g;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
+ }
+ }
- # This looks like a deletable semicolon, but even if a
- # semicolon can be deleted it is not necessarily best to do
- # so. We apply these additional rules for deletion:
- # - Always ok to delete a ';' at the end of a line
- # - Never delete a ';' before a '#' because it would
- # promote it to a block comment.
- # - If a semicolon is not at the end of line, then only
- # delete if it is followed by another semicolon or closing
- # token. This includes the comment rule. It may take
- # two passes to get to a final state, but it is a little
- # safer. For example, keep the first semicolon here:
- # eval { sub bubba { ok(0) }; ok(0) } || ok(1);
- # It is not required but adds some clarity.
- my $ok_to_delete = 1;
- if ( $KK < $Klast ) {
- my $Kn = $self->K_next_nonblank($KK);
- if ( defined($Kn) && $Kn <= $Klast ) {
- my $next_nonblank_token_type =
- $rLL->[$Kn]->[_TYPE_];
- $ok_to_delete = $next_nonblank_token_type eq ';'
- || $next_nonblank_token_type eq '}';
- }
- }
+ # handle semicolons
+ elsif ( $type eq ';' ) {
- # do not delete only nonblank token in a file
- else {
- my $Kp = $self->K_previous_code( undef, $rLL_new );
- my $Kn = $self->K_next_nonblank($KK);
- $ok_to_delete = defined($Kn) || defined($Kp);
- }
+ # Remove unnecessary semicolons, but not after bare
+ # blocks, where it could be unsafe if the brace is
+ # mis-tokenized.
+ if (
+ $rOpts->{'delete-semicolons'}
+ && (
+ (
+ $last_nonblank_block_type
+ && $last_nonblank_code_type eq '}'
+ && (
+ $is_block_without_semicolon{
+ $last_nonblank_block_type}
+ || $last_nonblank_block_type =~ /$SUB_PATTERN/
+ || $last_nonblank_block_type =~ /^\w+:$/
+ )
+ )
+ || $last_nonblank_code_type eq ';'
+ )
+ )
+ {
- if ($ok_to_delete) {
- $self->note_deleted_semicolon($input_line_number);
- next;
- }
- else {
- write_logfile_entry("Extra ';'\n");
+ # This looks like a deletable semicolon, but even if a
+ # semicolon can be deleted it is not necessarily best to do
+ # so. We apply these additional rules for deletion:
+ # - Always ok to delete a ';' at the end of a line
+ # - Never delete a ';' before a '#' because it would
+ # promote it to a block comment.
+ # - If a semicolon is not at the end of line, then only
+ # delete if it is followed by another semicolon or closing
+ # token. This includes the comment rule. It may take
+ # two passes to get to a final state, but it is a little
+ # safer. For example, keep the first semicolon here:
+ # eval { sub bubba { ok(0) }; ok(0) } || ok(1);
+ # It is not required but adds some clarity.
+ my $ok_to_delete = 1;
+ if ( $KK < $Klast ) {
+ my $Kn = $self->K_next_nonblank($KK);
+ if ( defined($Kn) && $Kn <= $Klast ) {
+ my $next_nonblank_token_type = $rLL->[$Kn]->[_TYPE_];
+ $ok_to_delete = $next_nonblank_token_type eq ';'
+ || $next_nonblank_token_type eq '}';
}
}
+
+ # do not delete only nonblank token in a file
+ else {
+ my $Kp = $self->K_previous_code( undef, $rLL_new );
+ my $Kn = $self->K_next_nonblank($KK);
+ $ok_to_delete = defined($Kn) || defined($Kp);
+ }
+
+ if ($ok_to_delete) {
+ $self->note_deleted_semicolon($input_line_number);
+ next;
+ }
+ else {
+ write_logfile_entry("Extra ';'\n");
+ }
}
+ }
- # Old patch to add space to something like "x10".
- # Note: This is now done in the Tokenizer, but this code remains
- # for reference.
- elsif ( $type eq 'n' ) {
- if ( substr( $token, 0, 1 ) eq 'x' && $token =~ /^x\d+/ ) {
- $token =~ s/x/x /;
- $rtoken_vars->[_TOKEN_] = $token;
- if (DEVEL_MODE) {
- Fault(<<EOM);
+ # Old patch to add space to something like "x10".
+ # Note: This is now done in the Tokenizer, but this code remains
+ # for reference.
+ elsif ( $type eq 'n' ) {
+ if ( substr( $token, 0, 1 ) eq 'x' && $token =~ /^x\d+/ ) {
+ $token =~ s/x/x /;
+ $rtoken_vars->[_TOKEN_] = $token;
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
Near line $input_line_number, Unexpected need to split a token '$token' - this should now be done by the Tokenizer
EOM
- }
}
}
+ }
- # check for a qw quote
- elsif ( $type eq 'q' ) {
-
- # trim blanks from right of qw quotes
- # (To avoid trimming qw quotes use -ntqw; the tokenizer handles
- # this)
- $token =~ s/\s*$//;
- $rtoken_vars->[_TOKEN_] = $token;
- $self->note_embedded_tab($input_line_number)
- if ( $token =~ "\t" );
- $store_token_and_space->(
- $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
- );
- next;
- } ## end if ( $type eq 'q' )
+ # check for a qw quote
+ elsif ( $type eq 'q' ) {
- # change 'LABEL :' to 'LABEL:'
- elsif ( $type eq 'J' ) {
- $token =~ s/\s+//g;
- $rtoken_vars->[_TOKEN_] = $token;
+ # trim blanks from right of qw quotes
+ # (To avoid trimming qw quotes use -ntqw; the tokenizer handles
+ # this)
+ $token =~ s/\s*$//;
+ $rtoken_vars->[_TOKEN_] = $token;
+ if ( $self->[_save_logfile_] && $token =~ /\t/ ) {
+ $self->note_embedded_tab($input_line_number);
}
-
- # check a quote for problems
- elsif ( $type eq 'Q' ) {
- $check_Q->( $KK, $Kfirst, $input_line_number );
+ if ( $rwhitespace_flags->[$KK] == WS_YES
+ && @{$rLL_new}
+ && $rLL_new->[-1]->[_TYPE_] ne 'b'
+ && $rOpts_add_whitespace )
+ {
+ $self->store_space();
}
+ $self->store_token($rtoken_vars);
+ next;
+ } ## end if ( $type eq 'q' )
- # Store this token with possible previous blank
- if ( $rwhitespace_flags->[$KK] == WS_YES ) {
- $store_token_and_space->( $rtoken_vars, 1 );
+ # delete repeated commas if requested
+ elsif ( $type eq ',' ) {
+ if ( $last_nonblank_code_type eq ','
+ && $rOpts->{'delete-repeated-commas'} )
+ {
+ # Could note this deletion as a possible future update:
+ ## $self->note_deleted_comma($input_line_number);
+ next;
}
- else {
- $store_token->($rtoken_vars);
+
+ # remember input line index of first comma if -wtc is used
+ if (%trailing_comma_rules) {
+ my $seqno = $seqno_stack{ $depth_next - 1 };
+ if ( defined($seqno)
+ && !defined( $self->[_rfirst_comma_line_index_]->{$seqno} )
+ )
+ {
+ $self->[_rfirst_comma_line_index_]->{$seqno} =
+ $rtoken_vars->[_LINE_INDEX_];
+ }
}
+ }
- } # End token loop
- } # End line loop
+ # change 'LABEL :' to 'LABEL:'
+ elsif ( $type eq 'J' ) {
+ $token =~ s/\s+//g;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
+
+ # check a quote for problems
+ elsif ( $type eq 'Q' ) {
+ $self->check_Q( $KK, $Kfirst, $input_line_number )
+ if ( $self->[_save_logfile_] );
+ }
+
+ # Store this token with possible previous blank
+ if ( $rwhitespace_flags->[$KK] == WS_YES
+ && @{$rLL_new}
+ && $rLL_new->[-1]->[_TYPE_] ne 'b'
+ && $rOpts_add_whitespace )
+ {
+ $self->store_space();
+ }
+ $self->store_token($rtoken_vars);
+
+ } # End token loop
+
+ return;
+} ## end sub respace_tokens_inner_loop
+
+sub respace_post_loop_ops {
+
+ my ($self) = @_;
# Walk backwards through the tokens, making forward links to sequence items.
if ( @{$rLL_new} ) {
}
# Find and remember lists by sequence number
+ my %is_C_style_for;
foreach my $seqno ( keys %{$K_opening_container} ) {
my $K_opening = $K_opening_container->{$seqno};
next unless defined($K_opening);
if ($rtype_count) {
my $comma_count = $rtype_count->{','};
my $fat_comma_count = $rtype_count->{'=>'};
- my $semicolon_count = $rtype_count->{';'} || $rtype_count->{'f'};
+ my $semicolon_count = $rtype_count->{';'};
+ if ( $rtype_count->{'f'} ) {
+ $semicolon_count += $rtype_count->{'f'};
+ $is_C_style_for{$seqno} = 1;
+ }
# We will define a list to be a container with one or more commas
# and no semicolons. Note that we have included the semicolons
if ( $rLL_new->[$K_opening]->[_TOKEN_] eq '(' ) {
my $Kp = $self->K_previous_code( $K_opening, $rLL_new );
if ( defined($Kp) ) {
- my $type_p = $rLL_new->[$Kp]->[_TYPE_];
- if ( $type_p eq 'k' ) {
- my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
- $is_list = 0 if ( $is_nonlist_keyword{$token_p} );
- }
- else {
- $is_list = 0 if ( $is_nonlist_type{$type_p} );
- }
+ my $type_p = $rLL_new->[$Kp]->[_TYPE_];
+ my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
+ $is_list =
+ $type_p eq 'k'
+ ? !$is_nonlist_keyword{$token_p}
+ : !$is_nonlist_type{$type_p};
}
}
}
next unless ( $rtype_count && $rtype_count->{'=>'} );
# override -cab=3 if this contains a sub-list
- if ( $rhas_list->{$seqno} ) {
- $roverride_cab3->{$seqno} = 1;
- }
+ if ( !defined( $roverride_cab3->{$seqno} ) ) {
+ if ( $rhas_list->{$seqno} ) {
+ $roverride_cab3->{$seqno} = 2;
+ }
- # or if this is a sub-list of its parent container
- else {
- my $seqno_parent = $rparent_of_seqno->{$seqno};
- if ( defined($seqno_parent)
- && $ris_list_by_seqno->{$seqno_parent} )
- {
- $roverride_cab3->{$seqno} = 1;
+ # or if this is a sub-list of its parent container
+ else {
+ my $seqno_parent = $rparent_of_seqno->{$seqno};
+ if ( defined($seqno_parent)
+ && $ris_list_by_seqno->{$seqno_parent} )
+ {
+ $roverride_cab3->{$seqno} = 2;
+ }
}
}
}
}
- # Reset memory to be the new array
- $self->[_rLL_] = $rLL_new;
- my $Klimit;
- if ( @{$rLL_new} ) { $Klimit = @{$rLL_new} - 1 }
- $self->[_Klimit_] = $Klimit;
-
- # During development, verify that the new array still looks okay.
- DEVEL_MODE && $self->check_token_array();
+ # Add -ci to C-style for loops (issue c154)
+ # This is much easier to do here than in the tokenizer.
+ foreach my $seqno ( keys %is_C_style_for ) {
+ my $K_opening = $K_opening_container->{$seqno};
+ my $K_closing = $K_closing_container->{$seqno};
+ my $type_last = 'f';
+ for my $KK ( $K_opening + 1 .. $K_closing - 1 ) {
+ $rLL_new->[$KK]->[_CI_LEVEL_] = $type_last eq 'f' ? 0 : 1;
+ my $type = $rLL_new->[$KK]->[_TYPE_];
+ if ( $type ne 'b' && $type ne '#' ) { $type_last = $type }
+ }
+ }
- # reset the token limits of each line
- $self->resync_lines_and_tokens();
+ return;
+} ## end sub respace_post_loop_ops
+sub set_permanently_broken {
+ my ( $self, $seqno ) = @_;
+ while ( defined($seqno) ) {
+ $ris_permanently_broken->{$seqno} = 1;
+ $seqno = $rparent_of_seqno->{$seqno};
+ }
return;
-} ## end sub respace_tokens
+} ## end sub set_permanently_broken
-sub copy_token_as_type {
+sub store_token {
- # This provides a quick way to create a new token by
- # slightly modifying an existing token.
- my ( $rold_token, $type, $token ) = @_;
- if ( $type eq 'b' ) {
- $token = SPACE unless defined($token);
- }
- elsif ( $type eq 'q' ) {
- $token = EMPTY_STRING unless defined($token);
- }
- elsif ( $type eq '->' ) {
- $token = '->' unless defined($token);
- }
- elsif ( $type eq ';' ) {
- $token = ';' unless defined($token);
- }
- else {
+ my ( $self, $item ) = @_;
- # Unexpected type ... this sub will work as long as both $token and
- # $type are defined, but we should catch any unexpected types during
- # development.
- if (DEVEL_MODE) {
- Fault(<<EOM);
-sub 'copy_token_as_type' received token type '$type' but expects just one of: 'b' 'q' '->' or ';'
-EOM
- }
- else {
- # shouldn't happen
- }
- }
+ #------------------------------------------
+ # Store one token during respace operations
+ #------------------------------------------
- my @rnew_token = @{$rold_token};
- $rnew_token[_TYPE_] = $type;
- $rnew_token[_TOKEN_] = $token;
- $rnew_token[_TYPE_SEQUENCE_] = EMPTY_STRING;
- return \@rnew_token;
-} ## end sub copy_token_as_type
+ # Input parameter:
+ # $item = ref to a token
-sub Debug_dump_tokens {
+ # NOTE: this sub is called once per token so coding efficiency is critical.
- # a debug routine, not normally used
- my ( $self, $msg ) = @_;
- my $rLL = $self->[_rLL_];
- my $nvars = @{$rLL};
- print STDERR "$msg\n";
- print STDERR "ntokens=$nvars\n";
- print STDERR "K\t_TOKEN_\t_TYPE_\n";
- my $K = 0;
+ # The next multiple assignment statements are significantly faster than
+ # doing them one-by-one.
+ my (
- foreach my $item ( @{$rLL} ) {
- print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n";
- $K++;
- }
- return;
-} ## end sub Debug_dump_tokens
+ $type,
+ $token,
+ $type_sequence,
-sub K_next_code {
- my ( $self, $KK, $rLL ) = @_;
+ ) = @{$item}[
- # return the index K of the next nonblank, non-comment token
- return unless ( defined($KK) && $KK >= 0 );
+ _TYPE_,
+ _TOKEN_,
+ _TYPE_SEQUENCE_,
- # use the standard array unless given otherwise
- $rLL = $self->[_rLL_] unless ( defined($rLL) );
- my $Num = @{$rLL};
- my $Knnb = $KK + 1;
- while ( $Knnb < $Num ) {
- if ( !defined( $rLL->[$Knnb] ) ) {
+ ];
- # We seem to have encountered a gap in our array.
- # This shouldn't happen because sub write_line() pushed
- # items into the $rLL array.
- Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
+ # Set the token length. Later it may be adjusted again if phantom or
+ # ignoring side comment lengths.
+ my $token_length =
+ $is_encoded_data ? $length_function->($token) : length($token);
+
+ # handle blanks
+ if ( $type eq 'b' ) {
+
+ # Do not output consecutive blanks. This situation should have been
+ # prevented earlier, but it is worth checking because later routines
+ # make this assumption.
+ if ( @{$rLL_new} && $rLL_new->[-1]->[_TYPE_] eq 'b' ) {
return;
}
- if ( $rLL->[$Knnb]->[_TYPE_] ne 'b'
- && $rLL->[$Knnb]->[_TYPE_] ne '#' )
- {
- return $Knnb;
- }
- $Knnb++;
}
- return;
-} ## end sub K_next_code
-
-sub K_next_nonblank {
- my ( $self, $KK, $rLL ) = @_;
-
- # return the index K of the next nonblank token, or
- # return undef if none
- return unless ( defined($KK) && $KK >= 0 );
- # The third arg allows this routine to be used on any array. This is
- # useful in sub respace_tokens when we are copying tokens from an old $rLL
- # to a new $rLL array. But usually the third arg will not be given and we
- # will just use the $rLL array in $self.
- $rLL = $self->[_rLL_] unless ( defined($rLL) );
- my $Num = @{$rLL};
- my $Knnb = $KK + 1;
- return unless ( $Knnb < $Num );
- return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
- return unless ( ++$Knnb < $Num );
- return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
+ # handle comments
+ elsif ( $type eq '#' ) {
- # Backup loop. Very unlikely to get here; it means we have neighboring
- # blanks in the token stream.
- $Knnb++;
- while ( $Knnb < $Num ) {
+ # trim comments if necessary
+ my $ord = ord( substr( $token, -1, 1 ) );
+ if (
+ $ord > 0
+ && ( $ord < ORD_PRINTABLE_MIN
+ || $ord > ORD_PRINTABLE_MAX )
+ && $token =~ s/\s+$//
+ )
+ {
+ $token_length = $length_function->($token);
+ $item->[_TOKEN_] = $token;
+ }
- # Safety check, this fault shouldn't happen: The $rLL array is the
- # main array of tokens, so all entries should be used. It is
- # initialized in sub write_line, and then re-initialized by sub
- # $store_token() within sub respace_tokens. Tokens are pushed on
- # so there shouldn't be any gaps.
- if ( !defined( $rLL->[$Knnb] ) ) {
- Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
- return;
+ # Mark length of side comments as just 1 if sc lengths are ignored
+ if ( $rOpts_ignore_side_comment_lengths
+ && ( !$CODE_type || $CODE_type eq 'HSC' ) )
+ {
+ $token_length = 1;
+ }
+ my $seqno = $seqno_stack{ $depth_next - 1 };
+ if ( defined($seqno) ) {
+ $self->[_rblank_and_comment_count_]->{$seqno} += 1
+ if ( $CODE_type eq 'BC' );
+ $self->set_permanently_broken($seqno)
+ if !$ris_permanently_broken->{$seqno};
}
- if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
- $Knnb++;
}
- return;
-} ## end sub K_next_nonblank
-sub K_previous_code {
+ # handle non-blanks and non-comments
+ else {
- # return the index K of the previous nonblank, non-comment token
- # Call with $KK=undef to start search at the top of the array
- my ( $self, $KK, $rLL ) = @_;
+ my $block_type;
- # use the standard array unless given otherwise
- $rLL = $self->[_rLL_] unless ( defined($rLL) );
- my $Num = @{$rLL};
- if ( !defined($KK) ) { $KK = $Num }
- elsif ( $KK > $Num ) {
+ # check for a sequenced item (i.e., container or ?/:)
+ if ($type_sequence) {
- # This fault can be caused by a programming error in which a bad $KK is
- # given. The caller should make the first call with KK_new=undef to
- # avoid this error.
- Fault(
-"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
- ) if (DEVEL_MODE);
- return;
- }
- my $Kpnb = $KK - 1;
- while ( $Kpnb >= 0 ) {
- if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b'
- && $rLL->[$Kpnb]->[_TYPE_] ne '#' )
- {
- return $Kpnb;
- }
- $Kpnb--;
- }
- return;
-} ## end sub K_previous_code
+ # This will be the index of this item in the new array
+ my $KK_new = @{$rLL_new};
-sub K_previous_nonblank {
+ if ( $is_opening_token{$token} ) {
- # return index of previous nonblank token before item K;
- # Call with $KK=undef to start search at the top of the array
- my ( $self, $KK, $rLL ) = @_;
+ $K_opening_container->{$type_sequence} = $KK_new;
+ $block_type = $rblock_type_of_seqno->{$type_sequence};
- # use the standard array unless given otherwise
- $rLL = $self->[_rLL_] unless ( defined($rLL) );
- my $Num = @{$rLL};
- if ( !defined($KK) ) { $KK = $Num }
- elsif ( $KK > $Num ) {
+ # Fix for case b1100: Count a line ending in ', [' as having
+ # a line-ending comma. Otherwise, these commas can be hidden
+ # with something like --opening-square-bracket-right
+ if ( $last_nonblank_code_type eq ','
+ && $Ktoken_vars == $Klast_old_code
+ && $Ktoken_vars > $Kfirst_old )
+ {
+ $rlec_count_by_seqno->{$type_sequence}++;
+ }
- # This fault can be caused by a programming error in which a bad $KK is
- # given. The caller should make the first call with KK_new=undef to
- # avoid this error.
- Fault(
-"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
- ) if (DEVEL_MODE);
- return;
- }
- my $Kpnb = $KK - 1;
- return unless ( $Kpnb >= 0 );
- return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
- return unless ( --$Kpnb >= 0 );
- return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
+ if ( $last_nonblank_code_type eq '='
+ || $last_nonblank_code_type eq '=>' )
+ {
+ $ris_assigned_structure->{$type_sequence} =
+ $last_nonblank_code_type;
+ }
- # Backup loop. We should not get here unless some routine
- # slipped repeated blanks into the token stream.
- return unless ( --$Kpnb >= 0 );
- while ( $Kpnb >= 0 ) {
- if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
- $Kpnb--;
- }
- return;
-} ## end sub K_previous_nonblank
+ my $seqno_parent = $seqno_stack{ $depth_next - 1 };
+ $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
+ push @{ $rchildren_of_seqno->{$seqno_parent} }, $type_sequence;
+ $rparent_of_seqno->{$type_sequence} = $seqno_parent;
+ $seqno_stack{$depth_next} = $type_sequence;
+ $K_old_opening_by_seqno{$type_sequence} = $Ktoken_vars;
+ $depth_next++;
-sub parent_seqno_by_K {
+ if ( $depth_next > $depth_next_max ) {
+ $depth_next_max = $depth_next;
+ }
+ }
+ elsif ( $is_closing_token{$token} ) {
- # Return the sequence number of the parent container of token K, if any.
+ $K_closing_container->{$type_sequence} = $KK_new;
+ $block_type = $rblock_type_of_seqno->{$type_sequence};
- my ( $self, $KK ) = @_;
- my $rLL = $self->[_rLL_];
+ # Do not include terminal commas in counts
+ if ( $last_nonblank_code_type eq ','
+ || $last_nonblank_code_type eq '=>' )
+ {
+ $rtype_count_by_seqno->{$type_sequence}
+ ->{$last_nonblank_code_type}--;
- # The task is to jump forward to the next container token
- # and use the sequence number of either it or its parent.
+ if ( $Ktoken_vars == $Kfirst_old
+ && $last_nonblank_code_type eq ','
+ && $rlec_count_by_seqno->{$type_sequence} )
+ {
+ $rlec_count_by_seqno->{$type_sequence}--;
+ }
+ }
- # For example, consider the following with seqno=5 of the '[' and ']'
- # being called with index K of the first token of each line:
+ # Update the stack...
+ $depth_next--;
+ }
+ else {
- # # result
- # push @tests, # -
- # [ # -
- # sub { 99 }, 'do {&{%s} for 1,2}', # 5
- # '(&{})(&{})', undef, # 5
- # [ 2, 2, 0 ], 0 # 5
- # ]; # -
+ # For ternary, note parent but do not include as child
+ my $seqno_parent = $seqno_stack{ $depth_next - 1 };
+ $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
+ $rparent_of_seqno->{$type_sequence} = $seqno_parent;
- # NOTE: The ending parent will be SEQ_ROOT for a balanced file. For
- # unbalanced files, last sequence number will either be undefined or it may
- # be at a deeper level. In either case we will just return SEQ_ROOT to
- # have a defined value and allow formatting to proceed.
- my $parent_seqno = SEQ_ROOT;
- my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
- if ($type_sequence) {
- $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
- }
- else {
- my $Kt = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
- if ( defined($Kt) ) {
- $type_sequence = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
- my $type = $rLL->[$Kt]->[_TYPE_];
+ # These are not yet used but could be useful
+ if ( $token eq '?' ) {
+ $K_opening_ternary->{$type_sequence} = $KK_new;
+ }
+ elsif ( $token eq ':' ) {
+ $K_closing_ternary->{$type_sequence} = $KK_new;
+ }
+ else {
- # if next container token is closing, it is the parent seqno
- if ( $is_closing_type{$type} ) {
- $parent_seqno = $type_sequence;
+ # We really shouldn't arrive here, just being cautious:
+ # The only sequenced types output by the tokenizer are the
+ # opening & closing containers and the ternary types. Each
+ # of those was checked above. So we would only get here
+ # if the tokenizer has been changed to mark some other
+ # tokens with sequence numbers.
+ if (DEVEL_MODE) {
+ Fault(
+"Unexpected token type with sequence number: type='$type', seqno='$type_sequence'"
+ );
+ }
+ }
}
+ }
- # otherwise we want its parent container
- else {
- $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
+ # Remember the most recent two non-blank, non-comment tokens.
+ # NOTE: the phantom semicolon code may change the output stack
+ # without updating these values. Phantom semicolons are considered
+ # the same as blanks for now, but future needs might change that.
+ # See the related note in sub 'add_phantom_semicolon'.
+ $last_last_nonblank_code_type = $last_nonblank_code_type;
+ $last_last_nonblank_code_token = $last_nonblank_code_token;
+
+ $last_nonblank_code_type = $type;
+ $last_nonblank_code_token = $token;
+ $last_nonblank_block_type = $block_type;
+
+ # count selected types
+ if ( $is_counted_type{$type} ) {
+ my $seqno = $seqno_stack{ $depth_next - 1 };
+ if ( defined($seqno) ) {
+ $rtype_count_by_seqno->{$seqno}->{$type}++;
+
+ # Count line-ending commas for -bbx
+ if ( $type eq ',' && $Ktoken_vars == $Klast_old_code ) {
+ $rlec_count_by_seqno->{$seqno}++;
+ }
+
+ # Remember index of first here doc target
+ if ( $type eq 'h' && !$K_first_here_doc_by_seqno{$seqno} ) {
+ my $KK_new = @{$rLL_new};
+ $K_first_here_doc_by_seqno{$seqno} = $KK_new;
+ }
}
}
}
- $parent_seqno = SEQ_ROOT unless ( defined($parent_seqno) );
- return $parent_seqno;
-} ## end sub parent_seqno_by_K
-sub is_in_block_by_i {
- my ( $self, $i ) = @_;
+ # cumulative length is the length sum including this token
+ $cumulative_length += $token_length;
- # returns true if
- # token at i is contained in a BLOCK
- # or is at root level
- # or there is some kind of error (i.e. unbalanced file)
- # returns false otherwise
- return 1 if ( $i < 0 ); # shouldn't happen, bad call
- my $seqno = $parent_seqno_to_go[$i];
- return 1 if ( !$seqno || $seqno eq SEQ_ROOT );
- return 1 if ( $self->[_rblock_type_of_seqno_]->{$seqno} );
+ $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
+ $item->[_TOKEN_LENGTH_] = $token_length;
+
+ # For reference, here is how to get the parent sequence number.
+ # This is not used because it is slower than finding it on the fly
+ # in sub parent_seqno_by_K:
+
+ # my $seqno_parent =
+ # $type_sequence && $is_opening_token{$token}
+ # ? $seqno_stack{ $depth_next - 2 }
+ # : $seqno_stack{ $depth_next - 1 };
+ # my $KK = @{$rLL_new};
+ # $rseqno_of_parent_by_K->{$KK} = $seqno_parent;
+
+ # and finally, add this item to the new array
+ push @{$rLL_new}, $item;
return;
-} ## end sub is_in_block_by_i
+} ## end sub store_token
-sub is_in_list_by_i {
- my ( $self, $i ) = @_;
+sub store_space {
+ my ($self) = @_;
- # returns true if token at i is contained in a LIST
- # returns false otherwise
- my $seqno = $parent_seqno_to_go[$i];
- return unless ( $seqno && $seqno ne SEQ_ROOT );
- if ( $self->[_ris_list_by_seqno_]->{$seqno} ) {
- return 1;
+ # Store a blank space in the new array
+ # - but never start the array with a space
+ # - and never store two consecutive spaces
+ if ( @{$rLL_new}
+ && $rLL_new->[-1]->[_TYPE_] ne 'b' )
+ {
+ my $ritem = [];
+ $ritem->[_TYPE_] = 'b';
+ $ritem->[_TOKEN_] = SPACE;
+ $ritem->[_TYPE_SEQUENCE_] = EMPTY_STRING;
+
+ $ritem->[_LINE_INDEX_] =
+ $rLL_new->[-1]->[_LINE_INDEX_];
+
+ # The level and ci_level of newly created spaces should be the same
+ # as the previous token. Otherwise the coding for the -lp option
+ # can create a blinking state in some rare cases (see b1109, b1110).
+ $ritem->[_LEVEL_] =
+ $rLL_new->[-1]->[_LEVEL_];
+ $ritem->[_CI_LEVEL_] =
+ $rLL_new->[-1]->[_CI_LEVEL_];
+
+ $self->store_token($ritem);
}
+
return;
-} ## end sub is_in_list_by_i
+} ## end sub store_space
-sub is_list_by_K {
+sub add_phantom_semicolon {
- # Return true if token K is in a list
my ( $self, $KK ) = @_;
- my $parent_seqno = $self->parent_seqno_by_K($KK);
- return unless defined($parent_seqno);
- return $self->[_ris_list_by_seqno_]->{$parent_seqno};
-}
+ # The token at old index $KK is a closing block brace, and not preceded
+ # by a semicolon. Before we push it onto the new token list, we may
+ # want to add a phantom semicolon which can be activated if the the
+ # block is broken on output.
-sub is_list_by_seqno {
+ # We are only adding semicolons for certain block types
+ my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+ return unless ($type_sequence);
+ my $block_type = $rblock_type_of_seqno->{$type_sequence};
+ return unless ($block_type);
+ return
+ unless ( $ok_to_add_semicolon_for_block_type{$block_type}
+ || $block_type =~ /^(sub|package)/
+ || $block_type =~ /^\w+\:$/ );
- # Return true if the immediate contents of a container appears to be a
- # list.
- my ( $self, $seqno ) = @_;
- return unless defined($seqno);
- return $self->[_ris_list_by_seqno_]->{$seqno};
-}
+ # Find the most recent token in the new token list
+ my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
+ return unless ( defined($Kp) ); # shouldn't happen except for bad input
-sub resync_lines_and_tokens {
+ my $type_p = $rLL_new->[$Kp]->[_TYPE_];
+ my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
+ my $type_sequence_p = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
- my $self = shift;
- my $rLL = $self->[_rLL_];
- my $Klimit = $self->[_Klimit_];
- my $rlines = $self->[_rlines_];
- my @Krange_code_without_comments;
- my @Klast_valign_code;
+ # Do not add a semicolon if...
+ return
+ if (
- # Re-construct the arrays of tokens associated with the original input lines
- # since they have probably changed due to inserting and deleting blanks
- # and a few other tokens.
+ # it would follow a comment (and be isolated)
+ $type_p eq '#'
- # This is the next token and its line index:
- my $Knext = 0;
- my $Kmax = defined($Klimit) ? $Klimit : -1;
+ # it follows a code block ( because they are not always wanted
+ # there and may add clutter)
+ || $type_sequence_p && $rblock_type_of_seqno->{$type_sequence_p}
- # Verify that old line indexes are in still order. If this error occurs,
- # check locations where sub 'respace_tokens' creates new tokens (like
- # blank spaces). It must have set a bad old line index.
- if ( DEVEL_MODE && defined($Klimit) ) {
- my $iline = $rLL->[0]->[_LINE_INDEX_];
- foreach my $KK ( 1 .. $Klimit ) {
- my $iline_last = $iline;
- $iline = $rLL->[$KK]->[_LINE_INDEX_];
- if ( $iline < $iline_last ) {
- my $KK_m = $KK - 1;
- my $token_m = $rLL->[$KK_m]->[_TOKEN_];
- my $token = $rLL->[$KK]->[_TOKEN_];
- my $type_m = $rLL->[$KK_m]->[_TYPE_];
- my $type = $rLL->[$KK]->[_TYPE_];
- Fault(<<EOM);
-Line indexes out of order at index K=$KK:
-at KK-1 =$KK_m: old line=$iline_last, type='$type_m', token='$token_m'
-at KK =$KK: old line=$iline, type='$type', token='$token',
-EOM
- }
- }
- }
+ # it would follow a label
+ || $type_p eq 'J'
- my $iline = -1;
- foreach my $line_of_tokens ( @{$rlines} ) {
- $iline++;
- my $line_type = $line_of_tokens->{_line_type};
- if ( $line_type eq 'CODE' ) {
+ # it would be inside a 'format' statement (and cause syntax error)
+ || ( $type_p eq 'k'
+ && $token_p =~ /format/ )
- # Get the old number of tokens on this line
- my $rK_range_old = $line_of_tokens->{_rK_range};
- my ( $Kfirst_old, $Klast_old ) = @{$rK_range_old};
- my $Kdiff_old = 0;
- if ( defined($Kfirst_old) ) {
- $Kdiff_old = $Klast_old - $Kfirst_old;
- }
+ );
- # Find the range of NEW K indexes for the line:
- # $Kfirst = index of first token on line
- # $Klast = index of last token on line
- my ( $Kfirst, $Klast );
+ # Do not add a semicolon if it would impede a weld with an immediately
+ # following closing token...like this
+ # { ( some code ) }
+ # ^--No semicolon can go here
- my $Knext_beg = $Knext; # this will be $Kfirst if we find tokens
+ # look at the previous token... note use of the _NEW rLL array here,
+ # but sequence numbers are invariant.
+ my $seqno_inner = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
- # Optimization: Although the actual K indexes may be completely
- # changed after respacing, the number of tokens on any given line
- # will often be nearly unchanged. So we will see if we can start
- # our search by guessing that the new line has the same number
- # of tokens as the old line.
- my $Knext_guess = $Knext + $Kdiff_old;
- if ( $Knext_guess > $Knext
- && $Knext_guess < $Kmax
- && $rLL->[$Knext_guess]->[_LINE_INDEX_] <= $iline )
- {
+ # If it is also a CLOSING token we have to look closer...
+ if (
+ $seqno_inner
+ && $is_closing_token{$token_p}
- # the guess is good, so we can start our search here
- $Knext = $Knext_guess + 1;
- }
+ # we only need to look if there is just one inner container..
+ && defined( $rchildren_of_seqno->{$type_sequence} )
+ && @{ $rchildren_of_seqno->{$type_sequence} } == 1
+ )
+ {
- while ($Knext <= $Kmax
- && $rLL->[$Knext]->[_LINE_INDEX_] <= $iline )
- {
- $Knext++;
- }
+ # Go back and see if the corresponding two OPENING tokens are also
+ # together. Note that we are using the OLD K indexing here:
+ my $K_outer_opening = $K_old_opening_by_seqno{$type_sequence};
+ if ( defined($K_outer_opening) ) {
+ my $K_nxt = $self->K_next_nonblank($K_outer_opening);
+ if ( defined($K_nxt) ) {
+ my $seqno_nxt = $rLL->[$K_nxt]->[_TYPE_SEQUENCE_];
- if ( $Knext > $Knext_beg ) {
+ # Is the next token after the outer opening the same as
+ # our inner closing (i.e. same sequence number)?
+ # If so, do not insert a semicolon here.
+ return if ( $seqno_nxt && $seqno_nxt == $seqno_inner );
+ }
+ }
+ }
- $Klast = $Knext - 1;
+ # We will insert an empty semicolon here as a placeholder. Later, if
+ # it becomes the last token on a line, we will bring it to life. The
+ # advantage of doing this is that (1) we just have to check line
+ # endings, and (2) the phantom semicolon has zero width and therefore
+ # won't cause needless breaks of one-line blocks.
+ my $Ktop = -1;
+ if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
+ && $want_left_space{';'} == WS_NO )
+ {
- # Delete any terminal blank token
- if ( $rLL->[$Klast]->[_TYPE_] eq 'b' ) { $Klast -= 1 }
+ # convert the blank into a semicolon..
+ # be careful: we are working on the new stack top
+ # on a token which has been stored.
+ my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', SPACE );
- if ( $Klast < $Knext_beg ) {
- $Klast = undef;
- }
- else {
+ # Convert the existing blank to:
+ # a phantom semicolon for one_line_block option = 0 or 1
+ # a real semicolon for one_line_block option = 2
+ my $tok = EMPTY_STRING;
+ my $len_tok = 0;
+ if ( $rOpts_one_line_block_semicolons == 2 ) {
+ $tok = ';';
+ $len_tok = 1;
+ }
- $Kfirst = $Knext_beg;
+ $rLL_new->[$Ktop]->[_TOKEN_] = $tok;
+ $rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = $len_tok;
+ $rLL_new->[$Ktop]->[_TYPE_] = ';';
- # Save ranges of non-comment code. This will be used by
- # sub keep_old_line_breaks.
- if ( $rLL->[$Kfirst]->[_TYPE_] ne '#' ) {
- push @Krange_code_without_comments, [ $Kfirst, $Klast ];
- }
+ $self->[_rtype_count_by_seqno_]->{$type_sequence}->{';'}++;
- # Only save ending K indexes of code types which are blank
- # or 'VER'. These will be used for a convergence check.
- # See related code in sub 'convey_batch_to_vertical_aligner'
- my $CODE_type = $line_of_tokens->{_code_type};
- if ( !$CODE_type
- || $CODE_type eq 'VER' )
- {
- push @Klast_valign_code, $Klast;
- }
- }
- }
+ # NOTE: we are changing the output stack without updating variables
+ # $last_nonblank_code_type, etc. Future needs might require that
+ # those variables be updated here. For now, it seems ok to skip
+ # this.
- # It is only safe to trim the actual line text if the input
- # line had a terminal blank token. Otherwise, we may be
- # in a quote.
- if ( $line_of_tokens->{_ended_in_blank_token} ) {
- $line_of_tokens->{_line_text} =~ s/\s+$//;
- }
- $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
+ # Then store a new blank
+ $self->store_token($rcopy);
+ }
+ else {
- # Deleting semicolons can create new empty code lines
- # which should be marked as blank
- if ( !defined($Kfirst) ) {
- my $CODE_type = $line_of_tokens->{_code_type};
- if ( !$CODE_type ) {
- $line_of_tokens->{_code_type} = 'BL';
- }
+ # Patch for issue c078: keep line indexes in order. If the top
+ # token is a space that we are keeping (due to '-wls=';') then
+ # we have to check that old line indexes stay in order.
+ # In very rare
+ # instances in which side comments have been deleted and converted
+ # into blanks, we may have filtered down multiple blanks into just
+ # one. In that case the top blank may have a higher line number
+ # than the previous nonblank token. Although the line indexes of
+ # blanks are not really significant, we need to keep them in order
+ # in order to pass error checks.
+ if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' ) {
+ my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_];
+ my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_];
+ if ( $new_top_ix < $old_top_ix ) {
+ $rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix;
}
}
+
+ my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', EMPTY_STRING );
+ $self->store_token($rcopy);
}
+ return;
+} ## end sub add_phantom_semicolon
- # There shouldn't be any nodes beyond the last one. This routine is
- # relinking lines and tokens after the tokens have been respaced. A fault
- # here indicates some kind of bug has been introduced into the above loops.
- # There is not good way to keep going; we better stop here.
- # FIXME: This will produce zero output. it would be best to find a way to
- # dump the input file.
- if ( $Knext <= $Kmax ) {
+sub add_trailing_comma {
- Fault("unexpected tokens at end of file when reconstructing lines");
- }
- $self->[_rKrange_code_without_comments_] = \@Krange_code_without_comments;
+ # Implement the --add-trailing-commas flag to the line end before index $KK:
- # Setup the convergence test in the FileWriter based on line-ending indexes
- my $file_writer_object = $self->[_file_writer_object_];
- $file_writer_object->setup_convergence_test( \@Klast_valign_code );
+ my ( $self, $KK, $Kfirst, $trailing_comma_rule ) = @_;
- # Mark essential old breakpoints if combination -iob -lp is used. These
- # two options do not work well together, but we can avoid turning -iob off
- # by ignoring -iob at certain essential line breaks.
- # Fixes cases b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058
- if ( $rOpts_ignore_old_breakpoints && $rOpts_line_up_parentheses ) {
- my %is_assignment_or_fat_comma = %is_assignment;
- $is_assignment_or_fat_comma{'=>'} = 1;
- my $ris_essential_old_breakpoint =
- $self->[_ris_essential_old_breakpoint_];
- my ( $Kfirst, $Klast );
- foreach my $line_of_tokens ( @{$rlines} ) {
- my $line_type = $line_of_tokens->{_line_type};
- if ( $line_type ne 'CODE' ) {
- ( $Kfirst, $Klast ) = ( undef, undef );
- next;
- }
- my ( $Kfirst_prev, $Klast_prev ) = ( $Kfirst, $Klast );
- ( $Kfirst, $Klast ) = @{ $line_of_tokens->{_rK_range} };
+ # Input parameter:
+ # $KK = index of closing token in old ($rLL) token list
+ # which starts a new line and is not preceded by a comma
+ # $Kfirst = index of first token on the current line of input tokens
+ # $add_flags = user control flags
- next unless defined($Klast_prev);
- next unless defined($Kfirst);
- my $type_last = $rLL->[$Klast_prev]->[_TOKEN_];
- my $type_first = $rLL->[$Kfirst]->[_TOKEN_];
- next
- unless ( $is_assignment_or_fat_comma{$type_last}
- || $is_assignment_or_fat_comma{$type_first} );
- $ris_essential_old_breakpoint->{$Klast_prev} = 1;
- }
- }
- return;
-} ## end sub resync_lines_and_tokens
+ # For example, we might want to add a comma here:
-sub keep_old_line_breaks {
+ # bless {
+ # _name => $name,
+ # _price => $price,
+ # _rebate => $rebate <------ location of possible bare comma
+ # }, $pkg;
+ # ^-------------------closing token at index $KK on new line
- # Called once per file to find and mark any old line breaks which
- # should be kept. We will be translating the input hashes into
- # token indexes.
+ # Do not add a comma if it would follow a comment
+ my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
+ return unless ( defined($Kp) );
+ my $type_p = $rLL_new->[$Kp]->[_TYPE_];
+ return if ( $type_p eq '#' );
- # A flag is set as follows:
- # = 1 make a hard break (flush the current batch)
- # best for something like leading commas (-kbb=',')
- # = 2 make a soft break (keep building current batch)
- # best for something like leading ->
+ # see if the user wants a trailing comma here
+ my $match =
+ $self->match_trailing_comma_rule( $KK, $Kfirst, $Kp,
+ $trailing_comma_rule, 1 );
- my ($self) = @_;
+ # if so, add a comma
+ if ($match) {
+ my $Knew = $self->store_new_token( ',', ',', $Kp );
+ }
- my $rLL = $self->[_rLL_];
- my $rKrange_code_without_comments =
- $self->[_rKrange_code_without_comments_];
- my $rbreak_before_Kfirst = $self->[_rbreak_before_Kfirst_];
- my $rbreak_after_Klast = $self->[_rbreak_after_Klast_];
- my $rwant_container_open = $self->[_rwant_container_open_];
- my $K_opening_container = $self->[_K_opening_container_];
- my $ris_broken_container = $self->[_ris_broken_container_];
- my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
+ return;
- # This code moved here from sub break_lists to fix b1120
- if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
- foreach my $item ( @{$rKrange_code_without_comments} ) {
- my ( $Kfirst, $Klast ) = @{$item};
- my $type = $rLL->[$Kfirst]->[_TYPE_];
- my $token = $rLL->[$Kfirst]->[_TOKEN_];
+} ## end sub add_trailing_comma
- # leading '->' use a value of 2 which causes a soft
- # break rather than a hard break
- if ( $type eq '->' ) {
- $rbreak_before_Kfirst->{$Kfirst} = 2;
- }
+sub delete_trailing_comma {
- # leading ')->' use a special flag to insure that both
- # opening and closing parens get opened
- # Fix for b1120: only for parens, not braces
- elsif ( $token eq ')' ) {
- my $Kn = $self->K_next_nonblank($Kfirst);
- next
- unless ( defined($Kn)
- && $Kn <= $Klast
- && $rLL->[$Kn]->[_TYPE_] eq '->' );
- my $seqno = $rLL->[$Kfirst]->[_TYPE_SEQUENCE_];
- next unless ($seqno);
+ my ( $self, $KK, $Kfirst, $trailing_comma_rule ) = @_;
- # Note: in previous versions there was a fix here to avoid
- # instability between conflicting -bom and -pvt or -pvtc flags.
- # The fix skipped -bom for a small line difference. But this
- # was troublesome, and instead the fix has been moved to
- # sub set_vertical_tightness_flags where priority is given to
- # the -bom flag over -pvt and -pvtc flags. Both opening and
- # closing paren flags are involved because even though -bom only
- # requests breaking before the closing paren, automated logic
- # opens the opening paren when the closing paren opens.
- # Relevant cases are b977, b1215, b1270, b1303
+ # Apply the --delete-trailing-commas flag to the comma before index $KK
- $rwant_container_open->{$seqno} = 1;
- }
- }
- }
+ # Input parameter:
+ # $KK = index of a closing token in OLD ($rLL) token list
+ # which is preceded by a comma on the same line.
+ # $Kfirst = index of first token on the current line of input tokens
+ # $delete_option = user control flag
- return unless ( %keep_break_before_type || %keep_break_after_type );
+ # Returns true if the comma was deleted
- my $check_for_break = sub {
- my ( $KK, $rkeep_break_hash, $rbreak_hash ) = @_;
- my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+ # For example, we might want to delete this comma:
+ # my @asset = ("FASMX", "FASGX", "FASIX",);
+ # | |^--------token at index $KK
+ # | ^------comma of interest
+ # ^-------------token at $Kfirst
- # non-container tokens use the type as the key
- if ( !$seqno ) {
- my $type = $rLL->[$KK]->[_TYPE_];
- if ( $rkeep_break_hash->{$type} ) {
- $rbreak_hash->{$KK} = 1;
- }
- }
+ # Verify that the previous token is a comma. Note that we are working in
+ # the new token list $rLL_new.
+ my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
+ return unless ( defined($Kp) );
+ if ( $rLL_new->[$Kp]->[_TYPE_] ne ',' ) {
- # container tokens use the token as the key
- else {
- my $token = $rLL->[$KK]->[_TOKEN_];
- my $flag = $rkeep_break_hash->{$token};
- if ($flag) {
+ # there must be a '#' between the ',' and closing token; give up.
+ return;
+ }
+
+ # Do not delete commas when formatting under stress to avoid instability.
+ # This fixes b1389, b1390, b1391, b1392. The $high_stress_level has
+ # been found to work well for trailing commas.
+ if ( $rLL_new->[$Kp]->[_LEVEL_] >= $high_stress_level ) {
+ return;
+ }
- my $match = $flag eq '1' || $flag eq '*';
+ # See if the user wants this trailing comma
+ my $match =
+ $self->match_trailing_comma_rule( $KK, $Kfirst, $Kp,
+ $trailing_comma_rule, 0 );
- # check for special matching codes
- if ( !$match ) {
- if ( $token eq '(' || $token eq ')' ) {
- $match = $self->match_paren_flag( $KK, $flag );
- }
- elsif ( $token eq '{' || $token eq '}' ) {
-
- # These tentative codes 'b' and 'B' for brace types are
- # placeholders for possible future brace types. They
- # are not documented and may be changed.
- my $block_type =
- $self->[_rblock_type_of_seqno_]->{$seqno};
- if ( $flag eq 'b' ) { $match = $block_type }
- elsif ( $flag eq 'B' ) { $match = !$block_type }
- else {
- # unknown code - no match
- }
- }
- }
- $rbreak_hash->{$KK} = 1 if ($match);
- }
+ # Patch: the --noadd-whitespace flag can cause instability in complex
+ # structures. In this case do not delete the comma. Fixes b1409.
+ if ( !$match && !$rOpts_add_whitespace ) {
+ my $Kn = $self->K_next_nonblank($KK);
+ if ( defined($Kn) ) {
+ my $type_n = $rLL->[$Kn]->[_TYPE_];
+ if ( $type_n ne ';' && $type_n ne '#' ) { return }
}
- };
+ }
- foreach my $item ( @{$rKrange_code_without_comments} ) {
- my ( $Kfirst, $Klast ) = @{$item};
- $check_for_break->(
- $Kfirst, \%keep_break_before_type, $rbreak_before_Kfirst
- );
- $check_for_break->(
- $Klast, \%keep_break_after_type, $rbreak_after_Klast
- );
+ # If no match, delete it
+ if ( !$match ) {
+
+ return $self->unstore_last_nonblank_token(',');
}
return;
-} ## end sub keep_old_line_breaks
-sub weld_containers {
+} ## end sub delete_trailing_comma
- # Called once per file to do any welding operations requested by --weld*
- # flags.
- my ($self) = @_;
+sub delete_weld_interfering_comma {
- # This count is used to eliminate needless calls for weld checks elsewhere
- $total_weld_count = 0;
+ my ( $self, $KK ) = @_;
- return if ( $rOpts->{'indent-only'} );
- return unless ($rOpts_add_newlines);
+ # Apply the flag '--delete-weld-interfering-commas' to the comma
+ # before index $KK
- # Important: sub 'weld_cuddled_blocks' must be called before
- # sub 'weld_nested_containers'. This is because the cuddled option needs to
- # use the original _LEVEL_ values of containers, but the weld nested
- # containers changes _LEVEL_ of welded containers.
+ # Input parameter:
+ # $KK = index of a closing token in OLD ($rLL) token list
+ # which is preceded by a comma on the same line.
- # Here is a good test case to be sure that both cuddling and welding
- # are working and not interfering with each other: <<snippets/ce_wn1.in>>
+ # Returns true if the comma was deleted
- # perltidy -wn -ce
+ # For example, we might want to delete this comma:
- # if ($BOLD_MATH) { (
- # $labels, $comment,
- # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
- # ) } else { (
- # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
- # $after
- # ) }
+ # my $tmpl = { foo => {no_override => 1, default => 42}, };
+ # || ^------$KK
+ # |^---$Kp
+ # $Kpp---^
+ #
+ # Note that:
+ # index $KK is in the old $rLL array, but
+ # indexes $Kp and $Kpp are in the new $rLL_new array.
- $self->weld_cuddled_blocks() if ( %{$rcuddled_block_types} );
+ my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+ return unless ($type_sequence);
- if ( $rOpts->{'weld-nested-containers'} ) {
+ # Find the previous token and verify that it is a comma.
+ my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
+ return unless ( defined($Kp) );
+ if ( $rLL_new->[$Kp]->[_TYPE_] ne ',' ) {
- $self->weld_nested_containers();
+ # it is not a comma, so give up ( it is probably a '#' )
+ return;
+ }
- $self->weld_nested_quotes();
+ # This must be the only comma in this list
+ my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence};
+ return
+ unless ( defined($rtype_count)
+ && $rtype_count->{','}
+ && $rtype_count->{','} == 1 );
+
+ # Back up to the previous closing token
+ my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
+ return unless ( defined($Kpp) );
+ my $seqno_pp = $rLL_new->[$Kpp]->[_TYPE_SEQUENCE_];
+ my $type_pp = $rLL_new->[$Kpp]->[_TYPE_];
+
+ # The containers must be nesting (i.e., sequence numbers must differ by 1 )
+ if ( $seqno_pp && $is_closing_type{$type_pp} ) {
+ if ( $seqno_pp == $type_sequence + 1 ) {
+
+ # remove the ',' from the top of the new token list
+ return $self->unstore_last_nonblank_token(',');
+ }
}
+ return;
- #-------------------------------------------------------------
- # All welding is done. Finish setting up weld data structures.
- #-------------------------------------------------------------
+} ## end sub delete_weld_interfering_comma
- my $rLL = $self->[_rLL_];
- my $rK_weld_left = $self->[_rK_weld_left_];
- my $rK_weld_right = $self->[_rK_weld_right_];
- my $rweld_len_right_at_K = $self->[_rweld_len_right_at_K_];
+sub unstore_last_nonblank_token {
- my @K_multi_weld;
- my @keys = keys %{$rK_weld_right};
- $total_weld_count = @keys;
+ my ( $self, $type ) = @_;
- # First pass to process binary welds.
- # This loop is processed in unsorted order for efficiency.
- foreach my $Kstart (@keys) {
- my $Kend = $rK_weld_right->{$Kstart};
+ # remove the most recent nonblank token from the new token list
+ # Input parameter:
+ # $type = type to be removed (for safety check)
- # An error here would be due to an incorrect initialization introduced
- # in one of the above weld routines, like sub weld_nested.
- if ( $Kend <= $Kstart ) {
- Fault("Bad weld link: Kend=$Kend <= Kstart=$Kstart\n")
- if (DEVEL_MODE);
- next;
- }
+ # Returns true if success
+ # false if error
- # Set weld values for all tokens this welded pair
- foreach ( $Kstart + 1 .. $Kend ) {
- $rK_weld_left->{$_} = $Kstart;
- }
- foreach my $Kx ( $Kstart .. $Kend - 1 ) {
- $rK_weld_right->{$Kx} = $Kend;
- $rweld_len_right_at_K->{$Kx} =
- $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
- $rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
- }
+ # This was written and is used for removing commas, but might
+ # be useful for other tokens. If it is ever used for other tokens
+ # then the issue of what to do about the other variables, such
+ # as token counts and the '$last...' vars needs to be considered.
- # Remember the leftmost index of welds which continue to the right
- if ( defined( $rK_weld_right->{$Kend} )
- && !defined( $rK_weld_left->{$Kstart} ) )
- {
- push @K_multi_weld, $Kstart;
- }
+ # Safety check, shouldn't happen
+ if ( @{$rLL_new} < 3 ) {
+ DEVEL_MODE && Fault("not enough tokens on stack to remove '$type'\n");
+ return;
}
- # Second pass to process chains of welds (these are rare).
- # This has to be processed in sorted order.
- if (@K_multi_weld) {
- my $Kend = -1;
- foreach my $Kstart ( sort { $a <=> $b } @K_multi_weld ) {
+ my ( $rcomma, $rblank );
- # Skip any interior K which was originally missing a left link
- next if ( $Kstart <= $Kend );
+ # case 1: pop comma from top of stack
+ if ( $rLL_new->[-1]->[_TYPE_] eq $type ) {
+ $rcomma = pop @{$rLL_new};
+ }
- # Find the end of this chain
- $Kend = $rK_weld_right->{$Kstart};
- my $Knext = $rK_weld_right->{$Kend};
- while ( defined($Knext) ) {
- $Kend = $Knext;
- $Knext = $rK_weld_right->{$Kend};
- }
+ # case 2: pop blank and then comma from top of stack
+ elsif ($rLL_new->[-1]->[_TYPE_] eq 'b'
+ && $rLL_new->[-2]->[_TYPE_] eq $type )
+ {
+ $rblank = pop @{$rLL_new};
+ $rcomma = pop @{$rLL_new};
+ }
- # Set weld values this chain
- foreach ( $Kstart + 1 .. $Kend ) {
- $rK_weld_left->{$_} = $Kstart;
- }
- foreach my $Kx ( $Kstart .. $Kend - 1 ) {
- $rK_weld_right->{$Kx} = $Kend;
- $rweld_len_right_at_K->{$Kx} =
- $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
- $rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
- }
+ # case 3: error, shouldn't happen unless bad call
+ else {
+ DEVEL_MODE && Fault("Could not find token type '$type' to remove\n");
+ return;
+ }
+
+ # A note on updating vars set by sub store_token for this comma: If we
+ # reduce the comma count by 1 then we also have to change the variable
+ # $last_nonblank_code_type to be $last_last_nonblank_code_type because
+ # otherwise sub store_token is going to ALSO reduce the comma count.
+ # Alternatively, we can leave the count alone and the
+ # $last_nonblank_code_type alone. Then sub store_token will produce
+ # the correct result. This is simpler and is done here.
+
+ # Now add a blank space after the comma if appropriate.
+ # Some unusual spacing controls might need another iteration to
+ # reach a final state.
+ if ( $rLL_new->[-1]->[_TYPE_] ne 'b' ) {
+ if ( defined($rblank) ) {
+ $rblank->[_CUMULATIVE_LENGTH_] -= 1; # fix for deleted comma
+ push @{$rLL_new}, $rblank;
}
}
+ return 1;
+} ## end sub unstore_last_nonblank_token
+
+sub match_trailing_comma_rule {
+
+ my ( $self, $KK, $Kfirst, $Kp, $trailing_comma_rule, $if_add ) = @_;
+
+ # Decide if a trailing comma rule is matched.
+
+ # Input parameter:
+ # $KK = index of closing token in old ($rLL) token list which follows
+ # the location of a possible trailing comma. See diagram below.
+ # $Kfirst = (old) index of first token on the current line of input tokens
+ # $Kp = index of previous nonblank token in new ($rLL_new) array
+ # $trailing_comma_rule = packed user control flags
+ # $if_add = true if adding comma, false if deleteing comma
+
+ # Returns:
+ # false if no match
+ # true if match
+
+ # For example, we might be checking for addition of a comma here:
+
+ # bless {
+ # _name => $name,
+ # _price => $price,
+ # _rebate => $rebate <------ location of possible trailing comma
+ # }, $pkg;
+ # ^-------------------closing token at index $KK
+
+ return unless ($trailing_comma_rule);
+ my ( $trailing_comma_style, $paren_flag ) = @{$trailing_comma_rule};
+
+ # List of $trailing_comma_style values:
+ # undef stable: do not change
+ # '0' : no list should have a trailing comma
+ # '1' or '*' : every list should have a trailing comma
+ # 'm' a multi-line list should have a trailing commas
+ # 'b' trailing commas should be 'bare' (comma followed by newline)
+ # 'h' lists of key=>value pairs with a bare trailing comma
+ # 'i' same as s=h but also include any list with no more than about one
+ # comma per line
+ # ' ' or -wtc not defined : leave trailing commas unchanged [DEFAULT].
+
+ # Note: an interesting generalization would be to let an upper case
+ # letter denote the negation of styles 'm', 'b', 'h', 'i'. This might
+ # be useful for undoing operations. It would be implemented as a wrapper
+ # around this routine.
+
+ #-----------------------------------------
+ # No style defined : do not add or delete
+ #-----------------------------------------
+ if ( !defined($trailing_comma_style) ) { return !$if_add }
+
+ #----------------------------------------
+ # Set some flags describing this location
+ #----------------------------------------
+ my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+ return unless ($type_sequence);
+ my $closing_token = $rLL->[$KK]->[_TOKEN_];
+ my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence};
+ return unless ( defined($rtype_count) && $rtype_count->{','} );
+ my $is_permanently_broken =
+ $self->[_ris_permanently_broken_]->{$type_sequence};
+
+ # Note that _ris_broken_container_ also stores the line diff
+ # but it is not available at this early stage.
+ my $K_opening = $self->[_K_opening_container_]->{$type_sequence};
+ return if ( !defined($K_opening) );
+
+ # multiline definition 1: opening and closing tokens on different lines
+ my $iline_o = $rLL_new->[$K_opening]->[_LINE_INDEX_];
+ my $iline_c = $rLL->[$KK]->[_LINE_INDEX_];
+ my $line_diff_containers = $iline_c - $iline_o;
+ my $has_multiline_containers = $line_diff_containers > 0;
+
+ # multiline definition 2: first and last commas on different lines
+ my $iline_first = $self->[_rfirst_comma_line_index_]->{$type_sequence};
+ my $iline_last = $rLL_new->[$Kp]->[_LINE_INDEX_];
+ my $has_multiline_commas;
+ my $line_diff_commas = 0;
+ if ( !defined($iline_first) ) {
+
+ # shouldn't happen if caller checked comma count
+ my $type_kp = $rLL_new->[$Kp]->[_TYPE_];
+ Fault(
+"at line $iline_last but line of first comma not defined, at Kp=$Kp, type=$type_kp\n"
+ ) if (DEVEL_MODE);
+ }
+ else {
+ $line_diff_commas = $iline_last - $iline_first;
+ $has_multiline_commas = $line_diff_commas > 0;
+ }
- return;
-} ## end sub weld_containers
+ # To avoid instability in edge cases, when adding commas we uses the
+ # multiline_commas definition, but when deleting we use multiline
+ # containers. This fixes b1384, b1396, b1397, b1398, b1400.
+ my $is_multiline =
+ $if_add ? $has_multiline_commas : $has_multiline_containers;
-sub cumulative_length_before_K {
- my ( $self, $KK ) = @_;
- my $rLL = $self->[_rLL_];
- return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
-}
+ my $is_bare_multiline_comma = $is_multiline && $KK == $Kfirst;
-sub weld_cuddled_blocks {
- my ($self) = @_;
+ my $match;
- # Called once per file to handle cuddled formatting
+ #----------------------------
+ # 0 : does not match any list
+ #----------------------------
+ if ( $trailing_comma_style eq '0' ) {
+ $match = 0;
+ }
- my $rK_weld_left = $self->[_rK_weld_left_];
- my $rK_weld_right = $self->[_rK_weld_right_];
- my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ #------------------------------
+ # '*' or '1' : matches any list
+ #------------------------------
+ elsif ( $trailing_comma_style eq '*' || $trailing_comma_style eq '1' ) {
+ $match = 1;
+ }
- # This routine implements the -cb flag by finding the appropriate
- # closing and opening block braces and welding them together.
- return unless ( %{$rcuddled_block_types} );
+ #-----------------------------
+ # 'm' matches a Multiline list
+ #-----------------------------
+ elsif ( $trailing_comma_style eq 'm' ) {
+ $match = $is_multiline;
+ }
- my $rLL = $self->[_rLL_];
- return unless ( defined($rLL) && @{$rLL} );
- my $rbreak_container = $self->[_rbreak_container_];
+ #----------------------------------
+ # 'b' matches a Bare trailing comma
+ #----------------------------------
+ elsif ( $trailing_comma_style eq 'b' ) {
+ $match = $is_bare_multiline_comma;
+ }
+
+ #--------------------------------------------------------------------------
+ # 'h' matches a bare hash list with about 1 comma and 1 fat comma per line.
+ # 'i' matches a bare stable list with about 1 comma per line.
+ #--------------------------------------------------------------------------
+ elsif ( $trailing_comma_style eq 'h' || $trailing_comma_style eq 'i' ) {
+
+ # We can treat these together because they are similar.
+ # The set of 'i' matches includes the set of 'h' matches.
+
+ # the trailing comma must be bare for both 'h' and 'i'
+ return if ( !$is_bare_multiline_comma );
+
+ # There must be no more than one comma per line for both 'h' and 'i'
+ # The new_comma_count here will include the trailing comma.
+ my $new_comma_count = $rtype_count->{','};
+ $new_comma_count += 1 if ($if_add);
+ my $excess_commas = $new_comma_count - $line_diff_commas - 1;
+ if ( $excess_commas > 0 ) {
+
+ # Exception for a special edge case for option 'i': if the trailing
+ # comma is followed by a blank line or comment, then it cannot be
+ # covered. Then we can safely accept a small list to avoid
+ # instability (issue b1443).
+ if ( $trailing_comma_style eq 'i'
+ && $iline_c - $rLL_new->[$Kp]->[_LINE_INDEX_] > 1
+ && $new_comma_count <= 2 )
+ {
+ $match = 1;
+ }
+ else {
+ return;
+ }
+ }
- my $K_opening_container = $self->[_K_opening_container_];
- my $K_closing_container = $self->[_K_closing_container_];
+ # a list of key=>value pairs with at least 2 fat commas is a match
+ # for both 'h' and 'i'
+ my $fat_comma_count = $rtype_count->{'=>'};
+ if ( !$match && $fat_comma_count && $fat_comma_count >= 2 ) {
- my $length_to_opening_seqno = sub {
- my ($seqno) = @_;
- my $KK = $K_opening_container->{$seqno};
- my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
- return $lentot;
- };
- my $length_to_closing_seqno = sub {
- my ($seqno) = @_;
- my $KK = $K_closing_container->{$seqno};
- my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
- return $lentot;
- };
+ # comma count (including trailer) and fat comma count must differ by
+ # by no more than 1. This allows for some small variations.
+ my $comma_diff = $new_comma_count - $fat_comma_count;
+ $match = ( $comma_diff >= -1 && $comma_diff <= 1 );
+ }
- my $is_broken_block = sub {
+ # For 'i' only, a list that can be shown to be stable is a match
+ if ( !$match && $trailing_comma_style eq 'i' ) {
+ $match = (
+ $is_permanently_broken
+ || ( $rOpts_break_at_old_comma_breakpoints
+ && !$rOpts_ignore_old_breakpoints )
+ );
+ }
+ }
- # a block is broken if the input line numbers of the braces differ
- # we can only cuddle between broken blocks
- my ($seqno) = @_;
- my $K_opening = $K_opening_container->{$seqno};
- return unless ( defined($K_opening) );
- my $K_closing = $K_closing_container->{$seqno};
- return unless ( defined($K_closing) );
- return $rbreak_container->{$seqno}
- || $rLL->[$K_closing]->[_LINE_INDEX_] !=
- $rLL->[$K_opening]->[_LINE_INDEX_];
- };
+ #-------------------------------------------------------------------------
+ # Unrecognized parameter. This should have been caught in the input check.
+ #-------------------------------------------------------------------------
+ else {
- # A stack to remember open chains at all levels: This is a hash rather than
- # an array for safety because negative levels can occur in files with
- # errors. This allows us to keep processing with negative levels.
- # $in_chain{$level} = [$chain_type, $type_sequence];
- my %in_chain;
- my $CBO = $rOpts->{'cuddled-break-option'};
+ DEVEL_MODE && Fault("Unrecognized parameter '$trailing_comma_style'\n");
- # loop over structure items to find cuddled pairs
- my $level = 0;
- my $KNEXT = $self->[_K_first_seq_item_];
- while ( defined($KNEXT) ) {
- my $KK = $KNEXT;
- $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
- my $rtoken_vars = $rLL->[$KK];
- my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
- if ( !$type_sequence ) {
- next if ( $KK == 0 ); # first token in file may not be container
+ # do not add or delete
+ return !$if_add;
+ }
- # A fault here implies that an error was made in the little loop at
- # the bottom of sub 'respace_tokens' which set the values of
- # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
- # loop control lines above.
- Fault("sequence = $type_sequence not defined at K=$KK")
- if (DEVEL_MODE);
- next;
- }
+ # Now do any special paren check
+ if ( $match
+ && $paren_flag
+ && $paren_flag ne '1'
+ && $paren_flag ne '*'
+ && $closing_token eq ')' )
+ {
+ $match &&=
+ $self->match_paren_control_flag( $type_sequence, $paren_flag,
+ $rLL_new );
+ }
- # NOTE: we must use the original levels here. They can get changed
- # by sub 'weld_nested_containers', so this routine must be called
- # before sub 'weld_nested_containers'.
- my $last_level = $level;
- $level = $rtoken_vars->[_LEVEL_];
+ # Fix for b1379, b1380, b1381, b1382, b1384 part 1. Mark trailing commas
+ # for use by -vtc logic to avoid instability when -dtc and -atc are both
+ # active.
+ if ($match) {
+ if ( $if_add && $rOpts_delete_trailing_commas
+ || !$if_add && $rOpts_add_trailing_commas )
+ {
+ $self->[_ris_bare_trailing_comma_by_seqno_]->{$type_sequence} = 1;
- if ( $level < $last_level ) { $in_chain{$last_level} = undef }
- elsif ( $level > $last_level ) { $in_chain{$level} = undef }
+ # The combination of -atc and -dtc and -cab=3 can be unstable
+ # (b1394). So we deactivate -cab=3 in this case.
+ # A value of '0' or '4' is required for stability of case b1451.
+ if ( $rOpts_comma_arrow_breakpoints == 3 ) {
+ $self->[_roverride_cab3_]->{$type_sequence} = 0;
+ }
+ }
+ }
+ return $match;
+} ## end sub match_trailing_comma_rule
- # We are only looking at code blocks
- my $token = $rtoken_vars->[_TOKEN_];
- my $type = $rtoken_vars->[_TYPE_];
- next unless ( $type eq $token );
+sub store_new_token {
- if ( $token eq '{' ) {
+ my ( $self, $type, $token, $Kp ) = @_;
- my $block_type = $rblock_type_of_seqno->{$type_sequence};
- if ( !$block_type ) {
+ # Create and insert a completely new token into the output stream
- # patch for unrecognized block types which may not be labeled
- my $Kp = $self->K_previous_nonblank($KK);
- while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
- $Kp = $self->K_previous_nonblank($Kp);
- }
- next unless $Kp;
- $block_type = $rLL->[$Kp]->[_TOKEN_];
- }
- if ( $in_chain{$level} ) {
+ # Input parameters:
+ # $type = the token type
+ # $token = the token text
+ # $Kp = index of the previous token in the new list, $rLL_new
- # we are in a chain and are at an opening block brace.
- # See if we are welding this opening brace with the previous
- # block brace. Get their identification numbers:
- my $closing_seqno = $in_chain{$level}->[1];
- my $opening_seqno = $type_sequence;
+ # Returns:
+ # $Knew = index in $rLL_new of the new token
- # The preceding block must be on multiple lines so that its
- # closing brace will start a new line.
- if ( !$is_broken_block->($closing_seqno) ) {
- next unless ( $CBO == 2 );
- $rbreak_container->{$closing_seqno} = 1;
- }
+ # This operation is a little tricky because we are creating a new token and
+ # we have to take care to follow the requested whitespace rules.
- # we will let the trailing block be either broken or intact
- ## && $is_broken_block->($opening_seqno);
+ my $Ktop = @{$rLL_new} - 1;
+ my $top_is_space = $Ktop >= 0 && $rLL_new->[$Ktop]->[_TYPE_] eq 'b';
+ my $Knew;
+ if ( $top_is_space && $want_left_space{$type} == WS_NO ) {
- # We can weld the closing brace to its following word ..
- my $Ko = $K_closing_container->{$closing_seqno};
- my $Kon;
- if ( defined($Ko) ) {
- $Kon = $self->K_next_nonblank($Ko);
- }
+ #----------------------------------------------------
+ # Method 1: Convert the top blank into the new token.
+ #----------------------------------------------------
- # ..unless it is a comment
- if ( defined($Kon) && $rLL->[$Kon]->[_TYPE_] ne '#' ) {
+ # Be Careful: we are working on the top of the new stack, on a token
+ # which has been stored.
- # OK to weld these two tokens...
- $rK_weld_right->{$Ko} = $Kon;
- $rK_weld_left->{$Kon} = $Ko;
+ my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', SPACE );
- # Set flag that we want to break the next container
- # so that the cuddled line is balanced.
- $rbreak_container->{$opening_seqno} = 1
- if ($CBO);
- }
+ $Knew = $Ktop;
+ $rLL_new->[$Knew]->[_TOKEN_] = $token;
+ $rLL_new->[$Knew]->[_TOKEN_LENGTH_] = length($token);
+ $rLL_new->[$Knew]->[_TYPE_] = $type;
- }
- else {
+ # NOTE: we are changing the output stack without updating variables
+ # $last_nonblank_code_type, etc. Future needs might require that
+ # those variables be updated here. For now, we just update the
+ # type counts as necessary.
- # We are not in a chain. Start a new chain if we see the
- # starting block type.
- if ( $rcuddled_block_types->{$block_type} ) {
- $in_chain{$level} = [ $block_type, $type_sequence ];
- }
- else {
- $block_type = '*';
- $in_chain{$level} = [ $block_type, $type_sequence ];
- }
+ if ( $is_counted_type{$type} ) {
+ my $seqno = $seqno_stack{ $depth_next - 1 };
+ if ($seqno) {
+ $self->[_rtype_count_by_seqno_]->{$seqno}->{$type}++;
}
}
- elsif ( $token eq '}' ) {
- if ( $in_chain{$level} ) {
- # We are in a chain at a closing brace. See if this chain
- # continues..
- my $Knn = $self->K_next_code($KK);
- next unless $Knn;
+ # Then store a new blank
+ $self->store_token($rcopy);
+ }
+ else {
- my $chain_type = $in_chain{$level}->[0];
- my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
- if (
- $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
- )
- {
+ #----------------------------------------
+ # Method 2: Use the normal storage method
+ #----------------------------------------
- # Note that we do not weld yet because we must wait until
- # we we are sure that an opening brace for this follows.
- $in_chain{$level}->[1] = $type_sequence;
- }
- else { $in_chain{$level} = undef }
+ # Patch for issue c078: keep line indexes in order. If the top
+ # token is a space that we are keeping (due to '-wls=...) then
+ # we have to check that old line indexes stay in order.
+ # In very rare
+ # instances in which side comments have been deleted and converted
+ # into blanks, we may have filtered down multiple blanks into just
+ # one. In that case the top blank may have a higher line number
+ # than the previous nonblank token. Although the line indexes of
+ # blanks are not really significant, we need to keep them in order
+ # in order to pass error checks.
+ if ($top_is_space) {
+ my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_];
+ my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_];
+ if ( $new_top_ix < $old_top_ix ) {
+ $rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix;
}
}
- }
- return;
-} ## end sub weld_cuddled_blocks
-sub find_nested_pairs {
- my $self = shift;
+ my $rcopy = copy_token_as_type( $rLL_new->[$Kp], $type, $token );
+ $self->store_token($rcopy);
+ $Knew = @{$rLL_new} - 1;
+ }
+ return $Knew;
+} ## end sub store_new_token
- # This routine is called once per file to do preliminary work needed for
- # the --weld-nested option. This information is also needed for adding
- # semicolons.
+sub check_Q {
- my $rLL = $self->[_rLL_];
- return unless ( defined($rLL) && @{$rLL} );
- my $Num = @{$rLL};
+ # Check that a quote looks okay, and report possible problems
+ # to the logfile.
- my $K_opening_container = $self->[_K_opening_container_];
- my $K_closing_container = $self->[_K_closing_container_];
- my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my ( $self, $KK, $Kfirst, $line_number ) = @_;
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ if ( $token =~ /\t/ ) {
+ $self->note_embedded_tab($line_number);
+ }
- # We define an array of pairs of nested containers
- my @nested_pairs;
+ # The remainder of this routine looks for something like
+ # '$var = s/xxx/yyy/;'
+ # in case it should have been '$var =~ s/xxx/yyy/;'
- # Names of calling routines can either be marked as 'i' or 'w',
- # and they may invoke a sub call with an '->'. We will consider
- # any consecutive string of such types as a single unit when making
- # weld decisions. We also allow a leading !
- my $is_name_type = {
- 'i' => 1,
- 'w' => 1,
- 'U' => 1,
- '->' => 1,
- '!' => 1,
- };
+ # Start by looking for a token beginning with one of: s y m / tr
+ return
+ unless ( $is_s_y_m_slash{ substr( $token, 0, 1 ) }
+ || substr( $token, 0, 2 ) eq 'tr' );
- # Loop over all closing container tokens
- foreach my $inner_seqno ( keys %{$K_closing_container} ) {
- my $K_inner_closing = $K_closing_container->{$inner_seqno};
+ # ... and preceded by one of: = == !=
+ my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
+ return unless ( defined($Kp) );
+ my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
+ return unless ( $is_unexpected_equals{$previous_nonblank_type} );
+ my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
- # See if it is immediately followed by another, outer closing token
- my $K_outer_closing = $K_inner_closing + 1;
- $K_outer_closing += 1
- if ( $K_outer_closing < $Num
- && $rLL->[$K_outer_closing]->[_TYPE_] eq 'b' );
+ my $previous_nonblank_type_2 = 'b';
+ my $previous_nonblank_token_2 = EMPTY_STRING;
+ my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
+ if ( defined($Kpp) ) {
+ $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_];
+ $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
+ }
- next unless ( $K_outer_closing < $Num );
- my $outer_seqno = $rLL->[$K_outer_closing]->[_TYPE_SEQUENCE_];
- next unless ($outer_seqno);
- my $token_outer_closing = $rLL->[$K_outer_closing]->[_TOKEN_];
- next unless ( $is_closing_token{$token_outer_closing} );
+ my $next_nonblank_token = EMPTY_STRING;
+ my $Kn = $KK + 1;
+ my $Kmax = @{$rLL} - 1;
+ if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
+ if ( $Kn <= $Kmax ) {
+ $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
+ }
- # Now we have to check the opening tokens.
- my $K_outer_opening = $K_opening_container->{$outer_seqno};
- my $K_inner_opening = $K_opening_container->{$inner_seqno};
- next unless defined($K_outer_opening) && defined($K_inner_opening);
+ my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
+ my $type_0 = $rLL->[$Kfirst]->[_TYPE_];
- my $inner_blocktype = $rblock_type_of_seqno->{$inner_seqno};
- my $outer_blocktype = $rblock_type_of_seqno->{$outer_seqno};
+ if (
- # Verify that the inner opening token is the next container after the
- # outer opening token.
- my $K_io_check = $rLL->[$K_outer_opening]->[_KNEXT_SEQ_ITEM_];
- next unless defined($K_io_check);
- if ( $K_io_check != $K_inner_opening ) {
+ # preceded by simple scalar
+ $previous_nonblank_type_2 eq 'i'
+ && $previous_nonblank_token_2 =~ /^\$/
- # The inner opening container does not immediately follow the outer
- # opening container, but we may still allow a weld if they are
- # separated by a sub signature. For example, we may have something
- # like this, where $K_io_check may be at the first 'x' instead of
- # 'io'. So we need to hop over the signature and see if we arrive
- # at 'io'.
+ # followed by some kind of termination
+ # (but give complaint if we can not see far enough ahead)
+ && $next_nonblank_token =~ /^[; \)\}]$/
- # oo io
- # | x x |
- # $obj->then( sub ( $code ) {
- # ...
- # return $c->render(text => '', status => $code);
- # } );
- # | |
- # ic oc
-
- next if ( !$inner_blocktype || $inner_blocktype ne 'sub' );
- next if $rLL->[$K_io_check]->[_TOKEN_] ne '(';
- my $seqno_signature = $rLL->[$K_io_check]->[_TYPE_SEQUENCE_];
- next unless defined($seqno_signature);
- my $K_signature_closing = $K_closing_container->{$seqno_signature};
- next unless defined($K_signature_closing);
- my $K_test = $rLL->[$K_signature_closing]->[_KNEXT_SEQ_ITEM_];
- next
- unless ( defined($K_test) && $K_test == $K_inner_opening );
+ # scalar is not declared
+ ## =~ /^(my|our|local)$/
+ && !( $type_0 eq 'k' && $is_my_our_local{$token_0} )
+ )
+ {
+ my $lno = 1 + $rLL_new->[$Kp]->[_LINE_INDEX_];
+ my $guess = substr( $previous_nonblank_token, 0, 1 ) . '~';
+ complain(
+"Line $lno: Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
+ );
+ }
+ return;
+} ## end sub check_Q
- # OK, we have arrived at 'io' in the above diagram. We should put
- # a limit on the length or complexity of the signature here. There
- # is no perfect way to do this, one way is to put a limit on token
- # count. For consistency with older versions, we should allow a
- # signature with a single variable to weld, but not with
- # multiple variables. A single variable as in 'sub ($code) {' can
- # have a $Kdiff of 2 to 4, depending on spacing.
+} ## end closure respace_tokens
- # But two variables like 'sub ($v1,$v2) {' can have a diff of 4 to
- # 7, depending on spacing. So to keep formatting consistent with
- # previous versions, we will also avoid welding if there is a comma
- # in the signature.
+sub copy_token_as_type {
- my $Kdiff = $K_signature_closing - $K_io_check;
- next if ( $Kdiff > 4 );
+ # This provides a quick way to create a new token by
+ # slightly modifying an existing token.
+ my ( $rold_token, $type, $token ) = @_;
+ if ( !defined($token) ) {
+ if ( $type eq 'b' ) {
+ $token = SPACE;
+ }
+ elsif ( $type eq 'q' ) {
+ $token = EMPTY_STRING;
+ }
+ elsif ( $type eq '->' ) {
+ $token = '->';
+ }
+ elsif ( $type eq ';' ) {
+ $token = ';';
+ }
+ elsif ( $type eq ',' ) {
+ $token = ',';
+ }
+ else {
- my $saw_comma;
- foreach my $KK ( $K_io_check + 1 .. $K_signature_closing - 1 ) {
- if ( $rLL->[$KK]->[_TYPE_] eq ',' ) { $saw_comma = 1; last }
+ # Unexpected type ... this sub will work as long as both $token and
+ # $type are defined, but we should catch any unexpected types during
+ # development.
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+sub 'copy_token_as_type' received token type '$type' but expects just one of: 'b' 'q' '->' or ';'
+EOM
}
- next if ($saw_comma);
+
+ # Shouldn't get here
+ $token = $type;
}
+ }
- # Yes .. this is a possible nesting pair.
- # They can be separated by a small amount.
- my $K_diff = $K_inner_opening - $K_outer_opening;
+ my @rnew_token = @{$rold_token};
+ $rnew_token[_TYPE_] = $type;
+ $rnew_token[_TOKEN_] = $token;
+ $rnew_token[_TYPE_SEQUENCE_] = EMPTY_STRING;
+ return \@rnew_token;
+} ## end sub copy_token_as_type
- # Count nonblank characters separating them.
- if ( $K_diff < 0 ) { next } # Shouldn't happen
- my $nonblank_count = 0;
- my $type;
- my $is_name;
+sub K_next_code {
+ my ( $self, $KK, $rLL ) = @_;
- # Here is an example of a long identifier chain which counts as a
- # single nonblank here (this spans about 10 K indexes):
- # if ( !Boucherot::SetOfConnections->new->handler->execute(
- # ^--K_o_o ^--K_i_o
- # @array) )
- my $Kn_first = $K_outer_opening;
- my $Kn_last_nonblank;
- my $saw_comment;
- foreach my $Kn ( $K_outer_opening + 1 .. $K_inner_opening ) {
- next if ( $rLL->[$Kn]->[_TYPE_] eq 'b' );
- if ( !$nonblank_count ) { $Kn_first = $Kn }
- if ( $Kn eq $K_inner_opening ) { $nonblank_count++; last; }
- $Kn_last_nonblank = $Kn;
+ # return the index K of the next nonblank, non-comment token
+ return unless ( defined($KK) && $KK >= 0 );
- # skip chain of identifier tokens
- my $last_type = $type;
- my $last_is_name = $is_name;
- $type = $rLL->[$Kn]->[_TYPE_];
- if ( $type eq '#' ) { $saw_comment = 1; last }
- $is_name = $is_name_type->{$type};
- next if ( $is_name && $last_is_name );
+ # use the standard array unless given otherwise
+ $rLL = $self->[_rLL_] unless ( defined($rLL) );
+ my $Num = @{$rLL};
+ my $Knnb = $KK + 1;
+ while ( $Knnb < $Num ) {
+ if ( !defined( $rLL->[$Knnb] ) ) {
- $nonblank_count++;
- last if ( $nonblank_count > 2 );
+ # We seem to have encountered a gap in our array.
+ # This shouldn't happen because sub write_line() pushed
+ # items into the $rLL array.
+ Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
+ return;
+ }
+ if ( $rLL->[$Knnb]->[_TYPE_] ne 'b'
+ && $rLL->[$Knnb]->[_TYPE_] ne '#' )
+ {
+ return $Knnb;
}
+ $Knnb++;
+ }
+ return;
+} ## end sub K_next_code
- # Do not weld across a comment .. fix for c058.
- next if ($saw_comment);
+sub K_next_nonblank {
+ my ( $self, $KK, $rLL ) = @_;
- # Patch for b1104: do not weld to a paren preceded by sort/map/grep
- # because the special line break rules may cause a blinking state
- if ( defined($Kn_last_nonblank)
- && $rLL->[$K_inner_opening]->[_TOKEN_] eq '('
- && $rLL->[$Kn_last_nonblank]->[_TYPE_] eq 'k' )
- {
- my $token = $rLL->[$Kn_last_nonblank]->[_TOKEN_];
+ # return the index K of the next nonblank token, or
+ # return undef if none
+ return unless ( defined($KK) && $KK >= 0 );
- # Turn off welding at sort/map/grep (
- if ( $is_sort_map_grep{$token} ) { $nonblank_count = 10 }
+ # The third arg allows this routine to be used on any array. This is
+ # useful in sub respace_tokens when we are copying tokens from an old $rLL
+ # to a new $rLL array. But usually the third arg will not be given and we
+ # will just use the $rLL array in $self.
+ $rLL = $self->[_rLL_] unless ( defined($rLL) );
+ my $Num = @{$rLL};
+ my $Knnb = $KK + 1;
+ return unless ( $Knnb < $Num );
+ return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
+ return unless ( ++$Knnb < $Num );
+ return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
+
+ # Backup loop. Very unlikely to get here; it means we have neighboring
+ # blanks in the token stream.
+ $Knnb++;
+ while ( $Knnb < $Num ) {
+
+ # Safety check, this fault shouldn't happen: The $rLL array is the
+ # main array of tokens, so all entries should be used. It is
+ # initialized in sub write_line, and then re-initialized by sub
+ # store_token() within sub respace_tokens. Tokens are pushed on
+ # so there shouldn't be any gaps.
+ if ( !defined( $rLL->[$Knnb] ) ) {
+ Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
+ return;
}
+ if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
+ $Knnb++;
+ }
+ return;
+} ## end sub K_next_nonblank
- if (
+sub K_previous_code {
- # adjacent opening containers, like: do {{
- $nonblank_count == 1
+ # return the index K of the previous nonblank, non-comment token
+ # Call with $KK=undef to start search at the top of the array
+ my ( $self, $KK, $rLL ) = @_;
- # short item following opening paren, like: fun( yyy (
- || ( $nonblank_count == 2
- && $rLL->[$K_outer_opening]->[_TOKEN_] eq '(' )
+ # use the standard array unless given otherwise
+ $rLL = $self->[_rLL_] unless ( defined($rLL) );
+ my $Num = @{$rLL};
+ if ( !defined($KK) ) { $KK = $Num }
+ elsif ( $KK > $Num ) {
- # anonymous sub + prototype or sig: )->then( sub ($code) {
- # ... but it seems best not to stack two structural blocks, like
- # this
- # sub make_anon_with_my_sub { sub {
- # because it probably hides the structure a little too much.
- || ( $inner_blocktype
- && $inner_blocktype eq 'sub'
- && $rLL->[$Kn_first]->[_TOKEN_] eq 'sub'
- && !$outer_blocktype )
- )
+ # This fault can be caused by a programming error in which a bad $KK is
+ # given. The caller should make the first call with KK_new=undef to
+ # avoid this error.
+ Fault(
+"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
+ ) if (DEVEL_MODE);
+ return;
+ }
+ my $Kpnb = $KK - 1;
+ while ( $Kpnb >= 0 ) {
+ if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b'
+ && $rLL->[$Kpnb]->[_TYPE_] ne '#' )
{
- push @nested_pairs,
- [ $inner_seqno, $outer_seqno, $K_inner_closing ];
+ return $Kpnb;
}
- next;
+ $Kpnb--;
}
+ return;
+} ## end sub K_previous_code
- # The weld routine expects the pairs in order in the form
- # [$seqno_inner, $seqno_outer]
- # And they must be in the same order as the inner closing tokens
- # (otherwise, welds of three or more adjacent tokens will not work). The K
- # value of this inner closing token has temporarily been stored for
- # sorting.
- @nested_pairs =
+sub K_previous_nonblank {
- # Drop the K index after sorting (it would cause trouble downstream)
- map { [ $_->[0], $_->[1] ] }
+ # return index of previous nonblank token before item K;
+ # Call with $KK=undef to start search at the top of the array
+ my ( $self, $KK, $rLL ) = @_;
- # Sort on the K values
- sort { $a->[2] <=> $b->[2] } @nested_pairs;
+ # use the standard array unless given otherwise
+ $rLL = $self->[_rLL_] unless ( defined($rLL) );
+ my $Num = @{$rLL};
+ if ( !defined($KK) ) { $KK = $Num }
+ elsif ( $KK > $Num ) {
- return \@nested_pairs;
-} ## end sub find_nested_pairs
+ # This fault can be caused by a programming error in which a bad $KK is
+ # given. The caller should make the first call with KK_new=undef to
+ # avoid this error.
+ Fault(
+"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
+ ) if (DEVEL_MODE);
+ return;
+ }
+ my $Kpnb = $KK - 1;
+ return unless ( $Kpnb >= 0 );
+ return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
+ return unless ( --$Kpnb >= 0 );
+ return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
-sub match_paren_flag {
+ # Backup loop. We should not get here unless some routine
+ # slipped repeated blanks into the token stream.
+ return unless ( --$Kpnb >= 0 );
+ while ( $Kpnb >= 0 ) {
+ if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
+ $Kpnb--;
+ }
+ return;
+} ## end sub K_previous_nonblank
- # Decide if this paren is excluded by user request:
- # undef matches no parens
- # '*' matches all parens
- # 'k' matches only if the previous nonblank token is a perl builtin
- # keyword (such as 'if', 'while'),
- # 'K' matches if 'k' does not, meaning if the previous token is not a
- # keyword.
- # 'f' matches if the previous token is a function other than a keyword.
- # 'F' matches if 'f' does not.
- # 'w' matches if either 'k' or 'f' match.
- # 'W' matches if 'w' does not.
- my ( $self, $KK, $flag ) = @_;
+sub parent_seqno_by_K {
- return 0 unless ( defined($flag) );
- return 0 if $flag eq '0';
- return 1 if $flag eq '1';
- return 1 if $flag eq '*';
- return 0 unless ( defined($KK) );
+ # Return the sequence number of the parent container of token K, if any.
- my $rLL = $self->[_rLL_];
- my $rtoken_vars = $rLL->[$KK];
- my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
- return 0 unless ($seqno);
- my $token = $rtoken_vars->[_TOKEN_];
- my $K_opening = $KK;
- if ( !$is_opening_token{$token} ) {
- $K_opening = $self->[_K_opening_container_]->{$seqno};
- }
- return unless ( defined($K_opening) );
+ my ( $self, $KK ) = @_;
+ my $rLL = $self->[_rLL_];
- my ( $is_f, $is_k, $is_w );
- my $Kp = $self->K_previous_nonblank($K_opening);
- if ( defined($Kp) ) {
- my $type_p = $rLL->[$Kp]->[_TYPE_];
+ # The task is to jump forward to the next container token
+ # and use the sequence number of either it or its parent.
- # keyword?
- $is_k = $type_p eq 'k';
+ # For example, consider the following with seqno=5 of the '[' and ']'
+ # being called with index K of the first token of each line:
- # function call?
- $is_f = $self->[_ris_function_call_paren_]->{$seqno};
+ # # result
+ # push @tests, # -
+ # [ # -
+ # sub { 99 }, 'do {&{%s} for 1,2}', # 5
+ # '(&{})(&{})', undef, # 5
+ # [ 2, 2, 0 ], 0 # 5
+ # ]; # -
- # either keyword or function call?
- $is_w = $is_k || $is_f;
+ # NOTE: The ending parent will be SEQ_ROOT for a balanced file. For
+ # unbalanced files, last sequence number will either be undefined or it may
+ # be at a deeper level. In either case we will just return SEQ_ROOT to
+ # have a defined value and allow formatting to proceed.
+ my $parent_seqno = SEQ_ROOT;
+ my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+ if ($type_sequence) {
+ $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
}
- my $match;
- if ( $flag eq 'k' ) { $match = $is_k }
- elsif ( $flag eq 'K' ) { $match = !$is_k }
- elsif ( $flag eq 'f' ) { $match = $is_f }
- elsif ( $flag eq 'F' ) { $match = !$is_f }
- elsif ( $flag eq 'w' ) { $match = $is_w }
- elsif ( $flag eq 'W' ) { $match = !$is_w }
- return $match;
-} ## end sub match_paren_flag
+ else {
+ my $Kt = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
+ if ( defined($Kt) ) {
+ $type_sequence = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
+ my $type = $rLL->[$Kt]->[_TYPE_];
-sub is_excluded_weld {
+ # if next container token is closing, it is the parent seqno
+ if ( $is_closing_type{$type} ) {
+ $parent_seqno = $type_sequence;
+ }
- # decide if this weld is excluded by user request
- my ( $self, $KK, $is_leading ) = @_;
- my $rLL = $self->[_rLL_];
- my $rtoken_vars = $rLL->[$KK];
- my $token = $rtoken_vars->[_TOKEN_];
- my $rflags = $weld_nested_exclusion_rules{$token};
- return 0 unless ( defined($rflags) );
- my $flag = $is_leading ? $rflags->[0] : $rflags->[1];
- return 0 unless ( defined($flag) );
- return 1 if $flag eq '*';
- return $self->match_paren_flag( $KK, $flag );
-} ## end sub is_excluded_weld
+ # otherwise we want its parent container
+ else {
+ $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
+ }
+ }
+ }
+ $parent_seqno = SEQ_ROOT unless ( defined($parent_seqno) );
+ return $parent_seqno;
+} ## end sub parent_seqno_by_K
-# hashes to simplify welding logic
-my %type_ok_after_bareword;
-my %has_tight_paren;
+sub is_in_block_by_i {
+ my ( $self, $i ) = @_;
-BEGIN {
+ # returns true if
+ # token at i is contained in a BLOCK
+ # or is at root level
+ # or there is some kind of error (i.e. unbalanced file)
+ # returns false otherwise
- # types needed for welding RULE 6
- my @q = qw# => -> { ( [ #;
- @type_ok_after_bareword{@q} = (1) x scalar(@q);
+ if ( $i < 0 ) {
+ DEVEL_MODE && Fault("Bad call, i='$i'\n");
+ return 1;
+ }
- # these types do not 'like' to be separated from a following paren
- @q = qw(w i q Q G C Z U);
- @{has_tight_paren}{@q} = (1) x scalar(@q);
-}
+ my $seqno = $parent_seqno_to_go[$i];
+ return 1 if ( !$seqno || $seqno eq SEQ_ROOT );
+ return 1 if ( $self->[_rblock_type_of_seqno_]->{$seqno} );
+ return;
+} ## end sub is_in_block_by_i
-use constant DEBUG_WELD => 0;
+sub is_in_list_by_i {
+ my ( $self, $i ) = @_;
-sub setup_new_weld_measurements {
+ # returns true if token at i is contained in a LIST
+ # returns false otherwise
+ my $seqno = $parent_seqno_to_go[$i];
+ return unless ( $seqno && $seqno ne SEQ_ROOT );
+ if ( $self->[_ris_list_by_seqno_]->{$seqno} ) {
+ return 1;
+ }
+ return;
+} ## end sub is_in_list_by_i
- # Define quantities to check for excess line lengths when welded.
- # Called by sub 'weld_nested_containers' and sub 'weld_nested_quotes'
+sub is_list_by_K {
- my ( $self, $Kouter_opening, $Kinner_opening ) = @_;
+ # Return true if token K is in a list
+ my ( $self, $KK ) = @_;
- # Given indexes of outer and inner opening containers to be welded:
- # $Kouter_opening, $Kinner_opening
+ my $parent_seqno = $self->parent_seqno_by_K($KK);
+ return unless defined($parent_seqno);
+ return $self->[_ris_list_by_seqno_]->{$parent_seqno};
+} ## end sub is_list_by_K
- # Returns these variables:
- # $new_weld_ok = true (new weld ok) or false (do not start new weld)
- # $starting_indent = starting indentation
- # $starting_lentot = starting cumulative length
- # $msg = diagnostic message for debugging
+sub is_list_by_seqno {
- my $rLL = $self->[_rLL_];
- my $rlines = $self->[_rlines_];
+ # Return true if the immediate contents of a container appears to be a
+ # list.
+ my ( $self, $seqno ) = @_;
+ return unless defined($seqno);
+ return $self->[_ris_list_by_seqno_]->{$seqno};
+} ## end sub is_list_by_seqno
- my $starting_level;
- my $starting_ci;
- my $starting_lentot;
- my $maximum_text_length;
- my $msg = EMPTY_STRING;
+sub resync_lines_and_tokens {
- my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
- my $rK_range = $rlines->[$iline_oo]->{_rK_range};
- my ( $Kfirst, $Klast ) = @{$rK_range};
+ my $self = shift;
- #-------------------------------------------------------------------------
- # We now define a reference index, '$Kref', from which to start measuring
- # This choice turns out to be critical for keeping welds stable during
- # iterations, so we go through a number of STEPS...
- #-------------------------------------------------------------------------
+ # Re-construct the arrays of tokens associated with the original input
+ # lines since they have probably changed due to inserting and deleting
+ # blanks and a few other tokens.
- # STEP 1: Our starting guess is to use measure from the first token of the
- # current line. This is usually a good guess.
- my $Kref = $Kfirst;
+ # Return paremeters:
+ # set severe_error = true if processing needs to terminate
+ my $severe_error;
+ my $rqw_lines = [];
- # STEP 2: See if we should go back a little farther
- my $Kprev = $self->K_previous_nonblank($Kfirst);
- if ( defined($Kprev) ) {
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
+ my $rlines = $self->[_rlines_];
+ my @Krange_code_without_comments;
+ my @Klast_valign_code;
- # Avoid measuring from between an opening paren and a previous token
- # which should stay close to it ... fixes b1185
- my $token_oo = $rLL->[$Kouter_opening]->[_TOKEN_];
- my $type_prev = $rLL->[$Kprev]->[_TYPE_];
- if ( $Kouter_opening == $Kfirst
- && $token_oo eq '('
- && $has_tight_paren{$type_prev} )
- {
- $Kref = $Kprev;
+ # This is the next token and its line index:
+ my $Knext = 0;
+ my $Kmax = defined($Klimit) ? $Klimit : -1;
+
+ # Verify that old line indexes are in still order. If this error occurs,
+ # check locations where sub 'respace_tokens' creates new tokens (like
+ # blank spaces). It must have set a bad old line index.
+ if ( DEVEL_MODE && defined($Klimit) ) {
+ my $iline = $rLL->[0]->[_LINE_INDEX_];
+ foreach my $KK ( 1 .. $Klimit ) {
+ my $iline_last = $iline;
+ $iline = $rLL->[$KK]->[_LINE_INDEX_];
+ if ( $iline < $iline_last ) {
+ my $KK_m = $KK - 1;
+ my $token_m = $rLL->[$KK_m]->[_TOKEN_];
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ my $type_m = $rLL->[$KK_m]->[_TYPE_];
+ my $type = $rLL->[$KK]->[_TYPE_];
+ Fault(<<EOM);
+Line indexes out of order at index K=$KK:
+at KK-1 =$KK_m: old line=$iline_last, type='$type_m', token='$token_m'
+at KK =$KK: old line=$iline, type='$type', token='$token',
+EOM
+ }
}
+ }
- # Back up and count length from a token like '=' or '=>' if -lp
- # is used (this fixes b520)
- # ...or if a break is wanted before there
- elsif ($rOpts_line_up_parentheses
- || $want_break_before{$type_prev} )
- {
+ my $iline = -1;
+ foreach my $line_of_tokens ( @{$rlines} ) {
+ $iline++;
+ my $line_type = $line_of_tokens->{_line_type};
+ if ( $line_type eq 'CODE' ) {
- # If there are other sequence items between the start of this line
- # and the opening token in question, then do not include tokens on
- # the previous line in length calculations. This check added to
- # fix case b1174 which had a '?' on the line
- my $no_previous_seq_item = $Kref == $Kouter_opening
- || $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_] == $Kouter_opening;
+ # Get the old number of tokens on this line
+ my $rK_range_old = $line_of_tokens->{_rK_range};
+ my ( $Kfirst_old, $Klast_old ) = @{$rK_range_old};
+ my $Kdiff_old = 0;
+ if ( defined($Kfirst_old) ) {
+ $Kdiff_old = $Klast_old - $Kfirst_old;
+ }
- if ( $no_previous_seq_item
- && substr( $type_prev, 0, 1 ) eq '=' )
+ # Find the range of NEW K indexes for the line:
+ # $Kfirst = index of first token on line
+ # $Klast = index of last token on line
+ my ( $Kfirst, $Klast );
+
+ my $Knext_beg = $Knext; # this will be $Kfirst if we find tokens
+
+ # Optimization: Although the actual K indexes may be completely
+ # changed after respacing, the number of tokens on any given line
+ # will often be nearly unchanged. So we will see if we can start
+ # our search by guessing that the new line has the same number
+ # of tokens as the old line.
+ my $Knext_guess = $Knext + $Kdiff_old;
+ if ( $Knext_guess > $Knext
+ && $Knext_guess < $Kmax
+ && $rLL->[$Knext_guess]->[_LINE_INDEX_] <= $iline )
{
- $Kref = $Kprev;
- # Fix for b1144 and b1112: backup to the first nonblank
- # character before the =>, or to the start of its line.
- if ( $type_prev eq '=>' ) {
- my $iline_prev = $rLL->[$Kprev]->[_LINE_INDEX_];
- my $rK_range_prev = $rlines->[$iline_prev]->{_rK_range};
- my ( $Kfirst_prev, $Klast_prev ) = @{$rK_range_prev};
- foreach my $KK ( reverse( $Kfirst_prev .. $Kref - 1 ) ) {
- next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
- $Kref = $KK;
- last;
- }
- }
+ # the guess is good, so we can start our search here
+ $Knext = $Knext_guess + 1;
}
- }
- }
- # STEP 3: Now look ahead for a ternary and, if found, use it.
- # This fixes case b1182.
- # Also look for a ')' at the same level and, if found, use it.
- # This fixes case b1224.
- if ( $Kref < $Kouter_opening ) {
- my $Knext = $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_];
- my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
- while ( $Knext < $Kouter_opening ) {
- if ( $rLL->[$Knext]->[_LEVEL_] == $level_oo ) {
- if ( $is_ternary{ $rLL->[$Knext]->[_TYPE_] }
- || $rLL->[$Knext]->[_TOKEN_] eq ')' )
- {
- $Kref = $Knext;
- last;
- }
+ while ($Knext <= $Kmax
+ && $rLL->[$Knext]->[_LINE_INDEX_] <= $iline )
+ {
+ $Knext++;
}
- $Knext = $rLL->[$Knext]->[_KNEXT_SEQ_ITEM_];
- }
- }
- # Define the starting measurements we will need
- $starting_lentot =
- $Kref <= 0 ? 0 : $rLL->[ $Kref - 1 ]->[_CUMULATIVE_LENGTH_];
- $starting_level = $rLL->[$Kref]->[_LEVEL_];
- $starting_ci = $rLL->[$Kref]->[_CI_LEVEL_];
+ if ( $Knext > $Knext_beg ) {
- $maximum_text_length = $maximum_text_length_at_level[$starting_level] -
- $starting_ci * $rOpts_continuation_indentation;
+ $Klast = $Knext - 1;
- # STEP 4: Switch to using the outer opening token as the reference
- # point if a line break before it would make a longer line.
- # Fixes case b1055 and is also an alternate fix for b1065.
- my $starting_level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
- if ( $Kref < $Kouter_opening ) {
- my $starting_ci_oo = $rLL->[$Kouter_opening]->[_CI_LEVEL_];
- my $lentot_oo = $rLL->[ $Kouter_opening - 1 ]->[_CUMULATIVE_LENGTH_];
- my $maximum_text_length_oo =
- $maximum_text_length_at_level[$starting_level_oo] -
- $starting_ci_oo * $rOpts_continuation_indentation;
+ # Delete any terminal blank token
+ if ( $rLL->[$Klast]->[_TYPE_] eq 'b' ) { $Klast -= 1 }
- # The excess length to any cumulative length K = lenK is either
- # $excess = $lenk - ($lentot + $maximum_text_length), or
- # $excess = $lenk - ($lentot_oo + $maximum_text_length_oo),
- # so the worst case (maximum excess) corresponds to the configuration
- # with minimum value of the sum: $lentot + $maximum_text_length
- if ( $lentot_oo + $maximum_text_length_oo <
- $starting_lentot + $maximum_text_length )
- {
- $Kref = $Kouter_opening;
- $starting_level = $starting_level_oo;
- $starting_ci = $starting_ci_oo;
- $starting_lentot = $lentot_oo;
- $maximum_text_length = $maximum_text_length_oo;
- }
- }
+ if ( $Klast < $Knext_beg ) {
+ $Klast = undef;
+ }
+ else {
- my $new_weld_ok = 1;
+ $Kfirst = $Knext_beg;
- # STEP 5, fix b1020: Avoid problem areas with the -wn -lp combination. The
- # combination -wn -lp -dws -naws does not work well and can cause blinkers.
- # It will probably only occur in stress testing. For this situation we
- # will only start a new weld if we start at a 'good' location.
- # - Added 'if' to fix case b1032.
- # - Require blank before certain previous characters to fix b1111.
- # - Add ';' to fix case b1139
- # - Convert from '$ok_to_weld' to '$new_weld_ok' to fix b1162.
- # - relaxed constraints for b1227
- if ( $starting_ci
- && $rOpts_line_up_parentheses
- && $rOpts_delete_old_whitespace
- && !$rOpts_add_whitespace
- && defined($Kprev) )
- {
- my $type_first = $rLL->[$Kfirst]->[_TYPE_];
- my $token_first = $rLL->[$Kfirst]->[_TOKEN_];
- my $type_prev = $rLL->[$Kprev]->[_TYPE_];
- my $type_pp = 'b';
- if ( $Kprev >= 0 ) { $type_pp = $rLL->[ $Kprev - 1 ]->[_TYPE_] }
- unless (
- $type_prev =~ /^[\,\.\;]/
- || $type_prev =~ /^[=\{\[\(\L]/
- && ( $type_pp eq 'b' || $type_pp eq '}' || $type_first eq 'k' )
- || $type_first =~ /^[=\,\.\;\{\[\(\L]/
- || $type_first eq '||'
- || (
- $type_first eq 'k'
- && ( $token_first eq 'if'
- || $token_first eq 'or' )
- )
- )
- {
- $msg =
-"Skipping weld: poor break with -lp and ci at type_first='$type_first' type_prev='$type_prev' type_pp=$type_pp\n";
- $new_weld_ok = 0;
- }
- }
- return ( $new_weld_ok, $maximum_text_length, $starting_lentot, $msg );
-} ## end sub setup_new_weld_measurements
+ # Save ranges of non-comment code. This will be used by
+ # sub keep_old_line_breaks.
+ if ( $rLL->[$Kfirst]->[_TYPE_] ne '#' ) {
+ push @Krange_code_without_comments, [ $Kfirst, $Klast ];
+ }
-sub excess_line_length_for_Krange {
- my ( $self, $Kfirst, $Klast ) = @_;
+ # Only save ending K indexes of code types which are blank
+ # or 'VER'. These will be used for a convergence check.
+ # See related code in sub 'convey_batch_to_vertical_aligner'
+ my $CODE_type = $line_of_tokens->{_code_type};
+ if ( !$CODE_type
+ || $CODE_type eq 'VER' )
+ {
+ push @Klast_valign_code, $Klast;
+ }
+ }
+ }
- # returns $excess_length =
- # by how many characters a line composed of tokens $Kfirst .. $Klast will
- # exceed the allowed line length
+ # It is only safe to trim the actual line text if the input
+ # line had a terminal blank token. Otherwise, we may be
+ # in a quote.
+ if ( $line_of_tokens->{_ended_in_blank_token} ) {
+ $line_of_tokens->{_line_text} =~ s/\s+$//;
+ }
+ $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
- my $rLL = $self->[_rLL_];
- my $length_before_Kfirst =
- $Kfirst <= 0
- ? 0
- : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
+ # Deleting semicolons can create new empty code lines
+ # which should be marked as blank
+ if ( !defined($Kfirst) ) {
+ my $CODE_type = $line_of_tokens->{_code_type};
+ if ( !$CODE_type ) {
+ $line_of_tokens->{_code_type} = 'BL';
+ }
+ }
+ else {
- # backup before a side comment if necessary
- my $Kend = $Klast;
- if ( $rOpts_ignore_side_comment_lengths
- && $rLL->[$Klast]->[_TYPE_] eq '#' )
- {
- my $Kprev = $self->K_previous_nonblank($Klast);
- if ( defined($Kprev) && $Kprev >= $Kfirst ) { $Kend = $Kprev }
+ #---------------------------------------------------
+ # save indexes of all lines with a 'q' at either end
+ # for later use by sub find_multiline_qw
+ #---------------------------------------------------
+ if ( $rLL->[$Kfirst]->[_TYPE_] eq 'q'
+ || $rLL->[$Klast]->[_TYPE_] eq 'q' )
+ {
+ push @{$rqw_lines}, $iline;
+ }
+ }
+ }
}
- # get the length of the text
- my $length = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - $length_before_Kfirst;
-
- # get the size of the text window
- my $level = $rLL->[$Kfirst]->[_LEVEL_];
- my $ci_level = $rLL->[$Kfirst]->[_CI_LEVEL_];
- my $max_text_length = $maximum_text_length_at_level[$level] -
- $ci_level * $rOpts_continuation_indentation;
-
- my $excess_length = $length - $max_text_length;
+ # There shouldn't be any nodes beyond the last one. This routine is
+ # relinking lines and tokens after the tokens have been respaced. A fault
+ # here indicates some kind of bug has been introduced into the above loops.
+ # There is not good way to keep going; we better stop here.
+ if ( $Knext <= $Kmax ) {
+ Fault_Warn(
+ "unexpected tokens at end of file when reconstructing lines");
+ $severe_error = 1;
+ return ( $severe_error, $rqw_lines );
+ }
+ $self->[_rKrange_code_without_comments_] = \@Krange_code_without_comments;
- DEBUG_WELD
- && print
-"Kfirst=$Kfirst, Klast=$Klast, Kend=$Kend, level=$level, ci=$ci_level, max_text_length=$max_text_length, length=$length\n";
- return ($excess_length);
-} ## end sub excess_line_length_for_Krange
+ # Setup the convergence test in the FileWriter based on line-ending indexes
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->setup_convergence_test( \@Klast_valign_code );
-sub weld_nested_containers {
- my ($self) = @_;
+ return ( $severe_error, $rqw_lines );
- # Called once per file for option '--weld-nested-containers'
+} ## end sub resync_lines_and_tokens
- my $rK_weld_left = $self->[_rK_weld_left_];
- my $rK_weld_right = $self->[_rK_weld_right_];
+sub check_for_old_break {
+ my ( $self, $KK, $rkeep_break_hash, $rbreak_hash ) = @_;
- # This routine implements the -wn flag by "welding together"
- # the nested closing and opening tokens which were previously
- # identified by sub 'find_nested_pairs'. "welding" simply
- # involves setting certain hash values which will be checked
- # later during formatting.
+ # This sub is called to help implement flags:
+ # --keep-old-breakpoints-before and --keep-old-breakpoints-after
+ # Given:
+ # $KK = index of a token,
+ # $rkeep_break_hash = user control for --keep-old-...
+ # $rbreak_hash = hash of tokens where breaks are requested
+ # Set $rbreak_hash as follows if a user break is requested:
+ # = 1 make a hard break (flush the current batch)
+ # best for something like leading commas (-kbb=',')
+ # = 2 make a soft break (keep building current batch)
+ # best for something like leading ->
- my $rLL = $self->[_rLL_];
- my $rlines = $self->[_rlines_];
- my $K_opening_container = $self->[_K_opening_container_];
- my $K_closing_container = $self->[_K_closing_container_];
- my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
- my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
- my $ris_asub_block = $self->[_ris_asub_block_];
- my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
+ my $rLL = $self->[_rLL_];
- # Find nested pairs of container tokens for any welding.
- my $rnested_pairs = $self->find_nested_pairs();
+ my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
- # Return unless there are nested pairs to weld
- return unless defined($rnested_pairs) && @{$rnested_pairs};
+ # non-container tokens use the type as the key
+ if ( !$seqno ) {
+ my $type = $rLL->[$KK]->[_TYPE_];
+ if ( $rkeep_break_hash->{$type} ) {
+ $rbreak_hash->{$KK} = $is_soft_keep_break_type{$type} ? 2 : 1;
+ }
+ }
- # NOTE: It would be nice to apply RULE 5 right here by deleting unwanted
- # pairs. But it isn't clear if this is possible because we don't know
- # which sequences might actually start a weld.
+ # container tokens use the token as the key
+ else {
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ my $flag = $rkeep_break_hash->{$token};
+ if ($flag) {
- # Setup a hash to avoid instabilities with combination -lp -wn -pvt=2.
- # We do this by reducing -vt=2 to -vt=1 where there could be a conflict
- # with welding at the same tokens.
- # See issues b1338, b1339, b1340, b1341, b1342, b1343.
- if ($rOpts_line_up_parentheses) {
+ my $match = $flag eq '1' || $flag eq '*';
- # NOTE: just parens for now but this could be applied to all types if
- # necessary.
- if ( $opening_vertical_tightness{'('} == 2 ) {
- my $rreduce_vertical_tightness_by_seqno =
- $self->[_rreduce_vertical_tightness_by_seqno_];
- foreach my $item ( @{$rnested_pairs} ) {
- my ( $inner_seqno, $outer_seqno ) = @{$item};
- if ( !$ris_excluded_lp_container->{$outer_seqno} ) {
+ # check for special matching codes
+ if ( !$match ) {
+ if ( $token eq '(' || $token eq ')' ) {
+ $match = $self->match_paren_control_flag( $seqno, $flag );
+ }
+ elsif ( $token eq '{' || $token eq '}' ) {
- # Set a flag which means that if a token has -vt=2
- # then reduce it to -vt=1.
- $rreduce_vertical_tightness_by_seqno->{$outer_seqno} = 1;
+ # These tentative codes 'b' and 'B' for brace types are
+ # placeholders for possible future brace types. They
+ # are not documented and may be changed.
+ my $block_type = $self->[_rblock_type_of_seqno_]->{$seqno};
+ if ( $flag eq 'b' ) { $match = $block_type }
+ elsif ( $flag eq 'B' ) { $match = !$block_type }
+ else {
+ # unknown code - no match
+ }
}
}
+ if ($match) {
+ my $type = $rLL->[$KK]->[_TYPE_];
+ $rbreak_hash->{$KK} = $is_soft_keep_break_type{$type} ? 2 : 1;
+ }
}
}
+ return;
+} ## end sub check_for_old_break
- my $rOpts_break_at_old_method_breakpoints =
- $rOpts->{'break-at-old-method-breakpoints'};
+sub keep_old_line_breaks {
- # This array will hold the sequence numbers of the tokens to be welded.
- my @welds;
+ # Called once per file to find and mark any old line breaks which
+ # should be kept. We will be translating the input hashes into
+ # token indexes.
- # Variables needed for estimating line lengths
- my $maximum_text_length; # maximum spaces available for text
- my $starting_lentot; # cumulative text to start of current line
+ # A flag is set as follows:
+ # = 1 make a hard break (flush the current batch)
+ # best for something like leading commas (-kbb=',')
+ # = 2 make a soft break (keep building current batch)
+ # best for something like leading ->
- my $iline_outer_opening = -1;
- my $weld_count_this_start = 0;
+ my ($self) = @_;
- # OLD: $single_line_tol added to fix cases b1180 b1181
- # = $rOpts_continuation_indentation > $rOpts_indent_columns ? 1 : 0;
- # NEW: $single_line_tol=0; fixes b1212 and b1180-1181 work now
- my $single_line_tol = 0;
+ my $rLL = $self->[_rLL_];
+ my $rKrange_code_without_comments =
+ $self->[_rKrange_code_without_comments_];
+ my $rbreak_before_Kfirst = $self->[_rbreak_before_Kfirst_];
+ my $rbreak_after_Klast = $self->[_rbreak_after_Klast_];
+ my $rbreak_container = $self->[_rbreak_container_];
- my $multiline_tol = $single_line_tol + 1 +
- max( $rOpts_indent_columns, $rOpts_continuation_indentation );
+ #----------------------------------------
+ # Apply --break-at-old-method-breakpoints
+ #----------------------------------------
- # Define a welding cutoff level: do not start a weld if the inside
- # container level equals or exceeds this level.
+ # This code moved here from sub break_lists to fix b1120
+ if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
+ foreach my $item ( @{$rKrange_code_without_comments} ) {
+ my ( $Kfirst, $Klast ) = @{$item};
+ my $type = $rLL->[$Kfirst]->[_TYPE_];
+ my $token = $rLL->[$Kfirst]->[_TOKEN_];
- # We use the minimum of two criteria, either of which may be more
- # restrictive. The 'alpha' value is more restrictive in (b1206, b1252) and
- # the 'beta' value is more restrictive in other cases (b1243).
+ # leading '->' use a value of 2 which causes a soft
+ # break rather than a hard break
+ if ( $type eq '->' ) {
+ $rbreak_before_Kfirst->{$Kfirst} = 2;
+ }
- my $weld_cutoff_level = min( $stress_level_alpha, $stress_level_beta + 3 );
+ # leading ')->' use a special flag to insure that both
+ # opening and closing parens get opened
+ # Fix for b1120: only for parens, not braces
+ elsif ( $token eq ')' ) {
+ my $Kn = $self->K_next_nonblank($Kfirst);
+ next
+ unless ( defined($Kn)
+ && $Kn <= $Klast
+ && $rLL->[$Kn]->[_TYPE_] eq '->' );
+ my $seqno = $rLL->[$Kfirst]->[_TYPE_SEQUENCE_];
+ next unless ($seqno);
- # The vertical tightness flags can throw off line length calculations.
- # This patch was added to fix instability issue b1284.
- # It works to always use a tol of 1 for 1 line block length tests, but
- # this restricted value keeps test case wn6.wn working as before.
- # It may be necessary to include '[' and '{' here in the future.
- my $one_line_tol = $opening_vertical_tightness{'('} ? 1 : 0;
+ # Note: in previous versions there was a fix here to avoid
+ # instability between conflicting -bom and -pvt or -pvtc flags.
+ # The fix skipped -bom for a small line difference. But this
+ # was troublesome, and instead the fix has been moved to
+ # sub set_vertical_tightness_flags where priority is given to
+ # the -bom flag over -pvt and -pvtc flags. Both opening and
+ # closing paren flags are involved because even though -bom only
+ # requests breaking before the closing paren, automated logic
+ # opens the opening paren when the closing paren opens.
+ # Relevant cases are b977, b1215, b1270, b1303
- my $length_to_opening_seqno = sub {
- my ($seqno) = @_;
- my $KK = $K_opening_container->{$seqno};
- my $lentot = defined($KK)
- && $KK > 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0;
- return $lentot;
- };
+ $rbreak_container->{$seqno} = 1;
+ }
+ }
+ }
- my $length_to_closing_seqno = sub {
- my ($seqno) = @_;
- my $KK = $K_closing_container->{$seqno};
- my $lentot = defined($KK)
- && $KK > 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0;
- return $lentot;
- };
+ #---------------------------------------------------------------------
+ # Apply --keep-old-breakpoints-before and --keep-old-breakpoints-after
+ #---------------------------------------------------------------------
- # Abbreviations:
- # _oo=outer opening, i.e. first of { {
- # _io=inner opening, i.e. second of { {
- # _oc=outer closing, i.e. second of } {
- # _ic=inner closing, i.e. first of } }
+ return unless ( %keep_break_before_type || %keep_break_after_type );
- my $previous_pair;
+ foreach my $item ( @{$rKrange_code_without_comments} ) {
+ my ( $Kfirst, $Klast ) = @{$item};
+ $self->check_for_old_break( $Kfirst, \%keep_break_before_type,
+ $rbreak_before_Kfirst );
+ $self->check_for_old_break( $Klast, \%keep_break_after_type,
+ $rbreak_after_Klast );
+ }
+ return;
+} ## end sub keep_old_line_breaks
- # Main loop over nested pairs...
- # We are working from outermost to innermost pairs so that
- # level changes will be complete when we arrive at the inner pairs.
- while ( my $item = pop( @{$rnested_pairs} ) ) {
- my ( $inner_seqno, $outer_seqno ) = @{$item};
+sub weld_containers {
- my $Kouter_opening = $K_opening_container->{$outer_seqno};
- my $Kinner_opening = $K_opening_container->{$inner_seqno};
- my $Kouter_closing = $K_closing_container->{$outer_seqno};
- my $Kinner_closing = $K_closing_container->{$inner_seqno};
+ # Called once per file to do any welding operations requested by --weld*
+ # flags.
+ my ($self) = @_;
- # RULE: do not weld if inner container has <= 3 tokens unless the next
- # token is a heredoc (so we know there will be multiple lines)
- if ( $Kinner_closing - $Kinner_opening <= 4 ) {
- my $Knext_nonblank = $self->K_next_nonblank($Kinner_opening);
- next unless defined($Knext_nonblank);
- my $type = $rLL->[$Knext_nonblank]->[_TYPE_];
- next unless ( $type eq 'h' );
- }
+ # This count is used to eliminate needless calls for weld checks elsewhere
+ $total_weld_count = 0;
- my $outer_opening = $rLL->[$Kouter_opening];
- my $inner_opening = $rLL->[$Kinner_opening];
- my $outer_closing = $rLL->[$Kouter_closing];
- my $inner_closing = $rLL->[$Kinner_closing];
+ return if ( $rOpts->{'indent-only'} );
+ return unless ($rOpts_add_newlines);
- # RULE: do not weld to a hash brace. The reason is that it has a very
- # strong bond strength to the next token, so a line break after it
- # may not work. Previously we allowed welding to something like @{
- # but that caused blinking states (cases b751, b779).
- if ( $inner_opening->[_TYPE_] eq 'L' ) {
- next;
- }
+ # Important: sub 'weld_cuddled_blocks' must be called before
+ # sub 'weld_nested_containers'. This is because the cuddled option needs to
+ # use the original _LEVEL_ values of containers, but the weld nested
+ # containers changes _LEVEL_ of welded containers.
- # RULE: do not weld to a square bracket which does not contain commas
- if ( $inner_opening->[_TYPE_] eq '[' ) {
- my $rtype_count = $self->[_rtype_count_by_seqno_]->{$inner_seqno};
- next unless ($rtype_count);
- my $comma_count = $rtype_count->{','};
- next unless ($comma_count);
+ # Here is a good test case to be sure that both cuddling and welding
+ # are working and not interfering with each other: <<snippets/ce_wn1.in>>
- # Do not weld if there is text before a '[' such as here:
- # curr_opt ( @beg [2,5] )
- # It will not break into the desired sandwich structure.
- # This fixes case b109, 110.
- my $Kdiff = $Kinner_opening - $Kouter_opening;
- next if ( $Kdiff > 2 );
- next
- if ( $Kdiff == 2
- && $rLL->[ $Kouter_opening + 1 ]->[_TYPE_] ne 'b' );
+ # perltidy -wn -ce
- }
+ # if ($BOLD_MATH) { (
+ # $labels, $comment,
+ # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
+ # ) } else { (
+ # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
+ # $after
+ # ) }
- # RULE: Avoid welding under stress. The idea is that we need to have a
- # little space* within a welded container to avoid instability. Note
- # that after each weld the level values are reduced, so long multiple
- # welds can still be made. This rule will seldom be a limiting factor
- # in actual working code. Fixes b1206, b1243.
- my $inner_level = $inner_opening->[_LEVEL_];
- if ( $inner_level >= $weld_cutoff_level ) { next }
+ $self->weld_cuddled_blocks() if ( %{$rcuddled_block_types} );
- # Set flag saying if this pair starts a new weld
- my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
+ if ( $rOpts->{'weld-nested-containers'} ) {
- # Set flag saying if this pair is adjacent to the previous nesting pair
- # (even if previous pair was rejected as a weld)
- my $touch_previous_pair =
- defined($previous_pair) && $outer_seqno == $previous_pair->[0];
- $previous_pair = $item;
+ $self->weld_nested_containers();
- my $do_not_weld_rule = 0;
- my $Msg = EMPTY_STRING;
- my $is_one_line_weld;
+ $self->weld_nested_quotes();
+ }
- my $iline_oo = $outer_opening->[_LINE_INDEX_];
- my $iline_io = $inner_opening->[_LINE_INDEX_];
- my $iline_ic = $inner_closing->[_LINE_INDEX_];
- my $iline_oc = $outer_closing->[_LINE_INDEX_];
- my $token_oo = $outer_opening->[_TOKEN_];
- my $token_io = $inner_opening->[_TOKEN_];
+ #-------------------------------------------------------------
+ # All welding is done. Finish setting up weld data structures.
+ #-------------------------------------------------------------
- my $is_multiline_weld =
- $iline_oo == $iline_io
- && $iline_ic == $iline_oc
- && $iline_io != $iline_ic;
+ my $rLL = $self->[_rLL_];
+ my $rK_weld_left = $self->[_rK_weld_left_];
+ my $rK_weld_right = $self->[_rK_weld_right_];
+ my $rweld_len_right_at_K = $self->[_rweld_len_right_at_K_];
- if (DEBUG_WELD) {
- my $len_oo = $rLL->[$Kouter_opening]->[_CUMULATIVE_LENGTH_];
- my $len_io = $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_];
- $Msg .= <<EOM;
-Pair seqo=$outer_seqno seqi=$inner_seqno lines: loo=$iline_oo lio=$iline_io lic=$iline_ic loc=$iline_oc
-Koo=$Kouter_opening Kio=$Kinner_opening Kic=$Kinner_closing Koc=$Kouter_closing lenoo=$len_oo lenio=$len_io
-tokens '$token_oo' .. '$token_io'
-EOM
- }
+ my @K_multi_weld;
+ my @keys = keys %{$rK_weld_right};
+ $total_weld_count = @keys;
- # DO-NOT-WELD RULE 0:
- # Avoid a new paren-paren weld if inner parens are 'sheared' (separated
- # by one line). This can produce instabilities (fixes b1250 b1251
- # 1256).
- if ( !$is_multiline_weld
- && $iline_ic == $iline_io + 1
- && $token_oo eq '('
- && $token_io eq '(' )
- {
- if (DEBUG_WELD) {
- $Msg .= "RULE 0: Not welding due to sheared inner parens\n";
- print $Msg;
- }
+ # First pass to process binary welds.
+ # This loop is processed in unsorted order for efficiency.
+ foreach my $Kstart (@keys) {
+ my $Kend = $rK_weld_right->{$Kstart};
+
+ # An error here would be due to an incorrect initialization introduced
+ # in one of the above weld routines, like sub weld_nested.
+ if ( $Kend <= $Kstart ) {
+ Fault("Bad weld link: Kend=$Kend <= Kstart=$Kstart\n")
+ if (DEVEL_MODE);
next;
}
- # If this pair is not adjacent to the previous pair (skipped or not),
- # then measure lengths from the start of line of oo.
- if (
- !$touch_previous_pair
+ # Set weld values for all tokens this welded pair
+ foreach ( $Kstart + 1 .. $Kend ) {
+ $rK_weld_left->{$_} = $Kstart;
+ }
+ foreach my $Kx ( $Kstart .. $Kend - 1 ) {
+ $rK_weld_right->{$Kx} = $Kend;
+ $rweld_len_right_at_K->{$Kx} =
+ $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
+ }
- # Also do this if restarting at a new line; fixes case b965, s001
- || ( !$weld_count_this_start && $iline_oo > $iline_outer_opening )
- )
+ # Remember the leftmost index of welds which continue to the right
+ if ( defined( $rK_weld_right->{$Kend} )
+ && !defined( $rK_weld_left->{$Kstart} ) )
{
+ push @K_multi_weld, $Kstart;
+ }
+ }
- # Remember the line we are using as a reference
- $iline_outer_opening = $iline_oo;
- $weld_count_this_start = 0;
+ # Second pass to process chains of welds (these are rare).
+ # This has to be processed in sorted order.
+ if (@K_multi_weld) {
+ my $Kend = -1;
+ foreach my $Kstart ( sort { $a <=> $b } @K_multi_weld ) {
- ( my $new_weld_ok, $maximum_text_length, $starting_lentot, my $msg )
- = $self->setup_new_weld_measurements( $Kouter_opening,
- $Kinner_opening );
+ # Skip any interior K which was originally missing a left link
+ next if ( $Kstart <= $Kend );
- if (
- !$new_weld_ok
- && ( $iline_oo != $iline_io
- || $iline_ic != $iline_oc )
- )
- {
- if (DEBUG_WELD) { print $msg}
- next;
+ # Find the end of this chain
+ $Kend = $rK_weld_right->{$Kstart};
+ my $Knext = $rK_weld_right->{$Kend};
+ while ( defined($Knext) ) {
+ $Kend = $Knext;
+ $Knext = $rK_weld_right->{$Kend};
}
- my $rK_range = $rlines->[$iline_oo]->{_rK_range};
- my ( $Kfirst, $Klast ) = @{$rK_range};
+ # Set weld values this chain
+ foreach ( $Kstart + 1 .. $Kend ) {
+ $rK_weld_left->{$_} = $Kstart;
+ }
+ foreach my $Kx ( $Kstart .. $Kend - 1 ) {
+ $rK_weld_right->{$Kx} = $Kend;
+ $rweld_len_right_at_K->{$Kx} =
+ $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
+ }
+ }
+ }
- # An existing one-line weld is a line in which
- # (1) the containers are all on one line, and
- # (2) the line does not exceed the allowable length
- if ( $iline_oo == $iline_oc ) {
+ return;
+} ## end sub weld_containers
- # All the tokens are on one line, now check their length.
- # Start with the full line index range. We will reduce this
- # in the coding below in some cases.
- my $Kstart = $Kfirst;
- my $Kstop = $Klast;
+sub cumulative_length_before_K {
+ my ( $self, $KK ) = @_;
+ my $rLL = $self->[_rLL_];
+ return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
+}
- # Note that the following minimal choice for measuring will
- # work and will not cause any instabilities because it is
- # invariant:
+sub weld_cuddled_blocks {
+ my ($self) = @_;
- ## my $Kstart = $Kouter_opening;
- ## my $Kstop = $Kouter_closing;
+ # Called once per file to handle cuddled formatting
- # But that can lead to some undesirable welds. So a little
- # more complicated method has been developed.
+ my $rK_weld_left = $self->[_rK_weld_left_];
+ my $rK_weld_right = $self->[_rK_weld_right_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
- # We are trying to avoid creating bad two-line welds when we are
- # working on long, previously un-welded input text, such as
+ # This routine implements the -cb flag by finding the appropriate
+ # closing and opening block braces and welding them together.
+ return unless ( %{$rcuddled_block_types} );
- # INPUT (example of a long input line weld candidate):
- ## $mutation->transpos( $self->RNA->position($mutation->label, $atg_label));
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
- # GOOD two-line break: (not welded; result marked too long):
- ## $mutation->transpos(
- ## $self->RNA->position($mutation->label, $atg_label));
+ my $rbreak_container = $self->[_rbreak_container_];
+ my $ris_broken_container = $self->[_ris_broken_container_];
+ my $ris_cuddled_closing_brace = $self->[_ris_cuddled_closing_brace_];
+ my $K_closing_container = $self->[_K_closing_container_];
- # BAD two-line break: (welded; result if we weld):
- ## $mutation->transpos($self->RNA->position(
- ## $mutation->label, $atg_label));
+ # A stack to remember open chains at all levels: This is a hash rather than
+ # an array for safety because negative levels can occur in files with
+ # errors. This allows us to keep processing with negative levels.
+ # $in_chain{$level} = [$chain_type, $type_sequence];
+ my %in_chain;
+ my $CBO = $rOpts->{'cuddled-break-option'};
- # We can only get an approximate estimate of the final length,
- # since the line breaks may change, and for -lp mode because
- # even the indentation is not yet known.
+ # loop over structure items to find cuddled pairs
+ my $level = 0;
+ my $KNEXT = $self->[_K_first_seq_item_];
+ while ( defined($KNEXT) ) {
+ my $KK = $KNEXT;
+ $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
+ my $rtoken_vars = $rLL->[$KK];
+ my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+ if ( !$type_sequence ) {
+ next if ( $KK == 0 ); # first token in file may not be container
- my $level_first = $rLL->[$Kfirst]->[_LEVEL_];
- my $level_last = $rLL->[$Klast]->[_LEVEL_];
- my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
- my $level_oc = $rLL->[$Kouter_closing]->[_LEVEL_];
+ # A fault here implies that an error was made in the little loop at
+ # the bottom of sub 'respace_tokens' which set the values of
+ # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
+ # loop control lines above.
+ Fault("sequence = $type_sequence not defined at K=$KK")
+ if (DEVEL_MODE);
+ next;
+ }
- # - measure to the end of the original line if balanced
- # - measure to the closing container if unbalanced (fixes b1230)
- #if ( $level_first != $level_last ) { $Kstop = $Kouter_closing }
- if ( $level_oc > $level_last ) { $Kstop = $Kouter_closing }
+ # NOTE: we must use the original levels here. They can get changed
+ # by sub 'weld_nested_containers', so this routine must be called
+ # before sub 'weld_nested_containers'.
+ my $last_level = $level;
+ $level = $rtoken_vars->[_LEVEL_];
- # - measure from the start of the original line if balanced
- # - measure from the most previous token with same level
- # if unbalanced (b1232)
- if ( $Kouter_opening > $Kfirst && $level_oo > $level_first ) {
- $Kstart = $Kouter_opening;
+ if ( $level < $last_level ) { $in_chain{$last_level} = undef }
+ elsif ( $level > $last_level ) { $in_chain{$level} = undef }
- foreach
- my $KK ( reverse( $Kfirst + 1 .. $Kouter_opening - 1 ) )
- {
- next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
- last if ( $rLL->[$KK]->[_LEVEL_] < $level_oo );
- $Kstart = $KK;
- }
- }
+ # We are only looking at code blocks
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $type = $rtoken_vars->[_TYPE_];
+ next unless ( $type eq $token );
- my $excess =
- $self->excess_line_length_for_Krange( $Kstart, $Kstop );
+ if ( $token eq '{' ) {
- # Coding simplified here for case b1219.
- # Increased tol from 0 to 1 when pvt>0 to fix b1284.
- $is_one_line_weld = $excess <= $one_line_tol;
+ my $block_type = $rblock_type_of_seqno->{$type_sequence};
+ if ( !$block_type ) {
+
+ # patch for unrecognized block types which may not be labeled
+ my $Kp = $self->K_previous_nonblank($KK);
+ while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
+ $Kp = $self->K_previous_nonblank($Kp);
+ }
+ next unless $Kp;
+ $block_type = $rLL->[$Kp]->[_TOKEN_];
}
+ if ( $in_chain{$level} ) {
- # DO-NOT-WELD RULE 1:
- # Do not weld something that looks like the start of a two-line
- # function call, like this: <<snippets/wn6.in>>
- # $trans->add_transformation(
- # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
- # We will look for a semicolon after the closing paren.
+ # we are in a chain and are at an opening block brace.
+ # See if we are welding this opening brace with the previous
+ # block brace. Get their identification numbers:
+ my $closing_seqno = $in_chain{$level}->[1];
+ my $opening_seqno = $type_sequence;
- # We want to weld something complex, like this though
- # my $compass = uc( opposite_direction( line_to_canvas_direction(
- # @{ $coords[0] }, @{ $coords[1] } ) ) );
- # Otherwise we will get a 'blinker'. For example, the following
- # would become a blinker without this rule:
- # $Self->_Add( $SortOrderDisplay{ $Field
- # ->GenerateFieldForSelectSQL() } );
- # But it is okay to weld a two-line statement if it looks like
- # it was already welded, meaning that the two opening containers are
- # on a different line that the two closing containers. This is
- # necessary to prevent blinking of something like this with
- # perltidy -wn -pbp (starting indentation two levels deep):
+ # The preceding block must be on multiple lines so that its
+ # closing brace will start a new line.
+ if ( !$ris_broken_container->{$closing_seqno}
+ && !$rbreak_container->{$closing_seqno} )
+ {
+ next unless ( $CBO == 2 );
+ $rbreak_container->{$closing_seqno} = 1;
+ }
- # $top_label->set_text( gettext(
- # "Unable to create personal directory - check permissions.") );
- if ( $iline_oc == $iline_oo + 1
- && $iline_io == $iline_ic
- && $token_oo eq '(' )
- {
+ # We can weld the closing brace to its following word ..
+ my $Ko = $K_closing_container->{$closing_seqno};
+ my $Kon;
+ if ( defined($Ko) ) {
+ $Kon = $self->K_next_nonblank($Ko);
+ }
- # Look for following semicolon...
- my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
- my $next_nonblank_type =
- defined($Knext_nonblank)
- ? $rLL->[$Knext_nonblank]->[_TYPE_]
- : 'b';
- if ( $next_nonblank_type eq ';' ) {
+ # ..unless it is a comment
+ if ( defined($Kon) && $rLL->[$Kon]->[_TYPE_] ne '#' ) {
+
+ # OK to weld these two tokens...
+ $rK_weld_right->{$Ko} = $Kon;
+ $rK_weld_left->{$Kon} = $Ko;
+
+ # Set flag that we want to break the next container
+ # so that the cuddled line is balanced.
+ $rbreak_container->{$opening_seqno} = 1
+ if ($CBO);
+
+ # Remember which braces are cuddled.
+ # The closing brace is used to set adjusted indentations.
+ # The opening brace is not yet used but might eventually
+ # be needed in setting adjusted indentation.
+ $ris_cuddled_closing_brace->{$closing_seqno} = 1;
- # Then do not weld if no other containers between inner
- # opening and closing.
- my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
- if ( $Knext_seq_item == $Kinner_closing ) {
- $do_not_weld_rule = 1;
- }
}
- }
- } ## end starting new weld sequence
- else {
+ }
+ else {
- # set the 1-line flag if continuing a weld sequence; fixes b1239
- $is_one_line_weld = ( $iline_oo == $iline_oc );
+ # We are not in a chain. Start a new chain if we see the
+ # starting block type.
+ if ( $rcuddled_block_types->{$block_type} ) {
+ $in_chain{$level} = [ $block_type, $type_sequence ];
+ }
+ else {
+ $block_type = '*';
+ $in_chain{$level} = [ $block_type, $type_sequence ];
+ }
+ }
}
+ elsif ( $token eq '}' ) {
+ if ( $in_chain{$level} ) {
- # DO-NOT-WELD RULE 2:
- # Do not weld an opening paren to an inner one line brace block
- # We will just use old line numbers for this test and require
- # iterations if necessary for convergence
+ # We are in a chain at a closing brace. See if this chain
+ # continues..
+ my $Knn = $self->K_next_code($KK);
+ next unless $Knn;
- # For example, otherwise we could cause the opening paren
- # in the following example to separate from the caller name
- # as here:
+ my $chain_type = $in_chain{$level}->[0];
+ my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
+ if (
+ $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
+ )
+ {
- # $_[0]->code_handler
- # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
+ # Note that we do not weld yet because we must wait until
+ # we we are sure that an opening brace for this follows.
+ $in_chain{$level}->[1] = $type_sequence;
+ }
+ else { $in_chain{$level} = undef }
+ }
+ }
+ }
+ return;
+} ## end sub weld_cuddled_blocks
- # Here is another example where we do not want to weld:
- # $wrapped->add_around_modifier(
- # sub { push @tracelog => 'around 1'; $_[0]->(); } );
+sub find_nested_pairs {
+ my $self = shift;
- # If the one line sub block gets broken due to length or by the
- # user, then we can weld. The result will then be:
- # $wrapped->add_around_modifier( sub {
- # push @tracelog => 'around 1';
- # $_[0]->();
- # } );
+ # This routine is called once per file to do preliminary work needed for
+ # the --weld-nested option. This information is also needed for adding
+ # semicolons.
- # Updated to fix cases b1082 b1102 b1106 b1115:
- # Also, do not weld to an intact inner block if the outer opening token
- # is on a different line. For example, this prevents oscillation
- # between these two states in case b1106:
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
+ my $Num = @{$rLL};
- # return map{
- # ($_,[$self->$_(@_[1..$#_])])
- # }@every;
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
- # return map { (
- # $_, [ $self->$_( @_[ 1 .. $#_ ] ) ]
- # ) } @every;
+ # We define an array of pairs of nested containers
+ my @nested_pairs;
- # The effect of this change on typical code is very minimal. Sometimes
- # it may take a second iteration to converge, but this gives protection
- # against blinking.
- if ( !$do_not_weld_rule
- && !$is_one_line_weld
- && $iline_ic == $iline_io )
- {
- $do_not_weld_rule = 2
- if ( $token_oo eq '(' || $iline_oo != $iline_io );
- }
+ # Names of calling routines can either be marked as 'i' or 'w',
+ # and they may invoke a sub call with an '->'. We will consider
+ # any consecutive string of such types as a single unit when making
+ # weld decisions. We also allow a leading !
+ my $is_name_type = {
+ 'i' => 1,
+ 'w' => 1,
+ 'U' => 1,
+ '->' => 1,
+ '!' => 1,
+ };
- # DO-NOT-WELD RULE 2A:
- # Do not weld an opening asub brace in -lp mode if -asbl is set. This
- # helps avoid instabilities in one-line block formation, and fixes
- # b1241. Previously, the '$is_one_line_weld' flag was tested here
- # instead of -asbl, and this fixed most cases. But it turns out that
- # the real problem was the -asbl flag, and switching to this was
- # necessary to fixe b1268. This also fixes b1269, b1277, b1278.
- if (
- !$do_not_weld_rule
- ##&& $is_one_line_weld
- && $rOpts_line_up_parentheses
- && $rOpts_asbl
- && $ris_asub_block->{$outer_seqno}
- )
- {
- $do_not_weld_rule = '2A';
+ # Loop over all closing container tokens
+ foreach my $inner_seqno ( keys %{$K_closing_container} ) {
+ my $K_inner_closing = $K_closing_container->{$inner_seqno};
+
+ # See if it is immediately followed by another, outer closing token
+ my $K_outer_closing = $K_inner_closing + 1;
+ $K_outer_closing += 1
+ if ( $K_outer_closing < $Num
+ && $rLL->[$K_outer_closing]->[_TYPE_] eq 'b' );
+
+ next unless ( $K_outer_closing < $Num );
+ my $outer_seqno = $rLL->[$K_outer_closing]->[_TYPE_SEQUENCE_];
+ next unless ($outer_seqno);
+ my $token_outer_closing = $rLL->[$K_outer_closing]->[_TOKEN_];
+ next unless ( $is_closing_token{$token_outer_closing} );
+
+ # Simple filter: No commas or semicolons in the outer container
+ my $rtype_count = $self->[_rtype_count_by_seqno_]->{$outer_seqno};
+ if ($rtype_count) {
+ next if ( $rtype_count->{','} || $rtype_count->{';'} );
}
- # DO-NOT-WELD RULE 3:
- # Do not weld if this makes our line too long.
- # Use a tolerance which depends on if the old tokens were welded
- # (fixes cases b746 b748 b749 b750 b752 b753 b754 b755 b756 b758 b759)
- if ( !$do_not_weld_rule ) {
+ # Now we have to check the opening tokens.
+ my $K_outer_opening = $K_opening_container->{$outer_seqno};
+ my $K_inner_opening = $K_opening_container->{$inner_seqno};
+ next unless defined($K_outer_opening) && defined($K_inner_opening);
- # Measure to a little beyond the inner opening token if it is
- # followed by a bare word, which may have unusual line break rules.
+ my $inner_blocktype = $rblock_type_of_seqno->{$inner_seqno};
+ my $outer_blocktype = $rblock_type_of_seqno->{$outer_seqno};
- # NOTE: Originally this was OLD RULE 6: do not weld to a container
- # which is followed on the same line by an unknown bareword token.
- # This can cause blinkers (cases b626, b611). But OK to weld one
- # line welds to fix cases b1057 b1064. For generality, OLD RULE 6
- # has been merged into RULE 3 here to also fix cases b1078 b1091.
+ # Verify that the inner opening token is the next container after the
+ # outer opening token.
+ my $K_io_check = $rLL->[$K_outer_opening]->[_KNEXT_SEQ_ITEM_];
+ next unless defined($K_io_check);
+ if ( $K_io_check != $K_inner_opening ) {
- my $K_for_length = $Kinner_opening;
- my $Knext_io = $self->K_next_nonblank($Kinner_opening);
- next unless ( defined($Knext_io) ); # shouldn't happen
- my $type_io_next = $rLL->[$Knext_io]->[_TYPE_];
+ # The inner opening container does not immediately follow the outer
+ # opening container, but we may still allow a weld if they are
+ # separated by a sub signature. For example, we may have something
+ # like this, where $K_io_check may be at the first 'x' instead of
+ # 'io'. So we need to hop over the signature and see if we arrive
+ # at 'io'.
- # Note: may need to eventually also include other types here,
- # such as 'Z' and 'Y': if ($type_io_next =~ /^[ZYw]$/) {
- if ( $type_io_next eq 'w' ) {
- my $Knext_io2 = $self->K_next_nonblank($Knext_io);
- next unless ( defined($Knext_io2) );
- my $type_io_next2 = $rLL->[$Knext_io2]->[_TYPE_];
- if ( !$type_ok_after_bareword{$type_io_next2} ) {
- $K_for_length = $Knext_io2;
- }
- }
+ # oo io
+ # | x x |
+ # $obj->then( sub ( $code ) {
+ # ...
+ # return $c->render(text => '', status => $code);
+ # } );
+ # | |
+ # ic oc
- # Use a tolerance for welds over multiple lines to avoid blinkers.
- # We can use zero tolerance if it looks like we are working on an
- # existing weld.
- my $tol =
- $is_one_line_weld || $is_multiline_weld
- ? $single_line_tol
- : $multiline_tol;
+ next if ( !$inner_blocktype || $inner_blocktype ne 'sub' );
+ next if $rLL->[$K_io_check]->[_TOKEN_] ne '(';
+ my $seqno_signature = $rLL->[$K_io_check]->[_TYPE_SEQUENCE_];
+ next unless defined($seqno_signature);
+ my $K_signature_closing = $K_closing_container->{$seqno_signature};
+ next unless defined($K_signature_closing);
+ my $K_test = $rLL->[$K_signature_closing]->[_KNEXT_SEQ_ITEM_];
+ next
+ unless ( defined($K_test) && $K_test == $K_inner_opening );
- # By how many characters does this exceed the text window?
- my $excess =
- $self->cumulative_length_before_K($K_for_length) -
- $starting_lentot + 1 + $tol -
- $maximum_text_length;
+ # OK, we have arrived at 'io' in the above diagram. We should put
+ # a limit on the length or complexity of the signature here. There
+ # is no perfect way to do this, one way is to put a limit on token
+ # count. For consistency with older versions, we should allow a
+ # signature with a single variable to weld, but not with
+ # multiple variables. A single variable as in 'sub ($code) {' can
+ # have a $Kdiff of 2 to 4, depending on spacing.
- # Old patch: Use '>=0' instead of '> 0' here to fix cases b995 b998
- # b1000 b1001 b1007 b1008 b1009 b1010 b1011 b1012 b1016 b1017 b1018
- # Revised patch: New tolerance definition allows going back to '> 0'
- # here. This fixes case b1124. See also cases b1087 and b1087a.
- if ( $excess > 0 ) { $do_not_weld_rule = 3 }
+ # But two variables like 'sub ($v1,$v2) {' can have a diff of 4 to
+ # 7, depending on spacing. So to keep formatting consistent with
+ # previous versions, we will also avoid welding if there is a comma
+ # in the signature.
- if (DEBUG_WELD) {
- $Msg .=
-"RULE 3 test: excess length to K=$Kinner_opening is $excess > 0 with tol= $tol ?) \n";
- }
+ my $Kdiff = $K_signature_closing - $K_io_check;
+ next if ( $Kdiff > 4 );
+
+ # backup comma count test; but we cannot get here with Kdiff<=4
+ my $rtc = $self->[_rtype_count_by_seqno_]->{$seqno_signature};
+ next if ( $rtc && $rtc->{','} );
}
- # DO-NOT-WELD RULE 4; implemented for git#10:
- # Do not weld an opening -ce brace if the next container is on a single
- # line, different from the opening brace. (This is very rare). For
- # example, given the following with -ce, we will avoid joining the {
- # and [
+ # Yes .. this is a possible nesting pair.
+ # They can be separated by a small amount.
+ my $K_diff = $K_inner_opening - $K_outer_opening;
- # } else {
- # [ $_, length($_) ]
- # }
+ # Count the number of nonblank characters separating them.
+ # Note: the $nonblank_count includes the inner opening container
+ # but not the outer opening container, so it will be >= 1.
+ if ( $K_diff < 0 ) { next } # Shouldn't happen
+ my $nonblank_count = 0;
+ my $type;
+ my $is_name;
- # because this would produce a terminal one-line block:
+ # Here is an example of a long identifier chain which counts as a
+ # single nonblank here (this spans about 10 K indexes):
+ # if ( !Boucherot::SetOfConnections->new->handler->execute(
+ # ^--K_o_o ^--K_i_o
+ # @array) )
+ my $Kn_first = $K_outer_opening;
+ my $Kn_last_nonblank;
+ my $saw_comment;
- # } else { [ $_, length($_) ] }
+ foreach my $Kn ( $K_outer_opening + 1 .. $K_inner_opening ) {
+ next if ( $rLL->[$Kn]->[_TYPE_] eq 'b' );
+ if ( !$nonblank_count ) { $Kn_first = $Kn }
+ if ( $Kn eq $K_inner_opening ) { $nonblank_count++; last; }
+ $Kn_last_nonblank = $Kn;
- # which may not be what is desired. But given this input:
+ # skip chain of identifier tokens
+ my $last_type = $type;
+ my $last_is_name = $is_name;
+ $type = $rLL->[$Kn]->[_TYPE_];
+ if ( $type eq '#' ) { $saw_comment = 1; last }
+ $is_name = $is_name_type->{$type};
+ next if ( $is_name && $last_is_name );
- # } else { [ $_, length($_) ] }
+ # do not count a possible leading - of bareword hash key
+ next if ( $type eq 'm' && !$last_type );
- # then we will do the weld and retain the one-line block
- if ( !$do_not_weld_rule && $rOpts->{'cuddled-else'} ) {
- my $block_type = $rblock_type_of_seqno->{$outer_seqno};
- if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) {
- my $io_line = $inner_opening->[_LINE_INDEX_];
- my $ic_line = $inner_closing->[_LINE_INDEX_];
- my $oo_line = $outer_opening->[_LINE_INDEX_];
- if ( $oo_line < $io_line && $ic_line == $io_line ) {
- $do_not_weld_rule = 4;
- }
- }
+ $nonblank_count++;
+ last if ( $nonblank_count > 2 );
}
- # DO-NOT-WELD RULE 5: do not include welds excluded by user
- if (
- !$do_not_weld_rule
- && %weld_nested_exclusion_rules
- && ( $self->is_excluded_weld( $Kouter_opening, $starting_new_weld )
- || $self->is_excluded_weld( $Kinner_opening, 0 ) )
- )
+ # Do not weld across a comment .. fix for c058.
+ next if ($saw_comment);
+
+ # Patch for b1104: do not weld to a paren preceded by sort/map/grep
+ # because the special line break rules may cause a blinking state
+ if ( defined($Kn_last_nonblank)
+ && $rLL->[$K_inner_opening]->[_TOKEN_] eq '('
+ && $rLL->[$Kn_last_nonblank]->[_TYPE_] eq 'k' )
{
- $do_not_weld_rule = 5;
+ my $token = $rLL->[$Kn_last_nonblank]->[_TOKEN_];
+
+ # Turn off welding at sort/map/grep (
+ if ( $is_sort_map_grep{$token} ) { $nonblank_count = 10 }
}
- # DO-NOT-WELD RULE 6: This has been merged into RULE 3 above.
+ my $token_oo = $rLL->[$K_outer_opening]->[_TOKEN_];
- # DO-NOT-WELD RULE 7: Do not weld if this conflicts with -bom
- # (case b973)
- if ( !$do_not_weld_rule
- && $rOpts_break_at_old_method_breakpoints
- && $iline_io > $iline_oo )
- {
+ if (
- foreach my $iline ( $iline_oo + 1 .. $iline_io ) {
- my $rK_range = $rlines->[$iline]->{_rK_range};
- next unless defined($rK_range);
- my ( $Kfirst, $Klast ) = @{$rK_range};
- next unless defined($Kfirst);
- if ( $rLL->[$Kfirst]->[_TYPE_] eq '->' ) {
- $do_not_weld_rule = 7;
- last;
- }
- }
- }
+ # 1: adjacent opening containers, like: do {{
+ $nonblank_count == 1
- if ($do_not_weld_rule) {
+ # 2. anonymous sub + prototype or sig: )->then( sub ($code) {
+ # ... but it seems best not to stack two structural blocks, like
+ # this
+ # sub make_anon_with_my_sub { sub {
+ # because it probably hides the structure a little too much.
+ || ( $inner_blocktype
+ && $inner_blocktype eq 'sub'
+ && $rLL->[$Kn_first]->[_TOKEN_] eq 'sub'
+ && !$outer_blocktype )
- # After neglecting a pair, we start measuring from start of point
- # io ... but not if previous type does not like to be separated
- # from its container (fixes case b1184)
- my $Kprev = $self->K_previous_nonblank($Kinner_opening);
- my $type_prev = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'w';
- if ( !$has_tight_paren{$type_prev} ) {
- my $starting_level = $inner_opening->[_LEVEL_];
- my $starting_ci_level = $inner_opening->[_CI_LEVEL_];
- $starting_lentot =
- $self->cumulative_length_before_K($Kinner_opening);
- $maximum_text_length =
- $maximum_text_length_at_level[$starting_level] -
- $starting_ci_level * $rOpts_continuation_indentation;
- }
-
- if (DEBUG_WELD) {
- $Msg .= "Not welding due to RULE $do_not_weld_rule\n";
- print $Msg;
- }
+ # 3. short item following opening paren, like: fun( yyy (
+ || $nonblank_count == 2 && $token_oo eq '('
- # Normally, a broken pair should not decrease indentation of
- # intermediate tokens:
- ## if ( $last_pair_broken ) { next }
- # However, for long strings of welded tokens, such as '{{{{{{...'
- # we will allow broken pairs to also remove indentation.
- # This will keep very long strings of opening and closing
- # braces from marching off to the right. We will do this if the
- # number of tokens in a weld before the broken weld is 4 or more.
- # This rule will mainly be needed for test scripts, since typical
- # welds have fewer than about 4 welded tokens.
- if ( !@welds || @{ $welds[-1] } < 4 ) { next }
+ # 4. weld around fat commas, if requested (git #108), such as
+ # elf->call_method( method_name_foo => {
+ || ( $type eq '=>'
+ && $nonblank_count <= 3
+ && %weld_fat_comma_rules
+ && $weld_fat_comma_rules{$token_oo} )
+ )
+ {
+ push @nested_pairs,
+ [ $inner_seqno, $outer_seqno, $K_inner_closing ];
}
+ next;
+ }
- # otherwise start new weld ...
- elsif ($starting_new_weld) {
- $weld_count_this_start++;
- if (DEBUG_WELD) {
- $Msg .= "Starting new weld\n";
- print $Msg;
- }
- push @welds, $item;
+ # The weld routine expects the pairs in order in the form
+ # [$seqno_inner, $seqno_outer]
+ # And they must be in the same order as the inner closing tokens
+ # (otherwise, welds of three or more adjacent tokens will not work). The K
+ # value of this inner closing token has temporarily been stored for
+ # sorting.
+ @nested_pairs =
- $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
- $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
+ # Drop the K index after sorting (it would cause trouble downstream)
+ map { [ $_->[0], $_->[1] ] }
- $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
- $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
- }
+ # Sort on the K values
+ sort { $a->[2] <=> $b->[2] } @nested_pairs;
- # ... or extend current weld
- else {
- $weld_count_this_start++;
- if (DEBUG_WELD) {
- $Msg .= "Extending current weld\n";
- print $Msg;
- }
- unshift @{ $welds[-1] }, $inner_seqno;
- $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
- $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
+ return \@nested_pairs;
+} ## end sub find_nested_pairs
- $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
- $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
- }
+sub match_paren_control_flag {
- # After welding, reduce the indentation level if all intermediate tokens
- my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
- if ( $dlevel != 0 ) {
- my $Kstart = $Kinner_opening;
- my $Kstop = $Kinner_closing;
- foreach my $KK ( $Kstart .. $Kstop ) {
- $rLL->[$KK]->[_LEVEL_] += $dlevel;
- }
+ # Decide if this paren is excluded by user request:
+ # undef matches no parens
+ # '*' matches all parens
+ # 'k' matches only if the previous nonblank token is a perl builtin
+ # keyword (such as 'if', 'while'),
+ # 'K' matches if 'k' does not, meaning if the previous token is not a
+ # keyword.
+ # 'f' matches if the previous token is a function other than a keyword.
+ # 'F' matches if 'f' does not.
+ # 'w' matches if either 'k' or 'f' match.
+ # 'W' matches if 'w' does not.
+ my ( $self, $seqno, $flag, $rLL ) = @_;
- # Copy opening ci level to help break at = for -lp mode (case b1124)
- $rLL->[$Kinner_opening]->[_CI_LEVEL_] =
- $rLL->[$Kouter_opening]->[_CI_LEVEL_];
+ # Input parameters:
+ # $seqno = sequence number of the container (should be paren)
+ # $flag = the flag which defines what matches
+ # $rLL = an optional alternate token list needed for respace operations
+ $rLL = $self->[_rLL_] unless ( defined($rLL) );
- # But do not copy the closing ci level ... it can give poor results
- ## $rLL->[$Kinner_closing]->[_CI_LEVEL_] =
- ## $rLL->[$Kouter_closing]->[_CI_LEVEL_];
- }
- }
+ return 0 unless ( defined($flag) );
+ return 0 if $flag eq '0';
+ return 1 if $flag eq '1';
+ return 1 if $flag eq '*';
+ return 0 unless ($seqno);
+ my $K_opening = $self->[_K_opening_container_]->{$seqno};
+ return unless ( defined($K_opening) );
- return;
-} ## end sub weld_nested_containers
+ my ( $is_f, $is_k, $is_w );
+ my $Kp = $self->K_previous_nonblank( $K_opening, $rLL );
+ if ( defined($Kp) ) {
+ my $type_p = $rLL->[$Kp]->[_TYPE_];
-sub weld_nested_quotes {
+ # keyword?
+ $is_k = $type_p eq 'k';
- # Called once per file for option '--weld-nested-containers'. This
- # does welding on qw quotes.
+ # function call?
+ $is_f = $self->[_ris_function_call_paren_]->{$seqno};
- my $self = shift;
+ # either keyword or function call?
+ $is_w = $is_k || $is_f;
+ }
+ my $match;
+ if ( $flag eq 'k' ) { $match = $is_k }
+ elsif ( $flag eq 'K' ) { $match = !$is_k }
+ elsif ( $flag eq 'f' ) { $match = $is_f }
+ elsif ( $flag eq 'F' ) { $match = !$is_f }
+ elsif ( $flag eq 'w' ) { $match = $is_w }
+ elsif ( $flag eq 'W' ) { $match = !$is_w }
+ return $match;
+} ## end sub match_paren_control_flag
- # See if quotes are excluded from welding
- my $rflags = $weld_nested_exclusion_rules{'q'};
- return if ( defined($rflags) && defined( $rflags->[1] ) );
+sub is_excluded_weld {
- my $rK_weld_left = $self->[_rK_weld_left_];
- my $rK_weld_right = $self->[_rK_weld_right_];
+ # decide if this weld is excluded by user request
+ my ( $self, $KK, $is_leading ) = @_;
+ my $rLL = $self->[_rLL_];
+ my $rtoken_vars = $rLL->[$KK];
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $rflags = $weld_nested_exclusion_rules{$token};
+ return 0 unless ( defined($rflags) );
+ my $flag = $is_leading ? $rflags->[0] : $rflags->[1];
+ return 0 unless ( defined($flag) );
+ return 1 if $flag eq '*';
+ my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
+ return $self->match_paren_control_flag( $seqno, $flag );
+} ## end sub is_excluded_weld
- my $rLL = $self->[_rLL_];
- return unless ( defined($rLL) && @{$rLL} );
- my $Num = @{$rLL};
+# hashes to simplify welding logic
+my %type_ok_after_bareword;
+my %has_tight_paren;
- my $K_opening_container = $self->[_K_opening_container_];
- my $K_closing_container = $self->[_K_closing_container_];
- my $rlines = $self->[_rlines_];
+BEGIN {
- my $starting_lentot;
- my $maximum_text_length;
+ # types needed for welding RULE 6
+ my @q = qw# => -> { ( [ #;
+ @type_ok_after_bareword{@q} = (1) x scalar(@q);
- my $is_single_quote = sub {
- my ( $Kbeg, $Kend, $quote_type ) = @_;
- foreach my $K ( $Kbeg .. $Kend ) {
- my $test_type = $rLL->[$K]->[_TYPE_];
- next if ( $test_type eq 'b' );
- return if ( $test_type ne $quote_type );
- }
- return 1;
- };
+ # these types do not 'like' to be separated from a following paren
+ @q = qw(w i q Q G C Z U);
+ @{has_tight_paren}{@q} = (1) x scalar(@q);
+} ## end BEGIN
- # Length tolerance - same as previously used for sub weld_nested
- my $multiline_tol =
- 1 + max( $rOpts_indent_columns, $rOpts_continuation_indentation );
+use constant DEBUG_WELD => 0;
- # look for single qw quotes nested in containers
- my $KNEXT = $self->[_K_first_seq_item_];
- while ( defined($KNEXT) ) {
- my $KK = $KNEXT;
- $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
- my $rtoken_vars = $rLL->[$KK];
- my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
- if ( !$outer_seqno ) {
- next if ( $KK == 0 ); # first token in file may not be container
+sub setup_new_weld_measurements {
- # A fault here implies that an error was made in the little loop at
- # the bottom of sub 'respace_tokens' which set the values of
- # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
- # loop control lines above.
- Fault("sequence = $outer_seqno not defined at K=$KK")
- if (DEVEL_MODE);
- next;
- }
+ # Define quantities to check for excess line lengths when welded.
+ # Called by sub 'weld_nested_containers' and sub 'weld_nested_quotes'
- my $token = $rtoken_vars->[_TOKEN_];
- if ( $is_opening_token{$token} ) {
+ my ( $self, $Kouter_opening, $Kinner_opening ) = @_;
- # see if the next token is a quote of some type
- my $Kn = $KK + 1;
- $Kn += 1
- if ( $Kn < $Num && $rLL->[$Kn]->[_TYPE_] eq 'b' );
- next unless ( $Kn < $Num );
+ # Given indexes of outer and inner opening containers to be welded:
+ # $Kouter_opening, $Kinner_opening
- my $next_token = $rLL->[$Kn]->[_TOKEN_];
- my $next_type = $rLL->[$Kn]->[_TYPE_];
- next
- unless ( ( $next_type eq 'q' || $next_type eq 'Q' )
- && $next_token =~ /^q/ );
+ # Returns these variables:
+ # $new_weld_ok = true (new weld ok) or false (do not start new weld)
+ # $starting_indent = starting indentation
+ # $starting_lentot = starting cumulative length
+ # $msg = diagnostic message for debugging
- # The token before the closing container must also be a quote
- my $Kouter_closing = $K_closing_container->{$outer_seqno};
- my $Kinner_closing = $self->K_previous_nonblank($Kouter_closing);
- next unless $rLL->[$Kinner_closing]->[_TYPE_] eq $next_type;
+ my $rLL = $self->[_rLL_];
+ my $rlines = $self->[_rlines_];
- # This is an inner opening container
- my $Kinner_opening = $Kn;
+ my $starting_level;
+ my $starting_ci;
+ my $starting_lentot;
+ my $maximum_text_length;
+ my $msg = EMPTY_STRING;
- # Do not weld to single-line quotes. Nothing is gained, and it may
- # look bad.
- next if ( $Kinner_closing == $Kinner_opening );
+ my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
+ my $rK_range = $rlines->[$iline_oo]->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
- # Only weld to quotes delimited with container tokens. This is
- # because welding to arbitrary quote delimiters can produce code
- # which is less readable than without welding.
- my $closing_delimiter =
- substr( $rLL->[$Kinner_closing]->[_TOKEN_], -1, 1 );
- next
- unless ( $is_closing_token{$closing_delimiter}
- || $closing_delimiter eq '>' );
+ #-------------------------------------------------------------------------
+ # We now define a reference index, '$Kref', from which to start measuring
+ # This choice turns out to be critical for keeping welds stable during
+ # iterations, so we go through a number of STEPS...
+ #-------------------------------------------------------------------------
- # Now make sure that there is just a single quote in the container
- next
- unless (
- $is_single_quote->(
+ # STEP 1: Our starting guess is to use measure from the first token of the
+ # current line. This is usually a good guess.
+ my $Kref = $Kfirst;
+
+ # STEP 2: See if we should go back a little farther
+ my $Kprev = $self->K_previous_nonblank($Kfirst);
+ if ( defined($Kprev) ) {
+
+ # Avoid measuring from between an opening paren and a previous token
+ # which should stay close to it ... fixes b1185
+ my $token_oo = $rLL->[$Kouter_opening]->[_TOKEN_];
+ my $type_prev = $rLL->[$Kprev]->[_TYPE_];
+ if ( $Kouter_opening == $Kfirst
+ && $token_oo eq '('
+ && $has_tight_paren{$type_prev} )
+ {
+ $Kref = $Kprev;
+ }
+
+ # Back up and count length from a token like '=' or '=>' if -lp
+ # is used (this fixes b520)
+ # ...or if a break is wanted before there
+ elsif ($rOpts_line_up_parentheses
+ || $want_break_before{$type_prev} )
+ {
+
+ # If there are other sequence items between the start of this line
+ # and the opening token in question, then do not include tokens on
+ # the previous line in length calculations. This check added to
+ # fix case b1174 which had a '?' on the line
+ my $no_previous_seq_item = $Kref == $Kouter_opening
+ || $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_] == $Kouter_opening;
+
+ if ( $no_previous_seq_item
+ && substr( $type_prev, 0, 1 ) eq '=' )
+ {
+ $Kref = $Kprev;
+
+ # Fix for b1144 and b1112: backup to the first nonblank
+ # character before the =>, or to the start of its line.
+ if ( $type_prev eq '=>' ) {
+ my $iline_prev = $rLL->[$Kprev]->[_LINE_INDEX_];
+ my $rK_range_prev = $rlines->[$iline_prev]->{_rK_range};
+ my ( $Kfirst_prev, $Klast_prev ) = @{$rK_range_prev};
+ foreach my $KK ( reverse( $Kfirst_prev .. $Kref - 1 ) ) {
+ next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
+ $Kref = $KK;
+ last;
+ }
+ }
+ }
+ }
+ }
+
+ # STEP 3: Now look ahead for a ternary and, if found, use it.
+ # This fixes case b1182.
+ # Also look for a ')' at the same level and, if found, use it.
+ # This fixes case b1224.
+ if ( $Kref < $Kouter_opening ) {
+ my $Knext = $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_];
+ my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
+ while ( $Knext < $Kouter_opening ) {
+ if ( $rLL->[$Knext]->[_LEVEL_] == $level_oo ) {
+ if ( $is_ternary{ $rLL->[$Knext]->[_TYPE_] }
+ || $rLL->[$Knext]->[_TOKEN_] eq ')' )
+ {
+ $Kref = $Knext;
+ last;
+ }
+ }
+ $Knext = $rLL->[$Knext]->[_KNEXT_SEQ_ITEM_];
+ }
+ }
+
+ # Define the starting measurements we will need
+ $starting_lentot =
+ $Kref <= 0 ? 0 : $rLL->[ $Kref - 1 ]->[_CUMULATIVE_LENGTH_];
+ $starting_level = $rLL->[$Kref]->[_LEVEL_];
+ $starting_ci = $rLL->[$Kref]->[_CI_LEVEL_];
+
+ $maximum_text_length = $maximum_text_length_at_level[$starting_level] -
+ $starting_ci * $rOpts_continuation_indentation;
+
+ # STEP 4: Switch to using the outer opening token as the reference
+ # point if a line break before it would make a longer line.
+ # Fixes case b1055 and is also an alternate fix for b1065.
+ my $starting_level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
+ if ( $Kref < $Kouter_opening ) {
+ my $starting_ci_oo = $rLL->[$Kouter_opening]->[_CI_LEVEL_];
+ my $lentot_oo = $rLL->[ $Kouter_opening - 1 ]->[_CUMULATIVE_LENGTH_];
+ my $maximum_text_length_oo =
+ $maximum_text_length_at_level[$starting_level_oo] -
+ $starting_ci_oo * $rOpts_continuation_indentation;
+
+ # The excess length to any cumulative length K = lenK is either
+ # $excess = $lenk - ($lentot + $maximum_text_length), or
+ # $excess = $lenk - ($lentot_oo + $maximum_text_length_oo),
+ # so the worst case (maximum excess) corresponds to the configuration
+ # with minimum value of the sum: $lentot + $maximum_text_length
+ if ( $lentot_oo + $maximum_text_length_oo <
+ $starting_lentot + $maximum_text_length )
+ {
+ $Kref = $Kouter_opening;
+ $starting_level = $starting_level_oo;
+ $starting_ci = $starting_ci_oo;
+ $starting_lentot = $lentot_oo;
+ $maximum_text_length = $maximum_text_length_oo;
+ }
+ }
+
+ my $new_weld_ok = 1;
+
+ # STEP 5, fix b1020: Avoid problem areas with the -wn -lp combination. The
+ # combination -wn -lp -dws -naws does not work well and can cause blinkers.
+ # It will probably only occur in stress testing. For this situation we
+ # will only start a new weld if we start at a 'good' location.
+ # - Added 'if' to fix case b1032.
+ # - Require blank before certain previous characters to fix b1111.
+ # - Add ';' to fix case b1139
+ # - Convert from '$ok_to_weld' to '$new_weld_ok' to fix b1162.
+ # - relaxed constraints for b1227
+ # - added skip if type is 'q' for b1349 and b1350 b1351 b1352 b1353
+ # - added skip if type is 'Q' for b1447
+ if ( $starting_ci
+ && $rOpts_line_up_parentheses
+ && $rOpts_delete_old_whitespace
+ && !$rOpts_add_whitespace
+ && $rLL->[$Kinner_opening]->[_TYPE_] ne 'q'
+ && $rLL->[$Kinner_opening]->[_TYPE_] ne 'Q'
+ && defined($Kprev) )
+ {
+ my $type_first = $rLL->[$Kfirst]->[_TYPE_];
+ my $token_first = $rLL->[$Kfirst]->[_TOKEN_];
+ my $type_prev = $rLL->[$Kprev]->[_TYPE_];
+ my $type_pp = 'b';
+ if ( $Kprev >= 0 ) { $type_pp = $rLL->[ $Kprev - 1 ]->[_TYPE_] }
+ unless (
+ $type_prev =~ /^[\,\.\;]/
+ || $type_prev =~ /^[=\{\[\(\L]/
+ && ( $type_pp eq 'b' || $type_pp eq '}' || $type_first eq 'k' )
+ || $type_first =~ /^[=\,\.\;\{\[\(\L]/
+ || $type_first eq '||'
+ || (
+ $type_first eq 'k'
+ && ( $token_first eq 'if'
+ || $token_first eq 'or' )
+ )
+ )
+ {
+ $msg =
+"Skipping weld: poor break with -lp and ci at type_first='$type_first' type_prev='$type_prev' type_pp=$type_pp\n";
+ $new_weld_ok = 0;
+ }
+ }
+ return ( $new_weld_ok, $maximum_text_length, $starting_lentot, $msg );
+} ## end sub setup_new_weld_measurements
+
+sub excess_line_length_for_Krange {
+ my ( $self, $Kfirst, $Klast ) = @_;
+
+ # returns $excess_length =
+ # by how many characters a line composed of tokens $Kfirst .. $Klast will
+ # exceed the allowed line length
+
+ my $rLL = $self->[_rLL_];
+ my $length_before_Kfirst =
+ $Kfirst <= 0
+ ? 0
+ : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
+
+ # backup before a side comment if necessary
+ my $Kend = $Klast;
+ if ( $rOpts_ignore_side_comment_lengths
+ && $rLL->[$Klast]->[_TYPE_] eq '#' )
+ {
+ my $Kprev = $self->K_previous_nonblank($Klast);
+ if ( defined($Kprev) && $Kprev >= $Kfirst ) { $Kend = $Kprev }
+ }
+
+ # get the length of the text
+ my $length = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - $length_before_Kfirst;
+
+ # get the size of the text window
+ my $level = $rLL->[$Kfirst]->[_LEVEL_];
+ my $ci_level = $rLL->[$Kfirst]->[_CI_LEVEL_];
+ my $max_text_length = $maximum_text_length_at_level[$level] -
+ $ci_level * $rOpts_continuation_indentation;
+
+ my $excess_length = $length - $max_text_length;
+
+ DEBUG_WELD
+ && print
+"Kfirst=$Kfirst, Klast=$Klast, Kend=$Kend, level=$level, ci=$ci_level, max_text_length=$max_text_length, length=$length\n";
+ return ($excess_length);
+} ## end sub excess_line_length_for_Krange
+
+sub weld_nested_containers {
+ my ($self) = @_;
+
+ # Called once per file for option '--weld-nested-containers'
+
+ my $rK_weld_left = $self->[_rK_weld_left_];
+ my $rK_weld_right = $self->[_rK_weld_right_];
+
+ # This routine implements the -wn flag by "welding together"
+ # the nested closing and opening tokens which were previously
+ # identified by sub 'find_nested_pairs'. "welding" simply
+ # involves setting certain hash values which will be checked
+ # later during formatting.
+
+ my $rLL = $self->[_rLL_];
+ my $rlines = $self->[_rlines_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my $ris_asub_block = $self->[_ris_asub_block_];
+ my $rmax_vertical_tightness = $self->[_rmax_vertical_tightness_];
+
+ my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
+
+ # Find nested pairs of container tokens for any welding.
+ my $rnested_pairs = $self->find_nested_pairs();
+
+ # Return unless there are nested pairs to weld
+ return unless defined($rnested_pairs) && @{$rnested_pairs};
+
+ # NOTE: It would be nice to apply RULE 5 right here by deleting unwanted
+ # pairs. But it isn't clear if this is possible because we don't know
+ # which sequences might actually start a weld.
+
+ my $rOpts_break_at_old_method_breakpoints =
+ $rOpts->{'break-at-old-method-breakpoints'};
+
+ # This array will hold the sequence numbers of the tokens to be welded.
+ my @welds;
+
+ # Variables needed for estimating line lengths
+ my $maximum_text_length; # maximum spaces available for text
+ my $starting_lentot; # cumulative text to start of current line
+
+ my $iline_outer_opening = -1;
+ my $weld_count_this_start = 0;
+
+ # OLD: $single_line_tol added to fix cases b1180 b1181
+ # = $rOpts_continuation_indentation > $rOpts_indent_columns ? 1 : 0;
+ # NEW: $single_line_tol=0; fixes b1212 and b1180-1181 work now
+ my $single_line_tol = 0;
+
+ my $multiline_tol = $single_line_tol + 1 +
+ max( $rOpts_indent_columns, $rOpts_continuation_indentation );
+
+ # Define a welding cutoff level: do not start a weld if the inside
+ # container level equals or exceeds this level.
+
+ # We use the minimum of two criteria, either of which may be more
+ # restrictive. The 'alpha' value is more restrictive in (b1206, b1252) and
+ # the 'beta' value is more restrictive in other cases (b1243).
+ # Reduced beta term from beta+3 to beta+2 to fix b1401. Previously:
+ # my $weld_cutoff_level = min($stress_level_alpha, $stress_level_beta + 2);
+ # This is now '$high_stress_level'.
+
+ # The vertical tightness flags can throw off line length calculations.
+ # This patch was added to fix instability issue b1284.
+ # It works to always use a tol of 1 for 1 line block length tests, but
+ # this restricted value keeps test case wn6.wn working as before.
+ # It may be necessary to include '[' and '{' here in the future.
+ my $one_line_tol = $opening_vertical_tightness{'('} ? 1 : 0;
+
+ # Abbreviations:
+ # _oo=outer opening, i.e. first of { {
+ # _io=inner opening, i.e. second of { {
+ # _oc=outer closing, i.e. second of } {
+ # _ic=inner closing, i.e. first of } }
+
+ my $previous_pair;
+
+ # Main loop over nested pairs...
+ # We are working from outermost to innermost pairs so that
+ # level changes will be complete when we arrive at the inner pairs.
+ while ( my $item = pop( @{$rnested_pairs} ) ) {
+ my ( $inner_seqno, $outer_seqno ) = @{$item};
+
+ my $Kouter_opening = $K_opening_container->{$outer_seqno};
+ my $Kinner_opening = $K_opening_container->{$inner_seqno};
+ my $Kouter_closing = $K_closing_container->{$outer_seqno};
+ my $Kinner_closing = $K_closing_container->{$inner_seqno};
+
+ # RULE: do not weld if inner container has <= 3 tokens unless the next
+ # token is a heredoc (so we know there will be multiple lines)
+ if ( $Kinner_closing - $Kinner_opening <= 4 ) {
+ my $Knext_nonblank = $self->K_next_nonblank($Kinner_opening);
+ next unless defined($Knext_nonblank);
+ my $type = $rLL->[$Knext_nonblank]->[_TYPE_];
+ next unless ( $type eq 'h' );
+ }
+
+ my $outer_opening = $rLL->[$Kouter_opening];
+ my $inner_opening = $rLL->[$Kinner_opening];
+ my $outer_closing = $rLL->[$Kouter_closing];
+ my $inner_closing = $rLL->[$Kinner_closing];
+
+ # RULE: do not weld to a hash brace. The reason is that it has a very
+ # strong bond strength to the next token, so a line break after it
+ # may not work. Previously we allowed welding to something like @{
+ # but that caused blinking states (cases b751, b779).
+ if ( $inner_opening->[_TYPE_] eq 'L' ) {
+ next;
+ }
+
+ # RULE: do not weld to a square bracket which does not contain commas
+ if ( $inner_opening->[_TYPE_] eq '[' ) {
+ my $rtype_count = $self->[_rtype_count_by_seqno_]->{$inner_seqno};
+ next unless ( $rtype_count && $rtype_count->{','} );
+
+ # Do not weld if there is text before a '[' such as here:
+ # curr_opt ( @beg [2,5] )
+ # It will not break into the desired sandwich structure.
+ # This fixes case b109, 110.
+ my $Kdiff = $Kinner_opening - $Kouter_opening;
+ next if ( $Kdiff > 2 );
+ next
+ if ( $Kdiff == 2
+ && $rLL->[ $Kouter_opening + 1 ]->[_TYPE_] ne 'b' );
+
+ }
+
+ # RULE: Avoid welding under stress. The idea is that we need to have a
+ # little space* within a welded container to avoid instability. Note
+ # that after each weld the level values are reduced, so long multiple
+ # welds can still be made. This rule will seldom be a limiting factor
+ # in actual working code. Fixes b1206, b1243.
+ my $inner_level = $inner_opening->[_LEVEL_];
+ if ( $inner_level >= $high_stress_level ) { next }
+
+ # Set flag saying if this pair starts a new weld
+ my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
+
+ # Set flag saying if this pair is adjacent to the previous nesting pair
+ # (even if previous pair was rejected as a weld)
+ my $touch_previous_pair =
+ defined($previous_pair) && $outer_seqno == $previous_pair->[0];
+ $previous_pair = $item;
+
+ my $do_not_weld_rule = 0;
+ my $Msg = EMPTY_STRING;
+ my $is_one_line_weld;
+
+ my $iline_oo = $outer_opening->[_LINE_INDEX_];
+ my $iline_io = $inner_opening->[_LINE_INDEX_];
+ my $iline_ic = $inner_closing->[_LINE_INDEX_];
+ my $iline_oc = $outer_closing->[_LINE_INDEX_];
+ my $token_oo = $outer_opening->[_TOKEN_];
+ my $token_io = $inner_opening->[_TOKEN_];
+
+ # DO-NOT-WELD RULE 7: Do not weld if this conflicts with -bom
+ # Added for case b973. Moved here from below to fix b1423.
+ if ( !$do_not_weld_rule
+ && $rOpts_break_at_old_method_breakpoints
+ && $iline_io > $iline_oo )
+ {
+
+ foreach my $iline ( $iline_oo + 1 .. $iline_io ) {
+ my $rK_range = $rlines->[$iline]->{_rK_range};
+ next unless defined($rK_range);
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ next unless defined($Kfirst);
+ if ( $rLL->[$Kfirst]->[_TYPE_] eq '->' ) {
+ $do_not_weld_rule = 7;
+ last;
+ }
+ }
+ }
+ next if ($do_not_weld_rule);
+
+ # Turn off vertical tightness at possible one-line welds. Fixes b1402,
+ # b1419, b1421, b1424, b1425. This also fixes issues b1338, b1339,
+ # b1340, b1341, b1342, b1343, which previously used a separate fix.
+ # Issue c161 is the latest and simplest check, using
+ # $iline_ic==$iline_io as the test.
+ if ( %opening_vertical_tightness
+ && $iline_ic == $iline_io
+ && $opening_vertical_tightness{$token_oo} )
+ {
+ $rmax_vertical_tightness->{$outer_seqno} = 0;
+ }
+
+ my $is_multiline_weld =
+ $iline_oo == $iline_io
+ && $iline_ic == $iline_oc
+ && $iline_io != $iline_ic;
+
+ if (DEBUG_WELD) {
+ my $len_oo = $rLL->[$Kouter_opening]->[_CUMULATIVE_LENGTH_];
+ my $len_io = $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_];
+ $Msg .= <<EOM;
+Pair seqo=$outer_seqno seqi=$inner_seqno lines: loo=$iline_oo lio=$iline_io lic=$iline_ic loc=$iline_oc
+Koo=$Kouter_opening Kio=$Kinner_opening Kic=$Kinner_closing Koc=$Kouter_closing lenoo=$len_oo lenio=$len_io
+tokens '$token_oo' .. '$token_io'
+EOM
+ }
+
+ # DO-NOT-WELD RULE 0:
+ # Avoid a new paren-paren weld if inner parens are 'sheared' (separated
+ # by one line). This can produce instabilities (fixes b1250 b1251
+ # 1256).
+ if ( !$is_multiline_weld
+ && $iline_ic == $iline_io + 1
+ && $token_oo eq '('
+ && $token_io eq '(' )
+ {
+ if (DEBUG_WELD) {
+ $Msg .= "RULE 0: Not welding due to sheared inner parens\n";
+ print $Msg;
+ }
+ next;
+ }
+
+ # If this pair is not adjacent to the previous pair (skipped or not),
+ # then measure lengths from the start of line of oo.
+ if (
+ !$touch_previous_pair
+
+ # Also do this if restarting at a new line; fixes case b965, s001
+ || ( !$weld_count_this_start && $iline_oo > $iline_outer_opening )
+ )
+ {
+
+ # Remember the line we are using as a reference
+ $iline_outer_opening = $iline_oo;
+ $weld_count_this_start = 0;
+
+ ( my $new_weld_ok, $maximum_text_length, $starting_lentot, my $msg )
+ = $self->setup_new_weld_measurements( $Kouter_opening,
+ $Kinner_opening );
+
+ if (
+ !$new_weld_ok
+ && ( $iline_oo != $iline_io
+ || $iline_ic != $iline_oc )
+ )
+ {
+ if (DEBUG_WELD) { print $msg}
+ next;
+ }
+
+ my $rK_range = $rlines->[$iline_oo]->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+
+ # An existing one-line weld is a line in which
+ # (1) the containers are all on one line, and
+ # (2) the line does not exceed the allowable length
+ if ( $iline_oo == $iline_oc ) {
+
+ # All the tokens are on one line, now check their length.
+ # Start with the full line index range. We will reduce this
+ # in the coding below in some cases.
+ my $Kstart = $Kfirst;
+ my $Kstop = $Klast;
+
+ # Note that the following minimal choice for measuring will
+ # work and will not cause any instabilities because it is
+ # invariant:
+
+ ## my $Kstart = $Kouter_opening;
+ ## my $Kstop = $Kouter_closing;
+
+ # But that can lead to some undesirable welds. So a little
+ # more complicated method has been developed.
+
+ # We are trying to avoid creating bad two-line welds when we are
+ # working on long, previously un-welded input text, such as
+
+ # INPUT (example of a long input line weld candidate):
+ ## $mutation->transpos( $self->RNA->position($mutation->label, $atg_label));
+
+ # GOOD two-line break: (not welded; result marked too long):
+ ## $mutation->transpos(
+ ## $self->RNA->position($mutation->label, $atg_label));
+
+ # BAD two-line break: (welded; result if we weld):
+ ## $mutation->transpos($self->RNA->position(
+ ## $mutation->label, $atg_label));
+
+ # We can only get an approximate estimate of the final length,
+ # since the line breaks may change, and for -lp mode because
+ # even the indentation is not yet known.
+
+ my $level_first = $rLL->[$Kfirst]->[_LEVEL_];
+ my $level_last = $rLL->[$Klast]->[_LEVEL_];
+ my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
+ my $level_oc = $rLL->[$Kouter_closing]->[_LEVEL_];
+
+ # - measure to the end of the original line if balanced
+ # - measure to the closing container if unbalanced (fixes b1230)
+ #if ( $level_first != $level_last ) { $Kstop = $Kouter_closing }
+ if ( $level_oc > $level_last ) { $Kstop = $Kouter_closing }
+
+ # - measure from the start of the original line if balanced
+ # - measure from the most previous token with same level
+ # if unbalanced (b1232)
+ if ( $Kouter_opening > $Kfirst && $level_oo > $level_first ) {
+ $Kstart = $Kouter_opening;
+
+ foreach
+ my $KK ( reverse( $Kfirst + 1 .. $Kouter_opening - 1 ) )
+ {
+ next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
+ last if ( $rLL->[$KK]->[_LEVEL_] < $level_oo );
+ $Kstart = $KK;
+ }
+ }
+
+ my $excess =
+ $self->excess_line_length_for_Krange( $Kstart, $Kstop );
+
+ # Coding simplified here for case b1219.
+ # Increased tol from 0 to 1 when pvt>0 to fix b1284.
+ $is_one_line_weld = $excess <= $one_line_tol;
+ }
+
+ # DO-NOT-WELD RULE 1:
+ # Do not weld something that looks like the start of a two-line
+ # function call, like this: <<snippets/wn6.in>>
+ # $trans->add_transformation(
+ # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
+ # We will look for a semicolon after the closing paren.
+
+ # We want to weld something complex, like this though
+ # my $compass = uc( opposite_direction( line_to_canvas_direction(
+ # @{ $coords[0] }, @{ $coords[1] } ) ) );
+ # Otherwise we will get a 'blinker'. For example, the following
+ # would become a blinker without this rule:
+ # $Self->_Add( $SortOrderDisplay{ $Field
+ # ->GenerateFieldForSelectSQL() } );
+ # But it is okay to weld a two-line statement if it looks like
+ # it was already welded, meaning that the two opening containers are
+ # on a different line that the two closing containers. This is
+ # necessary to prevent blinking of something like this with
+ # perltidy -wn -pbp (starting indentation two levels deep):
+
+ # $top_label->set_text( gettext(
+ # "Unable to create personal directory - check permissions.") );
+ if ( $iline_oc == $iline_oo + 1
+ && $iline_io == $iline_ic
+ && $token_oo eq '(' )
+ {
+
+ # Look for following semicolon...
+ my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
+ my $next_nonblank_type =
+ defined($Knext_nonblank)
+ ? $rLL->[$Knext_nonblank]->[_TYPE_]
+ : 'b';
+ if ( $next_nonblank_type eq ';' ) {
+
+ # Then do not weld if no other containers between inner
+ # opening and closing.
+ my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
+ if ( $Knext_seq_item == $Kinner_closing ) {
+ $do_not_weld_rule = 1;
+ }
+ }
+ }
+ } ## end starting new weld sequence
+
+ else {
+
+ # set the 1-line flag if continuing a weld sequence; fixes b1239
+ $is_one_line_weld = ( $iline_oo == $iline_oc );
+ }
+
+ # DO-NOT-WELD RULE 2:
+ # Do not weld an opening paren to an inner one line brace block
+ # We will just use old line numbers for this test and require
+ # iterations if necessary for convergence
+
+ # For example, otherwise we could cause the opening paren
+ # in the following example to separate from the caller name
+ # as here:
+
+ # $_[0]->code_handler
+ # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
+
+ # Here is another example where we do not want to weld:
+ # $wrapped->add_around_modifier(
+ # sub { push @tracelog => 'around 1'; $_[0]->(); } );
+
+ # If the one line sub block gets broken due to length or by the
+ # user, then we can weld. The result will then be:
+ # $wrapped->add_around_modifier( sub {
+ # push @tracelog => 'around 1';
+ # $_[0]->();
+ # } );
+
+ # Updated to fix cases b1082 b1102 b1106 b1115:
+ # Also, do not weld to an intact inner block if the outer opening token
+ # is on a different line. For example, this prevents oscillation
+ # between these two states in case b1106:
+
+ # return map{
+ # ($_,[$self->$_(@_[1..$#_])])
+ # }@every;
+
+ # return map { (
+ # $_, [ $self->$_( @_[ 1 .. $#_ ] ) ]
+ # ) } @every;
+
+ # The effect of this change on typical code is very minimal. Sometimes
+ # it may take a second iteration to converge, but this gives protection
+ # against blinking.
+ if ( !$do_not_weld_rule
+ && !$is_one_line_weld
+ && $iline_ic == $iline_io )
+ {
+ $do_not_weld_rule = 2
+ if ( $token_oo eq '(' || $iline_oo != $iline_io );
+ }
+
+ # DO-NOT-WELD RULE 2A:
+ # Do not weld an opening asub brace in -lp mode if -asbl is set. This
+ # helps avoid instabilities in one-line block formation, and fixes
+ # b1241. Previously, the '$is_one_line_weld' flag was tested here
+ # instead of -asbl, and this fixed most cases. But it turns out that
+ # the real problem was the -asbl flag, and switching to this was
+ # necessary to fixe b1268. This also fixes b1269, b1277, b1278.
+ if ( !$do_not_weld_rule
+ && $rOpts_line_up_parentheses
+ && $rOpts_asbl
+ && $ris_asub_block->{$outer_seqno} )
+ {
+ $do_not_weld_rule = '2A';
+ }
+
+ # DO-NOT-WELD RULE 3:
+ # Do not weld if this makes our line too long.
+ # Use a tolerance which depends on if the old tokens were welded
+ # (fixes cases b746 b748 b749 b750 b752 b753 b754 b755 b756 b758 b759)
+ if ( !$do_not_weld_rule ) {
+
+ # Measure to a little beyond the inner opening token if it is
+ # followed by a bare word, which may have unusual line break rules.
+
+ # NOTE: Originally this was OLD RULE 6: do not weld to a container
+ # which is followed on the same line by an unknown bareword token.
+ # This can cause blinkers (cases b626, b611). But OK to weld one
+ # line welds to fix cases b1057 b1064. For generality, OLD RULE 6
+ # has been merged into RULE 3 here to also fix cases b1078 b1091.
+
+ my $K_for_length = $Kinner_opening;
+ my $Knext_io = $self->K_next_nonblank($Kinner_opening);
+ next unless ( defined($Knext_io) ); # shouldn't happen
+ my $type_io_next = $rLL->[$Knext_io]->[_TYPE_];
+
+ # Note: may need to eventually also include other types here,
+ # such as 'Z' and 'Y': if ($type_io_next =~ /^[ZYw]$/) {
+ if ( $type_io_next eq 'w' ) {
+ my $Knext_io2 = $self->K_next_nonblank($Knext_io);
+ next unless ( defined($Knext_io2) );
+ my $type_io_next2 = $rLL->[$Knext_io2]->[_TYPE_];
+ if ( !$type_ok_after_bareword{$type_io_next2} ) {
+ $K_for_length = $Knext_io2;
+ }
+ }
+
+ # Use a tolerance for welds over multiple lines to avoid blinkers.
+ # We can use zero tolerance if it looks like we are working on an
+ # existing weld.
+ my $tol =
+ $is_one_line_weld || $is_multiline_weld
+ ? $single_line_tol
+ : $multiline_tol;
+
+ # By how many characters does this exceed the text window?
+ my $excess =
+ $self->cumulative_length_before_K($K_for_length) -
+ $starting_lentot + 1 + $tol -
+ $maximum_text_length;
+
+ # Old patch: Use '>=0' instead of '> 0' here to fix cases b995 b998
+ # b1000 b1001 b1007 b1008 b1009 b1010 b1011 b1012 b1016 b1017 b1018
+ # Revised patch: New tolerance definition allows going back to '> 0'
+ # here. This fixes case b1124. See also cases b1087 and b1087a.
+ if ( $excess > 0 ) { $do_not_weld_rule = 3 }
+
+ if (DEBUG_WELD) {
+ $Msg .=
+"RULE 3 test: excess length to K=$Kinner_opening is $excess > 0 with tol= $tol ?) \n";
+ }
+ }
+
+ # DO-NOT-WELD RULE 4; implemented for git#10:
+ # Do not weld an opening -ce brace if the next container is on a single
+ # line, different from the opening brace. (This is very rare). For
+ # example, given the following with -ce, we will avoid joining the {
+ # and [
+
+ # } else {
+ # [ $_, length($_) ]
+ # }
+
+ # because this would produce a terminal one-line block:
+
+ # } else { [ $_, length($_) ] }
+
+ # which may not be what is desired. But given this input:
+
+ # } else { [ $_, length($_) ] }
+
+ # then we will do the weld and retain the one-line block
+ if ( !$do_not_weld_rule && $rOpts->{'cuddled-else'} ) {
+ my $block_type = $rblock_type_of_seqno->{$outer_seqno};
+ if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) {
+ my $io_line = $inner_opening->[_LINE_INDEX_];
+ my $ic_line = $inner_closing->[_LINE_INDEX_];
+ my $oo_line = $outer_opening->[_LINE_INDEX_];
+ if ( $oo_line < $io_line && $ic_line == $io_line ) {
+ $do_not_weld_rule = 4;
+ }
+ }
+ }
+
+ # DO-NOT-WELD RULE 5: do not include welds excluded by user
+ if (
+ !$do_not_weld_rule
+ && %weld_nested_exclusion_rules
+ && ( $self->is_excluded_weld( $Kouter_opening, $starting_new_weld )
+ || $self->is_excluded_weld( $Kinner_opening, 0 ) )
+ )
+ {
+ $do_not_weld_rule = 5;
+ }
+
+ # DO-NOT-WELD RULE 6: This has been merged into RULE 3 above.
+
+ if ($do_not_weld_rule) {
+
+ # After neglecting a pair, we start measuring from start of point
+ # io ... but not if previous type does not like to be separated
+ # from its container (fixes case b1184)
+ my $Kprev = $self->K_previous_nonblank($Kinner_opening);
+ my $type_prev = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'w';
+ if ( !$has_tight_paren{$type_prev} ) {
+ my $starting_level = $inner_opening->[_LEVEL_];
+ my $starting_ci_level = $inner_opening->[_CI_LEVEL_];
+ $starting_lentot =
+ $self->cumulative_length_before_K($Kinner_opening);
+ $maximum_text_length =
+ $maximum_text_length_at_level[$starting_level] -
+ $starting_ci_level * $rOpts_continuation_indentation;
+ }
+
+ if (DEBUG_WELD) {
+ $Msg .= "Not welding due to RULE $do_not_weld_rule\n";
+ print $Msg;
+ }
+
+ # Normally, a broken pair should not decrease indentation of
+ # intermediate tokens:
+ ## if ( $last_pair_broken ) { next }
+ # However, for long strings of welded tokens, such as '{{{{{{...'
+ # we will allow broken pairs to also remove indentation.
+ # This will keep very long strings of opening and closing
+ # braces from marching off to the right. We will do this if the
+ # number of tokens in a weld before the broken weld is 4 or more.
+ # This rule will mainly be needed for test scripts, since typical
+ # welds have fewer than about 4 welded tokens.
+ if ( !@welds || @{ $welds[-1] } < 4 ) { next }
+ }
+
+ # otherwise start new weld ...
+ elsif ($starting_new_weld) {
+ $weld_count_this_start++;
+ if (DEBUG_WELD) {
+ $Msg .= "Starting new weld\n";
+ print $Msg;
+ }
+ push @welds, $item;
+
+ $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
+ $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
+
+ $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
+ $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
+ }
+
+ # ... or extend current weld
+ else {
+ $weld_count_this_start++;
+ if (DEBUG_WELD) {
+ $Msg .= "Extending current weld\n";
+ print $Msg;
+ }
+ unshift @{ $welds[-1] }, $inner_seqno;
+ $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
+ $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
+
+ $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
+ $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
+
+ # Keep a broken container broken at multiple welds. This might
+ # also be useful for simple welds, but for now it is restricted
+ # to multiple welds to minimize changes to existing coding. This
+ # fixes b1429, b1430. Updated for issue c198: but allow a
+ # line differences of 1 (simple shear) so that a simple shear
+ # can remain or become a single line.
+ if ( $iline_ic - $iline_io > 1 ) {
+
+ # Only set this break if it is the last possible weld in this
+ # chain. This will keep some extreme test cases unchanged.
+ my $is_chain_end = !@{$rnested_pairs}
+ || $rnested_pairs->[-1]->[1] != $inner_seqno;
+ if ($is_chain_end) {
+ $self->[_rbreak_container_]->{$inner_seqno} = 1;
+ }
+ }
+ }
+
+ # After welding, reduce the indentation level if all intermediate tokens
+ my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
+ if ( $dlevel != 0 ) {
+ my $Kstart = $Kinner_opening;
+ my $Kstop = $Kinner_closing;
+ foreach my $KK ( $Kstart .. $Kstop ) {
+ $rLL->[$KK]->[_LEVEL_] += $dlevel;
+ }
+
+ # Copy opening ci level to help break at = for -lp mode (case b1124)
+ $rLL->[$Kinner_opening]->[_CI_LEVEL_] =
+ $rLL->[$Kouter_opening]->[_CI_LEVEL_];
+
+ # But do not copy the closing ci level ... it can give poor results
+ ## $rLL->[$Kinner_closing]->[_CI_LEVEL_] =
+ ## $rLL->[$Kouter_closing]->[_CI_LEVEL_];
+ }
+ }
+
+ return;
+} ## end sub weld_nested_containers
+
+sub weld_nested_quotes {
+
+ # Called once per file for option '--weld-nested-containers'. This
+ # does welding on qw quotes.
+
+ my $self = shift;
+
+ # See if quotes are excluded from welding
+ my $rflags = $weld_nested_exclusion_rules{'q'};
+ return if ( defined($rflags) && defined( $rflags->[1] ) );
+
+ my $rK_weld_left = $self->[_rK_weld_left_];
+ my $rK_weld_right = $self->[_rK_weld_right_];
+
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
+ my $Num = @{$rLL};
+
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $rlines = $self->[_rlines_];
+
+ my $starting_lentot;
+ my $maximum_text_length;
+
+ my $is_single_quote = sub {
+ my ( $Kbeg, $Kend, $quote_type ) = @_;
+ foreach my $K ( $Kbeg .. $Kend ) {
+ my $test_type = $rLL->[$K]->[_TYPE_];
+ next if ( $test_type eq 'b' );
+ return if ( $test_type ne $quote_type );
+ }
+ return 1;
+ };
+
+ # Length tolerance - same as previously used for sub weld_nested
+ my $multiline_tol =
+ 1 + max( $rOpts_indent_columns, $rOpts_continuation_indentation );
+
+ # look for single qw quotes nested in containers
+ my $KNEXT = $self->[_K_first_seq_item_];
+ while ( defined($KNEXT) ) {
+ my $KK = $KNEXT;
+ $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
+ my $rtoken_vars = $rLL->[$KK];
+ my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
+ if ( !$outer_seqno ) {
+ next if ( $KK == 0 ); # first token in file may not be container
+
+ # A fault here implies that an error was made in the little loop at
+ # the bottom of sub 'respace_tokens' which set the values of
+ # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
+ # loop control lines above.
+ Fault("sequence = $outer_seqno not defined at K=$KK")
+ if (DEVEL_MODE);
+ next;
+ }
+
+ my $token = $rtoken_vars->[_TOKEN_];
+ if ( $is_opening_token{$token} ) {
+
+ # see if the next token is a quote of some type
+ my $Kn = $KK + 1;
+ $Kn += 1
+ if ( $Kn < $Num && $rLL->[$Kn]->[_TYPE_] eq 'b' );
+ next unless ( $Kn < $Num );
+
+ my $next_token = $rLL->[$Kn]->[_TOKEN_];
+ my $next_type = $rLL->[$Kn]->[_TYPE_];
+ next
+ unless ( ( $next_type eq 'q' || $next_type eq 'Q' )
+ && substr( $next_token, 0, 1 ) eq 'q' );
+
+ # The token before the closing container must also be a quote
+ my $Kouter_closing = $K_closing_container->{$outer_seqno};
+ my $Kinner_closing = $self->K_previous_nonblank($Kouter_closing);
+ next unless $rLL->[$Kinner_closing]->[_TYPE_] eq $next_type;
+
+ # This is an inner opening container
+ my $Kinner_opening = $Kn;
+
+ # Do not weld to single-line quotes. Nothing is gained, and it may
+ # look bad.
+ next if ( $Kinner_closing == $Kinner_opening );
+
+ # Only weld to quotes delimited with container tokens. This is
+ # because welding to arbitrary quote delimiters can produce code
+ # which is less readable than without welding.
+ my $closing_delimiter =
+ substr( $rLL->[$Kinner_closing]->[_TOKEN_], -1, 1 );
+ next
+ unless ( $is_closing_token{$closing_delimiter}
+ || $closing_delimiter eq '>' );
+
+ # Now make sure that there is just a single quote in the container
+ next
+ unless (
+ $is_single_quote->(
$Kinner_opening + 1,
$Kinner_closing - 1,
$next_type
)
);
- # OK: This is a candidate for welding
- my $Msg = EMPTY_STRING;
- my $do_not_weld;
+ # OK: This is a candidate for welding
+ my $Msg = EMPTY_STRING;
+ my $do_not_weld;
+
+ my $Kouter_opening = $K_opening_container->{$outer_seqno};
+ my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
+ my $iline_io = $rLL->[$Kinner_opening]->[_LINE_INDEX_];
+ my $iline_oc = $rLL->[$Kouter_closing]->[_LINE_INDEX_];
+ my $iline_ic = $rLL->[$Kinner_closing]->[_LINE_INDEX_];
+ my $is_old_weld =
+ ( $iline_oo == $iline_io && $iline_ic == $iline_oc );
+
+ # Fix for case b1189. If quote is marked as type 'Q' then only weld
+ # if the two closing tokens are on the same input line. Otherwise,
+ # the closing line will be output earlier in the pipeline than
+ # other CODE lines and welding will not actually occur. This will
+ # leave a half-welded structure with potential formatting
+ # instability. This might be fixed by adding a check for a weld on
+ # a closing Q token and sending it down the normal channel, but it
+ # would complicate the code and is potentially risky.
+ next
+ if (!$is_old_weld
+ && $next_type eq 'Q'
+ && $iline_ic != $iline_oc );
+
+ # If welded, the line must not exceed allowed line length
+ ( my $ok_to_weld, $maximum_text_length, $starting_lentot, my $msg )
+ = $self->setup_new_weld_measurements( $Kouter_opening,
+ $Kinner_opening );
+ if ( !$ok_to_weld ) {
+ if (DEBUG_WELD) { print $msg}
+ next;
+ }
+
+ my $length =
+ $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_] - $starting_lentot;
+ my $excess = $length + $multiline_tol - $maximum_text_length;
+
+ my $excess_max = ( $is_old_weld ? $multiline_tol : 0 );
+ if ( $excess >= $excess_max ) {
+ $do_not_weld = 1;
+ }
+
+ if (DEBUG_WELD) {
+ if ( !$is_old_weld ) { $is_old_weld = EMPTY_STRING }
+ $Msg .=
+"excess=$excess>=$excess_max, multiline_tol=$multiline_tol, is_old_weld='$is_old_weld'\n";
+ }
+
+ # Check weld exclusion rules for outer container
+ if ( !$do_not_weld ) {
+ my $is_leading = !defined( $rK_weld_left->{$Kouter_opening} );
+ if ( $self->is_excluded_weld( $KK, $is_leading ) ) {
+ if (DEBUG_WELD) {
+ $Msg .=
+"No qw weld due to weld exclusion rules for outer container\n";
+ }
+ $do_not_weld = 1;
+ }
+ }
+
+ # Check the length of the last line (fixes case b1039)
+ if ( !$do_not_weld ) {
+ my $rK_range_ic = $rlines->[$iline_ic]->{_rK_range};
+ my ( $Kfirst_ic, $Klast_ic ) = @{$rK_range_ic};
+ my $excess_ic =
+ $self->excess_line_length_for_Krange( $Kfirst_ic,
+ $Kouter_closing );
+
+ # Allow extra space for additional welded closing container(s)
+ # and a space and comma or semicolon.
+ # NOTE: weld len has not been computed yet. Use 2 spaces
+ # for now, correct for a single weld. This estimate could
+ # be made more accurate if necessary.
+ my $weld_len =
+ defined( $rK_weld_right->{$Kouter_closing} ) ? 2 : 0;
+ if ( $excess_ic + $weld_len + 2 > 0 ) {
+ if (DEBUG_WELD) {
+ $Msg .=
+"No qw weld due to excess ending line length=$excess_ic + $weld_len + 2 > 0\n";
+ }
+ $do_not_weld = 1;
+ }
+ }
+
+ if ($do_not_weld) {
+ if (DEBUG_WELD) {
+ $Msg .= "Not Welding QW\n";
+ print $Msg;
+ }
+ next;
+ }
+
+ # OK to weld
+ if (DEBUG_WELD) {
+ $Msg .= "Welding QW\n";
+ print $Msg;
+ }
+
+ $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
+ $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
+
+ $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
+ $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
+
+ # Undo one indentation level if an extra level was added to this
+ # multiline quote
+ my $qw_seqno =
+ $self->[_rstarting_multiline_qw_seqno_by_K_]->{$Kinner_opening};
+ if ( $qw_seqno
+ && $self->[_rmultiline_qw_has_extra_level_]->{$qw_seqno} )
+ {
+ foreach my $K ( $Kinner_opening + 1 .. $Kinner_closing - 1 ) {
+ $rLL->[$K]->[_LEVEL_] -= 1;
+ }
+ $rLL->[$Kinner_opening]->[_CI_LEVEL_] = 0;
+ $rLL->[$Kinner_closing]->[_CI_LEVEL_] = 0;
+ }
+
+ # undo CI for other welded quotes
+ else {
+
+ foreach my $K ( $Kinner_opening .. $Kinner_closing ) {
+ $rLL->[$K]->[_CI_LEVEL_] = 0;
+ }
+ }
+
+ # Change the level of a closing qw token to be that of the outer
+ # containing token. This will allow -lp indentation to function
+ # correctly in the vertical aligner.
+ # Patch to fix c002: but not if it contains text
+ if ( length( $rLL->[$Kinner_closing]->[_TOKEN_] ) == 1 ) {
+ $rLL->[$Kinner_closing]->[_LEVEL_] =
+ $rLL->[$Kouter_closing]->[_LEVEL_];
+ }
+ }
+ }
+ return;
+} ## end sub weld_nested_quotes
+
+sub is_welded_at_seqno {
+
+ my ( $self, $seqno ) = @_;
+
+ # given a sequence number:
+ # return true if it is welded either left or right
+ # return false otherwise
+ return unless ( $total_weld_count && defined($seqno) );
+ my $KK_o = $self->[_K_opening_container_]->{$seqno};
+ return unless defined($KK_o);
+ return defined( $self->[_rK_weld_left_]->{$KK_o} )
+ || defined( $self->[_rK_weld_right_]->{$KK_o} );
+} ## end sub is_welded_at_seqno
+
+sub mark_short_nested_blocks {
+
+ # This routine looks at the entire file and marks any short nested blocks
+ # which should not be broken. The results are stored in the hash
+ # $rshort_nested->{$type_sequence}
+ # which will be true if the container should remain intact.
+ #
+ # For example, consider the following line:
+
+ # sub cxt_two { sort { $a <=> $b } test_if_list() }
+
+ # The 'sort' block is short and nested within an outer sub block.
+ # Normally, the existence of the 'sort' block will force the sub block to
+ # break open, but this is not always desirable. Here we will set a flag for
+ # the sort block to prevent this. To give the user control, we will
+ # follow the input file formatting. If either of the blocks is broken in
+ # the input file then we will allow it to remain broken. Otherwise we will
+ # set a flag to keep it together in later formatting steps.
+
+ # The flag which is set here will be checked in two places:
+ # 'sub process_line_of_CODE' and 'sub starting_one_line_block'
+
+ my $self = shift;
+ return if $rOpts->{'indent-only'};
+
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
+
+ return unless ( $rOpts->{'one-line-block-nesting'} );
+
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $rbreak_container = $self->[_rbreak_container_];
+ my $ris_broken_container = $self->[_ris_broken_container_];
+ my $rshort_nested = $self->[_rshort_nested_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+
+ # Variables needed for estimating line lengths
+ my $maximum_text_length;
+ my $starting_lentot;
+ my $length_tol = 1;
+
+ my $excess_length_to_K = sub {
+ my ($K) = @_;
+
+ # Estimate the length from the line start to a given token
+ my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
+ my $excess_length = $length + $length_tol - $maximum_text_length;
+ return ($excess_length);
+ };
+
+ # loop over all containers
+ my @open_block_stack;
+ my $iline = -1;
+ my $KNEXT = $self->[_K_first_seq_item_];
+ while ( defined($KNEXT) ) {
+ my $KK = $KNEXT;
+ $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
+ my $rtoken_vars = $rLL->[$KK];
+ my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+ if ( !$type_sequence ) {
+ next if ( $KK == 0 ); # first token in file may not be container
+
+ # A fault here implies that an error was made in the little loop at
+ # the bottom of sub 'respace_tokens' which set the values of
+ # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
+ # loop control lines above.
+ Fault("sequence = $type_sequence not defined at K=$KK")
+ if (DEVEL_MODE);
+ next;
+ }
+
+ # Patch: do not mark short blocks with welds.
+ # In some cases blinkers can form (case b690).
+ if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence) ) {
+ next;
+ }
+
+ # We are just looking at code blocks
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $type = $rtoken_vars->[_TYPE_];
+ next unless ( $type eq $token );
+ next unless ( $rblock_type_of_seqno->{$type_sequence} );
+
+ # Keep a stack of all acceptable block braces seen.
+ # Only consider blocks entirely on one line so dump the stack when line
+ # changes.
+ my $iline_last = $iline;
+ $iline = $rLL->[$KK]->[_LINE_INDEX_];
+ if ( $iline != $iline_last ) { @open_block_stack = () }
+
+ if ( $token eq '}' ) {
+ if (@open_block_stack) { pop @open_block_stack }
+ }
+ next unless ( $token eq '{' );
+
+ # block must be balanced (bad scripts may be unbalanced)
+ my $K_opening = $K_opening_container->{$type_sequence};
+ my $K_closing = $K_closing_container->{$type_sequence};
+ next unless ( defined($K_opening) && defined($K_closing) );
+
+ # require that this block be entirely on one line
+ next
+ if ( $ris_broken_container->{$type_sequence}
+ || $rbreak_container->{$type_sequence} );
+
+ # See if this block fits on one line of allowed length (which may
+ # be different from the input script)
+ $starting_lentot =
+ $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
+ my $level = $rLL->[$KK]->[_LEVEL_];
+ my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
+ $maximum_text_length =
+ $maximum_text_length_at_level[$level] -
+ $ci_level * $rOpts_continuation_indentation;
+
+ # Dump the stack if block is too long and skip this block
+ if ( $excess_length_to_K->($K_closing) > 0 ) {
+ @open_block_stack = ();
+ next;
+ }
+
+ # OK, Block passes tests, remember it
+ push @open_block_stack, $type_sequence;
+
+ # We are only marking nested code blocks,
+ # so check for a previous block on the stack
+ next unless ( @open_block_stack > 1 );
+
+ # Looks OK, mark this as a short nested block
+ $rshort_nested->{$type_sequence} = 1;
+
+ }
+ return;
+} ## end sub mark_short_nested_blocks
+
+sub special_indentation_adjustments {
+
+ my ($self) = @_;
+
+ # Called once per file to do special indentation adjustments.
+ # These routines adjust levels either by changing _CI_LEVEL_ directly or
+ # by setting modified levels in the array $self->[_radjusted_levels_].
+
+ # Initialize the adjusted levels. These will be the levels actually used
+ # for computing indentation.
+
+ # NOTE: This routine is called after the weld routines, which may have
+ # already adjusted _LEVEL_, so we are making adjustments on top of those
+ # levels. It would be much nicer to have the weld routines also use this
+ # adjustment, but that gets complicated when we combine -gnu -wn and have
+ # some welded quotes.
+ my $Klimit = $self->[_Klimit_];
+ my $rLL = $self->[_rLL_];
+ my $radjusted_levels = $self->[_radjusted_levels_];
+
+ return unless ( defined($Klimit) );
+
+ foreach my $KK ( 0 .. $Klimit ) {
+ $radjusted_levels->[$KK] = $rLL->[$KK]->[_LEVEL_];
+ }
+
+ # First set adjusted levels for any non-indenting braces.
+ $self->do_non_indenting_braces();
+
+ # Adjust breaks and indentation list containers
+ $self->break_before_list_opening_containers();
+
+ # Set adjusted levels for the whitespace cycle option.
+ $self->whitespace_cycle_adjustment();
+
+ $self->braces_left_setup();
+
+ # Adjust continuation indentation if -bli is set
+ $self->bli_adjustment();
+
+ $self->extended_ci()
+ if ($rOpts_extended_continuation_indentation);
+
+ # Now clip any adjusted levels to be non-negative
+ $self->clip_adjusted_levels();
+
+ return;
+} ## end sub special_indentation_adjustments
+
+sub clip_adjusted_levels {
+
+ # Replace any negative adjusted levels with zero.
+ # Negative levels can occur in files with brace errors.
+ my ($self) = @_;
+ my $radjusted_levels = $self->[_radjusted_levels_];
+ return unless defined($radjusted_levels) && @{$radjusted_levels};
+ my $min = min( @{$radjusted_levels} ); # fast check for min
+ if ( $min < 0 ) {
+
+ # slow loop, but rarely needed
+ foreach ( @{$radjusted_levels} ) { $_ = 0 if ( $_ < 0 ) }
+ }
+ return;
+} ## end sub clip_adjusted_levels
+
+sub do_non_indenting_braces {
+
+ # Called once per file to handle the --non-indenting-braces parameter.
+ # Remove indentation within marked braces if requested
+ my ($self) = @_;
+
+ # Any non-indenting braces have been found by sub find_non_indenting_braces
+ # and are defined by the following hash:
+ my $rseqno_non_indenting_brace_by_ix =
+ $self->[_rseqno_non_indenting_brace_by_ix_];
+ return unless ( %{$rseqno_non_indenting_brace_by_ix} );
+
+ my $rlines = $self->[_rlines_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
+ my $radjusted_levels = $self->[_radjusted_levels_];
+
+ # First locate all of the marked blocks
+ my @K_stack;
+ foreach my $ix ( keys %{$rseqno_non_indenting_brace_by_ix} ) {
+ my $seqno = $rseqno_non_indenting_brace_by_ix->{$ix};
+ my $KK = $K_opening_container->{$seqno};
+ my $line_of_tokens = $rlines->[$ix];
+ my $rK_range = $line_of_tokens->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ $rspecial_side_comment_type->{$Klast} = 'NIB';
+ push @K_stack, [ $KK, 1 ];
+ my $Kc = $K_closing_container->{$seqno};
+ push @K_stack, [ $Kc, -1 ] if ( defined($Kc) );
+ }
+ return unless (@K_stack);
+ @K_stack = sort { $a->[0] <=> $b->[0] } @K_stack;
+
+ # Then loop to remove indentation within marked blocks
+ my $KK_last = 0;
+ my $ndeep = 0;
+ foreach my $item (@K_stack) {
+ my ( $KK, $inc ) = @{$item};
+ if ( $ndeep > 0 ) {
+
+ foreach ( $KK_last + 1 .. $KK ) {
+ $radjusted_levels->[$_] -= $ndeep;
+ }
+
+ # We just subtracted the old $ndeep value, which only applies to a
+ # '{'. The new $ndeep applies to a '}', so we undo the error.
+ if ( $inc < 0 ) { $radjusted_levels->[$KK] += 1 }
+ }
+
+ $ndeep += $inc;
+ $KK_last = $KK;
+ }
+ return;
+} ## end sub do_non_indenting_braces
+
+sub whitespace_cycle_adjustment {
+
+ my $self = shift;
+
+ # Called once per file to implement the --whitespace-cycle option
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
+ my $radjusted_levels = $self->[_radjusted_levels_];
+ my $maximum_level = $self->[_maximum_level_];
+
+ if ( $rOpts_whitespace_cycle
+ && $rOpts_whitespace_cycle > 0
+ && $rOpts_whitespace_cycle < $maximum_level )
+ {
+
+ my $Kmax = @{$rLL} - 1;
+
+ my $whitespace_last_level = -1;
+ my @whitespace_level_stack = ();
+ my $last_nonblank_type = 'b';
+ my $last_nonblank_token = EMPTY_STRING;
+ foreach my $KK ( 0 .. $Kmax ) {
+ my $level_abs = $radjusted_levels->[$KK];
+ my $level = $level_abs;
+ if ( $level_abs < $whitespace_last_level ) {
+ pop(@whitespace_level_stack);
+ }
+ if ( !@whitespace_level_stack ) {
+ push @whitespace_level_stack, $level_abs;
+ }
+ elsif ( $level_abs > $whitespace_last_level ) {
+ $level = $whitespace_level_stack[-1] +
+ ( $level_abs - $whitespace_last_level );
+
+ if (
+ # 1 Try to break at a block brace
+ (
+ $level > $rOpts_whitespace_cycle
+ && $last_nonblank_type eq '{'
+ && $last_nonblank_token eq '{'
+ )
+
+ # 2 Then either a brace or bracket
+ || ( $level > $rOpts_whitespace_cycle + 1
+ && $last_nonblank_token =~ /^[\{\[]$/ )
+
+ # 3 Then a paren too
+ || $level > $rOpts_whitespace_cycle + 2
+ )
+ {
+ $level = 1;
+ }
+ push @whitespace_level_stack, $level;
+ }
+ $level = $whitespace_level_stack[-1];
+ $radjusted_levels->[$KK] = $level;
+
+ $whitespace_last_level = $level_abs;
+ my $type = $rLL->[$KK]->[_TYPE_];
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ if ( $type ne 'b' ) {
+ $last_nonblank_type = $type;
+ $last_nonblank_token = $token;
+ }
+ }
+ }
+ return;
+} ## end sub whitespace_cycle_adjustment
+
+use constant DEBUG_BBX => 0;
+
+sub break_before_list_opening_containers {
+
+ my ($self) = @_;
+
+ # This routine is called once per batch to implement parameters
+ # --break-before-hash-brace=n and similar -bbx=n flags
+ # and their associated indentation flags:
+ # --break-before-hash-brace-and-indent and similar -bbxi=n
+
+ # Nothing to do if none of the -bbx=n parameters has been set
+ return unless %break_before_container_types;
+
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
+
+ # Loop over all opening container tokens
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $ris_broken_container = $self->[_ris_broken_container_];
+ my $ris_permanently_broken = $self->[_ris_permanently_broken_];
+ my $rhas_list = $self->[_rhas_list_];
+ my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
+ my $radjusted_levels = $self->[_radjusted_levels_];
+ my $rparent_of_seqno = $self->[_rparent_of_seqno_];
+ my $rlines = $self->[_rlines_];
+ my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
+ my $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
+ my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
+ my $rK_weld_right = $self->[_rK_weld_right_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+
+ my $length_tol =
+ max( 1, $rOpts_continuation_indentation, $rOpts_indent_columns );
+ if ($rOpts_ignore_old_breakpoints) {
+
+ # Patch suggested by b1231; the old tol was excessive.
+ ## $length_tol += $rOpts_maximum_line_length;
+ $length_tol *= 2;
+ }
+
+ my $rbreak_before_container_by_seqno = {};
+ my $rwant_reduced_ci = {};
+ foreach my $seqno ( keys %{$K_opening_container} ) {
+
+ #----------------------------------------------------------------
+ # Part 1: Examine any -bbx=n flags
+ #----------------------------------------------------------------
+
+ next if ( $rblock_type_of_seqno->{$seqno} );
+ my $KK = $K_opening_container->{$seqno};
+
+ # This must be a list or contain a list.
+ # Note1: switched from 'has_broken_list' to 'has_list' to fix b1024.
+ # Note2: 'has_list' holds the depth to the sub-list. We will require
+ # a depth of just 1
+ my $is_list = $self->is_list_by_seqno($seqno);
+ my $has_list = $rhas_list->{$seqno};
+
+ # Fix for b1173: if welded opening container, use flag of innermost
+ # seqno. Otherwise, the restriction $has_list==1 prevents triple and
+ # higher welds from following the -BBX parameters.
+ if ($total_weld_count) {
+ my $KK_test = $rK_weld_right->{$KK};
+ if ( defined($KK_test) ) {
+ my $seqno_inner = $rLL->[$KK_test]->[_TYPE_SEQUENCE_];
+ $is_list ||= $self->is_list_by_seqno($seqno_inner);
+ $has_list = $rhas_list->{$seqno_inner};
+ }
+ }
+
+ next unless ( $is_list || $has_list && $has_list == 1 );
+
+ my $has_list_with_lec = $rhas_broken_list_with_lec->{$seqno};
+
+ # Only for types of container tokens with a non-default break option
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ my $break_option = $break_before_container_types{$token};
+ next unless ($break_option);
+
+ # Do not use -bbx under stress for stability ... fixes b1300
+ # TODO: review this; do we also need to look at stress_level_lalpha?
+ my $level = $rLL->[$KK]->[_LEVEL_];
+ if ( $level >= $stress_level_beta ) {
+ DEBUG_BBX
+ && print
+"BBX: Switching off at $seqno: level=$level exceeds beta stress level=$stress_level_beta\n";
+ next;
+ }
+
+ # Require previous nonblank to be '=' or '=>'
+ my $Kprev = $KK - 1;
+ next if ( $Kprev < 0 );
+ my $prev_type = $rLL->[$Kprev]->[_TYPE_];
+ if ( $prev_type eq 'b' ) {
+ $Kprev--;
+ next if ( $Kprev < 0 );
+ $prev_type = $rLL->[$Kprev]->[_TYPE_];
+ }
+ next unless ( $is_equal_or_fat_comma{$prev_type} );
+
+ my $ci = $rLL->[$KK]->[_CI_LEVEL_];
+
+ #--------------------------------------------
+ # New coding for option 2 (break if complex).
+ #--------------------------------------------
+ # This new coding uses clues which are invariant under formatting to
+ # decide if a list is complex. For now it is only applied when -lp
+ # and -vmll are used, but eventually it may become the standard method.
+ # Fixes b1274, b1275, and others, including b1099.
+ if ( $break_option == 2 ) {
+
+ if ( $rOpts_line_up_parentheses
+ || $rOpts_variable_maximum_line_length )
+ {
+
+ # Start with the basic definition of a complex list...
+ my $is_complex = $is_list && $has_list;
+
+ # and it is also complex if the parent is a list
+ if ( !$is_complex ) {
+ my $parent = $rparent_of_seqno->{$seqno};
+ if ( $self->is_list_by_seqno($parent) ) {
+ $is_complex = 1;
+ }
+ }
+
+ # finally, we will call it complex if there are inner opening
+ # and closing container tokens, not parens, within the outer
+ # container tokens.
+ if ( !$is_complex ) {
+ my $Kp = $self->K_next_nonblank($KK);
+ my $token_p = defined($Kp) ? $rLL->[$Kp]->[_TOKEN_] : 'b';
+ if ( $is_opening_token{$token_p} && $token_p ne '(' ) {
+
+ my $Kc = $K_closing_container->{$seqno};
+ my $Km = $self->K_previous_nonblank($Kc);
+ my $token_m =
+ defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
+
+ # ignore any optional ending comma
+ if ( $token_m eq ',' ) {
+ $Km = $self->K_previous_nonblank($Km);
+ $token_m =
+ defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
+ }
+
+ $is_complex ||=
+ $is_closing_token{$token_m} && $token_m ne ')';
+ }
+ }
+
+ # Convert to option 3 (always break) if complex
+ next unless ($is_complex);
+ $break_option = 3;
+ }
+ }
+
+ # Fix for b1231: the has_list_with_lec does not cover all cases.
+ # A broken container containing a list and with line-ending commas
+ # will stay broken, so can be treated as if it had a list with lec.
+ $has_list_with_lec ||=
+ $has_list
+ && $ris_broken_container->{$seqno}
+ && $rlec_count_by_seqno->{$seqno};
+
+ DEBUG_BBX
+ && print STDOUT
+"BBX: Looking at seqno=$seqno, token = $token with option=$break_option\n";
+
+ # -bbx=1 = stable, try to follow input
+ if ( $break_option == 1 ) {
+
+ my $iline = $rLL->[$KK]->[_LINE_INDEX_];
+ my $rK_range = $rlines->[$iline]->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ next unless ( $KK == $Kfirst );
+ }
+
+ # -bbx=2 => apply this style only for a 'complex' list
+ elsif ( $break_option == 2 ) {
+
+ # break if this list contains a broken list with line-ending comma
+ my $ok_to_break;
+ my $Msg = EMPTY_STRING;
+ if ($has_list_with_lec) {
+ $ok_to_break = 1;
+ DEBUG_BBX && do { $Msg = "has list with lec;" };
+ }
+
+ if ( !$ok_to_break ) {
+
+ # Turn off -xci if -bbx=2 and this container has a sublist but
+ # not a broken sublist. This avoids creating blinkers. The
+ # problem is that -xci can cause one-line lists to break open,
+ # and thereby creating formatting instability.
+ # This fixes cases b1033 b1036 b1037 b1038 b1042 b1043 b1044
+ # b1045 b1046 b1047 b1051 b1052 b1061.
+ if ($has_list) { $rno_xci_by_seqno->{$seqno} = 1 }
+
+ my $parent = $rparent_of_seqno->{$seqno};
+ if ( $self->is_list_by_seqno($parent) ) {
+ DEBUG_BBX && do { $Msg = "parent is list" };
+ $ok_to_break = 1;
+ }
+ }
+
+ if ( !$ok_to_break ) {
+ DEBUG_BBX
+ && print STDOUT "Not breaking at seqno=$seqno: $Msg\n";
+ next;
+ }
+
+ DEBUG_BBX
+ && print STDOUT "OK to break at seqno=$seqno: $Msg\n";
+
+ # Patch: turn off -xci if -bbx=2 and -lp
+ # This fixes cases b1090 b1095 b1101 b1116 b1118 b1121 b1122
+ $rno_xci_by_seqno->{$seqno} = 1 if ($rOpts_line_up_parentheses);
+ }
+
+ # -bbx=3 = always break
+ elsif ( $break_option == 3 ) {
+
+ # ok to break
+ }
+
+ # Shouldn't happen! Bad flag, but make behavior same as 3
+ else {
+ # ok to break
+ }
+
+ # Set a flag for actual implementation later in
+ # sub insert_breaks_before_list_opening_containers
+ $rbreak_before_container_by_seqno->{$seqno} = 1;
+ DEBUG_BBX
+ && print STDOUT "BBX: ok to break at seqno=$seqno\n";
+
+ # -bbxi=0: Nothing more to do if the ci value remains unchanged
+ my $ci_flag = $container_indentation_options{$token};
+ next unless ($ci_flag);
+
+ # -bbxi=1: This option removes ci and is handled in
+ # later sub get_final_indentation
+ if ( $ci_flag == 1 ) {
+ $rwant_reduced_ci->{$seqno} = 1;
+ next;
+ }
+
+ # -bbxi=2: This option changes the level ...
+ # This option can conflict with -xci in some cases. We can turn off
+ # -xci for this container to avoid blinking. For now, only do this if
+ # -vmll is set. ( fixes b1335, b1336 )
+ if ($rOpts_variable_maximum_line_length) {
+ $rno_xci_by_seqno->{$seqno} = 1;
+ }
+
+ #----------------------------------------------------------------
+ # Part 2: Perform tests before committing to changing ci and level
+ #----------------------------------------------------------------
+
+ # Before changing the ci level of the opening container, we need
+ # to be sure that the container will be broken in the later stages of
+ # formatting. We have to do this because we are working early in the
+ # formatting pipeline. A problem can occur if we change the ci or
+ # level of the opening token but do not actually break the container
+ # open as expected. In most cases it wouldn't make any difference if
+ # we changed ci or not, but there are some edge cases where this
+ # can cause blinking states, so we need to try to only change ci if
+ # the container will really be broken.
+
+ # Only consider containers already broken
+ next if ( !$ris_broken_container->{$seqno} );
+
+ # Patch to fix issue b1305: the combination of -naws and ci>i appears
+ # to cause an instability. It should almost never occur in practice.
+ next
+ if (!$rOpts_add_whitespace
+ && $rOpts_continuation_indentation > $rOpts_indent_columns );
+
+ # Always ok to change ci for permanently broken containers
+ if ( $ris_permanently_broken->{$seqno} ) { }
+
+ # Always OK if this list contains a broken sub-container with
+ # a non-terminal line-ending comma
+ elsif ($has_list_with_lec) { }
+
+ # Otherwise, we are considering a single container...
+ else {
+
+ # A single container must have at least 1 line-ending comma:
+ next unless ( $rlec_count_by_seqno->{$seqno} );
+
+ my $OK;
+
+ # Since it has a line-ending comma, it will stay broken if the
+ # -boc flag is set
+ if ($rOpts_break_at_old_comma_breakpoints) { $OK = 1 }
+
+ # OK if the container contains multiple fat commas
+ # Better: multiple lines with fat commas
+ if ( !$OK && !$rOpts_ignore_old_breakpoints ) {
+ my $rtype_count = $rtype_count_by_seqno->{$seqno};
+ next unless ($rtype_count);
+ my $fat_comma_count = $rtype_count->{'=>'};
+ DEBUG_BBX
+ && print STDOUT "BBX: fat comma count=$fat_comma_count\n";
+ if ( $fat_comma_count && $fat_comma_count >= 2 ) { $OK = 1 }
+ }
+
+ # The last check we can make is to see if this container could
+ # fit on a single line. Use the least possible indentation
+ # estimate, ci=0, so we are not subtracting $ci *
+ # $rOpts_continuation_indentation from tabulated
+ # $maximum_text_length value.
+ if ( !$OK ) {
+ my $maximum_text_length = $maximum_text_length_at_level[$level];
+ my $K_closing = $K_closing_container->{$seqno};
+ my $length = $self->cumulative_length_before_K($K_closing) -
+ $self->cumulative_length_before_K($KK);
+ my $excess_length = $length - $maximum_text_length;
+ DEBUG_BBX
+ && print STDOUT
+"BBX: excess=$excess_length: maximum_text_length=$maximum_text_length, length=$length, ci=$ci\n";
+
+ # OK if the net container definitely breaks on length
+ if ( $excess_length > $length_tol ) {
+ $OK = 1;
+ DEBUG_BBX
+ && print STDOUT "BBX: excess_length=$excess_length\n";
+ }
+
+ # Otherwise skip it
+ else { next }
+ }
+ }
+
+ #------------------------------------------------------------
+ # Part 3: Looks OK: apply -bbx=n and any related -bbxi=n flag
+ #------------------------------------------------------------
+
+ DEBUG_BBX && print STDOUT "BBX: OK to break\n";
+
+ # -bbhbi=n
+ # -bbsbi=n
+ # -bbpi=n
+
+ # where:
+
+ # n=0 default indentation (usually one ci)
+ # n=1 outdent one ci
+ # n=2 indent one level (minus one ci)
+ # n=3 indent one extra ci [This may be dropped]
+
+ # NOTE: We are adjusting indentation of the opening container. The
+ # closing container will normally follow the indentation of the opening
+ # container automatically, so this is not currently done.
+ next unless ($ci);
+
+ # option 1: outdent
+ if ( $ci_flag == 1 ) {
+ $ci -= 1;
+ }
+
+ # option 2: indent one level
+ elsif ( $ci_flag == 2 ) {
+ $ci -= 1;
+ $radjusted_levels->[$KK] += 1;
+ }
+
+ # unknown option
+ else {
+ # Shouldn't happen - leave ci unchanged
+ }
+
+ $rLL->[$KK]->[_CI_LEVEL_] = $ci if ( $ci >= 0 );
+ }
+
+ $self->[_rbreak_before_container_by_seqno_] =
+ $rbreak_before_container_by_seqno;
+ $self->[_rwant_reduced_ci_] = $rwant_reduced_ci;
+ return;
+} ## end sub break_before_list_opening_containers
+
+use constant DEBUG_XCI => 0;
+
+sub extended_ci {
+
+ # This routine implements the -xci (--extended-continuation-indentation)
+ # flag. We add CI to interior tokens of a container which itself has CI but
+ # only if a token does not already have CI.
+
+ # To do this, we will locate opening tokens which themselves have
+ # continuation indentation (CI). We track them with their sequence
+ # numbers. These sequence numbers are called 'controlling sequence
+ # numbers'. They apply continuation indentation to the tokens that they
+ # contain. These inner tokens remember their controlling sequence numbers.
+ # Later, when these inner tokens are output, they have to see if the output
+ # lines with their controlling tokens were output with CI or not. If not,
+ # then they must remove their CI too.
+
+ # The controlling CI concept works hierarchically. But CI itself is not
+ # hierarchical; it is either on or off. There are some rare instances where
+ # it would be best to have hierarchical CI too, but not enough to be worth
+ # the programming effort.
+
+ # The operations to remove unwanted CI are done in sub 'undo_ci'.
+
+ my ($self) = @_;
+
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
+
+ my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
+ my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
+ my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
+ my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
+ my $ris_bli_container = $self->[_ris_bli_container_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+
+ my %available_space;
+
+ # Loop over all opening container tokens
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my @seqno_stack;
+ my $seqno_top;
+ my $KLAST;
+ my $KNEXT = $self->[_K_first_seq_item_];
+
+ # The following variable can be used to allow a little extra space to
+ # avoid blinkers. A value $len_tol = 20 fixed the following
+ # fixes cases: b1025 b1026 b1027 b1028 b1029 b1030 but NOT b1031.
+ # It turned out that the real problem was mis-parsing a list brace as
+ # a code block in a 'use' statement when the line length was extremely
+ # small. A value of 0 works now, but a slightly larger value can
+ # be used to minimize the chance of a blinker.
+ my $len_tol = 0;
+
+ while ( defined($KNEXT) ) {
+
+ # Fix all tokens up to the next sequence item if we are changing CI
+ if ($seqno_top) {
+
+ my $is_list = $ris_list_by_seqno->{$seqno_top};
+ my $space = $available_space{$seqno_top};
+ my $count = 0;
+ foreach my $Kt ( $KLAST + 1 .. $KNEXT - 1 ) {
+
+ next if ( $rLL->[$Kt]->[_CI_LEVEL_] );
+
+ # But do not include tokens which might exceed the line length
+ # and are not in a list.
+ # ... This fixes case b1031
+ if ( $is_list
+ || $rLL->[$Kt]->[_TOKEN_LENGTH_] < $space
+ || $rLL->[$Kt]->[_TYPE_] eq '#' )
+ {
+ $rLL->[$Kt]->[_CI_LEVEL_] = 1;
+ $rseqno_controlling_my_ci->{$Kt} = $seqno_top;
+ $count++;
+ }
+ }
+ $ris_seqno_controlling_ci->{$seqno_top} += $count;
+ }
- my $Kouter_opening = $K_opening_container->{$outer_seqno};
- my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
- my $iline_io = $rLL->[$Kinner_opening]->[_LINE_INDEX_];
- my $iline_oc = $rLL->[$Kouter_closing]->[_LINE_INDEX_];
- my $iline_ic = $rLL->[$Kinner_closing]->[_LINE_INDEX_];
- my $is_old_weld =
- ( $iline_oo == $iline_io && $iline_ic == $iline_oc );
+ $KLAST = $KNEXT;
+ my $KK = $KNEXT;
+ $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
- # Fix for case b1189. If quote is marked as type 'Q' then only weld
- # if the two closing tokens are on the same input line. Otherwise,
- # the closing line will be output earlier in the pipeline than
- # other CODE lines and welding will not actually occur. This will
- # leave a half-welded structure with potential formatting
- # instability. This might be fixed by adding a check for a weld on
- # a closing Q token and sending it down the normal channel, but it
- # would complicate the code and is potentially risky.
- next
- if (!$is_old_weld
- && $next_type eq 'Q'
- && $iline_ic != $iline_oc );
+ my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+
+ # see if we have reached the end of the current controlling container
+ if ( $seqno_top && $seqno == $seqno_top ) {
+ $seqno_top = pop @seqno_stack;
+ }
+
+ # Patch to fix some block types...
+ # Certain block types arrive from the tokenizer without CI but should
+ # have it for this option. These include anonymous subs and
+ # do sort map grep eval
+ my $block_type = $rblock_type_of_seqno->{$seqno};
+ if ( $block_type && $is_block_with_ci{$block_type} ) {
+ $rLL->[$KK]->[_CI_LEVEL_] = 1;
+ if ($seqno_top) {
+ $rseqno_controlling_my_ci->{$KK} = $seqno_top;
+ $ris_seqno_controlling_ci->{$seqno_top}++;
+ }
+ }
+
+ # If this does not have ci, update ci if necessary and continue looking
+ elsif ( !$rLL->[$KK]->[_CI_LEVEL_] ) {
+ if ($seqno_top) {
+ $rLL->[$KK]->[_CI_LEVEL_] = 1;
+ $rseqno_controlling_my_ci->{$KK} = $seqno_top;
+ $ris_seqno_controlling_ci->{$seqno_top}++;
+ }
+ next;
+ }
+
+ # We are looking for opening container tokens with ci
+ my $K_opening = $K_opening_container->{$seqno};
+ next unless ( defined($K_opening) && $KK == $K_opening );
+
+ # Make sure there is a corresponding closing container
+ # (could be missing if the script has a brace error)
+ my $K_closing = $K_closing_container->{$seqno};
+ next unless defined($K_closing);
+
+ # Skip if requested by -bbx to avoid blinkers
+ next if ( $rno_xci_by_seqno->{$seqno} );
+
+ # Skip if this is a -bli container (this fixes case b1065) Note: case
+ # b1065 is also fixed by the update for b1055, so this update is not
+ # essential now. But there does not seem to be a good reason to add
+ # xci and bli together, so the update is retained.
+ next if ( $ris_bli_container->{$seqno} );
+
+ # Require different input lines. This will filter out a large number
+ # of small hash braces and array brackets. If we accidentally filter
+ # out an important container, it will get fixed on the next pass.
+ if (
+ $rLL->[$K_opening]->[_LINE_INDEX_] ==
+ $rLL->[$K_closing]->[_LINE_INDEX_]
+ && ( $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[$K_opening]->[_CUMULATIVE_LENGTH_] >
+ $rOpts_maximum_line_length )
+ )
+ {
+ DEBUG_XCI
+ && print "XCI: Skipping seqno=$seqno, require different lines\n";
+ next;
+ }
+
+ # Do not apply -xci if adding extra ci will put the container contents
+ # beyond the line length limit (fixes cases b899 b935)
+ my $level = $rLL->[$K_opening]->[_LEVEL_];
+ my $ci_level = $rLL->[$K_opening]->[_CI_LEVEL_];
+ my $maximum_text_length =
+ $maximum_text_length_at_level[$level] -
+ $ci_level * $rOpts_continuation_indentation;
+
+ # Fix for b1197 b1198 b1199 b1200 b1201 b1202
+ # Do not apply -xci if we are running out of space
+ # TODO: review this; do we also need to look at stress_level_alpha?
+ if ( $level >= $stress_level_beta ) {
+ DEBUG_XCI
+ && print
+"XCI: Skipping seqno=$seqno, level=$level exceeds stress level=$stress_level_beta\n";
+ next;
+ }
+
+ # remember how much space is available for patch b1031 above
+ my $space =
+ $maximum_text_length - $len_tol - $rOpts_continuation_indentation;
+
+ if ( $space < 0 ) {
+ DEBUG_XCI && print "XCI: Skipping seqno=$seqno, space=$space\n";
+ next;
+ }
+ DEBUG_XCI && print "XCI: OK seqno=$seqno, space=$space\n";
+
+ $available_space{$seqno} = $space;
+
+ # This becomes the next controlling container
+ push @seqno_stack, $seqno_top if ($seqno_top);
+ $seqno_top = $seqno;
+ }
+ return;
+} ## end sub extended_ci
+
+sub braces_left_setup {
+
+ # Called once per file to mark all -bl, -sbl, and -asbl containers
+ my $self = shift;
+
+ my $rOpts_bl = $rOpts->{'opening-brace-on-new-line'};
+ my $rOpts_sbl = $rOpts->{'opening-sub-brace-on-new-line'};
+ my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
+ return unless ( $rOpts_bl || $rOpts_sbl || $rOpts_asbl );
+
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
+
+ # We will turn on this hash for braces controlled by these flags:
+ my $rbrace_left = $self->[_rbrace_left_];
+
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my $ris_asub_block = $self->[_ris_asub_block_];
+ my $ris_sub_block = $self->[_ris_sub_block_];
+ foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
+
+ my $block_type = $rblock_type_of_seqno->{$seqno};
+
+ # use -asbl flag for an anonymous sub block
+ if ( $ris_asub_block->{$seqno} ) {
+ if ($rOpts_asbl) {
+ $rbrace_left->{$seqno} = 1;
+ }
+ }
+
+ # use -sbl flag for a named sub
+ elsif ( $ris_sub_block->{$seqno} ) {
+ if ($rOpts_sbl) {
+ $rbrace_left->{$seqno} = 1;
+ }
+ }
+
+ # use -bl flag if not a sub block of any type
+ else {
+ if ( $rOpts_bl
+ && $block_type =~ /$bl_pattern/
+ && $block_type !~ /$bl_exclusion_pattern/ )
+ {
+ $rbrace_left->{$seqno} = 1;
+ }
+ }
+ }
+ return;
+} ## end sub braces_left_setup
+
+sub bli_adjustment {
+
+ # Called once per file to implement the --brace-left-and-indent option.
+ # If -bli is set, adds one continuation indentation for certain braces
+ my $self = shift;
+ return unless ( $rOpts->{'brace-left-and-indent'} );
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
+
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my $ris_bli_container = $self->[_ris_bli_container_];
+ my $rbrace_left = $self->[_rbrace_left_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+
+ foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
+ my $block_type = $rblock_type_of_seqno->{$seqno};
+ if ( $block_type
+ && $block_type =~ /$bli_pattern/
+ && $block_type !~ /$bli_exclusion_pattern/ )
+ {
+ $ris_bli_container->{$seqno} = 1;
+ $rbrace_left->{$seqno} = 1;
+ my $Ko = $K_opening_container->{$seqno};
+ my $Kc = $K_closing_container->{$seqno};
+ if ( defined($Ko) && defined($Kc) ) {
+ $rLL->[$Kc]->[_CI_LEVEL_] = ++$rLL->[$Ko]->[_CI_LEVEL_];
+ }
+ }
+ }
+ return;
+} ## end sub bli_adjustment
+
+sub find_multiline_qw {
+
+ my ( $self, $rqw_lines ) = @_;
+
+ # Multiline qw quotes are not sequenced items like containers { [ (
+ # but behave in some respects in a similar way. So this routine finds them
+ # and creates a separate sequence number system for later use.
+
+ # This is straightforward because they always begin at the end of one line
+ # and end at the beginning of a later line. This is true no matter how we
+ # finally make our line breaks, so we can find them before deciding on new
+ # line breaks.
+
+ # Input parameter:
+ # if $rqw_lines is defined it is a ref to array of all line index numbers
+ # for which there is a type 'q' qw quote at either end of the line. This
+ # was defined by sub resync_lines_and_tokens for efficiency.
+ #
+
+ my $rlines = $self->[_rlines_];
+
+ # if $rqw_lines is not defined (this will occur with -io option) then we
+ # will have to scan all lines.
+ if ( !defined($rqw_lines) ) {
+ $rqw_lines = [ 0 .. @{$rlines} - 1 ];
+ }
+
+ # if $rqw_lines is defined but empty, just return because there are no
+ # multiline qw's
+ else {
+ if ( !@{$rqw_lines} ) { return }
+ }
+
+ my $rstarting_multiline_qw_seqno_by_K = {};
+ my $rending_multiline_qw_seqno_by_K = {};
+ my $rKrange_multiline_qw_by_seqno = {};
+ my $rmultiline_qw_has_extra_level = {};
+
+ my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
+
+ my $rLL = $self->[_rLL_];
+ my $qw_seqno;
+ my $num_qw_seqno = 0;
+ my $K_start_multiline_qw;
+
+ # For reference, here is the old loop, before $rqw_lines became available:
+ ## foreach my $line_of_tokens ( @{$rlines} ) {
+ foreach my $iline ( @{$rqw_lines} ) {
+ my $line_of_tokens = $rlines->[$iline];
+
+ # Note that these first checks are required in case we have to scan
+ # all lines, not just lines with type 'q' at the ends.
+ my $line_type = $line_of_tokens->{_line_type};
+ next unless ( $line_type eq 'CODE' );
+ my $rK_range = $line_of_tokens->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ next unless ( defined($Kfirst) && defined($Klast) ); # skip blank line
+
+ # Continuing a sequence of qw lines ...
+ if ( defined($K_start_multiline_qw) ) {
+ my $type = $rLL->[$Kfirst]->[_TYPE_];
- # If welded, the line must not exceed allowed line length
- ( my $ok_to_weld, $maximum_text_length, $starting_lentot, my $msg )
- = $self->setup_new_weld_measurements( $Kouter_opening,
- $Kinner_opening );
- if ( !$ok_to_weld ) {
- if (DEBUG_WELD) { print $msg}
+ # shouldn't happen
+ if ( $type ne 'q' ) {
+ DEVEL_MODE && print STDERR <<EOM;
+STRANGE: started multiline qw at K=$K_start_multiline_qw but didn't see q qw at K=$Kfirst\n";
+EOM
+ $K_start_multiline_qw = undef;
next;
}
-
- my $length =
- $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_] - $starting_lentot;
- my $excess = $length + $multiline_tol - $maximum_text_length;
-
- my $excess_max = ( $is_old_weld ? $multiline_tol : 0 );
- if ( $excess >= $excess_max ) {
- $do_not_weld = 1;
+ my $Kprev = $self->K_previous_nonblank($Kfirst);
+ my $Knext = $self->K_next_nonblank($Kfirst);
+ my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
+ my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
+ if ( $type_m eq 'q' && $type_p ne 'q' ) {
+ $rending_multiline_qw_seqno_by_K->{$Kfirst} = $qw_seqno;
+ $rKrange_multiline_qw_by_seqno->{$qw_seqno} =
+ [ $K_start_multiline_qw, $Kfirst ];
+ $K_start_multiline_qw = undef;
+ $qw_seqno = undef;
}
+ }
- if (DEBUG_WELD) {
- if ( !$is_old_weld ) { $is_old_weld = EMPTY_STRING }
- $Msg .=
-"excess=$excess>=$excess_max, multiline_tol=$multiline_tol, is_old_weld='$is_old_weld'\n";
+ # Starting a new a sequence of qw lines ?
+ if ( !defined($K_start_multiline_qw)
+ && $rLL->[$Klast]->[_TYPE_] eq 'q' )
+ {
+ my $Kprev = $self->K_previous_nonblank($Klast);
+ my $Knext = $self->K_next_nonblank($Klast);
+ my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
+ my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
+ if ( $type_m ne 'q' && $type_p eq 'q' ) {
+ $num_qw_seqno++;
+ $qw_seqno = 'q' . $num_qw_seqno;
+ $K_start_multiline_qw = $Klast;
+ $rstarting_multiline_qw_seqno_by_K->{$Klast} = $qw_seqno;
}
+ }
+ }
- # Check weld exclusion rules for outer container
- if ( !$do_not_weld ) {
- my $is_leading = !defined( $rK_weld_left->{$Kouter_opening} );
- if ( $self->is_excluded_weld( $KK, $is_leading ) ) {
- if (DEBUG_WELD) {
- $Msg .=
-"No qw weld due to weld exclusion rules for outer container\n";
- }
- $do_not_weld = 1;
- }
- }
+ # Give multiline qw lists extra indentation instead of CI. This option
+ # works well but is currently only activated when the -xci flag is set.
+ # The reason is to avoid unexpected changes in formatting.
+ if ($rOpts_extended_continuation_indentation) {
+ while ( my ( $qw_seqno_x, $rKrange ) =
+ each %{$rKrange_multiline_qw_by_seqno} )
+ {
+ my ( $Kbeg, $Kend ) = @{$rKrange};
- # Check the length of the last line (fixes case b1039)
- if ( !$do_not_weld ) {
- my $rK_range_ic = $rlines->[$iline_ic]->{_rK_range};
- my ( $Kfirst_ic, $Klast_ic ) = @{$rK_range_ic};
- my $excess_ic =
- $self->excess_line_length_for_Krange( $Kfirst_ic,
- $Kouter_closing );
+ # require isolated closing token
+ my $token_end = $rLL->[$Kend]->[_TOKEN_];
+ next
+ unless ( length($token_end) == 1
+ && ( $is_closing_token{$token_end} || $token_end eq '>' ) );
- # Allow extra space for additional welded closing container(s)
- # and a space and comma or semicolon.
- # NOTE: weld len has not been computed yet. Use 2 spaces
- # for now, correct for a single weld. This estimate could
- # be made more accurate if necessary.
- my $weld_len =
- defined( $rK_weld_right->{$Kouter_closing} ) ? 2 : 0;
- if ( $excess_ic + $weld_len + 2 > 0 ) {
- if (DEBUG_WELD) {
- $Msg .=
-"No qw weld due to excess ending line length=$excess_ic + $weld_len + 2 > 0\n";
- }
- $do_not_weld = 1;
- }
- }
+ # require isolated opening token
+ my $token_beg = $rLL->[$Kbeg]->[_TOKEN_];
- if ($do_not_weld) {
- if (DEBUG_WELD) {
- $Msg .= "Not Welding QW\n";
- print $Msg;
- }
- next;
+ # allow space(s) after the qw
+ if ( length($token_beg) > 3 && substr( $token_beg, 2, 1 ) =~ m/\s/ )
+ {
+ $token_beg =~ s/\s+//;
}
- # OK to weld
- if (DEBUG_WELD) {
- $Msg .= "Welding QW\n";
- print $Msg;
+ next unless ( length($token_beg) == 3 );
+
+ foreach my $KK ( $Kbeg + 1 .. $Kend - 1 ) {
+ $rLL->[$KK]->[_LEVEL_]++;
+ $rLL->[$KK]->[_CI_LEVEL_] = 0;
}
- $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
- $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
+ # set flag for -wn option, which will remove the level
+ $rmultiline_qw_has_extra_level->{$qw_seqno_x} = 1;
+ }
+ }
- $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
- $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
+ # For the -lp option we need to mark all parent containers of
+ # multiline quotes
+ if ( $rOpts_line_up_parentheses && !$rOpts_extended_line_up_parentheses ) {
- # Undo one indentation level if an extra level was added to this
- # multiline quote
- my $qw_seqno =
- $self->[_rstarting_multiline_qw_seqno_by_K_]->{$Kinner_opening};
- if ( $qw_seqno
- && $self->[_rmultiline_qw_has_extra_level_]->{$qw_seqno} )
- {
- foreach my $K ( $Kinner_opening + 1 .. $Kinner_closing - 1 ) {
- $rLL->[$K]->[_LEVEL_] -= 1;
- }
- $rLL->[$Kinner_opening]->[_CI_LEVEL_] = 0;
- $rLL->[$Kinner_closing]->[_CI_LEVEL_] = 0;
- }
+ while ( my ( $qw_seqno_x, $rKrange ) =
+ each %{$rKrange_multiline_qw_by_seqno} )
+ {
+ my ( $Kbeg, $Kend ) = @{$rKrange};
+ my $parent_seqno = $self->parent_seqno_by_K($Kend);
+ next unless ($parent_seqno);
- # undo CI for other welded quotes
- else {
+ # If the parent container exactly surrounds this qw, then -lp
+ # formatting seems to work so we will not mark it.
+ my $is_tightly_contained;
+ my $Kn = $self->K_next_nonblank($Kend);
+ my $seqno_n = defined($Kn) ? $rLL->[$Kn]->[_TYPE_SEQUENCE_] : undef;
+ if ( defined($seqno_n) && $seqno_n eq $parent_seqno ) {
- foreach my $K ( $Kinner_opening .. $Kinner_closing ) {
- $rLL->[$K]->[_CI_LEVEL_] = 0;
+ my $Kp = $self->K_previous_nonblank($Kbeg);
+ my $seqno_p =
+ defined($Kp) ? $rLL->[$Kp]->[_TYPE_SEQUENCE_] : undef;
+ if ( defined($seqno_p) && $seqno_p eq $parent_seqno ) {
+ $is_tightly_contained = 1;
}
}
- # Change the level of a closing qw token to be that of the outer
- # containing token. This will allow -lp indentation to function
- # correctly in the vertical aligner.
- # Patch to fix c002: but not if it contains text
- if ( length( $rLL->[$Kinner_closing]->[_TOKEN_] ) == 1 ) {
- $rLL->[$Kinner_closing]->[_LEVEL_] =
- $rLL->[$Kouter_closing]->[_LEVEL_];
+ $ris_excluded_lp_container->{$parent_seqno} = 1
+ unless ($is_tightly_contained);
+
+ # continue up the tree marking parent containers
+ while (1) {
+ $parent_seqno = $self->[_rparent_of_seqno_]->{$parent_seqno};
+ last
+ unless ( defined($parent_seqno)
+ && $parent_seqno ne SEQ_ROOT );
+ $ris_excluded_lp_container->{$parent_seqno} = 1;
}
}
}
+
+ $self->[_rstarting_multiline_qw_seqno_by_K_] =
+ $rstarting_multiline_qw_seqno_by_K;
+ $self->[_rending_multiline_qw_seqno_by_K_] =
+ $rending_multiline_qw_seqno_by_K;
+ $self->[_rKrange_multiline_qw_by_seqno_] = $rKrange_multiline_qw_by_seqno;
+ $self->[_rmultiline_qw_has_extra_level_] = $rmultiline_qw_has_extra_level;
+
return;
-} ## end sub weld_nested_quotes
+} ## end sub find_multiline_qw
-sub is_welded_at_seqno {
+use constant DEBUG_COLLAPSED_LENGTHS => 0;
- my ( $self, $seqno ) = @_;
+# Minimum space reserved for contents of a code block. A value of 40 has given
+# reasonable results. With a large line length, say -l=120, this will not
+# normally be noticeable but it will prevent making a mess in some edge cases.
+use constant MIN_BLOCK_LEN => 40;
- # given a sequence number:
- # return true if it is welded either left or right
- # return false otherwise
- return unless ( $total_weld_count && defined($seqno) );
- my $KK_o = $self->[_K_opening_container_]->{$seqno};
- return unless defined($KK_o);
- return defined( $self->[_rK_weld_left_]->{$KK_o} )
- || defined( $self->[_rK_weld_right_]->{$KK_o} );
-} ## end sub is_welded_at_seqno
+my %is_handle_type;
-sub mark_short_nested_blocks {
+BEGIN {
+ my @q = qw( w C U G i k => );
+ @is_handle_type{@q} = (1) x scalar(@q);
- # This routine looks at the entire file and marks any short nested blocks
- # which should not be broken. The results are stored in the hash
- # $rshort_nested->{$type_sequence}
- # which will be true if the container should remain intact.
- #
- # For example, consider the following line:
+ my $i = 0;
+ use constant {
+ _max_prong_len_ => $i++,
+ _handle_len_ => $i++,
+ _seqno_o_ => $i++,
+ _iline_o_ => $i++,
+ _K_o_ => $i++,
+ _K_c_ => $i++,
+ _interrupted_list_rule_ => $i++,
+ };
+} ## end BEGIN
- # sub cxt_two { sort { $a <=> $b } test_if_list() }
+sub is_fragile_block_type {
+ my ( $self, $block_type, $seqno ) = @_;
- # The 'sort' block is short and nested within an outer sub block.
- # Normally, the existence of the 'sort' block will force the sub block to
- # break open, but this is not always desirable. Here we will set a flag for
- # the sort block to prevent this. To give the user control, we will
- # follow the input file formatting. If either of the blocks is broken in
- # the input file then we will allow it to remain broken. Otherwise we will
- # set a flag to keep it together in later formatting steps.
+ # Given:
+ # $block_type = the block type of a token, and
+ # $seqno = its sequence number
- # The flag which is set here will be checked in two places:
- # 'sub process_line_of_CODE' and 'sub starting_one_line_block'
+ # Return:
+ # true if this block type stays broken after being broken,
+ # false otherwise
- my $self = shift;
- return if $rOpts->{'indent-only'};
+ # This sub has been added to isolate a tricky decision needed
+ # to fix issue b1428.
- my $rLL = $self->[_rLL_];
- return unless ( defined($rLL) && @{$rLL} );
+ # The coding here needs to agree with:
+ # - sub process_line where variable '$rbrace_follower' is set
+ # - sub process_line_inner_loop where variable '$is_opening_BLOCK' is set,
- return unless ( $rOpts->{'one-line-block-nesting'} );
+ if ( $is_sort_map_grep_eval{$block_type}
+ || $block_type eq 't'
+ || $self->[_rshort_nested_]->{$seqno} )
+ {
+ return 0;
+ }
- my $K_opening_container = $self->[_K_opening_container_];
- my $K_closing_container = $self->[_K_closing_container_];
- my $rbreak_container = $self->[_rbreak_container_];
- my $rshort_nested = $self->[_rshort_nested_];
- my $rlines = $self->[_rlines_];
- my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ return 1;
- # Variables needed for estimating line lengths
- my $maximum_text_length;
- my $starting_lentot;
- my $length_tol = 1;
+} ## end sub is_fragile_block_type
- my $excess_length_to_K = sub {
- my ($K) = @_;
+{ ## closure xlp_collapsed_lengths
- # Estimate the length from the line start to a given token
- my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
- my $excess_length = $length + $length_tol - $maximum_text_length;
- return ($excess_length);
- };
+ my $max_prong_len;
+ my $len;
+ my $last_nonblank_type;
+ my @stack;
- my $is_broken_block = sub {
+ sub xlp_collapsed_lengths_initialize {
- # a block is broken if the input line numbers of the braces differ
- my ($seqno) = @_;
- my $K_opening = $K_opening_container->{$seqno};
- return unless ( defined($K_opening) );
- my $K_closing = $K_closing_container->{$seqno};
- return unless ( defined($K_closing) );
- return $rbreak_container->{$seqno}
- || $rLL->[$K_closing]->[_LINE_INDEX_] !=
- $rLL->[$K_opening]->[_LINE_INDEX_];
- };
+ $max_prong_len = 0;
+ $len = 0;
+ $last_nonblank_type = 'b';
+ @stack = ();
- # loop over all containers
- my @open_block_stack;
- my $iline = -1;
- my $KNEXT = $self->[_K_first_seq_item_];
- while ( defined($KNEXT) ) {
- my $KK = $KNEXT;
- $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
- my $rtoken_vars = $rLL->[$KK];
- my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
- if ( !$type_sequence ) {
- next if ( $KK == 0 ); # first token in file may not be container
+ push @stack, [
+ 0, # $max_prong_len,
+ 0, # $handle_len,
+ SEQ_ROOT, # $seqno,
+ undef, # $iline,
+ undef, # $KK,
+ undef, # $K_c,
+ undef, # $interrupted_list_rule
+ ];
- # A fault here implies that an error was made in the little loop at
- # the bottom of sub 'respace_tokens' which set the values of
- # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
- # loop control lines above.
- Fault("sequence = $type_sequence not defined at K=$KK")
- if (DEVEL_MODE);
- next;
- }
+ return;
+ } ## end sub xlp_collapsed_lengths_initialize
- # Patch: do not mark short blocks with welds.
- # In some cases blinkers can form (case b690).
- if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence) ) {
- next;
- }
+ sub cumulative_length_to_comma {
+ my ( $self, $KK, $K_comma, $K_closing ) = @_;
- # We are just looking at code blocks
- my $token = $rtoken_vars->[_TOKEN_];
- my $type = $rtoken_vars->[_TYPE_];
- next unless ( $type eq $token );
- next unless ( $rblock_type_of_seqno->{$type_sequence} );
+ # Given:
+ # $KK = index of starting token, or blank before start
+ # $K_comma = index of line-ending comma
+ # $K_closing = index of the container closing token
- # Keep a stack of all acceptable block braces seen.
- # Only consider blocks entirely on one line so dump the stack when line
- # changes.
- my $iline_last = $iline;
- $iline = $rLL->[$KK]->[_LINE_INDEX_];
- if ( $iline != $iline_last ) { @open_block_stack = () }
+ # Return:
+ # $length = cumulative length of the term
- if ( $token eq '}' ) {
- if (@open_block_stack) { pop @open_block_stack }
- }
- next unless ( $token eq '{' );
+ my $rLL = $self->[_rLL_];
+ if ( $rLL->[$KK]->[_TYPE_] eq 'b' ) { $KK++ }
+ my $length = 0;
+ if (
+ $KK < $K_comma
+ && $rLL->[$K_comma]->[_TYPE_] eq ',' # should be true
- # block must be balanced (bad scripts may be unbalanced)
- my $K_opening = $K_opening_container->{$type_sequence};
- my $K_closing = $K_closing_container->{$type_sequence};
- next unless ( defined($K_opening) && defined($K_closing) );
+ # Ignore if terminal comma, causes instability (b1297,
+ # b1330)
+ && (
+ $K_closing - $K_comma > 2
+ || ( $K_closing - $K_comma == 2
+ && $rLL->[ $K_comma + 1 ]->[_TYPE_] ne 'b' )
+ )
- # require that this block be entirely on one line
- next if ( $is_broken_block->($type_sequence) );
+ # The comma should be in this container
+ && ( $rLL->[$K_comma]->[_LEVEL_] - 1 ==
+ $rLL->[$K_closing]->[_LEVEL_] )
+ )
+ {
- # See if this block fits on one line of allowed length (which may
- # be different from the input script)
- $starting_lentot =
- $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
- my $level = $rLL->[$KK]->[_LEVEL_];
- my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
- $maximum_text_length =
- $maximum_text_length_at_level[$level] -
- $ci_level * $rOpts_continuation_indentation;
+ # An additional check: if line ends in ), and the ) has vtc then
+ # skip this estimate. Otherwise, vtc can give oscillating results.
+ # Fixes b1448. For example, this could be unstable:
- # Dump the stack if block is too long and skip this block
- if ( $excess_length_to_K->($K_closing) > 0 ) {
- @open_block_stack = ();
- next;
+ # ( $os ne 'win' ? ( -selectcolor => "red" ) : () ),
+ # | |^--K_comma
+ # | ^-- K_prev
+ # ^--- KK
+
+ # An alternative, possibly better strategy would be to try to turn
+ # off -vtc locally, but it turns out to be difficult to locate the
+ # appropriate closing token when it is not on the same line as its
+ # opening token.
+
+ my $K_prev = $self->K_previous_nonblank($K_comma);
+ if ( defined($K_prev)
+ && $K_prev >= $KK
+ && $rLL->[$K_prev]->[_TYPE_SEQUENCE_] )
+ {
+ my $token = $rLL->[$K_prev]->[_TOKEN_];
+ my $type = $rLL->[$K_prev]->[_TYPE_];
+ if ( $closing_vertical_tightness{$token} && $type ne 'R' ) {
+ ## type 'R' does not normally get broken, so ignore
+ ## skip length calculation
+ return 0;
+ }
+ }
+ my $starting_len =
+ $KK >= 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0;
+ $length = $rLL->[$K_comma]->[_CUMULATIVE_LENGTH_] - $starting_len;
}
+ return $length;
+ } ## end sub cumulative_length_to_comma
- # OK, Block passes tests, remember it
- push @open_block_stack, $type_sequence;
+ sub xlp_collapsed_lengths {
- # We are only marking nested code blocks,
- # so check for a previous block on the stack
- next unless ( @open_block_stack > 1 );
+ my $self = shift;
- # Looks OK, mark this as a short nested block
- $rshort_nested->{$type_sequence} = 1;
+ #----------------------------------------------------------------
+ # Define the collapsed lengths of containers for -xlp indentation
+ #----------------------------------------------------------------
- }
- return;
-} ## end sub mark_short_nested_blocks
+ # We need an estimate of the minimum required line length starting at
+ # any opening container for the -xlp style. This is needed to avoid
+ # using too much indentation space for lower level containers and
+ # thereby running out of space for outer container tokens due to the
+ # maximum line length limit.
-sub adjust_indentation_levels {
+ # The basic idea is that at each node in the tree we imagine that we
+ # have a fork with a handle and collapsible prongs:
+ #
+ # |------------
+ # |--------
+ # ------------|-------
+ # handle |------------
+ # |--------
+ # prongs
+ #
+ # Each prong has a minimum collapsed length. The collapsed length at a
+ # node is the maximum of these minimum lengths, plus the handle length.
+ # Each of the prongs may itself be a tree node.
- my ($self) = @_;
+ # This is just a rough calculation to get an approximate starting point
+ # for indentation. Later routines will be more precise. It is
+ # important that these estimates be independent of the line breaks of
+ # the input stream in order to avoid instabilities.
- # Called once per file to do special indentation adjustments.
- # These routines adjust levels either by changing _CI_LEVEL_ directly or
- # by setting modified levels in the array $self->[_radjusted_levels_].
+ my $rLL = $self->[_rLL_];
+ my $rlines = $self->[_rlines_];
+ my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
+ my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
- # Initialize the adjusted levels. These will be the levels actually used
- # for computing indentation.
+ my $K_start_multiline_qw;
+ my $level_start_multiline_qw = 0;
- # NOTE: This routine is called after the weld routines, which may have
- # already adjusted _LEVEL_, so we are making adjustments on top of those
- # levels. It would be much nicer to have the weld routines also use this
- # adjustment, but that gets complicated when we combine -gnu -wn and have
- # some welded quotes.
- my $Klimit = $self->[_Klimit_];
- my $rLL = $self->[_rLL_];
- my $radjusted_levels = $self->[_radjusted_levels_];
+ xlp_collapsed_lengths_initialize();
- return unless ( defined($Klimit) );
+ #--------------------------------
+ # Loop over all lines in the file
+ #--------------------------------
+ my $iline = -1;
+ my $skip_next_line;
+ foreach my $line_of_tokens ( @{$rlines} ) {
+ $iline++;
+ if ($skip_next_line) {
+ $skip_next_line = 0;
+ next;
+ }
+ my $line_type = $line_of_tokens->{_line_type};
+ next if ( $line_type ne 'CODE' );
+ my $CODE_type = $line_of_tokens->{_code_type};
- foreach my $KK ( 0 .. $Klimit ) {
- $radjusted_levels->[$KK] = $rLL->[$KK]->[_LEVEL_];
- }
+ # Always skip blank lines
+ next if ( $CODE_type eq 'BL' );
+
+ # Note on other line types:
+ # 'FS' (Format Skipping) lines may contain opening/closing tokens so
+ # we have to process them to keep the stack correctly sequenced
+ # 'VB' (Verbatim) lines could be skipped, but testing shows that
+ # results look better if we include their lengths.
+
+ # Also note that we could exclude -xlp formatting of containers with
+ # 'FS' and 'VB' lines, but in testing that was not really beneficial
+
+ # So we process tokens in 'FS' and 'VB' lines like all the rest...
+
+ my $rK_range = $line_of_tokens->{_rK_range};
+ my ( $K_first, $K_last ) = @{$rK_range};
+ next unless ( defined($K_first) && defined($K_last) );
+
+ my $has_comment = $rLL->[$K_last]->[_TYPE_] eq '#';
+
+ # Always ignore block comments
+ next if ( $has_comment && $K_first == $K_last );
+
+ # Handle an intermediate line of a multiline qw quote. These may
+ # require including some -ci or -i spaces. See cases c098/x063.
+ # Updated to check all lines (not just $K_first==$K_last) to fix
+ # b1316
+ my $K_begin_loop = $K_first;
+ if ( $rLL->[$K_first]->[_TYPE_] eq 'q' ) {
+
+ my $KK = $K_first;
+ my $level = $rLL->[$KK]->[_LEVEL_];
+ my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
+
+ # remember the level of the start
+ if ( !defined($K_start_multiline_qw) ) {
+ $K_start_multiline_qw = $K_first;
+ $level_start_multiline_qw = $level;
+ my $seqno_qw =
+ $self->[_rstarting_multiline_qw_seqno_by_K_]
+ ->{$K_start_multiline_qw};
+ if ( !$seqno_qw ) {
+ my $Kp = $self->K_previous_nonblank($K_first);
+ if ( defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' ) {
+
+ $K_start_multiline_qw = $Kp;
+ $level_start_multiline_qw =
+ $rLL->[$K_start_multiline_qw]->[_LEVEL_];
+ }
+ else {
- # First set adjusted levels for any non-indenting braces.
- $self->do_non_indenting_braces();
+ # Fix for b1319, b1320
+ $K_start_multiline_qw = undef;
+ }
+ }
+ }
- # Adjust breaks and indentation list containers
- $self->break_before_list_opening_containers();
+ if ( defined($K_start_multiline_qw) ) {
+ $len = $rLL->[$KK]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
- # Set adjusted levels for the whitespace cycle option.
- $self->whitespace_cycle_adjustment();
+ # We may have to add the spaces of one level or ci level
+ # ... it depends depends on the -xci flag, the -wn flag,
+ # and if the qw uses a container token as the quote
+ # delimiter.
- $self->braces_left_setup();
+ # First rule: add ci if there is a $ci_level
+ if ($ci_level) {
+ $len += $rOpts_continuation_indentation;
+ }
- # Adjust continuation indentation if -bli is set
- $self->bli_adjustment();
+ # Second rule: otherwise, look for an extra indentation
+ # level from the start and add one indentation level if
+ # found.
+ elsif ( $level > $level_start_multiline_qw ) {
+ $len += $rOpts_indent_columns;
+ }
- $self->extended_ci()
- if ($rOpts_extended_continuation_indentation);
+ if ( $len > $max_prong_len ) { $max_prong_len = $len }
- # Now clip any adjusted levels to be non-negative
- $self->clip_adjusted_levels();
+ $last_nonblank_type = 'q';
- return;
-} ## end sub adjust_indentation_levels
+ $K_begin_loop = $K_first + 1;
-sub clip_adjusted_levels {
+ # We can skip to the next line if more tokens
+ next if ( $K_begin_loop > $K_last );
+ }
+ }
- # Replace any negative adjusted levels with zero.
- # Negative levels can occur in files with brace errors.
- my ($self) = @_;
- my $radjusted_levels = $self->[_radjusted_levels_];
- return unless defined($radjusted_levels) && @{$radjusted_levels};
- foreach ( @{$radjusted_levels} ) { $_ = 0 if ( $_ < 0 ) }
- return;
-} ## end sub clip_adjusted_levels
+ $K_start_multiline_qw = undef;
-sub do_non_indenting_braces {
+ # Find the terminal token, before any side comment
+ my $K_terminal = $K_last;
+ if ($has_comment) {
+ $K_terminal -= 1;
+ $K_terminal -= 1
+ if ( $rLL->[$K_terminal]->[_TYPE_] eq 'b'
+ && $K_terminal > $K_first );
+ }
- # Called once per file to handle the --non-indenting-braces parameter.
- # Remove indentation within marked braces if requested
- my ($self) = @_;
+ # Use length to terminal comma if interrupted list rule applies
+ if ( @stack && $stack[-1]->[_interrupted_list_rule_] ) {
+ my $K_c = $stack[-1]->[_K_c_];
+ if ( defined($K_c) ) {
- # Any non-indenting braces have been found by sub find_non_indenting_braces
- # and are defined by the following hash:
- my $rseqno_non_indenting_brace_by_ix =
- $self->[_rseqno_non_indenting_brace_by_ix_];
- return unless ( %{$rseqno_non_indenting_brace_by_ix} );
+ #----------------------------------------------------------
+ # BEGIN patch for issue b1408: If this line ends in an
+ # opening token, look for the closing token and comma at
+ # the end of the next line. If so, combine the two lines to
+ # get the correct sums. This problem seems to require -xlp
+ # -vtc=2 and blank lines to occur. Use %is_opening_type to
+ # fix b1431.
+ #----------------------------------------------------------
+ if ( $is_opening_type{ $rLL->[$K_terminal]->[_TYPE_] }
+ && !$has_comment )
+ {
+ my $seqno_end = $rLL->[$K_terminal]->[_TYPE_SEQUENCE_];
+ my $Kc_test = $rLL->[$K_terminal]->[_KNEXT_SEQ_ITEM_];
- my $rLL = $self->[_rLL_];
- my $rlines = $self->[_rlines_];
- my $K_opening_container = $self->[_K_opening_container_];
- my $K_closing_container = $self->[_K_closing_container_];
- my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
- my $radjusted_levels = $self->[_radjusted_levels_];
+ # We are looking for a short broken remnant on the next
+ # line; something like the third line here (b1408):
- # First locate all of the marked blocks
- my @K_stack;
- foreach my $ix ( keys %{$rseqno_non_indenting_brace_by_ix} ) {
- my $seqno = $rseqno_non_indenting_brace_by_ix->{$ix};
- my $KK = $K_opening_container->{$seqno};
- my $line_of_tokens = $rlines->[$ix];
- my $rK_range = $line_of_tokens->{_rK_range};
- my ( $Kfirst, $Klast ) = @{$rK_range};
- $rspecial_side_comment_type->{$Klast} = 'NIB';
- push @K_stack, [ $KK, 1 ];
- my $Kc = $K_closing_container->{$seqno};
- push @K_stack, [ $Kc, -1 ] if ( defined($Kc) );
- }
- return unless (@K_stack);
- @K_stack = sort { $a->[0] <=> $b->[0] } @K_stack;
+ # parent =>
+ # Moose::Util::TypeConstraints::find_type_constraint(
+ # 'RefXX' ),
+ # or this
+ #
+ # Help::WorkSubmitter->_filter_chores_and_maybe_warn_user(
+ # $story_set_all_chores),
+ # or this (b1431):
+ # $issue->{
+ # 'borrowernumber'}, # borrowernumber
+ if ( defined($Kc_test)
+ && $seqno_end == $rLL->[$Kc_test]->[_TYPE_SEQUENCE_]
+ && $rLL->[$Kc_test]->[_LINE_INDEX_] == $iline + 1 )
+ {
+ my $line_of_tokens_next = $rlines->[ $iline + 1 ];
+ my $rtype_count =
+ $rtype_count_by_seqno->{$seqno_end};
+ my ( $K_first_next, $K_terminal_next ) =
+ @{ $line_of_tokens_next->{_rK_range} };
+
+ # backup at a side comment
+ if ( defined($K_terminal_next)
+ && $rLL->[$K_terminal_next]->[_TYPE_] eq '#' )
+ {
+ my $Kprev =
+ $self->K_previous_nonblank($K_terminal_next);
+ if ( defined($Kprev)
+ && $Kprev >= $K_first_next )
+ {
+ $K_terminal_next = $Kprev;
+ }
+ }
- # Then loop to remove indentation within marked blocks
- my $KK_last = 0;
- my $ndeep = 0;
- foreach my $item (@K_stack) {
- my ( $KK, $inc ) = @{$item};
- if ( $ndeep > 0 ) {
+ if (
+ defined($K_terminal_next)
- foreach ( $KK_last + 1 .. $KK ) {
- $radjusted_levels->[$_] -= $ndeep;
- }
+ # next line ends with a comma
+ && $rLL->[$K_terminal_next]->[_TYPE_] eq ','
- # We just subtracted the old $ndeep value, which only applies to a
- # '{'. The new $ndeep applies to a '}', so we undo the error.
- if ( $inc < 0 ) { $radjusted_levels->[$KK] += 1 }
- }
+ # which follows the closing container token
+ && (
+ $K_terminal_next - $Kc_test == 1
+ || ( $K_terminal_next - $Kc_test == 2
+ && $rLL->[ $K_terminal_next - 1 ]
+ ->[_TYPE_] eq 'b' )
+ )
- $ndeep += $inc;
- $KK_last = $KK;
- }
- return;
-} ## end sub do_non_indenting_braces
+ # no commas in the container
+ && ( !defined($rtype_count)
+ || !$rtype_count->{','} )
-sub whitespace_cycle_adjustment {
+ # for now, restrict this to a container with
+ # just 1 or two tokens
+ && $K_terminal_next - $K_terminal <= 5
- my $self = shift;
+ )
+ {
- # Called once per file to implement the --whitespace-cycle option
- my $rLL = $self->[_rLL_];
- return unless ( defined($rLL) && @{$rLL} );
- my $radjusted_levels = $self->[_radjusted_levels_];
- my $maximum_level = $self->[_maximum_level_];
+ # combine the next line with the current line
+ $K_terminal = $K_terminal_next;
+ $skip_next_line = 1;
+ if (DEBUG_COLLAPSED_LENGTHS) {
+ print "Combining lines at line $iline\n";
+ }
+ }
+ }
+ }
- if ( $rOpts_whitespace_cycle
- && $rOpts_whitespace_cycle > 0
- && $rOpts_whitespace_cycle < $maximum_level )
- {
+ #--------------------------
+ # END patch for issue b1408
+ #--------------------------
+ if ( $rLL->[$K_terminal]->[_TYPE_] eq ',' ) {
- my $Kmax = @{$rLL} - 1;
+ my $length =
+ $self->cumulative_length_to_comma( $K_first,
+ $K_terminal, $K_c );
- my $whitespace_last_level = -1;
- my @whitespace_level_stack = ();
- my $last_nonblank_type = 'b';
- my $last_nonblank_token = EMPTY_STRING;
- foreach my $KK ( 0 .. $Kmax ) {
- my $level_abs = $radjusted_levels->[$KK];
- my $level = $level_abs;
- if ( $level_abs < $whitespace_last_level ) {
- pop(@whitespace_level_stack);
- }
- if ( !@whitespace_level_stack ) {
- push @whitespace_level_stack, $level_abs;
+ # Fix for b1331: at a broken => item, include the
+ # length of the previous half of the item plus one for
+ # the missing space
+ if ( $last_nonblank_type eq '=>' ) {
+ $length += $len + 1;
+ }
+ if ( $length > $max_prong_len ) {
+ $max_prong_len = $length;
+ }
+ }
+ }
}
- elsif ( $level_abs > $whitespace_last_level ) {
- $level = $whitespace_level_stack[-1] +
- ( $level_abs - $whitespace_last_level );
-
- if (
- # 1 Try to break at a block brace
- (
- $level > $rOpts_whitespace_cycle
- && $last_nonblank_type eq '{'
- && $last_nonblank_token eq '{'
- )
- # 2 Then either a brace or bracket
- || ( $level > $rOpts_whitespace_cycle + 1
- && $last_nonblank_token =~ /^[\{\[]$/ )
+ #----------------------------------
+ # Loop over all tokens on this line
+ #----------------------------------
+ $self->xlp_collapse_lengths_inner_loop( $iline, $K_begin_loop,
+ $K_terminal, $K_last );
- # 3 Then a paren too
- || $level > $rOpts_whitespace_cycle + 2
- )
- {
- $level = 1;
+ # Now take care of any side comment;
+ if ($has_comment) {
+ if ($rOpts_ignore_side_comment_lengths) {
+ $len = 0;
+ }
+ else {
+
+ # For a side comment when -iscl is not set, measure length from
+ # the start of the previous nonblank token
+ my $len0 =
+ $K_terminal > 0
+ ? $rLL->[ $K_terminal - 1 ]->[_CUMULATIVE_LENGTH_]
+ : 0;
+ $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] - $len0;
+ if ( $len > $max_prong_len ) { $max_prong_len = $len }
}
- push @whitespace_level_stack, $level;
}
- $level = $whitespace_level_stack[-1];
- $radjusted_levels->[$KK] = $level;
- $whitespace_last_level = $level_abs;
- my $type = $rLL->[$KK]->[_TYPE_];
- my $token = $rLL->[$KK]->[_TOKEN_];
- if ( $type ne 'b' ) {
- $last_nonblank_type = $type;
- $last_nonblank_token = $token;
+ } ## end loop over lines
+
+ if (DEBUG_COLLAPSED_LENGTHS) {
+ print "\nCollapsed lengths--\n";
+ foreach
+ my $key ( sort { $a <=> $b } keys %{$rcollapsed_length_by_seqno} )
+ {
+ my $clen = $rcollapsed_length_by_seqno->{$key};
+ print "$key -> $clen\n";
}
}
- }
- return;
-} ## end sub whitespace_cycle_adjustment
-use constant DEBUG_BBX => 0;
+ return;
+ } ## end sub xlp_collapsed_lengths
-sub break_before_list_opening_containers {
+ sub xlp_collapse_lengths_inner_loop {
- my ($self) = @_;
+ my ( $self, $iline, $K_begin_loop, $K_terminal, $K_last ) = @_;
- # This routine is called once per batch to implement parameters
- # --break-before-hash-brace=n and similar -bbx=n flags
- # and their associated indentation flags:
- # --break-before-hash-brace-and-indent and similar -bbxi=n
+ my $rLL = $self->[_rLL_];
+ my $K_closing_container = $self->[_K_closing_container_];
- # Nothing to do if none of the -bbx=n parameters has been set
- return unless %break_before_container_types;
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
+ my $ris_permanently_broken = $self->[_ris_permanently_broken_];
+ my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
+ my $rhas_broken_list = $self->[_rhas_broken_list_];
+ my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
- my $rLL = $self->[_rLL_];
- return unless ( defined($rLL) && @{$rLL} );
+ #----------------------------------
+ # Loop over tokens on this line ...
+ #----------------------------------
+ foreach my $KK ( $K_begin_loop .. $K_terminal ) {
- # Loop over all opening container tokens
- my $K_opening_container = $self->[_K_opening_container_];
- my $K_closing_container = $self->[_K_closing_container_];
- my $ris_broken_container = $self->[_ris_broken_container_];
- my $ris_permanently_broken = $self->[_ris_permanently_broken_];
- my $rhas_list = $self->[_rhas_list_];
- my $rhas_broken_list = $self->[_rhas_broken_list_];
- my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
- my $radjusted_levels = $self->[_radjusted_levels_];
- my $rparent_of_seqno = $self->[_rparent_of_seqno_];
- my $rlines = $self->[_rlines_];
- my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
- my $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
- my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
- my $rK_weld_right = $self->[_rK_weld_right_];
- my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my $type = $rLL->[$KK]->[_TYPE_];
+ next if ( $type eq 'b' );
- my $length_tol =
- max( 1, $rOpts_continuation_indentation, $rOpts_indent_columns );
- if ($rOpts_ignore_old_breakpoints) {
+ #------------------------
+ # Handle sequenced tokens
+ #------------------------
+ my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+ if ($seqno) {
- # Patch suggested by b1231; the old tol was excessive.
- ## $length_tol += $rOpts_maximum_line_length;
- $length_tol *= 2;
- }
+ my $token = $rLL->[$KK]->[_TOKEN_];
- my $rbreak_before_container_by_seqno = {};
- my $rwant_reduced_ci = {};
- foreach my $seqno ( keys %{$K_opening_container} ) {
+ #----------------------------
+ # Entering a new container...
+ #----------------------------
+ if ( $is_opening_token{$token}
+ && defined( $K_closing_container->{$seqno} ) )
+ {
- #----------------------------------------------------------------
- # Part 1: Examine any -bbx=n flags
- #----------------------------------------------------------------
+ # save current prong length
+ $stack[-1]->[_max_prong_len_] = $max_prong_len;
+ $max_prong_len = 0;
- next if ( $rblock_type_of_seqno->{$seqno} );
- my $KK = $K_opening_container->{$seqno};
+ # Start new prong one level deeper
+ my $handle_len = 0;
+ if ( $rblock_type_of_seqno->{$seqno} ) {
- # This must be a list or contain a list.
- # Note1: switched from 'has_broken_list' to 'has_list' to fix b1024.
- # Note2: 'has_list' holds the depth to the sub-list. We will require
- # a depth of just 1
- my $is_list = $self->is_list_by_seqno($seqno);
- my $has_list = $rhas_list->{$seqno};
+ # code blocks do not use -lp indentation, but behave as
+ # if they had a handle of one indentation length
+ $handle_len = $rOpts_indent_columns;
- # Fix for b1173: if welded opening container, use flag of innermost
- # seqno. Otherwise, the restriction $has_list==1 prevents triple and
- # higher welds from following the -BBX parameters.
- if ($total_weld_count) {
- my $KK_test = $rK_weld_right->{$KK};
- if ( defined($KK_test) ) {
- my $seqno_inner = $rLL->[$KK_test]->[_TYPE_SEQUENCE_];
- $is_list ||= $self->is_list_by_seqno($seqno_inner);
- $has_list = $rhas_list->{$seqno_inner};
- }
- }
+ }
+ elsif ( $is_handle_type{$last_nonblank_type} ) {
+ $handle_len = $len;
+ $handle_len += 1
+ if ( $KK > 0 && $rLL->[ $KK - 1 ]->[_TYPE_] eq 'b' );
+ }
- next unless ( $is_list || $has_list && $has_list == 1 );
+ # Set a flag if the 'Interrupted List Rule' will be applied
+ # (see sub copy_old_breakpoints).
+ # - Added check on has_broken_list to fix issue b1298
- my $has_broken_list = $rhas_broken_list->{$seqno};
- my $has_list_with_lec = $rhas_broken_list_with_lec->{$seqno};
+ my $interrupted_list_rule =
+ $ris_permanently_broken->{$seqno}
+ && $ris_list_by_seqno->{$seqno}
+ && !$rhas_broken_list->{$seqno}
+ && !$rOpts_ignore_old_breakpoints;
- # Only for types of container tokens with a non-default break option
- my $token = $rLL->[$KK]->[_TOKEN_];
- my $break_option = $break_before_container_types{$token};
- next unless ($break_option);
+ # NOTES: Since we are looking at old line numbers we have
+ # to be very careful not to introduce an instability.
- # Do not use -bbx under stress for stability ... fixes b1300
- my $level = $rLL->[$KK]->[_LEVEL_];
- if ( $level >= $stress_level_beta ) {
- DEBUG_BBX
- && print
-"BBX: Switching off at $seqno: level=$level exceeds beta stress level=$stress_level_beta\n";
- next;
- }
+ # This following causes instability (b1288-b1296):
+ # $interrupted_list_rule ||=
+ # $rOpts_break_at_old_comma_breakpoints;
- # Require previous nonblank to be '=' or '=>'
- my $Kprev = $KK - 1;
- next if ( $Kprev < 0 );
- my $prev_type = $rLL->[$Kprev]->[_TYPE_];
- if ( $prev_type eq 'b' ) {
- $Kprev--;
- next if ( $Kprev < 0 );
- $prev_type = $rLL->[$Kprev]->[_TYPE_];
- }
- next unless ( $is_equal_or_fat_comma{$prev_type} );
+ # - We could turn off the interrupted list rule if there is
+ # a broken sublist, to follow 'Compound List Rule 1'.
+ # - We could use the _rhas_broken_list_ flag for this.
+ # - But it seems safer not to do this, to avoid
+ # instability, since the broken sublist could be
+ # temporary. It seems better to let the formatting
+ # stabilize by itself after one or two iterations.
+ # - So, not doing this for now
- my $ci = $rLL->[$KK]->[_CI_LEVEL_];
+ # Turn off the interrupted list rule if -vmll is set and a
+ # list has '=>' characters. This avoids instabilities due
+ # to dependence on old line breaks; issue b1325.
+ if ( $interrupted_list_rule
+ && $rOpts_variable_maximum_line_length )
+ {
+ my $rtype_count = $rtype_count_by_seqno->{$seqno};
+ if ( $rtype_count && $rtype_count->{'=>'} ) {
+ $interrupted_list_rule = 0;
+ }
+ }
- #--------------------------------------------
- # New coding for option 2 (break if complex).
- #--------------------------------------------
- # This new coding uses clues which are invariant under formatting to
- # decide if a list is complex. For now it is only applied when -lp
- # and -vmll are used, but eventually it may become the standard method.
- # Fixes b1274, b1275, and others, including b1099.
- if ( $break_option == 2 ) {
+ my $K_c = $K_closing_container->{$seqno};
- if ( $rOpts_line_up_parentheses
- || $rOpts_variable_maximum_line_length )
- {
+ # Add length of any terminal list item if interrupted
+ # so that the result is the same as if the term is
+ # in the next line (b1446).
- # Start with the basic definition of a complex list...
- my $is_complex = $is_list && $has_list;
+ if (
+ $interrupted_list_rule
+ && $KK < $K_terminal
- # and it is also complex if the parent is a list
- if ( !$is_complex ) {
- my $parent = $rparent_of_seqno->{$seqno};
- if ( $self->is_list_by_seqno($parent) ) {
- $is_complex = 1;
+ # The line should end in a comma
+ # NOTE: this currently assumes break after comma.
+ # As long as the other call to cumulative_length..
+ # makes the same assumption we should remain stable.
+ && $rLL->[$K_terminal]->[_TYPE_] eq ','
+
+ )
+ {
+ $max_prong_len =
+ $self->cumulative_length_to_comma( $KK + 1,
+ $K_terminal, $K_c );
}
+
+ push @stack, [
+
+ $max_prong_len,
+ $handle_len,
+ $seqno,
+ $iline,
+ $KK,
+ $K_c,
+ $interrupted_list_rule
+ ];
+
}
- # finally, we will call it complex if there are inner opening
- # and closing container tokens, not parens, within the outer
- # container tokens.
- if ( !$is_complex ) {
- my $Kp = $self->K_next_nonblank($KK);
- my $token_p = defined($Kp) ? $rLL->[$Kp]->[_TOKEN_] : 'b';
- if ( $is_opening_token{$token_p} && $token_p ne '(' ) {
+ #--------------------
+ # Exiting a container
+ #--------------------
+ elsif ( $is_closing_token{$token} && @stack ) {
+
+ # The current prong ends - get its handle
+ my $item = pop @stack;
+ my $handle_len = $item->[_handle_len_];
+ my $seqno_o = $item->[_seqno_o_];
+ my $iline_o = $item->[_iline_o_];
+ my $K_o = $item->[_K_o_];
+ my $K_c_expect = $item->[_K_c_];
+ my $collapsed_len = $max_prong_len;
+
+ if ( $seqno_o ne $seqno ) {
+
+ # This can happen if input file has brace errors.
+ # Otherwise it shouldn't happen. Not fatal but -lp
+ # formatting could get messed up.
+ if ( DEVEL_MODE && !get_saw_brace_error() ) {
+ Fault(<<EOM);
+sequence numbers differ; at CLOSING line $iline, seq=$seqno, Kc=$KK .. at OPENING line $iline_o, seq=$seqno_o, Ko=$K_o, expecting Kc=$K_c_expect
+EOM
+ }
+ }
- my $Kc = $K_closing_container->{$seqno};
- my $Km = $self->K_previous_nonblank($Kc);
- my $token_m =
- defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
+ #------------------------------------------
+ # Rules to avoid scrunching code blocks ...
+ #------------------------------------------
+ # Some test cases:
+ # c098/x107 x108 x110 x112 x114 x115 x117 x118 x119
+ my $block_type = $rblock_type_of_seqno->{$seqno};
+ if ($block_type) {
+
+ my $K_c = $KK;
+ my $block_length = MIN_BLOCK_LEN;
+ my $is_one_line_block;
+ my $level = $rLL->[$K_o]->[_LEVEL_];
+ if ( defined($K_o) && defined($K_c) ) {
+
+ # note: fixed 3 May 2022 (removed 'my')
+ $block_length =
+ $rLL->[ $K_c - 1 ]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[$K_o]->[_CUMULATIVE_LENGTH_];
+ $is_one_line_block = $iline == $iline_o;
+ }
- # ignore any optional ending comma
- if ( $token_m eq ',' ) {
- $Km = $self->K_previous_nonblank($Km);
- $token_m =
- defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
+ # Code block rule 1: Use the total block length if
+ # it is less than the minimum.
+ if ( $block_length < MIN_BLOCK_LEN ) {
+ $collapsed_len = $block_length;
+ }
+
+ # Code block rule 2: Use the full length of a
+ # one-line block to avoid breaking it, unless
+ # extremely long. We do not need to do a precise
+ # check here, because if it breaks then it will
+ # stay broken on later iterations.
+ elsif (
+ $is_one_line_block
+ && $block_length <
+ $maximum_line_length_at_level[$level]
+
+ # But skip this for blocks types which can reform,
+ # like sort/map/grep/eval blocks, to avoid
+ # instability (b1345, b1428)
+ && $self->is_fragile_block_type( $block_type,
+ $seqno )
+ )
+ {
+ $collapsed_len = $block_length;
}
- $is_complex ||=
- $is_closing_token{$token_m} && $token_m ne ')';
- }
- }
+ # Code block rule 3: Otherwise the length should be
+ # at least MIN_BLOCK_LEN to avoid scrunching code
+ # blocks.
+ elsif ( $collapsed_len < MIN_BLOCK_LEN ) {
+ $collapsed_len = MIN_BLOCK_LEN;
+ }
+ }
+
+ # Store the result. Some extra space, '2', allows for
+ # length of an opening token, inside space, comma, ...
+ # This constant has been tuned to give good overall
+ # results.
+ $collapsed_len += 2;
+ $rcollapsed_length_by_seqno->{$seqno} = $collapsed_len;
+
+ # Restart scanning the lower level prong
+ if (@stack) {
+ $max_prong_len = $stack[-1]->[_max_prong_len_];
+ $collapsed_len += $handle_len;
+ if ( $collapsed_len > $max_prong_len ) {
+ $max_prong_len = $collapsed_len;
+ }
+ }
+ }
+
+ # it is a ternary - no special processing for these yet
+ else {
+
+ }
+
+ $len = 0;
+ $last_nonblank_type = $type;
+ next;
+ }
+
+ #----------------------------
+ # Handle non-container tokens
+ #----------------------------
+ my $token_length = $rLL->[$KK]->[_TOKEN_LENGTH_];
+
+ # Count lengths of things like 'xx => yy' as a single item
+ if ( $type eq '=>' ) {
+ $len += $token_length + 1;
+ if ( $len > $max_prong_len ) { $max_prong_len = $len }
+ }
+ elsif ( $last_nonblank_type eq '=>' ) {
+ $len += $token_length;
+ if ( $len > $max_prong_len ) { $max_prong_len = $len }
- # Convert to option 3 (always break) if complex
- next unless ($is_complex);
- $break_option = 3;
+ # but only include one => per item
+ $len = $token_length;
}
- }
- # Fix for b1231: the has_list_with_lec does not cover all cases.
- # A broken container containing a list and with line-ending commas
- # will stay broken, so can be treated as if it had a list with lec.
- $has_list_with_lec ||=
- $has_list
- && $ris_broken_container->{$seqno}
- && $rlec_count_by_seqno->{$seqno};
+ # include everything to end of line after a here target
+ elsif ( $type eq 'h' ) {
+ $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
+ if ( $len > $max_prong_len ) { $max_prong_len = $len }
+ }
- DEBUG_BBX
- && print STDOUT
-"BBX: Looking at seqno=$seqno, token = $token with option=$break_option\n";
+ # for everything else just use the token length
+ else {
+ $len = $token_length;
+ if ( $len > $max_prong_len ) { $max_prong_len = $len }
+ }
+ $last_nonblank_type = $type;
- # -bbx=1 = stable, try to follow input
- if ( $break_option == 1 ) {
+ } ## end loop over tokens on this line
- my $iline = $rLL->[$KK]->[_LINE_INDEX_];
- my $rK_range = $rlines->[$iline]->{_rK_range};
- my ( $Kfirst, $Klast ) = @{$rK_range};
- next unless ( $KK == $Kfirst );
- }
+ return;
- # -bbx=2 => apply this style only for a 'complex' list
- elsif ( $break_option == 2 ) {
+ } ## end sub xlp_collapse_lengths_inner_loop
- # break if this list contains a broken list with line-ending comma
- my $ok_to_break;
- my $Msg = EMPTY_STRING;
- if ($has_list_with_lec) {
- $ok_to_break = 1;
- DEBUG_BBX && do { $Msg = "has list with lec;" };
- }
+} ## end closure xlp_collapsed_lengths
- if ( !$ok_to_break ) {
+sub is_excluded_lp {
- # Turn off -xci if -bbx=2 and this container has a sublist but
- # not a broken sublist. This avoids creating blinkers. The
- # problem is that -xci can cause one-line lists to break open,
- # and thereby creating formatting instability.
- # This fixes cases b1033 b1036 b1037 b1038 b1042 b1043 b1044
- # b1045 b1046 b1047 b1051 b1052 b1061.
- if ($has_list) { $rno_xci_by_seqno->{$seqno} = 1 }
+ # Decide if this container is excluded by user request:
+ # returns true if this token is excluded (i.e., may not use -lp)
+ # returns false otherwise
- my $parent = $rparent_of_seqno->{$seqno};
- if ( $self->is_list_by_seqno($parent) ) {
- DEBUG_BBX && do { $Msg = "parent is list" };
- $ok_to_break = 1;
- }
- }
+ # The control hash can either describe:
+ # what to exclude: $line_up_parentheses_control_is_lxpl = 1, or
+ # what to include: $line_up_parentheses_control_is_lxpl = 0
- if ( !$ok_to_break ) {
- DEBUG_BBX
- && print STDOUT "Not breaking at seqno=$seqno: $Msg\n";
- next;
- }
+ # Input parameter:
+ # $KK = index of the container opening token
- DEBUG_BBX
- && print STDOUT "OK to break at seqno=$seqno: $Msg\n";
+ my ( $self, $KK ) = @_;
+ my $rLL = $self->[_rLL_];
+ my $rtoken_vars = $rLL->[$KK];
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $rflags = $line_up_parentheses_control_hash{$token};
- # Patch: turn off -xci if -bbx=2 and -lp
- # This fixes cases b1090 b1095 b1101 b1116 b1118 b1121 b1122
- $rno_xci_by_seqno->{$seqno} = 1 if ($rOpts_line_up_parentheses);
- }
+ #-----------------------------------------------
+ # TEST #1: check match to listed container types
+ #-----------------------------------------------
+ if ( !defined($rflags) ) {
- # -bbx=3 = always break
- elsif ( $break_option == 3 ) {
+ # There is no entry for this container, so we are done
+ return !$line_up_parentheses_control_is_lxpl;
+ }
- # ok to break
- }
+ my ( $flag1, $flag2 ) = @{$rflags};
- # Shouldn't happen! Bad flag, but make behavior same as 3
- else {
- # ok to break
- }
+ #-----------------------------------------------------------
+ # TEST #2: check match to flag1, the preceding nonblank word
+ #-----------------------------------------------------------
+ my $match_flag1 = !defined($flag1) || $flag1 eq '*';
+ if ( !$match_flag1 ) {
- # Set a flag for actual implementation later in
- # sub insert_breaks_before_list_opening_containers
- $rbreak_before_container_by_seqno->{$seqno} = 1;
- DEBUG_BBX
- && print STDOUT "BBX: ok to break at seqno=$seqno\n";
+ # Find the previous token
+ my ( $is_f, $is_k, $is_w );
+ my $Kp = $self->K_previous_nonblank($KK);
+ if ( defined($Kp) ) {
+ my $type_p = $rLL->[$Kp]->[_TYPE_];
+ my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
- # -bbxi=0: Nothing more to do if the ci value remains unchanged
- my $ci_flag = $container_indentation_options{$token};
- next unless ($ci_flag);
+ # keyword?
+ $is_k = $type_p eq 'k';
- # -bbxi=1: This option removes ci and is handled in
- # later sub final_indentation_adjustment
- if ( $ci_flag == 1 ) {
- $rwant_reduced_ci->{$seqno} = 1;
- next;
- }
+ # function call?
+ $is_f = $self->[_ris_function_call_paren_]->{$seqno};
- # -bbxi=2: This option changes the level ...
- # This option can conflict with -xci in some cases. We can turn off
- # -xci for this container to avoid blinking. For now, only do this if
- # -vmll is set. ( fixes b1335, b1336 )
- if ($rOpts_variable_maximum_line_length) {
- $rno_xci_by_seqno->{$seqno} = 1;
+ # either keyword or function call?
+ $is_w = $is_k || $is_f;
}
- #----------------------------------------------------------------
- # Part 2: Perform tests before committing to changing ci and level
- #----------------------------------------------------------------
+ # Check for match based on flag1 and the previous token:
+ if ( $flag1 eq 'k' ) { $match_flag1 = $is_k }
+ elsif ( $flag1 eq 'K' ) { $match_flag1 = !$is_k }
+ elsif ( $flag1 eq 'f' ) { $match_flag1 = $is_f }
+ elsif ( $flag1 eq 'F' ) { $match_flag1 = !$is_f }
+ elsif ( $flag1 eq 'w' ) { $match_flag1 = $is_w }
+ elsif ( $flag1 eq 'W' ) { $match_flag1 = !$is_w }
+ ## else { no match found }
+ }
- # Before changing the ci level of the opening container, we need
- # to be sure that the container will be broken in the later stages of
- # formatting. We have to do this because we are working early in the
- # formatting pipeline. A problem can occur if we change the ci or
- # level of the opening token but do not actually break the container
- # open as expected. In most cases it wouldn't make any difference if
- # we changed ci or not, but there are some edge cases where this
- # can cause blinking states, so we need to try to only change ci if
- # the container will really be broken.
+ # See if we can exclude this based on the flag1 test...
+ if ($line_up_parentheses_control_is_lxpl) {
+ return 1 if ($match_flag1);
+ }
+ else {
+ return 1 if ( !$match_flag1 );
+ }
- # Only consider containers already broken
- next if ( !$ris_broken_container->{$seqno} );
+ #-------------------------------------------------------------
+ # TEST #3: exclusion based on flag2 and the container contents
+ #-------------------------------------------------------------
- # Patch to fix issue b1305: the combination of -naws and ci>i appears
- # to cause an instability. It should almost never occur in practice.
- next
- if (!$rOpts_add_whitespace
- && $rOpts_continuation_indentation > $rOpts_indent_columns );
+ # Note that this is an exclusion test for both -lpxl or -lpil input methods
+ # The options are:
+ # 0 or blank: ignore container contents
+ # 1 exclude non-lists or lists with sublists
+ # 2 same as 1 but also exclude lists with code blocks
- # Always ok to change ci for permanently broken containers
- if ( $ris_permanently_broken->{$seqno} ) {
- goto OK;
+ my $match_flag2;
+ if ($flag2) {
+
+ my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
+
+ my $is_list = $self->[_ris_list_by_seqno_]->{$seqno};
+ my $has_list = $self->[_rhas_list_]->{$seqno};
+ my $has_code_block = $self->[_rhas_code_block_]->{$seqno};
+ my $has_ternary = $self->[_rhas_ternary_]->{$seqno};
+
+ if ( !$is_list
+ || $has_list
+ || $flag2 eq '2' && ( $has_code_block || $has_ternary ) )
+ {
+ $match_flag2 = 1;
}
+ }
+ return $match_flag2;
+} ## end sub is_excluded_lp
- # Always OK if this list contains a broken sub-container with
- # a non-terminal line-ending comma
- if ($has_list_with_lec) { goto OK }
+sub set_excluded_lp_containers {
- # From here on we are considering a single container...
+ my ($self) = @_;
+ return unless ($rOpts_line_up_parentheses);
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
- # A single container must have at least 1 line-ending comma:
- next unless ( $rlec_count_by_seqno->{$seqno} );
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
- # Since it has a line-ending comma, it will stay broken if the -boc
- # flag is set
- if ($rOpts_break_at_old_comma_breakpoints) { goto OK }
+ foreach my $seqno ( keys %{$K_opening_container} ) {
- # OK if the container contains multiple fat commas
- # Better: multiple lines with fat commas
- if ( !$rOpts_ignore_old_breakpoints ) {
- my $rtype_count = $rtype_count_by_seqno->{$seqno};
- next unless ($rtype_count);
- my $fat_comma_count = $rtype_count->{'=>'};
- DEBUG_BBX
- && print STDOUT "BBX: fat comma count=$fat_comma_count\n";
- if ( $fat_comma_count && $fat_comma_count >= 2 ) { goto OK }
- }
-
- # The last check we can make is to see if this container could fit on a
- # single line. Use the least possible indentation estimate, ci=0,
- # so we are not subtracting $ci * $rOpts_continuation_indentation from
- # tabulated $maximum_text_length value.
- my $maximum_text_length = $maximum_text_length_at_level[$level];
- my $K_closing = $K_closing_container->{$seqno};
- my $length = $self->cumulative_length_before_K($K_closing) -
- $self->cumulative_length_before_K($KK);
- my $excess_length = $length - $maximum_text_length;
- DEBUG_BBX
- && print STDOUT
-"BBX: excess=$excess_length: maximum_text_length=$maximum_text_length, length=$length, ci=$ci\n";
+ # code blocks are always excluded by the -lp coding so we can skip them
+ next if ( $rblock_type_of_seqno->{$seqno} );
- # OK if the net container definitely breaks on length
- if ( $excess_length > $length_tol ) {
- DEBUG_BBX
- && print STDOUT "BBX: excess_length=$excess_length\n";
- goto OK;
- }
+ my $KK = $K_opening_container->{$seqno};
+ next unless defined($KK);
- # Otherwise skip it
- next;
+ # see if a user exclusion rule turns off -lp for this container
+ if ( $self->is_excluded_lp($KK) ) {
+ $ris_excluded_lp_container->{$seqno} = 1;
+ }
+ }
+ return;
+} ## end sub set_excluded_lp_containers
- #################################################################
- # Part 3: Looks OK: apply -bbx=n and any related -bbxi=n flag
- #################################################################
+######################################
+# CODE SECTION 6: Process line-by-line
+######################################
- OK:
+sub process_all_lines {
- DEBUG_BBX && print STDOUT "BBX: OK to break\n";
+ #----------------------------------------------------------
+ # Main loop to format all lines of a file according to type
+ #----------------------------------------------------------
- # -bbhbi=n
- # -bbsbi=n
- # -bbpi=n
+ my $self = shift;
+ my $rlines = $self->[_rlines_];
+ my $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
+ my $file_writer_object = $self->[_file_writer_object_];
+ my $logger_object = $self->[_logger_object_];
+ my $vertical_aligner_object = $self->[_vertical_aligner_object_];
+ my $save_logfile = $self->[_save_logfile_];
- # where:
+ # Flag to prevent blank lines when POD occurs in a format skipping sect.
+ my $in_format_skipping_section;
- # n=0 default indentation (usually one ci)
- # n=1 outdent one ci
- # n=2 indent one level (minus one ci)
- # n=3 indent one extra ci [This may be dropped]
+ # set locations for blanks around long runs of keywords
+ my $rwant_blank_line_after = $self->keyword_group_scan();
- # NOTE: We are adjusting indentation of the opening container. The
- # closing container will normally follow the indentation of the opening
- # container automatically, so this is not currently done.
- next unless ($ci);
+ my $line_type = EMPTY_STRING;
+ my $i_last_POD_END = -10;
+ my $i = -1;
+ foreach my $line_of_tokens ( @{$rlines} ) {
- # option 1: outdent
- if ( $ci_flag == 1 ) {
- $ci -= 1;
+ # insert blank lines requested for keyword sequences
+ if ( defined( $rwant_blank_line_after->{$i} )
+ && $rwant_blank_line_after->{$i} == 1 )
+ {
+ $self->want_blank_line();
}
- # option 2: indent one level
- elsif ( $ci_flag == 2 ) {
- $ci -= 1;
- $radjusted_levels->[$KK] += 1;
- }
+ $i++;
- # unknown option
- else {
- # Shouldn't happen - leave ci unchanged
- }
+ my $last_line_type = $line_type;
+ $line_type = $line_of_tokens->{_line_type};
+ my $input_line = $line_of_tokens->{_line_text};
- $rLL->[$KK]->[_CI_LEVEL_] = $ci if ( $ci >= 0 );
- }
+ # _line_type codes are:
+ # SYSTEM - system-specific code before hash-bang line
+ # CODE - line of perl code (including comments)
+ # POD_START - line starting pod, such as '=head'
+ # POD - pod documentation text
+ # POD_END - last line of pod section, '=cut'
+ # HERE - text of here-document
+ # HERE_END - last line of here-doc (target word)
+ # FORMAT - format section
+ # FORMAT_END - last line of format section, '.'
+ # SKIP - code skipping section
+ # SKIP_END - last line of code skipping section, '#>>V'
+ # DATA_START - __DATA__ line
+ # DATA - unidentified text following __DATA__
+ # END_START - __END__ line
+ # END - unidentified text following __END__
+ # ERROR - we are in big trouble, probably not a perl script
- $self->[_rbreak_before_container_by_seqno_] =
- $rbreak_before_container_by_seqno;
- $self->[_rwant_reduced_ci_] = $rwant_reduced_ci;
- return;
-} ## end sub break_before_list_opening_containers
+ # put a blank line after an =cut which comes before __END__ and __DATA__
+ # (required by podchecker)
+ if ( $last_line_type eq 'POD_END' && !$self->[_saw_END_or_DATA_] ) {
+ $i_last_POD_END = $i;
+ $file_writer_object->reset_consecutive_blank_lines();
+ if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
+ $self->want_blank_line();
+ }
+ }
-use constant DEBUG_XCI => 0;
+ # handle line of code..
+ if ( $line_type eq 'CODE' ) {
-sub extended_ci {
+ my $CODE_type = $line_of_tokens->{_code_type};
+ $in_format_skipping_section = $CODE_type eq 'FS';
- # This routine implements the -xci (--extended-continuation-indentation)
- # flag. We add CI to interior tokens of a container which itself has CI but
- # only if a token does not already have CI.
+ # Handle blank lines
+ if ( $CODE_type eq 'BL' ) {
- # To do this, we will locate opening tokens which themselves have
- # continuation indentation (CI). We track them with their sequence
- # numbers. These sequence numbers are called 'controlling sequence
- # numbers'. They apply continuation indentation to the tokens that they
- # contain. These inner tokens remember their controlling sequence numbers.
- # Later, when these inner tokens are output, they have to see if the output
- # lines with their controlling tokens were output with CI or not. If not,
- # then they must remove their CI too.
+ # Keep this blank? Start with the flag -kbl=n, where
+ # n=0 ignore all old blank lines
+ # n=1 stable: keep old blanks, but limited by -mbl=n
+ # n=2 keep all old blank lines, regardless of -mbl=n
+ # If n=0 we delete all old blank lines and let blank line
+ # rules generate any needed blank lines.
+ my $kgb_keep = $rOpts_keep_old_blank_lines;
- # The controlling CI concept works hierarchically. But CI itself is not
- # hierarchical; it is either on or off. There are some rare instances where
- # it would be best to have hierarchical CI too, but not enough to be worth
- # the programming effort.
+ # Then delete lines requested by the keyword-group logic if
+ # allowed
+ if ( $kgb_keep == 1
+ && defined( $rwant_blank_line_after->{$i} )
+ && $rwant_blank_line_after->{$i} == 2 )
+ {
+ $kgb_keep = 0;
+ }
- # The operations to remove unwanted CI are done in sub 'undo_ci'.
+ # But always keep a blank line following an =cut
+ if ( $i - $i_last_POD_END < 3 && !$kgb_keep ) {
+ $kgb_keep = 1;
+ }
- my ($self) = @_;
+ if ($kgb_keep) {
+ $self->flush($CODE_type);
+ $file_writer_object->write_blank_code_line(
+ $rOpts_keep_old_blank_lines == 2 );
+ $self->[_last_line_leading_type_] = 'b';
+ }
+ next;
+ }
+ else {
- my $rLL = $self->[_rLL_];
- return unless ( defined($rLL) && @{$rLL} );
+ # Let logger see all non-blank lines of code. This is a slow
+ # operation so we avoid it if it is not going to be saved.
+ if ( $save_logfile && $logger_object ) {
+ $logger_object->black_box( $line_of_tokens,
+ $vertical_aligner_object->get_output_line_number );
+ }
+ }
- my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
- my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
- my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
- my $rlines = $self->[_rlines_];
- my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
- my $ris_bli_container = $self->[_ris_bli_container_];
- my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ # Handle Format Skipping (FS) and Verbatim (VB) Lines
+ if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
+ $self->write_unindented_line("$input_line");
+ $file_writer_object->reset_consecutive_blank_lines();
+ next;
+ }
- my %available_space;
+ # Handle all other lines of code
+ $self->process_line_of_CODE($line_of_tokens);
+ }
- # Loop over all opening container tokens
- my $K_opening_container = $self->[_K_opening_container_];
- my $K_closing_container = $self->[_K_closing_container_];
- my $ris_broken_container = $self->[_ris_broken_container_];
- my @seqno_stack;
- my $seqno_top;
- my $KLAST;
- my $KNEXT = $self->[_K_first_seq_item_];
+ # handle line of non-code..
+ else {
- # The following variable can be used to allow a little extra space to
- # avoid blinkers. A value $len_tol = 20 fixed the following
- # fixes cases: b1025 b1026 b1027 b1028 b1029 b1030 but NOT b1031.
- # It turned out that the real problem was mis-parsing a list brace as
- # a code block in a 'use' statement when the line length was extremely
- # small. A value of 0 works now, but a slightly larger value can
- # be used to minimize the chance of a blinker.
- my $len_tol = 0;
+ # set special flags
+ my $skip_line = 0;
+ if ( substr( $line_type, 0, 3 ) eq 'POD' ) {
- while ( defined($KNEXT) ) {
+ # Pod docs should have a preceding blank line. But stay
+ # out of __END__ and __DATA__ sections, because
+ # the user may be using this section for any purpose whatsoever
+ if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
+ if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
+ if ( !$skip_line
+ && !$in_format_skipping_section
+ && $line_type eq 'POD_START'
+ && !$self->[_saw_END_or_DATA_] )
+ {
+ $self->want_blank_line();
+ }
+ }
- # Fix all tokens up to the next sequence item if we are changing CI
- if ($seqno_top) {
+ # leave the blank counters in a predictable state
+ # after __END__ or __DATA__
+ elsif ( $line_type eq 'END_START' || $line_type eq 'DATA_START' ) {
+ $file_writer_object->reset_consecutive_blank_lines();
+ $self->[_saw_END_or_DATA_] = 1;
+ }
- my $is_list = $ris_list_by_seqno->{$seqno_top};
- my $space = $available_space{$seqno_top};
- my $length = $rLL->[$KLAST]->[_CUMULATIVE_LENGTH_];
- my $count = 0;
- foreach my $Kt ( $KLAST + 1 .. $KNEXT - 1 ) {
+ # Patch to avoid losing blank lines after a code-skipping block;
+ # fixes case c047.
+ elsif ( $line_type eq 'SKIP_END' ) {
+ $file_writer_object->reset_consecutive_blank_lines();
+ }
- # But do not include tokens which might exceed the line length
- # and are not in a list.
- # ... This fixes case b1031
- my $length_before = $length;
- $length = $rLL->[$Kt]->[_CUMULATIVE_LENGTH_];
- if (
- !$rLL->[$Kt]->[_CI_LEVEL_]
- && ( $is_list
- || $length - $length_before < $space
- || $rLL->[$Kt]->[_TYPE_] eq '#' )
- )
- {
- $rLL->[$Kt]->[_CI_LEVEL_] = 1;
- $rseqno_controlling_my_ci->{$Kt} = $seqno_top;
- $count++;
- }
+ # write unindented non-code line
+ if ( !$skip_line ) {
+ $self->write_unindented_line($input_line);
}
- $ris_seqno_controlling_ci->{$seqno_top} += $count;
}
+ }
+ return;
- $KLAST = $KNEXT;
- my $KK = $KNEXT;
- $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
+} ## end sub process_all_lines
- my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
- my $K_opening = $K_opening_container->{$seqno};
+{ ## closure keyword_group_scan
- # see if we have reached the end of the current controlling container
- if ( $seqno_top && $seqno == $seqno_top ) {
- $seqno_top = pop @seqno_stack;
- }
+ # this is the return var
+ my $rhash_of_desires;
- # Patch to fix some block types...
- # Certain block types arrive from the tokenizer without CI but should
- # have it for this option. These include anonymous subs and
- # do sort map grep eval
- my $block_type = $rblock_type_of_seqno->{$seqno};
- if ( $block_type && $is_block_with_ci{$block_type} ) {
- $rLL->[$KK]->[_CI_LEVEL_] = 1;
- if ($seqno_top) {
- $rseqno_controlling_my_ci->{$KK} = $seqno_top;
- $ris_seqno_controlling_ci->{$seqno_top}++;
- }
- }
+ # user option variables for -kgb
+ my (
- # If this does not have ci, update ci if necessary and continue looking
- if ( !$rLL->[$KK]->[_CI_LEVEL_] ) {
- if ($seqno_top) {
- $rLL->[$KK]->[_CI_LEVEL_] = 1;
- $rseqno_controlling_my_ci->{$KK} = $seqno_top;
- $ris_seqno_controlling_ci->{$seqno_top}++;
- }
- next;
- }
+ $rOpts_kgb_after,
+ $rOpts_kgb_before,
+ $rOpts_kgb_delete,
+ $rOpts_kgb_inside,
+ $rOpts_kgb_size_max,
+ $rOpts_kgb_size_min,
+
+ );
+
+ # group variables, initialized by kgb_initialize_group_vars
+ my ( $ibeg, $iend, $count, $level_beg, $K_closing );
+ my ( @iblanks, @group, @subgroup );
+
+ # line variables, updated by sub keyword_group_scan
+ my ( $line_type, $CODE_type, $K_first, $K_last );
+ my $number_of_groups_seen;
+
+ #------------------------
+ # -kgb helper subroutines
+ #------------------------
+
+ sub kgb_initialize_options {
+
+ # check and initialize user options for -kgb
+ # return error flag:
+ # true for some input error, do not continue
+ # false if ok
+
+ # Local copies of the various control parameters
+ $rOpts_kgb_after = $rOpts->{'keyword-group-blanks-after'}; # '-kgba'
+ $rOpts_kgb_before = $rOpts->{'keyword-group-blanks-before'}; # '-kgbb'
+ $rOpts_kgb_delete = $rOpts->{'keyword-group-blanks-delete'}; # '-kgbd'
+ $rOpts_kgb_inside = $rOpts->{'keyword-group-blanks-inside'}; # '-kgbi'
+
+ # A range of sizes can be input with decimal notation like 'min.max'
+ # with any number of dots between the two numbers. Examples:
+ # string => min max matches
+ # 1.1 1 1 exactly 1
+ # 1.3 1 3 1,2, or 3
+ # 1..3 1 3 1,2, or 3
+ # 5 5 - 5 or more
+ # 6. 6 - 6 or more
+ # .2 - 2 up to 2
+ # 1.0 1 0 nothing
+ my $rOpts_kgb_size = $rOpts->{'keyword-group-blanks-size'}; # '-kgbs'
+ ( $rOpts_kgb_size_min, $rOpts_kgb_size_max ) = split /\.+/,
+ $rOpts_kgb_size;
+ if ( $rOpts_kgb_size_min && $rOpts_kgb_size_min !~ /^\d+$/
+ || $rOpts_kgb_size_max && $rOpts_kgb_size_max !~ /^\d+$/ )
+ {
+ Warn(<<EOM);
+Unexpected value for -kgbs: '$rOpts_kgb_size'; expecting 'min' or 'min.max';
+ignoring all -kgb flags
+EOM
- # Skip if requested by -bbx to avoid blinkers
- if ( $rno_xci_by_seqno->{$seqno} ) {
- next;
+ # Turn this option off so that this message does not keep repeating
+ # during iterations and other files.
+ $rOpts->{'keyword-group-blanks-size'} = EMPTY_STRING;
+ return $rhash_of_desires;
}
+ $rOpts_kgb_size_min = 1 unless ($rOpts_kgb_size_min);
- # Skip if this is a -bli container (this fixes case b1065) Note: case
- # b1065 is also fixed by the update for b1055, so this update is not
- # essential now. But there does not seem to be a good reason to add
- # xci and bli together, so the update is retained.
- if ( $ris_bli_container->{$seqno} ) {
- next;
+ if ( $rOpts_kgb_size_max && $rOpts_kgb_size_max < $rOpts_kgb_size_min )
+ {
+ return $rhash_of_desires;
}
- # We are looking for opening container tokens with ci
- next unless ( defined($K_opening) && $KK == $K_opening );
+ # check codes for $rOpts_kgb_before and
+ # $rOpts_kgb_after:
+ # 0 = never (delete if exist)
+ # 1 = stable (keep unchanged)
+ # 2 = always (insert if missing)
+ return $rhash_of_desires
+ unless $rOpts_kgb_size_min > 0
+ && ( $rOpts_kgb_before != 1
+ || $rOpts_kgb_after != 1
+ || $rOpts_kgb_inside
+ || $rOpts_kgb_delete );
- # Make sure there is a corresponding closing container
- # (could be missing if the script has a brace error)
- my $K_closing = $K_closing_container->{$seqno};
- next unless defined($K_closing);
+ return;
+ } ## end sub kgb_initialize_options
+
+ sub kgb_initialize_group_vars {
+
+ # Definitions:
+ # $ibeg = first line index of this entire group
+ # $iend = last line index of this entire group
+ # $count = total number of keywords seen in this entire group
+ # $level_beg = indentation level of this group
+ # @group = [ $i, $token, $count ] =list of all keywords & blanks
+ # @subgroup = $j, index of group where token changes
+ # @iblanks = line indexes of blank lines in input stream in this group
+ # where i=starting line index
+ # token (the keyword)
+ # count = number of this token in this subgroup
+ # j = index in group where token changes
+ $ibeg = -1;
+ $iend = undef;
+ $level_beg = -1;
+ $K_closing = undef;
+ $count = 0;
+ @group = ();
+ @subgroup = ();
+ @iblanks = ();
+ return;
+ } ## end sub kgb_initialize_group_vars
- # Require different input lines. This will filter out a large number
- # of small hash braces and array brackets. If we accidentally filter
- # out an important container, it will get fixed on the next pass.
- if (
- $rLL->[$K_opening]->[_LINE_INDEX_] ==
- $rLL->[$K_closing]->[_LINE_INDEX_]
- && ( $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
- $rLL->[$K_opening]->[_CUMULATIVE_LENGTH_] >
- $rOpts_maximum_line_length )
- )
- {
- DEBUG_XCI
- && print "XCI: Skipping seqno=$seqno, require different lines\n";
- next;
- }
+ sub kgb_initialize_line_vars {
+ $CODE_type = EMPTY_STRING;
+ $K_first = undef;
+ $K_last = undef;
+ $line_type = EMPTY_STRING;
+ return;
+ } ## end sub kgb_initialize_line_vars
- # Do not apply -xci if adding extra ci will put the container contents
- # beyond the line length limit (fixes cases b899 b935)
- my $level = $rLL->[$K_opening]->[_LEVEL_];
- my $ci_level = $rLL->[$K_opening]->[_CI_LEVEL_];
- my $maximum_text_length =
- $maximum_text_length_at_level[$level] -
- $ci_level * $rOpts_continuation_indentation;
+ sub kgb_initialize {
- # Fix for b1197 b1198 b1199 b1200 b1201 b1202
- # Do not apply -xci if we are running out of space
- if ( $level >= $stress_level_beta ) {
- DEBUG_XCI
- && print
-"XCI: Skipping seqno=$seqno, level=$level exceeds stress level=$stress_level_beta\n";
- next;
- }
+ # initialize all closure variables for -kgb
+ # return:
+ # true to cause immediate exit (something is wrong)
+ # false to continue ... all is okay
- # remember how much space is available for patch b1031 above
- my $space =
- $maximum_text_length - $len_tol - $rOpts_continuation_indentation;
+ # This is the return variable:
+ $rhash_of_desires = {};
- if ( $space < 0 ) {
- DEBUG_XCI && print "XCI: Skipping seqno=$seqno, space=$space\n";
- next;
- }
- DEBUG_XCI && print "XCI: OK seqno=$seqno, space=$space\n";
+ # initialize and check user options;
+ my $quit = kgb_initialize_options();
+ if ($quit) { return $quit }
- $available_space{$seqno} = $space;
+ # initialize variables for the current group and subgroups:
+ kgb_initialize_group_vars();
- # This becomes the next controlling container
- push @seqno_stack, $seqno_top if ($seqno_top);
- $seqno_top = $seqno;
- }
- return;
-} ## end sub extended_ci
+ # initialize variables for the most recently seen line:
+ kgb_initialize_line_vars();
-sub braces_left_setup {
+ $number_of_groups_seen = 0;
- # Called once per file to mark all -bl, -sbl, and -asbl containers
- my $self = shift;
+ # all okay
+ return;
+ } ## end sub kgb_initialize
- my $rOpts_bl = $rOpts->{'opening-brace-on-new-line'};
- my $rOpts_sbl = $rOpts->{'opening-sub-brace-on-new-line'};
- my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
- return unless ( $rOpts_bl || $rOpts_sbl || $rOpts_asbl );
+ sub kgb_insert_blank_after {
+ my ($i) = @_;
+ $rhash_of_desires->{$i} = 1;
+ my $ip = $i + 1;
+ if ( defined( $rhash_of_desires->{$ip} )
+ && $rhash_of_desires->{$ip} == 2 )
+ {
+ $rhash_of_desires->{$ip} = 0;
+ }
+ return;
+ } ## end sub kgb_insert_blank_after
- my $rLL = $self->[_rLL_];
- return unless ( defined($rLL) && @{$rLL} );
+ sub kgb_split_into_sub_groups {
- # We will turn on this hash for braces controlled by these flags:
- my $rbrace_left = $self->[_rbrace_left_];
+ # place blanks around long sub-groups of keywords
+ # ...if requested
+ return unless ($rOpts_kgb_inside);
- my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
- my $ris_asub_block = $self->[_ris_asub_block_];
- my $ris_sub_block = $self->[_ris_sub_block_];
- foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
+ # loop over sub-groups, index k
+ push @subgroup, scalar @group;
+ my $kbeg = 1;
+ my $kend = @subgroup - 1;
+ foreach my $k ( $kbeg .. $kend ) {
- my $block_type = $rblock_type_of_seqno->{$seqno};
+ # index j runs through all keywords found
+ my $j_b = $subgroup[ $k - 1 ];
+ my $j_e = $subgroup[$k] - 1;
- # use -asbl flag for an anonymous sub block
- if ( $ris_asub_block->{$seqno} ) {
- if ($rOpts_asbl) {
- $rbrace_left->{$seqno} = 1;
- }
- }
+ # index i is the actual line number of a keyword
+ my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] };
+ my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] };
+ my $num = $count_e - $count_b + 1;
- # use -sbl flag for a named sub
- elsif ( $ris_sub_block->{$seqno} ) {
- if ($rOpts_sbl) {
- $rbrace_left->{$seqno} = 1;
- }
- }
+ # This subgroup runs from line $ib to line $ie-1, but may contain
+ # blank lines
+ if ( $num >= $rOpts_kgb_size_min ) {
- # use -bl flag if not a sub block of any type
- else {
- if ( $rOpts_bl
- && $block_type =~ /$bl_pattern/
- && $block_type !~ /$bl_exclusion_pattern/ )
- {
- $rbrace_left->{$seqno} = 1;
+ # if there are blank lines, we require that at least $num lines
+ # be non-blank up to the boundary with the next subgroup.
+ my $nog_b = my $nog_e = 1;
+ if ( @iblanks && !$rOpts_kgb_delete ) {
+ my $j_bb = $j_b + $num - 1;
+ my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] };
+ $nog_b = $count_bb - $count_b + 1 == $num;
+
+ my $j_ee = $j_e - ( $num - 1 );
+ my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] };
+ $nog_e = $count_e - $count_ee + 1 == $num;
+ }
+ if ( $nog_b && $k > $kbeg ) {
+ kgb_insert_blank_after( $i_b - 1 );
+ }
+ if ( $nog_e && $k < $kend ) {
+ my ( $i_ep, $tok_ep, $count_ep ) =
+ @{ $group[ $j_e + 1 ] };
+ kgb_insert_blank_after( $i_ep - 1 );
+ }
}
}
- }
- return;
-} ## end sub braces_left_setup
+ return;
+ } ## end sub kgb_split_into_sub_groups
-sub bli_adjustment {
+ sub kgb_delete_if_blank {
+ my ( $self, $i ) = @_;
- # Called once per file to implement the --brace-left-and-indent option.
- # If -bli is set, adds one continuation indentation for certain braces
- my $self = shift;
- return unless ( $rOpts->{'brace-left-and-indent'} );
- my $rLL = $self->[_rLL_];
- return unless ( defined($rLL) && @{$rLL} );
+ # delete line $i if it is blank
+ my $rlines = $self->[_rlines_];
+ return unless ( $i >= 0 && $i < @{$rlines} );
+ return if ( $rlines->[$i]->{_line_type} ne 'CODE' );
+ my $code_type = $rlines->[$i]->{_code_type};
+ if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; }
+ return;
+ } ## end sub kgb_delete_if_blank
- my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
- my $ris_bli_container = $self->[_ris_bli_container_];
- my $rbrace_left = $self->[_rbrace_left_];
- my $K_opening_container = $self->[_K_opening_container_];
- my $K_closing_container = $self->[_K_closing_container_];
+ sub kgb_delete_inner_blank_lines {
- foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
- my $block_type = $rblock_type_of_seqno->{$seqno};
- if ( $block_type
- && $block_type =~ /$bli_pattern/
- && $block_type !~ /$bli_exclusion_pattern/ )
- {
- $ris_bli_container->{$seqno} = 1;
- $rbrace_left->{$seqno} = 1;
- my $Ko = $K_opening_container->{$seqno};
- my $Kc = $K_closing_container->{$seqno};
- if ( defined($Ko) && defined($Kc) ) {
- $rLL->[$Kc]->[_CI_LEVEL_] = ++$rLL->[$Ko]->[_CI_LEVEL_];
- }
+ # always remove unwanted trailing blank lines from our list
+ return unless (@iblanks);
+ while ( my $ibl = pop(@iblanks) ) {
+ if ( $ibl < $iend ) { push @iblanks, $ibl; last }
+ $iend = $ibl;
}
- }
- return;
-} ## end sub bli_adjustment
-sub find_multiline_qw {
+ # now mark mark interior blank lines for deletion if requested
+ return unless ($rOpts_kgb_delete);
- my $self = shift;
+ while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 }
- # Multiline qw quotes are not sequenced items like containers { [ (
- # but behave in some respects in a similar way. So this routine finds them
- # and creates a separate sequence number system for later use.
+ return;
+ } ## end sub kgb_delete_inner_blank_lines
- # This is straightforward because they always begin at the end of one line
- # and and at the beginning of a later line. This is true no matter how we
- # finally make our line breaks, so we can find them before deciding on new
- # line breaks.
+ sub kgb_end_group {
- my $rstarting_multiline_qw_seqno_by_K = {};
- my $rending_multiline_qw_seqno_by_K = {};
- my $rKrange_multiline_qw_by_seqno = {};
- my $rmultiline_qw_has_extra_level = {};
+ # end a group of keywords
+ my ( $self, $bad_ending ) = @_;
+ if ( defined($ibeg) && $ibeg >= 0 ) {
- my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
+ # then handle sufficiently large groups
+ if ( $count >= $rOpts_kgb_size_min ) {
- my $rlines = $self->[_rlines_];
- my $rLL = $self->[_rLL_];
- my $qw_seqno;
- my $num_qw_seqno = 0;
- my $K_start_multiline_qw;
+ $number_of_groups_seen++;
- foreach my $line_of_tokens ( @{$rlines} ) {
+ # do any blank deletions regardless of the count
+ kgb_delete_inner_blank_lines();
- my $line_type = $line_of_tokens->{_line_type};
- next unless ( $line_type eq 'CODE' );
- my $rK_range = $line_of_tokens->{_rK_range};
- my ( $Kfirst, $Klast ) = @{$rK_range};
- next unless ( defined($Kfirst) && defined($Klast) ); # skip blank line
- if ( defined($K_start_multiline_qw) ) {
- my $type = $rLL->[$Kfirst]->[_TYPE_];
+ my $rlines = $self->[_rlines_];
+ if ( $ibeg > 0 ) {
+ my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type};
+
+ # patch for hash bang line which is not currently marked as
+ # a comment; mark it as a comment
+ if ( $ibeg == 1 && !$code_type ) {
+ my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text};
+ $code_type = 'BC'
+ if ( $line_text && $line_text =~ /^#/ );
+ }
+
+ # Do not insert a blank after a comment
+ # (this could be subject to a flag in the future)
+ if ( $code_type !~ /(BC|SBC|SBCX)/ ) {
+ if ( $rOpts_kgb_before == INSERT ) {
+ kgb_insert_blank_after( $ibeg - 1 );
+
+ }
+ elsif ( $rOpts_kgb_before == DELETE ) {
+ $self->kgb_delete_if_blank( $ibeg - 1 );
+ }
+ }
+ }
+
+ # We will only put blanks before code lines. We could loosen
+ # this rule a little, but we have to be very careful because
+ # for example we certainly don't want to drop a blank line
+ # after a line like this:
+ # my $var = <<EOM;
+ if ( $line_type eq 'CODE' && defined($K_first) ) {
+
+ # - Do not put a blank before a line of different level
+ # - Do not put a blank line if we ended the search badly
+ # - Do not put a blank at the end of the file
+ # - Do not put a blank line before a hanging side comment
+ my $rLL = $self->[_rLL_];
+ my $level = $rLL->[$K_first]->[_LEVEL_];
+ my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
- # shouldn't happen
- if ( $type ne 'q' ) {
- DEVEL_MODE && print STDERR <<EOM;
-STRANGE: started multiline qw at K=$K_start_multiline_qw but didn't see q qw at K=$Kfirst\n";
-EOM
- $K_start_multiline_qw = undef;
- next;
- }
- my $Kprev = $self->K_previous_nonblank($Kfirst);
- my $Knext = $self->K_next_nonblank($Kfirst);
- my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
- my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
- if ( $type_m eq 'q' && $type_p ne 'q' ) {
- $rending_multiline_qw_seqno_by_K->{$Kfirst} = $qw_seqno;
- $rKrange_multiline_qw_by_seqno->{$qw_seqno} =
- [ $K_start_multiline_qw, $Kfirst ];
- $K_start_multiline_qw = undef;
- $qw_seqno = undef;
- }
- }
- if ( !defined($K_start_multiline_qw)
- && $rLL->[$Klast]->[_TYPE_] eq 'q' )
- {
- my $Kprev = $self->K_previous_nonblank($Klast);
- my $Knext = $self->K_next_nonblank($Klast);
- my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
- my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
- if ( $type_m ne 'q' && $type_p eq 'q' ) {
- $num_qw_seqno++;
- $qw_seqno = 'q' . $num_qw_seqno;
- $K_start_multiline_qw = $Klast;
- $rstarting_multiline_qw_seqno_by_K->{$Klast} = $qw_seqno;
+ if ( $level == $level_beg
+ && $ci_level == 0
+ && !$bad_ending
+ && $iend < @{$rlines}
+ && $CODE_type ne 'HSC' )
+ {
+ if ( $rOpts_kgb_after == INSERT ) {
+ kgb_insert_blank_after($iend);
+ }
+ elsif ( $rOpts_kgb_after == DELETE ) {
+ $self->kgb_delete_if_blank( $iend + 1 );
+ }
+ }
+ }
}
+ kgb_split_into_sub_groups();
}
- }
- # Give multiline qw lists extra indentation instead of CI. This option
- # works well but is currently only activated when the -xci flag is set.
- # The reason is to avoid unexpected changes in formatting.
- if ($rOpts_extended_continuation_indentation) {
- while ( my ( $qw_seqno_x, $rKrange ) =
- each %{$rKrange_multiline_qw_by_seqno} )
- {
- my ( $Kbeg, $Kend ) = @{$rKrange};
+ # reset for another group
+ kgb_initialize_group_vars();
- # require isolated closing token
- my $token_end = $rLL->[$Kend]->[_TOKEN_];
- next
- unless ( length($token_end) == 1
- && ( $is_closing_token{$token_end} || $token_end eq '>' ) );
+ return;
+ } ## end sub kgb_end_group
- # require isolated opening token
- my $token_beg = $rLL->[$Kbeg]->[_TOKEN_];
+ sub kgb_find_container_end {
- # allow space(s) after the qw
- if ( length($token_beg) > 3 && substr( $token_beg, 2, 1 ) =~ m/\s/ )
- {
- $token_beg =~ s/\s+//;
- }
+ # If the keyword line is continued onto subsequent lines, find the
+ # closing token '$K_closing' so that we can easily skip past the
+ # contents of the container.
- next unless ( length($token_beg) == 3 );
+ # We only set this value if we find a simple list, meaning
+ # -contents only one level deep
+ # -not welded
- foreach my $KK ( $Kbeg + 1 .. $Kend - 1 ) {
- $rLL->[$KK]->[_LEVEL_]++;
- $rLL->[$KK]->[_CI_LEVEL_] = 0;
- }
+ my ($self) = @_;
- # set flag for -wn option, which will remove the level
- $rmultiline_qw_has_extra_level->{$qw_seqno_x} = 1;
- }
- }
+ # First check: skip if next line is not one deeper
+ my $Knext_nonblank = $self->K_next_nonblank($K_last);
+ return if ( !defined($Knext_nonblank) );
+ my $rLL = $self->[_rLL_];
+ my $level_next = $rLL->[$Knext_nonblank]->[_LEVEL_];
+ return if ( $level_next != $level_beg + 1 );
- # For the -lp option we need to mark all parent containers of
- # multiline quotes
- if ( $rOpts_line_up_parentheses && !$rOpts_extended_line_up_parentheses ) {
+ # Find the parent container of the first token on the next line
+ my $parent_seqno = $self->parent_seqno_by_K($Knext_nonblank);
+ return unless ( defined($parent_seqno) );
- while ( my ( $qw_seqno_x, $rKrange ) =
- each %{$rKrange_multiline_qw_by_seqno} )
- {
- my ( $Kbeg, $Kend ) = @{$rKrange};
- my $parent_seqno = $self->parent_seqno_by_K($Kend);
- next unless ($parent_seqno);
+ # Must not be a weld (can be unstable)
+ return
+ if ( $total_weld_count
+ && $self->is_welded_at_seqno($parent_seqno) );
- # If the parent container exactly surrounds this qw, then -lp
- # formatting seems to work so we will not mark it.
- my $is_tightly_contained;
- my $Kn = $self->K_next_nonblank($Kend);
- my $seqno_n = defined($Kn) ? $rLL->[$Kn]->[_TYPE_SEQUENCE_] : undef;
- if ( defined($seqno_n) && $seqno_n eq $parent_seqno ) {
+ # Opening container must exist and be on this line
+ my $Ko = $self->[_K_opening_container_]->{$parent_seqno};
+ return unless ( defined($Ko) && $Ko > $K_first && $Ko <= $K_last );
- my $Kp = $self->K_previous_nonblank($Kbeg);
- my $seqno_p =
- defined($Kp) ? $rLL->[$Kp]->[_TYPE_SEQUENCE_] : undef;
- if ( defined($seqno_p) && $seqno_p eq $parent_seqno ) {
- $is_tightly_contained = 1;
- }
- }
+ # Verify that the closing container exists and is on a later line
+ my $Kc = $self->[_K_closing_container_]->{$parent_seqno};
+ return unless ( defined($Kc) && $Kc > $K_last );
- $ris_excluded_lp_container->{$parent_seqno} = 1
- unless ($is_tightly_contained);
+ # That's it
+ $K_closing = $Kc;
- # continue up the tree marking parent containers
- while (1) {
- $parent_seqno = $self->[_rparent_of_seqno_]->{$parent_seqno};
- last
- unless ( defined($parent_seqno)
- && $parent_seqno ne SEQ_ROOT );
- $ris_excluded_lp_container->{$parent_seqno} = 1;
- }
- }
- }
+ return;
+ } ## end sub kgb_find_container_end
- $self->[_rstarting_multiline_qw_seqno_by_K_] =
- $rstarting_multiline_qw_seqno_by_K;
- $self->[_rending_multiline_qw_seqno_by_K_] =
- $rending_multiline_qw_seqno_by_K;
- $self->[_rKrange_multiline_qw_by_seqno_] = $rKrange_multiline_qw_by_seqno;
- $self->[_rmultiline_qw_has_extra_level_] = $rmultiline_qw_has_extra_level;
+ sub kgb_add_to_group {
+ my ( $self, $i, $token, $level ) = @_;
- return;
-} ## end sub find_multiline_qw
+ # End the previous group if we have reached the maximum
+ # group size
+ if ( $rOpts_kgb_size_max && @group >= $rOpts_kgb_size_max ) {
+ $self->kgb_end_group();
+ }
-use constant DEBUG_COLLAPSED_LENGTHS => 0;
+ if ( @group == 0 ) {
+ $ibeg = $i;
+ $level_beg = $level;
+ $count = 0;
+ }
-# Minimum space reserved for contents of a code block. A value of 40 has given
-# reasonable results. With a large line length, say -l=120, this will not
-# normally be noticeable but it will prevent making a mess in some edge cases.
-use constant MIN_BLOCK_LEN => 40;
+ $count++;
+ $iend = $i;
-my %is_handle_type;
+ # New sub-group?
+ if ( !@group || $token ne $group[-1]->[1] ) {
+ push @subgroup, scalar(@group);
+ }
+ push @group, [ $i, $token, $count ];
-BEGIN {
- my @q = qw( w C U G i k => );
- @is_handle_type{@q} = (1) x scalar(@q);
+ # remember if this line ends in an open container
+ $self->kgb_find_container_end();
- my $i = 0;
- use constant {
- _max_prong_len_ => $i++,
- _handle_len_ => $i++,
- _seqno_o_ => $i++,
- _iline_o_ => $i++,
- _K_o_ => $i++,
- _K_c_ => $i++,
- _interrupted_list_rule_ => $i++,
- };
-}
+ return;
+ } ## end sub kgb_add_to_group
-sub collapsed_lengths {
+ #---------------------
+ # -kgb main subroutine
+ #---------------------
- my $self = shift;
+ sub keyword_group_scan {
+ my $self = shift;
- #----------------------------------------------------------------
- # Define the collapsed lengths of containers for -xlp indentation
- #----------------------------------------------------------------
+ # Called once per file to process --keyword-group-blanks-* parameters.
- # We need an estimate of the minimum required line length starting at any
- # opening container for the -xlp style. This is needed to avoid using too
- # much indentation space for lower level containers and thereby running
- # out of space for outer container tokens due to the maximum line length
- # limit.
+ # Task:
+ # Manipulate blank lines around keyword groups (kgb* flags)
+ # Scan all lines looking for runs of consecutive lines beginning with
+ # selected keywords. Example keywords are 'my', 'our', 'local', ... but
+ # they may be anything. We will set flags requesting that blanks be
+ # inserted around and within them according to input parameters. Note
+ # that we are scanning the lines as they came in in the input stream, so
+ # they are not necessarily well formatted.
- # The basic idea is that at each node in the tree we imagine that we have a
- # fork with a handle and collapsible prongs:
- #
- # |------------
- # |--------
- # ------------|-------
- # handle |------------
- # |--------
- # prongs
- #
- # Each prong has a minimum collapsed length. The collapsed length at a node
- # is the maximum of these minimum lengths, plus the handle length. Each of
- # the prongs may itself be a tree node.
+ # Returns:
+ # The output of this sub is a return hash ref whose keys are the indexes
+ # of lines after which we desire a blank line. For line index $i:
+ # $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i
+ # $rhash_of_desires->{$i} = 2 means we want blank line $i removed
+
+ # Nothing to do if no blanks can be output. This test added to fix
+ # case b760.
+ if ( !$rOpts_maximum_consecutive_blank_lines ) {
+ return $rhash_of_desires;
+ }
- # This is just a rough calculation to get an approximate starting point for
- # indentation. Later routines will be more precise. It is important that
- # these estimates be independent of the line breaks of the input stream in
- # order to avoid instabilities.
+ #---------------
+ # initialization
+ #---------------
+ my $quit = kgb_initialize();
+ if ($quit) { return $rhash_of_desires }
- my $rLL = $self->[_rLL_];
- my $Klimit = $self->[_Klimit_];
- my $rlines = $self->[_rlines_];
- my $K_opening_container = $self->[_K_opening_container_];
- my $K_closing_container = $self->[_K_closing_container_];
- my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
- my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
- my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
- my $ris_permanently_broken = $self->[_ris_permanently_broken_];
- my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
- my $rhas_broken_list = $self->[_rhas_broken_list_];
- my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
+ my $rLL = $self->[_rLL_];
+ my $rlines = $self->[_rlines_];
- my $K_start_multiline_qw;
- my $level_start_multiline_qw = 0;
- my $max_prong_len = 0;
- my $handle_len_x = 0;
- my @stack;
- my $len = 0;
- my $last_nonblank_type = 'b';
- push @stack,
- [ $max_prong_len, $handle_len_x, SEQ_ROOT, undef, undef, undef, undef ];
+ $self->kgb_end_group();
+ my $i = -1;
+ my $Opt_repeat_count =
+ $rOpts->{'keyword-group-blanks-repeat-count'}; # '-kgbr'
- my $iline = -1;
- foreach my $line_of_tokens ( @{$rlines} ) {
- $iline++;
- my $line_type = $line_of_tokens->{_line_type};
- next if ( $line_type ne 'CODE' );
- my $CODE_type = $line_of_tokens->{_code_type};
+ #----------------------------------
+ # loop over all lines of the source
+ #----------------------------------
+ foreach my $line_of_tokens ( @{$rlines} ) {
- # Always skip blank lines
- next if ( $CODE_type eq 'BL' );
+ $i++;
+ last
+ if ( $Opt_repeat_count > 0
+ && $number_of_groups_seen >= $Opt_repeat_count );
- # Note on other line types:
- # 'FS' (Format Skipping) lines may contain opening/closing tokens so
- # we have to process them to keep the stack correctly sequenced.
- # 'VB' (Verbatim) lines could be skipped, but testing shows that
- # results look better if we include their lengths.
+ kgb_initialize_line_vars();
- # Also note that we could exclude -xlp formatting of containers with
- # 'FS' and 'VB' lines, but in testing that was not really beneficial.
+ $line_type = $line_of_tokens->{_line_type};
- # So we process tokens in 'FS' and 'VB' lines like all the rest...
+ # always end a group at non-CODE
+ if ( $line_type ne 'CODE' ) { $self->kgb_end_group(); next }
- my $rK_range = $line_of_tokens->{_rK_range};
- my ( $K_first, $K_last ) = @{$rK_range};
- next unless ( defined($K_first) && defined($K_last) );
-
- my $has_comment = $rLL->[$K_last]->[_TYPE_] eq '#';
-
- # Always ignore block comments
- next if ( $has_comment && $K_first == $K_last );
-
- # Handle an intermediate line of a multiline qw quote. These may
- # require including some -ci or -i spaces. See cases c098/x063.
- # Updated to check all lines (not just $K_first==$K_last) to fix b1316
- my $K_begin_loop = $K_first;
- if ( $rLL->[$K_first]->[_TYPE_] eq 'q' ) {
-
- my $KK = $K_first;
- my $level = $rLL->[$KK]->[_LEVEL_];
- my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
-
- # remember the level of the start
- if ( !defined($K_start_multiline_qw) ) {
- $K_start_multiline_qw = $K_first;
- $level_start_multiline_qw = $level;
- my $seqno_qw =
- $self->[_rstarting_multiline_qw_seqno_by_K_]
- ->{$K_start_multiline_qw};
- if ( !$seqno_qw ) {
- my $Kp = $self->K_previous_nonblank($K_first);
- if ( defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' ) {
-
- $K_start_multiline_qw = $Kp;
- $level_start_multiline_qw =
- $rLL->[$K_start_multiline_qw]->[_LEVEL_];
- }
- else {
+ $CODE_type = $line_of_tokens->{_code_type};
- # Fix for b1319, b1320
- goto NOT_MULTILINE_QW;
- }
- }
+ # end any group at a format skipping line
+ if ( $CODE_type && $CODE_type eq 'FS' ) {
+ $self->kgb_end_group();
+ next;
}
- $len = $rLL->[$KK]->[_CUMULATIVE_LENGTH_] -
- $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
+ # continue in a verbatim (VB) type; it may be quoted text
+ if ( $CODE_type eq 'VB' ) {
+ if ( $ibeg >= 0 ) { $iend = $i; }
+ next;
+ }
- # We may have to add the spaces of one level or ci level ... it
- # depends depends on the -xci flag, the -wn flag, and if the qw
- # uses a container token as the quote delimiter.
+ # and continue in blank (BL) types
+ if ( $CODE_type eq 'BL' ) {
+ if ( $ibeg >= 0 ) {
+ $iend = $i;
+ push @{iblanks}, $i;
- # First rule: add ci if there is a $ci_level
- if ($ci_level) {
- $len += $rOpts_continuation_indentation;
+ # propagate current subgroup token
+ my $tok = $group[-1]->[1];
+ push @group, [ $i, $tok, $count ];
+ }
+ next;
}
- # Second rule: otherwise, look for an extra indentation level
- # from the start and add one indentation level if found.
- elsif ( $level > $level_start_multiline_qw ) {
- $len += $rOpts_indent_columns;
- }
+ # examine the first token of this line
+ my $rK_range = $line_of_tokens->{_rK_range};
+ ( $K_first, $K_last ) = @{$rK_range};
+ if ( !defined($K_first) ) {
+
+ # Somewhat unexpected blank line..
+ # $rK_range is normally defined for line type CODE, but this can
+ # happen for example if the input line was a single semicolon
+ # which is being deleted. In that case there was code in the
+ # input file but it is not being retained. So we can silently
+ # return.
+ return $rhash_of_desires;
+ }
+
+ my $level = $rLL->[$K_first]->[_LEVEL_];
+ my $type = $rLL->[$K_first]->[_TYPE_];
+ my $token = $rLL->[$K_first]->[_TOKEN_];
+ my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
+
+ # End a group 'badly' at an unexpected level. This will prevent
+ # blank lines being incorrectly placed after the end of the group.
+ # We are looking for any deviation from two acceptable patterns:
+ # PATTERN 1: a simple list; secondary lines are at level+1
+ # PATTERN 2: a long statement; all secondary lines same level
+ # This was added as a fix for case b1177, in which a complex
+ # structure got incorrectly inserted blank lines.
+ if ( $ibeg >= 0 ) {
- if ( $len > $max_prong_len ) { $max_prong_len = $len }
+ # Check for deviation from PATTERN 1, simple list:
+ if ( defined($K_closing) && $K_first < $K_closing ) {
+ $self->kgb_end_group(1) if ( $level != $level_beg + 1 );
+ }
- $last_nonblank_type = 'q';
+ # Check for deviation from PATTERN 2, single statement:
+ elsif ( $level != $level_beg ) { $self->kgb_end_group(1) }
+ }
- $K_begin_loop = $K_first + 1;
+ # Do not look for keywords in lists ( keyword 'my' can occur in
+ # lists, see case b760); fixed for c048.
+ if ( $self->is_list_by_K($K_first) ) {
+ if ( $ibeg >= 0 ) { $iend = $i }
+ next;
+ }
- # We can skip to the next line if more tokens
- next if ( $K_begin_loop > $K_last );
+ # see if this is a code type we seek (i.e. comment)
+ if ( $CODE_type
+ && $keyword_group_list_comment_pattern
+ && $CODE_type =~ /$keyword_group_list_comment_pattern/ )
+ {
- }
+ my $tok = $CODE_type;
- NOT_MULTILINE_QW:
- $K_start_multiline_qw = undef;
+ # Continuing a group
+ if ( $ibeg >= 0 && $level == $level_beg ) {
+ $self->kgb_add_to_group( $i, $tok, $level );
+ }
- # Find the terminal token, before any side comment
- my $K_terminal = $K_last;
- if ($has_comment) {
- $K_terminal -= 1;
- $K_terminal -= 1
- if ( $rLL->[$K_terminal]->[_TYPE_] eq 'b'
- && $K_terminal > $K_first );
- }
+ # Start new group
+ else {
- # Use length to terminal comma if interrupted list rule applies
- if ( @stack && $stack[-1]->[_interrupted_list_rule_] ) {
- my $K_c = $stack[-1]->[_K_c_];
- if (
- defined($K_c)
- && $rLL->[$K_terminal]->[_TYPE_] eq ','
+ # first end old group if any; we might be starting new
+ # keywords at different level
+ if ( $ibeg >= 0 ) { $self->kgb_end_group(); }
+ $self->kgb_add_to_group( $i, $tok, $level );
+ }
+ next;
+ }
- # Ignore if terminal comma, causes instability (b1297, b1330)
- && (
- $K_c - $K_terminal > 2
- || ( $K_c - $K_terminal == 2
- && $rLL->[ $K_terminal + 1 ]->[_TYPE_] ne 'b' )
- )
- )
+ # See if it is a keyword we seek, but never start a group in a
+ # continuation line; the code may be badly formatted.
+ if ( $ci_level == 0
+ && $type eq 'k'
+ && $token =~ /$keyword_group_list_pattern/ )
{
- my $Kend = $K_terminal;
- # This caused an instability in b1311 by making the result
- # dependent on input. It is not really necessary because the
- # comment length is added at the end of the loop.
- ##if ( $has_comment
- ## && !$rOpts_ignore_side_comment_lengths )
- ##{
- ## $Kend = $K_last;
- ##}
+ # Continuing a keyword group
+ if ( $ibeg >= 0 && $level == $level_beg ) {
+ $self->kgb_add_to_group( $i, $token, $level );
+ }
- # changed from $len to my $leng to fix b1302 b1306 b1317 b1321
- my $leng = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
- $rLL->[ $K_first - 1 ]->[_CUMULATIVE_LENGTH_];
+ # Start new keyword group
+ else {
- # Fix for b1331: at a broken => item, include the length of
- # the previous half of the item plus one for the missing space
- if ( $last_nonblank_type eq '=>' ) {
- $leng += $len + 1;
+ # first end old group if any; we might be starting new
+ # keywords at different level
+ if ( $ibeg >= 0 ) { $self->kgb_end_group(); }
+ $self->kgb_add_to_group( $i, $token, $level );
}
-
- if ( $leng > $max_prong_len ) { $max_prong_len = $leng }
+ next;
}
- }
- # Loop over tokens on this line ...
- foreach my $KK ( $K_begin_loop .. $K_terminal ) {
+ # This is not one of our keywords, but we are in a keyword group
+ # so see if we should continue or quit
+ elsif ( $ibeg >= 0 ) {
- my $type = $rLL->[$KK]->[_TYPE_];
- next if ( $type eq 'b' );
+ # - bail out on a large level change; we may have walked into a
+ # data structure or anonymous sub code.
+ if ( $level > $level_beg + 1 || $level < $level_beg ) {
+ $self->kgb_end_group(1);
+ next;
+ }
- #------------------------
- # Handle sequenced tokens
- #------------------------
- my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
- if ($seqno) {
+ # - keep going on a continuation line of the same level, since
+ # it is probably a continuation of our previous keyword,
+ # - and keep going past hanging side comments because we never
+ # want to interrupt them.
+ if ( ( ( $level == $level_beg ) && $ci_level > 0 )
+ || $CODE_type eq 'HSC' )
+ {
+ $iend = $i;
+ next;
+ }
- my $token = $rLL->[$KK]->[_TOKEN_];
+ # - continue if if we are within in a container which started
+ # with the line of the previous keyword.
+ if ( defined($K_closing) && $K_first <= $K_closing ) {
+
+ # continue if entire line is within container
+ if ( $K_last <= $K_closing ) { $iend = $i; next }
+
+ # continue at ); or }; or ];
+ my $KK = $K_closing + 1;
+ if ( $rLL->[$KK]->[_TYPE_] eq ';' ) {
+ if ( $KK < $K_last ) {
+ if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK }
+ if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' )
+ {
+ $self->kgb_end_group(1);
+ next;
+ }
+ }
+ $iend = $i;
+ next;
+ }
- #----------------------------
- # Entering a new container...
- #----------------------------
- if ( $is_opening_token{$token}
- && defined( $K_closing_container->{$seqno} ) )
- {
+ $self->kgb_end_group(1);
+ next;
+ }
- # save current prong length
- $stack[-1]->[_max_prong_len_] = $max_prong_len;
- $max_prong_len = 0;
+ # - end the group if none of the above
+ $self->kgb_end_group();
+ next;
+ }
- # Start new prong one level deeper
- my $handle_len = 0;
- if ( $rblock_type_of_seqno->{$seqno} ) {
+ # not in a keyword group; continue
+ else { next }
+ } ## end of loop over all lines
- # code blocks do not use -lp indentation, but behave as
- # if they had a handle of one indentation length
- $handle_len = $rOpts_indent_columns;
+ $self->kgb_end_group();
+ return $rhash_of_desires;
- }
- elsif ( $is_handle_type{$last_nonblank_type} ) {
- $handle_len = $len;
- $handle_len += 1
- if ( $KK > 0 && $rLL->[ $KK - 1 ]->[_TYPE_] eq 'b' );
- }
+ } ## end sub keyword_group_scan
+} ## end closure keyword_group_scan
- # Set a flag if the 'Interrupted List Rule' will be applied
- # (see sub copy_old_breakpoints).
- # - Added check on has_broken_list to fix issue b1298
+#######################################
+# CODE SECTION 7: Process lines of code
+#######################################
- my $interrupted_list_rule =
- $ris_permanently_broken->{$seqno}
- && $ris_list_by_seqno->{$seqno}
- && !$rhas_broken_list->{$seqno}
- && !$rOpts_ignore_old_breakpoints;
+{ ## begin closure process_line_of_CODE
- # NOTES: Since we are looking at old line numbers we have
- # to be very careful not to introduce an instability.
+ # The routines in this closure receive lines of code and combine them into
+ # 'batches' and send them along. A 'batch' is the unit of code which can be
+ # processed further as a unit. It has the property that it is the largest
+ # amount of code into which which perltidy is free to place one or more
+ # line breaks within it without violating any constraints.
- # This following causes instability (b1288-b1296):
- # $interrupted_list_rule ||=
- # $rOpts_break_at_old_comma_breakpoints;
+ # When a new batch is formed it is sent to sub 'grind_batch_of_code'.
- # - We could turn off the interrupted list rule if there is
- # a broken sublist, to follow 'Compound List Rule 1'.
- # - We could use the _rhas_broken_list_ flag for this.
- # - But it seems safer not to do this, to avoid
- # instability, since the broken sublist could be
- # temporary. It seems better to let the formatting
- # stabilize by itself after one or two iterations.
- # - So, not doing this for now
+ # flags needed by the store routine
+ my $line_of_tokens;
+ my $no_internal_newlines;
+ my $CODE_type;
- # Turn off the interrupted list rule if -vmll is set and a
- # list has '=>' characters. This avoids instabilities due
- # to dependence on old line breaks; issue b1325.
- if ( $interrupted_list_rule
- && $rOpts_variable_maximum_line_length )
- {
- my $rtype_count = $rtype_count_by_seqno->{$seqno};
- if ( $rtype_count && $rtype_count->{'=>'} ) {
- $interrupted_list_rule = 0;
- }
- }
+ # range of K of tokens for the current line
+ my ( $K_first, $K_last );
- # Include length to a comma ending this line
- if ( $interrupted_list_rule
- && $rLL->[$K_terminal]->[_TYPE_] eq ',' )
- {
- my $Kend = $K_terminal;
-
- # fix for b1332: side comments handled at end of loop
- ##if ( $Kend < $K_last
- ## && !$rOpts_ignore_side_comment_lengths )
- ##{
- ## $Kend = $K_last;
- ##}
-
- # Measure from the next blank if any (fixes b1301)
- my $Kbeg = $KK;
- if ( $rLL->[ $Kbeg + 1 ]->[_TYPE_] eq 'b'
- && $Kbeg < $Kend )
- {
- $Kbeg++;
- }
+ my ( $rLL, $radjusted_levels, $rparent_of_seqno, $rdepth_of_opening_seqno,
+ $rblock_type_of_seqno, $ri_starting_one_line_block );
- my $leng = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
- $rLL->[$Kbeg]->[_CUMULATIVE_LENGTH_];
- if ( $leng > $max_prong_len ) { $max_prong_len = $leng }
- }
+ # past stored nonblank tokens and flags
+ my (
+ $K_last_nonblank_code, $looking_for_else,
+ $is_static_block_comment, $last_CODE_type,
+ $last_line_had_side_comment, $next_parent_seqno,
+ $next_slevel,
+ );
- my $K_c = $K_closing_container->{$seqno};
+ # Called once at the start of a new file
+ sub initialize_process_line_of_CODE {
+ $K_last_nonblank_code = undef;
+ $looking_for_else = 0;
+ $is_static_block_comment = 0;
+ $last_line_had_side_comment = 0;
+ $next_parent_seqno = SEQ_ROOT;
+ $next_slevel = undef;
+ return;
+ } ## end sub initialize_process_line_of_CODE
- push @stack,
- [
- $max_prong_len, $handle_len,
- $seqno, $iline,
- $KK, $K_c,
- $interrupted_list_rule
- ];
- }
+ # Batch variables: these describe the current batch of code being formed
+ # and sent down the pipeline. They are initialized in the next
+ # sub.
+ my (
+ $rbrace_follower, $index_start_one_line_block,
+ $starting_in_quote, $ending_in_quote,
+ );
- #--------------------
- # Exiting a container
- #--------------------
- elsif ( $is_closing_token{$token} ) {
- if (@stack) {
+ # Called before the start of each new batch
+ sub initialize_batch_variables {
- # The current prong ends - get its handle
- my $item = pop @stack;
- my $handle_len = $item->[_handle_len_];
- my $seqno_o = $item->[_seqno_o_];
- my $iline_o = $item->[_iline_o_];
- my $K_o = $item->[_K_o_];
- my $K_c_expect = $item->[_K_c_];
- my $collapsed_len = $max_prong_len;
-
- if ( $seqno_o ne $seqno ) {
-
- # This can happen if input file has brace errors.
- # Otherwise it shouldn't happen. Not fatal but -lp
- # formatting could get messed up.
- if ( DEVEL_MODE && !get_saw_brace_error() ) {
- Fault(<<EOM);
-sequence numbers differ; at CLOSING line $iline, seq=$seqno, Kc=$KK .. at OPENING line $iline_o, seq=$seqno_o, Ko=$K_o, expecting Kc=$K_c_expect
-EOM
- }
- }
+ # Initialize array values for a new batch. Any changes here must be
+ # carefully coordinated with sub store_token_to_go.
- #------------------------------------------
- # Rules to avoid scrunching code blocks ...
- #------------------------------------------
- # Some test cases:
- # c098/x107 x108 x110 x112 x114 x115 x117 x118 x119
- my $block_type = $rblock_type_of_seqno->{$seqno};
- if ($block_type) {
-
- my $K_c = $KK;
- my $block_length = MIN_BLOCK_LEN;
- my $is_one_line_block;
- my $level = $rLL->[$K_o]->[_LEVEL_];
- if ( defined($K_o) && defined($K_c) ) {
-
- # note: fixed 3 May 2022 (removed 'my')
- $block_length =
- $rLL->[ $K_c - 1 ]->[_CUMULATIVE_LENGTH_] -
- $rLL->[$K_o]->[_CUMULATIVE_LENGTH_];
- $is_one_line_block = $iline == $iline_o;
- }
+ $max_index_to_go = UNDEFINED_INDEX;
+ $summed_lengths_to_go[0] = 0;
+ $nesting_depth_to_go[0] = 0;
+ $ri_starting_one_line_block = [];
- # Code block rule 1: Use the total block length if
- # it is less than the minimum.
- if ( $block_length < MIN_BLOCK_LEN ) {
- $collapsed_len = $block_length;
- }
+ # Redefine some sparse arrays.
+ # It is more efficient to redefine these sparse arrays and rely on
+ # undef's instead of initializing to 0's. Testing showed that using
+ # @array=() is more efficient than $#array=-1
- # Code block rule 2: Use the full length of a
- # one-line block to avoid breaking it, unless
- # extremely long. We do not need to do a precise
- # check here, because if it breaks then it will
- # stay broken on later iterations.
- elsif (
- $is_one_line_block
- && $block_length <
- $maximum_line_length_at_level[$level]
-
- # But skip this for sort/map/grep/eval blocks
- # because they can reform (b1345)
- && !$is_sort_map_grep_eval{$block_type}
- )
- {
- $collapsed_len = $block_length;
- }
+ @old_breakpoint_to_go = ();
+ @forced_breakpoint_to_go = ();
+ @block_type_to_go = ();
+ @mate_index_to_go = ();
+ @type_sequence_to_go = ();
- # Code block rule 3: Otherwise the length should be
- # at least MIN_BLOCK_LEN to avoid scrunching code
- # blocks.
- elsif ( $collapsed_len < MIN_BLOCK_LEN ) {
- $collapsed_len = MIN_BLOCK_LEN;
- }
- }
+ # NOTE: @nobreak_to_go is sparse and could be treated this way, but
+ # testing showed that there would be very little efficiency gain
+ # because an 'if' test must be added in store_token_to_go.
- # Store the result. Some extra space, '2', allows for
- # length of an opening token, inside space, comma, ...
- # This constant has been tuned to give good overall
- # results.
- $collapsed_len += 2;
- $rcollapsed_length_by_seqno->{$seqno} = $collapsed_len;
-
- # Restart scanning the lower level prong
- if (@stack) {
- $max_prong_len = $stack[-1]->[_max_prong_len_];
- $collapsed_len += $handle_len;
- if ( $collapsed_len > $max_prong_len ) {
- $max_prong_len = $collapsed_len;
- }
- }
- }
- }
+ # The initialization code for the remaining batch arrays is as follows
+ # and can be activated for testing. But profiling shows that it is
+ # time-consuming to re-initialize the batch arrays and is not necessary
+ # because the maximum valid token, $max_index_to_go, is carefully
+ # controlled. This means however that it is not possible to do any
+ # type of filter or map operation directly on these arrays. And it is
+ # not possible to use negative indexes. As a precaution against program
+ # changes which might do this, sub pad_array_to_go adds some undefs at
+ # the end of the current batch of data.
- # it is a ternary - no special processing for these yet
- else {
+ ## 0 && do { #<<<
+ ## @nobreak_to_go = ();
+ ## @token_lengths_to_go = ();
+ ## @levels_to_go = ();
+ ## @ci_levels_to_go = ();
+ ## @tokens_to_go = ();
+ ## @K_to_go = ();
+ ## @types_to_go = ();
+ ## @leading_spaces_to_go = ();
+ ## @reduced_spaces_to_go = ();
+ ## @inext_to_go = ();
+ ## @parent_seqno_to_go = ();
+ ## };
- }
+ $rbrace_follower = undef;
+ $ending_in_quote = 0;
- $len = 0;
- $last_nonblank_type = $type;
- next;
- }
+ $index_start_one_line_block = undef;
- #----------------------------
- # Handle non-container tokens
- #----------------------------
- my $token_length = $rLL->[$KK]->[_TOKEN_LENGTH_];
+ # initialize forced breakpoint vars associated with each output batch
+ $forced_breakpoint_count = 0;
+ $index_max_forced_break = UNDEFINED_INDEX;
+ $forced_breakpoint_undo_count = 0;
- # Count lengths of things like 'xx => yy' as a single item
- if ( $type eq '=>' ) {
- $len += $token_length + 1;
- if ( $len > $max_prong_len ) { $max_prong_len = $len }
- }
- elsif ( $last_nonblank_type eq '=>' ) {
- $len += $token_length;
- if ( $len > $max_prong_len ) { $max_prong_len = $len }
+ return;
+ } ## end sub initialize_batch_variables
- # but only include one => per item
- $len = $token_length;
- }
+ sub leading_spaces_to_go {
- # include everything to end of line after a here target
- elsif ( $type eq 'h' ) {
- $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] -
- $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
- if ( $len > $max_prong_len ) { $max_prong_len = $len }
- }
+ # return the number of indentation spaces for a token in the output
+ # stream
- # for everything else just use the token length
- else {
- $len = $token_length;
- if ( $len > $max_prong_len ) { $max_prong_len = $len }
- }
- $last_nonblank_type = $type;
+ my ($ii) = @_;
+ return 0 if ( $ii < 0 );
+ my $indentation = $leading_spaces_to_go[$ii];
+ return ref($indentation) ? $indentation->get_spaces() : $indentation;
+ } ## end sub leading_spaces_to_go
- } ## end loop over tokens on this line
+ sub create_one_line_block {
- # Now take care of any side comment
- if ($has_comment) {
- if ($rOpts_ignore_side_comment_lengths) {
- $len = 0;
- }
- else {
+ # set index starting next one-line block
+ # call with no args to delete the current one-line block
+ ($index_start_one_line_block) = @_;
+ return;
+ } ## end sub create_one_line_block
- # For a side comment when -iscl is not set, measure length from
- # the start of the previous nonblank token
- my $len0 =
- $K_terminal > 0
- ? $rLL->[ $K_terminal - 1 ]->[_CUMULATIVE_LENGTH_]
- : 0;
- $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] - $len0;
- if ( $len > $max_prong_len ) { $max_prong_len = $len }
- }
- }
+ # Routine to place the current token into the output stream.
+ # Called once per output token.
- } ## end loop over lines
+ use constant DEBUG_STORE => 0;
- if (DEBUG_COLLAPSED_LENGTHS) {
- print "\nCollapsed lengths--\n";
- foreach
- my $key ( sort { $a <=> $b } keys %{$rcollapsed_length_by_seqno} )
- {
- my $clen = $rcollapsed_length_by_seqno->{$key};
- print "$key -> $clen\n";
- }
- }
+ sub store_token_to_go {
- return;
-} ## end sub collapsed_lengths
+ my ( $self, $Ktoken_vars, $rtoken_vars ) = @_;
-sub is_excluded_lp {
+ #-------------------------------------------------------
+ # Token storage utility for sub process_line_of_CODE.
+ # Add one token to the next batch of '_to_go' variables.
+ #-------------------------------------------------------
- # Decide if this container is excluded by user request:
- # returns true if this token is excluded (i.e., may not use -lp)
- # returns false otherwise
+ # Input parameters:
+ # $Ktoken_vars = the index K in the global token array
+ # $rtoken_vars = $rLL->[$Ktoken_vars] = the corresponding token values
+ # unless they are temporarily being overridden
- # The control hash can either describe:
- # what to exclude: $line_up_parentheses_control_is_lxpl = 1, or
- # what to include: $line_up_parentheses_control_is_lxpl = 0
+ #------------------------------------------------------------------
+ # NOTE: called once per token so coding efficiency is critical here.
+ # All changes need to be benchmarked with Devel::NYTProf.
+ #------------------------------------------------------------------
- my ( $self, $KK ) = @_;
- my $rLL = $self->[_rLL_];
- my $rtoken_vars = $rLL->[$KK];
- my $token = $rtoken_vars->[_TOKEN_];
- my $rflags = $line_up_parentheses_control_hash{$token};
+ my (
- #-----------------------------------------------
- # TEST #1: check match to listed container types
- #-----------------------------------------------
- if ( !defined($rflags) ) {
+ $type,
+ $token,
+ $ci_level,
+ $level,
+ $seqno,
+ $length,
- # There is no entry for this container, so we are done
- return !$line_up_parentheses_control_is_lxpl;
- }
+ ) = @{$rtoken_vars}[
- my ( $flag1, $flag2 ) = @{$rflags};
+ _TYPE_,
+ _TOKEN_,
+ _CI_LEVEL_,
+ _LEVEL_,
+ _TYPE_SEQUENCE_,
+ _TOKEN_LENGTH_,
- #-----------------------------------------------------------
- # TEST #2: check match to flag1, the preceding nonblank word
- #-----------------------------------------------------------
- my $match_flag1 = !defined($flag1) || $flag1 eq '*';
- if ( !$match_flag1 ) {
+ ];
- # Find the previous token
- my ( $is_f, $is_k, $is_w );
- my $Kp = $self->K_previous_nonblank($KK);
- if ( defined($Kp) ) {
- my $type_p = $rLL->[$Kp]->[_TYPE_];
- my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
+ # Check for emergency flush...
+ # The K indexes in the batch must always be a continuous sequence of
+ # the global token array. The batch process programming assumes this.
+ # If storing this token would cause this relation to fail we must dump
+ # the current batch before storing the new token. It is extremely rare
+ # for this to happen. One known example is the following two-line
+ # snippet when run with parameters
+ # --noadd-newlines --space-terminal-semicolon:
+ # if ( $_ =~ /PENCIL/ ) { $pencil_flag= 1 } ; ;
+ # $yy=1;
+ if ( $max_index_to_go >= 0 ) {
+ if ( $Ktoken_vars != $K_to_go[$max_index_to_go] + 1 ) {
+ $self->flush_batch_of_CODE();
+ }
- # keyword?
- $is_k = $type_p eq 'k';
+ # Do not output consecutive blank tokens ... this should not
+ # happen, but it is worth checking. Later code can then make the
+ # simplifying assumption that blank tokens are not consecutive.
+ elsif ( $type eq 'b' && $types_to_go[$max_index_to_go] eq 'b' ) {
- # function call?
- $is_f = $self->[_ris_function_call_paren_]->{$seqno};
+ if (DEVEL_MODE) {
- # either keyword or function call?
- $is_w = $is_k || $is_f;
+ # if this happens, it is may be that consecutive blanks
+ # were inserted into the token stream in 'respace_tokens'
+ my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1;
+ Fault("consecutive blanks near line $lno; please fix");
+ }
+ return;
+ }
}
- # Check for match based on flag1 and the previous token:
- if ( $flag1 eq 'k' ) { $match_flag1 = $is_k }
- elsif ( $flag1 eq 'K' ) { $match_flag1 = !$is_k }
- elsif ( $flag1 eq 'f' ) { $match_flag1 = $is_f }
- elsif ( $flag1 eq 'F' ) { $match_flag1 = !$is_f }
- elsif ( $flag1 eq 'w' ) { $match_flag1 = $is_w }
- elsif ( $flag1 eq 'W' ) { $match_flag1 = !$is_w }
- }
-
- # See if we can exclude this based on the flag1 test...
- if ($line_up_parentheses_control_is_lxpl) {
- return 1 if ($match_flag1);
- }
- else {
- return 1 if ( !$match_flag1 );
- }
-
- #-------------------------------------------------------------
- # TEST #3: exclusion based on flag2 and the container contents
- #-------------------------------------------------------------
+ # Do not start a batch with a blank token.
+ # Fixes cases b149 b888 b984 b985 b986 b987
+ else {
+ if ( $type eq 'b' ) { return }
+ }
- # Note that this is an exclusion test for both -lpxl or -lpil input methods
- # The options are:
- # 0 or blank: ignore container contents
- # 1 exclude non-lists or lists with sublists
- # 2 same as 1 but also exclude lists with code blocks
+ # Update counter and do initializations if first token of new batch
+ if ( !++$max_index_to_go ) {
- my $match_flag2;
- if ($flag2) {
+ # Reset flag '$starting_in_quote' for a new batch. It must be set
+ # to the value of '$in_continued_quote', but here for efficiency we
+ # set it to zero, which is its normal value. Then in coding below
+ # we will change it if we find we are actually in a continued quote.
+ $starting_in_quote = 0;
- my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
+ # Update the next parent sequence number for each new batch.
- my $is_list = $self->[_ris_list_by_seqno_]->{$seqno};
- my $has_list = $self->[_rhas_list_]->{$seqno};
- my $has_code_block = $self->[_rhas_code_block_]->{$seqno};
- my $has_ternary = $self->[_rhas_ternary_]->{$seqno};
+ #----------------------------------------
+ # Begin coding from sub parent_seqno_by_K
+ #----------------------------------------
- if ( !$is_list
- || $has_list
- || $flag2 eq '2' && ( $has_code_block || $has_ternary ) )
- {
- $match_flag2 = 1;
- }
- }
- return $match_flag2;
-} ## end sub is_excluded_lp
+ # The following is equivalent to this call but much faster:
+ # $next_parent_seqno = $self->parent_seqno_by_K($Ktoken_vars);
-sub set_excluded_lp_containers {
+ $next_parent_seqno = SEQ_ROOT;
+ if ($seqno) {
+ $next_parent_seqno = $rparent_of_seqno->{$seqno};
+ }
+ else {
+ my $Kt = $rLL->[$Ktoken_vars]->[_KNEXT_SEQ_ITEM_];
+ if ( defined($Kt) ) {
+ my $type_sequence_t = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
+ my $type_t = $rLL->[$Kt]->[_TYPE_];
- my ($self) = @_;
- return unless ($rOpts_line_up_parentheses);
- my $rLL = $self->[_rLL_];
- return unless ( defined($rLL) && @{$rLL} );
+ # if next container token is closing, it is the parent seqno
+ if ( $is_closing_type{$type_t} ) {
+ $next_parent_seqno = $type_sequence_t;
+ }
- my $K_opening_container = $self->[_K_opening_container_];
- my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
- my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ # otherwise we want its parent container
+ else {
+ $next_parent_seqno =
+ $rparent_of_seqno->{$type_sequence_t};
+ }
+ }
+ }
+ $next_parent_seqno = SEQ_ROOT
+ unless ( defined($next_parent_seqno) );
- foreach my $seqno ( keys %{$K_opening_container} ) {
+ #--------------------------------------
+ # End coding from sub parent_seqno_by_K
+ #--------------------------------------
- # code blocks are always excluded by the -lp coding so we can skip them
- next if ( $rblock_type_of_seqno->{$seqno} );
+ $next_slevel = $rdepth_of_opening_seqno->[$next_parent_seqno] + 1;
+ }
- my $KK = $K_opening_container->{$seqno};
- next unless defined($KK);
+ # Clip levels to zero if there are level errors in the file.
+ # We had to wait until now for reasons explained in sub 'write_line'.
+ if ( $level < 0 ) { $level = 0 }
- # see if a user exclusion rule turns off -lp for this container
- if ( $self->is_excluded_lp($KK) ) {
- $ris_excluded_lp_container->{$seqno} = 1;
+ # Safety check that length is defined. This is slow and should not be
+ # needed now, so just do it in DEVEL_MODE to check programming changes.
+ # Formerly needed for --indent-only, in which the entire set of tokens
+ # is normally turned into type 'q'. Lengths are now defined in sub
+ # 'respace_tokens' so this check is no longer needed.
+ if ( DEVEL_MODE && !defined($length) ) {
+ my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1;
+ $length = length($token);
+ Fault(<<EOM);
+undefined length near line $lno; num chars=$length, token='$token'
+EOM
}
- }
- return;
-} ## end sub set_excluded_lp_containers
-######################################
-# CODE SECTION 6: Process line-by-line
-######################################
+ #----------------------------
+ # add this token to the batch
+ #----------------------------
+ $K_to_go[$max_index_to_go] = $Ktoken_vars;
+ $types_to_go[$max_index_to_go] = $type;
+ $tokens_to_go[$max_index_to_go] = $token;
+ $ci_levels_to_go[$max_index_to_go] = $ci_level;
+ $levels_to_go[$max_index_to_go] = $level;
+ $nobreak_to_go[$max_index_to_go] = $no_internal_newlines;
+ $token_lengths_to_go[$max_index_to_go] = $length;
-sub process_all_lines {
+ # Skip point initialization for these sparse arrays - undef's okay;
+ # See also related code in sub initialize_batch_variables.
+ ## $old_breakpoint_to_go[$max_index_to_go] = 0;
+ ## $forced_breakpoint_to_go[$max_index_to_go] = 0;
+ ## $block_type_to_go[$max_index_to_go] = EMPTY_STRING;
+ ## $type_sequence_to_go[$max_index_to_go] = $seqno;
- #----------------------------------------------------------
- # Main loop to format all lines of a file according to type
- #----------------------------------------------------------
+ # NOTE1: nobreak_to_go can be treated as a sparse array, but testing
+ # showed that there is almost no efficiency gain because an if test
+ # would need to be added.
- my $self = shift;
- my $rlines = $self->[_rlines_];
- my $sink_object = $self->[_sink_object_];
- my $fh_tee = $self->[_fh_tee_];
- my $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
- my $file_writer_object = $self->[_file_writer_object_];
- my $logger_object = $self->[_logger_object_];
- my $vertical_aligner_object = $self->[_vertical_aligner_object_];
- my $save_logfile = $self->[_save_logfile_];
+ # NOTE2: Eventually '$type_sequence_to_go' can be also handled as a
+ # sparse array with undef's, but this will require extensive testing
+ # because of its heavy use.
- # Note for RT#118553, leave only one newline at the end of a file.
- # Example code to do this is in comments below:
- # my $Opt_trim_ending_blank_lines = 0;
- # if ($Opt_trim_ending_blank_lines) {
- # while ( my $line_of_tokens = pop @{$rlines} ) {
- # my $line_type = $line_of_tokens->{_line_type};
- # if ( $line_type eq 'CODE' ) {
- # my $CODE_type = $line_of_tokens->{_code_type};
- # next if ( $CODE_type eq 'BL' );
- # }
- # push @{$rlines}, $line_of_tokens;
- # last;
- # }
- # }
-
- # But while this would be a trivial update, it would have very undesirable
- # side effects when perltidy is run from within an editor on a small snippet.
- # So this is best done with a separate filter, such
- # as 'delete_ending_blank_lines.pl' in the examples folder.
+ # We keep a running sum of token lengths from the start of this batch:
+ # summed_lengths_to_go[$i] = total length to just before token $i
+ # summed_lengths_to_go[$i+1] = total length to just after token $i
+ $summed_lengths_to_go[ $max_index_to_go + 1 ] =
+ $summed_lengths_to_go[$max_index_to_go] + $length;
- # Flag to prevent blank lines when POD occurs in a format skipping sect.
- my $in_format_skipping_section;
+ # Initialize some sequence-dependent variables to their normal values
+ $parent_seqno_to_go[$max_index_to_go] = $next_parent_seqno;
+ $nesting_depth_to_go[$max_index_to_go] = $next_slevel;
- # set locations for blanks around long runs of keywords
- my $rwant_blank_line_after = $self->keyword_group_scan();
+ # Then fix them at container tokens:
+ if ($seqno) {
- my $line_type = EMPTY_STRING;
- my $i_last_POD_END = -10;
- my $i = -1;
- foreach my $line_of_tokens ( @{$rlines} ) {
- $i++;
+ $type_sequence_to_go[$max_index_to_go] = $seqno;
- # insert blank lines requested for keyword sequences
- if ( $i > 0
- && defined( $rwant_blank_line_after->{ $i - 1 } )
- && $rwant_blank_line_after->{ $i - 1 } == 1 )
- {
- $self->want_blank_line();
- }
+ $block_type_to_go[$max_index_to_go] =
+ $rblock_type_of_seqno->{$seqno};
- my $last_line_type = $line_type;
- $line_type = $line_of_tokens->{_line_type};
- my $input_line = $line_of_tokens->{_line_text};
+ if ( $is_opening_token{$token} ) {
- # _line_type codes are:
- # SYSTEM - system-specific code before hash-bang line
- # CODE - line of perl code (including comments)
- # POD_START - line starting pod, such as '=head'
- # POD - pod documentation text
- # POD_END - last line of pod section, '=cut'
- # HERE - text of here-document
- # HERE_END - last line of here-doc (target word)
- # FORMAT - format section
- # FORMAT_END - last line of format section, '.'
- # SKIP - code skipping section
- # SKIP_END - last line of code skipping section, '#>>V'
- # DATA_START - __DATA__ line
- # DATA - unidentified text following __DATA__
- # END_START - __END__ line
- # END - unidentified text following __END__
- # ERROR - we are in big trouble, probably not a perl script
+ my $slevel = $rdepth_of_opening_seqno->[$seqno];
+ $nesting_depth_to_go[$max_index_to_go] = $slevel;
+ $next_slevel = $slevel + 1;
+
+ $next_parent_seqno = $seqno;
- # put a blank line after an =cut which comes before __END__ and __DATA__
- # (required by podchecker)
- if ( $last_line_type eq 'POD_END' && !$self->[_saw_END_or_DATA_] ) {
- $i_last_POD_END = $i;
- $file_writer_object->reset_consecutive_blank_lines();
- if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
- $self->want_blank_line();
}
- }
+ elsif ( $is_closing_token{$token} ) {
- # handle line of code..
- if ( $line_type eq 'CODE' ) {
+ $next_slevel = $rdepth_of_opening_seqno->[$seqno];
+ my $slevel = $next_slevel + 1;
+ $nesting_depth_to_go[$max_index_to_go] = $slevel;
- my $CODE_type = $line_of_tokens->{_code_type};
- $in_format_skipping_section = $CODE_type eq 'FS';
+ my $parent_seqno = $rparent_of_seqno->{$seqno};
+ $parent_seqno = SEQ_ROOT unless defined($parent_seqno);
+ $parent_seqno_to_go[$max_index_to_go] = $parent_seqno;
+ $next_parent_seqno = $parent_seqno;
- # Handle blank lines
- if ( $CODE_type eq 'BL' ) {
+ }
+ else {
+ # ternary token: nothing to do
+ }
+ }
- # Keep this blank? Start with the flag -kbl=n, where
- # n=0 ignore all old blank lines
- # n=1 stable: keep old blanks, but limited by -mbl=n
- # n=2 keep all old blank lines, regardless of -mbl=n
- # If n=0 we delete all old blank lines and let blank line
- # rules generate any needed blank lines.
- my $kgb_keep = $rOpts_keep_old_blank_lines;
+ # Define the indentation that this token will have in two cases:
+ # Without CI = reduced_spaces_to_go
+ # With CI = leading_spaces_to_go
+ if ( ( $Ktoken_vars == $K_first )
+ && $line_of_tokens->{_starting_in_quote} )
+ {
+ # in a continued quote - correct value set above if first token
+ if ( $max_index_to_go == 0 ) { $starting_in_quote = 1 }
- # Then delete lines requested by the keyword-group logic if
- # allowed
- if ( $kgb_keep == 1
- && defined( $rwant_blank_line_after->{$i} )
- && $rwant_blank_line_after->{$i} == 2 )
- {
- $kgb_keep = 0;
- }
+ $leading_spaces_to_go[$max_index_to_go] = 0;
+ $reduced_spaces_to_go[$max_index_to_go] = 0;
+ }
+ else {
+ $leading_spaces_to_go[$max_index_to_go] =
+ $reduced_spaces_to_go[$max_index_to_go] =
+ $rOpts_indent_columns * $radjusted_levels->[$Ktoken_vars];
- # But always keep a blank line following an =cut
- if ( $i - $i_last_POD_END < 3 && !$kgb_keep ) {
- $kgb_keep = 1;
- }
+ $leading_spaces_to_go[$max_index_to_go] +=
+ $rOpts_continuation_indentation * $ci_level
+ if ($ci_level);
+ }
- if ($kgb_keep) {
- $self->flush($CODE_type);
- $file_writer_object->write_blank_code_line(
- $rOpts_keep_old_blank_lines == 2 );
- $self->[_last_line_leading_type_] = 'b';
- }
- next;
- }
- else {
+ DEBUG_STORE && do {
+ my ( $a, $b, $c ) = caller();
+ print STDOUT
+"STORE: from $a $c: storing token $token type $type lev=$level at $max_index_to_go\n";
+ };
+ return;
+ } ## end sub store_token_to_go
- # Let logger see all non-blank lines of code. This is a slow
- # operation so we avoid it if it is not going to be saved.
- if ( $save_logfile && $logger_object ) {
- $logger_object->black_box( $line_of_tokens,
- $vertical_aligner_object->get_output_line_number );
- }
- }
+ sub flush_batch_of_CODE {
- # Handle Format Skipping (FS) and Verbatim (VB) Lines
- if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
- $self->write_unindented_line("$input_line");
- $file_writer_object->reset_consecutive_blank_lines();
- next;
- }
+ # Finish and process the current batch.
+ # This must be the only call to grind_batch_of_CODE()
+ my ($self) = @_;
- # Handle all other lines of code
- $self->process_line_of_CODE($line_of_tokens);
- }
+ # If a batch has been started ...
+ if ( $max_index_to_go >= 0 ) {
- # handle line of non-code..
- else {
+ # Create an array to hold variables for this batch
+ my $this_batch = [];
- # set special flags
- my $skip_line = 0;
- if ( substr( $line_type, 0, 3 ) eq 'POD' ) {
+ $this_batch->[_starting_in_quote_] = 1 if ($starting_in_quote);
+ $this_batch->[_ending_in_quote_] = 1 if ($ending_in_quote);
- # Pod docs should have a preceding blank line. But stay
- # out of __END__ and __DATA__ sections, because
- # the user may be using this section for any purpose whatsoever
- if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
- if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
- if ( !$skip_line
- && !$in_format_skipping_section
- && $line_type eq 'POD_START'
- && !$self->[_saw_END_or_DATA_] )
- {
- $self->want_blank_line();
- }
+ if ( $CODE_type || $last_CODE_type ) {
+ $this_batch->[_batch_CODE_type_] =
+ $K_to_go[$max_index_to_go] >= $K_first
+ ? $CODE_type
+ : $last_CODE_type;
}
- # leave the blank counters in a predictable state
- # after __END__ or __DATA__
- elsif ( $line_type eq 'END_START' || $line_type eq 'DATA_START' ) {
- $file_writer_object->reset_consecutive_blank_lines();
- $self->[_saw_END_or_DATA_] = 1;
- }
+ $last_line_had_side_comment =
+ ( $max_index_to_go > 0 && $types_to_go[$max_index_to_go] eq '#' );
- # Patch to avoid losing blank lines after a code-skipping block;
- # fixes case c047.
- elsif ( $line_type eq 'SKIP_END' ) {
- $file_writer_object->reset_consecutive_blank_lines();
+ # The flag $is_static_block_comment applies to the line which just
+ # arrived. So it only applies if we are outputting that line.
+ if ( $is_static_block_comment && !$last_line_had_side_comment ) {
+ $this_batch->[_is_static_block_comment_] =
+ $K_to_go[0] == $K_first;
}
- # write unindented non-code line
- if ( !$skip_line ) {
- $self->write_unindented_line($input_line);
- }
- }
- }
- return;
+ $this_batch->[_ri_starting_one_line_block_] =
+ $ri_starting_one_line_block;
-} ## end sub process_all_lines
+ $self->[_this_batch_] = $this_batch;
-sub keyword_group_scan {
- my $self = shift;
+ #-------------------
+ # process this batch
+ #-------------------
+ $self->grind_batch_of_CODE();
- #-------------------------------------------------------------------------
- # Called once per file to process any --keyword-group-blanks-* parameters.
- #-------------------------------------------------------------------------
+ # Done .. this batch is history
+ $self->[_this_batch_] = undef;
- # Manipulate blank lines around keyword groups (kgb* flags)
- # Scan all lines looking for runs of consecutive lines beginning with
- # selected keywords. Example keywords are 'my', 'our', 'local', ... but
- # they may be anything. We will set flags requesting that blanks be
- # inserted around and within them according to input parameters. Note
- # that we are scanning the lines as they came in in the input stream, so
- # they are not necessarily well formatted.
-
- # The output of this sub is a return hash ref whose keys are the indexes of
- # lines after which we desire a blank line. For line index i:
- # $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i
- # $rhash_of_desires->{$i} = 2 means we want blank line $i removed
- my $rhash_of_desires = {};
-
- # Nothing to do if no blanks can be output. This test added to fix
- # case b760.
- if ( !$rOpts_maximum_consecutive_blank_lines ) {
- return $rhash_of_desires;
- }
+ initialize_batch_variables();
+ }
- my $Opt_blanks_before = $rOpts->{'keyword-group-blanks-before'}; # '-kgbb'
- my $Opt_blanks_after = $rOpts->{'keyword-group-blanks-after'}; # '-kgba'
- my $Opt_blanks_inside = $rOpts->{'keyword-group-blanks-inside'}; # '-kgbi'
- my $Opt_blanks_delete = $rOpts->{'keyword-group-blanks-delete'}; # '-kgbd'
- my $Opt_size = $rOpts->{'keyword-group-blanks-size'}; # '-kgbs'
-
- # A range of sizes can be input with decimal notation like 'min.max' with
- # any number of dots between the two numbers. Examples:
- # string => min max matches
- # 1.1 1 1 exactly 1
- # 1.3 1 3 1,2, or 3
- # 1..3 1 3 1,2, or 3
- # 5 5 - 5 or more
- # 6. 6 - 6 or more
- # .2 - 2 up to 2
- # 1.0 1 0 nothing
- my ( $Opt_size_min, $Opt_size_max ) = split /\.+/, $Opt_size;
- if ( $Opt_size_min && $Opt_size_min !~ /^\d+$/
- || $Opt_size_max && $Opt_size_max !~ /^\d+$/ )
- {
- Warn(<<EOM);
-Unexpected value for -kgbs: '$Opt_size'; expecting 'min' or 'min.max';
-ignoring all -kgb flags
-EOM
+ return;
+ } ## end sub flush_batch_of_CODE
- # Turn this option off so that this message does not keep repeating
- # during iterations and other files.
- $rOpts->{'keyword-group-blanks-size'} = EMPTY_STRING;
- return $rhash_of_desires;
- }
- $Opt_size_min = 1 unless ($Opt_size_min);
+ sub end_batch {
- if ( $Opt_size_max && $Opt_size_max < $Opt_size_min ) {
- return $rhash_of_desires;
- }
+ # End the current batch, EXCEPT for a few special cases
+ my ($self) = @_;
- # codes for $Opt_blanks_before and $Opt_blanks_after:
- # 0 = never (delete if exist)
- # 1 = stable (keep unchanged)
- # 2 = always (insert if missing)
+ if ( $max_index_to_go < 0 ) {
- return $rhash_of_desires
- unless $Opt_size_min > 0
- && ( $Opt_blanks_before != 1
- || $Opt_blanks_after != 1
- || $Opt_blanks_inside
- || $Opt_blanks_delete );
+ # nothing to do .. this is harmless but wastes time.
+ if (DEVEL_MODE) {
+ Fault("sub end_batch called with nothing to do; please fix\n");
+ }
+ return;
+ }
- my $Opt_pattern = $keyword_group_list_pattern;
- my $Opt_comment_pattern = $keyword_group_list_comment_pattern;
- my $Opt_repeat_count =
- $rOpts->{'keyword-group-blanks-repeat-count'}; # '-kgbr'
+ # Exceptions when a line does not end with a comment... (fixes c058)
+ if ( $types_to_go[$max_index_to_go] ne '#' ) {
- my $rlines = $self->[_rlines_];
- my $rLL = $self->[_rLL_];
- my $K_closing_container = $self->[_K_closing_container_];
- my $K_opening_container = $self->[_K_opening_container_];
- my $rK_weld_right = $self->[_rK_weld_right_];
-
- # variables for the current group and subgroups:
- my ( $ibeg, $iend, $count, $level_beg, $K_closing, @iblanks, @group,
- @subgroup );
-
- # Definitions:
- # ($ibeg, $iend) = starting and ending line indexes of this entire group
- # $count = total number of keywords seen in this entire group
- # $level_beg = indentation level of this group
- # @group = [ $i, $token, $count ] =list of all keywords & blanks
- # @subgroup = $j, index of group where token changes
- # @iblanks = line indexes of blank lines in input stream in this group
- # where i=starting line index
- # token (the keyword)
- # count = number of this token in this subgroup
- # j = index in group where token changes
- #
- # These vars will contain values for the most recently seen line:
- my ( $line_type, $CODE_type, $K_first, $K_last );
+ # Exception 1: Do not end line in a weld
+ return
+ if ( $total_weld_count
+ && $self->[_rK_weld_right_]->{ $K_to_go[$max_index_to_go] } );
- my $number_of_groups_seen = 0;
+ # Exception 2: just set a tentative breakpoint if we might be in a
+ # one-line block
+ if ( defined($index_start_one_line_block) ) {
+ $self->set_forced_breakpoint($max_index_to_go);
+ return;
+ }
+ }
- #-------------------
- # helper subroutines
- #-------------------
+ $self->flush_batch_of_CODE();
+ return;
+ } ## end sub end_batch
- my $insert_blank_after = sub {
- my ($i) = @_;
- $rhash_of_desires->{$i} = 1;
- my $ip = $i + 1;
- if ( defined( $rhash_of_desires->{$ip} )
- && $rhash_of_desires->{$ip} == 2 )
- {
- $rhash_of_desires->{$ip} = 0;
- }
+ sub flush_vertical_aligner {
+ my ($self) = @_;
+ my $vao = $self->[_vertical_aligner_object_];
+ $vao->flush();
return;
- };
+ } ## end sub flush_vertical_aligner
- my $split_into_sub_groups = sub {
+ # flush is called to output any tokens in the pipeline, so that
+ # an alternate source of lines can be written in the correct order
+ sub flush {
+ my ( $self, $CODE_type_flush ) = @_;
- # place blanks around long sub-groups of keywords
- # ...if requested
- return unless ($Opt_blanks_inside);
+ # end the current batch with 1 exception
- # loop over sub-groups, index k
- push @subgroup, scalar @group;
- my $kbeg = 1;
- my $kend = @subgroup - 1;
- foreach my $k ( $kbeg .. $kend ) {
+ $index_start_one_line_block = undef;
- # index j runs through all keywords found
- my $j_b = $subgroup[ $k - 1 ];
- my $j_e = $subgroup[$k] - 1;
+ # Exception: if we are flushing within the code stream only to insert
+ # blank line(s), then we can keep the batch intact at a weld. This
+ # improves formatting of -ce. See test 'ce1.ce'
+ if ( $CODE_type_flush && $CODE_type_flush eq 'BL' ) {
+ $self->end_batch() if ( $max_index_to_go >= 0 );
+ }
- # index i is the actual line number of a keyword
- my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] };
- my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] };
- my $num = $count_e - $count_b + 1;
+ # otherwise, we have to shut things down completely.
+ else { $self->flush_batch_of_CODE() }
- # This subgroup runs from line $ib to line $ie-1, but may contain
- # blank lines
- if ( $num >= $Opt_size_min ) {
+ $self->flush_vertical_aligner();
+ return;
+ } ## end sub flush
- # if there are blank lines, we require that at least $num lines
- # be non-blank up to the boundary with the next subgroup.
- my $nog_b = my $nog_e = 1;
- if ( @iblanks && !$Opt_blanks_delete ) {
- my $j_bb = $j_b + $num - 1;
- my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] };
- $nog_b = $count_bb - $count_b + 1 == $num;
+ my %is_assignment_or_fat_comma;
- my $j_ee = $j_e - ( $num - 1 );
- my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] };
- $nog_e = $count_e - $count_ee + 1 == $num;
- }
- if ( $nog_b && $k > $kbeg ) {
- $insert_blank_after->( $i_b - 1 );
- }
- if ( $nog_e && $k < $kend ) {
- my ( $i_ep, $tok_ep, $count_ep ) = @{ $group[ $j_e + 1 ] };
- $insert_blank_after->( $i_ep - 1 );
- }
- }
- }
- return;
- };
+ BEGIN {
+ %is_assignment_or_fat_comma = %is_assignment;
+ $is_assignment_or_fat_comma{'=>'} = 1;
+ }
- my $delete_if_blank = sub {
- my ($i) = @_;
+ sub process_line_of_CODE {
- # delete line $i if it is blank
- return unless ( $i >= 0 && $i < @{$rlines} );
- return if ( $rlines->[$i]->{_line_type} ne 'CODE' );
- my $code_type = $rlines->[$i]->{_code_type};
- if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; }
- return;
- };
+ my ( $self, $my_line_of_tokens ) = @_;
- my $delete_inner_blank_lines = sub {
+ #----------------------------------------------------------------
+ # This routine is called once per INPUT line to format all of the
+ # tokens on that line.
+ #----------------------------------------------------------------
- # always remove unwanted trailing blank lines from our list
- return unless (@iblanks);
- while ( my $ibl = pop(@iblanks) ) {
- if ( $ibl < $iend ) { push @iblanks, $ibl; last }
- $iend = $ibl;
- }
+ # It outputs full-line comments and blank lines immediately.
- # now mark mark interior blank lines for deletion if requested
- return unless ($Opt_blanks_delete);
+ # For lines of code:
+ # - Tokens are copied one-by-one from the global token
+ # array $rLL to a set of '_to_go' arrays which collect batches of
+ # tokens. This is done with calls to 'store_token_to_go'.
+ # - A batch is closed and processed upon reaching a well defined
+ # structural break point (i.e. code block boundary) or forced
+ # breakpoint (i.e. side comment or special user controls).
+ # - Subsequent stages of formatting make additional line breaks
+ # appropriate for lists and logical structures, and as necessary to
+ # keep line lengths below the requested maximum line length.
- while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 }
+ #-----------------------------------
+ # begin initialize closure variables
+ #-----------------------------------
+ $line_of_tokens = $my_line_of_tokens;
+ my $rK_range = $line_of_tokens->{_rK_range};
+ if ( !defined( $rK_range->[0] ) ) {
- return;
- };
+ # Empty line: This can happen if tokens are deleted, for example
+ # with the -mangle parameter
+ return;
+ }
- my $end_group = sub {
+ ( $K_first, $K_last ) = @{$rK_range};
+ $last_CODE_type = $CODE_type;
+ $CODE_type = $line_of_tokens->{_code_type};
- # end a group of keywords
- my ($bad_ending) = @_;
- if ( defined($ibeg) && $ibeg >= 0 ) {
+ $rLL = $self->[_rLL_];
+ $radjusted_levels = $self->[_radjusted_levels_];
+ $rparent_of_seqno = $self->[_rparent_of_seqno_];
+ $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
+ $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
- # then handle sufficiently large groups
- if ( $count >= $Opt_size_min ) {
+ #---------------------------------
+ # end initialize closure variables
+ #---------------------------------
+
+ # This flag will become nobreak_to_go and should be set to 2 to prevent
+ # a line break AFTER the current token.
+ $no_internal_newlines = 0;
+ if ( !$rOpts_add_newlines || $CODE_type eq 'NIN' ) {
+ $no_internal_newlines = 2;
+ }
- $number_of_groups_seen++;
+ my $input_line = $line_of_tokens->{_line_text};
- # do any blank deletions regardless of the count
- $delete_inner_blank_lines->();
+ my ( $is_block_comment, $has_side_comment );
+ if ( $rLL->[$K_last]->[_TYPE_] eq '#' ) {
+ if ( $K_last == $K_first ) { $is_block_comment = 1 }
+ else { $has_side_comment = 1 }
+ }
- if ( $ibeg > 0 ) {
- my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type};
+ my $is_static_block_comment_without_leading_space =
+ $CODE_type eq 'SBCX';
+ $is_static_block_comment =
+ $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space;
- # patch for hash bang line which is not currently marked as
- # a comment; mark it as a comment
- if ( $ibeg == 1 && !$code_type ) {
- my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text};
- $code_type = 'BC'
- if ( $line_text && $line_text =~ /^#/ );
- }
+ # check for a $VERSION statement
+ if ( $CODE_type eq 'VER' ) {
+ $self->[_saw_VERSION_in_this_file_] = 1;
+ $no_internal_newlines = 2;
+ }
- # Do not insert a blank after a comment
- # (this could be subject to a flag in the future)
- if ( $code_type !~ /(BC|SBC|SBCX)/ ) {
- if ( $Opt_blanks_before == INSERT ) {
- $insert_blank_after->( $ibeg - 1 );
+ # Add interline blank if any
+ my $last_old_nonblank_type = "b";
+ my $first_new_nonblank_token = EMPTY_STRING;
+ my $K_first_true = $K_first;
+ if ( $max_index_to_go >= 0 ) {
+ $last_old_nonblank_type = $types_to_go[$max_index_to_go];
+ $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
+ if ( !$is_block_comment
+ && $types_to_go[$max_index_to_go] ne 'b'
+ && $K_first > 0
+ && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' )
+ {
+ $K_first -= 1;
+ }
+ }
- }
- elsif ( $Opt_blanks_before == DELETE ) {
- $delete_if_blank->( $ibeg - 1 );
- }
- }
- }
+ my $rtok_first = $rLL->[$K_first];
- # We will only put blanks before code lines. We could loosen
- # this rule a little, but we have to be very careful because
- # for example we certainly don't want to drop a blank line
- # after a line like this:
- # my $var = <<EOM;
- if ( $line_type eq 'CODE' && defined($K_first) ) {
+ my $in_quote = $line_of_tokens->{_ending_in_quote};
+ $ending_in_quote = $in_quote;
- # - Do not put a blank before a line of different level
- # - Do not put a blank line if we ended the search badly
- # - Do not put a blank at the end of the file
- # - Do not put a blank line before a hanging side comment
- my $level = $rLL->[$K_first]->[_LEVEL_];
- my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
+ #------------------------------------
+ # Handle a block (full-line) comment.
+ #------------------------------------
+ if ($is_block_comment) {
- if ( $level == $level_beg
- && $ci_level == 0
- && !$bad_ending
- && $iend < @{$rlines}
- && $CODE_type ne 'HSC' )
- {
- if ( $Opt_blanks_after == INSERT ) {
- $insert_blank_after->($iend);
- }
- elsif ( $Opt_blanks_after == DELETE ) {
- $delete_if_blank->( $iend + 1 );
- }
- }
- }
+ if ( $rOpts->{'delete-block-comments'} ) {
+ $self->flush();
+ return;
}
- $split_into_sub_groups->();
- }
- # reset for another group
- $ibeg = -1;
- $iend = undef;
- $level_beg = -1;
- $K_closing = undef;
- @group = ();
- @subgroup = ();
- @iblanks = ();
+ $index_start_one_line_block = undef;
+ $self->end_batch() if ( $max_index_to_go >= 0 );
- return;
- };
+ # output a blank line before block comments
+ if (
+ # unless we follow a blank or comment line
+ $self->[_last_line_leading_type_] ne '#'
+ && $self->[_last_line_leading_type_] ne 'b'
- my $find_container_end = sub {
+ # only if allowed
+ && $rOpts->{'blanks-before-comments'}
- # If the keyword line is continued onto subsequent lines, find the
- # closing token '$K_closing' so that we can easily skip past the
- # contents of the container.
+ # if this is NOT an empty comment, unless it follows a side
+ # comment and could become a hanging side comment.
+ && (
+ $rtok_first->[_TOKEN_] ne '#'
+ || ( $last_line_had_side_comment
+ && $rLL->[$K_first]->[_LEVEL_] > 0 )
+ )
- # We only set this value if we find a simple list, meaning
- # -contents only one level deep
- # -not welded
+ # not after a short line ending in an opening token
+ # because we already have space above this comment.
+ # Note that the first comment in this if block, after
+ # the 'if (', does not get a blank line because of this.
+ && !$self->[_last_output_short_opening_token_]
- # First check: skip if next line is not one deeper
- my $Knext_nonblank = $self->K_next_nonblank($K_last);
- goto RETURN if ( !defined($Knext_nonblank) );
- my $level_next = $rLL->[$Knext_nonblank]->[_LEVEL_];
- goto RETURN if ( $level_next != $level_beg + 1 );
+ # never before static block comments
+ && !$is_static_block_comment
+ )
+ {
+ $self->flush(); # switching to new output stream
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->write_blank_code_line();
+ $self->[_last_line_leading_type_] = 'b';
+ }
- # Find the parent container of the first token on the next line
- my $parent_seqno = $self->parent_seqno_by_K($Knext_nonblank);
- goto RETURN unless ( defined($parent_seqno) );
+ if (
+ $rOpts->{'indent-block-comments'}
+ && ( !$rOpts->{'indent-spaced-block-comments'}
+ || $input_line =~ /^\s+/ )
+ && !$is_static_block_comment_without_leading_space
+ )
+ {
+ my $Ktoken_vars = $K_first;
+ my $rtoken_vars = $rLL->[$Ktoken_vars];
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
+ $self->end_batch();
+ }
+ else {
- # Must not be a weld (can be unstable)
- goto RETURN
- if ( $total_weld_count && $self->is_welded_at_seqno($parent_seqno) );
+ # switching to new output stream
+ $self->flush();
- # Opening container must exist and be on this line
- my $Ko = $K_opening_container->{$parent_seqno};
- goto RETURN unless ( defined($Ko) && $Ko > $K_first && $Ko <= $K_last );
+ # Note that last arg in call here is 'undef' for comments
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->write_code_line(
+ $rtok_first->[_TOKEN_] . "\n", undef );
+ $self->[_last_line_leading_type_] = '#';
+ }
+ return;
+ }
- # Verify that the closing container exists and is on a later line
- my $Kc = $K_closing_container->{$parent_seqno};
- goto RETURN unless ( defined($Kc) && $Kc > $K_last );
+ #--------------------------------------------
+ # Compare input/output indentation in logfile
+ #--------------------------------------------
+ if ( $self->[_save_logfile_] ) {
+
+ # Compare input/output indentation except for:
+ # - hanging side comments
+ # - continuation lines (have unknown leading blank space)
+ # - and lines which are quotes (they may have been outdented)
+ my $guessed_indentation_level =
+ $line_of_tokens->{_guessed_indentation_level};
+
+ unless ( $CODE_type eq 'HSC'
+ || $rtok_first->[_CI_LEVEL_] > 0
+ || $guessed_indentation_level == 0
+ && $rtok_first->[_TYPE_] eq 'Q' )
+ {
+ my $input_line_number = $line_of_tokens->{_line_number};
+ $self->compare_indentation_levels( $K_first,
+ $guessed_indentation_level, $input_line_number );
+ }
+ }
- # That's it
- $K_closing = $Kc;
- goto RETURN;
+ #-----------------------------------------
+ # Handle a line marked as indentation-only
+ #-----------------------------------------
- RETURN:
- return;
- };
+ if ( $CODE_type eq 'IO' ) {
+ $self->flush();
+ my $line = $input_line;
- my $add_to_group = sub {
- my ( $i, $token, $level ) = @_;
+ # Fix for rt #125506 Unexpected string formating
+ # in which leading space of a terminal quote was removed
+ $line =~ s/\s+$//;
+ $line =~ s/^\s+// unless ( $line_of_tokens->{_starting_in_quote} );
- # End the previous group if we have reached the maximum
- # group size
- if ( $Opt_size_max && @group >= $Opt_size_max ) {
- $end_group->();
- }
+ my $Ktoken_vars = $K_first;
- if ( @group == 0 ) {
- $ibeg = $i;
- $level_beg = $level;
- $count = 0;
- }
+ # We work with a copy of the token variables and change the
+ # first token to be the entire line as a quote variable
+ my $rtoken_vars = $rLL->[$Ktoken_vars];
+ $rtoken_vars = copy_token_as_type( $rtoken_vars, 'q', $line );
- $count++;
- $iend = $i;
+ # Patch: length is not really important here but must be defined
+ $rtoken_vars->[_TOKEN_LENGTH_] = length($line);
- # New sub-group?
- if ( !@group || $token ne $group[-1]->[1] ) {
- push @subgroup, scalar(@group);
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
+ $self->end_batch();
+ return;
}
- push @group, [ $i, $token, $count ];
- # remember if this line ends in an open container
- $find_container_end->();
+ #---------------------------
+ # Handle all other lines ...
+ #---------------------------
- return;
- };
+ # If we just saw the end of an elsif block, write nag message
+ # if we do not see another elseif or an else.
+ if ($looking_for_else) {
- #----------------------------------
- # loop over all lines of the source
- #----------------------------------
- $end_group->();
- my $i = -1;
- foreach my $line_of_tokens ( @{$rlines} ) {
+ ## /^(elsif|else)$/
+ if ( !$is_elsif_else{ $rLL->[$K_first_true]->[_TOKEN_] } ) {
+ write_logfile_entry("(No else block)\n");
+ }
+ $looking_for_else = 0;
+ }
- $i++;
- last
- if ( $Opt_repeat_count > 0
- && $number_of_groups_seen >= $Opt_repeat_count );
+ # This is a good place to kill incomplete one-line blocks
+ if ( $max_index_to_go >= 0 ) {
- $CODE_type = EMPTY_STRING;
- $K_first = undef;
- $K_last = undef;
- $line_type = $line_of_tokens->{_line_type};
+ # For -iob and -lp, mark essential old breakpoints.
+ # Fixes b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058
+ # See related code below.
+ if ( $rOpts_ignore_old_breakpoints && $rOpts_line_up_parentheses ) {
+ my $type_first = $rLL->[$K_first_true]->[_TYPE_];
+ if ( $is_assignment_or_fat_comma{$type_first} ) {
+ $old_breakpoint_to_go[$max_index_to_go] = 1;
+ }
+ }
- # always end a group at non-CODE
- if ( $line_type ne 'CODE' ) { $end_group->(); next }
+ if (
- $CODE_type = $line_of_tokens->{_code_type};
+ # this check needed -mangle (for example rt125012)
+ (
+ ( !$index_start_one_line_block )
+ && ( $last_old_nonblank_type eq ';' )
+ && ( $first_new_nonblank_token ne '}' )
+ )
- # end any group at a format skipping line
- if ( $CODE_type && $CODE_type eq 'FS' ) {
- $end_group->();
- next;
- }
+ # Patch for RT #98902. Honor request to break at old commas.
+ || ( $rOpts_break_at_old_comma_breakpoints
+ && $last_old_nonblank_type eq ',' )
+ )
+ {
+ $forced_breakpoint_to_go[$max_index_to_go] = 1
+ if ($rOpts_break_at_old_comma_breakpoints);
+ $index_start_one_line_block = undef;
+ $self->end_batch();
+ }
- # continue in a verbatim (VB) type; it may be quoted text
- if ( $CODE_type eq 'VB' ) {
- if ( $ibeg >= 0 ) { $iend = $i; }
- next;
+ # Keep any requested breaks before this line. Note that we have to
+ # use the original K_first because it may have been reduced above
+ # to add a blank. The value of the flag is as follows:
+ # 1 => hard break, flush the batch
+ # 2 => soft break, set breakpoint and continue building the batch
+ # added check on max_index_to_go for c177
+ if ( $max_index_to_go >= 0
+ && $self->[_rbreak_before_Kfirst_]->{$K_first_true} )
+ {
+ $index_start_one_line_block = undef;
+ if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} == 2 ) {
+ $self->set_forced_breakpoint($max_index_to_go);
+ }
+ else {
+ $self->end_batch();
+ }
+ }
}
- # and continue in blank (BL) types
- if ( $CODE_type eq 'BL' ) {
- if ( $ibeg >= 0 ) {
- $iend = $i;
- push @{iblanks}, $i;
+ #--------------------------------------
+ # loop to process the tokens one-by-one
+ #--------------------------------------
+ $self->process_line_inner_loop($has_side_comment);
- # propagate current subgroup token
- my $tok = $group[-1]->[1];
- push @group, [ $i, $tok, $count ];
- }
- next;
- }
+ # if there is anything left in the output buffer ...
+ if ( $max_index_to_go >= 0 ) {
- # examine the first token of this line
- my $rK_range = $line_of_tokens->{_rK_range};
- ( $K_first, $K_last ) = @{$rK_range};
- if ( !defined($K_first) ) {
+ my $type = $rLL->[$K_last]->[_TYPE_];
+ my $break_flag = $self->[_rbreak_after_Klast_]->{$K_last};
- # Somewhat unexpected blank line..
- # $rK_range is normally defined for line type CODE, but this can
- # happen for example if the input line was a single semicolon which
- # is being deleted. In that case there was code in the input
- # file but it is not being retained. So we can silently return.
- return $rhash_of_desires;
- }
+ # we have to flush ..
+ if (
- my $level = $rLL->[$K_first]->[_LEVEL_];
- my $type = $rLL->[$K_first]->[_TYPE_];
- my $token = $rLL->[$K_first]->[_TOKEN_];
- my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
+ # if there is a side comment...
+ $type eq '#'
- # End a group 'badly' at an unexpected level. This will prevent
- # blank lines being incorrectly placed after the end of the group.
- # We are looking for any deviation from two acceptable patterns:
- # PATTERN 1: a simple list; secondary lines are at level+1
- # PATTERN 2: a long statement; all secondary lines same level
- # This was added as a fix for case b1177, in which a complex structure
- # got incorrectly inserted blank lines.
- if ( $ibeg >= 0 ) {
+ # if this line ends in a quote
+ # NOTE: This is critically important for insuring that quoted
+ # lines do not get processed by things like -sot and -sct
+ || $in_quote
- # Check for deviation from PATTERN 1, simple list:
- if ( defined($K_closing) && $K_first < $K_closing ) {
- $end_group->(1) if ( $level != $level_beg + 1 );
- }
+ # if this is a VERSION statement
+ || $CODE_type eq 'VER'
- # Check for deviation from PATTERN 2, single statement:
- elsif ( $level != $level_beg ) { $end_group->(1) }
- }
+ # to keep a label at the end of a line
+ || ( $type eq 'J' && $rOpts_break_after_labels != 2 )
- # Do not look for keywords in lists ( keyword 'my' can occur in lists,
- # see case b760); fixed for c048.
- if ( $self->is_list_by_K($K_first) ) {
- if ( $ibeg >= 0 ) { $iend = $i }
- next;
- }
+ # if we have a hard break request
+ || $break_flag && $break_flag != 2
- # see if this is a code type we seek (i.e. comment)
- if ( $CODE_type
- && $Opt_comment_pattern
- && $CODE_type =~ /$Opt_comment_pattern/ )
- {
+ # if we are instructed to keep all old line breaks
+ || !$rOpts->{'delete-old-newlines'}
- my $tok = $CODE_type;
+ # if this is a line of the form 'use overload'. A break here in
+ # the input file is a good break because it will allow the
+ # operators which follow to be formatted well. Without this
+ # break the formatting with -ci=4 -xci is poor, for example.
- # Continuing a group
- if ( $ibeg >= 0 && $level == $level_beg ) {
- $add_to_group->( $i, $tok, $level );
+ # use overload
+ # '+' => sub {
+ # print length $_[2], "\n";
+ # my ( $x, $y ) = _order(@_);
+ # Number::Roman->new( int $x + $y );
+ # },
+ # '-' => sub {
+ # my ( $x, $y ) = _order(@_);
+ # Number::Roman->new( int $x - $y );
+ # };
+ || ( $max_index_to_go == 2
+ && $types_to_go[0] eq 'k'
+ && $tokens_to_go[0] eq 'use'
+ && $tokens_to_go[$max_index_to_go] eq 'overload' )
+ )
+ {
+ $index_start_one_line_block = undef;
+ $self->end_batch();
}
- # Start new group
else {
- # first end old group if any; we might be starting new
- # keywords at different level
- if ( $ibeg >= 0 ) { $end_group->(); }
- $add_to_group->( $i, $tok, $level );
+ # Check for a soft break request
+ if ( $break_flag && $break_flag == 2 ) {
+ $self->set_forced_breakpoint($max_index_to_go);
+ }
+
+ # mark old line breakpoints in current output stream
+ if (
+ !$rOpts_ignore_old_breakpoints
+
+ # Mark essential old breakpoints if combination -iob -lp is
+ # used. These two options do not work well together, but
+ # we can avoid turning -iob off by ignoring -iob at certain
+ # essential line breaks. See also related code above.
+ # Fixes b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058
+ || ( $rOpts_line_up_parentheses
+ && $is_assignment_or_fat_comma{$type} )
+ )
+ {
+ $old_breakpoint_to_go[$max_index_to_go] = 1;
+ }
}
- next;
}
- # See if it is a keyword we seek, but never start a group in a
- # continuation line; the code may be badly formatted.
- if ( $ci_level == 0
- && $type eq 'k'
- && $token =~ /$Opt_pattern/ )
- {
+ return;
+ } ## end sub process_line_of_CODE
- # Continuing a keyword group
- if ( $ibeg >= 0 && $level == $level_beg ) {
- $add_to_group->( $i, $token, $level );
- }
+ sub process_line_inner_loop {
- # Start new keyword group
- else {
+ my ( $self, $has_side_comment ) = @_;
- # first end old group if any; we might be starting new
- # keywords at different level
- if ( $ibeg >= 0 ) { $end_group->(); }
- $add_to_group->( $i, $token, $level );
- }
- next;
+ #--------------------------------------------------------------------
+ # Loop to move all tokens from one input line to a newly forming batch
+ #--------------------------------------------------------------------
+
+ # Do not start a new batch with a blank space
+ if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
+ $K_first++;
}
- # This is not one of our keywords, but we are in a keyword group
- # so see if we should continue or quit
- elsif ( $ibeg >= 0 ) {
+ foreach my $Ktoken_vars ( $K_first .. $K_last ) {
+
+ my $rtoken_vars = $rLL->[$Ktoken_vars];
- # - bail out on a large level change; we may have walked into a
- # data structure or anonymous sub code.
- if ( $level > $level_beg + 1 || $level < $level_beg ) {
- $end_group->(1);
+ #--------------
+ # handle blanks
+ #--------------
+ if ( $rtoken_vars->[_TYPE_] eq 'b' ) {
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
next;
}
- # - keep going on a continuation line of the same level, since
- # it is probably a continuation of our previous keyword,
- # - and keep going past hanging side comments because we never
- # want to interrupt them.
- if ( ( ( $level == $level_beg ) && $ci_level > 0 )
- || $CODE_type eq 'HSC' )
- {
- $iend = $i;
- next;
+ #------------------
+ # handle non-blanks
+ #------------------
+ my $type = $rtoken_vars->[_TYPE_];
+
+ # If we are continuing after seeing a right curly brace, flush
+ # buffer unless we see what we are looking for, as in
+ # } else ...
+ if ($rbrace_follower) {
+ my $token = $rtoken_vars->[_TOKEN_];
+ unless ( $rbrace_follower->{$token} ) {
+ $self->end_batch() if ( $max_index_to_go >= 0 );
+ }
+ $rbrace_follower = undef;
}
- # - continue if if we are within in a container which started with
- # the line of the previous keyword.
- if ( defined($K_closing) && $K_first <= $K_closing ) {
+ my (
+ $block_type, $type_sequence,
+ $is_opening_BLOCK, $is_closing_BLOCK,
+ $nobreak_BEFORE_BLOCK
+ );
+
+ if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) {
- # continue if entire line is within container
- if ( $K_last <= $K_closing ) { $iend = $i; next }
+ my $token = $rtoken_vars->[_TOKEN_];
+ $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+ $block_type = $rblock_type_of_seqno->{$type_sequence};
- # continue at ); or }; or ];
- my $KK = $K_closing + 1;
- if ( $rLL->[$KK]->[_TYPE_] eq ';' ) {
- if ( $KK < $K_last ) {
- if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK }
- if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' ) {
- $end_group->(1);
- next;
- }
+ if ( $block_type
+ && $token eq $type
+ && $block_type ne 't'
+ && !$self->[_rshort_nested_]->{$type_sequence} )
+ {
+
+ if ( $type eq '{' ) {
+ $is_opening_BLOCK = 1;
+ $nobreak_BEFORE_BLOCK = $no_internal_newlines;
}
- $iend = $i;
+ elsif ( $type eq '}' ) {
+ $is_closing_BLOCK = 1;
+ $nobreak_BEFORE_BLOCK = $no_internal_newlines;
+ }
+ }
+ }
+
+ #---------------------
+ # handle side comments
+ #---------------------
+ if ($has_side_comment) {
+
+ # if at last token ...
+ if ( $Ktoken_vars == $K_last ) {
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
next;
}
- $end_group->(1);
- next;
+ # if before last token ... do not allow breaks which would
+ # promote a side comment to a block comment
+ elsif ($Ktoken_vars == $K_last - 1
+ || $Ktoken_vars == $K_last - 2
+ && $rLL->[ $K_last - 1 ]->[_TYPE_] eq 'b' )
+ {
+ $no_internal_newlines = 2;
+ }
}
- # - end the group if none of the above
- $end_group->();
- next;
- }
+ # Process non-blank and non-comment tokens ...
- # not in a keyword group; continue
- else { next }
- }
+ #-----------------
+ # handle semicolon
+ #-----------------
+ if ( $type eq ';' ) {
- # end of loop over all lines
- $end_group->();
- return $rhash_of_desires;
+ my $next_nonblank_token_type = 'b';
+ my $next_nonblank_token = EMPTY_STRING;
+ if ( $Ktoken_vars < $K_last ) {
+ my $Knnb = $Ktoken_vars + 1;
+ $Knnb++ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b' );
+ $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
+ $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
+ }
-} ## end sub keyword_group_scan
+ if ( $rOpts_break_at_old_semicolon_breakpoints
+ && ( $Ktoken_vars == $K_first )
+ && $max_index_to_go >= 0
+ && !defined($index_start_one_line_block) )
+ {
+ $self->end_batch();
+ }
-#######################################
-# CODE SECTION 7: Process lines of code
-#######################################
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
-{ ## begin closure process_line_of_CODE
+ $self->end_batch()
+ unless (
+ $no_internal_newlines
+ || ( $rOpts_keep_interior_semicolons
+ && $Ktoken_vars < $K_last )
+ || ( $next_nonblank_token eq '}' )
+ );
+ }
- # The routines in this closure receive lines of code and combine them into
- # 'batches' and send them along. A 'batch' is the unit of code which can be
- # processed further as a unit. It has the property that it is the largest
- # amount of code into which which perltidy is free to place one or more
- # line breaks within it without violating any constraints.
+ #-----------
+ # handle '{'
+ #-----------
+ elsif ($is_opening_BLOCK) {
- # When a new batch is formed it is sent to sub 'grind_batch_of_code'.
+ # Tentatively output this token. This is required before
+ # calling starting_one_line_block. We may have to unstore
+ # it, though, if we have to break before it.
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
- # flags needed by the store routine
- my $line_of_tokens;
- my $no_internal_newlines;
- my $CODE_type;
+ # Look ahead to see if we might form a one-line block..
+ my $too_long =
+ $self->starting_one_line_block( $Ktoken_vars,
+ $K_last_nonblank_code, $K_last );
+ $self->clear_breakpoint_undo_stack();
- # range of K of tokens for the current line
- my ( $K_first, $K_last );
+ # to simplify the logic below, set a flag to indicate if
+ # this opening brace is far from the keyword which introduces it
+ my $keyword_on_same_line = 1;
+ if (
+ $max_index_to_go >= 0
+ && defined($K_last_nonblank_code)
+ && $rLL->[$K_last_nonblank_code]->[_TYPE_] eq ')'
+ && ( ( $rtoken_vars->[_LEVEL_] < $levels_to_go[0] )
+ || $too_long )
+ )
+ {
+ $keyword_on_same_line = 0;
+ }
- my ( $rLL, $radjusted_levels, $rparent_of_seqno, $rdepth_of_opening_seqno,
- $rblock_type_of_seqno, $ri_starting_one_line_block );
+ # Break before '{' if requested with -bl or -bli flag
+ my $want_break = $self->[_rbrace_left_]->{$type_sequence};
- # past stored nonblank tokens and flags
- my (
- $K_last_nonblank_code, $looking_for_else,
- $is_static_block_comment, $last_CODE_type,
- $last_line_had_side_comment, $next_parent_seqno,
- $next_slevel,
- );
+ # But do not break if this token is welded to the left
+ if ( $total_weld_count
+ && defined( $self->[_rK_weld_left_]->{$Ktoken_vars} ) )
+ {
+ $want_break = 0;
+ }
- # Called once at the start of a new file
- sub initialize_process_line_of_CODE {
- $K_last_nonblank_code = undef;
- $looking_for_else = 0;
- $is_static_block_comment = 0;
- $last_line_had_side_comment = 0;
- $next_parent_seqno = SEQ_ROOT;
- $next_slevel = undef;
- return;
- }
+ # Break BEFORE an opening '{' ...
+ if (
- # Batch variables: these describe the current batch of code being formed
- # and sent down the pipeline. They are initialized in the next
- # sub.
- my ( $rbrace_follower, $index_start_one_line_block,
- $semicolons_before_block_self_destruct,
- $starting_in_quote, $ending_in_quote, );
+ # if requested
+ $want_break
+
+ # and we were unable to start looking for a block,
+ && !defined($index_start_one_line_block)
+
+ # or if it will not be on same line as its keyword, so that
+ # it will be outdented (eval.t, overload.t), and the user
+ # has not insisted on keeping it on the right
+ || ( !$keyword_on_same_line
+ && !$rOpts_opening_brace_always_on_right )
+ )
+ {
- # Called before the start of each new batch
- sub initialize_batch_variables {
+ # but only if allowed
+ unless ($nobreak_BEFORE_BLOCK) {
- $max_index_to_go = UNDEFINED_INDEX;
- $summed_lengths_to_go[0] = 0;
- $nesting_depth_to_go[0] = 0;
- ##@summed_lengths_to_go = @nesting_depth_to_go = (0);
- $ri_starting_one_line_block = [];
+ # since we already stored this token, we must unstore it
+ $self->unstore_token_to_go();
- # The initialization code for the remaining batch arrays is as follows
- # and can be activated for testing. But profiling shows that it is
- # time-consuming to re-initialize the batch arrays and is not necessary
- # because the maximum valid token, $max_index_to_go, is carefully
- # controlled. This means however that it is not possible to do any
- # type of filter or map operation directly on these arrays. And it is
- # not possible to use negative indexes. As a precaution against program
- # changes which might do this, sub pad_array_to_go adds some undefs at
- # the end of the current batch of data.
+ # then output the line
+ $self->end_batch() if ( $max_index_to_go >= 0 );
- # So 'long story short': this is a waste of time
- 0 && do { #<<<
- @block_type_to_go = ();
- @type_sequence_to_go = ();
- @forced_breakpoint_to_go = ();
- @token_lengths_to_go = ();
- @levels_to_go = ();
- @mate_index_to_go = ();
- @ci_levels_to_go = ();
- @nobreak_to_go = ();
- @old_breakpoint_to_go = ();
- @tokens_to_go = ();
- @K_to_go = ();
- @types_to_go = ();
- @leading_spaces_to_go = ();
- @reduced_spaces_to_go = ();
- @inext_to_go = ();
- @iprev_to_go = ();
- @parent_seqno_to_go = ();
- };
+ # and now store this token at the start of a new line
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
+ }
+ }
- $rbrace_follower = undef;
- $ending_in_quote = 0;
+ # now output this line
+ $self->end_batch()
+ if ( $max_index_to_go >= 0 && !$no_internal_newlines );
+ }
- # These get re-initialized by calls to sub destroy_one_line_block():
- $index_start_one_line_block = UNDEFINED_INDEX;
- $semicolons_before_block_self_destruct = 0;
+ #-----------
+ # handle '}'
+ #-----------
+ elsif ($is_closing_BLOCK) {
- # initialize forced breakpoint vars associated with each output batch
- $forced_breakpoint_count = 0;
- $index_max_forced_break = UNDEFINED_INDEX;
- $forced_breakpoint_undo_count = 0;
+ my $next_nonblank_token_type = 'b';
+ my $next_nonblank_token = EMPTY_STRING;
+ my $Knnb;
+ if ( $Ktoken_vars < $K_last ) {
+ $Knnb = $Ktoken_vars + 1;
+ $Knnb++ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b' );
+ $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
+ $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
+ }
- return;
- } ## end sub initialize_batch_variables
+ # If there is a pending one-line block ..
+ if ( defined($index_start_one_line_block) ) {
- sub leading_spaces_to_go {
+ # Fix for b1208: if a side comment follows this closing
+ # brace then we must include its length in the length test
+ # ... unless the -issl flag is set (fixes b1307-1309).
+ # Assume a minimum of 1 blank space to the comment.
+ my $added_length = 0;
+ if ( $has_side_comment
+ && !$rOpts_ignore_side_comment_lengths
+ && $next_nonblank_token_type eq '#' )
+ {
+ $added_length = 1 + $rLL->[$K_last]->[_TOKEN_LENGTH_];
+ }
- # return the number of indentation spaces for a token in the output
- # stream
+ # we have to terminate it if..
+ if (
- my ($ii) = @_;
- return 0 if ( $ii < 0 );
- my $indentation = $leading_spaces_to_go[$ii];
- return ref($indentation) ? $indentation->get_spaces() : $indentation;
- } ## end sub leading_spaces_to_go
+ # it is too long (final length may be different from
+ # initial estimate). note: must allow 1 space for this
+ # token
+ $self->excess_line_length( $index_start_one_line_block,
+ $max_index_to_go ) + $added_length >= 0
+ )
+ {
+ $index_start_one_line_block = undef;
+ }
+ }
- sub create_one_line_block {
- ( $index_start_one_line_block, $semicolons_before_block_self_destruct )
- = @_;
- return;
- }
+ # put a break before this closing curly brace if appropriate
+ $self->end_batch()
+ if ( $max_index_to_go >= 0
+ && !$nobreak_BEFORE_BLOCK
+ && !defined($index_start_one_line_block) );
- sub destroy_one_line_block {
- $index_start_one_line_block = UNDEFINED_INDEX;
- $semicolons_before_block_self_destruct = 0;
- return;
- }
+ # store the closing curly brace
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
- # Routine to place the current token into the output stream.
- # Called once per output token.
+ # ok, we just stored a closing curly brace. Often, but
+ # not always, we want to end the line immediately.
+ # So now we have to check for special cases.
- use constant DEBUG_STORE => 0;
+ # if this '}' successfully ends a one-line block..
+ my $one_line_block_type = EMPTY_STRING;
+ my $keep_going;
+ if ( defined($index_start_one_line_block) ) {
- sub store_token_to_go {
+ # Remember the type of token just before the
+ # opening brace. It would be more general to use
+ # a stack, but this will work for one-line blocks.
+ $one_line_block_type =
+ $types_to_go[$index_start_one_line_block];
- my ( $self, $Ktoken_vars, $rtoken_vars ) = @_;
+ # we have to actually make it by removing tentative
+ # breaks that were set within it
+ $self->undo_forced_breakpoint_stack(0);
- # Add one token to the next batch.
- # $Ktoken_vars = the index K in the global token array
- # $rtoken_vars = $rLL->[$Ktoken_vars] = the corresponding token values
- # unless they are temporarily being overridden
+ # For -lp, extend the nobreak to include a trailing
+ # terminal ','. This is because the -lp indentation was
+ # not known when making one-line blocks, so we may be able
+ # to move the line back to fit. Otherwise we may create a
+ # needlessly stranded comma on the next line.
+ my $iend_nobreak = $max_index_to_go - 1;
+ if ( $rOpts_line_up_parentheses
+ && $next_nonblank_token_type eq ','
+ && $Knnb eq $K_last )
+ {
+ my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
+ my $is_excluded =
+ $self->[_ris_excluded_lp_container_]->{$p_seqno};
+ $iend_nobreak = $max_index_to_go if ( !$is_excluded );
+ }
- #------------------------------------------------------------------
- # NOTE: called once per token so coding efficiency is critical here
- #------------------------------------------------------------------
+ $self->set_nobreaks( $index_start_one_line_block,
+ $iend_nobreak );
- my $type = $rtoken_vars->[_TYPE_];
+ # save starting block indexes so that sub correct_lp can
+ # check and adjust -lp indentation (c098)
+ push @{$ri_starting_one_line_block},
+ $index_start_one_line_block;
- # Check for emergency flush...
- # The K indexes in the batch must always be a continuous sequence of
- # the global token array. The batch process programming assumes this.
- # If storing this token would cause this relation to fail we must dump
- # the current batch before storing the new token. It is extremely rare
- # for this to happen. One known example is the following two-line
- # snippet when run with parameters
- # --noadd-newlines --space-terminal-semicolon:
- # if ( $_ =~ /PENCIL/ ) { $pencil_flag= 1 } ; ;
- # $yy=1;
- if ( $max_index_to_go >= 0 ) {
- if ( $Ktoken_vars != $K_to_go[$max_index_to_go] + 1 ) {
- $self->flush_batch_of_CODE();
- }
+ # then re-initialize for the next one-line block
+ $index_start_one_line_block = undef;
- # Do not output consecutive blank tokens ... this should not
- # happen, but it is worth checking. Later code can then make the
- # simplifying assumption that blank tokens are not consecutive.
- elsif ( $type eq 'b' && $types_to_go[$max_index_to_go] eq 'b' ) {
+ # then decide if we want to break after the '}' ..
+ # We will keep going to allow certain brace followers as in:
+ # do { $ifclosed = 1; last } unless $losing;
+ #
+ # But make a line break if the curly ends a
+ # significant block:
+ if (
+ (
+ $is_block_without_semicolon{$block_type}
- if (DEVEL_MODE) {
+ # Follow users break point for
+ # one line block types U & G, such as a 'try' block
+ || $one_line_block_type =~ /^[UG]$/
+ && $Ktoken_vars == $K_last
+ )
- # if this happens, it is may be that consecutive blanks
- # were inserted into the token stream in 'respace_tokens'
- my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1;
- Fault("consecutive blanks near line $lno; please fix");
+ # if needless semicolon follows we handle it later
+ && $next_nonblank_token ne ';'
+ )
+ {
+ $self->end_batch()
+ unless ($no_internal_newlines);
+ }
}
- return;
- }
- }
-
- # Do not start a batch with a blank token.
- # Fixes cases b149 b888 b984 b985 b986 b987
- else {
- if ( $type eq 'b' ) { return }
- }
- #----------------------------
- # add this token to the batch
- #----------------------------
- $K_to_go[ ++$max_index_to_go ] = $Ktoken_vars;
- $types_to_go[$max_index_to_go] = $type;
-
- $old_breakpoint_to_go[$max_index_to_go] = 0;
- $forced_breakpoint_to_go[$max_index_to_go] = 0;
- $mate_index_to_go[$max_index_to_go] = -1;
+ # set string indicating what we need to look for brace follower
+ # tokens
+ if ( $is_if_unless_elsif_else{$block_type} ) {
+ $rbrace_follower = undef;
+ }
+ elsif ( $block_type eq 'do' ) {
+ $rbrace_follower = \%is_do_follower;
+ if (
+ $self->tight_paren_follows( $K_to_go[0], $Ktoken_vars )
+ )
+ {
+ $rbrace_follower = { ')' => 1 };
+ }
+ }
- my $token = $tokens_to_go[$max_index_to_go] = $rtoken_vars->[_TOKEN_];
+ # added eval for borris.t
+ elsif ($is_sort_map_grep_eval{$block_type}
+ || $one_line_block_type eq 'G' )
+ {
+ $rbrace_follower = undef;
+ $keep_going = 1;
+ }
- my $ci_level = $ci_levels_to_go[$max_index_to_go] =
- $rtoken_vars->[_CI_LEVEL_];
+ # anonymous sub
+ elsif ( $self->[_ris_asub_block_]->{$type_sequence} ) {
+ if ($one_line_block_type) {
- # Clip levels to zero if there are level errors in the file.
- # We had to wait until now for reasons explained in sub 'write_line'.
- my $level = $rtoken_vars->[_LEVEL_];
- if ( $level < 0 ) { $level = 0 }
- $levels_to_go[$max_index_to_go] = $level;
+ $rbrace_follower = \%is_anon_sub_1_brace_follower;
- my $seqno = $type_sequence_to_go[$max_index_to_go] =
- $rtoken_vars->[_TYPE_SEQUENCE_];
+ # Exceptions to help keep -lp intact, see git #74 ...
+ # Exception 1: followed by '}' on this line
+ if ( $Ktoken_vars < $K_last
+ && $next_nonblank_token eq '}' )
+ {
+ $rbrace_follower = undef;
+ $keep_going = 1;
+ }
- my $in_continued_quote =
- ( $Ktoken_vars == $K_first ) && $line_of_tokens->{_starting_in_quote};
+ # Exception 2: followed by '}' on next line if -lp set.
+ # The -lp requirement allows the formatting to follow
+ # old breaks when -lp is not used, minimizing changes.
+ # Fixes issue c087.
+ elsif ($Ktoken_vars == $K_last
+ && $rOpts_line_up_parentheses )
+ {
+ my $K_closing_container =
+ $self->[_K_closing_container_];
+ my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
+ my $Kc = $K_closing_container->{$p_seqno};
+ my $is_excluded =
+ $self->[_ris_excluded_lp_container_]->{$p_seqno};
+ $keep_going =
+ ( defined($Kc)
+ && $rLL->[$Kc]->[_TOKEN_] eq '}'
+ && !$is_excluded
+ && $Kc - $Ktoken_vars <= 2 );
+ $rbrace_follower = undef if ($keep_going);
+ }
+ }
+ else {
+ $rbrace_follower = \%is_anon_sub_brace_follower;
+ }
+ }
- # Initializations for first token of new batch
- if ( $max_index_to_go == 0 ) {
+ # None of the above: specify what can follow a closing
+ # brace of a block which is not an
+ # if/elsif/else/do/sort/map/grep/eval
+ # Testfiles:
+ # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
+ else {
+ $rbrace_follower = \%is_other_brace_follower;
+ }
- $starting_in_quote = $in_continued_quote;
+ # See if an elsif block is followed by another elsif or else;
+ # complain if not.
+ if ( $block_type eq 'elsif' ) {
- # Update the next parent sequence number for each new batch.
+ if ( $next_nonblank_token_type eq 'b' ) { # end of line?
+ $looking_for_else = 1; # ok, check on next line
+ }
+ else {
+ ## /^(elsif|else)$/
+ if ( !$is_elsif_else{$next_nonblank_token} ) {
+ write_logfile_entry("No else block :(\n");
+ }
+ }
+ }
- #----------------------------------------
- # Begin coding from sub parent_seqno_by_K
- #----------------------------------------
+ # keep going after certain block types (map,sort,grep,eval)
+ # added eval for borris.t
+ if ($keep_going) {
- # The following is equivalent to this call but much faster:
- # $next_parent_seqno = $self->parent_seqno_by_K($Ktoken_vars);
+ # keep going
+ $rbrace_follower = undef;
- $next_parent_seqno = SEQ_ROOT;
- if ($seqno) {
- $next_parent_seqno = $rparent_of_seqno->{$seqno};
- }
- else {
- my $Kt = $rLL->[$Ktoken_vars]->[_KNEXT_SEQ_ITEM_];
- if ( defined($Kt) ) {
- my $type_sequence_t = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
- my $type_t = $rLL->[$Kt]->[_TYPE_];
+ }
- # if next container token is closing, it is the parent seqno
- if ( $is_closing_type{$type_t} ) {
- $next_parent_seqno = $type_sequence_t;
+ # if no more tokens, postpone decision until re-entering
+ elsif ( ( $next_nonblank_token_type eq 'b' )
+ && $rOpts_add_newlines )
+ {
+ unless ($rbrace_follower) {
+ $self->end_batch()
+ unless ( $no_internal_newlines
+ || $max_index_to_go < 0 );
}
+ }
+ elsif ($rbrace_follower) {
- # otherwise we want its parent container
+ if ( $rbrace_follower->{$next_nonblank_token} ) {
+
+ # Fix for b1385: keep break after a comma following a
+ # 'do' block. This could also be used for other block
+ # types, but that would cause a significant change in
+ # existing formatting without much benefit.
+ if ( $next_nonblank_token eq ','
+ && $Knnb eq $K_last
+ && $block_type eq 'do'
+ && $rOpts_add_newlines
+ && $self->is_trailing_comma($Knnb) )
+ {
+ $self->[_rbreak_after_Klast_]->{$K_last} = 1;
+ }
+ }
else {
- $next_parent_seqno =
- $rparent_of_seqno->{$type_sequence_t};
+ $self->end_batch()
+ unless ( $no_internal_newlines
+ || $max_index_to_go < 0 );
}
- }
- }
- $next_parent_seqno = SEQ_ROOT
- unless ( defined($next_parent_seqno) );
-
- #--------------------------------------
- # End coding from sub parent_seqno_by_K
- #--------------------------------------
- $next_slevel = $rdepth_of_opening_seqno->[$next_parent_seqno] + 1;
- }
+ $rbrace_follower = undef;
+ }
- # Initialize some sequence-dependent variables to their normal values
- $parent_seqno_to_go[$max_index_to_go] = $next_parent_seqno;
- $nesting_depth_to_go[$max_index_to_go] = $next_slevel;
- $block_type_to_go[$max_index_to_go] = EMPTY_STRING;
+ else {
+ $self->end_batch()
+ unless ( $no_internal_newlines
+ || $max_index_to_go < 0 );
+ }
- # Then fix them at container tokens:
- if ($seqno) {
+ } ## end treatment of closing block token
- $block_type_to_go[$max_index_to_go] =
- $rblock_type_of_seqno->{$seqno}
- if ( $rblock_type_of_seqno->{$seqno} );
+ #------------------------------
+ # handle here_doc target string
+ #------------------------------
+ elsif ( $type eq 'h' ) {
- if ( $is_opening_token{$token} ) {
+ # no newlines after seeing here-target
+ $no_internal_newlines = 2;
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
+ }
- my $slevel = $rdepth_of_opening_seqno->[$seqno];
- $nesting_depth_to_go[$max_index_to_go] = $slevel;
- $next_slevel = $slevel + 1;
+ #-----------------------------
+ # handle all other token types
+ #-----------------------------
+ else {
- $next_parent_seqno = $seqno;
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
+ # break after a label if requested
+ if ( $rOpts_break_after_labels
+ && $type eq 'J'
+ && $rOpts_break_after_labels == 1 )
+ {
+ $self->end_batch()
+ unless ($no_internal_newlines);
+ }
}
- elsif ( $is_closing_token{$token} ) {
- $next_slevel = $rdepth_of_opening_seqno->[$seqno];
- my $slevel = $next_slevel + 1;
- $nesting_depth_to_go[$max_index_to_go] = $slevel;
+ # remember previous nonblank, non-comment OUTPUT token
+ $K_last_nonblank_code = $Ktoken_vars;
- my $parent_seqno = $rparent_of_seqno->{$seqno};
- $parent_seqno = SEQ_ROOT unless defined($parent_seqno);
- $parent_seqno_to_go[$max_index_to_go] = $parent_seqno;
- $next_parent_seqno = $parent_seqno;
+ } ## end of loop over all tokens in this line
+ return;
+ } ## end sub process_line_inner_loop
- }
- else {
- # ternary token: nothing to do
- }
- }
+} ## end closure process_line_of_CODE
- $nobreak_to_go[$max_index_to_go] = $no_internal_newlines;
+sub is_trailing_comma {
+ my ( $self, $KK ) = @_;
- my $length = $rtoken_vars->[_TOKEN_LENGTH_];
+ # Given:
+ # $KK - index of a comma in token list
+ # Return:
+ # true if the comma at index $KK is a trailing comma
+ # false if not
- # Safety check that length is defined. Should not be needed now.
- # Former patch for indent-only, in which the entire set of tokens is
- # turned into type 'q'. Lengths may have not been defined because sub
- # 'respace_tokens' is bypassed. We do not need lengths in this case,
- # but we will use the character count to have a defined value. In the
- # future, it would be nicer to have 'respace_tokens' convert the lines
- # to quotes and get correct lengths.
- if ( !defined($length) ) {
- $length = length($token);
+ my $rLL = $self->[_rLL_];
+ my $type_KK = $rLL->[$KK]->[_TYPE_];
+ if ( $type_KK ne ',' ) {
+ DEVEL_MODE
+ && Fault("Bad call: expected type ',' but received '$type_KK'\n");
+ return;
+ }
+ my $Knnb = $self->K_next_nonblank($KK);
+ if ( defined($Knnb) ) {
+ my $type_sequence = $rLL->[$Knnb]->[_TYPE_SEQUENCE_];
+ my $type_Knnb = $rLL->[$Knnb]->[_TYPE_];
+ if ( $type_sequence && $is_closing_type{$type_Knnb} ) {
+ return 1;
}
+ }
+ return;
+} ## end sub is_trailing_comma
- $token_lengths_to_go[$max_index_to_go] = $length;
+sub tight_paren_follows {
- # We keep a running sum of token lengths from the start of this batch:
- # summed_lengths_to_go[$i] = total length to just before token $i
- # summed_lengths_to_go[$i+1] = total length to just after token $i
- $summed_lengths_to_go[ $max_index_to_go + 1 ] =
- $summed_lengths_to_go[$max_index_to_go] + $length;
+ my ( $self, $K_to_go_0, $K_ic ) = @_;
- # Define the indentation that this token will have in two cases:
- # Without CI = reduced_spaces_to_go
- # With CI = leading_spaces_to_go
- if ($in_continued_quote) {
- $leading_spaces_to_go[$max_index_to_go] = 0;
- $reduced_spaces_to_go[$max_index_to_go] = 0;
- }
- else {
- $leading_spaces_to_go[$max_index_to_go] =
- $reduced_spaces_to_go[$max_index_to_go] =
- $rOpts_indent_columns * $radjusted_levels->[$Ktoken_vars];
+ # Input parameters:
+ # $K_to_go_0 = first token index K of this output batch (=K_to_go[0])
+ # $K_ic = index of the closing do brace (=K_to_go[$max_index_to_go])
+ # Return parameter:
+ # false if we want a break after the closing do brace
+ # true if we do not want a break after the closing do brace
- $leading_spaces_to_go[$max_index_to_go] +=
- $rOpts_continuation_indentation * $ci_level
- if ($ci_level);
- }
+ # We are at the closing brace of a 'do' block. See if this brace is
+ # followed by a closing paren, and if so, set a flag which indicates
+ # that we do not want a line break between the '}' and ')'.
- DEBUG_STORE && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
-"STORE: from $a $c: storing token $token type $type lev=$level at $max_index_to_go\n";
- };
- return;
- } ## end sub store_token_to_go
+ # xxxxx ( ...... do { ... } ) {
+ # ^-------looking at this brace, K_ic
- sub flush_batch_of_CODE {
+ # Subscript notation:
+ # _i = inner container (braces in this case)
+ # _o = outer container (parens in this case)
+ # _io = inner opening = '{'
+ # _ic = inner closing = '}'
+ # _oo = outer opening = '('
+ # _oc = outer closing = ')'
- # Finish any batch packaging and call the process routine.
- # This must be the only call to grind_batch_of_CODE()
- my ($self) = @_;
+ # |--K_oo |--K_oc = outer container
+ # xxxxx ( ...... do { ...... } ) {
+ # |--K_io |--K_ic = inner container
- if ( $max_index_to_go >= 0 ) {
+ # In general, the safe thing to do is return a 'false' value
+ # if the statement appears to be complex. This will have
+ # the downstream side-effect of opening up outer containers
+ # to help make complex code readable. But for simpler
+ # do blocks it can be preferable to keep the code compact
+ # by returning a 'true' value.
- # Create an array to hold variables for this batch
- my $this_batch = [];
+ return unless defined($K_ic);
+ my $rLL = $self->[_rLL_];
- $this_batch->[_starting_in_quote_] = 1 if ($starting_in_quote);
- $this_batch->[_ending_in_quote_] = 1 if ($ending_in_quote);
+ # we should only be called at a closing block
+ my $seqno_i = $rLL->[$K_ic]->[_TYPE_SEQUENCE_];
+ return unless ($seqno_i); # shouldn't happen;
- if ( $CODE_type || $last_CODE_type ) {
- $this_batch->[_batch_CODE_type_] =
- $K_to_go[$max_index_to_go] >= $K_first
- ? $CODE_type
- : $last_CODE_type;
- }
+ # This only applies if the next nonblank is a ')'
+ my $K_oc = $self->K_next_nonblank($K_ic);
+ return unless defined($K_oc);
+ my $token_next = $rLL->[$K_oc]->[_TOKEN_];
+ return unless ( $token_next eq ')' );
- $last_line_had_side_comment =
- ( $max_index_to_go > 0 && $types_to_go[$max_index_to_go] eq '#' );
+ my $seqno_o = $rLL->[$K_oc]->[_TYPE_SEQUENCE_];
+ my $K_io = $self->[_K_opening_container_]->{$seqno_i};
+ my $K_oo = $self->[_K_opening_container_]->{$seqno_o};
+ return unless ( defined($K_io) && defined($K_oo) );
- # The flag $is_static_block_comment applies to the line which just
- # arrived. So it only applies if we are outputting that line.
- if ( $is_static_block_comment && !$last_line_had_side_comment ) {
- $this_batch->[_is_static_block_comment_] =
- $K_to_go[0] == $K_first;
+ # RULE 1: Do not break before a closing signature paren
+ # (regardless of complexity). This is a fix for issue git#22.
+ # Looking for something like:
+ # sub xxx ( ... do { ... } ) {
+ # ^----- next block_type
+ my $K_test = $self->K_next_nonblank($K_oc);
+ if ( defined($K_test) && $rLL->[$K_test]->[_TYPE_] eq '{' ) {
+ my $seqno_test = $rLL->[$K_test]->[_TYPE_SEQUENCE_];
+ if ($seqno_test) {
+ if ( $self->[_ris_asub_block_]->{$seqno_test}
+ || $self->[_ris_sub_block_]->{$seqno_test} )
+ {
+ return 1;
}
+ }
+ }
- $this_batch->[_ri_starting_one_line_block_] =
- $ri_starting_one_line_block;
+ # RULE 2: Break if the contents within braces appears to be 'complex'. We
+ # base this decision on the number of tokens between braces.
- $self->[_this_batch_] = $this_batch;
+ # xxxxx ( ... do { ... } ) {
+ # ^^^^^^
+
+ # Although very simple, it has the advantages of (1) being insensitive to
+ # changes in lengths of identifier names, (2) easy to understand, implement
+ # and test. A test case for this is 't/snippets/long_line.in'.
+
+ # Example: $K_ic - $K_oo = 9 [Pass Rule 2]
+ # if ( do { $2 !~ /&/ } ) { ... }
+
+ # Example: $K_ic - $K_oo = 10 [Pass Rule 2]
+ # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
- $self->grind_batch_of_CODE();
+ # Example: $K_ic - $K_oo = 20 [Fail Rule 2]
+ # test_zero_args( "do-returned list slice", do { ( 10, 11 )[ 2, 3 ]; });
- # Done .. this batch is history
- $self->[_this_batch_] = undef;
+ return if ( $K_ic - $K_io > 16 );
- initialize_batch_variables();
- }
+ # RULE 3: break if the code between the opening '(' and the '{' is 'complex'
+ # As with the previous rule, we decide based on the token count
- return;
- } ## end sub flush_batch_of_CODE
+ # xxxxx ( ... do { ... } ) {
+ # ^^^^^^^^
- sub end_batch {
+ # Example: $K_ic - $K_oo = 9 [Pass Rule 2]
+ # $K_io - $K_oo = 4 [Pass Rule 3]
+ # if ( do { $2 !~ /&/ } ) { ... }
- # end the current batch, EXCEPT for a few special cases
- my ($self) = @_;
+ # Example: $K_ic - $K_oo = 10 [Pass rule 2]
+ # $K_io - $K_oo = 9 [Pass rule 3]
+ # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
- if ( $max_index_to_go < 0 ) {
+ return if ( $K_io - $K_oo > 9 );
- # This is harmless but should be eliminated in development
- if (DEVEL_MODE) {
- Fault("End batch called with nothing to do; please fix\n");
- }
- return;
- }
+ # RULE 4: Break if we have already broken this batch of output tokens
+ return if ( $K_oo < $K_to_go_0 );
- # Exceptions when a line does not end with a comment... (fixes c058)
- if ( $types_to_go[$max_index_to_go] ne '#' ) {
+ # RULE 5: Break if input is not on one line
+ # For example, we will set the flag for the following expression
+ # written in one line:
- # Exception 1: Do not end line in a weld
- return
- if ( $total_weld_count
- && $self->[_rK_weld_right_]->{ $K_to_go[$max_index_to_go] } );
+ # This has: $K_ic - $K_oo = 10 [Pass rule 2]
+ # $K_io - $K_oo = 8 [Pass rule 3]
+ # $self->debug( 'Error: ' . do { local $/; <$err> } );
- # Exception 2: just set a tentative breakpoint if we might be in a
- # one-line block
- if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
- $self->set_forced_breakpoint($max_index_to_go);
- return;
- }
- }
+ # but we break after the brace if it is on multiple lines on input, since
+ # the user may prefer it on multiple lines:
- $self->flush_batch_of_CODE();
- return;
- } ## end sub end_batch
+ # [Fail rule 5]
+ # $self->debug(
+ # 'Error: ' . do { local $/; <$err> }
+ # );
- sub flush_vertical_aligner {
- my ($self) = @_;
- my $vao = $self->[_vertical_aligner_object_];
- $vao->flush();
- return;
+ if ( !$rOpts_ignore_old_breakpoints ) {
+ my $iline_oo = $rLL->[$K_oo]->[_LINE_INDEX_];
+ my $iline_oc = $rLL->[$K_oc]->[_LINE_INDEX_];
+ return if ( $iline_oo != $iline_oc );
}
- # flush is called to output any tokens in the pipeline, so that
- # an alternate source of lines can be written in the correct order
- sub flush {
- my ( $self, $CODE_type_flush ) = @_;
+ # OK to keep the paren tight
+ return 1;
+} ## end sub tight_paren_follows
- # end the current batch with 1 exception
+my %is_brace_semicolon_colon;
- destroy_one_line_block();
+BEGIN {
+ my @q = qw( { } ; : );
+ @is_brace_semicolon_colon{@q} = (1) x scalar(@q);
+}
- # Exception: if we are flushing within the code stream only to insert
- # blank line(s), then we can keep the batch intact at a weld. This
- # improves formatting of -ce. See test 'ce1.ce'
- if ( $CODE_type_flush && $CODE_type_flush eq 'BL' ) {
- $self->end_batch() if ( $max_index_to_go >= 0 );
- }
+sub starting_one_line_block {
- # otherwise, we have to shut things down completely.
- else { $self->flush_batch_of_CODE() }
+ # After seeing an opening curly brace, look for the closing brace and see
+ # if the entire block will fit on a line. This routine is not always right
+ # so a check is made later (at the closing brace) to make sure we really
+ # have a one-line block. We have to do this preliminary check, though,
+ # because otherwise we would always break at a semicolon within a one-line
+ # block if the block contains multiple statements.
- $self->flush_vertical_aligner();
- return;
- } ## end sub flush
+ # Given:
+ # $Kj = index of opening brace
+ # $K_last_nonblank = index of previous nonblank code token
+ # $K_last = index of last token of input line
- sub process_line_of_CODE {
+ # Calls 'create_one_line_block' if one-line block might be formed.
- my ( $self, $my_line_of_tokens ) = @_;
+ # Also returns a flag '$too_long':
+ # true = distance from opening keyword to OPENING brace exceeds
+ # the maximum line length.
+ # false (simple return) => not too long
+ # Note that this flag is for distance from the statement start to the
+ # OPENING brace, not the closing brace.
- #----------------------------------------------------------------
- # This routine is called once per INPUT line to format all of the
- # tokens on that line.
- #----------------------------------------------------------------
+ my ( $self, $Kj, $K_last_nonblank, $K_last ) = @_;
- # It outputs full-line comments and blank lines immediately.
+ my $rbreak_container = $self->[_rbreak_container_];
+ my $rshort_nested = $self->[_rshort_nested_];
+ my $rLL = $self->[_rLL_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
- # The tokens are copied one-by-one from the global token array $rLL to
- # a set of '_to_go' arrays which collect batches of tokens for a
- # further processing via calls to 'sub store_token_to_go', until a well
- # defined 'structural' break point* or 'forced' breakpoint* is reached.
- # Then, the batch of collected '_to_go' tokens is passed along to 'sub
- # grind_batch_of_CODE' for further processing.
+ # kill any current block - we can only go 1 deep
+ create_one_line_block();
- # * 'structural' break points are basically line breaks corresponding
- # to code blocks. An example is a chain of if-elsif-else statements,
- # which should typically be broken at the opening and closing braces.
+ my $i_start = 0;
- # * 'forced' break points are breaks required by side comments or by
- # special user controls.
+ # This routine should not have been called if there are no tokens in the
+ # 'to_go' arrays of previously stored tokens. A previous call to
+ # 'store_token_to_go' should have stored an opening brace. An error here
+ # indicates that a programming change may have caused a flush operation to
+ # clean out the previously stored tokens.
+ if ( !defined($max_index_to_go) || $max_index_to_go < 0 ) {
+ Fault("program bug: store_token_to_go called incorrectly\n")
+ if (DEVEL_MODE);
+ return;
+ }
- # So this routine is just making an initial set of required line
- # breaks, basically regardless of the maximum requested line length.
- # The subsequent stage of formatting make additional line breaks
- # appropriate for lists and logical structures, and to keep line
- # lengths below the requested maximum line length.
+ # Return if block should be broken
+ my $type_sequence_j = $rLL->[$Kj]->[_TYPE_SEQUENCE_];
+ if ( $rbreak_container->{$type_sequence_j} ) {
+ return;
+ }
- #-----------------------------------
- # begin initialize closure variables
- #-----------------------------------
- $line_of_tokens = $my_line_of_tokens;
- my $rK_range = $line_of_tokens->{_rK_range};
- if ( !defined( $rK_range->[0] ) ) {
+ my $ris_bli_container = $self->[_ris_bli_container_];
+ my $is_bli = $ris_bli_container->{$type_sequence_j};
- # Empty line: This can happen if tokens are deleted, for example
- # with the -mangle parameter
- return;
+ my $block_type = $rblock_type_of_seqno->{$type_sequence_j};
+ $block_type = EMPTY_STRING unless ( defined($block_type) );
+
+ my $previous_nonblank_token = EMPTY_STRING;
+ my $i_last_nonblank = -1;
+ if ( defined($K_last_nonblank) ) {
+ $i_last_nonblank = $K_last_nonblank - $K_to_go[0];
+ if ( $i_last_nonblank >= 0 ) {
+ $previous_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
}
+ }
- ( $K_first, $K_last ) = @{$rK_range};
- $last_CODE_type = $CODE_type;
- $CODE_type = $line_of_tokens->{_code_type};
+ #---------------------------------------------------------------------
+ # find the starting keyword for this block (such as 'if', 'else', ...)
+ #---------------------------------------------------------------------
+ if (
+ $max_index_to_go == 0
+ ##|| $block_type =~ /^[\{\}\;\:]$/
+ || $is_brace_semicolon_colon{$block_type}
+ || substr( $block_type, 0, 7 ) eq 'package'
+ )
+ {
+ $i_start = $max_index_to_go;
+ }
- $rLL = $self->[_rLL_];
- $radjusted_levels = $self->[_radjusted_levels_];
- $rparent_of_seqno = $self->[_rparent_of_seqno_];
- $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
- $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ # the previous nonblank token should start these block types
+ elsif (
+ $i_last_nonblank >= 0
+ && ( $previous_nonblank_token eq $block_type
+ || $self->[_ris_asub_block_]->{$type_sequence_j}
+ || $self->[_ris_sub_block_]->{$type_sequence_j}
+ || substr( $block_type, -2, 2 ) eq '()' )
+ )
+ {
+ $i_start = $i_last_nonblank;
- #---------------------------------
- # end initialize closure variables
- #---------------------------------
+ # For signatures and extended syntax ...
+ # If this brace follows a parenthesized list, we should look back to
+ # find the keyword before the opening paren because otherwise we might
+ # form a one line block which stays intact, and cause the parenthesized
+ # expression to break open. That looks bad.
+ if ( $tokens_to_go[$i_start] eq ')' ) {
- # This flag will become nobreak_to_go and should be set to 2 to prevent
- # a line break AFTER the current token.
- $no_internal_newlines = 0;
- if ( !$rOpts_add_newlines || $CODE_type eq 'NIN' ) {
- $no_internal_newlines = 2;
- }
+ # Find the opening paren
+ my $K_start = $K_to_go[$i_start];
+ return unless defined($K_start);
+ my $seqno = $type_sequence_to_go[$i_start];
+ return unless ($seqno);
+ my $K_opening = $K_opening_container->{$seqno};
+ return unless defined($K_opening);
+ my $i_opening = $i_start + ( $K_opening - $K_start );
- my $input_line = $line_of_tokens->{_line_text};
+ # give up if not on this line
+ return unless ( $i_opening >= 0 );
+ $i_start = $i_opening;
- my ( $is_block_comment, $has_side_comment );
- if ( $rLL->[$K_last]->[_TYPE_] eq '#' ) {
- if ( $K_last == $K_first ) { $is_block_comment = 1 }
- else { $has_side_comment = 1 }
+ # go back one token before the opening paren
+ if ( $i_start > 0 ) { $i_start-- }
+ if ( $types_to_go[$i_start] eq 'b' && $i_start > 0 ) { $i_start--; }
+ my $lev = $levels_to_go[$i_start];
+ if ( $lev > $rLL->[$Kj]->[_LEVEL_] ) { return }
}
+ }
- my $is_static_block_comment_without_leading_space =
- $CODE_type eq 'SBCX';
- $is_static_block_comment =
- $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space;
+ elsif ( $previous_nonblank_token eq ')' ) {
- # check for a $VERSION statement
- if ( $CODE_type eq 'VER' ) {
- $self->[_saw_VERSION_in_this_file_] = 1;
- $no_internal_newlines = 2;
+ # For something like "if (xxx) {", the keyword "if" will be
+ # just after the most recent break. This will be 0 unless
+ # we have just killed a one-line block and are starting another.
+ # (doif.t)
+ # Note: cannot use inext_index_to_go[] here because that array
+ # is still being constructed.
+ $i_start = $index_max_forced_break + 1;
+ if ( $types_to_go[$i_start] eq 'b' ) {
+ $i_start++;
}
- # Add interline blank if any
- my $last_old_nonblank_type = "b";
- my $first_new_nonblank_token = EMPTY_STRING;
- my $K_first_true = $K_first;
- if ( $max_index_to_go >= 0 ) {
- $last_old_nonblank_type = $types_to_go[$max_index_to_go];
- $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
- if ( !$is_block_comment
- && $types_to_go[$max_index_to_go] ne 'b'
- && $K_first > 0
- && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' )
- {
- $K_first -= 1;
- }
+ # Patch to avoid breaking short blocks defined with extended_syntax:
+ # Strip off any trailing () which was added in the parser to mark
+ # the opening keyword. For example, in the following
+ # create( TypeFoo $e) {$bubba}
+ # the blocktype would be marked as create()
+ my $stripped_block_type = $block_type;
+ if ( substr( $block_type, -2, 2 ) eq '()' ) {
+ $stripped_block_type = substr( $block_type, 0, -2 );
}
+ unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
+ return;
+ }
+ }
- my $rtok_first = $rLL->[$K_first];
-
- my $in_quote = $line_of_tokens->{_ending_in_quote};
- $ending_in_quote = $in_quote;
-
- #------------------------------------
- # Handle a block (full-line) comment.
- #------------------------------------
- if ($is_block_comment) {
+ # patch for SWITCH/CASE to retain one-line case/when blocks
+ elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
- if ( $rOpts->{'delete-block-comments'} ) {
- $self->flush();
- return;
- }
+ # Note: cannot use inext_index_to_go[] here because that array
+ # is still being constructed.
+ $i_start = $index_max_forced_break + 1;
+ if ( $types_to_go[$i_start] eq 'b' ) {
+ $i_start++;
+ }
+ unless ( $tokens_to_go[$i_start] eq $block_type ) {
+ return;
+ }
+ }
+ else {
- destroy_one_line_block();
- $self->end_batch() if ( $max_index_to_go >= 0 );
+ #-------------------------------------------
+ # Couldn't find start - return too_long flag
+ #-------------------------------------------
+ return 1;
+ }
- # output a blank line before block comments
- if (
- # unless we follow a blank or comment line
- $self->[_last_line_leading_type_] ne '#'
- && $self->[_last_line_leading_type_] ne 'b'
+ my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
- # only if allowed
- && $rOpts->{'blanks-before-comments'}
+ my $maximum_line_length =
+ $maximum_line_length_at_level[ $levels_to_go[$i_start] ];
- # if this is NOT an empty comment, unless it follows a side
- # comment and could become a hanging side comment.
- && (
- $rtok_first->[_TOKEN_] ne '#'
- || ( $last_line_had_side_comment
- && $rLL->[$K_first]->[_LEVEL_] > 0 )
- )
+ # see if distance to the opening container is too great to even start
+ if ( $pos > $maximum_line_length ) {
- # not after a short line ending in an opening token
- # because we already have space above this comment.
- # Note that the first comment in this if block, after
- # the 'if (', does not get a blank line because of this.
- && !$self->[_last_output_short_opening_token_]
+ #------------------------------
+ # too long to the opening token
+ #------------------------------
+ return 1;
+ }
- # never before static block comments
- && !$is_static_block_comment
- )
- {
- $self->flush(); # switching to new output stream
- my $file_writer_object = $self->[_file_writer_object_];
- $file_writer_object->write_blank_code_line();
- $self->[_last_line_leading_type_] = 'b';
- }
+ #-----------------------------------------------------------------------
+ # OK so far: the statement is not to long just to the OPENING token. Now
+ # see if everything to the closing token will fit on one line
+ #-----------------------------------------------------------------------
- if (
- $rOpts->{'indent-block-comments'}
- && ( !$rOpts->{'indent-spaced-block-comments'}
- || $input_line =~ /^\s+/ )
- && !$is_static_block_comment_without_leading_space
- )
- {
- my $Ktoken_vars = $K_first;
- my $rtoken_vars = $rLL->[$Ktoken_vars];
- $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
- $self->end_batch();
- }
- else {
+ # This is part of an update to fix cases b562 .. b983
+ my $K_closing = $self->[_K_closing_container_]->{$type_sequence_j};
+ return unless ( defined($K_closing) );
+ my $container_length = $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[$Kj]->[_CUMULATIVE_LENGTH_];
- # switching to new output stream
- $self->flush();
+ my $excess = $pos + 1 + $container_length - $maximum_line_length;
- # Note that last arg in call here is 'undef' for comments
- my $file_writer_object = $self->[_file_writer_object_];
- $file_writer_object->write_code_line(
- $rtok_first->[_TOKEN_] . "\n", undef );
- $self->[_last_line_leading_type_] = '#';
- }
- return;
- }
+ # Add a small tolerance for welded tokens (case b901)
+ if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence_j) ) {
+ $excess += 2;
+ }
- # Compare input/output indentation except for:
- # - hanging side comments
- # - continuation lines (have unknown amount of initial blank space)
- # - and lines which are quotes (because they may have been outdented)
- my $guessed_indentation_level =
- $line_of_tokens->{_guessed_indentation_level};
+ if ( $excess > 0 ) {
- unless ( $CODE_type eq 'HSC'
- || $rtok_first->[_CI_LEVEL_] > 0
- || $guessed_indentation_level == 0 && $rtok_first->[_TYPE_] eq 'Q' )
- {
- my $input_line_number = $line_of_tokens->{_line_number};
- $self->compare_indentation_levels( $K_first,
- $guessed_indentation_level, $input_line_number );
- }
+ # line is too long... there is no chance of forming a one line block
+ # if the excess is more than 1 char
+ return if ( $excess > 1 );
- #------------------------
- # Handle indentation-only
- #------------------------
+ # ... and give up if it is not a one-line block on input.
+ # note: for a one-line block on input, it may be possible to keep
+ # it as a one-line block (by removing a needless semicolon ).
+ my $K_start = $K_to_go[$i_start];
+ my $ldiff =
+ $rLL->[$K_closing]->[_LINE_INDEX_] - $rLL->[$K_start]->[_LINE_INDEX_];
+ return if ($ldiff);
+ }
- # NOTE: In previous versions we sent all qw lines out immediately here.
- # No longer doing this: also write a line which is entirely a 'qw' list
- # to allow stacking of opening and closing tokens. Note that interior
- # qw lines will still go out at the end of this routine.
- if ( $CODE_type eq 'IO' ) {
- $self->flush();
- my $line = $input_line;
+ #------------------------------------------------------------------
+ # Loop to check contents and length of the potential one-line block
+ #------------------------------------------------------------------
+ foreach my $Ki ( $Kj + 1 .. $K_last ) {
- # Fix for rt #125506 Unexpected string formating
- # in which leading space of a terminal quote was removed
- $line =~ s/\s+$//;
- $line =~ s/^\s+// unless ( $line_of_tokens->{_starting_in_quote} );
+ # old whitespace could be arbitrarily large, so don't use it
+ if ( $rLL->[$Ki]->[_TYPE_] eq 'b' ) { $pos += 1 }
+ else { $pos += $rLL->[$Ki]->[_TOKEN_LENGTH_] }
- my $Ktoken_vars = $K_first;
+ # ignore some small blocks
+ my $type_sequence_i = $rLL->[$Ki]->[_TYPE_SEQUENCE_];
+ my $nobreak = $rshort_nested->{$type_sequence_i};
- # We work with a copy of the token variables and change the
- # first token to be the entire line as a quote variable
- my $rtoken_vars = $rLL->[$Ktoken_vars];
- $rtoken_vars = copy_token_as_type( $rtoken_vars, 'q', $line );
+ # Return false result if we exceed the maximum line length,
+ if ( $pos > $maximum_line_length ) {
+ return;
+ }
- # Patch: length is not really important here
- $rtoken_vars->[_TOKEN_LENGTH_] = length($line);
+ # keep going for non-containers
+ elsif ( !$type_sequence_i ) {
- $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
- $self->end_batch();
- return;
}
- #---------------------------
- # Handle all other lines ...
- #---------------------------
+ # return if we encounter another opening brace before finding the
+ # closing brace.
+ elsif ($rLL->[$Ki]->[_TOKEN_] eq '{'
+ && $rLL->[$Ki]->[_TYPE_] eq '{'
+ && $rblock_type_of_seqno->{$type_sequence_i}
+ && !$nobreak )
+ {
+ return;
+ }
- # If we just saw the end of an elsif block, write nag message
- # if we do not see another elseif or an else.
- if ($looking_for_else) {
+ # if we find our closing brace..
+ elsif ($rLL->[$Ki]->[_TOKEN_] eq '}'
+ && $rLL->[$Ki]->[_TYPE_] eq '}'
+ && $rblock_type_of_seqno->{$type_sequence_i}
+ && !$nobreak )
+ {
- ## /^(elsif|else)$/
- if ( !$is_elsif_else{ $rLL->[$K_first_true]->[_TOKEN_] } ) {
- write_logfile_entry("(No else block)\n");
+ # be sure any trailing comment also fits on the line
+ my $Ki_nonblank = $Ki;
+ if ( $Ki_nonblank < $K_last ) {
+ $Ki_nonblank++;
+ if ( $rLL->[$Ki_nonblank]->[_TYPE_] eq 'b'
+ && $Ki_nonblank < $K_last )
+ {
+ $Ki_nonblank++;
+ }
}
- $looking_for_else = 0;
- }
- # This is a good place to kill incomplete one-line blocks
- if ( $max_index_to_go >= 0 ) {
- if (
- (
- ( $semicolons_before_block_self_destruct == 0 )
- && ( $last_old_nonblank_type eq ';' )
- && ( $first_new_nonblank_token ne '}' )
- )
+ # Patch for one-line sort/map/grep/eval blocks with side comments:
+ # We will ignore the side comment length for sort/map/grep/eval
+ # because this can lead to statements which change every time
+ # perltidy is run. Here is an example from Denis Moskowitz which
+ # oscillates between these two states without this patch:
- # Patch for RT #98902. Honor request to break at old commas.
- || ( $rOpts_break_at_old_comma_breakpoints
- && $last_old_nonblank_type eq ',' )
- )
+## --------
+## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
+## @baz;
+##
+## grep {
+## $_->foo ne 'bar'
+## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
+## @baz;
+## --------
+
+ # When the first line is input it gets broken apart by the main
+ # line break logic in sub process_line_of_CODE.
+ # When the second line is input it gets recombined by
+ # process_line_of_CODE and passed to the output routines. The
+ # output routines (break_long_lines) do not break it apart
+ # because the bond strengths are set to the highest possible value
+ # for grep/map/eval/sort blocks, so the first version gets output.
+ # It would be possible to fix this by changing bond strengths,
+ # but they are high to prevent errors in older versions of perl.
+ # See c100 for eval test.
+ if ( $Ki < $K_last
+ && $rLL->[$K_last]->[_TYPE_] eq '#'
+ && $rLL->[$K_last]->[_LEVEL_] == $rLL->[$Ki]->[_LEVEL_]
+ && !$rOpts_ignore_side_comment_lengths
+ && !$is_sort_map_grep_eval{$block_type}
+ && $K_last - $Ki_nonblank <= 2 )
{
- $forced_breakpoint_to_go[$max_index_to_go] = 1
- if ($rOpts_break_at_old_comma_breakpoints);
- destroy_one_line_block();
- $self->end_batch();
- }
+ # Only include the side comment for if/else/elsif/unless if it
+ # immediately follows (because the current '$rbrace_follower'
+ # logic for these will give an immediate brake after these
+ # closing braces). So for example a line like this
+ # if (...) { ... } ; # very long comment......
+ # will already break like this:
+ # if (...) { ... }
+ # ; # very long comment......
+ # so we do not need to include the length of the comment, which
+ # would break the block. Project 'bioperl' has coding like this.
+ ## !~ /^(if|else|elsif|unless)$/
+ if ( !$is_if_unless_elsif_else{$block_type}
+ || $K_last == $Ki_nonblank )
+ {
+ $Ki_nonblank = $K_last;
+ $pos += $rLL->[$Ki_nonblank]->[_TOKEN_LENGTH_];
- # Keep any requested breaks before this line. Note that we have to
- # use the original K_first because it may have been reduced above
- # to add a blank. The value of the flag is as follows:
- # 1 => hard break, flush the batch
- # 2 => soft break, set breakpoint and continue building the batch
- if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} ) {
- destroy_one_line_block();
- if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} == 2 ) {
- $self->set_forced_breakpoint($max_index_to_go);
- }
- else {
- $self->end_batch() if ( $max_index_to_go >= 0 );
- }
- }
- }
+ if ( $Ki_nonblank > $Ki + 1 ) {
- #--------------------------------------
- # loop to process the tokens one-by-one
- #--------------------------------------
+ # source whitespace could be anything, assume
+ # at least one space before the hash on output
+ if ( $rLL->[ $Ki + 1 ]->[_TYPE_] eq 'b' ) {
+ $pos += 1;
+ }
+ else { $pos += $rLL->[ $Ki + 1 ]->[_TOKEN_LENGTH_] }
+ }
- # We do not want a leading blank if the previous batch just got output
+ if ( $pos >= $maximum_line_length ) {
+ return;
+ }
+ }
+ }
- if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
- $K_first++;
+ #--------------------------
+ # ok, it's a one-line block
+ #--------------------------
+ create_one_line_block($i_start);
+ return;
}
- foreach my $Ktoken_vars ( $K_first .. $K_last ) {
+ # just keep going for other characters
+ else {
+ }
+ }
- my $rtoken_vars = $rLL->[$Ktoken_vars];
+ #--------------------------------------------------
+ # End Loop to examine tokens in potential one-block
+ #--------------------------------------------------
- #--------------
- # handle blanks
- #--------------
- if ( $rtoken_vars->[_TYPE_] eq 'b' ) {
- $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
- next;
- }
+ # We haven't hit the closing brace, but there is still space. So the
+ # question here is, should we keep going to look at more lines in hopes of
+ # forming a new one-line block, or should we stop right now. The problem
+ # with continuing is that we will not be able to honor breaks before the
+ # opening brace if we continue.
- #------------------
- # handle non-blanks
- #------------------
- my $type = $rtoken_vars->[_TYPE_];
+ # Typically we will want to keep trying to make one-line blocks for things
+ # like sort/map/grep/eval. But it is not always a good idea to make as
+ # many one-line blocks as possible, so other types are not done. The user
+ # can always use -mangle.
- # If we are continuing after seeing a right curly brace, flush
- # buffer unless we see what we are looking for, as in
- # } else ...
- if ($rbrace_follower) {
- my $token = $rtoken_vars->[_TOKEN_];
- unless ( $rbrace_follower->{$token} ) {
- $self->end_batch() if ( $max_index_to_go >= 0 );
- }
- $rbrace_follower = undef;
+ # If we want to keep going, we will create a new one-line block.
+ # The blocks which we can keep going are in a hash, but we never want
+ # to continue if we are at a '-bli' block.
+ if ( $want_one_line_block{$block_type} && !$is_bli ) {
+ my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence_j};
+ my $semicolon_count = $rtype_count
+ && $rtype_count->{';'} ? $rtype_count->{';'} : 0;
+
+ # Ignore a terminal semicolon in the count
+ if ( $semicolon_count <= 2 ) {
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $K_closing_j = $K_closing_container->{$type_sequence_j};
+ my $Kp = $self->K_previous_nonblank($K_closing_j);
+ if ( defined($Kp)
+ && $rLL->[$Kp]->[_TYPE_] eq ';' )
+ {
+ $semicolon_count -= 1;
}
+ }
+ if ( $semicolon_count <= 0 ) {
+ create_one_line_block($i_start);
+ }
+ elsif ( $semicolon_count == 1 && $block_type eq 'eval' ) {
- my (
- $block_type, $type_sequence,
- $is_opening_BLOCK, $is_closing_BLOCK,
- $nobreak_BEFORE_BLOCK
- );
-
- if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) {
+ # Mark short broken eval blocks for possible later use in
+ # avoiding adding spaces before a 'package' line. This is not
+ # essential but helps keep newer and older formatting the same.
+ $self->[_ris_short_broken_eval_block_]->{$type_sequence_j} = 1;
+ }
+ }
+ return;
+} ## end sub starting_one_line_block
- my $token = $rtoken_vars->[_TOKEN_];
- $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
- $block_type = $rblock_type_of_seqno->{$type_sequence};
+sub unstore_token_to_go {
- if ( $block_type
- && $token eq $type
- && $block_type ne 't'
- && !$self->[_rshort_nested_]->{$type_sequence} )
- {
+ # remove most recent token from output stream
+ my $self = shift;
+ if ( $max_index_to_go > 0 ) {
+ $max_index_to_go--;
+ }
+ else {
+ $max_index_to_go = UNDEFINED_INDEX;
+ }
+ return;
+} ## end sub unstore_token_to_go
- if ( $type eq '{' ) {
- $is_opening_BLOCK = 1;
- $nobreak_BEFORE_BLOCK = $no_internal_newlines;
- }
- elsif ( $type eq '}' ) {
- $is_closing_BLOCK = 1;
- $nobreak_BEFORE_BLOCK = $no_internal_newlines;
- }
- }
- }
+sub compare_indentation_levels {
- # if at last token ...
- if ( $Ktoken_vars == $K_last ) {
+ # Check to see if output line tabbing agrees with input line
+ # this can be very useful for debugging a script which has an extra
+ # or missing brace.
- #---------------------
- # handle side comments
- #---------------------
- if ($has_side_comment) {
- $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
- next;
- }
- }
+ my ( $self, $K_first, $guessed_indentation_level, $line_number ) = @_;
+ return unless ( defined($K_first) );
- # if before last token ... do not allow breaks which would promote
- # a side comment to a block comment
- elsif (
- $has_side_comment
- && ( $Ktoken_vars == $K_last - 1
- || $Ktoken_vars == $K_last - 2
- && $rLL->[ $K_last - 1 ]->[_TYPE_] eq 'b' )
- )
- {
- $no_internal_newlines = 2;
- }
+ my $rLL = $self->[_rLL_];
- # Process non-blank and non-comment tokens ...
+ # ignore a line with a leading blank token - issue c195
+ my $type = $rLL->[$K_first]->[_TYPE_];
+ return if ( $type eq 'b' );
- #-----------------
- # handle semicolon
- #-----------------
- if ( $type eq ';' ) {
+ my $structural_indentation_level = $self->[_radjusted_levels_]->[$K_first];
- my $next_nonblank_token_type = 'b';
- my $next_nonblank_token = EMPTY_STRING;
- if ( $Ktoken_vars < $K_last ) {
- my $Knnb = $Ktoken_vars + 1;
- $Knnb++ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b' );
- $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
- $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
- }
+ # record max structural depth for log file
+ if ( $structural_indentation_level > $self->[_maximum_BLOCK_level_] ) {
+ $self->[_maximum_BLOCK_level_] = $structural_indentation_level;
+ $self->[_maximum_BLOCK_level_at_line_] = $line_number;
+ }
- my $break_before_semicolon = ( $Ktoken_vars == $K_first )
- && $rOpts_break_at_old_semicolon_breakpoints;
+ my $type_sequence = $rLL->[$K_first]->[_TYPE_SEQUENCE_];
+ my $is_closing_block =
+ $type_sequence
+ && $self->[_rblock_type_of_seqno_]->{$type_sequence}
+ && $type eq '}';
- # kill one-line blocks with too many semicolons
- $semicolons_before_block_self_destruct--;
- if (
- $break_before_semicolon
- || ( $semicolons_before_block_self_destruct < 0 )
- || ( $semicolons_before_block_self_destruct == 0
- && $next_nonblank_token_type !~ /^[b\}]$/ )
- )
- {
- destroy_one_line_block();
- $self->end_batch()
- if ( $break_before_semicolon
- && $max_index_to_go >= 0 );
- }
+ if ( $guessed_indentation_level ne $structural_indentation_level ) {
+ $self->[_last_tabbing_disagreement_] = $line_number;
- $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
+ if ($is_closing_block) {
- $self->end_batch()
- unless (
- $no_internal_newlines
- || ( $rOpts_keep_interior_semicolons
- && $Ktoken_vars < $K_last )
- || ( $next_nonblank_token eq '}' )
- );
+ if ( !$self->[_in_brace_tabbing_disagreement_] ) {
+ $self->[_in_brace_tabbing_disagreement_] = $line_number;
}
+ if ( !$self->[_first_brace_tabbing_disagreement_] ) {
+ $self->[_first_brace_tabbing_disagreement_] = $line_number;
+ }
+ }
- #-----------
- # handle '{'
- #-----------
- elsif ($is_opening_BLOCK) {
+ if ( !$self->[_in_tabbing_disagreement_] ) {
+ $self->[_tabbing_disagreement_count_]++;
- # Tentatively output this token. This is required before
- # calling starting_one_line_block. We may have to unstore
- # it, though, if we have to break before it.
- $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
+ if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
+ write_logfile_entry(
+"Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
+ );
+ }
+ $self->[_in_tabbing_disagreement_] = $line_number;
+ $self->[_first_tabbing_disagreement_] = $line_number
+ unless ( $self->[_first_tabbing_disagreement_] );
+ }
+ }
+ else {
- # Look ahead to see if we might form a one-line block..
- my $too_long =
- $self->starting_one_line_block( $Ktoken_vars,
- $K_last_nonblank_code, $K_last );
- $self->clear_breakpoint_undo_stack();
+ $self->[_in_brace_tabbing_disagreement_] = 0 if ($is_closing_block);
- # to simplify the logic below, set a flag to indicate if
- # this opening brace is far from the keyword which introduces it
- my $keyword_on_same_line = 1;
- if (
- $max_index_to_go >= 0
- && defined($K_last_nonblank_code)
- && $rLL->[$K_last_nonblank_code]->[_TYPE_] eq ')'
- && ( ( $rtoken_vars->[_LEVEL_] < $levels_to_go[0] )
- || $too_long )
- )
- {
- $keyword_on_same_line = 0;
- }
+ my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
+ if ($in_tabbing_disagreement) {
- # Break before '{' if requested with -bl or -bli flag
- my $want_break = $self->[_rbrace_left_]->{$type_sequence};
+ if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
+ write_logfile_entry(
+"End indentation disagreement from input line $in_tabbing_disagreement\n"
+ );
- # But do not break if this token is welded to the left
- if ( $total_weld_count
- && defined( $self->[_rK_weld_left_]->{$Ktoken_vars} ) )
+ if ( $self->[_tabbing_disagreement_count_] == MAX_NAG_MESSAGES )
{
- $want_break = 0;
+ write_logfile_entry(
+ "No further tabbing disagreements will be noted\n");
}
+ }
+ $self->[_in_tabbing_disagreement_] = 0;
- # Break BEFORE an opening '{' ...
- if (
+ }
+ }
+ return;
+} ## end sub compare_indentation_levels
- # if requested
- $want_break
+###################################################
+# CODE SECTION 8: Utilities for setting breakpoints
+###################################################
- # and we were unable to start looking for a block,
- && $index_start_one_line_block == UNDEFINED_INDEX
+{ ## begin closure set_forced_breakpoint
- # or if it will not be on same line as its keyword, so that
- # it will be outdented (eval.t, overload.t), and the user
- # has not insisted on keeping it on the right
- || ( !$keyword_on_same_line
- && !$rOpts_opening_brace_always_on_right )
- )
- {
+ my @forced_breakpoint_undo_stack;
- # but only if allowed
- unless ($nobreak_BEFORE_BLOCK) {
+ # These are global vars for efficiency:
+ # my $forced_breakpoint_count;
+ # my $forced_breakpoint_undo_count;
+ # my $index_max_forced_break;
- # since we already stored this token, we must unstore it
- $self->unstore_token_to_go();
+ # Break before or after certain tokens based on user settings
+ my %break_before_or_after_token;
- # then output the line
- $self->end_batch() if ( $max_index_to_go >= 0 );
+ BEGIN {
- # and now store this token at the start of a new line
- $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
- }
- }
+ # Updated to use all operators. This fixes case b1054
+ # Here is the previous simplified version:
+ ## my @q = qw( . : ? and or xor && || );
+ my @q = @all_operators;
- # now output this line
- $self->end_batch()
- if ( $max_index_to_go >= 0 && !$no_internal_newlines );
- }
+ push @q, ',';
+ @break_before_or_after_token{@q} = (1) x scalar(@q);
+ } ## end BEGIN
- #-----------
- # handle '}'
- #-----------
- elsif ($is_closing_BLOCK) {
+ sub set_fake_breakpoint {
- my $next_nonblank_token_type = 'b';
- my $next_nonblank_token = EMPTY_STRING;
- my $Knnb;
- if ( $Ktoken_vars < $K_last ) {
- $Knnb = $Ktoken_vars + 1;
- $Knnb++ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b' );
- $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
- $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
- }
+ # Just bump up the breakpoint count as a signal that there are breaks.
+ # This is useful if we have breaks but may want to postpone deciding
+ # where to make them.
+ $forced_breakpoint_count++;
+ return;
+ } ## end sub set_fake_breakpoint
- # If there is a pending one-line block ..
- if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
+ use constant DEBUG_FORCE => 0;
- # Fix for b1208: if a side comment follows this closing
- # brace then we must include its length in the length test
- # ... unless the -issl flag is set (fixes b1307-1309).
- # Assume a minimum of 1 blank space to the comment.
- my $added_length = 0;
- if ( $has_side_comment
- && !$rOpts_ignore_side_comment_lengths
- && $next_nonblank_token_type eq '#' )
- {
- $added_length = 1 + $rLL->[$K_last]->[_TOKEN_LENGTH_];
- }
+ sub set_forced_breakpoint {
+ my ( $self, $i ) = @_;
- # we have to terminate it if..
- if (
+ # Set a breakpoint AFTER the token at index $i in the _to_go arrays.
- # it is too long (final length may be different from
- # initial estimate). note: must allow 1 space for this
- # token
- $self->excess_line_length( $index_start_one_line_block,
- $max_index_to_go ) + $added_length >= 0
+ # Exceptions:
+ # - If the token at index $i is a blank, backup to $i-1 to
+ # get to the previous nonblank token.
+ # - For certain tokens, the break may be placed BEFORE the token
+ # at index $i, depending on user break preference settings.
+ # - If a break is made after an opening token, then a break will
+ # also be made before the corresponding closing token.
- # or if it has too many semicolons
- || ( $semicolons_before_block_self_destruct == 0
- && defined($K_last_nonblank_code)
- && $rLL->[$K_last_nonblank_code]->[_TYPE_] ne ';' )
- )
- {
- destroy_one_line_block();
- }
- }
+ # Returns '$i_nonblank':
+ # = index of the token after which the breakpoint was actually placed
+ # = undef if breakpoint was not set.
+ my $i_nonblank;
- # put a break before this closing curly brace if appropriate
- $self->end_batch()
- if ( $max_index_to_go >= 0
- && !$nobreak_BEFORE_BLOCK
- && $index_start_one_line_block == UNDEFINED_INDEX );
+ if ( !defined($i) || $i < 0 ) {
- # store the closing curly brace
- $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
+ # Calls with bad index $i are harmless but waste time and should
+ # be caught and eliminated during code development.
+ if (DEVEL_MODE) {
+ my ( $a, $b, $c ) = caller();
+ Fault(
+"Bad call to forced breakpoint from $a $b $c ; called with i=$i; please fix\n"
+ );
+ }
+ return;
+ }
- # ok, we just stored a closing curly brace. Often, but
- # not always, we want to end the line immediately.
- # So now we have to check for special cases.
+ # Break after token $i
+ $i_nonblank = $self->set_forced_breakpoint_AFTER($i);
- # if this '}' successfully ends a one-line block..
- my $is_one_line_block = 0;
- my $keep_going = 0;
- if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
+ # If we break at an opening container..break at the closing
+ my $set_closing;
+ if ( defined($i_nonblank)
+ && $is_opening_sequence_token{ $tokens_to_go[$i_nonblank] } )
+ {
+ $set_closing = 1;
+ $self->set_closing_breakpoint($i_nonblank);
+ }
- # Remember the type of token just before the
- # opening brace. It would be more general to use
- # a stack, but this will work for one-line blocks.
- $is_one_line_block =
- $types_to_go[$index_start_one_line_block];
+ DEBUG_FORCE && do {
+ my ( $a, $b, $c ) = caller();
+ my $msg =
+"FORCE $forced_breakpoint_count after call from $a $c with i=$i max=$max_index_to_go";
+ if ( !defined($i_nonblank) ) {
+ $i = EMPTY_STRING unless defined($i);
+ $msg .= " but could not set break after i='$i'\n";
+ }
+ else {
+ my $nobr = $nobreak_to_go[$i_nonblank];
+ $nobr = 0 if ( !defined($nobr) );
+ $msg .= <<EOM;
+set break after $i_nonblank: tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobr
+EOM
+ if ( defined($set_closing) ) {
+ $msg .=
+" Also set closing breakpoint corresponding to this token\n";
+ }
+ }
+ print STDOUT $msg;
+ };
- # we have to actually make it by removing tentative
- # breaks that were set within it
- $self->undo_forced_breakpoint_stack(0);
+ return $i_nonblank;
+ } ## end sub set_forced_breakpoint
- # For -lp, extend the nobreak to include a trailing
- # terminal ','. This is because the -lp indentation was
- # not known when making one-line blocks, so we may be able
- # to move the line back to fit. Otherwise we may create a
- # needlessly stranded comma on the next line.
- my $iend_nobreak = $max_index_to_go - 1;
- if ( $rOpts_line_up_parentheses
- && $next_nonblank_token_type eq ','
- && $Knnb eq $K_last )
- {
- my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
- my $is_excluded =
- $self->[_ris_excluded_lp_container_]->{$p_seqno};
- $iend_nobreak = $max_index_to_go if ( !$is_excluded );
- }
+ sub set_forced_breakpoint_AFTER {
+ my ( $self, $i ) = @_;
- $self->set_nobreaks( $index_start_one_line_block,
- $iend_nobreak );
+ # This routine is only called by sub set_forced_breakpoint and
+ # sub set_closing_breakpoint.
- # save starting block indexes so that sub correct_lp can
- # check and adjust -lp indentation (c098)
- push @{$ri_starting_one_line_block},
- $index_start_one_line_block;
+ # Set a breakpoint AFTER the token at index $i in the _to_go arrays.
- # then re-initialize for the next one-line block
- destroy_one_line_block();
+ # Exceptions:
+ # - If the token at index $i is a blank, backup to $i-1 to
+ # get to the previous nonblank token.
+ # - For certain tokens, the break may be placed BEFORE the token
+ # at index $i, depending on user break preference settings.
- # then decide if we want to break after the '}' ..
- # We will keep going to allow certain brace followers as in:
- # do { $ifclosed = 1; last } unless $losing;
- #
- # But make a line break if the curly ends a
- # significant block:
- if (
- (
- $is_block_without_semicolon{$block_type}
+ # Returns:
+ # - the index of the token after which the break was set, or
+ # - undef if no break was set
- # Follow users break point for
- # one line block types U & G, such as a 'try' block
- || $is_one_line_block =~ /^[UG]$/
- && $Ktoken_vars == $K_last
- )
+ return unless ( defined($i) && $i >= 0 );
- # if needless semicolon follows we handle it later
- && $next_nonblank_token ne ';'
- )
- {
- $self->end_batch()
- unless ($no_internal_newlines);
- }
- }
+ # Back up at a blank so we have a token to examine.
+ # This was added to fix for cases like b932 involving an '=' break.
+ if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
- # set string indicating what we need to look for brace follower
- # tokens
- if ( $is_if_unless_elsif_else{$block_type} ) {
- $rbrace_follower = undef;
- }
- elsif ( $block_type eq 'do' ) {
- $rbrace_follower = \%is_do_follower;
- if (
- $self->tight_paren_follows( $K_to_go[0], $Ktoken_vars )
- )
- {
- $rbrace_follower = { ')' => 1 };
- }
- }
+ # Never break between welded tokens
+ return
+ if ( $total_weld_count
+ && $self->[_rK_weld_right_]->{ $K_to_go[$i] } );
- # added eval for borris.t
- elsif ($is_sort_map_grep_eval{$block_type}
- || $is_one_line_block eq 'G' )
- {
- $rbrace_follower = undef;
- $keep_going = 1;
- }
+ my $token = $tokens_to_go[$i];
+ my $type = $types_to_go[$i];
- # anonymous sub
- elsif ( $self->[_ris_asub_block_]->{$type_sequence} ) {
- if ($is_one_line_block) {
+ # For certain tokens, use user settings to decide if we break before or
+ # after it
+ if ( $break_before_or_after_token{$token}
+ && ( $type eq $token || $type eq 'k' ) )
+ {
+ if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
+ }
- $rbrace_follower = \%is_anon_sub_1_brace_follower;
+ # breaks are forced before 'if' and 'unless'
+ elsif ( $is_if_unless{$token} && $type eq 'k' ) { $i-- }
- # Exceptions to help keep -lp intact, see git #74 ...
- # Exception 1: followed by '}' on this line
- if ( $Ktoken_vars < $K_last
- && $next_nonblank_token eq '}' )
- {
- $rbrace_follower = undef;
- $keep_going = 1;
- }
+ if ( $i >= 0 && $i <= $max_index_to_go ) {
+ my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
- # Exception 2: followed by '}' on next line if -lp set.
- # The -lp requirement allows the formatting to follow
- # old breaks when -lp is not used, minimizing changes.
- # Fixes issue c087.
- elsif ($Ktoken_vars == $K_last
- && $rOpts_line_up_parentheses )
- {
- my $K_closing_container =
- $self->[_K_closing_container_];
- my $K_opening_container =
- $self->[_K_opening_container_];
- my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
- my $Kc = $K_closing_container->{$p_seqno};
- my $is_excluded =
- $self->[_ris_excluded_lp_container_]->{$p_seqno};
- if ( defined($Kc)
- && $rLL->[$Kc]->[_TOKEN_] eq '}'
- && !$is_excluded
- && $Kc - $Ktoken_vars <= 2 )
- {
- $rbrace_follower = undef;
- $keep_going = 1;
- }
- }
- }
- else {
- $rbrace_follower = \%is_anon_sub_brace_follower;
- }
- }
+ if ( $i_nonblank >= 0
+ && !$nobreak_to_go[$i_nonblank]
+ && !$forced_breakpoint_to_go[$i_nonblank] )
+ {
+ $forced_breakpoint_to_go[$i_nonblank] = 1;
- # None of the above: specify what can follow a closing
- # brace of a block which is not an
- # if/elsif/else/do/sort/map/grep/eval
- # Testfiles:
- # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
- else {
- $rbrace_follower = \%is_other_brace_follower;
+ if ( $i_nonblank > $index_max_forced_break ) {
+ $index_max_forced_break = $i_nonblank;
}
+ $forced_breakpoint_count++;
+ $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ]
+ = $i_nonblank;
- # See if an elsif block is followed by another elsif or else;
- # complain if not.
- if ( $block_type eq 'elsif' ) {
+ # success
+ return $i_nonblank;
+ }
+ }
+ return;
+ } ## end sub set_forced_breakpoint_AFTER
- if ( $next_nonblank_token_type eq 'b' ) { # end of line?
- $looking_for_else = 1; # ok, check on next line
- }
- else {
- ## /^(elsif|else)$/
- if ( !$is_elsif_else{$next_nonblank_token} ) {
- write_logfile_entry("No else block :(\n");
- }
- }
- }
+ sub clear_breakpoint_undo_stack {
+ my ($self) = @_;
+ $forced_breakpoint_undo_count = 0;
+ return;
+ }
- # keep going after certain block types (map,sort,grep,eval)
- # added eval for borris.t
- if ($keep_going) {
+ use constant DEBUG_UNDOBP => 0;
- # keep going
- }
+ sub undo_forced_breakpoint_stack {
- # if no more tokens, postpone decision until re-entering
- elsif ( ( $next_nonblank_token_type eq 'b' )
- && $rOpts_add_newlines )
- {
- unless ($rbrace_follower) {
- $self->end_batch()
- unless ( $no_internal_newlines
- || $max_index_to_go < 0 );
- }
- }
- elsif ($rbrace_follower) {
+ my ( $self, $i_start ) = @_;
- unless ( $rbrace_follower->{$next_nonblank_token} ) {
- $self->end_batch()
- unless ( $no_internal_newlines
- || $max_index_to_go < 0 );
- }
- $rbrace_follower = undef;
- }
+ # Given $i_start, a non-negative index the 'undo stack' of breakpoints,
+ # remove all breakpoints from the top of the 'undo stack' down to and
+ # including index $i_start.
- else {
- $self->end_batch()
- unless ( $no_internal_newlines
- || $max_index_to_go < 0 );
- }
+ # The 'undo stack' is a stack of all breakpoints made for a batch of
+ # code.
- } ## end treatment of closing block token
+ if ( $i_start < 0 ) {
+ $i_start = 0;
+ my ( $a, $b, $c ) = caller();
- #------------------------------
- # handle here_doc target string
- #------------------------------
- elsif ( $type eq 'h' ) {
+ # Bad call, can only be due to a recent programming change.
+ Fault(
+"Program Bug: undo_forced_breakpoint_stack from $a $c has bad i=$i_start "
+ ) if (DEVEL_MODE);
+ return;
+ }
- # no newlines after seeing here-target
- $no_internal_newlines = 2;
- ## destroy_one_line_block(); # deleted to fix case b529
- $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
+ while ( $forced_breakpoint_undo_count > $i_start ) {
+ my $i =
+ $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
+ if ( $i >= 0 && $i <= $max_index_to_go ) {
+ $forced_breakpoint_to_go[$i] = 0;
+ $forced_breakpoint_count--;
+
+ DEBUG_UNDOBP && do {
+ my ( $a, $b, $c ) = caller();
+ print STDOUT
+"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
+ };
}
- #-----------------------------
- # handle all other token types
- #-----------------------------
+ # shouldn't happen, but not a critical error
else {
+ if (DEVEL_MODE) {
+ my ( $a, $b, $c ) = caller();
+ Fault(<<EOM);
+Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go
+EOM
+ }
+ }
+ }
+ return;
+ } ## end sub undo_forced_breakpoint_stack
+} ## end closure set_forced_breakpoint
- $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
+{ ## begin closure set_closing_breakpoint
- # break after a label if requested
- if ( $rOpts_break_after_labels
- && $type eq 'J'
- && $rOpts_break_after_labels == 1 )
- {
- $self->end_batch()
- unless ($no_internal_newlines);
- }
+ my %postponed_breakpoint;
+
+ sub initialize_postponed_breakpoint {
+ %postponed_breakpoint = ();
+ return;
+ }
+
+ sub has_postponed_breakpoint {
+ my ($seqno) = @_;
+ return $postponed_breakpoint{$seqno};
+ }
+
+ sub set_closing_breakpoint {
+
+ # set a breakpoint at a matching closing token
+ my ( $self, $i_break ) = @_;
+
+ if ( defined( $mate_index_to_go[$i_break] ) ) {
+
+ # Don't reduce the '2' in the statement below.
+ # Test files: attrib.t, BasicLyx.pm.html
+ if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
+
+ # break before } ] and ), but sub set_forced_breakpoint will decide
+ # to break before or after a ? and :
+ my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
+ $self->set_forced_breakpoint_AFTER(
+ $mate_index_to_go[$i_break] - $inc );
}
+ }
+ else {
+ my $type_sequence = $type_sequence_to_go[$i_break];
+ if ($type_sequence) {
+ $postponed_breakpoint{$type_sequence} = 1;
+ }
+ }
+ return;
+ } ## end sub set_closing_breakpoint
+} ## end closure set_closing_breakpoint
- # remember previous nonblank, non-comment OUTPUT token
- $K_last_nonblank_code = $Ktoken_vars;
+#########################################
+# CODE SECTION 9: Process batches of code
+#########################################
- } ## end of loop over all tokens in this line
+{ ## begin closure grind_batch_of_CODE
- # if there is anything left in the output buffer ...
- if ( $max_index_to_go >= 0 ) {
+ # The routines in this closure begin the processing of a 'batch' of code.
- my $type = $rLL->[$K_last]->[_TYPE_];
- my $break_flag = $self->[_rbreak_after_Klast_]->{$K_last};
+ # A variable to keep track of consecutive nonblank lines so that we can
+ # insert occasional blanks
+ my @nonblank_lines_at_depth;
- # we have to flush ..
- if (
+ # A variable to remember maximum size of previous batches; this is needed
+ # by the logical padding routine
+ my $peak_batch_size;
+ my $batch_count;
- # if there is a side comment...
- $type eq '#'
+ # variables to keep track of indentation of unmatched containers.
+ my %saved_opening_indentation;
- # if this line ends in a quote
- # NOTE: This is critically important for insuring that quoted
- # lines do not get processed by things like -sot and -sct
- || $in_quote
+ sub initialize_grind_batch_of_CODE {
+ @nonblank_lines_at_depth = ();
+ $peak_batch_size = 0;
+ $batch_count = 0;
+ %saved_opening_indentation = ();
+ return;
+ } ## end sub initialize_grind_batch_of_CODE
- # if this is a VERSION statement
- || $CODE_type eq 'VER'
+ # sub grind_batch_of_CODE receives sections of code which are the longest
+ # possible lines without a break. In other words, it receives what is left
+ # after applying all breaks forced by blank lines, block comments, side
+ # comments, pod text, and structural braces. Its job is to break this code
+ # down into smaller pieces, if necessary, which fit within the maximum
+ # allowed line length. Then it sends the resulting lines of code on down
+ # the pipeline to the VerticalAligner package, breaking the code into
+ # continuation lines as necessary. The batch of tokens are in the "to_go"
+ # arrays. The name 'grind' is slightly suggestive of a machine continually
+ # breaking down long lines of code, but mainly it is unique and easy to
+ # remember and find with an editor search.
- # to keep a label at the end of a line
- || ( $type eq 'J' && $rOpts_break_after_labels != 2 )
+ # The two routines 'process_line_of_CODE' and 'grind_batch_of_CODE' work
+ # together in the following way:
- # if we have a hard break request
- || $break_flag && $break_flag != 2
+ # - 'process_line_of_CODE' receives the original INPUT lines one-by-one and
+ # combines them into the largest sequences of tokens which might form a new
+ # line.
+ # - 'grind_batch_of_CODE' determines which tokens will form the OUTPUT
+ # lines.
- # if we are instructed to keep all old line breaks
- || !$rOpts->{'delete-old-newlines'}
+ # So sub 'process_line_of_CODE' builds up the longest possible continuous
+ # sequences of tokens, regardless of line length, and then
+ # grind_batch_of_CODE breaks these sequences back down into the new output
+ # lines.
- # if this is a line of the form 'use overload'. A break here in
- # the input file is a good break because it will allow the
- # operators which follow to be formatted well. Without this
- # break the formatting with -ci=4 -xci is poor, for example.
+ # Sub 'grind_batch_of_CODE' ships its output lines to the vertical aligner.
- # use overload
- # '+' => sub {
- # print length $_[2], "\n";
- # my ( $x, $y ) = _order(@_);
- # Number::Roman->new( int $x + $y );
- # },
- # '-' => sub {
- # my ( $x, $y ) = _order(@_);
- # Number::Roman->new( int $x - $y );
- # };
- || ( $max_index_to_go == 2
- && $types_to_go[0] eq 'k'
- && $tokens_to_go[0] eq 'use'
- && $tokens_to_go[$max_index_to_go] eq 'overload' )
- )
- {
- destroy_one_line_block();
- $self->end_batch();
- }
+ use constant DEBUG_GRIND => 0;
+
+ sub check_grind_input {
+
+ # Check for valid input to sub grind_batch_of_CODE. An error here
+ # would most likely be due to an error in 'sub store_token_to_go'.
+ my ($self) = @_;
+
+ # Be sure there are tokens in the batch
+ if ( $max_index_to_go < 0 ) {
+ Fault(<<EOM);
+sub grind incorrectly called with max_index_to_go=$max_index_to_go
+EOM
+ }
+ my $Klimit = $self->[_Klimit_];
- else {
+ # The local batch tokens must be a continuous part of the global token
+ # array.
+ my $KK;
+ foreach my $ii ( 0 .. $max_index_to_go ) {
- # Check for a soft break request
- if ( $break_flag && $break_flag == 2 ) {
- $self->set_forced_breakpoint($max_index_to_go);
- }
+ my $Km = $KK;
- # mark old line breakpoints in current output stream
- if ( !$rOpts_ignore_old_breakpoints
- || $self->[_ris_essential_old_breakpoint_]->{$K_last} )
- {
- my $jobp = $max_index_to_go;
- if ( $types_to_go[$max_index_to_go] eq 'b'
- && $max_index_to_go > 0 )
- {
- $jobp--;
- }
- $old_breakpoint_to_go[$jobp] = 1;
- }
+ $KK = $K_to_go[$ii];
+ if ( !defined($KK) || $KK < 0 || $KK > $Klimit ) {
+ $KK = '(undef)' unless defined($KK);
+ Fault(<<EOM);
+at batch index at i=$ii, the value of K_to_go[$ii] = '$KK' is out of the valid range (0 - $Klimit)
+EOM
}
- }
+ if ( $ii > 0 && $KK != $Km + 1 ) {
+ my $im = $ii - 1;
+ Fault(<<EOM);
+Non-sequential K indexes: i=$im has Km=$Km; but i=$ii has K=$KK; expecting K = Km+1
+EOM
+ }
+ }
return;
- } ## end sub process_line_of_CODE
-} ## end closure process_line_of_CODE
+ } ## end sub check_grind_input
-sub tight_paren_follows {
+ # This filter speeds up a critical if-test
+ my %quick_filter;
- my ( $self, $K_to_go_0, $K_ic ) = @_;
+ BEGIN {
+ my @q = qw# L { ( [ R ] ) } ? : f => #;
+ push @q, ',';
+ @quick_filter{@q} = (1) x scalar(@q);
+ }
- # Input parameters:
- # $K_to_go_0 = first token index K of this output batch (=K_to_go[0])
- # $K_ic = index of the closing do brace (=K_to_go[$max_index_to_go])
- # Return parameter:
- # false if we want a break after the closing do brace
- # true if we do not want a break after the closing do brace
+ sub grind_batch_of_CODE {
- # We are at the closing brace of a 'do' block. See if this brace is
- # followed by a closing paren, and if so, set a flag which indicates
- # that we do not want a line break between the '}' and ')'.
+ my ($self) = @_;
- # xxxxx ( ...... do { ... } ) {
- # ^-------looking at this brace, K_ic
+ #-----------------------------------------------------------------
+ # This sub directs the formatting of one complete batch of tokens.
+ # The tokens of the batch are in the '_to_go' arrays.
+ #-----------------------------------------------------------------
- # Subscript notation:
- # _i = inner container (braces in this case)
- # _o = outer container (parens in this case)
- # _io = inner opening = '{'
- # _ic = inner closing = '}'
- # _oo = outer opening = '('
- # _oc = outer closing = ')'
+ my $this_batch = $self->[_this_batch_];
+ $this_batch->[_peak_batch_size_] = $peak_batch_size;
+ $this_batch->[_batch_count_] = ++$batch_count;
- # |--K_oo |--K_oc = outer container
- # xxxxx ( ...... do { ...... } ) {
- # |--K_io |--K_ic = inner container
+ $self->check_grind_input() if (DEVEL_MODE);
- # In general, the safe thing to do is return a 'false' value
- # if the statement appears to be complex. This will have
- # the downstream side-effect of opening up outer containers
- # to help make complex code readable. But for simpler
- # do blocks it can be preferable to keep the code compact
- # by returning a 'true' value.
+ # This routine is only called from sub flush_batch_of_code, so that
+ # routine is a better spot for debugging.
+ DEBUG_GRIND && do {
+ my $token = my $type = EMPTY_STRING;
+ if ( $max_index_to_go >= 0 ) {
+ $token = $tokens_to_go[$max_index_to_go];
+ $type = $types_to_go[$max_index_to_go];
+ }
+ my $output_str = EMPTY_STRING;
+ if ( $max_index_to_go > 20 ) {
+ my $mm = $max_index_to_go - 10;
+ $output_str =
+ join( EMPTY_STRING, @tokens_to_go[ 0 .. 10 ] ) . " ... "
+ . join( EMPTY_STRING,
+ @tokens_to_go[ $mm .. $max_index_to_go ] );
+ }
+ else {
+ $output_str = join EMPTY_STRING,
+ @tokens_to_go[ 0 .. $max_index_to_go ];
+ }
+ print STDERR <<EOM;
+grind got batch number $batch_count with $max_index_to_go tokens, last type '$type' tok='$token', text:
+$output_str
+EOM
+ };
- return unless defined($K_ic);
- my $rLL = $self->[_rLL_];
+ # Remove any trailing blank, which is possible (c192 has example)
+ if ( $max_index_to_go >= 0 && $types_to_go[$max_index_to_go] eq 'b' ) {
+ $max_index_to_go -= 1;
+ }
- # we should only be called at a closing block
- my $seqno_i = $rLL->[$K_ic]->[_TYPE_SEQUENCE_];
- return unless ($seqno_i); # shouldn't happen;
+ return if ( $max_index_to_go < 0 );
- # This only applies if the next nonblank is a ')'
- my $K_oc = $self->K_next_nonblank($K_ic);
- return unless defined($K_oc);
- my $token_next = $rLL->[$K_oc]->[_TOKEN_];
- return unless ( $token_next eq ')' );
+ if ($rOpts_line_up_parentheses) {
+ $self->set_lp_indentation();
+ }
- my $seqno_o = $rLL->[$K_oc]->[_TYPE_SEQUENCE_];
- my $K_io = $self->[_K_opening_container_]->{$seqno_i};
- my $K_oo = $self->[_K_opening_container_]->{$seqno_o};
- return unless ( defined($K_io) && defined($K_oo) );
+ #--------------------------------------------------
+ # Shortcut for block comments
+ # Note that this shortcut does not work for -lp yet
+ #--------------------------------------------------
+ elsif ( !$max_index_to_go && $types_to_go[0] eq '#' ) {
+ my $ibeg = 0;
+ $this_batch->[_ri_first_] = [$ibeg];
+ $this_batch->[_ri_last_] = [$ibeg];
- # RULE 1: Do not break before a closing signature paren
- # (regardless of complexity). This is a fix for issue git#22.
- # Looking for something like:
- # sub xxx ( ... do { ... } ) {
- # ^----- next block_type
- my $K_test = $self->K_next_nonblank($K_oc);
- if ( defined($K_test) && $rLL->[$K_test]->[_TYPE_] eq '{' ) {
- my $seqno_test = $rLL->[$K_test]->[_TYPE_SEQUENCE_];
- if ($seqno_test) {
- if ( $self->[_ris_asub_block_]->{$seqno_test}
- || $self->[_ris_sub_block_]->{$seqno_test} )
- {
- return 1;
- }
+ $self->convey_batch_to_vertical_aligner();
+
+ my $level = $levels_to_go[$ibeg];
+ $self->[_last_line_leading_type_] = $types_to_go[$ibeg];
+ $self->[_last_line_leading_level_] = $level;
+ $nonblank_lines_at_depth[$level] = 1;
+ return;
}
- }
- # RULE 2: Break if the contents within braces appears to be 'complex'. We
- # base this decision on the number of tokens between braces.
+ #-------------
+ # Normal route
+ #-------------
- # xxxxx ( ... do { ... } ) {
- # ^^^^^^
+ my $rLL = $self->[_rLL_];
- # Although very simple, it has the advantages of (1) being insensitive to
- # changes in lengths of identifier names, (2) easy to understand, implement
- # and test. A test case for this is 't/snippets/long_line.in'.
+ #-------------------------------------------------------
+ # Loop over the batch to initialize some batch variables
+ #-------------------------------------------------------
+ my $comma_count_in_batch = 0;
+ my @colon_list;
+ my @ix_seqno_controlling_ci;
+ my %comma_arrow_count;
+ my $comma_arrow_count_contained = 0;
+ my @unmatched_closing_indexes_in_this_batch;
+ my @unmatched_opening_indexes_in_this_batch;
- # Example: $K_ic - $K_oo = 9 [Pass Rule 2]
- # if ( do { $2 !~ /&/ } ) { ... }
+ my @i_for_semicolon;
+ foreach my $i ( 0 .. $max_index_to_go ) {
- # Example: $K_ic - $K_oo = 10 [Pass Rule 2]
- # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
+ if ( $types_to_go[$i] eq 'b' ) {
+ $inext_to_go[$i] = $inext_to_go[ $i - 1 ] = $i + 1;
+ next;
+ }
- # Example: $K_ic - $K_oo = 20 [Fail Rule 2]
- # test_zero_args( "do-returned list slice", do { ( 10, 11 )[ 2, 3 ]; });
+ $inext_to_go[$i] = $i + 1;
- return if ( $K_ic - $K_io > 16 );
+ # This is an optional shortcut to save a bit of time by skipping
+ # most tokens. Note: the filter may need to be updated if the
+ # next 'if' tests are ever changed to include more token types.
+ next if ( !$quick_filter{ $types_to_go[$i] } );
- # RULE 3: break if the code between the opening '(' and the '{' is 'complex'
- # As with the previous rule, we decide based on the token count
+ my $type = $types_to_go[$i];
- # xxxxx ( ... do { ... } ) {
- # ^^^^^^^^
+ # gather info needed by sub break_long_lines
+ if ( $type_sequence_to_go[$i] ) {
+ my $seqno = $type_sequence_to_go[$i];
+ my $token = $tokens_to_go[$i];
- # Example: $K_ic - $K_oo = 9 [Pass Rule 2]
- # $K_io - $K_oo = 4 [Pass Rule 3]
- # if ( do { $2 !~ /&/ } ) { ... }
+ # remember indexes of any tokens controlling xci
+ # in this batch. This list is needed by sub undo_ci.
+ if ( $self->[_ris_seqno_controlling_ci_]->{$seqno} ) {
+ push @ix_seqno_controlling_ci, $i;
+ }
- # Example: $K_ic - $K_oo = 10 [Pass rule 2]
- # $K_io - $K_oo = 9 [Pass rule 3]
- # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
+ if ( $is_opening_sequence_token{$token} ) {
+ if ( $self->[_rbreak_container_]->{$seqno} ) {
+ $self->set_forced_breakpoint($i);
+ }
+ push @unmatched_opening_indexes_in_this_batch, $i;
+ if ( $type eq '?' ) {
+ push @colon_list, $type;
+ }
+ }
+ elsif ( $is_closing_sequence_token{$token} ) {
- return if ( $K_io - $K_oo > 9 );
+ if ( $i > 0 && $self->[_rbreak_container_]->{$seqno} ) {
+ $self->set_forced_breakpoint( $i - 1 );
+ }
- # RULE 4: Break if we have already broken this batch of output tokens
- return if ( $K_oo < $K_to_go_0 );
+ my $i_mate = pop @unmatched_opening_indexes_in_this_batch;
+ if ( defined($i_mate) && $i_mate >= 0 ) {
+ if ( $type_sequence_to_go[$i_mate] ==
+ $type_sequence_to_go[$i] )
+ {
+ $mate_index_to_go[$i] = $i_mate;
+ $mate_index_to_go[$i_mate] = $i;
+ my $cac = $comma_arrow_count{$seqno};
+ $comma_arrow_count_contained += $cac if ($cac);
+ }
+ else {
+ push @unmatched_opening_indexes_in_this_batch,
+ $i_mate;
+ push @unmatched_closing_indexes_in_this_batch, $i;
+ }
+ }
+ else {
+ push @unmatched_closing_indexes_in_this_batch, $i;
+ }
+ if ( $type eq ':' ) {
+ push @colon_list, $type;
+ }
+ } ## end elsif ( $is_closing_sequence_token...)
- # RULE 5: Break if input is not on one line
- # For example, we will set the flag for the following expression
- # written in one line:
+ } ## end if ($seqno)
- # This has: $K_ic - $K_oo = 10 [Pass rule 2]
- # $K_io - $K_oo = 8 [Pass rule 3]
- # $self->debug( 'Error: ' . do { local $/; <$err> } );
+ elsif ( $type eq ',' ) { $comma_count_in_batch++; }
+ elsif ( $type eq '=>' ) {
+ if (@unmatched_opening_indexes_in_this_batch) {
+ my $j = $unmatched_opening_indexes_in_this_batch[-1];
+ my $seqno = $type_sequence_to_go[$j];
+ $comma_arrow_count{$seqno}++;
+ }
+ }
+ elsif ( $type eq 'f' ) {
+ push @i_for_semicolon, $i;
+ }
- # but we break after the brace if it is on multiple lines on input, since
- # the user may prefer it on multiple lines:
+ } ## end for ( my $i = 0 ; $i <=...)
- # [Fail rule 5]
- # $self->debug(
- # 'Error: ' . do { local $/; <$err> }
- # );
+ # Break at a single interior C-style for semicolon in this batch (c154)
+ if ( @i_for_semicolon && @i_for_semicolon == 1 ) {
+ my $i = $i_for_semicolon[0];
+ my $inext = $inext_to_go[$i];
+ if ( $inext <= $max_index_to_go && $types_to_go[$inext] ne '#' ) {
+ $self->set_forced_breakpoint($i);
+ }
+ }
- if ( !$rOpts_ignore_old_breakpoints ) {
- my $iline_oo = $rLL->[$K_oo]->[_LINE_INDEX_];
- my $iline_oc = $rLL->[$K_oc]->[_LINE_INDEX_];
- return if ( $iline_oo != $iline_oc );
- }
+ my $is_unbalanced_batch = @unmatched_opening_indexes_in_this_batch +
+ @unmatched_closing_indexes_in_this_batch;
- # OK to keep the paren tight
- return 1;
-} ## end sub tight_paren_follows
+ if (@unmatched_opening_indexes_in_this_batch) {
+ $this_batch->[_runmatched_opening_indexes_] =
+ \@unmatched_opening_indexes_in_this_batch;
+ }
-my %is_brace_semicolon_colon;
+ if (@ix_seqno_controlling_ci) {
+ $this_batch->[_rix_seqno_controlling_ci_] =
+ \@ix_seqno_controlling_ci;
+ }
-BEGIN {
- my @q = qw( { } ; : );
- @is_brace_semicolon_colon{@q} = (1) x scalar(@q);
-}
+ #------------------------
+ # Set special breakpoints
+ #------------------------
+ # If this line ends in a code block brace, set breaks at any
+ # previous closing code block braces to breakup a chain of code
+ # blocks on one line. This is very rare but can happen for
+ # user-defined subs. For example we might be looking at this:
+ # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
+ my $saw_good_break; # flag to force breaks even if short line
+ if (
+
+ # looking for opening or closing block brace
+ $block_type_to_go[$max_index_to_go]
+
+ # never any good breaks if just one token
+ && $max_index_to_go > 0
+
+ # but not one of these which are never duplicated on a line:
+ # until|while|for|if|elsif|else
+ && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go]
+ }
+ )
+ {
+ my $lev = $nesting_depth_to_go[$max_index_to_go];
-sub starting_one_line_block {
+ # Walk backwards from the end and
+ # set break at any closing block braces at the same level.
+ # But quit if we are not in a chain of blocks.
+ foreach my $i ( reverse( 0 .. $max_index_to_go - 1 ) ) {
+ last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
+ next if ( $levels_to_go[$i] > $lev ); # skip past higher level
- # after seeing an opening curly brace, look for the closing brace and see
- # if the entire block will fit on a line. This routine is not always right
- # so a check is made later (at the closing brace) to make sure we really
- # have a one-line block. We have to do this preliminary check, though,
- # because otherwise we would always break at a semicolon within a one-line
- # block if the block contains multiple statements.
+ if ( $block_type_to_go[$i] ) {
+ if ( $tokens_to_go[$i] eq '}' ) {
+ $self->set_forced_breakpoint($i);
+ $saw_good_break = 1;
+ }
+ }
- my ( $self, $Kj, $K_last_nonblank, $K_last ) = @_;
+ # quit if we see anything besides words, function, blanks
+ # at this level
+ elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
+ }
+ }
- my $rbreak_container = $self->[_rbreak_container_];
- my $rshort_nested = $self->[_rshort_nested_];
- my $rLL = $self->[_rLL_];
- my $K_opening_container = $self->[_K_opening_container_];
- my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ #-----------------------------------------------
+ # insertion of any blank lines before this batch
+ #-----------------------------------------------
- # kill any current block - we can only go 1 deep
- destroy_one_line_block();
+ my $imin = 0;
+ my $imax = $max_index_to_go;
- # return value:
- # 1=distance from start of block to opening brace exceeds line length
- # 0=otherwise
+ # trim any blank tokens - for safety, but should not be necessary
+ if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
+ if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
- my $i_start = 0;
+ if ( $imin > $imax ) {
+ if (DEVEL_MODE) {
+ my $K0 = $K_to_go[0];
+ my $lno = EMPTY_STRING;
+ if ( defined($K0) ) { $lno = $rLL->[$K0]->[_LINE_INDEX_] + 1 }
+ Fault(<<EOM);
+Strange: received batch containing only blanks near input line $lno: after trimming imin=$imin, imax=$imax
+EOM
+ }
+ return;
+ }
- # This routine should not have been called if there are no tokens in the
- # 'to_go' arrays of previously stored tokens. A previous call to
- # 'store_token_to_go' should have stored an opening brace. An error here
- # indicates that a programming change may have caused a flush operation to
- # clean out the previously stored tokens.
- if ( !defined($max_index_to_go) || $max_index_to_go < 0 ) {
- Fault("program bug: store_token_to_go called incorrectly\n")
- if (DEVEL_MODE);
- return 0;
- }
+ my $last_line_leading_type = $self->[_last_line_leading_type_];
+ my $last_line_leading_level = $self->[_last_line_leading_level_];
- # Return if block should be broken
- my $type_sequence_j = $rLL->[$Kj]->[_TYPE_SEQUENCE_];
- if ( $rbreak_container->{$type_sequence_j} ) {
- return 0;
- }
+ my $leading_type = $types_to_go[0];
+ my $leading_level = $levels_to_go[0];
- my $ris_bli_container = $self->[_ris_bli_container_];
- my $is_bli = $ris_bli_container->{$type_sequence_j};
+ # add blank line(s) before certain key types but not after a comment
+ if ( $last_line_leading_type ne '#' ) {
+ my $blank_count = 0;
+ my $leading_token = $tokens_to_go[0];
- my $block_type = $rblock_type_of_seqno->{$type_sequence_j};
- $block_type = EMPTY_STRING unless ( defined($block_type) );
+ # break before certain key blocks except one-liners
+ if ( $leading_type eq 'k' ) {
+ if ( $leading_token eq 'BEGIN' || $leading_token eq 'END' ) {
+ $blank_count = $rOpts->{'blank-lines-before-subs'}
+ if ( terminal_type_i( 0, $max_index_to_go ) ne '}' );
+ }
- my $previous_nonblank_token = EMPTY_STRING;
- my $i_last_nonblank = -1;
- if ( defined($K_last_nonblank) ) {
- $i_last_nonblank = $K_last_nonblank - $K_to_go[0];
- if ( $i_last_nonblank >= 0 ) {
- $previous_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
- }
- }
+ # Break before certain block types if we haven't had a
+ # break at this level for a while. This is the
+ # difficult decision..
+ elsif ($last_line_leading_type ne 'b'
+ && $is_if_unless_while_until_for_foreach{$leading_token} )
+ {
+ my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
+ if ( !defined($lc) ) { $lc = 0 }
- # find the starting keyword for this block (such as 'if', 'else', ...)
- if (
- $max_index_to_go == 0
- ##|| $block_type =~ /^[\{\}\;\:]$/
- || $is_brace_semicolon_colon{$block_type}
- || substr( $block_type, 0, 7 ) eq 'package'
- )
- {
- $i_start = $max_index_to_go;
- }
+ # patch for RT #128216: no blank line inserted at a level
+ # change
+ if ( $levels_to_go[0] != $last_line_leading_level ) {
+ $lc = 0;
+ }
- # the previous nonblank token should start these block types
- elsif (
- $i_last_nonblank >= 0
- && ( $previous_nonblank_token eq $block_type
- || $self->[_ris_asub_block_]->{$type_sequence_j}
- || $self->[_ris_sub_block_]->{$type_sequence_j}
- || substr( $block_type, -2, 2 ) eq '()' )
- )
- {
- $i_start = $i_last_nonblank;
+ if ( $rOpts->{'blanks-before-blocks'}
+ && $lc >= $rOpts->{'long-block-line-count'}
+ && $self->consecutive_nonblank_lines() >=
+ $rOpts->{'long-block-line-count'}
+ && terminal_type_i( 0, $max_index_to_go ) ne '}' )
+ {
+ $blank_count = 1;
+ }
+ }
+ }
- # For signatures and extended syntax ...
- # If this brace follows a parenthesized list, we should look back to
- # find the keyword before the opening paren because otherwise we might
- # form a one line block which stays intact, and cause the parenthesized
- # expression to break open. That looks bad.
- if ( $tokens_to_go[$i_start] eq ')' ) {
+ # blank lines before subs except declarations and one-liners
+ elsif ( $leading_type eq 'i' ) {
+ my $special_identifier =
+ $self->[_ris_special_identifier_token_]->{$leading_token};
+ if ($special_identifier) {
+ ## $leading_token =~ /$SUB_PATTERN/
+ if ( $special_identifier eq 'sub' ) {
+
+ $blank_count = $rOpts->{'blank-lines-before-subs'}
+ if ( terminal_type_i( 0, $max_index_to_go ) !~
+ /^[\;\}\,]$/ );
+ }
- # Find the opening paren
- my $K_start = $K_to_go[$i_start];
- return 0 unless defined($K_start);
- my $seqno = $type_sequence_to_go[$i_start];
- return 0 unless ($seqno);
- my $K_opening = $K_opening_container->{$seqno};
- return 0 unless defined($K_opening);
- my $i_opening = $i_start + ( $K_opening - $K_start );
+ # break before all package declarations
+ ## substr( $leading_token, 0, 8 ) eq 'package '
+ elsif ( $special_identifier eq 'package' ) {
- # give up if not on this line
- return 0 unless ( $i_opening >= 0 );
- $i_start = $i_opening; ##$index_max_forced_break + 1;
+ # ... except in a very short eval block
+ my $pseqno = $parent_seqno_to_go[0];
+ $blank_count = $rOpts->{'blank-lines-before-packages'}
+ if (
+ !$self->[_ris_short_broken_eval_block_]->{$pseqno}
+ );
+ }
+ }
+ }
- # go back one token before the opening paren
- if ( $i_start > 0 ) { $i_start-- }
- if ( $types_to_go[$i_start] eq 'b' && $i_start > 0 ) { $i_start--; }
- my $lev = $levels_to_go[$i_start];
- if ( $lev > $rLL->[$Kj]->[_LEVEL_] ) { return 0 }
- }
- }
+ # Check for blank lines wanted before a closing brace
+ elsif ( $leading_token eq '}' ) {
+ if ( $rOpts->{'blank-lines-before-closing-block'}
+ && $block_type_to_go[0]
+ && $block_type_to_go[0] =~
+ /$blank_lines_before_closing_block_pattern/ )
+ {
+ my $nblanks = $rOpts->{'blank-lines-before-closing-block'};
+ if ( $nblanks > $blank_count ) {
+ $blank_count = $nblanks;
+ }
+ }
+ }
- elsif ( $previous_nonblank_token eq ')' ) {
+ if ($blank_count) {
- # For something like "if (xxx) {", the keyword "if" will be
- # just after the most recent break. This will be 0 unless
- # we have just killed a one-line block and are starting another.
- # (doif.t)
- # Note: cannot use inext_index_to_go[] here because that array
- # is still being constructed.
- $i_start = $index_max_forced_break + 1;
- if ( $types_to_go[$i_start] eq 'b' ) {
- $i_start++;
+ # future: send blank line down normal path to VerticalAligner?
+ $self->flush_vertical_aligner();
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->require_blank_code_lines($blank_count);
+ }
}
- # Patch to avoid breaking short blocks defined with extended_syntax:
- # Strip off any trailing () which was added in the parser to mark
- # the opening keyword. For example, in the following
- # create( TypeFoo $e) {$bubba}
- # the blocktype would be marked as create()
- my $stripped_block_type = $block_type;
- if ( substr( $block_type, -2, 2 ) eq '()' ) {
- $stripped_block_type = substr( $block_type, 0, -2 );
+ # update blank line variables and count number of consecutive
+ # non-blank, non-comment lines at this level
+ if ( $leading_level == $last_line_leading_level
+ && $leading_type ne '#'
+ && defined( $nonblank_lines_at_depth[$leading_level] ) )
+ {
+ $nonblank_lines_at_depth[$leading_level]++;
}
- unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
- return 0;
+ else {
+ $nonblank_lines_at_depth[$leading_level] = 1;
}
- }
- # patch for SWITCH/CASE to retain one-line case/when blocks
- elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
+ $self->[_last_line_leading_type_] = $leading_type;
+ $self->[_last_line_leading_level_] = $leading_level;
- # Note: cannot use inext_index_to_go[] here because that array
- # is still being constructed.
- $i_start = $index_max_forced_break + 1;
- if ( $types_to_go[$i_start] eq 'b' ) {
- $i_start++;
- }
- unless ( $tokens_to_go[$i_start] eq $block_type ) {
- return 0;
- }
- }
+ #--------------------------
+ # scan lists and long lines
+ #--------------------------
- else {
- return 1;
- }
+ # Flag to remember if we called sub 'pad_array_to_go'.
+ # Some routines (break_lists(), break_long_lines() ) need some
+ # extra tokens added at the end of the batch. Most batches do not
+ # use these routines, so we will avoid calling 'pad_array_to_go'
+ # unless it is needed.
+ my $called_pad_array_to_go;
- my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
+ # set all forced breakpoints for good list formatting
+ my $is_long_line;
+ my $multiple_old_lines_in_batch;
+ if ( $max_index_to_go > 0 ) {
+ $is_long_line =
+ $self->excess_line_length( $imin, $max_index_to_go ) > 0;
- my $maximum_line_length =
- $maximum_line_length_at_level[ $levels_to_go[$i_start] ];
+ my $Kbeg = $K_to_go[0];
+ my $Kend = $K_to_go[$max_index_to_go];
+ $multiple_old_lines_in_batch =
+ $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_];
+ }
- # see if block starting location is too great to even start
- if ( $pos > $maximum_line_length ) {
- return 1;
- }
+ my $rbond_strength_bias = [];
+ if (
+ $is_long_line
+ || $multiple_old_lines_in_batch
- # See if everything to the closing token will fit on one line
- # This is part of an update to fix cases b562 .. b983
- my $K_closing = $self->[_K_closing_container_]->{$type_sequence_j};
- return 0 unless ( defined($K_closing) );
- my $container_length = $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
- $rLL->[$Kj]->[_CUMULATIVE_LENGTH_];
+ # must always call break_lists() with unbalanced batches because
+ # it is maintaining some stacks
+ || $is_unbalanced_batch
- my $excess = $pos + 1 + $container_length - $maximum_line_length;
+ # call break_lists if we might want to break at commas
+ || (
+ $comma_count_in_batch
+ && ( $rOpts_maximum_fields_per_table > 0
+ && $rOpts_maximum_fields_per_table <= $comma_count_in_batch
+ || $rOpts_comma_arrow_breakpoints == 0 )
+ )
- # Add a small tolerance for welded tokens (case b901)
- if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence_j) ) {
- $excess += 2;
- }
+ # call break_lists if user may want to break open some one-line
+ # hash references
+ || ( $comma_arrow_count_contained
+ && $rOpts_comma_arrow_breakpoints != 3 )
+ )
+ {
+ # add a couple of extra terminal blank tokens
+ $self->pad_array_to_go();
+ $called_pad_array_to_go = 1;
+
+ my $sgb = $self->break_lists( $is_long_line, $rbond_strength_bias );
+ $saw_good_break ||= $sgb;
+ }
+
+ # let $ri_first and $ri_last be references to lists of
+ # first and last tokens of line fragments to output..
+ my ( $ri_first, $ri_last );
- if ( $excess > 0 ) {
+ #-----------------------------
+ # a single token uses one line
+ #-----------------------------
+ if ( !$max_index_to_go ) {
+ $ri_first = [$imin];
+ $ri_last = [$imax];
+ }
- # line is too long... there is no chance of forming a one line block
- # if the excess is more than 1 char
- return 0 if ( $excess > 1 );
+ # for multiple tokens
+ else {
- # ... and give up if it is not a one-line block on input.
- # note: for a one-line block on input, it may be possible to keep
- # it as a one-line block (by removing a needless semicolon ).
- my $K_start = $K_to_go[$i_start];
- my $ldiff =
- $rLL->[$K_closing]->[_LINE_INDEX_] - $rLL->[$K_start]->[_LINE_INDEX_];
- return 0 if ($ldiff);
- }
+ #-------------------------
+ # write a single line if..
+ #-------------------------
+ if (
+ (
- foreach my $Ki ( $Kj + 1 .. $K_last ) {
+ # this line is 'short'
+ !$is_long_line
- # old whitespace could be arbitrarily large, so don't use it
- if ( $rLL->[$Ki]->[_TYPE_] eq 'b' ) { $pos += 1 }
- else { $pos += $rLL->[$Ki]->[_TOKEN_LENGTH_] }
+ # and we didn't see a good breakpoint
+ && !$saw_good_break
- # ignore some small blocks
- my $type_sequence_i = $rLL->[$Ki]->[_TYPE_SEQUENCE_];
- my $nobreak = $rshort_nested->{$type_sequence_i};
+ # and we don't already have an interior breakpoint
+ && !$forced_breakpoint_count
+ )
- # Return false result if we exceed the maximum line length,
- if ( $pos > $maximum_line_length ) {
- return 0;
- }
+ # or, we aren't allowed to add any newlines
+ || !$rOpts_add_newlines
- # keep going for non-containers
- elsif ( !$type_sequence_i ) {
+ )
+ {
+ $ri_first = [$imin];
+ $ri_last = [$imax];
+ }
- }
+ #-----------------------------
+ # otherwise use multiple lines
+ #-----------------------------
+ else {
- # return if we encounter another opening brace before finding the
- # closing brace.
- elsif ($rLL->[$Ki]->[_TOKEN_] eq '{'
- && $rLL->[$Ki]->[_TYPE_] eq '{'
- && $rblock_type_of_seqno->{$type_sequence_i}
- && !$nobreak )
- {
- return 0;
- }
+ # add a couple of extra terminal blank tokens if we haven't
+ # already done so
+ $self->pad_array_to_go() unless ($called_pad_array_to_go);
- # if we find our closing brace..
- elsif ($rLL->[$Ki]->[_TOKEN_] eq '}'
- && $rLL->[$Ki]->[_TYPE_] eq '}'
- && $rblock_type_of_seqno->{$type_sequence_i}
- && !$nobreak )
- {
+ ( $ri_first, $ri_last, my $rbond_strength_to_go ) =
+ $self->break_long_lines( $saw_good_break, \@colon_list,
+ $rbond_strength_bias );
- # be sure any trailing comment also fits on the line
- my $Ki_nonblank = $Ki;
- if ( $Ki_nonblank < $K_last ) {
- $Ki_nonblank++;
- if ( $rLL->[$Ki_nonblank]->[_TYPE_] eq 'b'
- && $Ki_nonblank < $K_last )
- {
- $Ki_nonblank++;
- }
- }
+ $self->break_all_chain_tokens( $ri_first, $ri_last );
- # Patch for one-line sort/map/grep/eval blocks with side comments:
- # We will ignore the side comment length for sort/map/grep/eval
- # because this can lead to statements which change every time
- # perltidy is run. Here is an example from Denis Moskowitz which
- # oscillates between these two states without this patch:
+ $self->break_equals( $ri_first, $ri_last )
+ if @{$ri_first} >= 3;
-## --------
-## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
-## @baz;
-##
-## grep {
-## $_->foo ne 'bar'
-## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
-## @baz;
-## --------
+ # now we do a correction step to clean this up a bit
+ # (The only time we would not do this is for debugging)
+ $self->recombine_breakpoints( $ri_first, $ri_last,
+ $rbond_strength_to_go )
+ if ( $rOpts_recombine && @{$ri_first} > 1 );
- # When the first line is input it gets broken apart by the main
- # line break logic in sub process_line_of_CODE.
- # When the second line is input it gets recombined by
- # process_line_of_CODE and passed to the output routines. The
- # output routines (break_long_lines) do not break it apart
- # because the bond strengths are set to the highest possible value
- # for grep/map/eval/sort blocks, so the first version gets output.
- # It would be possible to fix this by changing bond strengths,
- # but they are high to prevent errors in older versions of perl.
- # See c100 for eval test.
- if ( $Ki < $K_last
- && $rLL->[$K_last]->[_TYPE_] eq '#'
- && $rLL->[$K_last]->[_LEVEL_] == $rLL->[$Ki]->[_LEVEL_]
- && !$rOpts_ignore_side_comment_lengths
- && !$is_sort_map_grep_eval{$block_type}
- && $K_last - $Ki_nonblank <= 2 )
- {
- # Only include the side comment for if/else/elsif/unless if it
- # immediately follows (because the current '$rbrace_follower'
- # logic for these will give an immediate brake after these
- # closing braces). So for example a line like this
- # if (...) { ... } ; # very long comment......
- # will already break like this:
- # if (...) { ... }
- # ; # very long comment......
- # so we do not need to include the length of the comment, which
- # would break the block. Project 'bioperl' has coding like this.
- ## !~ /^(if|else|elsif|unless)$/
- if ( !$is_if_unless_elsif_else{$block_type}
- || $K_last == $Ki_nonblank )
- {
- $Ki_nonblank = $K_last;
- $pos += $rLL->[$Ki_nonblank]->[_TOKEN_LENGTH_];
+ $self->insert_final_ternary_breaks( $ri_first, $ri_last )
+ if (@colon_list);
+ }
- if ( $Ki_nonblank > $Ki + 1 ) {
+ $self->insert_breaks_before_list_opening_containers( $ri_first,
+ $ri_last )
+ if ( %break_before_container_types && $max_index_to_go > 0 );
- # source whitespace could be anything, assume
- # at least one space before the hash on output
- if ( $rLL->[ $Ki + 1 ]->[_TYPE_] eq 'b' ) {
- $pos += 1;
- }
- else { $pos += $rLL->[ $Ki + 1 ]->[_TOKEN_LENGTH_] }
- }
+ # Check for a phantom semicolon at the end of the batch
+ if ( !$token_lengths_to_go[$imax] && $types_to_go[$imax] eq ';' ) {
+ $self->unmask_phantom_token($imax);
+ }
- if ( $pos >= $maximum_line_length ) {
- return 0;
- }
- }
+ if ( $rOpts_one_line_block_semicolons == 0 ) {
+ $self->delete_one_line_semicolons( $ri_first, $ri_last );
}
- # ok, it's a one-line block
- create_one_line_block( $i_start, 20 );
- return 0;
+ # Remember the largest batch size processed. This is needed by the
+ # logical padding routine to avoid padding the first nonblank token
+ if ( $max_index_to_go > $peak_batch_size ) {
+ $peak_batch_size = $max_index_to_go;
+ }
}
- # just keep going for other characters
- else {
+ #-------------------
+ # -lp corrector step
+ #-------------------
+ if ($rOpts_line_up_parentheses) {
+ $self->correct_lp_indentation( $ri_first, $ri_last );
}
- }
- # We haven't hit the closing brace, but there is still space. So the
- # question here is, should we keep going to look at more lines in hopes of
- # forming a new one-line block, or should we stop right now. The problem
- # with continuing is that we will not be able to honor breaks before the
- # opening brace if we continue.
+ #--------------------
+ # ship this batch out
+ #--------------------
+ $this_batch->[_ri_first_] = $ri_first;
+ $this_batch->[_ri_last_] = $ri_last;
- # Typically we will want to keep trying to make one-line blocks for things
- # like sort/map/grep/eval. But it is not always a good idea to make as
- # many one-line blocks as possible, so other types are not done. The user
- # can always use -mangle.
+ $self->convey_batch_to_vertical_aligner();
- # If we want to keep going, we will create a new one-line block.
- # The blocks which we can keep going are in a hash, but we never want
- # to continue if we are at a '-bli' block.
- if ( $want_one_line_block{$block_type} && !$is_bli ) {
- create_one_line_block( $i_start, 1 );
- }
- return 0;
-} ## end sub starting_one_line_block
+ #-------------------------------------------------------------------
+ # Write requested number of blank lines after an opening block brace
+ #-------------------------------------------------------------------
+ if ($rOpts_blank_lines_after_opening_block) {
+ my $iterm = $imax;
+ if ( $types_to_go[$iterm] eq '#' && $iterm > $imin ) {
+ $iterm -= 1;
+ if ( $types_to_go[$iterm] eq 'b' && $iterm > $imin ) {
+ $iterm -= 1;
+ }
+ }
-sub unstore_token_to_go {
+ if ( $types_to_go[$iterm] eq '{'
+ && $block_type_to_go[$iterm]
+ && $block_type_to_go[$iterm] =~
+ /$blank_lines_after_opening_block_pattern/ )
+ {
+ my $nblanks = $rOpts_blank_lines_after_opening_block;
+ $self->flush_vertical_aligner();
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->require_blank_code_lines($nblanks);
+ }
+ }
- # remove most recent token from output stream
- my $self = shift;
- if ( $max_index_to_go > 0 ) {
- $max_index_to_go--;
- }
- else {
- $max_index_to_go = UNDEFINED_INDEX;
+ return;
+ } ## end sub grind_batch_of_CODE
+
+ sub iprev_to_go {
+ my ($i) = @_;
+ return $i - 1 > 0
+ && $types_to_go[ $i - 1 ] eq 'b' ? $i - 2 : $i - 1;
}
- return;
-} ## end sub unstore_token_to_go
-sub compare_indentation_levels {
+ sub unmask_phantom_token {
+ my ( $self, $iend ) = @_;
- # Check to see if output line tabbing agrees with input line
- # this can be very useful for debugging a script which has an extra
- # or missing brace.
+ # Turn a phantom token into a real token.
- my ( $self, $K_first, $guessed_indentation_level, $line_number ) = @_;
- return unless ( defined($K_first) );
+ # Input parameter:
+ # $iend = the index in the output batch array of this token.
- my $rLL = $self->[_rLL_];
+ # Phantom tokens are specially marked token types (such as ';') with
+ # no token text which only become real tokens if they occur at the end
+ # of an output line. At one time phantom ',' tokens were handled
+ # here, but now they are processed elsewhere.
- my $structural_indentation_level = $rLL->[$K_first]->[_LEVEL_];
- my $radjusted_levels = $self->[_radjusted_levels_];
- if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
- $structural_indentation_level = $radjusted_levels->[$K_first];
- }
+ my $rLL = $self->[_rLL_];
+ my $KK = $K_to_go[$iend];
+ my $line_number = 1 + $rLL->[$KK]->[_LINE_INDEX_];
- # record max structural depth for log file
- if ( $structural_indentation_level > $self->[_maximum_BLOCK_level_] ) {
- $self->[_maximum_BLOCK_level_] = $structural_indentation_level;
- $self->[_maximum_BLOCK_level_at_line_] = $line_number;
- }
+ my $type = $types_to_go[$iend];
+ return unless ( $type eq ';' );
+ my $tok = $type;
+ my $tok_len = length($tok);
+ if ( $want_left_space{$type} != WS_NO ) {
+ $tok = SPACE . $tok;
+ $tok_len += 1;
+ }
- my $type_sequence = $rLL->[$K_first]->[_TYPE_SEQUENCE_];
- my $is_closing_block =
- $type_sequence
- && $self->[_rblock_type_of_seqno_]->{$type_sequence}
- && $rLL->[$K_first]->[_TYPE_] eq '}';
+ $tokens_to_go[$iend] = $tok;
+ $token_lengths_to_go[$iend] = $tok_len;
- if ( $guessed_indentation_level ne $structural_indentation_level ) {
- $self->[_last_tabbing_disagreement_] = $line_number;
+ $rLL->[$KK]->[_TOKEN_] = $tok;
+ $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
- if ($is_closing_block) {
+ $self->note_added_semicolon($line_number);
- if ( !$self->[_in_brace_tabbing_disagreement_] ) {
- $self->[_in_brace_tabbing_disagreement_] = $line_number;
- }
- if ( !$self->[_first_brace_tabbing_disagreement_] ) {
- $self->[_first_brace_tabbing_disagreement_] = $line_number;
- }
+ # This changes the summed lengths of the rest of this batch
+ foreach ( $iend .. $max_index_to_go ) {
+ $summed_lengths_to_go[ $_ + 1 ] += $tok_len;
}
+ return;
+ } ## end sub unmask_phantom_token
- if ( !$self->[_in_tabbing_disagreement_] ) {
- $self->[_tabbing_disagreement_count_]++;
+ sub save_opening_indentation {
- if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
- write_logfile_entry(
-"Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
- );
+ # This should be called after each batch of tokens is output. It
+ # saves indentations of lines of all unmatched opening tokens.
+ # These will be used by sub get_opening_indentation.
+
+ my ( $self, $ri_first, $ri_last, $rindentation_list,
+ $runmatched_opening_indexes )
+ = @_;
+
+ $runmatched_opening_indexes = []
+ if ( !defined($runmatched_opening_indexes) );
+
+ # QW INDENTATION PATCH 1:
+ # Also save indentation for multiline qw quotes
+ my @i_qw;
+ my $seqno_qw_opening;
+ if ( $types_to_go[$max_index_to_go] eq 'q' ) {
+ my $KK = $K_to_go[$max_index_to_go];
+ $seqno_qw_opening =
+ $self->[_rstarting_multiline_qw_seqno_by_K_]->{$KK};
+ if ($seqno_qw_opening) {
+ push @i_qw, $max_index_to_go;
}
- $self->[_in_tabbing_disagreement_] = $line_number;
- $self->[_first_tabbing_disagreement_] = $line_number
- unless ( $self->[_first_tabbing_disagreement_] );
}
- }
- else {
-
- $self->[_in_brace_tabbing_disagreement_] = 0 if ($is_closing_block);
- my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
- if ($in_tabbing_disagreement) {
+ # we need to save indentations of any unmatched opening tokens
+ # in this batch because we may need them in a subsequent batch.
+ foreach ( @{$runmatched_opening_indexes}, @i_qw ) {
- if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
- write_logfile_entry(
-"End indentation disagreement from input line $in_tabbing_disagreement\n"
- );
+ my $seqno = $type_sequence_to_go[$_];
- if ( $self->[_tabbing_disagreement_count_] == MAX_NAG_MESSAGES )
- {
- write_logfile_entry(
- "No further tabbing disagreements will be noted\n");
+ if ( !$seqno ) {
+ if ( $seqno_qw_opening && $_ == $max_index_to_go ) {
+ $seqno = $seqno_qw_opening;
+ }
+ else {
+
+ # shouldn't happen
+ $seqno = 'UNKNOWN';
+ DEVEL_MODE && Fault("unable to find sequence number\n");
}
}
- $self->[_in_tabbing_disagreement_] = 0;
+ $saved_opening_indentation{$seqno} = [
+ lookup_opening_indentation(
+ $_, $ri_first, $ri_last, $rindentation_list
+ )
+ ];
}
- }
- return;
-} ## end sub compare_indentation_levels
+ return;
+ } ## end sub save_opening_indentation
-###################################################
-# CODE SECTION 8: Utilities for setting breakpoints
-###################################################
+ sub get_saved_opening_indentation {
+ my ($seqno) = @_;
+ my ( $indent, $offset, $is_leading, $exists ) = ( 0, 0, 0, 0 );
-{ ## begin closure set_forced_breakpoint
+ if ($seqno) {
+ if ( $saved_opening_indentation{$seqno} ) {
+ ( $indent, $offset, $is_leading ) =
+ @{ $saved_opening_indentation{$seqno} };
+ $exists = 1;
+ }
+ }
- my @forced_breakpoint_undo_stack;
+ # some kind of serious error it doesn't exist
+ # (example is badfile.t)
- # These are global vars for efficiency:
- # my $forced_breakpoint_count;
- # my $forced_breakpoint_undo_count;
- # my $index_max_forced_break;
+ return ( $indent, $offset, $is_leading, $exists );
+ } ## end sub get_saved_opening_indentation
+} ## end closure grind_batch_of_CODE
- # Break before or after certain tokens based on user settings
- my %break_before_or_after_token;
+sub lookup_opening_indentation {
- BEGIN {
+ # get the indentation of the line in the current output batch
+ # which output a selected opening token
+ #
+ # given:
+ # $i_opening - index of an opening token in the current output batch
+ # whose line indentation we need
+ # $ri_first - reference to list of the first index $i for each output
+ # line in this batch
+ # $ri_last - reference to list of the last index $i for each output line
+ # in this batch
+ # $rindentation_list - reference to a list containing the indentation
+ # used for each line. (NOTE: the first slot in
+ # this list is the last returned line number, and this is
+ # followed by the list of indentations).
+ #
+ # return
+ # -the indentation of the line which contained token $i_opening
+ # -and its offset (number of columns) from the start of the line
- # Updated to use all operators. This fixes case b1054
- # Here is the previous simplified version:
- ## my @q = qw( . : ? and or xor && || );
- my @q = @all_operators;
+ my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
- push @q, ',';
- @break_before_or_after_token{@q} = (1) x scalar(@q);
- }
+ if ( !@{$ri_last} ) {
- # This is no longer called - global vars - moved into initialize_batch_vars
- sub initialize_forced_breakpoint_vars {
- $forced_breakpoint_count = 0;
- $index_max_forced_break = UNDEFINED_INDEX;
- $forced_breakpoint_undo_count = 0;
- ##@forced_breakpoint_undo_stack = (); # not needed
- return;
+ # An error here implies a bug introduced by a recent program change.
+ # Every batch of code has lines, so this should never happen.
+ if (DEVEL_MODE) {
+ Fault("Error in opening_indentation: no lines");
+ }
+ return ( 0, 0, 0 );
}
- sub set_fake_breakpoint {
+ my $nline = $rindentation_list->[0]; # line number of previous lookup
- # Just bump up the breakpoint count as a signal that there are breaks.
- # This is useful if we have breaks but may want to postpone deciding
- # where to make them.
- $forced_breakpoint_count++;
- return;
+ # reset line location if necessary
+ $nline = 0 if ( $i_opening < $ri_start->[$nline] );
+
+ # find the correct line
+ unless ( $i_opening > $ri_last->[-1] ) {
+ while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
}
- use constant DEBUG_FORCE => 0;
+ # Error - token index is out of bounds - shouldn't happen
+ # A program bug has been introduced in one of the calling routines.
+ # We better stop here.
+ else {
+ my $i_last_line = $ri_last->[-1];
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+Program bug in call to lookup_opening_indentation - index out of range
+ called with index i_opening=$i_opening > $i_last_line = max index of last line
+This batch has max index = $max_index_to_go,
+EOM
+ }
+ $nline = $#{$ri_last};
+ }
- sub set_forced_breakpoint {
- my ( $self, $i ) = @_;
+ $rindentation_list->[0] =
+ $nline; # save line number to start looking next call
+ my $ibeg = $ri_start->[$nline];
+ my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
+ my $is_leading = ( $ibeg == $i_opening );
+ return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
+} ## end sub lookup_opening_indentation
- # Set a breakpoint AFTER the token at index $i in the _to_go arrays.
+sub terminal_type_i {
- # Exceptions:
- # - If the token at index $i is a blank, backup to $i-1 to
- # get to the previous nonblank token.
- # - For certain tokens, the break may be placed BEFORE the token
- # at index $i, depending on user break preference settings.
- # - If a break is made after an opening token, then a break will
- # also be made before the corresponding closing token.
+ # returns type of last token on this line (terminal token), as follows:
+ # returns # for a full-line comment
+ # returns ' ' for a blank line
+ # otherwise returns final token type
- # Returns '$i_nonblank':
- # = index of the token after which the breakpoint was actually placed
- # = undef if breakpoint was not set.
- my $i_nonblank;
+ my ( $ibeg, $iend ) = @_;
- if ( !defined($i) || $i < 0 ) {
+ # Start at the end and work backwards
+ my $i = $iend;
+ my $type_i = $types_to_go[$i];
- # Calls with bad index $i are harmless but waste time and should
- # be caught and eliminated during code development.
- if (DEVEL_MODE) {
- my ( $a, $b, $c ) = caller();
- Fault(
-"Bad call to forced breakpoint from $a $b $c ; called with i=$i; please fix\n"
- );
- }
- return;
+ # Check for side comment
+ if ( $type_i eq '#' ) {
+ $i--;
+ if ( $i < $ibeg ) {
+ return wantarray ? ( $type_i, $ibeg ) : $type_i;
}
+ $type_i = $types_to_go[$i];
+ }
- # Break after token $i
- $i_nonblank = $self->set_forced_breakpoint_AFTER($i);
-
- # If we break at an opening container..break at the closing
- my $set_closing;
- if ( defined($i_nonblank)
- && $is_opening_sequence_token{ $tokens_to_go[$i_nonblank] } )
- {
- $set_closing = 1;
- $self->set_closing_breakpoint($i_nonblank);
+ # Skip past a blank
+ if ( $type_i eq 'b' ) {
+ $i--;
+ if ( $i < $ibeg ) {
+ return wantarray ? ( $type_i, $ibeg ) : $type_i;
}
+ $type_i = $types_to_go[$i];
+ }
- DEBUG_FORCE && do {
- my ( $a, $b, $c ) = caller();
- my $msg =
-"FORCE $forced_breakpoint_count after call from $a $c with i=$i max=$max_index_to_go";
- if ( !defined($i_nonblank) ) {
- $i = EMPTY_STRING unless defined($i);
- $msg .= " but could not set break after i='$i'\n";
- }
- else {
- $msg .= <<EOM;
-set break after $i_nonblank: tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]
-EOM
- if ( defined($set_closing) ) {
- $msg .=
-" Also set closing breakpoint corresponding to this token\n";
- }
- }
- print STDOUT $msg;
- };
-
- return $i_nonblank;
- } ## end sub set_forced_breakpoint
+ # Found it..make sure it is a BLOCK termination,
+ # but hide a terminal } after sort/map/grep/eval/do because it is not
+ # necessarily the end of the line. (terminal.t)
+ my $block_type = $block_type_to_go[$i];
+ if (
+ $type_i eq '}'
+ && ( !$block_type
+ || $is_sort_map_grep_eval_do{$block_type} )
+ )
+ {
+ $type_i = 'b';
+ }
+ return wantarray ? ( $type_i, $i ) : $type_i;
+} ## end sub terminal_type_i
- sub set_forced_breakpoint_AFTER {
- my ( $self, $i ) = @_;
+sub pad_array_to_go {
- # This routine is only called by sub set_forced_breakpoint and
- # sub set_closing_breakpoint.
+ # To simplify coding in break_lists and set_bond_strengths, it helps to
+ # create some extra blank tokens at the end of the arrays. We also add
+ # some undef's to help guard against using invalid data.
+ my ($self) = @_;
+ $K_to_go[ $max_index_to_go + 1 ] = undef;
+ $tokens_to_go[ $max_index_to_go + 1 ] = EMPTY_STRING;
+ $tokens_to_go[ $max_index_to_go + 2 ] = EMPTY_STRING;
+ $tokens_to_go[ $max_index_to_go + 3 ] = undef;
+ $types_to_go[ $max_index_to_go + 1 ] = 'b';
+ $types_to_go[ $max_index_to_go + 2 ] = 'b';
+ $types_to_go[ $max_index_to_go + 3 ] = undef;
+ $nesting_depth_to_go[ $max_index_to_go + 2 ] = undef;
+ $nesting_depth_to_go[ $max_index_to_go + 1 ] =
+ $nesting_depth_to_go[$max_index_to_go];
- # Set a breakpoint AFTER the token at index $i in the _to_go arrays.
+ # /^[R\}\)\]]$/
+ if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
+ if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
- # Exceptions:
- # - If the token at index $i is a blank, backup to $i-1 to
- # get to the previous nonblank token.
- # - For certain tokens, the break may be placed BEFORE the token
- # at index $i, depending on user break preference settings.
+ # Nesting depths are set to be >=0 in sub write_line, so it should
+ # not be possible to get here unless the code has a bracing error
+ # which leaves a closing brace with zero nesting depth.
+ unless ( get_saw_brace_error() ) {
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+Program bug in pad_array_to_go: hit nesting error which should have been caught
+EOM
+ }
+ }
+ }
+ else {
+ $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
+ }
+ }
- # Returns:
- # - the index of the token after which the break was set, or
- # - undef if no break was set
+ # /^[L\{\(\[]$/
+ elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
+ $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
+ }
+ return;
+} ## end sub pad_array_to_go
- return unless ( defined($i) && $i >= 0 );
+sub break_all_chain_tokens {
- # Back up at a blank so we have a token to examine.
- # This was added to fix for cases like b932 involving an '=' break.
- if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
+ # scan the current breakpoints looking for breaks at certain "chain
+ # operators" (. : && || + etc) which often occur repeatedly in a long
+ # statement. If we see a break at any one, break at all similar tokens
+ # within the same container.
+ #
+ my ( $self, $ri_left, $ri_right ) = @_;
- # Never break between welded tokens
- return
- if ( $total_weld_count
- && $self->[_rK_weld_right_]->{ $K_to_go[$i] } );
+ my %saw_chain_type;
+ my %left_chain_type;
+ my %right_chain_type;
+ my %interior_chain_type;
+ my $nmax = @{$ri_right} - 1;
- my $token = $tokens_to_go[$i];
- my $type = $types_to_go[$i];
+ # scan the left and right end tokens of all lines
+ my $count = 0;
+ for my $n ( 0 .. $nmax ) {
+ my $il = $ri_left->[$n];
+ my $ir = $ri_right->[$n];
+ my $typel = $types_to_go[$il];
+ my $typer = $types_to_go[$ir];
+ $typel = '+' if ( $typel eq '-' ); # treat + and - the same
+ $typer = '+' if ( $typer eq '-' );
+ $typel = '*' if ( $typel eq '/' ); # treat * and / the same
+ $typer = '*' if ( $typer eq '/' );
- # For certain tokens, use user settings to decide if we break before or
- # after it
- if ( $break_before_or_after_token{$token}
- && ( $type eq $token || $type eq 'k' ) )
- {
- if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
+ my $keyl = $typel eq 'k' ? $tokens_to_go[$il] : $typel;
+ my $keyr = $typer eq 'k' ? $tokens_to_go[$ir] : $typer;
+ if ( $is_chain_operator{$keyl} && $want_break_before{$typel} ) {
+ next if ( $typel eq '?' );
+ push @{ $left_chain_type{$keyl} }, $il;
+ $saw_chain_type{$keyl} = 1;
+ $count++;
}
+ if ( $is_chain_operator{$keyr} && !$want_break_before{$typer} ) {
+ next if ( $typer eq '?' );
+ push @{ $right_chain_type{$keyr} }, $ir;
+ $saw_chain_type{$keyr} = 1;
+ $count++;
+ }
+ }
+ return unless $count;
- # breaks are forced before 'if' and 'unless'
- elsif ( $is_if_unless{$token} && $type eq 'k' ) { $i-- }
-
- if ( $i >= 0 && $i <= $max_index_to_go ) {
- my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
-
- if ( $i_nonblank >= 0
- && $nobreak_to_go[$i_nonblank] == 0
- && !$forced_breakpoint_to_go[$i_nonblank] )
- {
- $forced_breakpoint_to_go[$i_nonblank] = 1;
-
- if ( $i_nonblank > $index_max_forced_break ) {
- $index_max_forced_break = $i_nonblank;
- }
- $forced_breakpoint_count++;
- $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ]
- = $i_nonblank;
-
- # success
- return $i_nonblank;
+ # now look for any interior tokens of the same types
+ $count = 0;
+ my $has_interior_dot_or_plus;
+ for my $n ( 0 .. $nmax ) {
+ my $il = $ri_left->[$n];
+ my $ir = $ri_right->[$n];
+ foreach my $i ( $il + 1 .. $ir - 1 ) {
+ my $type = $types_to_go[$i];
+ my $key = $type eq 'k' ? $tokens_to_go[$i] : $type;
+ $key = '+' if ( $key eq '-' );
+ $key = '*' if ( $key eq '/' );
+ if ( $saw_chain_type{$key} ) {
+ push @{ $interior_chain_type{$key} }, $i;
+ $count++;
+ $has_interior_dot_or_plus ||= ( $key eq '.' || $key eq '+' );
}
}
- return;
- } ## end sub set_forced_breakpoint_AFTER
-
- sub clear_breakpoint_undo_stack {
- my ($self) = @_;
- $forced_breakpoint_undo_count = 0;
- return;
}
+ return unless $count;
- use constant DEBUG_UNDOBP => 0;
-
- sub undo_forced_breakpoint_stack {
-
- my ( $self, $i_start ) = @_;
+ my @keys = keys %saw_chain_type;
- # Given $i_start, a non-negative index the 'undo stack' of breakpoints,
- # remove all breakpoints from the top of the 'undo stack' down to and
- # including index $i_start.
+ # quit if just ONE continuation line with leading . For example--
+ # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
+ # . $contents;
+ # Fixed for b1399.
+ if ( $has_interior_dot_or_plus && $nmax == 1 && @keys == 1 ) {
+ return;
+ }
- # The 'undo stack' is a stack of all breakpoints made for a batch of
- # code.
+ # now make a list of all new break points
+ my @insert_list;
- if ( $i_start < 0 ) {
- $i_start = 0;
- my ( $a, $b, $c ) = caller();
+ # loop over all chain types
+ foreach my $key (@keys) {
- # Bad call, can only be due to a recent programming change.
- Fault(
-"Program Bug: undo_forced_breakpoint_stack from $a $c has bad i=$i_start "
- ) if (DEVEL_MODE);
- return;
- }
+ # loop over all interior chain tokens
+ foreach my $itest ( @{ $interior_chain_type{$key} } ) {
- while ( $forced_breakpoint_undo_count > $i_start ) {
- my $i =
- $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
- if ( $i >= 0 && $i <= $max_index_to_go ) {
- $forced_breakpoint_to_go[$i] = 0;
- $forced_breakpoint_count--;
+ # loop over all left end tokens of same type
+ if ( $left_chain_type{$key} ) {
+ next if $nobreak_to_go[ $itest - 1 ];
+ foreach my $i ( @{ $left_chain_type{$key} } ) {
+ next unless $self->in_same_container_i( $i, $itest );
+ push @insert_list, $itest - 1;
- DEBUG_UNDOBP && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
-"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
- };
+ # Break at matching ? if this : is at a different level.
+ # For example, the ? before $THRf_DEAD in the following
+ # should get a break if its : gets a break.
+ #
+ # my $flags =
+ # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
+ # : ( $_ & 4 ) ? $THRf_R_DETACHED
+ # : $THRf_R_JOINABLE;
+ if ( $key eq ':'
+ && $levels_to_go[$i] != $levels_to_go[$itest] )
+ {
+ my $i_question = $mate_index_to_go[$itest];
+ if ( defined($i_question) && $i_question > 0 ) {
+ push @insert_list, $i_question - 1;
+ }
+ }
+ last;
+ }
}
- # shouldn't happen, but not a critical error
- else {
- DEBUG_UNDOBP && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
-"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
- };
+ # loop over all right end tokens of same type
+ if ( $right_chain_type{$key} ) {
+ next if $nobreak_to_go[$itest];
+ foreach my $i ( @{ $right_chain_type{$key} } ) {
+ next unless $self->in_same_container_i( $i, $itest );
+ push @insert_list, $itest;
+
+ # break at matching ? if this : is at a different level
+ if ( $key eq ':'
+ && $levels_to_go[$i] != $levels_to_go[$itest] )
+ {
+ my $i_question = $mate_index_to_go[$itest];
+ if ( defined($i_question) ) {
+ push @insert_list, $i_question;
+ }
+ }
+ last;
+ }
}
}
- return;
- } ## end sub undo_forced_breakpoint_stack
-} ## end closure set_forced_breakpoint
-
-{ ## begin closure set_closing_breakpoint
-
- my %postponed_breakpoint;
-
- sub initialize_postponed_breakpoint {
- %postponed_breakpoint = ();
- return;
}
- sub has_postponed_breakpoint {
- my ($seqno) = @_;
- return $postponed_breakpoint{$seqno};
+ # insert any new break points
+ if (@insert_list) {
+ $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
}
+ return;
+} ## end sub break_all_chain_tokens
- sub set_closing_breakpoint {
-
- # set a breakpoint at a matching closing token
- my ( $self, $i_break ) = @_;
+sub insert_additional_breaks {
- if ( $mate_index_to_go[$i_break] >= 0 ) {
+ # this routine will add line breaks at requested locations after
+ # sub break_long_lines has made preliminary breaks.
- # Don't reduce the '2' in the statement below.
- # Test files: attrib.t, BasicLyx.pm.html
- if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
+ my ( $self, $ri_break_list, $ri_first, $ri_last ) = @_;
+ my $i_f;
+ my $i_l;
+ my $line_number = 0;
+ foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
- # break before } ] and ), but sub set_forced_breakpoint will decide
- # to break before or after a ? and :
- my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
- $self->set_forced_breakpoint_AFTER(
- $mate_index_to_go[$i_break] - $inc );
- }
- }
- else {
- my $type_sequence = $type_sequence_to_go[$i_break];
- if ($type_sequence) {
- my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
- $postponed_breakpoint{$type_sequence} = 1;
- }
- }
- return;
- } ## end sub set_closing_breakpoint
-} ## end closure set_closing_breakpoint
+ next if ( $nobreak_to_go[$i_break_left] );
-#########################################
-# CODE SECTION 9: Process batches of code
-#########################################
+ $i_f = $ri_first->[$line_number];
+ $i_l = $ri_last->[$line_number];
+ while ( $i_break_left >= $i_l ) {
+ $line_number++;
-{ ## begin closure grind_batch_of_CODE
+ # shouldn't happen unless caller passes bad indexes
+ if ( $line_number >= @{$ri_last} ) {
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+Non-fatal program bug: couldn't set break at $i_break_left
+EOM
+ }
+ return;
+ }
+ $i_f = $ri_first->[$line_number];
+ $i_l = $ri_last->[$line_number];
+ }
- # The routines in this closure begin the processing of a 'batch' of code.
+ # Do not leave a blank at the end of a line; back up if necessary
+ if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
- # A variable to keep track of consecutive nonblank lines so that we can
- # insert occasional blanks
- my @nonblank_lines_at_depth;
+ my $i_break_right = $inext_to_go[$i_break_left];
+ if ( $i_break_left >= $i_f
+ && $i_break_left < $i_l
+ && $i_break_right > $i_f
+ && $i_break_right <= $i_l )
+ {
+ splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
+ splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
+ }
+ }
+ return;
+} ## end sub insert_additional_breaks
- # A variable to remember maximum size of previous batches; this is needed
- # by the logical padding routine
- my $peak_batch_size;
- my $batch_count;
+{ ## begin closure in_same_container_i
+ my $ris_break_token;
+ my $ris_comma_token;
- # variables to keep track of unbalanced containers.
- my %saved_opening_indentation;
- my @unmatched_opening_indexes_in_this_batch;
+ BEGIN {
- sub initialize_grind_batch_of_CODE {
- @nonblank_lines_at_depth = ();
- $peak_batch_size = 0;
- $batch_count = 0;
- %saved_opening_indentation = ();
- return;
- }
+ # all cases break on seeing commas at same level
+ my @q = qw( => );
+ push @q, ',';
+ @{$ris_comma_token}{@q} = (1) x scalar(@q);
- # sub grind_batch_of_CODE receives sections of code which are the longest
- # possible lines without a break. In other words, it receives what is left
- # after applying all breaks forced by blank lines, block comments, side
- # comments, pod text, and structural braces. Its job is to break this code
- # down into smaller pieces, if necessary, which fit within the maximum
- # allowed line length. Then it sends the resulting lines of code on down
- # the pipeline to the VerticalAligner package, breaking the code into
- # continuation lines as necessary. The batch of tokens are in the "to_go"
- # arrays. The name 'grind' is slightly suggestive of a machine continually
- # breaking down long lines of code, but mainly it is unique and easy to
- # remember and find with an editor search.
+ # Non-ternary text also breaks on seeing any of qw(? : || or )
+ # Example: we would not want to break at any of these .'s
+ # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
+ push @q, qw( or || ? : );
+ @{$ris_break_token}{@q} = (1) x scalar(@q);
+ } ## end BEGIN
- # The two routines 'process_line_of_CODE' and 'grind_batch_of_CODE' work
- # together in the following way:
+ sub in_same_container_i {
- # - 'process_line_of_CODE' receives the original INPUT lines one-by-one and
- # combines them into the largest sequences of tokens which might form a new
- # line.
- # - 'grind_batch_of_CODE' determines which tokens will form the OUTPUT
- # lines.
+ # Check to see if tokens at i1 and i2 are in the same container, and
+ # not separated by certain characters: => , ? : || or
+ # This is an interface between the _to_go arrays to the rLL array
+ my ( $self, $i1, $i2 ) = @_;
- # So sub 'process_line_of_CODE' builds up the longest possible continuous
- # sequences of tokens, regardless of line length, and then
- # grind_batch_of_CODE breaks these sequences back down into the new output
- # lines.
+ # quick check
+ my $parent_seqno_1 = $parent_seqno_to_go[$i1];
+ return if ( $parent_seqno_to_go[$i2] ne $parent_seqno_1 );
- # Sub 'grind_batch_of_CODE' ships its output lines to the vertical aligner.
+ if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
+ my $K1 = $K_to_go[$i1];
+ my $K2 = $K_to_go[$i2];
+ my $rLL = $self->[_rLL_];
- use constant DEBUG_GRIND => 0;
+ my $depth_1 = $nesting_depth_to_go[$i1];
+ return if ( $depth_1 < 0 );
- sub check_grind_input {
+ # Shouldn't happen since i1 and i2 have same parent:
+ return unless ( $nesting_depth_to_go[$i2] == $depth_1 );
- # Check for valid input to sub grind_batch_of_CODE. An error here
- # would most likely be due to an error in 'sub store_token_to_go'.
- my ($self) = @_;
+ # Select character set to scan for
+ my $type_1 = $types_to_go[$i1];
+ my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token;
- # Be sure there are tokens in the batch
- if ( $max_index_to_go < 0 ) {
- Fault(<<EOM);
-sub grind incorrectly called with max_index_to_go=$max_index_to_go
-EOM
+ # Fast preliminary loop to verify that tokens are in the same container
+ my $KK = $K1;
+ while (1) {
+ $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
+ last if !defined($KK);
+ last if ( $KK >= $K2 );
+ my $ii = $i1 + $KK - $K1;
+ my $depth_i = $nesting_depth_to_go[$ii];
+ return if ( $depth_i < $depth_1 );
+ next if ( $depth_i > $depth_1 );
+ if ( $type_1 ne ':' ) {
+ my $tok_i = $tokens_to_go[$ii];
+ return if ( $tok_i eq '?' || $tok_i eq ':' );
+ }
}
- my $Klimit = $self->[_Klimit_];
- # The local batch tokens must be a continuous part of the global token
- # array.
- my $KK;
- foreach my $ii ( 0 .. $max_index_to_go ) {
+ # Slow loop checking for certain characters
- my $Km = $KK;
+ #-----------------------------------------------------
+ # This is potentially a slow routine and not critical.
+ # For safety just give up for large differences.
+ # See test file 'infinite_loop.txt'
+ #-----------------------------------------------------
+ return if ( $i2 - $i1 > 200 );
- $KK = $K_to_go[$ii];
- if ( !defined($KK) || $KK < 0 || $KK > $Klimit ) {
- $KK = '(undef)' unless defined($KK);
- Fault(<<EOM);
-at batch index at i=$ii, the value of K_to_go[$ii] = '$KK' is out of the valid range (0 - $Klimit)
-EOM
- }
+ foreach my $ii ( $i1 + 1 .. $i2 - 1 ) {
- if ( $ii > 0 && $KK != $Km + 1 ) {
- my $im = $ii - 1;
- Fault(<<EOM);
-Non-sequential K indexes: i=$im has Km=$Km; but i=$ii has K=$KK; expecting K = Km+1
-EOM
- }
+ my $depth_i = $nesting_depth_to_go[$ii];
+ next if ( $depth_i > $depth_1 );
+ return if ( $depth_i < $depth_1 );
+ my $tok_i = $tokens_to_go[$ii];
+ return if ( $rbreak->{$tok_i} );
}
- return;
- } ## end sub check_grind_input
+ return 1;
+ } ## end sub in_same_container_i
+} ## end closure in_same_container_i
- sub grind_batch_of_CODE {
+sub break_equals {
- my ($self) = @_;
+ # Look for assignment operators that could use a breakpoint.
+ # For example, in the following snippet
+ #
+ # $HOME = $ENV{HOME}
+ # || $ENV{LOGDIR}
+ # || $pw[7]
+ # || die "no home directory for user $<";
+ #
+ # we could break at the = to get this, which is a little nicer:
+ # $HOME =
+ # $ENV{HOME}
+ # || $ENV{LOGDIR}
+ # || $pw[7]
+ # || die "no home directory for user $<";
+ #
+ # The logic here follows the logic in set_logical_padding, which
+ # will add the padding in the second line to improve alignment.
+ #
+ my ( $self, $ri_left, $ri_right ) = @_;
+ my $nmax = @{$ri_right} - 1;
+ return unless ( $nmax >= 2 );
- my $this_batch = $self->[_this_batch_];
- $batch_count++;
+ # scan the left ends of first two lines
+ my $tokbeg = EMPTY_STRING;
+ my $depth_beg;
+ for my $n ( 1 .. 2 ) {
+ my $il = $ri_left->[$n];
+ my $typel = $types_to_go[$il];
+ my $tokenl = $tokens_to_go[$il];
+ my $keyl = $typel eq 'k' ? $tokenl : $typel;
- $self->check_grind_input() if (DEVEL_MODE);
+ my $has_leading_op = $is_chain_operator{$keyl};
+ return unless ($has_leading_op);
+ if ( $n > 1 ) {
+ return
+ unless ( $tokenl eq $tokbeg
+ && $nesting_depth_to_go[$il] eq $depth_beg );
+ }
+ $tokbeg = $tokenl;
+ $depth_beg = $nesting_depth_to_go[$il];
+ }
- # This routine is only called from sub flush_batch_of_code, so that
- # routine is a better spot for debugging.
- DEBUG_GRIND && do {
- my $token = my $type = EMPTY_STRING;
- if ( $max_index_to_go >= 0 ) {
- $token = $tokens_to_go[$max_index_to_go];
- $type = $types_to_go[$max_index_to_go];
- }
- my $output_str = EMPTY_STRING;
- if ( $max_index_to_go > 20 ) {
- my $mm = $max_index_to_go - 10;
- $output_str =
- join( EMPTY_STRING, @tokens_to_go[ 0 .. 10 ] ) . " ... "
- . join( EMPTY_STRING,
- @tokens_to_go[ $mm .. $max_index_to_go ] );
+ # now look for any interior tokens of the same types
+ my $il = $ri_left->[0];
+ my $ir = $ri_right->[0];
+
+ # now make a list of all new break points
+ my @insert_list;
+ foreach my $i ( reverse( $il + 1 .. $ir - 1 ) ) {
+ my $type = $types_to_go[$i];
+ if ( $is_assignment{$type}
+ && $nesting_depth_to_go[$i] eq $depth_beg )
+ {
+ if ( $want_break_before{$type} ) {
+ push @insert_list, $i - 1;
}
else {
- $output_str = join EMPTY_STRING,
- @tokens_to_go[ 0 .. $max_index_to_go ];
+ push @insert_list, $i;
}
- print STDERR <<EOM;
-grind got batch number $batch_count with $max_index_to_go tokens, last type '$type' tok='$token', text:
-$output_str
-EOM
- };
-
- return if ( $max_index_to_go < 0 );
-
- $self->set_lp_indentation()
- if ($rOpts_line_up_parentheses);
-
- #----------------------------
- # Shortcut for block comments
- #----------------------------
- if (
- $max_index_to_go == 0
- && $types_to_go[0] eq '#'
+ }
+ }
- # this shortcut does not work for -lp yet
- && !$rOpts_line_up_parentheses
- )
- {
- my $ibeg = 0;
- $this_batch->[_ri_first_] = [$ibeg];
- $this_batch->[_ri_last_] = [$ibeg];
- $this_batch->[_peak_batch_size_] = $peak_batch_size;
- $this_batch->[_do_not_pad_] = 0;
- $this_batch->[_batch_count_] = $batch_count;
- $this_batch->[_rix_seqno_controlling_ci_] = [];
+ # Break after a 'return' followed by a chain of operators
+ # return ( $^O !~ /win32|dos/i )
+ # && ( $^O ne 'VMS' )
+ # && ( $^O ne 'OS2' )
+ # && ( $^O ne 'MacOS' );
+ # To give:
+ # return
+ # ( $^O !~ /win32|dos/i )
+ # && ( $^O ne 'VMS' )
+ # && ( $^O ne 'OS2' )
+ # && ( $^O ne 'MacOS' );
+ my $i = 0;
+ if ( $types_to_go[$i] eq 'k'
+ && $tokens_to_go[$i] eq 'return'
+ && $ir > $il
+ && $nesting_depth_to_go[$i] eq $depth_beg )
+ {
+ push @insert_list, $i;
+ }
- $self->convey_batch_to_vertical_aligner();
+ return unless (@insert_list);
- my $level = $levels_to_go[$ibeg];
- $self->[_last_last_line_leading_level_] =
- $self->[_last_line_leading_level_];
- $self->[_last_line_leading_type_] = $types_to_go[$ibeg];
- $self->[_last_line_leading_level_] = $level;
- $nonblank_lines_at_depth[$level] = 1;
- return;
+ # One final check...
+ # scan second and third lines and be sure there are no assignments
+ # we want to avoid breaking at an = to make something like this:
+ # unless ( $icon =
+ # $html_icons{"$type-$state"}
+ # or $icon = $html_icons{$type}
+ # or $icon = $html_icons{$state} )
+ for my $n ( 1 .. 2 ) {
+ my $il_n = $ri_left->[$n];
+ my $ir_n = $ri_right->[$n];
+ foreach my $i ( $il_n + 1 .. $ir_n ) {
+ my $type = $types_to_go[$i];
+ return
+ if ( $is_assignment{$type}
+ && $nesting_depth_to_go[$i] eq $depth_beg );
}
+ }
- #-------------
- # Normal route
- #-------------
-
- my $rLL = $self->[_rLL_];
- my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
- my $rwant_container_open = $self->[_rwant_container_open_];
-
- #-------------------------------------------------------
- # Loop over the batch to initialize some batch variables
- #-------------------------------------------------------
- my $comma_count_in_batch = 0;
- my $ilast_nonblank = -1;
- my @colon_list;
- my @ix_seqno_controlling_ci;
- my %comma_arrow_count;
- my $comma_arrow_count_contained = 0;
- my @unmatched_closing_indexes_in_this_batch;
+ # ok, insert any new break point
+ if (@insert_list) {
+ $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
+ }
+ return;
+} ## end sub break_equals
- @unmatched_opening_indexes_in_this_batch = ();
+{ ## begin closure recombine_breakpoints
- foreach my $i ( 0 .. $max_index_to_go ) {
- $iprev_to_go[$i] = $ilast_nonblank;
- $inext_to_go[$i] = $i + 1;
+ # This routine is called once per batch to see if it would be better
+ # to combine some of the lines into which the batch has been broken.
- my $type = $types_to_go[$i];
- if ( $type ne 'b' ) {
- if ( $ilast_nonblank >= 0 ) {
- $inext_to_go[$ilast_nonblank] = $i;
+ my %is_amp_amp;
+ my %is_math_op;
+ my %is_plus_minus;
+ my %is_mult_div;
- # just in case there are two blanks in a row (shouldn't
- # happen)
- if ( ++$ilast_nonblank < $i ) {
- $inext_to_go[$ilast_nonblank] = $i;
- }
- }
- $ilast_nonblank = $i;
+ BEGIN {
- # This is a good spot to efficiently collect information needed
- # for breaking lines...
+ my @q;
+ @q = qw( && || );
+ @is_amp_amp{@q} = (1) x scalar(@q);
- # gather info needed by sub break_long_lines
- if ( $type_sequence_to_go[$i] ) {
- my $seqno = $type_sequence_to_go[$i];
- my $token = $tokens_to_go[$i];
+ @q = qw( + - * / );
+ @is_math_op{@q} = (1) x scalar(@q);
- # remember indexes of any tokens controlling xci
- # in this batch. This list is needed by sub undo_ci.
- if ( $ris_seqno_controlling_ci->{$seqno} ) {
- push @ix_seqno_controlling_ci, $i;
- }
+ @q = qw( + - );
+ @is_plus_minus{@q} = (1) x scalar(@q);
- if ( $is_opening_sequence_token{$token} ) {
- if ( $rwant_container_open->{$seqno} ) {
- $self->set_forced_breakpoint($i);
- }
- push @unmatched_opening_indexes_in_this_batch, $i;
- if ( $type eq '?' ) {
- push @colon_list, $type;
- }
- }
- elsif ( $is_closing_sequence_token{$token} ) {
+ @q = qw( * / );
+ @is_mult_div{@q} = (1) x scalar(@q);
+ } ## end BEGIN
- if ( $i > 0 && $rwant_container_open->{$seqno} ) {
- $self->set_forced_breakpoint( $i - 1 );
- }
+ sub Debug_dump_breakpoints {
- my $i_mate =
- pop @unmatched_opening_indexes_in_this_batch;
- if ( defined($i_mate) && $i_mate >= 0 ) {
- if ( $type_sequence_to_go[$i_mate] ==
- $type_sequence_to_go[$i] )
- {
- $mate_index_to_go[$i] = $i_mate;
- $mate_index_to_go[$i_mate] = $i;
- if ( $comma_arrow_count{$seqno} ) {
- $comma_arrow_count_contained +=
- $comma_arrow_count{$seqno};
- }
- }
- else {
- push @unmatched_opening_indexes_in_this_batch,
- $i_mate;
- push @unmatched_closing_indexes_in_this_batch,
- $i;
- }
- }
- else {
- push @unmatched_closing_indexes_in_this_batch, $i;
- }
- if ( $type eq ':' ) {
- push @colon_list, $type;
- }
- } ## end elsif ( $is_closing_sequence_token...)
+ # Debug routine to dump current breakpoints...not normally called
+ # We are given indexes to the current lines:
+ # $ri_beg = ref to array of BEGinning indexes of each line
+ # $ri_end = ref to array of ENDing indexes of each line
+ my ( $self, $ri_beg, $ri_end, $msg ) = @_;
+ print STDERR "----Dumping breakpoints from: $msg----\n";
+ for my $n ( 0 .. @{$ri_end} - 1 ) {
+ my $ibeg = $ri_beg->[$n];
+ my $iend = $ri_end->[$n];
+ my $text = EMPTY_STRING;
+ foreach my $i ( $ibeg .. $iend ) {
+ $text .= $tokens_to_go[$i];
+ }
+ print STDERR "$n ($ibeg:$iend) $text\n";
+ }
+ print STDERR "----\n";
+ return;
+ } ## end sub Debug_dump_breakpoints
- } ## end if ($seqno)
+ sub delete_one_line_semicolons {
- elsif ( $type eq ',' ) { $comma_count_in_batch++; }
- elsif ( $tokens_to_go[$i] eq '=>' ) {
- if (@unmatched_opening_indexes_in_this_batch) {
- my $j = $unmatched_opening_indexes_in_this_batch[-1];
- my $seqno = $type_sequence_to_go[$j];
- $comma_arrow_count{$seqno}++;
- }
- }
- } ## end if ( $type ne 'b' )
- } ## end for ( my $i = 0 ; $i <=...)
+ my ( $self, $ri_beg, $ri_end ) = @_;
+ my $rLL = $self->[_rLL_];
+ my $K_opening_container = $self->[_K_opening_container_];
- my $is_unbalanced_batch = @unmatched_opening_indexes_in_this_batch +
- @unmatched_closing_indexes_in_this_batch;
+ # Walk down the lines of this batch and delete any semicolons
+ # terminating one-line blocks;
+ my $nmax = @{$ri_end} - 1;
- #------------------------
- # Set special breakpoints
- #------------------------
- # If this line ends in a code block brace, set breaks at any
- # previous closing code block braces to breakup a chain of code
- # blocks on one line. This is very rare but can happen for
- # user-defined subs. For example we might be looking at this:
- # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
- my $saw_good_break = 0; # flag to force breaks even if short line
- if (
+ foreach my $n ( 0 .. $nmax ) {
+ my $i_beg = $ri_beg->[$n];
+ my $i_e = $ri_end->[$n];
+ my $K_beg = $K_to_go[$i_beg];
+ my $K_e = $K_to_go[$i_e];
+ my $K_end = $K_e;
+ my $type_end = $rLL->[$K_end]->[_TYPE_];
+ if ( $type_end eq '#' ) {
+ $K_end = $self->K_previous_nonblank($K_end);
+ if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; }
+ }
- # looking for opening or closing block brace
- $block_type_to_go[$max_index_to_go]
+ # we are looking for a line ending in closing brace
+ next
+ unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' );
- # never any good breaks if just one token
- && $max_index_to_go > 0
+ # ...and preceded by a semicolon on the same line
+ my $K_semicolon = $self->K_previous_nonblank($K_end);
+ next unless defined($K_semicolon);
+ my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg );
+ next if ( $i_semicolon <= $i_beg );
+ next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' );
- # but not one of these which are never duplicated on a line:
- # until|while|for|if|elsif|else
- && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go]
+ # Safety check - shouldn't happen - not critical
+ # This is not worth throwing a Fault, except in DEVEL_MODE
+ if ( $types_to_go[$i_semicolon] ne ';' ) {
+ DEVEL_MODE
+ && Fault("unexpected type looking for semicolon");
+ next;
}
- )
- {
- my $lev = $nesting_depth_to_go[$max_index_to_go];
- # Walk backwards from the end and
- # set break at any closing block braces at the same level.
- # But quit if we are not in a chain of blocks.
- foreach my $i ( reverse( 0 .. $max_index_to_go - 1 ) ) {
- last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
- next if ( $levels_to_go[$i] > $lev ); # skip past higher level
+ # ... with the corresponding opening brace on the same line
+ my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_];
+ my $K_opening = $K_opening_container->{$type_sequence};
+ next unless ( defined($K_opening) );
+ my $i_opening = $i_beg + ( $K_opening - $K_beg );
+ next if ( $i_opening < $i_beg );
- if ( $block_type_to_go[$i] ) {
- if ( $tokens_to_go[$i] eq '}' ) {
- $self->set_forced_breakpoint($i);
- $saw_good_break = 1;
- }
+ # ... and only one semicolon between these braces
+ my $semicolon_count = 0;
+ foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) {
+ if ( $rLL->[$K]->[_TYPE_] eq ';' ) {
+ $semicolon_count++;
+ last;
}
-
- # quit if we see anything besides words, function, blanks
- # at this level
- elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
}
- }
-
- #-----------------------------------------------
- # insertion of any blank lines before this batch
- #-----------------------------------------------
-
- my $imin = 0;
- my $imax = $max_index_to_go;
-
- # trim any blank tokens
- if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
- if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
+ next if ($semicolon_count);
- if ( $imin > $imax ) {
- if (DEVEL_MODE) {
- my $K0 = $K_to_go[0];
- my $lno = EMPTY_STRING;
- if ( defined($K0) ) { $lno = $rLL->[$K0]->[_LINE_INDEX_] + 1 }
- Fault(<<EOM);
-Strange: received batch containing only blanks near input line $lno: after trimming imin=$imin, imax=$imax
-EOM
+ # ...ok, then make the semicolon invisible
+ my $len = $token_lengths_to_go[$i_semicolon];
+ $tokens_to_go[$i_semicolon] = EMPTY_STRING;
+ $token_lengths_to_go[$i_semicolon] = 0;
+ $rLL->[$K_semicolon]->[_TOKEN_] = EMPTY_STRING;
+ $rLL->[$K_semicolon]->[_TOKEN_LENGTH_] = 0;
+ foreach ( $i_semicolon .. $max_index_to_go ) {
+ $summed_lengths_to_go[ $_ + 1 ] -= $len;
}
- return;
}
+ return;
+ } ## end sub delete_one_line_semicolons
- my $last_line_leading_type = $self->[_last_line_leading_type_];
- my $last_line_leading_level = $self->[_last_line_leading_level_];
- my $last_last_line_leading_level =
- $self->[_last_last_line_leading_level_];
+ use constant DEBUG_RECOMBINE => 0;
- # add a blank line before certain key types but not after a comment
- if ( $last_line_leading_type ne '#' ) {
- my $want_blank = 0;
- my $leading_token = $tokens_to_go[$imin];
- my $leading_type = $types_to_go[$imin];
+ sub recombine_breakpoints {
- # break before certain key blocks except one-liners
- if ( $leading_type eq 'k' ) {
- if ( $leading_token eq 'BEGIN' || $leading_token eq 'END' ) {
- $want_blank = $rOpts->{'blank-lines-before-subs'}
- if ( terminal_type_i( $imin, $imax ) ne '}' );
- }
+ my ( $self, $ri_beg, $ri_end, $rbond_strength_to_go ) = @_;
- # Break before certain block types if we haven't had a
- # break at this level for a while. This is the
- # difficult decision..
- elsif ($last_line_leading_type ne 'b'
- && $is_if_unless_while_until_for_foreach{$leading_token} )
- {
- my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
- if ( !defined($lc) ) { $lc = 0 }
+ # This sub implements the 'recombine' operation on a batch.
+ # Its task is to combine some of these lines back together to
+ # improve formatting. The need for this arises because
+ # sub 'break_long_lines' is very liberal in setting line breaks
+ # for long lines, always setting breaks at good breakpoints, even
+ # when that creates small lines. Sometimes small line fragments
+ # are produced which would look better if they were combined.
- # patch for RT #128216: no blank line inserted at a level
- # change
- if ( $levels_to_go[$imin] != $last_line_leading_level ) {
- $lc = 0;
- }
+ # Input parameters:
+ # $ri_beg = ref to array of BEGinning indexes of each line
+ # $ri_end = ref to array of ENDing indexes of each line
+ # $rbond_strength_to_go = array of bond strengths pulling
+ # tokens together, used to decide where best to recombine lines.
- $want_blank =
- $rOpts->{'blanks-before-blocks'}
- && $lc >= $rOpts->{'long-block-line-count'}
- && $self->consecutive_nonblank_lines() >=
- $rOpts->{'long-block-line-count'}
- && terminal_type_i( $imin, $imax ) ne '}';
- }
- }
+ #-------------------------------------------------------------------
+ # Do nothing under extreme stress; use <= 2 for c171.
+ # (NOTE: New optimizations make this unnecessary. But removing this
+ # check is not really useful because this condition only occurs in
+ # test runs, and another formatting pass will fix things anyway.)
+ # This routine has a long history of improvements. Some past
+ # relevant issues are : c118, c167, c171, c186, c187, c193, c200.
+ #-------------------------------------------------------------------
+ return if ( $high_stress_level <= 2 );
+
+ my $nmax_start = @{$ri_end} - 1;
+ return if ( $nmax_start <= 0 );
- # blank lines before subs except declarations and one-liners
- elsif ( $leading_type eq 'i' ) {
- if (
+ my $iend_max = $ri_end->[$nmax_start];
+ if ( $types_to_go[$iend_max] eq '#' ) {
+ $iend_max = iprev_to_go($iend_max);
+ }
+ my $has_terminal_semicolon =
+ $iend_max >= 0 && $types_to_go[$iend_max] eq ';';
- # quick check
- (
- substr( $leading_token, 0, 3 ) eq 'sub'
- || $rOpts_sub_alias_list
- )
+ #--------------------------------------------------------------------
+ # Break into the smallest possible sub-sections to improve efficiency
+ #--------------------------------------------------------------------
- # slow check
- && $leading_token =~ /$SUB_PATTERN/
- )
- {
- $want_blank = $rOpts->{'blank-lines-before-subs'}
- if ( terminal_type_i( $imin, $imax ) !~ /^[\;\}\,]$/ );
- }
+ # Also make a list of all good joining tokens between the lines
+ # n-1 and n.
+ my @joint;
- # break before all package declarations
- elsif ( substr( $leading_token, 0, 8 ) eq 'package ' ) {
- $want_blank = $rOpts->{'blank-lines-before-packages'};
- }
- }
+ my $rsections = [];
+ my $nbeg_sec = 0;
+ my $nend_sec;
+ my $nmax_section = 0;
+ foreach my $nn ( 1 .. $nmax_start ) {
+ my $ibeg_1 = $ri_beg->[ $nn - 1 ];
+ my $iend_1 = $ri_end->[ $nn - 1 ];
+ my $iend_2 = $ri_end->[$nn];
+ my $ibeg_2 = $ri_beg->[$nn];
- # Check for blank lines wanted before a closing brace
- elsif ( $leading_token eq '}' ) {
- if ( $rOpts->{'blank-lines-before-closing-block'}
- && $block_type_to_go[$imin]
- && $block_type_to_go[$imin] =~
- /$blank_lines_before_closing_block_pattern/ )
+ # Define certain good joint tokens
+ my ( $itok, $itokp, $itokm );
+ foreach my $itest ( $iend_1, $ibeg_2 ) {
+ my $type = $types_to_go[$itest];
+ if ( $is_math_op{$type}
+ || $is_amp_amp{$type}
+ || $is_assignment{$type}
+ || $type eq ':' )
{
- my $nblanks = $rOpts->{'blank-lines-before-closing-block'};
- if ( $nblanks > $want_blank ) {
- $want_blank = $nblanks;
- }
+ $itok = $itest;
}
}
- if ($want_blank) {
+ # joint[$nn] = index of joint character
+ $joint[$nn] = $itok;
- # future: send blank line down normal path to VerticalAligner
- $self->flush_vertical_aligner();
- my $file_writer_object = $self->[_file_writer_object_];
- $file_writer_object->require_blank_code_lines($want_blank);
+ # Update the section list
+ my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 );
+ if (
+ $excess <= 1
+
+ # The number 5 here is an arbitrary small number intended
+ # to keep most small matches in one sub-section.
+ || ( defined($nend_sec)
+ && ( $nn < 5 || $nmax_start - $nn < 5 ) )
+ )
+ {
+ $nend_sec = $nn;
+ }
+ else {
+ if ( defined($nend_sec) ) {
+ push @{$rsections}, [ $nbeg_sec, $nend_sec ];
+ my $num = $nend_sec - $nbeg_sec;
+ if ( $num > $nmax_section ) { $nmax_section = $num }
+ $nbeg_sec = $nn;
+ $nend_sec = undef;
+ }
+ $nbeg_sec = $nn;
}
}
- # update blank line variables and count number of consecutive
- # non-blank, non-comment lines at this level
- $last_last_line_leading_level = $last_line_leading_level;
- $last_line_leading_level = $levels_to_go[$imin];
- if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
- $last_line_leading_type = $types_to_go[$imin];
- if ( $last_line_leading_level == $last_last_line_leading_level
- && $last_line_leading_type ne 'b'
- && $last_line_leading_type ne '#'
- && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
- {
- $nonblank_lines_at_depth[$last_line_leading_level]++;
- }
- else {
- $nonblank_lines_at_depth[$last_line_leading_level] = 1;
+ if ( defined($nend_sec) ) {
+ push @{$rsections}, [ $nbeg_sec, $nend_sec ];
+ my $num = $nend_sec - $nbeg_sec;
+ if ( $num > $nmax_section ) { $nmax_section = $num }
}
- $self->[_last_line_leading_type_] = $last_line_leading_type;
- $self->[_last_line_leading_level_] = $last_line_leading_level;
- $self->[_last_last_line_leading_level_] = $last_last_line_leading_level;
-
- #--------------------------
- # scan lists and long lines
- #--------------------------
-
- # Flag to remember if we called sub 'pad_array_to_go'.
- # Some routines (break_lists(), break_long_lines() ) need some
- # extra tokens added at the end of the batch. Most batches do not
- # use these routines, so we will avoid calling 'pad_array_to_go'
- # unless it is needed.
- my $called_pad_array_to_go;
+ my $num_sections = @{$rsections};
- # set all forced breakpoints for good list formatting
- my $is_long_line = $max_index_to_go > 0
- && $self->excess_line_length( $imin, $max_index_to_go ) > 0;
+ if ( DEBUG_RECOMBINE > 1 ) {
+ print STDERR <<EOM;
+sections=$num_sections; nmax_sec=$nmax_section
+EOM
+ }
- my $old_line_count_in_batch = 1;
- if ( $max_index_to_go > 0 ) {
- my $Kbeg = $K_to_go[0];
- my $Kend = $K_to_go[$max_index_to_go];
- $old_line_count_in_batch +=
- $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_];
+ if ( DEBUG_RECOMBINE > 0 ) {
+ my $max = 0;
+ print STDERR
+ "-----\n$num_sections sections found for nmax=$nmax_start\n";
+ foreach my $sect ( @{$rsections} ) {
+ my ( $nbeg, $nend ) = @{$sect};
+ my $num = $nend - $nbeg;
+ if ( $num > $max ) { $max = $num }
+ print STDERR "$nbeg $nend\n";
+ }
+ print STDERR "max size=$max of $nmax_start lines\n";
}
- my $rbond_strength_bias = [];
- if (
- $is_long_line
- || $old_line_count_in_batch > 1
+ # Loop over all sub-sections. Note that we have to work backwards
+ # from the end of the batch since the sections use original line
+ # numbers, and the line numbers change as we go.
+ while ( my $section = pop @{$rsections} ) {
+ my ( $nbeg, $nend ) = @{$section};
+ $self->recombine_section_loop(
+ {
+ _ri_beg => $ri_beg,
+ _ri_end => $ri_end,
+ _nbeg => $nbeg,
+ _nend => $nend,
+ _rjoint => \@joint,
+ _rbond_strength_to_go => $rbond_strength_to_go,
+ _has_terminal_semicolon => $has_terminal_semicolon,
+ }
+ );
+ }
- # must always call break_lists() with unbalanced batches because
- # it is maintaining some stacks
- || $is_unbalanced_batch
+ return;
+ } ## end sub recombine_breakpoints
- # call break_lists if we might want to break at commas
- || (
- $comma_count_in_batch
- && ( $rOpts_maximum_fields_per_table > 0
- && $rOpts_maximum_fields_per_table <= $comma_count_in_batch
- || $rOpts_comma_arrow_breakpoints == 0 )
- )
+ sub recombine_section_loop {
+ my ( $self, $rhash ) = @_;
- # call break_lists if user may want to break open some one-line
- # hash references
- || ( $comma_arrow_count_contained
- && $rOpts_comma_arrow_breakpoints != 3 )
- )
- {
- # add a couple of extra terminal blank tokens
- $self->pad_array_to_go();
- $called_pad_array_to_go = 1;
+ # Recombine breakpoints for one section of lines in the current batch
- my $sgb = $self->break_lists( $is_long_line, $rbond_strength_bias );
- $saw_good_break ||= $sgb;
- }
+ # Given:
+ # $ri_beg, $ri_end = ref to arrays with token indexes of the first
+ # and last line
+ # $nbeg, $nend = line numbers bounding this section
+ # $rjoint = ref to array of good joining tokens per line
- # let $ri_first and $ri_last be references to lists of
- # first and last tokens of line fragments to output..
- my ( $ri_first, $ri_last );
+ # Update: $ri_beg, $ri_end, $rjoint if lines are joined
- #-------------------------
- # write a single line if..
- #-------------------------
- if (
+ # Returns:
+ # nothing
- # we aren't allowed to add any newlines
- !$rOpts_add_newlines
+ #-------------
+ # Definitions:
+ #-------------
+ # $rhash = {
- # or,
- || (
+ # _ri_beg = ref to array with starting token index by line
+ # _ri_end = ref to array with ending token index by line
+ # _nbeg = first line number of this section
+ # _nend = last line number of this section
+ # _rjoint = ref to array of good joining tokens for each line
+ # _rbond_strength_to_go = array of bond strengths
+ # _has_terminal_semicolon = true if last line of batch has ';'
- # this line is 'short'
- !$is_long_line
+ # _num_freeze = fixed number of lines at end of this batch
+ # _optimization_on = true during final optimization loop
+ # _num_compares = total number of line compares made so far
+ # _pair_list = list of line pairs in optimal search order
- # and we didn't see a good breakpoint
- && !$saw_good_break
+ # };
- # and we don't already have an interior breakpoint
- && !$forced_breakpoint_count
- )
- )
- {
- @{$ri_first} = ($imin);
- @{$ri_last} = ($imax);
- }
+ my $ri_beg = $rhash->{_ri_beg};
+ my $ri_end = $rhash->{_ri_end};
+
+ # Line index range of this section:
+ my $nbeg = $rhash->{_nbeg}; # stays constant
+ my $nend = $rhash->{_nend}; # will decrease
+
+ # $nmax_batch = starting number of lines in the full batch
+ # $num_freeze = number of lines following this section to leave alone
+ my $nmax_batch = @{$ri_end} - 1;
+ $rhash->{_num_freeze} = $nmax_batch - $nend;
+
+ # Setup the list of line pairs to test. This stores the following
+ # values for each line pair:
+ # [ $n=index of the second line of the pair, $bs=bond strength]
+ my @pair_list;
+ my $rbond_strength_to_go = $rhash->{_rbond_strength_to_go};
+ foreach my $n ( $nbeg + 1 .. $nend ) {
+ my $iend_1 = $ri_end->[ $n - 1 ];
+ my $ibeg_2 = $ri_beg->[$n];
+ my $bs_tweak = 0;
+ if ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) { $bs_tweak = 0.25 }
+ my $bs = $rbond_strength_to_go->[$iend_1] + $bs_tweak;
+ push @pair_list, [ $n, $bs ];
+ }
+
+ # Any order for testing is possible, but optimization is only possible
+ # if we sort the line pairs on decreasing joint strength.
+ @pair_list =
+ sort { $b->[1] <=> $a->[1] || $a->[0] <=> $b->[0] } @pair_list;
+ $rhash->{_rpair_list} = \@pair_list;
+
+ #----------------
+ # Iteration limit
+ #----------------
+
+ # This was originally an O(n-squared) loop which required a check on
+ # the maximum number of iterations for safety. It is now a very fast
+ # loop which runs in O(n) time, but a check on total number of
+ # iterations is retained to guard against future programming errors.
+
+ # Most cases require roughly 1 comparison per line pair (1 full pass).
+ # The upper bound is estimated to be about 3 comparisons per line pair
+ # unless optimization is deactivated. The approximate breakdown is:
+ # 1 pass with 1 compare per joint to do any special cases, plus
+ # 1 pass with up to 2 compares per joint in optimization mode
+ # The most extreme cases in my collection are:
+ # camel1.t - needs 2.7 compares per line (12 without optimization)
+ # ternary.t - needs 2.8 compares per line (12 without optimization)
+ # So a value of MAX_COMPARE_RATIO = 3 looks like an upper bound as
+ # long as optimization is used. A value of 20 should allow all code to
+ # pass even if optimization is turned off for testing.
+
+ # The OPTIMIZE_OK flag should be true except for testing.
+ use constant MAX_COMPARE_RATIO => 20;
+ use constant OPTIMIZE_OK => 1;
+
+ my $num_pairs = $nend - $nbeg + 1;
+ my $max_compares = MAX_COMPARE_RATIO * $num_pairs;
+
+ # Always start with optimization off
+ $rhash->{_num_compares} = 0;
+ $rhash->{_optimization_on} = 0;
+ $rhash->{_ix_best_last} = 0;
- #-----------------------------
- # otherwise use multiple lines
- #-----------------------------
- else {
+ #--------------------------------------------
+ # loop until there are no more recombinations
+ #--------------------------------------------
+ my $nmax_last = $nmax_batch + 1;
+ while (1) {
- # add a couple of extra terminal blank tokens if we haven't
- # already done so
- $self->pad_array_to_go() unless ($called_pad_array_to_go);
+ # Stop when the number of lines in the batch does not decrease
+ $nmax_batch = @{$ri_end} - 1;
+ if ( $nmax_batch >= $nmax_last ) {
+ last;
+ }
+ $nmax_last = $nmax_batch;
- ( $ri_first, $ri_last, my $rbond_strength_to_go ) =
- $self->break_long_lines( $saw_good_break, \@colon_list,
- $rbond_strength_bias );
+ #-----------------------------------------
+ # inner loop to find next best combination
+ #-----------------------------------------
+ $self->recombine_inner_loop($rhash);
- $self->break_all_chain_tokens( $ri_first, $ri_last );
+ # Iteration limit check:
+ if ( $rhash->{_num_compares} > $max_compares ) {
- $self->break_equals( $ri_first, $ri_last );
+ # See note above; should only get here on a programming error
+ if (DEVEL_MODE) {
+ my $ibeg = $ri_beg->[$nbeg];
+ my $Kbeg = $K_to_go[$ibeg];
+ my $lno = $self->[_rLL_]->[$Kbeg]->[_LINE_INDEX_];
+ Fault(<<EOM);
+inner loop passes =$rhash->{_num_compares} exceeds max=$max_compares, near line $lno
+EOM
+ }
+ last;
+ }
- # now we do a correction step to clean this up a bit
- # (The only time we would not do this is for debugging)
- $self->recombine_breakpoints( $ri_first, $ri_last,
- $rbond_strength_to_go )
- if ( $rOpts_recombine && @{$ri_first} > 1 );
+ } ## end iteration loop
- $self->insert_final_ternary_breaks( $ri_first, $ri_last )
- if (@colon_list);
+ if (DEBUG_RECOMBINE) {
+ my $ratio = sprintf "%0.3f", $rhash->{_num_compares} / $num_pairs;
+ print STDERR
+"exiting recombine_inner_loop with $nmax_last lines, opt=$rhash->{_optimization_on}, starting pairs=$num_pairs, num_compares=$rhash->{_num_compares}, ratio=$ratio\n";
}
- $self->insert_breaks_before_list_opening_containers( $ri_first,
- $ri_last )
- if ( %break_before_container_types && $max_index_to_go > 0 );
+ return;
+ } ## end sub recombine_section_loop
- #-------------------
- # -lp corrector step
- #-------------------
- my $do_not_pad = 0;
- if ($rOpts_line_up_parentheses) {
- $do_not_pad = $self->correct_lp_indentation( $ri_first, $ri_last );
- }
+ sub recombine_inner_loop {
+ my ( $self, $rhash ) = @_;
- #--------------------------
- # unmask phantom semicolons
- #--------------------------
- if ( !$tokens_to_go[$imax] && $types_to_go[$imax] eq ';' ) {
- my $i = $imax;
- my $tok = ';';
- my $tok_len = 1;
- if ( $want_left_space{';'} != WS_NO ) {
- $tok = ' ;';
- $tok_len = 2;
- }
- $tokens_to_go[$i] = $tok;
- $token_lengths_to_go[$i] = $tok_len;
- my $KK = $K_to_go[$i];
- $rLL->[$KK]->[_TOKEN_] = $tok;
- $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
- my $line_number = 1 + $rLL->[$KK]->[_LINE_INDEX_];
- $self->note_added_semicolon($line_number);
+ # This is the inner loop of the recombine operation. We look at all of
+ # the remaining joints in this section and select the best joint to be
+ # recombined. If a recombination is made, the number of lines
+ # in this section will be reduced by one.
- foreach ( $imax .. $max_index_to_go ) {
- $summed_lengths_to_go[ $_ + 1 ] += $tok_len;
- }
- }
+ # Returns: nothing
- if ( $rOpts_one_line_block_semicolons == 0 ) {
- $self->delete_one_line_semicolons( $ri_first, $ri_last );
- }
+ my $rK_weld_right = $self->[_rK_weld_right_];
+ my $rK_weld_left = $self->[_rK_weld_left_];
- #--------------------
- # ship this batch out
- #--------------------
- $this_batch->[_ri_first_] = $ri_first;
- $this_batch->[_ri_last_] = $ri_last;
- $this_batch->[_peak_batch_size_] = $peak_batch_size;
- $this_batch->[_do_not_pad_] = $do_not_pad;
- $this_batch->[_batch_count_] = $batch_count;
- $this_batch->[_rix_seqno_controlling_ci_] = \@ix_seqno_controlling_ci;
+ my $ri_beg = $rhash->{_ri_beg};
+ my $ri_end = $rhash->{_ri_end};
+ my $nbeg = $rhash->{_nbeg};
+ my $rjoint = $rhash->{_rjoint};
+ my $rbond_strength_to_go = $rhash->{_rbond_strength_to_go};
+ my $rpair_list = $rhash->{_rpair_list};
- $self->convey_batch_to_vertical_aligner();
+ # This will remember the best joint:
+ my $n_best = 0;
+ my $bs_best = 0.;
+ my $ix_best = 0;
+ my $num_bs = 0;
- #-------------------------------------------------------------------
- # Write requested number of blank lines after an opening block brace
- #-------------------------------------------------------------------
- if ($rOpts_blank_lines_after_opening_block) {
- my $iterm = $imax;
- if ( $types_to_go[$iterm] eq '#' && $iterm > $imin ) {
- $iterm -= 1;
- if ( $types_to_go[$iterm] eq 'b' && $iterm > $imin ) {
- $iterm -= 1;
- }
- }
+ # The range of lines in this group is $nbeg to $nstop
+ my $nmax = @{$ri_end} - 1;
+ my $nstop = $nmax - $rhash->{_num_freeze};
+ my $num_joints = $nstop - $nbeg;
- if ( $types_to_go[$iterm] eq '{'
- && $block_type_to_go[$iterm]
- && $block_type_to_go[$iterm] =~
- /$blank_lines_after_opening_block_pattern/ )
- {
- my $nblanks = $rOpts_blank_lines_after_opening_block;
- $self->flush_vertical_aligner();
- my $file_writer_object = $self->[_file_writer_object_];
- $file_writer_object->require_blank_code_lines($nblanks);
- }
+ # Turn off optimization if just two joints remain to allow
+ # special two-line logic to be checked (c193)
+ if ( $rhash->{_optimization_on} && $num_joints <= 2 ) {
+ $rhash->{_optimization_on} = 0;
}
- # Remember the largest batch size processed. This is needed by the
- # logical padding routine to avoid padding the first nonblank token
- if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) {
- $peak_batch_size = $max_index_to_go;
- }
+ # Start where we ended the last search
+ my $ix_start = $rhash->{_ix_best_last};
- return;
- } ## end sub grind_batch_of_CODE
+ # Keep the starting index in bounds
+ $ix_start = max( 0, $ix_start );
- sub save_opening_indentation {
+ # Make a search order list which cycles around to visit
+ # all line pairs.
+ my $ix_max = @{$rpair_list} - 1;
+ my @ix_list = ( $ix_start .. $ix_max, 0 .. $ix_start - 1 );
+ my $ix_last = $ix_list[-1];
- # This should be called after each batch of tokens is output. It
- # saves indentations of lines of all unmatched opening tokens.
- # These will be used by sub get_opening_indentation.
+ #-------------------------
+ # loop over all line pairs
+ #-------------------------
+ my $incomplete_loop;
+ foreach my $ix (@ix_list) {
+ my $item = $rpair_list->[$ix];
+ my ( $n, $bs ) = @{$item};
+
+ # This flag will be true if we 'last' out of this loop early.
+ # We cannot turn on optimization if this is true.
+ $incomplete_loop = $ix != $ix_last;
+
+ # Update the count of the number of times through this inner loop
+ $rhash->{_num_compares}++;
+
+ #----------------------------------------------------------
+ # If we join the current pair of lines,
+ # line $n-1 will become the left part of the joined line
+ # line $n will become the right part of the joined line
+ #
+ # Here are Indexes of the endpoint tokens of the two lines:
+ #
+ # -----line $n-1--- | -----line $n-----
+ # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
+ # ^
+ # |
+ # We want to decide if we should remove the line break
+ # between the tokens at $iend_1 and $ibeg_2
+ #
+ # We will apply a number of ad-hoc tests to see if joining
+ # here will look ok. The code will just move to the next
+ # pair if the join doesn't look good. If we get through
+ # the gauntlet of tests, the lines will be recombined.
+ #----------------------------------------------------------
+ #
+ # beginning and ending tokens of the lines we are working on
+ my $ibeg_1 = $ri_beg->[ $n - 1 ];
+ my $iend_1 = $ri_end->[ $n - 1 ];
+ my $iend_2 = $ri_end->[$n];
+ my $ibeg_2 = $ri_beg->[$n];
- my ( $self, $ri_first, $ri_last, $rindentation_list ) = @_;
+ # The combined line cannot be too long
+ my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 );
+ next if ( $excess > 0 );
- # QW INDENTATION PATCH 1:
- # Also save indentation for multiline qw quotes
- my @i_qw;
- my $seqno_qw_opening;
- if ( $types_to_go[$max_index_to_go] eq 'q' ) {
- my $KK = $K_to_go[$max_index_to_go];
- $seqno_qw_opening =
- $self->[_rstarting_multiline_qw_seqno_by_K_]->{$KK};
- if ($seqno_qw_opening) {
- push @i_qw, $max_index_to_go;
- }
- }
+ my $type_iend_1 = $types_to_go[$iend_1];
+ my $type_iend_2 = $types_to_go[$iend_2];
+ my $type_ibeg_1 = $types_to_go[$ibeg_1];
+ my $type_ibeg_2 = $types_to_go[$ibeg_2];
- # we need to save indentations of any unmatched opening tokens
- # in this batch because we may need them in a subsequent batch.
- foreach ( @unmatched_opening_indexes_in_this_batch, @i_qw ) {
+ DEBUG_RECOMBINE > 1 && do {
+ print STDERR
+"RECOMBINE: ix=$ix iend1=$iend_1 iend2=$iend_2 n=$n nmax=$nmax if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
+ };
- my $seqno = $type_sequence_to_go[$_];
+ # If line $n is the last line, we set some flags and
+ # do any special checks for it
+ my $this_line_is_semicolon_terminated;
+ if ( $n == $nmax ) {
- if ( !$seqno ) {
- if ( $seqno_qw_opening && $_ == $max_index_to_go ) {
- $seqno = $seqno_qw_opening;
- }
- else {
+ if ( $type_ibeg_2 eq '{' ) {
- # shouldn't happen
- $seqno = 'UNKNOWN';
+ # join isolated ')' and '{' if requested (git #110)
+ if ( $rOpts_cuddled_paren_brace
+ && $type_iend_1 eq '}'
+ && $iend_1 == $ibeg_1
+ && $ibeg_2 == $iend_2 )
+ {
+ if ( $tokens_to_go[$iend_1] eq ')'
+ && $tokens_to_go[$ibeg_2] eq '{' )
+ {
+ $n_best = $n;
+ $ix_best = $ix;
+ last;
+ }
+ }
+
+ # otherwise, a terminal '{' should stay where it is
+ # unless preceded by a fat comma
+ next if ( $type_iend_1 ne '=>' );
}
- }
- $saved_opening_indentation{$seqno} = [
- lookup_opening_indentation(
- $_, $ri_first, $ri_last, $rindentation_list
- )
- ];
- }
- return;
- } ## end sub save_opening_indentation
+ $this_line_is_semicolon_terminated =
+ $rhash->{_has_terminal_semicolon};
- sub get_saved_opening_indentation {
- my ($seqno) = @_;
- my ( $indent, $offset, $is_leading, $exists ) = ( 0, 0, 0, 0 );
+ }
- if ($seqno) {
- if ( $saved_opening_indentation{$seqno} ) {
- ( $indent, $offset, $is_leading ) =
- @{ $saved_opening_indentation{$seqno} };
- $exists = 1;
+ #----------------------------------------------------------
+ # Recombine Section 0:
+ # Examine the special token joining this line pair, if any.
+ # Put as many tests in this section to avoid duplicate code
+ # and to make formatting independent of whether breaks are
+ # to the left or right of an operator.
+ #----------------------------------------------------------
+
+ my $itok = $rjoint->[$n];
+ if ($itok) {
+ my $ok_0 = recombine_section_0( $itok, $ri_beg, $ri_end, $n );
+ next if ( !$ok_0 );
}
- }
- # some kind of serious error it doesn't exist
- # (example is badfile.t)
+ #----------------------------------------------------------
+ # Recombine Section 1:
+ # Join welded nested containers immediately
+ #----------------------------------------------------------
- return ( $indent, $offset, $is_leading, $exists );
- } ## end sub get_saved_opening_indentation
-} ## end closure grind_batch_of_CODE
+ if (
+ $total_weld_count
+ && ( $type_sequence_to_go[$iend_1]
+ && defined( $rK_weld_right->{ $K_to_go[$iend_1] } )
+ || $type_sequence_to_go[$ibeg_2]
+ && defined( $rK_weld_left->{ $K_to_go[$ibeg_2] } ) )
+ )
+ {
+ $n_best = $n;
+ $ix_best = $ix;
+ last;
+ }
-sub lookup_opening_indentation {
+ #----------------------------------------------------------
+ # Recombine Section 2:
+ # Examine token at $iend_1 (right end of first line of pair)
+ #----------------------------------------------------------
+
+ my ( $ok_2, $skip_Section_3 ) =
+ recombine_section_2( $ri_beg, $ri_end, $n,
+ $this_line_is_semicolon_terminated );
+ next if ( !$ok_2 );
+
+ #----------------------------------------------------------
+ # Recombine Section 3:
+ # Examine token at $ibeg_2 (left end of second line of pair)
+ #----------------------------------------------------------
+
+ # Join lines identified above as capable of
+ # causing an outdented line with leading closing paren.
+ # Note that we are skipping the rest of this section
+ # and the rest of the loop to do the join.
+ if ($skip_Section_3) {
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ $n_best = $n;
+ $ix_best = $ix;
+ $incomplete_loop = 1;
+ last;
+ }
- # get the indentation of the line in the current output batch
- # which output a selected opening token
- #
- # given:
- # $i_opening - index of an opening token in the current output batch
- # whose line indentation we need
- # $ri_first - reference to list of the first index $i for each output
- # line in this batch
- # $ri_last - reference to list of the last index $i for each output line
- # in this batch
- # $rindentation_list - reference to a list containing the indentation
- # used for each line. (NOTE: the first slot in
- # this list is the last returned line number, and this is
- # followed by the list of indentations).
- #
- # return
- # -the indentation of the line which contained token $i_opening
- # -and its offset (number of columns) from the start of the line
+ my ( $ok_3, $bs_tweak ) =
+ recombine_section_3( $ri_beg, $ri_end, $n,
+ $this_line_is_semicolon_terminated );
+ next if ( !$ok_3 );
- my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
+ #----------------------------------------------------------
+ # Recombine Section 4:
+ # Combine the lines if we arrive here and it is possible
+ #----------------------------------------------------------
- if ( !@{$ri_last} ) {
+ # honor hard breakpoints
+ next if ( $forced_breakpoint_to_go[$iend_1] );
- # An error here implies a bug introduced by a recent program change.
- # Every batch of code has lines, so this should never happen.
- if (DEVEL_MODE) {
- Fault("Error in opening_indentation: no lines");
- }
- return ( 0, 0, 0 );
- }
+ if (DEVEL_MODE) {
- my $nline = $rindentation_list->[0]; # line number of previous lookup
+ # This fault can only occur if an array index error has been
+ # introduced by a recent programming change.
+ my $bs_check = $rbond_strength_to_go->[$iend_1] + $bs_tweak;
+ if ( $bs_check != $bs ) {
+ Fault(<<EOM);
+bs=$bs != $bs_check for break after type $type_iend_1 ix=$ix n=$n
+EOM
+ }
+ }
- # reset line location if necessary
- $nline = 0 if ( $i_opening < $ri_start->[$nline] );
+ # Require a few extra spaces before recombining lines if we
+ # are at an old breakpoint unless this is a simple list or
+ # terminal line. The goal is to avoid oscillating between
+ # two quasi-stable end states. For example this snippet
+ # caused problems:
- # find the correct line
- unless ( $i_opening > $ri_last->[-1] ) {
- while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
- }
+## my $this =
+## bless {
+## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
+## },
+## $type;
+ next
+ if ( $old_breakpoint_to_go[$iend_1]
+ && !$this_line_is_semicolon_terminated
+ && $n < $nmax
+ && $excess + 4 > 0
+ && $type_iend_2 ne ',' );
+
+ # do not recombine if we would skip in indentation levels
+ if ( $n < $nmax ) {
+ my $if_next = $ri_beg->[ $n + 1 ];
+ next
+ if (
+ $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
+ && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
- # Error - token index is out of bounds - shouldn't happen
- # A program bug has been introduced in one of the calling routines.
- # We better stop here.
- else {
- my $i_last_line = $ri_last->[-1];
- if (DEVEL_MODE) {
- Fault(<<EOM);
-Program bug in call to lookup_opening_indentation - index out of range
- called with index i_opening=$i_opening > $i_last_line = max index of last line
-This batch has max index = $max_index_to_go,
-EOM
- }
- $nline = $#{$ri_last};
- }
+ # but an isolated 'if (' is undesirable
+ && !(
+ $n == 1
+ && $iend_1 - $ibeg_1 <= 2
+ && $type_ibeg_1 eq 'k'
+ && $tokens_to_go[$ibeg_1] eq 'if'
+ && $tokens_to_go[$iend_1] ne '('
+ )
+ );
+ }
- $rindentation_list->[0] =
- $nline; # save line number to start looking next call
- my $ibeg = $ri_start->[$nline];
- my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
- my $is_leading = ( $ibeg == $i_opening );
- return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
-} ## end sub lookup_opening_indentation
+ ## OLD: honor no-break's
+ ## next if ( $bs >= NO_BREAK - 1 ); # removed for b1257
-sub terminal_type_i {
+ # remember the pair with the greatest bond strength
+ if ( !$n_best ) {
- # returns type of last token on this line (terminal token), as follows:
- # returns # for a full-line comment
- # returns ' ' for a blank line
- # otherwise returns final token type
+ # First good joint ...
+ $n_best = $n;
+ $ix_best = $ix;
+ $bs_best = $bs;
+ $num_bs = 1;
- my ( $ibeg, $iend ) = @_;
+ # In optimization mode: stop on the first acceptable joint
+ # because we already know it has the highest strength
+ if ( $rhash->{_optimization_on} == 1 ) {
+ last;
+ }
+ }
+ else {
- # Start at the end and work backwards
- my $i = $iend;
- my $type_i = $types_to_go[$i];
+ # Second and later joints ..
+ $num_bs++;
- # Check for side comment
- if ( $type_i eq '#' ) {
- $i--;
- if ( $i < $ibeg ) {
- return wantarray ? ( $type_i, $ibeg ) : $type_i;
- }
- $type_i = $types_to_go[$i];
- }
+ # save maximum strength; in case of a tie select min $n
+ if ( $bs > $bs_best || $bs == $bs_best && $n < $n_best ) {
+ $n_best = $n;
+ $ix_best = $ix;
+ $bs_best = $bs;
+ }
+ }
- # Skip past a blank
- if ( $type_i eq 'b' ) {
- $i--;
- if ( $i < $ibeg ) {
- return wantarray ? ( $type_i, $ibeg ) : $type_i;
- }
- $type_i = $types_to_go[$i];
- }
+ } ## end loop over all line pairs
- # Found it..make sure it is a BLOCK termination,
- # but hide a terminal } after sort/map/grep/eval/do because it is not
- # necessarily the end of the line. (terminal.t)
- my $block_type = $block_type_to_go[$i];
- if (
- $type_i eq '}'
- && ( !$block_type
- || $is_sort_map_grep_eval_do{$block_type} )
- )
- {
- $type_i = 'b';
- }
- return wantarray ? ( $type_i, $i ) : $type_i;
-} ## end sub terminal_type_i
+ #---------------------------------------------------
+ # recombine the pair with the greatest bond strength
+ #---------------------------------------------------
+ if ($n_best) {
+ DEBUG_RECOMBINE > 1
+ && print "BEST: nb=$n_best nbeg=$nbeg stop=$nstop bs=$bs_best\n";
+ splice @{$ri_beg}, $n_best, 1;
+ splice @{$ri_end}, $n_best - 1, 1;
+ splice @{$rjoint}, $n_best, 1;
+
+ splice @{$rpair_list}, $ix_best, 1;
+
+ # Update the line indexes in the pair list:
+ # Old $n values greater than the best $n decrease by 1
+ # because of the splice we just did.
+ foreach my $item ( @{$rpair_list} ) {
+ my $n_old = $item->[0];
+ if ( $n_old > $n_best ) { $item->[0] -= 1 }
+ }
+
+ # Store the index of this location for starting the next search.
+ # We must subtract 1 to get an updated index because the splice
+ # above just removed the best pair.
+ # BUT CAUTION: if this is the first pair in the pair list, then
+ # this produces an invalid index. So this index must be tested
+ # before use in the next pass through the outer loop.
+ $rhash->{_ix_best_last} = $ix_best - 1;
+
+ # Turn on optimization if ...
+ if (
-sub pad_array_to_go {
+ # it is not already on, and
+ !$rhash->{_optimization_on}
- # To simplify coding in break_lists and set_bond_strengths, it helps to
- # create some extra blank tokens at the end of the arrays. We also add
- # some undef's to help guard against using invalid data.
- my ($self) = @_;
- $K_to_go[ $max_index_to_go + 1 ] = undef;
- $tokens_to_go[ $max_index_to_go + 1 ] = EMPTY_STRING;
- $tokens_to_go[ $max_index_to_go + 2 ] = EMPTY_STRING;
- $tokens_to_go[ $max_index_to_go + 3 ] = undef;
- $types_to_go[ $max_index_to_go + 1 ] = 'b';
- $types_to_go[ $max_index_to_go + 2 ] = 'b';
- $types_to_go[ $max_index_to_go + 3 ] = undef;
- $nesting_depth_to_go[ $max_index_to_go + 2 ] = undef;
- $nesting_depth_to_go[ $max_index_to_go + 1 ] =
- $nesting_depth_to_go[$max_index_to_go];
+ # we have not taken a shortcut to get here, and
+ && !$incomplete_loop
- # /^[R\}\)\]]$/
- if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
- if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
+ # we have seen a good break on strength, and
+ && $num_bs
- # Nesting depths are set to be >=0 in sub write_line, so it should
- # not be possible to get here unless the code has a bracing error
- # which leaves a closing brace with zero nesting depth.
- unless ( get_saw_brace_error() ) {
- if (DEVEL_MODE) {
- Fault(<<EOM);
-Program bug in pad_array_to_go: hit nesting error which should have been caught
-EOM
+ # we are allowed to optimize
+ && OPTIMIZE_OK
+
+ )
+ {
+ $rhash->{_optimization_on} = 1;
+ if (DEBUG_RECOMBINE) {
+ my $num_compares = $rhash->{_num_compares};
+ my $pair_count = @ix_list;
+ print STDERR
+"Entering optimization phase at $num_compares compares, pair count = $pair_count\n";
}
}
}
- else {
- $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
- }
- }
+ return;
+ } ## end sub recombine_inner_loop
- # /^[L\{\(\[]$/
- elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
- $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
- }
- return;
-} ## end sub pad_array_to_go
+ sub recombine_section_0 {
+ my ( $itok, $ri_beg, $ri_end, $n ) = @_;
-sub break_all_chain_tokens {
+ # Recombine Section 0:
+ # Examine special candidate joining token $itok
- # scan the current breakpoints looking for breaks at certain "chain
- # operators" (. : && || + etc) which often occur repeatedly in a long
- # statement. If we see a break at any one, break at all similar tokens
- # within the same container.
- #
- my ( $self, $ri_left, $ri_right ) = @_;
+ # Given:
+ # $itok = index of token at a possible join of lines $n-1 and $n
- my %saw_chain_type;
- my %left_chain_type;
- my %right_chain_type;
- my %interior_chain_type;
- my $nmax = @{$ri_right} - 1;
+ # Return:
+ # true => ok to combine
+ # false => do not combine lines
- # scan the left and right end tokens of all lines
- my $count = 0;
- for my $n ( 0 .. $nmax ) {
- my $il = $ri_left->[$n];
- my $ir = $ri_right->[$n];
- my $typel = $types_to_go[$il];
- my $typer = $types_to_go[$ir];
- $typel = '+' if ( $typel eq '-' ); # treat + and - the same
- $typer = '+' if ( $typer eq '-' );
- $typel = '*' if ( $typel eq '/' ); # treat * and / the same
- $typer = '*' if ( $typer eq '/' );
+ # Here are Indexes of the endpoint tokens of the two lines:
+ #
+ # -----line $n-1--- | -----line $n-----
+ # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
+ # ^ ^
+ # | |
+ # ------------$itok is one of these tokens
- my $keyl = $typel eq 'k' ? $tokens_to_go[$il] : $typel;
- my $keyr = $typer eq 'k' ? $tokens_to_go[$ir] : $typer;
- if ( $is_chain_operator{$keyl} && $want_break_before{$typel} ) {
- next if ( $typel eq '?' );
- push @{ $left_chain_type{$keyl} }, $il;
- $saw_chain_type{$keyl} = 1;
- $count++;
- }
- if ( $is_chain_operator{$keyr} && !$want_break_before{$typer} ) {
- next if ( $typer eq '?' );
- push @{ $right_chain_type{$keyr} }, $ir;
- $saw_chain_type{$keyr} = 1;
- $count++;
- }
- }
- return unless $count;
+ # Put as many tests in this section to avoid duplicate code
+ # and to make formatting independent of whether breaks are
+ # to the left or right of an operator.
- # now look for any interior tokens of the same types
- $count = 0;
- for my $n ( 0 .. $nmax ) {
- my $il = $ri_left->[$n];
- my $ir = $ri_right->[$n];
- foreach my $i ( $il + 1 .. $ir - 1 ) {
- my $type = $types_to_go[$i];
- my $key = $type eq 'k' ? $tokens_to_go[$i] : $type;
- $key = '+' if ( $key eq '-' );
- $key = '*' if ( $key eq '/' );
- if ( $saw_chain_type{$key} ) {
- push @{ $interior_chain_type{$key} }, $i;
- $count++;
- }
- }
- }
- return unless $count;
+ my $nmax = @{$ri_end} - 1;
+ my $ibeg_1 = $ri_beg->[ $n - 1 ];
+ my $iend_1 = $ri_end->[ $n - 1 ];
+ my $ibeg_2 = $ri_beg->[$n];
+ my $iend_2 = $ri_end->[$n];
+
+ if ($itok) {
+
+ my $type = $types_to_go[$itok];
+
+ if ( $type eq ':' ) {
+
+ # do not join at a colon unless it disobeys the
+ # break request
+ if ( $itok eq $iend_1 ) {
+ return unless $want_break_before{$type};
+ }
+ else {
+ return if $want_break_before{$type};
+ }
+ } ## end if ':'
- # now make a list of all new break points
- my @insert_list;
+ # handle math operators + - * /
+ elsif ( $is_math_op{$type} ) {
- # loop over all chain types
- foreach my $key ( keys %saw_chain_type ) {
+ # Combine these lines if this line is a single
+ # number, or if it is a short term with same
+ # operator as the previous line. For example, in
+ # the following code we will combine all of the
+ # short terms $A, $B, $C, $D, $E, $F, together
+ # instead of leaving them one per line:
+ # my $time =
+ # $A * $B * $C * $D * $E * $F *
+ # ( 2. * $eps * $sigma * $area ) *
+ # ( 1. / $tcold**3 - 1. / $thot**3 );
- # quit if just ONE continuation line with leading . For example--
- # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
- # . $contents;
- last if ( $nmax == 1 && $key =~ /^[\.\+]$/ );
+ # This can be important in math-intensive code.
- # loop over all interior chain tokens
- foreach my $itest ( @{ $interior_chain_type{$key} } ) {
+ my $good_combo;
- # loop over all left end tokens of same type
- if ( $left_chain_type{$key} ) {
- next if $nobreak_to_go[ $itest - 1 ];
- foreach my $i ( @{ $left_chain_type{$key} } ) {
- next unless $self->in_same_container_i( $i, $itest );
- push @insert_list, $itest - 1;
+ my $itokp = min( $inext_to_go[$itok], $iend_2 );
+ my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
+ my $itokm = max( iprev_to_go($itok), $ibeg_1 );
+ my $itokmm = max( iprev_to_go($itokm), $ibeg_1 );
- # Break at matching ? if this : is at a different level.
- # For example, the ? before $THRf_DEAD in the following
- # should get a break if its : gets a break.
- #
- # my $flags =
- # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
- # : ( $_ & 4 ) ? $THRf_R_DETACHED
- # : $THRf_R_JOINABLE;
- if ( $key eq ':'
- && $levels_to_go[$i] != $levels_to_go[$itest] )
- {
- my $i_question = $mate_index_to_go[$itest];
- if ( $i_question > 0 ) {
- push @insert_list, $i_question - 1;
- }
+ # check for a number on the right
+ if ( $types_to_go[$itokp] eq 'n' ) {
+
+ # ok if nothing else on right
+ if ( $itokp == $iend_2 ) {
+ $good_combo = 1;
+ }
+ else {
+
+ # look one more token to right..
+ # okay if math operator or some termination
+ $good_combo =
+ ( ( $itokpp == $iend_2 )
+ && $is_math_op{ $types_to_go[$itokpp] } )
+ || $types_to_go[$itokpp] =~ /^[#,;]$/;
}
- last;
}
- }
- # loop over all right end tokens of same type
- if ( $right_chain_type{$key} ) {
- next if $nobreak_to_go[$itest];
- foreach my $i ( @{ $right_chain_type{$key} } ) {
- next unless $self->in_same_container_i( $i, $itest );
- push @insert_list, $itest;
+ # check for a number on the left
+ if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
- # break at matching ? if this : is at a different level
- if ( $key eq ':'
- && $levels_to_go[$i] != $levels_to_go[$itest] )
- {
- my $i_question = $mate_index_to_go[$itest];
- if ( $i_question >= 0 ) {
- push @insert_list, $i_question;
- }
+ # okay if nothing else to left
+ if ( $itokm == $ibeg_1 ) {
+ $good_combo = 1;
+ }
+
+ # otherwise look one more token to left
+ else {
+
+ # okay if math operator, comma, or assignment
+ $good_combo = ( $itokmm == $ibeg_1 )
+ && ( $is_math_op{ $types_to_go[$itokmm] }
+ || $types_to_go[$itokmm] =~ /^[,]$/
+ || $is_assignment{ $types_to_go[$itokmm] } );
}
- last;
}
- }
- }
- }
- # insert any new break points
- if (@insert_list) {
- $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
- }
- return;
-} ## end sub break_all_chain_tokens
+ # look for a single short token either side of the
+ # operator
+ if ( !$good_combo ) {
-sub insert_additional_breaks {
+ # Slight adjustment factor to make results
+ # independent of break before or after operator
+ # in long summed lists. (An operator and a
+ # space make two spaces).
+ my $two = ( $itok eq $iend_1 ) ? 2 : 0;
- # this routine will add line breaks at requested locations after
- # sub break_long_lines has made preliminary breaks.
+ $good_combo =
- my ( $self, $ri_break_list, $ri_first, $ri_last ) = @_;
- my $i_f;
- my $i_l;
- my $line_number = 0;
- foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
+ # numbers or id's on both sides of this joint
+ $types_to_go[$itokp] =~ /^[in]$/
+ && $types_to_go[$itokm] =~ /^[in]$/
- next if ( $nobreak_to_go[$i_break_left] );
+ # one of the two lines must be short:
+ && (
+ (
+ # no more than 2 nonblank tokens right
+ # of joint
+ $itokpp == $iend_2
- $i_f = $ri_first->[$line_number];
- $i_l = $ri_last->[$line_number];
- while ( $i_break_left >= $i_l ) {
- $line_number++;
+ # short
+ && token_sequence_length( $itokp, $iend_2 ) <
+ $two + $rOpts_short_concatenation_item_length
+ )
+ || (
+ # no more than 2 nonblank tokens left of
+ # joint
+ $itokmm == $ibeg_1
- # shouldn't happen unless caller passes bad indexes
- if ( $line_number >= @{$ri_last} ) {
- if (DEVEL_MODE) {
- Fault(<<EOM);
-Non-fatal program bug: couldn't set break at $i_break_left
-EOM
+ # short
+ && token_sequence_length( $ibeg_1, $itokm ) <
+ 2 - $two + $rOpts_short_concatenation_item_length
+ )
+
+ )
+
+ # keep pure terms; don't mix +- with */
+ && !(
+ $is_plus_minus{$type}
+ && ( $is_mult_div{ $types_to_go[$itokmm] }
+ || $is_mult_div{ $types_to_go[$itokpp] } )
+ )
+ && !(
+ $is_mult_div{$type}
+ && ( $is_plus_minus{ $types_to_go[$itokmm] }
+ || $is_plus_minus{ $types_to_go[$itokpp] } )
+ )
+
+ ;
}
- return;
- }
- $i_f = $ri_first->[$line_number];
- $i_l = $ri_last->[$line_number];
- }
- # Do not leave a blank at the end of a line; back up if necessary
- if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
+ # it is also good to combine if we can reduce to 2
+ # lines
+ if ( !$good_combo ) {
- my $i_break_right = $inext_to_go[$i_break_left];
- if ( $i_break_left >= $i_f
- && $i_break_left < $i_l
- && $i_break_right > $i_f
- && $i_break_right <= $i_l )
- {
- splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
- splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
- }
- }
- return;
-} ## end sub insert_additional_breaks
+ # index on other line where same token would be
+ # in a long chain.
+ my $iother = ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
-{ ## begin closure in_same_container_i
- my $ris_break_token;
- my $ris_comma_token;
+ $good_combo =
+ $n == 2
+ && $n == $nmax
+ && $types_to_go[$iother] ne $type;
+ }
- BEGIN {
+ return unless ($good_combo);
- # all cases break on seeing commas at same level
- my @q = qw( => );
- push @q, ',';
- @{$ris_comma_token}{@q} = (1) x scalar(@q);
+ } ## end math
- # Non-ternary text also breaks on seeing any of qw(? : || or )
- # Example: we would not want to break at any of these .'s
- # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
- push @q, qw( or || ? : );
- @{$ris_break_token}{@q} = (1) x scalar(@q);
- }
+ elsif ( $is_amp_amp{$type} ) {
+ ##TBD
+ } ## end &&, ||
- sub in_same_container_i {
+ elsif ( $is_assignment{$type} ) {
+ ##TBD
+ } ## end assignment
+ }
- # Check to see if tokens at i1 and i2 are in the same container, and
- # not separated by certain characters: => , ? : || or
- # This is an interface between the _to_go arrays to the rLL array
- my ( $self, $i1, $i2 ) = @_;
+ # ok to combine lines
+ return 1;
+ } ## end sub recombine_section_0
- # quick check
- my $parent_seqno_1 = $parent_seqno_to_go[$i1];
- return if ( $parent_seqno_to_go[$i2] ne $parent_seqno_1 );
+ sub recombine_section_2 {
- if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
- my $K1 = $K_to_go[$i1];
- my $K2 = $K_to_go[$i2];
- my $rLL = $self->[_rLL_];
+ my ( $ri_beg, $ri_end, $n, $this_line_is_semicolon_terminated ) = @_;
- my $depth_1 = $nesting_depth_to_go[$i1];
- return if ( $depth_1 < 0 );
+ # Recombine Section 2:
+ # Examine token at $iend_1 (right end of first line of pair)
- # Shouldn't happen since i1 and i2 have same parent:
- return unless ( $nesting_depth_to_go[$i2] == $depth_1 );
+ # Here are Indexes of the endpoint tokens of the two lines:
+ #
+ # -----line $n-1--- | -----line $n-----
+ # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
+ # ^
+ # |
+ # -----Section 2 looks at this token
- # Select character set to scan for
- my $type_1 = $types_to_go[$i1];
- my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token;
+ # Returns:
+ # (nothing) => do not join lines
+ # 1, skip_Section_3 => ok to join lines
+
+ # $skip_Section_3 is a flag for skipping the next section
+ my $skip_Section_3 = 0;
+
+ my $nmax = @{$ri_end} - 1;
+ my $ibeg_1 = $ri_beg->[ $n - 1 ];
+ my $iend_1 = $ri_end->[ $n - 1 ];
+ my $iend_2 = $ri_end->[$n];
+ my $ibeg_2 = $ri_beg->[$n];
+ my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
+ my $ibeg_nmax = $ri_beg->[$nmax];
+
+ my $type_iend_1 = $types_to_go[$iend_1];
+ my $type_iend_2 = $types_to_go[$iend_2];
+ my $type_ibeg_1 = $types_to_go[$ibeg_1];
+ my $type_ibeg_2 = $types_to_go[$ibeg_2];
+
+ # an isolated '}' may join with a ';' terminated segment
+ if ( $type_iend_1 eq '}' ) {
+
+ # Check for cases where combining a semicolon terminated
+ # statement with a previous isolated closing paren will
+ # allow the combined line to be outdented. This is
+ # generally a good move. For example, we can join up
+ # the last two lines here:
+ # (
+ # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
+ # $size, $atime, $mtime, $ctime, $blksize, $blocks
+ # )
+ # = stat($file);
+ #
+ # to get:
+ # (
+ # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
+ # $size, $atime, $mtime, $ctime, $blksize, $blocks
+ # ) = stat($file);
+ #
+ # which makes the parens line up.
+ #
+ # Another example, from Joe Matarazzo, probably looks best
+ # with the 'or' clause appended to the trailing paren:
+ # $self->some_method(
+ # PARAM1 => 'foo',
+ # PARAM2 => 'bar'
+ # ) or die "Some_method didn't work";
+ #
+ # But we do not want to do this for something like the -lp
+ # option where the paren is not outdentable because the
+ # trailing clause will be far to the right.
+ #
+ # The logic here is synchronized with the logic in sub
+ # sub get_final_indentation, which actually does
+ # the outdenting.
+ #
+ my $combine_ok = $this_line_is_semicolon_terminated
+
+ # only one token on last line
+ && $ibeg_1 == $iend_1
+
+ # must be structural paren
+ && $tokens_to_go[$iend_1] eq ')'
+
+ # style must allow outdenting,
+ && !$closing_token_indentation{')'}
+
+ # but leading colons probably line up with a
+ # previous colon or question (count could be wrong).
+ && $type_ibeg_2 ne ':'
+
+ # only one step in depth allowed. this line must not
+ # begin with a ')' itself.
+ && ( $nesting_depth_to_go[$iend_1] ==
+ $nesting_depth_to_go[$iend_2] + 1 );
+
+ # But only combine leading '&&', '||', if no previous && || :
+ # seen. This count includes these tokens at all levels. The
+ # idea is that seeing these at any level can make it hard to read
+ # formatting if we recombine.
+ if ( $is_amp_amp{$type_ibeg_2} ) {
+ foreach my $n_t ( reverse( 0 .. $n - 2 ) ) {
+ my $ibeg_t = $ri_beg->[$n_t];
+ my $type_t = $types_to_go[$ibeg_t];
+ if ( $is_amp_amp{$type_t} || $type_t eq ':' ) {
+ $combine_ok = 0;
+ last;
+ }
+ }
+ }
- # Fast preliminary loop to verify that tokens are in the same container
- my $KK = $K1;
- while (1) {
- $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
- last if !defined($KK);
- last if ( $KK >= $K2 );
- my $ii = $i1 + $KK - $K1;
- my $depth_i = $nesting_depth_to_go[$ii];
- return if ( $depth_i < $depth_1 );
- next if ( $depth_i > $depth_1 );
- if ( $type_1 ne ':' ) {
- my $tok_i = $tokens_to_go[$ii];
- return if ( $tok_i eq '?' || $tok_i eq ':' );
+ $skip_Section_3 ||= $combine_ok;
+
+ # YVES patch 2 of 2:
+ # Allow cuddled eval chains, like this:
+ # eval {
+ # #STUFF;
+ # 1; # return true
+ # } or do {
+ # #handle error
+ # };
+ # This patch works together with a patch in
+ # setting adjusted indentation (where the closing eval
+ # brace is outdented if possible).
+ # The problem is that an 'eval' block has continuation
+ # indentation and it looks better to undo it in some
+ # cases. If we do not use this patch we would get:
+ # eval {
+ # #STUFF;
+ # 1; # return true
+ # }
+ # or do {
+ # #handle error
+ # };
+ # The alternative, for uncuddled style, is to create
+ # a patch in get_final_indentation which undoes
+ # the indentation of a leading line like 'or do {'.
+ # This doesn't work well with -icb through
+ if (
+ $block_type_to_go[$iend_1]
+ && $rOpts_brace_follower_vertical_tightness > 0
+ && (
+
+ # -bfvt=1, allow cuddled eval chains [default]
+ (
+ $tokens_to_go[$iend_2] eq '{'
+ && $block_type_to_go[$iend_1] eq 'eval'
+ && !ref( $leading_spaces_to_go[$iend_1] )
+ && !$rOpts_indent_closing_brace
+ )
+
+ # -bfvt=2, allow most brace followers [part of git #110]
+ || ( $rOpts_brace_follower_vertical_tightness > 1
+ && $ibeg_1 == $iend_1 )
+
+ )
+
+ && (
+ ( $type_ibeg_2 =~ /^(\&\&|\|\|)$/ )
+ || ( $type_ibeg_2 eq 'k'
+ && $is_and_or{ $tokens_to_go[$ibeg_2] } )
+ || $is_if_unless{ $tokens_to_go[$ibeg_2] }
+ )
+ )
+ {
+ $skip_Section_3 ||= 1;
}
- }
- # Slow loop checking for certain characters
+ return
+ unless (
+ $skip_Section_3
- #-----------------------------------------------------
- # This is potentially a slow routine and not critical.
- # For safety just give up for large differences.
- # See test file 'infinite_loop.txt'
- #-----------------------------------------------------
- return if ( $i2 - $i1 > 200 );
+ # handle '.' and '?' specially below
+ || ( $type_ibeg_2 =~ /^[\.\?]$/ )
- foreach my $ii ( $i1 + 1 .. $i2 - 1 ) {
+ # fix for c054 (unusual -pbp case)
+ || $type_ibeg_2 eq '=='
- my $depth_i = $nesting_depth_to_go[$ii];
- next if ( $depth_i > $depth_1 );
- return if ( $depth_i < $depth_1 );
- my $tok_i = $tokens_to_go[$ii];
- return if ( $rbreak->{$tok_i} );
+ );
}
- return 1;
- } ## end sub in_same_container_i
-} ## end closure in_same_container_i
-sub break_equals {
+ elsif ( $type_iend_1 eq '{' ) {
- # Look for assignment operators that could use a breakpoint.
- # For example, in the following snippet
- #
- # $HOME = $ENV{HOME}
- # || $ENV{LOGDIR}
- # || $pw[7]
- # || die "no home directory for user $<";
- #
- # we could break at the = to get this, which is a little nicer:
- # $HOME =
- # $ENV{HOME}
- # || $ENV{LOGDIR}
- # || $pw[7]
- # || die "no home directory for user $<";
- #
- # The logic here follows the logic in set_logical_padding, which
- # will add the padding in the second line to improve alignment.
- #
- my ( $self, $ri_left, $ri_right ) = @_;
- my $nmax = @{$ri_right} - 1;
- return unless ( $nmax >= 2 );
+ # YVES
+ # honor breaks at opening brace
+ # Added to prevent recombining something like this:
+ # } || eval { package main;
+ return if ( $forced_breakpoint_to_go[$iend_1] );
+ }
- # scan the left ends of first two lines
- my $tokbeg = EMPTY_STRING;
- my $depth_beg;
- for my $n ( 1 .. 2 ) {
- my $il = $ri_left->[$n];
- my $typel = $types_to_go[$il];
- my $tokenl = $tokens_to_go[$il];
- my $keyl = $typel eq 'k' ? $tokenl : $typel;
+ # do not recombine lines with ending &&, ||,
+ elsif ( $is_amp_amp{$type_iend_1} ) {
+ return unless ( $want_break_before{$type_iend_1} );
+ }
- my $has_leading_op = $is_chain_operator{$keyl};
- return unless ($has_leading_op);
- if ( $n > 1 ) {
+ # Identify and recombine a broken ?/: chain
+ elsif ( $type_iend_1 eq '?' ) {
+
+ # Do not recombine different levels
return
- unless ( $tokenl eq $tokbeg
- && $nesting_depth_to_go[$il] eq $depth_beg );
+ if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
+
+ # do not recombine unless next line ends in :
+ return unless $type_iend_2 eq ':';
}
- $tokbeg = $tokenl;
- $depth_beg = $nesting_depth_to_go[$il];
- }
- # now look for any interior tokens of the same types
- my $il = $ri_left->[0];
- my $ir = $ri_right->[0];
+ # for lines ending in a comma...
+ elsif ( $type_iend_1 eq ',' ) {
- # now make a list of all new break points
- my @insert_list;
- foreach my $i ( reverse( $il + 1 .. $ir - 1 ) ) {
- my $type = $types_to_go[$i];
- if ( $is_assignment{$type}
- && $nesting_depth_to_go[$i] eq $depth_beg )
- {
- if ( $want_break_before{$type} ) {
- push @insert_list, $i - 1;
+ # Do not recombine at comma which is following the
+ # input bias.
+ # NOTE: this could be controlled by a special flag,
+ # but it seems to work okay.
+ return if ( $old_breakpoint_to_go[$iend_1] );
+
+ # An isolated '},' may join with an identifier + ';'
+ # This is useful for the class of a 'bless' statement
+ # (bless.t)
+ if ( $type_ibeg_1 eq '}'
+ && $type_ibeg_2 eq 'i' )
+ {
+ return
+ unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
+ && ( $iend_2 == ( $ibeg_2 + 1 ) )
+ && $this_line_is_semicolon_terminated );
+
+ # override breakpoint
+ $forced_breakpoint_to_go[$iend_1] = 0;
}
+
+ # but otherwise ..
else {
- push @insert_list, $i;
+
+ # do not recombine after a comma unless this will
+ # leave just 1 more line
+ return unless ( $n + 1 >= $nmax );
+
+ # do not recombine if there is a change in
+ # indentation depth
+ return
+ if ( $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
+
+ # do not recombine a "complex expression" after a
+ # comma. "complex" means no parens.
+ my $saw_paren;
+ foreach my $ii ( $ibeg_2 .. $iend_2 ) {
+ if ( $tokens_to_go[$ii] eq '(' ) {
+ $saw_paren = 1;
+ last;
+ }
+ }
+ return if $saw_paren;
}
}
- }
- # Break after a 'return' followed by a chain of operators
- # return ( $^O !~ /win32|dos/i )
- # && ( $^O ne 'VMS' )
- # && ( $^O ne 'OS2' )
- # && ( $^O ne 'MacOS' );
- # To give:
- # return
- # ( $^O !~ /win32|dos/i )
- # && ( $^O ne 'VMS' )
- # && ( $^O ne 'OS2' )
- # && ( $^O ne 'MacOS' );
- my $i = 0;
- if ( $types_to_go[$i] eq 'k'
- && $tokens_to_go[$i] eq 'return'
- && $ir > $il
- && $nesting_depth_to_go[$i] eq $depth_beg )
- {
- push @insert_list, $i;
- }
+ # opening paren..
+ elsif ( $type_iend_1 eq '(' ) {
- return unless (@insert_list);
+ # No longer doing this
+ }
- # One final check...
- # scan second and third lines and be sure there are no assignments
- # we want to avoid breaking at an = to make something like this:
- # unless ( $icon =
- # $html_icons{"$type-$state"}
- # or $icon = $html_icons{$type}
- # or $icon = $html_icons{$state} )
- for my $n ( 1 .. 2 ) {
- my $il_n = $ri_left->[$n];
- my $ir_n = $ri_right->[$n];
- foreach my $i ( $il_n + 1 .. $ir_n ) {
- my $type = $types_to_go[$i];
- return
- if ( $is_assignment{$type}
- && $nesting_depth_to_go[$i] eq $depth_beg );
+ elsif ( $type_iend_1 eq ')' ) {
+
+ # No longer doing this
}
- }
- # ok, insert any new break point
- if (@insert_list) {
- $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
- }
- return;
-} ## end sub break_equals
+ # keep a terminal for-semicolon
+ elsif ( $type_iend_1 eq 'f' ) {
+ return;
+ }
-{ ## begin closure recombine_breakpoints
+ # if '=' at end of line ...
+ elsif ( $is_assignment{$type_iend_1} ) {
- # This routine is called once per batch to see if it would be better
- # to combine some of the lines into which the batch has been broken.
+ # keep break after = if it was in input stream
+ # this helps prevent 'blinkers'
+ return
+ if (
+ $old_breakpoint_to_go[$iend_1]
- my %is_amp_amp;
- my %is_math_op;
- my %is_plus_minus;
- my %is_mult_div;
+ # don't strand an isolated '='
+ && $iend_1 != $ibeg_1
+ );
- BEGIN {
+ my $is_short_quote =
+ ( $type_ibeg_2 eq 'Q'
+ && $ibeg_2 == $iend_2
+ && token_sequence_length( $ibeg_2, $ibeg_2 ) <
+ $rOpts_short_concatenation_item_length );
+ my $is_ternary = (
+ $type_ibeg_1 eq '?' && ( $ibeg_3 >= 0
+ && $types_to_go[$ibeg_3] eq ':' )
+ );
- my @q;
- @q = qw( && || );
- @is_amp_amp{@q} = (1) x scalar(@q);
+ # always join an isolated '=', a short quote, or if this
+ # will put ?/: at start of adjacent lines
+ if ( $ibeg_1 != $iend_1
+ && !$is_short_quote
+ && !$is_ternary )
+ {
+ return
+ unless (
+ (
- @q = qw( + - * / );
- @is_math_op{@q} = (1) x scalar(@q);
+ # unless we can reduce this to two lines
+ $nmax < $n + 2
- @q = qw( + - );
- @is_plus_minus{@q} = (1) x scalar(@q);
+ # or three lines, the last with a leading
+ # semicolon
+ || ( $nmax == $n + 2
+ && $types_to_go[$ibeg_nmax] eq ';' )
- @q = qw( * / );
- @is_mult_div{@q} = (1) x scalar(@q);
- }
+ # or the next line ends with a here doc
+ || $type_iend_2 eq 'h'
- sub Debug_dump_breakpoints {
+ # or the next line ends in an open paren or
+ # brace and the break hasn't been forced
+ # [dima.t]
+ || ( !$forced_breakpoint_to_go[$iend_1]
+ && $type_iend_2 eq '{' )
+ )
- # Debug routine to dump current breakpoints...not normally called
- # We are given indexes to the current lines:
- # $ri_beg = ref to array of BEGinning indexes of each line
- # $ri_end = ref to array of ENDing indexes of each line
- my ( $self, $ri_beg, $ri_end, $msg ) = @_;
- print STDERR "----Dumping breakpoints from: $msg----\n";
- for my $n ( 0 .. @{$ri_end} - 1 ) {
- my $ibeg = $ri_beg->[$n];
- my $iend = $ri_end->[$n];
- my $text = EMPTY_STRING;
- foreach my $i ( $ibeg .. $iend ) {
- $text .= $tokens_to_go[$i];
+ # do not recombine if the two lines might align
+ # well this is a very approximate test for this
+ && (
+
+ # RT#127633 - the leading tokens are not
+ # operators
+ ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] )
+
+ # or they are different
+ || ( $ibeg_3 >= 0
+ && $type_ibeg_2 ne $types_to_go[$ibeg_3] )
+ )
+ );
+
+ if (
+
+ # Recombine if we can make two lines
+ $nmax >= $n + 2
+
+ # -lp users often prefer this:
+ # my $title = function($env, $env, $sysarea,
+ # "bubba Borrower Entry");
+ # so we will recombine if -lp is used we have
+ # ending comma
+ && !(
+ $ibeg_3 > 0
+ && ref( $leading_spaces_to_go[$ibeg_3] )
+ && $type_iend_2 eq ','
+ )
+ )
+ {
+
+ # otherwise, scan the rhs line up to last token for
+ # complexity. Note that we are not counting the last token
+ # in case it is an opening paren.
+ my $ok = simple_rhs( $ri_end, $n, $nmax, $ibeg_2, $iend_2 );
+ return if ( !$ok );
+
+ }
+ }
+
+ unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
+ $forced_breakpoint_to_go[$iend_1] = 0;
}
- print STDERR "$n ($ibeg:$iend) $text\n";
}
- print STDERR "----\n";
- return;
- } ## end sub Debug_dump_breakpoints
- sub delete_one_line_semicolons {
+ # for keywords..
+ elsif ( $type_iend_1 eq 'k' ) {
- my ( $self, $ri_beg, $ri_end ) = @_;
- my $rLL = $self->[_rLL_];
- my $K_opening_container = $self->[_K_opening_container_];
+ # make major control keywords stand out
+ # (recombine.t)
+ return
+ if (
- # Walk down the lines of this batch and delete any semicolons
- # terminating one-line blocks;
- my $nmax = @{$ri_end} - 1;
+ #/^(last|next|redo|return)$/
+ $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
- foreach my $n ( 0 .. $nmax ) {
- my $i_beg = $ri_beg->[$n];
- my $i_e = $ri_end->[$n];
- my $K_beg = $K_to_go[$i_beg];
- my $K_e = $K_to_go[$i_e];
- my $K_end = $K_e;
- my $type_end = $rLL->[$K_end]->[_TYPE_];
- if ( $type_end eq '#' ) {
- $K_end = $self->K_previous_nonblank($K_end);
- if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; }
+ # but only if followed by multiple lines
+ && $n < $nmax
+ );
+
+ if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
+ return
+ unless $want_break_before{ $tokens_to_go[$iend_1] };
}
+ }
+ elsif ( $type_iend_1 eq '.' ) {
- # we are looking for a line ending in closing brace
- next
- unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' );
+ # NOTE: the logic here should match that of section 3 so that
+ # line breaks are independent of choice of break before or after.
+ # It would be nice to combine them in section 0, but the
+ # special junction case ') .' makes that difficult.
+ # This section added to fix issues c172, c174.
+ my $i_next_nonblank = $ibeg_2;
+ my $summed_len_1 = $summed_lengths_to_go[ $iend_1 + 1 ] -
+ $summed_lengths_to_go[$ibeg_1];
+ my $summed_len_2 = $summed_lengths_to_go[ $iend_2 + 1 ] -
+ $summed_lengths_to_go[$ibeg_2];
+ my $iend_1_minus = max( $ibeg_1, iprev_to_go($iend_1) );
- # ...and preceded by a semicolon on the same line
- my $K_semicolon = $self->K_previous_nonblank($K_end);
- next unless defined($K_semicolon);
- my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg );
- next if ( $i_semicolon <= $i_beg );
- next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' );
+ return
+ unless (
- # Safety check - shouldn't happen - not critical
- # This is not worth throwing a Fault, except in DEVEL_MODE
- if ( $types_to_go[$i_semicolon] ne ';' ) {
- DEVEL_MODE
- && Fault("unexpected type looking for semicolon");
- next;
+ # ... unless there is just one and we can reduce
+ # this to two lines if we do. For example, this
+ #
+ #
+ # $bodyA .=
+ # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
+ #
+ # looks better than this:
+ # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;' .
+ # '$args .= $pat;'
+
+ # check for 2 lines, not in a long broken '.' chain
+ ( $n == 2 && $n == $nmax && $type_iend_1 ne $type_iend_2 )
+
+ # ... or this would strand a short quote , like this
+ # "some long quote" .
+ # "\n";
+ || (
+ $types_to_go[$i_next_nonblank] eq 'Q'
+ && $i_next_nonblank >= $iend_2 - 2
+ && $token_lengths_to_go[$i_next_nonblank] <
+ $rOpts_short_concatenation_item_length
+
+ # additional constraints to fix c167
+ && ( $types_to_go[$iend_1_minus] ne 'Q'
+ || $summed_len_2 < $summed_len_1 )
+ )
+ );
+ }
+ return ( 1, $skip_Section_3 );
+ } ## end sub recombine_section_2
+
+ sub simple_rhs {
+
+ my ( $ri_end, $n, $nmax, $ibeg_2, $iend_2 ) = @_;
+
+ # Scan line ibeg_2 to $iend_2 up to last token for complexity.
+ # We are not counting the last token in case it is an opening paren.
+ # Return:
+ # true if rhs is simple, ok to recombine
+ # false otherwise
+
+ my $tv = 0;
+ my $depth = $nesting_depth_to_go[$ibeg_2];
+ foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
+ if ( $nesting_depth_to_go[$i] != $depth ) {
+ $tv++;
+ last if ( $tv > 1 );
}
+ $depth = $nesting_depth_to_go[$i];
+ }
- # ... with the corresponding opening brace on the same line
- my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_];
- my $K_opening = $K_opening_container->{$type_sequence};
- next unless ( defined($K_opening) );
- my $i_opening = $i_beg + ( $K_opening - $K_beg );
- next if ( $i_opening < $i_beg );
+ # ok to recombine if no level changes before
+ # last token
+ if ( $tv > 0 ) {
- # ... and only one semicolon between these braces
- my $semicolon_count = 0;
- foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) {
- if ( $rLL->[$K]->[_TYPE_] eq ';' ) {
- $semicolon_count++;
- last;
+ # otherwise, do not recombine if more than
+ # two level changes.
+ return if ( $tv > 1 );
+
+ # check total complexity of the two
+ # adjacent lines that will occur if we do
+ # this join
+ my $istop =
+ ( $n < $nmax )
+ ? $ri_end->[ $n + 1 ]
+ : $iend_2;
+ foreach my $i ( $iend_2 .. $istop ) {
+ if ( $nesting_depth_to_go[$i] != $depth ) {
+ $tv++;
+ last if ( $tv > 2 );
}
+ $depth = $nesting_depth_to_go[$i];
}
- next if ($semicolon_count);
- # ...ok, then make the semicolon invisible
- my $len = $token_lengths_to_go[$i_semicolon];
- $tokens_to_go[$i_semicolon] = EMPTY_STRING;
- $token_lengths_to_go[$i_semicolon] = 0;
- $rLL->[$K_semicolon]->[_TOKEN_] = EMPTY_STRING;
- $rLL->[$K_semicolon]->[_TOKEN_LENGTH_] = 0;
- foreach ( $i_semicolon .. $max_index_to_go ) {
- $summed_lengths_to_go[ $_ + 1 ] -= $len;
- }
+ # do not recombine if total is more than 2
+ # level changes
+ return if ( $tv > 2 );
}
- return;
- } ## end sub delete_one_line_semicolons
+ return 1;
+ } ## end sub simple_rhs
- use constant DEBUG_RECOMBINE => 0;
+ sub recombine_section_3 {
- sub recombine_breakpoints {
+ my ( $ri_beg, $ri_end, $n, $this_line_is_semicolon_terminated ) = @_;
- # We are given indexes to the current lines:
- # $ri_beg = ref to array of BEGinning indexes of each line
- # $ri_end = ref to array of ENDing indexes of each line
- my ( $self, $ri_beg, $ri_end, $rbond_strength_to_go ) = @_;
+ # Recombine Section 3:
+ # Examine token at $ibeg_2 (right end of first line of pair)
- # sub break_long_lines is very liberal in setting line breaks
- # for long lines, always setting breaks at good breakpoints, even
- # when that creates small lines. Sometimes small line fragments
- # are produced which would look better if they were combined.
- # That's the task of this routine.
+ # Here are Indexes of the endpoint tokens of the two lines:
+ #
+ # -----line $n-1--- | -----line $n-----
+ # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
+ # ^
+ # |
+ # -----Section 3 looks at this token
- # do nothing under extreme stress
- return if ( $stress_level_alpha < 1 && !DEVEL_MODE );
+ # Returns:
+ # (nothing) => do not join lines
+ # 1, bs_tweak => ok to join lines
+
+ # $bstweak is a small tolerance to add to bond strengths
+ my $bs_tweak = 0;
+
+ my $nmax = @{$ri_end} - 1;
+ my $ibeg_1 = $ri_beg->[ $n - 1 ];
+ my $iend_1 = $ri_end->[ $n - 1 ];
+ my $iend_2 = $ri_end->[$n];
+ my $ibeg_2 = $ri_beg->[$n];
+
+ my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1;
+ my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
+ my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
+ my $ibeg_nmax = $ri_beg->[$nmax];
+
+ my $type_iend_1 = $types_to_go[$iend_1];
+ my $type_iend_2 = $types_to_go[$iend_2];
+ my $type_ibeg_1 = $types_to_go[$ibeg_1];
+ my $type_ibeg_2 = $types_to_go[$ibeg_2];
+
+ # handle lines with leading &&, ||
+ if ( $is_amp_amp{$type_ibeg_2} ) {
+
+ # ok to recombine if it follows a ? or :
+ # and is followed by an open paren..
+ my $ok =
+ ( $is_ternary{$type_ibeg_1} && $tokens_to_go[$iend_2] eq '(' )
+
+ # or is followed by a ? or : at same depth
+ #
+ # We are looking for something like this. We can
+ # recombine the && line with the line above to make the
+ # structure more clear:
+ # return
+ # exists $G->{Attr}->{V}
+ # && exists $G->{Attr}->{V}->{$u}
+ # ? %{ $G->{Attr}->{V}->{$u} }
+ # : ();
+ #
+ # We should probably leave something like this alone:
+ # return
+ # exists $G->{Attr}->{E}
+ # && exists $G->{Attr}->{E}->{$u}
+ # && exists $G->{Attr}->{E}->{$u}->{$v}
+ # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
+ # : ();
+ # so that we either have all of the &&'s (or ||'s)
+ # on one line, as in the first example, or break at
+ # each one as in the second example. However, it
+ # sometimes makes things worse to check for this because
+ # it prevents multiple recombinations. So this is not done.
+ || ( $ibeg_3 >= 0
+ && $is_ternary{ $types_to_go[$ibeg_3] }
+ && $nesting_depth_to_go[$ibeg_3] ==
+ $nesting_depth_to_go[$ibeg_2] );
+
+ # Combine a trailing && term with an || term: fix for
+ # c060 This is rare but can happen.
+ $ok ||= 1
+ if ( $ibeg_3 < 0
+ && $type_ibeg_2 eq '&&'
+ && $type_ibeg_1 eq '||'
+ && $nesting_depth_to_go[$ibeg_2] ==
+ $nesting_depth_to_go[$ibeg_1] );
+
+ return if !$ok && $want_break_before{$type_ibeg_2};
+ $forced_breakpoint_to_go[$iend_1] = 0;
+
+ # tweak the bond strength to give this joint priority
+ # over ? and :
+ $bs_tweak = 0.25;
+ }
+
+ # Identify and recombine a broken ?/: chain
+ elsif ( $type_ibeg_2 eq '?' ) {
+
+ # Do not recombine different levels
+ my $lev = $levels_to_go[$ibeg_2];
+ return if ( $lev ne $levels_to_go[$ibeg_1] );
+
+ # Do not recombine a '?' if either next line or
+ # previous line does not start with a ':'. The reasons
+ # are that (1) no alignment of the ? will be possible
+ # and (2) the expression is somewhat complex, so the
+ # '?' is harder to see in the interior of the line.
+ my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
+ my $precedes_colon = $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
+ return unless ( $follows_colon || $precedes_colon );
+
+ # we will always combining a ? line following a : line
+ if ( !$follows_colon ) {
+
+ # ...otherwise recombine only if it looks like a
+ # chain. we will just look at a few nearby lines
+ # to see if this looks like a chain.
+ my $local_count = 0;
+ foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
+ $local_count++
+ if $ii >= 0
+ && $types_to_go[$ii] eq ':'
+ && $levels_to_go[$ii] == $lev;
+ }
+ return unless ( $local_count > 1 );
+ }
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
+
+ # do not recombine lines with leading '.'
+ elsif ( $type_ibeg_2 eq '.' ) {
+ my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
+ my $summed_len_1 = $summed_lengths_to_go[ $iend_1 + 1 ] -
+ $summed_lengths_to_go[$ibeg_1];
+ my $summed_len_2 = $summed_lengths_to_go[ $iend_2 + 1 ] -
+ $summed_lengths_to_go[$ibeg_2];
- my $rK_weld_right = $self->[_rK_weld_right_];
- my $rK_weld_left = $self->[_rK_weld_left_];
+ return
+ unless (
- my $nmax_start = @{$ri_end} - 1;
- return if ( $nmax_start <= 0 );
+ # ... unless there is just one and we can reduce
+ # this to two lines if we do. For example, this
+ #
+ #
+ # $bodyA .=
+ # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
+ #
+ # looks better than this:
+ # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
+ # . '$args .= $pat;'
- # Make a list of all good joining tokens between the lines
- # n-1 and n.
- my @joint;
+ ( $n == 2 && $n == $nmax && $type_ibeg_1 ne $type_ibeg_2 )
- # Break the total batch sub-sections with lengths short enough to
- # recombine
- my $rsections = [];
- my $nbeg_sec = 0;
- my $nend_sec;
- my $nmax_section = 0;
- foreach my $nn ( 1 .. $nmax_start ) {
- my $ibeg_1 = $ri_beg->[ $nn - 1 ];
- my $iend_1 = $ri_end->[ $nn - 1 ];
- my $iend_2 = $ri_end->[$nn];
- my $ibeg_2 = $ri_beg->[$nn];
+ # ... or this would strand a short quote , like this
+ # . "some long quote"
+ # . "\n";
+ || (
+ $types_to_go[$i_next_nonblank] eq 'Q'
+ && $i_next_nonblank >= $iend_2 - 1
+ && $token_lengths_to_go[$i_next_nonblank] <
+ $rOpts_short_concatenation_item_length
+
+ # additional constraints to fix c167
+ && (
+ $types_to_go[$iend_1] ne 'Q'
+
+ # allow a term shorter than the previous term
+ || $summed_len_2 < $summed_len_1
+
+ # or allow a short semicolon-terminated term if this
+ # makes two lines (see c169)
+ || ( $n == 2
+ && $n == $nmax
+ && $this_line_is_semicolon_terminated )
+ )
+ )
+ );
+ }
+
+ # handle leading keyword..
+ elsif ( $type_ibeg_2 eq 'k' ) {
+
+ # handle leading "or"
+ if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
+ return
+ unless (
+ $this_line_is_semicolon_terminated
+ && (
+ $type_ibeg_1 eq '}'
+ || (
+
+ # following 'if' or 'unless' or 'or'
+ $type_ibeg_1 eq 'k'
+ && $is_if_unless{ $tokens_to_go[$ibeg_1] }
+
+ # important: only combine a very simple
+ # or statement because the step below
+ # may have combined a trailing 'and'
+ # with this or, and we do not want to
+ # then combine everything together
+ && ( $iend_2 - $ibeg_2 <= 7 )
+ )
+ )
+ );
+
+ #X: RT #81854
+ $forced_breakpoint_to_go[$iend_1] = 0
+ unless ( $old_breakpoint_to_go[$iend_1] );
+ }
+
+ # handle leading 'and' and 'xor'
+ elsif ($tokens_to_go[$ibeg_2] eq 'and'
+ || $tokens_to_go[$ibeg_2] eq 'xor' )
+ {
+
+ # Decide if we will combine a single terminal 'and'
+ # after an 'if' or 'unless'.
+
+ # This looks best with the 'and' on the same
+ # line as the 'if':
+ #
+ # $a = 1
+ # if $seconds and $nu < 2;
+ #
+ # But this looks better as shown:
+ #
+ # $a = 1
+ # if !$this->{Parents}{$_}
+ # or $this->{Parents}{$_} eq $_;
+ #
+ return
+ unless (
+ $this_line_is_semicolon_terminated
+ && (
- # Define the joint variable
- my ( $itok, $itokp, $itokm );
- foreach my $itest ( $iend_1, $ibeg_2 ) {
- my $type = $types_to_go[$itest];
- if ( $is_math_op{$type}
- || $is_amp_amp{$type}
- || $is_assignment{$type}
- || $type eq ':' )
- {
- $itok = $itest;
- }
+ # following 'if' or 'unless' or 'or'
+ $type_ibeg_1 eq 'k'
+ && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
+ || $tokens_to_go[$ibeg_1] eq 'or' )
+ )
+ );
}
- $joint[$nn] = [$itok];
- # Update the section list
- my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 );
- if (
- $excess <= 1
+ # handle leading "if" and "unless"
+ elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
- # The number 5 here is an arbitrary small number intended
- # to keep most small matches in one sub-section.
- || ( defined($nend_sec)
- && ( $nn < 5 || $nmax_start - $nn < 5 ) )
- )
- {
- $nend_sec = $nn;
+ # Combine something like:
+ # next
+ # if ( $lang !~ /${l}$/i );
+ # into:
+ # next if ( $lang !~ /${l}$/i );
+ return
+ unless (
+ $this_line_is_semicolon_terminated
+
+ # previous line begins with 'and' or 'or'
+ && $type_ibeg_1 eq 'k'
+ && $is_and_or{ $tokens_to_go[$ibeg_1] }
+
+ );
}
+
+ # handle all other leading keywords
else {
- if ( defined($nend_sec) ) {
- push @{$rsections}, [ $nbeg_sec, $nend_sec ];
- my $num = $nend_sec - $nbeg_sec;
- if ( $num > $nmax_section ) { $nmax_section = $num }
- $nbeg_sec = $nn;
- $nend_sec = undef;
+
+ # keywords look best at start of lines,
+ # but combine things like "1 while"
+ unless ( $is_assignment{$type_iend_1} ) {
+ return
+ if ( ( $type_iend_1 ne 'k' )
+ && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
}
- $nbeg_sec = $nn;
}
}
- if ( defined($nend_sec) ) {
- push @{$rsections}, [ $nbeg_sec, $nend_sec ];
- my $num = $nend_sec - $nbeg_sec;
- if ( $num > $nmax_section ) { $nmax_section = $num }
- }
- my $num_sections = @{$rsections};
+ # similar treatment of && and || as above for 'and' and
+ # 'or': NOTE: This block of code is currently bypassed
+ # because of a previous block but is retained for possible
+ # future use.
+ elsif ( $is_amp_amp{$type_ibeg_2} ) {
- # This is potentially an O(n-squared) loop, but not critical, so we can
- # put a finite limit on the total number of iterations. This is
- # suggested by issue c118, which pushed about 5.e5 lines through here
- # and caused an excessive run time.
-
- # Three lines of defense have been put in place to prevent excessive
- # run times:
- # 1. do nothing if formatting under stress (c118 was under stress)
- # 2. break into small sub-sections to decrease the maximum n-squared.
- # 3. put a finite limit on the number of iterations.
-
- # Testing shows that most batches only require one or two iterations.
- # A very large batch which is broken into sub-sections can require one
- # iteration per section. This suggests the limit here, which allows
- # up to 10 iterations plus one pass per sub-section.
- my $it_count = 0;
- my $it_count_max =
- 10 + int( 1000 / ( 1 + $nmax_section ) ) + $num_sections;
+ # maybe looking at something like:
+ # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
- if ( DEBUG_RECOMBINE > 1 ) {
- my $max = 0;
- print STDERR
- "-----\n$num_sections sections found for nmax=$nmax_start\n";
- foreach my $sect ( @{$rsections} ) {
- my ( $nbeg, $nend ) = @{$sect};
- my $num = $nend - $nbeg;
- if ( $num > $max ) { $max = $num }
- print STDERR "$nbeg $nend\n";
- }
- print STDERR "max size=$max of $nmax_start lines\n";
+ return
+ unless (
+ $this_line_is_semicolon_terminated
+
+ # previous line begins with an 'if' or 'unless'
+ # keyword
+ && $type_ibeg_1 eq 'k'
+ && $is_if_unless{ $tokens_to_go[$ibeg_1] }
+
+ );
}
- # Loop over all sub-sections. Note that we have to work backwards
- # from the end of the batch since the sections use original line
- # numbers, and the line numbers change as we go.
- while ( my $section = pop @{$rsections} ) {
- my ( $nbeg, $nend ) = @{$section};
+ # handle line with leading = or similar
+ elsif ( $is_assignment{$type_ibeg_2} ) {
+ return unless ( $n == 1 || $n == $nmax );
+ return if ( $old_breakpoint_to_go[$iend_1] );
+ return
+ unless (
- # number of ending lines to leave untouched in this pass
- my $nmax_sec = @{$ri_end} - 1;
- my $num_freeze = $nmax_sec - $nend;
+ # unless we can reduce this to two lines
+ $nmax == 2
- my $more_to_do = 1;
+ # or three lines, the last with a leading semicolon
+ || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
- # We keep looping over all of the lines of this batch
- # until there are no more possible recombinations
- my $nmax_last = $nmax_sec + 1;
- my $reverse = 0;
+ # or the next line ends with a here doc
+ || $type_iend_2 eq 'h'
- while ($more_to_do) {
+ # or this is a short line ending in ;
+ || ( $n == $nmax
+ && $this_line_is_semicolon_terminated )
+ );
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
+ return ( 1, $bs_tweak );
+ } ## end sub recombine_section_3
- # Safety check for excess total iterations
- $it_count++;
- if ( $it_count > $it_count_max ) {
- goto RETURN;
- }
+} ## end closure recombine_breakpoints
- my $n_best = 0;
- my $bs_best;
- my $nmax = @{$ri_end} - 1;
+sub insert_final_ternary_breaks {
- # Safety check for infinite loop: the line count must decrease
- unless ( $nmax < $nmax_last ) {
+ my ( $self, $ri_left, $ri_right ) = @_;
- # Shouldn't happen because splice below decreases nmax on
- # each iteration. An error can only be due to a recent
- # programming change. We better stop here.
- if (DEVEL_MODE) {
- Fault(
-"Program bug-infinite loop in recombine breakpoints\n"
- );
- }
- $more_to_do = 0;
+ # Called once per batch to look for and do any final line breaks for
+ # long ternary chains
+
+ my $nmax = @{$ri_right} - 1;
+
+ # scan the left and right end tokens of all lines
+ my $i_first_colon = -1;
+ for my $n ( 0 .. $nmax ) {
+ my $il = $ri_left->[$n];
+ my $ir = $ri_right->[$n];
+ my $typel = $types_to_go[$il];
+ my $typer = $types_to_go[$ir];
+ return if ( $typel eq '?' );
+ return if ( $typer eq '?' );
+ if ( $typel eq ':' ) { $i_first_colon = $il; last; }
+ elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
+ }
+
+ # For long ternary chains,
+ # if the first : we see has its ? is in the interior
+ # of a preceding line, then see if there are any good
+ # breakpoints before the ?.
+ if ( $i_first_colon > 0 ) {
+ my $i_question = $mate_index_to_go[$i_first_colon];
+ if ( defined($i_question) && $i_question > 0 ) {
+ my @insert_list;
+ foreach my $ii ( reverse( 0 .. $i_question - 1 ) ) {
+ my $token = $tokens_to_go[$ii];
+ my $type = $types_to_go[$ii];
+
+ # For now, a good break is either a comma or,
+ # in a long chain, a 'return'.
+ # Patch for RT #126633: added the $nmax>1 check to avoid
+ # breaking after a return for a simple ternary. For longer
+ # chains the break after return allows vertical alignment, so
+ # it is still done. So perltidy -wba='?' will not break
+ # immediately after the return in the following statement:
+ # sub x {
+ # return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' :
+ # 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb';
+ # }
+ if (
+ (
+ $type eq ','
+ || $type eq 'k' && ( $nmax > 1 && $token eq 'return' )
+ )
+ && $self->in_same_container_i( $ii, $i_question )
+ )
+ {
+ push @insert_list, $ii;
last;
}
- $nmax_last = $nmax;
- $more_to_do = 0;
- my $skip_Section_3;
- my $leading_amp_count = 0;
- my $this_line_is_semicolon_terminated;
+ }
+
+ # insert any new break points
+ if (@insert_list) {
+ $self->insert_additional_breaks( \@insert_list, $ri_left,
+ $ri_right );
+ }
+ }
+ }
+ return;
+} ## end sub insert_final_ternary_breaks
- # loop over all remaining lines in this batch
- my $nstop = $nmax - $num_freeze;
- for my $iter ( $nbeg + 1 .. $nstop ) {
+sub insert_breaks_before_list_opening_containers {
- # alternating sweep direction gives symmetric results
- # for recombining lines which exceed the line length
- # such as eval {{{{.... }}}}
- my $n;
- if ($reverse) { $n = $nbeg + 1 + $nstop - $iter; }
- else { $n = $iter }
+ my ( $self, $ri_left, $ri_right ) = @_;
- #----------------------------------------------------------
- # If we join the current pair of lines,
- # line $n-1 will become the left part of the joined line
- # line $n will become the right part of the joined line
- #
- # Here are Indexes of the endpoint tokens of the two lines:
- #
- # -----line $n-1--- | -----line $n-----
- # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
- # ^
- # |
- # We want to decide if we should remove the line break
- # between the tokens at $iend_1 and $ibeg_2
- #
- # We will apply a number of ad-hoc tests to see if joining
- # here will look ok. The code will just issue a 'next'
- # command if the join doesn't look good. If we get through
- # the gauntlet of tests, the lines will be recombined.
- #----------------------------------------------------------
- #
- # beginning and ending tokens of the lines we are working on
- my $ibeg_1 = $ri_beg->[ $n - 1 ];
- my $iend_1 = $ri_end->[ $n - 1 ];
- my $iend_2 = $ri_end->[$n];
- my $ibeg_2 = $ri_beg->[$n];
- my $ibeg_nmax = $ri_beg->[$nmax];
-
- # combined line cannot be too long
- my $excess =
- $self->excess_line_length( $ibeg_1, $iend_2, 1 );
- next if ( $excess > 0 );
-
- my $type_iend_1 = $types_to_go[$iend_1];
- my $type_iend_2 = $types_to_go[$iend_2];
- my $type_ibeg_1 = $types_to_go[$ibeg_1];
- my $type_ibeg_2 = $types_to_go[$ibeg_2];
-
- # terminal token of line 2 if any side comment is ignored:
- my $iend_2t = $iend_2;
- my $type_iend_2t = $type_iend_2;
-
- # some beginning indexes of other lines, which may not exist
- my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1;
- my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
- my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
-
- my $bs_tweak = 0;
-
- #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
- # $nesting_depth_to_go[$ibeg_1] );
-
- DEBUG_RECOMBINE > 1 && do {
- print STDERR
-"RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
- };
+ # This routine is called once per batch to implement the parameters
+ # --break-before-hash-brace, etc.
- # If line $n is the last line, we set some flags and
- # do any special checks for it
- if ( $n == $nmax ) {
+ # Nothing to do if none of these parameters has been set
+ return unless %break_before_container_types;
- # a terminal '{' should stay where it is
- # unless preceded by a fat comma
- next if ( $type_ibeg_2 eq '{' && $type_iend_1 ne '=>' );
+ my $nmax = @{$ri_right} - 1;
+ return unless ( $nmax >= 0 );
- if ( $type_iend_2 eq '#'
- && $iend_2 - $ibeg_2 >= 2
- && $types_to_go[ $iend_2 - 1 ] eq 'b' )
- {
- $iend_2t = $iend_2 - 2;
- $type_iend_2t = $types_to_go[$iend_2t];
- }
+ my $rLL = $self->[_rLL_];
- $this_line_is_semicolon_terminated =
- $type_iend_2t eq ';';
- }
+ my $rbreak_before_container_by_seqno =
+ $self->[_rbreak_before_container_by_seqno_];
+ my $rK_weld_left = $self->[_rK_weld_left_];
- #----------------------------------------------------------
- # Recombine Section 0:
- # Examine the special token joining this line pair, if any.
- # Put as many tests in this section to avoid duplicate code
- # and to make formatting independent of whether breaks are
- # to the left or right of an operator.
- #----------------------------------------------------------
+ # scan the ends of all lines
+ my @insert_list;
+ for my $n ( 0 .. $nmax ) {
+ my $il = $ri_left->[$n];
+ my $ir = $ri_right->[$n];
+ next unless ( $ir > $il );
+ my $Kl = $K_to_go[$il];
+ my $Kr = $K_to_go[$ir];
+ my $Kend = $Kr;
+ my $type_end = $rLL->[$Kr]->[_TYPE_];
+
+ # Backup before any side comment
+ if ( $type_end eq '#' ) {
+ $Kend = $self->K_previous_nonblank($Kr);
+ next unless defined($Kend);
+ $type_end = $rLL->[$Kend]->[_TYPE_];
+ }
- my ($itok) = @{ $joint[$n] };
- if ($itok) {
+ # Backup to the start of any weld; fix for b1173.
+ if ($total_weld_count) {
+ my $Kend_test = $rK_weld_left->{$Kend};
+ if ( defined($Kend_test) && $Kend_test > $Kl ) {
+ $Kend = $Kend_test;
+ $Kend_test = $rK_weld_left->{$Kend};
+ }
- my $type = $types_to_go[$itok];
+ # Do not break if we did not back up to the start of a weld
+ # (shouldn't happen)
+ next if ( defined($Kend_test) );
+ }
- if ( $type eq ':' ) {
+ my $token = $rLL->[$Kend]->[_TOKEN_];
+ next unless ( $is_opening_token{$token} );
+ next unless ( $Kl < $Kend - 1 );
- # do not join at a colon unless it disobeys the
- # break request
- if ( $itok eq $iend_1 ) {
- next unless $want_break_before{$type};
- }
- else {
- $leading_amp_count++;
- next if $want_break_before{$type};
- }
- } ## end if ':'
+ my $seqno = $rLL->[$Kend]->[_TYPE_SEQUENCE_];
+ next unless ( defined($seqno) );
+
+ # Use the flag which was previously set
+ next unless ( $rbreak_before_container_by_seqno->{$seqno} );
- # handle math operators + - * /
- elsif ( $is_math_op{$type} ) {
+ # Install a break before this opening token.
+ my $Kbreak = $self->K_previous_nonblank($Kend);
+ my $ibreak = $Kbreak - $Kl + $il;
+ next if ( $ibreak < $il );
+ next if ( $nobreak_to_go[$ibreak] );
+ push @insert_list, $ibreak;
+ }
- # Combine these lines if this line is a single
- # number, or if it is a short term with same
- # operator as the previous line. For example, in
- # the following code we will combine all of the
- # short terms $A, $B, $C, $D, $E, $F, together
- # instead of leaving them one per line:
- # my $time =
- # $A * $B * $C * $D * $E * $F *
- # ( 2. * $eps * $sigma * $area ) *
- # ( 1. / $tcold**3 - 1. / $thot**3 );
+ # insert any new break points
+ if (@insert_list) {
+ $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
+ }
+ return;
+} ## end sub insert_breaks_before_list_opening_containers
- # This can be important in math-intensive code.
+sub note_added_semicolon {
+ my ( $self, $line_number ) = @_;
+ $self->[_last_added_semicolon_at_] = $line_number;
+ if ( $self->[_added_semicolon_count_] == 0 ) {
+ $self->[_first_added_semicolon_at_] = $line_number;
+ }
+ $self->[_added_semicolon_count_]++;
+ write_logfile_entry("Added ';' here\n");
+ return;
+} ## end sub note_added_semicolon
- my $good_combo;
+sub note_deleted_semicolon {
+ my ( $self, $line_number ) = @_;
+ $self->[_last_deleted_semicolon_at_] = $line_number;
+ if ( $self->[_deleted_semicolon_count_] == 0 ) {
+ $self->[_first_deleted_semicolon_at_] = $line_number;
+ }
+ $self->[_deleted_semicolon_count_]++;
+ write_logfile_entry("Deleted unnecessary ';' at line $line_number\n");
+ return;
+} ## end sub note_deleted_semicolon
- my $itokp = min( $inext_to_go[$itok], $iend_2 );
- my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
- my $itokm = max( $iprev_to_go[$itok], $ibeg_1 );
- my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
+sub note_embedded_tab {
+ my ( $self, $line_number ) = @_;
+ $self->[_embedded_tab_count_]++;
+ $self->[_last_embedded_tab_at_] = $line_number;
+ if ( !$self->[_first_embedded_tab_at_] ) {
+ $self->[_first_embedded_tab_at_] = $line_number;
+ }
- # check for a number on the right
- if ( $types_to_go[$itokp] eq 'n' ) {
+ if ( $self->[_embedded_tab_count_] <= MAX_NAG_MESSAGES ) {
+ write_logfile_entry("Embedded tabs in quote or pattern\n");
+ }
+ return;
+} ## end sub note_embedded_tab
- # ok if nothing else on right
- if ( $itokp == $iend_2 ) {
- $good_combo = 1;
- }
- else {
-
- # look one more token to right..
- # okay if math operator or some termination
- $good_combo =
- ( ( $itokpp == $iend_2 )
- && $is_math_op{ $types_to_go[$itokpp]
- } )
- || $types_to_go[$itokpp] =~ /^[#,;]$/;
- }
- }
+use constant DEBUG_CORRECT_LP => 0;
- # check for a number on the left
- if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
+sub correct_lp_indentation {
- # okay if nothing else to left
- if ( $itokm == $ibeg_1 ) {
- $good_combo = 1;
- }
+ # When the -lp option is used, we need to make a last pass through
+ # each line to correct the indentation positions in case they differ
+ # from the predictions. This is necessary because perltidy uses a
+ # predictor/corrector method for aligning with opening parens. The
+ # predictor is usually good, but sometimes stumbles. The corrector
+ # tries to patch things up once the actual opening paren locations
+ # are known.
+ my ( $self, $ri_first, $ri_last ) = @_;
- # otherwise look one more token to left
- else {
+ # first remove continuation indentation if appropriate
+ my $max_line = @{$ri_first} - 1;
- # okay if math operator, comma, or assignment
- $good_combo = ( $itokmm == $ibeg_1 )
- && ( $is_math_op{ $types_to_go[$itokmm] }
- || $types_to_go[$itokmm] =~ /^[,]$/
- || $is_assignment{ $types_to_go[$itokmm]
- } );
- }
- }
+ #---------------------------------------------------------------------------
+ # PASS 1: reduce indentation if necessary at any long one-line blocks (c098)
+ #---------------------------------------------------------------------------
- # look for a single short token either side of the
- # operator
- if ( !$good_combo ) {
+ # The point is that sub 'starting_one_line_block' made one-line blocks based
+ # on default indentation, not -lp indentation. So some of the one-line
+ # blocks may be too long when given -lp indentation. We will fix that now
+ # if possible, using the list of these closing block indexes.
+ my $ri_starting_one_line_block =
+ $self->[_this_batch_]->[_ri_starting_one_line_block_];
+ if ( @{$ri_starting_one_line_block} ) {
+ $self->correct_lp_indentation_pass_1( $ri_first, $ri_last,
+ $ri_starting_one_line_block );
+ }
- # Slight adjustment factor to make results
- # independent of break before or after operator
- # in long summed lists. (An operator and a
- # space make two spaces).
- my $two = ( $itok eq $iend_1 ) ? 2 : 0;
+ #-------------------------------------------------------------------
+ # PASS 2: look for and fix other problems in each line of this batch
+ #-------------------------------------------------------------------
- $good_combo =
+ # look at each output line ...
+ foreach my $line ( 0 .. $max_line ) {
+ my $ibeg = $ri_first->[$line];
+ my $iend = $ri_last->[$line];
- # numbers or id's on both sides of this joint
- $types_to_go[$itokp] =~ /^[in]$/
- && $types_to_go[$itokm] =~ /^[in]$/
+ # looking at each token in this output line ...
+ foreach my $i ( $ibeg .. $iend ) {
- # one of the two lines must be short:
- && (
- (
- # no more than 2 nonblank tokens right
- # of joint
- $itokpp == $iend_2
-
- # short
- && token_sequence_length(
- $itokp, $iend_2
- ) < $two +
- $rOpts_short_concatenation_item_length
- )
- || (
- # no more than 2 nonblank tokens left of
- # joint
- $itokmm == $ibeg_1
-
- # short
- && token_sequence_length(
- $ibeg_1, $itokm
- ) < 2 - $two +
- $rOpts_short_concatenation_item_length
- )
+ # How many space characters to place before this token
+ # for special alignment. Actual padding is done in the
+ # continue block.
- )
+ # looking for next unvisited indentation item ...
+ my $indentation = $leading_spaces_to_go[$i];
- # keep pure terms; don't mix +- with */
- && !(
- $is_plus_minus{$type}
- && ( $is_mult_div{ $types_to_go[$itokmm] }
- || $is_mult_div{ $types_to_go[$itokpp] }
- )
- )
- && !(
- $is_mult_div{$type}
- && ( $is_plus_minus{ $types_to_go[$itokmm] }
- || $is_plus_minus{ $types_to_go[$itokpp]
- } )
- )
+ # This is just for indentation objects (c098)
+ next unless ( ref($indentation) );
- ;
- }
+ # Visit each indentation object just once
+ next if ( $indentation->get_marked() );
- # it is also good to combine if we can reduce to 2
- # lines
- if ( !$good_combo ) {
+ # Mark first visit
+ $indentation->set_marked(1);
- # index on other line where same token would be
- # in a long chain.
- my $iother =
- ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
+ # Skip indentation objects which do not align with container tokens
+ my $align_seqno = $indentation->get_align_seqno();
+ next unless ($align_seqno);
- $good_combo =
- $n == 2
- && $n == $nmax
- && $types_to_go[$iother] ne $type;
- }
+ # Skip a container which is entirely on this line
+ my $Ko = $self->[_K_opening_container_]->{$align_seqno};
+ my $Kc = $self->[_K_closing_container_]->{$align_seqno};
+ if ( defined($Ko) && defined($Kc) ) {
+ next if ( $Ko >= $K_to_go[$ibeg] && $Kc <= $K_to_go[$iend] );
+ }
- next unless ($good_combo);
+ # Note on flag '$do_not_pad':
+ # We want to avoid a situation like this, where the aligner
+ # inserts whitespace before the '=' to align it with a previous
+ # '=', because otherwise the parens might become mis-aligned in a
+ # situation like this, where the '=' has become aligned with the
+ # previous line, pushing the opening '(' forward beyond where we
+ # want it.
+ #
+ # $mkFloor::currentRoom = '';
+ # $mkFloor::c_entry = $c->Entry(
+ # -width => '10',
+ # -relief => 'sunken',
+ # ...
+ # );
+ #
+ # We leave it to the aligner to decide how to do this.
+ if ( $line == 1 && $i == $ibeg ) {
+ $self->[_this_batch_]->[_do_not_pad_] = 1;
+ }
- } ## end math
+ #--------------------------------------------
+ # Now see what the error is and try to fix it
+ #--------------------------------------------
+ my $closing_index = $indentation->get_closed();
+ my $predicted_pos = $indentation->get_spaces();
- elsif ( $is_amp_amp{$type} ) {
- ##TBD
- } ## end &&, ||
+ # Find actual position:
+ my $actual_pos;
- elsif ( $is_assignment{$type} ) {
- ##TBD
- } ## end assignment
- }
+ if ( $i == $ibeg ) {
- #----------------------------------------------------------
- # Recombine Section 1:
- # Join welded nested containers immediately
- #----------------------------------------------------------
+ # Case 1: token is first character of of batch - table lookup
+ if ( $line == 0 ) {
- if (
- $total_weld_count
- && ( $type_sequence_to_go[$iend_1]
- && defined( $rK_weld_right->{ $K_to_go[$iend_1] } )
- || $type_sequence_to_go[$ibeg_2]
- && defined( $rK_weld_left->{ $K_to_go[$ibeg_2] } ) )
- )
- {
- $n_best = $n;
- last;
- }
+ $actual_pos = $predicted_pos;
- $reverse = 0;
+ my ( $indent, $offset, $is_leading, $exists ) =
+ get_saved_opening_indentation($align_seqno);
+ if ( defined($indent) ) {
- #----------------------------------------------------------
- # Recombine Section 2:
- # Examine token at $iend_1 (right end of first line of pair)
- #----------------------------------------------------------
+ # NOTE: we could use '1' here if no space after
+ # opening and '2' if want space; it is hardwired at 1
+ # like -gnu-style. But it is probably best to leave
+ # this alone because changing it would change
+ # formatting of much existing code without any
+ # significant benefit.
+ $actual_pos = get_spaces($indent) + $offset + 1;
+ }
+ }
- # an isolated '}' may join with a ';' terminated segment
- if ( $type_iend_1 eq '}' ) {
-
- # Check for cases where combining a semicolon terminated
- # statement with a previous isolated closing paren will
- # allow the combined line to be outdented. This is
- # generally a good move. For example, we can join up
- # the last two lines here:
- # (
- # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
- # $size, $atime, $mtime, $ctime, $blksize, $blocks
- # )
- # = stat($file);
- #
- # to get:
- # (
- # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
- # $size, $atime, $mtime, $ctime, $blksize, $blocks
- # ) = stat($file);
- #
- # which makes the parens line up.
- #
- # Another example, from Joe Matarazzo, probably looks best
- # with the 'or' clause appended to the trailing paren:
- # $self->some_method(
- # PARAM1 => 'foo',
- # PARAM2 => 'bar'
- # ) or die "Some_method didn't work";
- #
- # But we do not want to do this for something like the -lp
- # option where the paren is not outdentable because the
- # trailing clause will be far to the right.
- #
- # The logic here is synchronized with the logic in sub
- # sub final_indentation_adjustment, which actually does
- # the outdenting.
- #
- $skip_Section_3 ||= $this_line_is_semicolon_terminated
-
- # only one token on last line
- && $ibeg_1 == $iend_1
-
- # must be structural paren
- && $tokens_to_go[$iend_1] eq ')'
-
- # style must allow outdenting,
- && !$closing_token_indentation{')'}
-
- # only leading '&&', '||', and ':' if no others seen
- # (but note: our count made below could be wrong
- # due to intervening comments)
- && ( $leading_amp_count == 0
- || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
-
- # but leading colons probably line up with a
- # previous colon or question (count could be wrong).
- && $type_ibeg_2 ne ':'
-
- # only one step in depth allowed. this line must not
- # begin with a ')' itself.
- && ( $nesting_depth_to_go[$iend_1] ==
- $nesting_depth_to_go[$iend_2] + 1 );
-
- # YVES patch 2 of 2:
- # Allow cuddled eval chains, like this:
- # eval {
- # #STUFF;
- # 1; # return true
- # } or do {
- # #handle error
- # };
- # This patch works together with a patch in
- # setting adjusted indentation (where the closing eval
- # brace is outdented if possible).
- # The problem is that an 'eval' block has continuation
- # indentation and it looks better to undo it in some
- # cases. If we do not use this patch we would get:
- # eval {
- # #STUFF;
- # 1; # return true
- # }
- # or do {
- # #handle error
- # };
- # The alternative, for uncuddled style, is to create
- # a patch in final_indentation_adjustment which undoes
- # the indentation of a leading line like 'or do {'.
- # This doesn't work well with -icb through
- if (
- $block_type_to_go[$iend_1] eq 'eval'
- && !ref( $leading_spaces_to_go[$iend_1] )
- && !$rOpts_indent_closing_brace
- && $tokens_to_go[$iend_2] eq '{'
- && (
- ( $type_ibeg_2 =~ /^(\&\&|\|\|)$/ )
- || ( $type_ibeg_2 eq 'k'
- && $is_and_or{ $tokens_to_go[$ibeg_2] } )
- || $is_if_unless{ $tokens_to_go[$ibeg_2] }
- )
- )
- {
- $skip_Section_3 ||= 1;
- }
+ # Case 2: token starts a new line - use length of previous line
+ else {
- next
- unless (
- $skip_Section_3
+ my $ibegm = $ri_first->[ $line - 1 ];
+ my $iendm = $ri_last->[ $line - 1 ];
+ $actual_pos = total_line_length( $ibegm, $iendm );
- # handle '.' and '?' specially below
- || ( $type_ibeg_2 =~ /^[\.\?]$/ )
+ # follow -pt style
+ ++$actual_pos
+ if ( $types_to_go[ $iendm + 1 ] eq 'b' );
- # fix for c054 (unusual -pbp case)
- || $type_ibeg_2 eq '=='
+ }
+ }
- );
- }
+ # Case 3: $i>$ibeg: token is mid-line - use length to previous token
+ else {
- elsif ( $type_iend_1 eq '{' ) {
+ $actual_pos = total_line_length( $ibeg, $i - 1 );
- # YVES
- # honor breaks at opening brace
- # Added to prevent recombining something like this:
- # } || eval { package main;
- next if $forced_breakpoint_to_go[$iend_1];
+ # for mid-line token, we must check to see if all
+ # additional lines have continuation indentation,
+ # and remove it if so. Otherwise, we do not get
+ # good alignment.
+ if ( $closing_index > $iend ) {
+ my $ibeg_next = $ri_first->[ $line + 1 ];
+ if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
+ $self->undo_lp_ci( $line, $i, $closing_index,
+ $ri_first, $ri_last );
}
+ }
+ }
- # do not recombine lines with ending &&, ||,
- elsif ( $is_amp_amp{$type_iend_1} ) {
- next unless $want_break_before{$type_iend_1};
- }
+ # By how many spaces (plus or minus) would we need to increase the
+ # indentation to get alignment with the opening token?
+ my $move_right = $actual_pos - $predicted_pos;
- # Identify and recombine a broken ?/: chain
- elsif ( $type_iend_1 eq '?' ) {
+ if (DEBUG_CORRECT_LP) {
+ my $tok = substr( $tokens_to_go[$i], 0, 8 );
+ my $avail = $self->get_available_spaces_to_go($ibeg);
+ print
+"CORRECT_LP for seq=$align_seqno, predicted pos=$predicted_pos actual=$actual_pos => move right=$move_right available=$avail i=$i max=$max_index_to_go tok=$tok\n";
+ }
- # Do not recombine different levels
- next
- if (
- $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
+ # nothing more to do if no error to correct (gnu2.t)
+ if ( $move_right == 0 ) {
+ $indentation->set_recoverable_spaces($move_right);
+ next;
+ }
- # do not recombine unless next line ends in :
- next unless $type_iend_2 eq ':';
- }
+ # Get any collapsed length defined for -xlp
+ my $collapsed_length =
+ $self->[_rcollapsed_length_by_seqno_]->{$align_seqno};
+ $collapsed_length = 0 unless ( defined($collapsed_length) );
- # for lines ending in a comma...
- elsif ( $type_iend_1 eq ',' ) {
+ if (DEBUG_CORRECT_LP) {
+ print
+"CORRECT_LP for seq=$align_seqno, collapsed length is $collapsed_length\n";
+ }
- # Do not recombine at comma which is following the
- # input bias.
- # TODO: might be best to make a special flag
- next if ( $old_breakpoint_to_go[$iend_1] );
+ # if we have not seen closure for this indentation in this batch,
+ # and do not have a collapsed length estimate, we can only pass on
+ # a request to the vertical aligner
+ if ( $closing_index < 0 && !$collapsed_length ) {
+ $indentation->set_recoverable_spaces($move_right);
+ next;
+ }
- # An isolated '},' may join with an identifier + ';'
- # This is useful for the class of a 'bless' statement
- # (bless.t)
- if ( $type_ibeg_1 eq '}'
- && $type_ibeg_2 eq 'i' )
- {
- next
- unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
- && ( $iend_2 == ( $ibeg_2 + 1 ) )
- && $this_line_is_semicolon_terminated );
+ # If necessary, look ahead to see if there is really any leading
+ # whitespace dependent on this whitespace, and also find the
+ # longest line using this whitespace. Since it is always safe to
+ # move left if there are no dependents, we only need to do this if
+ # we may have dependent nodes or need to move right.
- # override breakpoint
- $forced_breakpoint_to_go[$iend_1] = 0;
- }
+ my $have_child = $indentation->get_have_child();
+ my %saw_indentation;
+ my $line_count = 1;
+ $saw_indentation{$indentation} = $indentation;
- # but otherwise ..
- else {
+ # How far can we move right before we hit the limit?
+ # let $right_margen = the number of spaces that we can increase
+ # the current indentation before hitting the maximum line length.
+ my $right_margin = 0;
- # do not recombine after a comma unless this will
- # leave just 1 more line
- next unless ( $n + 1 >= $nmax );
+ if ( $have_child || $move_right > 0 ) {
+ $have_child = 0;
- # do not recombine if there is a change in
- # indentation depth
- next
- if ( $levels_to_go[$iend_1] !=
- $levels_to_go[$iend_2] );
-
- # do not recombine a "complex expression" after a
- # comma. "complex" means no parens.
- my $saw_paren;
- foreach my $ii ( $ibeg_2 .. $iend_2 ) {
- if ( $tokens_to_go[$ii] eq '(' ) {
- $saw_paren = 1;
- last;
- }
- }
- next if $saw_paren;
- }
- }
+ # include estimated collapsed length for incomplete containers
+ my $max_length = 0;
+ if ( $Kc > $K_to_go[$max_index_to_go] ) {
+ $max_length = $collapsed_length + $predicted_pos;
+ }
- # opening paren..
- elsif ( $type_iend_1 eq '(' ) {
+ if ( $i == $ibeg ) {
+ my $length = total_line_length( $ibeg, $iend );
+ if ( $length > $max_length ) { $max_length = $length }
+ }
- # No longer doing this
- }
+ # look ahead at the rest of the lines of this batch..
+ foreach my $line_t ( $line + 1 .. $max_line ) {
+ my $ibeg_t = $ri_first->[$line_t];
+ my $iend_t = $ri_last->[$line_t];
+ last if ( $closing_index <= $ibeg_t );
- elsif ( $type_iend_1 eq ')' ) {
+ # remember all different indentation objects
+ my $indentation_t = $leading_spaces_to_go[$ibeg_t];
+ $saw_indentation{$indentation_t} = $indentation_t;
+ $line_count++;
- # No longer doing this
+ # remember longest line in the group
+ my $length_t = total_line_length( $ibeg_t, $iend_t );
+ if ( $length_t > $max_length ) {
+ $max_length = $length_t;
}
+ }
- # keep a terminal for-semicolon
- elsif ( $type_iend_1 eq 'f' ) {
- next;
- }
+ $right_margin =
+ $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] -
+ $max_length;
+ if ( $right_margin < 0 ) { $right_margin = 0 }
+ }
- # if '=' at end of line ...
- elsif ( $is_assignment{$type_iend_1} ) {
+ my $first_line_comma_count =
+ grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
+ my $comma_count = $indentation->get_comma_count();
+ my $arrow_count = $indentation->get_arrow_count();
- # keep break after = if it was in input stream
- # this helps prevent 'blinkers'
- next
- if (
- $old_breakpoint_to_go[$iend_1]
+ # This is a simple approximate test for vertical alignment:
+ # if we broke just after an opening paren, brace, bracket,
+ # and there are 2 or more commas in the first line,
+ # and there are no '=>'s,
+ # then we are probably vertically aligned. We could set
+ # an exact flag in sub break_lists, but this is good
+ # enough.
+ my $indentation_count = keys %saw_indentation;
+ my $is_vertically_aligned =
+ ( $i == $ibeg
+ && $first_line_comma_count > 1
+ && $indentation_count == 1
+ && ( $arrow_count == 0 || $arrow_count == $line_count ) );
- # don't strand an isolated '='
- && $iend_1 != $ibeg_1
- );
+ # Make the move if possible ..
+ if (
- my $is_short_quote =
- ( $type_ibeg_2 eq 'Q'
- && $ibeg_2 == $iend_2
- && token_sequence_length( $ibeg_2, $ibeg_2 ) <
- $rOpts_short_concatenation_item_length );
- my $is_ternary = (
- $type_ibeg_1 eq '?' && ( $ibeg_3 >= 0
- && $types_to_go[$ibeg_3] eq ':' )
- );
+ # we can always move left
+ $move_right < 0
- # always join an isolated '=', a short quote, or if this
- # will put ?/: at start of adjacent lines
- if ( $ibeg_1 != $iend_1
- && !$is_short_quote
- && !$is_ternary )
- {
- next
- unless (
- (
+ # -xlp
- # unless we can reduce this to two lines
- $nmax < $n + 2
+ # incomplete container
+ || ( $rOpts_extended_line_up_parentheses
+ && $Kc > $K_to_go[$max_index_to_go] )
+ || $closing_index < 0
- # or three lines, the last with a leading
- # semicolon
- || ( $nmax == $n + 2
- && $types_to_go[$ibeg_nmax] eq ';' )
+ # but we should only move right if we are sure it will
+ # not spoil vertical alignment
+ || ( $comma_count == 0 )
+ || ( $comma_count > 0 && !$is_vertically_aligned )
+ )
+ {
+ my $move =
+ ( $move_right <= $right_margin )
+ ? $move_right
+ : $right_margin;
- # or the next line ends with a here doc
- || $type_iend_2 eq 'h'
+ if (DEBUG_CORRECT_LP) {
+ print
+ "CORRECT_LP for seq=$align_seqno, moving $move spaces\n";
+ }
- # or the next line ends in an open paren or
- # brace and the break hasn't been forced
- # [dima.t]
- || ( !$forced_breakpoint_to_go[$iend_1]
- && $type_iend_2 eq '{' )
- )
+ foreach ( keys %saw_indentation ) {
+ $saw_indentation{$_}
+ ->permanently_decrease_available_spaces( -$move );
+ }
+ }
- # do not recombine if the two lines might align
- # well this is a very approximate test for this
- && (
+ # Otherwise, record what we want and the vertical aligner
+ # will try to recover it.
+ else {
+ $indentation->set_recoverable_spaces($move_right);
+ }
+ } ## end loop over tokens in a line
+ } ## end loop over lines
+ return;
+} ## end sub correct_lp_indentation
- # RT#127633 - the leading tokens are not
- # operators
- ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] )
+sub correct_lp_indentation_pass_1 {
+ my ( $self, $ri_first, $ri_last, $ri_starting_one_line_block ) = @_;
- # or they are different
- || ( $ibeg_3 >= 0
- && $type_ibeg_2 ne
- $types_to_go[$ibeg_3] )
- )
- );
+ # So some of the one-line blocks may be too long when given -lp
+ # indentation. We will fix that now if possible, using the list of these
+ # closing block indexes.
- if (
+ my @ilist = @{$ri_starting_one_line_block};
+ return unless (@ilist);
- # Recombine if we can make two lines
- $nmax >= $n + 2
-
- # -lp users often prefer this:
- # my $title = function($env, $env, $sysarea,
- # "bubba Borrower Entry");
- # so we will recombine if -lp is used we have
- # ending comma
- && !(
- $ibeg_3 > 0
- && ref( $leading_spaces_to_go[$ibeg_3] )
- && $type_iend_2 eq ','
- )
- )
- {
+ my $max_line = @{$ri_first} - 1;
+ my $inext = shift(@ilist);
- # otherwise, scan the rhs line up to last token
- # for complexity. Note that we are not
- # counting the last token in case it is an
- # opening paren.
- my $tv = 0;
- my $depth = $nesting_depth_to_go[$ibeg_2];
- foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
- if ( $nesting_depth_to_go[$i] != $depth ) {
- $tv++;
- last if ( $tv > 1 );
- }
- $depth = $nesting_depth_to_go[$i];
- }
+ # loop over lines, checking length of each with a one-line block
+ my ( $ibeg, $iend );
+ foreach my $line ( 0 .. $max_line ) {
+ $iend = $ri_last->[$line];
+ next if ( $inext > $iend );
+ $ibeg = $ri_first->[$line];
- # ok to recombine if no level changes before
- # last token
- if ( $tv > 0 ) {
-
- # otherwise, do not recombine if more than
- # two level changes.
- next if ( $tv > 1 );
-
- # check total complexity of the two
- # adjacent lines that will occur if we do
- # this join
- my $istop =
- ( $n < $nmax )
- ? $ri_end->[ $n + 1 ]
- : $iend_2;
- foreach my $i ( $iend_2 .. $istop ) {
- if (
- $nesting_depth_to_go[$i] != $depth )
- {
- $tv++;
- last if ( $tv > 2 );
- }
- $depth = $nesting_depth_to_go[$i];
- }
-
- # do not recombine if total is more than 2
- # level changes
- next if ( $tv > 2 );
- }
- }
- }
+ # This is just for lines with indentation objects (c098)
+ my $excess =
+ ref( $leading_spaces_to_go[$ibeg] )
+ ? $self->excess_line_length( $ibeg, $iend )
+ : 0;
- unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
- $forced_breakpoint_to_go[$iend_1] = 0;
- }
- }
+ if ( $excess > 0 ) {
+ my $available_spaces = $self->get_available_spaces_to_go($ibeg);
- # for keywords..
- elsif ( $type_iend_1 eq 'k' ) {
+ if ( $available_spaces > 0 ) {
+ my $delete_want = min( $available_spaces, $excess );
+ my $deleted_spaces =
+ $self->reduce_lp_indentation( $ibeg, $delete_want );
+ $available_spaces = $self->get_available_spaces_to_go($ibeg);
+ }
+ }
- # make major control keywords stand out
- # (recombine.t)
- next
- if (
+ # skip forward to next one-line block to check
+ while (@ilist) {
+ $inext = shift @ilist;
+ next if ( $inext <= $iend );
+ last if ( $inext > $iend );
+ }
+ last if ( $inext <= $iend );
+ }
+ return;
+} ## end sub correct_lp_indentation_pass_1
- #/^(last|next|redo|return)$/
- $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
+sub undo_lp_ci {
- # but only if followed by multiple lines
- && $n < $nmax
- );
+ # If there is a single, long parameter within parens, like this:
+ #
+ # $self->command( "/msg "
+ # . $infoline->chan
+ # . " You said $1, but did you know that it's square was "
+ # . $1 * $1 . " ?" );
+ #
+ # we can remove the continuation indentation of the 2nd and higher lines
+ # to achieve this effect, which is more pleasing:
+ #
+ # $self->command("/msg "
+ # . $infoline->chan
+ # . " You said $1, but did you know that it's square was "
+ # . $1 * $1 . " ?");
- if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
- next
- unless $want_break_before{ $tokens_to_go[$iend_1]
- };
- }
- }
+ my ( $self, $line_open, $i_start, $closing_index, $ri_first, $ri_last ) =
+ @_;
+ my $max_line = @{$ri_first} - 1;
- #----------------------------------------------------------
- # Recombine Section 3:
- # Examine token at $ibeg_2 (left end of second line of pair)
- #----------------------------------------------------------
+ # must be multiple lines
+ return unless $max_line > $line_open;
- # join lines identified above as capable of
- # causing an outdented line with leading closing paren
- # Note that we are skipping the rest of this section
- # and the rest of the loop to do the join
- if ($skip_Section_3) {
- $forced_breakpoint_to_go[$iend_1] = 0;
- $n_best = $n;
- last;
- }
+ my $lev_start = $levels_to_go[$i_start];
+ my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
- # handle lines with leading &&, ||
- elsif ( $is_amp_amp{$type_ibeg_2} ) {
+ # see if all additional lines in this container have continuation
+ # indentation
+ my $line_1 = 1 + $line_open;
+ my $n = $line_open;
- $leading_amp_count++;
+ while ( ++$n <= $max_line ) {
+ my $ibeg = $ri_first->[$n];
+ my $iend = $ri_last->[$n];
+ if ( $ibeg eq $closing_index ) { $n--; last }
+ return if ( $lev_start != $levels_to_go[$ibeg] );
+ return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
+ last if ( $closing_index <= $iend );
+ }
- # ok to recombine if it follows a ? or :
- # and is followed by an open paren..
- my $ok =
- ( $is_ternary{$type_ibeg_1}
- && $tokens_to_go[$iend_2] eq '(' )
+ # we can reduce the indentation of all continuation lines
+ my $continuation_line_count = $n - $line_open;
+ @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
+ (0) x ($continuation_line_count);
+ @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
+ @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
+ return;
+} ## end sub undo_lp_ci
- # or is followed by a ? or : at same depth
- #
- # We are looking for something like this. We can
- # recombine the && line with the line above to make the
- # structure more clear:
- # return
- # exists $G->{Attr}->{V}
- # && exists $G->{Attr}->{V}->{$u}
- # ? %{ $G->{Attr}->{V}->{$u} }
- # : ();
- #
- # We should probably leave something like this alone:
- # return
- # exists $G->{Attr}->{E}
- # && exists $G->{Attr}->{E}->{$u}
- # && exists $G->{Attr}->{E}->{$u}->{$v}
- # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
- # : ();
- # so that we either have all of the &&'s (or ||'s)
- # on one line, as in the first example, or break at
- # each one as in the second example. However, it
- # sometimes makes things worse to check for this because
- # it prevents multiple recombinations. So this is not done.
- || ( $ibeg_3 >= 0
- && $is_ternary{ $types_to_go[$ibeg_3] }
- && $nesting_depth_to_go[$ibeg_3] ==
- $nesting_depth_to_go[$ibeg_2] );
-
- # Combine a trailing && term with an || term: fix for
- # c060 This is rare but can happen.
- $ok ||= 1
- if ( $ibeg_3 < 0
- && $type_ibeg_2 eq '&&'
- && $type_ibeg_1 eq '||'
- && $nesting_depth_to_go[$ibeg_2] ==
- $nesting_depth_to_go[$ibeg_1] );
-
- next if !$ok && $want_break_before{$type_ibeg_2};
- $forced_breakpoint_to_go[$iend_1] = 0;
-
- # tweak the bond strength to give this joint priority
- # over ? and :
- $bs_tweak = 0.25;
- }
+###############################################
+# CODE SECTION 10: Code to break long statments
+###############################################
- # Identify and recombine a broken ?/: chain
- elsif ( $type_ibeg_2 eq '?' ) {
-
- # Do not recombine different levels
- my $lev = $levels_to_go[$ibeg_2];
- next if ( $lev ne $levels_to_go[$ibeg_1] );
-
- # Do not recombine a '?' if either next line or
- # previous line does not start with a ':'. The reasons
- # are that (1) no alignment of the ? will be possible
- # and (2) the expression is somewhat complex, so the
- # '?' is harder to see in the interior of the line.
- my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
- my $precedes_colon =
- $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
- next unless ( $follows_colon || $precedes_colon );
-
- # we will always combining a ? line following a : line
- if ( !$follows_colon ) {
-
- # ...otherwise recombine only if it looks like a
- # chain. we will just look at a few nearby lines
- # to see if this looks like a chain.
- my $local_count = 0;
- foreach
- my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 )
- {
- $local_count++
- if $ii >= 0
- && $types_to_go[$ii] eq ':'
- && $levels_to_go[$ii] == $lev;
- }
- next unless ( $local_count > 1 );
- }
- $forced_breakpoint_to_go[$iend_1] = 0;
- }
+use constant DEBUG_BREAK_LINES => 0;
- # do not recombine lines with leading '.'
- elsif ( $type_ibeg_2 eq '.' ) {
- my $i_next_nonblank =
- min( $inext_to_go[$ibeg_2], $iend_2 );
- next
- unless (
-
- # ... unless there is just one and we can reduce
- # this to two lines if we do. For example, this
- #
- #
- # $bodyA .=
- # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
- #
- # looks better than this:
- # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
- # . '$args .= $pat;'
-
- (
- $n == 2
- && $n == $nmax
- && $type_ibeg_1 ne $type_ibeg_2
- )
+sub break_long_lines {
- # ... or this would strand a short quote , like this
- # . "some long quote"
- # . "\n";
+ #-----------------------------------------------------------
+ # Break a batch of tokens into lines which do not exceed the
+ # maximum line length.
+ #-----------------------------------------------------------
- || ( $types_to_go[$i_next_nonblank] eq 'Q'
- && $i_next_nonblank >= $iend_2 - 1
- && $token_lengths_to_go[$i_next_nonblank] <
- $rOpts_short_concatenation_item_length )
- );
- }
+ my ( $self, $saw_good_break, $rcolon_list, $rbond_strength_bias ) = @_;
- # handle leading keyword..
- elsif ( $type_ibeg_2 eq 'k' ) {
+ # Input parameters:
+ # $saw_good_break - a flag set by break_lists
+ # $rcolon_list - ref to a list of all the ? and : tokens in the batch,
+ # in order.
+ # $rbond_strength_bias - small bond strength bias values set by break_lists
- # handle leading "or"
- if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
- next
- unless (
- $this_line_is_semicolon_terminated
- && (
- $type_ibeg_1 eq '}'
- || (
-
- # following 'if' or 'unless' or 'or'
- $type_ibeg_1 eq 'k'
- && $is_if_unless{ $tokens_to_go[$ibeg_1]
- }
-
- # important: only combine a very simple
- # or statement because the step below
- # may have combined a trailing 'and'
- # with this or, and we do not want to
- # then combine everything together
- && ( $iend_2 - $ibeg_2 <= 7 )
- )
- )
- );
+ # Output: returns references to the arrays:
+ # @i_first
+ # @i_last
+ # which contain the indexes $i of the first and last tokens on each
+ # line.
- #X: RT #81854
- $forced_breakpoint_to_go[$iend_1] = 0
- unless ( $old_breakpoint_to_go[$iend_1] );
- }
+ # In addition, the array:
+ # $forced_breakpoint_to_go[$i]
+ # may be updated to be =1 for any index $i after which there must be
+ # a break. This signals later routines not to undo the breakpoint.
- # handle leading 'and' and 'xor'
- elsif ($tokens_to_go[$ibeg_2] eq 'and'
- || $tokens_to_go[$ibeg_2] eq 'xor' )
- {
+ # Method:
+ # This routine is called if a statement is longer than the maximum line
+ # length, or if a preliminary scanning located desirable break points.
+ # Sub break_lists has already looked at these tokens and set breakpoints
+ # (in array $forced_breakpoint_to_go[$i]) where it wants breaks (for
+ # example after commas, after opening parens, and before closing parens).
+ # This routine will honor these breakpoints and also add additional
+ # breakpoints as necessary to keep the line length below the maximum
+ # requested. It bases its decision on where the 'bond strength' is
+ # lowest.
- # Decide if we will combine a single terminal 'and'
- # after an 'if' or 'unless'.
-
- # This looks best with the 'and' on the same
- # line as the 'if':
- #
- # $a = 1
- # if $seconds and $nu < 2;
- #
- # But this looks better as shown:
- #
- # $a = 1
- # if !$this->{Parents}{$_}
- # or $this->{Parents}{$_} eq $_;
- #
- next
- unless (
- $this_line_is_semicolon_terminated
- && (
+ my @i_first = (); # the first index to output
+ my @i_last = (); # the last index to output
+ my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
+ if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
- # following 'if' or 'unless' or 'or'
- $type_ibeg_1 eq 'k'
- && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
- || $tokens_to_go[$ibeg_1] eq 'or' )
- )
- );
- }
+ # Get the 'bond strengths' between tokens
+ my $rbond_strength_to_go = $self->set_bond_strengths();
- # handle leading "if" and "unless"
- elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
+ # Add any comma bias set by break_lists
+ if ( @{$rbond_strength_bias} ) {
+ foreach my $item ( @{$rbond_strength_bias} ) {
+ my ( $ii, $bias ) = @{$item};
+ if ( $ii >= 0 && $ii <= $max_index_to_go ) {
+ $rbond_strength_to_go->[$ii] += $bias;
+ }
+ elsif (DEVEL_MODE) {
+ my $KK = $K_to_go[0];
+ my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_];
+ Fault(
+"Bad bond strength bias near line $lno: i=$ii must be between 0 and $max_index_to_go\n"
+ );
+ }
+ }
+ }
- # Combine something like:
- # next
- # if ( $lang !~ /${l}$/i );
- # into:
- # next if ( $lang !~ /${l}$/i );
- next
- unless (
- $this_line_is_semicolon_terminated
+ my $imin = 0;
+ my $imax = $max_index_to_go;
+ if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
+ if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
- # previous line begins with 'and' or 'or'
- && $type_ibeg_1 eq 'k'
- && $is_and_or{ $tokens_to_go[$ibeg_1] }
+ my $i_begin = $imin;
+ my $last_break_strength = NO_BREAK;
+ my $i_last_break = -1;
+ my $line_count = 0;
- );
- }
+ # see if any ?/:'s are in order
+ my $colons_in_order = 1;
+ my $last_tok = EMPTY_STRING;
+ foreach ( @{$rcolon_list} ) {
+ if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
+ $last_tok = $_;
+ }
- # handle all other leading keywords
- else {
+ # This is a sufficient but not necessary condition for colon chain
+ my $is_colon_chain = ( $colons_in_order && @{$rcolon_list} > 2 );
- # keywords look best at start of lines,
- # but combine things like "1 while"
- unless ( $is_assignment{$type_iend_1} ) {
- next
- if ( ( $type_iend_1 ne 'k' )
- && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
- }
- }
- }
+ #------------------------------------------
+ # BEGINNING of main loop to set breakpoints
+ # Keep iterating until we reach the end
+ #------------------------------------------
+ while ( $i_begin <= $imax ) {
- # similar treatment of && and || as above for 'and' and
- # 'or': NOTE: This block of code is currently bypassed
- # because of a previous block but is retained for possible
- # future use.
- elsif ( $is_amp_amp{$type_ibeg_2} ) {
+ #------------------------------------------------------------------
+ # Find the best next breakpoint based on token-token bond strengths
+ #------------------------------------------------------------------
+ my ( $i_lowest, $lowest_strength, $leading_alignment_type, $Msg ) =
+ $self->break_lines_inner_loop(
- # maybe looking at something like:
- # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
+ $i_begin,
+ $i_last_break,
+ $imax,
+ $last_break_strength,
+ $line_count,
+ $rbond_strength_to_go,
+ $saw_good_break,
- next
- unless (
- $this_line_is_semicolon_terminated
+ );
- # previous line begins with an 'if' or 'unless'
- # keyword
- && $type_ibeg_1 eq 'k'
- && $is_if_unless{ $tokens_to_go[$ibeg_1] }
+ # Now make any adjustments required by ternary breakpoint rules
+ if ( @{$rcolon_list} ) {
- );
- }
+ my $i_next_nonblank = $inext_to_go[$i_lowest];
- # handle line with leading = or similar
- elsif ( $is_assignment{$type_ibeg_2} ) {
- next unless ( $n == 1 || $n == $nmax );
- next if ( $old_breakpoint_to_go[$iend_1] );
- next
- unless (
+ #-------------------------------------------------------
+ # ?/: rule 1 : if a break here will separate a '?' on this
+ # line from its closing ':', then break at the '?' instead.
+ # But do not break a sequential chain of ?/: statements
+ #-------------------------------------------------------
+ if ( !$is_colon_chain ) {
+ foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
+ next unless ( $tokens_to_go[$i] eq '?' );
- # unless we can reduce this to two lines
- $nmax == 2
+ # do not break if statement is broken by side comment
+ next
+ if ( $tokens_to_go[$max_index_to_go] eq '#'
+ && terminal_type_i( 0, $max_index_to_go ) !~
+ /^[\;\}]$/ );
- # or three lines, the last with a leading semicolon
- || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
+ # no break needed if matching : is also on the line
+ next
+ if ( defined( $mate_index_to_go[$i] )
+ && $mate_index_to_go[$i] <= $i_next_nonblank );
- # or the next line ends with a here doc
- || $type_iend_2 eq 'h'
+ $i_lowest = $i;
+ if ( $want_break_before{'?'} ) { $i_lowest-- }
+ $i_next_nonblank = $inext_to_go[$i_lowest];
+ last;
+ }
+ }
- # or this is a short line ending in ;
- || ( $n == $nmax
- && $this_line_is_semicolon_terminated )
- );
- $forced_breakpoint_to_go[$iend_1] = 0;
- }
+ my $next_nonblank_type = $types_to_go[$i_next_nonblank];
- #----------------------------------------------------------
- # Recombine Section 4:
- # Combine the lines if we arrive here and it is possible
- #----------------------------------------------------------
+ #-------------------------------------------------------------
+ # ?/: rule 2 : if we break at a '?', then break at its ':'
+ #
+ # Note: this rule is also in sub break_lists to handle a break
+ # at the start and end of a line (in case breaks are dictated
+ # by side comments).
+ #-------------------------------------------------------------
+ if ( $next_nonblank_type eq '?' ) {
+ $self->set_closing_breakpoint($i_next_nonblank);
+ }
+ elsif ( $types_to_go[$i_lowest] eq '?' ) {
+ $self->set_closing_breakpoint($i_lowest);
+ }
- # honor hard breakpoints
- next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
+ #--------------------------------------------------------
+ # ?/: rule 3 : if we break at a ':' then we save
+ # its location for further work below. We may need to go
+ # back and break at its '?'.
+ #--------------------------------------------------------
+ if ( $next_nonblank_type eq ':' ) {
+ push @i_colon_breaks, $i_next_nonblank;
+ }
+ elsif ( $types_to_go[$i_lowest] eq ':' ) {
+ push @i_colon_breaks, $i_lowest;
+ }
- my $bs = $rbond_strength_to_go->[$iend_1] + $bs_tweak;
+ # here we should set breaks for all '?'/':' pairs which are
+ # separated by this line
+ }
- # Require a few extra spaces before recombining lines if we are
- # at an old breakpoint unless this is a simple list or terminal
- # line. The goal is to avoid oscillating between two
- # quasi-stable end states. For example this snippet caused
- # problems:
-## my $this =
-## bless {
-## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
-## },
-## $type;
- next
- if ( $old_breakpoint_to_go[$iend_1]
- && !$this_line_is_semicolon_terminated
- && $n < $nmax
- && $excess + 4 > 0
- && $type_iend_2 ne ',' );
-
- # do not recombine if we would skip in indentation levels
- if ( $n < $nmax ) {
- my $if_next = $ri_beg->[ $n + 1 ];
- next
- if (
- $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
- && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
-
- # but an isolated 'if (' is undesirable
- && !(
- $n == 1
- && $iend_1 - $ibeg_1 <= 2
- && $type_ibeg_1 eq 'k'
- && $tokens_to_go[$ibeg_1] eq 'if'
- && $tokens_to_go[$iend_1] ne '('
- )
- );
- }
+ # guard against infinite loop (should never happen)
+ if ( $i_lowest <= $i_last_break ) {
+ DEVEL_MODE
+ && Fault("i_lowest=$i_lowest <= i_last_break=$i_last_break\n");
+ $i_lowest = $imax;
+ }
- # honor no-break's
- ## next if ( $bs >= NO_BREAK - 1 ); # removed for b1257
+ DEBUG_BREAK_LINES
+ && print STDOUT
+"BREAK: best is i = $i_lowest strength = $lowest_strength;\nReason>> $Msg\n";
- # remember the pair with the greatest bond strength
- if ( !$n_best ) {
- $n_best = $n;
- $bs_best = $bs;
- }
- else {
+ $line_count++;
- if ( $bs > $bs_best ) {
- $n_best = $n;
- $bs_best = $bs;
- }
- }
- }
+ # save this line segment, after trimming blanks at the ends
+ push( @i_first,
+ ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
+ push( @i_last,
+ ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
- # recombine the pair with the greatest bond strength
- if ($n_best) {
- splice @{$ri_beg}, $n_best, 1;
- splice @{$ri_end}, $n_best - 1, 1;
- splice @joint, $n_best, 1;
+ # set a forced breakpoint at a container opening, if necessary, to
+ # signal a break at a closing container. Excepting '(' for now.
+ if (
+ (
+ $tokens_to_go[$i_lowest] eq '{'
+ || $tokens_to_go[$i_lowest] eq '['
+ )
+ && !$forced_breakpoint_to_go[$i_lowest]
+ )
+ {
+ $self->set_closing_breakpoint($i_lowest);
+ }
- # keep going if we are still making progress
- $more_to_do++;
- }
- } # end iteration loop
+ # get ready to find the next breakpoint
+ $last_break_strength = $lowest_strength;
+ $i_last_break = $i_lowest;
+ $i_begin = $i_lowest + 1;
- } # end loop over sections
+ # skip past a blank
+ if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
+ $i_begin++;
+ }
+ }
- RETURN:
+ #-------------------------------------------------
+ # END of main loop to set continuation breakpoints
+ #-------------------------------------------------
- if (DEBUG_RECOMBINE) {
- my $nmax_last = @{$ri_end} - 1;
- print STDERR
-"exiting recombine with $nmax_last lines, starting lines=$nmax_start, iterations=$it_count, max_it=$it_count_max numsec=$num_sections\n";
- }
- return;
- } ## end sub recombine_breakpoints
-} ## end closure recombine_breakpoints
+ #-----------------------------------------------------------
+ # ?/: rule 4 -- if we broke at a ':', then break at
+ # corresponding '?' unless this is a chain of ?: expressions
+ #-----------------------------------------------------------
+ if (@i_colon_breaks) {
+ my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
+ if ( !$is_chain ) {
+ $self->do_colon_breaks( \@i_colon_breaks, \@i_first, \@i_last );
+ }
+ }
-sub insert_final_ternary_breaks {
+ return ( \@i_first, \@i_last, $rbond_strength_to_go );
+} ## end sub break_long_lines
- my ( $self, $ri_left, $ri_right ) = @_;
+# small bond strength numbers to help break ties
+use constant TINY_BIAS => 0.0001;
+use constant MAX_BIAS => 0.001;
- # Called once per batch to look for and do any final line breaks for
- # long ternary chains
+sub break_lines_inner_loop {
- my $nmax = @{$ri_right} - 1;
+ #-----------------------------------------------------------------
+ # Find the best next breakpoint in index range ($i_begin .. $imax)
+ # which, if possible, does not exceed the maximum line length.
+ #-----------------------------------------------------------------
- # scan the left and right end tokens of all lines
- my $count = 0;
- my $i_first_colon = -1;
- for my $n ( 0 .. $nmax ) {
- my $il = $ri_left->[$n];
- my $ir = $ri_right->[$n];
- my $typel = $types_to_go[$il];
- my $typer = $types_to_go[$ir];
- return if ( $typel eq '?' );
- return if ( $typer eq '?' );
- if ( $typel eq ':' ) { $i_first_colon = $il; last; }
- elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
- }
+ my (
+ $self, #
- # For long ternary chains,
- # if the first : we see has its ? is in the interior
- # of a preceding line, then see if there are any good
- # breakpoints before the ?.
- if ( $i_first_colon > 0 ) {
- my $i_question = $mate_index_to_go[$i_first_colon];
- if ( $i_question > 0 ) {
- my @insert_list;
- foreach my $ii ( reverse( 0 .. $i_question - 1 ) ) {
- my $token = $tokens_to_go[$ii];
- my $type = $types_to_go[$ii];
+ $i_begin,
+ $i_last_break,
+ $imax,
+ $last_break_strength,
+ $line_count,
+ $rbond_strength_to_go,
+ $saw_good_break,
- # For now, a good break is either a comma or,
- # in a long chain, a 'return'.
- # Patch for RT #126633: added the $nmax>1 check to avoid
- # breaking after a return for a simple ternary. For longer
- # chains the break after return allows vertical alignment, so
- # it is still done. So perltidy -wba='?' will not break
- # immediately after the return in the following statement:
- # sub x {
- # return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' :
- # 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb';
- # }
- if (
- (
- $type eq ','
- || $type eq 'k' && ( $nmax > 1 && $token eq 'return' )
- )
- && $self->in_same_container_i( $ii, $i_question )
- )
- {
- push @insert_list, $ii;
- last;
- }
- }
+ ) = @_;
- # insert any new break points
- if (@insert_list) {
- $self->insert_additional_breaks( \@insert_list, $ri_left,
- $ri_right );
- }
+ # Given:
+ # $i_begin = first index of range
+ # $i_last_break = index of previous break
+ # $imax = last index of range
+ # $last_break_strength = bond strength of last break
+ # $line_count = number of output lines so far
+ # $rbond_strength_to_go = ref to array of bond strengths
+ # $saw_good_break = true if old line had a good breakpoint
+
+ # Returns:
+ # $i_lowest = index of best breakpoint
+ # $lowest_strength = 'bond strength' at best breakpoint
+ # $leading_alignment_type = special token type after break
+ # $Msg = string of debug info
+
+ my $Msg = EMPTY_STRING;
+ my $strength = NO_BREAK;
+ my $i_test = $i_begin - 1;
+ my $i_lowest = -1;
+ my $starting_sum = $summed_lengths_to_go[$i_begin];
+ my $lowest_strength = NO_BREAK;
+ my $leading_alignment_type = EMPTY_STRING;
+ my $leading_spaces = leading_spaces_to_go($i_begin);
+ my $maximum_line_length =
+ $maximum_line_length_at_level[ $levels_to_go[$i_begin] ];
+ DEBUG_BREAK_LINES
+ && do {
+ $Msg .= "updating leading spaces to be $leading_spaces at i=$i_begin\n";
+ };
+
+ # Do not separate an isolated bare word from an opening paren.
+ # Alternate Fix #2 for issue b1299. This waits as long as possible
+ # to make the decision.
+ if ( $types_to_go[$i_begin] eq 'i'
+ && substr( $tokens_to_go[$i_begin], 0, 1 ) =~ /\w/ )
+ {
+ my $i_next_nonblank = $inext_to_go[$i_begin];
+ if ( $tokens_to_go[$i_next_nonblank] eq '(' ) {
+ $rbond_strength_to_go->[$i_begin] = NO_BREAK;
}
}
- return;
-} ## end sub insert_final_ternary_breaks
-sub insert_breaks_before_list_opening_containers {
+ # Avoid a break which would strand a single punctuation
+ # token. For example, we do not want to strand a leading
+ # '.' which is followed by a long quoted string.
+ # But note that we do want to do this with -extrude (l=1)
+ # so please test any changes to this code on -extrude.
+ if (
+ ( $i_begin < $imax )
+ && ( $tokens_to_go[$i_begin] eq $types_to_go[$i_begin] )
+ && !$forced_breakpoint_to_go[$i_begin]
+ && !(
+
+ # Allow break after a closing eval brace. This is an
+ # approximate way to simulate a forced breakpoint made in
+ # Section B below. No differences have been found, but if
+ # necessary the full logic of Section B could be used here
+ # (see c165).
+ $tokens_to_go[$i_begin] eq '}'
+ && $block_type_to_go[$i_begin]
+ && $block_type_to_go[$i_begin] eq 'eval'
+ )
+ && (
+ (
+ $leading_spaces +
+ $summed_lengths_to_go[ $i_begin + 1 ] -
+ $starting_sum
+ ) < $maximum_line_length
+ )
+ )
+ {
+ $i_test = min( $imax, $inext_to_go[$i_begin] ) - 1;
+ DEBUG_BREAK_LINES && do {
+ $Msg .= " :skip ahead at i=$i_test";
+ };
+ }
- my ( $self, $ri_left, $ri_right ) = @_;
+ #-------------------------------------------------------
+ # Begin INNER_LOOP over the indexes in the _to_go arrays
+ #-------------------------------------------------------
+ while ( ++$i_test <= $imax ) {
+ my $type = $types_to_go[$i_test];
+ my $token = $tokens_to_go[$i_test];
+ my $i_next_nonblank = $inext_to_go[$i_test];
+ my $next_nonblank_type = $types_to_go[$i_next_nonblank];
+ my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
+ my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
- # This routine is called once per batch to implement the parameters
- # --break-before-hash-brace, etc.
+ #---------------------------------------------------------------
+ # Section A: Get token-token strength and handle any adjustments
+ #---------------------------------------------------------------
- # Nothing to do if none of these parameters has been set
- return unless %break_before_container_types;
+ # adjustments to the previous bond strength may have been made, and
+ # we must keep the bond strength of a token and its following blank
+ # the same;
+ my $last_strength = $strength;
+ $strength = $rbond_strength_to_go->[$i_test];
+ if ( $type eq 'b' ) { $strength = $last_strength }
- my $nmax = @{$ri_right} - 1;
- return unless ( $nmax >= 0 );
+ # reduce strength a bit to break ties at an old comma breakpoint ...
+ if (
- my $rLL = $self->[_rLL_];
+ $old_breakpoint_to_go[$i_test]
- my $rbreak_before_container_by_seqno =
- $self->[_rbreak_before_container_by_seqno_];
- my $rK_weld_left = $self->[_rK_weld_left_];
+ # Patch: limited to just commas to avoid blinking states
+ && $type eq ','
- # scan the ends of all lines
- my @insert_list;
- for my $n ( 0 .. $nmax ) {
- my $il = $ri_left->[$n];
- my $ir = $ri_right->[$n];
- next unless ( $ir > $il );
- my $Kl = $K_to_go[$il];
- my $Kr = $K_to_go[$ir];
- my $Kend = $Kr;
- my $type_end = $rLL->[$Kr]->[_TYPE_];
+ # which is a 'good' breakpoint, meaning ...
+ # we don't want to break before it
+ && !$want_break_before{$type}
- # Backup before any side comment
- if ( $type_end eq '#' ) {
- $Kend = $self->K_previous_nonblank($Kr);
- next unless defined($Kend);
- $type_end = $rLL->[$Kend]->[_TYPE_];
+ # and either we want to break before the next token
+ # or the next token is not short (i.e. not a '*', '/' etc.)
+ && $i_next_nonblank <= $imax
+ && ( $want_break_before{$next_nonblank_type}
+ || $token_lengths_to_go[$i_next_nonblank] > 2
+ || $next_nonblank_type eq ','
+ || $is_opening_type{$next_nonblank_type} )
+ )
+ {
+ $strength -= TINY_BIAS;
+ DEBUG_BREAK_LINES && do { $Msg .= " :-bias at i=$i_test" };
}
- # Backup to the start of any weld; fix for b1173.
- if ($total_weld_count) {
- my $Kend_test = $rK_weld_left->{$Kend};
- if ( defined($Kend_test) && $Kend_test > $Kl ) {
- $Kend = $Kend_test;
- $Kend_test = $rK_weld_left->{$Kend};
+ # otherwise increase strength a bit if this token would be at the
+ # maximum line length. This is necessary to avoid blinking
+ # in the above example when the -iob flag is added.
+ else {
+ my $len =
+ $leading_spaces +
+ $summed_lengths_to_go[ $i_test + 1 ] -
+ $starting_sum;
+ if ( $len >= $maximum_line_length ) {
+ $strength += TINY_BIAS;
+ DEBUG_BREAK_LINES && do { $Msg .= " :+bias at i=$i_test" };
}
-
- # Do not break if we did not back up to the start of a weld
- # (shouldn't happen)
- next if ( defined($Kend_test) );
}
- my $token = $rLL->[$Kend]->[_TOKEN_];
- next unless ( $is_opening_token{$token} );
- next unless ( $Kl < $Kend - 1 );
-
- my $seqno = $rLL->[$Kend]->[_TYPE_SEQUENCE_];
- next unless ( defined($seqno) );
+ #-------------------------------------
+ # Section B: Handle forced breakpoints
+ #-------------------------------------
+ my $must_break;
- # Use the flag which was previously set
- next unless ( $rbreak_before_container_by_seqno->{$seqno} );
+ # Force an immediate break at certain operators
+ # with lower level than the start of the line,
+ # unless we've already seen a better break.
+ #
+ # Note on an issue with a preceding '?' :
- # Install a break before this opening token.
- my $Kbreak = $self->K_previous_nonblank($Kend);
- my $ibreak = $Kbreak - $Kl + $il;
- next if ( $ibreak < $il );
- next if ( $nobreak_to_go[$ibreak] );
- push @insert_list, $ibreak;
- }
+ # There may be a break at a previous ? if the line is long. Because
+ # of this we do not want to force a break if there is a previous ? on
+ # this line. For now the best way to do this is to not break if we
+ # have seen a lower strength point, which is probably a ?.
+ #
+ # Example of unwanted breaks we are avoiding at a '.' following a ?
+ # from pod2html using perltidy -gnu:
+ # )
+ # ? "\n<A NAME=\""
+ # . $value
+ # . "\">\n$text</A>\n"
+ # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
+ if (
+ ( $strength <= $lowest_strength )
+ && ( $nesting_depth_to_go[$i_begin] >
+ $nesting_depth_to_go[$i_next_nonblank] )
+ && (
+ $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
+ || (
+ $next_nonblank_type eq 'k'
- # insert any new break points
- if (@insert_list) {
- $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
- }
- return;
-} ## end sub insert_breaks_before_list_opening_containers
+ ## /^(and|or)$/ # note: includes 'xor' now
+ && $is_and_or{$next_nonblank_token}
+ )
+ )
+ )
+ {
+ $self->set_forced_breakpoint($i_next_nonblank);
+ DEBUG_BREAK_LINES
+ && do { $Msg .= " :Forced break at i=$i_next_nonblank" };
+ }
-sub note_added_semicolon {
- my ( $self, $line_number ) = @_;
- $self->[_last_added_semicolon_at_] = $line_number;
- if ( $self->[_added_semicolon_count_] == 0 ) {
- $self->[_first_added_semicolon_at_] = $line_number;
- }
- $self->[_added_semicolon_count_]++;
- write_logfile_entry("Added ';' here\n");
- return;
-} ## end sub note_added_semicolon
+ if (
-sub note_deleted_semicolon {
- my ( $self, $line_number ) = @_;
- $self->[_last_deleted_semicolon_at_] = $line_number;
- if ( $self->[_deleted_semicolon_count_] == 0 ) {
- $self->[_first_deleted_semicolon_at_] = $line_number;
- }
- $self->[_deleted_semicolon_count_]++;
- write_logfile_entry("Deleted unnecessary ';' at line $line_number\n");
- return;
-} ## end sub note_deleted_semicolon
+ # Try to put a break where requested by break_lists
+ $forced_breakpoint_to_go[$i_test]
-sub note_embedded_tab {
- my ( $self, $line_number ) = @_;
- $self->[_embedded_tab_count_]++;
- $self->[_last_embedded_tab_at_] = $line_number;
- if ( !$self->[_first_embedded_tab_at_] ) {
- $self->[_first_embedded_tab_at_] = $line_number;
- }
+ # break between ) { in a continued line so that the '{' can
+ # be outdented
+ # See similar logic in break_lists which catches instances
+ # where a line is just something like ') {'. We have to
+ # be careful because the corresponding block keyword might
+ # not be on the first line, such as 'for' here:
+ #
+ # eval {
+ # for ("a") {
+ # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
+ # }
+ # };
+ #
+ || (
+ $line_count
+ && ( $token eq ')' )
+ && ( $next_nonblank_type eq '{' )
+ && ($next_nonblank_block_type)
+ && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
- if ( $self->[_embedded_tab_count_] <= MAX_NAG_MESSAGES ) {
- write_logfile_entry("Embedded tabs in quote or pattern\n");
- }
- return;
-} ## end sub note_embedded_tab
+ # RT #104427: Dont break before opening sub brace because
+ # sub block breaks handled at higher level, unless
+ # it looks like the preceding list is long and broken
+ && !(
-use constant DEBUG_CORRECT_LP => 0;
+ (
+ $next_nonblank_block_type =~ /$SUB_PATTERN/
+ || $next_nonblank_block_type =~ /$ASUB_PATTERN/
+ )
+ && ( $nesting_depth_to_go[$i_begin] ==
+ $nesting_depth_to_go[$i_next_nonblank] )
+ )
-sub correct_lp_indentation {
+ && !$rOpts_opening_brace_always_on_right
+ )
- # When the -lp option is used, we need to make a last pass through
- # each line to correct the indentation positions in case they differ
- # from the predictions. This is necessary because perltidy uses a
- # predictor/corrector method for aligning with opening parens. The
- # predictor is usually good, but sometimes stumbles. The corrector
- # tries to patch things up once the actual opening paren locations
- # are known.
- my ( $self, $ri_first, $ri_last ) = @_;
- my $K_opening_container = $self->[_K_opening_container_];
- my $K_closing_container = $self->[_K_closing_container_];
- my $do_not_pad = 0;
-
- # Note on flag '$do_not_pad':
- # We want to avoid a situation like this, where the aligner inserts
- # whitespace before the '=' to align it with a previous '=', because
- # otherwise the parens might become mis-aligned in a situation like
- # this, where the '=' has become aligned with the previous line,
- # pushing the opening '(' forward beyond where we want it.
- #
- # $mkFloor::currentRoom = '';
- # $mkFloor::c_entry = $c->Entry(
- # -width => '10',
- # -relief => 'sunken',
- # ...
- # );
- #
- # We leave it to the aligner to decide how to do this.
+ # There is an implied forced break at a terminal opening brace
+ || ( ( $type eq '{' ) && ( $i_test == $imax ) )
+ )
+ {
- # first remove continuation indentation if appropriate
- my $rLL = $self->[_rLL_];
- my $max_line = @{$ri_first} - 1;
+ # Forced breakpoints must sometimes be overridden, for example
+ # because of a side comment causing a NO_BREAK. It is easier
+ # to catch this here than when they are set.
+ if ( $strength < NO_BREAK - 1 ) {
+ $strength = $lowest_strength - TINY_BIAS;
+ $must_break = 1;
+ DEBUG_BREAK_LINES
+ && do { $Msg .= " :set must_break at i=$i_next_nonblank" };
+ }
+ }
- #---------------------------------------------------------------------------
- # PASS 1: reduce indentation if necessary at any long one-line blocks (c098)
- #---------------------------------------------------------------------------
+ # quit if a break here would put a good terminal token on
+ # the next line and we already have a possible break
+ if (
+ ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
+ && !$must_break
+ && (
+ (
+ $leading_spaces +
+ $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
+ $starting_sum
+ ) > $maximum_line_length
+ )
+ )
+ {
+ if ( $i_lowest >= 0 ) {
+ DEBUG_BREAK_LINES && do {
+ $Msg .= " :quit at good terminal='$next_nonblank_type'";
+ };
+ last;
+ }
+ }
- # The point is that sub 'starting_one_line_block' made one-line blocks based
- # on default indentation, not -lp indentation. So some of the one-line
- # blocks may be too long when given -lp indentation. We will fix that now
- # if possible, using the list of these closing block indexes.
- my $ri_starting_one_line_block =
- $self->[_this_batch_]->[_ri_starting_one_line_block_];
- if ( @{$ri_starting_one_line_block} ) {
- my @ilist = @{$ri_starting_one_line_block};
- my $inext = shift(@ilist);
+ #------------------------------------------------------------
+ # Section C: Look for the lowest bond strength between tokens
+ #------------------------------------------------------------
+ if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) ) {
- # loop over lines, checking length of each with a one-line block
- my ( $ibeg, $iend );
- foreach my $line ( 0 .. $max_line ) {
- $iend = $ri_last->[$line];
- next if ( $inext > $iend );
- $ibeg = $ri_first->[$line];
+ # break at previous best break if it would have produced
+ # a leading alignment of certain common tokens, and it
+ # is different from the latest candidate break
+ if ($leading_alignment_type) {
+ DEBUG_BREAK_LINES && do {
+ $Msg .=
+ " :last at leading_alignment='$leading_alignment_type'";
+ };
+ last;
+ }
- # This is just for lines with indentation objects (c098)
- my $excess =
- ref( $leading_spaces_to_go[$ibeg] )
- ? $self->excess_line_length( $ibeg, $iend )
- : 0;
+ # Force at least one breakpoint if old code had good
+ # break It is only called if a breakpoint is required or
+ # desired. This will probably need some adjustments
+ # over time. A goal is to try to be sure that, if a new
+ # side comment is introduced into formatted text, then
+ # the same breakpoints will occur. scbreak.t
+ if (
+ $i_test == $imax # we are at the end
+ && !$forced_breakpoint_count
+ && $saw_good_break # old line had good break
+ && $type =~ /^[#;\{]$/ # and this line ends in
+ # ';' or side comment
+ && $i_last_break < 0 # and we haven't made a break
+ && $i_lowest >= 0 # and we saw a possible break
+ && $i_lowest < $imax - 1 # (but not just before this ;)
+ && $strength - $lowest_strength < 0.5 * WEAK # and it's good
+ )
+ {
- if ( $excess > 0 ) {
- my $available_spaces = $self->get_available_spaces_to_go($ibeg);
+ DEBUG_BREAK_LINES && do {
+ $Msg .= " :last at good old break\n";
+ };
+ last;
+ }
- if ( $available_spaces > 0 ) {
- my $delete_want = min( $available_spaces, $excess );
- my $deleted_spaces =
- $self->reduce_lp_indentation( $ibeg, $delete_want );
- $available_spaces =
- $self->get_available_spaces_to_go($ibeg);
+ # Do not skip past an important break point in a short final
+ # segment. For example, without this check we would miss the
+ # break at the final / in the following code:
+ #
+ # $depth_stop =
+ # ( $tau * $mass_pellet * $q_0 *
+ # ( 1. - exp( -$t_stop / $tau ) ) -
+ # 4. * $pi * $factor * $k_ice *
+ # ( $t_melt - $t_ice ) *
+ # $r_pellet *
+ # $t_stop ) /
+ # ( $rho_ice * $Qs * $pi * $r_pellet**2 );
+ #
+ if (
+ $line_count > 2
+ && $i_lowest >= 0 # and we saw a possible break
+ && $i_lowest < $i_test
+ && $i_test > $imax - 2
+ && $nesting_depth_to_go[$i_begin] >
+ $nesting_depth_to_go[$i_lowest]
+ && $lowest_strength < $last_break_strength - .5 * WEAK
+ )
+ {
+ # Make this break for math operators for now
+ my $ir = $inext_to_go[$i_lowest];
+ my $il = iprev_to_go($ir);
+ if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
+ || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ )
+ {
+ DEBUG_BREAK_LINES && do {
+ $Msg .= " :last-noskip_short";
+ };
+ last;
}
}
- # skip forward to next one-line block to check
- while (@ilist) {
- $inext = shift @ilist;
- next if ( $inext <= $iend );
- last if ( $inext > $iend );
+ # Update the minimum bond strength location
+ $lowest_strength = $strength;
+ $i_lowest = $i_test;
+ if ($must_break) {
+ DEBUG_BREAK_LINES && do {
+ $Msg .= " :last-must_break";
+ };
+ last;
}
- last if ( $inext <= $iend );
- }
- }
-
- #-------------------------------------------------------------------
- # PASS 2: look for and fix other problems in each line of this batch
- #-------------------------------------------------------------------
-
- # look at each output line ...
- my ( $ibeg, $iend );
- foreach my $line ( 0 .. $max_line ) {
- $ibeg = $ri_first->[$line];
- $iend = $ri_last->[$line];
- # looking at each token in this output line ...
- foreach my $i ( $ibeg .. $iend ) {
+ # set flags to remember if a break here will produce a
+ # leading alignment of certain common tokens
+ if ( $line_count > 0
+ && $i_test < $imax
+ && ( $lowest_strength - $last_break_strength <= MAX_BIAS ) )
+ {
+ my $i_last_end = iprev_to_go($i_begin);
+ my $tok_beg = $tokens_to_go[$i_begin];
+ my $type_beg = $types_to_go[$i_begin];
+ if (
- # How many space characters to place before this token
- # for special alignment. Actual padding is done in the
- # continue block.
+ # check for leading alignment of certain tokens
+ (
+ $tok_beg eq $next_nonblank_token
+ && $is_chain_operator{$tok_beg}
+ && ( $type_beg eq 'k'
+ || $type_beg eq $tok_beg )
+ && $nesting_depth_to_go[$i_begin] >=
+ $nesting_depth_to_go[$i_next_nonblank]
+ )
- # looking for next unvisited indentation item ...
- my $indentation = $leading_spaces_to_go[$i];
+ || ( $tokens_to_go[$i_last_end] eq $token
+ && $is_chain_operator{$token}
+ && ( $type eq 'k' || $type eq $token )
+ && $nesting_depth_to_go[$i_last_end] >=
+ $nesting_depth_to_go[$i_test] )
+ )
+ {
+ $leading_alignment_type = $next_nonblank_type;
+ }
+ }
+ }
- # This is just for indentation objects (c098)
- next unless ( ref($indentation) );
+ #-----------------------------------------------------------
+ # Section D: See if the maximum line length will be exceeded
+ #-----------------------------------------------------------
- # Visit each indentation object just once
- next if ( $indentation->get_marked() );
+ # Quit if there are no more tokens to test
+ last if ( $i_test >= $imax );
- # Mark first visit
- $indentation->set_marked(1);
+ # Keep going if we have not reached the limit
+ my $excess =
+ $leading_spaces +
+ $summed_lengths_to_go[ $i_test + 2 ] -
+ $starting_sum -
+ $maximum_line_length;
- # Skip indentation objects which do not align with container tokens
- my $align_seqno = $indentation->get_align_seqno();
- next unless ($align_seqno);
+ if ( $excess < 0 ) {
+ next;
+ }
+ elsif ( $excess == 0 ) {
- # Skip a container which is entirely on this line
- my $Ko = $K_opening_container->{$align_seqno};
- my $Kc = $K_closing_container->{$align_seqno};
- if ( defined($Ko) && defined($Kc) ) {
- next if ( $Ko >= $K_to_go[$ibeg] && $Kc <= $K_to_go[$iend] );
+ # To prevent blinkers we will avoid leaving a token exactly at
+ # the line length limit unless it is the last token or one of
+ # several "good" types.
+ #
+ # The following code was a blinker with -pbp before this
+ # modification:
+ # $last_nonblank_token eq '('
+ # && $is_indirect_object_taker{ $paren_type
+ # [$paren_depth] }
+ # The issue causing the problem is that if the
+ # term [$paren_depth] gets broken across a line then
+ # the whitespace routine doesn't see both opening and closing
+ # brackets and will format like '[ $paren_depth ]'. This
+ # leads to an oscillation in length depending if we break
+ # before the closing bracket or not.
+ if ( $i_test + 1 < $imax
+ && $next_nonblank_type ne ','
+ && !$is_closing_type{$next_nonblank_type} )
+ {
+ # too long
+ DEBUG_BREAK_LINES && do {
+ $Msg .= " :too_long";
+ }
}
-
- if ( $line == 1 && $i == $ibeg ) {
- $do_not_pad = 1;
+ else {
+ next;
}
+ }
+ else {
+ # too long
+ }
- #--------------------------------------------
- # Now see what the error is and try to fix it
- #--------------------------------------------
- my $closing_index = $indentation->get_closed();
- my $predicted_pos = $indentation->get_spaces();
-
- # Find actual position:
- my $actual_pos;
+ # a break here makes the line too long ...
- if ( $i == $ibeg ) {
+ DEBUG_BREAK_LINES && do {
+ my $ltok = $token;
+ my $rtok =
+ $next_nonblank_token ? $next_nonblank_token : EMPTY_STRING;
+ my $i_testp2 = $i_test + 2;
+ if ( $i_testp2 > $max_index_to_go + 1 ) {
+ $i_testp2 = $max_index_to_go + 1;
+ }
+ if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
+ if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
+ print STDOUT
+"BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] str=$strength $ltok $rtok\n";
+ };
- # Case 1: token is first character of of batch - table lookup
- if ( $line == 0 ) {
+ # Exception: allow one extra terminal token after exceeding line length
+ # if it would strand this token.
+ if ( $i_lowest == $i_test
+ && $token_lengths_to_go[$i_test] > 1
+ && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
+ && $rOpts_fuzzy_line_length )
+ {
+ DEBUG_BREAK_LINES && do {
+ $Msg .= " :do_not_strand next='$next_nonblank_type'";
+ };
+ next;
+ }
- $actual_pos = $predicted_pos;
+ # Stop if here if we have a solution and the line will be too long
+ if ( $i_lowest >= 0 ) {
+ DEBUG_BREAK_LINES && do {
+ $Msg .=
+" :Done-too_long && i_lowest=$i_lowest at itest=$i_test, imax=$imax";
+ };
+ last;
+ }
+ }
- my ( $indent, $offset, $is_leading, $exists ) =
- get_saved_opening_indentation($align_seqno);
- if ( defined($indent) ) {
+ #-----------------------------------------------------
+ # End INNER_LOOP over the indexes in the _to_go arrays
+ #-----------------------------------------------------
- # FIXME: should use '1' here if no space after opening
- # and '2' if want space; hardwired at 1 like -gnu-style
- $actual_pos = get_spaces($indent) + $offset + 1;
- }
- }
+ # Be sure we return an index in the range ($ibegin .. $imax).
+ # We will break at imax if no other break was found.
+ if ( $i_lowest < 0 ) { $i_lowest = $imax }
- # Case 2: token starts a new line - use length of previous line
- else {
+ return ( $i_lowest, $lowest_strength, $leading_alignment_type, $Msg );
+} ## end sub break_lines_inner_loop
- my $ibegm = $ri_first->[ $line - 1 ];
- my $iendm = $ri_last->[ $line - 1 ];
- $actual_pos = total_line_length( $ibegm, $iendm );
+sub do_colon_breaks {
+ my ( $self, $ri_colon_breaks, $ri_first, $ri_last ) = @_;
- # follow -pt style
- ++$actual_pos
- if ( $types_to_go[ $iendm + 1 ] eq 'b' );
+ # using a simple method for deciding if we are in a ?/: chain --
+ # this is a chain if it has multiple ?/: pairs all in order;
+ # otherwise not.
+ # Note that if line starts in a ':' we count that above as a break
- }
+ my @insert_list = ();
+ foreach ( @{$ri_colon_breaks} ) {
+ my $i_question = $mate_index_to_go[$_];
+ if ( defined($i_question) ) {
+ if ( $want_break_before{'?'} ) {
+ $i_question = iprev_to_go($i_question);
}
- # Case 3: $i>$ibeg: token is mid-line - use length to previous token
- else {
-
- $actual_pos = total_line_length( $ibeg, $i - 1 );
-
- # for mid-line token, we must check to see if all
- # additional lines have continuation indentation,
- # and remove it if so. Otherwise, we do not get
- # good alignment.
- if ( $closing_index > $iend ) {
- my $ibeg_next = $ri_first->[ $line + 1 ];
- if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
- $self->undo_lp_ci( $line, $i, $closing_index,
- $ri_first, $ri_last );
- }
- }
+ if ( $i_question >= 0 ) {
+ push @insert_list, $i_question;
}
+ }
+ $self->insert_additional_breaks( \@insert_list, $ri_first, $ri_last );
+ }
+ return;
+} ## end sub do_colon_breaks
- # By how many spaces (plus or minus) would we need to increase the
- # indentation to get alignment with the opening token?
- my $move_right = $actual_pos - $predicted_pos;
-
- if (DEBUG_CORRECT_LP) {
- my $tok = substr( $tokens_to_go[$i], 0, 8 );
- my $avail = $self->get_available_spaces_to_go($ibeg);
- print
-"CORRECT_LP for seq=$align_seqno, predicted pos=$predicted_pos actual=$actual_pos => move right=$move_right available=$avail i=$i max=$max_index_to_go tok=$tok\n";
- }
+###########################################
+# CODE SECTION 11: Code to break long lists
+###########################################
- # nothing more to do if no error to correct (gnu2.t)
- if ( $move_right == 0 ) {
- $indentation->set_recoverable_spaces($move_right);
- next;
- }
+{ ## begin closure break_lists
- # Get any collapsed length defined for -xlp
- my $collapsed_length =
- $self->[_rcollapsed_length_by_seqno_]->{$align_seqno};
- $collapsed_length = 0 unless ( defined($collapsed_length) );
+ # These routines and variables are involved in finding good
+ # places to break long lists.
- if (DEBUG_CORRECT_LP) {
- print
-"CORRECT_LP for seq=$align_seqno, collapsed length is $collapsed_length\n";
- }
+ use constant DEBUG_BREAK_LISTS => 0;
- # if we have not seen closure for this indentation in this batch,
- # and do not have a collapsed length estimate, we can only pass on
- # a request to the vertical aligner
- if ( $closing_index < 0 && !$collapsed_length ) {
- $indentation->set_recoverable_spaces($move_right);
- next;
- }
+ my (
- # If necessary, look ahead to see if there is really any leading
- # whitespace dependent on this whitespace, and also find the
- # longest line using this whitespace. Since it is always safe to
- # move left if there are no dependents, we only need to do this if
- # we may have dependent nodes or need to move right.
+ $block_type,
+ $current_depth,
+ $depth,
+ $i,
+ $i_last_colon,
+ $i_line_end,
+ $i_line_start,
+ $i_last_nonblank_token,
+ $last_nonblank_block_type,
+ $last_nonblank_token,
+ $last_nonblank_type,
+ $last_old_breakpoint_count,
+ $minimum_depth,
+ $next_nonblank_block_type,
+ $next_nonblank_token,
+ $next_nonblank_type,
+ $old_breakpoint_count,
+ $starting_breakpoint_count,
+ $starting_depth,
+ $token,
+ $type,
+ $type_sequence,
- my $have_child = $indentation->get_have_child();
- my %saw_indentation;
- my $line_count = 1;
- $saw_indentation{$indentation} = $indentation;
+ );
- # How far can we move right before we hit the limit?
- # let $right_margen = the number of spaces that we can increase
- # the current indentation before hitting the maximum line length.
- my $right_margin = 0;
+ my (
- if ( $have_child || $move_right > 0 ) {
- $have_child = 0;
+ @breakpoint_stack,
+ @breakpoint_undo_stack,
+ @comma_index,
+ @container_type,
+ @identifier_count_stack,
+ @index_before_arrow,
+ @interrupted_list,
+ @item_count_stack,
+ @last_comma_index,
+ @last_dot_index,
+ @last_nonblank_type,
+ @old_breakpoint_count_stack,
+ @opening_structure_index_stack,
+ @rfor_semicolon_list,
+ @has_old_logical_breakpoints,
+ @rand_or_list,
+ @i_equals,
+ @override_cab3,
+ @type_sequence_stack,
- # include estimated collapsed length for incomplete containers
- my $max_length = 0;
- if ( $Kc > $K_to_go[$max_index_to_go] ) {
- $max_length = $collapsed_length + $predicted_pos;
- }
+ );
- if ( $i == $ibeg ) {
- my $length = total_line_length( $ibeg, $iend );
- if ( $length > $max_length ) { $max_length = $length }
- }
+ # these arrays must retain values between calls
+ my ( @has_broken_sublist, @dont_align, @want_comma_break );
- # look ahead at the rest of the lines of this batch..
- foreach my $line_t ( $line + 1 .. $max_line ) {
- my $ibeg_t = $ri_first->[$line_t];
- my $iend_t = $ri_last->[$line_t];
- last if ( $closing_index <= $ibeg_t );
+ my $length_tol;
+ my $lp_tol_boost;
- # remember all different indentation objects
- my $indentation_t = $leading_spaces_to_go[$ibeg_t];
- $saw_indentation{$indentation_t} = $indentation_t;
- $line_count++;
+ sub initialize_break_lists {
+ @dont_align = ();
+ @has_broken_sublist = ();
+ @want_comma_break = ();
- # remember longest line in the group
- my $length_t = total_line_length( $ibeg_t, $iend_t );
- if ( $length_t > $max_length ) {
- $max_length = $length_t;
- }
- }
+ #---------------------------------------------------
+ # Set tolerances to prevent formatting instabilities
+ #---------------------------------------------------
- $right_margin =
- $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] -
- $max_length;
- if ( $right_margin < 0 ) { $right_margin = 0 }
- }
+ # Define tolerances to use when checking if closed
+ # containers will fit on one line. This is necessary to avoid
+ # formatting instability. The basic tolerance is based on the
+ # following:
- my $first_line_comma_count =
- grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
- my $comma_count = $indentation->get_comma_count();
- my $arrow_count = $indentation->get_arrow_count();
+ # - Always allow for at least one extra space after a closing token so
+ # that we do not strand a comma or semicolon. (oneline.t).
- # This is a simple approximate test for vertical alignment:
- # if we broke just after an opening paren, brace, bracket,
- # and there are 2 or more commas in the first line,
- # and there are no '=>'s,
- # then we are probably vertically aligned. We could set
- # an exact flag in sub break_lists, but this is good
- # enough.
- my $indentation_count = keys %saw_indentation;
- my $is_vertically_aligned =
- ( $i == $ibeg
- && $first_line_comma_count > 1
- && $indentation_count == 1
- && ( $arrow_count == 0 || $arrow_count == $line_count ) );
+ # - Use an increased line length tolerance when -ci > -i to avoid
+ # blinking states (case b923 and others).
+ $length_tol =
+ 1 + max( 0, $rOpts_continuation_indentation - $rOpts_indent_columns );
- # Make the move if possible ..
- if (
+ # In addition, it may be necessary to use a few extra tolerance spaces
+ # when -lp is used and/or when -xci is used. The history of this
+ # so far is as follows:
- # we can always move left
- $move_right < 0
+ # FIX1: At least 3 characters were been found to be required for -lp
+ # to fixes cases b1059 b1063 b1117.
- # -xlp
+ # FIX2: Further testing showed that we need a total of 3 extra spaces
+ # when -lp is set for non-lists, and at least 2 spaces when -lp and
+ # -xci are set.
+ # Fixes cases b1063 b1103 b1134 b1135 b1136 b1138 b1140 b1143 b1144
+ # b1145 b1146 b1147 b1148 b1151 b1152 b1153 b1154 b1156 b1157 b1164
+ # b1165
- # incomplete container
- || ( $rOpts_extended_line_up_parentheses
- && $Kc > $K_to_go[$max_index_to_go] )
- || $closing_index < 0
+ # FIX3: To fix cases b1169 b1170 b1171, an update was made in sub
+ # 'find_token_starting_list' to go back before an initial blank space.
+ # This fixed these three cases, and allowed the tolerances to be
+ # reduced to continue to fix all other known cases of instability.
+ # This gives the current tolerance formulation.
- # but we should only move right if we are sure it will
- # not spoil vertical alignment
- || ( $comma_count == 0 )
- || ( $comma_count > 0 && !$is_vertically_aligned )
- )
- {
- my $move =
- ( $move_right <= $right_margin )
- ? $move_right
- : $right_margin;
+ $lp_tol_boost = 0;
- if (DEBUG_CORRECT_LP) {
- print
- "CORRECT_LP for seq=$align_seqno, moving $move spaces\n";
- }
+ if ($rOpts_line_up_parentheses) {
- foreach ( keys %saw_indentation ) {
- $saw_indentation{$_}
- ->permanently_decrease_available_spaces( -$move );
- }
+ # boost tol for combination -lp -xci
+ if ($rOpts_extended_continuation_indentation) {
+ $lp_tol_boost = 2;
}
- # Otherwise, record what we want and the vertical aligner
- # will try to recover it.
+ # boost tol for combination -lp and any -vtc > 0, but only for
+ # non-list containers
else {
- $indentation->set_recoverable_spaces($move_right);
+ foreach ( keys %closing_vertical_tightness ) {
+ next
+ unless ( $closing_vertical_tightness{$_} );
+ $lp_tol_boost = 1; # Fixes B1193;
+ last;
+ }
}
- } ## end loop over tokens in a line
- } ## end loop over lines
- return $do_not_pad;
-} ## end sub correct_lp_indentation
+ }
-sub undo_lp_ci {
+ # Define a level where list formatting becomes highly stressed and
+ # needs to be simplified. Introduced for case b1262.
+ # $list_stress_level = min($stress_level_alpha, $stress_level_beta + 2);
+ # This is now '$high_stress_level'.
- # If there is a single, long parameter within parens, like this:
- #
- # $self->command( "/msg "
- # . $infoline->chan
- # . " You said $1, but did you know that it's square was "
- # . $1 * $1 . " ?" );
- #
- # we can remove the continuation indentation of the 2nd and higher lines
- # to achieve this effect, which is more pleasing:
- #
- # $self->command("/msg "
- # . $infoline->chan
- # . " You said $1, but did you know that it's square was "
- # . $1 * $1 . " ?");
+ return;
+ } ## end sub initialize_break_lists
- my ( $self, $line_open, $i_start, $closing_index, $ri_first, $ri_last ) =
- @_;
- my $max_line = @{$ri_first} - 1;
+ # routine to define essential variables when we go 'up' to
+ # a new depth
+ sub check_for_new_minimum_depth {
+ my ( $self, $depth_t, $seqno ) = @_;
+ if ( $depth_t < $minimum_depth ) {
- # must be multiple lines
- return unless $max_line > $line_open;
+ $minimum_depth = $depth_t;
- my $lev_start = $levels_to_go[$i_start];
- my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
+ # these arrays need not retain values between calls
+ $type_sequence_stack[$depth_t] = $seqno;
+ $override_cab3[$depth_t] = undef;
+ if ( $rOpts_comma_arrow_breakpoints == 3 && $seqno ) {
+ $override_cab3[$depth_t] = $self->[_roverride_cab3_]->{$seqno};
+ }
+ $breakpoint_stack[$depth_t] = $starting_breakpoint_count;
+ $container_type[$depth_t] = EMPTY_STRING;
+ $identifier_count_stack[$depth_t] = 0;
+ $index_before_arrow[$depth_t] = -1;
+ $interrupted_list[$depth_t] = 1;
+ $item_count_stack[$depth_t] = 0;
+ $last_nonblank_type[$depth_t] = EMPTY_STRING;
+ $opening_structure_index_stack[$depth_t] = -1;
- # see if all additional lines in this container have continuation
- # indentation
- my $line_1 = 1 + $line_open;
- my $n = $line_open;
+ $breakpoint_undo_stack[$depth_t] = undef;
+ $comma_index[$depth_t] = undef;
+ $last_comma_index[$depth_t] = undef;
+ $last_dot_index[$depth_t] = undef;
+ $old_breakpoint_count_stack[$depth_t] = undef;
+ $has_old_logical_breakpoints[$depth_t] = 0;
+ $rand_or_list[$depth_t] = [];
+ $rfor_semicolon_list[$depth_t] = [];
+ $i_equals[$depth_t] = -1;
- while ( ++$n <= $max_line ) {
- my $ibeg = $ri_first->[$n];
- my $iend = $ri_last->[$n];
- if ( $ibeg eq $closing_index ) { $n--; last }
- return if ( $lev_start != $levels_to_go[$ibeg] );
- return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
- last if ( $closing_index <= $iend );
- }
+ # these arrays must retain values between calls
+ if ( !defined( $has_broken_sublist[$depth_t] ) ) {
+ $dont_align[$depth_t] = 0;
+ $has_broken_sublist[$depth_t] = 0;
+ $want_comma_break[$depth_t] = 0;
+ }
+ }
+ return;
+ } ## end sub check_for_new_minimum_depth
- # we can reduce the indentation of all continuation lines
- my $continuation_line_count = $n - $line_open;
- @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
- (0) x ($continuation_line_count);
- @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
- @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
- return;
-} ## end sub undo_lp_ci
+ # routine to decide which commas to break at within a container;
+ # returns:
+ # $bp_count = number of comma breakpoints set
+ # $do_not_break_apart = a flag indicating if container need not
+ # be broken open
+ sub set_comma_breakpoints {
-###############################################
-# CODE SECTION 10: Code to break long statments
-###############################################
+ my ( $self, $dd, $rbond_strength_bias ) = @_;
+ my $bp_count = 0;
+ my $do_not_break_apart = 0;
-sub break_long_lines {
+ # anything to do?
+ if ( $item_count_stack[$dd] ) {
- #-----------------------------------------------------------
- # Break a batch of tokens into lines which do not exceed the
- # maximum line length.
- #-----------------------------------------------------------
+ # Do not break a list unless there are some non-line-ending commas.
+ # This avoids getting different results with only non-essential
+ # commas, and fixes b1192.
+ my $seqno = $type_sequence_stack[$dd];
- # Define an array of indexes for inserting newline characters to
- # keep the line lengths below the maximum desired length. There is
- # an implied break after the last token, so it need not be included.
+ my $real_comma_count =
+ $seqno ? $self->[_rtype_count_by_seqno_]->{$seqno}->{','} : 1;
- # Method:
- # This routine is part of series of routines which adjust line
- # lengths. It is only called if a statement is longer than the
- # maximum line length, or if a preliminary scanning located
- # desirable break points. Sub break_lists has already looked at
- # these tokens and set breakpoints (in array
- # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
- # after commas, after opening parens, and before closing parens).
- # This routine will honor these breakpoints and also add additional
- # breakpoints as necessary to keep the line length below the maximum
- # requested. It bases its decision on where the 'bond strength' is
- # lowest.
+ # handle commas not in containers...
+ if ( $dont_align[$dd] ) {
+ $self->do_uncontained_comma_breaks( $dd, $rbond_strength_bias );
+ }
- # Output: returns references to the arrays:
- # @i_first
- # @i_last
- # which contain the indexes $i of the first and last tokens on each
- # line.
+ # handle commas within containers...
+ elsif ($real_comma_count) {
+ my $fbc = $forced_breakpoint_count;
- # In addition, the array:
- # $forced_breakpoint_to_go[$i]
- # may be updated to be =1 for any index $i after which there must be
- # a break. This signals later routines not to undo the breakpoint.
+ # always open comma lists not preceded by keywords,
+ # barewords, identifiers (that is, anything that doesn't
+ # look like a function call)
+ my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
- my ( $self, $saw_good_break, $rcolon_list, $rbond_strength_bias ) = @_;
+ $self->table_maker(
+ {
+ depth => $dd,
+ i_opening_paren => $opening_structure_index_stack[$dd],
+ i_closing_paren => $i,
+ item_count => $item_count_stack[$dd],
+ identifier_count => $identifier_count_stack[$dd],
+ rcomma_index => $comma_index[$dd],
+ next_nonblank_type => $next_nonblank_type,
+ list_type => $container_type[$dd],
+ interrupted => $interrupted_list[$dd],
+ rdo_not_break_apart => \$do_not_break_apart,
+ must_break_open => $must_break_open,
+ has_broken_sublist => $has_broken_sublist[$dd],
+ }
+ );
+ $bp_count = $forced_breakpoint_count - $fbc;
+ $do_not_break_apart = 0 if $must_break_open;
+ }
+ }
+ return ( $bp_count, $do_not_break_apart );
+ } ## end sub set_comma_breakpoints
- # @{$rcolon_list} is a list of all the ? and : tokens in the batch, in
- # order.
+ # These types are excluded at breakpoints to prevent blinking
+ # Switched from excluded to included as part of fix for b1214
+ my %is_uncontained_comma_break_included_type;
- use constant DEBUG_BREAK_LINES => 0;
+ BEGIN {
- my @i_first = (); # the first index to output
- my @i_last = (); # the last index to output
- my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
- if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
+ my @q = qw< k R } ) ] Y Z U w i q Q .
+ = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=>;
+ @is_uncontained_comma_break_included_type{@q} = (1) x scalar(@q);
+ } ## end BEGIN
- my $rbond_strength_to_go = $self->set_bond_strengths();
+ sub do_uncontained_comma_breaks {
- # Add any comma bias set by break_lists
- if ( @{$rbond_strength_bias} ) {
- foreach my $item ( @{$rbond_strength_bias} ) {
- my ( $ii, $bias ) = @{$item};
- if ( $ii >= 0 && $ii <= $max_index_to_go ) {
- $rbond_strength_to_go->[$ii] += $bias;
- }
- elsif (DEVEL_MODE) {
- my $KK = $K_to_go[0];
- my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_];
- Fault(
-"Bad bond strength bias near line $lno: i=$ii must be between 0 and $max_index_to_go\n"
- );
+ # Handle commas not in containers...
+ # This is a catch-all routine for commas that we
+ # don't know what to do with because the don't fall
+ # within containers. We will bias the bond strength
+ # to break at commas which ended lines in the input
+ # file. This usually works better than just trying
+ # to put as many items on a line as possible. A
+ # downside is that if the input file is garbage it
+ # won't work very well. However, the user can always
+ # prevent following the old breakpoints with the
+ # -iob flag.
+ my ( $self, $dd, $rbond_strength_bias ) = @_;
+
+ # Check added for issue c131; an error here would be due to an
+ # error initializing @comma_index when entering depth $dd.
+ if (DEVEL_MODE) {
+ foreach my $ii ( @{ $comma_index[$dd] } ) {
+ if ( $ii < 0 || $ii > $max_index_to_go ) {
+ my $KK = $K_to_go[0];
+ my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_];
+ Fault(<<EOM);
+Bad comma index near line $lno: i=$ii must be between 0 and $max_index_to_go
+EOM
+ }
}
}
- }
-
- my $imin = 0;
- my $imax = $max_index_to_go;
- if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
- if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
- my $i_begin = $imin; # index for starting next iteration
-
- my $leading_spaces = leading_spaces_to_go($imin);
- my $line_count = 0;
- my $last_break_strength = NO_BREAK;
- my $i_last_break = -1;
- my $max_bias = 0.001;
- my $tiny_bias = 0.0001;
- my $leading_alignment_token = EMPTY_STRING;
- my $leading_alignment_type = EMPTY_STRING;
- # see if any ?/:'s are in order
- my $colons_in_order = 1;
- my $last_tok = EMPTY_STRING;
- foreach ( @{$rcolon_list} ) {
- if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
- $last_tok = $_;
- }
+ my $bias = -.01;
+ my $old_comma_break_count = 0;
+ foreach my $ii ( @{ $comma_index[$dd] } ) {
- # This is a sufficient but not necessary condition for colon chain
- my $is_colon_chain = ( $colons_in_order && @{$rcolon_list} > 2 );
+ if ( $old_breakpoint_to_go[$ii] ) {
+ $old_comma_break_count++;
- my $Msg = EMPTY_STRING;
+ # Store the bias info for use by sub set_bond_strength
+ push @{$rbond_strength_bias}, [ $ii, $bias ];
- #-------------------------------------------------------
- # BEGINNING of main loop to set continuation breakpoints
- # Keep iterating until we reach the end
- #-------------------------------------------------------
- while ( $i_begin <= $imax ) {
- my $lowest_strength = NO_BREAK;
- my $starting_sum = $summed_lengths_to_go[$i_begin];
- my $i_lowest = -1;
- my $i_test = -1;
- my $lowest_next_token = EMPTY_STRING;
- my $lowest_next_type = 'b';
- my $i_lowest_next_nonblank = -1;
- my $maximum_line_length =
- $maximum_line_length_at_level[ $levels_to_go[$i_begin] ];
-
- # Do not separate an isolated bare word from an opening paren.
- # Alternate Fix #2 for issue b1299. This waits as long as possible
- # to make the decision.
- if ( $types_to_go[$i_begin] eq 'i'
- && substr( $tokens_to_go[$i_begin], 0, 1 ) =~ /\w/ )
- {
- my $i_next_nonblank = $inext_to_go[$i_begin];
- if ( $tokens_to_go[$i_next_nonblank] eq '(' ) {
- $rbond_strength_to_go->[$i_begin] = NO_BREAK;
+ # reduce bias magnitude to force breaks in order
+ $bias *= 0.99;
}
}
- #-------------------------------------------------------
- # BEGINNING of inner loop to find the best next breakpoint
- #-------------------------------------------------------
- my $strength = NO_BREAK;
- $i_test = $i_begin - 1;
- while ( ++$i_test <= $imax ) {
- my $type = $types_to_go[$i_test];
- my $token = $tokens_to_go[$i_test];
- my $next_type = $types_to_go[ $i_test + 1 ];
- my $next_token = $tokens_to_go[ $i_test + 1 ];
- my $i_next_nonblank = $inext_to_go[$i_test];
- my $next_nonblank_type = $types_to_go[$i_next_nonblank];
- my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
- my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
-
- # adjustments to the previous bond strength may have been made, and
- # we must keep the bond strength of a token and its following blank
- # the same;
- my $last_strength = $strength;
- $strength = $rbond_strength_to_go->[$i_test];
- if ( $type eq 'b' ) { $strength = $last_strength }
-
- # reduce strength a bit to break ties at an old comma breakpoint ...
- if (
-
- $old_breakpoint_to_go[$i_test]
-
- # Patch: limited to just commas to avoid blinking states
- && $type eq ','
+ # Also put a break before the first comma if
+ # (1) there was a break there in the input, and
+ # (2) there was exactly one old break before the first comma break
+ # (3) OLD: there are multiple old comma breaks
+ # (3) NEW: there are one or more old comma breaks (see return example)
+ # (4) the first comma is at the starting level ...
+ # ... fixes cases b064 b065 b068 b210 b747
+ # (5) the batch does not start with a ci>0 [ignore a ci change by -xci]
+ # ... fixes b1220. If ci>0 we are in the middle of a snippet,
+ # maybe because -boc has been forcing out previous lines.
- # which is a 'good' breakpoint, meaning ...
- # we don't want to break before it
- && !$want_break_before{$type}
+ # For example, we will follow the user and break after
+ # 'print' in this snippet:
+ # print
+ # "conformability (Not the same dimension)\n",
+ # "\t", $have, " is ", text_unit($hu), "\n",
+ # "\t", $want, " is ", text_unit($wu), "\n",
+ # ;
+ #
+ # Another example, just one comma, where we will break after
+ # the return:
+ # return
+ # $x * cos($a) - $y * sin($a),
+ # $x * sin($a) + $y * cos($a);
- # and either we want to break before the next token
- # or the next token is not short (i.e. not a '*', '/' etc.)
- && $i_next_nonblank <= $imax
- && ( $want_break_before{$next_nonblank_type}
- || $token_lengths_to_go[$i_next_nonblank] > 2
- || $next_nonblank_type eq ','
- || $is_opening_type{$next_nonblank_type} )
- )
- {
- $strength -= $tiny_bias;
- DEBUG_BREAK_LINES && do { $Msg .= " :-bias at i=$i_test" };
- }
+ # Breaking a print statement:
+ # print SAVEOUT
+ # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
+ # ( $? & 128 ) ? " -- core dumped" : "", "\n";
+ #
+ # But we will not force a break after the opening paren here
+ # (causes a blinker):
+ # $heap->{stream}->set_output_filter(
+ # poe::filter::reference->new('myotherfreezer') ),
+ # ;
+ #
+ my $i_first_comma = $comma_index[$dd]->[0];
+ my $level_comma = $levels_to_go[$i_first_comma];
+ my $ci_start = $ci_levels_to_go[0];
- # otherwise increase strength a bit if this token would be at the
- # maximum line length. This is necessary to avoid blinking
- # in the above example when the -iob flag is added.
- else {
- my $len =
- $leading_spaces +
- $summed_lengths_to_go[ $i_test + 1 ] -
- $starting_sum;
- if ( $len >= $maximum_line_length ) {
- $strength += $tiny_bias;
- DEBUG_BREAK_LINES && do { $Msg .= " :+bias at i=$i_test" };
+ # Here we want to use the value of ci before any -xci adjustment
+ if ( $ci_start && $rOpts_extended_continuation_indentation ) {
+ my $K0 = $K_to_go[0];
+ if ( $self->[_rseqno_controlling_my_ci_]->{$K0} ) { $ci_start = 0 }
+ }
+ if ( !$ci_start
+ && $old_breakpoint_to_go[$i_first_comma]
+ && $level_comma == $levels_to_go[0] )
+ {
+ my $ibreak = -1;
+ my $obp_count = 0;
+ foreach my $ii ( reverse( 0 .. $i_first_comma - 1 ) ) {
+ if ( $old_breakpoint_to_go[$ii] ) {
+ $obp_count++;
+ last if ( $obp_count > 1 );
+ $ibreak = $ii
+ if ( $levels_to_go[$ii] == $level_comma );
}
}
- my $must_break = 0;
-
- # Force an immediate break at certain operators
- # with lower level than the start of the line,
- # unless we've already seen a better break.
- #
- #------------------------------------
- # Note on an issue with a preceding ?
- #------------------------------------
- # We don't include a ? in the above list, but there may
- # be a break at a previous ? if the line is long.
- # Because of this we do not want to force a break if
- # there is a previous ? on this line. For now the best way
- # to do this is to not break if we have seen a lower strength
- # point, which is probably a ?.
- #
- # Example of unwanted breaks we are avoiding at a '.' following a ?
- # from pod2html using perltidy -gnu:
- # )
- # ? "\n<A NAME=\""
- # . $value
- # . "\">\n$text</A>\n"
- # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
- if (
- ( $strength <= $lowest_strength )
- && ( $nesting_depth_to_go[$i_begin] >
- $nesting_depth_to_go[$i_next_nonblank] )
- && (
- $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
- || (
- $next_nonblank_type eq 'k'
-
- ## /^(and|or)$/ # note: includes 'xor' now
- && $is_and_or{$next_nonblank_token}
- )
- )
- )
+ # Changed rule from multiple old commas to just one here:
+ if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
{
- $self->set_forced_breakpoint($i_next_nonblank);
- DEBUG_BREAK_LINES
- && do { $Msg .= " :Forced break at i=$i_next_nonblank" };
- }
+ my $ibreak_m = $ibreak;
+ $ibreak_m-- if ( $types_to_go[$ibreak_m] eq 'b' );
+ if ( $ibreak_m >= 0 ) {
- if (
+ # In order to avoid blinkers we have to be fairly
+ # restrictive:
- # Try to put a break where requested by break_lists
- $forced_breakpoint_to_go[$i_test]
+ # OLD Rules:
+ # Rule 1: Do not to break before an opening token
+ # Rule 2: avoid breaking at ternary operators
+ # (see b931, which is similar to the above print example)
+ # Rule 3: Do not break at chain operators to fix case b1119
+ # - The previous test was '$typem !~ /^[\(\{\[L\?\:]$/'
- # break between ) { in a continued line so that the '{' can
- # be outdented
- # See similar logic in break_lists which catches instances
- # where a line is just something like ') {'. We have to
- # be careful because the corresponding block keyword might
- # not be on the first line, such as 'for' here:
- #
- # eval {
- # for ("a") {
- # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
- # }
- # };
- #
- || (
- $line_count
- && ( $token eq ')' )
- && ( $next_nonblank_type eq '{' )
- && ($next_nonblank_block_type)
- && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
-
- # RT #104427: Dont break before opening sub brace because
- # sub block breaks handled at higher level, unless
- # it looks like the preceding list is long and broken
- && !(
+ # NEW Rule, replaced above rules after case b1214:
+ # only break at one of the included types
- (
- $next_nonblank_block_type =~ /$SUB_PATTERN/
- || $next_nonblank_block_type =~ /$ASUB_PATTERN/
- )
- && ( $nesting_depth_to_go[$i_begin] ==
- $nesting_depth_to_go[$i_next_nonblank] )
- )
+ # Be sure to test any changes to these rules against runs
+ # with -l=0 such as the 'bbvt' test (perltidyrc_colin)
+ # series.
+ my $type_m = $types_to_go[$ibreak_m];
- && !$rOpts_opening_brace_always_on_right
- )
+ # Switched from excluded to included for b1214. If necessary
+ # the token could also be checked if type_m eq 'k'
+ if ( $is_uncontained_comma_break_included_type{$type_m} ) {
- # There is an implied forced break at a terminal opening brace
- || ( ( $type eq '{' ) && ( $i_test == $imax ) )
- )
- {
+ # Rule added to fix b1449:
+ # Do not break before a '?' if -nbot is set
+ # Otherwise, we may alternately arrive here and
+ # set the break, or not, depending on the input.
+ my $no_break;
+ my $ibreak_p = $inext_to_go[$ibreak_m];
+ if ( !$rOpts_break_at_old_ternary_breakpoints
+ && $ibreak_p <= $max_index_to_go )
+ {
+ my $type_p = $types_to_go[$ibreak_p];
+ $no_break = $type_p eq '?';
+ }
- # Forced breakpoints must sometimes be overridden, for example
- # because of a side comment causing a NO_BREAK. It is easier
- # to catch this here than when they are set.
- if ( $strength < NO_BREAK - 1 ) {
- $strength = $lowest_strength - $tiny_bias;
- $must_break = 1;
- DEBUG_BREAK_LINES
- && do { $Msg .= " :set must_break at i=$i_next_nonblank" };
+ $self->set_forced_breakpoint($ibreak)
+ if ( !$no_break );
+ }
}
}
+ }
+ return;
+ } ## end sub do_uncontained_comma_breaks
- # quit if a break here would put a good terminal token on
- # the next line and we already have a possible break
- if (
- !$must_break
- && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
- && (
- (
- $leading_spaces +
- $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
- $starting_sum
- ) > $maximum_line_length
- )
- )
- {
- if ( $i_lowest >= 0 ) {
- DEBUG_BREAK_LINES && do {
- $Msg .= " :quit at good terminal='$next_nonblank_type'";
- };
- last;
- }
- }
+ my %is_logical_container;
+ my %quick_filter;
- # Avoid a break which would strand a single punctuation
- # token. For example, we do not want to strand a leading
- # '.' which is followed by a long quoted string.
- # But note that we do want to do this with -extrude (l=1)
- # so please test any changes to this code on -extrude.
- if (
- !$must_break
- && ( $i_test == $i_begin )
- && ( $i_test < $imax )
- && ( $token eq $type )
- && (
- (
- $leading_spaces +
- $summed_lengths_to_go[ $i_test + 1 ] -
- $starting_sum
- ) < $maximum_line_length
- )
- )
- {
- $i_test = min( $imax, $inext_to_go[$i_test] );
- DEBUG_BREAK_LINES && do {
- $Msg .= " :redo at i=$i_test";
- };
- redo;
- }
+ BEGIN {
+ my @q = qw# if elsif unless while and or err not && | || ? : ! #;
+ @is_logical_container{@q} = (1) x scalar(@q);
- if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
- {
+ # This filter will allow most tokens to skip past a section of code
+ %quick_filter = %is_assignment;
+ @q = qw# => . ; < > ~ #;
+ push @q, ',';
+ push @q, 'f'; # added for ';' for issue c154
+ @quick_filter{@q} = (1) x scalar(@q);
+ } ## end BEGIN
- # break at previous best break if it would have produced
- # a leading alignment of certain common tokens, and it
- # is different from the latest candidate break
- if ($leading_alignment_type) {
- DEBUG_BREAK_LINES && do {
- $Msg .=
-" :last at leading_alignment='$leading_alignment_type'";
- };
- last;
- }
+ sub set_for_semicolon_breakpoints {
+ my ( $self, $dd ) = @_;
+ foreach ( @{ $rfor_semicolon_list[$dd] } ) {
+ $self->set_forced_breakpoint($_);
+ }
+ return;
+ } ## end sub set_for_semicolon_breakpoints
- # Force at least one breakpoint if old code had good
- # break It is only called if a breakpoint is required or
- # desired. This will probably need some adjustments
- # over time. A goal is to try to be sure that, if a new
- # side comment is introduced into formatted text, then
- # the same breakpoints will occur. scbreak.t
- if (
- $i_test == $imax # we are at the end
- && !$forced_breakpoint_count
- && $saw_good_break # old line had good break
- && $type =~ /^[#;\{]$/ # and this line ends in
- # ';' or side comment
- && $i_last_break < 0 # and we haven't made a break
- && $i_lowest >= 0 # and we saw a possible break
- && $i_lowest < $imax - 1 # (but not just before this ;)
- && $strength - $lowest_strength < 0.5 * WEAK # and it's good
- )
- {
+ sub set_logical_breakpoints {
+ my ( $self, $dd ) = @_;
+ if (
+ $item_count_stack[$dd] == 0
+ && $is_logical_container{ $container_type[$dd] }
- DEBUG_BREAK_LINES && do {
- $Msg .= " :last at good old break\n";
- };
- last;
- }
+ || $has_old_logical_breakpoints[$dd]
+ )
+ {
- # Do not skip past an important break point in a short final
- # segment. For example, without this check we would miss the
- # break at the final / in the following code:
- #
- # $depth_stop =
- # ( $tau * $mass_pellet * $q_0 *
- # ( 1. - exp( -$t_stop / $tau ) ) -
- # 4. * $pi * $factor * $k_ice *
- # ( $t_melt - $t_ice ) *
- # $r_pellet *
- # $t_stop ) /
- # ( $rho_ice * $Qs * $pi * $r_pellet**2 );
- #
- if (
- $line_count > 2
- && $i_lowest >= 0 # and we saw a possible break
- && $i_lowest < $i_test
- && $i_test > $imax - 2
- && $nesting_depth_to_go[$i_begin] >
- $nesting_depth_to_go[$i_lowest]
- && $lowest_strength < $last_break_strength - .5 * WEAK
- )
- {
- # Make this break for math operators for now
- my $ir = $inext_to_go[$i_lowest];
- my $il = $iprev_to_go[$ir];
- if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
- || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ )
- {
- DEBUG_BREAK_LINES && do {
- $Msg .= " :last-noskip_short";
- };
- last;
+ # Look for breaks in this order:
+ # 0 1 2 3
+ # or and || &&
+ foreach my $i ( 0 .. 3 ) {
+ if ( $rand_or_list[$dd][$i] ) {
+ foreach ( @{ $rand_or_list[$dd][$i] } ) {
+ $self->set_forced_breakpoint($_);
}
- }
- # Update the minimum bond strength location
- $lowest_strength = $strength;
- $i_lowest = $i_test;
- $lowest_next_token = $next_nonblank_token;
- $lowest_next_type = $next_nonblank_type;
- $i_lowest_next_nonblank = $i_next_nonblank;
- if ($must_break) {
- DEBUG_BREAK_LINES && do {
- $Msg .= " :last-must_break";
- };
+ # break at any 'if' and 'unless' too
+ foreach ( @{ $rand_or_list[$dd][4] } ) {
+ $self->set_forced_breakpoint($_);
+ }
+ $rand_or_list[$dd] = [];
last;
}
+ }
+ }
+ return;
+ } ## end sub set_logical_breakpoints
- # set flags to remember if a break here will produce a
- # leading alignment of certain common tokens
- if ( $line_count > 0
- && $i_test < $imax
- && ( $lowest_strength - $last_break_strength <= $max_bias )
- )
- {
- my $i_last_end = $iprev_to_go[$i_begin];
- my $tok_beg = $tokens_to_go[$i_begin];
- my $type_beg = $types_to_go[$i_begin];
- if (
+ sub is_unbreakable_container {
- # check for leading alignment of certain tokens
- (
- $tok_beg eq $next_nonblank_token
- && $is_chain_operator{$tok_beg}
- && ( $type_beg eq 'k'
- || $type_beg eq $tok_beg )
- && $nesting_depth_to_go[$i_begin] >=
- $nesting_depth_to_go[$i_next_nonblank]
- )
+ # never break a container of one of these types
+ # because bad things can happen (map1.t)
+ my $dd = shift;
+ return $is_sort_map_grep{ $container_type[$dd] };
+ } ## end sub is_unbreakable_container
+
+ sub break_lists {
+
+ my ( $self, $is_long_line, $rbond_strength_bias ) = @_;
+
+ #--------------------------------------------------------------------
+ # This routine is called once per batch, if the batch is a list, to
+ # set line breaks so that hierarchical structure can be displayed and
+ # so that list items can be vertically aligned. The output of this
+ # routine is stored in the array @forced_breakpoint_to_go, which is
+ # used by sub 'break_long_lines' to set final breakpoints. This is
+ # probably the most complex routine in perltidy, so I have
+ # broken it into pieces and over-commented it.
+ #--------------------------------------------------------------------
+
+ $starting_depth = $nesting_depth_to_go[0];
+
+ $block_type = SPACE;
+ $current_depth = $starting_depth;
+ $i = -1;
+ $i_last_colon = -1;
+ $i_line_end = -1;
+ $i_line_start = -1;
+ $last_nonblank_token = ';';
+ $last_nonblank_type = ';';
+ $last_nonblank_block_type = SPACE;
+ $last_old_breakpoint_count = 0;
+ $minimum_depth = $current_depth + 1; # forces update in check below
+ $old_breakpoint_count = 0;
+ $starting_breakpoint_count = $forced_breakpoint_count;
+ $token = ';';
+ $type = ';';
+ $type_sequence = EMPTY_STRING;
- || ( $tokens_to_go[$i_last_end] eq $token
- && $is_chain_operator{$token}
- && ( $type eq 'k' || $type eq $token )
- && $nesting_depth_to_go[$i_last_end] >=
- $nesting_depth_to_go[$i_test] )
- )
- {
- $leading_alignment_token = $next_nonblank_token;
- $leading_alignment_type = $next_nonblank_type;
- }
- }
- }
+ my $total_depth_variation = 0;
+ my $i_old_assignment_break;
+ my $depth_last = $starting_depth;
+ my $comma_follows_last_closing_token;
- my $too_long = ( $i_test >= $imax );
- if ( !$too_long ) {
- my $next_length =
- $leading_spaces +
- $summed_lengths_to_go[ $i_test + 2 ] -
- $starting_sum;
- $too_long = $next_length > $maximum_line_length;
+ $self->check_for_new_minimum_depth( $current_depth,
+ $parent_seqno_to_go[0] )
+ if ( $current_depth < $minimum_depth );
- # To prevent blinkers we will avoid leaving a token exactly at
- # the line length limit unless it is the last token or one of
- # several "good" types.
- #
- # The following code was a blinker with -pbp before this
- # modification:
-## $last_nonblank_token eq '('
-## && $is_indirect_object_taker{ $paren_type
-## [$paren_depth] }
- # The issue causing the problem is that if the
- # term [$paren_depth] gets broken across a line then
- # the whitespace routine doesn't see both opening and closing
- # brackets and will format like '[ $paren_depth ]'. This
- # leads to an oscillation in length depending if we break
- # before the closing bracket or not.
- if ( !$too_long
- && $i_test + 1 < $imax
- && $next_nonblank_type ne ','
- && !$is_closing_type{$next_nonblank_type} )
- {
- $too_long = $next_length >= $maximum_line_length;
- DEBUG_BREAK_LINES && do {
- $Msg .= " :too_long=$too_long" if ($too_long);
- }
- }
- }
+ my $i_want_previous_break = -1;
- DEBUG_BREAK_LINES && do {
- my $ltok = $token;
- my $rtok =
- $next_nonblank_token ? $next_nonblank_token : EMPTY_STRING;
- my $i_testp2 = $i_test + 2;
- if ( $i_testp2 > $max_index_to_go + 1 ) {
- $i_testp2 = $max_index_to_go + 1;
- }
- if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
- if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
- print STDOUT
-"BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n";
- };
+ my $saw_good_breakpoint;
- # allow one extra terminal token after exceeding line length
- # if it would strand this token.
- if ( $rOpts_fuzzy_line_length
- && $too_long
- && $i_lowest == $i_test
- && $token_lengths_to_go[$i_test] > 1
- && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
- )
- {
- $too_long = 0;
- DEBUG_BREAK_LINES && do {
- $Msg .= " :do_not_strand next='$next_nonblank_type'";
- };
+ #----------------------------------------
+ # Main loop over all tokens in this batch
+ #----------------------------------------
+ while ( ++$i <= $max_index_to_go ) {
+ if ( $type ne 'b' ) {
+ $i_last_nonblank_token = $i - 1;
+ $last_nonblank_type = $type;
+ $last_nonblank_token = $token;
+ $last_nonblank_block_type = $block_type;
}
+ $type = $types_to_go[$i];
+ $block_type = $block_type_to_go[$i];
+ $token = $tokens_to_go[$i];
+ $type_sequence = $type_sequence_to_go[$i];
- # we are done if...
- if (
+ my $i_next_nonblank = $inext_to_go[$i];
+ $next_nonblank_type = $types_to_go[$i_next_nonblank];
+ $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
+ $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
- # ... no more space and we have a break
- $too_long && $i_lowest >= 0
+ #-------------------------------------------
+ # Loop Section A: Look for special breakpoints...
+ #-------------------------------------------
- # ... or no more tokens
- || $i_test == $imax
- )
- {
- DEBUG_BREAK_LINES && do {
- $Msg .=
-" :Done-too_long=$too_long or i_lowest=$i_lowest or $i_test==imax";
- };
- last;
+ # set break if flag was set
+ if ( $i_want_previous_break >= 0 ) {
+ $self->set_forced_breakpoint($i_want_previous_break);
+ $i_want_previous_break = -1;
}
- }
- #-------------------------------------------------------
- # END of inner loop to find the best next breakpoint
- # Now decide exactly where to put the breakpoint
- #-------------------------------------------------------
+ $last_old_breakpoint_count = $old_breakpoint_count;
- # it's always ok to break at imax if no other break was found
- if ( $i_lowest < 0 ) { $i_lowest = $imax }
+ # Check for a good old breakpoint ..
+ if ( $old_breakpoint_to_go[$i] ) {
+ ( $i_want_previous_break, $i_old_assignment_break ) =
+ $self->examine_old_breakpoint( $i_next_nonblank,
+ $i_want_previous_break, $i_old_assignment_break );
+ }
- # semi-final index calculation
- my $i_next_nonblank = $inext_to_go[$i_lowest];
- my $next_nonblank_type = $types_to_go[$i_next_nonblank];
- my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
+ next if ( $type eq 'b' );
- #-------------------------------------------------------
- # ?/: rule 1 : if a break here will separate a '?' on this
- # line from its closing ':', then break at the '?' instead.
- #-------------------------------------------------------
- foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
- next unless ( $tokens_to_go[$i] eq '?' );
+ $depth = $nesting_depth_to_go[ $i + 1 ];
- # do not break if probable sequence of ?/: statements
- next if ($is_colon_chain);
+ $total_depth_variation += abs( $depth - $depth_last );
+ $depth_last = $depth;
- # do not break if statement is broken by side comment
- next
- if ( $tokens_to_go[$max_index_to_go] eq '#'
- && terminal_type_i( 0, $max_index_to_go ) !~ /^[\;\}]$/ );
+ # safety check - be sure we always break after a comment
+ # Shouldn't happen .. an error here probably means that the
+ # nobreak flag did not get turned off correctly during
+ # formatting.
+ if ( $type eq '#' ) {
+ if ( $i != $max_index_to_go ) {
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+Non-fatal program bug: backup logic required to break after a comment
+EOM
+ }
+ $nobreak_to_go[$i] = 0;
+ $self->set_forced_breakpoint($i);
+ } ## end if ( $i != $max_index_to_go)
+ } ## end if ( $type eq '#' )
- # no break needed if matching : is also on the line
- next
- if ( $mate_index_to_go[$i] >= 0
- && $mate_index_to_go[$i] <= $i_next_nonblank );
+ # Force breakpoints at certain tokens in long lines.
+ # Note that such breakpoints will be undone later if these tokens
+ # are fully contained within parens on a line.
+ if (
- $i_lowest = $i;
- if ( $want_break_before{'?'} ) { $i_lowest-- }
- last;
- }
+ # break before a keyword within a line
+ $type eq 'k'
+ && $i > 0
- #-------------------------------------------------------
- # END of inner loop to find the best next breakpoint:
- # Break the line after the token with index i=$i_lowest
- #-------------------------------------------------------
+ # if one of these keywords:
+ && $is_if_unless_while_until_for_foreach{$token}
- # final index calculation
- $i_next_nonblank = $inext_to_go[$i_lowest];
- $next_nonblank_type = $types_to_go[$i_next_nonblank];
- $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
+ # but do not break at something like '1 while'
+ && ( $last_nonblank_type ne 'n' || $i > 2 )
- DEBUG_BREAK_LINES
- && print STDOUT
-"BREAK: best is i = $i_lowest strength = $lowest_strength;\nReason>> $Msg\n";
- $Msg = EMPTY_STRING;
+ # and let keywords follow a closing 'do' brace
+ && ( !$last_nonblank_block_type
+ || $last_nonblank_block_type ne 'do' )
- #-------------------------------------------------------
- # ?/: rule 2 : if we break at a '?', then break at its ':'
- #
- # Note: this rule is also in sub break_lists to handle a break
- # at the start and end of a line (in case breaks are dictated
- # by side comments).
- #-------------------------------------------------------
- if ( $next_nonblank_type eq '?' ) {
- $self->set_closing_breakpoint($i_next_nonblank);
- }
- elsif ( $types_to_go[$i_lowest] eq '?' ) {
- $self->set_closing_breakpoint($i_lowest);
- }
+ && (
+ $is_long_line
- #-------------------------------------------------------
- # ?/: rule 3 : if we break at a ':' then we save
- # its location for further work below. We may need to go
- # back and break at its '?'.
- #-------------------------------------------------------
- if ( $next_nonblank_type eq ':' ) {
- push @i_colon_breaks, $i_next_nonblank;
- }
- elsif ( $types_to_go[$i_lowest] eq ':' ) {
- push @i_colon_breaks, $i_lowest;
- }
+ # or container is broken (by side-comment, etc)
+ || (
+ $next_nonblank_token eq '('
+ && ( !defined( $mate_index_to_go[$i_next_nonblank] )
+ || $mate_index_to_go[$i_next_nonblank] < $i )
+ )
+ )
+ )
+ {
+ $self->set_forced_breakpoint( $i - 1 );
+ }
- # here we should set breaks for all '?'/':' pairs which are
- # separated by this line
+ # remember locations of '||' and '&&' for possible breaks if we
+ # decide this is a long logical expression.
+ if ( $type eq '||' ) {
+ push @{ $rand_or_list[$depth][2] }, $i;
+ ++$has_old_logical_breakpoints[$depth]
+ if ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_logical_breakpoints );
+ }
+ elsif ( $type eq '&&' ) {
+ push @{ $rand_or_list[$depth][3] }, $i;
+ ++$has_old_logical_breakpoints[$depth]
+ if ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_logical_breakpoints );
+ }
+ elsif ( $type eq 'f' ) {
+ push @{ $rfor_semicolon_list[$depth] }, $i;
+ }
+ elsif ( $type eq 'k' ) {
+ if ( $token eq 'and' ) {
+ push @{ $rand_or_list[$depth][1] }, $i;
+ ++$has_old_logical_breakpoints[$depth]
+ if ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_logical_breakpoints );
+ }
- $line_count++;
+ # break immediately at 'or's which are probably not in a logical
+ # block -- but we will break in logical breaks below so that
+ # they do not add to the forced_breakpoint_count
+ elsif ( $token eq 'or' ) {
+ push @{ $rand_or_list[$depth][0] }, $i;
+ ++$has_old_logical_breakpoints[$depth]
+ if ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_logical_breakpoints );
+ if ( $is_logical_container{ $container_type[$depth] } ) {
+ }
+ else {
+ if ($is_long_line) { $self->set_forced_breakpoint($i) }
+ elsif ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_logical_breakpoints )
+ {
+ $saw_good_breakpoint = 1;
+ }
+ }
+ }
+ elsif ( $token eq 'if' || $token eq 'unless' ) {
+ push @{ $rand_or_list[$depth][4] }, $i;
+ if ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_logical_breakpoints )
+ {
+ $self->set_forced_breakpoint($i);
+ }
+ }
+ }
+ elsif ( $is_assignment{$type} ) {
+ $i_equals[$depth] = $i;
+ }
- # save this line segment, after trimming blanks at the ends
- push( @i_first,
- ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
- push( @i_last,
- ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
+ #-----------------------------------------
+ # Loop Section B: Handle a sequenced token
+ #-----------------------------------------
+ if ($type_sequence) {
+ $self->break_lists_type_sequence;
+ }
- # set a forced breakpoint at a container opening, if necessary, to
- # signal a break at a closing container. Excepting '(' for now.
- if (
- (
- $tokens_to_go[$i_lowest] eq '{'
- || $tokens_to_go[$i_lowest] eq '['
- )
- && !$forced_breakpoint_to_go[$i_lowest]
- )
- {
- $self->set_closing_breakpoint($i_lowest);
- }
+ #------------------------------------------
+ # Loop Section C: Handle Increasing Depth..
+ #------------------------------------------
- # get ready to go again
- $i_begin = $i_lowest + 1;
- $last_break_strength = $lowest_strength;
- $i_last_break = $i_lowest;
- $leading_alignment_token = EMPTY_STRING;
- $leading_alignment_type = EMPTY_STRING;
- $lowest_next_token = EMPTY_STRING;
- $lowest_next_type = 'b';
+ # hardened against bad input syntax: depth jump must be 1 and type
+ # must be opening..fixes c102
+ if ( $depth == $current_depth + 1 && $is_opening_type{$type} ) {
+ $self->break_lists_increasing_depth();
+ }
- if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
- $i_begin++;
- }
+ #------------------------------------------
+ # Loop Section D: Handle Decreasing Depth..
+ #------------------------------------------
- # update indentation size
- if ( $i_begin <= $imax ) {
- $leading_spaces = leading_spaces_to_go($i_begin);
- DEBUG_BREAK_LINES
- && print STDOUT
- "updating leading spaces to be $leading_spaces at i=$i_begin\n";
- }
- }
+ # hardened against bad input syntax: depth jump must be 1 and type
+ # must be closing .. fixes c102
+ elsif ( $depth == $current_depth - 1 && $is_closing_type{$type} ) {
- #-------------------------------------------------------
- # END of main loop to set continuation breakpoints
- # Now go back and make any necessary corrections
- #-------------------------------------------------------
+ $self->break_lists_decreasing_depth();
- #-------------------------------------------------------
- # ?/: rule 4 -- if we broke at a ':', then break at
- # corresponding '?' unless this is a chain of ?: expressions
- #-------------------------------------------------------
- if (@i_colon_breaks) {
+ $comma_follows_last_closing_token =
+ $next_nonblank_type eq ',' || $next_nonblank_type eq '=>';
- # using a simple method for deciding if we are in a ?/: chain --
- # this is a chain if it has multiple ?/: pairs all in order;
- # otherwise not.
- # Note that if line starts in a ':' we count that above as a break
- my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
+ }
- unless ($is_chain) {
- my @insert_list = ();
- foreach (@i_colon_breaks) {
- my $i_question = $mate_index_to_go[$_];
- if ( $i_question >= 0 ) {
- if ( $want_break_before{'?'} ) {
- $i_question = $iprev_to_go[$i_question];
- }
+ #----------------------------------
+ # Loop Section E: Handle this token
+ #----------------------------------
- if ( $i_question >= 0 ) {
- push @insert_list, $i_question;
- }
- }
- $self->insert_additional_breaks( \@insert_list, \@i_first,
- \@i_last );
- }
- }
- }
- return ( \@i_first, \@i_last, $rbond_strength_to_go );
-} ## end sub break_long_lines
+ $current_depth = $depth;
-###########################################
-# CODE SECTION 11: Code to break long lists
-###########################################
+ # most token types can skip the rest of this loop
+ next unless ( $quick_filter{$type} );
-{ ## begin closure break_lists
+ # handle comma-arrow
+ if ( $type eq '=>' ) {
+ next if ( $last_nonblank_type eq '=>' );
+ next if $rOpts_break_at_old_comma_breakpoints;
+ next
+ if ( $rOpts_comma_arrow_breakpoints == 3
+ && !defined( $override_cab3[$depth] ) );
+ $want_comma_break[$depth] = 1;
+ $index_before_arrow[$depth] = $i_last_nonblank_token;
+ next;
+ }
- # These routines and variables are involved in finding good
- # places to break long lists.
+ elsif ( $type eq '.' ) {
+ $last_dot_index[$depth] = $i;
+ }
- use constant DEBUG_BREAK_LISTS => 0;
+ # Turn off comma alignment if we are sure that this is not a list
+ # environment. To be safe, we will do this if we see certain
+ # non-list tokens, such as ';', '=', and also the environment is
+ # not a list.
+ ## $type =~ /^[\;\<\>\~f]$/ || $is_assignment{$type}
+ elsif ( $is_non_list_type{$type}
+ && !$self->is_in_list_by_i($i) )
+ {
+ $dont_align[$depth] = 1;
+ $want_comma_break[$depth] = 0;
+ $index_before_arrow[$depth] = -1;
- my (
- $block_type, $current_depth,
- $depth, $i,
- $i_last_nonblank_token, $last_nonblank_token,
- $last_nonblank_type, $last_nonblank_block_type,
- $last_old_breakpoint_count, $minimum_depth,
- $next_nonblank_block_type, $next_nonblank_token,
- $next_nonblank_type, $old_breakpoint_count,
- $starting_breakpoint_count, $starting_depth,
- $token, $type,
- $type_sequence,
- );
+ # no special comma breaks in C-style 'for' terms (c154)
+ if ( $type eq 'f' ) { $last_comma_index[$depth] = undef }
+ }
- my (
- @breakpoint_stack, @breakpoint_undo_stack,
- @comma_index, @container_type,
- @identifier_count_stack, @index_before_arrow,
- @interrupted_list, @item_count_stack,
- @last_comma_index, @last_dot_index,
- @last_nonblank_type, @old_breakpoint_count_stack,
- @opening_structure_index_stack, @rfor_semicolon_list,
- @has_old_logical_breakpoints, @rand_or_list,
- @i_equals, @override_cab3,
- @type_sequence_stack,
- );
+ # now just handle any commas
+ next if ( $type ne ',' );
+ $self->study_comma($comma_follows_last_closing_token);
- # these arrays must retain values between calls
- my ( @has_broken_sublist, @dont_align, @want_comma_break );
+ } ## end while ( ++$i <= $max_index_to_go)
- my $length_tol;
- my $lp_tol_boost;
- my $list_stress_level;
+ #-------------------------------------------
+ # END of loop over all tokens in this batch
+ # Now set breaks for any unfinished lists ..
+ #-------------------------------------------
- sub initialize_break_lists {
- @dont_align = ();
- @has_broken_sublist = ();
- @want_comma_break = ();
+ foreach my $dd ( reverse( $minimum_depth .. $current_depth ) ) {
- #---------------------------------------------------
- # Set tolerances to prevent formatting instabilities
- #---------------------------------------------------
+ $interrupted_list[$dd] = 1;
+ $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
+ $self->set_comma_breakpoints( $dd, $rbond_strength_bias )
+ if ( $item_count_stack[$dd] );
+ $self->set_logical_breakpoints($dd)
+ if ( $has_old_logical_breakpoints[$dd] );
+ $self->set_for_semicolon_breakpoints($dd);
- # Define tolerances to use when checking if closed
- # containers will fit on one line. This is necessary to avoid
- # formatting instability. The basic tolerance is based on the
- # following:
+ # break open container...
+ my $i_opening = $opening_structure_index_stack[$dd];
+ if ( defined($i_opening) && $i_opening >= 0 ) {
+ $self->set_forced_breakpoint($i_opening)
+ unless (
+ is_unbreakable_container($dd)
- # - Always allow for at least one extra space after a closing token so
- # that we do not strand a comma or semicolon. (oneline.t).
+ # Avoid a break which would place an isolated ' or "
+ # on a line
+ || ( $type eq 'Q'
+ && $i_opening >= $max_index_to_go - 2
+ && ( $token eq "'" || $token eq '"' ) )
+ );
+ }
+ } ## end for ( my $dd = $current_depth...)
- # - Use an increased line length tolerance when -ci > -i to avoid
- # blinking states (case b923 and others).
- $length_tol =
- 1 + max( 0, $rOpts_continuation_indentation - $rOpts_indent_columns );
+ #----------------------------------------
+ # Return the flag '$saw_good_breakpoint'.
+ #----------------------------------------
+ # This indicates if the input file had some good breakpoints. This
+ # flag will be used to force a break in a line shorter than the
+ # allowed line length.
+ if ( $has_old_logical_breakpoints[$current_depth] ) {
+ $saw_good_breakpoint = 1;
+ }
- # In addition, it may be necessary to use a few extra tolerance spaces
- # when -lp is used and/or when -xci is used. The history of this
- # so far is as follows:
+ # A complex line with one break at an = has a good breakpoint.
+ # This is not complex ($total_depth_variation=0):
+ # $res1
+ # = 10;
+ #
+ # This is complex ($total_depth_variation=6):
+ # $res2 =
+ # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
- # FIX1: At least 3 characters were been found to be required for -lp
- # to fixes cases b1059 b1063 b1117.
+ # The check ($i_old_.. < $max_index_to_go) was added to fix b1333
+ elsif ($i_old_assignment_break
+ && $total_depth_variation > 4
+ && $old_breakpoint_count == 1
+ && $i_old_assignment_break < $max_index_to_go )
+ {
+ $saw_good_breakpoint = 1;
+ }
- # FIX2: Further testing showed that we need a total of 3 extra spaces
- # when -lp is set for non-lists, and at least 2 spaces when -lp and
- # -xci are set.
- # Fixes cases b1063 b1103 b1134 b1135 b1136 b1138 b1140 b1143 b1144
- # b1145 b1146 b1147 b1148 b1151 b1152 b1153 b1154 b1156 b1157 b1164
- # b1165
+ return $saw_good_breakpoint;
+ } ## end sub break_lists
- # FIX3: To fix cases b1169 b1170 b1171, an update was made in sub
- # 'find_token_starting_list' to go back before an initial blank space.
- # This fixed these three cases, and allowed the tolerances to be
- # reduced to continue to fix all other known cases of instability.
- # This gives the current tolerance formulation.
+ sub study_comma {
- $lp_tol_boost = 0;
+ # study and store info for a list comma
- if ($rOpts_line_up_parentheses) {
+ my ( $self, $comma_follows_last_closing_token ) = @_;
- # boost tol for combination -lp -xci
- if ($rOpts_extended_continuation_indentation) {
- $lp_tol_boost = 2;
- }
+ $last_dot_index[$depth] = undef;
+ $last_comma_index[$depth] = $i;
- # boost tol for combination -lp and any -vtc > 0, but only for
- # non-list containers
- else {
- foreach ( keys %closing_vertical_tightness ) {
- next
- unless ( $closing_vertical_tightness{$_} );
- $lp_tol_boost = 1; # Fixes B1193;
- last;
+ # break here if this comma follows a '=>'
+ # but not if there is a side comment after the comma
+ if ( $want_comma_break[$depth] ) {
+
+ if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
+ if ($rOpts_comma_arrow_breakpoints) {
+ $want_comma_break[$depth] = 0;
+ return;
}
}
- }
- # Define a level where list formatting becomes highly stressed and
- # needs to be simplified. Introduced for case b1262.
- $list_stress_level = min( $stress_level_alpha, $stress_level_beta + 2 );
+ $self->set_forced_breakpoint($i)
+ unless ( $next_nonblank_type eq '#' );
- return;
- } ## end sub initialize_break_lists
+ # break before the previous token if it looks safe
+ # Example of something that we will not try to break before:
+ # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
+ # Also we don't want to break at a binary operator (like +):
+ # $c->createOval(
+ # $x + $R, $y +
+ # $R => $x - $R,
+ # $y - $R, -fill => 'black',
+ # );
+ my $ibreak = $index_before_arrow[$depth] - 1;
+ if ( $ibreak > 0
+ && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
+ {
+ if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
+ if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
+ if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
+
+ # don't break before a comma, as in the following:
+ # ( LONGER_THAN,=> 1,
+ # EIGHTY_CHARACTERS,=> 2,
+ # CAUSES_FORMATTING,=> 3,
+ # LIKE_THIS,=> 4,
+ # );
+ # This example is for -tso but should be general rule
+ if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
+ && $tokens_to_go[ $ibreak + 1 ] ne ',' )
+ {
+ $self->set_forced_breakpoint($ibreak);
+ }
+ }
+ }
- # routine to define essential variables when we go 'up' to
- # a new depth
- sub check_for_new_minimum_depth {
- my ( $self, $depth_t, $seqno ) = @_;
- if ( $depth_t < $minimum_depth ) {
+ $want_comma_break[$depth] = 0;
+ $index_before_arrow[$depth] = -1;
- $minimum_depth = $depth_t;
+ # handle list which mixes '=>'s and ','s:
+ # treat any list items so far as an interrupted list
+ $interrupted_list[$depth] = 1;
+ return;
+ }
- # these arrays need not retain values between calls
- $type_sequence_stack[$depth_t] = $seqno;
- $override_cab3[$depth_t] =
- $rOpts_comma_arrow_breakpoints == 3
- && $seqno
- && $self->[_roverride_cab3_]->{$seqno};
+ # Break after all commas above starting depth...
+ # But only if the last closing token was followed by a comma,
+ # to avoid breaking a list operator (issue c119)
+ if ( $depth < $starting_depth
+ && $comma_follows_last_closing_token
+ && !$dont_align[$depth] )
+ {
+ $self->set_forced_breakpoint($i)
+ unless ( $next_nonblank_type eq '#' );
+ return;
+ }
- $override_cab3[$depth_t] = undef;
- $breakpoint_stack[$depth_t] = $starting_breakpoint_count;
- $container_type[$depth_t] = EMPTY_STRING;
- $identifier_count_stack[$depth_t] = 0;
- $index_before_arrow[$depth_t] = -1;
- $interrupted_list[$depth_t] = 1;
- $item_count_stack[$depth_t] = 0;
- $last_nonblank_type[$depth_t] = EMPTY_STRING;
- $opening_structure_index_stack[$depth_t] = -1;
+ # add this comma to the list..
+ my $item_count = $item_count_stack[$depth];
+ if ( $item_count == 0 ) {
- $breakpoint_undo_stack[$depth_t] = undef;
- $comma_index[$depth_t] = undef;
- $last_comma_index[$depth_t] = undef;
- $last_dot_index[$depth_t] = undef;
- $old_breakpoint_count_stack[$depth_t] = undef;
- $has_old_logical_breakpoints[$depth_t] = 0;
- $rand_or_list[$depth_t] = [];
- $rfor_semicolon_list[$depth_t] = [];
- $i_equals[$depth_t] = -1;
+ # but do not form a list with no opening structure
+ # for example:
- # these arrays must retain values between calls
- if ( !defined( $has_broken_sublist[$depth_t] ) ) {
- $dont_align[$depth_t] = 0;
- $has_broken_sublist[$depth_t] = 0;
- $want_comma_break[$depth_t] = 0;
+ # open INFILE_COPY, ">$input_file_copy"
+ # or die ("very long message");
+ if ( ( $opening_structure_index_stack[$depth] < 0 )
+ && $self->is_in_block_by_i($i) )
+ {
+ $dont_align[$depth] = 1;
}
}
+
+ $comma_index[$depth][$item_count] = $i;
+ ++$item_count_stack[$depth];
+ if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
+ $identifier_count_stack[$depth]++;
+ }
return;
- } ## end sub check_for_new_minimum_depth
+ } ## end sub study_comma
- # routine to decide which commas to break at within a container;
- # returns:
- # $bp_count = number of comma breakpoints set
- # $do_not_break_apart = a flag indicating if container need not
- # be broken open
- sub set_comma_breakpoints {
+ my %poor_types;
+ my %poor_keywords;
+ my %poor_next_types;
+ my %poor_next_keywords;
- my ( $self, $dd, $rbond_strength_bias ) = @_;
- my $bp_count = 0;
- my $do_not_break_apart = 0;
+ BEGIN {
- # Do not break a list unless there are some non-line-ending commas.
- # This avoids getting different results with only non-essential commas,
- # and fixes b1192.
- my $seqno = $type_sequence_stack[$dd];
- my $real_comma_count =
- $seqno ? $self->[_rtype_count_by_seqno_]->{$seqno}->{','} : 1;
+ # Setup filters for detecting very poor breaks to ignore.
+ # b1097: old breaks after type 'L' and before 'R' are poor
+ # b1450: old breaks at 'eq' and related operators are poor
+ my @q = qw(== <= >= !=);
- # anything to do?
- if ( $item_count_stack[$dd] ) {
+ @{poor_types}{@q} = (1) x scalar(@q);
+ @{poor_next_types}{@q} = (1) x scalar(@q);
+ $poor_types{'L'} = 1;
+ $poor_next_types{'R'} = 1;
- # handle commas not in containers...
- if ( $dont_align[$dd] ) {
- $self->do_uncontained_comma_breaks( $dd, $rbond_strength_bias );
- }
+ @q = qw(eq ne le ge lt gt);
+ @{poor_keywords}{@q} = (1) x scalar(@q);
+ @{poor_next_keywords}{@q} = (1) x scalar(@q);
+ } ## end BEGIN
- # handle commas within containers...
- elsif ($real_comma_count) {
- my $fbc = $forced_breakpoint_count;
+ sub examine_old_breakpoint {
- # always open comma lists not preceded by keywords,
- # barewords, identifiers (that is, anything that doesn't
- # look like a function call)
- my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
+ my ( $self, $i_next_nonblank, $i_want_previous_break,
+ $i_old_assignment_break )
+ = @_;
- $self->set_comma_breakpoints_do(
- {
- depth => $dd,
- i_opening_paren => $opening_structure_index_stack[$dd],
- i_closing_paren => $i,
- item_count => $item_count_stack[$dd],
- identifier_count => $identifier_count_stack[$dd],
- rcomma_index => $comma_index[$dd],
- next_nonblank_type => $next_nonblank_type,
- list_type => $container_type[$dd],
- interrupted => $interrupted_list[$dd],
- rdo_not_break_apart => \$do_not_break_apart,
- must_break_open => $must_break_open,
- has_broken_sublist => $has_broken_sublist[$dd],
- }
- );
- $bp_count = $forced_breakpoint_count - $fbc;
- $do_not_break_apart = 0 if $must_break_open;
- }
+ # Look at an old breakpoint and set/update certain flags:
+
+ # Given indexes of three tokens in this batch:
+ # $i_next_nonblank - index of the next nonblank token
+ # $i_want_previous_break - we want a break before this index
+ # $i_old_assignment_break - the index of an '=' or equivalent
+ # Update:
+ # $old_breakpoint_count - a counter to increment unless poor break
+ # Update and return:
+ # $i_want_previous_break
+ # $i_old_assignment_break
+
+ #-----------------------
+ # Filter out poor breaks
+ #-----------------------
+ # Just return if this is a poor break and pretend it does not exist.
+ # Otherwise, poor breaks made under stress can cause instability.
+ my $poor_break;
+ if ( $type eq 'k' ) { $poor_break ||= $poor_keywords{$token} }
+ else { $poor_break ||= $poor_types{$type} }
+
+ if ( $next_nonblank_type eq 'k' ) {
+ $poor_break ||= $poor_next_keywords{$next_nonblank_token};
}
- return ( $bp_count, $do_not_break_apart );
- } ## end sub set_comma_breakpoints
+ else { $poor_break ||= $poor_next_types{$next_nonblank_type} }
- # These types are excluded at breakpoints to prevent blinking
- # Switched from excluded to included as part of fix for b1214
- my %is_uncontained_comma_break_included_type;
+ # Also ignore any high stress level breaks; fixes b1395
+ $poor_break ||= $levels_to_go[$i] >= $high_stress_level;
+ if ($poor_break) { goto RETURN }
- BEGIN {
+ #--------------------------------------------
+ # Not a poor break, so continue to examine it
+ #--------------------------------------------
+ $old_breakpoint_count++;
+ $i_line_end = $i;
+ $i_line_start = $i_next_nonblank;
+
+ #---------------------------------------
+ # Do we want to break before this token?
+ #---------------------------------------
+
+ # Break before certain keywords if user broke there and
+ # this is a 'safe' break point. The idea is to retain
+ # any preferred breaks for sequential list operations,
+ # like a schwartzian transform.
+ if ($rOpts_break_at_old_keyword_breakpoints) {
+ if (
+ $next_nonblank_type eq 'k'
+ && $is_keyword_returning_list{$next_nonblank_token}
+ && ( $type =~ /^[=\)\]\}Riw]$/
+ || $type eq 'k' && $is_keyword_returning_list{$token} )
+ )
+ {
- my @q = qw< k R } ) ] Y Z U w i q Q .
- = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=>;
- @is_uncontained_comma_break_included_type{@q} = (1) x scalar(@q);
- }
+ # we actually have to set this break next time through
+ # the loop because if we are at a closing token (such
+ # as '}') which forms a one-line block, this break might
+ # get undone.
- sub do_uncontained_comma_breaks {
+ # But do not do this at an '=' if:
+ # - the user wants breaks before an equals (b434 b903)
+ # - or -naws is set (can be unstable, see b1354)
+ my $skip = $type eq '='
+ && ( $want_break_before{$type}
+ || !$rOpts_add_whitespace );
- # Handle commas not in containers...
- # This is a catch-all routine for commas that we
- # don't know what to do with because the don't fall
- # within containers. We will bias the bond strength
- # to break at commas which ended lines in the input
- # file. This usually works better than just trying
- # to put as many items on a line as possible. A
- # downside is that if the input file is garbage it
- # won't work very well. However, the user can always
- # prevent following the old breakpoints with the
- # -iob flag.
- my ( $self, $dd, $rbond_strength_bias ) = @_;
+ $i_want_previous_break = $i
+ unless ($skip);
- # Check added for issue c131; an error here would be due to an
- # error initializing @comma_index when entering depth $dd.
- if (DEVEL_MODE) {
- foreach my $ii ( @{ $comma_index[$dd] } ) {
- if ( $ii < 0 || $ii > $max_index_to_go ) {
- my $KK = $K_to_go[0];
- my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_];
- Fault(<<EOM);
-Bad comma index near line $lno: i=$ii must be between 0 and $max_index_to_go
-EOM
- }
}
}
- my $bias = -.01;
- my $old_comma_break_count = 0;
- foreach my $ii ( @{ $comma_index[$dd] } ) {
+ # Break before attributes if user broke there
+ if ($rOpts_break_at_old_attribute_breakpoints) {
+ if ( $next_nonblank_type eq 'A' ) {
+ $i_want_previous_break = $i;
+ }
+ }
- if ( $old_breakpoint_to_go[$ii] ) {
- $old_comma_break_count++;
+ #---------------------------------
+ # Is this an old assignment break?
+ #---------------------------------
+ if ( $is_assignment{$type} ) {
+ $i_old_assignment_break = $i;
+ }
+ elsif ( $is_assignment{$next_nonblank_type} ) {
+ $i_old_assignment_break = $i_next_nonblank;
+ }
- # Store the bias info for use by sub set_bond_strength
- push @{$rbond_strength_bias}, [ $ii, $bias ];
+ RETURN:
+ return ( $i_want_previous_break, $i_old_assignment_break );
+ } ## end sub examine_old_breakpoint
+
+ sub break_lists_type_sequence {
+
+ my ($self) = @_;
+
+ # We have encountered a sequenced token while setting list breakpoints
- # reduce bias magnitude to force breaks in order
- $bias *= 0.99;
- }
- }
+ # if closing type, one of } ) ] :
+ if ( $is_closing_sequence_token{$token} ) {
- # Also put a break before the first comma if
- # (1) there was a break there in the input, and
- # (2) there was exactly one old break before the first comma break
- # (3) OLD: there are multiple old comma breaks
- # (3) NEW: there are one or more old comma breaks (see return example)
- # (4) the first comma is at the starting level ...
- # ... fixes cases b064 b065 b068 b210 b747
- # (5) the batch does not start with a ci>0 [ignore a ci change by -xci]
- # ... fixes b1220. If ci>0 we are in the middle of a snippet,
- # maybe because -boc has been forcing out previous lines.
+ if ( $type eq ':' ) {
+ $i_last_colon = $i;
- # For example, we will follow the user and break after
- # 'print' in this snippet:
- # print
- # "conformability (Not the same dimension)\n",
- # "\t", $have, " is ", text_unit($hu), "\n",
- # "\t", $want, " is ", text_unit($wu), "\n",
- # ;
- #
- # Another example, just one comma, where we will break after
- # the return:
- # return
- # $x * cos($a) - $y * sin($a),
- # $x * sin($a) + $y * cos($a);
+ # retain break at a ':' line break
+ if ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_ternary_breakpoints
+ && $levels_to_go[$i] < $high_stress_level )
+ {
- # Breaking a print statement:
- # print SAVEOUT
- # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
- # ( $? & 128 ) ? " -- core dumped" : "", "\n";
- #
- # But we will not force a break after the opening paren here
- # (causes a blinker):
- # $heap->{stream}->set_output_filter(
- # poe::filter::reference->new('myotherfreezer') ),
- # ;
- #
- my $i_first_comma = $comma_index[$dd]->[0];
- my $level_comma = $levels_to_go[$i_first_comma];
- my $ci_start = $ci_levels_to_go[0];
+ $self->set_forced_breakpoint($i);
- # Here we want to use the value of ci before any -xci adjustment
- if ( $ci_start && $rOpts_extended_continuation_indentation ) {
- my $K0 = $K_to_go[0];
- if ( $self->[_rseqno_controlling_my_ci_]->{$K0} ) { $ci_start = 0 }
- }
- if ( !$ci_start
- && $old_breakpoint_to_go[$i_first_comma]
- && $level_comma == $levels_to_go[0] )
- {
- my $ibreak = -1;
- my $obp_count = 0;
- foreach my $ii ( reverse( 0 .. $i_first_comma - 1 ) ) {
- if ( $old_breakpoint_to_go[$ii] ) {
- $obp_count++;
- last if ( $obp_count > 1 );
- $ibreak = $ii
- if ( $levels_to_go[$ii] == $level_comma );
+ # Break at a previous '=', but only if it is before
+ # the mating '?'. Mate_index test fixes b1287.
+ my $ieq = $i_equals[$depth];
+ my $mix = $mate_index_to_go[$i];
+ if ( !defined($mix) ) { $mix = -1 }
+ if ( $ieq > 0 && $ieq < $mix ) {
+ $self->set_forced_breakpoint( $i_equals[$depth] );
+ $i_equals[$depth] = -1;
+ }
}
}
- # Changed rule from multiple old commas to just one here:
- if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
- {
- my $ibreak_m = $ibreak;
- $ibreak_m-- if ( $types_to_go[$ibreak_m] eq 'b' );
- if ( $ibreak_m >= 0 ) {
+ # handle any postponed closing breakpoints
+ if ( has_postponed_breakpoint($type_sequence) ) {
+ my $inc = ( $type eq ':' ) ? 0 : 1;
+ if ( $i >= $inc ) {
+ $self->set_forced_breakpoint( $i - $inc );
+ }
+ }
+ }
- # In order to avoid blinkers we have to be fairly
- # restrictive:
+ # must be opening token, one of { ( [ ?
+ else {
- # OLD Rules:
- # Rule 1: Do not to break before an opening token
- # Rule 2: avoid breaking at ternary operators
- # (see b931, which is similar to the above print example)
- # Rule 3: Do not break at chain operators to fix case b1119
- # - The previous test was '$typem !~ /^[\(\{\[L\?\:]$/'
+ # set breaks at ?/: if they will get separated (and are
+ # not a ?/: chain), or if the '?' is at the end of the
+ # line
+ if ( $token eq '?' ) {
+ my $i_colon = $mate_index_to_go[$i];
+ if (
+ !defined($i_colon) # the ':' is not in this batch
+ || $i == 0 # this '?' is the first token of the line
+ || $i == $max_index_to_go # or this '?' is the last token
+ )
+ {
- # NEW Rule, replaced above rules after case b1214:
- # only break at one of the included types
+ # don't break if # this has a side comment, and
+ # don't break at a '?' if preceded by ':' on
+ # this line of previous ?/: pair on this line.
+ # This is an attempt to preserve a chain of ?/:
+ # expressions (elsif2.t).
+ if (
+ (
+ $i_last_colon < 0
+ || $parent_seqno_to_go[$i_last_colon] !=
+ $parent_seqno_to_go[$i]
+ )
+ && $tokens_to_go[$max_index_to_go] ne '#'
+ )
+ {
+ $self->set_forced_breakpoint($i);
+ }
+ $self->set_closing_breakpoint($i);
+ }
+ }
- # Be sure to test any changes to these rules against runs
- # with -l=0 such as the 'bbvt' test (perltidyrc_colin)
- # series.
- my $type_m = $types_to_go[$ibreak_m];
+ # must be one of { ( [
+ else {
- # Switched from excluded to included for b1214. If necessary
- # the token could also be checked if type_m eq 'k'
- if ( $is_uncontained_comma_break_included_type{$type_m} ) {
- $self->set_forced_breakpoint($ibreak);
+ # do requested -lp breaks at the OPENING token for BROKEN
+ # blocks. NOTE: this can be done for both -lp and -xlp,
+ # but only -xlp can really take advantage of this. So this
+ # is currently restricted to -xlp to avoid excess changes to
+ # existing -lp formatting.
+ if ( $rOpts_extended_line_up_parentheses
+ && !defined( $mate_index_to_go[$i] ) )
+ {
+ my $lp_object =
+ $self->[_rlp_object_by_seqno_]->{$type_sequence};
+ if ($lp_object) {
+ my $K_begin_line = $lp_object->get_K_begin_line();
+ my $i_begin_line = $K_begin_line - $K_to_go[0];
+ $self->set_forced_lp_break( $i_begin_line, $i );
}
}
}
}
return;
- } ## end sub do_uncontained_comma_breaks
+ } ## end sub break_lists_type_sequence
- my %is_logical_container;
- my %quick_filter;
+ sub break_lists_increasing_depth {
- BEGIN {
- my @q = qw# if elsif unless while and or err not && | || ? : ! #;
- @is_logical_container{@q} = (1) x scalar(@q);
+ my ($self) = @_;
- # This filter will allow most tokens to skip past a section of code
- %quick_filter = %is_assignment;
- @q = qw# => . ; < > ~ #;
- push @q, ',';
- @quick_filter{@q} = (1) x scalar(@q);
- }
+ #--------------------------------------------
+ # prepare for a new list when depth increases
+ # token $i is a '(','{', or '['
+ #--------------------------------------------
- sub set_for_semicolon_breakpoints {
- my ( $self, $dd ) = @_;
- foreach ( @{ $rfor_semicolon_list[$dd] } ) {
- $self->set_forced_breakpoint($_);
- }
- return;
- }
+ #----------------------------------------------------------
+ # BEGIN initialize depth arrays
+ # ... use the same order as sub check_for_new_minimum_depth
+ #----------------------------------------------------------
+ $type_sequence_stack[$depth] = $type_sequence;
+
+ $override_cab3[$depth] = undef;
+ if ( $rOpts_comma_arrow_breakpoints == 3 && $type_sequence ) {
+ $override_cab3[$depth] =
+ $self->[_roverride_cab3_]->{$type_sequence};
+ }
+
+ $breakpoint_stack[$depth] = $forced_breakpoint_count;
+ $container_type[$depth] =
+
+ # k => && || ? : .
+ $is_container_label_type{$last_nonblank_type}
+ ? $last_nonblank_token
+ : EMPTY_STRING;
+ $identifier_count_stack[$depth] = 0;
+ $index_before_arrow[$depth] = -1;
+ $interrupted_list[$depth] = 0;
+ $item_count_stack[$depth] = 0;
+ $last_nonblank_type[$depth] = $last_nonblank_type;
+ $opening_structure_index_stack[$depth] = $i;
+
+ $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
+ $comma_index[$depth] = undef;
+ $last_comma_index[$depth] = undef;
+ $last_dot_index[$depth] = undef;
+ $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
+ $has_old_logical_breakpoints[$depth] = 0;
+ $rand_or_list[$depth] = [];
+ $rfor_semicolon_list[$depth] = [];
+ $i_equals[$depth] = -1;
+
+ # if line ends here then signal closing token to break
+ if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' ) {
+ $self->set_closing_breakpoint($i);
+ }
+
+ # Not all lists of values should be vertically aligned..
+ $dont_align[$depth] =
+
+ # code BLOCKS are handled at a higher level
+ ##( $block_type ne EMPTY_STRING )
+ $block_type
+
+ # certain paren lists
+ || ( $type eq '(' ) && (
+
+ # it does not usually look good to align a list of
+ # identifiers in a parameter list, as in:
+ # my($var1, $var2, ...)
+ # (This test should probably be refined, for now I'm just
+ # testing for any keyword)
+ ( $last_nonblank_type eq 'k' )
+
+ # a trailing '(' usually indicates a non-list
+ || ( $next_nonblank_type eq '(' )
+ );
+ $has_broken_sublist[$depth] = 0;
+ $want_comma_break[$depth] = 0;
- sub set_logical_breakpoints {
- my ( $self, $dd ) = @_;
+ #----------------------------
+ # END initialize depth arrays
+ #----------------------------
+
+ # patch to outdent opening brace of long if/for/..
+ # statements (like this one). See similar coding in
+ # set_continuation breaks. We have also catch it here for
+ # short line fragments which otherwise will not go through
+ # break_long_lines.
if (
- $item_count_stack[$dd] == 0
- && $is_logical_container{ $container_type[$dd] }
+ $block_type
- || $has_old_logical_breakpoints[$dd]
+ # if we have the ')' but not its '(' in this batch..
+ && ( $last_nonblank_token eq ')' )
+ && !defined( $mate_index_to_go[$i_last_nonblank_token] )
+
+ # and user wants brace to left
+ && !$rOpts_opening_brace_always_on_right
+
+ && ( $type eq '{' ) # should be true
+ && ( $token eq '{' ) # should be true
)
{
+ $self->set_forced_breakpoint( $i - 1 );
+ }
- # Look for breaks in this order:
- # 0 1 2 3
- # or and || &&
- foreach my $i ( 0 .. 3 ) {
- if ( $rand_or_list[$dd][$i] ) {
- foreach ( @{ $rand_or_list[$dd][$i] } ) {
- $self->set_forced_breakpoint($_);
- }
+ return;
+ } ## end sub break_lists_increasing_depth
+
+ sub break_lists_decreasing_depth {
+
+ my ( $self, $rbond_strength_bias ) = @_;
+
+ # We have arrived at a closing container token in sub break_lists:
+ # the token at index $i is one of these: ')','}', ']'
+ # A number of important breakpoints for this container can now be set
+ # based on the information that we have collected. This includes:
+ # - breaks at commas to format tables
+ # - breaks at certain logical operators and other good breakpoints
+ # - breaks at opening and closing containers if needed by selected
+ # formatting styles
+ # These breaks are made by calling sub 'set_forced_breakpoint'
+
+ $self->check_for_new_minimum_depth( $depth, $parent_seqno_to_go[$i] )
+ if ( $depth < $minimum_depth );
+
+ # force all outer logical containers to break after we see on
+ # old breakpoint
+ $has_old_logical_breakpoints[$depth] ||=
+ $has_old_logical_breakpoints[$current_depth];
+
+ # Patch to break between ') {' if the paren list is broken.
+ # There is similar logic in break_long_lines for
+ # non-broken lists.
+ if ( $token eq ')'
+ && $next_nonblank_block_type
+ && $interrupted_list[$current_depth]
+ && $next_nonblank_type eq '{'
+ && !$rOpts_opening_brace_always_on_right )
+ {
+ $self->set_forced_breakpoint($i);
+ }
- # break at any 'if' and 'unless' too
- foreach ( @{ $rand_or_list[$dd][4] } ) {
- $self->set_forced_breakpoint($_);
- }
- $rand_or_list[$dd] = [];
- last;
- }
- }
+#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
+
+ #-----------------------------------------------------------------
+ # Set breaks at commas to display a table of values if appropriate
+ #-----------------------------------------------------------------
+ my ( $bp_count, $do_not_break_apart ) = ( 0, 0 );
+ ( $bp_count, $do_not_break_apart ) =
+ $self->set_comma_breakpoints( $current_depth, $rbond_strength_bias )
+ if ( $item_count_stack[$current_depth] );
+
+ #-----------------------------------------------------------
+ # Now set flags needed to decide if we should break open the
+ # container ... This is a long rambling section which has
+ # grown over time to handle all situations.
+ #-----------------------------------------------------------
+ my $i_opening = $opening_structure_index_stack[$current_depth];
+ my $saw_opening_structure = ( $i_opening >= 0 );
+ my $lp_object;
+ if ( $rOpts_line_up_parentheses && $saw_opening_structure ) {
+ $lp_object = $self->[_rlp_object_by_seqno_]
+ ->{ $type_sequence_to_go[$i_opening] };
+ }
+
+ # this term is long if we had to break at interior commas..
+ my $is_long_term = $bp_count > 0;
+
+ # If this is a short container with one or more comma arrows,
+ # then we will mark it as a long term to open it if requested.
+ # $rOpts_comma_arrow_breakpoints =
+ # 0 - open only if comma precedes closing brace
+ # 1 - stable: except for one line blocks
+ # 2 - try to form 1 line blocks
+ # 3 - ignore =>
+ # 4 - always open up if vt=0
+ # 5 - stable: even for one line blocks if vt=0
+
+ my $cab_flag = $rOpts_comma_arrow_breakpoints;
+
+ # replace -cab=3 if overriden
+ if ( $cab_flag == 3 && $type_sequence ) {
+ my $test_cab = $self->[_roverride_cab3_]->{$type_sequence};
+ if ( defined($test_cab) ) { $cab_flag = $test_cab }
+ }
+
+ # PATCH: Modify the -cab flag if we are not processing a list:
+ # We only want the -cab flag to apply to list containers, so
+ # for non-lists we use the default and stable -cab=5 value.
+ # Fixes case b939a.
+ if ( $type_sequence && !$self->[_ris_list_by_seqno_]->{$type_sequence} )
+ {
+ $cab_flag = 5;
+ }
+
+ # Ignore old breakpoints when under stress.
+ # Fixes b1203 b1204 as well as b1197-b1200.
+ # But not if -lp: fixes b1264, b1265. NOTE: rechecked with
+ # b1264 to see if this check is still required at all, and
+ # these still require a check, but at higher level beta+3
+ # instead of beta: b1193 b780
+ if ( $saw_opening_structure
+ && !$lp_object
+ && $levels_to_go[$i_opening] >= $high_stress_level )
+ {
+ $cab_flag = 2;
+
+ # Do not break hash braces under stress (fixes b1238)
+ $do_not_break_apart ||= $types_to_go[$i_opening] eq 'L';
+
+ # This option fixes b1235, b1237, b1240 with old and new
+ # -lp, but formatting is nicer with next option.
+ ## $is_long_term ||=
+ ## $levels_to_go[$i_opening] > $stress_level_beta + 1;
+
+ # This option fixes b1240 but not b1235, b1237 with new -lp,
+ # but this gives better formatting than the previous option.
+ # TODO: see if stress_level_alpha should also be considered
+ $do_not_break_apart ||=
+ $levels_to_go[$i_opening] > $stress_level_beta;
}
- return;
- } ## end sub set_logical_breakpoints
- sub is_unbreakable_container {
+ if ( !$is_long_term
+ && $saw_opening_structure
+ && $is_opening_token{ $tokens_to_go[$i_opening] }
+ && $index_before_arrow[ $depth + 1 ] > 0
+ && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] } )
+ {
+ $is_long_term =
+ $cab_flag == 4
+ || $cab_flag == 0 && $last_nonblank_token eq ','
+ || $cab_flag == 5 && $old_breakpoint_to_go[$i_opening];
+ }
- # never break a container of one of these types
- # because bad things can happen (map1.t)
- my $dd = shift;
- return $is_sort_map_grep{ $container_type[$dd] };
- }
+ # mark term as long if the length between opening and closing
+ # parens exceeds allowed line length
+ if ( !$is_long_term && $saw_opening_structure ) {
- sub break_lists {
+ my $i_opening_minus = $self->find_token_starting_list($i_opening);
- my ( $self, $is_long_line, $rbond_strength_bias ) = @_;
+ my $excess = $self->excess_line_length( $i_opening_minus, $i );
- #----------------------------------------------------------------------
- # This routine is called once per batch, if the batch is a list, to set
- # line breaks so that hierarchical structure can be displayed and so
- # that list items can be vertically aligned. The output of this
- # routine is stored in the array @forced_breakpoint_to_go, which is
- # used by sub 'break_long_lines' to set final breakpoints.
- #----------------------------------------------------------------------
+ # Use standard spaces for indentation of lists in -lp mode
+ # if it gives a longer line length. This helps to avoid an
+ # instability due to forming and breaking one-line blocks.
+ # This fixes case b1314.
+ my $indentation = $leading_spaces_to_go[$i_opening_minus];
+ if ( ref($indentation)
+ && $self->[_ris_broken_container_]->{$type_sequence} )
+ {
+ my $lp_spaces = $indentation->get_spaces();
+ my $std_spaces = $indentation->get_standard_spaces();
+ my $diff = $std_spaces - $lp_spaces;
+ if ( $diff > 0 ) { $excess += $diff }
+ }
- my $rLL = $self->[_rLL_];
- my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
- my $ris_broken_container = $self->[_ris_broken_container_];
- my $rbreak_before_container_by_seqno =
- $self->[_rbreak_before_container_by_seqno_];
+ my $tol = $length_tol;
- $starting_depth = $nesting_depth_to_go[0];
+ # boost tol for an -lp container
+ if (
+ $lp_tol_boost
+ && $lp_object
+ && ( $rOpts_extended_continuation_indentation
+ || !$self->[_ris_list_by_seqno_]->{$type_sequence} )
+ )
+ {
+ $tol += $lp_tol_boost;
+ }
- $block_type = SPACE;
- $current_depth = $starting_depth;
- $i = -1;
- $last_nonblank_token = ';';
- $last_nonblank_type = ';';
- $last_nonblank_block_type = SPACE;
- $last_old_breakpoint_count = 0;
- $minimum_depth = $current_depth + 1; # forces update in check below
- $old_breakpoint_count = 0;
- $starting_breakpoint_count = $forced_breakpoint_count;
- $token = ';';
- $type = ';';
- $type_sequence = EMPTY_STRING;
+ # Patch to avoid blinking with -bbxi=2 and -cab=2
+ # in which variations in -ci cause unstable formatting
+ # in edge cases. We just always add one ci level so that
+ # the formatting is independent of the -BBX results.
+ # Fixes cases b1137 b1149 b1150 b1155 b1158 b1159 b1160
+ # b1161 b1166 b1167 b1168
+ if ( !$ci_levels_to_go[$i_opening]
+ && $self->[_rbreak_before_container_by_seqno_]->{$type_sequence}
+ )
+ {
+ $tol += $rOpts_continuation_indentation;
+ }
- my $total_depth_variation = 0;
- my $i_old_assignment_break;
- my $depth_last = $starting_depth;
- my $comma_follows_last_closing_token;
+ $is_long_term = $excess + $tol > 0;
- $self->check_for_new_minimum_depth( $current_depth,
- $parent_seqno_to_go[0] );
+ }
- my $want_previous_breakpoint = -1;
+ # We've set breaks after all comma-arrows. Now we have to
+ # undo them if this can be a one-line block
+ # (the only breakpoints set will be due to comma-arrows)
- my $saw_good_breakpoint;
- my $i_line_end = -1;
- my $i_line_start = -1;
- my $i_last_colon = -1;
+ if (
- #----------------------------------------
- # Main loop over all tokens in this batch
- #----------------------------------------
- while ( ++$i <= $max_index_to_go ) {
- if ( $type ne 'b' ) {
- $i_last_nonblank_token = $i - 1;
- $last_nonblank_type = $type;
- $last_nonblank_token = $token;
- $last_nonblank_block_type = $block_type;
- } ## end if ( $type ne 'b' )
- $type = $types_to_go[$i];
- $block_type = $block_type_to_go[$i];
- $token = $tokens_to_go[$i];
- $type_sequence = $type_sequence_to_go[$i];
- my $next_type = $types_to_go[ $i + 1 ];
- my $next_token = $tokens_to_go[ $i + 1 ];
- my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
- $next_nonblank_type = $types_to_go[$i_next_nonblank];
- $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
- $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
+ # user doesn't require breaking after all comma-arrows
+ ( $cab_flag != 0 ) && ( $cab_flag != 4 )
- # set break if flag was set
- if ( $want_previous_breakpoint >= 0 ) {
- $self->set_forced_breakpoint($want_previous_breakpoint);
- $want_previous_breakpoint = -1;
- }
+ # and if the opening structure is in this batch
+ && $saw_opening_structure
- $last_old_breakpoint_count = $old_breakpoint_count;
+ # and either on the same old line
+ && (
+ $old_breakpoint_count_stack[$current_depth] ==
+ $last_old_breakpoint_count
+
+ # or user wants to form long blocks with arrows
+ || $cab_flag == 2
+ )
+
+ # and we made breakpoints between the opening and closing
+ && ( $breakpoint_undo_stack[$current_depth] <
+ $forced_breakpoint_undo_count )
+
+ # and this block is short enough to fit on one line
+ # Note: use < because need 1 more space for possible comma
+ && !$is_long_term
+
+ )
+ {
+ $self->undo_forced_breakpoint_stack(
+ $breakpoint_undo_stack[$current_depth] );
+ }
+
+ # now see if we have any comma breakpoints left
+ my $has_comma_breakpoints =
+ ( $breakpoint_stack[$current_depth] != $forced_breakpoint_count );
+
+ # update broken-sublist flag of the outer container
+ $has_broken_sublist[$depth] =
+ $has_broken_sublist[$depth]
+ || $has_broken_sublist[$current_depth]
+ || $is_long_term
+ || $has_comma_breakpoints;
+
+ # Having come to the closing ')', '}', or ']', now we have to decide
+ # if we should 'open up' the structure by placing breaks at the
+ # opening and closing containers. This is a tricky decision. Here
+ # are some of the basic considerations:
+ #
+ # -If this is a BLOCK container, then any breakpoints will have
+ # already been set (and according to user preferences), so we need do
+ # nothing here.
+ #
+ # -If we have a comma-separated list for which we can align the list
+ # items, then we need to do so because otherwise the vertical aligner
+ # cannot currently do the alignment.
+ #
+ # -If this container does itself contain a container which has been
+ # broken open, then it should be broken open to properly show the
+ # structure.
+ #
+ # -If there is nothing to align, and no other reason to break apart,
+ # then do not do it.
+ #
+ # We will not break open the parens of a long but 'simple' logical
+ # expression. For example:
+ #
+ # This is an example of a simple logical expression and its formatting:
+ #
+ # if ( $bigwasteofspace1 && $bigwasteofspace2
+ # || $bigwasteofspace3 && $bigwasteofspace4 )
+ #
+ # Most people would prefer this than the 'spacey' version:
+ #
+ # if (
+ # $bigwasteofspace1 && $bigwasteofspace2
+ # || $bigwasteofspace3 && $bigwasteofspace4
+ # )
+ #
+ # To illustrate the rules for breaking logical expressions, consider:
+ #
+ # FULLY DENSE:
+ # if ( $opt_excl
+ # and ( exists $ids_excl_uc{$id_uc}
+ # or grep $id_uc =~ /$_/, @ids_excl_uc ))
+ #
+ # This is on the verge of being difficult to read. The current
+ # default is to open it up like this:
+ #
+ # DEFAULT:
+ # if (
+ # $opt_excl
+ # and ( exists $ids_excl_uc{$id_uc}
+ # or grep $id_uc =~ /$_/, @ids_excl_uc )
+ # )
+ #
+ # This is a compromise which tries to avoid being too dense and to
+ # spacey. A more spaced version would be:
+ #
+ # SPACEY:
+ # if (
+ # $opt_excl
+ # and (
+ # exists $ids_excl_uc{$id_uc}
+ # or grep $id_uc =~ /$_/, @ids_excl_uc
+ # )
+ # )
+ #
+ # Some people might prefer the spacey version -- an option could be
+ # added. The innermost expression contains a long block '( exists
+ # $ids_... ')'.
+ #
+ # Here is how the logic goes: We will force a break at the 'or' that
+ # the innermost expression contains, but we will not break apart its
+ # opening and closing containers because (1) it contains no
+ # multi-line sub-containers itself, and (2) there is no alignment to
+ # be gained by breaking it open like this
+ #
+ # and (
+ # exists $ids_excl_uc{$id_uc}
+ # or grep $id_uc =~ /$_/, @ids_excl_uc
+ # )
+ #
+ # (although this looks perfectly ok and might be good for long
+ # expressions). The outer 'if' container, though, contains a broken
+ # sub-container, so it will be broken open to avoid too much density.
+ # Also, since it contains no 'or's, there will be a forced break at
+ # its 'and'.
+
+ # Handle the experimental flag --break-open-compact-parens
+ # NOTE: This flag is not currently used and may eventually be removed.
+ # If this flag is set, we will implement it by
+ # pretending we did not see the opening structure, since in that case
+ # parens always get opened up.
+ if ( $saw_opening_structure
+ && $rOpts_break_open_compact_parens )
+ {
- # Fixed for case b1097 to not consider old breaks at highly
- # stressed locations, such as types 'L' and 'R'. It might be
- # useful to generalize this concept in the future by looking at
- # actual bond strengths.
- if ( $old_breakpoint_to_go[$i]
- && $type ne 'L'
- && $next_nonblank_type ne 'R' )
+ # This parameter is a one-character flag, as follows:
+ # '0' matches no parens -> break open NOT OK
+ # '1' matches all parens -> break open OK
+ # Other values are same as used by the weld-exclusion-list
+ my $flag = $rOpts_break_open_compact_parens;
+ if ( $flag eq '*'
+ || $flag eq '1' )
{
- $i_line_end = $i;
- $i_line_start = $i_next_nonblank;
-
- $old_breakpoint_count++;
-
- # Break before certain keywords if user broke there and
- # this is a 'safe' break point. The idea is to retain
- # any preferred breaks for sequential list operations,
- # like a schwartzian transform.
- if ($rOpts_break_at_old_keyword_breakpoints) {
- if (
- $next_nonblank_type eq 'k'
- && $is_keyword_returning_list{$next_nonblank_token}
- && ( $type =~ /^[=\)\]\}Riw]$/
- || $type eq 'k'
- && $is_keyword_returning_list{$token} )
- )
- {
+ $saw_opening_structure = 0;
+ }
+ else {
- # we actually have to set this break next time through
- # the loop because if we are at a closing token (such
- # as '}') which forms a one-line block, this break might
- # get undone.
+ # NOTE: $seqno will be equal to closure var $type_sequence here
+ my $seqno = $type_sequence_to_go[$i_opening];
+ $saw_opening_structure =
+ !$self->match_paren_control_flag( $seqno, $flag );
+ }
+ }
- # And do not do this at an equals if the user wants
- # breaks before an equals (blinker cases b434 b903)
- unless ( $type eq '=' && $want_break_before{$type} ) {
- $want_previous_breakpoint = $i;
- }
- } ## end if ( $next_nonblank_type...)
- } ## end if ($rOpts_break_at_old_keyword_breakpoints)
+ # Set some more flags telling something about this container..
+ my $is_simple_logical_expression;
+ if ( $item_count_stack[$current_depth] == 0
+ && $saw_opening_structure
+ && $tokens_to_go[$i_opening] eq '('
+ && $is_logical_container{ $container_type[$current_depth] } )
+ {
- # Break before attributes if user broke there
- if ($rOpts_break_at_old_attribute_breakpoints) {
- if ( $next_nonblank_type eq 'A' ) {
- $want_previous_breakpoint = $i;
- }
- }
+ # This seems to be a simple logical expression with
+ # no existing breakpoints. Set a flag to prevent
+ # opening it up.
+ if ( !$has_comma_breakpoints ) {
+ $is_simple_logical_expression = 1;
+ }
- # remember an = break as possible good break point
- if ( $is_assignment{$type} ) {
- $i_old_assignment_break = $i;
- }
- elsif ( $is_assignment{$next_nonblank_type} ) {
- $i_old_assignment_break = $i_next_nonblank;
- }
- } ## end if ( $old_breakpoint_to_go...)
+ #---------------------------------------------------
+ # This seems to be a simple logical expression with
+ # breakpoints (broken sublists, for example). Break
+ # at all 'or's and '||'s.
+ #---------------------------------------------------
+ else {
+ $self->set_logical_breakpoints($current_depth);
+ }
+ }
- next if ( $type eq 'b' );
- $depth = $nesting_depth_to_go[ $i + 1 ];
+ # break long terms at any C-style for semicolons (c154)
+ if ( $is_long_term
+ && @{ $rfor_semicolon_list[$current_depth] } )
+ {
+ $self->set_for_semicolon_breakpoints($current_depth);
- $total_depth_variation += abs( $depth - $depth_last );
- $depth_last = $depth;
+ # and open up a long 'for' or 'foreach' container to allow
+ # leading term alignment unless -lp is used.
+ $has_comma_breakpoints = 1 unless ($lp_object);
+ }
- # safety check - be sure we always break after a comment
- # Shouldn't happen .. an error here probably means that the
- # nobreak flag did not get turned off correctly during
- # formatting.
- if ( $type eq '#' ) {
- if ( $i != $max_index_to_go ) {
- if (DEVEL_MODE) {
- Fault(<<EOM);
-Non-fatal program bug: backup logic required to break after a comment
-EOM
- }
- $nobreak_to_go[$i] = 0;
- $self->set_forced_breakpoint($i);
- } ## end if ( $i != $max_index_to_go)
- } ## end if ( $type eq '#' )
+ #----------------------------------------------------------------
+ # FINALLY: Break open container according to the flags which have
+ # been set.
+ #----------------------------------------------------------------
+ if (
- # Force breakpoints at certain tokens in long lines.
- # Note that such breakpoints will be undone later if these tokens
- # are fully contained within parens on a line.
- if (
+ # breaks for code BLOCKS are handled at a higher level
+ !$block_type
- # break before a keyword within a line
- $type eq 'k'
- && $i > 0
+ # we do not need to break at the top level of an 'if'
+ # type expression
+ && !$is_simple_logical_expression
- # if one of these keywords:
- && $is_if_unless_while_until_for_foreach{$token}
+ ## modification to keep ': (' containers vertically tight;
+ ## but probably better to let user set -vt=1 to avoid
+ ## inconsistency with other paren types
+ ## && ($container_type[$current_depth] ne ':')
- # but do not break at something like '1 while'
- && ( $last_nonblank_type ne 'n' || $i > 2 )
+ # otherwise, we require one of these reasons for breaking:
+ && (
- # and let keywords follow a closing 'do' brace
- && $last_nonblank_block_type ne 'do'
+ # - this term has forced line breaks
+ $has_comma_breakpoints
- && (
- $is_long_line
+ # - the opening container is separated from this batch
+ # for some reason (comment, blank line, code block)
+ # - this is a non-paren container spanning multiple lines
+ || !$saw_opening_structure
- # or container is broken (by side-comment, etc)
- || ( $next_nonblank_token eq '('
- && $mate_index_to_go[$i_next_nonblank] < $i )
- )
- )
- {
- $self->set_forced_breakpoint( $i - 1 );
- } ## end if ( $type eq 'k' && $i...)
+ # - this is a long block contained in another breakable
+ # container
+ || $is_long_term && !$self->is_in_block_by_i($i_opening)
+ )
+ )
+ {
- # remember locations of '||' and '&&' for possible breaks if we
- # decide this is a long logical expression.
- if ( $type eq '||' ) {
- push @{ $rand_or_list[$depth][2] }, $i;
- ++$has_old_logical_breakpoints[$depth]
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints );
- } ## end elsif ( $type eq '||' )
- elsif ( $type eq '&&' ) {
- push @{ $rand_or_list[$depth][3] }, $i;
- ++$has_old_logical_breakpoints[$depth]
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints );
- } ## end elsif ( $type eq '&&' )
- elsif ( $type eq 'f' ) {
- push @{ $rfor_semicolon_list[$depth] }, $i;
+ # do special -lp breaks at the CLOSING token for INTACT
+ # blocks (because we might not do them if the block does
+ # not break open)
+ if ($lp_object) {
+ my $K_begin_line = $lp_object->get_K_begin_line();
+ my $i_begin_line = $K_begin_line - $K_to_go[0];
+ $self->set_forced_lp_break( $i_begin_line, $i_opening );
}
- elsif ( $type eq 'k' ) {
- if ( $token eq 'and' ) {
- push @{ $rand_or_list[$depth][1] }, $i;
- ++$has_old_logical_breakpoints[$depth]
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints );
- } ## end if ( $token eq 'and' )
- # break immediately at 'or's which are probably not in a logical
- # block -- but we will break in logical breaks below so that
- # they do not add to the forced_breakpoint_count
- elsif ( $token eq 'or' ) {
- push @{ $rand_or_list[$depth][0] }, $i;
- ++$has_old_logical_breakpoints[$depth]
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints );
- if ( $is_logical_container{ $container_type[$depth] } ) {
- }
- else {
- if ($is_long_line) { $self->set_forced_breakpoint($i) }
- elsif ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints )
+ # break after opening structure.
+ # note: break before closing structure will be automatic
+ if ( $minimum_depth <= $current_depth ) {
+
+ if ( $i_opening >= 0 ) {
+ if ( !$do_not_break_apart
+ && !is_unbreakable_container($current_depth) )
+ {
+ $self->set_forced_breakpoint($i_opening);
+
+ # Do not let brace types L/R use vertical tightness
+ # flags to recombine if we have to break on length
+ # because instability is possible if both vt and vtc
+ # flags are set ... see issue b1444.
+ if ( $is_long_term
+ && $types_to_go[$i_opening] eq 'L'
+ && $opening_vertical_tightness{'{'}
+ && $closing_vertical_tightness{'}'} )
{
- $saw_good_breakpoint = 1;
+ my $seqno = $type_sequence_to_go[$i_opening];
+ if ($seqno) {
+ $self->[_rbreak_container_]->{$seqno} = 1;
+ }
}
- } ## end else [ if ( $is_logical_container...)]
- } ## end elsif ( $token eq 'or' )
- elsif ( $token eq 'if' || $token eq 'unless' ) {
- push @{ $rand_or_list[$depth][4] }, $i;
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints )
- {
- $self->set_forced_breakpoint($i);
}
- } ## end elsif ( $token eq 'if' ||...)
- } ## end elsif ( $type eq 'k' )
- elsif ( $is_assignment{$type} ) {
- $i_equals[$depth] = $i;
- }
-
- if ($type_sequence) {
-
- # handle any postponed closing breakpoints
- if ( $is_closing_sequence_token{$token} ) {
- if ( $type eq ':' ) {
- $i_last_colon = $i;
+ }
- # retain break at a ':' line break
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_ternary_breakpoints
- && $levels_to_go[$i] < $list_stress_level )
- {
+ # break at ',' of lower depth level before opening token
+ if ( $last_comma_index[$depth] ) {
+ $self->set_forced_breakpoint( $last_comma_index[$depth] );
+ }
- $self->set_forced_breakpoint($i);
+ # break at '.' of lower depth level before opening token
+ if ( $last_dot_index[$depth] ) {
+ $self->set_forced_breakpoint( $last_dot_index[$depth] );
+ }
- # Break at a previous '=', but only if it is before
- # the mating '?'. Mate_index test fixes b1287.
- my $ieq = $i_equals[$depth];
- if ( $ieq > 0 && $ieq < $mate_index_to_go[$i] ) {
- $self->set_forced_breakpoint(
- $i_equals[$depth] );
- $i_equals[$depth] = -1;
- }
- } ## end if ( ( $i == $i_line_start...))
- } ## end if ( $type eq ':' )
- if ( has_postponed_breakpoint($type_sequence) ) {
- my $inc = ( $type eq ':' ) ? 0 : 1;
- if ( $i >= $inc ) {
- $self->set_forced_breakpoint( $i - $inc );
- }
- }
- } ## end if ( $is_closing_sequence_token{$token} )
+ # break before opening structure if preceded by another
+ # closing structure and a comma. This is normally
+ # done by the previous closing brace, but not
+ # if it was a one-line block.
+ if ( $i_opening > 2 ) {
+ my $i_prev =
+ ( $types_to_go[ $i_opening - 1 ] eq 'b' )
+ ? $i_opening - 2
+ : $i_opening - 1;
- # set breaks at ?/: if they will get separated (and are
- # not a ?/: chain), or if the '?' is at the end of the
- # line
- elsif ( $token eq '?' ) {
- my $i_colon = $mate_index_to_go[$i];
+ my $type_prev = $types_to_go[$i_prev];
+ my $token_prev = $tokens_to_go[$i_prev];
if (
- $i_colon <= 0 # the ':' is not in this batch
- || $i == 0 # this '?' is the first token of the line
- || $i ==
- $max_index_to_go # or this '?' is the last token
+ $type_prev eq ','
+ && ( $types_to_go[ $i_prev - 1 ] eq ')'
+ || $types_to_go[ $i_prev - 1 ] eq '}' )
)
{
+ $self->set_forced_breakpoint($i_prev);
+ }
- # don't break if # this has a side comment, and
- # don't break at a '?' if preceded by ':' on
- # this line of previous ?/: pair on this line.
- # This is an attempt to preserve a chain of ?/:
- # expressions (elsif2.t).
- if (
- (
- $i_last_colon < 0
- || $parent_seqno_to_go[$i_last_colon] !=
- $parent_seqno_to_go[$i]
- )
- && $tokens_to_go[$max_index_to_go] ne '#'
- )
- {
- $self->set_forced_breakpoint($i);
- }
- $self->set_closing_breakpoint($i);
- } ## end if ( $i_colon <= 0 ||...)
- } ## end elsif ( $token eq '?' )
-
- elsif ( $is_opening_token{$token} ) {
-
- # do requested -lp breaks at the OPENING token for BROKEN
- # blocks. NOTE: this can be done for both -lp and -xlp,
- # but only -xlp can really take advantage of this. So this
- # is currently restricted to -xlp to avoid excess changes to
- # existing -lp formatting.
- if ( $rOpts_extended_line_up_parentheses
- && $mate_index_to_go[$i] < 0 )
+ # also break before something like ':(' or '?('
+ # if appropriate.
+ elsif ($type_prev =~ /^([k\:\?]|&&|\|\|)$/
+ && $want_break_before{$token_prev} )
{
- my $lp_object =
- $self->[_rlp_object_by_seqno_]->{$type_sequence};
- if ($lp_object) {
- my $K_begin_line = $lp_object->get_K_begin_line();
- my $i_begin_line = $K_begin_line - $K_to_go[0];
- $self->set_forced_lp_break( $i_begin_line, $i );
- }
+ $self->set_forced_breakpoint($i_prev);
}
}
+ }
- } ## end if ($type_sequence)
+ # break after comma following closing structure
+ if ( $types_to_go[ $i + 1 ] eq ',' ) {
+ $self->set_forced_breakpoint( $i + 1 );
+ }
-#print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
+ # break before an '=' following closing structure
+ if (
+ $is_assignment{$next_nonblank_type}
+ && ( $breakpoint_stack[$current_depth] !=
+ $forced_breakpoint_count )
+ )
+ {
+ $self->set_forced_breakpoint($i);
+ }
- #------------------------------------------------------------
- # Handle Increasing Depth..
- #
- # prepare for a new list when depth increases
- # token $i is a '(','{', or '['
- #------------------------------------------------------------
- # hardened against bad input syntax: depth jump must be 1 and type
- # must be opening..fixes c102
- if ( $depth == $current_depth + 1 && $is_opening_type{$type} ) {
+ # break at any comma before the opening structure Added
+ # for -lp, but seems to be good in general. It isn't
+ # obvious how far back to look; the '5' below seems to
+ # work well and will catch the comma in something like
+ # push @list, myfunc( $param, $param, ..
- #----------------------------------------------------------
- # BEGIN initialize depth arrays
- # ... use the same order as sub check_for_new_minimum_depth
- #----------------------------------------------------------
- $type_sequence_stack[$depth] = $type_sequence;
- $override_cab3[$depth] =
- $rOpts_comma_arrow_breakpoints == 3
- && $type_sequence
- && $self->[_roverride_cab3_]->{$type_sequence};
-
- $breakpoint_stack[$depth] = $forced_breakpoint_count;
- $container_type[$depth] =
-
- # k => && || ? : .
- $is_container_label_type{$last_nonblank_type}
- ? $last_nonblank_token
- : EMPTY_STRING;
- $identifier_count_stack[$depth] = 0;
- $index_before_arrow[$depth] = -1;
- $interrupted_list[$depth] = 0;
- $item_count_stack[$depth] = 0;
- $last_nonblank_type[$depth] = $last_nonblank_type;
- $opening_structure_index_stack[$depth] = $i;
-
- $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
- $comma_index[$depth] = undef;
- $last_comma_index[$depth] = undef;
- $last_dot_index[$depth] = undef;
- $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
- $has_old_logical_breakpoints[$depth] = 0;
- $rand_or_list[$depth] = [];
- $rfor_semicolon_list[$depth] = [];
- $i_equals[$depth] = -1;
-
- # if line ends here then signal closing token to break
- if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
- {
- $self->set_closing_breakpoint($i);
+ my $icomma = $last_comma_index[$depth];
+ if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
+ unless ( $forced_breakpoint_to_go[$icomma] ) {
+ $self->set_forced_breakpoint($icomma);
}
+ }
+ }
- # Not all lists of values should be vertically aligned..
- $dont_align[$depth] =
-
- # code BLOCKS are handled at a higher level
- ( $block_type ne EMPTY_STRING )
+ #-----------------------------------------------------------
+ # Break open a logical container open if it was already open
+ #-----------------------------------------------------------
+ elsif ($is_simple_logical_expression
+ && $has_old_logical_breakpoints[$current_depth] )
+ {
+ $self->set_logical_breakpoints($current_depth);
+ }
- # certain paren lists
- || ( $type eq '(' ) && (
+ # Handle long container which does not get opened up
+ elsif ($is_long_term) {
- # it does not usually look good to align a list of
- # identifiers in a parameter list, as in:
- # my($var1, $var2, ...)
- # (This test should probably be refined, for now I'm just
- # testing for any keyword)
- ( $last_nonblank_type eq 'k' )
+ # must set fake breakpoint to alert outer containers that
+ # they are complex
+ set_fake_breakpoint();
+ }
- # a trailing '(' usually indicates a non-list
- || ( $next_nonblank_type eq '(' )
- );
- $has_broken_sublist[$depth] = 0;
- $want_comma_break[$depth] = 0;
+ return;
+ } ## end sub break_lists_decreasing_depth
+} ## end closure break_lists
- #-------------------------------------
- # END initialize depth arrays
- #-------------------------------------
+my %is_kwiZ;
+my %is_key_type;
- # patch to outdent opening brace of long if/for/..
- # statements (like this one). See similar coding in
- # set_continuation breaks. We have also catch it here for
- # short line fragments which otherwise will not go through
- # break_long_lines.
- if (
- $block_type
+BEGIN {
- # if we have the ')' but not its '(' in this batch..
- && ( $last_nonblank_token eq ')' )
- && $mate_index_to_go[$i_last_nonblank_token] < 0
+ # Added 'w' to fix b1172
+ my @q = qw(k w i Z ->);
+ @is_kwiZ{@q} = (1) x scalar(@q);
- # and user wants brace to left
- && !$rOpts_opening_brace_always_on_right
+ # added = for b1211
+ @q = qw<( [ { L R } ] ) = b>;
+ push @q, ',';
+ @is_key_type{@q} = (1) x scalar(@q);
+} ## end BEGIN
- && ( $type eq '{' ) # should be true
- && ( $token eq '{' ) # should be true
- )
- {
- $self->set_forced_breakpoint( $i - 1 );
- } ## end if ( $block_type && ( ...))
- } ## end if ( $depth > $current_depth)
+use constant DEBUG_FIND_START => 0;
- #------------------------------------------------------------
- # Handle Decreasing Depth..
- #
- # finish off any old list when depth decreases
- # token $i is a ')','}', or ']'
- #------------------------------------------------------------
- # hardened against bad input syntax: depth jump must be 1 and type
- # must be closing .. fixes c102
- elsif ( $depth == $current_depth - 1 && $is_closing_type{$type} ) {
+sub find_token_starting_list {
- $self->check_for_new_minimum_depth( $depth,
- $parent_seqno_to_go[$i] );
+ # When testing to see if a block will fit on one line, some
+ # previous token(s) may also need to be on the line; particularly
+ # if this is a sub call. So we will look back at least one
+ # token.
+ my ( $self, $i_opening_paren ) = @_;
- $comma_follows_last_closing_token =
- $next_nonblank_type eq ',' || $next_nonblank_type eq '=>';
+ # This will be the return index
+ my $i_opening_minus = $i_opening_paren;
- # force all outer logical containers to break after we see on
- # old breakpoint
- $has_old_logical_breakpoints[$depth] ||=
- $has_old_logical_breakpoints[$current_depth];
-
- # Patch to break between ') {' if the paren list is broken.
- # There is similar logic in break_long_lines for
- # non-broken lists.
- if ( $token eq ')'
- && $next_nonblank_block_type
- && $interrupted_list[$current_depth]
- && $next_nonblank_type eq '{'
- && !$rOpts_opening_brace_always_on_right )
- {
- $self->set_forced_breakpoint($i);
- } ## end if ( $token eq ')' && ...
+ if ( $i_opening_minus <= 0 ) {
+ return $i_opening_minus;
+ }
-#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
+ my $im1 = $i_opening_paren - 1;
+ my ( $iprev_nb, $type_prev_nb ) = ( $im1, $types_to_go[$im1] );
+ if ( $type_prev_nb eq 'b' && $iprev_nb > 0 ) {
+ $iprev_nb -= 1;
+ $type_prev_nb = $types_to_go[$iprev_nb];
+ }
- # set breaks at commas if necessary
- my ( $bp_count, $do_not_break_apart ) =
- $self->set_comma_breakpoints( $current_depth,
- $rbond_strength_bias );
+ if ( $type_prev_nb eq ',' ) {
- my $i_opening = $opening_structure_index_stack[$current_depth];
- my $saw_opening_structure = ( $i_opening >= 0 );
- my $lp_object;
- if ( $rOpts_line_up_parentheses && $saw_opening_structure ) {
- $lp_object = $self->[_rlp_object_by_seqno_]
- ->{ $type_sequence_to_go[$i_opening] };
- }
-
- # this term is long if we had to break at interior commas..
- my $is_long_term = $bp_count > 0;
-
- # If this is a short container with one or more comma arrows,
- # then we will mark it as a long term to open it if requested.
- # $rOpts_comma_arrow_breakpoints =
- # 0 - open only if comma precedes closing brace
- # 1 - stable: except for one line blocks
- # 2 - try to form 1 line blocks
- # 3 - ignore =>
- # 4 - always open up if vt=0
- # 5 - stable: even for one line blocks if vt=0
-
- # PATCH: Modify the -cab flag if we are not processing a list:
- # We only want the -cab flag to apply to list containers, so
- # for non-lists we use the default and stable -cab=5 value.
- # Fixes case b939a.
- my $cab_flag = $rOpts_comma_arrow_breakpoints;
- if ( $type_sequence && !$ris_list_by_seqno->{$type_sequence} ) {
- $cab_flag = 5;
- }
-
- # Ignore old breakpoints when under stress.
- # Fixes b1203 b1204 as well as b1197-b1200.
- # But not if -lp: fixes b1264, b1265. NOTE: rechecked with
- # b1264 to see if this check is still required at all, and
- # these still require a check, but at higher level beta+3
- # instead of beta: b1193 b780
- if ( $saw_opening_structure
- && !$lp_object
- && $levels_to_go[$i_opening] >= $list_stress_level )
- {
- $cab_flag = 2;
+ # a previous comma is a good break point
+ # $i_opening_minus = $i_opening_paren;
+ }
- # Do not break hash braces under stress (fixes b1238)
- $do_not_break_apart ||= $types_to_go[$i_opening] eq 'L';
+ elsif (
+ $tokens_to_go[$i_opening_paren] eq '('
- # This option fixes b1235, b1237, b1240 with old and new
- # -lp, but formatting is nicer with next option.
- ## $is_long_term ||=
- ## $levels_to_go[$i_opening] > $stress_level_beta + 1;
+ # non-parens added here to fix case b1186
+ || $is_kwiZ{$type_prev_nb}
+ )
+ {
+ $i_opening_minus = $im1;
- # This option fixes b1240 but not b1235, b1237 with new -lp,
- # but this gives better formatting than the previous option.
- $do_not_break_apart ||=
- $levels_to_go[$i_opening] > $stress_level_beta;
- }
+ # Walk back to improve length estimate...
+ # FIX for cases b1169 b1170 b1171: start walking back
+ # at the previous nonblank. This makes the result insensitive
+ # to the flag --space-function-paren, and similar.
+ # previous loop: for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
+ foreach my $j ( reverse( 0 .. $iprev_nb ) ) {
+ if ( $is_key_type{ $types_to_go[$j] } ) {
- if ( !$is_long_term
- && $saw_opening_structure
- && $is_opening_token{ $tokens_to_go[$i_opening] }
- && $index_before_arrow[ $depth + 1 ] > 0
- && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
- )
- {
- $is_long_term =
- $cab_flag == 4
- || $cab_flag == 0 && $last_nonblank_token eq ','
- || $cab_flag == 5 && $old_breakpoint_to_go[$i_opening];
- } ## end if ( !$is_long_term &&...)
-
- # mark term as long if the length between opening and closing
- # parens exceeds allowed line length
- if ( !$is_long_term && $saw_opening_structure ) {
-
- my $i_opening_minus =
- $self->find_token_starting_list($i_opening);
-
- my $excess =
- $self->excess_line_length( $i_opening_minus, $i );
-
- # Use standard spaces for indentation of lists in -lp mode
- # if it gives a longer line length. This helps to avoid an
- # instability due to forming and breaking one-line blocks.
- # This fixes case b1314.
- my $indentation = $leading_spaces_to_go[$i_opening_minus];
- if ( ref($indentation)
- && $ris_broken_container->{$type_sequence} )
- {
- my $lp_spaces = $indentation->get_spaces();
- my $std_spaces = $indentation->get_standard_spaces();
- my $diff = $std_spaces - $lp_spaces;
- if ( $diff > 0 ) { $excess += $diff }
- }
+ # fix for b1211
+ if ( $types_to_go[$j] eq '=' ) { $i_opening_minus = $j }
+ last;
+ }
+ $i_opening_minus = $j;
+ }
+ if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
+ }
- my $tol = $length_tol;
+ DEBUG_FIND_START && print <<EOM;
+FIND_START: i=$i_opening_paren tok=$tokens_to_go[$i_opening_paren] => im=$i_opening_minus tok=$tokens_to_go[$i_opening_minus]
+EOM
- # boost tol for an -lp container
- if (
- $lp_tol_boost
- && $lp_object
- && ( $rOpts_extended_continuation_indentation
- || !$ris_list_by_seqno->{$type_sequence} )
- )
- {
- $tol += $lp_tol_boost;
- }
+ return $i_opening_minus;
+} ## end sub find_token_starting_list
- # Patch to avoid blinking with -bbxi=2 and -cab=2
- # in which variations in -ci cause unstable formatting
- # in edge cases. We just always add one ci level so that
- # the formatting is independent of the -BBX results.
- # Fixes cases b1137 b1149 b1150 b1155 b1158 b1159 b1160
- # b1161 b1166 b1167 b1168
- if ( !$ci_levels_to_go[$i_opening]
- && $rbreak_before_container_by_seqno->{$type_sequence} )
- {
- $tol += $rOpts->{'continuation-indentation'};
- }
+{ ## begin closure table_maker
- $is_long_term = $excess + $tol > 0;
+ my %is_keyword_with_special_leading_term;
- } ## end if ( !$is_long_term &&...)
+ BEGIN {
- # We've set breaks after all comma-arrows. Now we have to
- # undo them if this can be a one-line block
- # (the only breakpoints set will be due to comma-arrows)
+ # These keywords have prototypes which allow a special leading item
+ # followed by a list
+ my @q = qw(
+ chmod
+ formline
+ grep
+ join
+ kill
+ map
+ pack
+ printf
+ push
+ sprintf
+ unshift
+ );
+ @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
+ } ## end BEGIN
- if (
+ use constant DEBUG_SPARSE => 0;
- # user doesn't require breaking after all comma-arrows
- ( $cab_flag != 0 ) && ( $cab_flag != 4 )
+ sub table_maker {
- # and if the opening structure is in this batch
- && $saw_opening_structure
+ # Given a list of comma-separated items, set breakpoints at some of
+ # the commas, if necessary, to make it easy to read.
+ # This is done by making calls to 'set_forced_breakpoint'.
+ # This is a complex routine because there are many special cases.
- # and either on the same old line
- && (
- $old_breakpoint_count_stack[$current_depth] ==
- $last_old_breakpoint_count
+ # Returns: nothing
- # or user wants to form long blocks with arrows
- || $cab_flag == 2
+ # The numerous variables involved are contained three hashes:
+ # $rhash_IN : For contents see the calling routine
+ # $rhash_A: For contents see return from sub 'table_layout_A'
+ # $rhash_B: For contents see return from sub 'table_layout_B'
- # if -cab=3 is overridden then use -cab=2 behavior
- || $cab_flag == 3 && $override_cab3[$current_depth]
- )
+ my ( $self, $rhash_IN ) = @_;
- # and we made breakpoints between the opening and closing
- && ( $breakpoint_undo_stack[$current_depth] <
- $forced_breakpoint_undo_count )
+ # Find lengths of all list items needed for calculating page layout
+ my $rhash_A = table_layout_A($rhash_IN);
+ return if ( !defined($rhash_A) );
- # and this block is short enough to fit on one line
- # Note: use < because need 1 more space for possible comma
- && !$is_long_term
+ # Some variables received from caller...
+ my $i_closing_paren = $rhash_IN->{i_closing_paren};
+ my $i_opening_paren = $rhash_IN->{i_opening_paren};
+ my $has_broken_sublist = $rhash_IN->{has_broken_sublist};
+ my $interrupted = $rhash_IN->{interrupted};
- )
- {
- $self->undo_forced_breakpoint_stack(
- $breakpoint_undo_stack[$current_depth] );
- } ## end if ( ( $rOpts_comma_arrow_breakpoints...))
-
- # now see if we have any comma breakpoints left
- my $has_comma_breakpoints =
- ( $breakpoint_stack[$current_depth] !=
- $forced_breakpoint_count );
-
- # update broken-sublist flag of the outer container
- $has_broken_sublist[$depth] =
- $has_broken_sublist[$depth]
- || $has_broken_sublist[$current_depth]
- || $is_long_term
- || $has_comma_breakpoints;
-
-# Having come to the closing ')', '}', or ']', now we have to decide if we
-# should 'open up' the structure by placing breaks at the opening and
-# closing containers. This is a tricky decision. Here are some of the
-# basic considerations:
-#
-# -If this is a BLOCK container, then any breakpoints will have already
-# been set (and according to user preferences), so we need do nothing here.
-#
-# -If we have a comma-separated list for which we can align the list items,
-# then we need to do so because otherwise the vertical aligner cannot
-# currently do the alignment.
-#
-# -If this container does itself contain a container which has been broken
-# open, then it should be broken open to properly show the structure.
-#
-# -If there is nothing to align, and no other reason to break apart,
-# then do not do it.
-#
-# We will not break open the parens of a long but 'simple' logical expression.
-# For example:
-#
-# This is an example of a simple logical expression and its formatting:
-#
-# if ( $bigwasteofspace1 && $bigwasteofspace2
-# || $bigwasteofspace3 && $bigwasteofspace4 )
-#
-# Most people would prefer this than the 'spacey' version:
-#
-# if (
-# $bigwasteofspace1 && $bigwasteofspace2
-# || $bigwasteofspace3 && $bigwasteofspace4
-# )
-#
-# To illustrate the rules for breaking logical expressions, consider:
-#
-# FULLY DENSE:
-# if ( $opt_excl
-# and ( exists $ids_excl_uc{$id_uc}
-# or grep $id_uc =~ /$_/, @ids_excl_uc ))
-#
-# This is on the verge of being difficult to read. The current default is to
-# open it up like this:
-#
-# DEFAULT:
-# if (
-# $opt_excl
-# and ( exists $ids_excl_uc{$id_uc}
-# or grep $id_uc =~ /$_/, @ids_excl_uc )
-# )
-#
-# This is a compromise which tries to avoid being too dense and to spacey.
-# A more spaced version would be:
-#
-# SPACEY:
-# if (
-# $opt_excl
-# and (
-# exists $ids_excl_uc{$id_uc}
-# or grep $id_uc =~ /$_/, @ids_excl_uc
-# )
-# )
-#
-# Some people might prefer the spacey version -- an option could be added. The
-# innermost expression contains a long block '( exists $ids_... ')'.
-#
-# Here is how the logic goes: We will force a break at the 'or' that the
-# innermost expression contains, but we will not break apart its opening and
-# closing containers because (1) it contains no multi-line sub-containers itself,
-# and (2) there is no alignment to be gained by breaking it open like this
-#
-# and (
-# exists $ids_excl_uc{$id_uc}
-# or grep $id_uc =~ /$_/, @ids_excl_uc
-# )
-#
-# (although this looks perfectly ok and might be good for long expressions). The
-# outer 'if' container, though, contains a broken sub-container, so it will be
-# broken open to avoid too much density. Also, since it contains no 'or's, there
-# will be a forced break at its 'and'.
-
- # Open-up if parens if requested. We do this by pretending we
- # did not see the opening structure, since in that case parens
- # always get opened up.
- if ( $saw_opening_structure
- && $rOpts_break_open_compact_parens )
- {
+ #-----------------------------------------
+ # Section A: Handle some special cases ...
+ #-----------------------------------------
- # This parameter is a one-character flag, as follows:
- # '0' matches no parens -> break open NOT OK
- # '1' matches all parens -> break open OK
- # Other values are same as used by the weld-exclusion-list
- my $flag = $rOpts_break_open_compact_parens;
- if ( $flag eq '*'
- || $flag eq '1' )
- {
- $saw_opening_structure = 0;
- }
- else {
- my $KK = $K_to_go[$i_opening];
- $saw_opening_structure =
- !$self->match_paren_flag( $KK, $flag );
- }
- }
+ #-------------------------------------------------------------
+ # Special Case A1: Compound List Rule 1:
+ # Break at (almost) every comma for a list containing a broken
+ # sublist. This has higher priority than the Interrupted List
+ # Rule.
+ #-------------------------------------------------------------
+ if ($has_broken_sublist) {
- # set some flags telling something about this container..
- my $is_simple_logical_expression = 0;
- if ( $item_count_stack[$current_depth] == 0
- && $saw_opening_structure
- && $tokens_to_go[$i_opening] eq '('
- && $is_logical_container{ $container_type[$current_depth] }
- )
- {
+ $self->apply_broken_sublist_rule( $rhash_A, $interrupted );
- # This seems to be a simple logical expression with
- # no existing breakpoints. Set a flag to prevent
- # opening it up.
- if ( !$has_comma_breakpoints ) {
- $is_simple_logical_expression = 1;
- }
+ return;
+ }
- # This seems to be a simple logical expression with
- # breakpoints (broken sublists, for example). Break
- # at all 'or's and '||'s.
- else {
- $self->set_logical_breakpoints($current_depth);
- }
- } ## end if ( $item_count_stack...)
+ #--------------------------------------------------------------
+ # Special Case A2: Interrupted List Rule:
+ # A list is forced to use old breakpoints if it was interrupted
+ # by side comments or blank lines, or requested by user.
+ #--------------------------------------------------------------
+ if ( $rOpts_break_at_old_comma_breakpoints
+ || $interrupted
+ || $i_opening_paren < 0 )
+ {
+ my $i_first_comma = $rhash_A->{_i_first_comma};
+ my $i_true_last_comma = $rhash_A->{_i_true_last_comma};
+ $self->copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
+ return;
+ }
- if ( $is_long_term
- && @{ $rfor_semicolon_list[$current_depth] } )
- {
- $self->set_for_semicolon_breakpoints($current_depth);
+ #-----------------------------------------------------------------
+ # Special Case A3: If it fits on one line, return and let the line
+ # break logic decide if and where to break.
+ #-----------------------------------------------------------------
- # open up a long 'for' or 'foreach' container to allow
- # leading term alignment unless -lp is used.
- $has_comma_breakpoints = 1 unless ($lp_object);
- } ## end if ( $is_long_term && ...)
+ # The -bbxi=2 parameters can add an extra hidden level of indentation
+ # so they need a tolerance to avoid instability. Fixes b1259, 1260.
+ my $opening_token = $tokens_to_go[$i_opening_paren];
+ my $tol = 0;
+ if ( $break_before_container_types{$opening_token}
+ && $container_indentation_options{$opening_token}
+ && $container_indentation_options{$opening_token} == 2 )
+ {
+ $tol = $rOpts_indent_columns;
- if (
+ # use greater of -ci and -i (fix for case b1334)
+ if ( $tol < $rOpts_continuation_indentation ) {
+ $tol = $rOpts_continuation_indentation;
+ }
+ }
- # breaks for code BLOCKS are handled at a higher level
- !$block_type
+ my $i_opening_minus = $self->find_token_starting_list($i_opening_paren);
+ my $excess =
+ $self->excess_line_length( $i_opening_minus, $i_closing_paren );
+ return if ( $excess + $tol <= 0 );
- # we do not need to break at the top level of an 'if'
- # type expression
- && !$is_simple_logical_expression
+ #---------------------------------------
+ # Section B: Handle a multiline list ...
+ #---------------------------------------
- ## modification to keep ': (' containers vertically tight;
- ## but probably better to let user set -vt=1 to avoid
- ## inconsistency with other paren types
- ## && ($container_type[$current_depth] ne ':')
+ $self->break_multiline_list( $rhash_IN, $rhash_A, $i_opening_minus );
+ return;
- # otherwise, we require one of these reasons for breaking:
- && (
+ } ## end sub table_maker
- # - this term has forced line breaks
- $has_comma_breakpoints
+ sub apply_broken_sublist_rule {
- # - the opening container is separated from this batch
- # for some reason (comment, blank line, code block)
- # - this is a non-paren container spanning multiple lines
- || !$saw_opening_structure
+ my ( $self, $rhash_A, $interrupted ) = @_;
- # - this is a long block contained in another breakable
- # container
- || $is_long_term && !$self->is_in_block_by_i($i_opening)
- )
- )
- {
+ my $ritem_lengths = $rhash_A->{_ritem_lengths};
+ my $ri_term_begin = $rhash_A->{_ri_term_begin};
+ my $ri_term_end = $rhash_A->{_ri_term_end};
+ my $ri_term_comma = $rhash_A->{_ri_term_comma};
+ my $item_count = $rhash_A->{_item_count_A};
+ my $i_first_comma = $rhash_A->{_i_first_comma};
+ my $i_true_last_comma = $rhash_A->{_i_true_last_comma};
- # do special -lp breaks at the CLOSING token for INTACT
- # blocks (because we might not do them if the block does
- # not break open)
- if ($lp_object) {
- my $K_begin_line = $lp_object->get_K_begin_line();
- my $i_begin_line = $K_begin_line - $K_to_go[0];
- $self->set_forced_lp_break( $i_begin_line, $i_opening );
- }
+ # Break at every comma except for a comma between two
+ # simple, small terms. This prevents long vertical
+ # columns of, say, just 0's.
+ my $small_length = 10; # 2 + actual maximum length wanted
- # break after opening structure.
- # note: break before closing structure will be automatic
- if ( $minimum_depth <= $current_depth ) {
+ # We'll insert a break in long runs of small terms to
+ # allow alignment in uniform tables.
+ my $skipped_count = 0;
+ my $columns = table_columns_available($i_first_comma);
+ my $fields = int( $columns / $small_length );
+ if ( $rOpts_maximum_fields_per_table
+ && $fields > $rOpts_maximum_fields_per_table )
+ {
+ $fields = $rOpts_maximum_fields_per_table;
+ }
+ my $max_skipped_count = $fields - 1;
+
+ my $is_simple_last_term = 0;
+ my $is_simple_next_term = 0;
+ foreach my $j ( 0 .. $item_count ) {
+ $is_simple_last_term = $is_simple_next_term;
+ $is_simple_next_term = 0;
+ if ( $j < $item_count
+ && $ri_term_end->[$j] == $ri_term_begin->[$j]
+ && $ritem_lengths->[$j] <= $small_length )
+ {
+ $is_simple_next_term = 1;
+ }
+ next if $j == 0;
+ if ( $is_simple_last_term
+ && $is_simple_next_term
+ && $skipped_count < $max_skipped_count )
+ {
+ $skipped_count++;
+ }
+ else {
+ $skipped_count = 0;
+ my $i_tc = $ri_term_comma->[ $j - 1 ];
+ last unless defined $i_tc;
+ $self->set_forced_breakpoint($i_tc);
+ }
+ }
- if ( $i_opening >= 0 ) {
- $self->set_forced_breakpoint($i_opening)
- unless ( $do_not_break_apart
- || is_unbreakable_container($current_depth) );
- }
+ # always break at the last comma if this list is
+ # interrupted; we wouldn't want to leave a terminal '{', for
+ # example.
+ if ($interrupted) {
+ $self->set_forced_breakpoint($i_true_last_comma);
+ }
+ return;
+ } ## end sub apply_broken_sublist_rule
- # break at ',' of lower depth level before opening token
- if ( $last_comma_index[$depth] ) {
- $self->set_forced_breakpoint(
- $last_comma_index[$depth] );
- }
+ sub set_emergency_comma_breakpoints {
- # break at '.' of lower depth level before opening token
- if ( $last_dot_index[$depth] ) {
- $self->set_forced_breakpoint(
- $last_dot_index[$depth] );
- }
+ my (
- # break before opening structure if preceded by another
- # closing structure and a comma. This is normally
- # done by the previous closing brace, but not
- # if it was a one-line block.
- if ( $i_opening > 2 ) {
- my $i_prev =
- ( $types_to_go[ $i_opening - 1 ] eq 'b' )
- ? $i_opening - 2
- : $i_opening - 1;
+ $self, #
- if (
- $types_to_go[$i_prev] eq ','
- && ( $types_to_go[ $i_prev - 1 ] eq ')'
- || $types_to_go[ $i_prev - 1 ] eq '}' )
- )
- {
- $self->set_forced_breakpoint($i_prev);
- }
+ $number_of_fields_best,
+ $rhash_IN,
+ $comma_count,
+ $i_first_comma,
- # also break before something like ':(' or '?('
- # if appropriate.
- elsif (
- $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
- {
- my $token_prev = $tokens_to_go[$i_prev];
- if ( $want_break_before{$token_prev} ) {
- $self->set_forced_breakpoint($i_prev);
- }
- } ## end elsif ( $types_to_go[$i_prev...])
- } ## end if ( $i_opening > 2 )
- } ## end if ( $minimum_depth <=...)
+ ) = @_;
- # break after comma following closing structure
- if ( $next_type eq ',' ) {
- $self->set_forced_breakpoint( $i + 1 );
- }
+ # The number of fields worked out to be negative, so we
+ # have to make an emergency fix.
- # break before an '=' following closing structure
- if (
- $is_assignment{$next_nonblank_type}
- && ( $breakpoint_stack[$current_depth] !=
- $forced_breakpoint_count )
- )
- {
- $self->set_forced_breakpoint($i);
- } ## end if ( $is_assignment{$next_nonblank_type...})
-
- # break at any comma before the opening structure Added
- # for -lp, but seems to be good in general. It isn't
- # obvious how far back to look; the '5' below seems to
- # work well and will catch the comma in something like
- # push @list, myfunc( $param, $param, ..
-
- my $icomma = $last_comma_index[$depth];
- if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
- unless ( $forced_breakpoint_to_go[$icomma] ) {
- $self->set_forced_breakpoint($icomma);
- }
- }
- } ## end logic to open up a container
+ my $rcomma_index = $rhash_IN->{rcomma_index};
+ my $next_nonblank_type = $rhash_IN->{next_nonblank_type};
+ my $rdo_not_break_apart = $rhash_IN->{rdo_not_break_apart};
+ my $must_break_open = $rhash_IN->{must_break_open};
- # Break open a logical container open if it was already open
- elsif ($is_simple_logical_expression
- && $has_old_logical_breakpoints[$current_depth] )
- {
- $self->set_logical_breakpoints($current_depth);
- }
+ # are we an item contained in an outer list?
+ my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
- # Handle long container which does not get opened up
- elsif ($is_long_term) {
+ # In many cases, it may be best to not force a break if there is just
+ # one comma, because the standard continuation break logic will do a
+ # better job without it.
- # must set fake breakpoint to alert outer containers that
- # they are complex
- set_fake_breakpoint();
- } ## end elsif ($is_long_term)
+ # In the common case that all but one of the terms can fit
+ # on a single line, it may look better not to break open the
+ # containing parens. Consider, for example
- } ## end elsif ( $depth < $current_depth)
+ # $color =
+ # join ( '/',
+ # sort { $color_value{$::a} <=> $color_value{$::b}; }
+ # keys %colors );
- #------------------------------------------------------------
- # Handle this token
- #------------------------------------------------------------
+ # which will look like this with the container broken:
- $current_depth = $depth;
+ # $color = join (
+ # '/',
+ # sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
+ # );
- # most token types can skip the rest of this loop
- next unless ( $quick_filter{$type} );
+ # Here is an example of this rule for a long last term:
- # handle comma-arrow
- if ( $type eq '=>' ) {
- next if ( $last_nonblank_type eq '=>' );
- next if $rOpts_break_at_old_comma_breakpoints;
- next
- if ( $rOpts_comma_arrow_breakpoints == 3
- && !$override_cab3[$depth] );
- $want_comma_break[$depth] = 1;
- $index_before_arrow[$depth] = $i_last_nonblank_token;
- next;
- } ## end if ( $type eq '=>' )
+ # log_message( 0, 256, 128,
+ # "Number of routes in adj-RIB-in to be considered: $peercount" );
- elsif ( $type eq '.' ) {
- $last_dot_index[$depth] = $i;
- }
+ # And here is an example with a long first term:
- # Turn off alignment if we are sure that this is not a list
- # environment. To be safe, we will do this if we see certain
- # non-list tokens, such as ';', and also the environment is
- # not a list. Note that '=' could be in any of the = operators
- # (lextest.t). We can't just use the reported environment
- # because it can be incorrect in some cases.
- elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
- && !$self->is_in_list_by_i($i) )
- {
- $dont_align[$depth] = 1;
- $want_comma_break[$depth] = 0;
- $index_before_arrow[$depth] = -1;
- } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
+ # $s = sprintf(
+ # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
+ # $r, $pu, $ps, $cu, $cs, $tt
+ # )
+ # if $style eq 'all';
- # now just handle any commas
- next unless ( $type eq ',' );
+ my $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
- $last_dot_index[$depth] = undef;
- $last_comma_index[$depth] = $i;
+ my $long_last_term = $self->excess_line_length( 0, $i_last_comma ) <= 0;
+ my $long_first_term =
+ $self->excess_line_length( $i_first_comma + 1, $max_index_to_go ) <=
+ 0;
- # break here if this comma follows a '=>'
- # but not if there is a side comment after the comma
- if ( $want_comma_break[$depth] ) {
+ # break at every comma ...
+ if (
- if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
- if ($rOpts_comma_arrow_breakpoints) {
- $want_comma_break[$depth] = 0;
- next;
- }
- }
+ # if requested by user or is best looking
+ $number_of_fields_best == 1
- $self->set_forced_breakpoint($i)
- unless ( $next_nonblank_type eq '#' );
+ # or if this is a sublist of a larger list
+ || $in_hierarchical_list
- # break before the previous token if it looks safe
- # Example of something that we will not try to break before:
- # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
- # Also we don't want to break at a binary operator (like +):
- # $c->createOval(
- # $x + $R, $y +
- # $R => $x - $R,
- # $y - $R, -fill => 'black',
- # );
- my $ibreak = $index_before_arrow[$depth] - 1;
- if ( $ibreak > 0
- && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
- {
- if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
- if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
- if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
-
- # don't break pointer calls, such as the following:
- # File::Spec->curdir => 1,
- # (This is tokenized as adjacent 'w' tokens)
- ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
-
- # And don't break before a comma, as in the following:
- # ( LONGER_THAN,=> 1,
- # EIGHTY_CHARACTERS,=> 2,
- # CAUSES_FORMATTING,=> 3,
- # LIKE_THIS,=> 4,
- # );
- # This example is for -tso but should be general rule
- if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
- && $tokens_to_go[ $ibreak + 1 ] ne ',' )
- {
- $self->set_forced_breakpoint($ibreak);
- }
- } ## end if ( $types_to_go[$ibreak...])
- } ## end if ( $ibreak > 0 && $tokens_to_go...)
+ # or if multiple commas and we don't have a long first or last
+ # term
+ || ( $comma_count > 1
+ && !( $long_last_term || $long_first_term ) )
+ )
+ {
+ foreach ( 0 .. $comma_count - 1 ) {
+ $self->set_forced_breakpoint( $rcomma_index->[$_] );
+ }
+ }
+ elsif ($long_last_term) {
- $want_comma_break[$depth] = 0;
- $index_before_arrow[$depth] = -1;
+ $self->set_forced_breakpoint($i_last_comma);
+ ${$rdo_not_break_apart} = 1 unless $must_break_open;
+ }
+ elsif ($long_first_term) {
- # handle list which mixes '=>'s and ','s:
- # treat any list items so far as an interrupted list
- $interrupted_list[$depth] = 1;
- next;
- } ## end if ( $want_comma_break...)
-
- # Break after all commas above starting depth...
- # But only if the last closing token was followed by a comma,
- # to avoid breaking a list operator (issue c119)
- if ( $depth < $starting_depth
- && $comma_follows_last_closing_token
- && !$dont_align[$depth] )
- {
- $self->set_forced_breakpoint($i)
- unless ( $next_nonblank_type eq '#' );
- next;
- }
+ $self->set_forced_breakpoint($i_first_comma);
+ }
+ else {
- # add this comma to the list..
- my $item_count = $item_count_stack[$depth];
- if ( $item_count == 0 ) {
+ # let breaks be defined by default bond strength logic
+ }
+ return;
+ } ## end sub set_emergency_comma_breakpoints
+
+ sub break_multiline_list {
+ my ( $self, $rhash_IN, $rhash_A, $i_opening_minus ) = @_;
+
+ # Overriden variables
+ my $item_count = $rhash_A->{_item_count_A};
+ my $identifier_count = $rhash_A->{_identifier_count_A};
+
+ # Derived variables:
+ my $ritem_lengths = $rhash_A->{_ritem_lengths};
+ my $ri_term_begin = $rhash_A->{_ri_term_begin};
+ my $ri_term_end = $rhash_A->{_ri_term_end};
+ my $ri_term_comma = $rhash_A->{_ri_term_comma};
+ my $rmax_length = $rhash_A->{_rmax_length};
+ my $comma_count = $rhash_A->{_comma_count};
+ my $i_effective_last_comma = $rhash_A->{_i_effective_last_comma};
+ my $first_term_length = $rhash_A->{_first_term_length};
+ my $i_first_comma = $rhash_A->{_i_first_comma};
+ my $i_last_comma = $rhash_A->{_i_last_comma};
+ my $i_true_last_comma = $rhash_A->{_i_true_last_comma};
+
+ # Veriables received from caller
+ my $i_opening_paren = $rhash_IN->{i_opening_paren};
+ my $i_closing_paren = $rhash_IN->{i_closing_paren};
+ my $rcomma_index = $rhash_IN->{rcomma_index};
+ my $next_nonblank_type = $rhash_IN->{next_nonblank_type};
+ my $list_type = $rhash_IN->{list_type};
+ my $interrupted = $rhash_IN->{interrupted};
+ my $rdo_not_break_apart = $rhash_IN->{rdo_not_break_apart};
+ my $must_break_open = $rhash_IN->{must_break_open};
+## NOTE: these input vars from caller use the values from rhash_A (see above):
+## my $item_count = $rhash_IN->{item_count};
+## my $identifier_count = $rhash_IN->{identifier_count};
+
+ # NOTE: i_opening_paren changes value below so we need to get these here
+ my $opening_is_in_block = $self->is_in_block_by_i($i_opening_paren);
+ my $opening_token = $tokens_to_go[$i_opening_paren];
- # but do not form a list with no opening structure
- # for example:
+ #---------------------------------------------------------------
+ # Section B1: Determine '$number_of_fields' = the best number of
+ # fields to use if this is to be formatted as a table.
+ #---------------------------------------------------------------
- # open INFILE_COPY, ">$input_file_copy"
- # or die ("very long message");
- if ( ( $opening_structure_index_stack[$depth] < 0 )
- && $self->is_in_block_by_i($i) )
- {
- $dont_align[$depth] = 1;
- }
- } ## end if ( $item_count == 0 )
+ # Now we know that this block spans multiple lines; we have to set
+ # at least one breakpoint -- real or fake -- as a signal to break
+ # open any outer containers.
+ set_fake_breakpoint();
- $comma_index[$depth][$item_count] = $i;
- ++$item_count_stack[$depth];
- if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
- $identifier_count_stack[$depth]++;
- }
- } ## end while ( ++$i <= $max_index_to_go)
+ # Set a flag indicating if we need to break open to keep -lp
+ # items aligned. This is necessary if any of the list terms
+ # exceeds the available space after the '('.
+ my $need_lp_break_open = $must_break_open;
+ my $is_lp_formatting = ref( $leading_spaces_to_go[$i_first_comma] );
+ if ( $is_lp_formatting && !$must_break_open ) {
+ my $columns_if_unbroken =
+ $maximum_line_length_at_level[ $levels_to_go[$i_opening_minus] ]
+ - total_line_length( $i_opening_minus, $i_opening_paren );
+ $need_lp_break_open =
+ ( $rmax_length->[0] > $columns_if_unbroken )
+ || ( $rmax_length->[1] > $columns_if_unbroken )
+ || ( $first_term_length > $columns_if_unbroken );
+ }
- #-------------------------------------------
- # end of loop over all tokens in this batch
- #-------------------------------------------
+ my $hash_B =
+ $self->table_layout_B( $rhash_IN, $rhash_A, $is_lp_formatting );
+ return if ( !defined($hash_B) );
+
+ # Updated variables
+ $i_first_comma = $hash_B->{_i_first_comma_B};
+ $i_opening_paren = $hash_B->{_i_opening_paren_B};
+ $item_count = $hash_B->{_item_count_B};
+
+ # New variables
+ my $columns = $hash_B->{_columns};
+ my $formatted_columns = $hash_B->{_formatted_columns};
+ my $formatted_lines = $hash_B->{_formatted_lines};
+ my $max_width = $hash_B->{_max_width};
+ my $new_identifier_count = $hash_B->{_new_identifier_count};
+ my $number_of_fields = $hash_B->{_number_of_fields};
+ my $odd_or_even = $hash_B->{_odd_or_even};
+ my $packed_columns = $hash_B->{_packed_columns};
+ my $packed_lines = $hash_B->{_packed_lines};
+ my $pair_width = $hash_B->{_pair_width};
+ my $ri_ragged_break_list = $hash_B->{_ri_ragged_break_list};
+ my $use_separate_first_term = $hash_B->{_use_separate_first_term};
- # set breaks for any unfinished lists ..
- foreach my $dd ( reverse( $minimum_depth .. $current_depth ) ) {
+ # are we an item contained in an outer list?
+ my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
- $interrupted_list[$dd] = 1;
- $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
- $self->set_comma_breakpoints( $dd, $rbond_strength_bias );
- $self->set_logical_breakpoints($dd)
- if ( $has_old_logical_breakpoints[$dd] );
- $self->set_for_semicolon_breakpoints($dd);
+ my $unused_columns = $formatted_columns - $packed_columns;
- # break open container...
- my $i_opening = $opening_structure_index_stack[$dd];
- if ( defined($i_opening) && $i_opening >= 0 ) {
- $self->set_forced_breakpoint($i_opening)
- unless (
- is_unbreakable_container($dd)
+ # set some empirical parameters to help decide if we should try to
+ # align; high sparsity does not look good, especially with few lines
+ my $sparsity = ($unused_columns) / ($formatted_columns);
+ my $max_allowed_sparsity =
+ ( $item_count < 3 ) ? 0.1
+ : ( $packed_lines == 1 ) ? 0.15
+ : ( $packed_lines == 2 ) ? 0.4
+ : 0.7;
- # Avoid a break which would place an isolated ' or "
- # on a line
- || ( $type eq 'Q'
- && $i_opening >= $max_index_to_go - 2
- && ( $token eq "'" || $token eq '"' ) )
- );
- }
- } ## end for ( my $dd = $current_depth...)
+ my $two_line_word_wrap_ok;
+ if ( $opening_token eq '(' ) {
- # Return a flag indicating if the input file had some good breakpoints.
- # This flag will be used to force a break in a line shorter than the
- # allowed line length.
- if ( $has_old_logical_breakpoints[$current_depth] ) {
- $saw_good_breakpoint = 1;
- }
+ # default is to allow wrapping of short paren lists
+ $two_line_word_wrap_ok = 1;
- # A complex line with one break at an = has a good breakpoint.
- # This is not complex ($total_depth_variation=0):
- # $res1
- # = 10;
- #
- # This is complex ($total_depth_variation=6):
- # $res2 =
- # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
+ # but turn off word wrap where requested
+ if ($rOpts_break_open_compact_parens) {
- # The check ($i_old_.. < $max_index_to_go) was added to fix b1333
- elsif ($i_old_assignment_break
- && $total_depth_variation > 4
- && $old_breakpoint_count == 1
- && $i_old_assignment_break < $max_index_to_go )
- {
- $saw_good_breakpoint = 1;
- } ## end elsif ( $i_old_assignment_break...)
+ # This parameter is a one-character flag, as follows:
+ # '0' matches no parens -> break open NOT OK -> word wrap OK
+ # '1' matches all parens -> break open OK -> word wrap NOT OK
+ # Other values are the same as used by the weld-exclusion-list
+ my $flag = $rOpts_break_open_compact_parens;
+ if ( $flag eq '*'
+ || $flag eq '1' )
+ {
+ $two_line_word_wrap_ok = 0;
+ }
+ elsif ( $flag eq '0' ) {
+ $two_line_word_wrap_ok = 1;
+ }
+ else {
+ my $seqno = $type_sequence_to_go[$i_opening_paren];
+ $two_line_word_wrap_ok =
+ !$self->match_paren_control_flag( $seqno, $flag );
+ }
+ }
+ }
- return $saw_good_breakpoint;
- } ## end sub break_lists
-} ## end closure break_lists
+ #-------------------------------------------------------------------
+ # Section B2: Check for shortcut methods, which avoid treating
+ # a list as a table for relatively small parenthesized lists. These
+ # are usually easier to read if not formatted as tables.
+ #-------------------------------------------------------------------
+ if (
+ $packed_lines <= 2 # probably can fit in 2 lines
+ && $item_count < 9 # doesn't have too many items
+ && $opening_is_in_block # not a sub-container
+ && $two_line_word_wrap_ok # ok to wrap this paren list
+ )
+ {
-my %is_kwiZ;
-my %is_key_type;
+ # Section B2A: Shortcut method 1: for -lp and just one comma:
+ # This is a no-brainer, just break at the comma.
+ if (
+ $is_lp_formatting # -lp
+ && $item_count == 2 # two items, one comma
+ && !$must_break_open
+ )
+ {
+ my $i_break = $rcomma_index->[0];
+ $self->set_forced_breakpoint($i_break);
+ ${$rdo_not_break_apart} = 1;
+ return;
-BEGIN {
+ }
- # Added 'w' to fix b1172
- my @q = qw(k w i Z ->);
- @is_kwiZ{@q} = (1) x scalar(@q);
+ # Section B2B: Shortcut method 2 is for most small ragged lists
+ # which might look best if not displayed as a table.
+ if (
+ ( $number_of_fields == 2 && $item_count == 3 )
+ || (
+ $new_identifier_count > 0 # isn't all quotes
+ && $sparsity > 0.15
+ ) # would be fairly spaced gaps if aligned
+ )
+ {
- # added = for b1211
- @q = qw<( [ { L R } ] ) = b>;
- push @q, ',';
- @is_key_type{@q} = (1) x scalar(@q);
-}
+ my $break_count = $self->set_ragged_breakpoints( $ri_term_comma,
+ $ri_ragged_break_list );
+ ++$break_count if ($use_separate_first_term);
-use constant DEBUG_FIND_START => 0;
+ # NOTE: we should really use the true break count here,
+ # which can be greater if there are large terms and
+ # little space, but usually this will work well enough.
+ unless ($must_break_open) {
-sub find_token_starting_list {
+ if ( $break_count <= 1 ) {
+ ${$rdo_not_break_apart} = 1;
+ }
+ elsif ( $is_lp_formatting && !$need_lp_break_open ) {
+ ${$rdo_not_break_apart} = 1;
+ }
+ }
+ return;
+ }
- # When testing to see if a block will fit on one line, some
- # previous token(s) may also need to be on the line; particularly
- # if this is a sub call. So we will look back at least one
- # token.
- my ( $self, $i_opening_paren ) = @_;
+ } ## end shortcut methods
- # This will be the return index
- my $i_opening_minus = $i_opening_paren;
+ # debug stuff
+ DEBUG_SPARSE && do {
- goto RETURN if ( $i_opening_minus <= 0 );
+ # How many spaces across the page will we fill?
+ my $columns_per_line =
+ ( int $number_of_fields / 2 ) * $pair_width +
+ ( $number_of_fields % 2 ) * $max_width;
- my $im1 = $i_opening_paren - 1;
- my ( $iprev_nb, $type_prev_nb ) = ( $im1, $types_to_go[$im1] );
- if ( $type_prev_nb eq 'b' && $iprev_nb > 0 ) {
- $iprev_nb -= 1;
- $type_prev_nb = $types_to_go[$iprev_nb];
- }
+ print STDOUT
+"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
- if ( $type_prev_nb eq ',' ) {
+ };
- # a previous comma is a good break point
- # $i_opening_minus = $i_opening_paren;
- }
+ #------------------------------------------------------------------
+ # Section B3: Compound List Rule 2:
+ # If this list is too long for one line, and it is an item of a
+ # larger list, then we must format it, regardless of sparsity
+ # (ian.t). One reason that we have to do this is to trigger
+ # Compound List Rule 1, above, which causes breaks at all commas of
+ # all outer lists. In this way, the structure will be properly
+ # displayed.
+ #------------------------------------------------------------------
- elsif (
- $tokens_to_go[$i_opening_paren] eq '('
+ # Decide if this list is too long for one line unless broken
+ my $total_columns = table_columns_available($i_opening_paren);
+ my $too_long = $packed_columns > $total_columns;
- # non-parens added here to fix case b1186
- || $is_kwiZ{$type_prev_nb}
- )
- {
- $i_opening_minus = $im1;
+ # For a paren list, include the length of the token just before the
+ # '(' because this is likely a sub call, and we would have to
+ # include the sub name on the same line as the list. This is still
+ # imprecise, but not too bad. (steve.t)
+ if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
- # Walk back to improve length estimate...
- # FIX for cases b1169 b1170 b1171: start walking back
- # at the previous nonblank. This makes the result insensitive
- # to the flag --space-function-paren, and similar.
- # previous loop: for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
- foreach my $j ( reverse( 0 .. $iprev_nb ) ) {
- if ( $is_key_type{ $types_to_go[$j] } ) {
+ $too_long = $self->excess_line_length( $i_opening_minus,
+ $i_effective_last_comma + 1 ) > 0;
+ }
- # fix for b1211
- if ( $types_to_go[$j] eq '=' ) { $i_opening_minus = $j }
- last;
+ # TODO: For an item after a '=>', try to include the length of the
+ # thing before the '=>'. This is crude and should be improved by
+ # actually looking back token by token.
+ if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
+ my $i_opening_minus_test = $i_opening_paren - 4;
+ if ( $i_opening_minus >= 0 ) {
+ $too_long = $self->excess_line_length( $i_opening_minus_test,
+ $i_effective_last_comma + 1 ) > 0;
}
- $i_opening_minus = $j;
}
- if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
- }
- RETURN:
+ # Always break lists contained in '[' and '{' if too long for 1 line,
+ # and always break lists which are too long and part of a more complex
+ # structure.
+ my $must_break_open_container = $must_break_open
+ || ( $too_long
+ && ( $in_hierarchical_list || !$two_line_word_wrap_ok ) );
- DEBUG_FIND_START && print <<EOM;
-FIND_START: i=$i_opening_paren tok=$tokens_to_go[$i_opening_paren] => im=$i_opening_minus tok=$tokens_to_go[$i_opening_minus]
-EOM
+ #--------------------------------------------------------------------
+ # Section B4: A table will work here. But do not attempt to align
+ # columns if this is a tiny table or it would be too spaced. It
+ # seems that the more packed lines we have, the sparser the list that
+ # can be allowed and still look ok.
+ #--------------------------------------------------------------------
- return $i_opening_minus;
-} ## end sub find_token_starting_list
+ if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
+ || ( $formatted_lines < 2 )
+ || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
+ )
+ {
+ #----------------------------------------------------------------
+ # Section B4A: too sparse: would not look good aligned in a table
+ #----------------------------------------------------------------
-{ ## begin closure set_comma_breakpoints_do
+ # use old breakpoints if this is a 'big' list
+ if ( $packed_lines > 2 && $item_count > 10 ) {
+ write_logfile_entry("List sparse: using old breakpoints\n");
+ $self->copy_old_breakpoints( $i_first_comma, $i_last_comma );
+ }
- my %is_keyword_with_special_leading_term;
+ # let the continuation logic handle it if 2 lines
+ else {
- BEGIN {
+ my $break_count = $self->set_ragged_breakpoints( $ri_term_comma,
+ $ri_ragged_break_list );
+ ++$break_count if ($use_separate_first_term);
- # These keywords have prototypes which allow a special leading item
- # followed by a list
- my @q =
- qw(formline grep kill map printf sprintf push chmod join pack unshift);
- @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
- }
+ unless ($must_break_open_container) {
+ if ( $break_count <= 1 ) {
+ ${$rdo_not_break_apart} = 1;
+ }
+ elsif ( $is_lp_formatting && !$need_lp_break_open ) {
+ ${$rdo_not_break_apart} = 1;
+ }
+ }
+ }
+ return;
+ }
- use constant DEBUG_SPARSE => 0;
+ #--------------------------------------------
+ # Section B4B: Go ahead and format as a table
+ #--------------------------------------------
+ $self->write_formatted_table( $number_of_fields, $comma_count,
+ $rcomma_index, $use_separate_first_term );
+
+ return;
+ } ## end sub break_multiline_list
- sub set_comma_breakpoints_do {
+ sub table_layout_A {
- # Given a list with some commas, set breakpoints at some of the
- # commas, if necessary, to make it easy to read.
+ my ($rhash_IN) = @_;
- my ( $self, $rinput_hash ) = @_;
+ # Find lengths of all list items needed to calculate page layout
- my $depth = $rinput_hash->{depth};
- my $i_opening_paren = $rinput_hash->{i_opening_paren};
- my $i_closing_paren = $rinput_hash->{i_closing_paren};
- my $item_count = $rinput_hash->{item_count};
- my $identifier_count = $rinput_hash->{identifier_count};
- my $rcomma_index = $rinput_hash->{rcomma_index};
- my $next_nonblank_type = $rinput_hash->{next_nonblank_type};
- my $list_type = $rinput_hash->{list_type};
- my $interrupted = $rinput_hash->{interrupted};
- my $rdo_not_break_apart = $rinput_hash->{rdo_not_break_apart};
- my $must_break_open = $rinput_hash->{must_break_open};
- my $has_broken_sublist = $rinput_hash->{has_broken_sublist};
+ # Returns:
+ # - nothing if this list is empty, or
+ # - a ref to a hash containg some derived parameters
+
+ my $i_opening_paren = $rhash_IN->{i_opening_paren};
+ my $i_closing_paren = $rhash_IN->{i_closing_paren};
+ my $identifier_count = $rhash_IN->{identifier_count};
+ my $rcomma_index = $rhash_IN->{rcomma_index};
+ my $item_count = $rhash_IN->{item_count};
# nothing to do if no commas seen
return if ( $item_count < 1 );
my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
my $i_last_comma = $i_true_last_comma;
if ( $i_last_comma >= $max_index_to_go ) {
- $i_last_comma = $rcomma_index->[ --$item_count - 1 ];
+ $item_count -= 1;
return if ( $item_count < 1 );
+ $i_last_comma = $rcomma_index->[ $item_count - 1 ];
}
- my $is_lp_formatting = ref( $leading_spaces_to_go[$i_first_comma] );
- #---------------------------------------------------------------
- # find lengths of all items in the list to calculate page layout
- #---------------------------------------------------------------
my $comma_count = $item_count;
- my @item_lengths;
- my @i_term_begin;
- my @i_term_end;
- my @i_term_comma;
+
+ my $ritem_lengths = [];
+ my $ri_term_begin = [];
+ my $ri_term_end = [];
+ my $ri_term_comma = [];
+
+ my $rmax_length = [ 0, 0 ];
+
my $i_prev_plus;
- my @max_length = ( 0, 0 );
my $first_term_length;
my $i = $i_opening_paren;
my $is_odd = 1;
$i = $rcomma_index->[$j];
my $i_term_end =
- ( $i == 0 || $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
+ ( $i == 0 || $types_to_go[ $i - 1 ] eq 'b' )
+ ? $i - 2
+ : $i - 1;
my $i_term_begin =
( $types_to_go[$i_prev_plus] eq 'b' )
? $i_prev_plus + 1
: $i_prev_plus;
- push @i_term_begin, $i_term_begin;
- push @i_term_end, $i_term_end;
- push @i_term_comma, $i;
+ push @{$ri_term_begin}, $i_term_begin;
+ push @{$ri_term_end}, $i_term_end;
+ push @{$ri_term_comma}, $i;
# note: currently adding 2 to all lengths (for comma and space)
my $length =
2 + token_sequence_length( $i_term_begin, $i_term_end );
- push @item_lengths, $length;
+ push @{$ritem_lengths}, $length;
if ( $j == 0 ) {
$first_term_length = $length;
}
else {
- if ( $length > $max_length[$is_odd] ) {
- $max_length[$is_odd] = $length;
+ if ( $length > $rmax_length->[$is_odd] ) {
+ $rmax_length->[$is_odd] = $length;
}
}
}
# add 2 to length because other lengths include a comma and a blank
$last_item_length += 2;
- push @item_lengths, $last_item_length;
- push @i_term_begin, $i_b + 1;
- push @i_term_end, $i_e;
- push @i_term_comma, undef;
+ push @{$ritem_lengths}, $last_item_length;
+ push @{$ri_term_begin}, $i_b + 1;
+ push @{$ri_term_end}, $i_e;
+ push @{$ri_term_comma}, undef;
my $i_odd = $item_count % 2;
- if ( $last_item_length > $max_length[$i_odd] ) {
- $max_length[$i_odd] = $last_item_length;
+ if ( $last_item_length > $rmax_length->[$i_odd] ) {
+ $rmax_length->[$i_odd] = $last_item_length;
}
$item_count++;
}
}
- #---------------------------------------------------------------
- # End of length calculations
- #---------------------------------------------------------------
-
- #---------------------------------------------------------------
- # Compound List Rule 1:
- # Break at (almost) every comma for a list containing a broken
- # sublist. This has higher priority than the Interrupted List
- # Rule.
- #---------------------------------------------------------------
- if ($has_broken_sublist) {
-
- # Break at every comma except for a comma between two
- # simple, small terms. This prevents long vertical
- # columns of, say, just 0's.
- my $small_length = 10; # 2 + actual maximum length wanted
-
- # We'll insert a break in long runs of small terms to
- # allow alignment in uniform tables.
- my $skipped_count = 0;
- my $columns = table_columns_available($i_first_comma);
- my $fields = int( $columns / $small_length );
- if ( $rOpts_maximum_fields_per_table
- && $fields > $rOpts_maximum_fields_per_table )
- {
- $fields = $rOpts_maximum_fields_per_table;
- }
- my $max_skipped_count = $fields - 1;
-
- my $is_simple_last_term = 0;
- my $is_simple_next_term = 0;
- foreach my $j ( 0 .. $item_count ) {
- $is_simple_last_term = $is_simple_next_term;
- $is_simple_next_term = 0;
- if ( $j < $item_count
- && $i_term_end[$j] == $i_term_begin[$j]
- && $item_lengths[$j] <= $small_length )
- {
- $is_simple_next_term = 1;
- }
- next if $j == 0;
- if ( $is_simple_last_term
- && $is_simple_next_term
- && $skipped_count < $max_skipped_count )
- {
- $skipped_count++;
- }
- else {
- $skipped_count = 0;
- my $i_tc = $i_term_comma[ $j - 1 ];
- last unless defined $i_tc;
- $self->set_forced_breakpoint($i_tc);
- }
- }
-
- # always break at the last comma if this list is
- # interrupted; we wouldn't want to leave a terminal '{', for
- # example.
- if ($interrupted) {
- $self->set_forced_breakpoint($i_true_last_comma);
- }
- return;
- }
-
-#my ( $a, $b, $c ) = caller();
-#print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
-#i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
-#print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
-
- #---------------------------------------------------------------
- # Interrupted List Rule:
- # A list is forced to use old breakpoints if it was interrupted
- # by side comments or blank lines, or requested by user.
- #---------------------------------------------------------------
- if ( $rOpts_break_at_old_comma_breakpoints
- || $interrupted
- || $i_opening_paren < 0 )
- {
- $self->copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
- return;
+ # be sure we do not extend beyond the current list length
+ if ( $i_effective_last_comma >= $max_index_to_go ) {
+ $i_effective_last_comma = $max_index_to_go - 1;
}
- #---------------------------------------------------------------
- # Looks like a list of items. We have to look at it and size it up.
- #---------------------------------------------------------------
-
- my $opening_token = $tokens_to_go[$i_opening_paren];
- my $opening_is_in_block = $self->is_in_block_by_i($i_opening_paren);
-
- #-------------------------------------------------------------------
- # Return if this will fit on one line
- #-------------------------------------------------------------------
-
- # The -bbxi=2 parameters can add an extra hidden level of indentation;
- # this needs a tolerance to avoid instability. Fixes b1259, 1260.
- my $tol = 0;
- if ( $break_before_container_types{$opening_token}
- && $container_indentation_options{$opening_token}
- && $container_indentation_options{$opening_token} == 2 )
- {
- $tol = $rOpts_indent_columns;
+ # Return the hash of derived variables.
+ return {
+
+ # Updated variables
+ _item_count_A => $item_count,
+ _identifier_count_A => $identifier_count,
+
+ # New variables
+ _ritem_lengths => $ritem_lengths,
+ _ri_term_begin => $ri_term_begin,
+ _ri_term_end => $ri_term_end,
+ _ri_term_comma => $ri_term_comma,
+ _rmax_length => $rmax_length,
+ _comma_count => $comma_count,
+ _i_effective_last_comma => $i_effective_last_comma,
+ _first_term_length => $first_term_length,
+ _i_first_comma => $i_first_comma,
+ _i_last_comma => $i_last_comma,
+ _i_true_last_comma => $i_true_last_comma,
+ };
- # use greater of -ci and -i (fix for case b1334)
- if ( $tol < $rOpts_continuation_indentation ) {
- $tol = $rOpts_continuation_indentation;
- }
- }
+ } ## end sub table_layout_A
- my $i_opening_minus = $self->find_token_starting_list($i_opening_paren);
- return
- unless $self->excess_line_length( $i_opening_minus, $i_closing_paren )
- + $tol > 0;
+ sub table_layout_B {
- #-------------------------------------------------------------------
- # Now we know that this block spans multiple lines; we have to set
- # at least one breakpoint -- real or fake -- as a signal to break
- # open any outer containers.
- #-------------------------------------------------------------------
- set_fake_breakpoint();
+ my ( $self, $rhash_IN, $rhash_A, $is_lp_formatting ) = @_;
- # be sure we do not extend beyond the current list length
- if ( $i_effective_last_comma >= $max_index_to_go ) {
- $i_effective_last_comma = $max_index_to_go - 1;
- }
+ # Determine variables for the best table layout, including
+ # the best number of fields.
- # Set a flag indicating if we need to break open to keep -lp
- # items aligned. This is necessary if any of the list terms
- # exceeds the available space after the '('.
- my $need_lp_break_open = $must_break_open;
- if ( $is_lp_formatting && !$must_break_open ) {
- my $columns_if_unbroken =
- $maximum_line_length_at_level[ $levels_to_go[$i_opening_minus] ]
- - total_line_length( $i_opening_minus, $i_opening_paren );
- $need_lp_break_open =
- ( $max_length[0] > $columns_if_unbroken )
- || ( $max_length[1] > $columns_if_unbroken )
- || ( $first_term_length > $columns_if_unbroken );
- }
+ # Returns:
+ # - nothing if nothing more to do
+ # - a ref to a hash containg some derived parameters
+
+ # Variables from caller
+ my $i_opening_paren = $rhash_IN->{i_opening_paren};
+ my $list_type = $rhash_IN->{list_type};
+ my $next_nonblank_type = $rhash_IN->{next_nonblank_type};
+ my $rcomma_index = $rhash_IN->{rcomma_index};
+ my $rdo_not_break_apart = $rhash_IN->{rdo_not_break_apart};
+
+ # Table size variables
+ my $comma_count = $rhash_A->{_comma_count};
+ my $first_term_length = $rhash_A->{_first_term_length};
+ my $i_effective_last_comma = $rhash_A->{_i_effective_last_comma};
+ my $i_first_comma = $rhash_A->{_i_first_comma};
+ my $identifier_count = $rhash_A->{_identifier_count_A};
+ my $item_count = $rhash_A->{_item_count_A};
+ my $ri_term_begin = $rhash_A->{_ri_term_begin};
+ my $ri_term_comma = $rhash_A->{_ri_term_comma};
+ my $ri_term_end = $rhash_A->{_ri_term_end};
+ my $ritem_lengths = $rhash_A->{_ritem_lengths};
+ my $rmax_length = $rhash_A->{_rmax_length};
# Specify if the list must have an even number of fields or not.
# It is generally safest to assume an even number, because the
# list items might be a hash list. But if we can be sure that
# it is not a hash, then we can allow an odd number for more
# flexibility.
- my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count
-
- if ( $identifier_count >= $item_count - 1
+ # 1 = odd field count ok, 2 = want even count
+ my $odd_or_even = 2;
+ if (
+ $identifier_count >= $item_count - 1
|| $is_assignment{$next_nonblank_type}
- || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
+ || ( $list_type
+ && $list_type ne '=>'
+ && $list_type !~ /^[\:\?]$/ )
)
{
$odd_or_even = 1;
# do we have a long first term which should be
# left on a line by itself?
my $use_separate_first_term = (
- $odd_or_even == 1 # only if we can use 1 field/line
- && $item_count > 3 # need several items
+ $odd_or_even == 1 # only if we can use 1 field/line
+ && $item_count > 3 # need several items
&& $first_term_length >
- 2 * $max_length[0] - 2 # need long first term
+ 2 * $rmax_length->[0] - 2 # need long first term
&& $first_term_length >
- 2 * $max_length[1] - 2 # need long first term
+ 2 * $rmax_length->[1] - 2 # need long first term
);
# or do we know from the type of list that the first term should
if ($use_separate_first_term) {
# ..set a break and update starting values
- $use_separate_first_term = 1;
$self->set_forced_breakpoint($i_first_comma);
+ $item_count--;
+
+ #---------------------------------------------------------------
+ # Section B1A: Stop if one item remains ($i_first_comma = undef)
+ #---------------------------------------------------------------
+ # Fix for b1442: use '$item_count' here instead of '$comma_count'
+ # to make the result independent of any trailing comma.
+ return if ( $item_count <= 1 );
+
$i_opening_paren = $i_first_comma;
$i_first_comma = $rcomma_index->[1];
- $item_count--;
- return if $comma_count == 1;
- shift @item_lengths;
- shift @i_term_begin;
- shift @i_term_end;
- shift @i_term_comma;
+ shift @{$ritem_lengths};
+ shift @{$ri_term_begin};
+ shift @{$ri_term_end};
+ shift @{$ri_term_comma};
}
# if not, update the metrics to include the first term
else {
- if ( $first_term_length > $max_length[0] ) {
- $max_length[0] = $first_term_length;
+ if ( $first_term_length > $rmax_length->[0] ) {
+ $rmax_length->[0] = $first_term_length;
}
}
# Field width parameters
- my $pair_width = ( $max_length[0] + $max_length[1] );
+ my $pair_width = ( $rmax_length->[0] + $rmax_length->[1] );
my $max_width =
- ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
+ ( $rmax_length->[0] > $rmax_length->[1] )
+ ? $rmax_length->[0]
+ : $rmax_length->[1];
# Number of free columns across the page width for laying out tables
my $columns = table_columns_available($i_first_comma);
# paren, but in some cases we might not.
if ( $rOpts_variable_maximum_line_length
&& $tokens_to_go[$i_opening_paren] eq '('
- && @i_term_begin )
- ##&& !$old_breakpoint_to_go[$i_opening_paren] ) ## in b1210 patch
+ && @{$ri_term_begin} )
{
- my $ib = $i_term_begin[0];
+ my $ib = $ri_term_begin->[0];
my $type = $types_to_go[$ib];
# So far, the only known instance of this problem is when
}
}
- # Estimated maximum number of fields which fit this space
- # This will be our first guess
+ # Estimated maximum number of fields which fit this space.
+ # This will be our first guess:
my $number_of_fields_max =
maximum_number_of_fields( $columns, $odd_or_even, $max_width,
$pair_width );
my $number_of_fields = $number_of_fields_max;
- # Find the best-looking number of fields
- # and make this our second guess if possible
+ # Find the best-looking number of fields.
+ # This will be our second guess, if possible.
my ( $number_of_fields_best, $ri_ragged_break_list,
$new_identifier_count )
- = $self->study_list_complexity( \@i_term_begin, \@i_term_end,
- \@item_lengths, $max_width );
+ = $self->study_list_complexity( $ri_term_begin, $ri_term_end,
+ $ritem_lengths, $max_width );
if ( $number_of_fields_best != 0
&& $number_of_fields_best < $number_of_fields_max )
$number_of_fields = $number_of_fields_best;
}
- # ----------------------------------------------------------------------
- # If we are crowded and the -lp option is being used, try to
- # undo some indentation
- # ----------------------------------------------------------------------
+ # fix b1427
+ elsif ($number_of_fields_best > 1
+ && $number_of_fields_best > $number_of_fields_max )
+ {
+ $number_of_fields_best = $number_of_fields_max;
+ }
+
+ # If we are crowded and the -lp option is being used, try
+ # to undo some indentation
if (
$is_lp_formatting
&& (
)
)
{
- my $available_spaces =
- $self->get_available_spaces_to_go($i_first_comma);
- if ( $available_spaces > 0 ) {
-
- my $spaces_wanted = $max_width - $columns; # for 1 field
-
- if ( $number_of_fields_best == 0 ) {
- $number_of_fields_best =
- get_maximum_fields_wanted( \@item_lengths );
- }
-
- if ( $number_of_fields_best != 1 ) {
- my $spaces_wanted_2 =
- 1 + $pair_width - $columns; # for 2 fields
- if ( $available_spaces > $spaces_wanted_2 ) {
- $spaces_wanted = $spaces_wanted_2;
- }
- }
-
- if ( $spaces_wanted > 0 ) {
- my $deleted_spaces =
- $self->reduce_lp_indentation( $i_first_comma,
- $spaces_wanted );
-
- # redo the math
- if ( $deleted_spaces > 0 ) {
- $columns = table_columns_available($i_first_comma);
- $number_of_fields_max =
- maximum_number_of_fields( $columns, $odd_or_even,
- $max_width, $pair_width );
- $number_of_fields = $number_of_fields_max;
+ ( $number_of_fields, $number_of_fields_best, $columns ) =
+ $self->lp_table_fix(
+
+ $columns,
+ $i_first_comma,
+ $max_width,
+ $number_of_fields,
+ $number_of_fields_best,
+ $odd_or_even,
+ $pair_width,
+ $ritem_lengths,
- if ( $number_of_fields_best == 1
- && $number_of_fields >= 1 )
- {
- $number_of_fields = $number_of_fields_best;
- }
- }
- }
- }
+ );
}
# try for one column if two won't work
if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
my $packed_lines = 1 + int( $packed_columns / $columns );
- # are we an item contained in an outer list?
- my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
-
+ #-----------------------------------------------------------------
+ # Section B1B: Stop here if we did not compute a positive number of
+ # fields. In this case we just have to bail out.
+ #-----------------------------------------------------------------
if ( $number_of_fields <= 0 ) {
-# #---------------------------------------------------------------
-# # We're in trouble. We can't find a single field width that works.
-# # There is no simple answer here; we may have a single long list
-# # item, or many.
-# #---------------------------------------------------------------
-#
-# In many cases, it may be best to not force a break if there is just one
-# comma, because the standard continuation break logic will do a better
-# job without it.
-#
-# In the common case that all but one of the terms can fit
-# on a single line, it may look better not to break open the
-# containing parens. Consider, for example
-#
-# $color =
-# join ( '/',
-# sort { $color_value{$::a} <=> $color_value{$::b}; }
-# keys %colors );
-#
-# which will look like this with the container broken:
-#
-# $color = join (
-# '/',
-# sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
-# );
-#
-# Here is an example of this rule for a long last term:
-#
-# log_message( 0, 256, 128,
-# "Number of routes in adj-RIB-in to be considered: $peercount" );
-#
-# And here is an example with a long first term:
-#
-# $s = sprintf(
-# "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
-# $r, $pu, $ps, $cu, $cs, $tt
-# )
-# if $style eq 'all';
-
- $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
-
- my $long_last_term =
- $self->excess_line_length( 0, $i_last_comma ) <= 0;
- my $long_first_term =
- $self->excess_line_length( $i_first_comma + 1, $max_index_to_go )
- <= 0;
-
- # break at every comma ...
- if (
-
- # if requested by user or is best looking
- $number_of_fields_best == 1
-
- # or if this is a sublist of a larger list
- || $in_hierarchical_list
-
- # or if multiple commas and we don't have a long first or last
- # term
- || ( $comma_count > 1
- && !( $long_last_term || $long_first_term ) )
- )
- {
- foreach ( 0 .. $comma_count - 1 ) {
- $self->set_forced_breakpoint( $rcomma_index->[$_] );
- }
- }
- elsif ($long_last_term) {
-
- $self->set_forced_breakpoint($i_last_comma);
- ${$rdo_not_break_apart} = 1 unless $must_break_open;
- }
- elsif ($long_first_term) {
+ $self->set_emergency_comma_breakpoints(
- $self->set_forced_breakpoint($i_first_comma);
- }
- else {
+ $number_of_fields_best,
+ $rhash_IN,
+ $comma_count,
+ $i_first_comma,
- # let breaks be defined by default bond strength logic
- }
+ );
return;
}
- # --------------------------------------------------------
- # We have a tentative field count that seems to work.
+ #------------------------------------------------------------------
+ # Section B1B: We have a tentative field count that seems to work.
+ # Now we must look more closely to determine if a table layout will
+ # actually look okay.
+ #------------------------------------------------------------------
+
# How many lines will this require?
- # --------------------------------------------------------
my $formatted_lines = $item_count / ($number_of_fields);
if ( $formatted_lines != int $formatted_lines ) {
$formatted_lines = 1 + int $formatted_lines;
# So far we've been trying to fill out to the right margin. But
# compact tables are easier to read, so let's see if we can use fewer
# fields without increasing the number of lines.
- $number_of_fields =
- compactify_table( $item_count, $number_of_fields, $formatted_lines,
- $odd_or_even );
-
- # How many spaces across the page will we fill?
- my $columns_per_line =
- ( int $number_of_fields / 2 ) * $pair_width +
- ( $number_of_fields % 2 ) * $max_width;
+ $number_of_fields = compactify_table( $item_count, $number_of_fields,
+ $formatted_lines, $odd_or_even );
my $formatted_columns;
$formatted_columns = $packed_columns;
}
- my $unused_columns = $formatted_columns - $packed_columns;
-
- # set some empirical parameters to help decide if we should try to
- # align; high sparsity does not look good, especially with few lines
- my $sparsity = ($unused_columns) / ($formatted_columns);
- my $max_allowed_sparsity =
- ( $item_count < 3 ) ? 0.1
- : ( $packed_lines == 1 ) ? 0.15
- : ( $packed_lines == 2 ) ? 0.4
- : 0.7;
-
- my $two_line_word_wrap_ok;
- if ( $opening_token eq '(' ) {
-
- # default is to allow wrapping of short paren lists
- $two_line_word_wrap_ok = 1;
-
- # but turn off word wrap where requested
- if ($rOpts_break_open_compact_parens) {
-
- # This parameter is a one-character flag, as follows:
- # '0' matches no parens -> break open NOT OK -> word wrap OK
- # '1' matches all parens -> break open OK -> word wrap NOT OK
- # Other values are the same as used by the weld-exclusion-list
- my $flag = $rOpts_break_open_compact_parens;
- if ( $flag eq '*'
- || $flag eq '1' )
- {
- $two_line_word_wrap_ok = 0;
- }
- elsif ( $flag eq '0' ) {
- $two_line_word_wrap_ok = 1;
- }
- else {
- my $KK = $K_to_go[$i_opening_paren];
- $two_line_word_wrap_ok =
- !$self->match_paren_flag( $KK, $flag );
- }
- }
- }
-
- # Begin check for shortcut methods, which avoid treating a list
- # as a table for relatively small parenthesized lists. These
- # are usually easier to read if not formatted as tables.
- if (
- $packed_lines <= 2 # probably can fit in 2 lines
- && $item_count < 9 # doesn't have too many items
- && $opening_is_in_block # not a sub-container
- && $two_line_word_wrap_ok # ok to wrap this paren list
- ##&& $opening_token eq '(' # is paren list
- )
- {
-
- # Shortcut method 1: for -lp and just one comma:
- # This is a no-brainer, just break at the comma.
- if (
- $is_lp_formatting # -lp
- && $item_count == 2 # two items, one comma
- && !$must_break_open
- )
- {
- my $i_break = $rcomma_index->[0];
- $self->set_forced_breakpoint($i_break);
- ${$rdo_not_break_apart} = 1;
- return;
-
- }
-
- # method 2 is for most small ragged lists which might look
- # best if not displayed as a table.
- if (
- ( $number_of_fields == 2 && $item_count == 3 )
- || (
- $new_identifier_count > 0 # isn't all quotes
- && $sparsity > 0.15
- ) # would be fairly spaced gaps if aligned
- )
- {
-
- my $break_count = $self->set_ragged_breakpoints( \@i_term_comma,
- $ri_ragged_break_list );
- ++$break_count if ($use_separate_first_term);
-
- # NOTE: we should really use the true break count here,
- # which can be greater if there are large terms and
- # little space, but usually this will work well enough.
- unless ($must_break_open) {
-
- if ( $break_count <= 1 ) {
- ${$rdo_not_break_apart} = 1;
- }
- elsif ( $is_lp_formatting && !$need_lp_break_open ) {
- ${$rdo_not_break_apart} = 1;
- }
- }
- return;
- }
-
- } ## end shortcut methods
-
- # debug stuff
- DEBUG_SPARSE && do {
- print STDOUT
-"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
-
+ # Construce hash_B:
+ return {
+
+ # Updated variables
+ _i_first_comma_B => $i_first_comma,
+ _i_opening_paren_B => $i_opening_paren,
+ _item_count_B => $item_count,
+
+ # New variables
+ _columns => $columns,
+ _formatted_columns => $formatted_columns,
+ _formatted_lines => $formatted_lines,
+ _max_width => $max_width,
+ _new_identifier_count => $new_identifier_count,
+ _number_of_fields => $number_of_fields,
+ _odd_or_even => $odd_or_even,
+ _packed_columns => $packed_columns,
+ _packed_lines => $packed_lines,
+ _pair_width => $pair_width,
+ _ri_ragged_break_list => $ri_ragged_break_list,
+ _use_separate_first_term => $use_separate_first_term,
};
+ } ## end sub table_layout_B
- #---------------------------------------------------------------
- # Compound List Rule 2:
- # If this list is too long for one line, and it is an item of a
- # larger list, then we must format it, regardless of sparsity
- # (ian.t). One reason that we have to do this is to trigger
- # Compound List Rule 1, above, which causes breaks at all commas of
- # all outer lists. In this way, the structure will be properly
- # displayed.
- #---------------------------------------------------------------
-
- # Decide if this list is too long for one line unless broken
- my $total_columns = table_columns_available($i_opening_paren);
- my $too_long = $packed_columns > $total_columns;
+ sub lp_table_fix {
- # For a paren list, include the length of the token just before the
- # '(' because this is likely a sub call, and we would have to
- # include the sub name on the same line as the list. This is still
- # imprecise, but not too bad. (steve.t)
- if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
+ # try to undo some -lp indentation to improve table formatting
- $too_long = $self->excess_line_length( $i_opening_minus,
- $i_effective_last_comma + 1 ) > 0;
- }
+ my (
- # FIXME: For an item after a '=>', try to include the length of the
- # thing before the '=>'. This is crude and should be improved by
- # actually looking back token by token.
- if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
- my $i_opening_minus_test = $i_opening_paren - 4;
- if ( $i_opening_minus >= 0 ) {
- $too_long = $self->excess_line_length( $i_opening_minus_test,
- $i_effective_last_comma + 1 ) > 0;
- }
- }
+ $self, #
- # Always break lists contained in '[' and '{' if too long for 1 line,
- # and always break lists which are too long and part of a more complex
- # structure.
- my $must_break_open_container = $must_break_open
- || ( $too_long
- && ( $in_hierarchical_list || !$two_line_word_wrap_ok ) );
+ $columns,
+ $i_first_comma,
+ $max_width,
+ $number_of_fields,
+ $number_of_fields_best,
+ $odd_or_even,
+ $pair_width,
+ $ritem_lengths,
-#print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
+ ) = @_;
- #---------------------------------------------------------------
- # The main decision:
- # Now decide if we will align the data into aligned columns. Do not
- # attempt to align columns if this is a tiny table or it would be
- # too spaced. It seems that the more packed lines we have, the
- # sparser the list that can be allowed and still look ok.
- #---------------------------------------------------------------
+ my $available_spaces =
+ $self->get_available_spaces_to_go($i_first_comma);
+ if ( $available_spaces > 0 ) {
- if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
- || ( $formatted_lines < 2 )
- || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
- )
- {
+ my $spaces_wanted = $max_width - $columns; # for 1 field
- #---------------------------------------------------------------
- # too sparse: would look ugly if aligned in a table;
- #---------------------------------------------------------------
+ if ( $number_of_fields_best == 0 ) {
+ $number_of_fields_best =
+ get_maximum_fields_wanted($ritem_lengths);
+ }
- # use old breakpoints if this is a 'big' list
- if ( $packed_lines > 2 && $item_count > 10 ) {
- write_logfile_entry("List sparse: using old breakpoints\n");
- $self->copy_old_breakpoints( $i_first_comma, $i_last_comma );
+ if ( $number_of_fields_best != 1 ) {
+ my $spaces_wanted_2 = 1 + $pair_width - $columns; # for 2 fields
+ if ( $available_spaces > $spaces_wanted_2 ) {
+ $spaces_wanted = $spaces_wanted_2;
+ }
}
- # let the continuation logic handle it if 2 lines
- else {
+ if ( $spaces_wanted > 0 ) {
+ my $deleted_spaces =
+ $self->reduce_lp_indentation( $i_first_comma,
+ $spaces_wanted );
- my $break_count = $self->set_ragged_breakpoints( \@i_term_comma,
- $ri_ragged_break_list );
- ++$break_count if ($use_separate_first_term);
+ # redo the math
+ if ( $deleted_spaces > 0 ) {
+ $columns = table_columns_available($i_first_comma);
+ $number_of_fields =
+ maximum_number_of_fields( $columns, $odd_or_even,
+ $max_width, $pair_width );
- unless ($must_break_open_container) {
- if ( $break_count <= 1 ) {
- ${$rdo_not_break_apart} = 1;
- }
- elsif ( $is_lp_formatting && !$need_lp_break_open ) {
- ${$rdo_not_break_apart} = 1;
+ if ( $number_of_fields_best == 1
+ && $number_of_fields >= 1 )
+ {
+ $number_of_fields = $number_of_fields_best;
}
}
}
- return;
}
+ return ( $number_of_fields, $number_of_fields_best, $columns );
+ } ## end sub lp_table_fix
+
+ sub write_formatted_table {
+
+ # Write a table of comma separated items with fixed number of fields
+ my ( $self, $number_of_fields, $comma_count, $rcomma_index,
+ $use_separate_first_term )
+ = @_;
- #---------------------------------------------------------------
- # go ahead and format as a table
- #---------------------------------------------------------------
write_logfile_entry(
"List: auto formatting with $number_of_fields fields/row\n");
my $j_first_break =
- $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
+ $use_separate_first_term
+ ? $number_of_fields
+ : $number_of_fields - 1;
my $j = $j_first_break;
while ( $j < $comma_count ) {
$j += $number_of_fields;
}
return;
- } ## end sub set_comma_breakpoints_do
-} ## end closure set_comma_breakpoints_do
+ } ## end sub write_formatted_table
+
+} ## end closure set_comma_breakpoint_final
sub study_list_complexity {
&& $i_last_last_break != $i - 2 )
{
- ## FIXME: don't strand a small term
+ ## TODO: don't strand a small term
pop @i_ragged_break_list;
push @i_ragged_break_list, $i - 2;
push @i_ragged_break_list, $i - 1;
my ( $self, $i_first_comma, $i_last_comma ) = @_;
for my $i ( $i_first_comma .. $i_last_comma ) {
if ( $old_breakpoint_to_go[$i] ) {
- $self->set_forced_breakpoint($i);
+
+ # If the comma style is under certain controls, and if this is a
+ # comma breakpoint with the comma is at the beginning of the next
+ # line, then we must pass that index instead. This will allow sub
+ # set_forced_breakpoints to check and follow the user settings. This
+ # produces a uniform style and can prevent instability (b1422).
+ #
+ # The flag '$controlled_comma_style' will be set if the user
+ # entered any of -wbb=',' -wba=',' -kbb=',' -kba=','. It is not
+ # needed or set for the -boc flag.
+ my $ibreak = $i;
+ if ( $types_to_go[$ibreak] ne ',' && $controlled_comma_style ) {
+ my $index = $inext_to_go[$ibreak];
+ if ( $index > $ibreak && $types_to_go[$index] eq ',' ) {
+ $ibreak = $index;
+ }
+ }
+ $self->set_forced_breakpoint($ibreak);
}
}
return;
-}
+} ## end sub copy_old_breakpoints
sub set_nobreaks {
my ( $self, $i, $j ) = @_;
# shouldn't happen; non-critical error
else {
- 0 && do {
+ if (DEVEL_MODE) {
my ( $a, $b, $c ) = caller();
- print STDOUT
- "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
- };
+ Fault(<<EOM);
+NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go
+EOM
+ }
}
return;
} ## end sub set_nobreaks
# with a get_spaces method.
my $indentation = shift;
return ref($indentation) ? $indentation->get_spaces() : $indentation;
-}
+} ## end sub get_spaces
sub get_recoverable_spaces {
# to get them to line up with their opening parens
my $indentation = shift;
return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
-}
+} ## end sub get_recoverable_spaces
sub get_available_spaces_to_go {
# an -lp indentation level. This survives between batches.
my $lp_position_predictor;
- # A level at which the lp format becomes too highly stressed to continue
- my $lp_cutoff_level;
-
BEGIN {
# Index names for the -lp stack variables.
_lp_container_seqno_ => $i++,
_lp_space_count_ => $i++,
};
- }
+ } ## end BEGIN
sub initialize_lp_vars {
$lp_position_predictor = 0;
$max_lp_stack = 0;
- $lp_cutoff_level = min( $stress_level_alpha, $stress_level_beta + 2 );
# we can turn off -lp if all levels will be at or above the cutoff
- if ( $lp_cutoff_level <= 1 ) {
+ if ( $high_stress_level <= 1 ) {
$rOpts_line_up_parentheses = 0;
$rOpts_extended_line_up_parentheses = 0;
}
@hash_test2{@q} = (1) x scalar(@q);
@q = qw( . || && );
@hash_test3{@q} = (1) x scalar(@q);
- }
+ } ## end BEGIN
+
+ # shared variables, re-initialized for each batch
+ my $rlp_object_list;
+ my $max_lp_object_list;
+ my %lp_comma_count;
+ my %lp_arrow_count;
+ my $space_count;
+ my $current_level;
+ my $current_ci_level;
+ my $ii_begin_line;
+ my $in_lp_mode;
+ my $stack_changed;
+ my $K_last_nonblank;
+ my $last_nonblank_token;
+ my $last_nonblank_type;
+ my $last_last_nonblank_type;
sub set_lp_indentation {
+ my ($self) = @_;
+
#------------------------------------------------------------------
# Define the leading whitespace for all tokens in the current batch
# when the -lp formatting is selected.
#------------------------------------------------------------------
- my ($self) = @_;
-
return unless ($rOpts_line_up_parentheses);
return unless ( defined($max_index_to_go) && $max_index_to_go >= 0 );
# List of -lp indentation objects created in this batch
- my $rlp_object_list = [];
- my $max_lp_object_list = UNDEFINED_INDEX;
-
- my %last_lp_equals;
- my %lp_comma_count;
- my %lp_arrow_count;
- my $ii_begin_line = 0;
-
- my $rLL = $self->[_rLL_];
- my $Klimit = $self->[_Klimit_];
- my $rbreak_container = $self->[_rbreak_container_];
- my $rshort_nested = $self->[_rshort_nested_];
- my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
- my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
- my $starting_in_quote = $self->[_this_batch_]->[_starting_in_quote_];
- my $K_closing_container = $self->[_K_closing_container_];
- my $rlp_object_by_seqno = $self->[_rlp_object_by_seqno_];
- my $radjusted_levels = $self->[_radjusted_levels_];
- my $rbreak_before_container_by_seqno =
- $self->[_rbreak_before_container_by_seqno_];
- my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
+ $rlp_object_list = [];
+ $max_lp_object_list = -1;
+
+ %lp_comma_count = ();
+ %lp_arrow_count = ();
+ $space_count = undef;
+ $current_level = undef;
+ $current_ci_level = undef;
+ $ii_begin_line = 0;
+ $in_lp_mode = 0;
+ $stack_changed = 1;
+ $K_last_nonblank = undef;
+ $last_nonblank_token = EMPTY_STRING;
+ $last_nonblank_type = EMPTY_STRING;
+ $last_last_nonblank_type = EMPTY_STRING;
+
+ my %last_lp_equals = ();
+
+ my $rLL = $self->[_rLL_];
+ my $starting_in_quote = $self->[_this_batch_]->[_starting_in_quote_];
- my $nws = @{$radjusted_levels};
my $imin = 0;
# The 'starting_in_quote' flag means that the first token is the first
$imin += 1;
}
- my $K_last_nonblank;
my $Kpnb = $K_to_go[0] - 1;
if ( $Kpnb > 0 && $rLL->[$Kpnb]->[_TYPE_] eq 'b' ) {
$Kpnb -= 1;
$K_last_nonblank = $Kpnb;
}
- my $last_nonblank_token = EMPTY_STRING;
- my $last_nonblank_type = EMPTY_STRING;
- my $last_last_nonblank_type = EMPTY_STRING;
-
if ( defined($K_last_nonblank) ) {
$last_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
$last_nonblank_type = $rLL->[$K_last_nonblank]->[_TYPE_];
}
- my ( $space_count, $current_level, $current_ci_level, $in_lp_mode );
- my $stack_changed = 1;
-
#-----------------------------------
# Loop over all tokens in this batch
#-----------------------------------
foreach my $ii ( $imin .. $max_index_to_go ) {
- my $KK = $K_to_go[$ii];
- my $type = $types_to_go[$ii];
- my $token = $tokens_to_go[$ii];
- my $level = $levels_to_go[$ii];
- my $ci_level = $ci_levels_to_go[$ii];
- my $total_depth = $nesting_depth_to_go[$ii];
- my $standard_spaces = $leading_spaces_to_go[$ii];
-
- #--------------------------------------------------
- # Adjust levels if necessary to recycle whitespace:
- #--------------------------------------------------
- if ( defined($radjusted_levels) && @{$radjusted_levels} == $Klimit )
- {
- $level = $radjusted_levels->[$KK];
- if ( $level < 0 ) { $level = 0 } # note: this should not happen
- }
+ my $type = $types_to_go[$ii];
+ my $token = $tokens_to_go[$ii];
+ my $level = $levels_to_go[$ii];
+ my $ci_level = $ci_levels_to_go[$ii];
+ my $total_depth = $nesting_depth_to_go[$ii];
# get the top state from the stack if it has changed
if ($stack_changed) {
$stack_changed = 0;
}
- #------------------------------
- # update the position predictor
- #------------------------------
+ #------------------------------------------------------------
+ # Break at a previous '=' if necessary to control line length
+ #------------------------------------------------------------
if ( $type eq '{' || $type eq '(' ) {
-
$lp_comma_count{ $total_depth + 1 } = 0;
$lp_arrow_count{ $total_depth + 1 } = 0;
# If we come to an opening token after an '=' token of some
# type, see if it would be helpful to 'break' after the '=' to
# save space
- my $last_equals = $last_lp_equals{$total_depth};
-
- # Skip an empty set of parens, such as after channel():
- # my $exchange = $self->_channel()->exchange(
- # This fixes issues b1318 b1322 b1323 b1328
- # TODO: maybe also skip parens with just one token?
- my $is_empty_container;
- if ( $last_equals && $ii < $max_index_to_go ) {
- my $seqno = $type_sequence_to_go[$ii];
- my $inext_nb = $ii + 1;
- $inext_nb++
- if ( $types_to_go[$inext_nb] eq 'b' );
- my $seqno_nb = $type_sequence_to_go[$inext_nb];
- $is_empty_container =
- $seqno && $seqno_nb && $seqno_nb == $seqno;
- }
-
- if ( $last_equals
- && $last_equals > $ii_begin_line
- && !$is_empty_container )
- {
+ my $ii_last_equals = $last_lp_equals{$total_depth};
+ if ($ii_last_equals) {
+ $self->lp_equals_break_check( $ii, $ii_last_equals );
+ }
+ }
+
+ #------------------------
+ # Handle decreasing depth
+ #------------------------
+ # Note that one token may have both decreasing and then increasing
+ # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
+ # in this example we would first go back to (1,0) then up to (2,0)
+ # in a single call.
+ if ( $level < $current_level || $ci_level < $current_ci_level ) {
+ $self->lp_decreasing_depth($ii);
+ }
- my $seqno = $type_sequence_to_go[$ii];
+ #------------------------
+ # handle increasing depth
+ #------------------------
+ if ( $level > $current_level || $ci_level > $current_ci_level ) {
+ $self->lp_increasing_depth($ii);
+ }
- # find the position if we break at the '='
- my $i_test = $last_equals;
+ #------------------
+ # Handle all tokens
+ #------------------
+ if ( $type ne 'b' ) {
- # Fix for issue b1229, check for break before
- if ( $want_break_before{ $types_to_go[$i_test] } ) {
- if ( $i_test > 0 ) { $i_test-- }
- }
- elsif ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
+ # Count commas and look for non-list characters. Once we see a
+ # non-list character, we give up and don't look for any more
+ # commas.
+ if ( $type eq '=>' ) {
+ $lp_arrow_count{$total_depth}++;
- my $test_position = total_line_length( $i_test, $ii );
- my $mll =
- $maximum_line_length_at_level[ $levels_to_go[$i_test] ];
+ # remember '=>' like '=' for estimating breaks (but see
+ # above note for b1035)
+ $last_lp_equals{$total_depth} = $ii;
+ }
+
+ elsif ( $type eq ',' ) {
+ $lp_comma_count{$total_depth}++;
+ }
- #------------------------------------------------------
- # Break if structure will reach the maximum line length
- #------------------------------------------------------
+ elsif ( $is_assignment{$type} ) {
+ $last_lp_equals{$total_depth} = $ii;
+ }
- # Historically, -lp just used one-half line length here
- my $len_increase = $rOpts_maximum_line_length / 2;
+ # this token might start a new line if ..
+ if (
+ $ii > $ii_begin_line
- # For -xlp, we can also use the pre-computed lengths
- my $min_len = $rcollapsed_length_by_seqno->{$seqno};
- if ( $min_len && $min_len > $len_increase ) {
- $len_increase = $min_len;
- }
+ && (
- if (
+ # this is the first nonblank token of the line
+ $ii == 1 && $types_to_go[0] eq 'b'
- # if we might exceed the maximum line length
- $lp_position_predictor + $len_increase > $mll
+ # or previous character was one of these:
+ # /^([\:\?\,f])$/
+ || $hash_test2{$last_nonblank_type}
- # if a -bbx flag WANTS a break before this opening token
- || ( $seqno
- && $rbreak_before_container_by_seqno->{$seqno} )
+ # or previous character was opening and this is not
+ # closing
+ || ( $last_nonblank_type eq '{' && $type ne '}' )
+ || ( $last_nonblank_type eq '(' and $type ne ')' )
- # or we are beyond the 1/4 point and there was an old
- # break at an assignment (not '=>') [fix for b1035]
+ # or this token is one of these:
+ # /^([\.]|\|\||\&\&)$/
+ || $hash_test3{$type}
+
+ # or this is a closing structure
+ || ( $last_nonblank_type eq '}'
+ && $last_nonblank_token eq $last_nonblank_type )
+
+ # or previous token was keyword 'return'
+ || (
+ $last_nonblank_type eq 'k'
+ && ( $last_nonblank_token eq 'return'
+ && $type ne '{' )
+ )
+
+ # or starting a new line at certain keywords is fine
+ || ( $type eq 'k'
+ && $is_if_unless_and_or_last_next_redo_return{
+ $token} )
+
+ # or this is after an assignment after a closing
+ # structure
|| (
- $lp_position_predictor >
- $mll - $rOpts_maximum_line_length * 3 / 4
- && $types_to_go[$last_equals] ne '=>'
+ $is_assignment{$last_nonblank_type}
&& (
- $old_breakpoint_to_go[$last_equals]
- || ( $last_equals > 0
- && $old_breakpoint_to_go[ $last_equals - 1 ]
- )
- || ( $last_equals > 1
- && $types_to_go[ $last_equals - 1 ] eq 'b'
- && $old_breakpoint_to_go[ $last_equals - 2 ]
+ # /^[\}\)\]]$/
+ $hash_test1{$last_last_nonblank_type}
+
+ # and it is significantly to the right
+ || $lp_position_predictor > (
+ $maximum_line_length_at_level[$level] -
+ $rOpts_maximum_line_length / 2
)
)
)
- )
- {
+ )
+ )
+ {
+ check_for_long_gnu_style_lines($ii);
+ $ii_begin_line = $ii;
- # then make the switch -- note that we do not set a
- # real breakpoint here because we may not really need
- # one; sub break_lists will do that if necessary.
+ # back up 1 token if we want to break before that type
+ # otherwise, we may strand tokens like '?' or ':' on a line
+ if ( $ii_begin_line > 0 ) {
+ my $wbb =
+ $last_nonblank_type eq 'k'
+ ? $want_break_before{$last_nonblank_token}
+ : $want_break_before{$last_nonblank_type};
+ $ii_begin_line-- if ($wbb);
+ }
+ }
- my $Kc = $K_closing_container->{$seqno};
- if (
+ $K_last_nonblank = $K_to_go[$ii];
+ $last_last_nonblank_type = $last_nonblank_type;
+ $last_nonblank_type = $type;
+ $last_nonblank_token = $token;
- # For -lp, only if the closing token is in this
- # batch (c117). Otherwise it cannot be done by sub
- # break_lists.
- defined($Kc) && $Kc <= $K_to_go[$max_index_to_go]
+ } ## end if ( $type ne 'b' )
- # For -xlp, we only need one nonblank token after
- # the opening token.
- || $rOpts_extended_line_up_parentheses
- )
- {
- $ii_begin_line = $i_test + 1;
- $lp_position_predictor = $test_position;
-
- #--------------------------------------------------
- # Fix for an opening container terminating a batch:
- #--------------------------------------------------
- # To get alignment of a -lp container with its
- # contents, we have to put a break after $i_test.
- # For $ii<$max_index_to_go, this will be done by
- # sub break_lists based on the indentation object.
- # But for $ii=$max_index_to_go, the indentation
- # object for this seqno will not be created until
- # the next batch, so we have to set a break at
- # $i_test right now in order to get one.
- if ( $ii == $max_index_to_go
- && !$block_type_to_go[$ii]
- && $type eq '{'
- && $seqno
- && !$ris_excluded_lp_container->{$seqno} )
- {
- $self->set_forced_lp_break( $ii_begin_line,
- $ii );
- }
- }
- }
- }
- } ## end update position predictor
+ # remember the predicted position of this token on the output line
+ if ( $ii > $ii_begin_line ) {
- #------------------------
- # Handle decreasing depth
- #------------------------
- # Note that one token may have both decreasing and then increasing
- # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
- # in this example we would first go back to (1,0) then up to (2,0)
- # in a single call.
- if ( $level < $current_level || $ci_level < $current_ci_level ) {
+ ## NOTE: this is a critical loop - the following call has been
+ ## expanded for about 2x speedup:
+ ## $lp_position_predictor =
+ ## total_line_length( $ii_begin_line, $ii );
- # loop to find the first entry at or completely below this level
- while (1) {
- if ($max_lp_stack) {
+ my $indentation = $leading_spaces_to_go[$ii_begin_line];
+ if ( ref($indentation) ) {
+ $indentation = $indentation->get_spaces();
+ }
+ $lp_position_predictor =
+ $indentation +
+ $summed_lengths_to_go[ $ii + 1 ] -
+ $summed_lengths_to_go[$ii_begin_line];
+ }
+ else {
+ $lp_position_predictor =
+ $space_count + $token_lengths_to_go[$ii];
+ }
- # save index of token which closes this level
- if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
- my $lp_object =
- $rLP->[$max_lp_stack]->[_lp_object_];
+ # Store the indentation object for this token.
+ # This allows us to manipulate the leading whitespace
+ # (in case we have to reduce indentation to fit a line) without
+ # having to change any token values.
- $lp_object->set_closed($ii);
+ #---------------------------------------------------------------
+ # replace leading whitespace with indentation objects where used
+ #---------------------------------------------------------------
+ if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
+ my $lp_object = $rLP->[$max_lp_stack]->[_lp_object_];
+ $leading_spaces_to_go[$ii] = $lp_object;
+ if ( $max_lp_stack > 0
+ && $ci_level
+ && $rLP->[ $max_lp_stack - 1 ]->[_lp_object_] )
+ {
+ $reduced_spaces_to_go[$ii] =
+ $rLP->[ $max_lp_stack - 1 ]->[_lp_object_];
+ }
+ else {
+ $reduced_spaces_to_go[$ii] = $lp_object;
+ }
+ }
+ } ## end loop over all tokens in this batch
- my $comma_count = 0;
- my $arrow_count = 0;
- if ( $type eq '}' || $type eq ')' ) {
- $comma_count = $lp_comma_count{$total_depth};
- $arrow_count = $lp_arrow_count{$total_depth};
- $comma_count = 0 unless $comma_count;
- $arrow_count = 0 unless $arrow_count;
- }
+ undo_incomplete_lp_indentation()
+ if ( !$rOpts_extended_line_up_parentheses );
- $lp_object->set_comma_count($comma_count);
- $lp_object->set_arrow_count($arrow_count);
+ return;
+ } ## end sub set_lp_indentation
- # Undo any extra indentation if we saw no commas
- my $available_spaces =
- $lp_object->get_available_spaces();
- my $K_start = $lp_object->get_K_begin_line();
+ sub lp_equals_break_check {
+
+ my ( $self, $ii, $ii_last_equals ) = @_;
+
+ # If we come to an opening token after an '=' token of some
+ # type, see if it would be helpful to 'break' after the '=' to
+ # save space.
+
+ # Given:
+ # $ii = index of an opening token in the output batch
+ # $ii_begin_line = index of token starting next output line
+ # Update:
+ # $lp_position_predictor - updated position predictor
+ # $ii_begin_line = updated starting token index
+
+ # Skip an empty set of parens, such as after channel():
+ # my $exchange = $self->_channel()->exchange(
+ # This fixes issues b1318 b1322 b1323 b1328
+ my $is_empty_container;
+ if ( $ii_last_equals && $ii < $max_index_to_go ) {
+ my $seqno = $type_sequence_to_go[$ii];
+ my $inext_nb = $ii + 1;
+ $inext_nb++
+ if ( $types_to_go[$inext_nb] eq 'b' );
+ my $seqno_nb = $type_sequence_to_go[$inext_nb];
+ $is_empty_container = $seqno && $seqno_nb && $seqno_nb == $seqno;
+ }
+
+ if ( $ii_last_equals
+ && $ii_last_equals > $ii_begin_line
+ && !$is_empty_container )
+ {
- if ( $available_spaces > 0
- && $K_start >= $K_to_go[0]
- && ( $comma_count <= 0 || $arrow_count > 0 ) )
- {
+ my $seqno = $type_sequence_to_go[$ii];
- my $i = $lp_object->get_lp_item_index();
+ # find the position if we break at the '='
+ my $i_test = $ii_last_equals;
- # Safety check for a valid stack index. It
- # should be ok because we just checked that the
- # index K of the token associated with this
- # indentation is in this batch.
- if ( $i < 0 || $i > $max_lp_object_list ) {
- if (DEVEL_MODE) {
- my $lno = $rLL->[$KK]->[_LINE_INDEX_];
- Fault(<<EOM);
-Program bug with -lp near line $lno. Stack index i=$i should be >=0 and <= max=$max_lp_object_list
-EOM
- }
- }
- else {
- if ( $arrow_count == 0 ) {
- $rlp_object_list->[$i]
- ->permanently_decrease_available_spaces
- ($available_spaces);
- }
- else {
- $rlp_object_list->[$i]
- ->tentatively_decrease_available_spaces
- ($available_spaces);
- }
- foreach
- my $j ( $i + 1 .. $max_lp_object_list )
- {
- $rlp_object_list->[$j]
- ->decrease_SPACES($available_spaces);
- }
- }
- }
- }
+ # Fix for issue b1229, check if want break before this token
+ # Fix for issue b1356, if i_test is a blank, the leading spaces may
+ # be incorrect (if it was an interline blank).
+ # Fix for issue b1357 .. b1370, i_test must be prev nonblank
+ # ( the ci value for blanks can vary )
+ # See also case b223
+ # Fix for issue b1371-b1374 : all of these and the above are fixed
+ # by simply backing up one index and setting the leading spaces of
+ # a blank equal to that of the equals.
+ if ( $want_break_before{ $types_to_go[$i_test] } ) {
+ $i_test -= 1;
+ $leading_spaces_to_go[$i_test] =
+ $leading_spaces_to_go[$ii_last_equals]
+ if ( $types_to_go[$i_test] eq 'b' );
+ }
+ elsif ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
- # go down one level
- --$max_lp_stack;
-
- my $rLP_top = $rLP->[$max_lp_stack];
- my $ci_lev = $rLP_top->[_lp_ci_level_];
- my $lev = $rLP_top->[_lp_level_];
- my $spaces = $rLP_top->[_lp_space_count_];
- if ( $rLP_top->[_lp_object_] ) {
- my $lp_obj = $rLP_top->[_lp_object_];
- ( $spaces, $lev, $ci_lev ) =
- @{ $lp_obj->get_spaces_level_ci() };
- }
+ my $test_position = total_line_length( $i_test, $ii );
+ my $mll = $maximum_line_length_at_level[ $levels_to_go[$i_test] ];
- # stop when we reach a level at or below the current
- # level
- if ( $lev <= $level && $ci_lev <= $ci_level ) {
- $space_count = $spaces;
- $current_level = $lev;
- $current_ci_level = $ci_lev;
- last;
- }
- }
+ #------------------------------------------------------
+ # Break if structure will reach the maximum line length
+ #------------------------------------------------------
- # reached bottom of stack .. should never happen because
- # only negative levels can get here, and $level was forced
- # to be positive above.
- else {
+ # Historically, -lp just used one-half line length here
+ my $len_increase = $rOpts_maximum_line_length / 2;
- # non-fatal, keep going except in DEVEL_MODE
- if (DEVEL_MODE) {
-##program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp
- Fault(<<EOM);
-program bug with -lp: stack_error. level=$level; ci_level=$ci_level; rerun with -nlp
-EOM
- }
- last;
- }
- }
- } ## end decreasing depth
+ # For -xlp, we can also use the pre-computed lengths
+ my $min_len = $self->[_rcollapsed_length_by_seqno_]->{$seqno};
+ if ( $min_len && $min_len > $len_increase ) {
+ $len_increase = $min_len;
+ }
- #------------------------
- # handle increasing depth
- #------------------------
- if ( $level > $current_level || $ci_level > $current_ci_level ) {
+ if (
- $stack_changed = 1;
-
- # Compute the standard incremental whitespace. This will be
- # the minimum incremental whitespace that will be used. This
- # choice results in a smooth transition between the gnu-style
- # and the standard style.
- my $standard_increment =
- ( $level - $current_level ) *
- $rOpts_indent_columns +
- ( $ci_level - $current_ci_level ) *
- $rOpts_continuation_indentation;
-
- # Now we have to define how much extra incremental space
- # ("$available_space") we want. This extra space will be
- # reduced as necessary when long lines are encountered or when
- # it becomes clear that we do not have a good list.
- my $available_spaces = 0;
- my $align_seqno = 0;
-
- my $last_nonblank_seqno;
- my $last_nonblank_block_type;
- if ( defined($K_last_nonblank) ) {
- $last_nonblank_seqno =
- $rLL->[$K_last_nonblank]->[_TYPE_SEQUENCE_];
- $last_nonblank_block_type =
- $last_nonblank_seqno
- ? $rblock_type_of_seqno->{$last_nonblank_seqno}
- : undef;
- }
-
- $in_lp_mode = $rLP->[$max_lp_stack]->[_lp_object_];
-
- #-----------------------------------------------
- # Initialize indentation spaces on empty stack..
- #-----------------------------------------------
- if ( $max_lp_stack == 0 ) {
- $space_count = $level * $rOpts_indent_columns;
- }
-
- #----------------------------------------
- # Add the standard space increment if ...
- #----------------------------------------
- elsif (
+ # if we might exceed the maximum line length
+ $lp_position_predictor + $len_increase > $mll
- # if this is a BLOCK, add the standard increment
- $last_nonblank_block_type
+ # if a -bbx flag WANTS a break before this opening token
+ || ( $seqno
+ && $self->[_rbreak_before_container_by_seqno_]->{$seqno} )
- # or if this is not a sequenced item
- || !$last_nonblank_seqno
+ # or we are beyond the 1/4 point and there was an old
+ # break at an assignment (not '=>') [fix for b1035]
+ || (
+ $lp_position_predictor >
+ $mll - $rOpts_maximum_line_length * 3 / 4
+ && $types_to_go[$ii_last_equals] ne '=>'
+ && (
+ $old_breakpoint_to_go[$ii_last_equals]
+ || ( $ii_last_equals > 0
+ && $old_breakpoint_to_go[ $ii_last_equals - 1 ] )
+ || ( $ii_last_equals > 1
+ && $types_to_go[ $ii_last_equals - 1 ] eq 'b'
+ && $old_breakpoint_to_go[ $ii_last_equals - 2 ] )
+ )
+ )
+ )
+ {
- # or this container is excluded by user rules
- # or contains here-docs or multiline qw text
- || defined($last_nonblank_seqno)
- && $ris_excluded_lp_container->{$last_nonblank_seqno}
+ # then make the switch -- note that we do not set a
+ # real breakpoint here because we may not really need
+ # one; sub break_lists will do that if necessary.
- # or if last nonblank token was not structural indentation
- || $last_nonblank_type ne '{'
+ my $Kc = $self->[_K_closing_container_]->{$seqno};
+ if (
- # and do not start -lp under stress .. fixes b1244, b1255
- || !$in_lp_mode && $level >= $lp_cutoff_level
+ # For -lp, only if the closing token is in this
+ # batch (c117). Otherwise it cannot be done by sub
+ # break_lists.
+ defined($Kc) && $Kc <= $K_to_go[$max_index_to_go]
+ # For -xlp, we only need one nonblank token after
+ # the opening token.
+ || $rOpts_extended_line_up_parentheses
)
{
-
- # If we have entered lp mode, use the top lp object to get
- # the current indentation spaces because it may have
- # changed. Fixes b1285, b1286.
- if ($in_lp_mode) {
- $space_count = $in_lp_mode->get_spaces();
+ $ii_begin_line = $i_test + 1;
+ $lp_position_predictor = $test_position;
+
+ #--------------------------------------------------
+ # Fix for an opening container terminating a batch:
+ #--------------------------------------------------
+ # To get alignment of a -lp container with its
+ # contents, we have to put a break after $i_test.
+ # For $ii<$max_index_to_go, this will be done by
+ # sub break_lists based on the indentation object.
+ # But for $ii=$max_index_to_go, the indentation
+ # object for this seqno will not be created until
+ # the next batch, so we have to set a break at
+ # $i_test right now in order to get one.
+ if ( $ii == $max_index_to_go
+ && !$block_type_to_go[$ii]
+ && $types_to_go[$ii] eq '{'
+ && $seqno
+ && !$self->[_ris_excluded_lp_container_]->{$seqno} )
+ {
+ $self->set_forced_lp_break( $ii_begin_line, $ii );
}
- $space_count += $standard_increment;
}
+ }
+ }
+ return;
+ } ## end sub lp_equals_break_check
- #---------------------------------------------------------------
- # -lp mode: try to use space to the first non-blank level change
- #---------------------------------------------------------------
- else {
-
- # see how much space we have available
- my $test_space_count = $lp_position_predictor;
- my $excess = 0;
- my $min_len =
- $rcollapsed_length_by_seqno->{$last_nonblank_seqno};
- my $next_opening_too_far;
-
- if ( defined($min_len) ) {
- $excess =
- $test_space_count +
- $min_len -
- $maximum_line_length_at_level[$level];
- if ( $excess > 0 ) {
- $test_space_count -= $excess;
-
- # will the next opening token be a long way out?
- $next_opening_too_far =
- $lp_position_predictor + $excess >
- $maximum_line_length_at_level[$level];
- }
- }
+ sub lp_decreasing_depth {
+ my ( $self, $ii ) = @_;
- my $rLP_top = $rLP->[$max_lp_stack];
- my $min_gnu_indentation = $rLP_top->[_lp_space_count_];
- if ( $rLP_top->[_lp_object_] ) {
- $min_gnu_indentation =
- $rLP_top->[_lp_object_]->get_spaces();
- }
- $available_spaces =
- $test_space_count - $min_gnu_indentation;
+ my $rLL = $self->[_rLL_];
- # Do not startup -lp indentation mode if no space ...
- # ... or if it puts the opening far to the right
- if ( !$in_lp_mode
- && ( $available_spaces <= 0 || $next_opening_too_far ) )
- {
- $space_count += $standard_increment;
- $available_spaces = 0;
- }
+ my $level = $levels_to_go[$ii];
+ my $ci_level = $ci_levels_to_go[$ii];
- # Use -lp mode
- else {
- $space_count = $test_space_count;
+ # loop to find the first entry at or completely below this level
+ while (1) {
- $in_lp_mode = 1;
- if ( $available_spaces >= $standard_increment ) {
- $min_gnu_indentation += $standard_increment;
- }
- elsif ( $available_spaces > 1 ) {
- $min_gnu_indentation += $available_spaces + 1;
- }
- ##elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
- elsif ( $is_opening_token{$last_nonblank_token} ) {
- if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
- $min_gnu_indentation += 2;
- }
- else {
- $min_gnu_indentation += 1;
- }
- }
- else {
- $min_gnu_indentation += $standard_increment;
- }
- $available_spaces = $space_count - $min_gnu_indentation;
+ # Be sure we have not hit the stack bottom - should never
+ # happen because only negative levels can get here, and
+ # $level was forced to be positive above.
+ if ( !$max_lp_stack ) {
- if ( $available_spaces < 0 ) {
- $space_count = $min_gnu_indentation;
- $available_spaces = 0;
- }
- $align_seqno = $last_nonblank_seqno;
- }
+ # non-fatal, just keep going except in DEVEL_MODE
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+program bug with -lp: stack_error. level=$level; ci_level=$ci_level; rerun with -nlp
+EOM
}
+ last;
+ }
- #-------------------------------------------
- # update the state, but not on a blank token
- #-------------------------------------------
- if ( $type ne 'b' ) {
-
- if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
- $rLP->[$max_lp_stack]->[_lp_object_]->set_have_child(1);
- $in_lp_mode = 1;
- }
+ # save index of token which closes this level
+ if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
+ my $lp_object = $rLP->[$max_lp_stack]->[_lp_object_];
- #----------------------------------------
- # Create indentation object if in lp-mode
- #----------------------------------------
- ++$max_lp_stack;
- my $lp_object;
- if ($in_lp_mode) {
-
- # A negative level implies not to store the item in the
- # item_list
- my $lp_item_index = 0;
- if ( $level >= 0 ) {
- $lp_item_index = ++$max_lp_object_list;
- }
+ $lp_object->set_closed($ii);
- my $K_begin_line = 0;
- if ( $ii_begin_line >= 0
- && $ii_begin_line <= $max_index_to_go )
- {
- $K_begin_line = $K_to_go[$ii_begin_line];
- }
+ my $comma_count = 0;
+ my $arrow_count = 0;
+ my $type = $types_to_go[$ii];
+ if ( $type eq '}' || $type eq ')' ) {
+ my $total_depth = $nesting_depth_to_go[$ii];
+ $comma_count = $lp_comma_count{$total_depth};
+ $arrow_count = $lp_arrow_count{$total_depth};
+ $comma_count = 0 unless $comma_count;
+ $arrow_count = 0 unless $arrow_count;
+ }
- # Minor Fix: when creating indentation at a side
- # comment we don't know what the space to the actual
- # next code token will be. We will allow a space for
- # sub correct_lp to move it in if necessary.
- if ( $type eq '#'
- && $max_index_to_go > 0
- && $align_seqno )
- {
- $available_spaces += 1;
- }
+ $lp_object->set_comma_count($comma_count);
+ $lp_object->set_arrow_count($arrow_count);
- $lp_object = Perl::Tidy::IndentationItem->new(
- spaces => $space_count,
- level => $level,
- ci_level => $ci_level,
- available_spaces => $available_spaces,
- lp_item_index => $lp_item_index,
- align_seqno => $align_seqno,
- stack_depth => $max_lp_stack,
- K_begin_line => $K_begin_line,
- standard_spaces => $standard_spaces,
- );
+ # Undo any extra indentation if we saw no commas
+ my $available_spaces = $lp_object->get_available_spaces();
+ my $K_start = $lp_object->get_K_begin_line();
- DEBUG_LP && do {
- my $tok_beg = $rLL->[$K_begin_line]->[_TOKEN_];
- print STDERR <<EOM;
-DEBUG_LP: Created object at tok=$token type=$type for seqno $align_seqno level=$level ci=$ci_level spaces=$space_count avail=$available_spaces kbeg=$K_begin_line tokbeg=$tok_beg lp=$lp_position_predictor
-EOM
- };
+ if ( $available_spaces > 0
+ && $K_start >= $K_to_go[0]
+ && ( $comma_count <= 0 || $arrow_count > 0 ) )
+ {
- if ( $level >= 0 ) {
- $rlp_object_list->[$max_lp_object_list] =
- $lp_object;
- }
+ my $i = $lp_object->get_lp_item_index();
- ##if ( $last_nonblank_token =~ /^[\{\[\(]$/
- if ( $is_opening_token{$last_nonblank_token}
- && $last_nonblank_seqno )
- {
- $rlp_object_by_seqno->{$last_nonblank_seqno} =
- $lp_object;
- }
+ # Safety check for a valid stack index. It
+ # should be ok because we just checked that the
+ # index K of the token associated with this
+ # indentation is in this batch.
+ if ( $i < 0 || $i > $max_lp_object_list ) {
+ my $KK = $K_to_go[$ii];
+ my $lno = $rLL->[$KK]->[_LINE_INDEX_];
+ DEVEL_MODE && Fault(<<EOM);
+Program bug with -lp near line $lno. Stack index i=$i should be >=0 and <= max=$max_lp_object_list
+EOM
+ last;
}
- #------------------------------------
- # Store this indentation on the stack
- #------------------------------------
- $rLP->[$max_lp_stack]->[_lp_ci_level_] = $ci_level;
- $rLP->[$max_lp_stack]->[_lp_level_] = $level;
- $rLP->[$max_lp_stack]->[_lp_object_] = $lp_object;
- $rLP->[$max_lp_stack]->[_lp_container_seqno_] =
- $last_nonblank_seqno;
- $rLP->[$max_lp_stack]->[_lp_space_count_] = $space_count;
-
- # If the opening paren is beyond the half-line length, then
- # we will use the minimum (standard) indentation. This will
- # help avoid problems associated with running out of space
- # near the end of a line. As a result, in deeply nested
- # lists, there will be some indentations which are limited
- # to this minimum standard indentation. But the most deeply
- # nested container will still probably be able to shift its
- # parameters to the right for proper alignment, so in most
- # cases this will not be noticeable.
- if ( $available_spaces > 0 && $lp_object ) {
- my $halfway =
- $maximum_line_length_at_level[$level] -
- $rOpts_maximum_line_length / 2;
- $lp_object->tentatively_decrease_available_spaces(
- $available_spaces)
- if ( $space_count > $halfway );
+ if ( $arrow_count == 0 ) {
+ $rlp_object_list->[$i]
+ ->permanently_decrease_available_spaces(
+ $available_spaces);
+ }
+ else {
+ $rlp_object_list->[$i]
+ ->tentatively_decrease_available_spaces(
+ $available_spaces);
+ }
+ foreach my $j ( $i + 1 .. $max_lp_object_list ) {
+ $rlp_object_list->[$j]
+ ->decrease_SPACES($available_spaces);
}
}
- } ## end increasing depth
+ }
- #------------------
- # Handle all tokens
- #------------------
- if ( $type ne 'b' ) {
+ # go down one level
+ --$max_lp_stack;
+
+ my $rLP_top = $rLP->[$max_lp_stack];
+ my $ci_lev = $rLP_top->[_lp_ci_level_];
+ my $lev = $rLP_top->[_lp_level_];
+ my $spaces = $rLP_top->[_lp_space_count_];
+ if ( $rLP_top->[_lp_object_] ) {
+ my $lp_obj = $rLP_top->[_lp_object_];
+ ( $spaces, $lev, $ci_lev ) =
+ @{ $lp_obj->get_spaces_level_ci() };
+ }
+
+ # stop when we reach a level at or below the current
+ # level
+ if ( $lev <= $level && $ci_lev <= $ci_level ) {
+ $space_count = $spaces;
+ $current_level = $lev;
+ $current_ci_level = $ci_lev;
+ last;
+ }
+ }
+ return;
+ } ## end sub lp_decreasing_depth
+
+ sub lp_increasing_depth {
+ my ( $self, $ii ) = @_;
+
+ my $rLL = $self->[_rLL_];
+
+ my $type = $types_to_go[$ii];
+ my $level = $levels_to_go[$ii];
+ my $ci_level = $ci_levels_to_go[$ii];
+
+ $stack_changed = 1;
+
+ # Compute the standard incremental whitespace. This will be
+ # the minimum incremental whitespace that will be used. This
+ # choice results in a smooth transition between the gnu-style
+ # and the standard style.
+ my $standard_increment =
+ ( $level - $current_level ) * $rOpts_indent_columns +
+ ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
+
+ # Now we have to define how much extra incremental space
+ # ("$available_space") we want. This extra space will be
+ # reduced as necessary when long lines are encountered or when
+ # it becomes clear that we do not have a good list.
+ my $available_spaces = 0;
+ my $align_seqno = 0;
+ my $K_extra_space;
+
+ my $last_nonblank_seqno;
+ my $last_nonblank_block_type;
+ if ( defined($K_last_nonblank) ) {
+ $last_nonblank_seqno = $rLL->[$K_last_nonblank]->[_TYPE_SEQUENCE_];
+ $last_nonblank_block_type =
+ $last_nonblank_seqno
+ ? $self->[_rblock_type_of_seqno_]->{$last_nonblank_seqno}
+ : undef;
+ }
+
+ $in_lp_mode = $rLP->[$max_lp_stack]->[_lp_object_];
+
+ #-----------------------------------------------
+ # Initialize indentation spaces on empty stack..
+ #-----------------------------------------------
+ if ( $max_lp_stack == 0 ) {
+ $space_count = $level * $rOpts_indent_columns;
+ }
- # Count commas and look for non-list characters. Once we see a
- # non-list character, we give up and don't look for any more
- # commas.
- if ( $type eq '=>' ) {
- $lp_arrow_count{$total_depth}++;
+ #----------------------------------------
+ # Add the standard space increment if ...
+ #----------------------------------------
+ elsif (
- # remember '=>' like '=' for estimating breaks (but see
- # above note for b1035)
- $last_lp_equals{$total_depth} = $ii;
- }
+ # if this is a BLOCK, add the standard increment
+ $last_nonblank_block_type
- elsif ( $type eq ',' ) {
- $lp_comma_count{$total_depth}++;
- }
+ # or if this is not a sequenced item
+ || !$last_nonblank_seqno
- elsif ( $is_assignment{$type} ) {
- $last_lp_equals{$total_depth} = $ii;
- }
+ # or this container is excluded by user rules
+ # or contains here-docs or multiline qw text
+ || defined($last_nonblank_seqno)
+ && $self->[_ris_excluded_lp_container_]->{$last_nonblank_seqno}
- # this token might start a new line if ..
- if (
+ # or if last nonblank token was not structural indentation
+ || $last_nonblank_type ne '{'
- # this is the first nonblank token of the line
- $ii == 1 && $types_to_go[0] eq 'b'
+ # and do not start -lp under stress .. fixes b1244, b1255
+ || !$in_lp_mode && $level >= $high_stress_level
- # or previous character was one of these:
- # /^([\:\?\,f])$/
- || $hash_test2{$last_nonblank_type}
+ )
+ {
- # or previous character was opening and this is not closing
- || ( $last_nonblank_type eq '{' && $type ne '}' )
- || ( $last_nonblank_type eq '(' and $type ne ')' )
+ # If we have entered lp mode, use the top lp object to get
+ # the current indentation spaces because it may have
+ # changed. Fixes b1285, b1286.
+ if ($in_lp_mode) {
+ $space_count = $in_lp_mode->get_spaces();
+ }
+ $space_count += $standard_increment;
+ }
- # or this token is one of these:
- # /^([\.]|\|\||\&\&)$/
- || $hash_test3{$type}
+ #---------------------------------------------------------------
+ # -lp mode: try to use space to the first non-blank level change
+ #---------------------------------------------------------------
+ else {
- # or this is a closing structure
- || ( $last_nonblank_type eq '}'
- && $last_nonblank_token eq $last_nonblank_type )
+ # see how much space we have available
+ my $test_space_count = $lp_position_predictor;
+ my $excess = 0;
+ my $min_len =
+ $self->[_rcollapsed_length_by_seqno_]->{$last_nonblank_seqno};
+ my $next_opening_too_far;
- # or previous token was keyword 'return'
- || (
- $last_nonblank_type eq 'k'
- && ( $last_nonblank_token eq 'return'
- && $type ne '{' )
- )
+ if ( defined($min_len) ) {
+ $excess =
+ $test_space_count +
+ $min_len -
+ $maximum_line_length_at_level[$level];
+ if ( $excess > 0 ) {
+ $test_space_count -= $excess;
- # or starting a new line at certain keywords is fine
- || ( $type eq 'k'
- && $is_if_unless_and_or_last_next_redo_return{$token} )
+ # will the next opening token be a long way out?
+ $next_opening_too_far =
+ $lp_position_predictor + $excess >
+ $maximum_line_length_at_level[$level];
+ }
+ }
- # or this is after an assignment after a closing structure
- || (
- $is_assignment{$last_nonblank_type}
- && (
- # /^[\}\)\]]$/
- $hash_test1{$last_last_nonblank_type}
+ my $rLP_top = $rLP->[$max_lp_stack];
+ my $min_gnu_indentation = $rLP_top->[_lp_space_count_];
+ if ( $rLP_top->[_lp_object_] ) {
+ $min_gnu_indentation = $rLP_top->[_lp_object_]->get_spaces();
+ }
+ $available_spaces = $test_space_count - $min_gnu_indentation;
- # and it is significantly to the right
- || $lp_position_predictor > (
- $maximum_line_length_at_level[$level] -
- $rOpts_maximum_line_length / 2
- )
- )
- )
- )
- {
- check_for_long_gnu_style_lines( $ii, $rlp_object_list );
- $ii_begin_line = $ii;
+ # Do not startup -lp indentation mode if no space ...
+ # ... or if it puts the opening far to the right
+ if ( !$in_lp_mode
+ && ( $available_spaces <= 0 || $next_opening_too_far ) )
+ {
+ $space_count += $standard_increment;
+ $available_spaces = 0;
+ }
- # back up 1 token if we want to break before that type
- # otherwise, we may strand tokens like '?' or ':' on a line
- if ( $ii_begin_line > 0 ) {
- if ( $last_nonblank_type eq 'k' ) {
+ # Use -lp mode
+ else {
+ $space_count = $test_space_count;
- if ( $want_break_before{$last_nonblank_token} ) {
- $ii_begin_line--;
- }
- }
- elsif ( $want_break_before{$last_nonblank_type} ) {
- $ii_begin_line--;
- }
- }
- } ## end if ( $ii == 1 && $types_to_go...)
+ $in_lp_mode = 1;
+ if ( $available_spaces >= $standard_increment ) {
+ $min_gnu_indentation += $standard_increment;
+ }
+ elsif ( $available_spaces > 1 ) {
+ $min_gnu_indentation += $available_spaces + 1;
- $K_last_nonblank = $KK;
+ # The "+1" space can cause mis-alignment if there is no
+ # blank space between the opening paren and the next
+ # nonblank token (i.e., -pt=2) and the container does not
+ # get broken open. So we will mark this token for later
+ # space removal by sub 'xlp_tweak' if this container
+ # remains intact (issue git #106).
+ if (
+ $type ne 'b'
- $last_last_nonblank_type = $last_nonblank_type;
- $last_nonblank_type = $type;
- $last_nonblank_token = $token;
+ # Skip if the maximum line length is exceeded here
+ && $excess <= 0
- } ## end if ( $type ne 'b' )
+ # This is only for level changes, not ci level changes.
+ # But note: this test is here out of caution but I have
+ # not found a case where it is actually necessary.
+ && $is_opening_token{$last_nonblank_token}
- # remember the predicted position of this token on the output line
- if ( $ii > $ii_begin_line ) {
+ # Be sure we are at consecutive nonblanks. This test
+ # should be true, but it guards against future coding
+ # changes to level values assigned to blank spaces.
+ && $ii > 0
+ && $types_to_go[ $ii - 1 ] ne 'b'
- ## NOTE: this is a critical loop - the following call has been
- ## expanded for about 2x speedup:
- ## $lp_position_predictor =
- ## total_line_length( $ii_begin_line, $ii );
+ )
+ {
+ $K_extra_space = $K_to_go[$ii];
+ }
+ }
+ elsif ( $is_opening_token{$last_nonblank_token} ) {
+ if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
+ $min_gnu_indentation += 2;
+ }
+ else {
+ $min_gnu_indentation += 1;
+ }
+ }
+ else {
+ $min_gnu_indentation += $standard_increment;
+ }
+ $available_spaces = $space_count - $min_gnu_indentation;
- my $indentation = $leading_spaces_to_go[$ii_begin_line];
- if ( ref($indentation) ) {
- $indentation = $indentation->get_spaces();
+ if ( $available_spaces < 0 ) {
+ $space_count = $min_gnu_indentation;
+ $available_spaces = 0;
}
- $lp_position_predictor =
- $indentation +
- $summed_lengths_to_go[ $ii + 1 ] -
- $summed_lengths_to_go[$ii_begin_line];
- }
- else {
- $lp_position_predictor =
- $space_count + $token_lengths_to_go[$ii];
+ $align_seqno = $last_nonblank_seqno;
}
+ }
- # Store the indentation object for this token.
- # This allows us to manipulate the leading whitespace
- # (in case we have to reduce indentation to fit a line) without
- # having to change any token values.
+ #-------------------------------------------
+ # update the state, but not on a blank token
+ #-------------------------------------------
+ if ( $type ne 'b' ) {
- #---------------------------------------------------------------
- # replace leading whitespace with indentation objects where used
- #---------------------------------------------------------------
if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
- my $lp_object = $rLP->[$max_lp_stack]->[_lp_object_];
- $leading_spaces_to_go[$ii] = $lp_object;
- if ( $max_lp_stack > 0
- && $ci_level
- && $rLP->[ $max_lp_stack - 1 ]->[_lp_object_] )
+ $rLP->[$max_lp_stack]->[_lp_object_]->set_have_child(1);
+ $in_lp_mode = 1;
+ }
+
+ #----------------------------------------
+ # Create indentation object if in lp-mode
+ #----------------------------------------
+ ++$max_lp_stack;
+ my $lp_object;
+ if ($in_lp_mode) {
+
+ # A negative level implies not to store the item in the
+ # item_list
+ my $lp_item_index = 0;
+ if ( $level >= 0 ) {
+ $lp_item_index = ++$max_lp_object_list;
+ }
+
+ my $K_begin_line = 0;
+ if ( $ii_begin_line >= 0
+ && $ii_begin_line <= $max_index_to_go )
{
- $reduced_spaces_to_go[$ii] =
- $rLP->[ $max_lp_stack - 1 ]->[_lp_object_];
+ $K_begin_line = $K_to_go[$ii_begin_line];
}
- else {
- $reduced_spaces_to_go[$ii] = $lp_object;
+
+ # Minor Fix: when creating indentation at a side
+ # comment we don't know what the space to the actual
+ # next code token will be. We will allow a space for
+ # sub correct_lp to move it in if necessary.
+ if ( $type eq '#'
+ && $max_index_to_go > 0
+ && $align_seqno )
+ {
+ $available_spaces += 1;
+ }
+
+ my $standard_spaces = $leading_spaces_to_go[$ii];
+ $lp_object = Perl::Tidy::IndentationItem->new(
+ spaces => $space_count,
+ level => $level,
+ ci_level => $ci_level,
+ available_spaces => $available_spaces,
+ lp_item_index => $lp_item_index,
+ align_seqno => $align_seqno,
+ stack_depth => $max_lp_stack,
+ K_begin_line => $K_begin_line,
+ standard_spaces => $standard_spaces,
+ K_extra_space => $K_extra_space,
+ );
+
+ DEBUG_LP && do {
+ my $tok_beg = $rLL->[$K_begin_line]->[_TOKEN_];
+ my $token = $tokens_to_go[$ii];
+ print STDERR <<EOM;
+DEBUG_LP: Created object at tok=$token type=$type for seqno $align_seqno level=$level ci=$ci_level spaces=$space_count avail=$available_spaces kbeg=$K_begin_line tokbeg=$tok_beg lp=$lp_position_predictor
+EOM
+ };
+
+ if ( $level >= 0 ) {
+ $rlp_object_list->[$max_lp_object_list] = $lp_object;
}
- }
- } ## end loop over all tokens in this batch
- undo_incomplete_lp_indentation($rlp_object_list)
- if ( !$rOpts_extended_line_up_parentheses );
+ if ( $is_opening_token{$last_nonblank_token}
+ && $last_nonblank_seqno )
+ {
+ $self->[_rlp_object_by_seqno_]->{$last_nonblank_seqno} =
+ $lp_object;
+ }
+ }
+ #------------------------------------
+ # Store this indentation on the stack
+ #------------------------------------
+ $rLP->[$max_lp_stack]->[_lp_ci_level_] = $ci_level;
+ $rLP->[$max_lp_stack]->[_lp_level_] = $level;
+ $rLP->[$max_lp_stack]->[_lp_object_] = $lp_object;
+ $rLP->[$max_lp_stack]->[_lp_container_seqno_] =
+ $last_nonblank_seqno;
+ $rLP->[$max_lp_stack]->[_lp_space_count_] = $space_count;
+
+ # If the opening paren is beyond the half-line length, then
+ # we will use the minimum (standard) indentation. This will
+ # help avoid problems associated with running out of space
+ # near the end of a line. As a result, in deeply nested
+ # lists, there will be some indentations which are limited
+ # to this minimum standard indentation. But the most deeply
+ # nested container will still probably be able to shift its
+ # parameters to the right for proper alignment, so in most
+ # cases this will not be noticeable.
+ if ( $available_spaces > 0 && $lp_object ) {
+ my $halfway =
+ $maximum_line_length_at_level[$level] -
+ $rOpts_maximum_line_length / 2;
+ $lp_object->tentatively_decrease_available_spaces(
+ $available_spaces)
+ if ( $space_count > $halfway );
+ }
+ }
return;
- } ## end sub set_lp_indentation
+ } ## end sub lp_increasing_depth
sub check_for_long_gnu_style_lines {
# look at the current estimated maximum line length, and
# remove some whitespace if it exceeds the desired maximum
- my ( $mx_index_to_go, $rlp_object_list ) = @_;
-
- my $max_lp_object_list = @{$rlp_object_list} - 1;
+ my ($ii_to_go) = @_;
# nothing can be done if no stack items defined for this line
return if ( $max_lp_object_list < 0 );
- # see if we have exceeded the maximum desired line length
+ # See if we have exceeded the maximum desired line length ..
# keep 2 extra free because they are needed in some cases
# (result of trial-and-error testing)
+ my $tol = 2;
+
+ # But reduce tol to 0 at a terminal comma; fixes b1432
+ if ( $tokens_to_go[$ii_to_go] eq ','
+ && $ii_to_go < $max_index_to_go )
+ {
+ my $in = $ii_to_go + 1;
+ if ( $types_to_go[$in] eq 'b' && $in < $max_index_to_go ) { $in++ }
+ if ( $is_closing_token{ $tokens_to_go[$in] } ) {
+ $tol = 0;
+ }
+ }
+
my $spaces_needed =
$lp_position_predictor -
- $maximum_line_length_at_level[ $levels_to_go[$mx_index_to_go] ] + 2;
+ $maximum_line_length_at_level[ $levels_to_go[$ii_to_go] ] +
+ $tol;
return if ( $spaces_needed <= 0 );
# was always done because it could cause problems otherwise, but recent
# improvements allow fairly good results to be obtained by skipping
# this step with the -xlp flag.
- my ($rlp_object_list) = @_;
-
- my $max_lp_object_list = @{$rlp_object_list} - 1;
# nothing to do if no stack items defined for this line
return if ( $max_lp_object_list < 0 );
# have been defined. Here we prepare the lines for passing to the vertical
# aligner. We do the following tasks:
# - mark certain vertical alignment tokens, such as '=', in each line
- # - make minor indentation adjustments
+ # - make final indentation adjustments
# - do logical padding: insert extra blank spaces to help display certain
# logical constructions
+ # - send the line to the vertical aligner
- my $this_batch = $self->[_this_batch_];
- my $ri_first = $this_batch->[_ri_first_];
- my $ri_last = $this_batch->[_ri_last_];
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
+ my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
+ my $this_batch = $self->[_this_batch_];
+
+ my $do_not_pad = $this_batch->[_do_not_pad_];
+ my $starting_in_quote = $this_batch->[_starting_in_quote_];
+ my $ending_in_quote = $this_batch->[_ending_in_quote_];
+ my $is_static_block_comment = $this_batch->[_is_static_block_comment_];
+ my $batch_CODE_type = $this_batch->[_batch_CODE_type_];
+ my $ri_first = $this_batch->[_ri_first_];
+ my $ri_last = $this_batch->[_ri_last_];
$self->check_convey_batch_input( $ri_first, $ri_last ) if (DEVEL_MODE);
my $n_last_line = @{$ri_first} - 1;
- my $do_not_pad = $this_batch->[_do_not_pad_];
- my $peak_batch_size = $this_batch->[_peak_batch_size_];
- my $starting_in_quote = $this_batch->[_starting_in_quote_];
- my $ending_in_quote = $this_batch->[_ending_in_quote_];
- my $is_static_block_comment = $this_batch->[_is_static_block_comment_];
- my $rix_seqno_controlling_ci = $this_batch->[_rix_seqno_controlling_ci_];
- my $batch_CODE_type = $this_batch->[_batch_CODE_type_];
-
- my $rLL = $self->[_rLL_];
- my $Klimit = $self->[_Klimit_];
- my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
- my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
-
my $ibeg_next = $ri_first->[0];
my $iend_next = $ri_last->[0];
my $type_end_next = $types_to_go[$iend_next];
my $token_beg_next = $tokens_to_go[$ibeg_next];
- my $is_block_comment = $max_index_to_go == 0 && $types_to_go[0] eq '#';
-
my $rindentation_list = [0]; # ref to indentations for each line
- my ( $cscw_block_comment, $closing_side_comment );
+ my ( $cscw_block_comment, $closing_side_comment, $is_block_comment );
+
+ if ( !$max_index_to_go && $type_beg_next eq '#' ) {
+ $is_block_comment = 1;
+ }
+
if ($rOpts_closing_side_comments) {
( $closing_side_comment, $cscw_block_comment ) =
$self->add_closing_side_comment( $ri_first, $ri_last );
}
- # flush before a long if statement to avoid unwanted alignment
- if ( $n_last_line > 0
- && $type_beg_next eq 'k'
- && $is_if_unless{$token_beg_next} )
- {
- $self->flush_vertical_aligner();
+ if ( $n_last_line > 0 || $rOpts_extended_continuation_indentation ) {
+ $self->undo_ci( $ri_first, $ri_last,
+ $this_batch->[_rix_seqno_controlling_ci_] );
}
- $self->undo_ci( $ri_first, $ri_last, $rix_seqno_controlling_ci )
- if ( $n_last_line > 0 || $rOpts_extended_continuation_indentation );
+ # for multi-line batches ...
+ if ( $n_last_line > 0 ) {
+
+ # flush before a long if statement to avoid unwanted alignment
+ $self->flush_vertical_aligner()
+ if ( $type_beg_next eq 'k'
+ && $is_if_unless{$token_beg_next} );
- $self->set_logical_padding( $ri_first, $ri_last, $peak_batch_size,
- $starting_in_quote )
- if ( $n_last_line > 0 && $rOpts_logical_padding );
+ $self->set_logical_padding( $ri_first, $ri_last, $starting_in_quote )
+ if ($rOpts_logical_padding);
+
+ $self->xlp_tweak( $ri_first, $ri_last )
+ if ($rOpts_extended_line_up_parentheses);
+ }
if (DEVEL_MODE) { $self->check_batch_summed_lengths() }
# ----------------------------------------------
# loop to send each line to the vertical aligner
# ----------------------------------------------
- my ( $type_beg, $type_end, $token_beg );
+ my ( $type_beg, $type_end, $token_beg, $ljump );
for my $n ( 0 .. $n_last_line ) {
my $Kend_code =
$batch_CODE_type && $batch_CODE_type ne 'VER' ? undef : $Kend;
- # $ljump is a level jump needed by 'sub final_indentation_adjustment'
- my $ljump = 0;
-
- # Get some vars on line [n+1], if any:
+ # Get some vars on line [n+1], if any,
+ # and define $ljump = level jump needed by 'sub get_final_indentation'
if ( $n < $n_last_line ) {
$ibeg_next = $ri_first->[ $n + 1 ];
$iend_next = $ri_last->[ $n + 1 ];
$ljump =
$rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
}
+ else {
+ $ljump = 0;
+ }
# ---------------------------------------------
# get the vertical alignment info for this line
# --------------------------------------
# get the final indentation of this line
# --------------------------------------
- my ( $indentation, $lev, $level_end, $terminal_type,
- $terminal_block_type, $is_semicolon_terminated, $is_outdented_line )
- = $self->final_indentation_adjustment( $ibeg, $iend, $rfields,
- $rpatterns, $ri_first, $ri_last,
- $rindentation_list, $ljump, $starting_in_quote,
- $is_static_block_comment, );
+ my (
+
+ $indentation,
+ $lev,
+ $level_end,
+ $i_terminal,
+ $is_outdented_line,
+
+ ) = $self->get_final_indentation(
+
+ $ibeg,
+ $iend,
+ $rfields,
+ $rpatterns,
+ $ri_first,
+ $ri_last,
+ $rindentation_list,
+ $ljump,
+ $starting_in_quote,
+ $is_static_block_comment,
+
+ );
# --------------------------------
# define flag 'outdent_long_lines'
my $seqno_m = $rLL->[$Km]->[_TYPE_SEQUENCE_];
if ($seqno_m) {
- $block_type_m = $rblock_type_of_seqno->{$seqno_m};
+ $block_type_m = $self->[_rblock_type_of_seqno_]->{$seqno_m};
}
}
$rvao_args->{rvertical_tightness_flags} =
$self->set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
$ri_first, $ri_last, $ending_in_quote, $closing_side_comment )
- if ( !$is_block_comment );
+ unless ( $is_block_comment
+ || $self->[_no_vertical_tightness_flags_] );
# ----------------------------------
# define 'is_terminal_ternary' flag
my $is_terminal_ternary = 0;
my $last_leading_type = $n > 0 ? $type_beg_last : ':';
+ my $terminal_type = $types_to_go[$i_terminal];
if ( $terminal_type ne ';'
&& $n_last_line > $n
&& $level_end == $lev )
# This flag tells the vertical aligner to reset the side comment
# location if we are entering a new block from level 0. This is
# intended to keep side comments from drifting too far to the right.
- if ( $terminal_block_type
+ if ( $block_type_to_go[$i_terminal]
&& $nesting_depth_end > $nesting_depth_beg )
{
- my $level_adj = $lev;
- my $radjusted_levels = $self->[_radjusted_levels_];
- if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} )
- {
- $level_adj = $radjusted_levels->[$Kbeg];
- if ( $level_adj < 0 ) { $level_adj = 0 }
- }
- if ( $level_adj == 0 ) {
- $rvao_args->{forget_side_comment} = 1;
- }
+ $rvao_args->{forget_side_comment} =
+ !$self->[_radjusted_levels_]->[$Kbeg];
}
# -----------------------------------
$do_not_pad = 0;
- # Set flag indicating if this line ends in an opening
- # token and is very short, so that a blank line is not
- # needed if the subsequent line is a comment.
- # Examples of what we are looking for:
- # {
- # && (
- # BEGIN {
- # default {
- # sub {
- $self->[_last_output_short_opening_token_]
-
- # line ends in opening token
- # /^[\{\(\[L]$/
- = $is_opening_type{$type_end}
-
- # and either
- && (
- # line has either single opening token
- $Kend == $Kbeg
-
- # or is a single token followed by opening token.
- # Note that sub identifiers have blanks like 'sub doit'
- # $token_beg !~ /\s+/
- || ( $Kend - $Kbeg <= 2 && index( $token_beg, SPACE ) < 0 )
- )
+ } ## end of loop to output each line
- # and limit total to 10 character widths
- && token_sequence_length( $ibeg, $iend ) <= 10;
+ # Set flag indicating if the last line ends in an opening
+ # token and is very short, so that a blank line is not
+ # needed if the subsequent line is a comment.
+ # Examples of what we are looking for:
+ # {
+ # && (
+ # BEGIN {
+ # default {
+ # sub {
+ $self->[_last_output_short_opening_token_]
+
+ # line ends in opening token
+ # /^[\{\(\[L]$/
+ = $is_opening_type{$type_end}
+
+ # and either
+ && (
+ # line has either single opening token
+ $iend_next == $ibeg_next
+
+ # or is a single token followed by opening token.
+ # Note that sub identifiers have blanks like 'sub doit'
+ # $token_beg !~ /\s+/
+ || ( $iend_next - $ibeg_next <= 2 && index( $token_beg, SPACE ) < 0 )
+ )
- } ## end of loop to output each line
+ # and limit total to 10 character widths
+ && token_sequence_length( $ibeg_next, $iend_next ) <= 10;
# remember indentation of lines containing opening containers for
- # later use by sub final_indentation_adjustment
- $self->save_opening_indentation( $ri_first, $ri_last, $rindentation_list )
- if ( !$is_block_comment );
+ # later use by sub get_final_indentation
+ $self->save_opening_indentation( $ri_first, $ri_last,
+ $rindentation_list, $this_batch->[_runmatched_opening_indexes_] )
+ if ( $this_batch->[_runmatched_opening_indexes_]
+ || $types_to_go[$max_index_to_go] eq 'q' );
# output any new -cscw block comment
if ($cscw_block_comment) {
# Verify that the summed lengths are correct. We want to be sure that
# errors have not been introduced by programming changes. Summed lengths
- # are defined in sub $store_token. Operations like padding and unmasking
+ # are defined in sub store_token. Operations like padding and unmasking
# semicolons can change token lengths, but those operations are expected to
# update the summed lengths when they make changes. So the summed lengths
# should always be correct.
my $len_tok_i = $token_lengths_to_go[$i];
my $KK = $K_to_go[$i];
my $len_tok_K;
- if ( defined($KK) ) { $len_tok_K = $rLL->[$KK]->[_TOKEN_LENGTH_] }
+
+ # For --indent-only, there is not always agreement between
+ # token lengths in _rLL_ and token_lengths_to_go, so skip that check.
+ if ( defined($KK) && !$rOpts_indent_only ) {
+ $len_tok_K = $rLL->[$KK]->[_TOKEN_LENGTH_];
+ }
if ( $len_by_sum != $len_tok_i
|| defined($len_tok_K) && $len_by_sum != $len_tok_K )
{
# eq and ne were removed from this list to improve alignment chances
@q = qw(if unless and or err for foreach while until);
@is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
- }
+ } ## end BEGIN
+
+ my $ralignment_type_to_go;
+ my $ralignment_counts;
+ my $ralignment_hash_by_line;
sub set_vertical_alignment_markers {
- # This routine takes the first step toward vertical alignment of the
- # lines of output text. It looks for certain tokens which can serve as
- # vertical alignment markers (such as an '=').
- #
+ my ( $self, $ri_first, $ri_last ) = @_;
+
+ #----------------------------------------------------------------------
+ # This routine looks at output lines for certain tokens which can serve
+ # as vertical alignment markers (such as an '=').
+ #----------------------------------------------------------------------
+
+ # Input parameters:
+ # $ri_first = ref to list of starting line indexes in _to_go arrays
+ # $ri_last = ref to list of ending line indexes in _to_go arrays
+
# Method: We look at each token $i in this output batch and set
# $ralignment_type_to_go->[$i] equal to those tokens at which we would
# accept vertical alignment.
- my ( $self, $ri_first, $ri_last ) = @_;
-
- my $ralignment_type_to_go;
- my $ralignment_counts = [];
- my $ralignment_hash_by_line = [];
+ # Initialize closure (and return) variables:
+ $ralignment_type_to_go = [];
+ $ralignment_counts = [];
+ $ralignment_hash_by_line = [];
# NOTE: closing side comments can insert up to 2 additional tokens
# beyond the original $max_index_to_go, so we need to check ri_last for
# - and nothing to do if we aren't allowed to change whitespace.
# -----------------------------------------------------------------
if ( $max_i <= 0 || !$rOpts_add_whitespace ) {
- return ( $ralignment_type_to_go, $ralignment_counts,
- $ralignment_hash_by_line );
+ goto RETURN;
}
- my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
- my $ris_function_call_paren = $self->[_ris_function_call_paren_];
- my $rLL = $self->[_rLL_];
-
# -------------------------------
# First handle any side comment.
# -------------------------------
my $do_not_align = (
# it is any specially marked side comment
- ( defined($KK) && $rspecial_side_comment_type->{$KK} )
+ ( defined($KK) && $self->[_rspecial_side_comment_type_]->{$KK} )
# or it is a static side comment
|| ( $rOpts->{'static-side-comments'}
# Nothing more to do on this line if -nvc is set
# ----------------------------------------------
if ( !$rOpts_valign_code ) {
- return ( $ralignment_type_to_go, $ralignment_counts,
- $ralignment_hash_by_line );
+ goto RETURN;
}
# -------------------------------------
# Loop over each line of this batch ...
# -------------------------------------
- my $last_vertical_alignment_BEFORE_index;
- my $vert_last_nonblank_type;
- my $vert_last_nonblank_token;
foreach my $line ( 0 .. $max_line ) {
# back up before any side comment
if ( $iend > $i_terminal ) { $iend = $i_terminal }
- my $level_beg = $levels_to_go[$ibeg];
- my $token_beg = $tokens_to_go[$ibeg];
- my $type_beg = $types_to_go[$ibeg];
- my $type_beg_special_char =
- ( $type_beg eq '.' || $type_beg eq ':' || $type_beg eq '?' );
-
- $last_vertical_alignment_BEFORE_index = -1;
- $vert_last_nonblank_type = $type_beg;
- $vert_last_nonblank_token = $token_beg;
-
- # ----------------------------------------------------------------
- # Initialization code merged from 'sub delete_needless_alignments'
- # ----------------------------------------------------------------
- my $i_good_paren = -1;
- my $i_elsif_close = $ibeg - 1;
- my $i_elsif_open = $iend + 1;
- my @imatch_list;
- if ( $type_beg eq 'k' ) {
-
- # Initialization for paren patch: mark a location of a paren we
- # should keep, such as one following something like a leading
- # 'if', 'elsif',
- $i_good_paren = $ibeg + 1;
- if ( $types_to_go[$i_good_paren] eq 'b' ) {
- $i_good_paren++;
- }
-
- # Initialization for 'elsif' patch: remember the paren range of
- # an elsif, and do not make alignments within them because this
- # can cause loss of padding and overall brace alignment in the
- # vertical aligner.
- if ( $token_beg eq 'elsif'
- && $i_good_paren < $iend
- && $tokens_to_go[$i_good_paren] eq '(' )
+ #----------------------------------
+ # Loop over all tokens on this line
+ #----------------------------------
+ $self->set_vertical_alignment_markers_token_loop( $line, $ibeg,
+ $iend );
+ }
+
+ RETURN:
+ return ( $ralignment_type_to_go, $ralignment_counts,
+ $ralignment_hash_by_line );
+ } ## end sub set_vertical_alignment_markers
+
+ sub set_vertical_alignment_markers_token_loop {
+ my ( $self, $line, $ibeg, $iend ) = @_;
+
+ # Set vertical alignment markers for the tokens on one line
+ # of the current output batch. This is done by updating the
+ # three closure variables:
+ # $ralignment_type_to_go
+ # $ralignment_counts
+ # $ralignment_hash_by_line
+
+ # Input parameters:
+ # $line = index of this line in the current batch
+ # $ibeg, $iend = index range of tokens to check in the _to_go arrays
+
+ my $level_beg = $levels_to_go[$ibeg];
+ my $token_beg = $tokens_to_go[$ibeg];
+ my $type_beg = $types_to_go[$ibeg];
+ my $type_beg_special_char =
+ ( $type_beg eq '.' || $type_beg eq ':' || $type_beg eq '?' );
+
+ my $last_vertical_alignment_BEFORE_index = -1;
+ my $vert_last_nonblank_type = $type_beg;
+ my $vert_last_nonblank_token = $token_beg;
+
+ # ----------------------------------------------------------------
+ # Initialization code merged from 'sub delete_needless_alignments'
+ # ----------------------------------------------------------------
+ my $i_good_paren = -1;
+ my $i_elsif_close = $ibeg - 1;
+ my $i_elsif_open = $iend + 1;
+ my @imatch_list;
+ if ( $type_beg eq 'k' ) {
+
+ # Initialization for paren patch: mark a location of a paren we
+ # should keep, such as one following something like a leading
+ # 'if', 'elsif',
+ $i_good_paren = $ibeg + 1;
+ if ( $types_to_go[$i_good_paren] eq 'b' ) {
+ $i_good_paren++;
+ }
+
+ # Initialization for 'elsif' patch: remember the paren range of
+ # an elsif, and do not make alignments within them because this
+ # can cause loss of padding and overall brace alignment in the
+ # vertical aligner.
+ if ( $token_beg eq 'elsif'
+ && $i_good_paren < $iend
+ && $tokens_to_go[$i_good_paren] eq '(' )
+ {
+ $i_elsif_open = $i_good_paren;
+ $i_elsif_close = $mate_index_to_go[$i_good_paren];
+ if ( !defined($i_elsif_close) ) { $i_elsif_close = -1 }
+ }
+ } ## end if ( $type_beg eq 'k' )
+
+ # --------------------------------------------
+ # Loop over each token in this output line ...
+ # --------------------------------------------
+ foreach my $i ( $ibeg + 1 .. $iend ) {
+
+ next if ( $types_to_go[$i] eq 'b' );
+
+ my $type = $types_to_go[$i];
+ my $token = $tokens_to_go[$i];
+ my $alignment_type = EMPTY_STRING;
+
+ # ----------------------------------------------
+ # Check for 'paren patch' : Remove excess parens
+ # ----------------------------------------------
+
+ # Excess alignment of parens can prevent other good alignments.
+ # For example, note the parens in the first two rows of the
+ # following snippet. They would normally get marked for
+ # alignment and aligned as follows:
+
+ # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
+ # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
+ # my $img = new Gimp::Image( $w, $h, RGB );
+
+ # This causes unnecessary paren alignment and prevents the
+ # third equals from aligning. If we remove the unwanted
+ # alignments we get:
+
+ # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
+ # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
+ # my $img = new Gimp::Image( $w, $h, RGB );
+
+ # A rule for doing this which works well is to remove alignment
+ # of parens whose containers do not contain other aligning
+ # tokens, with the exception that we always keep alignment of
+ # the first opening paren on a line (for things like 'if' and
+ # 'elsif' statements).
+ if ( $token eq ')' && @imatch_list ) {
+
+ # undo the corresponding opening paren if:
+ # - it is at the top of the stack
+ # - and not the first overall opening paren
+ # - does not follow a leading keyword on this line
+ my $imate = $mate_index_to_go[$i];
+ if ( !defined($imate) ) { $imate = -1 }
+ if ( $imatch_list[-1] eq $imate
+ && ( $ibeg > 1 || @imatch_list > 1 )
+ && $imate > $i_good_paren )
{
- $i_elsif_open = $i_good_paren;
- $i_elsif_close = $mate_index_to_go[$i_good_paren];
- }
- } ## end if ( $type_beg eq 'k' )
-
- # --------------------------------------------
- # Loop over each token in this output line ...
- # --------------------------------------------
- foreach my $i ( $ibeg + 1 .. $iend ) {
-
- next if ( $types_to_go[$i] eq 'b' );
-
- my $type = $types_to_go[$i];
- my $token = $tokens_to_go[$i];
- my $alignment_type = EMPTY_STRING;
-
- # ----------------------------------------------
- # Check for 'paren patch' : Remove excess parens
- # ----------------------------------------------
-
- # Excess alignment of parens can prevent other good alignments.
- # For example, note the parens in the first two rows of the
- # following snippet. They would normally get marked for
- # alignment and aligned as follows:
-
- # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
- # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
- # my $img = new Gimp::Image( $w, $h, RGB );
-
- # This causes unnecessary paren alignment and prevents the
- # third equals from aligning. If we remove the unwanted
- # alignments we get:
-
- # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
- # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
- # my $img = new Gimp::Image( $w, $h, RGB );
-
- # A rule for doing this which works well is to remove alignment
- # of parens whose containers do not contain other aligning
- # tokens, with the exception that we always keep alignment of
- # the first opening paren on a line (for things like 'if' and
- # 'elsif' statements).
- if ( $token eq ')' && @imatch_list ) {
-
- # undo the corresponding opening paren if:
- # - it is at the top of the stack
- # - and not the first overall opening paren
- # - does not follow a leading keyword on this line
- my $imate = $mate_index_to_go[$i];
- if ( $imatch_list[-1] eq $imate
- && ( $ibeg > 1 || @imatch_list > 1 )
- && $imate > $i_good_paren )
- {
- if ( $ralignment_type_to_go->[$imate] ) {
- $ralignment_type_to_go->[$imate] = EMPTY_STRING;
- $ralignment_counts->[$line]--;
- delete $ralignment_hash_by_line->[$line]->{$imate};
- }
- pop @imatch_list;
+ if ( $ralignment_type_to_go->[$imate] ) {
+ $ralignment_type_to_go->[$imate] = EMPTY_STRING;
+ $ralignment_counts->[$line]--;
+ delete $ralignment_hash_by_line->[$line]->{$imate};
}
+ pop @imatch_list;
}
+ }
- # do not align tokens at lower level than start of line
- # except for side comments
- if ( $levels_to_go[$i] < $level_beg ) {
- next;
- }
+ # do not align tokens at lower level than start of line
+ # except for side comments
+ if ( $levels_to_go[$i] < $level_beg ) {
+ next;
+ }
- #--------------------------------------------------------
- # First see if we want to align BEFORE this token
- #--------------------------------------------------------
+ #--------------------------------------------------------
+ # First see if we want to align BEFORE this token
+ #--------------------------------------------------------
- # The first possible token that we can align before
- # is index 2 because: 1) it doesn't normally make sense to
- # align before the first token and 2) the second
- # token must be a blank if we are to align before
- # the third
- if ( $i < $ibeg + 2 ) { }
+ # The first possible token that we can align before
+ # is index 2 because: 1) it doesn't normally make sense to
+ # align before the first token and 2) the second
+ # token must be a blank if we are to align before
+ # the third
+ if ( $i < $ibeg + 2 ) { }
- # must follow a blank token
- elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
+ # must follow a blank token
+ elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
- # otherwise, do not align two in a row to create a
- # blank field
- elsif ( $last_vertical_alignment_BEFORE_index == $i - 2 ) { }
+ # otherwise, do not align two in a row to create a
+ # blank field
+ elsif ( $last_vertical_alignment_BEFORE_index == $i - 2 ) { }
- # align before one of these keywords
- # (within a line, since $i>1)
- elsif ( $type eq 'k' ) {
+ # align before one of these keywords
+ # (within a line, since $i>1)
+ elsif ( $type eq 'k' ) {
- # /^(if|unless|and|or|eq|ne)$/
- if ( $is_vertical_alignment_keyword{$token} ) {
- $alignment_type = $token;
- }
+ # /^(if|unless|and|or|eq|ne)$/
+ if ( $is_vertical_alignment_keyword{$token} ) {
+ $alignment_type = $token;
}
+ }
- # align qw in a 'use' statement (issue git #93)
- elsif ( $type eq 'q' ) {
- if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] eq 'use' ) {
- $alignment_type = $type;
- }
+ # align qw in a 'use' statement (issue git #93)
+ elsif ( $type eq 'q' ) {
+ if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] eq 'use' ) {
+ $alignment_type = $type;
}
+ }
- # align before one of these types..
- elsif ( $is_vertical_alignment_type{$type}
- && !$is_not_vertical_alignment_token{$token} )
+ # align before one of these types..
+ elsif ( $is_vertical_alignment_type{$type}
+ && !$is_not_vertical_alignment_token{$token} )
+ {
+ $alignment_type = $token;
+
+ # Do not align a terminal token. Although it might
+ # occasionally look ok to do this, this has been found to be
+ # a good general rule. The main problems are:
+ # (1) that the terminal token (such as an = or :) might get
+ # moved far to the right where it is hard to see because
+ # nothing follows it, and
+ # (2) doing so may prevent other good alignments.
+ # Current exceptions are && and || and =>
+ if ( $i == $iend ) {
+ $alignment_type = EMPTY_STRING
+ unless ( $is_terminal_alignment_type{$type} );
+ }
+
+ # Do not align leading ': (' or '. ('. This would prevent
+ # alignment in something like the following:
+ # $extra_space .=
+ # ( $input_line_number < 10 ) ? " "
+ # : ( $input_line_number < 100 ) ? " "
+ # : "";
+ # or
+ # $code =
+ # ( $case_matters ? $accessor : " lc($accessor) " )
+ # . ( $yesno ? " eq " : " ne " )
+
+ # Also, do not align a ( following a leading ? so we can
+ # align something like this:
+ # $converter{$_}->{ushortok} =
+ # $PDL::IO::Pic::biggrays
+ # ? ( m/GIF/ ? 0 : 1 )
+ # : ( m/GIF|RAST|IFF/ ? 0 : 1 );
+ if ( $type_beg_special_char
+ && $i == $ibeg + 2
+ && $types_to_go[ $i - 1 ] eq 'b' )
{
- $alignment_type = $token;
+ $alignment_type = EMPTY_STRING;
+ }
- # Do not align a terminal token. Although it might
- # occasionally look ok to do this, this has been found to be
- # a good general rule. The main problems are:
- # (1) that the terminal token (such as an = or :) might get
- # moved far to the right where it is hard to see because
- # nothing follows it, and
- # (2) doing so may prevent other good alignments.
- # Current exceptions are && and || and =>
- if ( $i == $iend ) {
- $alignment_type = EMPTY_STRING
- unless ( $is_terminal_alignment_type{$type} );
- }
+ # Certain tokens only align at the same level as the
+ # initial line level
+ if ( $is_low_level_alignment_token{$token}
+ && $levels_to_go[$i] != $level_beg )
+ {
+ $alignment_type = EMPTY_STRING;
+ }
- # Do not align leading ': (' or '. ('. This would prevent
- # alignment in something like the following:
- # $extra_space .=
- # ( $input_line_number < 10 ) ? " "
- # : ( $input_line_number < 100 ) ? " "
- # : "";
- # or
- # $code =
- # ( $case_matters ? $accessor : " lc($accessor) " )
- # . ( $yesno ? " eq " : " ne " )
-
- # Also, do not align a ( following a leading ? so we can
- # align something like this:
- # $converter{$_}->{ushortok} =
- # $PDL::IO::Pic::biggrays
- # ? ( m/GIF/ ? 0 : 1 )
- # : ( m/GIF|RAST|IFF/ ? 0 : 1 );
- if ( $type_beg_special_char
- && $i == $ibeg + 2
- && $types_to_go[ $i - 1 ] eq 'b' )
- {
- $alignment_type = EMPTY_STRING;
- }
+ if ( $token eq '(' ) {
- # Certain tokens only align at the same level as the
- # initial line level
- if ( $is_low_level_alignment_token{$token}
- && $levels_to_go[$i] != $level_beg )
+ # For a paren after keyword, only align if-like parens,
+ # such as:
+ # if ( $a ) { &a }
+ # elsif ( $b ) { &b }
+ # ^-------------------aligned parens
+ if ( $vert_last_nonblank_type eq 'k'
+ && !$is_if_unless_elsif{$vert_last_nonblank_token} )
{
$alignment_type = EMPTY_STRING;
}
- # For a paren after keyword, only align something like this:
- # if ( $a ) { &a }
- # elsif ( $b ) { &b }
- if ( $token eq '(' ) {
-
- if ( $vert_last_nonblank_type eq 'k' ) {
- $alignment_type = EMPTY_STRING
- unless
- $is_if_unless_elsif{$vert_last_nonblank_token};
- ##unless $vert_last_nonblank_token =~ /^(if|unless|elsif)$/;
- }
-
- # Do not align a spaced-function-paren if requested.
- # Issue git #53, #73.
- if ( !$rOpts_function_paren_vertical_alignment ) {
- my $seqno = $type_sequence_to_go[$i];
- if ( $ris_function_call_paren->{$seqno} ) {
- $alignment_type = EMPTY_STRING;
- }
- }
-
- # make () align with qw in a 'use' statement (git #93)
- if ( $tokens_to_go[0] eq 'use'
- && $types_to_go[0] eq 'k'
- && $mate_index_to_go[$i] == $i + 1 )
- {
- $alignment_type = 'q';
- }
+ # Do not align a spaced-function-paren if requested.
+ # Issue git #53, #73.
+ if ( !$rOpts_function_paren_vertical_alignment ) {
+ my $seqno = $type_sequence_to_go[$i];
+ $alignment_type = EMPTY_STRING
+ if ( $self->[_ris_function_call_paren_]->{$seqno} );
}
- # be sure the alignment tokens are unique
- # This didn't work well: reason not determined
- # if ($token ne $type) {$alignment_type .= $type}
- }
-
- # NOTE: This is deactivated because it causes the previous
- # if/elsif alignment to fail
- #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
- #{ $alignment_type = $type; }
+ # make () align with qw in a 'use' statement (git #93)
+ if ( $tokens_to_go[0] eq 'use'
+ && $types_to_go[0] eq 'k'
+ && defined( $mate_index_to_go[$i] )
+ && $mate_index_to_go[$i] == $i + 1 )
+ {
+ $alignment_type = 'q';
- if ($alignment_type) {
- $last_vertical_alignment_BEFORE_index = $i;
+ ## Note on discussion git #101. We could make this
+ ## a separate type '()' to separate it from qw's:
+ ## $alignment_type =
+ ## $rOpts_valign_empty_parens_with_qw ? 'q' : '()';
+ }
}
- #--------------------------------------------------------
- # Next see if we want to align AFTER the previous nonblank
- #--------------------------------------------------------
+ # be sure the alignment tokens are unique
+ # This experiment didn't work well: reason not determined
+ # if ($token ne $type) {$alignment_type .= $type}
+ }
- # We want to line up ',' and interior ';' tokens, with the added
- # space AFTER these tokens. (Note: interior ';' is included
- # because it may occur in short blocks).
- elsif (
+ # NOTE: This is deactivated because it causes the previous
+ # if/elsif alignment to fail
+ #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
+ #{ $alignment_type = $type; }
- # we haven't already set it
- ##!$alignment_type
+ if ($alignment_type) {
+ $last_vertical_alignment_BEFORE_index = $i;
+ }
- # previous token IS one of these:
- (
- $vert_last_nonblank_type eq ','
- || $vert_last_nonblank_type eq ';'
- )
+ #--------------------------------------------------------
+ # Next see if we want to align AFTER the previous nonblank
+ #--------------------------------------------------------
- # and its not the first token of the line
- ## && $i > $ibeg
+ # We want to line up ',' and interior ';' tokens, with the added
+ # space AFTER these tokens. (Note: interior ';' is included
+ # because it may occur in short blocks).
+ elsif (
- # and it follows a blank
- && $types_to_go[ $i - 1 ] eq 'b'
+ # previous token IS one of these:
+ (
+ $vert_last_nonblank_type eq ','
+ || $vert_last_nonblank_type eq ';'
+ )
- # and it's NOT one of these
- && !$is_closing_token{$type}
+ # and it follows a blank
+ && $types_to_go[ $i - 1 ] eq 'b'
- # then go ahead and align
- )
+ # and it's NOT one of these
+ && !$is_closing_token{$type}
- {
- $alignment_type = $vert_last_nonblank_type;
- }
+ # then go ahead and align
+ )
- #-----------------------
- # Set the alignment type
- #-----------------------
- if ($alignment_type) {
+ {
+ $alignment_type = $vert_last_nonblank_type;
+ }
- # but do not align the opening brace of an anonymous sub
- if ( $token eq '{'
- && $block_type_to_go[$i] =~ /$ASUB_PATTERN/ )
- {
+ #-----------------------
+ # Set the alignment type
+ #-----------------------
+ if ($alignment_type) {
- }
+ # but do not align the opening brace of an anonymous sub
+ if ( $token eq '{'
+ && $block_type_to_go[$i]
+ && $block_type_to_go[$i] =~ /$ASUB_PATTERN/ )
+ {
- # and do not make alignments within 'elsif' parens
- elsif ( $i > $i_elsif_open && $i < $i_elsif_close ) {
+ }
- }
+ # and do not make alignments within 'elsif' parens
+ elsif ( $i > $i_elsif_open && $i < $i_elsif_close ) {
- # and ignore any tokens which have leading padded spaces
- # example: perl527/lop.t
- elsif ( substr( $alignment_type, 0, 1 ) eq SPACE ) {
+ }
- }
+ # and ignore any tokens which have leading padded spaces
+ # example: perl527/lop.t
+ elsif ( substr( $alignment_type, 0, 1 ) eq SPACE ) {
- else {
- $ralignment_type_to_go->[$i] = $alignment_type;
- $ralignment_hash_by_line->[$line]->{$i} =
- $alignment_type;
- $ralignment_counts->[$line]++;
- push @imatch_list, $i;
- }
}
- $vert_last_nonblank_type = $type;
- $vert_last_nonblank_token = $token;
+ else {
+ $ralignment_type_to_go->[$i] = $alignment_type;
+ $ralignment_hash_by_line->[$line]->{$i} = $alignment_type;
+ $ralignment_counts->[$line]++;
+ push @imatch_list, $i;
+ }
}
+
+ $vert_last_nonblank_type = $type;
+ $vert_last_nonblank_token = $token;
}
+ return;
+ } ## end sub set_vertical_alignment_markers_token_loop
- return ( $ralignment_type_to_go, $ralignment_counts,
- $ralignment_hash_by_line );
- } ## end sub set_vertical_alignment_markers
} ## end closure set_vertical_alignment_markers
sub make_vertical_alignments {
#---------------------------------------------------------
# Step 1: Define the alignment tokens for the entire batch
#---------------------------------------------------------
- my ( $ralignment_type_to_go, $ralignment_counts, $ralignment_hash_by_line )
- = $self->set_vertical_alignment_markers( $ri_first, $ri_last );
+ my ( $ralignment_type_to_go, $ralignment_counts, $ralignment_hash_by_line );
+
+ # We only need to make this call if vertical alignment of code is
+ # requested or if a line might have a side comment.
+ if ( $rOpts_valign_code
+ || $types_to_go[$max_index_to_go] eq '#' )
+ {
+ ( $ralignment_type_to_go, $ralignment_counts, $ralignment_hash_by_line )
+ = $self->set_vertical_alignment_markers( $ri_first, $ri_last );
+ }
#----------------------------------------------
# Step 2: Break each line into alignment fields
# Undo continuation indentation in certain sequences
my ( $self, $ri_first, $ri_last, $rix_seqno_controlling_ci ) = @_;
my ( $line_1, $line_2, $lev_last );
- my $this_line_is_semicolon_terminated;
my $max_line = @{$ri_first} - 1;
my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
# On a very large list test case, this new coding dropped the run time
# of this routine from 30 seconds to 169 milliseconds.
my @i_controlling_ci;
- if ( @{$rix_seqno_controlling_ci} ) {
+ if ( $rix_seqno_controlling_ci && @{$rix_seqno_controlling_ci} ) {
my @tmp = reverse @{$rix_seqno_controlling_ci};
my $ix_next = pop @tmp;
foreach my $line ( 0 .. $max_line ) {
# chain continues...
# check for chain ending at end of a statement
- if ( $line == $max_line ) {
+ my $is_semicolon_terminated = (
+ $line == $max_line
+ && (
+ $types_to_go[$iend] eq ';'
- # see of this line ends a statement
- $this_line_is_semicolon_terminated =
- $types_to_go[$iend] eq ';'
+ # with possible side comment
+ || ( $types_to_go[$iend] eq '#'
+ && $iend - $ibeg >= 2
+ && $types_to_go[ $iend - 2 ] eq ';'
+ && $types_to_go[ $iend - 1 ] eq 'b' )
+ )
+ );
- # with possible side comment
- || ( $types_to_go[$iend] eq '#'
- && $iend - $ibeg >= 2
- && $types_to_go[ $iend - 2 ] eq ';'
- && $types_to_go[ $iend - 1 ] eq 'b' );
- }
$line_2 = $line
- if ($this_line_is_semicolon_terminated);
+ if ($is_semicolon_terminated);
}
else {
# SECTION 2: Undo ci at cuddled blocks
#-------------------------------------
- # Note that sub final_indentation_adjustment will be called later to
+ # Note that sub get_final_indentation will be called later to
# actually do this, but for now we will tentatively mark cuddled
# lines with ci=0 so that the the -xci loop which follows will be
# correct at cuddles.
$terminal_type = $types_to_go[ $iend - 2 ];
}
}
- if ( $terminal_type eq '{' ) {
- my $Kbeg = $K_to_go[$ibeg];
+
+ # Patch for rt144979, part 2. Coordinated with part 1.
+ # Skip cuddled braces.
+ my $seqno_beg = $type_sequence_to_go[$ibeg];
+ my $is_cuddled_closing_brace = $seqno_beg
+ && $self->[_ris_cuddled_closing_brace_]->{$seqno_beg};
+
+ if ( $terminal_type eq '{' && !$is_cuddled_closing_brace ) {
$ci_levels_to_go[$ibeg] = 0;
}
}
# &Error_OutOfRange;
# }
#
- my ( $self, $ri_first, $ri_last, $peak_batch_size, $starting_in_quote )
- = @_;
+ my ( $self, $ri_first, $ri_last, $starting_in_quote ) = @_;
my $max_line = @{$ri_first} - 1;
my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces,
# : $i == 2 ? ( "Then", "Rarity" )
# : ( "Then", "Name" );
- if ( $max_line > 1 ) {
- my $leading_token = $tokens_to_go[$ibeg_next];
- my $tokens_differ;
-
- # never indent line 1 of a '.' series because
- # previous line is most likely at same level.
- # TODO: we should also look at the leading_spaces
- # of the last output line and skip if it is same
- # as this line.
- next if ( $leading_token eq '.' );
-
- my $count = 1;
- foreach my $l ( 2 .. 3 ) {
- last if ( $line + $l > $max_line );
- my $ibeg_next_next = $ri_first->[ $line + $l ];
- if ( $tokens_to_go[$ibeg_next_next] ne
- $leading_token )
- {
- $tokens_differ = 1;
- last;
- }
- $count++;
- }
- next if ($tokens_differ);
- next if ( $count < 3 && $leading_token ne ':' );
- $ipad = $ibeg;
- }
- else {
- next;
+ next if ( $max_line <= 1 );
+
+ my $leading_token = $tokens_to_go[$ibeg_next];
+ my $tokens_differ;
+
+ # never indent line 1 of a '.' series because
+ # previous line is most likely at same level.
+ # TODO: we should also look at the leading_spaces
+ # of the last output line and skip if it is same
+ # as this line.
+ next if ( $leading_token eq '.' );
+
+ my $count = 1;
+ foreach my $l ( 2 .. 3 ) {
+ last if ( $line + $l > $max_line );
+ $count++;
+ my $ibeg_next_next = $ri_first->[ $line + $l ];
+ next
+ if ( $tokens_to_go[$ibeg_next_next] eq
+ $leading_token );
+ $tokens_differ = 1;
+ last;
}
+ next if ($tokens_differ);
+ next if ( $count < 3 && $leading_token ne ':' );
+ $ipad = $ibeg;
}
}
}
# find any unclosed container
next
unless ( $type_sequence_to_go[$i]
+ && defined( $mate_index_to_go[$i] )
&& $mate_index_to_go[$i] > $iend );
# find next nonblank token to pad
# an editor. In that case either the user will see and
# fix the problem or it will be corrected next time the
# entire file is processed with perltidy.
+ my $this_batch = $self->[_this_batch_];
+ my $peak_batch_size = $this_batch->[_peak_batch_size_];
next if ( $ipad == 0 && $peak_batch_size <= 1 );
-## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
-## IT DID MORE HARM THAN GOOD
-## ceil(
-## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
-## / $upem
-## ),
-## # do not put leading padding for just 2 lines of math
-## if ( $ipad == $ibeg
-## && $line > 0
-## && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
-## && $is_math_op{$type_next}
-## && $line + 2 <= $max_line )
-## {
-## my $ibeg_next_next = $ri_first->[ $line + 2 ];
-## my $type_next_next = $types_to_go[$ibeg_next_next];
-## next if !$is_math_op{$type_next_next};
-## }
-
# next line must not be at greater depth
my $iend_next = $ri_last->[ $line + 1 ];
next
$tok = SPACE x $pad_spaces . $tok;
$tok_len += $pad_spaces;
}
+ elsif ( $pad_spaces == 0 ) {
+ return;
+ }
elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq SPACE ) {
$tok = EMPTY_STRING;
$tok_len = 0;
else {
# shouldn't happen
+ DEVEL_MODE
+ && Fault("unexpected request for pad spaces = $pad_spaces\n");
return;
}
return;
} ## end sub pad_token
+sub xlp_tweak {
+
+ # Remove one indentation space from unbroken containers marked with
+ # 'K_extra_space'. These are mostly two-line lists with short names
+ # formatted with -xlp -pt=2.
+ #
+ # Before this fix (extra space in line 2):
+ # is($module->VERSION, $expected,
+ # "$main_module->VERSION matches $module->VERSION ($expected)");
+ #
+ # After this fix:
+ # is($module->VERSION, $expected,
+ # "$main_module->VERSION matches $module->VERSION ($expected)");
+ #
+ # Notes:
+ # - This fixes issue git #106
+ # - This must be called after 'set_logical_padding'.
+ # - This is currently only applied to -xlp. It would also work for -lp
+ # but that style is essentially frozen.
+
+ my ( $self, $ri_first, $ri_last ) = @_;
+
+ # Must be 2 or more lines
+ return unless ( @{$ri_first} > 1 );
+
+ # Pull indentation object from start of second line
+ my $ibeg_1 = $ri_first->[1];
+ my $lp_object = $leading_spaces_to_go[$ibeg_1];
+ return if ( !ref($lp_object) );
+
+ # This only applies to an indentation object with a marked token
+ my $K_extra_space = $lp_object->get_K_extra_space();
+ return unless ($K_extra_space);
+
+ # Look for the marked token within the first line of this batch
+ my $ibeg_0 = $ri_first->[0];
+ my $iend_0 = $ri_last->[0];
+ my $ii = $ibeg_0 + $K_extra_space - $K_to_go[$ibeg_0];
+ return if ( $ii <= $ibeg_0 || $ii > $iend_0 );
+
+ # Skip padded tokens, they have already been aligned
+ my $tok = $tokens_to_go[$ii];
+ return if ( substr( $tok, 0, 1 ) eq SPACE );
+
+ # Skip 'if'-like statements, this does not improve them
+ return
+ if ( $types_to_go[$ibeg_0] eq 'k'
+ && $is_if_unless_elsif{ $tokens_to_go[$ibeg_0] } );
+
+ # Looks okay, reduce indentation by 1 space if possible
+ my $spaces = $lp_object->get_spaces();
+ if ( $spaces > 0 ) {
+ $lp_object->decrease_SPACES(1);
+ }
+
+ return;
+} ## end sub xlp_tweak
+
{ ## begin closure make_alignment_patterns
my %keyword_map;
@{is_binary_type}{@q} = (1) x scalar(@q);
# token keywords which prevent using leading word as a container name
- @_ = qw(and or err eq ne cmp);
- @is_binary_keyword{@_} = (1) x scalar(@_);
+ @q = qw(and or err eq ne cmp);
+ @is_binary_keyword{@q} = (1) x scalar(@q);
# Some common function calls whose args can be aligned. These do not
# give good alignments if the lengths differ significantly.
##'is_deeply' => 'is', # poor; names lengths too different
);
- }
+ } ## end BEGIN
sub make_alignment_patterns {
- # Here we do some important preliminary work for the
- # vertical aligner. We create four arrays for one
- # output line. These arrays contain strings that can
- # be tested by the vertical aligner to see if
- # consecutive lines can be aligned vertically.
+ my ( $self, $ibeg, $iend, $ralignment_type_to_go, $alignment_count,
+ $ralignment_hash )
+ = @_;
+
+ #------------------------------------------------------------------
+ # This sub creates arrays of vertical alignment info for one output
+ # line.
+ #------------------------------------------------------------------
+
+ # Input parameters:
+ # $ibeg, $iend - index range of this line in the _to_go arrays
+ # $ralignment_type_to_go - alignment type of tokens, like '=', if any
+ # $alignment_count - number of alignment tokens in the line
+ # $ralignment_hash - this contains all of the alignments for this
+ # line. It is not yet used but is available for future coding in
+ # case there is a need to do a preliminary scan of alignment tokens.
+
+ # The arrays which are created contain strings that can be tested by
+ # the vertical aligner to see if consecutive lines can be aligned
+ # vertically.
#
# The four arrays are indexed on the vertical
# alignment fields and are:
# allowed, even when the alignment tokens match.
# @field_lengths - the display width of each field
- my ( $self, $ibeg, $iend, $ralignment_type_to_go, $alignment_count,
- $ralignment_hash )
- = @_;
-
- # The var $ralignment_hash contains all of the alignments for this
- # line. It is not yet used but is available for future coding in case
- # there is a need to do a preliminary scan of the alignment tokens.
if (DEVEL_MODE) {
my $new_count = 0;
if ( defined($ralignment_hash) ) {
# Shortcut for lines without alignments
# -------------------------------------
if ( !$alignment_count ) {
- my $rtokens = [];
- my $rfield_lengths = [ $summed_lengths_to_go[ $iend + 1 ] -
+ my $rtokens = [];
+ my $rfield_lengths =
+ [ $summed_lengths_to_go[ $iend + 1 ] -
$summed_lengths_to_go[$ibeg] ];
my $rpatterns;
my $rfields;
my $i_start = $ibeg;
my $depth = 0;
+ my $i_depth_prev = $i_start;
+ my $depth_prev = $depth;
my %container_name = ( 0 => EMPTY_STRING );
my @tokens = ();
if ( $ibeg == 0 && $iend == $max_index_to_go ) {
my $iterm = $max_index_to_go;
if ( $types_to_go[$iterm] eq '#' ) {
- $iterm = $iprev_to_go[$iterm];
+ $iterm = iprev_to_go($iterm);
}
# Alignment lines ending like '=> sub {'; fixes issue c093
&& !$is_my_local_our{ $tokens_to_go[$ibeg] }
&& $levels_to_go[$ibeg] eq $levels_to_go[$iterm] )
{
-
- # Make a container name by combining all leading barewords,
- # keywords and functions.
- my $name = EMPTY_STRING;
- my $count = 0;
- my $count_max;
- my $iname_end;
- my $ilast_blank;
- for ( $ibeg .. $iterm ) {
- my $type = $types_to_go[$_];
-
- if ( $type eq 'b' ) {
- $ilast_blank = $_;
- next;
- }
-
- my $token = $tokens_to_go[$_];
-
- # Give up if we find an opening paren, binary operator or
- # comma within or after the proposed container name.
- if ( $token eq '('
- || $is_binary_type{$type}
- || $type eq 'k' && $is_binary_keyword{$token} )
- {
- $name = EMPTY_STRING;
- last;
- }
-
- # The container name is only built of certain types:
- last if ( !$is_kwU{$type} );
-
- # Normally it is made of one word, but two words for 'use'
- if ( $count == 0 ) {
- if ( $type eq 'k'
- && $is_use_like{ $tokens_to_go[$_] } )
- {
- $count_max = 2;
- }
- else {
- $count_max = 1;
- }
- }
- elsif ( defined($count_max) && $count >= $count_max ) {
- last;
- }
-
- if ( defined( $name_map{$token} ) ) {
- $token = $name_map{$token};
- }
-
- $name .= SPACE . $token;
- $iname_end = $_;
- $count++;
- }
-
- # Require a space after the container name token(s)
- if ( $name
- && defined($ilast_blank)
- && $ilast_blank > $iname_end )
- {
- $name = substr( $name, 1 );
- $container_name{'0'} = $name;
- }
+ $container_name{'0'} =
+ make_uncontained_comma_name( $iterm, $ibeg, $iend );
}
}
- # --------------------
- # Loop over all tokens
- # --------------------
+ #--------------------------------
+ # Begin main loop over all tokens
+ #--------------------------------
my $j = 0; # field index
$patterns[0] = EMPTY_STRING;
my %token_count;
for my $i ( $ibeg .. $iend ) {
- # Keep track of containers balanced on this line only.
+ #-------------------------------------------------------------
+ # Part 1: keep track of containers balanced on this line only.
+ #-------------------------------------------------------------
# These are used below to prevent unwanted cross-line alignments.
# Unbalanced containers already avoid aligning across
# container boundaries.
-
- my $type = $types_to_go[$i];
- my $token = $tokens_to_go[$i];
- my $depth_last = $depth;
+ my $type = $types_to_go[$i];
if ( $type_sequence_to_go[$i] ) {
+ my $token = $tokens_to_go[$i];
if ( $is_opening_token{$token} ) {
# if container is balanced on this line...
my $i_mate = $mate_index_to_go[$i];
+ if ( !defined($i_mate) ) { $i_mate = -1 }
if ( $i_mate > $i && $i_mate <= $iend ) {
+ $i_depth_prev = $i;
+ $depth_prev = $depth;
$depth++;
# Append the previous token name to make the container name
# is_d( { foo => $a, bar => $a }, { foo => $b, bar => $c } );
# is_d( [ \$a, \$a ], [ \$b, \$c ] );
- my $name = $token;
- if ( $token eq '(' ) {
- $name = $self->make_paren_name($i);
- }
+ my $name =
+ $token eq '(' ? $self->make_paren_name($i) : $token;
# name cannot be '.', so change to something else if so
if ( $name eq '.' ) { $name = 'dot' }
# if we are not aligning on this paren...
if ( !$ralignment_type_to_go->[$i] ) {
- # Sum length from previous alignment
- my $len = token_sequence_length( $i_start, $i - 1 );
-
- # Minor patch: do not include the length of any '!'.
- # Otherwise, commas in the following line will not
- # match
- # ok( 20, tapprox( ( pdl 2, 3 ), ( pdl 2, 3 ) ) );
- # ok( 21, !tapprox( ( pdl 2, 3 ), ( pdl 2, 4 ) ) );
- if ( grep { $_ eq '!' }
- @types_to_go[ $i_start .. $i - 1 ] )
- {
- $len -= 1;
- }
-
- if ( $i_start == $ibeg ) {
-
- # For first token, use distance from start of
- # line but subtract off the indentation due to
- # level. Otherwise, results could vary with
- # indentation.
- $len +=
- leading_spaces_to_go($ibeg) -
- $levels_to_go[$i_start] *
- $rOpts_indent_columns;
- if ( $len < 0 ) { $len = 0 }
- }
+ my $len = length_tag( $i, $ibeg, $i_start );
# tack this length onto the container name to try
# to make a unique token name
} ## end if ( $is_opening_token...)
elsif ( $is_closing_type{$token} ) {
+ $i_depth_prev = $i;
+ $depth_prev = $depth;
$depth-- if $depth > 0;
}
} ## end if ( $type_sequence_to_go...)
- # if we find a new synchronization token, we are done with
- # a field
+ #------------------------------------------------------------
+ # Part 2: if we find a new synchronization token, we are done
+ # with a field
+ #------------------------------------------------------------
if ( $i > $i_start && $ralignment_type_to_go->[$i] ) {
my $tok = my $raw_tok = $ralignment_type_to_go->[$i];
# If we are at an opening token which increased depth, we have
# to use the name from the previous depth.
+ my $depth_last = $i == $i_depth_prev ? $depth_prev : $depth;
my $depth_p =
( $depth_last < $depth ? $depth_last : $depth );
if ( $container_name{$depth_p} ) {
$patterns[$j] = EMPTY_STRING;
} ## end if ( new synchronization token
- # continue accumulating tokens
+ #-----------------------------------------------
+ # Part 3: continue accumulating the next pattern
+ #-----------------------------------------------
# for keywords we have to use the actual text
if ( $type eq 'k' ) {
# everything else
else {
$patterns[$j] .= $type;
- }
- # remove any zero-level name at first fat comma
- if ( $depth == 0 && $type eq '=>' ) {
- $container_name{$depth} = EMPTY_STRING;
+ # remove any zero-level name at first fat comma
+ if ( $depth == 0 && $type eq '=>' ) {
+ $container_name{$depth} = EMPTY_STRING;
+ }
}
+
} ## end for my $i ( $ibeg .. $iend)
- # done with this line .. join text of tokens to make the last field
+ #---------------------------------------------------------------
+ # End of main loop .. join text of tokens to make the last field
+ #---------------------------------------------------------------
push( @fields,
join( EMPTY_STRING, @tokens_to_go[ $i_start .. $iend ] ) );
push @field_lengths,
return [ \@tokens, \@fields, \@patterns, \@field_lengths ];
} ## end sub make_alignment_patterns
+ sub make_uncontained_comma_name {
+ my ( $iterm, $ibeg, $iend ) = @_;
+
+ # Make a container name by combining all leading barewords,
+ # keywords and functions.
+ my $name = EMPTY_STRING;
+ my $count = 0;
+ my $count_max;
+ my $iname_end;
+ my $ilast_blank;
+ for ( $ibeg .. $iterm ) {
+ my $type = $types_to_go[$_];
+
+ if ( $type eq 'b' ) {
+ $ilast_blank = $_;
+ next;
+ }
+
+ my $token = $tokens_to_go[$_];
+
+ # Give up if we find an opening paren, binary operator or
+ # comma within or after the proposed container name.
+ if ( $token eq '('
+ || $is_binary_type{$type}
+ || $type eq 'k' && $is_binary_keyword{$token} )
+ {
+ $name = EMPTY_STRING;
+ last;
+ }
+
+ # The container name is only built of certain types:
+ last if ( !$is_kwU{$type} );
+
+ # Normally it is made of one word, but two words for 'use'
+ if ( $count == 0 ) {
+ if ( $type eq 'k'
+ && $is_use_like{ $tokens_to_go[$_] } )
+ {
+ $count_max = 2;
+ }
+ else {
+ $count_max = 1;
+ }
+ }
+ elsif ( defined($count_max) && $count >= $count_max ) {
+ last;
+ }
+
+ if ( defined( $name_map{$token} ) ) {
+ $token = $name_map{$token};
+ }
+
+ $name .= SPACE . $token;
+ $iname_end = $_;
+ $count++;
+ }
+
+ # Require a space after the container name token(s)
+ if ( $name
+ && defined($ilast_blank)
+ && $ilast_blank > $iname_end )
+ {
+ $name = substr( $name, 1 );
+ }
+ return $name;
+ } ## end sub make_uncontained_comma_name
+
+ sub length_tag {
+
+ my ( $i, $ibeg, $i_start ) = @_;
+
+ # Generate a line length to be used as a tag for rejecting bad
+ # alignments. The tag is the length of the line from the previous
+ # matching token, or beginning of line, to the function name. This
+ # will allow the vertical aligner to reject undesirable matches.
+
+ # The basic method: sum length from previous alignment
+ my $len = token_sequence_length( $i_start, $i - 1 );
+
+ # Minor patch: do not include the length of any '!'.
+ # Otherwise, commas in the following line will not
+ # match
+ # ok( 20, tapprox( ( pdl 2, 3 ), ( pdl 2, 3 ) ) );
+ # ok( 21, !tapprox( ( pdl 2, 3 ), ( pdl 2, 4 ) ) );
+ if ( grep { $_ eq '!' } @types_to_go[ $i_start .. $i - 1 ] ) {
+ $len -= 1;
+ }
+
+ if ( $i_start == $ibeg ) {
+
+ # For first token, use distance from start of
+ # line but subtract off the indentation due to
+ # level. Otherwise, results could vary with
+ # indentation.
+ $len +=
+ leading_spaces_to_go($ibeg) -
+ $levels_to_go[$i_start] * $rOpts_indent_columns;
+ }
+ if ( $len < 0 ) { $len = 0 }
+ return $len;
+ } ## end sub length_tag
+
} ## end closure make_alignment_patterns
sub make_paren_name {
return $name;
} ## end sub make_paren_name
-{ ## begin closure final_indentation_adjustment
+{ ## begin closure get_final_indentation
my ( $last_indentation_written, $last_unadjusted_indentation,
$last_leading_token );
- sub initialize_final_indentation_adjustment {
+ sub initialize_get_final_indentation {
$last_indentation_written = 0;
$last_unadjusted_indentation = 0;
$last_leading_token = EMPTY_STRING;
return;
- }
+ } ## end sub initialize_get_final_indentation
- sub final_indentation_adjustment {
+ sub get_final_indentation {
- #--------------------------------------------------------------------
- # This routine sets the final indentation of a line in the Formatter.
- #--------------------------------------------------------------------
+ my (
+ $self, #
+
+ $ibeg,
+ $iend,
+ $rfields,
+ $rpatterns,
+ $ri_first,
+ $ri_last,
+ $rindentation_list,
+ $level_jump,
+ $starting_in_quote,
+ $is_static_block_comment,
+
+ ) = @_;
+
+ #--------------------------------------------------------------
+ # This routine makes any necessary adjustments to get the final
+ # indentation of a line in the Formatter.
+ #--------------------------------------------------------------
# It starts with the basic indentation which has been defined for the
# leading token, and then takes into account any options that the user
# undo_ci, which was processed earlier, so care has to be taken to
# keep them coordinated.
- my (
- $self, $ibeg,
- $iend, $rfields,
- $rpatterns, $ri_first,
- $ri_last, $rindentation_list,
- $level_jump, $starting_in_quote,
- $is_static_block_comment,
- ) = @_;
-
- my $rLL = $self->[_rLL_];
- my $Klimit = $self->[_Klimit_];
- my $ris_bli_container = $self->[_ris_bli_container_];
- my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
- my $rwant_reduced_ci = $self->[_rwant_reduced_ci_];
- my $rK_weld_left = $self->[_rK_weld_left_];
-
# Find the last code token of this line
my $i_terminal = $iend;
my $terminal_type = $types_to_go[$iend];
}
}
- my $terminal_block_type = $block_type_to_go[$i_terminal];
- my $is_outdented_line = 0;
+ my $is_outdented_line;
my $type_beg = $types_to_go[$ibeg];
my $token_beg = $tokens_to_go[$ibeg];
- my $block_type_beg = $block_type_to_go[$ibeg];
my $level_beg = $levels_to_go[$ibeg];
+ my $block_type_beg = $block_type_to_go[$ibeg];
my $leading_spaces_beg = $leading_spaces_to_go[$ibeg];
- my $K_beg = $K_to_go[$ibeg];
my $seqno_beg = $type_sequence_to_go[$ibeg];
- my $ibeg_weld_fix = $ibeg;
my $is_closing_type_beg = $is_closing_type{$type_beg};
- my $is_bli_beg = $seqno_beg ? $ris_bli_container->{$seqno_beg} : 0;
# QW INDENTATION PATCH 3:
my $seqno_qw_closing;
# }
#
- # MOJO: Set a flag if this lines begins with ')->'
+ # MOJO patch: Set a flag if this lines begins with ')->'
my $leading_paren_arrow = (
$is_closing_type_beg
&& $token_beg eq ')'
# 2 - vertically align with opening token
# 3 - indent
#---------------------------------------------------------
+
my $adjust_indentation = 0;
- my $default_adjust_indentation = $adjust_indentation;
+ my $default_adjust_indentation = 0;
+ # Parameters needed for option 2, aligning with opening token:
my (
$opening_indentation, $opening_offset,
$is_leading, $opening_exists
);
- # Honor any flag to reduce -ci set by the -bbxi=n option
- if ( $seqno_beg && $rwant_reduced_ci->{$seqno_beg} ) {
+ #-------------------------------------
+ # Section 1A:
+ # if line starts with a sequenced item
+ #-------------------------------------
+ if ( $seqno_beg || $seqno_qw_closing ) {
+
+ # This can be tedious so we let a sub do it
+ (
+ $adjust_indentation,
+ $default_adjust_indentation,
+ $opening_indentation,
+ $opening_offset,
+ $is_leading,
+ $opening_exists,
+
+ ) = $self->get_closing_token_indentation(
+
+ $ibeg,
+ $iend,
+ $ri_first,
+ $ri_last,
+ $rindentation_list,
+ $level_jump,
+ $i_terminal,
+ $is_semicolon_terminated,
+ $seqno_qw_closing,
+
+ );
+ }
+
+ #--------------------------------------------------------
+ # Section 1B:
+ # if at ');', '};', '>;', and '];' of a terminal qw quote
+ #--------------------------------------------------------
+ elsif (
+ substr( $rpatterns->[0], 0, 2 ) eq 'qb'
+ && substr( $rfields->[0], -1, 1 ) eq ';'
+ ## $rpatterns->[0] =~ /^qb*;$/
+ && $rfields->[0] =~ /^([\)\}\]\>]);$/
+ )
+ {
+ if ( $closing_token_indentation{$1} == 0 ) {
+ $adjust_indentation = 1;
+ }
+ else {
+ $adjust_indentation = 3;
+ }
+ }
+
+ #---------------------------------------------------------
+ # Section 2: set indentation according to flag set above
+ #
+ # Select the indentation object to define leading
+ # whitespace. If we are outdenting something like '} } );'
+ # then we want to use one level below the last token
+ # ($i_terminal) in order to get it to fully outdent through
+ # all levels.
+ #---------------------------------------------------------
+ my $indentation;
+ my $lev;
+ my $level_end = $levels_to_go[$iend];
+
+ #------------------------------------
+ # Section 2A: adjust_indentation == 0
+ # No change in indentation
+ #------------------------------------
+ if ( $adjust_indentation == 0 ) {
+ $indentation = $leading_spaces_beg;
+ $lev = $level_beg;
+ }
+
+ #-------------------------------------------------------------------
+ # Secton 2B: adjust_indentation == 1
+ # Change the indentation to be that of a different token on the line
+ #-------------------------------------------------------------------
+ elsif ( $adjust_indentation == 1 ) {
+
+ # Previously, the indentation of the terminal token was used:
+ # OLD CODING:
+ # $indentation = $reduced_spaces_to_go[$i_terminal];
+ # $lev = $levels_to_go[$i_terminal];
+
+ # Generalization for MOJO patch:
+ # Use the lowest level indentation of the tokens on the line.
+ # For example, here we can use the indentation of the ending ';':
+ # } until ($selection > 0 and $selection < 10); # ok to use ';'
+ # But this will not outdent if we use the terminal indentation:
+ # )->then( sub { # use indentation of the ->, not the {
+ # Warning: reduced_spaces_to_go[] may be a reference, do not
+ # do numerical checks with it
+
+ my $i_ind = $ibeg;
+ $indentation = $reduced_spaces_to_go[$i_ind];
+ $lev = $levels_to_go[$i_ind];
+ while ( $i_ind < $i_terminal ) {
+ $i_ind++;
+ if ( $levels_to_go[$i_ind] < $lev ) {
+ $indentation = $reduced_spaces_to_go[$i_ind];
+ $lev = $levels_to_go[$i_ind];
+ }
+ }
+ }
+
+ #--------------------------------------------------------------
+ # Secton 2C: adjust_indentation == 2
+ # Handle indented closing token which aligns with opening token
+ #--------------------------------------------------------------
+ elsif ( $adjust_indentation == 2 ) {
+
+ # handle option to align closing token with opening token
+ $lev = $level_beg;
+
+ # calculate spaces needed to align with opening token
+ my $space_count =
+ get_spaces($opening_indentation) + $opening_offset;
+
+ # Indent less than the previous line.
+ #
+ # Problem: For -lp we don't exactly know what it was if there
+ # were recoverable spaces sent to the aligner. A good solution
+ # would be to force a flush of the vertical alignment buffer, so
+ # that we would know. For now, this rule is used for -lp:
+ #
+ # When the last line did not start with a closing token we will
+ # be optimistic that the aligner will recover everything wanted.
+ #
+ # This rule will prevent us from breaking a hierarchy of closing
+ # tokens, and in a worst case will leave a closing paren too far
+ # indented, but this is better than frequently leaving it not
+ # indented enough.
+ my $last_spaces = get_spaces($last_indentation_written);
- # if this is an opening, it must be alone on the line ...
- if ( $is_closing_type{$type_beg} || $ibeg == $i_terminal ) {
- $adjust_indentation = 1;
+ if ( ref($last_indentation_written)
+ && !$is_closing_token{$last_leading_token} )
+ {
+ $last_spaces +=
+ get_recoverable_spaces($last_indentation_written);
}
- # ... or a single welded unit (fix for b1173)
- elsif ($total_weld_count) {
- my $Kterm = $K_to_go[$i_terminal];
- my $Kterm_test = $rK_weld_left->{$Kterm};
- if ( defined($Kterm_test) && $Kterm_test >= $K_beg ) {
- $Kterm = $Kterm_test;
- }
- if ( $Kterm == $K_beg ) { $adjust_indentation = 1 }
- }
- }
+ # reset the indentation to the new space count if it works
+ # only options are all or none: nothing in-between looks good
+ $lev = $level_beg;
- # Update the $is_bli flag as we go. It is initially 1.
- # We note seeing a leading opening brace by setting it to 2.
- # If we get to the closing brace without seeing the opening then we
- # turn it off. This occurs if the opening brace did not get output
- # at the start of a line, so we will then indent the closing brace
- # in the default way.
- if ( $is_bli_beg && $is_bli_beg == 1 ) {
- my $K_opening_container = $self->[_K_opening_container_];
- my $K_opening = $K_opening_container->{$seqno_beg};
- if ( $K_beg eq $K_opening ) {
- $ris_bli_container->{$seqno_beg} = $is_bli_beg = 2;
+ my $diff = $last_spaces - $space_count;
+ if ( $diff > 0 ) {
+ $indentation = $space_count;
}
- else { $is_bli_beg = 0 }
- }
+ else {
- # QW PATCH for the combination -lp -wn
- # For -lp formatting use $ibeg_weld_fix to get around the problem
- # that with -lp type formatting the opening and closing tokens to not
- # have sequence numbers.
- if ( $seqno_qw_closing && $total_weld_count ) {
- my $i_plus = $inext_to_go[$ibeg];
- if ( $i_plus <= $max_index_to_go ) {
- my $K_plus = $K_to_go[$i_plus];
- if ( defined( $rK_weld_left->{$K_plus} ) ) {
- $ibeg_weld_fix = $i_plus;
+ # We need to fix things ... but there is no good way to do it.
+ # The best solution is for the user to use a longer maximum
+ # line length. We could get a smooth variation if we just move
+ # the paren in using
+ # $space_count -= ( 1 - $diff );
+ # But unfortunately this can give a rather unbalanced look.
+
+ # For -xlp we currently allow a tolerance of one indentation
+ # level and then revert to a simpler default. This will jump
+ # suddenly but keeps a balanced look.
+ if ( $rOpts_extended_line_up_parentheses
+ && $diff >= -$rOpts_indent_columns
+ && $space_count > $leading_spaces_beg )
+ {
+ $indentation = $space_count;
+ }
+
+ # Otherwise revert to defaults
+ elsif ( $default_adjust_indentation == 0 ) {
+ $indentation = $leading_spaces_beg;
+ }
+ elsif ( $default_adjust_indentation == 1 ) {
+ $indentation = $reduced_spaces_to_go[$i_terminal];
+ $lev = $levels_to_go[$i_terminal];
}
}
}
- # if we are at a closing token of some type..
- if ( $is_closing_type_beg || $seqno_qw_closing ) {
-
- # get the indentation of the line containing the corresponding
- # opening token
- (
- $opening_indentation, $opening_offset,
- $is_leading, $opening_exists
- )
- = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first,
- $ri_last, $rindentation_list, $seqno_qw_closing );
+ #-------------------------------------------------------------
+ # Secton 2D: adjust_indentation == 3
+ # Full indentation of closing tokens (-icb and -icp or -cti=2)
+ #-------------------------------------------------------------
+ else {
- my $terminal_is_in_list = $self->is_in_list_by_i($i_terminal);
+ # handle -icb (indented closing code block braces)
+ # Updated method for indented block braces: indent one full level if
+ # there is no continuation indentation. This will occur for major
+ # structures such as sub, if, else, but not for things like map
+ # blocks.
+ #
+ # Note: only code blocks without continuation indentation are
+ # handled here (if, else, unless, ..). In the following snippet,
+ # the terminal brace of the sort block will have continuation
+ # indentation as shown so it will not be handled by the coding
+ # here. We would have to undo the continuation indentation to do
+ # this, but it probably looks ok as is. This is a possible future
+ # update for semicolon terminated lines.
+ #
+ # if ($sortby eq 'date' or $sortby eq 'size') {
+ # @files = sort {
+ # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
+ # or $a cmp $b
+ # } @files;
+ # }
+ #
+ if ( $block_type_beg
+ && $ci_levels_to_go[$i_terminal] == 0 )
+ {
+ my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
+ $indentation = $spaces + $rOpts_indent_columns;
- # First set the default behavior:
- if (
+ # NOTE: for -lp we could create a new indentation object, but
+ # there is probably no need to do it
+ }
- # default behavior is to outdent closing lines
- # of the form: "); }; ]; )->xxx;"
- $is_semicolon_terminated
+ # handle -icp and any -icb block braces which fall through above
+ # test such as the 'sort' block mentioned above.
+ else {
- # and 'cuddled parens' of the form: ")->pack("
- # Bug fix for RT #123749]: the types here were
- # incorrectly '(' and ')'. Corrected to be '{' and '}'
- || (
- $terminal_type eq '{'
- && $type_beg eq '}'
- && ( $nesting_depth_to_go[$iend] + 1 ==
- $nesting_depth_to_go[$ibeg] )
- )
+ # There are currently two ways to handle -icp...
+ # One way is to use the indentation of the previous line:
+ # $indentation = $last_indentation_written;
- # remove continuation indentation for any line like
- # } ... {
- # or without ending '{' and unbalanced, such as
- # such as '}->{$operator}'
- || (
- $type_beg eq '}'
+ # The other way is to use the indentation that the previous line
+ # would have had if it hadn't been adjusted:
+ $indentation = $last_unadjusted_indentation;
- && ( $types_to_go[$iend] eq '{'
- || $levels_to_go[$iend] < $level_beg )
- )
+ # Current method: use the minimum of the two. This avoids
+ # inconsistent indentation.
+ if ( get_spaces($last_indentation_written) <
+ get_spaces($indentation) )
+ {
+ $indentation = $last_indentation_written;
+ }
+ }
- # and when the next line is at a lower indentation level...
+ # use previous indentation but use own level
+ # to cause list to be flushed properly
+ $lev = $level_beg;
+ }
- # PATCH #1: and only if the style allows undoing continuation
- # for all closing token types. We should really wait until
- # the indentation of the next line is known and then make
- # a decision, but that would require another pass.
+ #-------------------------------------------------------------
+ # Remember indentation except for multi-line quotes, which get
+ # no indentation
+ #-------------------------------------------------------------
+ if ( !( $ibeg == 0 && $starting_in_quote ) ) {
+ $last_indentation_written = $indentation;
+ $last_unadjusted_indentation = $leading_spaces_beg;
+ $last_leading_token = $token_beg;
- # PATCH #2: and not if this token is under -xci control
- || ( $level_jump < 0
- && !$some_closing_token_indentation
- && !$rseqno_controlling_my_ci->{$K_beg} )
+ # Patch to make a line which is the end of a qw quote work with the
+ # -lp option. Make $token_beg look like a closing token as some
+ # type even if it is not. This variable will become
+ # $last_leading_token at the end of this loop. Then, if the -lp
+ # style is selected, and the next line is also a
+ # closing token, it will not get more indentation than this line.
+ # We need to do this because qw quotes (at present) only get
+ # continuation indentation, not one level of indentation, so we
+ # need to turn off the -lp indentation.
- # Patch for -wn=2, multiple welded closing tokens
- || ( $i_terminal > $ibeg
- && $is_closing_type{ $types_to_go[$iend] } )
+ # ... a picture is worth a thousand words:
- # Alternate Patch for git #51, isolated closing qw token not
- # outdented if no-delete-old-newlines is set. This works, but
- # a more general patch elsewhere fixes the real problem: ljump.
- # || ( $seqno_qw_closing && $ibeg == $i_terminal )
+ # perltidy -wn -gnu (Without this patch):
+ # ok(defined(
+ # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
+ # 2981014)])
+ # ));
- )
+ # perltidy -wn -gnu (With this patch):
+ # ok(defined(
+ # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
+ # 2981014)])
+ # ));
+ if ( $seqno_qw_closing
+ && ( length($token_beg) > 1 || $token_beg eq '>' ) )
{
- $adjust_indentation = 1;
+ $last_leading_token = ')';
}
+ }
- # outdent something like '),'
- if (
- $terminal_type eq ','
+ #---------------------------------------------------------------------
+ # Rule: lines with leading closing tokens should not be outdented more
+ # than the line which contained the corresponding opening token.
+ #---------------------------------------------------------------------
- # Removed this constraint for -wn
- # OLD: allow just one character before the comma
- # && $i_terminal == $ibeg + 1
+ # Updated per bug report in alex_bug.pl: we must not
+ # mess with the indentation of closing logical braces, so
+ # we must treat something like '} else {' as if it were
+ # an isolated brace
+ my $is_isolated_block_brace = $block_type_beg
+ && ( $i_terminal == $ibeg
+ || $is_if_elsif_else_unless_while_until_for_foreach{$block_type_beg}
+ );
- # require LIST environment; otherwise, we may outdent too much -
- # this can happen in calls without parentheses (overload.t);
- && $terminal_is_in_list
- )
- {
- $adjust_indentation = 1;
- }
+ # only do this for a ':; which is aligned with its leading '?'
+ my $is_unaligned_colon = $type_beg eq ':' && !$is_leading;
- # undo continuation indentation of a terminal closing token if
- # it is the last token before a level decrease. This will allow
- # a closing token to line up with its opening counterpart, and
- # avoids an indentation jump larger than 1 level.
- if ( $i_terminal == $ibeg
- && $is_closing_type_beg
- && defined($K_beg)
- && $K_beg < $Klimit )
- {
- my $K_plus = $K_beg + 1;
- my $type_plus = $rLL->[$K_plus]->[_TYPE_];
+ if (
+ defined($opening_indentation)
+ && !$leading_paren_arrow # MOJO patch
+ && !$is_isolated_block_brace
+ && !$is_unaligned_colon
+ )
+ {
+ if ( get_spaces($opening_indentation) > get_spaces($indentation) ) {
+ $indentation = $opening_indentation;
+ }
+ }
- if ( $type_plus eq 'b' && $K_plus < $Klimit ) {
- $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
- }
+ #----------------------------------------------------
+ # remember the indentation of each line of this batch
+ #----------------------------------------------------
+ push @{$rindentation_list}, $indentation;
- if ( $type_plus eq '#' && $K_plus < $Klimit ) {
- $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
- if ( $type_plus eq 'b' && $K_plus < $Klimit ) {
- $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
- }
+ #---------------------------------------------
+ # outdent lines with certain leading tokens...
+ #---------------------------------------------
+ if (
- # Note: we have skipped past just one comment (perhaps a
- # side comment). There could be more, and we could easily
- # skip past all the rest with the following code, or with a
- # while loop. It would be rare to have to do this, and
- # those block comments would still be indented, so it would
- # to leave them indented. So it seems best to just stop at
- # a maximum of one comment.
- ##if ($type_plus eq '#') {
- ## $K_plus = $self->K_next_code($K_plus);
- ##}
- }
+ # must be first word of this batch
+ $ibeg == 0
- if ( !$is_bli_beg && defined($K_plus) ) {
- my $lev = $level_beg;
- my $level_next = $rLL->[$K_plus]->[_LEVEL_];
+ # and ...
+ && (
- # and do not undo ci if it was set by the -xci option
- $adjust_indentation = 1
- if ( $level_next < $lev
- && !$rseqno_controlling_my_ci->{$K_beg} );
- }
+ # certain leading keywords if requested
+ $rOpts_outdent_keywords
+ && $type_beg eq 'k'
+ && $outdent_keyword{$token_beg}
- # Patch for RT #96101, in which closing brace of anonymous subs
- # was not outdented. We should look ahead and see if there is
- # a level decrease at the next token (i.e., a closing token),
- # but right now we do not have that information. For now
- # we see if we are in a list, and this works well.
- # See test files 'sub*.t' for good test cases.
- if ( $terminal_is_in_list
- && !$rOpts_indent_closing_brace
- && $block_type_beg
- && $block_type_beg =~ /$ASUB_PATTERN/ )
- {
- (
- $opening_indentation, $opening_offset,
- $is_leading, $opening_exists
- )
- = $self->get_opening_indentation( $ibeg, $ri_first,
- $ri_last, $rindentation_list );
- my $indentation = $leading_spaces_beg;
- if ( defined($opening_indentation)
- && get_spaces($indentation) >
- get_spaces($opening_indentation) )
- {
- $adjust_indentation = 1;
- }
- }
- }
+ # or labels if requested
+ || $rOpts_outdent_labels && $type_beg eq 'J'
- # YVES patch 1 of 2:
- # Undo ci of line with leading closing eval brace,
- # but not beyond the indentation of the line with
- # the opening brace.
- if (
- $block_type_beg eq 'eval'
- ##&& !$rOpts_line_up_parentheses
- && !ref($leading_spaces_beg)
- && !$rOpts_indent_closing_brace
- )
- {
- (
- $opening_indentation, $opening_offset,
- $is_leading, $opening_exists
- )
- = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
- $rindentation_list );
- my $indentation = $leading_spaces_beg;
- if ( defined($opening_indentation)
- && get_spaces($indentation) >
- get_spaces($opening_indentation) )
- {
- $adjust_indentation = 1;
+ # or static block comments if requested
+ || $is_static_block_comment
+ && $rOpts_outdent_static_block_comments
+ )
+ )
+ {
+ my $space_count = leading_spaces_to_go($ibeg);
+ if ( $space_count > 0 ) {
+ $space_count -= $rOpts_continuation_indentation;
+ $is_outdented_line = 1;
+ if ( $space_count < 0 ) { $space_count = 0 }
+
+ # do not promote a spaced static block comment to non-spaced;
+ # this is not normally necessary but could be for some
+ # unusual user inputs (such as -ci = -i)
+ if ( $type_beg eq '#' && $space_count == 0 ) {
+ $space_count = 1;
}
+
+ $indentation = $space_count;
}
+ }
- # patch for issue git #40: -bli setting has priority
- $adjust_indentation = 0 if ($is_bli_beg);
+ return (
- $default_adjust_indentation = $adjust_indentation;
+ $indentation,
+ $lev,
+ $level_end,
+ $i_terminal,
+ $is_outdented_line,
- # Now modify default behavior according to user request:
- # handle option to indent non-blocks of the form ); }; ];
- # But don't do special indentation to something like ')->pack('
- if ( !$block_type_beg ) {
+ );
+ } ## end sub get_final_indentation
- # Note that logical padding has already been applied, so we may
- # need to remove some spaces to get a valid hash key.
- my $tok = $token_beg;
- my $cti = $closing_token_indentation{$tok};
+ sub get_closing_token_indentation {
- # Fix the value of 'cti' for an isolated non-welded closing qw
- # delimiter.
- if ( $seqno_qw_closing && $ibeg_weld_fix == $ibeg ) {
+ # Determine indentation adjustment for a line with a leading closing
+ # token - i.e. one of these: ) ] } :
- # A quote delimiter which is not a container will not have
- # a cti value defined. In this case use the style of a
- # paren. For example
- # my @fars = (
- # qw<
- # far
- # farfar
- # farfars-far
- # >,
- # );
- if ( !defined($cti) && length($tok) == 1 ) {
+ my (
+ $self, #
+
+ $ibeg,
+ $iend,
+ $ri_first,
+ $ri_last,
+ $rindentation_list,
+ $level_jump,
+ $i_terminal,
+ $is_semicolon_terminated,
+ $seqno_qw_closing,
- # something other than ')', '}', ']' ; use flag for ')'
- $cti = $closing_token_indentation{')'};
+ ) = @_;
- # But for now, do not outdent non-container qw
- # delimiters because it would would change existing
- # formatting.
- if ( $tok ne '>' ) { $cti = 3 }
- }
+ my $adjust_indentation = 0;
+ my $default_adjust_indentation = $adjust_indentation;
+ my $terminal_type = $types_to_go[$i_terminal];
- # A non-welded closing qw cannot currently use -cti=1
- # because that option requires a sequence number to find
- # the opening indentation, and qw quote delimiters are not
- # sequenced items.
- if ( defined($cti) && $cti == 1 ) { $cti = 0 }
- }
+ my $type_beg = $types_to_go[$ibeg];
+ my $token_beg = $tokens_to_go[$ibeg];
+ my $level_beg = $levels_to_go[$ibeg];
+ my $block_type_beg = $block_type_to_go[$ibeg];
+ my $leading_spaces_beg = $leading_spaces_to_go[$ibeg];
+ my $seqno_beg = $type_sequence_to_go[$ibeg];
+ my $is_closing_type_beg = $is_closing_type{$type_beg};
- if ( !defined($cti) ) {
+ my (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ );
- # $cti may not be defined for several reasons.
- # -padding may have been applied so the character
- # has a length > 1
- # - we may have welded to a closing quote token.
- # Here is an example (perltidy -wn):
- # __PACKAGE__->load_components( qw(
- # > Core
- # >
- # > ) );
- $adjust_indentation = 0;
+ # Honor any flag to reduce -ci set by the -bbxi=n option
+ if ( $seqno_beg && $self->[_rwant_reduced_ci_]->{$seqno_beg} ) {
- }
- elsif ( $cti == 1 ) {
- if ( $i_terminal <= $ibeg + 1
- || $is_semicolon_terminated )
- {
- $adjust_indentation = 2;
- }
- else {
- $adjust_indentation = 0;
- }
- }
- elsif ( $cti == 2 ) {
- if ($is_semicolon_terminated) {
- $adjust_indentation = 3;
- }
- else {
- $adjust_indentation = 0;
- }
- }
- elsif ( $cti == 3 ) {
- $adjust_indentation = 3;
- }
+ # if this is an opening, it must be alone on the line ...
+ if ( $is_closing_type{$type_beg} || $ibeg == $i_terminal ) {
+ $adjust_indentation = 1;
}
- # handle option to indent blocks
- else {
- if (
- $rOpts_indent_closing_brace
- && (
- $i_terminal == $ibeg # isolated terminal '}'
- || $is_semicolon_terminated
- )
- ) # } xxxx ;
- {
- $adjust_indentation = 3;
+ # ... or a single welded unit (fix for b1173)
+ elsif ($total_weld_count) {
+ my $K_beg = $K_to_go[$ibeg];
+ my $Kterm = $K_to_go[$i_terminal];
+ my $Kterm_test = $self->[_rK_weld_left_]->{$Kterm};
+ if ( defined($Kterm_test) && $Kterm_test >= $K_beg ) {
+ $Kterm = $Kterm_test;
}
+ if ( $Kterm == $K_beg ) { $adjust_indentation = 1 }
}
}
- # if at ');', '};', '>;', and '];' of a terminal qw quote
- elsif (
- substr( $rpatterns->[0], 0, 2 ) eq 'qb'
- && substr( $rfields->[0], -1, 1 ) eq ';'
- ##&& $rpatterns->[0] =~ /^qb*;$/
- && $rfields->[0] =~ /^([\)\}\]\>]);$/
- )
- {
- if ( $closing_token_indentation{$1} == 0 ) {
- $adjust_indentation = 1;
+ my $ris_bli_container = $self->[_ris_bli_container_];
+ my $is_bli_beg = $seqno_beg ? $ris_bli_container->{$seqno_beg} : 0;
+
+ # Update the $is_bli flag as we go. It is initially 1.
+ # We note seeing a leading opening brace by setting it to 2.
+ # If we get to the closing brace without seeing the opening then we
+ # turn it off. This occurs if the opening brace did not get output
+ # at the start of a line, so we will then indent the closing brace
+ # in the default way.
+ if ( $is_bli_beg && $is_bli_beg == 1 ) {
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_opening = $K_opening_container->{$seqno_beg};
+ my $K_beg = $K_to_go[$ibeg];
+ if ( $K_beg eq $K_opening ) {
+ $ris_bli_container->{$seqno_beg} = $is_bli_beg = 2;
}
- else {
- $adjust_indentation = 3;
+ else { $is_bli_beg = 0 }
+ }
+
+ # QW PATCH for the combination -lp -wn
+ # For -lp formatting use $ibeg_weld_fix to get around the problem
+ # that with -lp type formatting the opening and closing tokens to not
+ # have sequence numbers.
+ my $ibeg_weld_fix = $ibeg;
+ if ( $seqno_qw_closing && $total_weld_count ) {
+ my $i_plus = $inext_to_go[$ibeg];
+ if ( $i_plus <= $max_index_to_go ) {
+ my $K_plus = $K_to_go[$i_plus];
+ if ( defined( $self->[_rK_weld_left_]->{$K_plus} ) ) {
+ $ibeg_weld_fix = $i_plus;
+ }
}
}
- # if line begins with a ':', align it with any
- # previous line leading with corresponding ?
- elsif ( $type_beg eq ':' ) {
+ # if we are at a closing token of some type..
+ if ( $is_closing_type_beg || $seqno_qw_closing ) {
+
+ my $K_beg = $K_to_go[$ibeg];
+
+ # get the indentation of the line containing the corresponding
+ # opening token
(
$opening_indentation, $opening_offset,
$is_leading, $opening_exists
)
- = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
- $rindentation_list );
- if ($is_leading) { $adjust_indentation = 2; }
- }
-
- #---------------------------------------------------------
- # Section 2: set indentation according to flag set above
- #
- # Select the indentation object to define leading
- # whitespace. If we are outdenting something like '} } );'
- # then we want to use one level below the last token
- # ($i_terminal) in order to get it to fully outdent through
- # all levels.
- #---------------------------------------------------------
- my $indentation;
- my $lev;
- my $level_end = $levels_to_go[$iend];
-
- if ( $adjust_indentation == 0 ) {
- $indentation = $leading_spaces_beg;
- $lev = $level_beg;
- }
- elsif ( $adjust_indentation == 1 ) {
+ = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first,
+ $ri_last, $rindentation_list, $seqno_qw_closing );
- # Change the indentation to be that of a different token on the line
- # Previously, the indentation of the terminal token was used:
- # OLD CODING:
- # $indentation = $reduced_spaces_to_go[$i_terminal];
- # $lev = $levels_to_go[$i_terminal];
+ # Patch for rt144979, part 1. Coordinated with part 2.
+ # Do not undo ci for a cuddled closing brace control; it
+ # needs to be treated exactly the same ci as an isolated
+ # closing brace.
+ my $is_cuddled_closing_brace = $seqno_beg
+ && $self->[_ris_cuddled_closing_brace_]->{$seqno_beg};
- # Generalization for MOJO:
- # Use the lowest level indentation of the tokens on the line.
- # For example, here we can use the indentation of the ending ';':
- # } until ($selection > 0 and $selection < 10); # ok to use ';'
- # But this will not outdent if we use the terminal indentation:
- # )->then( sub { # use indentation of the ->, not the {
- # Warning: reduced_spaces_to_go[] may be a reference, do not
- # do numerical checks with it
+ # First set the default behavior:
+ if (
- my $i_ind = $ibeg;
- $indentation = $reduced_spaces_to_go[$i_ind];
- $lev = $levels_to_go[$i_ind];
- while ( $i_ind < $i_terminal ) {
- $i_ind++;
- if ( $levels_to_go[$i_ind] < $lev ) {
- $indentation = $reduced_spaces_to_go[$i_ind];
- $lev = $levels_to_go[$i_ind];
- }
- }
- }
+ # default behavior is to outdent closing lines
+ # of the form: "); }; ]; )->xxx;"
+ $is_semicolon_terminated
- # handle indented closing token which aligns with opening token
- elsif ( $adjust_indentation == 2 ) {
+ # and 'cuddled parens' of the form: ")->pack(". Bug fix for RT
+ # #123749]: the TYPES here were incorrectly ')' and '('. The
+ # corrected TYPES are '}' and '{'. But skip a cuddled block.
+ || (
+ $terminal_type eq '{'
+ && $type_beg eq '}'
+ && ( $nesting_depth_to_go[$iend] + 1 ==
+ $nesting_depth_to_go[$ibeg] )
+ && !$is_cuddled_closing_brace
+ )
- # handle option to align closing token with opening token
- $lev = $level_beg;
+ # remove continuation indentation for any line like
+ # } ... {
+ # or without ending '{' and unbalanced, such as
+ # such as '}->{$operator}'
+ || (
+ $type_beg eq '}'
- # calculate spaces needed to align with opening token
- my $space_count =
- get_spaces($opening_indentation) + $opening_offset;
+ && ( $types_to_go[$iend] eq '{'
+ || $levels_to_go[$iend] < $level_beg )
- # Indent less than the previous line.
- #
- # Problem: For -lp we don't exactly know what it was if there
- # were recoverable spaces sent to the aligner. A good solution
- # would be to force a flush of the vertical alignment buffer, so
- # that we would know. For now, this rule is used for -lp:
- #
- # When the last line did not start with a closing token we will
- # be optimistic that the aligner will recover everything wanted.
- #
- # This rule will prevent us from breaking a hierarchy of closing
- # tokens, and in a worst case will leave a closing paren too far
- # indented, but this is better than frequently leaving it not
- # indented enough.
- my $last_spaces = get_spaces($last_indentation_written);
+ # but not if a cuddled block
+ && !$is_cuddled_closing_brace
+ )
- if ( ref($last_indentation_written)
- && !$is_closing_token{$last_leading_token} )
- {
- $last_spaces +=
- get_recoverable_spaces($last_indentation_written);
- }
+ # and when the next line is at a lower indentation level...
- # reset the indentation to the new space count if it works
- # only options are all or none: nothing in-between looks good
- $lev = $level_beg;
+ # PATCH #1: and only if the style allows undoing continuation
+ # for all closing token types. We should really wait until
+ # the indentation of the next line is known and then make
+ # a decision, but that would require another pass.
- my $diff = $last_spaces - $space_count;
- if ( $diff > 0 ) {
- $indentation = $space_count;
- }
- else {
+ # PATCH #2: and not if this token is under -xci control
+ || ( $level_jump < 0
+ && !$some_closing_token_indentation
+ && !$self->[_rseqno_controlling_my_ci_]->{$K_beg} )
- # We need to fix things ... but there is no good way to do it.
- # The best solution is for the user to use a longer maximum
- # line length. We could get a smooth variation if we just move
- # the paren in using
- # $space_count -= ( 1 - $diff );
- # But unfortunately this can give a rather unbalanced look.
+ # Patch for -wn=2, multiple welded closing tokens
+ || ( $i_terminal > $ibeg
+ && $is_closing_type{ $types_to_go[$iend] } )
- # For -xlp we currently allow a tolerance of one indentation
- # level and then revert to a simpler default. This will jump
- # suddenly but keeps a balanced look.
- if ( $rOpts_extended_line_up_parentheses
- && $diff >= -$rOpts_indent_columns
- && $space_count > $leading_spaces_beg )
- {
- $indentation = $space_count;
- }
+ # Alternate Patch for git #51, isolated closing qw token not
+ # outdented if no-delete-old-newlines is set. This works, but
+ # a more general patch elsewhere fixes the real problem: ljump.
+ # || ( $seqno_qw_closing && $ibeg == $i_terminal )
- # Otherwise revert to defaults
- elsif ( $default_adjust_indentation == 0 ) {
- $indentation = $leading_spaces_beg;
- }
- elsif ( $default_adjust_indentation == 1 ) {
- $indentation = $reduced_spaces_to_go[$i_terminal];
- $lev = $levels_to_go[$i_terminal];
- }
+ )
+ {
+ $adjust_indentation = 1;
}
- }
- # Full indentation of closing tokens (-icb and -icp or -cti=2)
- else {
+ # outdent something like '),'
+ if (
+ $terminal_type eq ','
- # handle -icb (indented closing code block braces)
- # Updated method for indented block braces: indent one full level if
- # there is no continuation indentation. This will occur for major
- # structures such as sub, if, else, but not for things like map
- # blocks.
- #
- # Note: only code blocks without continuation indentation are
- # handled here (if, else, unless, ..). In the following snippet,
- # the terminal brace of the sort block will have continuation
- # indentation as shown so it will not be handled by the coding
- # here. We would have to undo the continuation indentation to do
- # this, but it probably looks ok as is. This is a possible future
- # update for semicolon terminated lines.
- #
- # if ($sortby eq 'date' or $sortby eq 'size') {
- # @files = sort {
- # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
- # or $a cmp $b
- # } @files;
- # }
- #
- if ( $block_type_beg
- && $ci_levels_to_go[$i_terminal] == 0 )
- {
- my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
- $indentation = $spaces + $rOpts_indent_columns;
+ # Removed this constraint for -wn
+ # OLD: allow just one character before the comma
+ # && $i_terminal == $ibeg + 1
- # NOTE: for -lp we could create a new indentation object, but
- # there is probably no need to do it
+ # require LIST environment; otherwise, we may outdent too much -
+ # this can happen in calls without parentheses (overload.t);
+ && $self->is_in_list_by_i($i_terminal)
+ )
+ {
+ $adjust_indentation = 1;
}
- # handle -icp and any -icb block braces which fall through above
- # test such as the 'sort' block mentioned above.
- else {
-
- # There are currently two ways to handle -icp...
- # One way is to use the indentation of the previous line:
- # $indentation = $last_indentation_written;
-
- # The other way is to use the indentation that the previous line
- # would have had if it hadn't been adjusted:
- $indentation = $last_unadjusted_indentation;
+ # undo continuation indentation of a terminal closing token if
+ # it is the last token before a level decrease. This will allow
+ # a closing token to line up with its opening counterpart, and
+ # avoids an indentation jump larger than 1 level.
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
+ if ( $i_terminal == $ibeg
+ && $is_closing_type_beg
+ && defined($K_beg)
+ && $K_beg < $Klimit )
+ {
+ my $K_plus = $K_beg + 1;
+ my $type_plus = $rLL->[$K_plus]->[_TYPE_];
- # Current method: use the minimum of the two. This avoids
- # inconsistent indentation.
- if ( get_spaces($last_indentation_written) <
- get_spaces($indentation) )
- {
- $indentation = $last_indentation_written;
+ if ( $type_plus eq 'b' && $K_plus < $Klimit ) {
+ $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
}
- }
- # use previous indentation but use own level
- # to cause list to be flushed properly
- $lev = $level_beg;
- }
+ if ( $type_plus eq '#' && $K_plus < $Klimit ) {
+ $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
+ if ( $type_plus eq 'b' && $K_plus < $Klimit ) {
+ $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
+ }
- # remember indentation except for multi-line quotes, which get
- # no indentation
- unless ( $ibeg == 0 && $starting_in_quote ) {
- $last_indentation_written = $indentation;
- $last_unadjusted_indentation = $leading_spaces_beg;
- $last_leading_token = $token_beg;
+ # Note: we have skipped past just one comment (perhaps a
+ # side comment). There could be more, and we could easily
+ # skip past all the rest with the following code, or with a
+ # while loop. It would be rare to have to do this, and
+ # those block comments would still be indented, so it would
+ # to leave them indented. So it seems best to just stop at
+ # a maximum of one comment.
+ ##if ($type_plus eq '#') {
+ ## $K_plus = $self->K_next_code($K_plus);
+ ##}
+ }
- # Patch to make a line which is the end of a qw quote work with the
- # -lp option. Make $token_beg look like a closing token as some
- # type even if it is not. This variable will become
- # $last_leading_token at the end of this loop. Then, if the -lp
- # style is selected, and the next line is also a
- # closing token, it will not get more indentation than this line.
- # We need to do this because qw quotes (at present) only get
- # continuation indentation, not one level of indentation, so we
- # need to turn off the -lp indentation.
+ if ( !$is_bli_beg && defined($K_plus) ) {
+ my $lev = $level_beg;
+ my $level_next = $rLL->[$K_plus]->[_LEVEL_];
- # ... a picture is worth a thousand words:
+ # and do not undo ci if it was set by the -xci option
+ $adjust_indentation = 1
+ if ( $level_next < $lev
+ && !$self->[_rseqno_controlling_my_ci_]->{$K_beg} );
+ }
- # perltidy -wn -gnu (Without this patch):
- # ok(defined(
- # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
- # 2981014)])
- # ));
+ # Patch for RT #96101, in which closing brace of anonymous subs
+ # was not outdented. We should look ahead and see if there is
+ # a level decrease at the next token (i.e., a closing token),
+ # but right now we do not have that information. For now
+ # we see if we are in a list, and this works well.
+ # See test files 'sub*.t' for good test cases.
+ if ( !$rOpts_indent_closing_brace
+ && $block_type_beg
+ && $self->[_ris_asub_block_]->{$seqno_beg}
+ && $self->is_in_list_by_i($i_terminal) )
+ {
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = $self->get_opening_indentation( $ibeg, $ri_first,
+ $ri_last, $rindentation_list );
+ my $indentation = $leading_spaces_beg;
+ if ( defined($opening_indentation)
+ && get_spaces($indentation) >
+ get_spaces($opening_indentation) )
+ {
+ $adjust_indentation = 1;
+ }
+ }
+ }
- # perltidy -wn -gnu (With this patch):
- # ok(defined(
- # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
- # 2981014)])
- # ));
- if ( $seqno_qw_closing
- && ( length($token_beg) > 1 || $token_beg eq '>' ) )
+ # YVES patch 1 of 2:
+ # Undo ci of line with leading closing eval brace,
+ # but not beyond the indentation of the line with
+ # the opening brace.
+ if ( $block_type_beg
+ && $block_type_beg eq 'eval'
+ && !ref($leading_spaces_beg)
+ && !$rOpts_indent_closing_brace )
{
- $last_leading_token = ')';
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
+ $rindentation_list );
+ my $indentation = $leading_spaces_beg;
+ if ( defined($opening_indentation)
+ && get_spaces($indentation) >
+ get_spaces($opening_indentation) )
+ {
+ $adjust_indentation = 1;
+ }
}
- }
- # be sure lines with leading closing tokens are not outdented more
- # than the line which contained the corresponding opening token.
+ # patch for issue git #40: -bli setting has priority
+ $adjust_indentation = 0 if ($is_bli_beg);
- #--------------------------------------------------------
- # updated per bug report in alex_bug.pl: we must not
- # mess with the indentation of closing logical braces so
- # we must treat something like '} else {' as if it were
- # an isolated brace
- #--------------------------------------------------------
- my $is_isolated_block_brace = $block_type_beg
- && ( $i_terminal == $ibeg
- || $is_if_elsif_else_unless_while_until_for_foreach{$block_type_beg}
- );
+ $default_adjust_indentation = $adjust_indentation;
- # only do this for a ':; which is aligned with its leading '?'
- my $is_unaligned_colon = $type_beg eq ':' && !$is_leading;
+ # Now modify default behavior according to user request:
+ # handle option to indent non-blocks of the form ); }; ];
+ # But don't do special indentation to something like ')->pack('
+ if ( !$block_type_beg ) {
- if (
- defined($opening_indentation)
- && !$leading_paren_arrow # MOJO
- && !$is_isolated_block_brace
- && !$is_unaligned_colon
- )
- {
- if ( get_spaces($opening_indentation) > get_spaces($indentation) ) {
- $indentation = $opening_indentation;
- }
- }
+ # Note that logical padding has already been applied, so we may
+ # need to remove some spaces to get a valid hash key.
+ my $tok = $token_beg;
+ my $cti = $closing_token_indentation{$tok};
- # remember the indentation of each line of this batch
- push @{$rindentation_list}, $indentation;
+ # Fix the value of 'cti' for an isolated non-welded closing qw
+ # delimiter.
+ if ( $seqno_qw_closing && $ibeg_weld_fix == $ibeg ) {
- # outdent lines with certain leading tokens...
- if (
+ # A quote delimiter which is not a container will not have
+ # a cti value defined. In this case use the style of a
+ # paren. For example
+ # my @fars = (
+ # qw<
+ # far
+ # farfar
+ # farfars-far
+ # >,
+ # );
+ if ( !defined($cti) && length($tok) == 1 ) {
- # must be first word of this batch
- $ibeg == 0
+ # something other than ')', '}', ']' ; use flag for ')'
+ $cti = $closing_token_indentation{')'};
- # and ...
- && (
+ # But for now, do not outdent non-container qw
+ # delimiters because it would would change existing
+ # formatting.
+ if ( $tok ne '>' ) { $cti = 3 }
+ }
- # certain leading keywords if requested
- $rOpts_outdent_keywords
- && $type_beg eq 'k'
- && $outdent_keyword{$token_beg}
+ # A non-welded closing qw cannot currently use -cti=1
+ # because that option requires a sequence number to find
+ # the opening indentation, and qw quote delimiters are not
+ # sequenced items.
+ if ( defined($cti) && $cti == 1 ) { $cti = 0 }
+ }
- # or labels if requested
- || $rOpts_outdent_labels && $type_beg eq 'J'
+ if ( !defined($cti) ) {
- # or static block comments if requested
- || $is_static_block_comment
- && $rOpts_outdent_static_block_comments
- )
- )
- {
- my $space_count = leading_spaces_to_go($ibeg);
- if ( $space_count > 0 ) {
- $space_count -= $rOpts_continuation_indentation;
- $is_outdented_line = 1;
- if ( $space_count < 0 ) { $space_count = 0 }
+ # $cti may not be defined for several reasons.
+ # -padding may have been applied so the character
+ # has a length > 1
+ # - we may have welded to a closing quote token.
+ # Here is an example (perltidy -wn):
+ # __PACKAGE__->load_components( qw(
+ # > Core
+ # >
+ # > ) );
+ $adjust_indentation = 0;
- # do not promote a spaced static block comment to non-spaced;
- # this is not normally necessary but could be for some
- # unusual user inputs (such as -ci = -i)
- if ( $type_beg eq '#' && $space_count == 0 ) {
- $space_count = 1;
}
+ elsif ( $cti == 1 ) {
+ if ( $i_terminal <= $ibeg + 1
+ || $is_semicolon_terminated )
+ {
+ $adjust_indentation = 2;
+ }
+ else {
+ $adjust_indentation = 0;
+ }
+ }
+ elsif ( $cti == 2 ) {
+ if ($is_semicolon_terminated) {
+ $adjust_indentation = 3;
+ }
+ else {
+ $adjust_indentation = 0;
+ }
+ }
+ elsif ( $cti == 3 ) {
+ $adjust_indentation = 3;
+ }
+ }
- $indentation = $space_count;
+ # handle option to indent blocks
+ else {
+ if (
+ $rOpts_indent_closing_brace
+ && (
+ $i_terminal == $ibeg # isolated terminal '}'
+ || $is_semicolon_terminated
+ )
+ ) # } xxxx ;
+ {
+ $adjust_indentation = 3;
+ }
}
+ } ## end if ( $is_closing_type_beg || $seqno_qw_closing )
+
+ # if line begins with a ':', align it with any
+ # previous line leading with corresponding ?
+ elsif ( $type_beg eq ':' ) {
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
+ $rindentation_list );
+ if ($is_leading) { $adjust_indentation = 2; }
}
- return ( $indentation, $lev, $level_end, $terminal_type,
- $terminal_block_type, $is_semicolon_terminated,
- $is_outdented_line );
- } ## end sub final_indentation_adjustment
-} ## end closure final_indentation_adjustment
+ return (
+
+ $adjust_indentation,
+ $default_adjust_indentation,
+ $opening_indentation,
+ $opening_offset,
+ $is_leading,
+ $opening_exists,
+
+ );
+ } ## end sub get_closing_token_indentation
+} ## end closure get_final_indentation
sub get_opening_indentation {
# $rindentation_list - reference to a list containing the indentation
# used for each line.
# $qw_seqno - optional sequence number to use if normal seqno not defined
- # (TODO: would be more general to just look this up from index i)
+ # (NOTE: would be more general to just look this up from index i)
#
# return:
# -the indentation of the line which contained the opening token
return ( $indent, $offset, $is_leading, $exists );
} ## end sub get_opening_indentation
+sub examine_vertical_tightness_flags {
+ my ($self) = @_;
+
+ # For efficiency, we will set a flag to skip all calls to sub
+ # 'set_vertical_tightness_flags' if vertical tightness is not possible with
+ # the user input parameters. If vertical tightness is possible, we will
+ # simply leave the flag undefined and return.
+
+ # Vertical tightness is never possible with --freeze-whitespace
+ if ($rOpts_freeze_whitespace) {
+ $self->[_no_vertical_tightness_flags_] = 1;
+ return;
+ }
+
+ # This sub is coordinated with sub set_vertical_tightness_flags.
+ # The Section numbers in the following comments are the sections
+ # in sub set_vertical_tightness_flags:
+
+ # Examine controls for Section 1a:
+ return if ($rOpts_line_up_parentheses);
+
+ foreach my $key ( keys %opening_vertical_tightness ) {
+ return if ( $opening_vertical_tightness{$key} );
+ }
+
+ # Examine controls for Section 1b:
+ foreach my $key ( keys %closing_vertical_tightness ) {
+ return if ( $closing_vertical_tightness{$key} );
+ }
+
+ # Examine controls for Section 1c:
+ foreach my $key ( keys %opening_token_right ) {
+ return if ( $opening_token_right{$key} );
+ }
+
+ # Examine controls for Section 1d:
+ foreach my $key ( keys %stack_opening_token ) {
+ return if ( $stack_opening_token{$key} );
+ }
+ foreach my $key ( keys %stack_closing_token ) {
+ return if ( $stack_closing_token{$key} );
+ }
+
+ # Examine controls for Section 2:
+ return if ($rOpts_block_brace_vertical_tightness);
+
+ # Examine controls for Section 3:
+ return if ($rOpts_stack_closing_block_brace);
+
+ # None of the controls used for vertical tightness are set, so
+ # we can skip all calls to sub set_vertical_tightness_flags
+ $self->[_no_vertical_tightness_flags_] = 1;
+ return;
+} ## end sub examine_vertical_tightness_flags
+
sub set_vertical_tightness_flags {
my ( $self, $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last,
= @_;
# Define vertical tightness controls for the nth line of a batch.
+ # Note: do not call this sub for a block comment or if
+ # $rOpts_freeze_whitespace is set.
# These parameters are passed to the vertical aligner to indicated
# if we should combine this line with the next line to achieve the
# continually increase if we allowed it when the -fws flag is set.
# See case b499 for an example.
- # Speedup: just return for a comment
- if ( $max_index_to_go == 0 && $types_to_go[0] eq '#' ) {
- return;
- }
-
# Define these values...
my $vt_type = 0;
my $vt_opening_flag = 0;
my $vt_min_lines = 0;
my $vt_max_lines = 0;
- goto RETURN
- if ($rOpts_freeze_whitespace);
-
# Uses these global parameters:
# $rOpts_block_brace_tightness
# $rOpts_block_brace_vertical_tightness
# $rOpts_stack_closing_block_brace
+ # $rOpts_line_up_parentheses
# %opening_vertical_tightness
# %closing_vertical_tightness
# %opening_token_right
if ( $self->[_rK_weld_left_]->{ $K_to_go[$iend_next] }
&& $is_closing_type{$type_end_next} );
- # Avoid conflict of -bom and -pt=1 or -pt=2, fixes b1270
- # See similar patch above for $cvt.
+ # The flag '_rbreak_container_' avoids conflict of -bom and -pt=1
+ # or -pt=2; fixes b1270. See similar patch above for $cvt.
my $seqno = $type_sequence_to_go[$iend];
- if ( $ovt && $self->[_rwant_container_open_]->{$seqno} ) {
+ if ( $ovt
+ && $seqno
+ && $self->[_rbreak_container_]->{$seqno} )
+ {
$ovt = 0;
}
- if ( $ovt == 2
- && $self->[_rreduce_vertical_tightness_by_seqno_]->{$seqno} )
- {
- $ovt = 1;
+ # The flag '_rmax_vertical_tightness_' avoids welding conflicts.
+ if ( defined( $self->[_rmax_vertical_tightness_]->{$seqno} ) ) {
+ $ovt =
+ min( $ovt, $self->[_rmax_vertical_tightness_]->{$seqno} );
}
unless (
&& $is_closing_token{$token_next}
&& $types_to_go[$iend] ne '#' ) # for safety, shouldn't happen!
{
- my $ovt = $opening_vertical_tightness{$token_next};
my $cvt = $closing_vertical_tightness{$token_next};
# Avoid conflict of -bom and -pvt=1 or -pvt=2, fixes b977, b1303
# See similar patch above for $ovt.
my $seqno = $type_sequence_to_go[$ibeg_next];
- if ( $cvt && $self->[_rwant_container_open_]->{$seqno} ) {
+ if ( $cvt && $self->[_rbreak_container_]->{$seqno} ) {
$cvt = 0;
}
$cvt = 1;
}
+ # Fix for b1379, b1380, b1381, b1382, b1384 part 2,
+ # instablility with adding and deleting trailing commas:
+ # Reducing -cvt=2 to =1 fixes stability for -wtc=b in b1379,1380.
+ # Reducing -cvt>0 to =0 fixes stability for -wtc=b in b1381,1382.
+ # Reducing -cvt>0 to =0 fixes stability for -wtc=m in b1384
+ if ( $cvt
+ && $self->[_ris_bare_trailing_comma_by_seqno_]->{$seqno} )
+ {
+ $cvt = 0;
+ }
+
if (
# Never append a trailing line like ')->pack(' because it
# allow closing up 2-line method calls
|| ( $rOpts_line_up_parentheses
&& $token_next eq ')'
+ && $type_sequence_to_go[$ibeg_next]
&& $self->[_rlp_object_by_seqno_]
->{ $type_sequence_to_go[$ibeg_next] } )
)
my $seqno_ibeg_next = $type_sequence_to_go[$ibeg_next];
if ( $rOpts_line_up_parentheses
&& $total_weld_count
+ && $seqno_ibeg_next
&& $self->[_rlp_object_by_seqno_]->{$seqno_ibeg_next}
&& $self->is_welded_at_seqno($seqno_ibeg_next) )
{
&& $token_end ne '||' && $token_end ne '&&'
# Keep break after '=' if -lp. Fixes b964 b1040 b1062 b1083 b1089.
+ # Generalized from '=' to $is_assignment to fix b1375.
&& !(
- $token_end eq '='
+ $is_assignment{ $types_to_go[$iend] }
&& $rOpts_line_up_parentheses
+ && $type_sequence_to_go[$ibeg_next]
&& $self->[_rlp_object_by_seqno_]
->{ $type_sequence_to_go[$ibeg_next] }
)
# looks bad if we align vertically with the wrong container
&& $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
+
+ # give -kba priority over -otr (b1445)
+ && !$self->[_rbreak_after_Klast_]->{ $K_to_go[$iend] }
)
{
my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
my $seq_next = $type_sequence_to_go[$ibeg_next];
$stackable = $stack_closing_token{$token_beg_next}
unless ( $block_type_to_go[$ibeg_next]
- || $seq_next && $self->[_rwant_container_open_]->{$seq_next} );
+ || $seq_next && $self->[_rbreak_container_]->{$seq_next} );
}
elsif ($is_opening_token{$token_end}
&& $is_opening_token{$token_beg_next} )
elsif ($rOpts_block_brace_vertical_tightness
&& $ibeg eq $iend
&& $types_to_go[$iend] eq '{'
+ && $block_type_to_go[$iend]
&& $block_type_to_go[$iend] =~
/$block_brace_vertical_tightness_pattern/ )
{
# get the sequence numbers of the ends of this line
$vt_seqno_beg = $type_sequence_to_go[$ibeg];
- if ( !$vt_seqno_beg && $types_to_go[$ibeg] eq 'q' ) {
- $vt_seqno_beg = $self->get_seqno( $ibeg, $ending_in_quote );
+ if ( !$vt_seqno_beg ) {
+ if ( $types_to_go[$ibeg] eq 'q' ) {
+ $vt_seqno_beg = $self->get_seqno( $ibeg, $ending_in_quote );
+ }
+ else { $vt_seqno_beg = EMPTY_STRING }
}
$vt_seqno_end = $type_sequence_to_go[$iend];
- if ( !$vt_seqno_end && $types_to_go[$iend] eq 'q' ) {
- $vt_seqno_end = $self->get_seqno( $iend, $ending_in_quote );
+ if ( !$vt_seqno_end ) {
+ if ( $types_to_go[$iend] eq 'q' ) {
+ $vt_seqno_end = $self->get_seqno( $iend, $ending_in_quote );
+ }
+ else { $vt_seqno_end = EMPTY_STRING }
}
- RETURN:
+ if ( !defined($vt_seqno) ) { $vt_seqno = EMPTY_STRING }
my $rvertical_tightness_flags = {
_vt_type => $vt_type,
&& (
(
$i + 1 <= $max_index_to_go
+ && $block_type_to_go[ $i + 1 ]
&& $block_type_to_go[ $i + 1 ] eq
$accumulating_text_for_block
)
|| ( $i + 2 <= $max_index_to_go
+ && $block_type_to_go[ $i + 2 ]
&& $block_type_to_go[ $i + 2 ] eq
$accumulating_text_for_block )
)
my $type = $types_to_go[$i];
my $block_type = $block_type_to_go[$i];
my $token = $tokens_to_go[$i];
+ $block_type = EMPTY_STRING unless ($block_type);
# remember last nonblank token type
if ( $type ne '#' && $type ne 'b' ) {
')' => '(',
']' => '[',
);
- }
+ } ## end BEGIN
sub balance_csc_text {
# ..and the corresponding opening brace must is not in this batch
# (because we do not need to tag one-line blocks, although this
# should also be caught with a positive -csci value)
- && $mate_index_to_go[$i_terminal] < 0
+ && !defined( $mate_index_to_go[$i_terminal] )
# ..and either
&& (
}
$cscw_block_comment =
"## perltidy -cscw $timestamp: $tokens_to_go[$max_index_to_go]";
-## "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
}
}
- else {
- # No differences.. we can safely delete old comment if we
- # are below the threshold
- if ( $block_line_count <
- $rOpts->{'closing-side-comment-interval'} )
+ # No differences.. we can safely delete old comment if we
+ # are below the threshold
+ elsif ( $block_line_count <
+ $rOpts->{'closing-side-comment-interval'} )
+ {
+ # Since the line breaks have already been set, we have
+ # to remove the token from the _to_go array and also
+ # from the line range (this fixes issue c081).
+ # Note that we can only get here if -cscw has been set
+ # because otherwise the old comment is already deleted.
+ $token = undef;
+ my $ibeg = $ri_first->[-1];
+ my $iend = $ri_last->[-1];
+ if ( $iend > $ibeg
+ && $iend == $max_index_to_go
+ && $types_to_go[$max_index_to_go] eq '#' )
{
- # Since the line breaks have already been set, we have
- # to remove the token from the _to_go array and also
- # from the line range (this fixes issue c081).
- # Note that we can only get here if -cscw has been set
- # because otherwise the old comment is already deleted.
- $token = undef;
- my $ibeg = $ri_first->[-1];
- my $iend = $ri_last->[-1];
+ $iend--;
+ $max_index_to_go--;
if ( $iend > $ibeg
- && $iend == $max_index_to_go
- && $types_to_go[$max_index_to_go] eq '#' )
+ && $types_to_go[$max_index_to_go] eq 'b' )
{
$iend--;
$max_index_to_go--;
- if ( $iend > $ibeg
- && $types_to_go[$max_index_to_go] eq 'b' )
- {
- $iend--;
- $max_index_to_go--;
- }
- $ri_last->[-1] = $iend;
}
+ $ri_last->[-1] = $iend;
}
}
}
# This is the last routine called when a file is formatted.
# Flush buffer and write any informative messages
- my $self = shift;
+ my ( $self, $severe_error ) = @_;
$self->flush();
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->report_line_length_errors();
- $self->[_converged_] = $file_writer_object->get_convergence_check()
+ # Define the formatter self-check for convergence.
+ $self->[_converged_] =
+ $severe_error
+ || $file_writer_object->get_convergence_check()
|| $rOpts->{'indent-only'};
return;