# CODE SECTION 9: Process batches of code
# sub grind_batch_of_CODE
# CODE SECTION 10: Code to break long statments
-# sub set_continuation_breaks
+# sub break_long_lines
# CODE SECTION 11: Code to break long lists
-# sub scan_list
+# sub break_lists
# CODE SECTION 12: Code for setting indentation
-# CODE SECTION 13: Preparing batches for vertical alignment
-# sub send_lines_to_vertical_aligner
+# CODE SECTION 13: Preparing batch of lines for vertical alignment
+# sub convey_batch_to_vertical_aligner
# CODE SECTION 14: Code for creating closing side comments
# sub add_closing_side_comment
# CODE SECTION 15: Summarize
use strict;
use warnings;
-# this can be turned on for extra checking during development
+# This flag gets switched on during automated testing for extra checking
use constant DEVEL_MODE => 0;
{ #<<< A non-indenting brace to contain all lexical variables
use Carp;
-our $VERSION = '20210717';
+our $VERSION = '20220217';
# The Tokenizer will be loaded with the Formatter
##use Perl::Tidy::Tokenizer; # for is_keyword()
in file '$filename1'
which was called from line $line1 of sub '$subroutine2'
Message: '$msg'
-This is probably an error introduced by a recent programming change.
+This is probably an error introduced by a recent programming change.
Perl::Tidy::Formatter.pm reports VERSION='$VERSION'.
==============================================================================
EOM
# Global variables ...
my (
- ##################################################################
+ #-----------------------------------------------------------------
# Section 1: Global variables which are either always constant or
# are constant after being configured by user-supplied
# parameters. They remain constant as a file is being processed.
- ##################################################################
+ #-----------------------------------------------------------------
# user parameters and shortcuts
$rOpts,
- $rOpts_closing_side_comment_maximum_text,
- $rOpts_continuation_indentation,
- $rOpts_indent_columns,
- $rOpts_line_up_parentheses,
- $rOpts_maximum_line_length,
- $rOpts_variable_maximum_line_length,
+ $rOpts_add_newlines,
+ $rOpts_add_whitespace,
+ $rOpts_blank_lines_after_opening_block,
$rOpts_block_brace_tightness,
$rOpts_block_brace_vertical_tightness,
- $rOpts_stack_closing_block_brace,
- $rOpts_maximum_consecutive_blank_lines,
-
- $rOpts_recombine,
- $rOpts_add_newlines,
+ $rOpts_break_after_labels,
+ $rOpts_break_at_old_attribute_breakpoints,
$rOpts_break_at_old_comma_breakpoints,
- $rOpts_ignore_old_breakpoints,
-
- $rOpts_keep_interior_semicolons,
- $rOpts_comma_arrow_breakpoints,
- $rOpts_maximum_fields_per_table,
- $rOpts_one_line_block_semicolons,
+ $rOpts_break_at_old_keyword_breakpoints,
+ $rOpts_break_at_old_logical_breakpoints,
$rOpts_break_at_old_semicolon_breakpoints,
-
- $rOpts_tee_side_comments,
- $rOpts_tee_block_comments,
- $rOpts_tee_pod,
- $rOpts_delete_side_comments,
+ $rOpts_break_at_old_ternary_breakpoints,
+ $rOpts_break_open_paren_list,
+ $rOpts_closing_side_comments,
+ $rOpts_closing_side_comment_else_flag,
+ $rOpts_closing_side_comment_maximum_text,
+ $rOpts_comma_arrow_breakpoints,
+ $rOpts_continuation_indentation,
$rOpts_delete_closing_side_comments,
- $rOpts_format_skipping,
- $rOpts_indent_only,
- $rOpts_static_block_comments,
-
- $rOpts_add_whitespace,
$rOpts_delete_old_whitespace,
+ $rOpts_delete_side_comments,
+ $rOpts_extended_continuation_indentation,
+ $rOpts_format_skipping,
$rOpts_freeze_whitespace,
$rOpts_function_paren_vertical_alignment,
- $rOpts_whitespace_cycle,
+ $rOpts_fuzzy_line_length,
+ $rOpts_ignore_old_breakpoints,
$rOpts_ignore_side_comment_lengths,
-
- $rOpts_break_at_old_attribute_breakpoints,
- $rOpts_break_at_old_keyword_breakpoints,
- $rOpts_break_at_old_logical_breakpoints,
- $rOpts_break_at_old_ternary_breakpoints,
+ $rOpts_indent_closing_brace,
+ $rOpts_indent_columns,
+ $rOpts_indent_only,
+ $rOpts_keep_interior_semicolons,
+ $rOpts_line_up_parentheses,
+ $rOpts_logical_padding,
+ $rOpts_maximum_consecutive_blank_lines,
+ $rOpts_maximum_fields_per_table,
+ $rOpts_maximum_line_length,
+ $rOpts_one_line_block_semicolons,
+ $rOpts_opening_brace_always_on_right,
+ $rOpts_outdent_keywords,
+ $rOpts_outdent_labels,
+ $rOpts_outdent_long_comments,
+ $rOpts_outdent_long_quotes,
+ $rOpts_outdent_static_block_comments,
+ $rOpts_recombine,
$rOpts_short_concatenation_item_length,
- $rOpts_closing_side_comment_else_flag,
- $rOpts_fuzzy_line_length,
+ $rOpts_stack_closing_block_brace,
+ $rOpts_static_block_comments,
+ $rOpts_sub_alias_list,
+ $rOpts_tee_block_comments,
+ $rOpts_tee_pod,
+ $rOpts_tee_side_comments,
+ $rOpts_variable_maximum_line_length,
+ $rOpts_valign,
+ $rOpts_valign_code,
+ $rOpts_valign_side_comments,
+ $rOpts_whitespace_cycle,
+ $rOpts_extended_line_up_parentheses,
# Static hashes initialized in a BEGIN block
%is_assignment,
- %is_keyword_returning_list,
%is_if_unless_and_or_last_next_redo_return,
%is_if_elsif_else_unless_while_until_for_foreach,
- %is_if_unless_while_until_for,
+ %is_if_unless_while_until_for_foreach,
%is_last_next_redo_return,
- %is_sort_map_grep,
- %is_sort_map_grep_eval,
%is_if_unless,
%is_and_or,
%is_chain_operator,
%is_opening_token,
%is_closing_token,
%is_equal_or_fat_comma,
- %is_block_with_ci,
%is_counted_type,
%is_opening_sequence_token,
%is_closing_sequence_token,
%is_anon_sub_1_brace_follower,
%is_other_brace_follower,
+ # Initialized and re-initialized in sub initialize_grep_and_friends;
+ # These can be modified by grep-alias-list
+ %is_sort_map_grep,
+ %is_sort_map_grep_eval,
+ %is_sort_map_grep_eval_do,
+ %is_block_with_ci,
+ %is_keyword_returning_list,
+ %block_type_map,
+
# Initialized in sub initialize_whitespace_hashes;
# Some can be modified according to user parameters.
%binary_ws_rules,
# Initialized in check_options, modified by prepare_cuddled_block_types:
%want_one_line_block,
- %is_braces_left_exclude_block,
# Initialized in sub prepare_cuddled_block_types
$rcuddled_block_types,
%stack_closing_token,
%weld_nested_exclusion_rules,
- %line_up_parentheses_exclusion_rules,
+ %line_up_parentheses_control_hash,
+ $line_up_parentheses_control_is_lxpl,
# regex patterns for text identification.
# Most are initialized in a sub make_**_pattern during configuration.
# Most can be configured by user parameters.
$SUB_PATTERN,
$ASUB_PATTERN,
- $ANYSUB_PATTERN,
$static_block_comment_pattern,
$static_side_comment_pattern,
$format_skipping_pattern_begin,
$format_skipping_pattern_end,
$non_indenting_brace_pattern,
+ $bl_exclusion_pattern,
+ $bl_pattern,
+ $bli_exclusion_pattern,
$bli_pattern,
$block_brace_vertical_tightness_pattern,
$blank_lines_after_opening_block_pattern,
# from level.
@maximum_line_length_at_level,
@maximum_text_length_at_level,
+ $stress_level_alpha,
+ $stress_level_beta,
# Total number of sequence items in a weld, for quick checks
$total_weld_count,
- #########################################################
+ #--------------------------------------------------------
# Section 2: Work arrays for the current batch of tokens.
- #########################################################
+ #--------------------------------------------------------
# These are re-initialized for each batch of code
# in sub initialize_batch_variables.
@levels_to_go,
@leading_spaces_to_go,
@reduced_spaces_to_go,
+ @standard_spaces_to_go,
@mate_index_to_go,
@ci_levels_to_go,
@nesting_depth_to_go,
BEGIN {
- # Initialize constants...
-
- # Array index names for token variables
+ # Index names for token variables.
+ # Do not combine with other BEGIN blocks (c101).
my $i = 0;
use constant {
- _BLOCK_TYPE_ => $i++,
_CI_LEVEL_ => $i++,
_CUMULATIVE_LENGTH_ => $i++,
_LINE_INDEX_ => $i++,
_KNEXT_SEQ_ITEM_ => $i++,
_LEVEL_ => $i++,
- _SLEVEL_ => $i++,
_TOKEN_ => $i++,
_TOKEN_LENGTH_ => $i++,
_TYPE_ => $i++,
# Number of token variables; must be last in list:
_NVARS => $i++,
};
+}
+
+BEGIN {
- # Array index names for $self (which is an array ref)
- $i = 0;
+ # Index names for $self variables.
+ # Do not combine with other BEGIN blocks (c101).
+ my $i = 0;
use constant {
_rlines_ => $i++,
_rlines_new_ => $i++,
_rLL_ => $i++,
_Klimit_ => $i++,
+ _rdepth_of_opening_seqno_ => $i++,
+ _rSS_ => $i++,
+ _Iss_opening_ => $i++,
+ _Iss_closing_ => $i++,
+ _rblock_type_of_seqno_ => $i++,
+ _ris_asub_block_ => $i++,
+ _ris_sub_block_ => $i++,
_K_opening_container_ => $i++,
_K_closing_container_ => $i++,
_K_opening_ternary_ => $i++,
_rhas_broken_code_block_ => $i++,
_rhas_ternary_ => $i++,
_ris_excluded_lp_container_ => $i++,
+ _rlp_object_by_seqno_ => $i++,
_rwant_reduced_ci_ => $i++,
_rno_xci_by_seqno_ => $i++,
+ _rbrace_left_ => $i++,
_ris_bli_container_ => $i++,
_rparent_of_seqno_ => $i++,
_rchildren_of_seqno_ => $i++,
_rspecial_side_comment_type_ => $i++,
- _rseqno_controlling_my_ci_ => $i++,
- _ris_seqno_controlling_ci_ => $i++,
- _save_logfile_ => $i++,
- _maximum_level_ => $i++,
+ _rseqno_controlling_my_ci_ => $i++,
+ _ris_seqno_controlling_ci_ => $i++,
+ _save_logfile_ => $i++,
+ _maximum_level_ => $i++,
+ _maximum_level_at_line_ => $i++,
+ _maximum_BLOCK_level_ => $i++,
+ _maximum_BLOCK_level_at_line_ => $i++,
_rKrange_code_without_comments_ => $i++,
_rbreak_before_Kfirst_ => $i++,
_rending_multiline_qw_seqno_by_K_ => $i++,
_rKrange_multiline_qw_by_seqno_ => $i++,
_rmultiline_qw_has_extra_level_ => $i++,
- _rbreak_before_container_by_seqno_ => $i++,
- _ris_essential_old_breakpoint_ => $i++,
- _roverride_cab3_ => $i++,
- _ris_assigned_structure_ => $i++,
+
+ _rcollapsed_length_by_seqno_ => $i++,
+ _rbreak_before_container_by_seqno_ => $i++,
+ _ris_essential_old_breakpoint_ => $i++,
+ _roverride_cab3_ => $i++,
+ _ris_assigned_structure_ => $i++,
+
+ _LAST_SELF_INDEX_ => $i - 1,
};
+}
+
+BEGIN {
- # Array index names for _this_batch_ (in above list)
- # So _this_batch_ is a sub-array of $self for
- # holding the batches of tokens being processed.
- $i = 0;
+ # Index names for batch variables.
+ # Do not combine with other BEGIN blocks (c101).
+ # These are stored in _this_batch_, which is a sub-array of $self.
+ my $i = 0;
use constant {
- _starting_in_quote_ => $i++,
- _ending_in_quote_ => $i++,
- _is_static_block_comment_ => $i++,
- _rlines_K_ => $i++,
- _do_not_pad_ => $i++,
- _ibeg0_ => $i++,
- _peak_batch_size_ => $i++,
- _max_index_to_go_ => $i++,
- _rK_to_go_ => $i++,
- _batch_count_ => $i++,
- _rix_seqno_controlling_ci_ => $i++,
- _batch_CODE_type_ => $i++,
+ _starting_in_quote_ => $i++,
+ _ending_in_quote_ => $i++,
+ _is_static_block_comment_ => $i++,
+ _ri_first_ => $i++,
+ _ri_last_ => $i++,
+ _do_not_pad_ => $i++,
+ _peak_batch_size_ => $i++,
+ _max_index_to_go_ => $i++,
+ _batch_count_ => $i++,
+ _rix_seqno_controlling_ci_ => $i++,
+ _batch_CODE_type_ => $i++,
+ _ri_starting_one_line_block_ => $i++,
};
+}
+
+BEGIN {
# Sequence number assigned to the root of sequence tree.
# The minimum of the actual sequences numbers is 4, so we can use 1
# Maximum number of little messages; probably need not be changed.
use constant MAX_NAG_MESSAGES => 6;
- # increment between sequence numbers for each type
- # For example, ?: pairs might have numbers 7,11,15,...
- use constant TYPE_SEQUENCE_INCREMENT => 4;
+ # This is the decimal range of printable characters in ASCII. It is used to
+ # make quick preliminary checks before resorting to using a regex.
+ use constant ORD_PRINTABLE_MIN => 33;
+ use constant ORD_PRINTABLE_MAX => 126;
# Initialize constant hashes ...
my @q;
);
@is_assignment{@q} = (1) x scalar(@q);
- @q = qw(
- grep
- keys
- map
- reverse
- sort
- split
- );
- @is_keyword_returning_list{@q} = (1) x scalar(@q);
-
@q = qw(is if unless and or err last next redo return);
@is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
@is_if_elsif_else_unless_while_until_for_foreach{@q} =
(1) x scalar(@q);
- @q = qw(if unless while until for);
- @is_if_unless_while_until_for{@q} =
+ @q = qw(if unless while until for foreach);
+ @is_if_unless_while_until_for_foreach{@q} =
(1) x scalar(@q);
@q = qw(last next redo return);
@is_last_next_redo_return{@q} = (1) x scalar(@q);
- @q = qw(sort map grep);
- @is_sort_map_grep{@q} = (1) x scalar(@q);
-
- @q = qw(sort map grep eval);
- @is_sort_map_grep_eval{@q} = (1) x scalar(@q);
+ # Map related block names into a common name to allow vertical alignment
+ # used by sub make_alignment_patterns. Note: this is normally unchanged,
+ # but it contains 'grep' and can be re-initized in
+ # sub initialize_grep_and_friends in a testing mode.
+ %block_type_map = (
+ 'unless' => 'if',
+ 'else' => 'if',
+ 'elsif' => 'if',
+ 'when' => 'if',
+ 'default' => 'if',
+ 'case' => 'if',
+ 'sort' => 'map',
+ 'grep' => 'map',
+ );
@q = qw(if unless);
@is_if_unless{@q} = (1) x scalar(@q);
@q = qw< } ) ] : >;
@is_closing_sequence_token{@q} = (1) x scalar(@q);
- # a hash needed by sub scan_list for labeling containers
+ # a hash needed by sub break_lists for labeling containers
@q = qw( k => && || ? : . );
@is_container_label_type{@q} = (1) x scalar(@q);
push @q, ',';
@is_counted_type{@q} = (1) x scalar(@q);
- # These block types can take ci. This is used by the -xci option.
- # Note that the 'sub' in this list is an anonymous sub. To be more correct
- # we could remove sub and use ASUB pattern to also handle a
- # prototype/signature. But that would slow things down and would probably
- # never be useful.
- @q = qw( do sub eval sort map grep );
- @is_block_with_ci{@q} = (1) x scalar(@q);
-
}
-{ ## begin closure to count instanes
+{ ## begin closure to count instances
# methods to count instances
my $_count = 0;
sub get_count { return $_count; }
sub _increment_count { return ++$_count }
sub _decrement_count { return --$_count }
-} ## end closure to count instanes
+} ## end closure to count instances
sub new {
# initialize closure variables...
set_logger_object($logger_object);
set_diagnostics_object($diagnostics_object);
- initialize_gnu_vars();
+ initialize_lp_vars();
initialize_csc_vars();
- initialize_scan_list();
- initialize_saved_opening_indentation();
+ initialize_break_lists();
initialize_undo_ci();
initialize_process_line_of_CODE();
initialize_grind_batch_of_CODE();
- initialize_adjusted_indentation();
+ initialize_final_indentation_adjustment();
initialize_postponed_breakpoint();
initialize_batch_variables();
initialize_forced_breakpoint_vars();
- initialize_gnu_batch_vars();
initialize_write_line();
my $vertical_aligner_object = Perl::Tidy::VerticalAligner->new(
$self->[_rlines_] = []; # = ref to array of lines of the file
$self->[_rlines_new_] = []; # = ref to array of output lines
- # 'rLL' = reference to the liner array of all tokens in the file.
+ # 'rLL' = reference to the continuous liner array of all tokens in a file.
# 'LL' stands for 'Linked List'. Using a linked list was a disaster, but
- # 'LL' stuck because it is easy to type.
+ # 'LL' stuck because it is easy to type. The 'rLL' array is updated
+ # by sub 'respace_tokens' during reformatting. The indexes in 'rLL' begin
+ # with '$K' by convention.
$self->[_rLL_] = [];
$self->[_Klimit_] = undef; # = maximum K index for rLL.
- # Arrays for quickly traversing the structures
+ # Indexes into the rLL list
$self->[_K_opening_container_] = {};
$self->[_K_closing_container_] = {};
$self->[_K_opening_ternary_] = {};
# Array of phantom semicolons, in case we ever need to undo them
$self->[_rK_phantom_semicolons_] = undef;
+ # 'rSS' is the 'Signed Sequence' list, a continuous list of all sequence
+ # numbers with + or - indicating opening or closing. This list represents
+ # the entire container tree and is invariant under reformatting. It can be
+ # used to quickly travel through the tree. Indexes in the rSS array begin
+ # with '$I' by convention. The 'Iss' arrays give the indexes in this list
+ # of opening and closing sequence numbers.
+ $self->[_rSS_] = [];
+ $self->[_Iss_opening_] = [];
+ $self->[_Iss_closing_] = [];
+
+ # Arrays to help traverse the tree
+ $self->[_rdepth_of_opening_seqno_] = [];
+ $self->[_rblock_type_of_seqno_] = {};
+ $self->[_ris_asub_block_] = {};
+ $self->[_ris_sub_block_] = {};
+
# Mostly list characteristics and processing flags
$self->[_rtype_count_by_seqno_] = {};
$self->[_ris_function_call_paren_] = {};
$self->[_rhas_broken_code_block_] = {};
$self->[_rhas_ternary_] = {};
$self->[_ris_excluded_lp_container_] = {};
+ $self->[_rlp_object_by_seqno_] = {};
$self->[_rwant_reduced_ci_] = {};
$self->[_rno_xci_by_seqno_] = {};
+ $self->[_rbrace_left_] = {};
$self->[_ris_bli_container_] = {};
$self->[_rparent_of_seqno_] = {};
$self->[_rchildren_of_seqno_] = {};
$self->[_in_tabbing_disagreement_] = 0;
$self->[_saw_VERSION_in_this_file_] = !$rOpts->{'pass-version-line'};
$self->[_saw_END_or_DATA_] = 0;
+ $self->[_first_brace_tabbing_disagreement_] = undef;
+ $self->[_in_brace_tabbing_disagreement_] = undef;
# Hashes related to container welding...
$self->[_radjusted_levels_] = [];
$self->[_rseqno_controlling_my_ci_] = {};
$self->[_ris_seqno_controlling_ci_] = {};
- $self->[_rspecial_side_comment_type_] = {};
- $self->[_maximum_level_] = 0;
+ $self->[_rspecial_side_comment_type_] = {};
+ $self->[_maximum_level_] = 0;
+ $self->[_maximum_level_at_line_] = 0;
+ $self->[_maximum_BLOCK_level_] = 0;
+ $self->[_maximum_BLOCK_level_at_line_] = 0;
$self->[_rKrange_code_without_comments_] = [];
$self->[_rbreak_before_Kfirst_] = {};
$self->[_rKrange_multiline_qw_by_seqno_] = {};
$self->[_rmultiline_qw_has_extra_level_] = {};
+ $self->[_rcollapsed_length_by_seqno_] = {};
$self->[_rbreak_before_container_by_seqno_] = {};
$self->[_ris_essential_old_breakpoint_] = {};
$self->[_roverride_cab3_] = {};
# This flag will be updated later by a call to get_save_logfile()
$self->[_save_logfile_] = defined($logger_object);
+ # Be sure all variables in $self have been initialized above. To find the
+ # correspondence of index numbers and array names, copy a list to a file
+ # and use the unix 'nl' command to number lines 1..
+ if (DEVEL_MODE) {
+ my @non_existant;
+ foreach ( 0 .. _LAST_SELF_INDEX_ ) {
+ if ( !exists( $self->[$_] ) ) {
+ push @non_existant, $_;
+ }
+ }
+ if (@non_existant) {
+ Fault("These indexes in self not initialized: (@non_existant)\n");
+ }
+ }
+
bless $self, $class;
# Safety check..this is not a class yet
# CODE SECTION 2: Some Basic Utilities
######################################
+sub check_rLL {
+
+ # Verify that the rLL array has not been auto-vivified
+ my ( $self, $msg ) = @_;
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
+ my $num = @{$rLL};
+ if ( ( defined($Klimit) && $Klimit != $num - 1 )
+ || ( !defined($Klimit) && $num > 0 ) )
+ {
+
+ # This fault can occur if the array has been accessed for an index
+ # greater than $Klimit, which is the last token index. Just accessing
+ # the array above index $Klimit, not setting a value, can cause @rLL to
+ # increase beyond $Klimit. If this occurs, the problem can be located
+ # by making calls to this routine at different locations in
+ # sub 'finish_formatting'.
+ $Klimit = 'undef' if ( !defined($Klimit) );
+ $msg = "" unless $msg;
+ Fault("$msg ERROR: rLL has num=$num but Klimit='$Klimit'\n");
+ }
+ return;
+}
+
+sub check_keys {
+ my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
+
+ # Check the keys of a hash:
+ # $rtest = ref to hash to test
+ # $rvalid = ref to hash with valid keys
+
+ # $msg = a message to write in case of error
+ # $exact_match defines the type of check:
+ # = false: test hash must not have unknown key
+ # = true: test hash must have exactly same keys as known hash
+ my @unknown_keys =
+ grep { !exists $rvalid->{$_} } keys %{$rtest};
+ my @missing_keys =
+ grep { !exists $rtest->{$_} } keys %{$rvalid};
+ my $error = @unknown_keys;
+ if ($exact_match) { $error ||= @missing_keys }
+ if ($error) {
+ local $" = ')(';
+ my @expected_keys = sort keys %{$rvalid};
+ @unknown_keys = sort @unknown_keys;
+ Fault(<<EOM);
+------------------------------------------------------------------------
+Program error detected checking hash keys
+Message is: '$msg'
+Expected keys: (@expected_keys)
+Unknown key(s): (@unknown_keys)
+Missing key(s): (@missing_keys)
+------------------------------------------------------------------------
+EOM
+ }
+ return;
+}
+
+sub check_token_array {
+ my $self = shift;
+
+ # Check for errors in the array of tokens. This is only called
+ # when the DEVEL_MODE flag is set, so this Fault will only occur
+ # during code development.
+ my $rLL = $self->[_rLL_];
+ for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
+ my $nvars = @{ $rLL->[$KK] };
+ if ( $nvars != _NVARS ) {
+ my $NVARS = _NVARS;
+ my $type = $rLL->[$KK]->[_TYPE_];
+ $type = '*' unless defined($type);
+
+ # The number of variables per token node is _NVARS and was set when
+ # the array indexes were generated. So if the number of variables
+ # is different we have done something wrong, like not store all of
+ # them in sub 'write_line' when they were received from the
+ # tokenizer.
+ Fault(
+"number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
+ );
+ }
+ foreach my $var ( _TOKEN_, _TYPE_ ) {
+ if ( !defined( $rLL->[$KK]->[$var] ) ) {
+ my $iline = $rLL->[$KK]->[_LINE_INDEX_];
+
+ # This is a simple check that each token has some basic
+ # variables. In other words, that there are no holes in the
+ # array of tokens. Sub 'write_line' pushes tokens into the
+ # $rLL array, so this should guarantee no gaps.
+ Fault("Undefined variable $var for K=$KK, line=$iline\n");
+ }
+ }
+ }
+ return;
+}
+
+{ ## begin closure check_line_hashes
+
+ # This code checks that no autovivification occurs in the 'line' hash
+
+ my %valid_line_hash;
+
+ BEGIN {
+
+ # These keys are defined for each line in the formatter
+ # Each line must have exactly these quantities
+ my @valid_line_keys = qw(
+ _curly_brace_depth
+ _ending_in_quote
+ _guessed_indentation_level
+ _line_number
+ _line_text
+ _line_type
+ _paren_depth
+ _quote_character
+ _rK_range
+ _square_bracket_depth
+ _starting_in_quote
+ _ended_in_blank_token
+ _code_type
+
+ _ci_level_0
+ _level_0
+ _nesting_blocks_0
+ _nesting_tokens_0
+ );
+
+ @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys);
+ }
+
+ sub check_line_hashes {
+ my $self = shift;
+ my $rlines = $self->[_rlines_];
+ foreach my $rline ( @{$rlines} ) {
+ my $iline = $rline->{_line_number};
+ my $line_type = $rline->{_line_type};
+ check_keys( $rline, \%valid_line_hash,
+ "Checkpoint: line number =$iline, line_type=$line_type", 1 );
+ }
+ return;
+ }
+} ## end closure check_line_hashes
+
{ ## begin closure for logger routines
my $logger_object;
return;
}
- sub report_definite_bug {
- if ($logger_object) {
- $logger_object->report_definite_bug();
- }
- return;
- }
-
sub get_saw_brace_error {
if ($logger_object) {
return $logger_object->get_saw_brace_error();
return $vao->get_output_line_number();
}
-sub check_token_array {
- my $self = shift;
-
- # Check for errors in the array of tokens. This is only called now
- # when the DEVEL_MODE flag is set, so this Fault will only occur
- # during code development.
- my $rLL = $self->[_rLL_];
- for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
- my $nvars = @{ $rLL->[$KK] };
- if ( $nvars != _NVARS ) {
- my $NVARS = _NVARS;
- my $type = $rLL->[$KK]->[_TYPE_];
- $type = '*' unless defined($type);
-
- # The number of variables per token node is _NVARS and was set when
- # the array indexes were generated. So if the number of variables
- # is different we have done something wrong, like not store all of
- # them in sub 'write_line' when they were received from the
- # tokenizer.
- Fault(
-"number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
- );
- }
- foreach my $var ( _TOKEN_, _TYPE_ ) {
- if ( !defined( $rLL->[$KK]->[$var] ) ) {
- my $iline = $rLL->[$KK]->[_LINE_INDEX_];
-
- # This is a simple check that each token has some basic
- # variables. In other words, that there are no holes in the
- # array of tokens. Sub 'write_line' pushes tokens into the
- # $rLL array, so this should guarantee no gaps.
- Fault("Undefined variable $var for K=$KK, line=$iline\n");
- }
- }
- }
- return;
-}
-
sub want_blank_line {
my $self = shift;
$self->flush();
$vao->get_cached_line_count();
}
-sub trim {
-
- # trim leading and trailing whitespace from a string
- my $str = shift;
- $str =~ s/\s+$//;
- $str =~ s/^\s+//;
- return $str;
-}
-
sub max {
my (@vals) = @_;
my $max = shift @vals;
initialize_whitespace_hashes();
initialize_bond_strength_hashes();
+ # This function must be called early to get hashes with grep initialized
+ initialize_grep_and_friends( $rOpts->{'grep-alias-list'} );
+
# Make needed regex patterns for matching text.
# NOTE: sub_matching_patterns must be made first because later patterns use
# them; see RT #133130.
}
make_bli_pattern();
+ make_bl_pattern();
make_block_brace_vertical_tightness_pattern();
make_blank_line_pattern();
make_keyword_group_list_pattern();
# Make initial list of desired one line block types
# They will be modified by 'prepare_cuddled_block_types'
+ # NOTE: this line must come after is_sort_map_grep_eval is
+ # initialized in sub 'initialize_grep_and_friends'
%want_one_line_block = %is_sort_map_grep_eval;
- # Default is to exclude one-line block types from -bl formatting
- # FIXME: Eventually a flag should be added to modify this.
- %is_braces_left_exclude_block = %is_sort_map_grep_eval;
-
prepare_cuddled_block_types();
if ( $rOpts->{'dump-cuddled-block-list'} ) {
dump_cuddled_block_list(*STDOUT);
Exit(0);
}
+ # -xlp implies -lp
+ if ( $rOpts->{'extended-line-up-parentheses'} ) {
+ $rOpts->{'line-up-parentheses'} ||= 1;
+ }
+
if ( $rOpts->{'line-up-parentheses'} ) {
if ( $rOpts->{'indent-only'}
with these flags.
-----------------------------------------------------------------------
EOM
- $rOpts->{'line-up-parentheses'} = 0;
+ $rOpts->{'line-up-parentheses'} = 0;
+ $rOpts->{'extended-line-up-parentheses'} = 0;
}
if ( $rOpts->{'whitespace-cycle'} ) {
( $lbs, $rbs );
}
}
+ return;
};
my $break_before = sub {
( $lbs, $rbs );
}
}
+ return;
};
$break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
$break_before_container_types{'('} = $_ if $_ && $_ > 0;
}
+ #--------------------------------------------------------------
+ # The combination -lp -iob -vmll -bbx=2 can be unstable (b1266)
+ #--------------------------------------------------------------
+ # The -vmll and -lp parameters do not really work well together.
+ # To avoid instabilities, we will change any -bbx=2 to -bbx=1 (stable).
+ # NOTE: we could make this more precise by looking at any exclusion
+ # flags for -lp, and allowing -bbx=2 for excluded types.
+ if ( $rOpts->{'variable-maximum-line-length'}
+ && $rOpts->{'ignore-old-breakpoints'}
+ && $rOpts->{'line-up-parentheses'} )
+ {
+ my @changed;
+ foreach my $key ( keys %break_before_container_types ) {
+ if ( $break_before_container_types{$key} == 2 ) {
+ $break_before_container_types{$key} = 1;
+ push @changed, $key;
+ }
+ }
+ if (@changed) {
+
+ # we could write a warning here
+ }
+ }
+
+ #-------------------------------------------------------------------
+ # The combination -xlp and -vmll can be unstable unless -iscl is set
+ #-------------------------------------------------------------------
+ # This is a temporary fix for issue b1302. See also b1306, b1310.
+ # FIXME: look for a better fix.
+ if ( $rOpts->{'variable-maximum-line-length'}
+ && $rOpts->{'extended-line-up-parentheses'}
+ && !$rOpts->{'ignore-side-comment-lengths'} )
+ {
+ $rOpts->{'ignore-side-comment-lengths'} = 1;
+
+ # we could write a warning here
+ }
+
+ #-----------------------------------------------------------
+ # The combination -lp -vmll can be unstable if -ci<2 (b1267)
+ #-----------------------------------------------------------
+ # The -vmll and -lp parameters do not really work well together.
+ # This is a very crude fix for an unusual parameter combination.
+ if ( $rOpts->{'variable-maximum-line-length'}
+ && $rOpts->{'line-up-parentheses'}
+ && $rOpts->{'continuation-indentation'} < 2 )
+ {
+ $rOpts->{'continuation-indentation'} = 2;
+ ##Warn("Increased -ci=n to n=2 for stability with -lp and -vmll\n");
+ }
+
%container_indentation_options = ();
foreach my $pair (
[ 'break-before-hash-brace-and-indent', '{' ],
'?' => ':',
);
- # note any requested old line breaks to keep
- %keep_break_before_type = ();
- %keep_break_after_type = ();
- if ( !$rOpts->{'ignore-old-breakpoints'} ) {
+ if ( $rOpts->{'ignore-old-breakpoints'} ) {
- # FIXME: could check for valid types here.
- # Invalid types are harmless but probably not intended.
- my @types;
- @types = ( split_words( $rOpts->{'keep-old-breakpoints-before'} ) );
- @keep_break_before_type{@types} = (1) x scalar(@types);
- @types = ( split_words( $rOpts->{'keep-old-breakpoints-after'} ) );
- @keep_break_after_type{@types} = (1) x scalar(@types);
- }
- else {
+ my @conflicts;
if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
- Warn("Conflicting parameters: -iob and -bom; -bom will be ignored\n"
- );
$rOpts->{'break-at-old-method-breakpoints'} = 0;
+ push @conflicts, '--break-at-old-method-breakpoints (-bom)';
}
if ( $rOpts->{'break-at-old-comma-breakpoints'} ) {
- Warn("Conflicting parameters: -iob and -boc; -boc will be ignored\n"
- );
$rOpts->{'break-at-old-comma-breakpoints'} = 0;
+ push @conflicts, '--break-at-old-comma-breakpoints (-boc)';
}
if ( $rOpts->{'break-at-old-semicolon-breakpoints'} ) {
- Warn("Conflicting parameters: -iob and -bos; -bos will be ignored\n"
- );
$rOpts->{'break-at-old-semicolon-breakpoints'} = 0;
+ push @conflicts, '--break-at-old-semicolon-breakpoints (-bos)';
}
if ( $rOpts->{'keep-old-breakpoints-before'} ) {
- Warn("Conflicting parameters: -iob and -kbb; -kbb will be ignored\n"
- );
$rOpts->{'keep-old-breakpoints-before'} = "";
+ push @conflicts, '--keep-old-breakpoints-before (-kbb)';
}
if ( $rOpts->{'keep-old-breakpoints-after'} ) {
- Warn("Conflicting parameters: -iob and -kba; -kba will be ignored\n"
- );
$rOpts->{'keep-old-breakpoints-after'} = "";
+ push @conflicts, '--keep-old-breakpoints-after (-kba)';
+ }
+
+ if (@conflicts) {
+ my $msg = join( "\n ",
+" Conflict: These conflicts with --ignore-old-breakponts (-iob) will be turned off:",
+ @conflicts )
+ . "\n";
+ Warn($msg);
}
# Note: These additional parameters are made inactive by -iob.
$rOpts->{'break-at-old-attribute-breakpoints'} = 0;
}
- #############################################################
+ %keep_break_before_type = ();
+ initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-before'},
+ 'kbb', \%keep_break_before_type );
+
+ %keep_break_after_type = ();
+ initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-after'},
+ 'kba', \%keep_break_after_type );
+
+ #------------------------------------------------------------
# Make global vars for frequently used options for efficiency
- #############################################################
+ #------------------------------------------------------------
- $rOpts_closing_side_comment_maximum_text =
- $rOpts->{'closing-side-comment-maximum-text'};
- $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
- $rOpts_indent_columns = $rOpts->{'indent-columns'};
- $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
- $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
- $rOpts_variable_maximum_line_length =
- $rOpts->{'variable-maximum-line-length'};
+ $rOpts_add_newlines = $rOpts->{'add-newlines'};
+ $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
+ $rOpts_blank_lines_after_opening_block =
+ $rOpts->{'blank-lines-after-opening-block'};
$rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
$rOpts_block_brace_vertical_tightness =
$rOpts->{'block-brace-vertical-tightness'};
- $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
- $rOpts_maximum_consecutive_blank_lines =
- $rOpts->{'maximum-consecutive-blank-lines'};
- $rOpts_recombine = $rOpts->{'recombine'};
- $rOpts_add_newlines = $rOpts->{'add-newlines'};
+ $rOpts_break_after_labels = $rOpts->{'break-after-labels'};
+ $rOpts_break_at_old_attribute_breakpoints =
+ $rOpts->{'break-at-old-attribute-breakpoints'};
$rOpts_break_at_old_comma_breakpoints =
$rOpts->{'break-at-old-comma-breakpoints'};
- $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
- $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
- $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
- $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
- $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
+ $rOpts_break_at_old_keyword_breakpoints =
+ $rOpts->{'break-at-old-keyword-breakpoints'};
+ $rOpts_break_at_old_logical_breakpoints =
+ $rOpts->{'break-at-old-logical-breakpoints'};
$rOpts_break_at_old_semicolon_breakpoints =
$rOpts->{'break-at-old-semicolon-breakpoints'};
-
- $rOpts_tee_side_comments = $rOpts->{'tee-side-comments'};
- $rOpts_tee_block_comments = $rOpts->{'tee-block-comments'};
- $rOpts_tee_pod = $rOpts->{'tee-pod'};
- $rOpts_delete_side_comments = $rOpts->{'delete-side-comments'};
+ $rOpts_break_at_old_ternary_breakpoints =
+ $rOpts->{'break-at-old-ternary-breakpoints'};
+ $rOpts_break_open_paren_list = $rOpts->{'break-open-paren-list'};
+ $rOpts_closing_side_comments = $rOpts->{'closing-side-comments'};
+ $rOpts_closing_side_comment_else_flag =
+ $rOpts->{'closing-side-comment-else-flag'};
+ $rOpts_closing_side_comment_maximum_text =
+ $rOpts->{'closing-side-comment-maximum-text'};
+ $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
+ $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
$rOpts_delete_closing_side_comments =
$rOpts->{'delete-closing-side-comments'};
- $rOpts_format_skipping = $rOpts->{'format-skipping'};
- $rOpts_indent_only = $rOpts->{'indent-only'};
- $rOpts_static_block_comments = $rOpts->{'static-block-comments'};
-
- $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
$rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
- $rOpts_freeze_whitespace = $rOpts->{'freeze-whitespace'};
-
+ $rOpts_extended_continuation_indentation =
+ $rOpts->{'extended-continuation-indentation'};
+ $rOpts_delete_side_comments = $rOpts->{'delete-side-comments'};
+ $rOpts_format_skipping = $rOpts->{'format-skipping'};
+ $rOpts_freeze_whitespace = $rOpts->{'freeze-whitespace'};
$rOpts_function_paren_vertical_alignment =
$rOpts->{'function-paren-vertical-alignment'};
+ $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
+ $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
$rOpts_ignore_side_comment_lengths =
$rOpts->{'ignore-side-comment-lengths'};
-
- $rOpts_break_at_old_attribute_breakpoints =
- $rOpts->{'break-at-old-attribute-breakpoints'};
- $rOpts_break_at_old_keyword_breakpoints =
- $rOpts->{'break-at-old-keyword-breakpoints'};
- $rOpts_break_at_old_logical_breakpoints =
- $rOpts->{'break-at-old-logical-breakpoints'};
- $rOpts_break_at_old_ternary_breakpoints =
- $rOpts->{'break-at-old-ternary-breakpoints'};
+ $rOpts_indent_closing_brace = $rOpts->{'indent-closing-brace'};
+ $rOpts_indent_columns = $rOpts->{'indent-columns'};
+ $rOpts_indent_only = $rOpts->{'indent-only'};
+ $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
+ $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
+ $rOpts_extended_line_up_parentheses =
+ $rOpts->{'extended-line-up-parentheses'};
+ $rOpts_logical_padding = $rOpts->{'logical-padding'};
+ $rOpts_maximum_consecutive_blank_lines =
+ $rOpts->{'maximum-consecutive-blank-lines'};
+ $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
+ $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
+ $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
+ $rOpts_opening_brace_always_on_right =
+ $rOpts->{'opening-brace-always-on-right'};
+ $rOpts_outdent_keywords = $rOpts->{'outdent-keywords'};
+ $rOpts_outdent_labels = $rOpts->{'outdent-labels'};
+ $rOpts_outdent_long_comments = $rOpts->{'outdent-long-comments'};
+ $rOpts_outdent_long_quotes = $rOpts->{'outdent-long-quotes'};
+ $rOpts_outdent_static_block_comments =
+ $rOpts->{'outdent-static-block-comments'};
+ $rOpts_recombine = $rOpts->{'recombine'};
$rOpts_short_concatenation_item_length =
$rOpts->{'short-concatenation-item-length'};
- $rOpts_closing_side_comment_else_flag =
- $rOpts->{'closing-side-comment-else-flag'};
- $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
+ $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
+ $rOpts_static_block_comments = $rOpts->{'static-block-comments'};
+ $rOpts_sub_alias_list = $rOpts->{'sub-alias-list'};
+ $rOpts_tee_block_comments = $rOpts->{'tee-block-comments'};
+ $rOpts_tee_pod = $rOpts->{'tee-pod'};
+ $rOpts_tee_side_comments = $rOpts->{'tee-side-comments'};
+ $rOpts_valign = $rOpts->{'valign'};
+ $rOpts_valign_code = $rOpts->{'valign-code'};
+ $rOpts_valign_side_comments = $rOpts->{'valign-side-comments'};
+ $rOpts_variable_maximum_line_length =
+ $rOpts->{'variable-maximum-line-length'};
# Note that both opening and closing tokens can access the opening
# and closing flags of their container types.
}
}
+ # Define two measures of indentation level, alpha and beta, at which some
+ # formatting features come under stress and need to start shutting down.
+ # Some combination of the two will be used to shut down different
+ # formatting features.
+ # Put a reasonable upper limit on stress level (say 100) in case the
+ # whitespace-cycle variable is used.
+ my $stress_level_limit = min( 100, $level_max );
+
+ # Find stress_level_alpha, targeted at very short maximum line lengths.
+ $stress_level_alpha = $stress_level_limit + 1;
+ foreach my $level_test ( 0 .. $stress_level_limit ) {
+ my $max_len = $maximum_text_length_at_level[ $level_test + 1 ];
+ my $excess_inside_space =
+ $max_len -
+ $rOpts_continuation_indentation -
+ $rOpts_indent_columns - 8;
+ if ( $excess_inside_space <= 0 ) {
+ $stress_level_alpha = $level_test;
+ last;
+ }
+ }
+
+ # Find stress level beta, a stress level targeted at formatting
+ # at deep levels near the maximum line length. We start increasing
+ # from zero and stop at the first level which shows no more space.
+
+ # 'const' is a fixed number of spaces for a typical variable.
+ # Cases b1197-b1204 work ok with const=12 but not with const=8
+ my $const = 16;
+ my $denom = max( 1, $rOpts_indent_columns );
+ $stress_level_beta = 0;
+ foreach my $level ( 0 .. $stress_level_limit ) {
+ my $remaining_cycles = max(
+ 0,
+ (
+ $maximum_text_length_at_level[$level] -
+ $rOpts_continuation_indentation - $const
+ ) / $denom
+ );
+ last if ( $remaining_cycles <= 3 ); # 2 does not work
+ $stress_level_beta = $level;
+ }
+
initialize_weld_nested_exclusion_rules($rOpts);
- initialize_line_up_parentheses_exclusion_rules($rOpts);
+
+ %line_up_parentheses_control_hash = ();
+ $line_up_parentheses_control_is_lxpl = 1;
+ my $lpxl = $rOpts->{'line-up-parentheses-exclusion-list'};
+ my $lpil = $rOpts->{'line-up-parentheses-inclusion-list'};
+ if ( $lpxl && $lpil ) {
+ Warn( <<EOM );
+You entered values for both -lpxl=s and -lpil=s; the -lpil list will be ignored
+EOM
+ }
+ if ($lpxl) {
+ $line_up_parentheses_control_is_lxpl = 1;
+ initialize_line_up_parentheses_control_hash(
+ $rOpts->{'line-up-parentheses-exclusion-list'}, 'lpxl' );
+ }
+ elsif ($lpil) {
+ $line_up_parentheses_control_is_lxpl = 0;
+ initialize_line_up_parentheses_control_hash(
+ $rOpts->{'line-up-parentheses-inclusion-list'}, 'lpil' );
+ }
+
+ return;
+}
+
+use constant ALIGN_GREP_ALIASES => 0;
+
+sub initialize_grep_and_friends {
+ my ($str) = @_;
+
+ # Initialize or re-initialize hashes with 'grep' and grep aliases. This
+ # must be done after each set of options because new grep aliases may be
+ # used.
+
+ # re-initialize the hash ... this is critical!
+ %is_sort_map_grep = ();
+
+ my @q = qw(sort map grep);
+ @is_sort_map_grep{@q} = (1) x scalar(@q);
+
+ # Note that any 'grep-alias-list' string has been preprocessed to be a
+ # trimmed, space-separated list.
+ my @grep_aliases = split /\s+/, $str;
+ @{is_sort_map_grep}{@grep_aliases} = (1) x scalar(@grep_aliases);
+
+ ##@q = qw(sort map grep eval);
+ %is_sort_map_grep_eval = %is_sort_map_grep;
+ $is_sort_map_grep_eval{'eval'} = 1;
+
+ ##@q = qw(sort map grep eval do);
+ %is_sort_map_grep_eval_do = %is_sort_map_grep_eval;
+ $is_sort_map_grep_eval_do{'do'} = 1;
+
+ # These block types can take ci. This is used by the -xci option.
+ # Note that the 'sub' in this list is an anonymous sub. To be more correct
+ # we could remove sub and use ASUB pattern to also handle a
+ # prototype/signature. But that would slow things down and would probably
+ # never be useful.
+ ##@q = qw( do sub eval sort map grep );
+ %is_block_with_ci = %is_sort_map_grep_eval_do;
+ $is_block_with_ci{'sub'} = 1;
+
+ %is_keyword_returning_list = ();
+ @q = qw(
+ grep
+ keys
+ map
+ reverse
+ sort
+ split
+ );
+ push @q, @grep_aliases;
+ @is_keyword_returning_list{@q} = (1) x scalar(@q);
+
+ # This code enables vertical alignment of grep aliases for testing. It has
+ # not been found to be beneficial, so it is off by default. But it is
+ # useful for precise testing of the grep alias coding.
+ if (ALIGN_GREP_ALIASES) {
+ %block_type_map = (
+ 'unless' => 'if',
+ 'else' => 'if',
+ 'elsif' => 'if',
+ 'when' => 'if',
+ 'default' => 'if',
+ 'case' => 'if',
+ 'sort' => 'map',
+ 'grep' => 'map',
+ );
+ foreach (@q) {
+ $block_type_map{$_} = 'map' unless ( $_ eq 'map' );
+ }
+ }
return;
}
return;
}
-sub initialize_line_up_parentheses_exclusion_rules {
- my ($rOpts) = @_;
- %line_up_parentheses_exclusion_rules = ();
- my $opt_name = 'line-up-parentheses-exclusion-list';
- my $str = $rOpts->{$opt_name};
+sub initialize_line_up_parentheses_control_hash {
+ my ( $str, $opt_name ) = @_;
return unless ($str);
$str =~ s/^\s+//;
$str =~ s/\s+$//;
next;
}
- if ( !defined( $line_up_parentheses_exclusion_rules{$key} ) ) {
- $line_up_parentheses_exclusion_rules{$key} = [ $flag1, $flag2 ];
+ if ( !defined( $line_up_parentheses_control_hash{$key} ) ) {
+ $line_up_parentheses_control_hash{$key} = [ $flag1, $flag2 ];
next;
}
# check for multiple conflicting specifications
- my $rflags = $line_up_parentheses_exclusion_rules{$key};
+ my $rflags = $line_up_parentheses_control_hash{$key};
my $err;
if ( defined( $rflags->[0] ) && $rflags->[0] ne $flag1 ) {
$err = 1;
EOM
}
- # Possible speedup: we could turn off -lp if it is not actually used
- my $all_off = 1;
- foreach my $key (qw# ( { [ #) {
- my $rflags = $line_up_parentheses_exclusion_rules{$key};
- if ( defined($rflags) ) {
- my ( $flag1, $flag2 ) = @{$rflags};
- if ( $flag1 && $flag1 ne '*' ) { $all_off = 0; last }
- if ($flag2) { $all_off = 0; last }
+ # Speedup: we can turn off -lp if it is not actually used
+ if ($line_up_parentheses_control_is_lxpl) {
+ my $all_off = 1;
+ foreach my $key (qw# ( { [ #) {
+ my $rflags = $line_up_parentheses_control_hash{$key};
+ if ( defined($rflags) ) {
+ my ( $flag1, $flag2 ) = @{$rflags};
+ if ( $flag1 && $flag1 ne '*' ) { $all_off = 0; last }
+ if ($flag2) { $all_off = 0; last }
+ }
+ }
+ if ($all_off) {
+ $rOpts->{'line-up-parentheses'} = "";
+ }
+ }
+
+ return;
+}
+
+use constant DEBUG_KB => 0;
+
+sub initialize_keep_old_breakpoints {
+ my ( $str, $short_name, $rkeep_break_hash ) = @_;
+ return unless $str;
+
+ my %flags = ();
+ my @list = split_words($str);
+ if ( DEBUG_KB && @list ) {
+ local $" = ' ';
+ print <<EOM;
+DEBUG_KB entering for '$short_name' with str=$str\n";
+list is: @list;
+EOM
+ }
+
+ # - pull out any any leading container code, like f( or *{
+ foreach (@list) {
+ if ( $_ =~ /^( [ \w\* ] )( [ \{\(\[\}\)\] ] )$/x ) {
+ $_ = $2;
+ $flags{$2} = $1;
+ }
+ }
+
+ my @unknown_types;
+ foreach my $type (@list) {
+ if ( !Perl::Tidy::Tokenizer::is_valid_token_type($type) ) {
+ push @unknown_types, $type;
+ }
+ }
+
+ if (@unknown_types) {
+ my $num = @unknown_types;
+ local $" = ' ';
+ Warn(<<EOM);
+$num unrecognized token types were input with --$short_name :
+@unknown_types
+EOM
+ }
+
+ @{$rkeep_break_hash}{@list} = (1) x scalar(@list);
+
+ foreach my $key ( keys %flags ) {
+ my $flag = $flags{$key};
+
+ if ( length($flag) != 1 ) {
+ Warn(<<EOM);
+Multiple entries given for '$key' in '$short_name'
+EOM
+ }
+ elsif ( ( $key eq '(' || $key eq ')' ) && $flag !~ /^[kKfFwW\*]$/ ) {
+ Warn(<<EOM);
+Unknown flag '$flag' given for '$key' in '$short_name'
+EOM
+ }
+ elsif ( ( $key eq '}' || $key eq '}' ) && $flag !~ /^[bB\*]$/ ) {
+ Warn(<<EOM);
+Unknown flag '$flag' given for '$key' in '$short_name'
+EOM
}
+
+ $rkeep_break_hash->{$key} = $flag;
+ }
+
+ # Temporary patch and warning during changeover from using type to token for
+ # containers . This can be eliminated after one or two future releases.
+ if ( $rkeep_break_hash->{'{'}
+ && $rkeep_break_hash->{'{'} eq '1'
+ && !$rkeep_break_hash->{'('}
+ && !$rkeep_break_hash->{'['} )
+ {
+ $rkeep_break_hash->{'('} = 1;
+ $rkeep_break_hash->{'['} = 1;
+ Warn(<<EOM);
+Sorry, but the format for the -kbb and -kba flags is changing a little.
+You entered '{' which currently matches '{' '(' and '[',
+but in the future it will only match '{'.
+To prevent this message please do one of the following:
+ use '{ ( [' if you want to match all opening containers, or
+ use '(' or '[' to match just those containers, or
+ use '*{' to match only opening braces
+EOM
+ }
+
+ if ( $rkeep_break_hash->{'}'}
+ && $rkeep_break_hash->{'}'} eq '1'
+ && !$rkeep_break_hash->{')'}
+ && !$rkeep_break_hash->{']'} )
+ {
+ $rkeep_break_hash->{'('} = 1;
+ $rkeep_break_hash->{'['} = 1;
+ Warn(<<EOM);
+Sorry, but the format for the -kbb and -kba flags is changing a little.
+You entered '}' which currently matches each of '}' ')' and ']',
+but in the future it will only match '}'.
+To prevent this message please do one of the following:
+ use '} ) ]' if you want to match all closing containers, or
+ use ')' or ']' to match just those containers, or
+ use '*}' to match only closing braces
+EOM
}
- if ($all_off) {
- # FIXME: This speedup works but is currently deactivated because at
- # present users of -lp could see some discontinuities in formatting,
- # such as those involving the choice of breaks at '='. Only if/when
- # these issues have been checked and resolved it should be reactivated
- # as a speedup.
- ## $rOpts->{'line-up-parentheses'} = "";
+ if ( DEBUG_KB && @list ) {
+ my @tmp = %flags;
+ local $" = ' ';
+ print <<EOM;
+
+DEBUG_KB -$short_name flag: $str
+final keys: @list
+special flags: @tmp
+EOM
+
}
return;
+
}
sub initialize_whitespace_hashes {
} ## end initialize_whitespace_hashes
+# The following hash is used to skip over needless if tests.
+# Be sure to update it when adding new checks in its block.
+my %is_special_ws_type;
+
+BEGIN {
+ my @q = qw(k w i C m - Q);
+ push @q, '#';
+ @is_special_ws_type{@q} = (1) x scalar(@q);
+}
+
+use constant DEBUG_WHITE => 0;
+
sub set_whitespace_flags {
# This routine is called once per file to set whitespace flags for that
#
my $self = shift;
- my $rLL = $self->[_rLL_];
- use constant DEBUG_WHITE => 0;
+
+ my $rLL = $self->[_rLL_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my $jmax = @{$rLL} - 1;
my $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
my $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
my $rwhitespace_flags = [];
my $ris_function_call_paren = {};
+ return $rwhitespace_flags if ( $jmax < 0 );
+
my %is_for_foreach = ( 'for' => 1, 'foreach' => 1 );
- my ( $token, $type, $block_type, $seqno, $input_line_no );
- my (
- $last_token, $last_type, $last_block_type,
- $last_seqno, $last_input_line_no
- );
+ my ( $rtokh, $token, $type );
+ my ( $rtokh_last, $last_token, $last_type );
my $j_tight_closing_paren = -1;
- $token = ' ';
- $type = 'b';
- $block_type = '';
- $seqno = '';
- $input_line_no = 0;
- $last_token = ' ';
- $last_type = 'b';
- $last_block_type = '';
- $last_seqno = '';
- $last_input_line_no = 0;
+ $rtokh = [ @{ $rLL->[0] } ];
+ $token = ' ';
+ $type = 'b';
- my $jmax = @{$rLL} - 1;
+ $rtokh->[_TOKEN_] = $token;
+ $rtokh->[_TYPE_] = $type;
+ $rtokh->[_TYPE_SEQUENCE_] = '';
+ $rtokh->[_LINE_INDEX_] = 0;
my ($ws);
$closing_container_inside_ws{$sequence_number} = $ws_flag;
}
}
+ return;
};
- my $ws_opening_container_override = sub {
- my ( $ws, $sequence_number ) = @_;
- return $ws unless (%opening_container_inside_ws);
- if ($sequence_number) {
- my $ws_override = $opening_container_inside_ws{$sequence_number};
- if ($ws_override) { $ws = $ws_override }
- }
- return $ws;
- };
-
- my $ws_closing_container_override = sub {
- my ( $ws, $sequence_number ) = @_;
- return $ws unless (%closing_container_inside_ws);
- if ($sequence_number) {
- my $ws_override = $closing_container_inside_ws{$sequence_number};
- if ($ws_override) { $ws = $ws_override }
- }
- return $ws;
- };
+ my ( $ws_1, $ws_2, $ws_3, $ws_4 );
# main loop over all tokens to define the whitespace flags
for ( my $j = 0 ; $j <= $jmax ; $j++ ) {
- my $rtokh = $rLL->[$j];
-
- # Set a default
- $rwhitespace_flags->[$j] = WS_OPTIONAL;
-
- if ( $rtokh->[_TYPE_] eq 'b' ) {
+ if ( $rLL->[$j]->[_TYPE_] eq 'b' ) {
+ $rwhitespace_flags->[$j] = WS_OPTIONAL;
next;
}
- # set a default value, to be changed as needed
- $ws = undef;
- $last_token = $token;
- $last_type = $type;
- $last_block_type = $block_type;
- $last_seqno = $seqno;
- $last_input_line_no = $input_line_no;
- $token = $rtokh->[_TOKEN_];
- $type = $rtokh->[_TYPE_];
- $block_type = $rtokh->[_BLOCK_TYPE_];
- $seqno = $rtokh->[_TYPE_SEQUENCE_];
- $input_line_no = $rtokh->[_LINE_INDEX_];
+ $rtokh_last = $rtokh;
+ $last_token = $token;
+ $last_type = $type;
+
+ $rtokh = $rLL->[$j];
+ $token = $rtokh->[_TOKEN_];
+ $type = $rtokh->[_TYPE_];
+
+ $ws = undef;
#---------------------------------------------------------------
# Whitespace Rules Section 1:
# /^[L\{\(\[]$/
if ( $is_opening_type{$last_type} ) {
+ my $seqno = $rtokh->[_TYPE_SEQUENCE_];
+ my $block_type = $rblock_type_of_seqno->{$seqno};
+ my $last_seqno = $rtokh_last->[_TYPE_SEQUENCE_];
+ my $last_block_type = $rblock_type_of_seqno->{$last_seqno};
+
$j_tight_closing_paren = -1;
# let us keep empty matched braces together: () {} []
}
# check for special cases which override the above rules
- $ws = $ws_opening_container_override->( $ws, $last_seqno );
+ if ( %opening_container_inside_ws && $last_seqno ) {
+ my $ws_override = $opening_container_inside_ws{$last_seqno};
+ if ($ws_override) { $ws = $ws_override }
+ }
- } # end setting space flag inside opening tokens
- my $ws_1;
- $ws_1 = $ws
- if DEBUG_WHITE;
+ $ws_4 = $ws_3 = $ws_2 = $ws_1 = $ws
+ if DEBUG_WHITE;
+
+ } ## end setting space flag inside opening tokens
#---------------------------------------------------------------
# Whitespace Rules Section 2:
# /[\}\)\]R]/
if ( $is_closing_type{$type} ) {
+ my $seqno = $rtokh->[_TYPE_SEQUENCE_];
if ( $j == $j_tight_closing_paren ) {
$j_tight_closing_paren = -1;
if ( !defined($ws) ) {
my $tightness;
+ my $block_type = $rblock_type_of_seqno->{$seqno};
if ( $type eq '}' && $token eq '}' && $block_type ) {
$tightness = $rOpts_block_brace_tightness;
}
}
# check for special cases which override the above rules
- $ws = $ws_closing_container_override->( $ws, $seqno );
-
- } # end setting space flag inside closing tokens
+ if ( %closing_container_inside_ws && $seqno ) {
+ my $ws_override = $closing_container_inside_ws{$seqno};
+ if ($ws_override) { $ws = $ws_override }
+ }
- my $ws_2;
- $ws_2 = $ws
- if DEBUG_WHITE;
+ $ws_4 = $ws_3 = $ws_2 = $ws
+ if DEBUG_WHITE;
+ } ## end setting space flag inside closing tokens
#---------------------------------------------------------------
# Whitespace Rules Section 3:
- # Use the binary rule table.
- #---------------------------------------------------------------
- if ( !defined($ws) ) {
- $ws = $binary_ws_rules{$last_type}{$type};
- }
- my $ws_3;
- $ws_3 = $ws
- if DEBUG_WHITE;
-
- #---------------------------------------------------------------
- # Whitespace Rules Section 4:
# Handle some special cases.
#---------------------------------------------------------------
- if ( $token eq '(' ) {
- # This will have to be tweaked as tokenization changes.
- # We usually want a space at '} (', for example:
- # <<snippets/space1.in>>
- # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
- #
- # But not others:
- # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
- # At present, the above & block is marked as type L/R so this case
- # won't go through here.
- if ( $last_type eq '}' && $last_token ne ')' ) { $ws = WS_YES }
-
- # NOTE: some older versions of Perl had occasional problems if
- # spaces are introduced between keywords or functions and opening
- # parens. So the default is not to do this except is certain
- # cases. The current Perl seems to tolerate spaces.
-
- # Space between keyword and '('
- elsif ( $last_type eq 'k' ) {
- $ws = WS_NO
- unless ( $rOpts_space_keyword_paren
- || $space_after_keyword{$last_token} );
-
- # Set inside space flag if requested
- $set_container_ws_by_keyword->( $last_token, $seqno );
- }
-
- # Space between function and '('
- # -----------------------------------------------------
- # 'w' and 'i' checks for something like:
- # myfun( &myfun( ->myfun(
- # -----------------------------------------------------
-
- # Note that at this point an identifier may still have a leading
- # arrow, but the arrow will be split off during token respacing.
- # After that, the token may become a bare word without leading
- # arrow. The point is, it is best to mark function call parens
- # right here before that happens.
- # Patch: added 'C' to prevent blinker, case b934, i.e. 'pi()'
- # NOTE: this would be the place to allow spaces between repeated
- # parens, like () () (), as in case c017, but I decided that would
- # not be a good idea.
- elsif (( $last_type =~ /^[wCUG]$/ )
- || ( $last_type =~ /^[wi]$/ && $last_token =~ /^([\&]|->)/ ) )
- {
- $ws = $rOpts_space_function_paren ? WS_YES : WS_NO;
- $set_container_ws_by_keyword->( $last_token, $seqno );
- $ris_function_call_paren->{$seqno} = 1;
- }
+ # /^[L\{\(\[]$/
+ elsif ( $is_opening_type{$type} ) {
+
+ if ( $token eq '(' ) {
+
+ my $seqno = $rtokh->[_TYPE_SEQUENCE_];
+
+ # This will have to be tweaked as tokenization changes.
+ # We usually want a space at '} (', for example:
+ # <<snippets/space1.in>>
+ # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
+ #
+ # But not others:
+ # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
+ # At present, the above & block is marked as type L/R so this case
+ # won't go through here.
+ if ( $last_type eq '}' && $last_token ne ')' ) { $ws = WS_YES }
+
+ # NOTE: some older versions of Perl had occasional problems if
+ # spaces are introduced between keywords or functions and opening
+ # parens. So the default is not to do this except is certain
+ # cases. The current Perl seems to tolerate spaces.
+
+ # Space between keyword and '('
+ elsif ( $last_type eq 'k' ) {
+ $ws = WS_NO
+ unless ( $rOpts_space_keyword_paren
+ || $space_after_keyword{$last_token} );
+
+ # Set inside space flag if requested
+ $set_container_ws_by_keyword->( $last_token, $seqno );
+ }
+
+ # Space between function and '('
+ # -----------------------------------------------------
+ # 'w' and 'i' checks for something like:
+ # myfun( &myfun( ->myfun(
+ # -----------------------------------------------------
+
+ # Note that at this point an identifier may still have a leading
+ # arrow, but the arrow will be split off during token respacing.
+ # After that, the token may become a bare word without leading
+ # arrow. The point is, it is best to mark function call parens
+ # right here before that happens.
+ # Patch: added 'C' to prevent blinker, case b934, i.e. 'pi()'
+ # NOTE: this would be the place to allow spaces between repeated
+ # parens, like () () (), as in case c017, but I decided that would
+ # not be a good idea.
+ elsif (
+ ( $last_type =~ /^[wCUG]$/ )
+ || ( $last_type =~ /^[wi]$/ && $last_token =~ /^([\&]|->)/ )
+ )
+ {
+ $ws = $rOpts_space_function_paren ? WS_YES : WS_NO;
+ $set_container_ws_by_keyword->( $last_token, $seqno );
+ $ris_function_call_paren->{$seqno} = 1;
+ }
+
+ # space between something like $i and ( in <<snippets/space2.in>>
+ # for $i ( 0 .. 20 ) {
+ # FIXME: eventually, type 'i' could be split into multiple
+ # token types so this can be a hardwired rule.
+ elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
+ $ws = WS_YES;
+ }
- # space between something like $i and ( in <<snippets/space2.in>>
- # for $i ( 0 .. 20 ) {
- # FIXME: eventually, type 'i' could be split into multiple
- # token types so this can be a hardwired rule.
- elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
- $ws = WS_YES;
+ # allow constant function followed by '()' to retain no space
+ elsif ($last_type eq 'C'
+ && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
+ {
+ $ws = WS_NO;
+ }
}
- # allow constant function followed by '()' to retain no space
- elsif ($last_type eq 'C'
- && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
- {
- $ws = WS_NO;
+ # patch for SWITCH/CASE: make space at ']{' optional
+ # since the '{' might begin a case or when block
+ elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
+ $ws = WS_OPTIONAL;
}
- }
- # patch for SWITCH/CASE: make space at ']{' optional
- # since the '{' might begin a case or when block
- elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
- $ws = WS_OPTIONAL;
- }
+ # keep space between 'sub' and '{' for anonymous sub definition
+ if ( $type eq '{' ) {
+ if ( $last_token eq 'sub' ) {
+ $ws = WS_YES;
+ }
+
+ # this is needed to avoid no space in '){'
+ if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
- # keep space between 'sub' and '{' for anonymous sub definition
- if ( $type eq '{' ) {
- if ( $last_token eq 'sub' ) {
- $ws = WS_YES;
+ # avoid any space before the brace or bracket in something like
+ # @opts{'a','b',...}
+ if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
+ $ws = WS_NO;
+ }
}
+ } ## end if ( $is_opening_type{$type} ) {
- # this is needed to avoid no space in '){'
- if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
+ # Special checks for certain other types ...
+ # the hash '%is_special_ws_type' significantly speeds up this routine,
+ # but be sure to update it if a new check is added.
+ # Currently has types: qw(k w i C m - Q #)
+ elsif ( $is_special_ws_type{$type} ) {
+ if ( $type eq 'i' ) {
- # avoid any space before the brace or bracket in something like
- # @opts{'a','b',...}
- if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
- $ws = WS_NO;
+ # never a space before ->
+ if ( substr( $token, 0, 2 ) eq '->' ) {
+ $ws = WS_NO;
+ }
}
- }
- elsif ( $type eq 'i' ) {
+ elsif ( $type eq 'k' ) {
- # never a space before ->
- if ( substr( $token, 0, 2 ) eq '->' ) {
- $ws = WS_NO;
+ # Keywords 'for', 'foreach' are special cases for -kpit since
+ # the opening paren does not always immediately follow the
+ # keyword. So we have to search forward for the paren in this
+ # case. I have limited the search to 10 tokens ahead, just in
+ # case somebody has a big file and no opening paren. This
+ # should be enough for all normal code. Added the level check
+ # to fix b1236.
+ if ( $is_for_foreach{$token}
+ && %keyword_paren_inner_tightness
+ && defined( $keyword_paren_inner_tightness{$token} )
+ && $j < $jmax )
+ {
+ my $level = $rLL->[$j]->[_LEVEL_];
+ my $jp = $j;
+ for ( my $inc = 1 ; $inc < 10 ; $inc++ ) {
+ $jp++;
+ last if ( $jp > $jmax );
+ last if ( $rLL->[$jp]->[_LEVEL_] != $level ); # b1236
+ next unless ( $rLL->[$jp]->[_TOKEN_] eq '(' );
+ my $seqno_p = $rLL->[$jp]->[_TYPE_SEQUENCE_];
+ $set_container_ws_by_keyword->( $token, $seqno_p );
+ last;
+ }
+ }
}
- }
- # retain any space between '-' and bare word
- elsif ( $type eq 'w' || $type eq 'C' ) {
- $ws = WS_OPTIONAL if $last_type eq '-';
+ # retain any space between '-' and bare word
+ elsif ( $type eq 'w' || $type eq 'C' ) {
+ $ws = WS_OPTIONAL if $last_type eq '-';
+
+ # never a space before ->
+ if ( substr( $token, 0, 2 ) eq '->' ) {
+ $ws = WS_NO;
+ }
+ }
- # never a space before ->
- if ( substr( $token, 0, 2 ) eq '->' ) {
- $ws = WS_NO;
+ # retain any space between '-' and bare word; for example
+ # avoid space between 'USER' and '-' here: <<snippets/space2.in>>
+ # $myhash{USER-NAME}='steve';
+ elsif ( $type eq 'm' || $type eq '-' ) {
+ $ws = WS_OPTIONAL if ( $last_type eq 'w' );
}
- }
- # retain any space between '-' and bare word; for example
- # avoid space between 'USER' and '-' here: <<snippets/space2.in>>
- # $myhash{USER-NAME}='steve';
- elsif ( $type eq 'm' || $type eq '-' ) {
- $ws = WS_OPTIONAL if ( $last_type eq 'w' );
- }
+ # always space before side comment
+ elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
- # always space before side comment
- elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
+ # space_backslash_quote; RT #123774 <<snippets/rt123774.in>>
+ # allow a space between a backslash and single or double quote
+ # to avoid fooling html formatters
+ elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ )
+ {
+ if ($rOpts_space_backslash_quote) {
+ if ( $rOpts_space_backslash_quote == 1 ) {
+ $ws = WS_OPTIONAL;
+ }
+ elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES }
+ else { } # shouldnt happen
+ }
+ else {
+ $ws = WS_NO;
+ }
+ }
+ } ## end elsif ( $is_special_ws_type{$type} ...
# always preserver whatever space was used after a possible
# filehandle (except _) or here doc operator
$ws = WS_OPTIONAL;
}
- # space_backslash_quote; RT #123774 <<snippets/rt123774.in>>
- # allow a space between a backslash and single or double quote
- # to avoid fooling html formatters
- elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ ) {
- if ($rOpts_space_backslash_quote) {
- if ( $rOpts_space_backslash_quote == 1 ) {
- $ws = WS_OPTIONAL;
- }
- elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES }
- else { } # shouldnt happen
- }
- else {
- $ws = WS_NO;
- }
- }
- elsif ( $type eq 'k' ) {
-
- # Keywords 'for', 'foreach' are special cases for -kpit since the
- # opening paren does not always immediately follow the keyword. So
- # we have to search forward for the paren in this case. I have
- # limited the search to 10 tokens ahead, just in case somebody
- # has a big file and no opening paren. This should be enough for
- # all normal code.
- if ( $is_for_foreach{$token}
- && %keyword_paren_inner_tightness
- && defined( $keyword_paren_inner_tightness{$token} )
- && $j < $jmax )
- {
- my $jp = $j;
- for ( my $inc = 1 ; $inc < 10 ; $inc++ ) {
- $jp++;
- last if ( $jp > $jmax );
- next unless ( $rLL->[$jp]->[_TOKEN_] eq '(' );
- my $seqno = $rLL->[$jp]->[_TYPE_SEQUENCE_];
- $set_container_ws_by_keyword->( $token, $seqno );
- last;
- }
- }
- }
-
- my $ws_4;
- $ws_4 = $ws
+ $ws_4 = $ws_3 = $ws
if DEBUG_WHITE;
- #---------------------------------------------------------------
- # Whitespace Rules Section 5:
- # Apply default rules not covered above.
- #---------------------------------------------------------------
-
- # If we fall through to here, look at the pre-defined hash tables for
- # the two tokens, and:
- # if (they are equal) use the common value
- # if (either is zero or undef) use the other
- # if (either is -1) use it
- # That is,
- # left vs right
- # 1 vs 1 --> 1
- # 0 vs 0 --> 0
- # -1 vs -1 --> -1
- #
- # 0 vs -1 --> -1
- # 0 vs 1 --> 1
- # 1 vs 0 --> 1
- # -1 vs 0 --> -1
- #
- # -1 vs 1 --> -1
- # 1 vs -1 --> -1
if ( !defined($ws) ) {
- my $wl = $want_left_space{$type};
- my $wr = $want_right_space{$last_type};
- if ( !defined($wl) ) { $wl = 0 }
- if ( !defined($wr) ) { $wr = 0 }
- $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
- }
- if ( !defined($ws) ) {
- $ws = 0;
- write_diagnostics(
- "WS flag is undefined for tokens $last_token $token\n");
+ #---------------------------------------------------------------
+ # Whitespace Rules Section 4:
+ # Use the binary rule table.
+ #---------------------------------------------------------------
+ $ws = $binary_ws_rules{$last_type}{$type};
+ $ws_4 = $ws if DEBUG_WHITE;
+
+ #---------------------------------------------------------------
+ # Whitespace Rules Section 5:
+ # Apply default rules not covered above.
+ #---------------------------------------------------------------
+
+ # If we fall through to here, look at the pre-defined hash tables for
+ # the two tokens, and:
+ # if (they are equal) use the common value
+ # if (either is zero or undef) use the other
+ # if (either is -1) use it
+ # That is,
+ # left vs right
+ # 1 vs 1 --> 1
+ # 0 vs 0 --> 0
+ # -1 vs -1 --> -1
+ #
+ # 0 vs -1 --> -1
+ # 0 vs 1 --> 1
+ # 1 vs 0 --> 1
+ # -1 vs 0 --> -1
+ #
+ # -1 vs 1 --> -1
+ # 1 vs -1 --> -1
+ if ( !defined($ws) ) {
+ my $wl = $want_left_space{$type};
+ my $wr = $want_right_space{$last_type};
+ if ( !defined($wl) ) {
+ $ws = defined($wr) ? $wr : 0;
+ }
+ elsif ( !defined($wr) ) {
+ $ws = $wl;
+ }
+ else {
+ $ws =
+ ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
+ }
+ }
}
# Treat newline as a whitespace. Otherwise, we might combine
# my $msg = new Fax::Send
# -recipients => $to,
# -data => $data;
- if ( $ws == 0 && $input_line_no != $last_input_line_no ) { $ws = 1 }
+ if ( $ws == 0
+ && $rtokh->[_LINE_INDEX_] != $rtokh_last->[_LINE_INDEX_] )
+ {
+ $ws = 1;
+ }
$rwhitespace_flags->[$j] = $ws;
- DEBUG_WHITE && do {
+ if (DEBUG_WHITE) {
my $str = substr( $last_token, 0, 15 );
$str .= ' ' x ( 16 - length($str) );
if ( !defined($ws_1) ) { $ws_1 = "*" }
if ( !defined($ws_4) ) { $ws_4 = "*" }
print STDOUT
"NEW WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
- };
+
+ # reset for next pass
+ $ws_1 = $ws_2 = $ws_3 = $ws_4 = undef;
+ }
} ## end main loop
if ( $rOpts->{'tight-secret-operators'} ) {
my %essential_whitespace_filter_l2;
my %essential_whitespace_filter_r2;
my %is_type_with_space_before_bareword;
+ my %is_special_variable_char;
BEGIN {
my @q;
+
+ # NOTE: This hash is like the global %is_sort_map_grep, but it ignores
+ # grep aliases on purpose, since here we are looking parens, not braces
@q = qw(sort grep map);
@is_sort_grep_map{@q} = (1) x scalar(@q);
@q = qw( Q & );
@is_type_with_space_before_bareword{@q} = (1) x scalar(@q);
+ # These are the only characters which can (currently) form special
+ # variables, like $^W: (issue c066, c068).
+ @q =
+ qw{ ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ };
+ @{is_special_variable_char}{@q} = (1) x scalar(@q);
+
}
sub is_essential_whitespace {
# Note2: The -mangle option causes large numbers of calls to this
# routine and therefore is a good test. So if a change is made, be sure
- # to run a large number of files with the -mangle option and check for
- # differences.
+ # to use nytprof to profile with both old and reviesed coding using the
+ # -mangle option and check differences.
my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
# example: pom.caputo:
# $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
|| $typel eq 'n' && $tokenr eq '.'
- || $typer eq 'n'
- && $tokenl eq '.'
+ || $typer eq 'n' && $tokenl eq '.'
# cases of a space before a bareword...
|| (
# keep a space between a token ending in '$' and any word;
# this caused trouble: "die @$ if $@"
- || $typel eq 'i' && $tokenl =~ /\$$/
+ ##|| $typel eq 'i' && $tokenl =~ /\$$/
+ || $typel eq 'i' && substr( $tokenl, -1, 1 ) eq '$'
# don't combine $$ or $# with any alphanumeric
# (testfile mangle.t with --mangle)
- || $tokenl =~ /^\$[\$\#]$/
+ ##|| $tokenl =~ /^\$[\$\#]$/
+ || $tokenl eq '$$'
+ || $tokenl eq '$#'
)
) ## end $tokenr_is_bareword
|| $typel eq 'w' && ( $tokenr eq '-' || $typer eq 'Q' )
# perl is very fussy about spaces before <<
- || $tokenr =~ /^\<\</
+ || substr( $tokenr, 0, 2 ) eq '<<'
+ ##|| $tokenr =~ /^\<\</
# avoid combining tokens to create new meanings. Example:
# $a+ +$b must not become $a++$b
# be careful with a space around ++ and --, to avoid ambiguity as to
# which token it applies
- || $typer =~ /^(pp|mm)$/ && $tokenl !~ /^[\;\{\(\[]/
- || $typel =~ /^(\+\+|\-\-)$/
+ ##|| $typer =~ /^(pp|mm)$/ && $tokenl !~ /^[\;\{\(\[]/
+ || ( $typer eq 'pp' || $typer eq 'mm' ) && $tokenl !~ /^[\;\{\(\[]/
+ || ( $typel eq '++' || $typel eq '--' )
&& $tokenr !~ /^[\;\}\)\]]/
+ ##|| $typel =~ /^(\+\+|\-\-)$/ && $tokenr !~ /^[\;\}\)\]]/
# need space after foreach my; for example, this will fail in
# older versions of Perl:
|| (
$tokenl eq 'my'
+ && substr( $tokenr, 0, 1 ) eq '$'
+ ##&& $tokenr =~ /^\$/
+
# /^(for|foreach)$/
&& $is_for_foreach{$tokenll}
- && $tokenr =~ /^\$/
)
+ # Keep space after like $^ if needed to avoid forming a different
+ # special variable (issue c068). For example:
+ # my $aa = $^ ? "none" : "ok";
+ || ( $typel eq 'i'
+ && length($tokenl) == 2
+ && substr( $tokenl, 1, 1 ) eq '^'
+ && $is_special_variable_char{ substr( $tokenr, 0, 1 ) } )
+
# We must be sure that a space between a ? and a quoted string
# remains if the space before the ? remains. [Loca.pm, lockarea]
# ie,
my $last_nonblank_token = $token;
my $list_str = $left_bond_strength{'?'};
+ my ( $bond_str_1, $bond_str_2, $bond_str_3, $bond_str_4 );
+
my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
$next_nonblank_type, $next_token, $next_type,
$total_nesting_depth, );
# strength on both sides of a blank is the same
if ( $type eq 'b' && $last_type ne 'b' ) {
$bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
+ $nobreak_to_go[$i] ||= $nobreak_to_go[ $i - 1 ]; # fix for b1257
next;
}
# section.
if ( !defined($bsr) ) { $bsr = VERY_STRONG }
if ( !defined($bsl) ) { $bsl = VERY_STRONG }
- my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
- my $bond_str_1 = $bond_str;
+ my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
+ $bond_str_1 = $bond_str if (DEBUG_BOND);
#---------------------------------------------------------------
# Bond Strength Section 2:
$bond_str = NO_BREAK;
}
- # in older version of perl, use strict can cause problems with
- # breaks before bare words following opening parens. For example,
- # this will fail under older versions if a break is made between
- # '(' and 'MAIL': use strict; open( MAIL, "a long filename or
- # command"); close MAIL;
- if ( $type eq '{' ) {
+ # OLD COMMENT: In older version of perl, use strict can cause
+ # problems with breaks before bare words following opening parens.
+ # For example, this will fail under older versions if a break is
+ # made between '(' and 'MAIL':
- if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
+ # use strict; open( MAIL, "a long filename or command"); close MAIL;
- # but it's fine to break if the word is followed by a '=>'
- # or if it is obviously a sub call
- my $i_next_next_nonblank = $i_next_nonblank + 1;
- my $next_next_type = $types_to_go[$i_next_next_nonblank];
- if ( $next_next_type eq 'b'
- && $i_next_nonblank < $max_index_to_go )
- {
- $i_next_next_nonblank++;
- $next_next_type = $types_to_go[$i_next_next_nonblank];
- }
+ # NEW COMMENT: Third fix for b1213:
+ # This option does not seem to be needed any longer, and it can
+ # cause instabilities. It can be turned off, but to minimize
+ # changes to existing formatting it is retained only in the case
+ # where the previous token was 'open' and there was no line break.
+ # Even this could eventually be removed if it causes instability.
+ if ( $type eq '{' ) {
- # We'll check for an old breakpoint and keep a leading
- # bareword if it was that way in the input file.
- # Presumably it was ok that way. For example, the
- # following would remain unchanged:
- #
- # @months = (
- # January, February, March, April,
- # May, June, July, August,
- # September, October, November, December,
- # );
- #
- # This should be sufficient:
- if (
- !$old_breakpoint_to_go[$i]
- && ( $next_next_type eq ','
- || $next_next_type eq '}' )
- )
- {
- $bond_str = NO_BREAK;
- }
+ if ( $token eq '('
+ && $next_nonblank_type eq 'w'
+ && $last_nonblank_type eq 'k'
+ && $last_nonblank_token eq 'open'
+ && !$old_breakpoint_to_go[$i] )
+ {
+ $bond_str = NO_BREAK;
}
}
&& substr( $next_nonblank_token, 0, 1 ) eq '/' );
}
- my $bond_str_2 = $bond_str;
+ $bond_str_2 = $bond_str if (DEBUG_BOND);
#---------------------------------------------------------------
# End of hardwired rules
&& $is_container_token{$next_nonblank_token} )
{
$rtype = $next_nonblank_type . $next_nonblank_token;
+
+ # Alternate Fix #1 for issue b1299. This version makes the
+ # decision as soon as possible. See Alternate Fix #2 also.
+ # Do not separate a bareword identifier from its paren: b1299
+ # This is currently needed for stability because if the bareword
+ # gets separated from a preceding '->' and following '(' then
+ # the tokenizer may switch from type 'i' to type 'w'. This
+ # patch will prevent this by keeping it adjacent to its '('.
+## if ( $next_nonblank_token eq '('
+## && $ltype eq 'i'
+## && substr( $token, 0, 1 ) =~ /^\w$/ )
+## {
+## $ltype = 'w';
+## }
}
# apply binary rules which apply regardless of space between tokens
$bond_str = NO_BREAK;
$tabulated_bond_str = $bond_str;
}
- my $bond_str_3 = $bond_str;
+
+ $bond_str_3 = $bond_str if (DEBUG_BOND);
# If the hardwired rules conflict with the tabulated bond
# strength then there is an inconsistency that should be fixed
if ( $type eq ',' ) {
- # add any bias set by sub scan_list at old comma break points
+ # add any bias set by sub break_lists at old comma break points
$bond_str += $bond_strength_to_go[$i];
}
$bond_str += $bias{$right_key};
}
}
- my $bond_str_4 = $bond_str;
+
+ $bond_str_4 = $bond_str if (DEBUG_BOND);
#---------------------------------------------------------------
# Bond Strength Section 5:
$str .= ' ' x ( 16 - length($str) );
print STDOUT
"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
+
+ # reset for next pass
+ $bond_str_1 = $bond_str_2 = $bond_str_3 = $bond_str_4 = undef;
};
+
} ## end main loop
return;
} ## end sub set_bond_strengths
# 'sub ::m' is a named sub
# 'sub' is an anonymous sub
# 'sub:' is a label, not a sub
+ # 'sub :' is a label, not a sub ( block type will be <sub:> )
+ # sub'_ is a named sub ( block type will be <sub '_> )
# 'substr' is a keyword
- $SUB_PATTERN = '^sub\s+(::|\w)'; # match normal sub
- $ASUB_PATTERN = '^sub$'; # match anonymous sub
- $ANYSUB_PATTERN = '^sub\b'; # match either type of sub
+ # So note that named subs always have a space after 'sub'
+ $SUB_PATTERN = '^sub\s'; # match normal sub
+ $ASUB_PATTERN = '^sub$'; # match anonymous sub
# Note (see also RT #133130): These patterns are used by
# sub make_block_pattern, which is used for making most patterns.
$sub_alias_list =~ s/\s+/\|/g;
$SUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
$ASUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
- $ANYSUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
}
return;
}
+sub make_bl_pattern {
+
+ # Set defaults lists to retain historical default behavior for -bl:
+ my $bl_list_string = '*';
+ my $bl_exclusion_list_string = 'sort map grep eval asub';
+
+ if ( defined( $rOpts->{'brace-left-list'} )
+ && $rOpts->{'brace-left-list'} )
+ {
+ $bl_list_string = $rOpts->{'brace-left-list'};
+ }
+ if ( $bl_list_string =~ /\bsub\b/ ) {
+ $rOpts->{'opening-sub-brace-on-new-line'} ||=
+ $rOpts->{'opening-brace-on-new-line'};
+ }
+ if ( $bl_list_string =~ /\basub\b/ ) {
+ $rOpts->{'opening-anonymous-sub-brace-on-new-line'} ||=
+ $rOpts->{'opening-brace-on-new-line'};
+ }
+
+ $bl_pattern = make_block_pattern( '-bll', $bl_list_string );
+
+ # for -bl, a list with '*' turns on -sbl and -asbl
+ if ( $bl_pattern =~ /\.\*/ ) {
+ $rOpts->{'opening-sub-brace-on-new-line'} ||=
+ $rOpts->{'opening-brace-on-new-line'};
+ $rOpts->{'opening-anonymous-sub-brace-on-new-line'} ||=
+ $rOpts->{'opening-anonymous-brace-on-new-line'};
+ }
+
+ if ( defined( $rOpts->{'brace-left-exclusion-list'} )
+ && $rOpts->{'brace-left-exclusion-list'} )
+ {
+ $bl_exclusion_list_string = $rOpts->{'brace-left-exclusion-list'};
+ if ( $bl_exclusion_list_string =~ /\bsub\b/ ) {
+ $rOpts->{'opening-sub-brace-on-new-line'} = 0;
+ }
+ if ( $bl_exclusion_list_string =~ /\basub\b/ ) {
+ $rOpts->{'opening-anonymous-sub-brace-on-new-line'} = 0;
+ }
+ }
+
+ $bl_exclusion_pattern =
+ make_block_pattern( '-blxl', $bl_exclusion_list_string );
+ return;
+}
+
sub make_bli_pattern {
# default list of block types for which -bli would apply
my $bli_list_string = 'if else elsif unless while for foreach do : sub';
+ my $bli_exclusion_list_string = ' ';
if ( defined( $rOpts->{'brace-left-and-indent-list'} )
&& $rOpts->{'brace-left-and-indent-list'} )
}
$bli_pattern = make_block_pattern( '-blil', $bli_list_string );
+
+ if ( defined( $rOpts->{'brace-left-and-indent-exclusion-list'} )
+ && $rOpts->{'brace-left-and-indent-exclusion-list'} )
+ {
+ $bli_exclusion_list_string =
+ $rOpts->{'brace-left-and-indent-exclusion-list'};
+ }
+ $bli_exclusion_pattern =
+ make_block_pattern( '-blixl', $bli_exclusion_list_string );
return;
}
Warn("unrecognized block type $i after $abbrev, ignoring\n");
}
}
+
+ # Fix 2 for c091, prevent the pattern from matching an empty string
+ # '1 ' is an impossible block name.
+ if ( !@words ) { push @words, "1 " }
+
my $pattern = '(' . join( '|', @words ) . ')$';
my $sub_patterns = "";
if ( $seen{'sub'} ) {
if ( bad_pattern($test_csc_prefix_pattern) ) {
# shouldn't happen..must have screwed up escaping, above
- report_definite_bug();
- Warn(
-"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"
- );
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'
+EOM
+ }
# just warn and keep going with defaults
+ Warn(
+"Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"
+ );
Warn("Please consider using a simpler -cscp prefix\n");
Warn("Using default -cscp instead; please check output\n");
}
{ ## begin closure write_line
- my $Last_line_had_side_comment;
- my $In_format_skipping_section;
- my $Saw_VERSION_in_this_file;
+ my $nesting_depth;
+
+ # Variables used by sub check_sequence_numbers:
+ my $last_seqno;
+ my %saw_opening_seqno;
+ my %saw_closing_seqno;
+ my $initial_seqno;
sub initialize_write_line {
- $Last_line_had_side_comment = 0;
- $In_format_skipping_section = 0;
- $Saw_VERSION_in_this_file = 0;
+ $nesting_depth = undef;
+
+ $last_seqno = SEQ_ROOT;
+ %saw_opening_seqno = ();
+ %saw_closing_seqno = ();
+
+ return;
+ }
+
+ sub check_sequence_numbers {
+
+ # Routine for checking sequence numbers. This only needs to be
+ # done occasionally in DEVEL_MODE to be sure everything is working
+ # correctly.
+ my ( $rtokens, $rtoken_type, $rtype_sequence, $input_line_no ) = @_;
+ my $jmax = @{$rtokens} - 1;
+ return unless ( $jmax >= 0 );
+ foreach my $j ( 0 .. $jmax ) {
+ my $seqno = $rtype_sequence->[$j];
+ my $token = $rtokens->[$j];
+ my $type = $rtoken_type->[$j];
+ my $err_msg =
+"Error at j=$j, line number $input_line_no, seqno='$seqno', type='$type', tok='$token':\n";
+
+ if ( !$seqno ) {
+
+ # Sequence numbers are generated for opening tokens, so every opening
+ # token should be sequenced. Closing tokens will be unsequenced
+ # if they do not have a matching opening token.
+ if ( $is_opening_sequence_token{$token}
+ && $type ne 'q'
+ && $type ne 'Q' )
+ {
+ Fault(
+ <<EOM
+$err_msg Unexpected opening token without sequence number
+EOM
+ );
+ }
+ }
+ else {
+
+ # Save starting seqno to identify sequence method:
+ # New method starts with 2 and has continuous numbering
+ # Old method starts with >2 and may have gaps
+ if ( !defined($initial_seqno) ) { $initial_seqno = $seqno }
+
+ if ( $is_opening_sequence_token{$token} ) {
+
+ # New method should have continuous numbering
+ if ( $initial_seqno == 2 && $seqno != $last_seqno + 1 ) {
+ Fault(
+ <<EOM
+$err_msg Unexpected opening sequence number: previous seqno=$last_seqno, but seqno= $seqno
+EOM
+ );
+ }
+ $last_seqno = $seqno;
+
+ # Numbers must be unique
+ if ( $saw_opening_seqno{$seqno} ) {
+ my $lno = $saw_opening_seqno{$seqno};
+ Fault(
+ <<EOM
+$err_msg Already saw an opening tokens at line $lno with this sequence number
+EOM
+ );
+ }
+ $saw_opening_seqno{$seqno} = $input_line_no;
+ }
+
+ # only one closing item per seqno
+ elsif ( $is_closing_sequence_token{$token} ) {
+ if ( $saw_closing_seqno{$seqno} ) {
+ my $lno = $saw_closing_seqno{$seqno};
+ Fault(
+ <<EOM
+$err_msg Already saw a closing token with this seqno at line $lno
+EOM
+ );
+ }
+ $saw_closing_seqno{$seqno} = $input_line_no;
+
+ # Every closing seqno must have an opening seqno
+ if ( !$saw_opening_seqno{$seqno} ) {
+ Fault(
+ <<EOM
+$err_msg Saw a closing token but no opening token with this seqno
+EOM
+ );
+ }
+ }
+ # Sequenced items must be opening or closing
+ else {
+ Fault(
+ <<EOM
+$err_msg Unexpected token type with a sequence number
+EOM
+ );
+ }
+ }
+ }
return;
}
sub write_line {
- # This routine originally received lines of code and immediately processed
- # them. That was efficient when memory was limited, but now it just saves
- # the lines it receives. They get processed all together after the last
- # line is received.
+ # This routine receives lines one-by-one from the tokenizer and stores
+ # them in a format suitable for further processing. After the last
+ # line has been sent, the tokenizer will call sub 'finish_formatting'
+ # to do the actual formatting.
- # As tokenized lines are received they are converted to the format needed
- # for the final formatting.
my ( $self, $line_of_tokens_old ) = @_;
- my $rLL = $self->[_rLL_];
- my $Klimit = $self->[_Klimit_];
- my $rlines_new = $self->[_rlines_];
- my $maximum_level = $self->[_maximum_level_];
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
+ my $rlines_new = $self->[_rlines_];
+
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my $rSS = $self->[_rSS_];
+ my $Iss_opening = $self->[_Iss_opening_];
+ my $Iss_closing = $self->[_Iss_closing_];
my $Kfirst;
my $line_of_tokens = {};
- foreach my $key (
+ foreach (
qw(
_curly_brace_depth
_ending_in_quote
)
)
{
- $line_of_tokens->{$key} = $line_of_tokens_old->{$key};
+ $line_of_tokens->{$_} = $line_of_tokens_old->{$_};
}
# Data needed by Logger
# Needed to avoid trimming quotes
$line_of_tokens->{_ended_in_blank_token} = undef;
- my $line_type = $line_of_tokens_old->{_line_type};
- my $input_line_no = $line_of_tokens_old->{_line_number};
- my $CODE_type = "";
+ my $line_type = $line_of_tokens_old->{_line_type};
+ my $line_number = $line_of_tokens_old->{_line_number};
+ my $CODE_type = "";
my $tee_output;
# Handle line of non-code
my $jmax = @{$rtokens} - 1;
if ( $jmax >= 0 ) {
$Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
- foreach my $j ( 0 .. $jmax ) {
- # Clip negative nesting depths to zero to avoid problems.
- # Negative values can occur in files with unbalanced containers
- my $slevel = $rslevels->[$j];
- if ( $slevel < 0 ) { $slevel = 0 }
+ DEVEL_MODE
+ && check_sequence_numbers( $rtokens, $rtoken_type,
+ $rtype_sequence, $line_number );
+
+ # Find the starting nesting depth ...
+ # It must be the value of variable 'level' of the first token
+ # because the nesting depth is used as a token tag in the
+ # vertical aligner and is compared to actual levels.
+ # So vertical alignment problems will occur with any other
+ # starting value.
+ if ( !defined($nesting_depth) ) {
+ $nesting_depth = $rlevels->[0];
+ $nesting_depth = 0 if ( $nesting_depth < 0 );
+ $rdepth_of_opening_seqno->[SEQ_ROOT] = $nesting_depth - 1;
+ }
- if ( $rlevels->[$j] > $maximum_level ) {
- $maximum_level = $rlevels->[$j];
- }
+ foreach my $j ( 0 .. $jmax ) {
- # But do not clip the 'level' variable yet. We will do this
+ # Do not clip the 'level' variable yet. We will do this
# later, in sub 'store_token_to_go'. The reason is that in
# files with level errors, the logic in 'weld_cuddled_else'
# uses a stack logic that will give bad welds if we clip
# levels here.
## if ( $rlevels->[$j] < 0 ) { $rlevels->[$j] = 0 }
+ # Handle tokens with sequence numbers ...
+ my $seqno = $rtype_sequence->[$j];
+ if ($seqno) {
+ my $token = $rtokens->[$j];
+ my $sign = 1;
+ if ( $is_opening_token{$token} ) {
+ $K_opening_container->{$seqno} = @{$rLL};
+ $rdepth_of_opening_seqno->[$seqno] = $nesting_depth;
+ $nesting_depth++;
+
+ # Save a sequenced block type at its opening token.
+ # Note that unsequenced block types can occur in
+ # unbalanced code with errors but are ignored here.
+ if ( $rblock_type->[$j] ) {
+ my $block_type = $rblock_type->[$j];
+ $rblock_type_of_seqno->{$seqno} = $block_type;
+ if ( substr( $block_type, 0, 3 ) eq 'sub'
+ || $rOpts_sub_alias_list )
+ {
+ if ( $block_type =~ /$ASUB_PATTERN/ ) {
+ $self->[_ris_asub_block_]->{$seqno} = 1;
+ }
+ elsif ( $block_type =~ /$SUB_PATTERN/ ) {
+ $self->[_ris_sub_block_]->{$seqno} = 1;
+ }
+ }
+ }
+ }
+ elsif ( $is_closing_token{$token} ) {
+
+ # The opening depth should always be defined, and
+ # it should equal $nesting_depth-1. To protect
+ # against unforseen error conditions, however, we
+ # will check this and fix things if necessary. For
+ # a test case see issue c055.
+ my $opening_depth =
+ $rdepth_of_opening_seqno->[$seqno];
+ if ( !defined($opening_depth) ) {
+ $opening_depth = $nesting_depth - 1;
+ $opening_depth = 0 if ( $opening_depth < 0 );
+ $rdepth_of_opening_seqno->[$seqno] =
+ $opening_depth;
+
+ # This is not fatal but should not happen. The
+ # tokenizer generates sequence numbers
+ # incrementally upon encountering each new
+ # opening token, so every positive sequence
+ # number should correspond to an opening token.
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+No opening token seen for closing token = '$token' at seq=$seqno at depth=$opening_depth
+EOM
+ }
+ }
+ $K_closing_container->{$seqno} = @{$rLL};
+ $nesting_depth = $opening_depth;
+ $sign = -1;
+ }
+ elsif ( $token eq '?' ) {
+ }
+ elsif ( $token eq ':' ) {
+ $sign = -1;
+ }
+
+ # The only sequenced types output by the tokenizer are
+ # the opening & closing containers and the ternary
+ # types. So we would only get here if the tokenizer has
+ # been changed to mark some other tokens with sequence
+ # numbers, or if an error has been introduced in a
+ # hash such as %is_opening_container
+ else {
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+Unexpected sequenced token '$token' of type '$rtoken_type->[$j]', sequence=$seqno arrived from tokenizer.
+Expecting only opening or closing container tokens or ternary tokens with sequence numbers.
+EOM
+ }
+ }
+
+ if ( $sign > 0 ) {
+ $Iss_opening->[$seqno] = @{$rSS};
+
+ # For efficiency, we find the maximum level of
+ # opening tokens of any type. The actual maximum
+ # level will be that of their contents which is 1
+ # greater. That will be fixed in sub
+ # 'finish_formatting'.
+ my $level = $rlevels->[$j];
+ if ( $level > $self->[_maximum_level_] ) {
+ $self->[_maximum_level_] = $level;
+ $self->[_maximum_level_at_line_] = $line_number;
+ }
+ }
+ else { $Iss_closing->[$seqno] = @{$rSS} }
+ push @{$rSS}, $sign * $seqno;
+
+ }
+
my @tokary;
@tokary[
- _TOKEN_, _TYPE_, _BLOCK_TYPE_,
- _TYPE_SEQUENCE_, _LEVEL_, _SLEVEL_,
- _CI_LEVEL_, _LINE_INDEX_,
+ _TOKEN_, _TYPE_, _TYPE_SEQUENCE_,
+ _LEVEL_, _CI_LEVEL_, _LINE_INDEX_,
]
= (
- $rtokens->[$j], $rtoken_type->[$j],
- $rblock_type->[$j], $rtype_sequence->[$j],
- $rlevels->[$j], $slevel,
- $rci_levels->[$j], $input_line_no - 1,
+ $rtokens->[$j], $rtoken_type->[$j],
+ $seqno, $rlevels->[$j],
+ $rci_levels->[$j], $line_number - 1,
);
push @{$rLL}, \@tokary;
} ## end foreach my $j ( 0 .. $jmax )
$line_of_tokens->{_nesting_tokens_0} = $rnesting_tokens->[0];
} ## end if ( $jmax >= 0 )
- $CODE_type =
- $self->get_CODE_type( $line_of_tokens, $Kfirst, $Klimit,
- $input_line_no );
-
$tee_output ||=
$rOpts_tee_block_comments
&& $jmax == 0
&& $Klimit > $Kfirst
&& $rLL->[$Klimit]->[_TYPE_] eq '#';
- # Handle any requested side comment deletions. It is easier to get
- # this done here rather than farther down the pipeline because IO
- # lines take a different route, and because lines with deleted HSC
- # become BL lines. An since we are deleting now, we have to also
- # handle any tee- requests before the side comments vanish.
- my $delete_side_comment =
- $rOpts_delete_side_comments
- && defined($Kfirst)
- && $rLL->[$Klimit]->[_TYPE_] eq '#'
- && ( $Klimit > $Kfirst || $CODE_type eq 'HSC' )
- && (!$CODE_type
- || $CODE_type eq 'HSC'
- || $CODE_type eq 'IO'
- || $CODE_type eq 'NIN' );
-
- if (
- $rOpts_delete_closing_side_comments
- && !$delete_side_comment
- && defined($Kfirst)
- && $Klimit > $Kfirst
- && $rLL->[$Klimit]->[_TYPE_] eq '#'
- && ( !$CODE_type
- || $CODE_type eq 'HSC'
- || $CODE_type eq 'IO'
- || $CODE_type eq 'NIN' )
- )
- {
- my $token = $rLL->[$Klimit]->[_TOKEN_];
- my $K_m = $Klimit - 1;
- my $type_m = $rLL->[$K_m]->[_TYPE_];
- if ( $type_m eq 'b' && $K_m > $Kfirst ) { $K_m-- }
- my $last_nonblank_block_type = $rLL->[$K_m]->[_BLOCK_TYPE_];
- if ( $token =~ /$closing_side_comment_prefix_pattern/
- && $last_nonblank_block_type =~
- /$closing_side_comment_list_pattern/ )
- {
- $delete_side_comment = 1;
- }
- } ## end if ( $rOpts_delete_closing_side_comments...)
-
- if ($delete_side_comment) {
- pop @{$rLL};
- $Klimit -= 1;
- if ( $Klimit > $Kfirst
- && $rLL->[$Klimit]->[_TYPE_] eq 'b' )
- {
- pop @{$rLL};
- $Klimit -= 1;
- }
-
- # The -io option outputs the line text, so we have to update
- # the line text so that the comment does not reappear.
- if ( $CODE_type eq 'IO' ) {
- my $line = "";
- foreach my $KK ( $Kfirst .. $Klimit ) {
- $line .= $rLL->[$KK]->[_TOKEN_];
- }
- $line_of_tokens->{_line_text} = $line . "\n";
- }
-
- # If we delete a hanging side comment the line becomes blank.
- if ( $CODE_type eq 'HSC' ) { $CODE_type = 'BL' }
- }
-
} ## end if ( $line_type eq 'CODE')
# Finish storing line variables
$line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ];
$line_of_tokens->{_code_type} = $CODE_type;
$self->[_Klimit_] = $Klimit;
- $self->[_maximum_level_] = $maximum_level;
push @{$rlines_new}, $line_of_tokens;
return;
}
+} ## end closure write_line
- sub get_CODE_type {
- my ( $self, $line_of_tokens, $Kfirst, $Klast, $input_line_no ) = @_;
+#############################################
+# CODE SECTION 5: Pre-process the entire file
+#############################################
- # We are looking at a line of code and setting a flag to
- # describe any special processing that it requires
+sub finish_formatting {
- # Possible CODE_types
- # 'VB' = Verbatim - line goes out verbatim (a quote)
- # 'FS' = Format Skipping - line goes out verbatim
- # 'BL' = Blank Line
- # 'HSC' = Hanging Side Comment - fix this hanging side comment
- # 'SBCX'= Static Block Comment Without Leading Space
- # 'SBC' = Static Block Comment
- # 'BC' = Block Comment - an ordinary full line comment
- # 'IO' = Indent Only - line goes out unchanged except for indentation
- # 'NIN' = No Internal Newlines - line does not get broken
- # 'VER' = VERSION statement
- # '' = ordinary line of code with no restructions
+ my ( $self, $severe_error ) = @_;
- my $rLL = $self->[_rLL_];
+ # The file has been tokenized and is ready to be formatted.
+ # All of the relevant data is stored in $self, ready to go.
- my $CODE_type = "";
- my $input_line = $line_of_tokens->{_line_text};
- my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1;
+ # Check the maximum level. If it is extremely large we will give up and
+ # output the file verbatim. Note that the actual maximum level is 1
+ # greater than the saved value, so we fix that here.
+ $self->[_maximum_level_] += 1;
+ my $maximum_level = $self->[_maximum_level_];
+ my $maximum_table_index = $#maximum_line_length_at_level;
+ if ( !$severe_error && $maximum_level >= $maximum_table_index ) {
+ $severe_error ||= 1;
+ Warn(<<EOM);
+The maximum indentation level, $maximum_level, exceeds the builtin limit of $maximum_table_index.
+Something may be wrong; formatting will be skipped.
+EOM
+ }
- my $is_block_comment = 0;
- my $has_side_comment = 0;
+ # output file verbatim if severe error or no formatting requested
+ if ( $severe_error || $rOpts->{notidy} ) {
+ $self->dump_verbatim();
+ $self->wrapup();
+ return;
+ }
- if ( $jmax >= 0 && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
- if ( $jmax == 0 ) { $is_block_comment = 1; }
- else { $has_side_comment = 1 }
- }
+ # Update the 'save_logfile' flag based to include any tokenization errors.
+ # We can save time by skipping logfile calls if it is not going to be saved.
+ my $logger_object = $self->[_logger_object_];
+ if ($logger_object) {
+ $self->[_save_logfile_] = $logger_object->get_save_logfile();
+ }
- # Write line verbatim if we are in a formatting skip section
- if ($In_format_skipping_section) {
+ $self->set_CODE_type();
- # Note: extra space appended to comment simplifies pattern matching
- if ( $is_block_comment
- && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
- /$format_skipping_pattern_end/ )
- {
- $In_format_skipping_section = 0;
- write_logfile_entry(
- "Line $input_line_no: Exiting format-skipping section\n");
- }
- $CODE_type = 'FS';
- goto RETURN;
- }
+ # Verify that the line hash does not have any unknown keys.
+ $self->check_line_hashes() if (DEVEL_MODE);
- # Check for a continued quote..
- if ( $line_of_tokens->{_starting_in_quote} ) {
+ # Make a pass through all tokens, adding or deleting any whitespace as
+ # required. Also make any other changes, such as adding semicolons.
+ # All token changes must be made here so that the token data structure
+ # remains fixed for the rest of this iteration.
+ $self->respace_tokens();
- # A line which is entirely a quote or pattern must go out
- # verbatim. Note: the \n is contained in $input_line.
- if ( $jmax <= 0 ) {
- if ( ( $input_line =~ "\t" ) ) {
- my $input_line_number = $line_of_tokens->{_line_number};
- $self->note_embedded_tab($input_line_number);
- }
- $CODE_type = 'VB';
- goto RETURN;
- }
- }
+ $self->set_excluded_lp_containers();
- # See if we are entering a formatting skip section
- if ( $rOpts_format_skipping
- && $is_block_comment
- && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
- /$format_skipping_pattern_begin/ )
- {
- $In_format_skipping_section = 1;
- write_logfile_entry(
- "Line $input_line_no: Entering format-skipping section\n");
- $CODE_type = 'FS';
- goto RETURN;
- }
+ $self->find_multiline_qw();
- # ignore trailing blank tokens (they will get deleted later)
- if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
- $jmax--;
- }
+ $self->keep_old_line_breaks();
- # blank line..
- if ( $jmax < 0 ) {
- $CODE_type = 'BL';
- goto RETURN;
- }
+ # Implement any welding needed for the -wn or -cb options
+ $self->weld_containers();
- # see if this is a static block comment (starts with ## by default)
- my $is_static_block_comment = 0;
- my $is_static_block_comment_without_leading_space = 0;
- if ( $is_block_comment
- && $rOpts->{'static-block-comments'}
- && $input_line =~ /$static_block_comment_pattern/ )
- {
- $is_static_block_comment = 1;
- $is_static_block_comment_without_leading_space =
- substr( $input_line, 0, 1 ) eq '#';
+ $self->collapsed_lengths()
+ if ( $rOpts_line_up_parentheses && $rOpts_extended_line_up_parentheses );
+
+ # Locate small nested blocks which should not be broken
+ $self->mark_short_nested_blocks();
+
+ $self->adjust_indentation_levels();
+
+ # Verify that the main token array looks OK. If this ever causes a fault
+ # then place similar checks before the sub calls above to localize the
+ # problem.
+ $self->check_rLL("Before 'process_all_lines'") if (DEVEL_MODE);
+
+ # Finishes formatting and write the result to the line sink.
+ # Eventually this call should just change the 'rlines' data according to the
+ # new line breaks and then return so that we can do an internal iteration
+ # before continuing with the next stages of formatting.
+ $self->process_all_lines();
+
+ # A final routine to tie up any loose ends
+ $self->wrapup();
+ return;
+}
+
+sub set_CODE_type {
+ my ($self) = @_;
+
+ # This routine performs two tasks:
+
+ # TASK 1: Examine each line of code and set a flag '$CODE_type' to describe
+ # any special processing that it requires.
+
+ # TASK 2: Delete side comments if requested.
+
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
+ my $rlines = $self->[_rlines_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+
+ my $rOpts_format_skipping_begin = $rOpts->{'format-skipping-begin'};
+ my $rOpts_format_skipping_end = $rOpts->{'format-skipping-end'};
+ my $rOpts_static_block_comment_prefix =
+ $rOpts->{'static-block-comment-prefix'};
+
+ # Remember indexes of lines with side comments
+ my @ix_side_comments;
+
+ my $In_format_skipping_section = 0;
+ my $Saw_VERSION_in_this_file = 0;
+ my $has_side_comment = 0;
+ my ( $Kfirst, $Klast );
+ my $CODE_type;
+
+ #------------------------------
+ # TASK 1: Loop to set CODE_type
+ #------------------------------
+
+ # Possible CODE_types
+ # 'VB' = Verbatim - line goes out verbatim (a quote)
+ # 'FS' = Format Skipping - line goes out verbatim
+ # 'BL' = Blank Line
+ # 'HSC' = Hanging Side Comment - fix this hanging side comment
+ # 'SBCX'= Static Block Comment Without Leading Space
+ # 'SBC' = Static Block Comment
+ # 'BC' = Block Comment - an ordinary full line comment
+ # 'IO' = Indent Only - line goes out unchanged except for indentation
+ # 'NIN' = No Internal Newlines - line does not get broken
+ # 'VER' = VERSION statement
+ # '' = ordinary line of code with no restructions
+
+ my $ix_line = -1;
+ foreach my $line_of_tokens ( @{$rlines} ) {
+ $ix_line++;
+ my $input_line_no = $line_of_tokens->{_line_number};
+ my $line_type = $line_of_tokens->{_line_type};
+
+ my $Last_line_had_side_comment = $has_side_comment;
+ if ($has_side_comment) {
+ push @ix_side_comments, $ix_line - 1;
}
+ $has_side_comment = 0;
- # Check for comments which are line directives
- # Treat exactly as static block comments without leading space
- # reference: perlsyn, near end, section Plain Old Comments (Not!)
- # example: '# line 42 "new_filename.plx"'
- if (
- $is_block_comment
- && $input_line =~ /^\# \s*
- line \s+ (\d+) \s*
- (?:\s("?)([^"]+)\2)? \s*
- $/x
- )
- {
- $is_static_block_comment = 1;
- $is_static_block_comment_without_leading_space = 1;
+ next unless ( $line_type eq 'CODE' );
+
+ my $Klast_prev = $Klast;
+
+ my $rK_range = $line_of_tokens->{_rK_range};
+ ( $Kfirst, $Klast ) = @{$rK_range};
+
+ my $last_CODE_type = $CODE_type;
+ $CODE_type = "";
+
+ my $input_line = $line_of_tokens->{_line_text};
+ my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1;
+
+ my $is_block_comment = 0;
+ if ( $jmax >= 0 && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
+ if ( $jmax == 0 ) { $is_block_comment = 1; }
+ else { $has_side_comment = 1 }
+ }
+
+ # Write line verbatim if we are in a formatting skip section
+ if ($In_format_skipping_section) {
+
+ # Note: extra space appended to comment simplifies pattern matching
+ if (
+ $is_block_comment
+
+ # optional fast pre-check
+ && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#>>>'
+ || $rOpts_format_skipping_end )
+
+ && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
+ /$format_skipping_pattern_end/
+ )
+ {
+ $In_format_skipping_section = 0;
+ write_logfile_entry(
+ "Line $input_line_no: Exiting format-skipping section\n");
+ }
+ $CODE_type = 'FS';
+ goto NEXT;
}
- # look for hanging side comment
+ # Check for a continued quote..
+ if ( $line_of_tokens->{_starting_in_quote} ) {
+
+ # A line which is entirely a quote or pattern must go out
+ # verbatim. Note: the \n is contained in $input_line.
+ if ( $jmax <= 0 ) {
+ if ( ( $input_line =~ "\t" ) ) {
+ my $input_line_number = $line_of_tokens->{_line_number};
+ $self->note_embedded_tab($input_line_number);
+ }
+ $CODE_type = 'VB';
+ goto NEXT;
+ }
+ }
+
+ # See if we are entering a formatting skip section
if (
- $is_block_comment
- && $Last_line_had_side_comment # last line had side comment
- && $input_line =~ /^\s/ # there is some leading space
- && !$is_static_block_comment # do not make static comment hanging
- && $rOpts->{'hanging-side-comments'} # user is allowing
- # hanging side comments
- # like this
+ $is_block_comment
+
+ # optional fast pre-check
+ && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#<<<'
+ || $rOpts_format_skipping_begin )
+
+ && $rOpts_format_skipping
+ && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
+ /$format_skipping_pattern_begin/
)
{
- $has_side_comment = 1;
- $CODE_type = 'HSC';
- goto RETURN;
+ $In_format_skipping_section = 1;
+ write_logfile_entry(
+ "Line $input_line_no: Entering format-skipping section\n");
+ $CODE_type = 'FS';
+ goto NEXT;
+ }
+
+ # ignore trailing blank tokens (they will get deleted later)
+ if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
+ $jmax--;
+ }
+
+ # blank line..
+ if ( $jmax < 0 ) {
+ $CODE_type = 'BL';
+ goto NEXT;
}
- # Handle a block (full-line) comment..
+ # Handle comments
if ($is_block_comment) {
- if ($is_static_block_comment_without_leading_space) {
- $CODE_type = 'SBCX';
- goto RETURN;
+ # see if this is a static block comment (starts with ## by default)
+ my $is_static_block_comment = 0;
+ my $no_leading_space = substr( $input_line, 0, 1 ) eq '#';
+ if (
+
+ # optional fast pre-check
+ (
+ substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 2 ) eq '##'
+ || $rOpts_static_block_comment_prefix
+ )
+
+ && $rOpts_static_block_comments
+ && $input_line =~ /$static_block_comment_pattern/
+ )
+ {
+ $is_static_block_comment = 1;
+ }
+
+ # Check for comments which are line directives
+ # Treat exactly as static block comments without leading space
+ # reference: perlsyn, near end, section Plain Old Comments (Not!)
+ # example: '# line 42 "new_filename.plx"'
+ if (
+ $no_leading_space
+ && $input_line =~ /^\# \s*
+ line \s+ (\d+) \s*
+ (?:\s("?)([^"]+)\2)? \s*
+ $/x
+ )
+ {
+ $is_static_block_comment = 1;
+ }
+
+ # look for hanging side comment ...
+ if (
+ $Last_line_had_side_comment # last line had side comment
+ && !$no_leading_space # there is some leading space
+ && !
+ $is_static_block_comment # do not make static comment hanging
+ )
+ {
+
+ # continuing an existing HSC chain?
+ if ( $last_CODE_type eq 'HSC' ) {
+ $has_side_comment = 1;
+ $CODE_type = 'HSC';
+ goto NEXT;
+ }
+
+ # starting a new HSC chain?
+ elsif (
+
+ $rOpts->{'hanging-side-comments'} # user is allowing
+ # hanging side comments
+ # like this
+
+ && ( defined($Klast_prev) && $Klast_prev > 1 )
+
+ # and the previous side comment was not static (issue c070)
+ && !(
+ $rOpts->{'static-side-comments'}
+ && $rLL->[$Klast_prev]->[_TOKEN_] =~
+ /$static_side_comment_pattern/
+ )
+
+ )
+ {
+
+ # and it is not a closing side comment (issue c070).
+ my $K_penult = $Klast_prev - 1;
+ $K_penult -= 1 if ( $rLL->[$K_penult]->[_TYPE_] eq 'b' );
+ my $follows_csc =
+ ( $rLL->[$K_penult]->[_TOKEN_] eq '}'
+ && $rLL->[$K_penult]->[_TYPE_] eq '}'
+ && $rLL->[$Klast_prev]->[_TOKEN_] =~
+ /$closing_side_comment_prefix_pattern/ );
+
+ if ( !$follows_csc ) {
+ $has_side_comment = 1;
+ $CODE_type = 'HSC';
+ goto NEXT;
+ }
+ }
}
- elsif ($is_static_block_comment) {
- $CODE_type = 'SBC';
- goto RETURN;
+
+ if ($is_static_block_comment) {
+ $CODE_type = $no_leading_space ? 'SBCX' : 'SBC';
+ goto NEXT;
}
elsif ($Last_line_had_side_comment
&& !$rOpts_maximum_consecutive_blank_lines
# cannot be inserted. There is related code in sub
# 'process_line_of_CODE'
$CODE_type = 'SBCX';
- goto RETURN;
+ goto NEXT;
}
else {
$CODE_type = 'BC';
- goto RETURN;
+ goto NEXT;
}
}
if ($rOpts_indent_only) {
$CODE_type = 'IO';
- goto RETURN;
+ goto NEXT;
}
if ( !$rOpts_add_newlines ) {
$CODE_type = 'NIN';
- goto RETURN;
+ goto NEXT;
}
# Patch needed for MakeMaker. Do not break a statement
# This code type has lower priority than others
$CODE_type = 'VER';
- goto RETURN;
+ goto NEXT;
}
- RETURN:
- $Last_line_had_side_comment = $has_side_comment;
- return $CODE_type;
+ NEXT:
+ $line_of_tokens->{_code_type} = $CODE_type;
}
-} ## end closure write_line
-
-#############################################
-# CODE SECTION 5: Pre-process the entire file
-#############################################
-
-sub finish_formatting {
-
- my ( $self, $severe_error ) = @_;
+ if ($has_side_comment) {
+ push @ix_side_comments, $ix_line;
+ }
- # The file has been tokenized and is ready to be formatted.
- # All of the relevant data is stored in $self, ready to go.
+ return
+ if ( !$rOpts_delete_side_comments
+ && !$rOpts_delete_closing_side_comments );
- # Check the maximum level. If it is extremely large we will
- # give up and output the file verbatim.
- my $maximum_level = $self->[_maximum_level_];
- my $maximum_table_index = $#maximum_line_length_at_level;
- if ( !$severe_error && $maximum_level > $maximum_table_index ) {
- $severe_error ||= 1;
- Warn(<<EOM);
-The maximum indentation level, $maximum_level, exceeds the builtin limit of $maximum_table_index.
-Something may be wrong; formatting will be skipped.
-EOM
- }
+ #-------------------------------------
+ # TASK 2: Loop to delete side comments
+ #-------------------------------------
- # output file verbatim if severe error or no formatting requested
- if ( $severe_error || $rOpts->{notidy} ) {
- $self->dump_verbatim();
- $self->wrapup();
- return;
- }
+ # Handle any requested side comment deletions. It is easier to get
+ # this done here rather than farther down the pipeline because IO
+ # lines take a different route, and because lines with deleted HSC
+ # become BL lines. We have already handled any tee requests in sub
+ # getline, so it is safe to delete side comments now.
- # Update the 'save_logfile' flag based to include any tokenization errors.
- # We can save time by skipping logfile calls if it is not going to be saved.
- my $logger_object = $self->[_logger_object_];
- if ($logger_object) {
- $self->[_save_logfile_] = $logger_object->get_save_logfile();
- }
+ # Also, we can get this done efficiently here.
- # Make a pass through all tokens, adding or deleting any whitespace as
- # required. Also make any other changes, such as adding semicolons.
- # All token changes must be made here so that the token data structure
- # remains fixed for the rest of this iteration.
- $self->respace_tokens();
+ foreach my $ix (@ix_side_comments) {
+ my $line_of_tokens = $rlines->[$ix];
+ my $line_type = $line_of_tokens->{_line_type};
- $self->find_multiline_qw();
+ # This fault shouldn't happen because we only saved CODE lines with
+ # side comments in the TASK 1 loop above.
+ if ( $line_type ne 'CODE' ) {
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+Hit unexpected line_type = '$line_type' while deleting side comments, should be 'CODE'
+EOM
+ }
+ next;
+ }
- $self->keep_old_line_breaks();
+ my $CODE_type = $line_of_tokens->{_code_type};
+ my $rK_range = $line_of_tokens->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ my $delete_side_comment =
+ $rOpts_delete_side_comments
+ && defined($Kfirst)
+ && $rLL->[$Klast]->[_TYPE_] eq '#'
+ && ( $Klast > $Kfirst || $CODE_type eq 'HSC' )
+ && (!$CODE_type
+ || $CODE_type eq 'HSC'
+ || $CODE_type eq 'IO'
+ || $CODE_type eq 'NIN' );
- # Implement any welding needed for the -wn or -cb options
- $self->weld_containers();
+ if (
+ $rOpts_delete_closing_side_comments
+ && !$delete_side_comment
+ && defined($Kfirst)
+ && $Klast > $Kfirst
+ && $rLL->[$Klast]->[_TYPE_] eq '#'
+ && ( !$CODE_type
+ || $CODE_type eq 'HSC'
+ || $CODE_type eq 'IO'
+ || $CODE_type eq 'NIN' )
+ )
+ {
+ my $token = $rLL->[$Klast]->[_TOKEN_];
+ my $K_m = $Klast - 1;
+ my $type_m = $rLL->[$K_m]->[_TYPE_];
+ if ( $type_m eq 'b' && $K_m > $Kfirst ) { $K_m-- }
+ my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_];
+ if ($seqno_m) {
+ my $block_type_m = $rblock_type_of_seqno->{$seqno_m};
+ if ( $block_type_m
+ && $token =~ /$closing_side_comment_prefix_pattern/
+ && $block_type_m =~ /$closing_side_comment_list_pattern/ )
+ {
+ $delete_side_comment = 1;
+ }
+ }
+ } ## end if ( $rOpts_delete_closing_side_comments...)
- # Locate small nested blocks which should not be broken
- $self->mark_short_nested_blocks();
+ if ($delete_side_comment) {
- $self->adjust_indentation_levels();
+ # We are actually just changing the side comment to a blank.
+ # This may produce multiple blanks in a row, but sub respace_tokens
+ # will check for this and fix it.
+ $rLL->[$Klast]->[_TYPE_] = 'b';
+ $rLL->[$Klast]->[_TOKEN_] = ' ';
- $self->set_excluded_lp_containers();
+ # The -io option outputs the line text, so we have to update
+ # the line text so that the comment does not reappear.
+ if ( $CODE_type eq 'IO' ) {
+ my $line = "";
+ foreach my $KK ( $Kfirst .. $Klast - 1 ) {
+ $line .= $rLL->[$KK]->[_TOKEN_];
+ }
+ $line =~ s/\s+$//;
+ $line_of_tokens->{_line_text} = $line . "\n";
+ }
- # Finishes formatting and write the result to the line sink.
- # Eventually this call should just change the 'rlines' data according to the
- # new line breaks and then return so that we can do an internal iteration
- # before continuing with the next stages of formatting.
- $self->process_all_lines();
+ # If we delete a hanging side comment the line becomes blank.
+ if ( $CODE_type eq 'HSC' ) { $line_of_tokens->{_code_type} = 'BL' }
+ }
+ }
- # A final routine to tie up any loose ends
- $self->wrapup();
return;
}
my %wU;
my %wiq;
+my %is_wit;
+my %is_sigil;
my %is_nonlist_keyword;
my %is_nonlist_type;
+my %is_special_check_type;
+my %is_s_y_m_slash;
+my %is_unexpected_equals;
BEGIN {
@q = qw(w i q Q G C Z);
@{wiq}{@q} = (1) x scalar(@q);
+ @q = qw(w i t);
+ @{is_wit}{@q} = (1) x scalar(@q);
+
+ @q = qw($ & % * @);
+ @{is_sigil}{@q} = (1) x scalar(@q);
+
# Parens following these keywords will not be marked as lists. Note that
# 'for' is not included and is handled separately, by including 'f' in the
# hash %is_counted_type, since it may or may not be a c-style for loop.
@q = qw( && || );
@is_nonlist_type{@q} = (1) x scalar(@q);
+ @q = qw( s y m / );
+ @is_s_y_m_slash{@q} = (1) x scalar(@q);
+
+ @q = qw( = == != );
+ @is_unexpected_equals{@q} = (1) x scalar(@q);
+
}
sub respace_tokens {
my $depth_next = 0;
my $depth_next_max = 0;
- my $K_closing_container = $self->[_K_closing_container_];
+ # Note that $K_opening_container and $K_closing_container have values
+ # defined in sub get_line() for the previous K indexes. They were needed
+ # in case option 'indent-only' was set, and we didn't get here. We no longer
+ # need those and will eliminate them now to avoid any possible mixing of
+ # old and new values.
+ my $K_opening_container = $self->[_K_opening_container_] = {};
+ my $K_closing_container = $self->[_K_closing_container_] = {};
+
my $K_closing_ternary = $self->[_K_closing_ternary_];
- my $K_opening_container = $self->[_K_opening_container_];
my $K_opening_ternary = $self->[_K_opening_ternary_];
my $rK_phantom_semicolons = $self->[_rK_phantom_semicolons_];
my $rchildren_of_seqno = $self->[_rchildren_of_seqno_];
my $roverride_cab3 = $self->[_roverride_cab3_];
my $rparent_of_seqno = $self->[_rparent_of_seqno_];
my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
- my $last_nonblank_type = ';';
- my $last_nonblank_token = ';';
- my $last_nonblank_block_type = '';
- my $nonblank_token_count = 0;
- my $last_nonblank_token_lx = 0;
+ my $last_nonblank_code_type = ';';
+ my $last_nonblank_code_token = ';';
+ my $last_nonblank_block_type = '';
+ my $last_last_nonblank_code_type = ';';
+ my $last_last_nonblank_code_token = ';';
my %K_first_here_doc_by_seqno;
# This will be the index of this item in the new array
my $KK_new = @{$rLL_new};
- my $type = $item->[_TYPE_];
- my $is_blank = $type eq 'b';
+ my $type = $item->[_TYPE_];
+ my $is_blank = $type eq 'b';
+ my $block_type = "";
- # Do not output consecutive blanks. This should not happen, but
- # is worth checking because later routines make this assumption.
+ # Do not output consecutive blanks. This situation should have been
+ # prevented earlier, but it is worth checking because later routines
+ # make this assumption.
if ( $is_blank && $KK_new && $rLL_new->[-1]->[_TYPE_] eq 'b' ) {
return;
}
# check for a sequenced item (i.e., container or ?/:)
my $type_sequence = $item->[_TYPE_SEQUENCE_];
+ my $token = $item->[_TOKEN_];
if ($type_sequence) {
- my $token = $item->[_TOKEN_];
if ( $is_opening_token{$token} ) {
$K_opening_container->{$type_sequence} = $KK_new;
+ $block_type = $rblock_type_of_seqno->{$type_sequence};
# Fix for case b1100: Count a line ending in ', [' as having
# a line-ending comma. Otherwise, these commas can be hidden
# with something like --opening-square-bracket-right
- if ( $last_nonblank_type eq ','
+ if ( $last_nonblank_code_type eq ','
&& $Ktoken_vars == $Klast_old_code
&& $Ktoken_vars > $Kfirst_old )
{
$rlec_count_by_seqno->{$type_sequence}++;
}
- if ( $last_nonblank_type eq '='
- || $last_nonblank_type eq '=>' )
+ if ( $last_nonblank_code_type eq '='
+ || $last_nonblank_code_type eq '=>' )
{
$ris_assigned_structure->{$type_sequence} =
- $last_nonblank_type;
+ $last_nonblank_code_type;
}
my $seqno_parent = $seqno_stack{ $depth_next - 1 };
elsif ( $is_closing_token{$token} ) {
$K_closing_container->{$type_sequence} = $KK_new;
+ $block_type = $rblock_type_of_seqno->{$type_sequence};
# Do not include terminal commas in counts
- if ( $last_nonblank_type eq ','
- || $last_nonblank_type eq '=>' )
+ if ( $last_nonblank_code_type eq ','
+ || $last_nonblank_code_type eq '=>' )
{
my $seqno = $seqno_stack{ $depth_next - 1 };
if ($seqno) {
- $rtype_count_by_seqno->{$seqno}->{$last_nonblank_type}
- --;
+ $rtype_count_by_seqno->{$seqno}
+ ->{$last_nonblank_code_type}--;
if ( $Ktoken_vars == $Kfirst_old
- && $last_nonblank_type eq ','
+ && $last_nonblank_code_type eq ','
&& $rlec_count_by_seqno->{$seqno} )
{
$rlec_count_by_seqno->{$seqno}--;
# of those was checked above. So we would only get here
# if the tokenizer has been changed to mark some other
# tokens with sequence numbers.
- my $type = $item->[_TYPE_];
- Fault(
+ if (DEVEL_MODE) {
+ my $type = $item->[_TYPE_];
+ Fault(
"Unexpected token type with sequence number: type='$type', seqno='$type_sequence'"
- );
+ );
+ }
}
}
}
# or ignoring side comment lengths.
my $token_length =
$is_encoded_data
- ? $length_function->( $item->[_TOKEN_] )
- : length( $item->[_TOKEN_] );
+ ? $length_function->($token)
+ : length($token);
# handle comments
my $is_comment = $type eq '#';
if ($is_comment) {
# trim comments if necessary
- if ( $item->[_TOKEN_] =~ s/\s+$// ) {
- $token_length = $length_function->( $item->[_TOKEN_] );
+ my $ord = ord( substr( $token, -1, 1 ) );
+ if (
+ $ord > 0
+ && ( $ord < ORD_PRINTABLE_MIN
+ || $ord > ORD_PRINTABLE_MAX )
+ && $token =~ s/\s+$//
+ )
+ {
+ $token_length = $length_function->($token);
+ $item->[_TOKEN_] = $token;
}
# Mark length of side comments as just 1 if sc lengths are ignored
{
$set_permanently_broken->($seqno);
}
-
}
$item->[_TOKEN_LENGTH_] = $token_length;
$item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
if ( !$is_blank && !$is_comment ) {
- $last_nonblank_type = $type;
- $last_nonblank_token = $item->[_TOKEN_];
- $last_nonblank_block_type = $item->[_BLOCK_TYPE_];
- $last_nonblank_token_lx = $item->[_LINE_INDEX_];
- $nonblank_token_count++;
+
+ # Remember the most recent two non-blank, non-comment tokens.
+ # NOTE: the phantom semicolon code may change the output stack
+ # without updating these values. Phantom semicolons are considered
+ # the same as blanks for now, but future needs might change that.
+ # See the related note in sub '$add_phantom_semicolon'.
+ $last_last_nonblank_code_type = $last_nonblank_code_type;
+ $last_last_nonblank_code_token = $last_nonblank_code_token;
+
+ $last_nonblank_code_type = $type;
+ $last_nonblank_code_token = $token;
+ $last_nonblank_block_type = $block_type;
# count selected types
if ( $is_counted_type{$type} ) {
# and finally, add this item to the new array
push @{$rLL_new}, $item;
+ return;
};
my $store_token_and_space = sub {
&& $rLL_new->[-1]->[_TYPE_] ne 'b'
&& $rOpts_add_whitespace )
{
- my $rcopy = copy_token_as_type( $item, 'b', ' ' );
+ my $rcopy = [ @{$item} ];
+ $rcopy->[_TYPE_] = 'b';
+ $rcopy->[_TOKEN_] = ' ';
+ $rcopy->[_TYPE_SEQUENCE_] = '';
+
$rcopy->[_LINE_INDEX_] =
$rLL_new->[-1]->[_LINE_INDEX_];
# Patch 23-Jan-2021 to fix -lp blinkers:
# The level and ci_level of newly created spaces should be the same
- # as the previous token. Otherwise the coding for the -lp option,
- # in sub set_leading_whitespace, can create a blinking state in
- # some rare cases.
+ # as the previous token. Otherwise the coding for the -lp option
+ # can create a blinking state in some rare cases.
$rcopy->[_LEVEL_] =
$rLL_new->[-1]->[_LEVEL_];
$rcopy->[_CI_LEVEL_] =
# then the token
$store_token->($item);
- };
-
- my $K_end_q = sub {
- my ($KK) = @_;
- my $K_end = $KK;
-
- my $Kn = $KK + 1;
- if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
-
- while ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'q' ) {
- $K_end = $Kn;
-
- $Kn += 1;
- if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
- }
-
- return $K_end;
+ return;
};
my $add_phantom_semicolon = sub {
return unless ( defined($Kp) );
# we are only adding semicolons for certain block types
- my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
+ my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+ return unless ($type_sequence);
+ my $block_type = $rblock_type_of_seqno->{$type_sequence};
+ return unless ($block_type);
return
unless ( $ok_to_add_semicolon_for_block_type{$block_type}
|| $block_type =~ /^(sub|package)/
|| $block_type =~ /^\w+\:$/ );
- my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
-
- my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
- my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
+ my $type_p = $rLL_new->[$Kp]->[_TYPE_];
+ my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
+ my $type_sequence_p = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
# Do not add a semicolon if...
return
if (
# it would follow a comment (and be isolated)
- $previous_nonblank_type eq '#'
+ $type_p eq '#'
# it follows a code block ( because they are not always wanted
# there and may add clutter)
- || $rLL_new->[$Kp]->[_BLOCK_TYPE_]
+ || $type_sequence_p && $rblock_type_of_seqno->{$type_sequence_p}
# it would follow a label
- || $previous_nonblank_type eq 'J'
+ || $type_p eq 'J'
# it would be inside a 'format' statement (and cause syntax error)
- || ( $previous_nonblank_type eq 'k'
- && $previous_nonblank_token =~ /format/ )
+ || ( $type_p eq 'k'
+ && $token_p =~ /format/ )
);
# If it is also a CLOSING token we have to look closer...
if (
$seqno_inner
- && $is_closing_token{$previous_nonblank_token}
+ && $is_closing_token{$token_p}
# we only need to look if there is just one inner container..
&& defined( $rchildren_of_seqno->{$type_sequence} )
$rLL_new->[$Ktop]->[_TOKEN_] = $tok;
$rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = $len_tok;
$rLL_new->[$Ktop]->[_TYPE_] = ';';
- $rLL_new->[$Ktop]->[_SLEVEL_] =
- $rLL->[$KK]->[_SLEVEL_];
+
+ # NOTE: we are changing the output stack without updating variables
+ # $last_nonblank_code_type, etc. Future needs might require that
+ # those variables be updated here. For now, it seems ok to skip
+ # this.
# Save list of new K indexes of phantom semicolons.
# This will be needed if we want to undo them for iterations in
}
else {
- # insert a new token
+ # Patch for issue c078: keep line indexes in order. If the top
+ # token is a space that we are keeping (due to '-wls=';') then
+ # we have to check that old line indexes stay in order.
+ # In very rare
+ # instances in which side comments have been deleted and converted
+ # into blanks, we may have filtered down multiple blanks into just
+ # one. In that case the top blank may have a higher line number
+ # than the previous nonblank token. Although the line indexes of
+ # blanks are not really significant, we need to keep them in order
+ # in order to pass error checks.
+ if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' ) {
+ my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_];
+ my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_];
+ if ( $new_top_ix < $old_top_ix ) {
+ $rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix;
+ }
+ }
+
my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', '' );
- $rcopy->[_SLEVEL_] = $rLL->[$KK]->[_SLEVEL_];
$store_token->($rcopy);
push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
}
+ return;
};
my $check_Q = sub {
my $token = $rLL->[$KK]->[_TOKEN_];
$self->note_embedded_tab($line_number) if ( $token =~ "\t" );
+ # The remainder of this routine looks for something like
+ # '$var = s/xxx/yyy/;'
+ # in case it should have been '$var =~ s/xxx/yyy/;'
+
+ # Start by looking for a token begining with one of: s y m / tr
+ return
+ unless ( $is_s_y_m_slash{ substr( $token, 0, 1 ) }
+ || substr( $token, 0, 2 ) eq 'tr' );
+
+ # ... and preceded by one of: = == !=
my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
return unless ( defined($Kp) );
- my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
+ my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
+ return unless ( $is_unexpected_equals{$previous_nonblank_type} );
my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
my $previous_nonblank_type_2 = 'b';
my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
my $type_0 = $rLL->[$Kfirst]->[_TYPE_];
- # make note of something like '$var = s/xxx/yyy/;'
- # in case it should have been '$var =~ s/xxx/yyy/;'
if (
- $token =~ /^(s|tr|y|m|\/)/
- && $previous_nonblank_token =~ /^(=|==|!=)$/
+ ##$token =~ /^(s|tr|y|m|\/)/
+ ##&& $previous_nonblank_token =~ /^(=|==|!=)$/
+ 1
# preceded by simple scalar
&& $previous_nonblank_type_2 eq 'i'
&& !( $type_0 eq 'k' && $token_0 =~ /^(my|our|local)$/ )
)
{
- my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
+ my $lno = 1 + $rLL_new->[$Kp]->[_LINE_INDEX_];
+ my $guess = substr( $previous_nonblank_token, 0, 1 ) . '~';
complain(
-"Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
+"Line $lno: Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
);
}
+ return;
};
- ############################################
+ #-------------------------------------------
# Main loop to respace all lines of the file
- ############################################
+ #-------------------------------------------
my $last_K_out;
- # Testing option to break qw. Do not use; it can make a mess.
- my $ALLOW_BREAK_MULTILINE_QW = 0;
- my $in_multiline_qw;
foreach my $line_of_tokens ( @{$rlines} ) {
my $input_line_number = $line_of_tokens->{_line_number};
# package the tokenized lines as it received them. If we
# get a fault here it has not output a continuous sequence
# of K values. Or a line of CODE may have been mismarked as
- # something else.
+ # something else. There is no good way to continue after such an
+ # error.
+ # FIXME: Calling Fault will produce zero output; it would be best to
+ # find a way to dump the input file.
if ( defined($last_K_out) ) {
if ( $Kfirst != $last_K_out + 1 ) {
Fault(
}
else {
- # This line was mis-marked by sub scan_comment
+ # This line was mis-marked by sub scan_comment. Catch in
+ # DEVEL_MODE, otherwise try to repair and keep going.
Fault(
"Program bug. A hanging side comment has been mismarked"
- );
+ ) if (DEVEL_MODE);
+
+ $CODE_type = "";
+ $line_of_tokens->{_code_type} = $CODE_type;
}
}
# if last line was normal CODE.
# Patch for rt #125012: use K_previous_code rather than '_nonblank'
# because comments may disappear.
- my $type_next = $rLL->[$Kfirst]->[_TYPE_];
- my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
- my $Kp = $self->K_previous_code( undef, $rLL_new );
- if ( $last_line_type eq 'CODE'
- && $type_next ne 'b'
- && defined($Kp) )
- {
- my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
- my $type_p = $rLL_new->[$Kp]->[_TYPE_];
-
- my ( $token_pp, $type_pp );
- my $Kpp = $self->K_previous_code( $Kp, $rLL_new );
- if ( defined($Kpp) ) {
- $token_pp = $rLL_new->[$Kpp]->[_TOKEN_];
- $type_pp = $rLL_new->[$Kpp]->[_TYPE_];
- }
- else {
- $token_pp = ";";
- $type_pp = ';';
- }
-
+ if ( $last_line_type eq 'CODE' ) {
+ my $type_next = $rLL->[$Kfirst]->[_TYPE_];
+ my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
if (
-
is_essential_whitespace(
- $token_pp, $type_pp, $token_p,
- $type_p, $token_next, $type_next,
+ $last_last_nonblank_code_token,
+ $last_last_nonblank_code_type,
+ $last_nonblank_code_token,
+ $last_nonblank_code_type,
+ $token_next,
+ $type_next,
)
)
{
}
}
- ########################################################
+ #-------------------------------------------------------
# Loop to copy all tokens on this line, with any changes
- ########################################################
+ #-------------------------------------------------------
my $type_sequence;
for ( my $KK = $Kfirst ; $KK <= $Klast ; $KK++ ) {
$Ktoken_vars = $KK;
|| $rOpts_delete_old_whitespace )
{
- my $Kp = $self->K_previous_nonblank($KK);
- next unless defined($Kp);
- my $token_p = $rLL->[$Kp]->[_TOKEN_];
- my $type_p = $rLL->[$Kp]->[_TYPE_];
-
- my ( $token_pp, $type_pp );
-
- my $Kpp = $self->K_previous_nonblank($Kp);
- if ( defined($Kpp) ) {
- $token_pp = $rLL->[$Kpp]->[_TOKEN_];
- $type_pp = $rLL->[$Kpp]->[_TYPE_];
- }
- else {
- $token_pp = ";";
- $type_pp = ';';
- }
my $token_next = $rLL->[$Knext]->[_TOKEN_];
my $type_next = $rLL->[$Knext]->[_TYPE_];
my $do_not_delete = is_essential_whitespace(
- $token_pp, $type_pp, $token_p,
- $type_p, $token_next, $type_next,
+ $last_last_nonblank_code_token,
+ $last_last_nonblank_code_type,
+ $last_nonblank_code_token,
+ $last_nonblank_code_type,
+ $token_next,
+ $type_next,
);
+ # Note that repeated blanks will get filtered out here
next unless ($do_not_delete);
}
if ($type_sequence) {
- if ( $is_closing_token{$token} ) {
-
- # Insert a tentative missing semicolon if the next token is
- # a closing block brace
- if (
- $type eq '}'
- && $token eq '}'
+ # Insert a tentative missing semicolon if the next token is
+ # a closing block brace
+ if (
+ $type eq '}'
+ && $token eq '}'
- # not preceded by a ';'
- && $last_nonblank_type ne ';'
+ # not preceded by a ';'
+ && $last_nonblank_code_type ne ';'
- # and this is not a VERSION stmt (is all one line, we
- # are not inserting semicolons on one-line blocks)
- && $CODE_type ne 'VER'
+ # and this is not a VERSION stmt (is all one line, we
+ # are not inserting semicolons on one-line blocks)
+ && $CODE_type ne 'VER'
- # and we are allowed to add semicolons
- && $rOpts->{'add-semicolons'}
- )
- {
- $add_phantom_semicolon->($KK);
- }
+ # and we are allowed to add semicolons
+ && $rOpts->{'add-semicolons'}
+ )
+ {
+ $add_phantom_semicolon->($KK);
}
}
# Modify certain tokens here for whitespace
# The following is not yet done, but could be:
# sub (x x x)
- elsif ( $type =~ /^[wit]$/ ) {
+ # ( $type =~ /^[wit]$/ )
+ elsif ( $is_wit{$type} ) {
- # Examples: <<snippets/space1.in>>
# change '$ var' to '$var' etc
# change '@ ' to '@'
- my ( $sigil, $word ) = split /\s+/, $token, 2;
- if ( length($sigil) == 1
- && $sigil =~ /^[\$\&\%\*\@]$/ )
+ # Examples: <<snippets/space1.in>>
+ my $ord = ord( substr( $token, 1, 1 ) );
+ if (
+
+ # quick test for possible blank at second char
+ $ord > 0 && ( $ord < ORD_PRINTABLE_MIN
+ || $ord > ORD_PRINTABLE_MAX )
+ )
{
- $token = $sigil;
- $token .= $word if ($word);
- $rtoken_vars->[_TOKEN_] = $token;
+ my ( $sigil, $word ) = split /\s+/, $token, 2;
+
+ # $sigil =~ /^[\$\&\%\*\@]$/ )
+ if ( $is_sigil{$sigil} ) {
+ $token = $sigil;
+ $token .= $word if ( defined($word) ); # fix c104
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
}
- # Split identifiers with leading arrows, inserting blanks if
- # necessary. It is easier and safer here than in the
- # tokenizer. For example '->new' becomes two tokens, '->' and
- # 'new' with a possible blank between.
+ # Split identifiers with leading arrows, inserting blanks
+ # if necessary. It is easier and safer here than in the
+ # tokenizer. For example '->new' becomes two tokens, '->'
+ # and 'new' with a possible blank between.
#
# Note: there is a related patch in sub set_whitespace_flags
- if ( substr( $token, 0, 1 ) eq '-'
+ elsif (length($token) > 2
+ && substr( $token, 0, 2 ) eq '->'
&& $token =~ /^\-\>(.*)$/
&& $1 )
{
next;
}
- if ( $token =~ /$ANYSUB_PATTERN/ ) {
+ # Trim certain spaces in identifiers
+ if ( $type eq 'i' ) {
+
+ if (
+ (
+ substr( $token, 0, 3 ) eq 'sub'
+ || $rOpts_sub_alias_list
+ )
+ && $token =~ /$SUB_PATTERN/
+ )
+ {
+
+ # -spp = 0 : no space before opening prototype paren
+ # -spp = 1 : stable (follow input spacing)
+ # -spp = 2 : always space before opening prototype paren
+ my $spp = $rOpts->{'space-prototype-paren'};
+ if ( defined($spp) ) {
+ if ( $spp == 0 ) { $token =~ s/\s+\(/\(/; }
+ elsif ( $spp == 2 ) { $token =~ s/\(/ (/; }
+ }
- # -spp = 0 : no space before opening prototype paren
- # -spp = 1 : stable (follow input spacing)
- # -spp = 2 : always space before opening prototype paren
- my $spp = $rOpts->{'space-prototype-paren'};
- if ( defined($spp) ) {
- if ( $spp == 0 ) { $token =~ s/\s+\(/\(/; }
- elsif ( $spp == 2 ) { $token =~ s/\(/ (/; }
+ # one space max, and no tabs
+ $token =~ s/\s+/ /g;
+ $rtoken_vars->[_TOKEN_] = $token;
}
- # one space max, and no tabs
- $token =~ s/\s+/ /g;
- $rtoken_vars->[_TOKEN_] = $token;
- }
+ # clean up spaces in package identifiers, like
+ # "package Bob::Dog;"
+ elsif ( substr( $token, 0, 7 ) eq 'package'
+ && $token =~ /^package\s/ )
+ {
+ $token =~ s/\s+/ /g;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
- # clean up spaces in package identifiers, like
- # "package Bob::Dog;"
- if ( $token =~ /^package\s/ ) {
- $token =~ s/\s+/ /g;
- $rtoken_vars->[_TOKEN_] = $token;
- }
+ # trim identifiers of trailing blanks which can occur
+ # under some unusual circumstances, such as if the
+ # identifier 'witch' has trailing blanks on input here:
+ #
+ # sub
+ # witch
+ # () # prototype may be on new line ...
+ # ...
+ my $ord = ord( substr( $token, -1, 1 ) );
+ if (
- # trim identifiers of trailing blanks which can occur
- # under some unusual circumstances, such as if the
- # identifier 'witch' has trailing blanks on input here:
- #
- # sub
- # witch
- # () # prototype may be on new line ...
- # ...
- if ( $type eq 'i' ) {
- $token =~ s/\s+$//g;
- $rtoken_vars->[_TOKEN_] = $token;
+ # quick check for possible ending space
+ $ord > 0 && ( $ord < ORD_PRINTABLE_MIN
+ || $ord > ORD_PRINTABLE_MAX )
+ )
+ {
+ $token =~ s/\s+$//g;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
}
}
$rOpts->{'delete-semicolons'}
&& (
(
- $last_nonblank_type eq '}'
+ $last_nonblank_block_type
+ && $last_nonblank_code_type eq '}'
&& (
$is_block_without_semicolon{
$last_nonblank_block_type}
|| $last_nonblank_block_type =~ /^\w+:$/
)
)
- || $last_nonblank_type eq ';'
+ || $last_nonblank_code_type eq ';'
)
)
{
# This looks like a deletable semicolon, but even if a
- # semicolon can be deleted it is necessarily best to do so.
- # We apply these additional rules for deletion:
+ # semicolon can be deleted it is not necessarily best to do
+ # so. We apply these additional rules for deletion:
# - Always ok to delete a ';' at the end of a line
# - Never delete a ';' before a '#' because it would
# promote it to a block comment.
# do not delete only nonblank token in a file
else {
+ my $Kp = $self->K_previous_code( undef, $rLL_new );
my $Kn = $self->K_next_nonblank($KK);
- $ok_to_delete = defined($Kn) || $nonblank_token_count;
+ $ok_to_delete = defined($Kn) || defined($Kp);
}
if ($ok_to_delete) {
}
}
- # patch to add space to something like "x10"
- # This avoids having to split this token in the pre-tokenizer
+ # Old patch to add space to something like "x10".
+ # Note: This is now done in the Tokenizer, but this code remains
+ # for reference.
elsif ( $type eq 'n' ) {
- if ( $token =~ /^x\d+/ ) {
+ if ( substr( $token, 0, 1 ) eq 'x' && $token =~ /^x\d+/ ) {
$token =~ s/x/x /;
$rtoken_vars->[_TOKEN_] = $token;
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+Near line $input_line_number, Unexpected need to split a token '$token' - this should now be done by the Tokenizer
+EOM
+ }
}
}
$rtoken_vars->[_TOKEN_] = $token;
$self->note_embedded_tab($input_line_number)
if ( $token =~ "\t" );
-
- if ($in_multiline_qw) {
-
- # If we are at the end of a multiline qw ..
- if ( $in_multiline_qw == $KK ) {
-
- # Split off the closing delimiter character
- # so that the formatter can put a line break there if necessary
- my $part1 = $token;
- my $part2 = substr( $part1, -1, 1, "" );
-
- if ($part1) {
- my $rcopy =
- copy_token_as_type( $rtoken_vars, 'q', $part1 );
- $store_token->($rcopy);
- $token = $part2;
- $rtoken_vars->[_TOKEN_] = $token;
-
- }
- $in_multiline_qw = undef;
-
- # store without preceding blank
- $store_token->($rtoken_vars);
- next;
- }
- else {
- # continuing a multiline qw
- $store_token->($rtoken_vars);
- next;
- }
- }
-
- else {
-
- # we are encountered new qw token...see if multiline
- if ($ALLOW_BREAK_MULTILINE_QW) {
- my $K_end = $K_end_q->($KK);
- if ( $K_end != $KK ) {
-
- # Starting multiline qw...
- # set flag equal to the ending K
- $in_multiline_qw = $K_end;
-
- # Split off the leading part so that the formatter can
- # put a line break there if necessary
- if ( $token =~ /^(qw\s*.)(.*)$/ ) {
- my $part1 = $1;
- my $part2 = $2;
- if ($part2) {
- my $rcopy =
- copy_token_as_type( $rtoken_vars, 'q',
- $part1 );
- $store_token_and_space->(
- $rcopy,
- $rwhitespace_flags->[$KK] == WS_YES
- );
- $token = $part2;
- $rtoken_vars->[_TOKEN_] = $token;
-
- # Second part goes without intermediate blank
- $store_token->($rtoken_vars);
- next;
- }
- }
- }
- }
- else {
-
- # this is a new single token qw -
- # store with possible preceding blank
- $store_token_and_space->(
- $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
- );
- next;
- }
- }
+ $store_token_and_space->(
+ $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
+ );
+ next;
} ## end if ( $type eq 'q' )
# change 'LABEL :' to 'LABEL:'
# type here now that we have had a better look at the contents of the
# container. This fixes case b1085. To find the corresponding code in
# Tokenizer.pm search for 'b1085' with an editor.
- my $block_type = $rLL_new->[$K_opening]->[_BLOCK_TYPE_];
+ my $block_type = $rblock_type_of_seqno->{$seqno};
if ( $block_type && substr( $block_type, -1, 1 ) eq ' ' ) {
# Always remove the trailing space
$rLL_new->[$K_closing]->[_CI_LEVEL_] = 1;
}
- $rLL_new->[$K_opening]->[_BLOCK_TYPE_] = $block_type;
- $rLL_new->[$K_closing]->[_BLOCK_TYPE_] = $block_type;
+ $rblock_type_of_seqno->{$seqno} = $block_type;
}
# Handle a list container
if ( @{$rLL_new} ) { $Klimit = @{$rLL_new} - 1 }
$self->[_Klimit_] = $Klimit;
- # DEBUG OPTION: make sure the new array looks okay.
- # This is no longer needed but should be retained for future development.
+ # During development, verify that the new array still looks okay.
DEVEL_MODE && $self->check_token_array();
# reset the token limits of each line
}
else {
- # This sub assumes it will be called with just two types, 'b' or 'q'
- Fault(
-"Programming error: copy_token_as has type $type but should be 'b' or 'q'"
- );
+ # Unexpected type ... this sub will work as long as both $token and
+ # $type are defined, but we should catch any unexpected types during
+ # development.
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+sub 'copy_token_as_type' received token type '$type' but expects just one of: 'b' 'q' '->' or ';'
+EOM
+ }
+ else {
+ # shouldn't happen
+ }
}
my @rnew_token = @{$rold_token};
$rnew_token[_TYPE_] = $type;
$rnew_token[_TOKEN_] = $token;
- $rnew_token[_BLOCK_TYPE_] = '';
$rnew_token[_TYPE_SEQUENCE_] = '';
return \@rnew_token;
}
# We seem to have encountered a gap in our array.
# This shouldn't happen because sub write_line() pushed
# items into the $rLL array.
- Fault("Undefined entry for k=$Knnb");
+ Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
+ return;
}
if ( $rLL->[$Knnb]->[_TYPE_] ne 'b'
&& $rLL->[$Knnb]->[_TYPE_] ne '#' )
# $store_token() within sub respace_tokens. Tokens are pushed on
# so there shouldn't be any gaps.
if ( !defined( $rLL->[$Knnb] ) ) {
- Fault("Undefined entry for k=$Knnb");
+ Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
+ return;
}
if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
$Knnb++;
# avoid this error.
Fault(
"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
- );
+ ) if (DEVEL_MODE);
+ return;
}
my $Kpnb = $KK - 1;
while ( $Kpnb >= 0 ) {
# avoid this error.
Fault(
"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
- );
+ ) if (DEVEL_MODE);
+ return;
}
my $Kpnb = $KK - 1;
return unless ( $Kpnb >= 0 );
return;
}
-sub get_old_line_index {
-
- # return index of the original line that token K was on
- my ( $self, $K ) = @_;
- my $rLL = $self->[_rLL_];
- return 0 unless defined($K);
- return $rLL->[$K]->[_LINE_INDEX_];
-}
-
-sub get_old_line_count {
-
- # return number of input lines separating two tokens
- my ( $self, $Kbeg, $Kend ) = @_;
- my $rLL = $self->[_rLL_];
- return 0 unless defined($Kbeg);
- return 0 unless defined($Kend);
- return $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_] + 1;
-}
-
sub parent_seqno_by_K {
# Return the sequence number of the parent container of token K, if any.
my ( $self, $KK ) = @_;
- return unless defined($KK);
-
- # Note: This routine is relatively slow. I tried replacing it with a hash
- # which is easily created in sub respace_tokens. But the total time with a
- # hash was greater because this routine is called once per line whereas a
- # hash must be created token-by-token.
+ my $rLL = $self->[_rLL_];
- my $rLL = $self->[_rLL_];
- my $KNEXT = $KK;
+ # The task is to jump forward to the next container token
+ # and use the sequence number of either it or its parent.
# For example, consider the following with seqno=5 of the '[' and ']'
# being called with index K of the first token of each line:
# unbalanced files, last sequence number will either be undefined or it may
# be at a deeper level. In either case we will just return SEQ_ROOT to
# have a defined value and allow formatting to proceed.
- my $parent_seqno = SEQ_ROOT;
- while ( defined($KNEXT) ) {
- my $Kt = $KNEXT;
- $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
- my $rtoken_vars = $rLL->[$Kt];
- my $type = $rtoken_vars->[_TYPE_];
- my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+ my $parent_seqno = SEQ_ROOT;
+ my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+ if ($type_sequence) {
+ $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
+ }
+ else {
+ my $Kt = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
+ if ( defined($Kt) ) {
+ $type_sequence = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
+ my $type = $rLL->[$Kt]->[_TYPE_];
- # if next container token is closing, it is the parent seqno
- if ( $is_closing_type{$type} ) {
- if ( $Kt > $KK ) {
+ # if next container token is closing, it is the parent seqno
+ if ( $is_closing_type{$type} ) {
$parent_seqno = $type_sequence;
}
+
+ # otherwise we want its parent container
else {
$parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
}
- last;
- }
-
- # if next container token is opening, we want its parent container
- elsif ( $is_opening_type{$type} ) {
- $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
- last;
}
-
- # not a container - must be ternary - keep going
}
-
$parent_seqno = SEQ_ROOT unless ( defined($parent_seqno) );
return $parent_seqno;
}
# or is at root level
# or there is some kind of error (i.e. unbalanced file)
# returns false otherwise
+ return 1 if ( $i < 0 ); # shouldn't happen, bad call
my $seqno = $parent_seqno_to_go[$i];
return 1 if ( !$seqno || $seqno eq SEQ_ROOT );
- my $Kopening = $self->[_K_opening_container_]->{$seqno};
- return 1 unless defined($Kopening);
- my $rLL = $self->[_rLL_];
- return 1 if $rLL->[$Kopening]->[_BLOCK_TYPE_];
+ return 1 if ( $self->[_rblock_type_of_seqno_]->{$seqno} );
return;
}
# since they have probably changed due to inserting and deleting blanks
# and a few other tokens.
- my $Kmax = -1;
-
# This is the next token and its line index:
my $Knext = 0;
- my $inext;
- if ( defined($rLL) && @{$rLL} ) {
- $Kmax = @{$rLL} - 1;
- $inext = $rLL->[$Knext]->[_LINE_INDEX_];
+ my $Kmax = defined($Klimit) ? $Klimit : -1;
+
+ # Verify that old line indexes are in still order. If this error occurs,
+ # check locations where sub 'respace_tokens' creates new tokens (like
+ # blank spaces). It must have set a bad old line index.
+ if ( DEVEL_MODE && defined($Klimit) ) {
+ my $iline = $rLL->[0]->[_LINE_INDEX_];
+ for ( my $KK = 1 ; $KK <= $Klimit ; $KK++ ) {
+ my $iline_last = $iline;
+ $iline = $rLL->[$KK]->[_LINE_INDEX_];
+ if ( $iline < $iline_last ) {
+ my $KK_m = $KK - 1;
+ my $token_m = $rLL->[$KK_m]->[_TOKEN_];
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ my $type_m = $rLL->[$KK_m]->[_TYPE_];
+ my $type = $rLL->[$KK]->[_TYPE_];
+ Fault(<<EOM);
+Line indexes out of order at index K=$KK:
+at KK-1 =$KK_m: old line=$iline_last, type='$type_m', token='$token_m'
+at KK =$KK: old line=$iline, type='$type', token='$token',
+EOM
+ }
+ }
}
- # Remember the most recently output token index
- my $Klast_out;
-
my $iline = -1;
foreach my $line_of_tokens ( @{$rlines} ) {
$iline++;
my $line_type = $line_of_tokens->{_line_type};
- my $CODE_type = $line_of_tokens->{_code_type};
if ( $line_type eq 'CODE' ) {
- my @K_array;
- my $rK_range;
- if ( $Knext <= $Kmax ) {
- $inext = $rLL->[$Knext]->[_LINE_INDEX_];
- while ( $inext <= $iline ) {
- push @K_array, $Knext;
- $Knext += 1;
- if ( $Knext > $Kmax ) {
- $inext = undef;
- last;
- }
- $inext = $rLL->[$Knext]->[_LINE_INDEX_];
- }
- }
-
- # Delete any terminal blank token
- if (@K_array) {
- if ( $rLL->[ $K_array[-1] ]->[_TYPE_] eq 'b' ) {
- pop @K_array;
- }
+ # Get the old number of tokens on this line
+ my $rK_range_old = $line_of_tokens->{_rK_range};
+ my ( $Kfirst_old, $Klast_old ) = @{$rK_range_old};
+ my $Kdiff_old = 0;
+ if ( defined($Kfirst_old) ) {
+ $Kdiff_old = $Klast_old - $Kfirst_old;
}
- # Define the range of K indexes for the line:
+ # Find the range of NEW K indexes for the line:
# $Kfirst = index of first token on line
- # $Klast_out = index of last token on line
+ # $Klast = index of last token on line
my ( $Kfirst, $Klast );
- if (@K_array) {
- $Kfirst = $K_array[0];
- $Klast = $K_array[-1];
- $Klast_out = $Klast;
- if ( defined($Kfirst) ) {
+ my $Knext_beg = $Knext; # this will be $Kfirst if we find tokens
+
+ # Optimization: Although the actual K indexes may be completely
+ # changed after respacing, the number of tokens on any given line
+ # will often be nearly unchanged. So we will see if we can start
+ # our search by guessing that the new line has the same number
+ # of tokens as the old line.
+ my $Knext_guess = $Knext + $Kdiff_old;
+ if ( $Knext_guess > $Knext
+ && $Knext_guess < $Kmax
+ && $rLL->[$Knext_guess]->[_LINE_INDEX_] <= $iline )
+ {
+
+ # the guess is good, so we can start our search here
+ $Knext = $Knext_guess + 1;
+ }
+
+ while ($Knext <= $Kmax
+ && $rLL->[$Knext]->[_LINE_INDEX_] <= $iline )
+ {
+ $Knext++;
+ }
+
+ if ( $Knext > $Knext_beg ) {
+
+ $Klast = $Knext - 1;
+
+ # Delete any terminal blank token
+ if ( $rLL->[$Klast]->[_TYPE_] eq 'b' ) { $Klast -= 1 }
+
+ if ( $Klast < $Knext_beg ) {
+ $Klast = undef;
+ }
+ else {
+
+ $Kfirst = $Knext_beg;
# Save ranges of non-comment code. This will be used by
# sub keep_old_line_breaks.
# Only save ending K indexes of code types which are blank
# or 'VER'. These will be used for a convergence check.
- # See related code in sub 'send_lines_to_vertical_aligner'.
+ # See related code in sub 'convey_batch_to_vertical_aligner'
+ my $CODE_type = $line_of_tokens->{_code_type};
if ( !$CODE_type
|| $CODE_type eq 'VER' )
{
# Deleting semicolons can create new empty code lines
# which should be marked as blank
if ( !defined($Kfirst) ) {
- my $code_type = $line_of_tokens->{_code_type};
- if ( !$code_type ) {
+ my $CODE_type = $line_of_tokens->{_code_type};
+ if ( !$CODE_type ) {
$line_of_tokens->{_code_type} = 'BL';
}
}
# There shouldn't be any nodes beyond the last one. This routine is
# relinking lines and tokens after the tokens have been respaced. A fault
# here indicates some kind of bug has been introduced into the above loops.
- if ( defined($inext) ) {
+ # There is not good way to keep going; we better stop here.
+ # FIXME: This will produce zero output. it would be best to find a way to
+ # dump the input file.
+ if ( $Knext <= $Kmax ) {
Fault("unexpected tokens at end of file when reconstructing lines");
}
$ris_essential_old_breakpoint->{$Klast_prev} = 1;
}
}
-
return;
}
my $ris_broken_container = $self->[_ris_broken_container_];
my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
- # This code moved here from sub scan_list to fix b1120
+ # This code moved here from sub break_lists to fix b1120
if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
foreach my $item ( @{$rKrange_code_without_comments} ) {
my ( $Kfirst, $Klast ) = @{$item};
my $seqno = $rLL->[$Kfirst]->[_TYPE_SEQUENCE_];
next unless ($seqno);
- # Patch to avoid blinkers: but do not do this unless the
- # container holds a list, or the opening and closing parens are
- # separated by more than one line.
- # Fixes case b977.
- next
- if (
- !$ris_list_by_seqno->{$seqno}
- && ( !$ris_broken_container->{$seqno}
- || $ris_broken_container->{$seqno} <= 1 )
- );
+ # Note: in previous versions there was a fix here to avoid
+ # instability between conflicting -bom and -pvt or -pvtc flags.
+ # The fix skipped -bom for a small line difference. But this
+ # was troublesome, and instead the fix has been moved to
+ # sub set_vertical_tightness_flags where priority is given to
+ # the -bom flag over -pvt and -pvtc flags. Both opening and
+ # closing paren flags are involved because even though -bom only
+ # requests breaking before the closing paren, automated logic
+ # opens the opening paren when the closing paren opens.
+ # Relevant cases are b977, b1215, b1270, b1303
+
$rwant_container_open->{$seqno} = 1;
}
}
return unless ( %keep_break_before_type || %keep_break_after_type );
- foreach my $item ( @{$rKrange_code_without_comments} ) {
- my ( $Kfirst, $Klast ) = @{$item};
+ my $check_for_break = sub {
+ my ( $KK, $rkeep_break_hash, $rbreak_hash ) = @_;
+ my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
- my $type_first = $rLL->[$Kfirst]->[_TYPE_];
- if ( $keep_break_before_type{$type_first} ) {
- $rbreak_before_Kfirst->{$Kfirst} = 1;
+ # non-container tokens use the type as the key
+ if ( !$seqno ) {
+ my $type = $rLL->[$KK]->[_TYPE_];
+ if ( $rkeep_break_hash->{$type} ) {
+ $rbreak_hash->{$KK} = 1;
+ }
}
- my $type_last = $rLL->[$Klast]->[_TYPE_];
- if ( $keep_break_after_type{$type_last} ) {
- $rbreak_after_Klast->{$Klast} = 1;
+ # container tokens use the token as the key
+ else {
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ my $flag = $rkeep_break_hash->{$token};
+ if ($flag) {
+
+ my $match = $flag eq '1' || $flag eq '*';
+
+ # check for special matching codes
+ if ( !$match ) {
+ if ( $token eq '(' || $token eq ')' ) {
+ $match = $self->match_paren_flag( $KK, $flag );
+ }
+ elsif ( $token eq '{' || $token eq '}' ) {
+
+ # These tentative codes 'b' and 'B' for brace types are
+ # placeholders for possible future brace types. They
+ # are not documented and may be changed.
+ my $block_type =
+ $self->[_rblock_type_of_seqno_]->{$seqno};
+ if ( $flag eq 'b' ) { $match = $block_type }
+ elsif ( $flag eq 'B' ) { $match = !$block_type }
+ else {
+ # unknown code - no match
+ }
+ }
+ }
+ $rbreak_hash->{$KK} = 1 if ($match);
+ }
}
+ };
+
+ foreach my $item ( @{$rKrange_code_without_comments} ) {
+ my ( $Kfirst, $Klast ) = @{$item};
+ $check_for_break->(
+ $Kfirst, \%keep_break_before_type, $rbreak_before_Kfirst
+ );
+ $check_for_break->(
+ $Klast, \%keep_break_after_type, $rbreak_after_Klast
+ );
}
return;
}
$self->weld_nested_quotes();
}
- ##############################################################
+ #-------------------------------------------------------------
# All welding is done. Finish setting up weld data structures.
- ##############################################################
+ #-------------------------------------------------------------
my $rLL = $self->[_rLL_];
my $rK_weld_left = $self->[_rK_weld_left_];
my @keys = keys %{$rK_weld_right};
$total_weld_count = @keys;
- # Note that this loop is processed in unsorted order for efficiency
+ # First pass to process binary welds.
+ # This loop is processed in unsorted order for efficiency.
foreach my $Kstart (@keys) {
my $Kend = $rK_weld_right->{$Kstart};
# An error here would be due to an incorrect initialization introduced
# in one of the above weld routines, like sub weld_nested.
if ( $Kend <= $Kstart ) {
- Fault("Bad weld link: Kend=$Kend <= Kstart=$Kstart\n");
+ Fault("Bad weld link: Kend=$Kend <= Kstart=$Kstart\n")
+ if (DEVEL_MODE);
+ next;
}
- $rweld_len_right_at_K->{$Kstart} =
- $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
- $rLL->[$Kstart]->[_CUMULATIVE_LENGTH_];
-
- $rK_weld_left->{$Kend} = $Kstart; # fix in case of missing left link
+ # Set weld values for all tokens this welded pair
+ foreach ( $Kstart + 1 .. $Kend ) {
+ $rK_weld_left->{$_} = $Kstart;
+ }
+ foreach my $Kx ( $Kstart .. $Kend - 1 ) {
+ $rK_weld_right->{$Kx} = $Kend;
+ $rweld_len_right_at_K->{$Kx} =
+ $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
+ }
# Remember the leftmost index of welds which continue to the right
if ( defined( $rK_weld_right->{$Kend} )
}
}
- # Update the end index and lengths of any long welds to extend to the far
- # end. This has to be processed in sorted order.
- # Left links added for b1173.
- my $Kend = -1;
- foreach my $Kstart ( sort { $a <=> $b } @K_multi_weld ) {
+ # Second pass to process chains of welds (these are rare).
+ # This has to be processed in sorted order.
+ if (@K_multi_weld) {
+ my $Kend = -1;
+ foreach my $Kstart ( sort { $a <=> $b } @K_multi_weld ) {
- # skip any interior K which was originally missing a left link
- next if ( $Kstart <= $Kend );
+ # Skip any interior K which was originally missing a left link
+ next if ( $Kstart <= $Kend );
- my @Klist;
- push @Klist, $Kstart;
- $Kend = $rK_weld_right->{$Kstart};
- $rK_weld_left->{$Kend} = $Kstart;
- my $Knext = $rK_weld_right->{$Kend};
- while ( defined($Knext) ) {
- push @Klist, $Kend;
- $Kend = $Knext;
- $rK_weld_left->{$Kend} = $Kstart;
- $Knext = $rK_weld_right->{$Kend};
- }
- pop @Klist; # values for last entry are already correct
- foreach my $KK (@Klist) {
-
- # Ending indexes must only be shifted to the right for long welds.
- # An error here would be due to a programming error introduced in
- # the code immediately above.
- my $Kend_old = $rK_weld_right->{$KK};
- if ( !defined($Kend_old) || $Kend < $Kend_old ) {
- Fault(
-"Bad weld link at K=$KK, old end is K=$Kend_old, new end is $Kend\n"
- );
+ # Find the end of this chain
+ $Kend = $rK_weld_right->{$Kstart};
+ my $Knext = $rK_weld_right->{$Kend};
+ while ( defined($Knext) ) {
+ $Kend = $Knext;
+ $Knext = $rK_weld_right->{$Kend};
}
- $rK_weld_right->{$KK} = $Kend;
- $rweld_len_right_at_K->{$KK} =
- $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
- $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
+ # Set weld values this chain
+ foreach ( $Kstart + 1 .. $Kend ) {
+ $rK_weld_left->{$_} = $Kstart;
+ }
+ foreach my $Kx ( $Kstart .. $Kend - 1 ) {
+ $rK_weld_right->{$Kx} = $Kend;
+ $rweld_len_right_at_K->{$Kx} =
+ $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
+ }
}
}
# Called once per file to handle cuddled formatting
- my $rK_weld_left = $self->[_rK_weld_left_];
- my $rK_weld_right = $self->[_rK_weld_right_];
+ my $rK_weld_left = $self->[_rK_weld_left_];
+ my $rK_weld_right = $self->[_rK_weld_right_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
# This routine implements the -cb flag by finding the appropriate
# closing and opening block braces and welding them together.
# the bottom of sub 'respace_tokens' which set the values of
# _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
# loop control lines above.
- Fault("sequence = $type_sequence not defined at K=$KK");
+ Fault("sequence = $type_sequence not defined at K=$KK")
+ if (DEVEL_MODE);
+ next;
}
# NOTE: we must use the original levels here. They can get changed
if ( $token eq '{' ) {
- my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
+ my $block_type = $rblock_type_of_seqno->{$type_sequence};
if ( !$block_type ) {
# patch for unrecognized block types which may not be labeled
}
next unless $Kp;
$block_type = $rLL->[$Kp]->[_TOKEN_];
-
}
if ( $in_chain{$level} ) {
return unless ( defined($rLL) && @{$rLL} );
my $Num = @{$rLL};
- my $K_opening_container = $self->[_K_opening_container_];
- my $K_closing_container = $self->[_K_closing_container_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
# We define an array of pairs of nested containers
my @nested_pairs;
my $K_inner_opening = $K_opening_container->{$inner_seqno};
next unless defined($K_outer_opening) && defined($K_inner_opening);
+ my $inner_blocktype = $rblock_type_of_seqno->{$inner_seqno};
+ my $outer_blocktype = $rblock_type_of_seqno->{$outer_seqno};
+
# Verify that the inner opening token is the next container after the
# outer opening token.
my $K_io_check = $rLL->[$K_outer_opening]->[_KNEXT_SEQ_ITEM_];
# | |
# ic oc
- next if $rLL->[$K_inner_opening]->[_BLOCK_TYPE_] ne 'sub';
+ next if ( !$inner_blocktype || $inner_blocktype ne 'sub' );
next if $rLL->[$K_io_check]->[_TOKEN_] ne '(';
my $seqno_signature = $rLL->[$K_io_check]->[_TYPE_SEQUENCE_];
next unless defined($seqno_signature);
# @array) )
my $Kn_first = $K_outer_opening;
my $Kn_last_nonblank;
+ my $saw_comment;
for (
my $Kn = $K_outer_opening + 1 ;
$Kn <= $K_inner_opening ;
# skip chain of identifier tokens
my $last_type = $type;
my $last_is_name = $is_name;
- $type = $rLL->[$Kn]->[_TYPE_];
+ $type = $rLL->[$Kn]->[_TYPE_];
+ if ( $type eq '#' ) { $saw_comment = 1; last }
$is_name = $is_name_type->{$type};
next if ( $is_name && $last_is_name );
last if ( $nonblank_count > 2 );
}
+ # Do not weld across a comment .. fix for c058.
+ next if ($saw_comment);
+
# Patch for b1104: do not weld to a paren preceded by sort/map/grep
# because the special line break rules may cause a blinking state
if ( defined($Kn_last_nonblank)
# this
# sub make_anon_with_my_sub { sub {
# because it probably hides the structure a little too much.
- || ( $rLL->[$K_inner_opening]->[_BLOCK_TYPE_] eq 'sub'
+ || ( $inner_blocktype
+ && $inner_blocktype eq 'sub'
&& $rLL->[$Kn_first]->[_TOKEN_] eq 'sub'
- && !$rLL->[$K_outer_opening]->[_BLOCK_TYPE_] )
+ && !$outer_blocktype )
)
{
push @nested_pairs,
return \@nested_pairs;
}
-sub is_excluded_weld {
+sub match_paren_flag {
+
+ # Decide if this paren is excluded by user request:
+ # undef matches no parens
+ # '*' matches all parens
+ # 'k' matches only if the previous nonblank token is a perl builtin
+ # keyword (such as 'if', 'while'),
+ # 'K' matches if 'k' does not, meaning if the previous token is not a
+ # keyword.
+ # 'f' matches if the previous token is a function other than a keyword.
+ # 'F' matches if 'f' does not.
+ # 'w' matches if either 'k' or 'f' match.
+ # 'W' matches if 'w' does not.
+ my ( $self, $KK, $flag ) = @_;
- # decide if this weld is excluded by user request
- my ( $self, $KK, $is_leading ) = @_;
- my $rLL = $self->[_rLL_];
- my $rtoken_vars = $rLL->[$KK];
- my $token = $rtoken_vars->[_TOKEN_];
- my $rflags = $weld_nested_exclusion_rules{$token};
- return 0 unless ( defined($rflags) );
- my $flag = $is_leading ? $rflags->[0] : $rflags->[1];
return 0 unless ( defined($flag) );
+ return 0 if $flag eq '0';
+ return 1 if $flag eq '1';
return 1 if $flag eq '*';
+ return 0 unless ( defined($KK) );
+
+ my $rLL = $self->[_rLL_];
+ my $rtoken_vars = $rLL->[$KK];
+ my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
+ return 0 unless ($seqno);
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $K_opening = $KK;
+ if ( !$is_opening_token{$token} ) {
+ $K_opening = $self->[_K_opening_container_]->{$seqno};
+ }
+ return unless ( defined($K_opening) );
my ( $is_f, $is_k, $is_w );
- my $Kp = $self->K_previous_nonblank($KK);
+ my $Kp = $self->K_previous_nonblank($K_opening);
if ( defined($Kp) ) {
- my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
my $type_p = $rLL->[$Kp]->[_TYPE_];
# keyword?
# either keyword or function call?
$is_w = $is_k || $is_f;
}
-
my $match;
if ( $flag eq 'k' ) { $match = $is_k }
elsif ( $flag eq 'K' ) { $match = !$is_k }
return $match;
}
-# types needed for welding RULE 6
+sub is_excluded_weld {
+
+ # decide if this weld is excluded by user request
+ my ( $self, $KK, $is_leading ) = @_;
+ my $rLL = $self->[_rLL_];
+ my $rtoken_vars = $rLL->[$KK];
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $rflags = $weld_nested_exclusion_rules{$token};
+ return 0 unless ( defined($rflags) );
+ my $flag = $is_leading ? $rflags->[0] : $rflags->[1];
+ return 0 unless ( defined($flag) );
+ return 1 if $flag eq '*';
+ return $self->match_paren_flag( $KK, $flag );
+}
+
+# hashes to simplify welding logic
my %type_ok_after_bareword;
+my %is_ternary;
+my %has_tight_paren;
BEGIN {
+ # types needed for welding RULE 6
my @q = qw# => -> { ( [ #;
@type_ok_after_bareword{@q} = (1) x scalar(@q);
+
+ @q = qw( ? : );
+ @is_ternary{@q} = (1) x scalar(@q);
+
+ # these types do not 'like' to be separated from a following paren
+ @q = qw(w i q Q G C Z U);
+ @{has_tight_paren}{@q} = (1) x scalar(@q);
}
use constant DEBUG_WELD => 0;
my $rK_range = $rlines->[$iline_oo]->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
- # Define a reference index from which to start measuring
- my $Kref = $Kfirst;
+ #-------------------------------------------------------------------------
+ # We now define a reference index, '$Kref', from which to start measuring
+ # This choice turns out to be critical for keeping welds stable during
+ # iterations, so we go through a number of STEPS...
+ #-------------------------------------------------------------------------
+
+ # STEP 1: Our starting guess is to use measure from the first token of the
+ # current line. This is usually a good guess.
+ my $Kref = $Kfirst;
+
+ # STEP 2: See if we should go back a little farther
my $Kprev = $self->K_previous_nonblank($Kfirst);
if ( defined($Kprev) ) {
- # The -iob and -wn flags do not work well together. To avoid
- # blinking states we have to override -iob at certain key line
- # breaks.
- $self->[_ris_essential_old_breakpoint_]->{$Kprev} = 1;
+ # Avoid measuring from between an opening paren and a previous token
+ # which should stay close to it ... fixes b1185
+ my $token_oo = $rLL->[$Kouter_opening]->[_TOKEN_];
+ my $type_prev = $rLL->[$Kprev]->[_TYPE_];
+ if ( $Kouter_opening == $Kfirst
+ && $token_oo eq '('
+ && $has_tight_paren{$type_prev} )
+ {
+ $Kref = $Kprev;
+ }
# Back up and count length from a token like '=' or '=>' if -lp
# is used (this fixes b520)
# ...or if a break is wanted before there
- my $type_prev = $rLL->[$Kprev]->[_TYPE_];
- if ( $rOpts_line_up_parentheses
+ elsif ($rOpts_line_up_parentheses
|| $want_break_before{$type_prev} )
{
- if ( substr( $type_prev, 0, 1 ) eq '=' ) {
+
+ # If there are other sequence items between the start of this line
+ # and the opening token in question, then do not include tokens on
+ # the previous line in length calculations. This check added to
+ # fix case b1174 which had a '?' on the line
+ my $no_previous_seq_item = $Kref == $Kouter_opening
+ || $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_] == $Kouter_opening;
+
+ if ( $no_previous_seq_item
+ && substr( $type_prev, 0, 1 ) eq '=' )
+ {
$Kref = $Kprev;
# Fix for b1144 and b1112: backup to the first nonblank
}
}
+ # STEP 3: Now look ahead for a ternary and, if found, use it.
+ # This fixes case b1182.
+ # Also look for a ')' at the same level and, if found, use it.
+ # This fixes case b1224.
+ if ( $Kref < $Kouter_opening ) {
+ my $Knext = $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_];
+ my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
+ while ( $Knext < $Kouter_opening ) {
+ if ( $rLL->[$Knext]->[_LEVEL_] == $level_oo ) {
+ if ( $is_ternary{ $rLL->[$Knext]->[_TYPE_] }
+ || $rLL->[$Knext]->[_TOKEN_] eq ')' )
+ {
+ $Kref = $Knext;
+ last;
+ }
+ }
+ $Knext = $rLL->[$Knext]->[_KNEXT_SEQ_ITEM_];
+ }
+ }
+
# Define the starting measurements we will need
$starting_lentot =
$Kref <= 0 ? 0 : $rLL->[ $Kref - 1 ]->[_CUMULATIVE_LENGTH_];
$maximum_text_length = $maximum_text_length_at_level[$starting_level] -
$starting_ci * $rOpts_continuation_indentation;
- # Now fix these if necessary to avoid known problems...
-
- # FIX1: Switch to using the outer opening token as the reference
+ # STEP 4: Switch to using the outer opening token as the reference
# point if a line break before it would make a longer line.
# Fixes case b1055 and is also an alternate fix for b1065.
my $starting_level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
my $new_weld_ok = 1;
- # FIX2 for b1020: Avoid problem areas with the -wn -lp combination. The
+ # STEP 5, fix b1020: Avoid problem areas with the -wn -lp combination. The
# combination -wn -lp -dws -naws does not work well and can cause blinkers.
# It will probably only occur in stress testing. For this situation we
# will only start a new weld if we start at a 'good' location.
# - Require blank before certain previous characters to fix b1111.
# - Add ';' to fix case b1139
# - Convert from '$ok_to_weld' to '$new_weld_ok' to fix b1162.
+ # - relaxed constraints for b1227
if ( $starting_ci
&& $rOpts_line_up_parentheses
&& $rOpts_delete_old_whitespace
my $type_pp = 'b';
if ( $Kprev >= 0 ) { $type_pp = $rLL->[ $Kprev - 1 ]->[_TYPE_] }
unless (
- $type_prev =~ /^[\,\.\;]/
- || $type_prev =~ /^[=\{\[\(\L]/ && $type_pp eq 'b'
+ $type_prev =~ /^[\,\.\;]/
+ || $type_prev =~ /^[=\{\[\(\L]/
+ && ( $type_pp eq 'b' || $type_pp eq '}' || $type_first eq 'k' )
|| $type_first =~ /^[=\,\.\;\{\[\(\L]/
|| $type_first eq '||'
- || ( $type_first eq 'k' && $token_first eq 'if'
- || $token_first eq 'or' )
+ || (
+ $type_first eq 'k'
+ && ( $token_first eq 'if'
+ || $token_first eq 'or' )
+ )
)
{
$msg =
-"Skipping weld: poor break with -lp and ci at type_first='$type_first' type_prev='$type_prev'\n";
+"Skipping weld: poor break with -lp and ci at type_first='$type_first' type_prev='$type_prev' type_pp=$type_pp\n";
$new_weld_ok = 0;
}
}
-
return ( $new_weld_ok, $maximum_text_length, $starting_lentot, $msg );
}
# involves setting certain hash values which will be checked
# later during formatting.
- my $rLL = $self->[_rLL_];
- my $rlines = $self->[_rlines_];
- my $K_opening_container = $self->[_K_opening_container_];
- my $K_closing_container = $self->[_K_closing_container_];
+ my $rLL = $self->[_rLL_];
+ my $rlines = $self->[_rlines_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
+ my $ris_asub_block = $self->[_ris_asub_block_];
+ my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
# Find nested pairs of container tokens for any welding.
my $rnested_pairs = $self->find_nested_pairs();
my $iline_outer_opening = -1;
my $weld_count_this_start = 0;
- my $multiline_tol =
- 1 + max( $rOpts_indent_columns, $rOpts_continuation_indentation );
+ # OLD: $single_line_tol added to fix cases b1180 b1181
+ # = $rOpts_continuation_indentation > $rOpts_indent_columns ? 1 : 0;
+ # NEW: $single_line_tol=0; fixes b1212 and b1180-1181 work now
+ my $single_line_tol = 0;
+
+ my $multiline_tol = $single_line_tol + 1 +
+ max( $rOpts_indent_columns, $rOpts_continuation_indentation );
+
+ # Define a welding cutoff level: do not start a weld if the inside
+ # container level equals or exceeds this level.
+
+ # We use the minimum of two criteria, either of which may be more
+ # restrictive. The 'alpha' value is more restrictive in (b1206, b1252) and
+ # the 'beta' value is more restrictive in other cases (b1243).
+
+ my $weld_cutoff_level = min( $stress_level_alpha, $stress_level_beta + 3 );
+
+ # The vertical tightness flags can throw off line length calculations.
+ # This patch was added to fix instability issue b1284.
+ # It works to always use a tol of 1 for 1 line block length tests, but
+ # this restricted value keeps test case wn6.wn working as before.
+ # It may be necessary to include '[' and '{' here in the future.
+ my $one_line_tol = $opening_vertical_tightness{'('} ? 1 : 0;
my $length_to_opening_seqno = sub {
my ($seqno) = @_;
}
+ # RULE: Avoid welding under stress. The idea is that we need to have a
+ # little space* within a welded container to avoid instability. Note
+ # that after each weld the level values are reduced, so long multiple
+ # welds can still be made. This rule will seldom be a limiting factor
+ # in actual working code. Fixes b1206, b1243.
+ my $inner_level = $inner_opening->[_LEVEL_];
+ if ( $inner_level >= $weld_cutoff_level ) { next }
+
# Set flag saying if this pair starts a new weld
my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
my $iline_ic = $inner_closing->[_LINE_INDEX_];
my $iline_oc = $outer_closing->[_LINE_INDEX_];
my $token_oo = $outer_opening->[_TOKEN_];
+ my $token_io = $inner_opening->[_TOKEN_];
my $is_multiline_weld =
$iline_oo == $iline_io
&& $iline_io != $iline_ic;
if (DEBUG_WELD) {
- my $token_io = $rLL->[$Kinner_opening]->[_TOKEN_];
- my $len_oo = $rLL->[$Kouter_opening]->[_CUMULATIVE_LENGTH_];
- my $len_io = $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_];
+ my $len_oo = $rLL->[$Kouter_opening]->[_CUMULATIVE_LENGTH_];
+ my $len_io = $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_];
$Msg .= <<EOM;
Pair seqo=$outer_seqno seqi=$inner_seqno lines: loo=$iline_oo lio=$iline_io lic=$iline_ic loc=$iline_oc
Koo=$Kouter_opening Kio=$Kinner_opening Kic=$Kinner_closing Koc=$Kouter_closing lenoo=$len_oo lenio=$len_io
EOM
}
- # If this pair is not adjacent to the previous pair (skipped or not),
- # then measure lengths from the start of line of oo.
+ # DO-NOT-WELD RULE 0:
+ # Avoid a new paren-paren weld if inner parens are 'sheared' (separated
+ # by one line). This can produce instabilities (fixes b1250 b1251
+ # 1256).
+ if ( !$is_multiline_weld
+ && $iline_ic == $iline_io + 1
+ && $token_oo eq '('
+ && $token_io eq '(' )
+ {
+ if (DEBUG_WELD) {
+ $Msg .= "RULE 0: Not welding due to sheared inner parens\n";
+ print $Msg;
+ }
+ next;
+ }
+
+ # If this pair is not adjacent to the previous pair (skipped or not),
+ # then measure lengths from the start of line of oo.
if (
!$touch_previous_pair
# An existing one-line weld is a line in which
# (1) the containers are all on one line, and
- # (2) the line does not exceed the allowable length, and
- # This flag is used to avoid creating blinkers.
- # FIX1: Changed 'excess_length_to_K' to 'excess_length_of_line'
- # to get exact lengths and fix b604 b605.
+ # (2) the line does not exceed the allowable length
if ( $iline_oo == $iline_oc ) {
- # All the tokens are on one line, now check their length
- my $excess =
- $self->excess_line_length_for_Krange( $Kfirst, $Klast );
- if ( $excess <= 0 ) {
-
- # All tokens are on one line and fit. This is a valid
- # existing one-line weld except for some edge cases
- # involving -lp:
-
- # FIX2: Patch for b1114: add a tolerance of one level if
- # this line has an unbalanced start. This helps prevent
- # blinkers in unusual cases for lines near the length limit
- # by making it more likely that RULE 2 will prevent a weld.
- # FIX3: for b1131: only use level difference in -lp mode.
- # FIX4: for b1141, b1142: reduce the tolerance for longer
- # leading tokens
- if ( $rOpts_line_up_parentheses
- && $outer_opening->[_LEVEL_] -
- $rLL->[$Kfirst]->[_LEVEL_] )
+ # All the tokens are on one line, now check their length.
+ # Start with the full line index range. We will reduce this
+ # in the coding below in some cases.
+ my $Kstart = $Kfirst;
+ my $Kstop = $Klast;
+
+ # Note that the following minimal choice for measuring will
+ # work and will not cause any instabilities because it is
+ # invariant:
+
+ ## my $Kstart = $Kouter_opening;
+ ## my $Kstop = $Kouter_closing;
+
+ # But that can lead to some undesirable welds. So a little
+ # more complicated method has been developed.
+
+ # We are trying to avoid creating bad two-line welds when we are
+ # working on long, previously unwelded input text, such as
+
+ # INPUT (example of a long input line weld candidate):
+ ## $mutation->transpos( $self->RNA->position($mutation->label, $atg_label));
+
+ # GOOD two-line break: (not welded; result marked too long):
+ ## $mutation->transpos(
+ ## $self->RNA->position($mutation->label, $atg_label));
+
+ # BAD two-line break: (welded; result if we weld):
+ ## $mutation->transpos($self->RNA->position(
+ ## $mutation->label, $atg_label));
+
+ # We can only get an approximate estimate of the final length,
+ # since the line breaks may change, and for -lp mode because
+ # even the indentation is not yet known.
+
+ my $level_first = $rLL->[$Kfirst]->[_LEVEL_];
+ my $level_last = $rLL->[$Klast]->[_LEVEL_];
+ my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
+ my $level_oc = $rLL->[$Kouter_closing]->[_LEVEL_];
+
+ # - measure to the end of the original line if balanced
+ # - measure to the closing container if unbalanced (fixes b1230)
+ #if ( $level_first != $level_last ) { $Kstop = $Kouter_closing }
+ if ( $level_oc > $level_last ) { $Kstop = $Kouter_closing }
+
+ # - measure from the start of the original line if balanced
+ # - measure from the most previous token with same level
+ # if unbalanced (b1232)
+ if ( $Kouter_opening > $Kfirst && $level_oo > $level_first ) {
+ $Kstart = $Kouter_opening;
+ for (
+ my $KK = $Kouter_opening - 1 ;
+ $KK > $Kfirst ;
+ $KK -= 1
+ )
{
-
- # We only need a tolerance if the leading text before
- # the first opening token is shorter than the
- # indentation length. For simplicity we just use the
- # length of the first token here. If necessary, we
- # could be more exact in the future and find the
- # total length up to the first opening token.
- # See cases b1114, b1141, b1142.
- my $tolx = max( 0,
- $rOpts_indent_columns -
- $rLL->[$Kfirst]->[_TOKEN_LENGTH_] );
-
- if ( $excess + $tolx <= 0 ) {
- $is_one_line_weld = 1;
- }
- }
- else {
- $is_one_line_weld = 1;
+ next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
+ last if ( $rLL->[$KK]->[_LEVEL_] < $level_oo );
+ $Kstart = $KK;
}
}
+
+ my $excess =
+ $self->excess_line_length_for_Krange( $Kstart, $Kstop );
+
+ # Coding simplified here for case b1219.
+ # Increased tol from 0 to 1 when pvt>0 to fix b1284.
+ $is_one_line_weld = $excess <= $one_line_tol;
}
# DO-NOT-WELD RULE 1:
# $top_label->set_text( gettext(
# "Unable to create personal directory - check permissions.") );
-
if ( $iline_oc == $iline_oo + 1
&& $iline_io == $iline_ic
&& $token_oo eq '(' )
}
} ## end starting new weld sequence
+ else {
+
+ # set the 1-line flag if continuing a weld sequence; fixes b1239
+ $is_one_line_weld = ( $iline_oo == $iline_oc );
+ }
+
# DO-NOT-WELD RULE 2:
# Do not weld an opening paren to an inner one line brace block
# We will just use old line numbers for this test and require
# as here:
# $_[0]->code_handler
- # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
+ # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
# Here is another example where we do not want to weld:
# $wrapped->add_around_modifier(
# The effect of this change on typical code is very minimal. Sometimes
# it may take a second iteration to converge, but this gives protection
# against blinking.
-
if ( !$do_not_weld_rule
&& !$is_one_line_weld
&& $iline_ic == $iline_io )
if ( $token_oo eq '(' || $iline_oo != $iline_io );
}
+ # DO-NOT-WELD RULE 2A:
+ # Do not weld an opening asub brace in -lp mode if -asbl is set. This
+ # helps avoid instabilities in one-line block formation, and fixes
+ # b1241. Previously, the '$is_one_line_weld' flag was tested here
+ # instead of -asbl, and this fixed most cases. But it turns out that
+ # the real problem was the -asbl flag, and switching to this was
+ # necessary to fixe b1268. This also fixes b1269, b1277, b1278.
+ if (
+ !$do_not_weld_rule
+ ##&& $is_one_line_weld
+ && $rOpts_line_up_parentheses
+ && $rOpts_asbl
+ && $ris_asub_block->{$outer_seqno}
+ )
+ {
+ $do_not_weld_rule = '2A';
+ }
+
# DO-NOT-WELD RULE 3:
# Do not weld if this makes our line too long.
# Use a tolerance which depends on if the old tokens were welded
# We can use zero tolerance if it looks like we are working on an
# existing weld.
my $tol =
- $is_one_line_weld || $is_multiline_weld
- ? 0
+ $is_one_line_weld || $is_multiline_weld
+ ? $single_line_tol
: $multiline_tol;
# By how many characters does this exceed the text window?
# then we will do the weld and retain the one-line block
if ( !$do_not_weld_rule && $rOpts->{'cuddled-else'} ) {
- my $block_type = $rLL->[$Kouter_opening]->[_BLOCK_TYPE_];
+ my $block_type = $rblock_type_of_seqno->{$outer_seqno};
if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) {
my $io_line = $inner_opening->[_LINE_INDEX_];
my $ic_line = $inner_closing->[_LINE_INDEX_];
if ($do_not_weld_rule) {
- # After neglecting a pair, we start measuring from start of point io
- my $starting_level = $inner_opening->[_LEVEL_];
- my $starting_ci_level = $inner_opening->[_CI_LEVEL_];
- $starting_lentot =
- $self->cumulative_length_before_K($Kinner_opening);
- $maximum_text_length =
- $maximum_text_length_at_level[$starting_level] -
- $starting_ci_level * $rOpts_continuation_indentation;
+ # After neglecting a pair, we start measuring from start of point
+ # io ... but not if previous type does not like to be separated
+ # from its container (fixes case b1184)
+ my $Kprev = $self->K_previous_nonblank($Kinner_opening);
+ my $type_prev = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'w';
+ if ( !$has_tight_paren{$type_prev} ) {
+ my $starting_level = $inner_opening->[_LEVEL_];
+ my $starting_ci_level = $inner_opening->[_CI_LEVEL_];
+ $starting_lentot =
+ $self->cumulative_length_before_K($Kinner_opening);
+ $maximum_text_length =
+ $maximum_text_length_at_level[$starting_level] -
+ $starting_ci_level * $rOpts_continuation_indentation;
+ }
if (DEBUG_WELD) {
$Msg .= "Not welding due to RULE $do_not_weld_rule\n";
# the bottom of sub 'respace_tokens' which set the values of
# _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
# loop control lines above.
- Fault("sequence = $outer_seqno not defined at K=$KK");
+ Fault("sequence = $outer_seqno not defined at K=$KK")
+ if (DEVEL_MODE);
+ next;
}
my $token = $rtoken_vars->[_TOKEN_];
my $is_old_weld =
( $iline_oo == $iline_io && $iline_ic == $iline_oc );
+ # Fix for case b1189. If quote is marked as type 'Q' then only weld
+ # if the two closing tokens are on the same input line. Otherwise,
+ # the closing line will be output earlier in the pipeline than
+ # other CODE lines and welding will not actually occur. This will
+ # leave a half-welded structure with potential formatting
+ # instability. This might be fixed by adding a check for a weld on
+ # a closing Q token and sending it down the normal channel, but it
+ # would complicate the code and is potentially risky.
+ next
+ if (!$is_old_weld
+ && $next_type eq 'Q'
+ && $iline_ic != $iline_oc );
+
# If welded, the line must not exceed allowed line length
( my $ok_to_weld, $maximum_text_length, $starting_lentot, my $msg )
= $self->setup_new_weld_measurements( $Kouter_opening,
return;
}
-sub is_welded_right_at_i {
- my ( $self, $i ) = @_;
- return unless ( $total_weld_count && $i >= 0 );
-
- # Back up at a blank. This routine is sometimes called at blanks.
- # TODO: this routine can eventually be eliminated by setting the weld flags
- # for all K indexes between the start and end of a weld, not just at
- # sequenced items.
- if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
- return defined( $self->[_rK_weld_right_]->{ $K_to_go[$i] } );
-}
-
sub is_welded_at_seqno {
my ( $self, $seqno ) = @_;
return unless ( $rOpts->{'one-line-block-nesting'} );
- my $K_opening_container = $self->[_K_opening_container_];
- my $K_closing_container = $self->[_K_closing_container_];
- my $rbreak_container = $self->[_rbreak_container_];
- my $rshort_nested = $self->[_rshort_nested_];
- my $rlines = $self->[_rlines_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $rbreak_container = $self->[_rbreak_container_];
+ my $rshort_nested = $self->[_rshort_nested_];
+ my $rlines = $self->[_rlines_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
# Variables needed for estimating line lengths
my $maximum_text_length;
# the bottom of sub 'respace_tokens' which set the values of
# _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
# loop control lines above.
- Fault("sequence = $type_sequence not defined at K=$KK");
+ Fault("sequence = $type_sequence not defined at K=$KK")
+ if (DEVEL_MODE);
+ next;
}
# Patch: do not mark short blocks with welds.
my $token = $rtoken_vars->[_TOKEN_];
my $type = $rtoken_vars->[_TYPE_];
next unless ( $type eq $token );
- my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
- next unless ($block_type);
+ next unless ( $rblock_type_of_seqno->{$type_sequence} );
# Keep a stack of all acceptable block braces seen.
# Only consider blocks entirely on one line so dump the stack when line
# levels. It would be much nicer to have the weld routines also use this
# adjustment, but that gets complicated when we combine -gnu -wn and have
# some welded quotes.
- my $radjusted_levels = $self->[_radjusted_levels_];
+ my $Klimit = $self->[_Klimit_];
my $rLL = $self->[_rLL_];
- foreach my $KK ( 0 .. @{$rLL} - 1 ) {
+ my $radjusted_levels = $self->[_radjusted_levels_];
+
+ return unless ( defined($Klimit) );
+
+ foreach my $KK ( 0 .. $Klimit ) {
$radjusted_levels->[$KK] = $rLL->[$KK]->[_LEVEL_];
}
# Set adjusted levels for the whitespace cycle option.
$self->whitespace_cycle_adjustment();
+ $self->braces_left_setup();
+
# Adjust continuation indentation if -bli is set
$self->bli_adjustment();
$self->extended_ci()
- if ( $rOpts->{'extended-continuation-indentation'} );
+ if ($rOpts_extended_continuation_indentation);
# Now clip any adjusted levels to be non-negative
$self->clip_adjusted_levels();
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
+ my $Klimit = $self->[_Klimit_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
+ my $radjusted_levels = $self->[_radjusted_levels_];
- my $radjusted_levels = $self->[_radjusted_levels_];
- my $Kmax = @{$rLL} - 1;
- my @seqno_stack;
-
- my $is_non_indenting_brace = sub {
- my ($KK) = @_;
-
- # looking for an opening block brace
- my $token = $rLL->[$KK]->[_TOKEN_];
- my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
- return unless ( $token eq '{' && $block_type );
+ # First locate all of the marked blocks
+ my @K_stack;
+ foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
+ my $KK = $K_opening_container->{$seqno};
# followed by a comment
my $K_sc = $KK + 1;
$K_sc += 1
- if ( $K_sc <= $Kmax && $rLL->[$K_sc]->[_TYPE_] eq 'b' );
- return unless ( $K_sc <= $Kmax );
+ if ( $K_sc <= $Klimit && $rLL->[$K_sc]->[_TYPE_] eq 'b' );
+ next unless ( $K_sc <= $Klimit );
my $type_sc = $rLL->[$K_sc]->[_TYPE_];
- return unless ( $type_sc eq '#' );
+ next unless ( $type_sc eq '#' );
# on the same line
my $line_index = $rLL->[$KK]->[_LINE_INDEX_];
my $line_index_sc = $rLL->[$K_sc]->[_LINE_INDEX_];
- return unless ( $line_index_sc == $line_index );
+ next unless ( $line_index_sc == $line_index );
# get the side comment text
my $token_sc = $rLL->[$K_sc]->[_TOKEN_];
# we added it back for the match. That way we require an exact
# match to the special string and also allow additional text.
$token_sc .= "\n";
- my $is_nib = ( $token_sc =~ /$non_indenting_brace_pattern/ );
- if ($is_nib) { $rspecial_side_comment_type->{$K_sc} = 'NIB' }
- return $is_nib;
- };
+ next unless ( $token_sc =~ /$non_indenting_brace_pattern/ );
+ $rspecial_side_comment_type->{$K_sc} = 'NIB';
+ push @K_stack, [ $KK, 1 ];
+ my $Kc = $K_closing_container->{$seqno};
+ push @K_stack, [ $Kc, -1 ] if ( defined($Kc) );
+ }
+ return unless (@K_stack);
+ @K_stack = sort { $a->[0] <=> $b->[0] } @K_stack;
- foreach my $KK ( 0 .. $Kmax ) {
- my $num = @seqno_stack;
- my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
- if ($seqno) {
- my $token = $rLL->[$KK]->[_TOKEN_];
- if ( $token eq '{' && $is_non_indenting_brace->($KK) ) {
- push @seqno_stack, $seqno;
- }
- if ( $token eq '}' && @seqno_stack && $seqno_stack[-1] == $seqno ) {
- pop @seqno_stack;
- $num -= 1;
+ # Then loop to remove indentation within marked blocks
+ my $KK_last = 0;
+ my $ndeep = 0;
+ foreach my $item (@K_stack) {
+ my ( $KK, $inc ) = @{$item};
+ if ( $ndeep > 0 ) {
+
+ foreach ( $KK_last + 1 .. $KK ) {
+ $radjusted_levels->[$_] -= $ndeep;
}
+
+ # We just subtracted the old $ndeep value, which only applies to a
+ # '{'. The new $ndeep applies to a '}', so we undo the error.
+ if ( $inc < 0 ) { $radjusted_levels->[$KK] += 1 }
}
- next unless $num;
- $radjusted_levels->[$KK] -= $num;
+
+ $ndeep += $inc;
+ $KK_last = $KK;
}
return;
}
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
my $radjusted_levels = $self->[_radjusted_levels_];
+ my $maximum_level = $self->[_maximum_level_];
- my $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
- if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) {
+ if ( $rOpts_whitespace_cycle
+ && $rOpts_whitespace_cycle > 0
+ && $rOpts_whitespace_cycle < $maximum_level )
+ {
my $Kmax = @{$rLL} - 1;
my $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
my $rK_weld_right = $self->[_rK_weld_right_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $length_tol =
max( 1, $rOpts_continuation_indentation, $rOpts_indent_columns );
if ($rOpts_ignore_old_breakpoints) {
- $length_tol += $rOpts_maximum_line_length;
+
+ # Patch suggested by b1231; the old tol was excessive.
+ ## $length_tol += $rOpts_maximum_line_length;
+ $length_tol *= 2;
}
my $rbreak_before_container_by_seqno = {};
my $rwant_reduced_ci = {};
foreach my $seqno ( keys %{$K_opening_container} ) {
- #################################################################
+ #----------------------------------------------------------------
# Part 1: Examine any -bbx=n flags
- #################################################################
+ #----------------------------------------------------------------
+ next if ( $rblock_type_of_seqno->{$seqno} );
my $KK = $K_opening_container->{$seqno};
- next if ( $rLL->[$KK]->[_BLOCK_TYPE_] );
# This must be a list or contain a list.
# Note1: switched from 'has_broken_list' to 'has_list' to fix b1024.
my $break_option = $break_before_container_types{$token};
next unless ($break_option);
+ # Do not use -bbx under stress for stability ... fixes b1300
+ my $level = $rLL->[$KK]->[_LEVEL_];
+ if ( $level >= $stress_level_beta ) {
+ DEBUG_BBX
+ && print
+"BBX: Switching off at $seqno: level=$level exceeds beta stress level=$stress_level_beta\n";
+ next;
+ }
+
# Require previous nonblank to be '=' or '=>'
my $Kprev = $KK - 1;
next if ( $Kprev < 0 );
my $ci = $rLL->[$KK]->[_CI_LEVEL_];
+ #--------------------------------------------
+ # New coding for option 2 (break if complex).
+ #--------------------------------------------
+ # This new coding uses clues which are invariant under formatting to
+ # decide if a list is complex. For now it is only applied when -lp
+ # and -vmll are used, but eventually it may become the standard method.
+ # Fixes b1274, b1275, and others, including b1099.
+ if ( $break_option == 2 ) {
+
+ if ( $rOpts_line_up_parentheses
+ || $rOpts_variable_maximum_line_length )
+ {
+
+ # Start with the basic definition of a complex list...
+ my $is_complex = $is_list && $has_list;
+
+ # and it is also complex if the parent is a list
+ if ( !$is_complex ) {
+ my $parent = $rparent_of_seqno->{$seqno};
+ if ( $self->is_list_by_seqno($parent) ) {
+ $is_complex = 1;
+ }
+ }
+
+ # finally, we will call it complex if there are inner opening
+ # and closing container tokens, not parens, within the outer
+ # container tokens.
+ if ( !$is_complex ) {
+ my $Kp = $self->K_next_nonblank($KK);
+ my $token_p = defined($Kp) ? $rLL->[$Kp]->[_TOKEN_] : 'b';
+ if ( $is_opening_token{$token_p} && $token_p ne '(' ) {
+
+ my $Kc = $K_closing_container->{$seqno};
+ my $Km = $self->K_previous_nonblank($Kc);
+ my $token_m =
+ defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
+
+ # ignore any optional ending comma
+ if ( $token_m eq ',' ) {
+ $Km = $self->K_previous_nonblank($Km);
+ $token_m =
+ defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
+ }
+
+ $is_complex ||=
+ $is_closing_token{$token_m} && $token_m ne ')';
+ }
+ }
+
+ # Convert to option 3 (always break) if complex
+ next unless ($is_complex);
+ $break_option = 3;
+ }
+ }
+
+ # Fix for b1231: the has_list_with_lec does not cover all cases.
+ # A broken container containing a list and with line-ending commas
+ # will stay broken, so can be treated as if it had a list with lec.
+ $has_list_with_lec ||=
+ $has_list
+ && $ris_broken_container->{$seqno}
+ && $rlec_count_by_seqno->{$seqno};
+
DEBUG_BBX
&& print STDOUT
"BBX: Looking at seqno=$seqno, token = $token with option=$break_option\n";
}
}
- # Patch to fix b1099 for -lp
- # ok in -lp mode if this is a list which contains a list
- if ( !$ok_to_break && $rOpts_line_up_parentheses ) {
- if ( $is_list && $has_list ) {
- $ok_to_break = 1;
- DEBUG_BBX && do { $Msg = "is list or has list" };
- }
- }
-
if ( !$ok_to_break ) {
DEBUG_BBX
&& print STDOUT "Not breaking at seqno=$seqno: $Msg\n";
next unless ($ci_flag);
# -bbxi=1: This option removes ci and is handled in
- # later sub set_adjusted_indentation
+ # later sub final_indentation_adjustment
if ( $ci_flag == 1 ) {
$rwant_reduced_ci->{$seqno} = 1;
next;
# -bbxi=2 ...
- #################################################################
+ #----------------------------------------------------------------
# Part 2: Perform tests before committing to changing ci and level
- #################################################################
+ #----------------------------------------------------------------
# Before changing the ci level of the opening container, we need
# to be sure that the container will be broken in the later stages of
# Only consider containers already broken
next if ( !$ris_broken_container->{$seqno} );
+ # Patch to fix issue b1305: the combination of -naws and ci>i appears
+ # to cause an instability. It should almost never occur in practice.
+ next
+ if (!$rOpts_add_whitespace
+ && $rOpts_continuation_indentation > $rOpts_indent_columns );
+
# Always ok to change ci for permanently broken containers
if ( $ris_permanently_broken->{$seqno} ) {
goto OK;
# single line. Use the least possble indentation in the estmate (ci=0),
# so we are not subtracting $ci * $rOpts_continuation_indentation from
# tablulated $maximum_text_length value.
- my $level = $rLL->[$KK]->[_LEVEL_];
my $maximum_text_length = $maximum_text_length_at_level[$level];
my $K_closing = $K_closing_container->{$seqno};
my $length = $self->cumulative_length_before_K($K_closing) -
my $rlines = $self->[_rlines_];
my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
my $ris_bli_container = $self->[_ris_bli_container_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my %available_space;
# Certain block types arrive from the tokenizer without CI but should
# have it for this option. These include anonymous subs and
# do sort map grep eval
- my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
+ my $block_type = $rblock_type_of_seqno->{$seqno};
if ( $block_type && $is_block_with_ci{$block_type} ) {
$rLL->[$KK]->[_CI_LEVEL_] = 1;
if ($seqno_top) {
$maximum_text_length_at_level[$level] -
$ci_level * $rOpts_continuation_indentation;
+ # Fix for b1197 b1198 b1199 b1200 b1201 b1202
+ # Do not apply -xci if we are running out of space
+ if ( $level >= $stress_level_beta ) {
+ DEBUG_XCI
+ && print
+"XCI: Skipping seqno=$seqno, level=$level exceeds stress level=$stress_level_beta\n";
+ next;
+ }
+
# remember how much space is available for patch b1031 above
my $space =
$maximum_text_length - $len_tol - $rOpts_continuation_indentation;
return;
}
+sub braces_left_setup {
+
+ # Called once per file to mark all -bl, -sbl, and -asbl containers
+ my $self = shift;
+
+ my $rOpts_bl = $rOpts->{'opening-brace-on-new-line'};
+ my $rOpts_sbl = $rOpts->{'opening-sub-brace-on-new-line'};
+ my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
+ return unless ( $rOpts_bl || $rOpts_sbl || $rOpts_asbl );
+
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
+
+ # We will turn on this hash for braces controlled by these flags:
+ my $rbrace_left = $self->[_rbrace_left_];
+
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my $ris_asub_block = $self->[_ris_asub_block_];
+ my $ris_sub_block = $self->[_ris_sub_block_];
+ foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
+
+ my $block_type = $rblock_type_of_seqno->{$seqno};
+
+ # use -asbl flag for an anonymous sub block
+ if ( $ris_asub_block->{$seqno} ) {
+ if ($rOpts_asbl) {
+ $rbrace_left->{$seqno} = 1;
+ }
+ }
+
+ # use -sbl flag for a named sub
+ elsif ( $ris_sub_block->{$seqno} ) {
+ if ($rOpts_sbl) {
+ $rbrace_left->{$seqno} = 1;
+ }
+ }
+
+ # use -bl flag if not a sub block of any type
+ else {
+ if ( $rOpts_bl
+ && $block_type =~ /$bl_pattern/
+ && $block_type !~ /$bl_exclusion_pattern/ )
+ {
+ $rbrace_left->{$seqno} = 1;
+ }
+ }
+ }
+ return;
+}
+
sub bli_adjustment {
# Called once per file to implement the --brace-left-and-indent option.
return unless ( $rOpts->{'brace-left-and-indent'} );
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
- my $ris_bli_container = $self->[_ris_bli_container_];
- my $K_opening_container = $self->[_K_opening_container_];
- my $KNEXT = $self->[_K_first_seq_item_];
- while ( defined($KNEXT) ) {
- my $KK = $KNEXT;
- $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
- my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
- if ( $block_type && $block_type =~ /$bli_pattern/ ) {
- my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
- my $K_opening = $K_opening_container->{$seqno};
- if ( defined($K_opening) ) {
- if ( $KK eq $K_opening ) {
- $rLL->[$KK]->[_CI_LEVEL_]++;
- $ris_bli_container->{$seqno} = 1;
- }
- else {
- $rLL->[$KK]->[_CI_LEVEL_] =
- $rLL->[$K_opening]->[_CI_LEVEL_];
- }
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my $ris_bli_container = $self->[_ris_bli_container_];
+ my $rbrace_left = $self->[_rbrace_left_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+
+ foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
+ my $block_type = $rblock_type_of_seqno->{$seqno};
+ if ( $block_type
+ && $block_type =~ /$bli_pattern/
+ && $block_type !~ /$bli_exclusion_pattern/ )
+ {
+ $ris_bli_container->{$seqno} = 1;
+ $rbrace_left->{$seqno} = 1;
+ my $Ko = $K_opening_container->{$seqno};
+ my $Kc = $K_closing_container->{$seqno};
+ if ( defined($Ko) && defined($Kc) ) {
+ $rLL->[$Kc]->[_CI_LEVEL_] = ++$rLL->[$Ko]->[_CI_LEVEL_];
}
}
}
# Give multiline qw lists extra indentation instead of CI. This option
# works well but is currently only activated when the -xci flag is set.
# The reason is to avoid unexpected changes in formatting.
- if ( $rOpts->{'extended-continuation-indentation'} ) {
+ if ($rOpts_extended_continuation_indentation) {
while ( my ( $qw_seqno, $rKrange ) =
each %{$rKrange_multiline_qw_by_seqno} )
{
my $token_beg = $rLL->[$Kbeg]->[_TOKEN_];
# allow space(s) after the qw
- if ( length($token_beg) > 3 && substr( $token_beg, 2, 1 ) eq ' ' ) {
+ if ( length($token_beg) > 3 && substr( $token_beg, 2, 1 ) =~ m/\s/ )
+ {
$token_beg =~ s/\s+//;
}
# For the -lp option we need to mark all parent containers of
# multiline quotes
- if ($rOpts_line_up_parentheses) {
+ if ( $rOpts_line_up_parentheses && !$rOpts_extended_line_up_parentheses ) {
while ( my ( $qw_seqno, $rKrange ) =
each %{$rKrange_multiline_qw_by_seqno} )
return;
}
-sub is_excluded_lp {
+use constant DEBUG_COLLAPSED_LENGTHS => 0;
- # decide if this container is excluded by user request
- # returns true if this token is excluded (i.e., may not use -lp)
- # returns false otherwise
+# Minimum space reserved for contents of a code block. A value of 40 has given
+# reasonable results. With a large line length, say -l=120, this will not
+# normally be noticable but it will prevent making a mess in some edge cases.
+use constant MIN_BLOCK_LEN => 40;
- # note similarity with sub 'is_excluded_weld'
- my ( $self, $KK ) = @_;
- my $rLL = $self->[_rLL_];
- my $rtoken_vars = $rLL->[$KK];
- my $token = $rtoken_vars->[_TOKEN_];
- my $rflags = $line_up_parentheses_exclusion_rules{$token};
- return 0 unless ( defined($rflags) );
- my ( $flag1, $flag2 ) = @{$rflags};
+my %is_handle_type;
- # There are two flags:
- # flag1 excludes based on the preceding nonblank word
- # flag2 excludes based on the contents of the container
- return 0 unless ( defined($flag1) );
- return 1 if $flag1 eq '*';
+BEGIN {
+ my @q = qw( w C U G i k => );
+ @is_handle_type{@q} = (1) x scalar(@q);
- # Find the previous token
- my ( $is_f, $is_k, $is_w );
- my $Kp = $self->K_previous_nonblank($KK);
- if ( defined($Kp) ) {
- my $type_p = $rLL->[$Kp]->[_TYPE_];
- my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
+ my $i = 0;
+ use constant {
+ _max_prong_len_ => $i++,
+ _handle_len_ => $i++,
+ _seqno_o_ => $i++,
+ _iline_o_ => $i++,
+ _K_o_ => $i++,
+ _K_c_ => $i++,
+ _interrupted_list_rule_ => $i++,
+ };
+}
- # keyword?
- $is_k = $type_p eq 'k';
+sub collapsed_lengths {
- # function call?
- $is_f = $self->[_ris_function_call_paren_]->{$seqno};
+ my $self = shift;
- # either keyword or function call?
- $is_w = $is_k || $is_f;
- }
+ #----------------------------------------------------------------
+ # Define the collapsed lengths of containers for -xlp indentation
+ #----------------------------------------------------------------
- # Check for exclusion based on flag1 and the previous token:
- my $match;
- if ( $flag1 eq 'k' ) { $match = $is_k }
- elsif ( $flag1 eq 'K' ) { $match = !$is_k }
- elsif ( $flag1 eq 'f' ) { $match = $is_f }
- elsif ( $flag1 eq 'F' ) { $match = !$is_f }
- elsif ( $flag1 eq 'w' ) { $match = $is_w }
- elsif ( $flag1 eq 'W' ) { $match = !$is_w }
- return $match if ($match);
-
- # Check for exclusion based on flag2 and the container contents
- # Current options to filter on contents:
- # 0 or blank: ignore container contents
- # 1 exclude non-lists or lists with sublists
- # 2 same as 1 but also exclude lists with code blocks
-
- # Note:
- # Containers with multiline-qw containers are automatically
- # excluded so do not need to be checked.
- if ($flag2) {
+ # We need an estimate of the minimum required line length starting at any
+ # opening container for the -xlp style. This is needed to avoid using too
+ # much indentation space for lower level containers and thereby running
+ # out of space for outer container tokens due to the maximum line length
+ # limit.
- my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
+ # The basic idea is that at each node in the tree we imagine that we have a
+ # fork with a handle and collapsable prongs:
+ #
+ # |------------
+ # |--------
+ # ------------|-------
+ # handle |------------
+ # |--------
+ # prongs
+ #
+ # Each prong has a minimum collapsed length. The collapsed length at a node
+ # is the maximum of these minimum lengths, plus the handle length. Each of
+ # the prongs may itself be a tree node.
- my $is_list = $self->[_ris_list_by_seqno_]->{$seqno};
- my $has_list = $self->[_rhas_list_]->{$seqno};
- my $has_code_block = $self->[_rhas_code_block_]->{$seqno};
- my $has_ternary = $self->[_rhas_ternary_]->{$seqno};
- if ( !$is_list
- || $has_list
- || $flag2 eq '2' && ( $has_code_block || $has_ternary ) )
- {
- $match = 1;
- }
- }
- return $match;
-}
+ # This is just a rough calculation to get an approximate starting point for
+ # indentation. Later routines will be more precise. It is important that
+ # these estimates be independent of the line breaks of the input stream in
+ # order to avoid instabilities.
-sub set_excluded_lp_containers {
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
+ my $rlines = $self->[_rlines_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
+ my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
+ my $ris_permanently_broken = $self->[_ris_permanently_broken_];
+ my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
+ my $rhas_broken_list = $self->[_rhas_broken_list_];
- my ($self) = @_;
- return unless ($rOpts_line_up_parentheses);
- my $rLL = $self->[_rLL_];
- return unless ( defined($rLL) && @{$rLL} );
+ my $K_start_multiline_qw;
+ my $level_start_multiline_qw = 0;
+ my $max_prong_len = 0;
+ my $handle_len = 0;
+ my @stack;
+ my $len = 0;
+ my $last_nonblank_type = 'b';
+ push @stack,
+ [ $max_prong_len, $handle_len, SEQ_ROOT, undef, undef, undef, undef ];
- my $K_opening_container = $self->[_K_opening_container_];
- my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
+ my $iline = -1;
+ foreach my $line_of_tokens ( @{$rlines} ) {
+ $iline++;
+ my $line_type = $line_of_tokens->{_line_type};
+ next if ( $line_type ne 'CODE' );
+ my $CODE_type = $line_of_tokens->{_code_type};
- foreach my $seqno ( keys %{$K_opening_container} ) {
- my $KK = $K_opening_container->{$seqno};
- next unless defined($KK);
+ # Always skip blank lines
+ next if ( $CODE_type eq 'BL' );
- # code blocks are always excluded by the -lp coding so we can skip them
- next if ( $rLL->[$KK]->[_BLOCK_TYPE_] );
+ # Note on other line types:
+ # 'FS' (Format Skipping) lines may contain opening/closing tokens so
+ # we have to process them to keep the stack correctly sequenced.
+ # 'VB' (Verbatim) lines could be skipped, but testing shows that
+ # results look better if we include their lengths.
- # see if a user exclusion rule turns off -lp for this container
- if ( $self->is_excluded_lp($KK) ) {
- $ris_excluded_lp_container->{$seqno} = 1;
- }
- }
- return;
-}
+ # Also note that we could exclude -xlp formatting of containers with
+ # 'FS' and 'VB' lines, but in testing that was not really beneficial.
-######################################
-# CODE SECTION 6: Process line-by-line
-######################################
+ # So we process tokens in 'FS' and 'VB' lines like all the rest...
-sub process_all_lines {
+ my $rK_range = $line_of_tokens->{_rK_range};
+ my ( $K_first, $K_last ) = @{$rK_range};
+ next unless ( defined($K_first) && defined($K_last) );
+
+ my $has_comment = $rLL->[$K_last]->[_TYPE_] eq '#';
+
+ # Always ignore block comments
+ next if ( $has_comment && $K_first == $K_last );
+
+ # Handle an intermediate line of a multiline qw quote. These may
+ # require including some -ci or -i spaces. See cases c098/x063.
+ # Updated to check all lines (not just $K_first==$K_last) to fix b1316
+ my $K_begin_loop = $K_first;
+ if ( $rLL->[$K_first]->[_TYPE_] eq 'q' ) {
+
+ my $KK = $K_first;
+ my $level = $rLL->[$KK]->[_LEVEL_];
+ my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
+
+ # remember the level of the start
+ if ( !defined($K_start_multiline_qw) ) {
+ $K_start_multiline_qw = $K_first;
+ $level_start_multiline_qw = $level;
+ my $seqno_qw =
+ $self->[_rstarting_multiline_qw_seqno_by_K_]
+ ->{$K_start_multiline_qw};
+ if ( !$seqno_qw ) {
+ my $Kp = $self->K_previous_nonblank($K_first);
+ if ( defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' ) {
+
+ $K_start_multiline_qw = $Kp;
+ $level_start_multiline_qw =
+ $rLL->[$K_start_multiline_qw]->[_LEVEL_];
+ }
+ }
+ }
- # Main loop over all lines of a file.
- # Lines are processed according to type.
+ $len = $rLL->[$KK]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
- my $self = shift;
- my $rlines = $self->[_rlines_];
- my $sink_object = $self->[_sink_object_];
- my $fh_tee = $self->[_fh_tee_];
- my $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
- my $file_writer_object = $self->[_file_writer_object_];
- my $logger_object = $self->[_logger_object_];
- my $vertical_aligner_object = $self->[_vertical_aligner_object_];
- my $save_logfile = $self->[_save_logfile_];
+ # We may have to add the spaces of one level or ci level ... it
+ # depends depends on the -xci flag, the -wn flag, and if the qw
+ # uses a container token as the quote delimiter.
- # Note for RT#118553, leave only one newline at the end of a file.
- # Example code to do this is in comments below:
- # my $Opt_trim_ending_blank_lines = 0;
- # if ($Opt_trim_ending_blank_lines) {
- # while ( my $line_of_tokens = pop @{$rlines} ) {
- # my $line_type = $line_of_tokens->{_line_type};
- # if ( $line_type eq 'CODE' ) {
- # my $CODE_type = $line_of_tokens->{_code_type};
- # next if ( $CODE_type eq 'BL' );
- # }
- # push @{$rlines}, $line_of_tokens;
- # last;
- # }
- # }
+ # First rule: add ci if there is a $ci_level
+ if ($ci_level) {
+ $len += $rOpts_continuation_indentation;
+ }
- # But while this would be a trivial update, it would have very undesirable
- # side effects when perltidy is run from within an editor on a small snippet.
- # So this is best done with a separate filter, such
- # as 'delete_ending_blank_lines.pl' in the examples folder.
+ # Second rule: otherwise, look for an extra indentation level
+ # from the start and add one indentation level if found.
+ elsif ( $level > $level_start_multiline_qw ) {
+ $len += $rOpts_indent_columns;
+ }
- # Flag to prevent blank lines when POD occurs in a format skipping sect.
- my $in_format_skipping_section;
+ if ( $len > $max_prong_len ) { $max_prong_len = $len }
- # set locations for blanks around long runs of keywords
- my $rwant_blank_line_after = $self->keyword_group_scan();
+ $last_nonblank_type = 'q';
- my $line_type = "";
- my $i_last_POD_END = -10;
- my $i = -1;
- foreach my $line_of_tokens ( @{$rlines} ) {
- $i++;
+ $K_begin_loop = $K_first + 1;
+
+ # We can skip to the next line if more tokens
+ next if ( $K_begin_loop > $K_last );
- # insert blank lines requested for keyword sequences
- if ( $i > 0
- && defined( $rwant_blank_line_after->{ $i - 1 } )
- && $rwant_blank_line_after->{ $i - 1 } == 1 )
- {
- $self->want_blank_line();
+ }
+ $K_start_multiline_qw = undef;
+
+ # Find the terminal token, before any side comment
+ my $K_terminal = $K_last;
+ if ($has_comment) {
+ $K_terminal -= 1;
+ $K_terminal -= 1
+ if ( $rLL->[$K_terminal]->[_TYPE_] eq 'b'
+ && $K_terminal > $K_first );
+ }
+
+ # Use length to terminal comma if interrupded list rule applies
+ if ( @stack && $stack[-1]->[_interrupted_list_rule_] ) {
+ my $K_c = $stack[-1]->[_K_c_];
+ if (
+ defined($K_c)
+ && $rLL->[$K_terminal]->[_TYPE_] eq ','
+
+ # Ignore a terminal comma, causes instability (b1297)
+ && ( $K_c - $K_terminal > 2
+ || $rLL->[ $K_terminal + 1 ]->[_TYPE_] eq 'b' )
+ )
+ {
+ my $Kend = $K_terminal;
+
+ # This caused an instability in b1311 by making the result
+ # dependent on input. It is not really necessary because the
+ # comment length is added at the end of the loop.
+ ##if ( $has_comment
+ ## && !$rOpts_ignore_side_comment_lengths )
+ ##{
+ ## $Kend = $K_last;
+ ##}
+
+ $len = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[ $K_first - 1 ]->[_CUMULATIVE_LENGTH_];
+
+ if ( $len > $max_prong_len ) { $max_prong_len = $len }
+ }
+ }
+
+ # Loop over tokens on this line ...
+ foreach my $KK ( $K_begin_loop .. $K_terminal ) {
+
+ my $type = $rLL->[$KK]->[_TYPE_];
+ next if ( $type eq 'b' );
+
+ #------------------------
+ # Handle sequenced tokens
+ #------------------------
+ my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+ if ($seqno) {
+
+ my $token = $rLL->[$KK]->[_TOKEN_];
+
+ #----------------------------
+ # Entering a new container...
+ #----------------------------
+ if ( $is_opening_token{$token} ) {
+
+ # save current prong length
+ $stack[-1]->[_max_prong_len_] = $max_prong_len;
+ $max_prong_len = 0;
+
+ # Start new prong one level deeper
+ my $handle_len = 0;
+ if ( $rblock_type_of_seqno->{$seqno} ) {
+
+ # code blocks do not use -lp indentation, but behave as
+ # if they had a handle of one indentation length
+ $handle_len = $rOpts_indent_columns;
+
+ }
+ elsif ( $is_handle_type{$last_nonblank_type} ) {
+ $handle_len = $len;
+ $handle_len += 1
+ if ( $KK > 0 && $rLL->[ $KK - 1 ]->[_TYPE_] eq 'b' );
+ }
+
+ # Set a flag if the 'Interrupted List Rule' will be applied
+ # (see sub copy_old_breakpoints).
+ # - Added check on has_broken_list to fix issue b1298
+
+ my $interrupted_list_rule =
+ $ris_permanently_broken->{$seqno}
+ && $ris_list_by_seqno->{$seqno}
+ && !$rhas_broken_list->{$seqno}
+ && !$rOpts_ignore_old_breakpoints;
+
+ # NOTES: Since we are looking at old line numbers we have
+ # to be very careful not to introduce an instability.
+
+ # This following causes instability (b1288-b1296):
+ # $interrupted_list_rule ||=
+ # $rOpts_break_at_old_comma_breakpoints;
+
+ # - We could turn off the interrupted list rule if there is
+ # a broken sublist, to follow 'Compound List Rule 1'.
+ # - We could use the _rhas_broken_list_ flag for this.
+ # - But it seems safer not to do this, to avoid
+ # instability, since the broken sublist could be
+ # temporary. It seems better to let the formatting
+ # stabilize by itself after one or two iterations.
+ # - So, not doing this for now
+
+ # Include length to a comma ending this line
+ if ( $interrupted_list_rule
+ && $rLL->[$K_terminal]->[_TYPE_] eq ',' )
+ {
+ my $Kend = $K_terminal;
+ if ( $Kend < $K_last
+ && !$rOpts_ignore_side_comment_lengths )
+ {
+ $Kend = $K_last;
+ }
+
+ # Measure from the next blank if any (fixes b1301)
+ my $Kbeg = $KK;
+ if ( $rLL->[ $Kbeg + 1 ]->[_TYPE_] eq 'b'
+ && $Kbeg < $Kend )
+ {
+ $Kbeg++;
+ }
+
+ my $len = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[$Kbeg]->[_CUMULATIVE_LENGTH_];
+ if ( $len > $max_prong_len ) { $max_prong_len = $len }
+ }
+
+ my $K_c = $K_closing_container->{$seqno};
+
+ push @stack,
+ [
+ $max_prong_len, $handle_len,
+ $seqno, $iline,
+ $KK, $K_c,
+ $interrupted_list_rule
+ ];
+ }
+
+ #--------------------
+ # Exiting a container
+ #--------------------
+ elsif ( $is_closing_token{$token} ) {
+ if (@stack) {
+
+ # The current prong ends - get its handle
+ my $item = pop @stack;
+ my $handle_len = $item->[_handle_len_];
+ my $seqno_o = $item->[_seqno_o_];
+ my $iline_o = $item->[_iline_o_];
+ my $K_o = $item->[_K_o_];
+ my $K_c_expect = $item->[_K_c_];
+ my $collapsed_len = $max_prong_len;
+
+ if ( $seqno_o ne $seqno ) {
+
+ # Shouldn't happen - must have skipped some lines.
+ # Not fatal but -lp formatting could get messed up.
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+sequence numbers differ; at CLOSING line $iline, seq=$seqno, Kc=$KK .. at OPENING line $iline_o, seq=$seqno_o, Ko=$K_o, expecting Kc=$K_c_expect
+EOM
+ }
+ }
+
+ #------------------------------------------
+ # Rules to avoid scrunching code blocks ...
+ #------------------------------------------
+ # Some test cases:
+ # c098/x107 x108 x110 x112 x114 x115 x117 x118 x119
+ if ( $rblock_type_of_seqno->{$seqno} ) {
+
+ my $K_c = $KK;
+ my $block_length = MIN_BLOCK_LEN;
+ my $is_one_line_block;
+ my $level = $rLL->[$K_o]->[_LEVEL_];
+ if ( defined($K_o) && defined($K_c) ) {
+ my $block_length =
+ $rLL->[ $K_c - 1 ]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[$K_o]->[_CUMULATIVE_LENGTH_];
+ $is_one_line_block = $iline == $iline_o;
+ }
+
+ # Code block rule 1: Use the total block length if
+ # it is less than the minimum.
+ if ( $block_length < MIN_BLOCK_LEN ) {
+ $collapsed_len = $block_length;
+ }
+
+ # Code block rule 2: Use the full length of a
+ # one-line block to avoid breaking it, unless
+ # extremely long. We do not need to do a precise
+ # check here, because if it breaks then it will
+ # stay broken on later iterations.
+ elsif ($is_one_line_block
+ && $block_length <
+ $maximum_line_length_at_level[$level] )
+ {
+ $collapsed_len = $block_length;
+ }
+
+ # Code block rule 3: Otherwise the length should be
+ # at least MIN_BLOCK_LEN to avoid scrunching code
+ # blocks.
+ elsif ( $collapsed_len < MIN_BLOCK_LEN ) {
+ $collapsed_len = MIN_BLOCK_LEN;
+ }
+ }
+
+ # Store the result. Some extra space, '2', allows for
+ # length of an opening token, inside space, comma, ...
+ # This constant has been tuned to give good overall
+ # results.
+ $collapsed_len += 2;
+ $rcollapsed_length_by_seqno->{$seqno} = $collapsed_len;
+
+ # Restart scanning the lower level prong
+ if (@stack) {
+ $max_prong_len = $stack[-1]->[_max_prong_len_];
+ $collapsed_len += $handle_len;
+ if ( $collapsed_len > $max_prong_len ) {
+ $max_prong_len = $collapsed_len;
+ }
+ }
+ }
+ }
+
+ # it is a ternary - no special processing for these yet
+ else {
+
+ }
+
+ $len = 0;
+ $last_nonblank_type = $type;
+ next;
+ }
+
+ #----------------------------
+ # Handle non-container tokens
+ #----------------------------
+ my $token_length = $rLL->[$KK]->[_TOKEN_LENGTH_];
+
+ # Count lengths of things like 'xx => yy' as a single item
+ if ( $type eq '=>' ) {
+ $len += $token_length + 1;
+ if ( $len > $max_prong_len ) { $max_prong_len = $len }
+ }
+ elsif ( $last_nonblank_type eq '=>' ) {
+ $len += $token_length;
+ if ( $len > $max_prong_len ) { $max_prong_len = $len }
+
+ # but only include one => per item
+ if ( $last_nonblank_type eq '=>' ) { $len = $token_length }
+ }
+
+ # include everthing to end of line after a here target
+ elsif ( $type eq 'h' ) {
+ $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
+ if ( $len > $max_prong_len ) { $max_prong_len = $len }
+ }
+
+ # for everything else just use the token length
+ else {
+ $len = $token_length;
+ if ( $len > $max_prong_len ) { $max_prong_len = $len }
+ }
+ $last_nonblank_type = $type;
+
+ } ## end loop over tokens on this line
+
+ # Now take care of any side comment
+ if ($has_comment) {
+ if ($rOpts_ignore_side_comment_lengths) {
+ $len = 0;
+ }
+ else {
+
+ # For a side comment when -iscl is not set, measure length from
+ # the start of the previous nonblank token
+ my $len0 =
+ $K_terminal > 0
+ ? $rLL->[ $K_terminal - 1 ]->[_CUMULATIVE_LENGTH_]
+ : 0;
+ $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] - $len0;
+ if ( $len > $max_prong_len ) { $max_prong_len = $len }
+ }
+ }
+
+ } ## end loop over lines
+
+ if (DEBUG_COLLAPSED_LENGTHS) {
+ print "\nCollapsed lengths--\n";
+ foreach
+ my $key ( sort { $a <=> $b } keys %{$rcollapsed_length_by_seqno} )
+ {
+ my $clen = $rcollapsed_length_by_seqno->{$key};
+ print "$key -> $clen\n";
+ }
+ }
+
+ return;
+}
+
+sub is_excluded_lp {
+
+ # Decide if this container is excluded by user request:
+ # returns true if this token is excluded (i.e., may not use -lp)
+ # returns false otherwise
+
+ # The control hash can either describe:
+ # what to exclude: $line_up_parentheses_control_is_lxpl = 1, or
+ # what to include: $line_up_parentheses_control_is_lxpl = 0
+
+ my ( $self, $KK ) = @_;
+ my $rLL = $self->[_rLL_];
+ my $rtoken_vars = $rLL->[$KK];
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $rflags = $line_up_parentheses_control_hash{$token};
+
+ #-----------------------------------------------
+ # TEST #1: check match to listed container types
+ #-----------------------------------------------
+ if ( !defined($rflags) ) {
+
+ # There is no entry for this container, so we are done
+ return !$line_up_parentheses_control_is_lxpl;
+ }
+
+ my ( $flag1, $flag2 ) = @{$rflags};
+
+ #-----------------------------------------------------------
+ # TEST #2: check match to flag1, the preceding nonblank word
+ #-----------------------------------------------------------
+ my $match_flag1 = !defined($flag1) || $flag1 eq '*';
+ if ( !$match_flag1 ) {
+
+ # Find the previous token
+ my ( $is_f, $is_k, $is_w );
+ my $Kp = $self->K_previous_nonblank($KK);
+ if ( defined($Kp) ) {
+ my $type_p = $rLL->[$Kp]->[_TYPE_];
+ my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
+
+ # keyword?
+ $is_k = $type_p eq 'k';
+
+ # function call?
+ $is_f = $self->[_ris_function_call_paren_]->{$seqno};
+
+ # either keyword or function call?
+ $is_w = $is_k || $is_f;
+ }
+
+ # Check for match based on flag1 and the previous token:
+ if ( $flag1 eq 'k' ) { $match_flag1 = $is_k }
+ elsif ( $flag1 eq 'K' ) { $match_flag1 = !$is_k }
+ elsif ( $flag1 eq 'f' ) { $match_flag1 = $is_f }
+ elsif ( $flag1 eq 'F' ) { $match_flag1 = !$is_f }
+ elsif ( $flag1 eq 'w' ) { $match_flag1 = $is_w }
+ elsif ( $flag1 eq 'W' ) { $match_flag1 = !$is_w }
+ }
+
+ # See if we can exclude this based on the flag1 test...
+ if ($line_up_parentheses_control_is_lxpl) {
+ return 1 if ($match_flag1);
+ }
+ else {
+ return 1 if ( !$match_flag1 );
+ }
+
+ #-------------------------------------------------------------
+ # TEST #3: exclusion based on flag2 and the container contents
+ #-------------------------------------------------------------
+
+ # Note that this is an exclusion test for both -lpxl or -lpil input methods
+ # The options are:
+ # 0 or blank: ignore container contents
+ # 1 exclude non-lists or lists with sublists
+ # 2 same as 1 but also exclude lists with code blocks
+
+ my $match_flag2;
+ if ($flag2) {
+
+ my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
+
+ my $is_list = $self->[_ris_list_by_seqno_]->{$seqno};
+ my $has_list = $self->[_rhas_list_]->{$seqno};
+ my $has_code_block = $self->[_rhas_code_block_]->{$seqno};
+ my $has_ternary = $self->[_rhas_ternary_]->{$seqno};
+
+ if ( !$is_list
+ || $has_list
+ || $flag2 eq '2' && ( $has_code_block || $has_ternary ) )
+ {
+ $match_flag2 = 1;
+ }
+ }
+ return $match_flag2;
+}
+
+sub set_excluded_lp_containers {
+
+ my ($self) = @_;
+ return unless ($rOpts_line_up_parentheses);
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
+
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+
+ foreach my $seqno ( keys %{$K_opening_container} ) {
+
+ # code blocks are always excluded by the -lp coding so we can skip them
+ next if ( $rblock_type_of_seqno->{$seqno} );
+
+ my $KK = $K_opening_container->{$seqno};
+ next unless defined($KK);
+
+ # see if a user exclusion rule turns off -lp for this container
+ if ( $self->is_excluded_lp($KK) ) {
+ $ris_excluded_lp_container->{$seqno} = 1;
+ }
+ }
+ return;
+}
+
+######################################
+# CODE SECTION 6: Process line-by-line
+######################################
+
+sub process_all_lines {
+
+ #----------------------------------------------------------
+ # Main loop to format all lines of a file according to type
+ #----------------------------------------------------------
+
+ my $self = shift;
+ my $rlines = $self->[_rlines_];
+ my $sink_object = $self->[_sink_object_];
+ my $fh_tee = $self->[_fh_tee_];
+ my $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
+ my $file_writer_object = $self->[_file_writer_object_];
+ my $logger_object = $self->[_logger_object_];
+ my $vertical_aligner_object = $self->[_vertical_aligner_object_];
+ my $save_logfile = $self->[_save_logfile_];
+
+ # Note for RT#118553, leave only one newline at the end of a file.
+ # Example code to do this is in comments below:
+ # my $Opt_trim_ending_blank_lines = 0;
+ # if ($Opt_trim_ending_blank_lines) {
+ # while ( my $line_of_tokens = pop @{$rlines} ) {
+ # my $line_type = $line_of_tokens->{_line_type};
+ # if ( $line_type eq 'CODE' ) {
+ # my $CODE_type = $line_of_tokens->{_code_type};
+ # next if ( $CODE_type eq 'BL' );
+ # }
+ # push @{$rlines}, $line_of_tokens;
+ # last;
+ # }
+ # }
+
+ # But while this would be a trivial update, it would have very undesirable
+ # side effects when perltidy is run from within an editor on a small snippet.
+ # So this is best done with a separate filter, such
+ # as 'delete_ending_blank_lines.pl' in the examples folder.
+
+ # Flag to prevent blank lines when POD occurs in a format skipping sect.
+ my $in_format_skipping_section;
+
+ # set locations for blanks around long runs of keywords
+ my $rwant_blank_line_after = $self->keyword_group_scan();
+
+ my $line_type = "";
+ my $i_last_POD_END = -10;
+ my $i = -1;
+ foreach my $line_of_tokens ( @{$rlines} ) {
+ $i++;
+
+ # insert blank lines requested for keyword sequences
+ if ( $i > 0
+ && defined( $rwant_blank_line_after->{ $i - 1 } )
+ && $rwant_blank_line_after->{ $i - 1 } == 1 )
+ {
+ $self->want_blank_line();
}
my $last_line_type = $line_type;
# HERE_END - last line of here-doc (target word)
# FORMAT - format section
# FORMAT_END - last line of format section, '.'
+ # SKIP - code skipping section
+ # SKIP_END - last line of code skipping section, '#>>V'
# DATA_START - __DATA__ line
# DATA - unidentified text following __DATA__
# END_START - __END__ line
}
else {
- # Let logger see all non-blank lines of code. This is a slow operation
- # so we avoid it if it is not going to be saved.
+ # Let logger see all non-blank lines of code. This is a slow
+ # operation so we avoid it if it is not going to be saved.
if ( $save_logfile && $logger_object ) {
$logger_object->black_box( $line_of_tokens,
$vertical_aligner_object->get_output_line_number );
$self->[_saw_END_or_DATA_] = 1;
}
+ # Patch to avoid losing blank lines after a code-skipping block;
+ # fixes case c047.
+ elsif ( $line_type eq 'SKIP_END' ) {
+ $file_writer_object->reset_consecutive_blank_lines();
+ }
+
# write unindented non-code line
if ( !$skip_line ) {
$self->write_unindented_line($input_line);
sub keyword_group_scan {
my $self = shift;
- # Called once per file to process the --keyword-group-blanks-* parameters.
+ #-------------------------------------------------------------------------
+ # Called once per file to process any --keyword-group-blanks-* parameters.
+ #-------------------------------------------------------------------------
# Manipulate blank lines around keyword groups (kgb* flags)
# Scan all lines looking for runs of consecutive lines beginning with
my $rlines = $self->[_rlines_];
my $rLL = $self->[_rLL_];
my $K_closing_container = $self->[_K_closing_container_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $rK_weld_right = $self->[_rK_weld_right_];
# variables for the current group and subgroups:
my ( $ibeg, $iend, $count, $level_beg, $K_closing, @iblanks, @group,
my $number_of_groups_seen = 0;
- ####################
+ #-------------------
# helper subroutines
- ####################
+ #-------------------
my $insert_blank_after = sub {
my ($i) = @_;
}
}
}
+ return;
};
my $delete_if_blank = sub {
while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 }
+ return;
};
my $end_group = sub {
@group = ();
@subgroup = ();
@iblanks = ();
+
+ return;
};
my $find_container_end = sub {
- # If the keyword lines ends with an open token, find the closing token
- # '$K_closing' so that we can easily skip past the contents of the
- # container.
- return if ( $K_last <= $K_first );
- my $KK = $K_last;
- my $type_last = $rLL->[$KK]->[_TYPE_];
- my $tok_last = $rLL->[$KK]->[_TOKEN_];
- if ( $type_last eq '#' ) {
- $KK = $self->K_previous_nonblank($KK);
- $tok_last = $rLL->[$KK]->[_TOKEN_];
- }
- if ( $KK > $K_first && $tok_last =~ /^[\(\{\[]$/ ) {
+ # If the keyword line is continued onto subsequent lines, find the
+ # closing token '$K_closing' so that we can easily skip past the
+ # contents of the container.
- my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
- my $lev = $rLL->[$KK]->[_LEVEL_];
- if ( $lev == $level_beg ) {
- $K_closing = $K_closing_container->{$type_sequence};
- }
- }
+ # We only set this value if we find a simple list, meaning
+ # -contents only one level deep
+ # -not welded
+
+ # First check: skip if next line is not one deeper
+ my $Knext_nonblank = $self->K_next_nonblank($K_last);
+ goto RETURN if ( !defined($Knext_nonblank) );
+ my $level_next = $rLL->[$Knext_nonblank]->[_LEVEL_];
+ goto RETURN if ( $level_next != $level_beg + 1 );
+
+ # Find the parent container of the first token on the next line
+ my $parent_seqno = $self->parent_seqno_by_K($Knext_nonblank);
+ goto RETURN unless ( defined($parent_seqno) );
+
+ # Must not be a weld (can be unstable)
+ goto RETURN
+ if ( $total_weld_count && $self->is_welded_at_seqno($parent_seqno) );
+
+ # Opening container must exist and be on this line
+ my $Ko = $K_opening_container->{$parent_seqno};
+ goto RETURN unless ( defined($Ko) && $Ko > $K_first && $Ko <= $K_last );
+
+ # Verify that the closing container exists and is on a later line
+ my $Kc = $K_closing_container->{$parent_seqno};
+ goto RETURN unless ( defined($Kc) && $Kc > $K_last );
+
+ # That's it
+ $K_closing = $Kc;
+ goto RETURN;
+
+ RETURN:
+ return;
};
my $add_to_group = sub {
return;
};
- ###################################
+ #----------------------------------
# loop over all lines of the source
- ###################################
+ #----------------------------------
$end_group->();
my $i = -1;
foreach my $line_of_tokens ( @{$rlines} ) {
return $rhash_of_desires;
}
- # This is not for keywords in lists ( keyword 'my' can occur in lists,
- # see case b760)
- next if ( $self->is_list_by_K($K_first) );
-
my $level = $rLL->[$K_first]->[_LEVEL_];
my $type = $rLL->[$K_first]->[_TYPE_];
my $token = $rLL->[$K_first]->[_TOKEN_];
my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
+ # End a group 'badly' at an unexpected level. This will prevent
+ # blank lines being incorrectly placed after the end of the group.
+ # We are looking for any deviation from two acceptable patterns:
+ # PATTERN 1: a simple list; secondary lines are at level+1
+ # PATTERN 2: a long statement; all secondary lines same level
+ # This was added as a fix for case b1177, in which a complex structure
+ # got incorrectly inserted blank lines.
+ if ( $ibeg >= 0 ) {
+
+ # Check for deviation from PATTERN 1, simple list:
+ if ( defined($K_closing) && $K_first < $K_closing ) {
+ $end_group->(1) if ( $level != $level_beg + 1 );
+ }
+
+ # Check for deviation from PATTERN 2, single statement:
+ elsif ( $level != $level_beg ) { $end_group->(1) }
+ }
+
+ # Do not look for keywords in lists ( keyword 'my' can occur in lists,
+ # see case b760); fixed for c048.
+ if ( $self->is_list_by_K($K_first) ) {
+ if ( $ibeg >= 0 ) { $iend = $i }
+ next;
+ }
+
# see if this is a code type we seek (i.e. comment)
if ( $CODE_type
&& $Opt_comment_pattern
# first end old group if any; we might be starting new
# keywords at different level
- if ( $ibeg > 0 ) { $end_group->(); }
+ if ( $ibeg >= 0 ) { $end_group->(); }
$add_to_group->( $i, $tok, $level );
}
next;
# first end old group if any; we might be starting new
# keywords at different level
- if ( $ibeg > 0 ) { $end_group->(); }
+ if ( $ibeg >= 0 ) { $end_group->(); }
$add_to_group->( $i, $token, $level );
}
next;
# - bail out on a large level change; we may have walked into a
# data structure or anoymous sub code.
if ( $level > $level_beg + 1 || $level < $level_beg ) {
- $end_group->();
+ $end_group->(1);
next;
}
# flags needed by the store routine
my $line_of_tokens;
my $no_internal_newlines;
- my $side_comment_follows;
my $CODE_type;
# range of K of tokens for the current line
my ( $K_first, $K_last );
- my ( $rLL, $radjusted_levels );
+ my ( $rLL, $radjusted_levels, $rparent_of_seqno, $rdepth_of_opening_seqno,
+ $rblock_type_of_seqno, $ri_starting_one_line_block );
- # past stored nonblank tokens
+ # past stored nonblank tokens and flags
my (
- $last_last_nonblank_token, $last_last_nonblank_type,
- $last_nonblank_token, $last_nonblank_type,
- $last_nonblank_block_type, $K_last_nonblank_code,
- $K_last_last_nonblank_code, $looking_for_else,
- $is_static_block_comment, $batch_CODE_type,
- $last_line_had_side_comment,
+ $K_last_nonblank_code, $K_last_last_nonblank_code,
+ $looking_for_else, $is_static_block_comment,
+ $batch_CODE_type, $last_line_had_side_comment,
+ $next_parent_seqno, $next_slevel,
);
# Called once at the start of a new file
sub initialize_process_line_of_CODE {
- $last_nonblank_token = ';';
- $last_nonblank_type = ';';
- $last_last_nonblank_token = ';';
- $last_last_nonblank_type = ';';
- $last_nonblank_block_type = "";
$K_last_nonblank_code = undef;
$K_last_last_nonblank_code = undef;
$looking_for_else = 0;
$is_static_block_comment = 0;
$batch_CODE_type = "";
$last_line_had_side_comment = 0;
+ $next_parent_seqno = SEQ_ROOT;
+ $next_slevel = undef;
return;
}
# Called before the start of each new batch
sub initialize_batch_variables {
- $max_index_to_go = UNDEFINED_INDEX;
- @summed_lengths_to_go = @nesting_depth_to_go = (0);
+ $max_index_to_go = UNDEFINED_INDEX;
+ @summed_lengths_to_go = @nesting_depth_to_go = (0);
+ $ri_starting_one_line_block = [];
# The initialization code for the remaining batch arrays is as follows
# and can be activated for testing. But profiling shows that it is
sub leading_spaces_to_go {
# return the number of indentation spaces for a token in the output
- # stream; these were previously stored by 'set_leading_whitespace'.
+ # stream
my ($ii) = @_;
return 0 if ( $ii < 0 );
my ( $self, $Ktoken_vars, $rtoken_vars ) = @_;
# Add one token to the next batch.
- # $Ktoken_vars = the index K in the global token array
- # $rtoken_vars = $rLL->[$Ktoken_vars] = the corresponding token values
- # unless they are temporarily being overridden
-
- # NOTE: This routine needs to be coded efficiently because it is called
- # once per token. I have gotten it down from the second slowest to the
- # eighth slowest, but that still seems rather slow for what it does.
-
- # This closure variable has already been defined, for efficiency:
- # my $radjusted_levels = $self->[_radjusted_levels_];
+ # $Ktoken_vars = the index K in the global token array
+ # $rtoken_vars = $rLL->[$Ktoken_vars] = the corresponding token values
+ # unless they are temporarily being overridden
my $type = $rtoken_vars->[_TYPE_];
# happen, but it is worth checking. Later code can then make the
# simplifying assumption that blank tokens are not consecutive.
elsif ( $type eq 'b' && $types_to_go[$max_index_to_go] eq 'b' ) {
+
+ if (DEVEL_MODE) {
+
+ # if this happens, it is may be that consecutive blanks
+ # were inserted into the token stream in 'respace_tokens'
+ my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1;
+ Fault("consecutive blanks near line $lno; please fix");
+ }
return;
}
}
if ( $level < 0 ) { $level = 0 }
$levels_to_go[$max_index_to_go] = $level;
- $nesting_depth_to_go[$max_index_to_go] = $rtoken_vars->[_SLEVEL_];
- $block_type_to_go[$max_index_to_go] = $rtoken_vars->[_BLOCK_TYPE_];
- $type_sequence_to_go[$max_index_to_go] =
+ my $seqno = $type_sequence_to_go[$max_index_to_go] =
$rtoken_vars->[_TYPE_SEQUENCE_];
- $nobreak_to_go[$max_index_to_go] =
- $side_comment_follows ? 2 : $no_internal_newlines;
+ if ( $max_index_to_go == 0 ) {
- my $length = $rtoken_vars->[_TOKEN_LENGTH_];
+ # Update the next parent sequence number for each new batch.
- # Safety check that length is defined. Should not be needed now.
- # Former patch for indent-only, in which the entire set of tokens is
- # turned into type 'q'. Lengths may have not been defined because sub
- # 'respace_tokens' is bypassed. We do not need lengths in this case,
- # but we will use the character count to have a defined value. In the
+ #------------------------------------------
+ # Begin coding from sub parent_seqno_from_K
+ #------------------------------------------
+
+ ## $next_parent_seqno = $self->parent_seqno_by_K($Ktoken_vars);
+ $next_parent_seqno = SEQ_ROOT;
+ if ($seqno) {
+ $next_parent_seqno = $rparent_of_seqno->{$seqno};
+ }
+ else {
+ my $Kt = $rLL->[$Ktoken_vars]->[_KNEXT_SEQ_ITEM_];
+ if ( defined($Kt) ) {
+ my $type_sequence = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
+ my $type = $rLL->[$Kt]->[_TYPE_];
+
+ # if next container token is closing, it is the parent seqno
+ if ( $is_closing_type{$type} ) {
+ $next_parent_seqno = $type_sequence;
+ }
+
+ # otherwise we want its parent container
+ else {
+ $next_parent_seqno =
+ $rparent_of_seqno->{$type_sequence};
+ }
+ }
+ }
+ $next_parent_seqno = SEQ_ROOT
+ unless ( defined($next_parent_seqno) );
+
+ #----------------------------------------
+ # End coding from sub parent_seqno_from_K
+ #----------------------------------------
+
+ $next_slevel = $rdepth_of_opening_seqno->[$next_parent_seqno] + 1;
+ }
+
+ # Initialize some sequence-dependent variables to their normal values
+ my $parent_seqno = $next_parent_seqno;
+ my $slevel = $next_slevel;
+ my $block_type = "";
+
+ # Then fix them at container tokens:
+ if ($seqno) {
+ if ( $is_opening_token{$token} ) {
+ $next_parent_seqno = $seqno;
+ $slevel = $rdepth_of_opening_seqno->[$seqno];
+ $next_slevel = $slevel + 1;
+ $block_type = $rblock_type_of_seqno->{$seqno};
+ }
+ elsif ( $is_closing_token{$token} ) {
+ $next_slevel = $rdepth_of_opening_seqno->[$seqno];
+ $slevel = $next_slevel + 1;
+ $block_type = $rblock_type_of_seqno->{$seqno};
+ $parent_seqno = $rparent_of_seqno->{$seqno};
+ $parent_seqno = SEQ_ROOT unless defined($parent_seqno);
+ $next_parent_seqno = $parent_seqno;
+ }
+ else {
+ # ternary token: nothing to do
+ }
+ $block_type = "" unless ( defined($block_type) );
+ }
+
+ $parent_seqno_to_go[$max_index_to_go] = $parent_seqno;
+ $nesting_depth_to_go[$max_index_to_go] = $slevel;
+ $block_type_to_go[$max_index_to_go] = $block_type;
+ $nobreak_to_go[$max_index_to_go] = $no_internal_newlines;
+
+ my $length = $rtoken_vars->[_TOKEN_LENGTH_];
+
+ # Safety check that length is defined. Should not be needed now.
+ # Former patch for indent-only, in which the entire set of tokens is
+ # turned into type 'q'. Lengths may have not been defined because sub
+ # 'respace_tokens' is bypassed. We do not need lengths in this case,
+ # but we will use the character count to have a defined value. In the
# future, it would be nicer to have 'respace_tokens' convert the lines
# to quotes and get correct lengths.
if ( !defined($length) ) { $length = length($token) }
$leading_spaces_to_go[$max_index_to_go] =
$reduced_spaces + $rOpts_continuation_indentation * $ci_level;
}
-
- # Correct these values if -lp is used
- if ($rOpts_line_up_parentheses) {
- $self->set_leading_whitespace( $Ktoken_vars, $K_last_nonblank_code,
- $K_last_last_nonblank_code, $level, $ci_level,
- $in_continued_quote );
- }
+ $standard_spaces_to_go[$max_index_to_go] =
+ $leading_spaces_to_go[$max_index_to_go];
DEBUG_STORE && do {
my ( $a, $b, $c ) = caller();
$this_batch->[_starting_in_quote_] = $starting_in_quote;
$this_batch->[_ending_in_quote_] = $ending_in_quote;
$this_batch->[_max_index_to_go_] = $max_index_to_go;
- $this_batch->[_rK_to_go_] = \@K_to_go;
$this_batch->[_batch_CODE_type_] = $batch_CODE_type;
# The flag $is_static_block_comment applies to the line which just
&& $max_index_to_go == 0
&& $K_to_go[0] == $K_first ? $is_static_block_comment : 0;
+ $this_batch->[_ri_starting_one_line_block_] =
+ $ri_starting_one_line_block;
+
$self->[_this_batch_] = $this_batch;
$last_line_had_side_comment =
initialize_batch_variables();
initialize_forced_breakpoint_vars();
- initialize_gnu_batch_vars()
- if $rOpts_line_up_parentheses;
return;
}
# end the current batch, EXCEPT for a few special cases
my ($self) = @_;
- # Exception 1: Do not end line in a weld
- return
- if ( $total_weld_count
- && $self->is_welded_right_at_i($max_index_to_go) );
+ if ( $max_index_to_go < 0 ) {
- # Exception 2: just set a tentative breakpoint if we might be in a
- # one-line block
- if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
- $self->set_forced_breakpoint($max_index_to_go);
+ # This is harmless but should be elimintated in development
+ if (DEVEL_MODE) {
+ Fault("End batch called with nothing to do; please fix\n");
+ }
return;
}
+ # Exceptions when a line does not end with a comment... (fixes c058)
+ if ( $types_to_go[$max_index_to_go] ne '#' ) {
+
+ # Exception 1: Do not end line in a weld
+ return
+ if ( $total_weld_count
+ && $self->[_rK_weld_right_]->{ $K_to_go[$max_index_to_go] } );
+
+ # Exception 2: just set a tentative breakpoint if we might be in a
+ # one-line block
+ if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
+ $self->set_forced_breakpoint($max_index_to_go);
+ return;
+ }
+ }
+
$self->flush_batch_of_CODE();
return;
}
# Exception: if we are flushing within the code stream only to insert
# blank line(s), then we can keep the batch intact at a weld. This
# improves formatting of -ce. See test 'ce1.ce'
- if ( $CODE_type && $CODE_type eq 'BL' ) { $self->end_batch() }
+ if ( $CODE_type && $CODE_type eq 'BL' ) {
+ $self->end_batch() if ( $max_index_to_go >= 0 );
+ }
# otherwise, we have to shut things down completely.
else { $self->flush_batch_of_CODE() }
my ( $self, $my_line_of_tokens ) = @_;
- # This routine is called once per INPUT line to process all of the
+ #----------------------------------------------------------------
+ # This routine is called once per INPUT line to format all of the
# tokens on that line.
+ #----------------------------------------------------------------
# It outputs full-line comments and blank lines immediately.
# appropriate for lists and logical structures, and to keep line
# lengths below the requested maximum line length.
+ #-----------------------------------
+ # begin initialize closure variables
+ #-----------------------------------
$line_of_tokens = $my_line_of_tokens;
$CODE_type = $line_of_tokens->{_code_type};
- my $input_line_number = $line_of_tokens->{_line_number};
- my $input_line = $line_of_tokens->{_line_text};
-
- # initialize closure variables
my $rK_range = $line_of_tokens->{_rK_range};
( $K_first, $K_last ) = @{$rK_range};
-
- # remember original starting index in case it changes
- my $K_first_true = $K_first;
-
- $rLL = $self->[_rLL_];
- $radjusted_levels = $self->[_radjusted_levels_];
-
- my $file_writer_object = $self->[_file_writer_object_];
- my $rbreak_container = $self->[_rbreak_container_];
- my $rshort_nested = $self->[_rshort_nested_];
- my $sink_object = $self->[_sink_object_];
- my $fh_tee = $self->[_fh_tee_];
- my $ris_bli_container = $self->[_ris_bli_container_];
- my $rK_weld_left = $self->[_rK_weld_left_];
-
if ( !defined($K_first) ) {
# Empty line: This can happen if tokens are deleted, for example
# with the -mangle parameter
return;
}
+ $rLL = $self->[_rLL_];
+ $radjusted_levels = $self->[_radjusted_levels_];
+ $rparent_of_seqno = $self->[_rparent_of_seqno_];
+ $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
+ $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+
+ #---------------------------------
+ # end initialize closure variables
+ #---------------------------------
+ # This flag will become nobreak_to_go and should be set to 2 to prevent
+ # a line break AFTER the current token.
$no_internal_newlines = 0;
if ( !$rOpts_add_newlines || $CODE_type eq 'NIN' ) {
$no_internal_newlines = 2;
}
- $side_comment_follows = 0;
+ my $input_line = $line_of_tokens->{_line_text};
+
my $is_comment =
( $K_first == $K_last && $rLL->[$K_first]->[_TYPE_] eq '#' );
my $is_static_block_comment_without_leading_space =
# Add interline blank if any
my $last_old_nonblank_type = "b";
my $first_new_nonblank_token = "";
+ my $K_first_true = $K_first;
if ( $max_index_to_go >= 0 ) {
$last_old_nonblank_type = $types_to_go[$max_index_to_go];
$first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
my $in_quote = $line_of_tokens->{_ending_in_quote};
$ending_in_quote = $in_quote;
- my $guessed_indentation_level =
- $line_of_tokens->{_guessed_indentation_level};
- ######################################
- # Handle a block (full-line) comment..
- ######################################
+ #------------------------------------
+ # Handle a block (full-line) comment.
+ #------------------------------------
if ($is_comment) {
if ( $rOpts->{'delete-block-comments'} ) {
}
destroy_one_line_block();
- $self->end_batch();
+ $self->end_batch() if ( $max_index_to_go >= 0 );
# output a blank line before block comments
if (
)
{
$self->flush(); # switching to new output stream
+ my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->write_blank_code_line();
$self->[_last_line_leading_type_] = 'b';
}
$self->flush();
# Note that last arg in call here is 'undef' for comments
+ my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->write_code_line(
$rtok_first->[_TOKEN_] . "\n", undef );
$self->[_last_line_leading_type_] = '#';
# compare input/output indentation except for continuation lines
# (because they have an unknown amount of initial blank space)
# and lines which are quotes (because they may have been outdented)
- $self->compare_indentation_levels( $K_first, $guessed_indentation_level,
- $input_line_number )
- unless ( $is_hanging_side_comment
+ my $guessed_indentation_level =
+ $line_of_tokens->{_guessed_indentation_level};
+ unless ( $is_hanging_side_comment
|| $rtok_first->[_CI_LEVEL_] > 0
- || $guessed_indentation_level == 0
- && $rtok_first->[_TYPE_] eq 'Q' );
+ || $guessed_indentation_level == 0 && $rtok_first->[_TYPE_] eq 'Q' )
+ {
+ my $input_line_number = $line_of_tokens->{_line_number};
+ $self->compare_indentation_levels( $K_first,
+ $guessed_indentation_level, $input_line_number );
+ }
- ##########################
+ #------------------------
# Handle indentation-only
- ##########################
+ #------------------------
# NOTE: In previous versions we sent all qw lines out immediately here.
# No longer doing this: also write a line which is entirely a 'qw' list
return;
}
- ############################
+ #---------------------------
# Handle all other lines ...
- ############################
+ #---------------------------
# If we just saw the end of an elsif block, write nag message
# if we do not see another elseif or an else.
}
# This is a good place to kill incomplete one-line blocks
- if (
- (
- ( $semicolons_before_block_self_destruct == 0 )
- && ( $max_index_to_go >= 0 )
- && ( $last_old_nonblank_type eq ';' )
- && ( $first_new_nonblank_token ne '}' )
- )
-
- # Patch for RT #98902. Honor request to break at old commas.
- || ( $rOpts_break_at_old_comma_breakpoints
- && $max_index_to_go >= 0
- && $last_old_nonblank_type eq ',' )
- )
- {
- $forced_breakpoint_to_go[$max_index_to_go] = 1
- if ($rOpts_break_at_old_comma_breakpoints);
- destroy_one_line_block();
- $self->end_batch();
- }
+ if ( $max_index_to_go >= 0 ) {
+ if (
+ (
+ ( $semicolons_before_block_self_destruct == 0 )
+ && ( $last_old_nonblank_type eq ';' )
+ && ( $first_new_nonblank_token ne '}' )
+ )
- # Keep any requested breaks before this line. Note that we have to
- # use the original K_first because it may have been reduced above
- # to add a blank. The value of the flag is as follows:
- # 1 => hard break, flush the batch
- # 2 => soft break, set breakpoint and continue building the batch
- if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} ) {
- destroy_one_line_block();
- if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} == 2 ) {
- $self->set_forced_breakpoint($max_index_to_go);
- }
- else {
+ # Patch for RT #98902. Honor request to break at old commas.
+ || ( $rOpts_break_at_old_comma_breakpoints
+ && $last_old_nonblank_type eq ',' )
+ )
+ {
+ $forced_breakpoint_to_go[$max_index_to_go] = 1
+ if ($rOpts_break_at_old_comma_breakpoints);
+ destroy_one_line_block();
$self->end_batch();
}
+
+ # Keep any requested breaks before this line. Note that we have to
+ # use the original K_first because it may have been reduced above
+ # to add a blank. The value of the flag is as follows:
+ # 1 => hard break, flush the batch
+ # 2 => soft break, set breakpoint and continue building the batch
+ if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} ) {
+ destroy_one_line_block();
+ if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} == 2 ) {
+ $self->set_forced_breakpoint($max_index_to_go);
+ }
+ else {
+ $self->end_batch() if ( $max_index_to_go >= 0 );
+ }
+ }
}
+ #--------------------------------------
# loop to process the tokens one-by-one
+ #--------------------------------------
# We do not want a leading blank if the previous batch just got output
if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
foreach my $Ktoken_vars ( $K_first .. $K_last ) {
- my $rtoken_vars = $rLL->[$Ktoken_vars];
- my $token = $rtoken_vars->[_TOKEN_];
- my $type = $rtoken_vars->[_TYPE_];
- my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
- my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+ my $rtoken_vars = $rLL->[$Ktoken_vars];
+ my $type = $rtoken_vars->[_TYPE_];
# If we are continuing after seeing a right curly brace, flush
# buffer unless we see what we are looking for, as in
# } else ...
if ( $rbrace_follower && $type ne 'b' ) {
-
+ my $token = $rtoken_vars->[_TOKEN_];
unless ( $rbrace_follower->{$token} ) {
- $self->end_batch();
+ $self->end_batch() if ( $max_index_to_go >= 0 );
}
$rbrace_follower = undef;
}
- # Get next nonblank on this line
- my $next_nonblank_token = '';
- my $next_nonblank_token_type = 'b';
+ my (
+ $block_type, $type_sequence,
+ $is_opening_BLOCK, $is_closing_BLOCK,
+ $nobreak_BEFORE_BLOCK
+ );
+ if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) {
+
+ my $token = $rtoken_vars->[_TOKEN_];
+ $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+ $block_type = $rblock_type_of_seqno->{$type_sequence};
+
+ if ( $block_type
+ && $token eq $type
+ && $block_type ne 't'
+ && !$self->[_rshort_nested_]->{$type_sequence} )
+ {
+
+ if ( $type eq '{' ) {
+ $is_opening_BLOCK = 1;
+ $nobreak_BEFORE_BLOCK = $no_internal_newlines;
+ }
+ elsif ( $type eq '}' ) {
+ $is_closing_BLOCK = 1;
+ $nobreak_BEFORE_BLOCK = $no_internal_newlines;
+ }
+ }
+ }
+
+ # Find next nonblank token on this line and look for a side comment
+ my ( $Knnb, $side_comment_follows );
+
+ # if before last token ...
if ( $Ktoken_vars < $K_last ) {
- my $Knnb = $Ktoken_vars + 1;
- if ( $rLL->[$Knnb]->[_TYPE_] eq 'b'
- && $Knnb < $K_last )
+ $Knnb = $Ktoken_vars + 1;
+ if ( $Knnb < $K_last
+ && $rLL->[$Knnb]->[_TYPE_] eq 'b' )
{
$Knnb++;
}
- $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
- $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
- }
-
- # Do not allow breaks which would promote a side comment to a
- # block comment. In order to allow a break before an opening
- # or closing BLOCK, followed by a side comment, those sections
- # of code will handle this flag separately.
- $side_comment_follows = ( $next_nonblank_token_type eq '#' );
- my $is_opening_BLOCK =
- ( $type eq '{'
- && $token eq '{'
- && $block_type
- && !$rshort_nested->{$type_sequence}
- && $block_type ne 't' );
- my $is_closing_BLOCK =
- ( $type eq '}'
- && $token eq '}'
- && $block_type
- && !$rshort_nested->{$type_sequence}
- && $block_type ne 't' );
-
- if ( $side_comment_follows
- && !$is_opening_BLOCK
- && !$is_closing_BLOCK )
- {
- $no_internal_newlines = 1;
+
+ if ( $rLL->[$Knnb]->[_TYPE_] eq '#' ) {
+ $side_comment_follows = 1;
+
+ # Do not allow breaks which would promote a side comment to
+ # a block comment.
+ $no_internal_newlines = 2;
+ }
+ }
+
+ # if at last token ...
+ else {
+
+ #---------------------
+ # handle side comments
+ #---------------------
+ if ( $type eq '#' ) {
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
+ next;
+ }
+ }
+
+ #--------------
+ # handle blanks
+ #--------------
+ if ( $type eq 'b' ) {
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
+ next;
+ }
+
+ # Process non-blank and non-comment tokens ...
+
+ #-----------------
+ # handle semicolon
+ #-----------------
+ if ( $type eq ';' ) {
+
+ my $next_nonblank_token_type = 'b';
+ my $next_nonblank_token = '';
+ if ( defined($Knnb) ) {
+ $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
+ $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
+ }
+
+ my $break_before_semicolon = ( $Ktoken_vars == $K_first )
+ && $rOpts_break_at_old_semicolon_breakpoints;
+
+ # kill one-line blocks with too many semicolons
+ $semicolons_before_block_self_destruct--;
+ if (
+ $break_before_semicolon
+ || ( $semicolons_before_block_self_destruct < 0 )
+ || ( $semicolons_before_block_self_destruct == 0
+ && $next_nonblank_token_type !~ /^[b\}]$/ )
+ )
+ {
+ destroy_one_line_block();
+ $self->end_batch()
+ if ( $break_before_semicolon
+ && $max_index_to_go >= 0 );
+ }
+
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
+
+ $self->end_batch()
+ unless (
+ $no_internal_newlines
+ || ( $rOpts_keep_interior_semicolons
+ && $Ktoken_vars < $K_last )
+ || ( $next_nonblank_token eq '}' )
+ );
+
}
- # We're only going to handle breaking for code BLOCKS at this
- # (top) level. Other indentation breaks will be handled by
- # sub scan_list, which is better suited to dealing with them.
- if ($is_opening_BLOCK) {
+ #-----------
+ # handle '{'
+ #-----------
+ elsif ($is_opening_BLOCK) {
# Tentatively output this token. This is required before
# calling starting_one_line_block. We may have to unstore
my $keyword_on_same_line = 1;
if (
$max_index_to_go >= 0
- && $last_nonblank_type eq ')'
- && ( ( $rtoken_vars->[_SLEVEL_] < $nesting_depth_to_go[0] )
+ && defined($K_last_nonblank_code)
+ && $rLL->[$K_last_nonblank_code]->[_TYPE_] eq ')'
+ && ( ( $rtoken_vars->[_LEVEL_] < $levels_to_go[0] )
|| $too_long )
)
{
$keyword_on_same_line = 0;
}
- # decide if user requested break before '{'
- my $want_break =
+ # Break before '{' if requested with -bl or -bli flag
+ my $want_break = $self->[_rbrace_left_]->{$type_sequence};
- # This test was added to minimize changes in -bl formatting
- # caused by other changes to fix cases b562 .. b983
- # Previously, the -bl flag was being applied almost randomly
- # to sort/map/grep/eval blocks, depending on if they were
- # flagged as possible one-line blocks. usually time they
- # were not given -bl formatting. The following flag was
- # added to minimize changes to existing formatting.
- $is_braces_left_exclude_block{$block_type}
- ? 0
-
- # use -bl flag if not a sub block of any type
- : $block_type !~ /$ANYSUB_PATTERN/
- ? $rOpts->{'opening-brace-on-new-line'}
-
- # use -sbl flag for a named sub block
- : $block_type !~ /$ASUB_PATTERN/
- ? $rOpts->{'opening-sub-brace-on-new-line'}
-
- # use -asbl flag for an anonymous sub block
- : $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
-
- # Break if requested with -bli flag
- $want_break ||= $ris_bli_container->{$type_sequence};
-
- # Do not break if this token is welded to the left
+ # But do not break if this token is welded to the left
if ( $total_weld_count
- && defined( $rK_weld_left->{$Ktoken_vars} ) )
+ && defined( $self->[_rK_weld_left_]->{$Ktoken_vars} ) )
{
$want_break = 0;
}
- # Break before an opening '{' ...
+ # Break BEFORE an opening '{' ...
if (
# if requested
# it will be outdented (eval.t, overload.t), and the user
# has not insisted on keeping it on the right
|| ( !$keyword_on_same_line
- && !$rOpts->{'opening-brace-always-on-right'} )
+ && !$rOpts_opening_brace_always_on_right )
)
{
# but only if allowed
- unless ($no_internal_newlines) {
+ unless ($nobreak_BEFORE_BLOCK) {
# since we already stored this token, we must unstore it
$self->unstore_token_to_go();
# then output the line
- $self->end_batch();
+ $self->end_batch() if ( $max_index_to_go >= 0 );
# and now store this token at the start of a new line
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
}
}
- # Now update for side comment
- if ($side_comment_follows) { $no_internal_newlines = 1 }
-
# now output this line
- unless ($no_internal_newlines) {
- $self->end_batch();
- }
+ $self->end_batch()
+ if ( $max_index_to_go >= 0 && !$no_internal_newlines );
}
+ #-----------
+ # handle '}'
+ #-----------
elsif ($is_closing_BLOCK) {
+ my $next_nonblank_token_type = 'b';
+ my $next_nonblank_token = '';
+ if ( defined($Knnb) ) {
+ $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
+ $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
+ }
+
# If there is a pending one-line block ..
if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
+ # Fix for b1208: if a side comment follows this closing
+ # brace then we must include its length in the length test
+ # ... unless the -issl flag is set (fixes b1307-1309).
+ # Assume a minimum of 1 blank space to the comment.
+ my $added_length =
+ $side_comment_follows
+ && !$rOpts_ignore_side_comment_lengths
+ ? 1 + $rLL->[$Knnb]->[_TOKEN_LENGTH_]
+ : 0;
+
# we have to terminate it if..
if (
# initial estimate). note: must allow 1 space for this
# token
$self->excess_line_length( $index_start_one_line_block,
- $max_index_to_go ) >= 0
+ $max_index_to_go ) + $added_length >= 0
# or if it has too many semicolons
|| ( $semicolons_before_block_self_destruct == 0
- && $last_nonblank_type ne ';' )
+ && defined($K_last_nonblank_code)
+ && $rLL->[$K_last_nonblank_code]->[_TYPE_] ne ';' )
)
{
destroy_one_line_block();
}
# put a break before this closing curly brace if appropriate
- unless ( $no_internal_newlines
- || $index_start_one_line_block != UNDEFINED_INDEX )
- {
-
- # write out everything before this closing curly brace
- $self->end_batch();
- }
-
- # Now update for side comment
- if ($side_comment_follows) { $no_internal_newlines = 1 }
+ $self->end_batch()
+ if ( $max_index_to_go >= 0
+ && !$nobreak_BEFORE_BLOCK
+ && $index_start_one_line_block == UNDEFINED_INDEX );
# store the closing curly brace
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
# we have to actually make it by removing tentative
# breaks that were set within it
$self->undo_forced_breakpoint_stack(0);
+
+ # For -lp, extend the nobreak to include a trailing
+ # terminal ','. This is because the -lp indentation was
+ # not known when making one-line blocks, so we may be able
+ # to move the line back to fit. Otherwise we may create a
+ # needlessly stranded comma on the next line.
+ my $iend_nobreak = $max_index_to_go - 1;
+ if ( $rOpts_line_up_parentheses
+ && $next_nonblank_token_type eq ','
+ && $Knnb eq $K_last )
+ {
+ my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
+ my $is_excluded =
+ $self->[_ris_excluded_lp_container_]->{$p_seqno};
+ $iend_nobreak = $max_index_to_go if ( !$is_excluded );
+ }
+
$self->set_nobreaks( $index_start_one_line_block,
- $max_index_to_go - 1 );
+ $iend_nobreak );
+
+ # save starting block indexes so that sub correct_lp can
+ # check and adjust -lp indentation (c098)
+ push @{$ri_starting_one_line_block},
+ $index_start_one_line_block;
# then re-initialize for the next one-line block
destroy_one_line_block();
# tokens
if ( $block_type eq 'do' ) {
$rbrace_follower = \%is_do_follower;
- if ( $self->tight_paren_follows( $K_to_go[0], $Ktoken_vars )
+ if (
+ $self->tight_paren_follows( $K_to_go[0], $Ktoken_vars )
)
{
$rbrace_follower = { ')' => 1 };
}
# anonymous sub
- elsif ( $block_type =~ /$ASUB_PATTERN/ ) {
-
+ elsif ( $self->[_ris_asub_block_]->{$type_sequence} ) {
if ($is_one_line_block) {
+
$rbrace_follower = \%is_anon_sub_1_brace_follower;
+
+ # Exceptions to help keep -lp intact, see git #74 ...
+ # Exception 1: followed by '}' on this line
+ if ( $Ktoken_vars < $K_last
+ && $next_nonblank_token eq '}' )
+ {
+ $rbrace_follower = undef;
+ $keep_going = 1;
+ }
+
+ # Exception 2: followed by '}' on next line if -lp set.
+ # The -lp requirement allows the formatting to follow
+ # old breaks when -lp is not used, minimizing changes.
+ # Fixes issue c087.
+ elsif ($Ktoken_vars == $K_last
+ && $rOpts_line_up_parentheses )
+ {
+ my $K_closing_container =
+ $self->[_K_closing_container_];
+ my $K_opening_container =
+ $self->[_K_opening_container_];
+ my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
+ my $Kc = $K_closing_container->{$p_seqno};
+ my $is_excluded =
+ $self->[_ris_excluded_lp_container_]->{$p_seqno};
+ if ( defined($Kc)
+ && $rLL->[$Kc]->[_TOKEN_] eq '}'
+ && !$is_excluded
+ && $Kc - $Ktoken_vars <= 2 )
+ {
+ $rbrace_follower = undef;
+ $keep_going = 1;
+ }
+ }
}
else {
$rbrace_follower = \%is_anon_sub_brace_follower;
unless ( $rbrace_follower->{$next_nonblank_token} ) {
$self->end_batch()
- unless ($no_internal_newlines);
+ unless ( $no_internal_newlines
+ || $max_index_to_go < 0 );
}
$rbrace_follower = undef;
}
else {
$self->end_batch()
- unless ($no_internal_newlines);
- }
-
- } # end treatment of closing block token
-
- # handle semicolon
- elsif ( $type eq ';' ) {
-
- my $break_before_semicolon = ( $Ktoken_vars == $K_first )
- && $rOpts_break_at_old_semicolon_breakpoints;
-
- # kill one-line blocks with too many semicolons
- $semicolons_before_block_self_destruct--;
- if (
- $break_before_semicolon
- || ( $semicolons_before_block_self_destruct < 0 )
- || ( $semicolons_before_block_self_destruct == 0
- && $next_nonblank_token_type !~ /^[b\}]$/ )
- )
- {
- destroy_one_line_block();
- $self->end_batch() if ($break_before_semicolon);
+ unless ( $no_internal_newlines
+ || $max_index_to_go < 0 );
}
- $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
-
- $self->end_batch()
- unless (
- $no_internal_newlines
- || ( $rOpts_keep_interior_semicolons
- && $Ktoken_vars < $K_last )
- || ( $next_nonblank_token eq '}' )
- );
-
- }
+ } ## end treatment of closing block token
+ #------------------------------
# handle here_doc target string
+ #------------------------------
elsif ( $type eq 'h' ) {
# no newlines after seeing here-target
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
}
+ #-----------------------------
# handle all other token types
+ #-----------------------------
else {
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
- }
- # remember two previous nonblank OUTPUT tokens
- if ( $type ne '#' && $type ne 'b' ) {
- $last_last_nonblank_token = $last_nonblank_token;
- $last_last_nonblank_type = $last_nonblank_type;
- $last_nonblank_token = $token;
- $last_nonblank_type = $type;
- $last_nonblank_block_type = $block_type;
- $K_last_last_nonblank_code = $K_last_nonblank_code;
- $K_last_nonblank_code = $Ktoken_vars;
+ # break after a label if requested
+ if ( $type eq 'J' && $rOpts_break_after_labels == 1 ) {
+ $self->end_batch()
+ unless ($no_internal_newlines);
+ }
}
- } # end of loop over all tokens in this 'line_of_tokens'
+ # remember two previous nonblank, non-comment OUTPUT tokens
+ $K_last_last_nonblank_code = $K_last_nonblank_code;
+ $K_last_nonblank_code = $Ktoken_vars;
+
+ } ## end of loop over all tokens in this line
my $type = $rLL->[$K_last]->[_TYPE_];
my $break_flag = $self->[_rbreak_after_Klast_]->{$K_last};
|| $is_VERSION_statement
# to keep a label at the end of a line
- || $type eq 'J'
+ || ( $type eq 'J' && $rOpts_break_after_labels != 2 )
# if we have a hard break request
|| $break_flag && $break_flag != 2
)
{
destroy_one_line_block();
- $self->end_batch();
+ $self->end_batch() if ( $max_index_to_go >= 0 );
}
# Check for a soft break request
# sub xxx ( ... do { ... } ) {
# ^----- next block_type
my $K_test = $self->K_next_nonblank($K_oc);
- if ( defined($K_test) ) {
- my $block_type = $rLL->[$K_test]->[_BLOCK_TYPE_];
- if ( $block_type
- && $rLL->[$K_test]->[_TYPE_] eq '{'
- && $block_type =~ /$ANYSUB_PATTERN/ )
- {
- return 1;
+ if ( defined($K_test) && $rLL->[$K_test]->[_TYPE_] eq '{' ) {
+ my $seqno_test = $rLL->[$K_test]->[_TYPE_SEQUENCE_];
+ if ($seqno_test) {
+ if ( $self->[_ris_asub_block_]->{$seqno_test}
+ || $self->[_ris_sub_block_]->{$seqno_test} )
+ {
+ return 1;
+ }
}
}
return 1;
}
+my %is_brace_semicolon_colon;
+
+BEGIN {
+ my @q = qw( { } ; : );
+ @is_brace_semicolon_colon{@q} = (1) x scalar(@q);
+}
+
sub starting_one_line_block {
# after seeing an opening curly brace, look for the closing brace and see
my ( $self, $Kj, $K_last_nonblank, $K_last ) = @_;
- my $rbreak_container = $self->[_rbreak_container_];
- my $rshort_nested = $self->[_rshort_nested_];
- my $rLL = $self->[_rLL_];
- my $K_opening_container = $self->[_K_opening_container_];
+ my $rbreak_container = $self->[_rbreak_container_];
+ my $rshort_nested = $self->[_rshort_nested_];
+ my $rLL = $self->[_rLL_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
# kill any current block - we can only go 1 deep
destroy_one_line_block();
# indicates that a programming change may have caused a flush operation to
# clean out the previously stored tokens.
if ( !defined($max_index_to_go) || $max_index_to_go < 0 ) {
- Fault("program bug: store_token_to_go called incorrectly\n");
+ Fault("program bug: store_token_to_go called incorrectly\n")
+ if (DEVEL_MODE);
+ return 0;
}
# Return if block should be broken
my $ris_bli_container = $self->[_ris_bli_container_];
my $is_bli = $ris_bli_container->{$type_sequence};
- my $block_type = $rLL->[$Kj]->[_BLOCK_TYPE_];
+ my $block_type = $rblock_type_of_seqno->{$type_sequence};
+ $block_type = "" unless ( defined($block_type) );
my $index_max_forced_break = get_index_max_forced_break();
my $previous_nonblank_token = '';
}
# find the starting keyword for this block (such as 'if', 'else', ...)
- if ( $max_index_to_go == 0
- || $block_type =~ /^[\{\}\;\:]$/
- || $block_type =~ /^package/ )
+ if (
+ $max_index_to_go == 0
+ ##|| $block_type =~ /^[\{\}\;\:]$/
+ || $is_brace_semicolon_colon{$block_type}
+ || substr( $block_type, 0, 7 ) eq 'package'
+ )
{
$i_start = $max_index_to_go;
}
elsif (
$i_last_nonblank >= 0
&& ( $previous_nonblank_token eq $block_type
- || $block_type =~ /$ANYSUB_PATTERN/
- || $block_type =~ /\(\)/ )
+ || $self->[_ris_asub_block_]->{$type_sequence}
+ || $self->[_ris_sub_block_]->{$type_sequence}
+ || substr( $block_type, -2, 2 ) eq '()' )
)
{
$i_start = $i_last_nonblank;
# create( TypeFoo $e) {$bubba}
# the blocktype would be marked as create()
my $stripped_block_type = $block_type;
- $stripped_block_type =~ s/\(\)$//;
-
+ if ( substr( $block_type, -2, 2 ) eq '()' ) {
+ $stripped_block_type = substr( $block_type, 0, -2 );
+ }
unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
return 0;
}
# closing brace.
elsif ($rLL->[$Ki]->[_TOKEN_] eq '{'
&& $rLL->[$Ki]->[_TYPE_] eq '{'
- && $rLL->[$Ki]->[_BLOCK_TYPE_]
+ && $rblock_type_of_seqno->{$type_sequence}
&& !$nobreak )
{
return 0;
# if we find our closing brace..
elsif ($rLL->[$Ki]->[_TOKEN_] eq '}'
&& $rLL->[$Ki]->[_TYPE_] eq '}'
- && $rLL->[$Ki]->[_BLOCK_TYPE_]
+ && $rblock_type_of_seqno->{$type_sequence}
&& !$nobreak )
{
# line break logic in sub process_line_of_CODE.
# When the second line is input it gets recombined by
# process_line_of_CODE and passed to the output routines. The
- # output routines (set_continuation_breaks) do not break it apart
+ # output routines (break_long_lines) do not break it apart
# because the bond strengths are set to the highest possible value
# for grep/map/eval/sort blocks, so the first version gets output.
# It would be possible to fix this by changing bond strengths,
# but they are high to prevent errors in older versions of perl.
-
+ # See c100 for eval test.
if ( $Ki < $K_last
- && $rLL->[$Ki_nonblank]->[_TYPE_] eq '#'
- && !$is_sort_map_grep{$block_type} )
+ && $rLL->[$K_last]->[_TYPE_] eq '#'
+ && $rLL->[$K_last]->[_LEVEL_] == $rLL->[$Ki]->[_LEVEL_]
+ && !$rOpts_ignore_side_comment_lengths
+ && !$is_sort_map_grep_eval{$block_type}
+ && $K_last - $Ki_nonblank <= 2 )
{
+ # Only include the side comment for if/else/elsif/unless if it
+ # immediately follows (because the current '$rbrace_follower'
+ # logic for these will give an immediate brake after these
+ # closing braces). So for example a line like this
+ # if (...) { ... } ; # very long comment......
+ # will already break like this:
+ # if (...) { ... }
+ # ; # very long comment......
+ # so we do not need to include the length of the comment, which
+ # would break the block. Project 'bioperl' has coding like this.
+ if ( $block_type !~ /^(if|else|elsif|unless)$/
+ || $K_last == $Ki_nonblank )
+ {
+ $Ki_nonblank = $K_last;
+ $pos += $rLL->[$Ki_nonblank]->[_TOKEN_LENGTH_];
- $pos += $rLL->[$Ki_nonblank]->[_TOKEN_LENGTH_];
-
- if ( $Ki_nonblank > $Ki + 1 ) {
+ if ( $Ki_nonblank > $Ki + 1 ) {
- # source whitespace could be anything, assume
- # at least one space before the hash on output
- if ( $rLL->[ $Ki + 1 ]->[_TYPE_] eq 'b' ) {
- $pos += 1;
+ # source whitespace could be anything, assume
+ # at least one space before the hash on output
+ if ( $rLL->[ $Ki + 1 ]->[_TYPE_] eq 'b' ) {
+ $pos += 1;
+ }
+ else { $pos += $rLL->[ $Ki + 1 ]->[_TOKEN_LENGTH_] }
}
- else { $pos += $rLL->[ $Ki + 1 ]->[_TOKEN_LENGTH_] }
- }
- if ( $pos >= $maximum_line_length ) {
- return 0;
+ if ( $pos >= $maximum_line_length ) {
+ return 0;
+ }
}
}
$structural_indentation_level = $radjusted_levels->[$K_first];
}
- my $is_closing_block = $rLL->[$K_first]->[_TYPE_] eq '}'
- && $rLL->[$K_first]->[_BLOCK_TYPE_];
+ # record max structural depth for log file
+ if ( $structural_indentation_level > $self->[_maximum_BLOCK_level_] ) {
+ $self->[_maximum_BLOCK_level_] = $structural_indentation_level;
+ $self->[_maximum_BLOCK_level_at_line_] = $line_number;
+ }
+
+ my $type_sequence = $rLL->[$K_first]->[_TYPE_SEQUENCE_];
+ my $is_closing_block =
+ $type_sequence
+ && $self->[_rblock_type_of_seqno_]->{$type_sequence}
+ && $rLL->[$K_first]->[_TYPE_] eq '}';
if ( $guessed_indentation_level ne $structural_indentation_level ) {
$self->[_last_tabbing_disagreement_] = $line_number;
if ( !$self->[_first_brace_tabbing_disagreement_] ) {
$self->[_first_brace_tabbing_disagreement_] = $line_number;
}
-
}
if ( !$self->[_in_tabbing_disagreement_] ) {
sub set_forced_breakpoint {
my ( $self, $i ) = @_;
- return unless defined $i && $i >= 0;
+ # Set a breakpoint AFTER the token at index $i in the _to_go arrays.
- # Back up at a blank in case we need an = break.
- # This is a backup fix for cases like b932.
- if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
+ # Exceptions:
+ # - If the token at index $i is a blank, backup to $i-1 to
+ # get to the previous nonblank token.
+ # - For certain tokens, the break may be placed BEFORE the token
+ # at index $i, depending on user break preference settings.
+ # - If a break is made after an opening token, then a break will
+ # also be made before the corresponding closing token.
- # no breaks between welded tokens
- return if ( $total_weld_count && $self->is_welded_right_at_i($i) );
+ # Returns '$i_nonblank':
+ # = index of the token after which the breakpoint was actually placed
+ # = undef if breakpoint was not set.
+ my $i_nonblank;
- my $token = $tokens_to_go[$i];
- my $type = $types_to_go[$i];
+ if ( !defined($i) || $i < 0 ) {
- # For certain tokens, use user settings to decide if we break before or
- # after it
- if ( $break_before_or_after_token{$token}
+ # Calls with bad index $i are harmless but waste time and should
+ # be caught and eliminated during code development.
+ if (DEVEL_MODE) {
+ my ( $a, $b, $c ) = caller();
+ Fault(
+"Bad call to forced breakpoint from $a $b $c ; called with i=$i; please fix\n"
+ );
+ }
+ return;
+ }
+
+ # Break after token $i
+ $i_nonblank = $self->set_forced_breakpoint_AFTER($i);
+
+ # If we break at an opening container..break at the closing
+ my $set_closing;
+ if ( defined($i_nonblank)
+ && $is_opening_sequence_token{ $tokens_to_go[$i_nonblank] } )
+ {
+ $set_closing = 1;
+ $self->set_closing_breakpoint($i_nonblank);
+ }
+
+ DEBUG_FORCE && do {
+ my ( $a, $b, $c ) = caller();
+ my $msg =
+"FORCE $forced_breakpoint_count after call from $a $c with i=$i max=$max_index_to_go";
+ if ( !defined($i_nonblank) ) {
+ $i = "" unless defined($i);
+ $msg .= " but could not set break after i='$i'\n";
+ }
+ else {
+ $msg .= <<EOM;
+set break after $i_nonblank: tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]
+EOM
+ if ( defined($set_closing) ) {
+ $msg .=
+" Also set closing breakpoint corresponding to this token\n";
+ }
+ }
+ print STDOUT $msg;
+ };
+
+ return $i_nonblank;
+ }
+
+ sub set_forced_breakpoint_AFTER {
+ my ( $self, $i ) = @_;
+
+ # This routine is only called by sub set_forced_breakpoint and
+ # sub set_closing_breakpoint.
+
+ # Set a breakpoint AFTER the token at index $i in the _to_go arrays.
+
+ # Exceptions:
+ # - If the token at index $i is a blank, backup to $i-1 to
+ # get to the previous nonblank token.
+ # - For certain tokens, the break may be placed BEFORE the token
+ # at index $i, depending on user break preference settings.
+
+ # Returns:
+ # - the index of the token after which the break was set, or
+ # - undef if no break was set
+
+ return unless ( defined($i) && $i >= 0 );
+
+ # Back up at a blank so we have a token to examine.
+ # This was added to fix for cases like b932 involving an '=' break.
+ if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
+
+ # Never break between welded tokens
+ return
+ if ( $total_weld_count
+ && $self->[_rK_weld_right_]->{ $K_to_go[$i] } );
+
+ my $token = $tokens_to_go[$i];
+ my $type = $types_to_go[$i];
+
+ # For certain tokens, use user settings to decide if we break before or
+ # after it
+ if ( $break_before_or_after_token{$token}
&& ( $type eq $token || $type eq 'k' ) )
{
if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
if ( $i >= 0 && $i <= $max_index_to_go ) {
my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
- DEBUG_FORCE && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
-"FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
- };
-
- ######################################################################
- # NOTE: if we call set_closing_breakpoint below it will then call
- # this routing back. So there is the possibility of an infinite
- # loop if a programming error is made. As a precaution, I have
- # added a check on the forced_breakpoint flag, so that we won't
- # keep trying to set it. That will give additional protection
- # against a loop.
- ######################################################################
-
if ( $i_nonblank >= 0
&& $nobreak_to_go[$i_nonblank] == 0
&& !$forced_breakpoint_to_go[$i_nonblank] )
$forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ]
= $i_nonblank;
- # if we break at an opening container..break at the closing
- if ( $is_opening_sequence_token{ $tokens_to_go[$i_nonblank] } )
- {
- $self->set_closing_breakpoint($i_nonblank);
- }
+ # success
+ return $i_nonblank;
}
}
return;
my ( $a, $b, $c ) = caller();
# Bad call, can only be due to a recent programming change.
- # Better stop here.
Fault(
"Program Bug: undo_forced_breakpoint_stack from $a $c has bad i=$i_start "
- );
+ ) if (DEVEL_MODE);
+ return;
}
while ( $forced_breakpoint_undo_count > $i_start ) {
if ( $mate_index_to_go[$i_break] >= 0 ) {
- # CAUTION: infinite recursion possible here:
- # set_closing_breakpoint calls set_forced_breakpoint, and
- # set_forced_breakpoint call set_closing_breakpoint
- # ( test files attrib.t, BasicLyx.pm.html).
- # Don't reduce the '2' in the statement below
+ # Don't reduce the '2' in the statement below.
+ # Test files: attrib.t, BasicLyx.pm.html
if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
# break before } ] and ), but sub set_forced_breakpoint will decide
# to break before or after a ? and :
my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
- $self->set_forced_breakpoint(
+ $self->set_forced_breakpoint_AFTER(
$mate_index_to_go[$i_break] - $inc );
}
}
my $peak_batch_size;
my $batch_count;
+ # variables to keep track of unbalanced containers.
+ my %saved_opening_indentation;
+ my @unmatched_opening_indexes_in_this_batch;
+
sub initialize_grind_batch_of_CODE {
- @nonblank_lines_at_depth = ();
- $peak_batch_size = 0;
- $batch_count = 0;
+ @nonblank_lines_at_depth = ();
+ $peak_batch_size = 0;
+ $batch_count = 0;
+ %saved_opening_indentation = ();
return;
}
use constant DEBUG_GRIND => 0;
+ sub check_grind_input {
+
+ # Check for valid input to sub grind_batch_of_CODE. An error here
+ # would most likely be due to an error in 'sub store_token_to_go'.
+ my ($self) = @_;
+
+ # Be sure there are tokens in the batch
+ if ( $max_index_to_go < 0 ) {
+ Fault(<<EOM);
+sub grind incorrectly called with max_index_to_go=$max_index_to_go
+EOM
+ }
+ my $Klimit = $self->[_Klimit_];
+
+ # The local batch tokens must be a continous part of the global token
+ # array.
+ my $KK;
+ foreach my $ii ( 0 .. $max_index_to_go ) {
+
+ my $Km = $KK;
+
+ $KK = $K_to_go[$ii];
+ if ( !defined($KK) || $KK < 0 || $KK > $Klimit ) {
+ $KK = '(undef)' unless defined($KK);
+ Fault(<<EOM);
+at batch index at i=$ii, the value of K_to_go[$ii] = '$KK' is out of the valid range (0 - $Klimit)
+EOM
+ }
+
+ if ( $ii > 0 && $KK != $Km + 1 ) {
+ my $im = $ii - 1;
+ Fault(<<EOM);
+Non-sequential K indexes: i=$im has Km=$Km; but i=$ii has K=$KK; expecting K = Km+1
+EOM
+ }
+ }
+ return;
+ }
+
sub grind_batch_of_CODE {
my ($self) = @_;
- my $file_writer_object = $self->[_file_writer_object_];
my $this_batch = $self->[_this_batch_];
$batch_count++;
- my $starting_in_quote = $this_batch->[_starting_in_quote_];
- my $ending_in_quote = $this_batch->[_ending_in_quote_];
- my $is_static_block_comment = $this_batch->[_is_static_block_comment_];
- my $rK_to_go = $this_batch->[_rK_to_go_];
- my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
-
- my $rLL = $self->[_rLL_];
+ $self->check_grind_input() if (DEVEL_MODE);
# This routine is only called from sub flush_batch_of_code, so that
# routine is a better spot for debugging.
$token = $tokens_to_go[$max_index_to_go];
$type = $types_to_go[$max_index_to_go];
}
- my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
+ my $output_str = "";
+ if ( $max_index_to_go > 20 ) {
+ my $mm = $max_index_to_go - 10;
+ $output_str = join( "", @tokens_to_go[ 0 .. 10 ] ) . " ... "
+ . join( "", @tokens_to_go[ $mm .. $max_index_to_go ] );
+ }
+ else {
+ $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
+ }
print STDERR <<EOM;
grind got batch number $batch_count with $max_index_to_go tokens, last type '$type' tok='$token', text:
$output_str
EOM
};
- # Safety check - shouldn't happen. The calling routine must not call
- # here unless there are tokens in the batch to be processed. This
- # fault can only be triggered by a recent programming change.
- if ( $max_index_to_go < 0 ) {
- Fault(
-"sub grind incorrectly called with max_index_to_go=$max_index_to_go"
- );
+ return if ( $max_index_to_go < 0 );
+
+ $self->set_lp_indentation()
+ if ($rOpts_line_up_parentheses);
+
+ #----------------------------
+ # Shortcut for block comments
+ #----------------------------
+ if (
+ $max_index_to_go == 0
+ && $types_to_go[0] eq '#'
+
+ # this shortcut does not work for -lp yet
+ && !$rOpts_line_up_parentheses
+ )
+ {
+ my $ibeg = 0;
+ $this_batch->[_ri_first_] = [$ibeg];
+ $this_batch->[_ri_last_] = [$ibeg];
+ $this_batch->[_peak_batch_size_] = $peak_batch_size;
+ $this_batch->[_do_not_pad_] = 0;
+ $this_batch->[_batch_count_] = $batch_count;
+ $this_batch->[_rix_seqno_controlling_ci_] = [];
+
+ $self->convey_batch_to_vertical_aligner();
+
+ my $level = $levels_to_go[$ibeg];
+ $self->[_last_last_line_leading_level_] =
+ $self->[_last_line_leading_level_];
+ $self->[_last_line_leading_type_] = $types_to_go[$ibeg];
+ $self->[_last_line_leading_level_] = $level;
+ $nonblank_lines_at_depth[$level] = 1;
+ return;
}
- # Initialize some batch variables
+ #-------------
+ # Normal route
+ #-------------
+
+ my $rLL = $self->[_rLL_];
+ my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
+ my $rwant_container_open = $self->[_rwant_container_open_];
+
+ my $starting_in_quote = $this_batch->[_starting_in_quote_];
+ my $ending_in_quote = $this_batch->[_ending_in_quote_];
+ my $is_static_block_comment = $this_batch->[_is_static_block_comment_];
+
+ #-------------------------------------------------------
+ # Loop over the batch to initialize some batch variables
+ #-------------------------------------------------------
my $comma_count_in_batch = 0;
my $ilast_nonblank = -1;
my @colon_list;
my @ix_seqno_controlling_ci;
+ my %comma_arrow_count = ();
+ my $comma_arrow_count_contained = 0;
+ my @unmatched_closing_indexes_in_this_batch;
+
+ @unmatched_opening_indexes_in_this_batch = ();
+
for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
$bond_strength_to_go[$i] = 0;
$iprev_to_go[$i] = $ilast_nonblank;
# This is a good spot to efficiently collect information needed
# for breaking lines...
- if ( $type eq ',' ) { $comma_count_in_batch++; }
-
- # gather info needed by sub set_continuation_breaks
- my $seqno = $type_sequence_to_go[$i];
- if ($seqno) {
+ # gather info needed by sub break_long_lines
+ if ( $type_sequence_to_go[$i] ) {
+ my $seqno = $type_sequence_to_go[$i];
+ my $token = $tokens_to_go[$i];
# remember indexes of any tokens controlling xci
# in this batch. This list is needed by sub undo_ci.
push @ix_seqno_controlling_ci, $i;
}
- if ( $type eq '?' ) {
- push @colon_list, $type;
+ if ( $is_opening_sequence_token{$token} ) {
+ if ( $rwant_container_open->{$seqno} ) {
+ $self->set_forced_breakpoint($i);
+ }
+ push @unmatched_opening_indexes_in_this_batch, $i;
+ if ( $type eq '?' ) {
+ push @colon_list, $type;
+ }
}
- elsif ( $type eq ':' ) {
- push @colon_list, $type;
+ elsif ( $is_closing_sequence_token{$token} ) {
+
+ if ( $i > 0 && $rwant_container_open->{$seqno} ) {
+ $self->set_forced_breakpoint( $i - 1 );
+ }
+
+ my $i_mate =
+ pop @unmatched_opening_indexes_in_this_batch;
+ if ( defined($i_mate) && $i_mate >= 0 ) {
+ if ( $type_sequence_to_go[$i_mate] ==
+ $type_sequence_to_go[$i] )
+ {
+ $mate_index_to_go[$i] = $i_mate;
+ $mate_index_to_go[$i_mate] = $i;
+ my $seqno = $type_sequence_to_go[$i];
+ if ( $comma_arrow_count{$seqno} ) {
+ $comma_arrow_count_contained +=
+ $comma_arrow_count{$seqno};
+ }
+ }
+ else {
+ push @unmatched_opening_indexes_in_this_batch,
+ $i_mate;
+ push @unmatched_closing_indexes_in_this_batch,
+ $i;
+ }
+ }
+ else {
+ push @unmatched_closing_indexes_in_this_batch, $i;
+ }
+ if ( $type eq ':' ) {
+ push @colon_list, $type;
+ }
+ } ## end elsif ( $is_closing_sequence_token...)
+
+ } ## end if ($seqno)
+
+ elsif ( $type eq ',' ) { $comma_count_in_batch++; }
+ elsif ( $tokens_to_go[$i] eq '=>' ) {
+ if (@unmatched_opening_indexes_in_this_batch) {
+ my $j = $unmatched_opening_indexes_in_this_batch[-1];
+ my $seqno = $type_sequence_to_go[$j];
+ $comma_arrow_count{$seqno}++;
}
}
- }
- }
-
- my $comma_arrow_count_contained =
- $self->match_opening_and_closing_tokens();
+ } ## end if ( $type ne 'b' )
+ } ## end for ( my $i = 0 ; $i <=...)
- # tell the -lp option we are outputting a batch so it can close
- # any unfinished items in its stack
- finish_lp_batch();
+ my $is_unbalanced_batch = @unmatched_opening_indexes_in_this_batch +
+ @unmatched_closing_indexes_in_this_batch;
+ #------------------------
+ # Set special breakpoints
+ #------------------------
# If this line ends in a code block brace, set breaks at any
# previous closing code block braces to breakup a chain of code
# blocks on one line. This is very rare but can happen for
}
}
+ #-----------------------------------------------
+ # insertion of any blank lines before this batch
+ #-----------------------------------------------
+
my $imin = 0;
my $imax = $max_index_to_go;
if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
- # anything left to write?
- if ( $imin <= $imax ) {
-
- my $last_line_leading_type = $self->[_last_line_leading_type_];
- my $last_line_leading_level = $self->[_last_line_leading_level_];
- my $last_last_line_leading_level =
- $self->[_last_last_line_leading_level_];
-
- # add a blank line before certain key types but not after a comment
- if ( $last_line_leading_type ne '#' ) {
- my $want_blank = 0;
- my $leading_token = $tokens_to_go[$imin];
- my $leading_type = $types_to_go[$imin];
-
- # blank lines before subs except declarations and one-liners
- if ( $leading_type eq 'i' ) {
- if ( $leading_token =~ /$SUB_PATTERN/ ) {
- $want_blank = $rOpts->{'blank-lines-before-subs'}
- if ( terminal_type_i( $imin, $imax ) !~ /^[\;\}]$/ );
- }
+ if ( $imin > $imax ) {
+ if (DEVEL_MODE) {
+ my $K0 = $K_to_go[0];
+ my $lno = "";
+ if ( defined($K0) ) { $lno = $rLL->[$K0]->[_LINE_INDEX_] + 1 }
+ Fault(<<EOM);
+Strange: received batch containing only blanks near input line $lno: after trimming imin=$imin, imax=$imax
+EOM
+ }
+ return;
+ }
- # break before all package declarations
- elsif ( substr( $leading_token, 0, 8 ) eq 'package ' ) {
- $want_blank = $rOpts->{'blank-lines-before-packages'};
- }
- }
+ my $last_line_leading_type = $self->[_last_line_leading_type_];
+ my $last_line_leading_level = $self->[_last_line_leading_level_];
+ my $last_last_line_leading_level =
+ $self->[_last_last_line_leading_level_];
- # break before certain key blocks except one-liners
- if ( $leading_type eq 'k' ) {
- if ( $leading_token eq 'BEGIN' || $leading_token eq 'END' )
- {
- $want_blank = $rOpts->{'blank-lines-before-subs'}
- if ( terminal_type_i( $imin, $imax ) ne '}' );
- }
+ # add a blank line before certain key types but not after a comment
+ if ( $last_line_leading_type ne '#' ) {
+ my $want_blank = 0;
+ my $leading_token = $tokens_to_go[$imin];
+ my $leading_type = $types_to_go[$imin];
- # Break before certain block types if we haven't had a
- # break at this level for a while. This is the
- # difficult decision..
- elsif ($last_line_leading_type ne 'b'
- && $leading_token =~
- /^(unless|if|while|until|for|foreach)$/ )
- {
- my $lc =
- $nonblank_lines_at_depth[$last_line_leading_level];
- if ( !defined($lc) ) { $lc = 0 }
+ # blank lines before subs except declarations and one-liners
+ if ( $leading_type eq 'i' ) {
+ if (
- # patch for RT #128216: no blank line inserted at a level
- # change
- if ( $levels_to_go[$imin] != $last_line_leading_level )
- {
- $lc = 0;
- }
+ # quick check
+ (
+ substr( $leading_token, 0, 3 ) eq 'sub'
+ || $rOpts_sub_alias_list
+ )
- $want_blank =
- $rOpts->{'blanks-before-blocks'}
- && $lc >= $rOpts->{'long-block-line-count'}
- && $self->consecutive_nonblank_lines() >=
- $rOpts->{'long-block-line-count'}
- && terminal_type_i( $imin, $imax ) ne '}';
- }
+ # slow check
+ && $leading_token =~ /$SUB_PATTERN/
+ )
+ {
+ $want_blank = $rOpts->{'blank-lines-before-subs'}
+ if ( terminal_type_i( $imin, $imax ) !~ /^[\;\}\,]$/ );
}
- # Check for blank lines wanted before a closing brace
- if ( $leading_token eq '}' ) {
- if ( $rOpts->{'blank-lines-before-closing-block'}
- && $block_type_to_go[$imin]
- && $block_type_to_go[$imin] =~
- /$blank_lines_before_closing_block_pattern/ )
- {
- my $nblanks =
- $rOpts->{'blank-lines-before-closing-block'};
- if ( $nblanks > $want_blank ) {
- $want_blank = $nblanks;
- }
- }
+ # break before all package declarations
+ elsif ( substr( $leading_token, 0, 8 ) eq 'package ' ) {
+ $want_blank = $rOpts->{'blank-lines-before-packages'};
}
+ }
- if ($want_blank) {
+ # break before certain key blocks except one-liners
+ if ( $leading_type eq 'k' ) {
+ if ( $leading_token eq 'BEGIN' || $leading_token eq 'END' ) {
+ $want_blank = $rOpts->{'blank-lines-before-subs'}
+ if ( terminal_type_i( $imin, $imax ) ne '}' );
+ }
+
+ # Break before certain block types if we haven't had a
+ # break at this level for a while. This is the
+ # difficult decision..
+ elsif ($last_line_leading_type ne 'b'
+ && $is_if_unless_while_until_for_foreach{$leading_token} )
+ {
+ my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
+ if ( !defined($lc) ) { $lc = 0 }
- # future: send blank line down normal path to VerticalAligner
- $self->flush_vertical_aligner();
- $file_writer_object->require_blank_code_lines($want_blank);
+ # patch for RT #128216: no blank line inserted at a level
+ # change
+ if ( $levels_to_go[$imin] != $last_line_leading_level ) {
+ $lc = 0;
+ }
+
+ $want_blank =
+ $rOpts->{'blanks-before-blocks'}
+ && $lc >= $rOpts->{'long-block-line-count'}
+ && $self->consecutive_nonblank_lines() >=
+ $rOpts->{'long-block-line-count'}
+ && terminal_type_i( $imin, $imax ) ne '}';
}
}
- # update blank line variables and count number of consecutive
- # non-blank, non-comment lines at this level
- $last_last_line_leading_level = $last_line_leading_level;
- $last_line_leading_level = $levels_to_go[$imin];
- if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
- $last_line_leading_type = $types_to_go[$imin];
- if ( $last_line_leading_level == $last_last_line_leading_level
- && $last_line_leading_type ne 'b'
- && $last_line_leading_type ne '#'
- && defined( $nonblank_lines_at_depth[$last_line_leading_level] )
- )
- {
- $nonblank_lines_at_depth[$last_line_leading_level]++;
+ # Check for blank lines wanted before a closing brace
+ if ( $leading_token eq '}' ) {
+ if ( $rOpts->{'blank-lines-before-closing-block'}
+ && $block_type_to_go[$imin]
+ && $block_type_to_go[$imin] =~
+ /$blank_lines_before_closing_block_pattern/ )
+ {
+ my $nblanks = $rOpts->{'blank-lines-before-closing-block'};
+ if ( $nblanks > $want_blank ) {
+ $want_blank = $nblanks;
+ }
+ }
}
- else {
- $nonblank_lines_at_depth[$last_line_leading_level] = 1;
+
+ if ($want_blank) {
+
+ # future: send blank line down normal path to VerticalAligner
+ $self->flush_vertical_aligner();
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->require_blank_code_lines($want_blank);
}
+ }
- $self->[_last_line_leading_type_] = $last_line_leading_type;
- $self->[_last_line_leading_level_] = $last_line_leading_level;
- $self->[_last_last_line_leading_level_] =
- $last_last_line_leading_level;
+ # update blank line variables and count number of consecutive
+ # non-blank, non-comment lines at this level
+ $last_last_line_leading_level = $last_line_leading_level;
+ $last_line_leading_level = $levels_to_go[$imin];
+ if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
+ $last_line_leading_type = $types_to_go[$imin];
+ if ( $last_line_leading_level == $last_last_line_leading_level
+ && $last_line_leading_type ne 'b'
+ && $last_line_leading_type ne '#'
+ && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
+ {
+ $nonblank_lines_at_depth[$last_line_leading_level]++;
+ }
+ else {
+ $nonblank_lines_at_depth[$last_line_leading_level] = 1;
+ }
- # Flag to remember if we called sub 'pad_array_to_go'.
- # Some routines (scan_list(), set_continuation_breaks() ) need some
- # extra tokens added at the end of the batch. Most batches do not
- # use these routines, so we will avoid calling 'pad_array_to_go'
- # unless it is needed.
- my $called_pad_array_to_go;
+ $self->[_last_line_leading_type_] = $last_line_leading_type;
+ $self->[_last_line_leading_level_] = $last_line_leading_level;
+ $self->[_last_last_line_leading_level_] = $last_last_line_leading_level;
- # set all forced breakpoints for good list formatting
- my $is_long_line = $max_index_to_go > 0
- && $self->excess_line_length( $imin, $max_index_to_go ) > 0;
+ #--------------------------
+ # scan lists and long lines
+ #--------------------------
- my $old_line_count_in_batch =
- $max_index_to_go == 0
- ? 1
- : $self->get_old_line_count( $K_to_go[0],
- $K_to_go[$max_index_to_go] );
+ # Flag to remember if we called sub 'pad_array_to_go'.
+ # Some routines (break_lists(), break_long_lines() ) need some
+ # extra tokens added at the end of the batch. Most batches do not
+ # use these routines, so we will avoid calling 'pad_array_to_go'
+ # unless it is needed.
+ my $called_pad_array_to_go;
- if (
- $is_long_line
- || $old_line_count_in_batch > 1
+ # set all forced breakpoints for good list formatting
+ my $is_long_line = $max_index_to_go > 0
+ && $self->excess_line_length( $imin, $max_index_to_go ) > 0;
- # must always call scan_list() with unbalanced batches because
- # it is maintaining some stacks
- || is_unbalanced_batch()
+ my $old_line_count_in_batch = 1;
+ if ( $max_index_to_go > 0 ) {
+ my $Kbeg = $K_to_go[0];
+ my $Kend = $K_to_go[$max_index_to_go];
+ $old_line_count_in_batch +=
+ $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_];
+ }
- # call scan_list if we might want to break at commas
- || (
- $comma_count_in_batch
- && ( $rOpts_maximum_fields_per_table > 0
- && $rOpts_maximum_fields_per_table <=
- $comma_count_in_batch
- || $rOpts_comma_arrow_breakpoints == 0 )
- )
+ if (
+ $is_long_line
+ || $old_line_count_in_batch > 1
+
+ # must always call break_lists() with unbalanced batches because
+ # it is maintaining some stacks
+ || $is_unbalanced_batch
+
+ # call break_lists if we might want to break at commas
+ || (
+ $comma_count_in_batch
+ && ( $rOpts_maximum_fields_per_table > 0
+ && $rOpts_maximum_fields_per_table <= $comma_count_in_batch
+ || $rOpts_comma_arrow_breakpoints == 0 )
+ )
- # call scan_list if user may want to break open some one-line
- # hash references
- || ( $comma_arrow_count_contained
- && $rOpts_comma_arrow_breakpoints != 3 )
- )
- {
- # add a couple of extra terminal blank tokens
- $self->pad_array_to_go();
- $called_pad_array_to_go = 1;
+ # call break_lists if user may want to break open some one-line
+ # hash references
+ || ( $comma_arrow_count_contained
+ && $rOpts_comma_arrow_breakpoints != 3 )
+ )
+ {
+ # add a couple of extra terminal blank tokens
+ $self->pad_array_to_go();
+ $called_pad_array_to_go = 1;
- ## This caused problems in one version of perl for unknown reasons:
- ## $saw_good_break ||= scan_list();
- my $sgb = $self->scan_list($is_long_line);
- $saw_good_break ||= $sgb;
- }
+ my $sgb = $self->break_lists($is_long_line);
+ $saw_good_break ||= $sgb;
+ }
- # let $ri_first and $ri_last be references to lists of
- # first and last tokens of line fragments to output..
- my ( $ri_first, $ri_last );
+ # let $ri_first and $ri_last be references to lists of
+ # first and last tokens of line fragments to output..
+ my ( $ri_first, $ri_last );
- # write a single line if..
- if (
+ #-------------------------
+ # write a single line if..
+ #-------------------------
+ if (
- # we aren't allowed to add any newlines
- !$rOpts_add_newlines
+ # we aren't allowed to add any newlines
+ !$rOpts_add_newlines
- # or,
- || (
+ # or,
+ || (
- # this line is 'short'
- !$is_long_line
+ # this line is 'short'
+ !$is_long_line
- # and we didn't see a good breakpoint
- && !$saw_good_break
+ # and we didn't see a good breakpoint
+ && !$saw_good_break
- # and we don't already have an interior breakpoint
- && !get_forced_breakpoint_count()
- )
- )
- {
- @{$ri_first} = ($imin);
- @{$ri_last} = ($imax);
- }
+ # and we don't already have an interior breakpoint
+ && !get_forced_breakpoint_count()
+ )
+ )
+ {
+ @{$ri_first} = ($imin);
+ @{$ri_last} = ($imax);
+ }
- # otherwise use multiple lines
- else {
+ #-----------------------------
+ # otherwise use multiple lines
+ #-----------------------------
+ else {
- # add a couple of extra terminal blank tokens if we haven't
- # already done so
- $self->pad_array_to_go() unless ($called_pad_array_to_go);
+ # add a couple of extra terminal blank tokens if we haven't
+ # already done so
+ $self->pad_array_to_go() unless ($called_pad_array_to_go);
- ( $ri_first, $ri_last ) =
- $self->set_continuation_breaks( $saw_good_break,
- \@colon_list );
+ ( $ri_first, $ri_last ) =
+ $self->break_long_lines( $saw_good_break, \@colon_list );
- $self->break_all_chain_tokens( $ri_first, $ri_last );
+ $self->break_all_chain_tokens( $ri_first, $ri_last );
- $self->break_equals( $ri_first, $ri_last );
+ $self->break_equals( $ri_first, $ri_last );
- # now we do a correction step to clean this up a bit
- # (The only time we would not do this is for debugging)
- if ($rOpts_recombine) {
- ( $ri_first, $ri_last ) =
- $self->recombine_breakpoints( $ri_first, $ri_last );
- }
+ # now we do a correction step to clean this up a bit
+ # (The only time we would not do this is for debugging)
+ $self->recombine_breakpoints( $ri_first, $ri_last )
+ if ( $rOpts_recombine && @{$ri_first} > 1 );
- $self->insert_final_ternary_breaks( $ri_first, $ri_last )
- if (@colon_list);
- }
+ $self->insert_final_ternary_breaks( $ri_first, $ri_last )
+ if (@colon_list);
+ }
- $self->insert_breaks_before_list_opening_containers( $ri_first,
- $ri_last )
- if ( %break_before_container_types && $max_index_to_go > 0 );
+ $self->insert_breaks_before_list_opening_containers( $ri_first,
+ $ri_last )
+ if ( %break_before_container_types && $max_index_to_go > 0 );
+
+ #-------------------
+ # -lp corrector step
+ #-------------------
+ my $do_not_pad = 0;
+ if ($rOpts_line_up_parentheses) {
+ $do_not_pad = $self->correct_lp_indentation( $ri_first, $ri_last );
+ }
- # do corrector step if -lp option is used
- my $do_not_pad = 0;
- if ($rOpts_line_up_parentheses) {
- $do_not_pad =
- $self->correct_lp_indentation( $ri_first, $ri_last );
+ #--------------------------
+ # unmask phantom semicolons
+ #--------------------------
+ if ( !$tokens_to_go[$imax] && $types_to_go[$imax] eq ';' ) {
+ my $i = $imax;
+ my $tok = ';';
+ my $tok_len = 1;
+ if ( $want_left_space{';'} != WS_NO ) {
+ $tok = ' ;';
+ $tok_len = 2;
}
+ $tokens_to_go[$i] = $tok;
+ $token_lengths_to_go[$i] = $tok_len;
+ my $KK = $K_to_go[$i];
+ $rLL->[$KK]->[_TOKEN_] = $tok;
+ $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
+ my $line_number = 1 + $rLL->[$KK]->[_LINE_INDEX_];
+ $self->note_added_semicolon($line_number);
- # unmask any invisible line-ending semicolon. They were placed by
- # sub respace_tokens but we only now know if we actually need them.
- if ( !$tokens_to_go[$imax] && $types_to_go[$imax] eq ';' ) {
- my $i = $imax;
- my $tok = ';';
- my $tok_len = 1;
- if ( $want_left_space{';'} != WS_NO ) {
- $tok = ' ;';
- $tok_len = 2;
- }
- $tokens_to_go[$i] = $tok;
- $token_lengths_to_go[$i] = $tok_len;
- my $KK = $K_to_go[$i];
- $rLL->[$KK]->[_TOKEN_] = $tok;
- $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
- my $line_number = 1 + $self->get_old_line_index($KK);
- $self->note_added_semicolon($line_number);
- }
-
- if ( $rOpts_one_line_block_semicolons == 0 ) {
- $self->delete_one_line_semicolons( $ri_first, $ri_last );
- }
-
- # The line breaks for this batch of code have been finalized. Now we
- # can to package the results for further processing. We will switch
- # from the local '_to_go' buffer arrays (i-index) back to the global
- # token arrays (K-index) at this point.
- my $rlines_K;
- my $index_error;
- for ( my $n = 0 ; $n < @{$ri_first} ; $n++ ) {
- my $ibeg = $ri_first->[$n];
- my $Kbeg = $K_to_go[$ibeg];
- my $iend = $ri_last->[$n];
- my $Kend = $K_to_go[$iend];
- if ( $iend - $ibeg != $Kend - $Kbeg ) {
- $index_error = $n unless defined($index_error);
- }
- push @{$rlines_K},
- [ $Kbeg, $Kend, $forced_breakpoint_to_go[$iend] ];
- }
-
- # Check correctness of the mapping between the i and K token
- # indexes. (The K index is the global index, the i index is the
- # batch index). It is important to do this check because an error
- # would be disastrous. The reason that we should never see an
- # index error here is that sub 'store_token_to_go' has a check to
- # make sure that the indexes in batches remain continuous. Since
- # sub 'store_token_to_go' controls feeding tokens into batches,
- # no index discrepancies should occur unless a recent programming
- # change has introduced a bug.
- if ( defined($index_error) ) {
-
- # Temporary debug code - should never get here
- for ( my $n = 0 ; $n < @{$ri_first} ; $n++ ) {
- my $ibeg = $ri_first->[$n];
- my $Kbeg = $K_to_go[$ibeg];
- my $iend = $ri_last->[$n];
- my $Kend = $K_to_go[$iend];
- my $idiff = $iend - $ibeg;
- my $Kdiff = $Kend - $Kbeg;
- print STDERR <<EOM;
-line $n, irange $ibeg-$iend = $idiff, Krange $Kbeg-$Kend = $Kdiff;
-EOM
- }
- Fault(
- "Index error at line $index_error; i and K ranges differ");
+ foreach ( $imax .. $max_index_to_go ) {
+ $summed_lengths_to_go[ $_ + 1 ] += $tok_len;
}
+ }
- $this_batch->[_rlines_K_] = $rlines_K;
- $this_batch->[_ibeg0_] = $ri_first->[0];
- $this_batch->[_peak_batch_size_] = $peak_batch_size;
- $this_batch->[_do_not_pad_] = $do_not_pad;
- $this_batch->[_batch_count_] = $batch_count;
- $this_batch->[_rix_seqno_controlling_ci_] =
- \@ix_seqno_controlling_ci;
+ if ( $rOpts_one_line_block_semicolons == 0 ) {
+ $self->delete_one_line_semicolons( $ri_first, $ri_last );
+ }
- $self->send_lines_to_vertical_aligner();
+ #--------------------
+ # ship this batch out
+ #--------------------
+ $this_batch->[_ri_first_] = $ri_first;
+ $this_batch->[_ri_last_] = $ri_last;
+ $this_batch->[_peak_batch_size_] = $peak_batch_size;
+ $this_batch->[_do_not_pad_] = $do_not_pad;
+ $this_batch->[_batch_count_] = $batch_count;
+ $this_batch->[_rix_seqno_controlling_ci_] = \@ix_seqno_controlling_ci;
- # Insert any requested blank lines after an opening brace. We have
- # to skip back before any side comment to find the terminal token
- my $iterm;
- for ( $iterm = $imax ; $iterm >= $imin ; $iterm-- ) {
- next if $types_to_go[$iterm] eq '#';
- next if $types_to_go[$iterm] eq 'b';
- last;
- }
+ $self->convey_batch_to_vertical_aligner();
- # write requested number of blank lines after an opening block brace
- if ( $iterm >= $imin && $types_to_go[$iterm] eq '{' ) {
- if ( $rOpts->{'blank-lines-after-opening-block'}
- && $block_type_to_go[$iterm]
- && $block_type_to_go[$iterm] =~
- /$blank_lines_after_opening_block_pattern/ )
- {
- my $nblanks = $rOpts->{'blank-lines-after-opening-block'};
- $self->flush_vertical_aligner();
- $file_writer_object->require_blank_code_lines($nblanks);
+ #-------------------------------------------------------------------
+ # Write requested number of blank lines after an opening block brace
+ #-------------------------------------------------------------------
+ if ($rOpts_blank_lines_after_opening_block) {
+ my $iterm = $imax;
+ if ( $types_to_go[$iterm] eq '#' && $iterm > $imin ) {
+ $iterm -= 1;
+ if ( $types_to_go[$iterm] eq 'b' && $iterm > $imin ) {
+ $iterm -= 1;
}
}
+
+ if ( $types_to_go[$iterm] eq '{'
+ && $block_type_to_go[$iterm]
+ && $block_type_to_go[$iterm] =~
+ /$blank_lines_after_opening_block_pattern/ )
+ {
+ my $nblanks = $rOpts_blank_lines_after_opening_block;
+ $self->flush_vertical_aligner();
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->require_blank_code_lines($nblanks);
+ }
}
# Remember the largest batch size processed. This is needed by the
return;
}
-} ## end closure grind_batch_of_CODE
-
-{ ## begin closure match_opening_and_closing_tokens
-
- # closure to keep track of unbalanced containers.
- # arrays shared by the routines in this block:
- my %saved_opening_indentation;
- my @unmatched_opening_indexes_in_this_batch;
- my @unmatched_closing_indexes_in_this_batch;
- my %comma_arrow_count;
-
- sub initialize_saved_opening_indentation {
- %saved_opening_indentation = ();
- return;
- }
-
- sub is_unbalanced_batch {
- return @unmatched_opening_indexes_in_this_batch +
- @unmatched_closing_indexes_in_this_batch;
- }
-
- sub match_opening_and_closing_tokens {
-
- # Match up indexes of opening and closing braces, etc, in this batch.
- # This has to be done after all tokens are stored because unstoring
- # of tokens would otherwise cause trouble.
-
- my ($self) = @_;
- my $rwant_container_open = $self->[_rwant_container_open_];
- my $rparent_of_seqno = $self->[_rparent_of_seqno_];
-
- @unmatched_opening_indexes_in_this_batch = ();
- @unmatched_closing_indexes_in_this_batch = ();
- %comma_arrow_count = ();
- my $comma_arrow_count_contained = 0;
- my $parent_seqno = $self->parent_seqno_by_K( $K_to_go[0] );
-
- foreach my $i ( 0 .. $max_index_to_go ) {
- $parent_seqno_to_go[$i] = $parent_seqno;
-
- my $seqno = $type_sequence_to_go[$i];
- if ($seqno) {
- my $token = $tokens_to_go[$i];
- if ( $is_opening_sequence_token{$token} ) {
- if ( $is_opening_token{$token} ) {
- $parent_seqno = $seqno;
- }
-
- if ( $rwant_container_open->{$seqno} ) {
- $self->set_forced_breakpoint($i);
- }
-
- push @unmatched_opening_indexes_in_this_batch, $i;
- }
- elsif ( $is_closing_sequence_token{$token} ) {
-
- if ( $is_closing_token{$token} ) {
- $parent_seqno = $rparent_of_seqno->{$seqno};
- $parent_seqno = SEQ_ROOT unless defined($parent_seqno);
- $parent_seqno_to_go[$i] = $parent_seqno;
- }
-
- if ( $rwant_container_open->{$seqno} ) {
- $self->set_forced_breakpoint( $i - 1 );
- }
-
- my $i_mate = pop @unmatched_opening_indexes_in_this_batch;
- if ( defined($i_mate) && $i_mate >= 0 ) {
- if ( $type_sequence_to_go[$i_mate] ==
- $type_sequence_to_go[$i] )
- {
- $mate_index_to_go[$i] = $i_mate;
- $mate_index_to_go[$i_mate] = $i;
- my $seqno = $type_sequence_to_go[$i];
- if ( $comma_arrow_count{$seqno} ) {
- $comma_arrow_count_contained +=
- $comma_arrow_count{$seqno};
- }
- }
- else {
- push @unmatched_opening_indexes_in_this_batch,
- $i_mate;
- push @unmatched_closing_indexes_in_this_batch, $i;
- }
- }
- else {
- push @unmatched_closing_indexes_in_this_batch, $i;
- }
- }
- }
- elsif ( $tokens_to_go[$i] eq '=>' ) {
- if (@unmatched_opening_indexes_in_this_batch) {
- my $j = $unmatched_opening_indexes_in_this_batch[-1];
- my $seqno = $type_sequence_to_go[$j];
- $comma_arrow_count{$seqno}++;
- }
- }
- }
-
- return $comma_arrow_count_contained;
- }
sub save_opening_indentation {
return ( $indent, $offset, $is_leading, $exists );
}
-} ## end closure match_opening_and_closing_tokens
+} ## end closure grind_batch_of_CODE
sub lookup_opening_indentation {
if ( !@{$ri_last} ) {
# An error here implies a bug introduced by a recent program change.
- # Every batch of code has lines.
- Fault("Error in opening_indentation: no lines");
- return;
+ # Every batch of code has lines, so this should never happen.
+ if (DEVEL_MODE) {
+ Fault("Error in opening_indentation: no lines");
+ }
+ return ( 0, 0, 0 );
}
my $nline = $rindentation_list->[0]; # line number of previous lookup
# We better stop here.
else {
my $i_last_line = $ri_last->[-1];
- Fault(<<EOM);
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
Program bug in call to lookup_opening_indentation - index out of range
called with index i_opening=$i_opening > $i_last_line = max index of last line
This batch has max index = $max_index_to_go,
EOM
- report_definite_bug(); # old coding, will not get here
+ }
$nline = $#{$ri_last};
}
$rindentation_list->[0] =
- $nline; # save line number to start looking next call
+ $nline; # save line number to start looking next call
my $ibeg = $ri_start->[$nline];
my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
my $is_leading = ( $ibeg == $i_opening );
return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
}
-{ ## begin closure terminal_type_i
+sub terminal_type_i {
- my %is_sort_map_grep_eval_do;
+ # returns type of last token on this line (terminal token), as follows:
+ # returns # for a full-line comment
+ # returns ' ' for a blank line
+ # otherwise returns final token type
- BEGIN {
- my @q = qw(sort map grep eval do);
- @is_sort_map_grep_eval_do{@q} = (1) x scalar(@q);
- }
-
- sub terminal_type_i {
-
- # returns type of last token on this line (terminal token), as follows:
- # returns # for a full-line comment
- # returns ' ' for a blank line
- # otherwise returns final token type
-
- my ( $ibeg, $iend ) = @_;
-
- # Start at the end and work backwards
- my $i = $iend;
- my $type_i = $types_to_go[$i];
+ my ( $ibeg, $iend ) = @_;
- # Check for side comment
- if ( $type_i eq '#' ) {
- $i--;
- if ( $i < $ibeg ) {
- return wantarray ? ( $type_i, $ibeg ) : $type_i;
- }
- $type_i = $types_to_go[$i];
- }
+ # Start at the end and work backwards
+ my $i = $iend;
+ my $type_i = $types_to_go[$i];
- # Skip past a blank
- if ( $type_i eq 'b' ) {
- $i--;
- if ( $i < $ibeg ) {
- return wantarray ? ( $type_i, $ibeg ) : $type_i;
- }
- $type_i = $types_to_go[$i];
+ # Check for side comment
+ if ( $type_i eq '#' ) {
+ $i--;
+ if ( $i < $ibeg ) {
+ return wantarray ? ( $type_i, $ibeg ) : $type_i;
}
+ $type_i = $types_to_go[$i];
+ }
- # Found it..make sure it is a BLOCK termination,
- # but hide a terminal } after sort/grep/map because it is not
- # necessarily the end of the line. (terminal.t)
- my $block_type = $block_type_to_go[$i];
- if (
- $type_i eq '}'
- && ( !$block_type
- || ( $is_sort_map_grep_eval_do{$block_type} ) )
- )
- {
- $type_i = 'b';
+ # Skip past a blank
+ if ( $type_i eq 'b' ) {
+ $i--;
+ if ( $i < $ibeg ) {
+ return wantarray ? ( $type_i, $ibeg ) : $type_i;
}
- return wantarray ? ( $type_i, $i ) : $type_i;
+ $type_i = $types_to_go[$i];
}
-} ## end closure terminal_type_i
+ # Found it..make sure it is a BLOCK termination,
+ # but hide a terminal } after sort/map/grep/eval/do because it is not
+ # necessarily the end of the line. (terminal.t)
+ my $block_type = $block_type_to_go[$i];
+ if (
+ $type_i eq '}'
+ && ( !$block_type
+ || $is_sort_map_grep_eval_do{$block_type} )
+ )
+ {
+ $type_i = 'b';
+ }
+ return wantarray ? ( $type_i, $i ) : $type_i;
+}
sub pad_array_to_go {
- # To simplify coding in scan_list and set_bond_strengths, it helps to
+ # To simplify coding in break_lists and set_bond_strengths, it helps to
# create some extra blank tokens at the end of the arrays. We also add
# some undef's to help guard against using invalid data.
my ($self) = @_;
if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
- # Nesting depths are equivalent to the _SLEVEL_ variable which is
- # clipped to be >=0 in sub write_line, so it should not be possible
- # to get here unless the code has a bracing error which leaves a
- # closing brace with zero nesting depth.
+ # Nesting depths are set to be >=0 in sub write_line, so it should
+ # not be possible to get here unless the code has a bracing error
+ # which leaves a closing brace with zero nesting depth.
unless ( get_saw_brace_error() ) {
- warning(
-"Program bug in pad_array_to_go: hit nesting error which should have been caught\n"
- );
- report_definite_bug();
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+Program bug in pad_array_to_go: hit nesting error which should have been caught
+EOM
+ }
}
}
else {
sub insert_additional_breaks {
# this routine will add line breaks at requested locations after
- # sub set_continuation_breaks has made preliminary breaks.
+ # sub break_long_lines has made preliminary breaks.
my ( $self, $ri_break_list, $ri_first, $ri_last ) = @_;
my $i_f;
# shouldn't happen unless caller passes bad indexes
if ( $line_number >= @{$ri_last} ) {
- warning(
-"Non-fatal program bug: couldn't set break at $i_break_left\n"
- );
- report_definite_bug();
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+Non-fatal program bug: couldn't set break at $i_break_left
+EOM
+ }
return;
}
$i_f = $ri_first->[$line_number];
return;
}
-sub in_same_container_i {
-
- # check to see if tokens at i1 and i2 are in the
- # same container, and not separated by a comma, ? or :
- # This is an interface between the _to_go arrays to the rLL array
- my ( $self, $i1, $i2 ) = @_;
-
- # quick check
- return if ( $parent_seqno_to_go[$i1] ne $parent_seqno_to_go[$i2] );
-
- # full check
- return $self->in_same_container_K( $K_to_go[$i1], $K_to_go[$i2] );
-}
-
-{ ## begin closure in_same_container_K
+{ ## begin closure in_same_container_i
my $ris_break_token;
my $ris_comma_token;
@{$ris_break_token}{@q} = (1) x scalar(@q);
}
- sub in_same_container_K {
+ sub in_same_container_i {
- # Check to see if tokens at K1 and K2 are in the same container,
- # and not separated by certain characters: => , ? : || or
- # This version uses the newer $rLL data structure.
+ # Check to see if tokens at i1 and i2 are in the same container, and
+ # not separated by certain characters: => , ? : || or
+ # This is an interface between the _to_go arrays to the rLL array
+ my ( $self, $i1, $i2 ) = @_;
- my ( $self, $K1, $K2 ) = @_;
- if ( $K2 < $K1 ) { ( $K1, $K2 ) = ( $K2, $K1 ) }
- my $rLL = $self->[_rLL_];
- my $depth_1 = $rLL->[$K1]->[_SLEVEL_];
+ # quick check
+ my $parent_seqno_1 = $parent_seqno_to_go[$i1];
+ return if ( $parent_seqno_to_go[$i2] ne $parent_seqno_1 );
+
+ if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
+ my $K1 = $K_to_go[$i1];
+ my $K2 = $K_to_go[$i2];
+ my $rLL = $self->[_rLL_];
+
+ my $depth_1 = $nesting_depth_to_go[$i1];
return if ( $depth_1 < 0 );
- return unless ( $rLL->[$K2]->[_SLEVEL_] == $depth_1 );
+
+ # Shouldn't happen since i1 and i2 have same parent:
+ return unless ( $nesting_depth_to_go[$i2] == $depth_1 );
# Select character set to scan for
- my $type_1 = $rLL->[$K1]->[_TYPE_];
+ my $type_1 = $types_to_go[$i1];
my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token;
# Fast preliminary loop to verify that tokens are in the same container
$KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
last if !defined($KK);
last if ( $KK >= $K2 );
- my $depth_K = $rLL->[$KK]->[_SLEVEL_];
- return if ( $depth_K < $depth_1 );
- next if ( $depth_K > $depth_1 );
+ my $ii = $i1 + $KK - $K1;
+ my $depth_i = $nesting_depth_to_go[$ii];
+ return if ( $depth_i < $depth_1 );
+ next if ( $depth_i > $depth_1 );
if ( $type_1 ne ':' ) {
- my $tok_K = $rLL->[$KK]->[_TOKEN_];
- return if ( $tok_K eq '?' || $tok_K eq ':' );
+ my $tok_i = $tokens_to_go[$ii];
+ return if ( $tok_i eq '?' || $tok_i eq ':' );
}
}
# Slow loop checking for certain characters
- ###########################################################
+ #-----------------------------------------------------
# This is potentially a slow routine and not critical.
# For safety just give up for large differences.
# See test file 'infinite_loop.txt'
- ###########################################################
- return if ( $K2 - $K1 > 200 );
+ #-----------------------------------------------------
+ return if ( $i2 - $i1 > 200 );
- foreach my $K ( $K1 + 1 .. $K2 - 1 ) {
+ foreach my $ii ( $i1 + 1 .. $i2 - 1 ) {
- my $depth_K = $rLL->[$K]->[_SLEVEL_];
- next if ( $depth_K > $depth_1 );
- return if ( $depth_K < $depth_1 ); # redundant, checked above
- my $tok = $rLL->[$K]->[_TOKEN_];
- return if ( $rbreak->{$tok} );
+ my $depth_i = $nesting_depth_to_go[$ii];
+ next if ( $depth_i > $depth_1 );
+ return if ( $depth_i < $depth_1 );
+ my $tok_i = $tokens_to_go[$ii];
+ return if ( $rbreak->{$tok_i} );
}
return 1;
}
-} ## end closure in_same_container_K
+} ## end closure in_same_container_i
sub break_equals {
next if ($semicolon_count);
# ...ok, then make the semicolon invisible
+ my $len = $token_lengths_to_go[$i_semicolon];
$tokens_to_go[$i_semicolon] = "";
$token_lengths_to_go[$i_semicolon] = 0;
$rLL->[$K_semicolon]->[_TOKEN_] = "";
$rLL->[$K_semicolon]->[_TOKEN_LENGTH_] = 0;
+ foreach ( $i_semicolon .. $max_index_to_go ) {
+ $summed_lengths_to_go[ $_ + 1 ] -= $len;
+ }
}
return;
}
sub recombine_breakpoints {
- # sub set_continuation_breaks is very liberal in setting line breaks
+ # We are given indexes to the current lines:
+ # $ri_beg = ref to array of BEGinning indexes of each line
+ # $ri_end = ref to array of ENDing indexes of each line
+ my ( $self, $ri_beg, $ri_end ) = @_;
+
+ # sub break_long_lines is very liberal in setting line breaks
# for long lines, always setting breaks at good breakpoints, even
# when that creates small lines. Sometimes small line fragments
# are produced which would look better if they were combined.
# That's the task of this routine.
- #
- # We are given indexes to the current lines:
- # $ri_beg = ref to array of BEGinning indexes of each line
- # $ri_end = ref to array of ENDing indexes of each line
- my ( $self, $ri_beg, $ri_end ) = @_;
+
+ # do nothing under extreme stress
+ return if ( $stress_level_alpha < 1 && !DEVEL_MODE );
my $rK_weld_right = $self->[_rK_weld_right_];
my $rK_weld_left = $self->[_rK_weld_left_];
+ my $nmax = @{$ri_end} - 1;
+ return if ( $nmax <= 0 );
+
+ my $nmax_start = $nmax;
+
# Make a list of all good joining tokens between the lines
# n-1 and n.
my @joint;
- my $nmax = @{$ri_end} - 1;
- for my $n ( 1 .. $nmax ) {
- my $ibeg_1 = $ri_beg->[ $n - 1 ];
- my $iend_1 = $ri_end->[ $n - 1 ];
- my $iend_2 = $ri_end->[$n];
- my $ibeg_2 = $ri_beg->[$n];
+ # Break the total batch sub-sections with lengths short enough to
+ # recombine
+ my $rsections = [];
+ my $nbeg = 0;
+ my $nend;
+ my $nmax_section = 0;
+ foreach my $nn ( 1 .. $nmax ) {
+ my $ibeg_1 = $ri_beg->[ $nn - 1 ];
+ my $iend_1 = $ri_end->[ $nn - 1 ];
+ my $iend_2 = $ri_end->[$nn];
+ my $ibeg_2 = $ri_beg->[$nn];
+
+ # Define the joint variable
my ( $itok, $itokp, $itokm );
-
foreach my $itest ( $iend_1, $ibeg_2 ) {
my $type = $types_to_go[$itest];
if ( $is_math_op{$type}
$itok = $itest;
}
}
- $joint[$n] = [$itok];
+ $joint[$nn] = [$itok];
+
+ # Update the section list
+ my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 );
+ if (
+ $excess <= 1
+
+ # The number 5 here is an arbitrary small number intended
+ # to keep most small matches in one sub-section.
+ || ( defined($nend) && ( $nn < 5 || $nmax - $nn < 5 ) )
+ )
+ {
+ $nend = $nn;
+ }
+ else {
+ if ( defined($nend) ) {
+ push @{$rsections}, [ $nbeg, $nend ];
+ my $num = $nend - $nbeg;
+ if ( $num > $nmax_section ) { $nmax_section = $num }
+ $nbeg = $nn;
+ $nend = undef;
+ }
+ $nbeg = $nn;
+ }
+ }
+ if ( defined($nend) ) {
+ push @{$rsections}, [ $nbeg, $nend ];
+ my $num = $nend - $nbeg;
+ if ( $num > $nmax_section ) { $nmax_section = $num }
}
- my $more_to_do = 1;
+ my $num_sections = @{$rsections};
- # We keep looping over all of the lines of this batch
- # until there are no more possible recombinations
- my $nmax_last = @{$ri_end};
- my $reverse = 0;
- while ($more_to_do) {
- my $n_best = 0;
- my $bs_best;
- my $nmax = @{$ri_end} - 1;
+ # This is potentially an O(n-squared) loop, but not critical, so we can
+ # put a finite limit on the total number of iterations. This is
+ # suggested by issue c118, which pushed about 5.e5 lines through here
+ # and caused an excessive run time.
- # Safety check for infinite loop
- unless ( $nmax < $nmax_last ) {
-
- # Shouldn't happen because splice below decreases nmax on each
- # iteration. An error can only be due to a recent programming
- # change.
- Fault("Program bug-infinite loop in recombine breakpoints\n");
- }
- $nmax_last = $nmax;
- $more_to_do = 0;
- my $skip_Section_3;
- my $leading_amp_count = 0;
- my $this_line_is_semicolon_terminated;
-
- # loop over all remaining lines in this batch
- for my $iter ( 1 .. $nmax ) {
-
- # alternating sweep direction gives symmetric results
- # for recombining lines which exceed the line length
- # such as eval {{{{.... }}}}
- my $n;
- if ($reverse) { $n = 1 + $nmax - $iter; }
- else { $n = $iter }
-
- #----------------------------------------------------------
- # If we join the current pair of lines,
- # line $n-1 will become the left part of the joined line
- # line $n will become the right part of the joined line
- #
- # Here are Indexes of the endpoint tokens of the two lines:
- #
- # -----line $n-1--- | -----line $n-----
- # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
- # ^
- # |
- # We want to decide if we should remove the line break
- # between the tokens at $iend_1 and $ibeg_2
- #
- # We will apply a number of ad-hoc tests to see if joining
- # here will look ok. The code will just issue a 'next'
- # command if the join doesn't look good. If we get through
- # the gauntlet of tests, the lines will be recombined.
- #----------------------------------------------------------
- #
- # beginning and ending tokens of the lines we are working on
- my $ibeg_1 = $ri_beg->[ $n - 1 ];
- my $iend_1 = $ri_end->[ $n - 1 ];
- my $iend_2 = $ri_end->[$n];
- my $ibeg_2 = $ri_beg->[$n];
- my $ibeg_nmax = $ri_beg->[$nmax];
-
- # combined line cannot be too long
- my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 );
- next if ( $excess > 0 );
-
- my $type_iend_1 = $types_to_go[$iend_1];
- my $type_iend_2 = $types_to_go[$iend_2];
- my $type_ibeg_1 = $types_to_go[$ibeg_1];
- my $type_ibeg_2 = $types_to_go[$ibeg_2];
-
- # terminal token of line 2 if any side comment is ignored:
- my $iend_2t = $iend_2;
- my $type_iend_2t = $type_iend_2;
-
- # some beginning indexes of other lines, which may not exist
- my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1;
- my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
- my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
-
- my $bs_tweak = 0;
-
- #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
- # $nesting_depth_to_go[$ibeg_1] );
-
- DEBUG_RECOMBINE && do {
- print STDERR
-"RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
- };
+ # Three lines of defence have been put in place to prevent excessive
+ # run times:
+ # 1. do nothing if formatting under stress (c118 was under stress)
+ # 2. break into small sub-sections to decrease the maximum n-squared.
+ # 3. put a finite limit on the number of iterations.
- # If line $n is the last line, we set some flags and
- # do any special checks for it
- if ( $n == $nmax ) {
+ # Testing shows that most batches only require one or two iterations.
+ # A very large batch which is broken into sub-sections can require one
+ # iteration per section. This suggests the limit here, which allows
+ # up to 10 iterations plus one pass per sub-section.
+ my $it_count = 0;
+ my $it_count_max =
+ 10 + int( 1000 / ( 1 + $nmax_section ) ) + $num_sections;
- # a terminal '{' should stay where it is
- # unless preceded by a fat comma
- next if ( $type_ibeg_2 eq '{' && $type_iend_1 ne '=>' );
+ if ( DEBUG_RECOMBINE > 1 ) {
+ my $max = 0;
+ print STDERR "-----\n$num_sections sections found for nmax=$nmax\n";
+ foreach my $sect ( @{$rsections} ) {
+ my ( $nbeg, $nend ) = @{$sect};
+ my $num = $nend - $nbeg;
+ if ( $num > $max ) { $max = $num }
+ print STDERR "$nbeg $nend\n";
+ }
+ print STDERR "max size=$max of $nmax lines\n";
+ }
- if ( $type_iend_2 eq '#'
- && $iend_2 - $ibeg_2 >= 2
- && $types_to_go[ $iend_2 - 1 ] eq 'b' )
- {
- $iend_2t = $iend_2 - 2;
- $type_iend_2t = $types_to_go[$iend_2t];
- }
+ # Loop over all sub-sections. Note that we have to work backwards
+ # from the end of the batch since the sections use original line
+ # numbers, and the line numbers change as we go.
+ while ( my $section = pop @{$rsections} ) {
+ my ( $nbeg, $nend ) = @{$section};
- $this_line_is_semicolon_terminated = $type_iend_2t eq ';';
+ # number of ending lines to leave untouched in this pass
+ $nmax = @{$ri_end} - 1;
+ my $num_freeze = $nmax - $nend;
+
+ my $more_to_do = 1;
+
+ # We keep looping over all of the lines of this batch
+ # until there are no more possible recombinations
+ my $nmax_last = $nmax + 1;
+ my $reverse = 0;
+
+ while ($more_to_do) {
+
+ # Safety check for excess total iterations
+ $it_count++;
+ if ( $it_count > $it_count_max ) {
+ goto RETURN;
}
- #----------------------------------------------------------
- # Recombine Section 0:
- # Examine the special token joining this line pair, if any.
- # Put as many tests in this section to avoid duplicate code and
- # to make formatting independent of whether breaks are to the
- # left or right of an operator.
- #----------------------------------------------------------
+ my $n_best = 0;
+ my $bs_best;
+ my $nmax = @{$ri_end} - 1;
- my ($itok) = @{ $joint[$n] };
- if ($itok) {
+ # Safety check for infinite loop: the line count must decrease
+ unless ( $nmax < $nmax_last ) {
- my $type = $types_to_go[$itok];
+ # Shouldn't happen because splice below decreases nmax on
+ # each iteration. An error can only be due to a recent
+ # programming change. We better stop here.
+ if (DEVEL_MODE) {
+ Fault(
+"Program bug-infinite loop in recombine breakpoints\n"
+ );
+ }
+ $more_to_do = 0;
+ last;
+ }
+ $nmax_last = $nmax;
+ $more_to_do = 0;
+ my $skip_Section_3;
+ my $leading_amp_count = 0;
+ my $this_line_is_semicolon_terminated;
+
+ # loop over all remaining lines in this batch
+ my $nstop = $nmax - $num_freeze;
+ for my $iter ( $nbeg + 1 .. $nstop ) {
+
+ # alternating sweep direction gives symmetric results
+ # for recombining lines which exceed the line length
+ # such as eval {{{{.... }}}}
+ my $n;
+ if ($reverse) { $n = $nbeg + 1 + $nstop - $iter; }
+ else { $n = $iter }
+
+ #----------------------------------------------------------
+ # If we join the current pair of lines,
+ # line $n-1 will become the left part of the joined line
+ # line $n will become the right part of the joined line
+ #
+ # Here are Indexes of the endpoint tokens of the two lines:
+ #
+ # -----line $n-1--- | -----line $n-----
+ # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
+ # ^
+ # |
+ # We want to decide if we should remove the line break
+ # between the tokens at $iend_1 and $ibeg_2
+ #
+ # We will apply a number of ad-hoc tests to see if joining
+ # here will look ok. The code will just issue a 'next'
+ # command if the join doesn't look good. If we get through
+ # the gauntlet of tests, the lines will be recombined.
+ #----------------------------------------------------------
+ #
+ # beginning and ending tokens of the lines we are working on
+ my $ibeg_1 = $ri_beg->[ $n - 1 ];
+ my $iend_1 = $ri_end->[ $n - 1 ];
+ my $iend_2 = $ri_end->[$n];
+ my $ibeg_2 = $ri_beg->[$n];
+ my $ibeg_nmax = $ri_beg->[$nmax];
+
+ # combined line cannot be too long
+ my $excess =
+ $self->excess_line_length( $ibeg_1, $iend_2, 1 );
+ next if ( $excess > 0 );
- if ( $type eq ':' ) {
+ my $type_iend_1 = $types_to_go[$iend_1];
+ my $type_iend_2 = $types_to_go[$iend_2];
+ my $type_ibeg_1 = $types_to_go[$ibeg_1];
+ my $type_ibeg_2 = $types_to_go[$ibeg_2];
- # do not join at a colon unless it disobeys the break
- # request
- if ( $itok eq $iend_1 ) {
- next unless $want_break_before{$type};
- }
- else {
- $leading_amp_count++;
- next if $want_break_before{$type};
- }
- } ## end if ':'
+ # terminal token of line 2 if any side comment is ignored:
+ my $iend_2t = $iend_2;
+ my $type_iend_2t = $type_iend_2;
+
+ # some beginning indexes of other lines, which may not exist
+ my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1;
+ my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
+ my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
- # handle math operators + - * /
- elsif ( $is_math_op{$type} ) {
+ my $bs_tweak = 0;
- # Combine these lines if this line is a single
- # number, or if it is a short term with same
- # operator as the previous line. For example, in
- # the following code we will combine all of the
- # short terms $A, $B, $C, $D, $E, $F, together
- # instead of leaving them one per line:
- # my $time =
- # $A * $B * $C * $D * $E * $F *
- # ( 2. * $eps * $sigma * $area ) *
- # ( 1. / $tcold**3 - 1. / $thot**3 );
+ #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
+ # $nesting_depth_to_go[$ibeg_1] );
+
+ DEBUG_RECOMBINE > 1 && do {
+ print STDERR
+"RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
+ };
- # This can be important in math-intensive code.
+ # If line $n is the last line, we set some flags and
+ # do any special checks for it
+ if ( $n == $nmax ) {
- my $good_combo;
+ # a terminal '{' should stay where it is
+ # unless preceded by a fat comma
+ next if ( $type_ibeg_2 eq '{' && $type_iend_1 ne '=>' );
- my $itokp = min( $inext_to_go[$itok], $iend_2 );
- my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
- my $itokm = max( $iprev_to_go[$itok], $ibeg_1 );
- my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
+ if ( $type_iend_2 eq '#'
+ && $iend_2 - $ibeg_2 >= 2
+ && $types_to_go[ $iend_2 - 1 ] eq 'b' )
+ {
+ $iend_2t = $iend_2 - 2;
+ $type_iend_2t = $types_to_go[$iend_2t];
+ }
- # check for a number on the right
- if ( $types_to_go[$itokp] eq 'n' ) {
+ $this_line_is_semicolon_terminated =
+ $type_iend_2t eq ';';
+ }
- # ok if nothing else on right
- if ( $itokp == $iend_2 ) {
- $good_combo = 1;
+ #----------------------------------------------------------
+ # Recombine Section 0:
+ # Examine the special token joining this line pair, if any.
+ # Put as many tests in this section to avoid duplicate code
+ # and to make formatting independent of whether breaks are
+ # to the left or right of an operator.
+ #----------------------------------------------------------
+
+ my ($itok) = @{ $joint[$n] };
+ if ($itok) {
+
+ my $type = $types_to_go[$itok];
+
+ if ( $type eq ':' ) {
+
+ # do not join at a colon unless it disobeys the
+ # break request
+ if ( $itok eq $iend_1 ) {
+ next unless $want_break_before{$type};
}
else {
-
- # look one more token to right..
- # okay if math operator or some termination
- $good_combo =
- ( ( $itokpp == $iend_2 )
- && $is_math_op{ $types_to_go[$itokpp] } )
- || $types_to_go[$itokpp] =~ /^[#,;]$/;
+ $leading_amp_count++;
+ next if $want_break_before{$type};
}
- }
+ } ## end if ':'
- # check for a number on the left
- if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
+ # handle math operators + - * /
+ elsif ( $is_math_op{$type} ) {
- # okay if nothing else to left
- if ( $itokm == $ibeg_1 ) {
- $good_combo = 1;
- }
+ # Combine these lines if this line is a single
+ # number, or if it is a short term with same
+ # operator as the previous line. For example, in
+ # the following code we will combine all of the
+ # short terms $A, $B, $C, $D, $E, $F, together
+ # instead of leaving them one per line:
+ # my $time =
+ # $A * $B * $C * $D * $E * $F *
+ # ( 2. * $eps * $sigma * $area ) *
+ # ( 1. / $tcold**3 - 1. / $thot**3 );
- # otherwise look one more token to left
- else {
+ # This can be important in math-intensive code.
+
+ my $good_combo;
+
+ my $itokp = min( $inext_to_go[$itok], $iend_2 );
+ my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
+ my $itokm = max( $iprev_to_go[$itok], $ibeg_1 );
+ my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
+
+ # check for a number on the right
+ if ( $types_to_go[$itokp] eq 'n' ) {
+
+ # ok if nothing else on right
+ if ( $itokp == $iend_2 ) {
+ $good_combo = 1;
+ }
+ else {
- # okay if math operator, comma, or assignment
- $good_combo = ( $itokmm == $ibeg_1 )
- && ( $is_math_op{ $types_to_go[$itokmm] }
- || $types_to_go[$itokmm] =~ /^[,]$/
- || $is_assignment{ $types_to_go[$itokmm] }
- );
+ # look one more token to right..
+ # okay if math operator or some termination
+ $good_combo =
+ ( ( $itokpp == $iend_2 )
+ && $is_math_op{ $types_to_go[$itokpp]
+ } )
+ || $types_to_go[$itokpp] =~ /^[#,;]$/;
+ }
}
- }
- # look for a single short token either side of the
- # operator
- if ( !$good_combo ) {
+ # check for a number on the left
+ if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
- # Slight adjustment factor to make results
- # independent of break before or after operator in
- # long summed lists. (An operator and a space make
- # two spaces).
- my $two = ( $itok eq $iend_1 ) ? 2 : 0;
+ # okay if nothing else to left
+ if ( $itokm == $ibeg_1 ) {
+ $good_combo = 1;
+ }
- $good_combo =
+ # otherwise look one more token to left
+ else {
- # numbers or id's on both sides of this joint
- $types_to_go[$itokp] =~ /^[in]$/
- && $types_to_go[$itokm] =~ /^[in]$/
+ # okay if math operator, comma, or assignment
+ $good_combo = ( $itokmm == $ibeg_1 )
+ && ( $is_math_op{ $types_to_go[$itokmm] }
+ || $types_to_go[$itokmm] =~ /^[,]$/
+ || $is_assignment{ $types_to_go[$itokmm]
+ } );
+ }
+ }
- # one of the two lines must be short:
- && (
- (
- # no more than 2 nonblank tokens right of
- # joint
- $itokpp == $iend_2
-
- # short
- && token_sequence_length( $itokp, $iend_2 )
- < $two +
- $rOpts_short_concatenation_item_length
- )
- || (
- # no more than 2 nonblank tokens left of
- # joint
- $itokmm == $ibeg_1
-
- # short
- && token_sequence_length( $ibeg_1, $itokm )
- < 2 - $two +
- $rOpts_short_concatenation_item_length
- )
+ # look for a single short token either side of the
+ # operator
+ if ( !$good_combo ) {
- )
+ # Slight adjustment factor to make results
+ # independent of break before or after operator
+ # in long summed lists. (An operator and a
+ # space make two spaces).
+ my $two = ( $itok eq $iend_1 ) ? 2 : 0;
- # keep pure terms; don't mix +- with */
- && !(
- $is_plus_minus{$type}
- && ( $is_mult_div{ $types_to_go[$itokmm] }
- || $is_mult_div{ $types_to_go[$itokpp] } )
- )
- && !(
- $is_mult_div{$type}
- && ( $is_plus_minus{ $types_to_go[$itokmm] }
- || $is_plus_minus{ $types_to_go[$itokpp] } )
- )
+ $good_combo =
- ;
- }
+ # numbers or id's on both sides of this joint
+ $types_to_go[$itokp] =~ /^[in]$/
+ && $types_to_go[$itokm] =~ /^[in]$/
+
+ # one of the two lines must be short:
+ && (
+ (
+ # no more than 2 nonblank tokens right
+ # of joint
+ $itokpp == $iend_2
+
+ # short
+ && token_sequence_length(
+ $itokp, $iend_2
+ ) < $two +
+ $rOpts_short_concatenation_item_length
+ )
+ || (
+ # no more than 2 nonblank tokens left of
+ # joint
+ $itokmm == $ibeg_1
+
+ # short
+ && token_sequence_length(
+ $ibeg_1, $itokm
+ ) < 2 - $two +
+ $rOpts_short_concatenation_item_length
+ )
+
+ )
+
+ # keep pure terms; don't mix +- with */
+ && !(
+ $is_plus_minus{$type}
+ && ( $is_mult_div{ $types_to_go[$itokmm] }
+ || $is_mult_div{ $types_to_go[$itokpp] }
+ )
+ )
+ && !(
+ $is_mult_div{$type}
+ && ( $is_plus_minus{ $types_to_go[$itokmm] }
+ || $is_plus_minus{ $types_to_go[$itokpp]
+ } )
+ )
+
+ ;
+ }
- # it is also good to combine if we can reduce to 2 lines
- if ( !$good_combo ) {
+ # it is also good to combine if we can reduce to 2
+ # lines
+ if ( !$good_combo ) {
- # index on other line where same token would be in a
- # long chain.
- my $iother =
- ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
+ # index on other line where same token would be
+ # in a long chain.
+ my $iother =
+ ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
- $good_combo =
- $n == 2
- && $n == $nmax
- && $types_to_go[$iother] ne $type;
- }
+ $good_combo =
+ $n == 2
+ && $n == $nmax
+ && $types_to_go[$iother] ne $type;
+ }
- next unless ($good_combo);
+ next unless ($good_combo);
- } ## end math
+ } ## end math
- elsif ( $is_amp_amp{$type} ) {
- ##TBD
- } ## end &&, ||
+ elsif ( $is_amp_amp{$type} ) {
+ ##TBD
+ } ## end &&, ||
- elsif ( $is_assignment{$type} ) {
- ##TBD
- } ## end assignment
- }
+ elsif ( $is_assignment{$type} ) {
+ ##TBD
+ } ## end assignment
+ }
- #----------------------------------------------------------
- # Recombine Section 1:
- # Join welded nested containers immediately
- #----------------------------------------------------------
+ #----------------------------------------------------------
+ # Recombine Section 1:
+ # Join welded nested containers immediately
+ #----------------------------------------------------------
- if (
- $total_weld_count
- && ( $type_sequence_to_go[$iend_1]
- && defined( $rK_weld_right->{ $K_to_go[$iend_1] } )
- || $type_sequence_to_go[$ibeg_2]
- && defined( $rK_weld_left->{ $K_to_go[$ibeg_2] } ) )
- )
- {
- $n_best = $n;
- last;
- }
+ if (
+ $total_weld_count
+ && ( $type_sequence_to_go[$iend_1]
+ && defined( $rK_weld_right->{ $K_to_go[$iend_1] } )
+ || $type_sequence_to_go[$ibeg_2]
+ && defined( $rK_weld_left->{ $K_to_go[$ibeg_2] } ) )
+ )
+ {
+ $n_best = $n;
+ last;
+ }
- $reverse = 0;
+ $reverse = 0;
- #----------------------------------------------------------
- # Recombine Section 2:
- # Examine token at $iend_1 (right end of first line of pair)
- #----------------------------------------------------------
+ #----------------------------------------------------------
+ # Recombine Section 2:
+ # Examine token at $iend_1 (right end of first line of pair)
+ #----------------------------------------------------------
- # an isolated '}' may join with a ';' terminated segment
- if ( $type_iend_1 eq '}' ) {
+ # an isolated '}' may join with a ';' terminated segment
+ if ( $type_iend_1 eq '}' ) {
# Check for cases where combining a semicolon terminated
# statement with a previous isolated closing paren will
# trailing clause will be far to the right.
#
# The logic here is synchronized with the logic in sub
- # sub set_adjusted_indentation, which actually does
+ # sub final_indentation_adjustment, which actually does
# the outdenting.
#
- $skip_Section_3 ||= $this_line_is_semicolon_terminated
-
- # only one token on last line
- && $ibeg_1 == $iend_1
-
- # must be structural paren
- && $tokens_to_go[$iend_1] eq ')'
-
- # style must allow outdenting,
- && !$closing_token_indentation{')'}
-
- # only leading '&&', '||', and ':' if no others seen
- # (but note: our count made below could be wrong
- # due to intervening comments)
- && ( $leading_amp_count == 0
- || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
-
- # but leading colons probably line up with a
- # previous colon or question (count could be wrong).
- && $type_ibeg_2 ne ':'
-
- # only one step in depth allowed. this line must not
- # begin with a ')' itself.
- && ( $nesting_depth_to_go[$iend_1] ==
- $nesting_depth_to_go[$iend_2] + 1 );
-
- # YVES patch 2 of 2:
- # Allow cuddled eval chains, like this:
- # eval {
- # #STUFF;
- # 1; # return true
- # } or do {
- # #handle error
- # };
- # This patch works together with a patch in
- # setting adjusted indentation (where the closing eval
- # brace is outdented if possible).
- # The problem is that an 'eval' block has continuation
- # indentation and it looks better to undo it in some
- # cases. If we do not use this patch we would get:
- # eval {
- # #STUFF;
- # 1; # return true
- # }
- # or do {
- # #handle error
- # };
- # The alternative, for uncuddled style, is to create
- # a patch in set_adjusted_indentation which undoes
- # the indentation of a leading line like 'or do {'.
- # This doesn't work well with -icb through
- if (
- $block_type_to_go[$iend_1] eq 'eval'
- && !$rOpts->{'line-up-parentheses'}
- && !$rOpts->{'indent-closing-brace'}
- && $tokens_to_go[$iend_2] eq '{'
- && (
- ( $type_ibeg_2 =~ /^(\&\&|\|\|)$/ )
- || ( $type_ibeg_2 eq 'k'
- && $is_and_or{ $tokens_to_go[$ibeg_2] } )
- || $is_if_unless{ $tokens_to_go[$ibeg_2] }
- )
- )
- {
- $skip_Section_3 ||= 1;
- }
+ $skip_Section_3 ||= $this_line_is_semicolon_terminated
+
+ # only one token on last line
+ && $ibeg_1 == $iend_1
+
+ # must be structural paren
+ && $tokens_to_go[$iend_1] eq ')'
+
+ # style must allow outdenting,
+ && !$closing_token_indentation{')'}
+
+ # only leading '&&', '||', and ':' if no others seen
+ # (but note: our count made below could be wrong
+ # due to intervening comments)
+ && ( $leading_amp_count == 0
+ || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
+
+ # but leading colons probably line up with a
+ # previous colon or question (count could be wrong).
+ && $type_ibeg_2 ne ':'
+
+ # only one step in depth allowed. this line must not
+ # begin with a ')' itself.
+ && ( $nesting_depth_to_go[$iend_1] ==
+ $nesting_depth_to_go[$iend_2] + 1 );
+
+ # YVES patch 2 of 2:
+ # Allow cuddled eval chains, like this:
+ # eval {
+ # #STUFF;
+ # 1; # return true
+ # } or do {
+ # #handle error
+ # };
+ # This patch works together with a patch in
+ # setting adjusted indentation (where the closing eval
+ # brace is outdented if possible).
+ # The problem is that an 'eval' block has continuation
+ # indentation and it looks better to undo it in some
+ # cases. If we do not use this patch we would get:
+ # eval {
+ # #STUFF;
+ # 1; # return true
+ # }
+ # or do {
+ # #handle error
+ # };
+ # The alternative, for uncuddled style, is to create
+ # a patch in final_indentation_adjustment which undoes
+ # the indentation of a leading line like 'or do {'.
+ # This doesn't work well with -icb through
+ if (
+ $block_type_to_go[$iend_1] eq 'eval'
+ && !ref( $leading_spaces_to_go[$iend_1] )
+ && !$rOpts_indent_closing_brace
+ && $tokens_to_go[$iend_2] eq '{'
+ && (
+ ( $type_ibeg_2 =~ /^(\&\&|\|\|)$/ )
+ || ( $type_ibeg_2 eq 'k'
+ && $is_and_or{ $tokens_to_go[$ibeg_2] } )
+ || $is_if_unless{ $tokens_to_go[$ibeg_2] }
+ )
+ )
+ {
+ $skip_Section_3 ||= 1;
+ }
- next
- unless (
- $skip_Section_3
+ next
+ unless (
+ $skip_Section_3
- # handle '.' and '?' specially below
- || ( $type_ibeg_2 =~ /^[\.\?]$/ )
- );
- }
+ # handle '.' and '?' specially below
+ || ( $type_ibeg_2 =~ /^[\.\?]$/ )
+ );
+ }
- elsif ( $type_iend_1 eq '{' ) {
+ elsif ( $type_iend_1 eq '{' ) {
- # YVES
- # honor breaks at opening brace
- # Added to prevent recombining something like this:
- # } || eval { package main;
- next if $forced_breakpoint_to_go[$iend_1];
- }
+ # YVES
+ # honor breaks at opening brace
+ # Added to prevent recombining something like this:
+ # } || eval { package main;
+ next if $forced_breakpoint_to_go[$iend_1];
+ }
- # do not recombine lines with ending &&, ||,
- elsif ( $is_amp_amp{$type_iend_1} ) {
- next unless $want_break_before{$type_iend_1};
- }
+ # do not recombine lines with ending &&, ||,
+ elsif ( $is_amp_amp{$type_iend_1} ) {
+ next unless $want_break_before{$type_iend_1};
+ }
- # Identify and recombine a broken ?/: chain
- elsif ( $type_iend_1 eq '?' ) {
+ # Identify and recombine a broken ?/: chain
+ elsif ( $type_iend_1 eq '?' ) {
- # Do not recombine different levels
- next
- if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
+ # Do not recombine different levels
+ next
+ if (
+ $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
- # do not recombine unless next line ends in :
- next unless $type_iend_2 eq ':';
- }
+ # do not recombine unless next line ends in :
+ next unless $type_iend_2 eq ':';
+ }
- # for lines ending in a comma...
- elsif ( $type_iend_1 eq ',' ) {
+ # for lines ending in a comma...
+ elsif ( $type_iend_1 eq ',' ) {
- # Do not recombine at comma which is following the
- # input bias.
- # TODO: might be best to make a special flag
- next if ( $old_breakpoint_to_go[$iend_1] );
+ # Do not recombine at comma which is following the
+ # input bias.
+ # TODO: might be best to make a special flag
+ next if ( $old_breakpoint_to_go[$iend_1] );
- # An isolated '},' may join with an identifier + ';'
- # This is useful for the class of a 'bless' statement
- # (bless.t)
- if ( $type_ibeg_1 eq '}'
- && $type_ibeg_2 eq 'i' )
- {
- next
- unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
- && ( $iend_2 == ( $ibeg_2 + 1 ) )
- && $this_line_is_semicolon_terminated );
+ # An isolated '},' may join with an identifier + ';'
+ # This is useful for the class of a 'bless' statement
+ # (bless.t)
+ if ( $type_ibeg_1 eq '}'
+ && $type_ibeg_2 eq 'i' )
+ {
+ next
+ unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
+ && ( $iend_2 == ( $ibeg_2 + 1 ) )
+ && $this_line_is_semicolon_terminated );
- # override breakpoint
- $forced_breakpoint_to_go[$iend_1] = 0;
- }
+ # override breakpoint
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
- # but otherwise ..
- else {
+ # but otherwise ..
+ else {
- # do not recombine after a comma unless this will leave
- # just 1 more line
- next unless ( $n + 1 >= $nmax );
+ # do not recombine after a comma unless this will
+ # leave just 1 more line
+ next unless ( $n + 1 >= $nmax );
- # do not recombine if there is a change in indentation depth
- next
- if (
- $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
-
- # do not recombine a "complex expression" after a
- # comma. "complex" means no parens.
- my $saw_paren;
- foreach my $ii ( $ibeg_2 .. $iend_2 ) {
- if ( $tokens_to_go[$ii] eq '(' ) {
- $saw_paren = 1;
- last;
+ # do not recombine if there is a change in
+ # indentation depth
+ next
+ if ( $levels_to_go[$iend_1] !=
+ $levels_to_go[$iend_2] );
+
+ # do not recombine a "complex expression" after a
+ # comma. "complex" means no parens.
+ my $saw_paren;
+ foreach my $ii ( $ibeg_2 .. $iend_2 ) {
+ if ( $tokens_to_go[$ii] eq '(' ) {
+ $saw_paren = 1;
+ last;
+ }
}
+ next if $saw_paren;
}
- next if $saw_paren;
}
- }
- # opening paren..
- elsif ( $type_iend_1 eq '(' ) {
+ # opening paren..
+ elsif ( $type_iend_1 eq '(' ) {
- # No longer doing this
- }
-
- elsif ( $type_iend_1 eq ')' ) {
-
- # No longer doing this
- }
-
- # keep a terminal for-semicolon
- elsif ( $type_iend_1 eq 'f' ) {
- next;
- }
+ # No longer doing this
+ }
- # if '=' at end of line ...
- elsif ( $is_assignment{$type_iend_1} ) {
+ elsif ( $type_iend_1 eq ')' ) {
- # keep break after = if it was in input stream
- # this helps prevent 'blinkers'
- next if $old_breakpoint_to_go[$iend_1]
+ # No longer doing this
+ }
- # don't strand an isolated '='
- && $iend_1 != $ibeg_1;
+ # keep a terminal for-semicolon
+ elsif ( $type_iend_1 eq 'f' ) {
+ next;
+ }
- my $is_short_quote =
- ( $type_ibeg_2 eq 'Q'
- && $ibeg_2 == $iend_2
- && token_sequence_length( $ibeg_2, $ibeg_2 ) <
- $rOpts_short_concatenation_item_length );
- my $is_ternary =
- ( $type_ibeg_1 eq '?'
- && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
+ # if '=' at end of line ...
+ elsif ( $is_assignment{$type_iend_1} ) {
- # always join an isolated '=', a short quote, or if this
- # will put ?/: at start of adjacent lines
- if ( $ibeg_1 != $iend_1
- && !$is_short_quote
- && !$is_ternary )
- {
+ # keep break after = if it was in input stream
+ # this helps prevent 'blinkers'
next
- unless (
- (
+ if (
+ $old_breakpoint_to_go[$iend_1]
- # unless we can reduce this to two lines
- $nmax < $n + 2
+ # don't strand an isolated '='
+ && $iend_1 != $ibeg_1
+ );
- # or three lines, the last with a leading semicolon
- || ( $nmax == $n + 2
- && $types_to_go[$ibeg_nmax] eq ';' )
+ my $is_short_quote =
+ ( $type_ibeg_2 eq 'Q'
+ && $ibeg_2 == $iend_2
+ && token_sequence_length( $ibeg_2, $ibeg_2 ) <
+ $rOpts_short_concatenation_item_length );
+ my $is_ternary = (
+ $type_ibeg_1 eq '?' && ( $ibeg_3 >= 0
+ && $types_to_go[$ibeg_3] eq ':' )
+ );
- # or the next line ends with a here doc
- || $type_iend_2 eq 'h'
+ # always join an isolated '=', a short quote, or if this
+ # will put ?/: at start of adjacent lines
+ if ( $ibeg_1 != $iend_1
+ && !$is_short_quote
+ && !$is_ternary )
+ {
+ next
+ unless (
+ (
- # or the next line ends in an open paren or brace
- # and the break hasn't been forced [dima.t]
- || ( !$forced_breakpoint_to_go[$iend_1]
- && $type_iend_2 eq '{' )
- )
+ # unless we can reduce this to two lines
+ $nmax < $n + 2
- # do not recombine if the two lines might align well
- # this is a very approximate test for this
- && (
+ # or three lines, the last with a leading
+ # semicolon
+ || ( $nmax == $n + 2
+ && $types_to_go[$ibeg_nmax] eq ';' )
- # RT#127633 - the leading tokens are not operators
- ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] )
+ # or the next line ends with a here doc
+ || $type_iend_2 eq 'h'
- # or they are different
- || ( $ibeg_3 >= 0
- && $type_ibeg_2 ne $types_to_go[$ibeg_3] )
- )
- );
+ # or the next line ends in an open paren or
+ # brace and the break hasn't been forced
+ # [dima.t]
+ || ( !$forced_breakpoint_to_go[$iend_1]
+ && $type_iend_2 eq '{' )
+ )
- if (
+ # do not recombine if the two lines might align
+ # well this is a very approximate test for this
+ && (
- # Recombine if we can make two lines
- $nmax >= $n + 2
+ # RT#127633 - the leading tokens are not
+ # operators
+ ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] )
- # -lp users often prefer this:
- # my $title = function($env, $env, $sysarea,
- # "bubba Borrower Entry");
- # so we will recombine if -lp is used we have
- # ending comma
- && ( !$rOpts_line_up_parentheses
- || $type_iend_2 ne ',' )
- )
- {
-
- # otherwise, scan the rhs line up to last token for
- # complexity. Note that we are not counting the last
- # token in case it is an opening paren.
- my $tv = 0;
- my $depth = $nesting_depth_to_go[$ibeg_2];
- foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
- if ( $nesting_depth_to_go[$i] != $depth ) {
- $tv++;
- last if ( $tv > 1 );
- }
- $depth = $nesting_depth_to_go[$i];
- }
+ # or they are different
+ || ( $ibeg_3 >= 0
+ && $type_ibeg_2 ne
+ $types_to_go[$ibeg_3] )
+ )
+ );
- # ok to recombine if no level changes before last token
- if ( $tv > 0 ) {
+ if (
- # otherwise, do not recombine if more than two
- # level changes.
- next if ( $tv > 1 );
+ # Recombine if we can make two lines
+ $nmax >= $n + 2
+
+ # -lp users often prefer this:
+ # my $title = function($env, $env, $sysarea,
+ # "bubba Borrower Entry");
+ # so we will recombine if -lp is used we have
+ # ending comma
+ && !(
+ $ibeg_3 > 0
+ && ref( $leading_spaces_to_go[$ibeg_3] )
+ && $type_iend_2 eq ','
+ )
+ )
+ {
- # check total complexity of the two adjacent lines
- # that will occur if we do this join
- my $istop =
- ( $n < $nmax )
- ? $ri_end->[ $n + 1 ]
- : $iend_2;
- foreach my $i ( $iend_2 .. $istop ) {
+ # otherwise, scan the rhs line up to last token
+ # for complexity. Note that we are not
+ # counting the last token in case it is an
+ # opening paren.
+ my $tv = 0;
+ my $depth = $nesting_depth_to_go[$ibeg_2];
+ foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
if ( $nesting_depth_to_go[$i] != $depth ) {
$tv++;
- last if ( $tv > 2 );
+ last if ( $tv > 1 );
}
$depth = $nesting_depth_to_go[$i];
}
- # do not recombine if total is more than 2 level changes
- next if ( $tv > 2 );
+ # ok to recombine if no level changes before
+ # last token
+ if ( $tv > 0 ) {
+
+ # otherwise, do not recombine if more than
+ # two level changes.
+ next if ( $tv > 1 );
+
+ # check total complexity of the two
+ # adjacent lines that will occur if we do
+ # this join
+ my $istop =
+ ( $n < $nmax )
+ ? $ri_end->[ $n + 1 ]
+ : $iend_2;
+ foreach my $i ( $iend_2 .. $istop ) {
+ if (
+ $nesting_depth_to_go[$i] != $depth )
+ {
+ $tv++;
+ last if ( $tv > 2 );
+ }
+ $depth = $nesting_depth_to_go[$i];
+ }
+
+ # do not recombine if total is more than 2
+ # level changes
+ next if ( $tv > 2 );
+ }
}
}
- }
- unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
- $forced_breakpoint_to_go[$iend_1] = 0;
+ unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
}
- }
- # for keywords..
- elsif ( $type_iend_1 eq 'k' ) {
+ # for keywords..
+ elsif ( $type_iend_1 eq 'k' ) {
- # make major control keywords stand out
- # (recombine.t)
- next
- if (
+ # make major control keywords stand out
+ # (recombine.t)
+ next
+ if (
- #/^(last|next|redo|return)$/
- $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
+ #/^(last|next|redo|return)$/
+ $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
- # but only if followed by multiple lines
- && $n < $nmax
- );
+ # but only if followed by multiple lines
+ && $n < $nmax
+ );
- if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
- next
- unless $want_break_before{ $tokens_to_go[$iend_1] };
+ if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
+ next
+ unless $want_break_before{ $tokens_to_go[$iend_1]
+ };
+ }
}
- }
- #----------------------------------------------------------
- # Recombine Section 3:
- # Examine token at $ibeg_2 (left end of second line of pair)
- #----------------------------------------------------------
+ #----------------------------------------------------------
+ # Recombine Section 3:
+ # Examine token at $ibeg_2 (left end of second line of pair)
+ #----------------------------------------------------------
- # join lines identified above as capable of
- # causing an outdented line with leading closing paren
- # Note that we are skipping the rest of this section
- # and the rest of the loop to do the join
- if ($skip_Section_3) {
- $forced_breakpoint_to_go[$iend_1] = 0;
- $n_best = $n;
- last;
- }
+ # join lines identified above as capable of
+ # causing an outdented line with leading closing paren
+ # Note that we are skipping the rest of this section
+ # and the rest of the loop to do the join
+ if ($skip_Section_3) {
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ $n_best = $n;
+ last;
+ }
- # handle lines with leading &&, ||
- elsif ( $is_amp_amp{$type_ibeg_2} ) {
+ # handle lines with leading &&, ||
+ elsif ( $is_amp_amp{$type_ibeg_2} ) {
- $leading_amp_count++;
+ $leading_amp_count++;
- # ok to recombine if it follows a ? or :
- # and is followed by an open paren..
- my $ok =
- ( $is_ternary{$type_ibeg_1}
- && $tokens_to_go[$iend_2] eq '(' )
+ # ok to recombine if it follows a ? or :
+ # and is followed by an open paren..
+ my $ok =
+ ( $is_ternary{$type_ibeg_1}
+ && $tokens_to_go[$iend_2] eq '(' )
# or is followed by a ? or : at same depth
#
# each one as in the second example. However, it
# sometimes makes things worse to check for this because
# it prevents multiple recombinations. So this is not done.
- || ( $ibeg_3 >= 0
- && $is_ternary{ $types_to_go[$ibeg_3] }
- && $nesting_depth_to_go[$ibeg_3] ==
- $nesting_depth_to_go[$ibeg_2] );
-
- next if !$ok && $want_break_before{$type_ibeg_2};
- $forced_breakpoint_to_go[$iend_1] = 0;
-
- # tweak the bond strength to give this joint priority
- # over ? and :
- $bs_tweak = 0.25;
- }
-
- # Identify and recombine a broken ?/: chain
- elsif ( $type_ibeg_2 eq '?' ) {
-
- # Do not recombine different levels
- my $lev = $levels_to_go[$ibeg_2];
- next if ( $lev ne $levels_to_go[$ibeg_1] );
-
- # Do not recombine a '?' if either next line or
- # previous line does not start with a ':'. The reasons
- # are that (1) no alignment of the ? will be possible
- # and (2) the expression is somewhat complex, so the
- # '?' is harder to see in the interior of the line.
- my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
- my $precedes_colon =
- $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
- next unless ( $follows_colon || $precedes_colon );
-
- # we will always combining a ? line following a : line
- if ( !$follows_colon ) {
-
- # ...otherwise recombine only if it looks like a chain.
- # we will just look at a few nearby lines to see if
- # this looks like a chain.
- my $local_count = 0;
- foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
- $local_count++
- if $ii >= 0
- && $types_to_go[$ii] eq ':'
- && $levels_to_go[$ii] == $lev;
+ || ( $ibeg_3 >= 0
+ && $is_ternary{ $types_to_go[$ibeg_3] }
+ && $nesting_depth_to_go[$ibeg_3] ==
+ $nesting_depth_to_go[$ibeg_2] );
+
+ # Combine a trailing && term with an || term: fix for
+ # c060 This is rare but can happen.
+ $ok ||= 1
+ if ( $ibeg_3 < 0
+ && $type_ibeg_2 eq '&&'
+ && $type_ibeg_1 eq '||'
+ && $nesting_depth_to_go[$ibeg_2] ==
+ $nesting_depth_to_go[$ibeg_1] );
+
+ next if !$ok && $want_break_before{$type_ibeg_2};
+ $forced_breakpoint_to_go[$iend_1] = 0;
+
+ # tweak the bond strength to give this joint priority
+ # over ? and :
+ $bs_tweak = 0.25;
+ }
+
+ # Identify and recombine a broken ?/: chain
+ elsif ( $type_ibeg_2 eq '?' ) {
+
+ # Do not recombine different levels
+ my $lev = $levels_to_go[$ibeg_2];
+ next if ( $lev ne $levels_to_go[$ibeg_1] );
+
+ # Do not recombine a '?' if either next line or
+ # previous line does not start with a ':'. The reasons
+ # are that (1) no alignment of the ? will be possible
+ # and (2) the expression is somewhat complex, so the
+ # '?' is harder to see in the interior of the line.
+ my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
+ my $precedes_colon =
+ $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
+ next unless ( $follows_colon || $precedes_colon );
+
+ # we will always combining a ? line following a : line
+ if ( !$follows_colon ) {
+
+ # ...otherwise recombine only if it looks like a
+ # chain. we will just look at a few nearby lines
+ # to see if this looks like a chain.
+ my $local_count = 0;
+ foreach
+ my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 )
+ {
+ $local_count++
+ if $ii >= 0
+ && $types_to_go[$ii] eq ':'
+ && $levels_to_go[$ii] == $lev;
+ }
+ next unless ( $local_count > 1 );
}
- next unless ( $local_count > 1 );
+ $forced_breakpoint_to_go[$iend_1] = 0;
}
- $forced_breakpoint_to_go[$iend_1] = 0;
- }
- # do not recombine lines with leading '.'
- elsif ( $type_ibeg_2 eq '.' ) {
- my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
- next
- unless (
+ # do not recombine lines with leading '.'
+ elsif ( $type_ibeg_2 eq '.' ) {
+ my $i_next_nonblank =
+ min( $inext_to_go[$ibeg_2], $iend_2 );
+ next
+ unless (
# ... unless there is just one and we can reduce
# this to two lines if we do. For example, this
# $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
# . '$args .= $pat;'
- (
- $n == 2
- && $n == $nmax
- && $type_ibeg_1 ne $type_ibeg_2
- )
+ (
+ $n == 2
+ && $n == $nmax
+ && $type_ibeg_1 ne $type_ibeg_2
+ )
- # ... or this would strand a short quote , like this
- # . "some long quote"
- # . "\n";
+ # ... or this would strand a short quote , like this
+ # . "some long quote"
+ # . "\n";
- || ( $types_to_go[$i_next_nonblank] eq 'Q'
- && $i_next_nonblank >= $iend_2 - 1
- && $token_lengths_to_go[$i_next_nonblank] <
- $rOpts_short_concatenation_item_length )
- );
- }
+ || ( $types_to_go[$i_next_nonblank] eq 'Q'
+ && $i_next_nonblank >= $iend_2 - 1
+ && $token_lengths_to_go[$i_next_nonblank] <
+ $rOpts_short_concatenation_item_length )
+ );
+ }
- # handle leading keyword..
- elsif ( $type_ibeg_2 eq 'k' ) {
+ # handle leading keyword..
+ elsif ( $type_ibeg_2 eq 'k' ) {
- # handle leading "or"
- if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
- next
- unless (
- $this_line_is_semicolon_terminated
- && (
- $type_ibeg_1 eq '}'
- || (
+ # handle leading "or"
+ if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
+ next
+ unless (
+ $this_line_is_semicolon_terminated
+ && (
+ $type_ibeg_1 eq '}'
+ || (
+
+ # following 'if' or 'unless' or 'or'
+ $type_ibeg_1 eq 'k'
+ && $is_if_unless{ $tokens_to_go[$ibeg_1]
+ }
+
+ # important: only combine a very simple
+ # or statement because the step below
+ # may have combined a trailing 'and'
+ # with this or, and we do not want to
+ # then combine everything together
+ && ( $iend_2 - $ibeg_2 <= 7 )
+ )
+ )
+ );
+
+ #X: RT #81854
+ $forced_breakpoint_to_go[$iend_1] = 0
+ unless ( $old_breakpoint_to_go[$iend_1] );
+ }
+
+ # handle leading 'and' and 'xor'
+ elsif ($tokens_to_go[$ibeg_2] eq 'and'
+ || $tokens_to_go[$ibeg_2] eq 'xor' )
+ {
+
+ # Decide if we will combine a single terminal 'and'
+ # after an 'if' or 'unless'.
+
+ # This looks best with the 'and' on the same
+ # line as the 'if':
+ #
+ # $a = 1
+ # if $seconds and $nu < 2;
+ #
+ # But this looks better as shown:
+ #
+ # $a = 1
+ # if !$this->{Parents}{$_}
+ # or $this->{Parents}{$_} eq $_;
+ #
+ next
+ unless (
+ $this_line_is_semicolon_terminated
+ && (
# following 'if' or 'unless' or 'or'
$type_ibeg_1 eq 'k'
- && $is_if_unless{ $tokens_to_go[$ibeg_1] }
-
- # important: only combine a very simple or
- # statement because the step below may have
- # combined a trailing 'and' with this or,
- # and we do not want to then combine
- # everything together
- && ( $iend_2 - $ibeg_2 <= 7 )
+ && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
+ || $tokens_to_go[$ibeg_1] eq 'or' )
)
- )
- );
+ );
+ }
- #X: RT #81854
- $forced_breakpoint_to_go[$iend_1] = 0
- unless $old_breakpoint_to_go[$iend_1];
- }
+ # handle leading "if" and "unless"
+ elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
- # handle leading 'and' and 'xor'
- elsif ($tokens_to_go[$ibeg_2] eq 'and'
- || $tokens_to_go[$ibeg_2] eq 'xor' )
- {
+ # Combine something like:
+ # next
+ # if ( $lang !~ /${l}$/i );
+ # into:
+ # next if ( $lang !~ /${l}$/i );
+ next
+ unless (
+ $this_line_is_semicolon_terminated
- # Decide if we will combine a single terminal 'and'
- # after an 'if' or 'unless'.
+ # previous line begins with 'and' or 'or'
+ && $type_ibeg_1 eq 'k'
+ && $is_and_or{ $tokens_to_go[$ibeg_1] }
- # This looks best with the 'and' on the same
- # line as the 'if':
- #
- # $a = 1
- # if $seconds and $nu < 2;
- #
- # But this looks better as shown:
- #
- # $a = 1
- # if !$this->{Parents}{$_}
- # or $this->{Parents}{$_} eq $_;
- #
- next
- unless (
- $this_line_is_semicolon_terminated
- && (
+ );
+ }
- # following 'if' or 'unless' or 'or'
- $type_ibeg_1 eq 'k'
- && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
- || $tokens_to_go[$ibeg_1] eq 'or' )
- )
- );
+ # handle all other leading keywords
+ else {
+
+ # keywords look best at start of lines,
+ # but combine things like "1 while"
+ unless ( $is_assignment{$type_iend_1} ) {
+ next
+ if ( ( $type_iend_1 ne 'k' )
+ && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
+ }
+ }
}
- # handle leading "if" and "unless"
- elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
+ # similar treatment of && and || as above for 'and' and
+ # 'or': NOTE: This block of code is currently bypassed
+ # because of a previous block but is retained for possible
+ # future use.
+ elsif ( $is_amp_amp{$type_ibeg_2} ) {
+
+ # maybe looking at something like:
+ # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
- # Combine something like:
- # next
- # if ( $lang !~ /${l}$/i );
- # into:
- # next if ( $lang !~ /${l}$/i );
next
unless (
$this_line_is_semicolon_terminated
- # previous line begins with 'and' or 'or'
+ # previous line begins with an 'if' or 'unless'
+ # keyword
&& $type_ibeg_1 eq 'k'
- && $is_and_or{ $tokens_to_go[$ibeg_1] }
+ && $is_if_unless{ $tokens_to_go[$ibeg_1] }
);
}
- # handle all other leading keywords
- else {
-
- # keywords look best at start of lines,
- # but combine things like "1 while"
- unless ( $is_assignment{$type_iend_1} ) {
- next
- if ( ( $type_iend_1 ne 'k' )
- && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
- }
- }
- }
-
- # similar treatment of && and || as above for 'and' and 'or':
- # NOTE: This block of code is currently bypassed because
- # of a previous block but is retained for possible future use.
- elsif ( $is_amp_amp{$type_ibeg_2} ) {
-
- # maybe looking at something like:
- # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
-
- next
- unless (
- $this_line_is_semicolon_terminated
-
- # previous line begins with an 'if' or 'unless' keyword
- && $type_ibeg_1 eq 'k'
- && $is_if_unless{ $tokens_to_go[$ibeg_1] }
-
- );
- }
-
- # handle line with leading = or similar
- elsif ( $is_assignment{$type_ibeg_2} ) {
- next unless ( $n == 1 || $n == $nmax );
- next if $old_breakpoint_to_go[$iend_1];
- next
- unless (
+ # handle line with leading = or similar
+ elsif ( $is_assignment{$type_ibeg_2} ) {
+ next unless ( $n == 1 || $n == $nmax );
+ next if ( $old_breakpoint_to_go[$iend_1] );
+ next
+ unless (
- # unless we can reduce this to two lines
- $nmax == 2
+ # unless we can reduce this to two lines
+ $nmax == 2
- # or three lines, the last with a leading semicolon
- || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
+ # or three lines, the last with a leading semicolon
+ || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
- # or the next line ends with a here doc
- || $type_iend_2 eq 'h'
+ # or the next line ends with a here doc
+ || $type_iend_2 eq 'h'
- # or this is a short line ending in ;
- || ( $n == $nmax && $this_line_is_semicolon_terminated )
- );
- $forced_breakpoint_to_go[$iend_1] = 0;
- }
+ # or this is a short line ending in ;
+ || ( $n == $nmax
+ && $this_line_is_semicolon_terminated )
+ );
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
- #----------------------------------------------------------
- # Recombine Section 4:
- # Combine the lines if we arrive here and it is possible
- #----------------------------------------------------------
+ #----------------------------------------------------------
+ # Recombine Section 4:
+ # Combine the lines if we arrive here and it is possible
+ #----------------------------------------------------------
- # honor hard breakpoints
- next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
+ # honor hard breakpoints
+ next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
- my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
+ my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
- # Require a few extra spaces before recombining lines if we are
- # at an old breakpoint unless this is a simple list or terminal
- # line. The goal is to avoid oscillating between two
- # quasi-stable end states. For example this snippet caused
- # problems:
+ # Require a few extra spaces before recombining lines if we are
+ # at an old breakpoint unless this is a simple list or terminal
+ # line. The goal is to avoid oscillating between two
+ # quasi-stable end states. For example this snippet caused
+ # problems:
## my $this =
## bless {
## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
## },
## $type;
- next
- if ( $old_breakpoint_to_go[$iend_1]
- && !$this_line_is_semicolon_terminated
- && $n < $nmax
- && $excess + 4 > 0
- && $type_iend_2 ne ',' );
-
- # do not recombine if we would skip in indentation levels
- if ( $n < $nmax ) {
- my $if_next = $ri_beg->[ $n + 1 ];
next
- if (
- $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
- && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
-
- # but an isolated 'if (' is undesirable
- && !(
- $n == 1
- && $iend_1 - $ibeg_1 <= 2
- && $type_ibeg_1 eq 'k'
- && $tokens_to_go[$ibeg_1] eq 'if'
- && $tokens_to_go[$iend_1] ne '('
- )
- );
- }
+ if ( $old_breakpoint_to_go[$iend_1]
+ && !$this_line_is_semicolon_terminated
+ && $n < $nmax
+ && $excess + 4 > 0
+ && $type_iend_2 ne ',' );
- # honor no-break's
- next if ( $bs >= NO_BREAK - 1 );
+ # do not recombine if we would skip in indentation levels
+ if ( $n < $nmax ) {
+ my $if_next = $ri_beg->[ $n + 1 ];
+ next
+ if (
+ $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
+ && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
+
+ # but an isolated 'if (' is undesirable
+ && !(
+ $n == 1
+ && $iend_1 - $ibeg_1 <= 2
+ && $type_ibeg_1 eq 'k'
+ && $tokens_to_go[$ibeg_1] eq 'if'
+ && $tokens_to_go[$iend_1] ne '('
+ )
+ );
+ }
- # remember the pair with the greatest bond strength
- if ( !$n_best ) {
- $n_best = $n;
- $bs_best = $bs;
- }
- else {
+ # honor no-break's
+ ## next if ( $bs >= NO_BREAK - 1 ); # removed for b1257
- if ( $bs > $bs_best ) {
+ # remember the pair with the greatest bond strength
+ if ( !$n_best ) {
$n_best = $n;
$bs_best = $bs;
}
+ else {
+
+ if ( $bs > $bs_best ) {
+ $n_best = $n;
+ $bs_best = $bs;
+ }
+ }
}
- }
- # recombine the pair with the greatest bond strength
- if ($n_best) {
- splice @{$ri_beg}, $n_best, 1;
- splice @{$ri_end}, $n_best - 1, 1;
- splice @joint, $n_best, 1;
+ # recombine the pair with the greatest bond strength
+ if ($n_best) {
+ splice @{$ri_beg}, $n_best, 1;
+ splice @{$ri_end}, $n_best - 1, 1;
+ splice @joint, $n_best, 1;
- # keep going if we are still making progress
- $more_to_do++;
- }
+ # keep going if we are still making progress
+ $more_to_do++;
+ }
+ } # end iteration loop
+
+ } # end loop over sections
+
+ RETURN:
+
+ if (DEBUG_RECOMBINE) {
+ my $nmax = @{$ri_end} - 1;
+ print STDERR
+"exiting recombine with $nmax lines, starting lines=$nmax_start, iterations=$it_count, max_it=$it_count_max numsec=$num_sections\n";
}
- return ( $ri_beg, $ri_end );
+ return;
}
} ## end closure recombine_breakpoints
return;
}
+use constant DEBUG_CORRECT_LP => 0;
+
sub correct_lp_indentation {
# When the -lp option is used, we need to make a last pass through
# tries to patch things up once the actual opening paren locations
# are known.
my ( $self, $ri_first, $ri_last ) = @_;
- my $do_not_pad = 0;
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $do_not_pad = 0;
# Note on flag '$do_not_pad':
# We want to avoid a situation like this, where the aligner inserts
# We leave it to the aligner to decide how to do this.
# first remove continuation indentation if appropriate
+ my $rLL = $self->[_rLL_];
my $max_line = @{$ri_first} - 1;
- # looking at each line of this batch..
+ #---------------------------------------------------------------------------
+ # PASS 1: reduce indentation if necessary at any long one-line blocks (c098)
+ #---------------------------------------------------------------------------
+
+ # The point is that sub 'starting_one_line_block' made one-line blocks based
+ # on default indentation, not -lp indentation. So some of the one-line
+ # blocks may be too long when given -lp indentation. We will fix that now
+ # if possible, using the list of these closing block indexes.
+ my $ri_starting_one_line_block =
+ $self->[_this_batch_]->[_ri_starting_one_line_block_];
+ if ( @{$ri_starting_one_line_block} ) {
+ my @ilist = @{$ri_starting_one_line_block};
+ my $inext = shift(@ilist);
+
+ # loop over lines, checking length of each with a one-line block
+ my ( $ibeg, $iend );
+ foreach my $line ( 0 .. $max_line ) {
+ $iend = $ri_last->[$line];
+ next if ( $inext > $iend );
+ $ibeg = $ri_first->[$line];
+
+ # This is just for lines with indentation objects (c098)
+ my $excess =
+ ref( $leading_spaces_to_go[$ibeg] )
+ ? $self->excess_line_length( $ibeg, $iend )
+ : 0;
+
+ if ( $excess > 0 ) {
+ my $available_spaces = $self->get_available_spaces_to_go($ibeg);
+
+ if ( $available_spaces > 0 ) {
+ my $delete_want = min( $available_spaces, $excess );
+ my $deleted_spaces =
+ $self->reduce_lp_indentation( $ibeg, $delete_want );
+ $available_spaces =
+ $self->get_available_spaces_to_go($ibeg);
+ }
+ }
+
+ # skip forward to next one-line block to check
+ while (@ilist) {
+ $inext = shift @ilist;
+ next if ( $inext <= $iend );
+ last if ( $inext > $iend );
+ }
+ last if ( $inext <= $iend );
+ }
+ }
+
+ #-------------------------------------------------------------------
+ # PASS 2: look for and fix other problems in each line of this batch
+ #-------------------------------------------------------------------
+
+ # look at each output line ...
my ( $ibeg, $iend );
foreach my $line ( 0 .. $max_line ) {
$ibeg = $ri_first->[$line];
$iend = $ri_last->[$line];
- # looking at each token in this output line..
+ # looking at each token in this output line ...
foreach my $i ( $ibeg .. $iend ) {
# How many space characters to place before this token
# for special alignment. Actual padding is done in the
# continue block.
- # looking for next unvisited indentation item
+ # looking for next unvisited indentation item ...
my $indentation = $leading_spaces_to_go[$i];
- if ( !$indentation->get_marked() ) {
- $indentation->set_marked(1);
-
- # looking for indentation item for which we are aligning
- # with parens, braces, and brackets
- next unless ( $indentation->get_align_paren() );
-
- # skip closed container on this line
- if ( $i > $ibeg ) {
- my $im = max( $ibeg, $iprev_to_go[$i] );
- if ( $type_sequence_to_go[$im]
- && $mate_index_to_go[$im] <= $iend )
- {
- next;
- }
- }
- if ( $line == 1 && $i == $ibeg ) {
- $do_not_pad = 1;
- }
+ # This is just for indentation objects (c098)
+ next unless ( ref($indentation) );
- # Ok, let's see what the error is and try to fix it
- my $actual_pos;
- my $predicted_pos = $indentation->get_spaces();
- if ( $i > $ibeg ) {
+ # Visit each indentation object just once
+ next if ( $indentation->get_marked() );
- # token is mid-line - use length to previous token
- $actual_pos = total_line_length( $ibeg, $i - 1 );
+ # Mark first visit
+ $indentation->set_marked(1);
- # for mid-line token, we must check to see if all
- # additional lines have continuation indentation,
- # and remove it if so. Otherwise, we do not get
- # good alignment.
- my $closing_index = $indentation->get_closed();
- if ( $closing_index > $iend ) {
- my $ibeg_next = $ri_first->[ $line + 1 ];
- if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
- $self->undo_lp_ci( $line, $i, $closing_index,
- $ri_first, $ri_last );
- }
+ # Skip indentation objects which do not align with container tokens
+ my $align_seqno = $indentation->get_align_seqno();
+ next unless ($align_seqno);
+
+ # Skip a container which is entirely on this line
+ my $Ko = $K_opening_container->{$align_seqno};
+ my $Kc = $K_closing_container->{$align_seqno};
+ if ( defined($Ko) && defined($Kc) ) {
+ next if ( $Ko >= $K_to_go[$ibeg] && $Kc <= $K_to_go[$iend] );
+ }
+
+ if ( $line == 1 && $i == $ibeg ) {
+ $do_not_pad = 1;
+ }
+
+ #--------------------------------------------
+ # Now see what the error is and try to fix it
+ #--------------------------------------------
+ my $closing_index = $indentation->get_closed();
+ my $predicted_pos = $indentation->get_spaces();
+
+ # Find actual position:
+ my $actual_pos;
+
+ if ( $i == $ibeg ) {
+
+ # Case 1: token is first character of of batch - table lookup
+ if ( $line == 0 ) {
+
+ $actual_pos = $predicted_pos;
+
+ my ( $indent, $offset, $is_leading, $exists ) =
+ get_saved_opening_indentation($align_seqno);
+ if ( defined($indent) ) {
+
+ # FIXME: should use '1' here if no space after opening
+ # and '2' if want space; hardwired at 1 like -gnu-style
+ $actual_pos = get_spaces($indent) + $offset + 1;
}
}
- elsif ( $line > 0 ) {
- # handle case where token starts a new line;
- # use length of previous line
+ # Case 2: token starts a new line - use length of previous line
+ else {
+
my $ibegm = $ri_first->[ $line - 1 ];
my $iendm = $ri_last->[ $line - 1 ];
$actual_pos = total_line_length( $ibegm, $iendm );
# follow -pt style
++$actual_pos
if ( $types_to_go[ $iendm + 1 ] eq 'b' );
- }
- else {
- # token is first character of first line of batch
- $actual_pos = $predicted_pos;
}
+ }
- my $move_right = $actual_pos - $predicted_pos;
+ # Case 3: $i>$ibeg: token is mid-line - use length to previous token
+ else {
- # done if no error to correct (gnu2.t)
- if ( $move_right == 0 ) {
- $indentation->set_recoverable_spaces($move_right);
- next;
+ $actual_pos = total_line_length( $ibeg, $i - 1 );
+
+ # for mid-line token, we must check to see if all
+ # additional lines have continuation indentation,
+ # and remove it if so. Otherwise, we do not get
+ # good alignment.
+ if ( $closing_index > $iend ) {
+ my $ibeg_next = $ri_first->[ $line + 1 ];
+ if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
+ $self->undo_lp_ci( $line, $i, $closing_index,
+ $ri_first, $ri_last );
+ }
}
+ }
- # if we have not seen closure for this indentation in
- # this batch, we can only pass on a request to the
- # vertical aligner
- my $closing_index = $indentation->get_closed();
+ # By how many spaces (plus or minus) would we need to increase the
+ # indentation to get alignment with the opening token?
+ my $move_right = $actual_pos - $predicted_pos;
- if ( $closing_index < 0 ) {
- $indentation->set_recoverable_spaces($move_right);
- next;
+ if (DEBUG_CORRECT_LP) {
+ my $tok = substr( $tokens_to_go[$i], 0, 8 );
+ my $avail = $self->get_available_spaces_to_go($ibeg);
+ print
+"CORRECT_LP for seq=$align_seqno, predicted pos=$predicted_pos actual=$actual_pos => move right=$move_right available=$avail i=$i max=$max_index_to_go tok=$tok\n";
+ }
+
+ # nothing more to do if no error to correct (gnu2.t)
+ if ( $move_right == 0 ) {
+ $indentation->set_recoverable_spaces($move_right);
+ next;
+ }
+
+ # Get any collapsed length defined for -xlp
+ my $collapsed_length =
+ $self->[_rcollapsed_length_by_seqno_]->{$align_seqno};
+ $collapsed_length = 0 unless ( defined($collapsed_length) );
+
+ if (DEBUG_CORRECT_LP) {
+ print
+"CORRECT_LP for seq=$align_seqno, collapsed length is $collapsed_length\n";
+ }
+
+ # if we have not seen closure for this indentation in this batch,
+ # and do not have a collapsed length estimate, we can only pass on
+ # a request to the vertical aligner
+ if ( $closing_index < 0 && !$collapsed_length ) {
+ $indentation->set_recoverable_spaces($move_right);
+ next;
+ }
+
+ # If necessary, look ahead to see if there is really any leading
+ # whitespace dependent on this whitespace, and also find the
+ # longest line using this whitespace. Since it is always safe to
+ # move left if there are no dependents, we only need to do this if
+ # we may have dependent nodes or need to move right.
+
+ my $have_child = $indentation->get_have_child();
+ my %saw_indentation;
+ my $line_count = 1;
+ $saw_indentation{$indentation} = $indentation;
+
+ # How far can we move right before we hit the limit?
+ # let $right_margen = the number of spaces that we can increase
+ # the current indentation before hitting the maximum line length.
+ my $right_margin = 0;
+
+ if ( $have_child || $move_right > 0 ) {
+ $have_child = 0;
+
+ # include estimated collapsed length for incomplete containers
+ my $max_length = 0;
+ if ( $Kc > $K_to_go[$max_index_to_go] ) {
+ $max_length = $collapsed_length + $predicted_pos;
}
- # If necessary, look ahead to see if there is really any
- # leading whitespace dependent on this whitespace, and
- # also find the longest line using this whitespace.
- # Since it is always safe to move left if there are no
- # dependents, we only need to do this if we may have
- # dependent nodes or need to move right.
+ if ( $i == $ibeg ) {
+ my $length = total_line_length( $ibeg, $iend );
+ if ( $length > $max_length ) { $max_length = $length }
+ }
- my $right_margin = 0;
- my $have_child = $indentation->get_have_child();
+ # look ahead at the rest of the lines of this batch..
+ foreach my $line_t ( $line + 1 .. $max_line ) {
+ my $ibeg_t = $ri_first->[$line_t];
+ my $iend_t = $ri_last->[$line_t];
+ last if ( $closing_index <= $ibeg_t );
- my %saw_indentation;
- my $line_count = 1;
- $saw_indentation{$indentation} = $indentation;
+ # remember all different indentation objects
+ my $indentation_t = $leading_spaces_to_go[$ibeg_t];
+ $saw_indentation{$indentation_t} = $indentation_t;
+ $line_count++;
- if ( $have_child || $move_right > 0 ) {
- $have_child = 0;
- my $max_length = 0;
- if ( $i == $ibeg ) {
- $max_length = total_line_length( $ibeg, $iend );
+ # remember longest line in the group
+ my $length_t = total_line_length( $ibeg_t, $iend_t );
+ if ( $length_t > $max_length ) {
+ $max_length = $length_t;
}
+ }
- # look ahead at the rest of the lines of this batch..
- foreach my $line_t ( $line + 1 .. $max_line ) {
- my $ibeg_t = $ri_first->[$line_t];
- my $iend_t = $ri_last->[$line_t];
- last if ( $closing_index <= $ibeg_t );
-
- # remember all different indentation objects
- my $indentation_t = $leading_spaces_to_go[$ibeg_t];
- $saw_indentation{$indentation_t} = $indentation_t;
- $line_count++;
-
- # remember longest line in the group
- my $length_t = total_line_length( $ibeg_t, $iend_t );
- if ( $length_t > $max_length ) {
- $max_length = $length_t;
- }
- }
- $right_margin =
- $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] -
- $max_length;
- if ( $right_margin < 0 ) { $right_margin = 0 }
- }
-
- my $first_line_comma_count =
- grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
- my $comma_count = $indentation->get_comma_count();
- my $arrow_count = $indentation->get_arrow_count();
-
- # This is a simple approximate test for vertical alignment:
- # if we broke just after an opening paren, brace, bracket,
- # and there are 2 or more commas in the first line,
- # and there are no '=>'s,
- # then we are probably vertically aligned. We could set
- # an exact flag in sub scan_list, but this is good
- # enough.
- my $indentation_count = keys %saw_indentation;
- my $is_vertically_aligned =
- ( $i == $ibeg
- && $first_line_comma_count > 1
- && $indentation_count == 1
- && ( $arrow_count == 0 || $arrow_count == $line_count ) );
-
- # Make the move if possible ..
- if (
+ $right_margin =
+ $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] -
+ $max_length;
+ if ( $right_margin < 0 ) { $right_margin = 0 }
+ }
+
+ my $first_line_comma_count =
+ grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
+ my $comma_count = $indentation->get_comma_count();
+ my $arrow_count = $indentation->get_arrow_count();
+
+ # This is a simple approximate test for vertical alignment:
+ # if we broke just after an opening paren, brace, bracket,
+ # and there are 2 or more commas in the first line,
+ # and there are no '=>'s,
+ # then we are probably vertically aligned. We could set
+ # an exact flag in sub break_lists, but this is good
+ # enough.
+ my $indentation_count = keys %saw_indentation;
+ my $is_vertically_aligned =
+ ( $i == $ibeg
+ && $first_line_comma_count > 1
+ && $indentation_count == 1
+ && ( $arrow_count == 0 || $arrow_count == $line_count ) );
+
+ # Make the move if possible ..
+ if (
- # we can always move left
- $move_right < 0
+ # we can always move left
+ $move_right < 0
- # but we should only move right if we are sure it will
- # not spoil vertical alignment
- || ( $comma_count == 0 )
- || ( $comma_count > 0 && !$is_vertically_aligned )
- )
- {
- my $move =
- ( $move_right <= $right_margin )
- ? $move_right
- : $right_margin;
-
- foreach ( keys %saw_indentation ) {
- $saw_indentation{$_}
- ->permanently_decrease_available_spaces( -$move );
- }
+ # -xlp
+
+ # incomplete container
+ || ( $rOpts_extended_line_up_parentheses
+ && $Kc > $K_to_go[$max_index_to_go] )
+ || $closing_index < 0
+
+ # but we should only move right if we are sure it will
+ # not spoil vertical alignment
+ || ( $comma_count == 0 )
+ || ( $comma_count > 0 && !$is_vertically_aligned )
+ )
+ {
+ my $move =
+ ( $move_right <= $right_margin )
+ ? $move_right
+ : $right_margin;
+
+ if (DEBUG_CORRECT_LP) {
+ print
+ "CORRECT_LP for seq=$align_seqno, moving $move spaces\n";
}
- # Otherwise, record what we want and the vertical aligner
- # will try to recover it.
- else {
- $indentation->set_recoverable_spaces($move_right);
+ foreach ( keys %saw_indentation ) {
+ $saw_indentation{$_}
+ ->permanently_decrease_available_spaces( -$move );
}
}
- }
- }
+
+ # Otherwise, record what we want and the vertical aligner
+ # will try to recover it.
+ else {
+ $indentation->set_recoverable_spaces($move_right);
+ }
+ } ## end loop over tokens in a line
+ } ## end loop over lines
return $do_not_pad;
}
# CODE SECTION 10: Code to break long statments
###############################################
-sub set_continuation_breaks {
+sub break_long_lines {
- # Called once per batch to set breaks in long lines.
+ #-----------------------------------------------------------
+ # Break a batch of tokens into lines which do not exceed the
+ # maximum line length.
+ #-----------------------------------------------------------
# Define an array of indexes for inserting newline characters to
# keep the line lengths below the maximum desired length. There is
# This routine is part of series of routines which adjust line
# lengths. It is only called if a statement is longer than the
# maximum line length, or if a preliminary scanning located
- # desirable break points. Sub scan_list has already looked at
+ # desirable break points. Sub break_lists has already looked at
# these tokens and set breakpoints (in array
# $forced_breakpoint_to_go[$i]) where it wants breaks (for example
# after commas, after opening parens, and before closing parens).
# @{$rcolon_list} is a list of all the ? and : tokens in the batch, in
# order.
- use constant DEBUG_BREAKPOINTS => 0;
+ use constant DEBUG_BREAK_LINES => 0;
my @i_first = (); # the first index to output
my @i_last = (); # the last index to output
my $maximum_line_length =
$maximum_line_length_at_level[ $levels_to_go[$i_begin] ];
+ # Do not separate an isolated bare word from an opening paren.
+ # Alternate Fix #2 for issue b1299. This waits as long as possible
+ # to make the decision.
+ if ( $types_to_go[$i_begin] eq 'i'
+ && substr( $tokens_to_go[$i_begin], 0, 1 ) =~ /\w/ )
+ {
+ my $i_next_nonblank = $inext_to_go[$i_begin];
+ if ( $tokens_to_go[$i_next_nonblank] eq '(' ) {
+ $bond_strength_to_go[$i_begin] = NO_BREAK;
+ }
+ }
+
#-------------------------------------------------------
# BEGINNING of inner loop to find the best next breakpoint
#-------------------------------------------------------
)
{
$strength -= $tiny_bias;
- DEBUG_BREAKPOINTS && do { $Msg .= " :-bias at i=$i_test" };
+ DEBUG_BREAK_LINES && do { $Msg .= " :-bias at i=$i_test" };
}
# otherwise increase strength a bit if this token would be at the
$starting_sum;
if ( $len >= $maximum_line_length ) {
$strength += $tiny_bias;
- DEBUG_BREAKPOINTS && do { $Msg .= " :+bias at i=$i_test" };
+ DEBUG_BREAK_LINES && do { $Msg .= " :+bias at i=$i_test" };
}
}
# with lower level than the start of the line,
# unless we've already seen a better break.
#
- ##############################################
+ #------------------------------------
# Note on an issue with a preceding ?
- ##############################################
+ #------------------------------------
# We don't include a ? in the above list, but there may
# be a break at a previous ? if the line is long.
# Because of this we do not want to force a break if
)
{
$self->set_forced_breakpoint($i_next_nonblank);
- DEBUG_BREAKPOINTS
+ DEBUG_BREAK_LINES
&& do { $Msg .= " :Forced break at i=$i_next_nonblank" };
}
if (
- # Try to put a break where requested by scan_list
+ # Try to put a break where requested by break_lists
$forced_breakpoint_to_go[$i_test]
# break between ) { in a continued line so that the '{' can
# be outdented
- # See similar logic in scan_list which catches instances
+ # See similar logic in break_lists which catches instances
# where a line is just something like ') {'. We have to
# be careful because the corresponding block keyword might
# not be on the first line, such as 'for' here:
# sub block breaks handled at higher level, unless
# it looks like the preceding list is long and broken
&& !(
- $next_nonblank_block_type =~ /$ANYSUB_PATTERN/
+
+ (
+ $next_nonblank_block_type =~ /$SUB_PATTERN/
+ || $next_nonblank_block_type =~ /$ASUB_PATTERN/
+ )
&& ( $nesting_depth_to_go[$i_begin] ==
$nesting_depth_to_go[$i_next_nonblank] )
)
- && !$rOpts->{'opening-brace-always-on-right'}
+ && !$rOpts_opening_brace_always_on_right
)
# There is an implied forced break at a terminal opening brace
if ( $strength < NO_BREAK - 1 ) {
$strength = $lowest_strength - $tiny_bias;
$must_break = 1;
- DEBUG_BREAKPOINTS
+ DEBUG_BREAK_LINES
&& do { $Msg .= " :set must_break at i=$i_next_nonblank" };
}
}
)
{
if ( $i_lowest >= 0 ) {
- DEBUG_BREAKPOINTS && do {
+ DEBUG_BREAK_LINES && do {
$Msg .= " :quit at good terminal='$next_nonblank_type'";
};
last;
)
{
$i_test = min( $imax, $inext_to_go[$i_test] );
- DEBUG_BREAKPOINTS && do {
+ DEBUG_BREAK_LINES && do {
$Msg .= " :redo at i=$i_test";
};
redo;
# a leading alignment of certain common tokens, and it
# is different from the latest candidate break
if ($leading_alignment_type) {
- DEBUG_BREAKPOINTS && do {
+ DEBUG_BREAK_LINES && do {
$Msg .=
" :last at leading_alignment='$leading_alignment_type'";
};
)
{
- DEBUG_BREAKPOINTS && do {
+ DEBUG_BREAK_LINES && do {
$Msg .= " :last at good old break\n";
};
last;
if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
|| $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ )
{
- DEBUG_BREAKPOINTS && do {
+ DEBUG_BREAK_LINES && do {
$Msg .= " :last-noskip_short";
};
last;
$lowest_next_type = $next_nonblank_type;
$i_lowest_next_nonblank = $i_next_nonblank;
if ($must_break) {
- DEBUG_BREAKPOINTS && do {
+ DEBUG_BREAK_LINES && do {
$Msg .= " :last-must_break";
};
last;
&& !$is_closing_type{$next_nonblank_type} )
{
$too_long = $next_length >= $maximum_line_length;
- DEBUG_BREAKPOINTS && do {
+ DEBUG_BREAK_LINES && do {
$Msg .= " :too_long=$too_long" if ($too_long);
}
}
}
- DEBUG_BREAKPOINTS && do {
+ DEBUG_BREAK_LINES && do {
my $ltok = $token;
my $rtok = $next_nonblank_token ? $next_nonblank_token : "";
my $i_testp2 = $i_test + 2;
)
{
$too_long = 0;
- DEBUG_BREAKPOINTS && do {
+ DEBUG_BREAK_LINES && do {
$Msg .= " :do_not_strand next='$next_nonblank_type'";
};
}
|| $i_test == $imax
)
{
- DEBUG_BREAKPOINTS && do {
+ DEBUG_BREAK_LINES && do {
$Msg .=
" :Done-too_long=$too_long or i_lowest=$i_lowest or $i_test==imax";
};
$next_nonblank_type = $types_to_go[$i_next_nonblank];
$next_nonblank_token = $tokens_to_go[$i_next_nonblank];
- DEBUG_BREAKPOINTS
+ DEBUG_BREAK_LINES
&& print STDOUT
"BREAK: best is i = $i_lowest strength = $lowest_strength;\nReason>> $Msg\n";
$Msg = "";
#-------------------------------------------------------
# ?/: rule 2 : if we break at a '?', then break at its ':'
#
- # Note: this rule is also in sub scan_list to handle a break
+ # Note: this rule is also in sub break_lists to handle a break
# at the start and end of a line (in case breaks are dictated
# by side comments).
#-------------------------------------------------------
# update indentation size
if ( $i_begin <= $imax ) {
$leading_spaces = leading_spaces_to_go($i_begin);
- DEBUG_BREAKPOINTS
+ DEBUG_BREAK_LINES
&& print STDOUT
"updating leading spaces to be $leading_spaces at i=$i_begin\n";
}
# CODE SECTION 11: Code to break long lists
###########################################
-{ ## begin closure scan_list
+{ ## begin closure break_lists
# These routines and variables are involved in finding good
# places to break long lists.
+ use constant DEBUG_BREAK_LISTS => 0;
+
my (
- $block_type, $current_depth,
- $depth, $i,
- $i_last_nonblank_token, $last_colon_sequence_number,
- $last_nonblank_token, $last_nonblank_type,
- $last_nonblank_block_type, $last_old_breakpoint_count,
- $minimum_depth, $next_nonblank_block_type,
- $next_nonblank_token, $next_nonblank_type,
- $old_breakpoint_count, $starting_breakpoint_count,
- $starting_depth, $token,
- $type, $type_sequence,
+ $block_type, $current_depth,
+ $depth, $i,
+ $i_last_nonblank_token, $last_nonblank_token,
+ $last_nonblank_type, $last_nonblank_block_type,
+ $last_old_breakpoint_count, $minimum_depth,
+ $next_nonblank_block_type, $next_nonblank_token,
+ $next_nonblank_type, $old_breakpoint_count,
+ $starting_breakpoint_count, $starting_depth,
+ $token, $type,
+ $type_sequence,
);
my (
my ( @has_broken_sublist, @dont_align, @want_comma_break );
my $length_tol;
- my $length_tol_boost;
+ my $lp_tol_boost;
+ my $list_stress_level;
- sub initialize_scan_list {
+ sub initialize_break_lists {
@dont_align = ();
@has_broken_sublist = ();
@want_comma_break = ();
- ####################################################
+ #---------------------------------------------------
# Set tolerances to prevent formatting instabilities
- ####################################################
+ #---------------------------------------------------
# Define tolerances to use when checking if closed
# containers will fit on one line. This is necessary to avoid
# 'find_token_starting_list' to go back before an initial blank space.
# This fixed these three cases, and allowed the tolerances to be
# reduced to continue to fix all other known cases of instability.
- # This gives the current tolerance formulation (note that
- # variable $length_tol_boost is always 0 now):
+ # This gives the current tolerance formulation.
+
+ $lp_tol_boost = 0;
- $length_tol_boost = 0;
if ($rOpts_line_up_parentheses) {
- if ( $rOpts->{'extended-continuation-indentation'} ) {
- $length_tol += 2;
- $length_tol_boost = 0; # was 1 for FIX2, 0 for FIX3
+ # boost tol for combination -lp -xci
+ if ($rOpts_extended_continuation_indentation) {
+ $lp_tol_boost = 2;
}
+
+ # boost tol for combination -lp and any -vtc > 0, but only for
+ # non-list containers
else {
- $length_tol_boost = 0; # was 3 for FIX2, 0 for FIX3
+ foreach ( keys %closing_vertical_tightness ) {
+ next
+ unless ( $closing_vertical_tightness{$_} );
+ $lp_tol_boost = 1; # Fixes B1193;
+ last;
+ }
}
}
- # The -xci option alone also needs a slightly larger tol for non-lists
- elsif ( $rOpts->{'extended-continuation-indentation'} ) {
- $length_tol_boost = 0; # was 1 for FIX2, 0 for FIX3
- }
+ # Define a level where list formatting becomes highly stressed and
+ # needs to be simplified. Introduced for case b1262.
+ $list_stress_level = min( $stress_level_alpha, $stress_level_beta + 2 );
+
return;
}
my $bp_count = 0;
my $do_not_break_apart = 0;
+ # Do not break a list unless there are some non-line-ending commas.
+ # This avoids getting different results with only non-essential commas,
+ # and fixes b1192.
+ my $seqno = $type_sequence_stack[$dd];
+ my $real_comma_count =
+ $seqno ? $self->[_rtype_count_by_seqno_]->{$seqno}->{','} : 1;
+
# anything to do?
if ( $item_count_stack[$dd] ) {
}
# handle commas within containers...
- else {
+ elsif ($real_comma_count) {
my $fbc = get_forced_breakpoint_count();
# always open comma lists not preceded by keywords,
}
# These types are excluded at breakpoints to prevent blinking
- my %is_uncontained_comma_break_excluded_type;
+ # Switched from excluded to included as part of fix for b1214
+ ##my %is_uncontained_comma_break_excluded_type;
+ my %is_uncontained_comma_break_included_type;
BEGIN {
- my @q = qw< L { ( [ ? : + - >;
- @is_uncontained_comma_break_excluded_type{@q} = (1) x scalar(@q);
+ ##my @q = qw< L { ( [ ? : + - =~ >;
+ ##@is_uncontained_comma_break_excluded_type{@q} = (1) x scalar(@q);
+
+ my @q = qw< k R } ) ] Y Z U w i q Q .
+ = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=>;
+ @is_uncontained_comma_break_included_type{@q} = (1) x scalar(@q);
}
sub do_uncontained_comma_breaks {
# (3) NEW: there are one or more old comma breaks (see return example)
# (4) the first comma is at the starting level ...
# ... fixes cases b064 b065 b068 b210 b747
- #
+ # (5) the batch does not start with a ci>0 [ignore a ci change by -xci]
+ # ... fixes b1220. If ci>0 we are in the middle of a snippet,
+ # maybe because -boc has been forcing out previous lines.
+
# For example, we will follow the user and break after
# 'print' in this snippet:
# print
#
my $i_first_comma = $comma_index[$dd]->[0];
my $level_comma = $levels_to_go[$i_first_comma];
- if ( $old_breakpoint_to_go[$i_first_comma]
+ my $ci_start = $ci_levels_to_go[0];
+
+ # Here we want to use the value of ci before any -xci adjustment
+ if ( $ci_start && $rOpts_extended_continuation_indentation ) {
+ my $K0 = $K_to_go[0];
+ if ( $self->[_rseqno_controlling_my_ci_]->{$K0} ) { $ci_start = 0 }
+ }
+ if ( !$ci_start
+ && $old_breakpoint_to_go[$i_first_comma]
&& $level_comma == $levels_to_go[0] )
{
my $ibreak = -1;
# Changed rule from multiple old commas to just one here:
if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
{
- my $ibreakm = $ibreak;
- $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' );
- if ( $ibreakm >= 0 ) {
+ my $ibreak_m = $ibreak;
+ $ibreak_m-- if ( $types_to_go[$ibreak_m] eq 'b' );
+ if ( $ibreak_m >= 0 ) {
# In order to avoid blinkers we have to be fairly
# restrictive:
- # Rule 1: Do not to break before an opening token
- # Rule 2: avoid breaking at ternary operators
- # (see b931, which is similar to the above print example)
- # Rule 3: Do not break at chain operators to fix case b1119
- # - The previous test was '$typem !~ /^[\(\{\[L\?\:]$/'
+ # OLD Rules:
+ # Rule 1: Do not to break before an opening token
+ # Rule 2: avoid breaking at ternary operators
+ # (see b931, which is similar to the above print example)
+ # Rule 3: Do not break at chain operators to fix case b1119
+ # - The previous test was '$typem !~ /^[\(\{\[L\?\:]$/'
+
+ # NEW Rule, replaced above rules after case b1214:
+ # only break at one of the included types
# Be sure to test any changes to these rules against runs
# with -l=0 such as the 'bbvt' test (perltidyrc_colin)
# series.
+ my $type_m = $types_to_go[$ibreak_m];
- my $typem = $types_to_go[$ibreakm];
- if ( !$is_uncontained_comma_break_excluded_type{$typem} ) {
+ # Switched from excluded to included for b1214. If necessary
+ # the token could also be checked if type_m eq 'k'
+ ##if ( !$is_uncontained_comma_break_excluded_type{$type_m} ) {
+ ##my $token_m = $tokens_to_go[$ibreak_m];
+ if ( $is_uncontained_comma_break_included_type{$type_m} ) {
$self->set_forced_breakpoint($ibreak);
}
}
return $is_sort_map_grep{ $container_type[$dd] };
}
- sub scan_list {
+ sub break_lists {
my ( $self, $is_long_line ) = @_;
- # This routine is responsible for setting line breaks for all lists,
- # so that hierarchical structure can be displayed and so that list
- # items can be vertically aligned. The output of this routine is
- # stored in the array @forced_breakpoint_to_go, which is used to set
- # final breakpoints.
+ #----------------------------------------------------------------------
+ # This routine is called once per batch, if the batch is a list, to set
+ # line breaks so that hierarchical structure can be displayed and so
+ # that list items can be vertically aligned. The output of this
+ # routine is stored in the array @forced_breakpoint_to_go, which is
+ # used by sub 'break_long_lines' to set final breakpoints.
+ #----------------------------------------------------------------------
- # It is called once per batch if the batch is a list.
my $rLL = $self->[_rLL_];
my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
my $ris_broken_container = $self->[_ris_broken_container_];
$starting_depth = $nesting_depth_to_go[0];
- $block_type = ' ';
- $current_depth = $starting_depth;
- $i = -1;
- $last_colon_sequence_number = -1;
- $last_nonblank_token = ';';
- $last_nonblank_type = ';';
- $last_nonblank_block_type = ' ';
- $last_old_breakpoint_count = 0;
+ $block_type = ' ';
+ $current_depth = $starting_depth;
+ $i = -1;
+ $last_nonblank_token = ';';
+ $last_nonblank_type = ';';
+ $last_nonblank_block_type = ' ';
+ $last_old_breakpoint_count = 0;
$minimum_depth = $current_depth + 1; # forces update in check below
$old_breakpoint_count = 0;
$starting_breakpoint_count = get_forced_breakpoint_count();
my $total_depth_variation = 0;
my $i_old_assignment_break;
my $depth_last = $starting_depth;
+ my $comma_follows_last_closing_token;
check_for_new_minimum_depth($current_depth);
my $saw_good_breakpoint;
my $i_line_end = -1;
my $i_line_start = -1;
+ my $i_last_colon = -1;
- # loop over all tokens in this batch
+ #----------------------------------------
+ # Main loop over all tokens in this batch
+ #----------------------------------------
while ( ++$i <= $max_index_to_go ) {
if ( $type ne 'b' ) {
$i_last_nonblank_token = $i - 1;
# formatting.
if ( $type eq '#' ) {
if ( $i != $max_index_to_go ) {
- warning(
-"Non-fatal program bug: backup logic required to break after a comment\n"
- );
- report_definite_bug();
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+Non-fatal program bug: backup logic required to break after a comment
+EOM
+ }
$nobreak_to_go[$i] = 0;
$self->set_forced_breakpoint($i);
} ## end if ( $i != $max_index_to_go)
&& $i > 0
# if one of these keywords:
- # /^(if|unless|while|until|for)$/
- && $is_if_unless_while_until_for{$token}
+ && $is_if_unless_while_until_for_foreach{$token}
# but do not break at something like '1 while'
&& ( $last_nonblank_type ne 'n' || $i > 2 )
# handle any postponed closing breakpoints
if ( $is_closing_sequence_token{$token} ) {
if ( $type eq ':' ) {
- $last_colon_sequence_number = $type_sequence;
+ $i_last_colon = $i;
# retain break at a ':' line break
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_ternary_breakpoints )
+ if ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_ternary_breakpoints
+ && $levels_to_go[$i] < $list_stress_level )
{
$self->set_forced_breakpoint($i);
- # break at previous '='
- if ( $i_equals[$depth] > 0 ) {
+ # Break at a previous '=', but only if it is before
+ # the mating '?'. Mate_index test fixes b1287.
+ my $ieq = $i_equals[$depth];
+ if ( $ieq > 0 && $ieq < $mate_index_to_go[$i] ) {
$self->set_forced_breakpoint(
$i_equals[$depth] );
$i_equals[$depth] = -1;
} ## end if ( $type eq ':' )
if ( has_postponed_breakpoint($type_sequence) ) {
my $inc = ( $type eq ':' ) ? 0 : 1;
- $self->set_forced_breakpoint( $i - $inc );
+ if ( $i >= $inc ) {
+ $self->set_forced_breakpoint( $i - $inc );
+ }
}
} ## end if ( $is_closing_sequence_token{$token} )
)
{
+ # don't break if # this has a side comment, and
# don't break at a '?' if preceded by ':' on
# this line of previous ?/: pair on this line.
# This is an attempt to preserve a chain of ?/:
- # expressions (elsif2.t). And don't break if
- # this has a side comment.
- $self->set_forced_breakpoint($i)
- unless (
- $type_sequence == (
- $last_colon_sequence_number +
- TYPE_SEQUENCE_INCREMENT
+ # expressions (elsif2.t).
+ if (
+ (
+ $i_last_colon < 0
+ || $parent_seqno_to_go[$i_last_colon] !=
+ $parent_seqno_to_go[$i]
)
- || $tokens_to_go[$max_index_to_go] eq '#'
- );
+ && $tokens_to_go[$max_index_to_go] ne '#'
+ )
+ {
+ $self->set_forced_breakpoint($i);
+ }
$self->set_closing_breakpoint($i);
} ## end if ( $i_colon <= 0 ||...)
} ## end elsif ( $token eq '?' )
+
+ elsif ( $is_opening_token{$token} ) {
+
+ # do requeste -lp breaks at the OPENING token for BROKEN
+ # blocks. NOTE: this can be done for both -lp and -xlp,
+ # but only -xlp can really take advantage of this. So this
+ # is currently restricted to -xlp to avoid excess changes to
+ # existing -lp formatting.
+ if ( $rOpts_extended_line_up_parentheses
+ && $mate_index_to_go[$i] < 0 )
+ {
+ my $lp_object =
+ $self->[_rlp_object_by_seqno_]->{$type_sequence};
+ if ($lp_object) {
+ my $K_begin_line = $lp_object->get_K_begin_line();
+ my $i_begin_line = $K_begin_line - $K_to_go[0];
+ $self->set_forced_lp_break( $i_begin_line, $i );
+ }
+ }
+ }
+
} ## end if ($type_sequence)
#print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
# prepare for a new list when depth increases
# token $i is a '(','{', or '['
#------------------------------------------------------------
- if ( $depth > $current_depth ) {
+ # hardened against bad input syntax: depth jump must be 1 and type
+ # must be opening..fixes c102
+ if ( $depth == $current_depth + 1 && $is_opening_type{$type} ) {
$type_sequence_stack[$depth] = $type_sequence;
$override_cab3[$depth] =
# statements (like this one). See similar coding in
# set_continuation breaks. We have also catch it here for
# short line fragments which otherwise will not go through
- # set_continuation_breaks.
+ # break_long_lines.
if (
$block_type
&& $mate_index_to_go[$i_last_nonblank_token] < 0
# and user wants brace to left
- && !$rOpts->{'opening-brace-always-on-right'}
+ && !$rOpts_opening_brace_always_on_right
&& ( $type eq '{' ) # should be true
&& ( $token eq '{' ) # should be true
# finish off any old list when depth decreases
# token $i is a ')','}', or ']'
#------------------------------------------------------------
- elsif ( $depth < $current_depth ) {
+ # hardened against bad input syntax: depth jump must be 1 and type
+ # must be closing .. fixes c102
+ elsif ( $depth == $current_depth - 1 && $is_closing_type{$type} ) {
check_for_new_minimum_depth($depth);
+ $comma_follows_last_closing_token =
+ $next_nonblank_type eq ',' || $next_nonblank_type eq '=>';
+
# force all outer logical containers to break after we see on
# old breakpoint
$has_old_logical_breakpoints[$depth] ||=
$has_old_logical_breakpoints[$current_depth];
# Patch to break between ') {' if the paren list is broken.
- # There is similar logic in set_continuation_breaks for
+ # There is similar logic in break_long_lines for
# non-broken lists.
if ( $token eq ')'
&& $next_nonblank_block_type
&& $interrupted_list[$current_depth]
&& $next_nonblank_type eq '{'
- && !$rOpts->{'opening-brace-always-on-right'} )
+ && !$rOpts_opening_brace_always_on_right )
{
$self->set_forced_breakpoint($i);
} ## end if ( $token eq ')' && ...
my $i_opening = $opening_structure_index_stack[$current_depth];
my $saw_opening_structure = ( $i_opening >= 0 );
+ my $lp_object;
+ if ( $rOpts_line_up_parentheses && $saw_opening_structure ) {
+ $lp_object = $self->[_rlp_object_by_seqno_]
+ ->{ $type_sequence_to_go[$i_opening] };
+ }
# this term is long if we had to break at interior commas..
my $is_long_term = $bp_count > 0;
$cab_flag = 5;
}
+ # Ignore old breakpoints when under stress.
+ # Fixes b1203 b1204 as well as b1197-b1200.
+ # But not if -lp: fixes b1264, b1265. NOTE: rechecked with
+ # b1264 to see if this check is still required at all, and
+ # these still require a check, but at higher level beta+3
+ # instead of beta: b1193 b780
+ if ( $saw_opening_structure
+ && !$lp_object
+ && $levels_to_go[$i_opening] >= $list_stress_level )
+ {
+ $cab_flag = 2;
+
+ # Do not break hash braces under stress (fixes b1238)
+ $do_not_break_apart ||= $types_to_go[$i_opening] eq 'L';
+
+ # This option fixes b1235, b1237, b1240 with old and new -lp,
+ # but formatting is nicer with next option.
+ ## $is_long_term ||=
+ ## $levels_to_go[$i_opening] > $stress_level_beta + 1;
+
+ # This option fixes b1240 but not b1235, b1237 with new -lp,
+ # but this gives better formatting than the previous option.
+ $do_not_break_apart ||=
+ $levels_to_go[$i_opening] > $stress_level_beta;
+ }
+
if ( !$is_long_term
&& $saw_opening_structure
&& $is_opening_token{ $tokens_to_go[$i_opening] }
my $excess =
$self->excess_line_length( $i_opening_minus, $i );
- my $tol =
- $length_tol_boost
- && !$ris_list_by_seqno->{$type_sequence}
- ? $length_tol + $length_tol_boost
- : $length_tol;
+ # Use standard spaces for indentation of lists in -lp mode
+ # if it gives a longer line length. This helps to avoid an
+ # instability due to forming and breaking one-line blocks.
+ # This fixes case b1314.
+ my $indentation = $leading_spaces_to_go[$i_opening_minus];
+ if ( ref($indentation)
+ && $ris_broken_container->{$type_sequence} )
+ {
+ my $lp_spaces = $indentation->get_spaces();
+ my $std_spaces =
+ $standard_spaces_to_go[$i_opening_minus];
+ my $diff = $std_spaces - $lp_spaces;
+ if ( $diff > 0 ) { $excess += $diff }
+ }
+
+ my $tol = $length_tol;
+
+ # boost tol for an -lp container
+ if (
+ $lp_tol_boost
+ && $lp_object
+ && ( $rOpts_extended_continuation_indentation
+ || !$ris_list_by_seqno->{$type_sequence} )
+ )
+ {
+ $tol += $lp_tol_boost;
+ }
# Patch to avoid blinking with -bbxi=2 and -cab=2
# in which variations in -ci cause unstable formatting
# open up a long 'for' or 'foreach' container to allow
# leading term alignment unless -lp is used.
- $has_comma_breakpoints = 1
- unless $rOpts_line_up_parentheses;
+ $has_comma_breakpoints = 1 unless ($lp_object);
} ## end if ( $is_long_term && ...)
if (
)
{
- # For -lp option, we must put a breakpoint before
- # the token which has been identified as starting
- # this indentation level. This is necessary for
- # proper alignment.
- if ( $rOpts_line_up_parentheses && $saw_opening_structure )
- {
- my $item = $leading_spaces_to_go[ $i_opening + 1 ];
- if ( $i_opening + 1 < $max_index_to_go
- && $types_to_go[ $i_opening + 1 ] eq 'b' )
- {
- $item = $leading_spaces_to_go[ $i_opening + 2 ];
- }
- if ( defined($item) ) {
- my $i_start_2;
- my $K_start_2 = $item->get_starting_index_K();
- if ( defined($K_start_2) ) {
- $i_start_2 = $K_start_2 - $K_to_go[0];
- }
- if (
- defined($i_start_2)
-
- # we are breaking after an opening brace, paren,
- # so don't break before it too
- && $i_start_2 ne $i_opening
- && $i_start_2 >= 0
- && $i_start_2 <= $max_index_to_go
- )
- {
-
- # Only break for breakpoints at the same
- # indentation level as the opening paren
- my $test1 = $nesting_depth_to_go[$i_opening];
- my $test2 = $nesting_depth_to_go[$i_start_2];
- if ( $test2 == $test1 ) {
-
- # Back up at a blank (fixes case b932)
- my $ibr = $i_start_2 - 1;
- if ( $ibr > 0
- && $types_to_go[$ibr] eq 'b' )
- {
- $ibr--;
- }
-
- $self->set_forced_breakpoint($ibr);
-
- }
- } ## end if ( defined($i_start_2...))
- } ## end if ( defined($item) )
- } ## end if ( $rOpts_line_up_parentheses...)
+ # do special -lp breaks at the CLOSING token for INTACT
+ # blocks (because we might not do them if the block does
+ # not break open)
+ if ($lp_object) {
+ my $K_begin_line = $lp_object->get_K_begin_line();
+ my $i_begin_line = $K_begin_line - $K_to_go[0];
+ $self->set_forced_lp_break( $i_begin_line, $i_opening );
+ }
# break after opening structure.
# note: break before closing structure will be automatic
if ( $minimum_depth <= $current_depth ) {
- $self->set_forced_breakpoint($i_opening)
- unless ( $do_not_break_apart
- || is_unbreakable_container($current_depth) );
+ if ( $i_opening >= 0 ) {
+ $self->set_forced_breakpoint($i_opening)
+ unless ( $do_not_break_apart
+ || is_unbreakable_container($current_depth) );
+ }
# break at ',' of lower depth level before opening token
if ( $last_comma_index[$depth] ) {
$self->set_forced_breakpoint($icomma);
}
}
- } # end logic to open up a container
+ } ## end logic to open up a container
# Break open a logical container open if it was already open
elsif ($is_simple_logical_expression
next;
} ## end if ( $want_comma_break...)
- # break after all commas above starting depth
- if ( $depth < $starting_depth && !$dont_align[$depth] ) {
+ # Break after all commas above starting depth...
+ # But only if the last closing token was followed by a comma,
+ # to avoid breaking a list operator (issue c119)
+ if ( $depth < $starting_depth
+ && $comma_follows_last_closing_token
+ && !$dont_align[$depth] )
+ {
$self->set_forced_breakpoint($i)
unless ( $next_nonblank_type eq '#' );
next;
# break open container...
my $i_opening = $opening_structure_index_stack[$dd];
- $self->set_forced_breakpoint($i_opening)
- unless (
- is_unbreakable_container($dd)
+ if ( defined($i_opening) && $i_opening >= 0 ) {
+ $self->set_forced_breakpoint($i_opening)
+ unless (
+ is_unbreakable_container($dd)
- # Avoid a break which would place an isolated ' or "
- # on a line
- || ( $type eq 'Q'
- && $i_opening >= $max_index_to_go - 2
- && ( $token eq "'" || $token eq '"' ) )
- );
+ # Avoid a break which would place an isolated ' or "
+ # on a line
+ || ( $type eq 'Q'
+ && $i_opening >= $max_index_to_go - 2
+ && ( $token eq "'" || $token eq '"' ) )
+ );
+ }
} ## end for ( my $dd = $current_depth...)
# Return a flag indicating if the input file had some good breakpoints.
} ## end elsif ( $i_old_assignment_break...)
return $saw_good_breakpoint;
- } ## end sub scan_list
-} ## end closure scan_list
+ } ## end sub break_lists
+} ## end closure break_lists
my %is_kwiZ;
+my %is_key_type;
BEGIN {
# Added 'w' to fix b1172
- my @q = qw(k w i Z);
+ my @q = qw(k w i Z ->);
@is_kwiZ{@q} = (1) x scalar(@q);
+
+ # added = for b1211
+ @q = qw<( [ { L R } ] ) = b>;
+ push @q, ',';
+ @is_key_type{@q} = (1) x scalar(@q);
}
+use constant DEBUG_FIND_START => 0;
+
sub find_token_starting_list {
# When testing to see if a block will fit on one line, some
# a previous comma is a good break point
# $i_opening_minus = $i_opening_paren;
}
- elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
+
+ elsif (
+ $tokens_to_go[$i_opening_paren] eq '('
+
+ # non-parens added here to fix case b1186
+ || $is_kwiZ{$type_prev_nb}
+ )
+ {
$i_opening_minus = $im1;
# Walk back to improve length estimate...
# to the flag --space-function-paren, and similar.
# previous loop: for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
for ( my $j = $iprev_nb ; $j >= 0 ; $j-- ) {
- last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
+ ##last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
+ ##last if ( $is_key_type{ $types_to_go[$j] } );
+ if ( $is_key_type{ $types_to_go[$j] } ) {
+
+ # fix for b1211
+ if ( $types_to_go[$j] eq '=' ) { $i_opening_minus = $j }
+ last;
+ }
$i_opening_minus = $j;
}
if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
}
- # Handle non-parens
- elsif ( $is_kwiZ{$type_prev_nb} ) { $i_opening_minus = $iprev_nb }
-
RETURN:
+ DEBUG_FIND_START && print <<EOM;
+FIND_START: i=$i_opening_paren tok=$tokens_to_go[$i_opening_paren] => im=$i_opening_minus tok=$tokens_to_go[$i_opening_minus]
+EOM
+
return $i_opening_minus;
}
$i_last_comma = $rcomma_index->[ --$item_count - 1 ];
return if ( $item_count < 1 );
}
+ my $is_lp_formatting = ref( $leading_spaces_to_go[$i_first_comma] );
#---------------------------------------------------------------
# find lengths of all items in the list to calculate page layout
# Return if this will fit on one line
#-------------------------------------------------------------------
+ # The -bbxi=2 parameters can add an extra hidden level of indentation;
+ # this needs a tolerance to avoid instability. Fixes b1259, 1260.
+ my $tol = 0;
+ if ( $break_before_container_types{$opening_token}
+ && $container_indentation_options{$opening_token}
+ && $container_indentation_options{$opening_token} == 2 )
+ {
+ $tol = $rOpts_indent_columns;
+ }
+
my $i_opening_minus = $self->find_token_starting_list($i_opening_paren);
return
unless $self->excess_line_length( $i_opening_minus, $i_closing_paren )
- > 0;
+ + $tol > 0;
#-------------------------------------------------------------------
# Now we know that this block spans multiple lines; we have to set
# items aligned. This is necessary if any of the list terms
# exceeds the available space after the '('.
my $need_lp_break_open = $must_break_open;
- if ( $rOpts_line_up_parentheses && !$must_break_open ) {
+ if ( $is_lp_formatting && !$must_break_open ) {
my $columns_if_unbroken =
$maximum_line_length_at_level[ $levels_to_go[$i_opening_minus] ]
- total_line_length( $i_opening_minus, $i_opening_paren );
# Number of free columns across the page width for laying out tables
my $columns = table_columns_available($i_first_comma);
+ # Patch for b1210 and b1216-b1218 when -vmll is set. If we are unable
+ # to break after an opening paren, then the maximum line length for the
+ # first line could be less than the later lines. So we need to reduce
+ # the line length. Normally, we will get a break after an opening
+ # paren, but in some cases we might not.
+ if ( $rOpts_variable_maximum_line_length
+ && $tokens_to_go[$i_opening_paren] eq '('
+ && @i_term_begin )
+ ##&& !$old_breakpoint_to_go[$i_opening_paren] ) ## in b1210 patch
+ {
+ my $ib = $i_term_begin[0];
+ my $type = $types_to_go[$ib];
+
+ # So far, the only known instance of this problem is when
+ # a bareword follows an opening paren with -vmll
+ if ( $type eq 'w' ) {
+
+ # If a line starts with paren+space+terms, then its max length
+ # could be up to ci+2-i spaces less than if the term went out
+ # on a line after the paren. So..
+ my $tol = max( 0,
+ 2 + $rOpts_continuation_indentation -
+ $rOpts_indent_columns );
+ $columns = max( 0, $columns - $tol );
+
+ ## Here is the original b1210 fix, but it failed on b1216-b1218
+ ##my $columns2 = table_columns_available($i_opening_paren);
+ ##$columns = min( $columns, $columns2 );
+ }
+ }
+
# Estimated maximum number of fields which fit this space
# This will be our first guess
my $number_of_fields_max =
# undo some indentation
# ----------------------------------------------------------------------
if (
- $rOpts_line_up_parentheses
+ $is_lp_formatting
&& (
$number_of_fields == 0
|| ( $number_of_fields == 1
: ( $packed_lines == 2 ) ? 0.4
: 0.7;
+ my $two_line_word_wrap_ok;
+ if ( $opening_token eq '(' ) {
+
+ # default is to allow wrapping of short paren lists
+ $two_line_word_wrap_ok = 1;
+
+ # but turn off word wrap where requested
+ if ($rOpts_break_open_paren_list) {
+
+ # This parameter is a one-character flag, as follows:
+ # '0' matches no parens -> break open NOT OK -> word wrap OK
+ # '1' matches all parens -> break open OK -> word wrap NOT OK
+ # Other values are the same as used by the weld-exclusion-list
+ my $flag = $rOpts_break_open_paren_list;
+ if ( $flag eq '*'
+ || $flag eq '1' )
+ {
+ $two_line_word_wrap_ok = 0;
+ }
+ elsif ( $flag eq '0' ) {
+ $two_line_word_wrap_ok = 1;
+ }
+ else {
+ my $KK = $K_to_go[$i_opening_paren];
+ $two_line_word_wrap_ok =
+ !$self->match_paren_flag( $KK, $flag );
+ }
+ }
+ }
+
# Begin check for shortcut methods, which avoid treating a list
# as a table for relatively small parenthesized lists. These
# are usually easier to read if not formatted as tables.
if (
- $packed_lines <= 2 # probably can fit in 2 lines
- && $item_count < 9 # doesn't have too many items
- && $opening_is_in_block # not a sub-container
- && $opening_token eq '(' # is paren list
+ $packed_lines <= 2 # probably can fit in 2 lines
+ && $item_count < 9 # doesn't have too many items
+ && $opening_is_in_block # not a sub-container
+ && $two_line_word_wrap_ok # ok to wrap this paren list
+ ##&& $opening_token eq '(' # is paren list
)
{
# Shortcut method 1: for -lp and just one comma:
# This is a no-brainer, just break at the comma.
if (
- $rOpts_line_up_parentheses # -lp
- && $item_count == 2 # two items, one comma
+ $is_lp_formatting # -lp
+ && $item_count == 2 # two items, one comma
&& !$must_break_open
)
{
if ( $break_count <= 1 ) {
${$rdo_not_break_apart} = 1;
}
- elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
- {
+ elsif ( $is_lp_formatting && !$need_lp_break_open ) {
${$rdo_not_break_apart} = 1;
}
}
return;
}
- } # end shortcut methods
+ } ## end shortcut methods
# debug stuff
DEBUG_SPARSE && do {
# structure.
my $must_break_open_container = $must_break_open
|| ( $too_long
- && ( $in_hierarchical_list || $opening_token ne '(' ) );
+ && ( $in_hierarchical_list || !$two_line_word_wrap_ok ) );
#print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
if ( $break_count <= 1 ) {
${$rdo_not_break_apart} = 1;
}
- elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
- {
+ elsif ( $is_lp_formatting && !$need_lp_break_open ) {
${$rdo_not_break_apart} = 1;
}
}
return ref($item) ? $item->get_available_spaces() : 0;
}
-{ ## begin closure set_leading_whitespace (for -lp indentation)
+{ ## begin closure set_lp_indentation
- # These routines are called batch-by-batch to handle the -lp indentation
- # option. The coding is rather complex, but is only for -lp.
+ use constant DEBUG_LP => 0;
- my $gnu_position_predictor;
- my $gnu_sequence_number;
- my $line_start_index_to_go;
- my $max_gnu_item_index;
- my $max_gnu_stack_index;
- my %gnu_arrow_count;
- my %gnu_comma_count;
- my %last_gnu_equals;
- my @gnu_item_list;
- my @gnu_stack;
+ # Stack of -lp index objects which survives between batches.
+ my $rLP;
+ my $max_lp_stack;
- sub initialize_gnu_vars {
+ # The predicted position of the next opening container which may start
+ # an -lp indentation level. This survives between batches.
+ my $lp_position_predictor;
- # initialize gnu variables for a new file;
- # must be called once at the start of a new file.
+ # A level at which the lp format becomes too highly stressed to continue
+ my $lp_cutoff_level;
- # initialize the leading whitespace stack to negative levels
- # so that we can never run off the end of the stack
- $gnu_position_predictor =
- 0; # where the current token is predicted to be
- $max_gnu_stack_index = 0;
- $max_gnu_item_index = -1;
- $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
- @gnu_item_list = ();
- return;
- }
+ BEGIN {
- sub initialize_gnu_batch_vars {
+ # Index names for the -lp stack variables.
+ # Do not combine with other BEGIN blocks (c101).
- # initialize gnu variables for a new batch;
- # must be called before each new batch
- $gnu_sequence_number++; # increment output batch counter
- %last_gnu_equals = ();
- %gnu_comma_count = ();
- %gnu_arrow_count = ();
- $line_start_index_to_go = 0;
- $max_gnu_item_index = UNDEFINED_INDEX;
- return;
+ my $i = 0;
+ use constant {
+ _lp_ci_level_ => $i++,
+ _lp_level_ => $i++,
+ _lp_object_ => $i++,
+ _lp_container_seqno_ => $i++,
+ _lp_space_count_ => $i++,
+ };
}
- sub new_lp_indentation_item {
+ sub initialize_lp_vars {
- # this is an interface to the IndentationItem class
- my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
-
- # A negative level implies not to store the item in the item_list
- my $index = 0;
- if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
+ # initialize gnu variables for a new file;
+ # must be called once at the start of a new file.
- my $starting_index_K = 0;
- if ( defined($line_start_index_to_go)
- && $line_start_index_to_go >= 0
- && $line_start_index_to_go <= $max_index_to_go )
- {
- $starting_index_K = $K_to_go[$line_start_index_to_go];
- }
-
- my $item = Perl::Tidy::IndentationItem->new(
- spaces => $spaces,
- level => $level,
- ci_level => $ci_level,
- available_spaces => $available_spaces,
- index => $index,
- gnu_sequence_number => $gnu_sequence_number,
- align_paren => $align_paren,
- stack_depth => $max_gnu_stack_index,
- starting_index_K => $starting_index_K,
- );
+ $lp_position_predictor = 0;
+ $max_lp_stack = 0;
+ $lp_cutoff_level = min( $stress_level_alpha, $stress_level_beta + 2 );
- if ( $level >= 0 ) {
- $gnu_item_list[$max_gnu_item_index] = $item;
+ # we can turn off -lp if all levels will be at or above the cutoff
+ if ( $lp_cutoff_level <= 1 ) {
+ $rOpts_line_up_parentheses = 0;
+ $rOpts_extended_line_up_parentheses = 0;
}
- return $item;
+ $rLP = [];
+
+ # initialize the leading whitespace stack to negative levels
+ # so that we can never run off the end of the stack
+ $rLP->[$max_lp_stack]->[_lp_ci_level_] = -1;
+ $rLP->[$max_lp_stack]->[_lp_level_] = -1;
+ $rLP->[$max_lp_stack]->[_lp_object_] = undef;
+ $rLP->[$max_lp_stack]->[_lp_container_seqno_] = SEQ_ROOT;
+ $rLP->[$max_lp_stack]->[_lp_space_count_] = 0;
+
+ return;
}
- sub set_leading_whitespace {
+ # hashes for efficient testing
+ my %hash_test1;
+ my %hash_test2;
+ my %hash_test3;
- # This routine defines leading whitespace for the case of -lp formatting
- # given: the level and continuation_level of a token,
- # define: space count of leading string which would apply if it
- # were the first token of a new line.
+ BEGIN {
+ my @q = qw< } ) ] >;
+ @hash_test1{@q} = (1) x scalar(@q);
+ @q = qw(: ? f);
+ push @q, ',';
+ @hash_test2{@q} = (1) x scalar(@q);
+ @q = qw( . || && );
+ @hash_test3{@q} = (1) x scalar(@q);
+ }
- my ( $self, $Kj, $K_last_nonblank, $K_last_last_nonblank,
- $level_abs, $ci_level, $in_continued_quote )
- = @_;
+ sub set_lp_indentation {
+
+ #------------------------------------------------------------------
+ # Define the leading whitespace for all tokens in the current batch
+ # when the -lp formatting is selected.
+ #------------------------------------------------------------------
+
+ my ($self) = @_;
return unless ($rOpts_line_up_parentheses);
return unless ( defined($max_index_to_go) && $max_index_to_go >= 0 );
+ # List of -lp indentation objects created in this batch
+ my $rlp_object_list = [];
+ my $max_lp_object_list = UNDEFINED_INDEX;
+
+ my %last_lp_equals;
+ my %lp_comma_count;
+ my %lp_arrow_count;
+ my $ii_begin_line = 0;
+
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
my $rbreak_container = $self->[_rbreak_container_];
my $rshort_nested = $self->[_rshort_nested_];
my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
- my $rLL = $self->[_rLL_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my $starting_in_quote = $self->[_this_batch_]->[_starting_in_quote_];
+ my $K_opening_container = $self->[_K_opening_container_]; ##TESTING
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $rlp_object_by_seqno = $self->[_rlp_object_by_seqno_];
+ my $radjusted_levels = $self->[_radjusted_levels_];
my $rbreak_before_container_by_seqno =
$self->[_rbreak_before_container_by_seqno_];
+ my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
+
+ my $nws = @{$radjusted_levels};
+ my $imin = 0;
+
+ # The 'starting_in_quote' flag means that the first token is the first
+ # token of a line and it is also the continuation of some kind of
+ # multi-line quote or pattern. It must have no added leading
+ # whitespace, so we can skip it.
+ if ($starting_in_quote) {
+ $imin += 1;
+ }
- # find needed previous nonblank tokens
- my $last_nonblank_token = '';
- my $last_nonblank_type = '';
- my $last_nonblank_block_type = '';
+ my $K_last_nonblank;
+ my $Kpnb = $K_to_go[0] - 1;
+ if ( $Kpnb > 0 && $rLL->[$Kpnb]->[_TYPE_] eq 'b' ) {
+ $Kpnb -= 1;
+ }
+ if ( $Kpnb >= 0 && $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) {
+ $K_last_nonblank = $Kpnb;
+ }
- # and previous nonblank tokens, just in this batch:
- my $last_nonblank_token_in_batch = '';
- my $last_nonblank_type_in_batch = '';
- my $last_last_nonblank_type_in_batch = '';
+ my $last_nonblank_token = '';
+ my $last_nonblank_type = '';
+ my $last_last_nonblank_type = '';
if ( defined($K_last_nonblank) ) {
$last_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
$last_nonblank_type = $rLL->[$K_last_nonblank]->[_TYPE_];
- $last_nonblank_block_type =
- $rLL->[$K_last_nonblank]->[_BLOCK_TYPE_];
-
- if ( $K_last_nonblank >= $K_to_go[0] ) {
- $last_nonblank_token_in_batch = $last_nonblank_token;
- $last_nonblank_type_in_batch = $last_nonblank_type;
- if ( defined($K_last_last_nonblank)
- && $K_last_last_nonblank > $K_to_go[0] )
- {
- $last_last_nonblank_type_in_batch =
- $rLL->[$K_last_last_nonblank]->[_TYPE_];
- }
- }
}
- ################################################################
+ my ( $space_count, $current_level, $current_ci_level, $in_lp_mode );
+ my $stack_changed = 1;
- # Adjust levels if necessary to recycle whitespace:
- my $level = $level_abs;
- my $radjusted_levels = $self->[_radjusted_levels_];
- my $nK = @{$rLL};
- my $nws = @{$radjusted_levels};
- if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
- $level = $radjusted_levels->[$Kj];
- if ( $level < 0 ) { $level = 0 } # note: this should not happen
- }
+ #-----------------------------------
+ # Loop over all tokens in this batch
+ #-----------------------------------
+ foreach my $ii ( $imin .. $max_index_to_go ) {
- # The continued_quote flag means that this is the first token of a
- # line, and it is the continuation of some kind of multi-line quote
- # or pattern. It requires special treatment because it must have no
- # added leading whitespace. So we create a special indentation item
- # which is not in the stack.
- if ($in_continued_quote) {
- my $space_count = 0;
- my $available_space = 0;
- $level = -1; # flag to prevent storing in item_list
- $leading_spaces_to_go[$max_index_to_go] =
- $reduced_spaces_to_go[$max_index_to_go] =
- new_lp_indentation_item( $space_count, $level, $ci_level,
- $available_space, 0 );
- return;
- }
+ my $KK = $K_to_go[$ii];
+ my $type = $types_to_go[$ii];
+ my $token = $tokens_to_go[$ii];
+ my $level = $levels_to_go[$ii];
+ my $ci_level = $ci_levels_to_go[$ii];
+ my $total_depth = $nesting_depth_to_go[$ii];
+
+ #--------------------------------------------------
+ # Adjust levels if necessary to recycle whitespace:
+ #--------------------------------------------------
+ if ( defined($radjusted_levels) && @{$radjusted_levels} == $Klimit )
+ {
+ $level = $radjusted_levels->[$KK];
+ if ( $level < 0 ) { $level = 0 } # note: this should not happen
+ }
- # get the top state from the stack
- my $space_count = $gnu_stack[$max_gnu_stack_index]->get_spaces();
- my $current_level = $gnu_stack[$max_gnu_stack_index]->get_level();
- my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
+ # get the top state from the stack if it has changed
+ if ($stack_changed) {
+ my $rLP_top = $rLP->[$max_lp_stack];
+ my $lp_object = $rLP_top->[_lp_object_];
+ if ($lp_object) {
+ ( $space_count, $current_level, $current_ci_level ) =
+ @{ $lp_object->get_spaces_level_ci() };
+ }
+ else {
+ $current_ci_level = $rLP_top->[_lp_ci_level_];
+ $current_level = $rLP_top->[_lp_level_];
+ $space_count = $rLP_top->[_lp_space_count_];
+ }
+ $stack_changed = 0;
+ }
- my $type = $types_to_go[$max_index_to_go];
- my $token = $tokens_to_go[$max_index_to_go];
- my $total_depth = $nesting_depth_to_go[$max_index_to_go];
+ #------------------------------
+ # update the position predictor
+ #------------------------------
+ if ( $type eq '{' || $type eq '(' ) {
- if ( $type eq '{' || $type eq '(' ) {
+ $lp_comma_count{ $total_depth + 1 } = 0;
+ $lp_arrow_count{ $total_depth + 1 } = 0;
- $gnu_comma_count{ $total_depth + 1 } = 0;
- $gnu_arrow_count{ $total_depth + 1 } = 0;
+ # If we come to an opening token after an '=' token of some
+ # type, see if it would be helpful to 'break' after the '=' to
+ # save space
+ my $last_equals = $last_lp_equals{$total_depth};
+ if ( $last_equals && $last_equals > $ii_begin_line ) {
- # If we come to an opening token after an '=' token of some type,
- # see if it would be helpful to 'break' after the '=' to save space
- my $last_equals = $last_gnu_equals{$total_depth};
- if ( $last_equals && $last_equals > $line_start_index_to_go ) {
+ my $seqno = $type_sequence_to_go[$ii];
- my $seqno = $type_sequence_to_go[$max_index_to_go];
+ # find the position if we break at the '='
+ my $i_test = $last_equals;
- # find the position if we break at the '='
- my $i_test = $last_equals;
- if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
+ # Fix for issue b1229, check for break before
+ if ( $want_break_before{ $types_to_go[$i_test] } ) {
+ if ( $i_test > 0 ) { $i_test-- }
+ }
+ elsif ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
- # TESTING
- ##my $too_close = ($i_test==$max_index_to_go-1);
+ # TESTING
+ ##my $too_close = ($i_test==$ii-1);
- my $test_position =
- total_line_length( $i_test, $max_index_to_go );
- my $mll =
- $maximum_line_length_at_level[ $levels_to_go[$i_test] ];
+ my $test_position = total_line_length( $i_test, $ii );
+ my $mll =
+ $maximum_line_length_at_level[ $levels_to_go[$i_test] ];
- my $bbc_flag = $break_before_container_types{$token};
+ #------------------------------------------------------
+ # Break if structure will reach the maximum line length
+ #------------------------------------------------------
- if (
+ # Historically, -lp just used one-half line length here
+ my $len_increase = $rOpts_maximum_line_length / 2;
+
+ # For -xlp, we can also use the pre-computed lengths
+ my $min_len = $rcollapsed_length_by_seqno->{$seqno};
+ if ( $min_len && $min_len > $len_increase ) {
+ $len_increase = $min_len;
+ }
- # the equals is not just before an open paren (testing)
- ##!$too_close &&
+ if (
- # if we are beyond the midpoint
- $gnu_position_predictor >
- $mll - $rOpts_maximum_line_length / 2
+ # the equals is not just before an open paren (testing)
+ ##!$too_close &&
- # if a -bbx flag WANTS a break before this opening token
- || ( $seqno && $rbreak_before_container_by_seqno->{$seqno} )
+ # if we might exceed the maximum line length
+ $lp_position_predictor + $len_increase > $mll
- # or if we MIGHT want a break (fixes case b826 b909 b989)
- || ( $bbc_flag && $bbc_flag >= 2 )
+ # if a -bbx flag WANTS a break before this opening token
+ || ( $seqno
+ && $rbreak_before_container_by_seqno->{$seqno} )
- # or we are beyond the 1/4 point and there was an old
- # break at an assignment (not '=>') [fix for b1035]
- || (
- $gnu_position_predictor >
- $mll - $rOpts_maximum_line_length * 3 / 4
- && $types_to_go[$last_equals] ne '=>'
- && (
- $old_breakpoint_to_go[$last_equals]
- || ( $last_equals > 0
- && $old_breakpoint_to_go[ $last_equals - 1 ] )
- || ( $last_equals > 1
- && $types_to_go[ $last_equals - 1 ] eq 'b'
- && $old_breakpoint_to_go[ $last_equals - 2 ] )
+ # or we are beyond the 1/4 point and there was an old
+ # break at an assignment (not '=>') [fix for b1035]
+ || (
+ $lp_position_predictor >
+ $mll - $rOpts_maximum_line_length * 3 / 4
+ && $types_to_go[$last_equals] ne '=>'
+ && (
+ $old_breakpoint_to_go[$last_equals]
+ || ( $last_equals > 0
+ && $old_breakpoint_to_go[ $last_equals - 1 ]
+ )
+ || ( $last_equals > 1
+ && $types_to_go[ $last_equals - 1 ] eq 'b'
+ && $old_breakpoint_to_go[ $last_equals - 2 ]
+ )
+ )
)
- )
- )
- {
+ )
+ {
- # then make the switch -- note that we do not set a real
- # breakpoint here because we may not really need one; sub
- # scan_list will do that if necessary
- $line_start_index_to_go = $i_test + 1;
- $gnu_position_predictor = $test_position;
- }
- }
- }
+ # then make the switch -- note that we do not set a
+ # real breakpoint here because we may not really need
+ # one; sub break_lists will do that if necessary.
- my $halfway =
- $maximum_line_length_at_level[$level] -
- $rOpts_maximum_line_length / 2;
+ my $Kc = $K_closing_container->{$seqno};
+ if (
- # Check for decreasing depth ..
- # Note that one token may have both decreasing and then increasing
- # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
- # in this example we would first go back to (1,0) then up to (2,0)
- # in a single call.
- if ( $level < $current_level || $ci_level < $current_ci_level ) {
+ # For -lp, only if the closing token is in this
+ # batch (c117). Otherwise it cannot be done by sub
+ # break_lists.
+ defined($Kc) && $Kc <= $K_to_go[$max_index_to_go]
- # loop to find the first entry at or completely below this level
- my ( $lev, $ci_lev );
- while (1) {
- if ($max_gnu_stack_index) {
-
- # save index of token which closes this level
- $gnu_stack[$max_gnu_stack_index]
- ->set_closed($max_index_to_go);
-
- # Undo any extra indentation if we saw no commas
- my $available_spaces =
- $gnu_stack[$max_gnu_stack_index]->get_available_spaces();
-
- my $comma_count = 0;
- my $arrow_count = 0;
- if ( $type eq '}' || $type eq ')' ) {
- $comma_count = $gnu_comma_count{$total_depth};
- $arrow_count = $gnu_arrow_count{$total_depth};
- $comma_count = 0 unless $comma_count;
- $arrow_count = 0 unless $arrow_count;
- }
- $gnu_stack[$max_gnu_stack_index]
- ->set_comma_count($comma_count);
- $gnu_stack[$max_gnu_stack_index]
- ->set_arrow_count($arrow_count);
-
- if ( $available_spaces > 0 ) {
-
- if ( $comma_count <= 0 || $arrow_count > 0 ) {
-
- my $i =
- $gnu_stack[$max_gnu_stack_index]->get_index();
- my $seqno =
- $gnu_stack[$max_gnu_stack_index]
- ->get_sequence_number();
-
- # Be sure this item was created in this batch. This
- # should be true because we delete any available
- # space from open items at the end of each batch.
- if ( $gnu_sequence_number != $seqno
- || $i > $max_gnu_item_index )
+ # For -xlp, we only need one nonblank token after
+ # the opening token.
+ || $rOpts_extended_line_up_parentheses
+ )
+ {
+ $ii_begin_line = $i_test + 1;
+ $lp_position_predictor = $test_position;
+
+ #--------------------------------------------------
+ # Fix for an opening container terminating a batch:
+ #--------------------------------------------------
+ # To get alignment of a -lp container with its
+ # contents, we have to put a break after $i_test.
+ # For $ii<$max_index_to_go, this will be done by
+ # sub break_lists based on the indentation object.
+ # But for $ii=$max_index_to_go, the indentation
+ # object for this seqno will not be created until
+ # the next batch, so we have to set a break at
+ # $i_test right now in order to get one.
+ if ( $ii == $max_index_to_go
+ && !$block_type_to_go[$ii]
+ && $type eq '{'
+ && $seqno
+ && !$ris_excluded_lp_container->{$seqno} )
{
- warning(
-"Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
- );
- report_definite_bug();
+ $self->set_forced_lp_break( $ii_begin_line,
+ $ii );
+ }
+ }
+ }
+ }
+ } ## end update position predictor
+
+ #------------------------
+ # Handle decreasing depth
+ #------------------------
+ # Note that one token may have both decreasing and then increasing
+ # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
+ # in this example we would first go back to (1,0) then up to (2,0)
+ # in a single call.
+ if ( $level < $current_level || $ci_level < $current_ci_level ) {
+
+ # loop to find the first entry at or completely below this level
+ my ( $lev, $ci_lev );
+ while (1) {
+ if ($max_lp_stack) {
+
+ # save index of token which closes this level
+ if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
+ my $lp_object =
+ $rLP->[$max_lp_stack]->[_lp_object_];
+
+ $lp_object->set_closed($ii);
+
+ my $comma_count = 0;
+ my $arrow_count = 0;
+ if ( $type eq '}' || $type eq ')' ) {
+ $comma_count = $lp_comma_count{$total_depth};
+ $arrow_count = $lp_arrow_count{$total_depth};
+ $comma_count = 0 unless $comma_count;
+ $arrow_count = 0 unless $arrow_count;
}
- else {
- if ( $arrow_count == 0 ) {
- $gnu_item_list[$i]
- ->permanently_decrease_available_spaces(
- $available_spaces);
+ $lp_object->set_comma_count($comma_count);
+ $lp_object->set_arrow_count($arrow_count);
+
+ # Undo any extra indentation if we saw no commas
+ my $available_spaces =
+ $lp_object->get_available_spaces();
+ my $K_start = $lp_object->get_K_begin_line();
+
+ if ( $available_spaces > 0
+ && $K_start >= $K_to_go[0]
+ && ( $comma_count <= 0 || $arrow_count > 0 ) )
+ {
+
+ my $i = $lp_object->get_lp_item_index();
+
+ # Safety check for a valid stack index. It
+ # should be ok because we just checked that the
+ # index K of the token associated with this
+ # indentation is in this batch.
+ if ( $i < 0 || $i > $max_lp_object_list ) {
+ if (DEVEL_MODE) {
+ my $lno = $rLL->[$KK]->[_LINE_INDEX_];
+ Fault(<<EOM);
+Program bug with -lp near line $lno. Stack index i=$i should be >=0 and <= max=$max_lp_object_list
+EOM
+ }
}
else {
- $gnu_item_list[$i]
- ->tentatively_decrease_available_spaces(
- $available_spaces);
- }
- foreach my $j ( $i + 1 .. $max_gnu_item_index )
- {
- $gnu_item_list[$j]
- ->decrease_SPACES($available_spaces);
+ if ( $arrow_count == 0 ) {
+ $rlp_object_list->[$i]
+ ->permanently_decrease_available_spaces
+ ($available_spaces);
+ }
+ else {
+ $rlp_object_list->[$i]
+ ->tentatively_decrease_available_spaces
+ ($available_spaces);
+ }
+ foreach
+ my $j ( $i + 1 .. $max_lp_object_list )
+ {
+ $rlp_object_list->[$j]
+ ->decrease_SPACES($available_spaces);
+ }
}
}
}
+
+ # go down one level
+ --$max_lp_stack;
+
+ my $rLP_top = $rLP->[$max_lp_stack];
+ my $ci_lev = $rLP_top->[_lp_ci_level_];
+ my $lev = $rLP_top->[_lp_level_];
+ my $spaces = $rLP_top->[_lp_space_count_];
+ if ( $rLP_top->[_lp_object_] ) {
+ my $lp_obj = $rLP_top->[_lp_object_];
+ ( $spaces, $lev, $ci_lev ) =
+ @{ $lp_obj->get_spaces_level_ci() };
+ }
+
+ # stop when we reach a level at or below the current
+ # level
+ if ( $lev <= $level && $ci_lev <= $ci_level ) {
+ $space_count = $spaces;
+ $current_level = $lev;
+ $current_ci_level = $ci_lev;
+ last;
+ }
}
- # go down one level
- --$max_gnu_stack_index;
- $lev = $gnu_stack[$max_gnu_stack_index]->get_level();
- $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
-
- # stop when we reach a level at or below the current level
- if ( $lev <= $level && $ci_lev <= $ci_level ) {
- $space_count =
- $gnu_stack[$max_gnu_stack_index]->get_spaces();
- $current_level = $lev;
- $current_ci_level = $ci_lev;
+ # reached bottom of stack .. should never happen because
+ # only negative levels can get here, and $level was forced
+ # to be positive above.
+ else {
+
+ # non-fatal, keep going except in DEVEL_MODE
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp
+EOM
+ }
last;
}
}
+ } ## end decreasing depth
- # reached bottom of stack .. should never happen because
- # only negative levels can get here, and $level was forced
- # to be positive above.
- else {
- warning(
-"program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
- );
- report_definite_bug();
- last;
- }
- }
- }
+ #------------------------
+ # handle increasing depth
+ #------------------------
+ if ( $level > $current_level || $ci_level > $current_ci_level ) {
- # handle increasing depth
- if ( $level > $current_level || $ci_level > $current_ci_level ) {
+ $stack_changed = 1;
- # Compute the standard incremental whitespace. This will be
- # the minimum incremental whitespace that will be used. This
- # choice results in a smooth transition between the gnu-style
- # and the standard style.
- my $standard_increment =
- ( $level - $current_level ) *
- $rOpts_indent_columns +
- ( $ci_level - $current_ci_level ) *
- $rOpts_continuation_indentation;
+ # Compute the standard incremental whitespace. This will be
+ # the minimum incremental whitespace that will be used. This
+ # choice results in a smooth transition between the gnu-style
+ # and the standard style.
+ my $standard_increment =
+ ( $level - $current_level ) *
+ $rOpts_indent_columns +
+ ( $ci_level - $current_ci_level ) *
+ $rOpts_continuation_indentation;
- # Now we have to define how much extra incremental space
- # ("$available_space") we want. This extra space will be
- # reduced as necessary when long lines are encountered or when
- # it becomes clear that we do not have a good list.
- my $available_space = 0;
- my $align_paren = 0;
- my $excess = 0;
+ # Now we have to define how much extra incremental space
+ # ("$available_space") we want. This extra space will be
+ # reduced as necessary when long lines are encountered or when
+ # it becomes clear that we do not have a good list.
+ my $available_spaces = 0;
+ my $align_seqno = 0;
+ my $excess = 0;
- my $last_nonblank_seqno;
- if ( defined($K_last_nonblank) ) {
- $last_nonblank_seqno =
- $rLL->[$K_last_nonblank]->[_TYPE_SEQUENCE_];
- }
+ my $last_nonblank_seqno;
+ my $last_nonblank_block_type;
+ if ( defined($K_last_nonblank) ) {
+ $last_nonblank_seqno =
+ $rLL->[$K_last_nonblank]->[_TYPE_SEQUENCE_];
+ $last_nonblank_block_type =
+ $last_nonblank_seqno
+ ? $rblock_type_of_seqno->{$last_nonblank_seqno}
+ : undef;
+ }
- # initialization on empty stack..
- if ( $max_gnu_stack_index == 0 ) {
- $space_count = $level * $rOpts_indent_columns;
- }
+ $in_lp_mode = $rLP->[$max_lp_stack]->[_lp_object_];
- # if this is a BLOCK, add the standard increment
- elsif ($last_nonblank_block_type) {
- $space_count += $standard_increment;
- }
+ #-----------------------------------------------
+ # Initialize indentation spaces on empty stack..
+ #-----------------------------------------------
+ if ( $max_lp_stack == 0 ) {
+ $space_count = $level * $rOpts_indent_columns;
+ }
- # add the standard increment for containers excluded by user rules
- # or which contain here-docs or multiline qw text
- elsif ( defined($last_nonblank_seqno)
- && $ris_excluded_lp_container->{$last_nonblank_seqno} )
- {
- $space_count += $standard_increment;
- }
+ #----------------------------------------
+ # Add the standard space increment if ...
+ #----------------------------------------
+ elsif (
- # if last nonblank token was not structural indentation,
- # just use standard increment
- elsif ( $last_nonblank_type ne '{' ) {
- $space_count += $standard_increment;
- }
+ # if this is a BLOCK, add the standard increment
+ $last_nonblank_block_type
- # otherwise use the space to the first non-blank level change token
- else {
+ # or if this is not a sequenced item
+ || !$last_nonblank_seqno
- $space_count = $gnu_position_predictor;
+ # or this continer is excluded by user rules
+ # or contains here-docs or multiline qw text
+ || defined($last_nonblank_seqno)
+ && $ris_excluded_lp_container->{$last_nonblank_seqno}
- my $min_gnu_indentation =
- $gnu_stack[$max_gnu_stack_index]->get_spaces();
+ # or if last nonblank token was not structural indentation
+ || $last_nonblank_type ne '{'
- $available_space = $space_count - $min_gnu_indentation;
- if ( $available_space >= $standard_increment ) {
- $min_gnu_indentation += $standard_increment;
- }
- elsif ( $available_space > 1 ) {
- $min_gnu_indentation += $available_space + 1;
+ # and do not start -lp under stress .. fixes b1244, b1255
+ || !$in_lp_mode && $level >= $lp_cutoff_level
+
+ )
+ {
+
+ # If we have entered lp mode, use the top lp object to get
+ # the current indentation spaces because it may have
+ # changed. Fixes b1285, b1286.
+ if ($in_lp_mode) {
+ $space_count = $in_lp_mode->get_spaces();
+ }
+ $space_count += $standard_increment;
}
- elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
- if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
- $min_gnu_indentation += 2;
+
+ #---------------------------------------------------------------
+ # -lp mode: try to use space to the first non-blank level change
+ #---------------------------------------------------------------
+ else {
+
+ # see how much space we have available
+ my $test_space_count = $lp_position_predictor;
+ my $excess = 0;
+ my $min_len =
+ $rcollapsed_length_by_seqno->{$last_nonblank_seqno};
+ my $next_opening_too_far;
+
+ if ( defined($min_len) ) {
+ $excess =
+ $test_space_count +
+ $min_len -
+ $maximum_line_length_at_level[$level];
+ if ( $excess > 0 ) {
+ $test_space_count -= $excess;
+
+ # will the next opening token be a long way out?
+ $next_opening_too_far =
+ $lp_position_predictor + $excess >
+ $maximum_line_length_at_level[$level];
+ }
+ }
+
+ my $rLP_top = $rLP->[$max_lp_stack];
+ my $min_gnu_indentation = $rLP_top->[_lp_space_count_];
+ if ( $rLP_top->[_lp_object_] ) {
+ $min_gnu_indentation =
+ $rLP_top->[_lp_object_]->get_spaces();
}
+ $available_spaces =
+ $test_space_count - $min_gnu_indentation;
+
+ # Do not startup -lp indentation mode if no space ...
+ # ... or if it puts the opening far to the right
+ if ( !$in_lp_mode
+ && ( $available_spaces <= 0 || $next_opening_too_far ) )
+ {
+ $space_count += $standard_increment;
+ $available_spaces = 0;
+ }
+
+ # Use -lp mode
else {
- $min_gnu_indentation += 1;
+ $space_count = $test_space_count;
+
+ $in_lp_mode = 1;
+ if ( $available_spaces >= $standard_increment ) {
+ $min_gnu_indentation += $standard_increment;
+ }
+ elsif ( $available_spaces > 1 ) {
+ $min_gnu_indentation += $available_spaces + 1;
+ }
+ elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
+ if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
+ $min_gnu_indentation += 2;
+ }
+ else {
+ $min_gnu_indentation += 1;
+ }
+ }
+ else {
+ $min_gnu_indentation += $standard_increment;
+ }
+ $available_spaces = $space_count - $min_gnu_indentation;
+
+ if ( $available_spaces < 0 ) {
+ $space_count = $min_gnu_indentation;
+ $available_spaces = 0;
+ }
+ $align_seqno = $last_nonblank_seqno;
}
}
- else {
- $min_gnu_indentation += $standard_increment;
- }
- $available_space = $space_count - $min_gnu_indentation;
- if ( $available_space < 0 ) {
- $space_count = $min_gnu_indentation;
- $available_space = 0;
+ #-------------------------------------------
+ # update the state, but not on a blank token
+ #-------------------------------------------
+ if ( $type ne 'b' ) {
+
+ if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
+ $rLP->[$max_lp_stack]->[_lp_object_]->set_have_child(1);
+ $in_lp_mode = 1;
+ }
+
+ #----------------------------------------
+ # Create indentation object if in lp-mode
+ #----------------------------------------
+ ++$max_lp_stack;
+ my $lp_object;
+ if ($in_lp_mode) {
+
+ # A negative level implies not to store the item in the
+ # item_list
+ my $lp_item_index = 0;
+ if ( $level >= 0 ) {
+ $lp_item_index = ++$max_lp_object_list;
+ }
+
+ my $K_begin_line = 0;
+ if ( $ii_begin_line >= 0
+ && $ii_begin_line <= $max_index_to_go )
+ {
+ $K_begin_line = $K_to_go[$ii_begin_line];
+ }
+
+ # Minor Fix: when creating indentation at a side
+ # comment we don't know what the space to the actual
+ # next code token will be. We will allow a space for
+ # sub correct_lp to move it in if necessary.
+ if ( $type eq '#'
+ && $max_index_to_go > 0
+ && $align_seqno )
+ {
+ $available_spaces += 1;
+ }
+
+ $lp_object = Perl::Tidy::IndentationItem->new(
+ spaces => $space_count,
+ level => $level,
+ ci_level => $ci_level,
+ available_spaces => $available_spaces,
+ lp_item_index => $lp_item_index,
+ align_seqno => $align_seqno,
+ stack_depth => $max_lp_stack,
+ K_begin_line => $K_begin_line,
+ );
+
+ DEBUG_LP && do {
+ my $tok_beg = $rLL->[$K_begin_line]->[_TOKEN_];
+ print STDERR <<EOM;
+DEBUG_LP: Created object at tok=$token type=$type for seqno $align_seqno level=$level ci=$ci_level spaces=$space_count avail=$available_spaces kbeg=$K_begin_line tokbeg=$tok_beg lp=$lp_position_predictor
+EOM
+ };
+
+ if ( $level >= 0 ) {
+ $rlp_object_list->[$max_lp_object_list] =
+ $lp_object;
+ }
+
+ if ( $last_nonblank_token =~ /^[\{\[\(]$/
+ && $last_nonblank_seqno )
+ {
+ $rlp_object_by_seqno->{$last_nonblank_seqno} =
+ $lp_object;
+ }
+ }
+
+ #------------------------------------
+ # Store this indentation on the stack
+ #------------------------------------
+ $rLP->[$max_lp_stack]->[_lp_ci_level_] = $ci_level;
+ $rLP->[$max_lp_stack]->[_lp_level_] = $level;
+ $rLP->[$max_lp_stack]->[_lp_object_] = $lp_object;
+ $rLP->[$max_lp_stack]->[_lp_container_seqno_] =
+ $last_nonblank_seqno;
+ $rLP->[$max_lp_stack]->[_lp_space_count_] = $space_count;
+
+ # If the opening paren is beyond the half-line length, then
+ # we will use the minimum (standard) indentation. This will
+ # help avoid problems associated with running out of space
+ # near the end of a line. As a result, in deeply nested
+ # lists, there will be some indentations which are limited
+ # to this minimum standard indentation. But the most deeply
+ # nested container will still probably be able to shift its
+ # parameters to the right for proper alignment, so in most
+ # cases this will not be noticeable.
+ if ( $available_spaces > 0 && $lp_object ) {
+ my $halfway =
+ $maximum_line_length_at_level[$level] -
+ $rOpts_maximum_line_length / 2;
+ $lp_object->tentatively_decrease_available_spaces(
+ $available_spaces)
+ if ( $space_count > $halfway );
+ }
}
- $align_paren = 1;
- }
+ } ## end increasing depth
- # update state, but not on a blank token
- if ( $types_to_go[$max_index_to_go] ne 'b' ) {
+ #------------------
+ # Handle all tokens
+ #------------------
+ if ( $type ne 'b' ) {
- $gnu_stack[$max_gnu_stack_index]->set_have_child(1);
+ # Count commas and look for non-list characters. Once we see a
+ # non-list character, we give up and don't look for any more
+ # commas.
+ if ( $type eq '=>' ) {
+ $lp_arrow_count{$total_depth}++;
- ++$max_gnu_stack_index;
- $gnu_stack[$max_gnu_stack_index] =
- new_lp_indentation_item( $space_count, $level, $ci_level,
- $available_space, $align_paren );
+ # remember '=>' like '=' for estimating breaks (but see
+ # above note for b1035)
+ $last_lp_equals{$total_depth} = $ii;
+ }
- # If the opening paren is beyond the half-line length, then
- # we will use the minimum (standard) indentation. This will
- # help avoid problems associated with running out of space
- # near the end of a line. As a result, in deeply nested
- # lists, there will be some indentations which are limited
- # to this minimum standard indentation. But the most deeply
- # nested container will still probably be able to shift its
- # parameters to the right for proper alignment, so in most
- # cases this will not be noticeable.
- if ( $available_space > 0 && $space_count > $halfway ) {
- $gnu_stack[$max_gnu_stack_index]
- ->tentatively_decrease_available_spaces($available_space);
+ elsif ( $type eq ',' ) {
+ $lp_comma_count{$total_depth}++;
}
- }
- }
- # Count commas and look for non-list characters. Once we see a
- # non-list character, we give up and don't look for any more commas.
- if ( $type eq '=>' ) {
- $gnu_arrow_count{$total_depth}++;
+ elsif ( $is_assignment{$type} ) {
+ $last_lp_equals{$total_depth} = $ii;
+ }
- # remember '=>' like '=' for estimating breaks (but see above note
- # for b1035)
- $last_gnu_equals{$total_depth} = $max_index_to_go;
- }
+ # this token might start a new line if ..
+ if (
- elsif ( $type eq ',' ) {
- $gnu_comma_count{$total_depth}++;
- }
+ # this is the first nonblank token of the line
+ $ii == 1 && $types_to_go[0] eq 'b'
- elsif ( $is_assignment{$type} ) {
- $last_gnu_equals{$total_depth} = $max_index_to_go;
- }
+ # or previous character was one of these:
+ # /^([\:\?\,f])$/
+ || $hash_test2{$last_nonblank_type}
- # this token might start a new line
- # if this is a non-blank..
- if ( $type ne 'b' ) {
+ # or previous character was opening and this is not closing
+ || ( $last_nonblank_type eq '{' && $type ne '}' )
+ || ( $last_nonblank_type eq '(' and $type ne ')' )
- # and if ..
- if (
+ # or this token is one of these:
+ # /^([\.]|\|\||\&\&)$/
+ || $hash_test3{$type}
+
+ # or this is a closing structure
+ || ( $last_nonblank_type eq '}'
+ && $last_nonblank_token eq $last_nonblank_type )
+
+ # or previous token was keyword 'return'
+ || (
+ $last_nonblank_type eq 'k'
+ && ( $last_nonblank_token eq 'return'
+ && $type ne '{' )
+ )
- # this is the first nonblank token of the line
- $max_index_to_go == 1 && $types_to_go[0] eq 'b'
+ # or starting a new line at certain keywords is fine
+ || ( $type eq 'k'
+ && $is_if_unless_and_or_last_next_redo_return{$token} )
+
+ # or this is after an assignment after a closing structure
+ || (
+ $is_assignment{$last_nonblank_type}
+ && (
+ # /^[\}\)\]]$/
+ $hash_test1{$last_last_nonblank_type}
+
+ # and it is significantly to the right
+ || $lp_position_predictor > (
+ $maximum_line_length_at_level[$level] -
+ $rOpts_maximum_line_length / 2
+ )
+ )
+ )
+ )
+ {
+ check_for_long_gnu_style_lines( $ii, $rlp_object_list );
+ $ii_begin_line = $ii;
- # or previous character was one of these:
- || $last_nonblank_type_in_batch =~ /^([\:\?\,f])$/
+ # back up 1 token if we want to break before that type
+ # otherwise, we may strand tokens like '?' or ':' on a line
+ if ( $ii_begin_line > 0 ) {
+ if ( $last_nonblank_type eq 'k' ) {
- # or previous character was opening and this does not close it
- || ( $last_nonblank_type_in_batch eq '{' && $type ne '}' )
- || ( $last_nonblank_type_in_batch eq '(' and $type ne ')' )
+ if ( $want_break_before{$last_nonblank_token} ) {
+ $ii_begin_line--;
+ }
+ }
+ elsif ( $want_break_before{$last_nonblank_type} ) {
+ $ii_begin_line--;
+ }
+ }
+ } ## end if ( $ii == 1 && $types_to_go...)
- # or this token is one of these:
- || $type =~ /^([\.]|\|\||\&\&)$/
+ $K_last_nonblank = $KK;
- # or this is a closing structure
- || ( $last_nonblank_type_in_batch eq '}'
- && $last_nonblank_token_in_batch eq
- $last_nonblank_type_in_batch )
+ $last_last_nonblank_type = $last_nonblank_type;
+ $last_nonblank_type = $type;
+ $last_nonblank_token = $token;
- # or previous token was keyword 'return'
- || (
- $last_nonblank_type_in_batch eq 'k'
- && ( $last_nonblank_token_in_batch eq 'return'
- && $type ne '{' )
- )
+ } ## end if ( $type ne 'b' )
- # or starting a new line at certain keywords is fine
- || ( $type eq 'k'
- && $is_if_unless_and_or_last_next_redo_return{$token} )
+ # remember the predicted position of this token on the output line
+ if ( $ii > $ii_begin_line ) {
- # or this is after an assignment after a closing structure
- || (
- $is_assignment{$last_nonblank_type_in_batch}
- && (
- $last_last_nonblank_type_in_batch =~ /^[\}\)\]]$/
+ ## NOTE: this is a critical loop - the following call has been
+ ## expanded for about 2x speedup:
+ ## $lp_position_predictor =
+ ## total_line_length( $ii_begin_line, $ii );
- # and it is significantly to the right
- || $gnu_position_predictor > $halfway
- )
- )
- )
- {
- check_for_long_gnu_style_lines($max_index_to_go);
- $line_start_index_to_go = $max_index_to_go;
+ my $indentation = $leading_spaces_to_go[$ii_begin_line];
+ if ( ref($indentation) ) {
+ $indentation = $indentation->get_spaces();
+ }
+ $lp_position_predictor =
+ $indentation +
+ $summed_lengths_to_go[ $ii + 1 ] -
+ $summed_lengths_to_go[$ii_begin_line];
+ }
+ else {
+ $lp_position_predictor =
+ $space_count + $token_lengths_to_go[$ii];
+ }
- # back up 1 token if we want to break before that type
- # otherwise, we may strand tokens like '?' or ':' on a line
- if ( $line_start_index_to_go > 0 ) {
- if ( $last_nonblank_type_in_batch eq 'k' ) {
+ # Store the indentation object for this token.
+ # This allows us to manipulate the leading whitespace
+ # (in case we have to reduce indentation to fit a line) without
+ # having to change any token values.
- if ( $want_break_before{$last_nonblank_token_in_batch} )
- {
- $line_start_index_to_go--;
- }
- }
- elsif ( $want_break_before{$last_nonblank_type_in_batch} ) {
- $line_start_index_to_go--;
- }
+ #---------------------------------------------------------------
+ # replace leading whitespace with indentation objects where used
+ #---------------------------------------------------------------
+ if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
+ my $lp_object = $rLP->[$max_lp_stack]->[_lp_object_];
+ $leading_spaces_to_go[$ii] = $lp_object;
+ if ( $max_lp_stack > 0
+ && $ci_level
+ && $rLP->[ $max_lp_stack - 1 ]->[_lp_object_] )
+ {
+ $reduced_spaces_to_go[$ii] =
+ $rLP->[ $max_lp_stack - 1 ]->[_lp_object_];
+ }
+ else {
+ $reduced_spaces_to_go[$ii] = $lp_object;
}
}
- }
+ } ## end loop over all tokens in this batch
+
+ undo_incomplete_lp_indentation($rlp_object_list)
+ if ( !$rOpts_extended_line_up_parentheses );
- # remember the predicted position of this token on the output line
- if ( $max_index_to_go > $line_start_index_to_go ) {
- $gnu_position_predictor =
- total_line_length( $line_start_index_to_go, $max_index_to_go );
- }
- else {
- $gnu_position_predictor =
- $space_count + $token_lengths_to_go[$max_index_to_go];
- }
-
- # store the indentation object for this token
- # this allows us to manipulate the leading whitespace
- # (in case we have to reduce indentation to fit a line) without
- # having to change any token values
- $leading_spaces_to_go[$max_index_to_go] =
- $gnu_stack[$max_gnu_stack_index];
- $reduced_spaces_to_go[$max_index_to_go] =
- ( $max_gnu_stack_index > 0 && $ci_level )
- ? $gnu_stack[ $max_gnu_stack_index - 1 ]
- : $gnu_stack[$max_gnu_stack_index];
return;
}
# look at the current estimated maximum line length, and
# remove some whitespace if it exceeds the desired maximum
- my ($mx_index_to_go) = @_;
+ my ( $mx_index_to_go, $rlp_object_list ) = @_;
- # this is only for the '-lp' style
- return unless ($rOpts_line_up_parentheses);
+ my $max_lp_object_list = @{$rlp_object_list} - 1;
# nothing can be done if no stack items defined for this line
- return if ( $max_gnu_item_index == UNDEFINED_INDEX );
+ return if ( $max_lp_object_list < 0 );
# see if we have exceeded the maximum desired line length
# keep 2 extra free because they are needed in some cases
# (result of trial-and-error testing)
my $spaces_needed =
- $gnu_position_predictor -
+ $lp_position_predictor -
$maximum_line_length_at_level[ $levels_to_go[$mx_index_to_go] ] + 2;
return if ( $spaces_needed <= 0 );
my $i;
# loop over all whitespace items created for the current batch
- for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
- my $item = $gnu_item_list[$i];
+ for ( $i = 0 ; $i <= $max_lp_object_list ; $i++ ) {
+ my $item = $rlp_object_list->[$i];
# item must still be open to be a candidate (otherwise it
# cannot influence the current token)
: $available_spaces;
# remove the incremental space from this item
- $gnu_item_list[$i]->decrease_available_spaces($deleted_spaces);
+ $rlp_object_list->[$i]->decrease_available_spaces($deleted_spaces);
my $i_debug = $i;
# update the leading whitespace of this item and all items
# that came after it
- for ( ; $i <= $max_gnu_item_index ; $i++ ) {
+ for ( ; $i <= $max_lp_object_list ; $i++ ) {
- my $old_spaces = $gnu_item_list[$i]->get_spaces();
+ my $old_spaces = $rlp_object_list->[$i]->get_spaces();
if ( $old_spaces >= $deleted_spaces ) {
- $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
+ $rlp_object_list->[$i]->decrease_SPACES($deleted_spaces);
}
# shouldn't happen except for code bug:
else {
- my $level = $gnu_item_list[$i_debug]->get_level();
- my $ci_level = $gnu_item_list[$i_debug]->get_ci_level();
- my $old_level = $gnu_item_list[$i]->get_level();
- my $old_ci_level = $gnu_item_list[$i]->get_ci_level();
- warning(
-"program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n"
- );
- report_definite_bug();
+ # non-fatal, keep going except in DEVEL_MODE
+ if (DEVEL_MODE) {
+ my $level = $rlp_object_list->[$i_debug]->get_level();
+ my $ci_level =
+ $rlp_object_list->[$i_debug]->get_ci_level();
+ my $old_level = $rlp_object_list->[$i]->get_level();
+ my $old_ci_level =
+ $rlp_object_list->[$i]->get_ci_level();
+ Fault(<<EOM);
+program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level
+EOM
+ }
}
}
- $gnu_position_predictor -= $deleted_spaces;
- $spaces_needed -= $deleted_spaces;
+ $lp_position_predictor -= $deleted_spaces;
+ $spaces_needed -= $deleted_spaces;
last unless ( $spaces_needed > 0 );
}
return;
}
- sub finish_lp_batch {
+ sub undo_incomplete_lp_indentation {
+
+ #------------------------------------------------------------------
+ # Undo indentation for all incomplete -lp indentation levels of the
+ # current batch unless -xlp is set.
+ #------------------------------------------------------------------
# This routine is called once after each output stream batch is
- # finished to undo indentation for all incomplete -lp
- # indentation levels. It is too risky to leave a level open,
- # because then we can't backtrack in case of a long line to follow.
- # This means that comments and blank lines will disrupt this
- # indentation style. But the vertical aligner may be able to
- # get the space back if there are side comments.
-
- # this is only for the 'lp' style
- return unless ($rOpts_line_up_parentheses);
+ # finished to undo indentation for all incomplete -lp indentation
+ # levels. If this routine is called then comments and blank lines will
+ # disrupt this indentation style. In older versions of perltidy this
+ # was always done because it could cause problems otherwise, but recent
+ # improvements allow fairly good results to be obtained by skipping
+ # this step with the -xlp flag.
+ my ($rlp_object_list) = @_;
- # nothing can be done if no stack items defined for this line
- return if ( $max_gnu_item_index == UNDEFINED_INDEX );
+ my $max_lp_object_list = @{$rlp_object_list} - 1;
+
+ # nothing to do if no stack items defined for this line
+ return if ( $max_lp_object_list < 0 );
# loop over all whitespace items created for the current batch
- foreach my $i ( 0 .. $max_gnu_item_index ) {
- my $item = $gnu_item_list[$i];
+ foreach my $i ( 0 .. $max_lp_object_list ) {
+ my $item = $rlp_object_list->[$i];
# only look for open items
next if ( $item->get_closed() >= 0 );
if ( $available_spaces > 0 ) {
# delete incremental space for this item
- $gnu_item_list[$i]
+ $rlp_object_list->[$i]
->tentatively_decrease_available_spaces($available_spaces);
# Reduce the total indentation space of any nodes that follow
# Note that any such nodes must necessarily be dependents
# of this node.
- foreach ( $i + 1 .. $max_gnu_item_index ) {
- $gnu_item_list[$_]->decrease_SPACES($available_spaces);
+ foreach ( $i + 1 .. $max_lp_object_list ) {
+ $rlp_object_list->[$_]->decrease_SPACES($available_spaces);
}
}
}
return;
}
-} ## end closure set_leading_whitespace
+} ## end closure set_lp_indentation
+
+#----------------------------------------------------------------------
+# sub to set a requested break before an opening container in -lp mode.
+#----------------------------------------------------------------------
+sub set_forced_lp_break {
+
+ my ( $self, $i_begin_line, $i_opening ) = @_;
+
+ # Given:
+ # $i_begin_line = index of break in the _to_go arrays
+ # $i_opening = index of the opening container
+
+ # Set any requested break at a token before this opening container
+ # token. This is often an '=' or '=>' but can also be things like
+ # '.', ',', 'return'. It was defined by sub set_lp_indentation.
+
+ # Important:
+ # For intact containers, call this at the closing token.
+ # For broken containers, call this at the opening token.
+ # This will avoid needless breaks when it turns out that the
+ # container does not actually get broken. This isn't known until
+ # the closing container for intact blocks.
+
+ return
+ if ( $i_begin_line < 0
+ || $i_begin_line > $max_index_to_go );
+
+ # Handle request to put a break break immediately before this token.
+ # We may not want to do that since we are also breaking after it.
+ if ( $i_begin_line == $i_opening ) {
+
+ # The following rules should be reviewed. We may want to always
+ # allow the break. If we do not do the break, the indentation
+ # may be off.
+
+ # RULE: don't break before it unless it is welded to a qw.
+ # This works well, but we may want to relax this to allow
+ # breaks in additional cases.
+ return
+ if ( !$self->[_rK_weld_right_]->{ $K_to_go[$i_opening] } );
+ return unless ( $types_to_go[$max_index_to_go] eq 'q' );
+ }
+
+ # Only break for breakpoints at the same
+ # indentation level as the opening paren
+ my $test1 = $nesting_depth_to_go[$i_opening];
+ my $test2 = $nesting_depth_to_go[$i_begin_line];
+ return if ( $test2 != $test1 );
+
+ # Back up at a blank (fixes case b932)
+ my $ibr = $i_begin_line - 1;
+ if ( $ibr > 0
+ && $types_to_go[$ibr] eq 'b' )
+ {
+ $ibr--;
+ }
+ if ( $ibr >= 0 ) {
+ my $i_nonblank = $self->set_forced_breakpoint($ibr);
+
+ # Crude patch to prevent sub recombine_breakpoints from undoing
+ # this break, especially after an '='. It will leave old
+ # breakpoints alone. See c098/x045 for some examples.
+ if ( defined($i_nonblank) ) {
+ $old_breakpoint_to_go[$i_nonblank] = 1;
+ }
+ }
+ return;
+}
sub reduce_lp_indentation {
# reduce the leading whitespace at token $i if possible by $spaces_needed
# (a large value of $spaces_needed will remove all excess space)
- # NOTE: to be called from scan_list only for a sequence of tokens
+ # NOTE: to be called from break_lists only for a sequence of tokens
# contained between opening and closing parens/braces/brackets
my ( $self, $i, $spaces_wanted ) = @_;
# CODE SECTION 13: Preparing batches for vertical alignment
###########################################################
-sub send_lines_to_vertical_aligner {
+sub check_convey_batch_input {
+
+ # Check for valid input to sub convey_batch_to_vertical_aligner. An
+ # error here would most likely be due to an error in the calling
+ # routine 'sub grind_batch_of_CODE'.
+ my ( $self, $ri_first, $ri_last ) = @_;
+
+ if ( !defined($ri_first) || !defined($ri_last) ) {
+ Fault(<<EOM);
+Undefined line ranges ri_first and/r ri_last
+EOM
+ }
+
+ my $nmax = @{$ri_first} - 1;
+ my $nmax_check = @{$ri_last} - 1;
+ if ( $nmax < 0 || $nmax_check < 0 || $nmax != $nmax_check ) {
+ Fault(<<EOM);
+Line range index error: nmax=$nmax but nmax_check=$nmax_check
+These should be equal and >=0
+EOM
+ }
+ my ( $ibeg, $iend );
+ foreach my $n ( 0 .. $nmax ) {
+ my $ibeg_m = $ibeg;
+ my $iend_m = $iend;
+ $ibeg = $ri_first->[$n];
+ $iend = $ri_last->[$n];
+ if ( $ibeg < 0 || $iend < $ibeg || $iend > $max_index_to_go ) {
+ Fault(<<EOM);
+Bad line range at line index $n of $nmax: ibeg=$ibeg, iend=$iend
+These should have iend >= ibeg and be in the range (0..$max_index_to_go)
+EOM
+ }
+ next if ( $n == 0 );
+ if ( $ibeg <= $iend_m ) {
+ Fault(<<EOM);
+Line ranges overlap: iend=$iend_m at line $n-1 but ibeg=$ibeg for line $n
+EOM
+ }
+ }
+ return;
+}
+
+sub convey_batch_to_vertical_aligner {
my ($self) = @_;
# logical constructions
my $this_batch = $self->[_this_batch_];
- my $rlines_K = $this_batch->[_rlines_K_];
- if ( !@{$rlines_K} ) {
+ my $ri_first = $this_batch->[_ri_first_];
+ my $ri_last = $this_batch->[_ri_last_];
- # This can't happen because sub grind_batch_of_CODE always receives
- # tokens which it turns into one or more lines. If we get here it means
- # that a programming error has caused those lines to be lost.
- Fault("Unexpected call with no lines");
- return;
- }
- my $n_last_line = @{$rlines_K} - 1;
+ $self->check_convey_batch_input( $ri_first, $ri_last ) if (DEVEL_MODE);
+
+ my $n_last_line = @{$ri_first} - 1;
my $do_not_pad = $this_batch->[_do_not_pad_];
my $peak_batch_size = $this_batch->[_peak_batch_size_];
my $starting_in_quote = $this_batch->[_starting_in_quote_];
my $ending_in_quote = $this_batch->[_ending_in_quote_];
my $is_static_block_comment = $this_batch->[_is_static_block_comment_];
- my $ibeg0 = $this_batch->[_ibeg0_];
- my $rK_to_go = $this_batch->[_rK_to_go_];
- my $batch_count = $this_batch->[_batch_count_];
my $rix_seqno_controlling_ci = $this_batch->[_rix_seqno_controlling_ci_];
+ my $batch_CODE_type = $this_batch->[_batch_CODE_type_];
- my $rLL = $self->[_rLL_];
- my $Klimit = $self->[_Klimit_];
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
- my ( $Kbeg_next, $Kend_next ) = @{ $rlines_K->[0] };
- my $type_beg_next = $rLL->[$Kbeg_next]->[_TYPE_];
- my $token_beg_next = $rLL->[$Kbeg_next]->[_TOKEN_];
- my $type_end_next = $rLL->[$Kend_next]->[_TYPE_];
-
- # Construct indexes to the global_to_go arrays so that called routines can
- # still access those arrays. This might eventually be removed
- # when all called routines have been converted to access token values
- # in the rLL array instead.
- my $Kbeg0 = $Kbeg_next;
- my ( $ri_first, $ri_last );
- foreach my $rline ( @{$rlines_K} ) {
- my ( $Kbeg, $Kend ) = @{$rline};
- my $ibeg = $ibeg0 + $Kbeg - $Kbeg0;
- my $iend = $ibeg0 + $Kend - $Kbeg0;
- push @{$ri_first}, $ibeg;
- push @{$ri_last}, $iend;
- }
+ my $ibeg_next = $ri_first->[0];
+ my $iend_next = $ri_last->[0];
+
+ my $type_beg_next = $types_to_go[$ibeg_next];
+ my $type_end_next = $types_to_go[$iend_next];
+ my $token_beg_next = $tokens_to_go[$ibeg_next];
+
+ my $is_block_comment = $max_index_to_go == 0 && $types_to_go[0] eq '#';
+ my $rindentation_list = [0]; # ref to indentations for each line
my ( $cscw_block_comment, $closing_side_comment );
- if ( $rOpts->{'closing-side-comments'} ) {
+ if ($rOpts_closing_side_comments) {
( $closing_side_comment, $cscw_block_comment ) =
- $self->add_closing_side_comment();
+ $self->add_closing_side_comment( $ri_first, $ri_last );
}
- my $rindentation_list = [0]; # ref to indentations for each line
-
- # define the array @{$ralignment_type_to_go} for the output tokens
- # which will be non-blank for each special token (such as =>)
- # for which alignment is required.
- my $ralignment_type_to_go =
- $self->set_vertical_alignment_markers( $ri_first, $ri_last );
-
# flush before a long if statement to avoid unwanted alignment
if ( $n_last_line > 0
&& $type_beg_next eq 'k'
$self->flush_vertical_aligner();
}
- $self->undo_ci( $ri_first, $ri_last, $rix_seqno_controlling_ci );
+ $self->undo_ci( $ri_first, $ri_last, $rix_seqno_controlling_ci )
+ if ( $n_last_line > 0 || $rOpts_extended_continuation_indentation );
$self->set_logical_padding( $ri_first, $ri_last, $peak_batch_size,
$starting_in_quote )
- if ( $rOpts->{'logical-padding'} );
+ if ( $n_last_line > 0 && $rOpts_logical_padding );
- # Resum lengths. We need accurate lengths for making alignment patterns,
- # and we may have unmasked a semicolon which was not included at the start.
- for ( 0 .. $max_index_to_go ) {
- $summed_lengths_to_go[ $_ + 1 ] =
- $summed_lengths_to_go[$_] + $token_lengths_to_go[$_];
- }
+ if (DEVEL_MODE) { $self->check_batch_summed_lengths() }
- # loop to prepare each line for shipment
- my ( $Kbeg, $type_beg, $token_beg );
- my ( $Kend, $type_end );
+ # ----------------------------------------------------------
+ # define the vertical alignments for all lines of this batch
+ # ----------------------------------------------------------
+ my $rline_alignments =
+ $self->make_vertical_alignments( $ri_first, $ri_last );
+
+ # ----------------------------------------------
+ # loop to send each line to the vertical aligner
+ # ----------------------------------------------
+ my ( $type_beg, $token_beg );
+ my ($type_end);
+ my ( $ibeg, $iend );
for my $n ( 0 .. $n_last_line ) {
- my $ibeg = $ri_first->[$n];
- my $iend = $ri_last->[$n];
- my $rline = $rlines_K->[$n];
- my $forced_breakpoint = $rline->[2];
+ # ----------------------------------------------------------------
+ # This hash will hold the args for vertical alignment of this line
+ # We will populate it as we go.
+ # ----------------------------------------------------------------
+ my $rvao_args = {};
- # we may need to look at variables on three consecutive lines ...
+ my $type_beg_last = $type_beg;
+ my $type_end_last = $type_end;
- # Some vars on line [n-1], if any:
- my $Kbeg_last = $Kbeg;
- my $type_beg_last = $type_beg;
- my $token_beg_last = $token_beg;
- my $Kend_last = $Kend;
- my $type_end_last = $type_end;
+ my $ibeg = $ibeg_next;
+ my $iend = $iend_next;
+ my $Kbeg = $K_to_go[$ibeg];
+ my $Kend = $K_to_go[$iend];
- # Some vars on line [n]:
- $Kbeg = $Kbeg_next;
$type_beg = $type_beg_next;
- $token_beg = $token_beg_next;
- $Kend = $Kend_next;
$type_end = $type_end_next;
+ $token_beg = $token_beg_next;
- # Only forward ending K values of non-comments down the pipeline.
- # This is equivalent to checking that the last CODE_type is blank or
- # equal to 'VER'. See also sub resync_lines_and_tokens for related
- # coding. Note that '$batch_CODE_type' is the code type of the line
- # to which the ending token belongs.
- my $batch_CODE_type = $this_batch->[_batch_CODE_type_];
+ # ---------------------------------------------------
+ # Define the check value 'Kend' to send for this line
+ # ---------------------------------------------------
+ # The 'Kend' value is an integer for checking that lines come out of
+ # the far end of the pipeline in the right order. It increases
+ # linearly along the token stream. But we only send ending K values of
+ # non-comments down the pipeline. This is equivalent to checking that
+ # the last CODE_type is blank or equal to 'VER'. See also sub
+ # resync_lines_and_tokens for related coding. Note that
+ # '$batch_CODE_type' is the code type of the line to which the ending
+ # token belongs.
my $Kend_code =
$batch_CODE_type && $batch_CODE_type ne 'VER' ? undef : $Kend;
- # We use two slightly different definitions of level jump at the end
- # of line:
- # $ljump is the level jump needed by 'sub set_adjusted_indentation'
- # $level_jump is the level jump needed by the vertical aligner.
- my $ljump = 0; # level jump at end of line
+ # $ljump is a level jump needed by 'sub final_indentation_adjustment'
+ my $ljump = 0;
# Get some vars on line [n+1], if any:
if ( $n < $n_last_line ) {
- ( $Kbeg_next, $Kend_next ) =
- @{ $rlines_K->[ $n + 1 ] };
- $type_beg_next = $rLL->[$Kbeg_next]->[_TYPE_];
- $token_beg_next = $rLL->[$Kbeg_next]->[_TOKEN_];
- $type_end_next = $rLL->[$Kend_next]->[_TYPE_];
+ $ibeg_next = $ri_first->[ $n + 1 ];
+ $iend_next = $ri_last->[ $n + 1 ];
+
+ $type_beg_next = $types_to_go[$ibeg_next];
+ $type_end_next = $types_to_go[$iend_next];
+ $token_beg_next = $tokens_to_go[$ibeg_next];
+
+ my $Kbeg_next = $K_to_go[$ibeg_next];
$ljump = $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
}
- else {
+ elsif ( !$is_block_comment && $Kend < $Klimit ) {
# Patch for git #51, a bare closing qw paren was not outdented
# if the flag '-nodelete-old-newlines is set
- my $Kbeg_next = $self->K_next_code($Kend);
- if ( defined($Kbeg_next) ) {
- $ljump =
- $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
+ # Note that we are just looking ahead for the next nonblank
+ # character. We could scan past an arbitrary number of block
+ # comments or hanging side comments by calling K_next_code, but it
+ # could add significant run time with very little to be gained.
+ my $Kbeg_next = $Kend + 1;
+ if ( $Kbeg_next < $Klimit
+ && $rLL->[$Kbeg_next]->[_TYPE_] eq 'b' )
+ {
+ $Kbeg_next += 1;
}
+ $ljump =
+ $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
}
- # level jump at end of line for the vertical aligner:
- my $level_jump =
- $Kend >= $Klimit
- ? 0
- : $rLL->[ $Kend + 1 ]->[_SLEVEL_] - $rLL->[$Kbeg]->[_SLEVEL_];
-
- $self->delete_needless_alignments( $ibeg, $iend,
- $ralignment_type_to_go );
+ # ---------------------------------------------
+ # get the vertical alignment info for this line
+ # ---------------------------------------------
+ # The lines are broken into fields which can be spaced by the vertical
+ # to achieve vertical alignment. These fields are the actual text
+ # which will be output, so from here on no more changes can be made to
+ # the text.
+ my $rline_alignment = $rline_alignments->[$n];
my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
- $self->make_alignment_patterns( $ibeg, $iend,
- $ralignment_type_to_go );
+ @{$rline_alignment};
+
+ # Programming check: (shouldn't happen)
+ # The number of tokens which separate the fields must always be
+ # one less than the number of fields. If this is not true then
+ # an error has been introduced in sub make_alignment_patterns.
+ if (DEVEL_MODE) {
+ if ( @{$rfields} && ( @{$rtokens} != ( @{$rfields} - 1 ) ) ) {
+ my $nt = @{$rtokens};
+ my $nf = @{$rfields};
+ my $msg = <<EOM;
+Program bug in Perl::Tidy::Formatter, probably in sub 'make_alignment_patterns':
+The number of tokens = $nt should be one less than number of fields: $nf
+EOM
+ Fault($msg);
+ }
+ }
+ # --------------------------------------
+ # get the final indentation of this line
+ # --------------------------------------
my ( $indentation, $lev, $level_end, $terminal_type,
$terminal_block_type, $is_semicolon_terminated, $is_outdented_line )
- = $self->set_adjusted_indentation( $ibeg, $iend, $rfields,
+ = $self->final_indentation_adjustment( $ibeg, $iend, $rfields,
$rpatterns, $ri_first, $ri_last,
$rindentation_list, $ljump, $starting_in_quote,
$is_static_block_comment, );
- # we will allow outdenting of long lines..
- my $outdent_long_lines = (
-
+ # --------------------------------
+ # define flag 'outdent_long_lines'
+ # --------------------------------
+ if (
+ # we will allow outdenting of long lines..
# which are long quotes, if allowed
- ( $type_beg eq 'Q' && $rOpts->{'outdent-long-quotes'} )
+ ( $type_beg eq 'Q' && $rOpts_outdent_long_quotes )
# which are long block comments, if allowed
- || (
+ || (
$type_beg eq '#'
- && $rOpts->{'outdent-long-comments'}
+ && $rOpts_outdent_long_comments
# but not if this is a static block comment
&& !$is_static_block_comment
- )
- );
+ )
+ )
+ {
+ $rvao_args->{outdent_long_lines} = 1;
- my $break_alignment_before = $is_outdented_line || $do_not_pad;
- my $break_alignment_after = $is_outdented_line;
+ # convert -lp indentation objects to spaces to allow outdenting
+ if ( ref($indentation) ) {
+ $indentation = $indentation->get_spaces();
+ }
+ }
+
+ # --------------------------------------------------
+ # define flags 'break_alignment_before' and '_after'
+ # --------------------------------------------------
+
+ # These flags tell the vertical aligner to stop alignment before or
+ # after this line.
+ if ($is_outdented_line) {
+ $rvao_args->{break_alignment_before} = 1;
+ $rvao_args->{break_alignment_after} = 1;
+ }
+ elsif ($do_not_pad) {
+ $rvao_args->{break_alignment_before} = 1;
+ }
# flush at an 'if' which follows a line with (1) terminal semicolon
# or (2) terminal block_type which is not an 'if'. This prevents
# unwanted alignment between the lines.
- if ( $type_beg eq 'k' && $token_beg eq 'if' ) {
- my $Km = $self->K_previous_code($Kbeg);
- my $type_m = 'b';
- my $block_type_m = 'b';
- if ( defined($Km) ) {
- $type_m = $rLL->[$Km]->[_TYPE_];
- $block_type_m = $rLL->[$Km]->[_BLOCK_TYPE_];
+ elsif ( $type_beg eq 'k' && $token_beg eq 'if' ) {
+ my $type_m = 'b';
+ my $block_type_m;
+
+ if ( $Kbeg > 0 ) {
+ my $Km = $Kbeg - 1;
+ $type_m = $rLL->[$Km]->[_TYPE_];
+ if ( $type_m eq 'b' && $Km > 0 ) {
+ $Km -= 1;
+ $type_m = $rLL->[$Km]->[_TYPE_];
+ }
+ if ( $type_m eq '#' && $Km > 0 ) {
+ $Km -= 1;
+ $type_m = $rLL->[$Km]->[_TYPE_];
+ if ( $type_m eq 'b' && $Km > 0 ) {
+ $Km -= 1;
+ $type_m = $rLL->[$Km]->[_TYPE_];
+ }
+ }
+
+ my $seqno_m = $rLL->[$Km]->[_TYPE_SEQUENCE_];
+ if ($seqno_m) {
+ $block_type_m = $rblock_type_of_seqno->{$seqno_m};
+ }
}
# break after anything that is not if-like
- $break_alignment_before ||= $type_m eq ';'
- || ( $type_m eq '}'
- && $block_type_m ne 'if'
- && $block_type_m ne 'unless'
- && $block_type_m ne 'elsif'
- && $block_type_m ne 'else' );
+ if (
+ $type_m eq ';'
+ || ( $type_m eq '}'
+ && $block_type_m
+ && $block_type_m ne 'if'
+ && $block_type_m ne 'unless'
+ && $block_type_m ne 'elsif'
+ && $block_type_m ne 'else' )
+ )
+ {
+ $rvao_args->{break_alignment_before} = 1;
+ }
}
- my $rvertical_tightness_flags =
+ # ----------------------------------
+ # define 'rvertical_tightness_flags'
+ # ----------------------------------
+ # These flags tell the vertical aligner if/when to combine consecutive
+ # lines, based on the user input parameters.
+ $rvao_args->{rvertical_tightness_flags} =
$self->set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
- $ri_first, $ri_last, $ending_in_quote, $closing_side_comment );
+ $ri_first, $ri_last, $ending_in_quote, $closing_side_comment )
+ if ( !$is_block_comment );
- # Set a flag at the final ':' of a ternary chain to request
- # vertical alignment of the final term. Here is a
- # slightly complex example:
+ # ----------------------------------
+ # define 'is_terminal_ternary' flag
+ # ----------------------------------
+
+ # This flag is set at the final ':' of a ternary chain to request
+ # vertical alignment of the final term. Here is a slightly complex
+ # example:
#
# $self->{_text} = (
# !$section ? ''
# : ' elsewhere in this document'
# );
#
- my $is_terminal_ternary = 0;
-
if ( $type_beg eq ':' || $n > 0 && $type_end_last eq ':' ) {
- my $last_leading_type = $n > 0 ? $type_beg_last : ':';
+
+ my $is_terminal_ternary = 0;
+ my $last_leading_type = $n > 0 ? $type_beg_last : ':';
if ( $terminal_type ne ';'
&& $n_last_line > $n
&& $level_end == $lev )
{
+ my $Kbeg_next = $K_to_go[$ibeg_next];
$level_end = $rLL->[$Kbeg_next]->[_LEVEL_];
$terminal_type = $rLL->[$Kbeg_next]->[_TYPE_];
}
$KP = $rLL->[$KP]->[_KNEXT_SEQ_ITEM_];
}
}
+ $rvao_args->{is_terminal_ternary} = $is_terminal_ternary;
}
- my $level_adj = $lev;
- my $radjusted_levels = $self->[_radjusted_levels_];
- if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
- $level_adj = $radjusted_levels->[$Kbeg];
- if ( $level_adj < 0 ) { $level_adj = 0 }
- }
-
+ # -------------------------------------------------
# add any new closing side comment to the last line
+ # -------------------------------------------------
if ( $closing_side_comment && $n == $n_last_line && @{$rfields} ) {
+
$rfields->[-1] .= " $closing_side_comment";
# NOTE: Patch for csc. We can just use 1 for the length of the csc
# because its length should not be a limiting factor from here on.
$rfield_lengths->[-1] += 2;
+
+ # repack
+ $rline_alignment =
+ [ $rtokens, $rfields, $rpatterns, $rfield_lengths ];
}
- # Programming check: (shouldn't happen)
- # The number of tokens which separate the fields must always be
- # one less than the number of fields. If this is not true then
- # an error has been introduced in sub make_alignment_patterns.
- if ( @{$rfields} && ( @{$rtokens} != ( @{$rfields} - 1 ) ) ) {
- my $nt = @{$rtokens};
- my $nf = @{$rfields};
- my $msg = <<EOM;
-Program bug in Perl::Tidy::Formatter, probably in sub 'make_alignment_patterns':
-The number of tokens = $nt should be one less than number of fields: $nf
-EOM
- Fault($msg);
- }
-
- # Set flag which tells if this line is contained in a multi-line list
- my $list_seqno = $self->is_list_by_K($Kbeg);
-
- # send this new line down the pipe
- my $rvalign_hash = {};
- $rvalign_hash->{level} = $lev;
- $rvalign_hash->{level_end} = $level_end;
- $rvalign_hash->{level_adj} = $level_adj;
- $rvalign_hash->{indentation} = $indentation;
- $rvalign_hash->{list_seqno} = $list_seqno;
- $rvalign_hash->{outdent_long_lines} = $outdent_long_lines;
- $rvalign_hash->{is_terminal_ternary} = $is_terminal_ternary;
- $rvalign_hash->{rvertical_tightness_flags} = $rvertical_tightness_flags;
- $rvalign_hash->{level_jump} = $level_jump;
- $rvalign_hash->{rfields} = $rfields;
- $rvalign_hash->{rpatterns} = $rpatterns;
- $rvalign_hash->{rtokens} = $rtokens;
- $rvalign_hash->{rfield_lengths} = $rfield_lengths;
- $rvalign_hash->{terminal_block_type} = $terminal_block_type;
- $rvalign_hash->{batch_count} = $batch_count;
- $rvalign_hash->{break_alignment_before} = $break_alignment_before;
- $rvalign_hash->{break_alignment_after} = $break_alignment_after;
- $rvalign_hash->{Kend} = $Kend_code;
- $rvalign_hash->{ci_level} = $ci_levels_to_go[$ibeg];
+ # ------------------------
+ # define flag 'list_seqno'
+ # ------------------------
+
+ # This flag indicates if this line is contained in a multi-line list
+ if ( !$is_block_comment ) {
+ my $parent_seqno = $parent_seqno_to_go[$ibeg];
+ $rvao_args->{list_seqno} = $ris_list_by_seqno->{$parent_seqno};
+ }
+
+ # The alignment tokens have been marked with nesting_depths, so we need
+ # to pass nesting depths to the vertical aligner. They remain invariant
+ # under all formatting operations. Previously, level values were sent
+ # to the aligner. But they can be altered in welding and other
+ # opeartions, and this can lead to alignement errors.
+ my $nesting_depth_beg = $nesting_depth_to_go[$ibeg];
+ my $nesting_depth_end = $nesting_depth_to_go[$iend];
+
+ # A quirk in the definition of nesting depths is that the closing token
+ # has the same depth as internal tokens. The vertical aligner is
+ # programmed to expect them to have the lower depth, so we fix this.
+ if ( $is_closing_type{ $types_to_go[$ibeg] } ) { $nesting_depth_beg-- }
+ if ( $is_closing_type{ $types_to_go[$iend] } ) { $nesting_depth_end-- }
+
+ # Adjust nesting depths to keep -lp indentation for qw lists. This is
+ # required because qw lists contained in brackets do not get nesting
+ # depths, but the vertical aligner is watching nesting depth changes to
+ # decide if a -lp block is intact. Without this patch, qw lists
+ # enclosed in angle brackets will not get the correct -lp indentation.
+
+ # Looking for line with isolated qw ...
+ if ( $rOpts_line_up_parentheses
+ && $type_beg eq 'q'
+ && $ibeg == $iend )
+ {
+
+ # ... which is part of a multiline qw
+ my $Km = $self->K_previous_nonblank($Kbeg);
+ my $Kp = $self->K_next_nonblank($Kbeg);
+ if ( defined($Km) && $rLL->[$Km]->[_TYPE_] eq 'q'
+ || defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' )
+ {
+ $nesting_depth_beg++;
+ $nesting_depth_end++;
+ }
+ }
+ # ---------------------------------
+ # define flag 'forget_side_comment'
+ # ---------------------------------
+
+ # This flag tells the vertical aligner to reset the side comment
+ # location if we are entering a new block from level 0. This is
+ # intended to keep side comments from drifting too far to the right.
+ if ( $terminal_block_type
+ && $nesting_depth_end > $nesting_depth_beg )
+ {
+ my $level_adj = $lev;
+ my $radjusted_levels = $self->[_radjusted_levels_];
+ if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} )
+ {
+ $level_adj = $radjusted_levels->[$Kbeg];
+ if ( $level_adj < 0 ) { $level_adj = 0 }
+ }
+ if ( $level_adj == 0 ) {
+ $rvao_args->{forget_side_comment} = 1;
+ }
+ }
+
+ # -----------------------------------
+ # Store the remaining non-flag values
+ # -----------------------------------
+ $rvao_args->{Kend} = $Kend_code;
+ $rvao_args->{ci_level} = $ci_levels_to_go[$ibeg];
+ $rvao_args->{indentation} = $indentation;
+ $rvao_args->{level_end} = $nesting_depth_end;
+ $rvao_args->{level} = $nesting_depth_beg;
+ $rvao_args->{rline_alignment} = $rline_alignment;
+ $rvao_args->{maximum_line_length} =
+ $maximum_line_length_at_level[ $levels_to_go[$ibeg] ];
+
+ # --------------------------------------
+ # send this line to the vertical aligner
+ # --------------------------------------
my $vao = $self->[_vertical_aligner_object_];
- $vao->valign_input($rvalign_hash);
+ $vao->valign_input($rvao_args);
$do_not_pad = 0;
# and limit total to 10 character widths
&& token_sequence_length( $ibeg, $iend ) <= 10;
- } # end of loop to output each line
+ } ## end of loop to output each line
# remember indentation of lines containing opening containers for
- # later use by sub set_adjusted_indentation
- $self->save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
+ # later use by sub final_indentation_adjustment
+ $self->save_opening_indentation( $ri_first, $ri_last, $rindentation_list )
+ if ( !$is_block_comment );
# output any new -cscw block comment
if ($cscw_block_comment) {
return;
}
+sub check_batch_summed_lengths {
+
+ my ( $self, $msg ) = @_;
+ $msg = "" unless defined($msg);
+ my $rLL = $self->[_rLL_];
+
+ # Verify that the summed lengths are correct. We want to be sure that
+ # errors have not been introduced by programming changes. Summed lengths
+ # are defined in sub $store_token. Operations like padding and unmasking
+ # semicolons can change token lengths, but those operations are expected to
+ # update the summed lengths when they make changes. So the summed lengths
+ # should always be correct.
+ foreach my $i ( 0 .. $max_index_to_go ) {
+ my $len_by_sum =
+ $summed_lengths_to_go[ $i + 1 ] - $summed_lengths_to_go[$i];
+ my $len_tok_i = $token_lengths_to_go[$i];
+ my $KK = $K_to_go[$i];
+ my $len_tok_K;
+ if ( defined($KK) ) { $len_tok_K = $rLL->[$KK]->[_TOKEN_LENGTH_] }
+ if ( $len_by_sum != $len_tok_i
+ || defined($len_tok_K) && $len_by_sum != $len_tok_K )
+ {
+ my $lno = defined($KK) ? $rLL->[$KK]->[_LINE_INDEX_] + 1 : "undef";
+ $KK = 'undef' unless defined($KK);
+ my $tok = $tokens_to_go[$i];
+ my $type = $types_to_go[$i];
+ Fault(<<EOM);
+Summed lengths are appear to be incorrect. $msg
+lengths disagree: token length by sum=$len_by_sum but token_length_to_go[$i] = $len_tok_i and rLL->[$KK]->[_TOKEN_LENGTH_]=$len_tok_K
+near line $lno starting with '$tokens_to_go[0]..' at token i=$i K=$KK token_type='$type' token='$tok'
+EOM
+ }
+ }
+ return;
+}
+
{ ## begin closure set_vertical_alignment_markers
my %is_vertical_alignment_type;
my %is_not_vertical_alignment_token;
# accept vertical alignment.
my ( $self, $ri_first, $ri_last ) = @_;
- my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
my $ralignment_type_to_go;
+ my $ralignment_counts = [];
+ my $ralignment_hash_by_line = [];
- # Initialize the alignment array. Note that closing side comments can
- # insert up to 2 additional tokens beyond the original
- # $max_index_to_go, so we need to check ri_last for the last index.
+ # NOTE: closing side comments can insert up to 2 additional tokens
+ # beyond the original $max_index_to_go, so we need to check ri_last for
+ # the last index.
my $max_line = @{$ri_first} - 1;
- my $iend = $ri_last->[$max_line];
- if ( $iend < $max_index_to_go ) { $iend = $max_index_to_go }
+ my $max_i = $ri_last->[$max_line];
+ if ( $max_i < $max_index_to_go ) { $max_i = $max_index_to_go }
- # nothing to do if we aren't allowed to change whitespace
- # or there is only 1 token
- if ( $iend == 0 || !$rOpts_add_whitespace ) {
- for my $i ( 0 .. $iend ) {
- $ralignment_type_to_go->[$i] = '';
- }
- return $ralignment_type_to_go;
+ # -----------------------------------------------------------------
+ # Shortcut:
+ # - no alignments if there is only 1 token.
+ # - and nothing to do if we aren't allowed to change whitespace.
+ # -----------------------------------------------------------------
+ if ( $max_i <= 0 || !$rOpts_add_whitespace ) {
+ return ( $ralignment_type_to_go, $ralignment_counts,
+ $ralignment_hash_by_line );
}
- # remember the index of last nonblank token before any sidecomment
- my $i_terminal = $max_index_to_go;
- if ( $types_to_go[$i_terminal] eq '#' ) {
- if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
- if ( $i_terminal > 0 ) { --$i_terminal }
+ my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
+ my $ris_function_call_paren = $self->[_ris_function_call_paren_];
+ my $rLL = $self->[_rLL_];
+
+ # -------------------------------
+ # First handle any side comment.
+ # -------------------------------
+ my $i_terminal = $max_i;
+ if ( $types_to_go[$max_i] eq '#' ) {
+
+ # We know $max_i > 0 if we get here.
+ $i_terminal -= 1;
+ if ( $i_terminal > 0 && $types_to_go[$i_terminal] eq 'b' ) {
+ $i_terminal -= 1;
+ }
+
+ my $token = $tokens_to_go[$max_i];
+ my $KK = $K_to_go[$max_i];
+
+ # Do not align various special side comments
+ my $do_not_align = (
+
+ # it is any specially marked side comment
+ ( defined($KK) && $rspecial_side_comment_type->{$KK} )
+
+ # or it is a static side comment
+ || ( $rOpts->{'static-side-comments'}
+ && $token =~ /$static_side_comment_pattern/ )
+
+ # or a closing side comment
+ || ( $types_to_go[$i_terminal] eq '}'
+ && $tokens_to_go[$i_terminal] eq '}'
+ && $token =~ /$closing_side_comment_prefix_pattern/ )
+ );
+
+ # - For the specific combination -vc -nvsc, we put all side comments
+ # at fixed locations. Note that we will lose hanging side comment
+ # alignments. Otherwise, hsc's can move to strange locations.
+ # - For -nvc -nvsc we make all side comments vertical alignments
+ # because the vertical aligner will check for -nvsc and be able
+ # to reduce the final padding to the side comments for long lines.
+ # and keep hanging side comments aligned.
+ if ( !$do_not_align
+ && !$rOpts_valign_side_comments
+ && $rOpts_valign_code )
+ {
+
+ $do_not_align = 1;
+ my $ipad = $max_i - 1;
+ if ( $types_to_go[$ipad] eq 'b' ) {
+ my $pad_spaces =
+ $rOpts->{'minimum-space-to-comment'} -
+ $token_lengths_to_go[$ipad];
+ $self->pad_token( $ipad, $pad_spaces );
+ }
+ }
+
+ if ( !$do_not_align ) {
+ $ralignment_type_to_go->[$max_i] = '#';
+ $ralignment_hash_by_line->[$max_line]->{$max_i} = '#';
+ $ralignment_counts->[$max_line]++;
}
}
- # look at each line of this batch..
- my $last_vertical_alignment_before_index;
+ # ----------------------------------------------
+ # Nothing more to do on this line if -nvc is set
+ # ----------------------------------------------
+ if ( !$rOpts_valign_code ) {
+ return ( $ralignment_type_to_go, $ralignment_counts,
+ $ralignment_hash_by_line );
+ }
+
+ # -------------------------------------
+ # Loop over each line of this batch ...
+ # -------------------------------------
+ my $last_vertical_alignment_BEFORE_index;
my $vert_last_nonblank_type;
my $vert_last_nonblank_token;
my $vert_last_nonblank_block_type;
foreach my $line ( 0 .. $max_line ) {
+
my $ibeg = $ri_first->[$line];
my $iend = $ri_last->[$line];
- $last_vertical_alignment_before_index = -1;
- $vert_last_nonblank_type = '';
- $vert_last_nonblank_token = '';
- $vert_last_nonblank_block_type = '';
- # look at each token in this output line..
+ next if ( $iend <= $ibeg );
+
+ # back up before any side comment
+ if ( $iend > $i_terminal ) { $iend = $i_terminal }
+
my $level_beg = $levels_to_go[$ibeg];
- foreach my $i ( $ibeg .. $iend ) {
- my $alignment_type = '';
+ my $token_beg = $tokens_to_go[$ibeg];
+ my $type_beg = $types_to_go[$ibeg];
+ my $type_beg_special_char =
+ ( $type_beg eq '.' || $type_beg eq ':' || $type_beg eq '?' );
+
+ $last_vertical_alignment_BEFORE_index = -1;
+ $vert_last_nonblank_type = $type_beg;
+ $vert_last_nonblank_token = $token_beg;
+
+ # ----------------------------------------------------------------
+ # Initialization code merged from 'sub delete_needless_alignments'
+ # ----------------------------------------------------------------
+ my $i_good_paren = -1;
+ my $i_elsif_close = $ibeg - 1;
+ my $i_elsif_open = $iend + 1;
+ my @imatch_list;
+ if ( $type_beg eq 'k' ) {
+
+ # Initialization for paren patch: mark a location of a paren we
+ # should keep, such as one following something like a leading
+ # 'if', 'elsif',
+ $i_good_paren = $ibeg + 1;
+ if ( $types_to_go[$i_good_paren] eq 'b' ) {
+ $i_good_paren++;
+ }
+
+ # Initializtion for 'elsif' patch: remember the paren range of
+ # an elsif, and do not make alignments within them because this
+ # can cause loss of padding and overall brace alignment in the
+ # vertical aligner.
+ if ( $token_beg eq 'elsif'
+ && $i_good_paren < $iend
+ && $tokens_to_go[$i_good_paren] eq '(' )
+ {
+ $i_elsif_open = $i_good_paren;
+ $i_elsif_close = $mate_index_to_go[$i_good_paren];
+ }
+ } ## end if ( $type_beg eq 'k' )
+
+ # --------------------------------------------
+ # Loop over each token in this output line ...
+ # --------------------------------------------
+ foreach my $i ( $ibeg + 1 .. $iend ) {
+
+ next if ( $types_to_go[$i] eq 'b' );
+
my $type = $types_to_go[$i];
- my $block_type = $block_type_to_go[$i];
my $token = $tokens_to_go[$i];
+ my $alignment_type = '';
+
+ # ----------------------------------------------
+ # Check for 'paren patch' : Remove excess parens
+ # ----------------------------------------------
+
+ # Excess alignment of parens can prevent other good alignments.
+ # For example, note the parens in the first two rows of the
+ # following snippet. They would normally get marked for
+ # alignment and aligned as follows:
+
+ # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
+ # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
+ # my $img = new Gimp::Image( $w, $h, RGB );
+
+ # This causes unnecessary paren alignment and prevents the
+ # third equals from aligning. If we remove the unwanted
+ # alignments we get:
+
+ # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
+ # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
+ # my $img = new Gimp::Image( $w, $h, RGB );
+
+ # A rule for doing this which works well is to remove alignment
+ # of parens whose containers do not contain other aligning
+ # tokens, with the exception that we always keep alignment of
+ # the first opening paren on a line (for things like 'if' and
+ # 'elsif' statements).
+ if ( $token eq ')' && @imatch_list ) {
+
+ # undo the corresponding opening paren if:
+ # - it is at the top of the stack
+ # - and not the first overall opening paren
+ # - does not follow a leading keyword on this line
+ my $imate = $mate_index_to_go[$i];
+ if ( $imatch_list[-1] eq $imate
+ && ( $ibeg > 1 || @imatch_list > 1 )
+ && $imate > $i_good_paren )
+ {
+ if ( $ralignment_type_to_go->[$imate] ) {
+ $ralignment_type_to_go->[$imate] = '';
+ $ralignment_counts->[$line]--;
+ delete $ralignment_hash_by_line->[$line]->{$imate};
+ }
+ pop @imatch_list;
+ }
+ }
- # do not align tokens at lower level then start of line
+ # do not align tokens at lower level than start of line
# except for side comments
- if ( $levels_to_go[$i] < $levels_to_go[$ibeg]
- && $type ne '#' )
- {
- $ralignment_type_to_go->[$i] = '';
+ if ( $levels_to_go[$i] < $level_beg ) {
next;
}
# must follow a blank token
elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
- # align a side comment --
- elsif ( $type eq '#' ) {
-
- my $KK = $K_to_go[$i];
- my $sc_type = $rspecial_side_comment_type->{$KK};
-
- unless (
-
- # it is any specially marked side comment
- $sc_type
-
- # or it is a static side comment
- || ( $rOpts->{'static-side-comments'}
- && $token =~ /$static_side_comment_pattern/ )
-
- # or a closing side comment
- || ( $vert_last_nonblank_block_type
- && $token =~
- /$closing_side_comment_prefix_pattern/ )
- )
- {
- $alignment_type = $type;
- } ## Example of a static side comment
- }
-
# otherwise, do not align two in a row to create a
# blank field
- elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
+ elsif ( $last_vertical_alignment_BEFORE_index == $i - 2 ) { }
# align before one of these keywords
# (within a line, since $i>1)
}
# align before one of these types..
- # Note: add '.' after new vertical aligner is operational
elsif ( $is_vertical_alignment_type{$type}
&& !$is_not_vertical_alignment_token{$token} )
{
# nothing follows it, and
# (2) doing so may prevent other good alignments.
# Current exceptions are && and || and =>
- if ( $i == $iend || $i >= $i_terminal ) {
+ if ( $i == $iend ) {
$alignment_type = ""
unless ( $is_terminal_alignment_type{$type} );
}
# $PDL::IO::Pic::biggrays
# ? ( m/GIF/ ? 0 : 1 )
# : ( m/GIF|RAST|IFF/ ? 0 : 1 );
- if (
- $i == $ibeg + 2
- && $types_to_go[ $i - 1 ] eq 'b'
- && ( $types_to_go[$ibeg] eq '.'
- || $types_to_go[$ibeg] eq ':'
- || $types_to_go[$ibeg] eq '?' )
- )
+ if ( $type_beg_special_char
+ && $i == $ibeg + 2
+ && $types_to_go[ $i - 1 ] eq 'b' )
{
$alignment_type = "";
}
}
# Do not align a spaced-function-paren if requested.
- # Issue git #53. Note that $i-1 is a blank token if we
- # get here.
- if ( !$rOpts_function_paren_vertical_alignment
- && $i > $ibeg + 1 )
- {
- my $type_m = $types_to_go[ $i - 2 ];
- my $token_m = $tokens_to_go[ $i - 2 ];
-
- # this is the same test as 'space-function-paren'
- if ( $type_m =~ /^[wUG]$/
- || $type_m eq '->'
- || $type_m =~ /^[wi]$/
- && $token_m =~ /^(\&|->)/ )
- {
+ # Issue git #53, #73.
+ if ( !$rOpts_function_paren_vertical_alignment ) {
+ my $seqno = $type_sequence_to_go[$i];
+ if ( $ris_function_call_paren->{$seqno} ) {
$alignment_type = "";
}
}
#{ $alignment_type = $type; }
if ($alignment_type) {
- $last_vertical_alignment_before_index = $i;
+ $last_vertical_alignment_BEFORE_index = $i;
}
#--------------------------------------------------------
# We want to line up ',' and interior ';' tokens, with the added
# space AFTER these tokens. (Note: interior ';' is included
# because it may occur in short blocks).
- if (
+ elsif (
# we haven't already set it
- !$alignment_type
+ ##!$alignment_type
+
+ # previous token IS one of these:
+ (
+ $vert_last_nonblank_type eq ','
+ || $vert_last_nonblank_type eq ';'
+ )
# and its not the first token of the line
- && ( $i > $ibeg )
+ ## && $i > $ibeg
# and it follows a blank
&& $types_to_go[ $i - 1 ] eq 'b'
- # and previous token IS one of these:
- && ( $vert_last_nonblank_type eq ','
- || $vert_last_nonblank_type eq ';' )
-
# and it's NOT one of these
- && ( $type ne 'b'
- && $type ne '#'
- && !$is_closing_token{$type} )
+ && !$is_closing_token{$type}
# then go ahead and align
)
- {
- $alignment_type = $vert_last_nonblank_type;
- }
+ {
+ $alignment_type = $vert_last_nonblank_type;
+ }
+
+ #-----------------------
+ # Set the alignment type
+ #-----------------------
+ if ($alignment_type) {
+
+ # but do not align the opening brace of an anonymous sub
+ if ( $token eq '{'
+ && $block_type_to_go[$i] =~ /$ASUB_PATTERN/ )
+ {
+
+ }
+
+ # and do not make alignments within 'elsif' parens
+ elsif ( $i > $i_elsif_open && $i < $i_elsif_close ) {
+
+ }
- #--------------------------------------------------------
- # Undo alignment in special cases
- #--------------------------------------------------------
- if ($alignment_type) {
+ # and ignore any tokens which have leading padded spaces
+ # example: perl527/lop.t
+ elsif ( substr( $alignment_type, 0, 1 ) eq ' ' ) {
- # do not align the opening brace of an anonymous sub
- if ( $token eq '{' && $block_type =~ /$ASUB_PATTERN/ ) {
- $alignment_type = "";
}
- }
- #--------------------------------------------------------
- # then store the value
- #--------------------------------------------------------
- $ralignment_type_to_go->[$i] = $alignment_type;
- if ( $type ne 'b' ) {
- $vert_last_nonblank_type = $type;
- $vert_last_nonblank_token = $token;
- $vert_last_nonblank_block_type = $block_type;
+ else {
+ $ralignment_type_to_go->[$i] = $alignment_type;
+ $ralignment_hash_by_line->[$line]->{$i} =
+ $alignment_type;
+ $ralignment_counts->[$line]++;
+ push @imatch_list, $i;
+ }
}
+
+ $vert_last_nonblank_type = $type;
+ $vert_last_nonblank_token = $token;
}
}
- return $ralignment_type_to_go;
- }
+
+ return ( $ralignment_type_to_go, $ralignment_counts,
+ $ralignment_hash_by_line );
+ } ## end sub set_vertical_alignment_markers
} ## end closure set_vertical_alignment_markers
+sub make_vertical_alignments {
+ my ( $self, $ri_first, $ri_last ) = @_;
+
+ #----------------------------
+ # Shortcut for a single token
+ #----------------------------
+ if ( $max_index_to_go == 0 ) {
+ if ( @{$ri_first} == 1 && $ri_last->[0] == 0 ) {
+ my $rtokens = [];
+ my $rfields = [ $tokens_to_go[0] ];
+ my $rpatterns = [ $types_to_go[0] ];
+ my $rfield_lengths =
+ [ $summed_lengths_to_go[1] - $summed_lengths_to_go[0] ];
+ return [ [ $rtokens, $rfields, $rpatterns, $rfield_lengths ] ];
+ }
+
+ # Strange line packing, not fatal but should not happen
+ elsif (DEVEL_MODE) {
+ my $max_line = @{$ri_first} - 1;
+ my $ibeg = $ri_first->[0];
+ my $iend = $ri_last->[0];
+ my $tok_b = $tokens_to_go[$ibeg];
+ my $tok_e = $tokens_to_go[$iend];
+ my $type_b = $types_to_go[$ibeg];
+ my $type_e = $types_to_go[$iend];
+ Fault(
+"Strange..max_index=0 but nlines=$max_line ibeg=$ibeg tok=$tok_b type=$type_b iend=$iend tok=$tok_e type=$type_e; please check\n"
+ );
+ }
+ }
+
+ #---------------------------------------------------------
+ # Step 1: Define the alignment tokens for the entire batch
+ #---------------------------------------------------------
+ my ( $ralignment_type_to_go, $ralignment_counts, $ralignment_hash_by_line )
+ = $self->set_vertical_alignment_markers( $ri_first, $ri_last );
+
+ #----------------------------------------------
+ # Step 2: Break each line into alignment fields
+ #----------------------------------------------
+ my $rline_alignments = [];
+ my $max_line = @{$ri_first} - 1;
+ foreach my $line ( 0 .. $max_line ) {
+
+ my $ibeg = $ri_first->[$line];
+ my $iend = $ri_last->[$line];
+
+ my $rtok_fld_pat_len = $self->make_alignment_patterns(
+ $ibeg, $iend, $ralignment_type_to_go,
+ $ralignment_counts->[$line],
+ $ralignment_hash_by_line->[$line]
+ );
+ push @{$rline_alignments}, $rtok_fld_pat_len;
+ }
+ return $rline_alignments;
+} ## end sub make_vertical_alignments
+
sub get_seqno {
# get opening and closing sequence numbers of a token for the vertical
# tokens by the vertical aligner.
my ( $self, $ii, $ending_in_quote ) = @_;
- my $rLL = $self->[_rLL_];
- my $this_batch = $self->[_this_batch_];
- my $rK_to_go = $this_batch->[_rK_to_go_];
+ my $rLL = $self->[_rLL_];
- my $KK = $rK_to_go->[$ii];
+ my $KK = $K_to_go[$ii];
my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
if ( $rLL->[$KK]->[_TYPE_] eq 'q' ) {
# Loop over all lines of the batch ...
- # Workaround for problem c007, in which the combination -lp -xci
- # can produce a "Program bug" message in unusual circumstances.
- my $skip_SECTION_1 = $rOpts_line_up_parentheses
- && $rOpts->{'extended-continuation-indentation'};
+ # Workaround originally created for problem c007, in which the
+ # combination -lp -xci could produce a "Program bug" message in unusual
+ # circumstances.
+ my $skip_SECTION_1;
+ if ( $rOpts_line_up_parentheses
+ && $rOpts_extended_continuation_indentation )
+ {
+
+ # Only set this flag if -lp is actually used here
+ foreach my $line ( 0 .. $max_line ) {
+ my $ibeg = $ri_first->[$line];
+ if ( ref( $leading_spaces_to_go[$ibeg] ) ) {
+ $skip_SECTION_1 = 1;
+ last;
+ }
+ }
+ }
foreach my $line ( 0 .. $max_line ) {
my $iend = $ri_last->[$line];
my $lev = $levels_to_go[$ibeg];
- ####################################
+ #-----------------------------------
# SECTION 1: Undo needless common CI
- ####################################
+ #-----------------------------------
# We are looking at leading tokens and looking for a sequence all
# at the same level and all at a higher level than enclosing lines.
}
}
- ######################################
+ #-------------------------------------
# SECTION 2: Undo ci at cuddled blocks
- ######################################
+ #-------------------------------------
- # Note that sub set_adjusted_indentation will be called later to
+ # Note that sub final_indentation_adjustment will be called later to
# actually do this, but for now we will tentatively mark cuddled
# lines with ci=0 so that the the -xci loop which follows will be
# correct at cuddles.
}
}
- #########################################################
+ #--------------------------------------------------------
# SECTION 3: Undo ci set by sub extended_ci if not needed
- #########################################################
+ #--------------------------------------------------------
# Undo the ci of the leading token if its controlling token
# went out on a previous line without ci
# This is needed to compensate for a change which was made in 'sub
# starting_one_line_block' to prevent blinkers. Previously, that sub
# would not look at the total block size and rely on sub
- # set_continuation_breaks to break up long blocks. Consequently, the
+ # break_long_lines to break up long blocks. Consequently, the
# first line of those batches would end in the opening block brace of a
# sort/map/grep/eval block. When this was changed to immediately check
# for blocks which were too long, the opening block brace would go out
# : $cells[$b] <=> $cells[$a]; # batch n
# } ( 0 .. $#cells ); # batch n
- my $rLL = $self->[_rLL_];
- my $K0 = $K_to_go[0];
- my $Kprev = $self->K_previous_code($K0);
+ my $rLL = $self->[_rLL_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+
my $is_short_block;
- if ( defined($Kprev)
- && $rLL->[$Kprev]->[_BLOCK_TYPE_] )
- {
- my $block_type = $rLL->[$Kprev]->[_BLOCK_TYPE_];
- $is_short_block = $is_sort_map_grep_eval{$block_type};
- $is_short_block ||= $want_one_line_block{$block_type};
+ if ( $K_to_go[0] > 0 ) {
+ my $Kp = $K_to_go[0] - 1;
+ if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq 'b' ) {
+ $Kp -= 1;
+ }
+ if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
+ $Kp -= 1;
+ if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq 'b' ) {
+ $Kp -= 1;
+ }
+ }
+ my $seqno = $rLL->[$Kp]->[_TYPE_SEQUENCE_];
+ if ($seqno) {
+ my $block_type = $rblock_type_of_seqno->{$seqno};
+ if ($block_type) {
+ $is_short_block = $is_sort_map_grep_eval{$block_type};
+ $is_short_block ||= $want_one_line_block{$block_type};
+ }
+ }
}
# looking at each line of this batch..
# make sure this won't change if -lp is used
my $indentation_1 = $leading_spaces_to_go[$ibeg];
- if ( ref($indentation_1) ) {
- if ( $indentation_1->get_recoverable_spaces() == 0 ) {
- my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
- unless ( $indentation_2->get_recoverable_spaces() == 0 )
- {
- $pad_spaces = 0;
- }
+ if ( ref($indentation_1)
+ && $indentation_1->get_recoverable_spaces() == 0 )
+ {
+ my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
+ if ( ref($indentation_2)
+ && $indentation_2->get_recoverable_spaces() != 0 )
+ {
+ $pad_spaces = 0;
}
}
$iendm = $iend;
$ibegm = $ibeg;
$has_leading_op = $has_leading_op_next;
- } # end of loop over lines
+ } ## end of loop over lines
return;
}
} ## end closure set_logical_padding
{ ## begin closure make_alignment_patterns
- my %block_type_map;
my %keyword_map;
my %operator_map;
my %is_w_n_C;
+ my %is_my_local_our;
+ my %is_kwU;
+ my %is_use_like;
+ my %is_binary_type;
+ my %is_binary_keyword;
+ my %name_map;
BEGIN {
- # map related block names into a common name to
- # allow alignment
- %block_type_map = (
- 'unless' => 'if',
- 'else' => 'if',
- 'elsif' => 'if',
- 'when' => 'if',
- 'default' => 'if',
- 'case' => 'if',
- 'sort' => 'map',
- 'grep' => 'map',
- );
+ # Note: %block_type_map is now global to enable the -gal=s option
# map certain keywords to the same 'if' class to align
# long if/elsif sequences. [elsif.pl]
'n' => 1,
'C' => 1,
);
- }
-
- sub delete_needless_alignments {
- my ( $self, $ibeg, $iend, $ralignment_type_to_go ) = @_;
-
- # Remove unwanted alignments. This routine is a place to remove
- # alignments which might cause problems at later stages. There are
- # currently two types of fixes:
-
- # 1. Remove excess parens
- # 2. Remove alignments within 'elsif' conditions
-
- # Patch #1: Excess alignment of parens can prevent other good
- # alignments. For example, note the parens in the first two rows of
- # the following snippet. They would normally get marked for alignment
- # and aligned as follows:
-
- # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
- # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
- # my $img = new Gimp::Image( $w, $h, RGB );
-
- # This causes unnecessary paren alignment and prevents the third equals
- # from aligning. If we remove the unwanted alignments we get:
-
- # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
- # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
- # my $img = new Gimp::Image( $w, $h, RGB );
-
- # A rule for doing this which works well is to remove alignment of
- # parens whose containers do not contain other aligning tokens, with
- # the exception that we always keep alignment of the first opening
- # paren on a line (for things like 'if' and 'elsif' statements).
-
- # Setup needed constants
- my $i_good_paren = -1;
- my $imin_match = $iend + 1;
- my $i_elsif_close = $ibeg - 1;
- my $i_elsif_open = $iend + 1;
- if ( $iend > $ibeg ) {
- if ( $types_to_go[$ibeg] eq 'k' ) {
-
- # Paren patch: mark a location of a paren we should keep, such
- # as one following something like a leading 'if', 'elsif',..
- $i_good_paren = $ibeg + 1;
- if ( $types_to_go[$i_good_paren] eq 'b' ) {
- $i_good_paren++;
- }
- # 'elsif' patch: remember the range of the parens of an elsif,
- # and do not make alignments within them because this can cause
- # loss of padding and overall brace alignment in the vertical
- # aligner.
- if ( $tokens_to_go[$ibeg] eq 'elsif'
- && $i_good_paren < $iend
- && $tokens_to_go[$i_good_paren] eq '(' )
- {
- $i_elsif_open = $i_good_paren;
- $i_elsif_close = $mate_index_to_go[$i_good_paren];
- }
- }
- }
+ # leading keywords which to skip for efficiency when making parenless
+ # container names
+ my @q = qw( my local our return );
+ @{is_my_local_our}{@q} = (1) x scalar(@q);
- # Loop to make the fixes on this line
- my @imatch_list;
- for my $i ( $ibeg .. $iend ) {
-
- if ( $ralignment_type_to_go->[$i] ) {
+ # leading keywords where we should just join one token to form
+ # parenless name
+ @q = qw( use );
+ @{is_use_like}{@q} = (1) x scalar(@q);
- # Patch #2: undo alignment within elsif parens
- if ( $i > $i_elsif_open && $i < $i_elsif_close ) {
- $ralignment_type_to_go->[$i] = '';
- next;
- }
- push @imatch_list, $i;
+ # leading token types which may be used to make a container name
+ @q = qw( k w U );
+ @{is_kwU}{@q} = (1) x scalar(@q);
- }
- if ( $tokens_to_go[$i] eq ')' ) {
+ # token types which prevent using leading word as a container name
+ @q = qw(
+ x / : % . | ^ < = > || >= != *= => !~ == && |= .= -= =~ += <= %= ^= x= ~~ ** << /=
+ &= // >> ~. &. |. ^.
+ **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~
+ );
+ push @q, ',';
+ @{is_binary_type}{@q} = (1) x scalar(@q);
+
+ # token keywords which prevent using leading word as a container name
+ @_ = qw(and or err eq ne cmp);
+ @is_binary_keyword{@_} = (1) x scalar(@_);
+
+ # Some common function calls whose args can be aligned. These do not
+ # give good alignments if the lengths differ significantly.
+ %name_map = (
+ 'unlike' => 'like',
+ 'isnt' => 'is',
+ ##'is_deeply' => 'is', # poor; names lengths too different
+ );
- # Patch #1: undo the corresponding opening paren if:
- # - it is at the top of the stack
- # - and not the first overall opening paren
- # - does not follow a leading keyword on this line
- my $imate = $mate_index_to_go[$i];
- if ( @imatch_list
- && $imatch_list[-1] eq $imate
- && ( $ibeg > 1 || @imatch_list > 1 )
- && $imate > $i_good_paren )
- {
- $ralignment_type_to_go->[$imate] = '';
- pop @imatch_list;
- }
- }
- }
- return;
}
sub make_alignment_patterns {
# Here we do some important preliminary work for the
- # vertical aligner. We create three arrays for one
+ # vertical aligner. We create four arrays for one
# output line. These arrays contain strings that can
# be tested by the vertical aligner to see if
# consecutive lines can be aligned vertically.
#
- # The three arrays are indexed on the vertical
+ # The four arrays are indexed on the vertical
# alignment fields and are:
# @tokens - a list of any vertical alignment tokens for this line.
# These are tokens, such as '=' '&&' '#' etc which
# @patterns - a modified list of token types, one for each alignment
# field. These should normally each match before alignment is
# allowed, even when the alignment tokens match.
- my ( $self, $ibeg, $iend, $ralignment_type_to_go ) = @_;
+ # @field_lengths - the display width of each field
+
+ my ( $self, $ibeg, $iend, $ralignment_type_to_go, $alignment_count,
+ $ralignment_hash )
+ = @_;
+
+ # The var $ralignment_hash contains all of the alignments for this
+ # line. It is not yet used but is available for future coding in case
+ # there is a need to do a preliminary scan of the alignment tokens.
+ if (DEVEL_MODE) {
+ my $new_count = 0;
+ if ( defined($ralignment_hash) ) {
+ $new_count = keys %{$ralignment_hash};
+ }
+ my $old_count = $alignment_count;
+ $old_count = 0 unless ($old_count);
+ if ( $new_count != $old_count ) {
+ my $K = $K_to_go[$ibeg];
+ my $rLL = $self->[_rLL_];
+ my $lnl = $rLL->[$K]->[_LINE_INDEX_];
+ Fault(
+"alignment hash token count gives count=$new_count but old count is $old_count near line=$lnl\n"
+ );
+ }
+ }
+
+ # -------------------------------------
+ # Shortcut for lines without alignments
+ # -------------------------------------
+ if ( !$alignment_count ) {
+ my $rtokens = [];
+ my $rfield_lengths = [ $summed_lengths_to_go[ $iend + 1 ] -
+ $summed_lengths_to_go[$ibeg] ];
+ my $rpatterns;
+ my $rfields;
+ if ( $ibeg == $iend ) {
+ $rfields = [ $tokens_to_go[$ibeg] ];
+ $rpatterns = [ $types_to_go[$ibeg] ];
+ }
+ else {
+ $rfields = [ join( '', @tokens_to_go[ $ibeg .. $iend ] ) ];
+ $rpatterns = [ join( '', @types_to_go[ $ibeg .. $iend ] ) ];
+ }
+ return [ $rtokens, $rfields, $rpatterns, $rfield_lengths ];
+ }
+
+ my $i_start = $ibeg;
+ my $depth = 0;
+ my %container_name = ( 0 => "" );
+
my @tokens = ();
my @fields = ();
my @patterns = ();
my @field_lengths = ();
- my $i_start = $ibeg;
- # For a 'use' statement, use the module name as container name.
- # Fixes issue rt136416.
- my $cname = "";
- if ( $types_to_go[$ibeg] eq 'k' && $tokens_to_go[$ibeg] eq 'use' ) {
- my $inext = $inext_to_go[$ibeg];
- if ( $inext <= $iend ) { $cname = $tokens_to_go[$inext] }
- }
+ #-------------------------------------------------------------
+ # Make a container name for any uncontained commas, issue c089
+ #-------------------------------------------------------------
+ # This is a generalization of the fix for rt136416 which was a
+ # specialized patch just for 'use Module' statements.
+ # We restrict this to semicolon-terminated statements; that way
+ # we know that the top level commas are not in a list container.
+ if ( $ibeg == 0 && $iend == $max_index_to_go ) {
+ my $iterm = $max_index_to_go;
+ if ( $types_to_go[$iterm] eq '#' ) {
+ $iterm = $iprev_to_go[$iterm];
+ }
+
+ # Alignment lines ending like '=> sub {'; fixes issue c093
+ my $term_type_ok = $types_to_go[$iterm] eq ';';
+ $term_type_ok ||=
+ $tokens_to_go[$iterm] eq '{' && $block_type_to_go[$iterm];
+
+ if ( $iterm > $ibeg
+ && $term_type_ok
+ && !$is_my_local_our{ $tokens_to_go[$ibeg] }
+ && $levels_to_go[$ibeg] eq $levels_to_go[$iterm] )
+ {
- my $depth = 0;
- my %container_name = ( 0 => "$cname" );
+ # Make a container name by combining all leading barewords,
+ # keywords and functions.
+ my $name = "";
+ my $count = 0;
+ my $count_max;
+ my $iname_end;
+ my $ilast_blank;
+ for ( $ibeg .. $iterm ) {
+ my $type = $types_to_go[$_];
+
+ if ( $type eq 'b' ) {
+ $ilast_blank = $_;
+ next;
+ }
+
+ my $token = $tokens_to_go[$_];
+
+ # Give up if we find an opening paren, binary operator or
+ # comma within or after the proposed container name.
+ if ( $token eq '('
+ || $is_binary_type{$type}
+ || $type eq 'k' && $is_binary_keyword{$token} )
+ {
+ $name = "";
+ last;
+ }
+
+ # The container name is only built of certain types:
+ last if ( !$is_kwU{$type} );
+ # Normally it is made of one word, but two words for 'use'
+ if ( $count == 0 ) {
+ if ( $type eq 'k'
+ && $is_use_like{ $tokens_to_go[$_] } )
+ {
+ $count_max = 2;
+ }
+ else {
+ $count_max = 1;
+ }
+ }
+ elsif ( defined($count_max) && $count >= $count_max ) {
+ last;
+ }
+
+ if ( defined( $name_map{$token} ) ) {
+ $token = $name_map{$token};
+ }
+
+ $name .= ' ' . $token;
+ $iname_end = $_;
+ $count++;
+ }
+
+ # Require a space after the container name token(s)
+ if ( $name
+ && defined($ilast_blank)
+ && $ilast_blank > $iname_end )
+ {
+ $name = substr( $name, 1 );
+ $container_name{'0'} = $name;
+ }
+ }
+ }
+
+ # --------------------
+ # Loop over all tokens
+ # --------------------
my $j = 0; # field index
$patterns[0] = "";
my $token = $tokens_to_go[$i];
my $depth_last = $depth;
if ( $type_sequence_to_go[$i] ) {
- if ( $is_opening_type{$token} ) {
+ if ( $is_opening_token{$token} ) {
# if container is balanced on this line...
my $i_mate = $mate_index_to_go[$i];
# Containers beginning with { and [ are given those names
# for uniqueness. That way commas in different containers
# will not match. Here is an example of what this prevents:
- # a => [ 1, 2, 3 ],
+ # a => [ 1, 2, 3 ],
# b => { b1 => 4, b2 => 5 },
# Here is another example of what we avoid by labeling the
# commas properly:
if ( $token eq '(' ) {
$name = $self->make_paren_name($i);
}
+
+ # name cannot be '.', so change to something else if so
+ if ( $name eq '.' ) { $name = 'dot' }
+
$container_name{$depth} = "+" . $name;
- # Make the container name even more unique if necessary.
- # If we are not vertically aligning this opening paren,
- # append a character count to avoid bad alignment because
- # it usually looks bad to align commas within containers
- # for which the opening parens do not align. Here
- # is an example very BAD alignment of commas (because
- # the atan2 functions are not all aligned):
- # $XY =
- # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
- # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
- # $X * atan2( $X, 1 ) -
- # $Y * atan2( $Y, 1 );
- #
- # On the other hand, it is usually okay to align commas
- # if opening parens align, such as:
- # glVertex3d( $cx + $s * $xs, $cy, $z );
- # glVertex3d( $cx, $cy + $s * $ys, $z );
- # glVertex3d( $cx - $s * $xs, $cy, $z );
- # glVertex3d( $cx, $cy - $s * $ys, $z );
- #
- # To distinguish between these situations, we will append
- # the length of the line from the previous matching
- # token, or beginning of line, to the function name.
- # This will allow the vertical aligner to reject
- # undesirable matches.
+ # Make the container name even more unique if necessary.
+ # If we are not vertically aligning this opening paren,
+ # append a character count to avoid bad alignment since
+ # it usually looks bad to align commas within containers
+ # for which the opening parens do not align. Here
+ # is an example very BAD alignment of commas (because
+ # the atan2 functions are not all aligned):
+ # $XY =
+ # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
+ # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
+ # $X * atan2( $X, 1 ) -
+ # $Y * atan2( $Y, 1 );
+ #
+ # On the other hand, it is usually okay to align commas
+ # if opening parens align, such as:
+ # glVertex3d( $cx + $s * $xs, $cy, $z );
+ # glVertex3d( $cx, $cy + $s * $ys, $z );
+ # glVertex3d( $cx - $s * $xs, $cy, $z );
+ # glVertex3d( $cx, $cy - $s * $ys, $z );
+ #
+ # To distinguish between these situations, we append
+ # the length of the line from the previous matching
+ # token, or beginning of line, to the function name.
+ # This will allow the vertical aligner to reject
+ # undesirable matches.
# if we are not aligning on this paren...
if ( !$ralignment_type_to_go->[$i] ) {
if ( $i_start == $ibeg ) {
- # For first token, use distance from start of line
- # but subtract off the indentation due to level.
- # Otherwise, results could vary with indentation.
+ # For first token, use distance from start of
+ # line but subtract off the indentation due to
+ # level. Otherwise, results could vary with
+ # indentation.
$len +=
leading_spaces_to_go($ibeg) -
$levels_to_go[$i_start] *
# tack this length onto the container name to try
# to make a unique token name
$container_name{$depth} .= "-" . $len;
- }
- }
- }
+ } ## end if ( !$ralignment_type_to_go...)
+ } ## end if ( $i_mate > $i && $i_mate...)
+ } ## end if ( $is_opening_token...)
+
elsif ( $is_closing_type{$token} ) {
$depth-- if $depth > 0;
}
- }
+ } ## end if ( $type_sequence_to_go...)
# if we find a new synchronization token, we are done with
# a field
$i_start = $i;
$j++;
$patterns[$j] = "";
- }
+ } ## end if ( new synchronization token
# continue accumulating tokens
$patterns[$j] .= $type;
}
- # handle non-keywords..
- else {
+ # Mark most things before arrows as a quote to
+ # get them to line up. Testfile: mixed.pl.
+
+ # handle $type =~ /^[wnC]$/
+ elsif ( $is_w_n_C{$type} ) {
my $type_fix = $type;
- # Mark most things before arrows as a quote to
- # get them to line up. Testfile: mixed.pl.
- # $type =~ /^[wnC]$/
- if ( $i < $iend - 1 && $is_w_n_C{$type} ) {
+ if ( $i < $iend - 1 ) {
my $next_type = $types_to_go[ $i + 1 ];
my $i_next_nonblank =
( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
}
}
- # Convert a bareword within braces into a quote for matching.
- # This will allow alignment of expressions like this:
+ # Convert a bareword within braces into a quote for
+ # matching. This will allow alignment of expressions like
+ # this:
# local ( $SIG{'INT'} ) = IGNORE;
# local ( $SIG{ALRM} ) = 'POSTMAN';
if ( $type eq 'w'
# patch to make numbers and quotes align
if ( $type eq 'n' ) { $type_fix = 'Q' }
- # patch to ignore any ! in patterns
- if ( $type eq '!' ) { $type_fix = '' }
-
$patterns[$j] .= $type_fix;
+ } ## end elsif ( $is_w_n_C{$type} )
- # remove any zero-level name at first fat comma
- if ( $depth == 0 && $type eq '=>' ) {
- $container_name{$depth} = "";
- }
+ # ignore any ! in patterns
+ elsif ( $type eq '!' ) { }
+ # everything else
+ else {
+ $patterns[$j] .= $type;
}
- }
+
+ # remove any zero-level name at first fat comma
+ if ( $depth == 0 && $type eq '=>' ) {
+ $container_name{$depth} = "";
+ }
+ } ## end for my $i ( $ibeg .. $iend)
# done with this line .. join text of tokens to make the last field
push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
push @field_lengths,
$summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$i_start];
- return ( \@tokens, \@fields, \@patterns, \@field_lengths );
- }
+ return [ \@tokens, \@fields, \@patterns, \@field_lengths ];
+ } ## end sub make_alignment_patterns
} ## end closure make_alignment_patterns
return $name;
}
-{ ## begin closure set_adjusted_indentation
+{ ## begin closure final_indentation_adjustment
my ( $last_indentation_written, $last_unadjusted_indentation,
$last_leading_token );
- sub initialize_adjusted_indentation {
+ sub initialize_final_indentation_adjustment {
$last_indentation_written = 0;
$last_unadjusted_indentation = 0;
$last_leading_token = "";
return;
}
- sub set_adjusted_indentation {
+ sub final_indentation_adjustment {
- # This routine has the final say regarding the actual indentation of
- # a line. It starts with the basic indentation which has been
- # defined for the leading token, and then takes into account any
- # options that the user has set regarding special indenting and
- # outdenting.
+ #--------------------------------------------------------------------
+ # This routine sets the final indentation of a line in the Formatter.
+ #--------------------------------------------------------------------
+
+ # It starts with the basic indentation which has been defined for the
+ # leading token, and then takes into account any options that the user
+ # has set regarding special indenting and outdenting.
# This routine has to resolve a number of complex interacting issues,
# including:
) = @_;
my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
my $ris_bli_container = $self->[_ris_bli_container_];
my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
my $rwant_reduced_ci = $self->[_rwant_reduced_ci_];
my $rK_weld_left = $self->[_rK_weld_left_];
- # we need to know the last token of this line
- my ( $terminal_type, $i_terminal ) = terminal_type_i( $ibeg, $iend );
+ # Find the last code token of this line
+ my $i_terminal = $iend;
+ my $terminal_type = $types_to_go[$iend];
+ if ( $terminal_type eq '#' && $i_terminal > $ibeg ) {
+ $i_terminal -= 1;
+ $terminal_type = $types_to_go[$i_terminal];
+ if ( $terminal_type eq 'b' && $i_terminal > $ibeg ) {
+ $i_terminal -= 1;
+ $terminal_type = $types_to_go[$i_terminal];
+ }
+ }
my $terminal_block_type = $block_type_to_go[$i_terminal];
my $is_outdented_line = 0;
- my $terminal_is_in_list = $self->is_in_list_by_i($i_terminal);
-
- my $type_beg = $types_to_go[$ibeg];
- my $token_beg = $tokens_to_go[$ibeg];
- my $K_beg = $K_to_go[$ibeg];
- my $ibeg_weld_fix = $ibeg;
- my $seqno_beg = $type_sequence_to_go[$ibeg];
- my $is_bli_beg = $seqno_beg ? $ris_bli_container->{$seqno_beg} : 0;
+ my $type_beg = $types_to_go[$ibeg];
+ my $token_beg = $tokens_to_go[$ibeg];
+ my $block_type_beg = $block_type_to_go[$ibeg];
+ my $level_beg = $levels_to_go[$ibeg];
+ my $leading_spaces_beg = $leading_spaces_to_go[$ibeg];
+ my $K_beg = $K_to_go[$ibeg];
+ my $seqno_beg = $type_sequence_to_go[$ibeg];
+ my $ibeg_weld_fix = $ibeg;
+ my $is_closing_type_beg = $is_closing_type{$type_beg};
+ my $is_bli_beg = $seqno_beg ? $ris_bli_container->{$seqno_beg} : 0;
# QW INDENTATION PATCH 3:
my $seqno_qw_closing;
# MOJO: Set a flag if this lines begins with ')->'
my $leading_paren_arrow = (
- $types_to_go[$ibeg] eq '}'
- && $tokens_to_go[$ibeg] eq ')'
+ $is_closing_type_beg
+ && $token_beg eq ')'
&& (
( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' )
|| ( $ibeg < $i_terminal - 1
)
);
- ##########################################################
+ #---------------------------------------------------------
# Section 1: set a flag and a default indentation
#
# Most lines are indented according to the initial token.
# 1 - outdent
# 2 - vertically align with opening token
# 3 - indent
- ##########################################################
+ #---------------------------------------------------------
my $adjust_indentation = 0;
my $default_adjust_indentation = $adjust_indentation;
# that with -lp type formatting the opening and closing tokens to not
# have sequence numbers.
if ( $seqno_qw_closing && $total_weld_count ) {
- my $K_next_nonblank = $self->K_next_code($K_beg);
- if ( defined($K_next_nonblank)
- && defined( $rK_weld_left->{$K_next_nonblank} ) )
- {
- my $itest = $ibeg + ( $K_next_nonblank - $K_beg );
- if ( $itest <= $max_index_to_go ) {
- $ibeg_weld_fix = $itest;
+ my $i_plus = $inext_to_go[$ibeg];
+ if ( $i_plus <= $max_index_to_go ) {
+ my $K_plus = $K_to_go[$i_plus];
+ if ( defined( $rK_weld_left->{$K_plus} ) ) {
+ $ibeg_weld_fix = $i_plus;
}
}
}
# if we are at a closing token of some type..
- if ( $is_closing_type{$type_beg} || $seqno_qw_closing ) {
+ if ( $is_closing_type_beg || $seqno_qw_closing ) {
# get the indentation of the line containing the corresponding
# opening token
= $self->get_opening_indentation( $ibeg_weld_fix, $ri_first,
$ri_last, $rindentation_list, $seqno_qw_closing );
+ my $terminal_is_in_list = $self->is_in_list_by_i($i_terminal);
+
# First set the default behavior:
if (
)
# remove continuation indentation for any line like
- # } ... {
+ # } ... {
# or without ending '{' and unbalanced, such as
# such as '}->{$operator}'
|| (
$type_beg eq '}'
&& ( $types_to_go[$iend] eq '{'
- || $levels_to_go[$iend] < $levels_to_go[$ibeg] )
+ || $levels_to_go[$iend] < $level_beg )
)
# and when the next line is at a lower indentation level...
# it is the last token before a level decrease. This will allow
# a closing token to line up with its opening counterpart, and
# avoids an indentation jump larger than 1 level.
- if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
- && $i_terminal == $ibeg
- && defined($K_beg) )
+ if ( $i_terminal == $ibeg
+ && $is_closing_type_beg
+ && defined($K_beg)
+ && $K_beg < $Klimit )
{
- my $K_next_nonblank = $self->K_next_code($K_beg);
+ my $K_plus = $K_beg + 1;
+ my $type_plus = $rLL->[$K_plus]->[_TYPE_];
+
+ if ( $type_plus eq 'b' && $K_plus < $Klimit ) {
+ $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
+ }
+
+ if ( $type_plus eq '#' && $K_plus < $Klimit ) {
+ $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
+ if ( $type_plus eq 'b' && $K_plus < $Klimit ) {
+ $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
+ }
- if ( !$is_bli_beg && defined($K_next_nonblank) ) {
- my $lev = $rLL->[$K_beg]->[_LEVEL_];
- my $level_next = $rLL->[$K_next_nonblank]->[_LEVEL_];
+ # Note: we have skipped past just one comment (perhaps a
+ # side comment). There could be more, and we could easily
+ # skip past all the rest with the following code, or with a
+ # while loop. It would be rare to have to do this, and
+ # those block comments would still be indented, so it would
+ # to leave them indented. So it seems best to just stop at
+ # a maximum of one comment.
+ ##if ($type_plus eq '#') {
+ ## $K_plus = $self->K_next_code($K_plus);
+ ##}
+ }
+
+ if ( !$is_bli_beg && defined($K_plus) ) {
+ my $lev = $level_beg;
+ my $level_next = $rLL->[$K_plus]->[_LEVEL_];
# and do not undo ci if it was set by the -xci option
$adjust_indentation = 1
# but right now we do not have that information. For now
# we see if we are in a list, and this works well.
# See test files 'sub*.t' for good test cases.
- if ( $block_type_to_go[$ibeg] =~ /$ASUB_PATTERN/
- && $terminal_is_in_list
- && !$rOpts->{'indent-closing-brace'} )
+ if ( $terminal_is_in_list
+ && !$rOpts_indent_closing_brace
+ && $block_type_beg
+ && $block_type_beg =~ /$ASUB_PATTERN/ )
{
(
$opening_indentation, $opening_offset,
)
= $self->get_opening_indentation( $ibeg, $ri_first,
$ri_last, $rindentation_list );
- my $indentation = $leading_spaces_to_go[$ibeg];
+ my $indentation = $leading_spaces_beg;
if ( defined($opening_indentation)
&& get_spaces($indentation) >
get_spaces($opening_indentation) )
# Undo ci of line with leading closing eval brace,
# but not beyond the indention of the line with
# the opening brace.
- if ( $block_type_to_go[$ibeg] eq 'eval'
- && !$rOpts->{'line-up-parentheses'}
- && !$rOpts->{'indent-closing-brace'} )
+ if (
+ $block_type_beg eq 'eval'
+ ##&& !$rOpts_line_up_parentheses
+ && !ref($leading_spaces_beg)
+ && !$rOpts_indent_closing_brace
+ )
{
(
$opening_indentation, $opening_offset,
)
= $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
$rindentation_list );
- my $indentation = $leading_spaces_to_go[$ibeg];
+ my $indentation = $leading_spaces_beg;
if ( defined($opening_indentation)
&& get_spaces($indentation) >
get_spaces($opening_indentation) )
# Now modify default behavior according to user request:
# handle option to indent non-blocks of the form ); }; ];
# But don't do special indentation to something like ')->pack('
- if ( !$block_type_to_go[$ibeg] ) {
+ if ( !$block_type_beg ) {
# Note that logical padding has already been applied, so we may
# need to remove some spaces to get a valid hash key.
- my $tok = $tokens_to_go[$ibeg];
+ my $tok = $token_beg;
my $cti = $closing_token_indentation{$tok};
# Fix the value of 'cti' for an isloated non-welded closing qw
# handle option to indent blocks
else {
if (
- $rOpts->{'indent-closing-brace'}
+ $rOpts_indent_closing_brace
&& (
$i_terminal == $ibeg # isolated terminal '}'
|| $is_semicolon_terminated
}
# if at ');', '};', '>;', and '];' of a terminal qw quote
- elsif ($rpatterns->[0] =~ /^qb*;$/
- && $rfields->[0] =~ /^([\)\}\]\>]);$/ )
+ elsif (
+ substr( $rpatterns->[0], 0, 2 ) eq 'qb'
+ && substr( $rfields->[0], -1, 1 ) eq ';'
+ ##&& $rpatterns->[0] =~ /^qb*;$/
+ && $rfields->[0] =~ /^([\)\}\]\>]);$/
+ )
{
if ( $closing_token_indentation{$1} == 0 ) {
$adjust_indentation = 1;
# if line begins with a ':', align it with any
# previous line leading with corresponding ?
- elsif ( $types_to_go[$ibeg] eq ':' ) {
+ elsif ( $type_beg eq ':' ) {
(
$opening_indentation, $opening_offset,
$is_leading, $opening_exists
if ($is_leading) { $adjust_indentation = 2; }
}
- ##########################################################
+ #---------------------------------------------------------
# Section 2: set indentation according to flag set above
#
# Select the indentation object to define leading
# then we want to use one level below the last token
# ($i_terminal) in order to get it to fully outdent through
# all levels.
- ##########################################################
+ #---------------------------------------------------------
my $indentation;
my $lev;
my $level_end = $levels_to_go[$iend];
if ( $adjust_indentation == 0 ) {
- $indentation = $leading_spaces_to_go[$ibeg];
- $lev = $levels_to_go[$ibeg];
+ $indentation = $leading_spaces_beg;
+ $lev = $level_beg;
}
elsif ( $adjust_indentation == 1 ) {
elsif ( $adjust_indentation == 2 ) {
# handle option to align closing token with opening token
- $lev = $levels_to_go[$ibeg];
+ $lev = $level_beg;
# calculate spaces needed to align with opening token
my $space_count =
# indented, but this is better than frequently leaving it not
# indented enough.
my $last_spaces = get_spaces($last_indentation_written);
- if ( !$is_closing_token{$last_leading_token} ) {
+
+ if ( ref($last_indentation_written)
+ && !$is_closing_token{$last_leading_token} )
+ {
$last_spaces +=
get_recoverable_spaces($last_indentation_written);
}
# reset the indentation to the new space count if it works
# only options are all or none: nothing in-between looks good
- $lev = $levels_to_go[$ibeg];
- if ( $space_count < $last_spaces ) {
- if ($rOpts_line_up_parentheses) {
- my $lev = $levels_to_go[$ibeg];
- $indentation =
- new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
- }
- else {
+ $lev = $level_beg;
+
+ my $diff = $last_spaces - $space_count;
+ if ( $diff > 0 ) {
+ $indentation = $space_count;
+ }
+ else {
+
+ # We need to fix things ... but there is no good way to do it.
+ # The best solution is for the user to use a longer maximum
+ # line length. We could get a smooth variation if we just move
+ # the paren in using
+ # $space_count -= ( 1 - $diff );
+ # But unfortunately this can give a rather unbalanced look.
+
+ # For -xlp we currently allow a tolerance of one indentation
+ # level and then revert to a simpler default. This will jump
+ # suddenly but keeps a balanced look.
+ if ( $rOpts_extended_line_up_parentheses
+ && $diff >= -$rOpts_indent_columns
+ && $space_count > $leading_spaces_beg )
+ {
$indentation = $space_count;
}
- }
- # revert to default if it doesn't work
- else {
- $space_count = leading_spaces_to_go($ibeg);
- if ( $default_adjust_indentation == 0 ) {
- $indentation = $leading_spaces_to_go[$ibeg];
+ # Otherwise revert to defaults
+ elsif ( $default_adjust_indentation == 0 ) {
+ $indentation = $leading_spaces_beg;
}
elsif ( $default_adjust_indentation == 1 ) {
$indentation = $reduced_spaces_to_go[$i_terminal];
# } @files;
# }
#
- if ( $block_type_to_go[$ibeg]
+ if ( $block_type_beg
&& $ci_levels_to_go[$i_terminal] == 0 )
{
my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
# use previous indentation but use own level
# to cause list to be flushed properly
- $lev = $levels_to_go[$ibeg];
+ $lev = $level_beg;
}
# remember indentation except for multi-line quotes, which get
# no indentation
unless ( $ibeg == 0 && $starting_in_quote ) {
$last_indentation_written = $indentation;
- $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
- $last_leading_token = $tokens_to_go[$ibeg];
+ $last_unadjusted_indentation = $leading_spaces_beg;
+ $last_leading_token = $token_beg;
# Patch to make a line which is the end of a qw quote work with the
# -lp option. Make $token_beg look like a closing token as some
# $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
# 2981014)])
# ));
- ## if ($seqno_qw_closing) { $last_leading_token = ')' }
if ( $seqno_qw_closing
&& ( length($token_beg) > 1 || $token_beg eq '>' ) )
{
# be sure lines with leading closing tokens are not outdented more
# than the line which contained the corresponding opening token.
- #############################################################
+ #--------------------------------------------------------
# updated per bug report in alex_bug.pl: we must not
# mess with the indentation of closing logical braces so
# we must treat something like '} else {' as if it were
# an isolated brace
- #############################################################
- my $is_isolated_block_brace = $block_type_to_go[$ibeg]
- && (
- $i_terminal == $ibeg
- || $is_if_elsif_else_unless_while_until_for_foreach{
- $block_type_to_go[$ibeg]
- }
+ #--------------------------------------------------------
+ my $is_isolated_block_brace = $block_type_beg
+ && ( $i_terminal == $ibeg
+ || $is_if_elsif_else_unless_while_until_for_foreach{$block_type_beg}
);
# only do this for a ':; which is aligned with its leading '?'
- my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
+ my $is_unaligned_colon = $type_beg eq ':' && !$is_leading;
if (
defined($opening_indentation)
&& (
# certain leading keywords if requested
- (
- $rOpts->{'outdent-keywords'}
- && $types_to_go[$ibeg] eq 'k'
- && $outdent_keyword{ $tokens_to_go[$ibeg] }
- )
+ $rOpts_outdent_keywords
+ && $type_beg eq 'k'
+ && $outdent_keyword{$token_beg}
# or labels if requested
- || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
+ || $rOpts_outdent_labels && $type_beg eq 'J'
# or static block comments if requested
- || ( $types_to_go[$ibeg] eq '#'
- && $rOpts->{'outdent-static-block-comments'}
- && $is_static_block_comment )
+ || $is_static_block_comment
+ && $rOpts_outdent_static_block_comments
)
)
-
{
my $space_count = leading_spaces_to_go($ibeg);
if ( $space_count > 0 ) {
# do not promote a spaced static block comment to non-spaced;
# this is not normally necessary but could be for some
# unusual user inputs (such as -ci = -i)
- if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
+ if ( $type_beg eq '#' && $space_count == 0 ) {
$space_count = 1;
}
- if ($rOpts_line_up_parentheses) {
- $indentation =
- new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
- }
- else {
- $indentation = $space_count;
- }
+ $indentation = $space_count;
}
}
$terminal_block_type, $is_semicolon_terminated,
$is_outdented_line );
}
-} ## end closure set_adjusted_indentation
+} ## end closure final_indentation_adjustment
sub get_opening_indentation {
= @_;
# Define vertical tightness controls for the nth line of a batch.
- # We create an array of parameters which tell the vertical aligner
+
+ # These parameters are passed to the vertical aligner to indicated
# if we should combine this line with the next line to achieve the
- # desired vertical tightness. The array of parameters contains:
- #
- # [0] type: 1=opening non-block 2=closing non-block
- # 3=opening block brace 4=closing block brace
+ # desired vertical tightness. This was previously an array but
+ # has been converted to a hash:
+
+ # old hash Meaning
+ # index key
#
- # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
- # if closing: spaces of padding to use
- # [2] sequence number of container
- # [3] valid flag: do not append if this flag is false. Will be
- # true if appropriate -vt flag is set. Otherwise, Will be
- # made true only for 2 line container in parens with -lp
+ # 0 _vt_type: 1=opening non-block 2=closing non-block
+ # 3=opening block brace 4=closing block brace
#
- # These flags are used by sub set_leading_whitespace in
- # the vertical aligner
-
- my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
+ # 1a _vt_opening_flag: 1=no multiple steps, 2=multiple steps ok
+ # 1b _vt_closing_flag: spaces of padding to use if closing
+ # 2 _vt_seqno: sequence number of container
+ # 3 _vt_valid flag: do not append if this flag is false. Will be
+ # true if appropriate -vt flag is set. Otherwise, Will be
+ # made true only for 2 line container in parens with -lp
+ # 4 _vt_seqno_beg: sequence number of first token of line
+ # 5 _vt_seqno_end: sequence number of last token of line
+ # 6 _vt_min_lines: min number of lines for joining opening cache,
+ # 0=no constraint
+ # 7 _vt_max_lines: max number of lines for joining opening cache,
+ # 0=no constraint
# The vertical tightness mechanism can add whitespace, so whitespace can
# continually increase if we allowed it when the -fws flag is set.
# See case b499 for an example.
- return $rvertical_tightness_flags if ($rOpts_freeze_whitespace);
- # Uses these parameters:
+ # Speedup: just return for a comment
+ if ( $max_index_to_go == 0 && $types_to_go[0] eq '#' ) {
+ return;
+ }
+
+ # Define these values...
+ my $vt_type = 0;
+ my $vt_opening_flag = 0;
+ my $vt_closing_flag = 0;
+ my $vt_seqno = 0;
+ my $vt_valid_flag = 0;
+ my $vt_seqno_beg = 0;
+ my $vt_seqno_end = 0;
+ my $vt_min_lines = 0;
+ my $vt_max_lines = 0;
+
+ goto RETURN
+ if ($rOpts_freeze_whitespace);
+
+ # Uses these global parameters:
# $rOpts_block_brace_tightness
# $rOpts_block_brace_vertical_tightness
# $rOpts_stack_closing_block_brace
my $ibeg_next = $ri_first->[ $n + 1 ];
my $token_end = $tokens_to_go[$iend];
my $iend_next = $ri_last->[ $n + 1 ];
+
if (
$type_sequence_to_go[$iend]
&& !$block_type_to_go[$iend]
# allow 2-line method call to be closed up
|| ( $rOpts_line_up_parentheses
&& $token_end eq '('
+ && $self->[_rlp_object_by_seqno_]
+ ->{ $type_sequence_to_go[$iend] }
&& $iend > $ibeg
&& $types_to_go[ $iend - 1 ] ne 'b' )
)
)
{
-
# avoid multiple jumps in nesting depth in one line if
# requested
my $ovt = $opening_vertical_tightness{$token_end};
my $iend_next = $ri_last->[ $n + 1 ];
+
+ # Turn off the -vt flag if the next line ends in a weld.
+ # This avoids an instability with one-line welds (fixes b1183).
+ my $type_end_next = $types_to_go[$iend_next];
+ $ovt = 0
+ if ( $self->[_rK_weld_left_]->{ $K_to_go[$iend_next] }
+ && $is_closing_type{$type_end_next} );
+
+ # Avoid conflict of -bom and -pt=1 or -pt=2, fixes b1270
+ # See similar patch above for $cvt.
+ my $seqno = $type_sequence_to_go[$iend];
+ if ( $ovt && $self->[_rwant_container_open_]->{$seqno} ) {
+ $ovt = 0;
+ }
+
unless (
$ovt < 2
&& ( $nesting_depth_to_go[ $iend_next + 1 ] !=
# and aligner will validate it if it sees the closing paren
# within 2 lines.
my $valid_flag = $ovt;
- @{$rvertical_tightness_flags} =
- ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
+
+ $vt_type = 1;
+ $vt_opening_flag = $ovt;
+ $vt_seqno = $type_sequence_to_go[$iend];
+ $vt_valid_flag = $valid_flag;
}
}
my $ovt = $opening_vertical_tightness{$token_next};
my $cvt = $closing_vertical_tightness{$token_next};
+ # Avoid conflict of -bom and -pvt=1 or -pvt=2, fixes b977, b1303
+ # See similar patch above for $ovt.
+ my $seqno = $type_sequence_to_go[$ibeg_next];
+ if ( $cvt && $self->[_rwant_container_open_]->{$seqno} ) {
+ $cvt = 0;
+ }
+
# Implement cvt=3: like cvt=0 for assigned structures, like cvt=1
# otherwise. Added for rt136417.
if ( $cvt == 3 ) {
$cvt = $self->[_ris_assigned_structure_]->{$seqno} ? 0 : 1;
}
+ # The unusual combination -pvtc=2 -dws -naws can be unstable.
+ # This fixes b1282, b1283. This can be moved to set_options.
+ if ( $cvt == 2
+ && $rOpts_delete_old_whitespace
+ && !$rOpts_add_whitespace )
+ {
+ $cvt = 1;
+ }
+
if (
# Never append a trailing line like ')->pack(' because it
# allow closing up 2-line method calls
|| ( $rOpts_line_up_parentheses
- && $token_next eq ')' )
+ && $token_next eq ')'
+ && $self->[_rlp_object_by_seqno_]
+ ->{ $type_sequence_to_go[$ibeg_next] } )
)
)
)
if ($ok) {
my $valid_flag = $cvt;
- @{$rvertical_tightness_flags} = (
- 2,
- $tightness{$token_next} == 2 ? 0 : 1,
- $type_sequence_to_go[$ibeg_next], $valid_flag,
- );
+ my $min_lines = 0;
+ my $max_lines = 0;
+
+ # Fix for b1187 and b1188: Blinking can occur if we allow
+ # welded tokens to re-form into one-line blocks during
+ # vertical alignment when -lp used. So for this case we
+ # set the minimum number of lines to be 1 instead of 0.
+ # The maximum should be 1 if -vtc is not used. If -vtc is
+ # used, we turn the valid
+ # flag off and set the maximum to 0. This is equivalent to
+ # using a large number.
+ my $seqno_ibeg_next = $type_sequence_to_go[$ibeg_next];
+ if ( $rOpts_line_up_parentheses
+ && $total_weld_count
+ && $self->[_rlp_object_by_seqno_]->{$seqno_ibeg_next}
+ && $self->is_welded_at_seqno($seqno_ibeg_next) )
+ {
+ $min_lines = 1;
+ $max_lines = $cvt ? 0 : 1;
+ $valid_flag = 0;
+ }
+
+ $vt_type = 2;
+ $vt_closing_flag = $tightness{$token_next} == 2 ? 0 : 1;
+ $vt_seqno = $type_sequence_to_go[$ibeg_next];
+ $vt_valid_flag = $valid_flag;
+ $vt_min_lines = $min_lines;
+ $vt_max_lines = $max_lines;
}
}
}
&& $token_end ne '||' && $token_end ne '&&'
# Keep break after '=' if -lp. Fixes b964 b1040 b1062 b1083 b1089.
- && !( $token_end eq '=' && $rOpts_line_up_parentheses )
+ && !(
+ $token_end eq '='
+ && $rOpts_line_up_parentheses
+ && $self->[_rlp_object_by_seqno_]
+ ->{ $type_sequence_to_go[$ibeg_next] }
+ )
# looks bad if we align vertically with the wrong container
&& $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
)
{
- my $valid_flag = 1;
- my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
- @{$rvertical_tightness_flags} =
- ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
+ my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
+
+ $vt_type = 2;
+ $vt_closing_flag = $spaces;
+ $vt_seqno = $type_sequence_to_go[$ibeg_next];
+ $vt_valid_flag = 1;
}
#--------------------------------------------------------------
if ( $is_closing_token{$token_end}
&& $is_closing_token{$token_beg_next} )
{
+
+ # avoid instability of combo -bom and -sct; b1179
+ my $seq_next = $type_sequence_to_go[$ibeg_next];
$stackable = $stack_closing_token{$token_beg_next}
- unless ( $block_type_to_go[$ibeg_next] )
- ; # shouldn't happen; just checking
+ unless ( $block_type_to_go[$ibeg_next]
+ || $seq_next && $self->[_rwant_container_open_]->{$seq_next} );
}
elsif ($is_opening_token{$token_end}
&& $is_opening_token{$token_beg_next} )
&& $types_to_go[$iend_next] eq '#' )
)
{
- my $valid_flag = 1;
my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
- @{$rvertical_tightness_flags} = (
- 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
- );
+
+ $vt_type = 2;
+ $vt_closing_flag = $spaces;
+ $vt_seqno = $type_sequence_to_go[$ibeg_next];
+ $vt_valid_flag = 1;
+
}
}
}
&& $block_type_to_go[$iend] =~
/$block_brace_vertical_tightness_pattern/ )
{
- @{$rvertical_tightness_flags} =
- ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
+ $vt_type = 3;
+ $vt_opening_flag = $rOpts_block_brace_vertical_tightness;
+ $vt_seqno = 0;
+ $vt_valid_flag = 1;
}
#--------------------------------------------------------------
&& ( !$closing_side_comment || $n < $n_last_line ) )
{
my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
- @{$rvertical_tightness_flags} =
- ( 4, $spaces, $type_sequence_to_go[$iend], 1 );
+
+ $vt_type = 4;
+ $vt_closing_flag = $spaces;
+ $vt_seqno = $type_sequence_to_go[$iend];
+ $vt_valid_flag = 1;
+
}
- # pack in the sequence numbers of the ends of this line
- my $seqno_beg = $type_sequence_to_go[$ibeg];
- if ( !$seqno_beg && $types_to_go[$ibeg] eq 'q' ) {
- $seqno_beg = $self->get_seqno( $ibeg, $ending_in_quote );
+ # get the sequence numbers of the ends of this line
+ $vt_seqno_beg = $type_sequence_to_go[$ibeg];
+ if ( !$vt_seqno_beg && $types_to_go[$ibeg] eq 'q' ) {
+ $vt_seqno_beg = $self->get_seqno( $ibeg, $ending_in_quote );
}
- my $seqno_end = $type_sequence_to_go[$iend];
- if ( !$seqno_end && $types_to_go[$iend] eq 'q' ) {
- $seqno_end = $self->get_seqno( $iend, $ending_in_quote );
+
+ $vt_seqno_end = $type_sequence_to_go[$iend];
+ if ( !$vt_seqno_end && $types_to_go[$iend] eq 'q' ) {
+ $vt_seqno_end = $self->get_seqno( $iend, $ending_in_quote );
}
- $rvertical_tightness_flags->[4] = $seqno_beg;
- $rvertical_tightness_flags->[5] = $seqno_end;
- return $rvertical_tightness_flags;
+
+ RETURN:
+
+ my $rvertical_tightness_flags = {
+ _vt_type => $vt_type,
+ _vt_opening_flag => $vt_opening_flag,
+ _vt_closing_flag => $vt_closing_flag,
+ _vt_seqno => $vt_seqno,
+ _vt_valid_flag => $vt_valid_flag,
+ _vt_seqno_beg => $vt_seqno_beg,
+ _vt_seqno_end => $vt_seqno_end,
+ _vt_min_lines => $vt_min_lines,
+ _vt_max_lines => $vt_max_lines,
+ };
+
+ return ($rvertical_tightness_flags);
}
##########################################################
sub add_closing_side_comment {
- my $self = shift;
- my $rLL = $self->[_rLL_];
+ my ( $self, $ri_first, $ri_last ) = @_;
+ my $rLL = $self->[_rLL_];
# add closing side comments after closing block braces if -csc used
my ( $closing_side_comment, $cscw_block_comment );
if (
$terminal_type eq '}'
+ # Fix 1 for c091, this is only for blocks
+ && $block_type_to_go[$i_terminal]
+
# ..and either
&& (
if ( $block_line_count <
$rOpts->{'closing-side-comment-interval'} )
{
+ # Since the line breaks have already been set, we have
+ # to remove the token from the _to_go array and also
+ # from the line range (this fixes issue c081).
+ # Note that we can only get here if -cscw has been set
+ # because otherwise the old comment is already deleted.
$token = undef;
- $self->unstore_token_to_go()
- if ( $types_to_go[$max_index_to_go] eq '#' );
- $self->unstore_token_to_go()
- if ( $types_to_go[$max_index_to_go] eq 'b' );
+ my $ibeg = $ri_first->[-1];
+ my $iend = $ri_last->[-1];
+ if ( $iend > $ibeg
+ && $iend == $max_index_to_go
+ && $types_to_go[$max_index_to_go] eq '#' )
+ {
+ $iend--;
+ $max_index_to_go--;
+ if ( $iend > $ibeg
+ && $types_to_go[$max_index_to_go] eq 'b' )
+ {
+ $iend--;
+ $max_index_to_go--;
+ }
+ $ri_last->[-1] = $iend;
+ }
}
}
}
# switch to the new csc (unless we deleted it!)
if ($token) {
- $tokens_to_go[$max_index_to_go] = $token;
+
+ my $len_tok = length($token); # NOTE: length no longer important
+ my $added_len =
+ $len_tok - $token_lengths_to_go[$max_index_to_go];
+
+ $tokens_to_go[$max_index_to_go] = $token;
+ $token_lengths_to_go[$max_index_to_go] = $len_tok;
my $K = $K_to_go[$max_index_to_go];
- $rLL->[$K]->[_TOKEN_] = $token;
- $rLL->[$K]->[_TOKEN_LENGTH_] =
- length($token); # NOTE: length no longer important
+ $rLL->[$K]->[_TOKEN_] = $token;
+ $rLL->[$K]->[_TOKEN_LENGTH_] = $len_tok;
+ $summed_lengths_to_go[ $max_index_to_go + 1 ] += $added_len;
}
}
$file_writer_object->decrement_output_line_number()
; # fix up line number since it was incremented
we_are_at_the_last_line();
+
+ my $max_depth = $self->[_maximum_BLOCK_level_];
+ my $at_line = $self->[_maximum_BLOCK_level_at_line_];
+ write_logfile_entry(
+"Maximum leading structural depth is $max_depth in input at line $at_line\n"
+ );
+
my $added_semicolon_count = $self->[_added_semicolon_count_];
my $first_added_semicolon_at = $self->[_first_added_semicolon_at_];
my $last_added_semicolon_at = $self->[_last_added_semicolon_at_];