# The Perl::Tidy::Formatter package adds indentation, whitespace, and
# line breaks to the token stream
#
-# WARNING: This is not a real class for speed reasons. Only one
-# Formatter may be used.
-#
#####################################################################
+# Index...
+# CODE SECTION 1: Preliminary code, global definitions and sub new
+# sub new
+# CODE SECTION 2: Some Basic Utilities
+# CODE SECTION 3: Check and process options
+# sub check_options
+# CODE SECTION 4: Receive lines from the tokenizer
+# sub write_line
+# CODE SECTION 5: Pre-process the entire file
+# sub finish_formatting
+# CODE SECTION 6: Process line-by-line
+# sub process_all_lines
+# CODE SECTION 7: Process lines of code
+# process_line_of_CODE
+# CODE SECTION 8: Utilities for setting breakpoints
+# sub set_forced_breakpoint
+# CODE SECTION 9: Process batches of code
+# sub grind_batch_of_CODE
+# CODE SECTION 10: Code to break long statments
+# sub set_continuation_breaks
+# CODE SECTION 11: Code to break long lists
+# sub scan_list
+# CODE SECTION 12: Code for setting indentation
+# CODE SECTION 13: Preparing batches for vertical alignment
+# sub send_lines_to_vertical_aligner
+# CODE SECTION 14: Code for creating closing side comments
+# sub add_closing_side_comment
+# CODE SECTION 15: Summarize
+# sub wrapup
+
+#######################################################################
+# CODE SECTION 1: Preliminary code and global definitions up to sub new
+#######################################################################
+
package Perl::Tidy::Formatter;
use strict;
use warnings;
+
+# this can be turned on for extra checking during development
+use constant DEVEL_MODE => 0;
+
+{ #<<< A non-indenting brace to contain all lexical variables
+
use Carp;
-our $VERSION = '20200110';
+our $VERSION = '20210717';
# The Tokenizer will be loaded with the Formatter
##use Perl::Tidy::Tokenizer; # for is_keyword()
+sub AUTOLOAD {
+
+ # Catch any undefined sub calls so that we are sure to get
+ # some diagnostic information. This sub should never be called
+ # except for a programming error.
+ our $AUTOLOAD;
+ return if ( $AUTOLOAD =~ /\bDESTROY$/ );
+ my ( $pkg, $fname, $lno ) = caller();
+ my $my_package = __PACKAGE__;
+ print STDERR <<EOM;
+======================================================================
+Error detected in package '$my_package', version $VERSION
+Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
+Called from package: '$pkg'
+Called from File '$fname' at line '$lno'
+This error is probably due to a recent programming change
+======================================================================
+EOM
+ exit 1;
+}
+
+sub DESTROY {
+ my $self = shift;
+ $self->_decrement_count();
+ return;
+}
+
sub Die {
my ($msg) = @_;
Perl::Tidy::Die($msg);
return;
}
+sub Fault {
+ my ($msg) = @_;
+
+ # This routine is called for errors that really should not occur
+ # except if there has been a bug introduced by a recent program change.
+ # Please add comments at calls to Fault to explain why the call
+ # should not occur, and where to look to fix it.
+ my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
+ my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
+ my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
+ my $input_stream_name = get_input_stream_name();
+
+ Die(<<EOM);
+==============================================================================
+While operating on input stream with name: '$input_stream_name'
+A fault was detected at line $line0 of sub '$subroutine1'
+in file '$filename1'
+which was called from line $line1 of sub '$subroutine2'
+Message: '$msg'
+This is probably an error introduced by a recent programming change.
+Perl::Tidy::Formatter.pm reports VERSION='$VERSION'.
+==============================================================================
+EOM
+
+ # We shouldn't get here, but this return is to keep Perl-Critic from
+ # complaining.
+ return;
+}
+
sub Exit {
my ($msg) = @_;
Perl::Tidy::Exit($msg);
croak "unexpected return from Perl::Tidy::Exit";
}
+# Global variables ...
+my (
+
+ ##################################################################
+ # Section 1: Global variables which are either always constant or
+ # are constant after being configured by user-supplied
+ # parameters. They remain constant as a file is being processed.
+ ##################################################################
+
+ # user parameters and shortcuts
+ $rOpts,
+ $rOpts_closing_side_comment_maximum_text,
+ $rOpts_continuation_indentation,
+ $rOpts_indent_columns,
+ $rOpts_line_up_parentheses,
+ $rOpts_maximum_line_length,
+ $rOpts_variable_maximum_line_length,
+ $rOpts_block_brace_tightness,
+ $rOpts_block_brace_vertical_tightness,
+ $rOpts_stack_closing_block_brace,
+ $rOpts_maximum_consecutive_blank_lines,
+
+ $rOpts_recombine,
+ $rOpts_add_newlines,
+ $rOpts_break_at_old_comma_breakpoints,
+ $rOpts_ignore_old_breakpoints,
+
+ $rOpts_keep_interior_semicolons,
+ $rOpts_comma_arrow_breakpoints,
+ $rOpts_maximum_fields_per_table,
+ $rOpts_one_line_block_semicolons,
+ $rOpts_break_at_old_semicolon_breakpoints,
+
+ $rOpts_tee_side_comments,
+ $rOpts_tee_block_comments,
+ $rOpts_tee_pod,
+ $rOpts_delete_side_comments,
+ $rOpts_delete_closing_side_comments,
+ $rOpts_format_skipping,
+ $rOpts_indent_only,
+ $rOpts_static_block_comments,
+
+ $rOpts_add_whitespace,
+ $rOpts_delete_old_whitespace,
+ $rOpts_freeze_whitespace,
+ $rOpts_function_paren_vertical_alignment,
+ $rOpts_whitespace_cycle,
+ $rOpts_ignore_side_comment_lengths,
+
+ $rOpts_break_at_old_attribute_breakpoints,
+ $rOpts_break_at_old_keyword_breakpoints,
+ $rOpts_break_at_old_logical_breakpoints,
+ $rOpts_break_at_old_ternary_breakpoints,
+ $rOpts_short_concatenation_item_length,
+ $rOpts_closing_side_comment_else_flag,
+ $rOpts_fuzzy_line_length,
+
+ # Static hashes initialized in a BEGIN block
+ %is_assignment,
+ %is_keyword_returning_list,
+ %is_if_unless_and_or_last_next_redo_return,
+ %is_if_elsif_else_unless_while_until_for_foreach,
+ %is_if_unless_while_until_for,
+ %is_last_next_redo_return,
+ %is_sort_map_grep,
+ %is_sort_map_grep_eval,
+ %is_if_unless,
+ %is_and_or,
+ %is_chain_operator,
+ %is_block_without_semicolon,
+ %ok_to_add_semicolon_for_block_type,
+ %is_opening_type,
+ %is_closing_type,
+ %is_opening_token,
+ %is_closing_token,
+ %is_equal_or_fat_comma,
+ %is_block_with_ci,
+ %is_counted_type,
+ %is_opening_sequence_token,
+ %is_closing_sequence_token,
+ %is_container_label_type,
+
+ @all_operators,
+
+ # Initialized in check_options. These are constants and could
+ # just as well be initialized in a BEGIN block.
+ %is_do_follower,
+ %is_if_brace_follower,
+ %is_else_brace_follower,
+ %is_anon_sub_brace_follower,
+ %is_anon_sub_1_brace_follower,
+ %is_other_brace_follower,
+
+ # Initialized in sub initialize_whitespace_hashes;
+ # Some can be modified according to user parameters.
+ %binary_ws_rules,
+ %want_left_space,
+ %want_right_space,
+
+ # Configured in sub initialize_bond_strength_hashes
+ %right_bond_strength,
+ %left_bond_strength,
+
+ # Hashes for -kbb=s and -kba=s
+ %keep_break_before_type,
+ %keep_break_after_type,
+
+ # Initialized in check_options, modified by prepare_cuddled_block_types:
+ %want_one_line_block,
+ %is_braces_left_exclude_block,
+
+ # Initialized in sub prepare_cuddled_block_types
+ $rcuddled_block_types,
+
+ # Initialized and configured in check_optioms
+ %outdent_keyword,
+ %keyword_paren_inner_tightness,
+
+ %want_break_before,
+
+ %break_before_container_types,
+ %container_indentation_options,
+
+ %space_after_keyword,
+
+ %tightness,
+ %matching_token,
+
+ %opening_vertical_tightness,
+ %closing_vertical_tightness,
+ %closing_token_indentation,
+ $some_closing_token_indentation,
+
+ %opening_token_right,
+ %stack_opening_token,
+ %stack_closing_token,
+
+ %weld_nested_exclusion_rules,
+ %line_up_parentheses_exclusion_rules,
+
+ # regex patterns for text identification.
+ # Most are initialized in a sub make_**_pattern during configuration.
+ # Most can be configured by user parameters.
+ $SUB_PATTERN,
+ $ASUB_PATTERN,
+ $ANYSUB_PATTERN,
+ $static_block_comment_pattern,
+ $static_side_comment_pattern,
+ $format_skipping_pattern_begin,
+ $format_skipping_pattern_end,
+ $non_indenting_brace_pattern,
+ $bli_pattern,
+ $block_brace_vertical_tightness_pattern,
+ $blank_lines_after_opening_block_pattern,
+ $blank_lines_before_closing_block_pattern,
+ $keyword_group_list_pattern,
+ $keyword_group_list_comment_pattern,
+ $closing_side_comment_prefix_pattern,
+ $closing_side_comment_list_pattern,
+
+ # Table to efficiently find indentation and max line length
+ # from level.
+ @maximum_line_length_at_level,
+ @maximum_text_length_at_level,
+
+ # Total number of sequence items in a weld, for quick checks
+ $total_weld_count,
+
+ #########################################################
+ # Section 2: Work arrays for the current batch of tokens.
+ #########################################################
+
+ # These are re-initialized for each batch of code
+ # in sub initialize_batch_variables.
+ $max_index_to_go,
+ @block_type_to_go,
+ @type_sequence_to_go,
+ @bond_strength_to_go,
+ @forced_breakpoint_to_go,
+ @token_lengths_to_go,
+ @summed_lengths_to_go,
+ @levels_to_go,
+ @leading_spaces_to_go,
+ @reduced_spaces_to_go,
+ @mate_index_to_go,
+ @ci_levels_to_go,
+ @nesting_depth_to_go,
+ @nobreak_to_go,
+ @old_breakpoint_to_go,
+ @tokens_to_go,
+ @K_to_go,
+ @types_to_go,
+ @inext_to_go,
+ @iprev_to_go,
+ @parent_seqno_to_go,
+
+);
+
BEGIN {
+ # Initialize constants...
+
+ # Array index names for token variables
+ my $i = 0;
+ use constant {
+ _BLOCK_TYPE_ => $i++,
+ _CI_LEVEL_ => $i++,
+ _CUMULATIVE_LENGTH_ => $i++,
+ _LINE_INDEX_ => $i++,
+ _KNEXT_SEQ_ITEM_ => $i++,
+ _LEVEL_ => $i++,
+ _SLEVEL_ => $i++,
+ _TOKEN_ => $i++,
+ _TOKEN_LENGTH_ => $i++,
+ _TYPE_ => $i++,
+ _TYPE_SEQUENCE_ => $i++,
+
+ # Number of token variables; must be last in list:
+ _NVARS => $i++,
+ };
+
+ # Array index names for $self (which is an array ref)
+ $i = 0;
+ use constant {
+ _rlines_ => $i++,
+ _rlines_new_ => $i++,
+ _rLL_ => $i++,
+ _Klimit_ => $i++,
+ _K_opening_container_ => $i++,
+ _K_closing_container_ => $i++,
+ _K_opening_ternary_ => $i++,
+ _K_closing_ternary_ => $i++,
+ _K_first_seq_item_ => $i++,
+ _rK_phantom_semicolons_ => $i++,
+ _rtype_count_by_seqno_ => $i++,
+ _ris_function_call_paren_ => $i++,
+ _rlec_count_by_seqno_ => $i++,
+ _ris_broken_container_ => $i++,
+ _ris_permanently_broken_ => $i++,
+ _rhas_list_ => $i++,
+ _rhas_broken_list_ => $i++,
+ _rhas_broken_list_with_lec_ => $i++,
+ _rhas_code_block_ => $i++,
+ _rhas_broken_code_block_ => $i++,
+ _rhas_ternary_ => $i++,
+ _ris_excluded_lp_container_ => $i++,
+ _rwant_reduced_ci_ => $i++,
+ _rno_xci_by_seqno_ => $i++,
+ _ris_bli_container_ => $i++,
+ _rparent_of_seqno_ => $i++,
+ _rchildren_of_seqno_ => $i++,
+ _ris_list_by_seqno_ => $i++,
+ _rbreak_container_ => $i++,
+ _rshort_nested_ => $i++,
+ _length_function_ => $i++,
+ _is_encoded_data_ => $i++,
+ _fh_tee_ => $i++,
+ _sink_object_ => $i++,
+ _file_writer_object_ => $i++,
+ _vertical_aligner_object_ => $i++,
+ _logger_object_ => $i++,
+ _radjusted_levels_ => $i++,
+ _this_batch_ => $i++,
+
+ _last_output_short_opening_token_ => $i++,
+
+ _last_line_leading_type_ => $i++,
+ _last_line_leading_level_ => $i++,
+ _last_last_line_leading_level_ => $i++,
+
+ _added_semicolon_count_ => $i++,
+ _first_added_semicolon_at_ => $i++,
+ _last_added_semicolon_at_ => $i++,
+
+ _deleted_semicolon_count_ => $i++,
+ _first_deleted_semicolon_at_ => $i++,
+ _last_deleted_semicolon_at_ => $i++,
+
+ _embedded_tab_count_ => $i++,
+ _first_embedded_tab_at_ => $i++,
+ _last_embedded_tab_at_ => $i++,
+
+ _first_tabbing_disagreement_ => $i++,
+ _last_tabbing_disagreement_ => $i++,
+ _tabbing_disagreement_count_ => $i++,
+ _in_tabbing_disagreement_ => $i++,
+ _first_brace_tabbing_disagreement_ => $i++,
+ _in_brace_tabbing_disagreement_ => $i++,
+
+ _saw_VERSION_in_this_file_ => $i++,
+ _saw_END_or_DATA_ => $i++,
+
+ _rK_weld_left_ => $i++,
+ _rK_weld_right_ => $i++,
+ _rweld_len_right_at_K_ => $i++,
+
+ _rspecial_side_comment_type_ => $i++,
+
+ _rseqno_controlling_my_ci_ => $i++,
+ _ris_seqno_controlling_ci_ => $i++,
+ _save_logfile_ => $i++,
+ _maximum_level_ => $i++,
+
+ _rKrange_code_without_comments_ => $i++,
+ _rbreak_before_Kfirst_ => $i++,
+ _rbreak_after_Klast_ => $i++,
+ _rwant_container_open_ => $i++,
+ _converged_ => $i++,
+
+ _rstarting_multiline_qw_seqno_by_K_ => $i++,
+ _rending_multiline_qw_seqno_by_K_ => $i++,
+ _rKrange_multiline_qw_by_seqno_ => $i++,
+ _rmultiline_qw_has_extra_level_ => $i++,
+ _rbreak_before_container_by_seqno_ => $i++,
+ _ris_essential_old_breakpoint_ => $i++,
+ _roverride_cab3_ => $i++,
+ _ris_assigned_structure_ => $i++,
+ };
+
+ # Array index names for _this_batch_ (in above list)
+ # So _this_batch_ is a sub-array of $self for
+ # holding the batches of tokens being processed.
+ $i = 0;
+ use constant {
+ _starting_in_quote_ => $i++,
+ _ending_in_quote_ => $i++,
+ _is_static_block_comment_ => $i++,
+ _rlines_K_ => $i++,
+ _do_not_pad_ => $i++,
+ _ibeg0_ => $i++,
+ _peak_batch_size_ => $i++,
+ _max_index_to_go_ => $i++,
+ _rK_to_go_ => $i++,
+ _batch_count_ => $i++,
+ _rix_seqno_controlling_ci_ => $i++,
+ _batch_CODE_type_ => $i++,
+ };
+
+ # Sequence number assigned to the root of sequence tree.
+ # The minimum of the actual sequences numbers is 4, so we can use 1
+ use constant SEQ_ROOT => 1;
+
# Codes for insertion and deletion of blanks
use constant DELETE => 0;
use constant STABLE => 1;
use constant INSERT => 2;
- # Caution: these debug flags produce a lot of output
- # They should all be 0 except when debugging small scripts
- use constant FORMATTER_DEBUG_FLAG_RECOMBINE => 0;
- use constant FORMATTER_DEBUG_FLAG_BOND_TABLES => 0;
- use constant FORMATTER_DEBUG_FLAG_BOND => 0;
- use constant FORMATTER_DEBUG_FLAG_BREAK => 0;
- use constant FORMATTER_DEBUG_FLAG_CI => 0;
- use constant FORMATTER_DEBUG_FLAG_FLUSH => 0;
- use constant FORMATTER_DEBUG_FLAG_FORCE => 0;
- use constant FORMATTER_DEBUG_FLAG_LIST => 0;
- use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
- use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0;
- use constant FORMATTER_DEBUG_FLAG_SPARSE => 0;
- use constant FORMATTER_DEBUG_FLAG_STORE => 0;
- use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0;
- use constant FORMATTER_DEBUG_FLAG_WHITE => 0;
-
- my $debug_warning = sub {
- print STDOUT "FORMATTER_DEBUGGING with key $_[0]\n";
- };
-
- FORMATTER_DEBUG_FLAG_RECOMBINE && $debug_warning->('RECOMBINE');
- FORMATTER_DEBUG_FLAG_BOND_TABLES && $debug_warning->('BOND_TABLES');
- FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND');
- FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK');
- FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI');
- FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH');
- FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE');
- FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST');
- FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
- FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT');
- FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE');
- FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE');
- FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP');
- FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE');
-}
+ # whitespace codes
+ use constant WS_YES => 1;
+ use constant WS_OPTIONAL => 0;
+ use constant WS_NO => -1;
-use vars qw{
-
- @gnu_stack
- $max_gnu_stack_index
- $gnu_position_predictor
- $line_start_index_to_go
- $last_indentation_written
- $last_unadjusted_indentation
- $last_leading_token
- $last_output_short_opening_token
- $peak_batch_size
-
- $saw_VERSION_in_this_file
- $saw_END_or_DATA_
-
- @gnu_item_list
- $max_gnu_item_index
- $gnu_sequence_number
- $last_output_indentation
- %last_gnu_equals
- %gnu_comma_count
- %gnu_arrow_count
-
- @block_type_to_go
- @type_sequence_to_go
- @container_environment_to_go
- @bond_strength_to_go
- @forced_breakpoint_to_go
- @token_lengths_to_go
- @summed_lengths_to_go
- @levels_to_go
- @leading_spaces_to_go
- @reduced_spaces_to_go
- @mate_index_to_go
- @ci_levels_to_go
- @nesting_depth_to_go
- @nobreak_to_go
- @old_breakpoint_to_go
- @tokens_to_go
- @K_to_go
- @types_to_go
- @inext_to_go
- @iprev_to_go
-
- %saved_opening_indentation
-
- $max_index_to_go
- $comma_count_in_batch
- $last_nonblank_index_to_go
- $last_nonblank_type_to_go
- $last_nonblank_token_to_go
- $last_last_nonblank_index_to_go
- $last_last_nonblank_type_to_go
- $last_last_nonblank_token_to_go
- @nonblank_lines_at_depth
- $starting_in_quote
- $ending_in_quote
- @whitespace_level_stack
- $whitespace_last_level
-
- $format_skipping_pattern_begin
- $format_skipping_pattern_end
-
- $forced_breakpoint_count
- $forced_breakpoint_undo_count
- @forced_breakpoint_undo_stack
- %postponed_breakpoint
-
- $tabbing
- $embedded_tab_count
- $first_embedded_tab_at
- $last_embedded_tab_at
- $deleted_semicolon_count
- $first_deleted_semicolon_at
- $last_deleted_semicolon_at
- $added_semicolon_count
- $first_added_semicolon_at
- $last_added_semicolon_at
- $first_tabbing_disagreement
- $last_tabbing_disagreement
- $in_tabbing_disagreement
- $tabbing_disagreement_count
- $input_line_tabbing
-
- $last_line_leading_type
- $last_line_leading_level
- $last_last_line_leading_level
-
- %block_leading_text
- %block_opening_line_number
- $csc_new_statement_ok
- $csc_last_label
- %csc_block_label
- $accumulating_text_for_block
- $leading_block_text
- $rleading_block_if_elsif_text
- $leading_block_text_level
- $leading_block_text_length_exceeded
- $leading_block_text_line_length
- $leading_block_text_line_number
- $closing_side_comment_prefix_pattern
- $closing_side_comment_list_pattern
-
- $blank_lines_after_opening_block_pattern
- $blank_lines_before_closing_block_pattern
-
- $last_nonblank_token
- $last_nonblank_type
- $last_last_nonblank_token
- $last_last_nonblank_type
- $last_nonblank_block_type
- $last_output_level
- %is_do_follower
- %is_if_brace_follower
- %space_after_keyword
- $rbrace_follower
- $looking_for_else
- %is_last_next_redo_return
- %is_other_brace_follower
- %is_else_brace_follower
- %is_anon_sub_brace_follower
- %is_anon_sub_1_brace_follower
- %is_sort_map_grep
- %is_sort_map_grep_eval
- %want_one_line_block
- %is_sort_map_grep_eval_do
- %is_block_without_semicolon
- %is_if_unless
- %is_and_or
- %is_assignment
- %is_chain_operator
- %is_if_unless_and_or_last_next_redo_return
- %ok_to_add_semicolon_for_block_type
-
- @has_broken_sublist
- @dont_align
- @want_comma_break
-
- $is_static_block_comment
- $index_start_one_line_block
- $semicolons_before_block_self_destruct
- $index_max_forced_break
- $input_line_number
- $diagnostics_object
- $vertical_aligner_object
- $logger_object
- $file_writer_object
- $formatter_self
- @ci_stack
- %want_break_before
- %outdent_keyword
- $static_block_comment_pattern
- $static_side_comment_pattern
- %opening_vertical_tightness
- %closing_vertical_tightness
- %closing_token_indentation
- $some_closing_token_indentation
-
- %opening_token_right
- %stack_opening_token
- %stack_closing_token
-
- $block_brace_vertical_tightness_pattern
- $keyword_group_list_pattern
- $keyword_group_list_comment_pattern
-
- $rOpts_add_newlines
- $rOpts_add_whitespace
- $rOpts_block_brace_tightness
- $rOpts_block_brace_vertical_tightness
- $rOpts_brace_left_and_indent
- $rOpts_comma_arrow_breakpoints
- $rOpts_break_at_old_keyword_breakpoints
- $rOpts_break_at_old_comma_breakpoints
- $rOpts_break_at_old_logical_breakpoints
- $rOpts_break_at_old_method_breakpoints
- $rOpts_break_at_old_ternary_breakpoints
- $rOpts_break_at_old_attribute_breakpoints
- $rOpts_closing_side_comment_else_flag
- $rOpts_closing_side_comment_maximum_text
- $rOpts_continuation_indentation
- $rOpts_delete_old_whitespace
- $rOpts_fuzzy_line_length
- $rOpts_indent_columns
- $rOpts_line_up_parentheses
- $rOpts_maximum_fields_per_table
- $rOpts_maximum_line_length
- $rOpts_variable_maximum_line_length
- $rOpts_short_concatenation_item_length
- $rOpts_keep_old_blank_lines
- $rOpts_ignore_old_breakpoints
- $rOpts_format_skipping
- $rOpts_space_function_paren
- $rOpts_space_keyword_paren
- $rOpts_keep_interior_semicolons
- $rOpts_ignore_side_comment_lengths
- $rOpts_stack_closing_block_brace
- $rOpts_space_backslash_quote
- $rOpts_whitespace_cycle
- $rOpts_one_line_block_semicolons
-
- %is_opening_type
- %is_closing_type
- %is_keyword_returning_list
- %tightness
- %matching_token
- $rOpts
- %right_bond_strength
- %left_bond_strength
- %binary_ws_rules
- %want_left_space
- %want_right_space
- %is_digraph
- %is_trigraph
- $bli_pattern
- $bli_list_string
- %is_closing_type
- %is_opening_type
- %is_closing_token
- %is_opening_token
-
- %weld_len_left_closing
- %weld_len_right_closing
- %weld_len_left_opening
- %weld_len_right_opening
-
- $rcuddled_block_types
-
- $SUB_PATTERN
- $ASUB_PATTERN
-
- $NVARS
-
-};
+ # Token bond strengths.
+ use constant NO_BREAK => 10000;
+ use constant VERY_STRONG => 100;
+ use constant STRONG => 2.1;
+ use constant NOMINAL => 1.1;
+ use constant WEAK => 0.8;
+ use constant VERY_WEAK => 0.55;
-BEGIN {
+ # values for testing indexes in output array
+ use constant UNDEFINED_INDEX => -1;
- # Array index names for token variables
- my $i = 0;
- use constant {
- _BLOCK_TYPE_ => $i++,
- _CI_LEVEL_ => $i++,
- _CONTAINER_ENVIRONMENT_ => $i++,
- _CONTAINER_TYPE_ => $i++,
- _CUMULATIVE_LENGTH_ => $i++,
- _LINE_INDEX_ => $i++,
- _KNEXT_SEQ_ITEM_ => $i++,
- _LEVEL_ => $i++,
- _LEVEL_TRUE_ => $i++,
- _SLEVEL_ => $i++,
- _TOKEN_ => $i++,
- _TYPE_ => $i++,
- _TYPE_SEQUENCE_ => $i++,
- };
- $NVARS = 1 + _TYPE_SEQUENCE_;
+ # Maximum number of little messages; probably need not be changed.
+ use constant MAX_NAG_MESSAGES => 6;
- # default list of block types for which -bli would apply
- $bli_list_string = 'if else elsif unless while for foreach do : sub';
+ # increment between sequence numbers for each type
+ # For example, ?: pairs might have numbers 7,11,15,...
+ use constant TYPE_SEQUENCE_INCREMENT => 4;
+ # Initialize constant hashes ...
my @q;
- @q = qw(
- .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
- <= >= == =~ !~ != ++ -- /= x=
- );
- @is_digraph{@q} = (1) x scalar(@q);
-
- @q = qw( ... **= <<= >>= &&= ||= //= <=> <<~ );
- @is_trigraph{@q} = (1) x scalar(@q);
-
@q = qw(
= **= += *= &= <<= &&=
-= /= |= >>= ||= //=
@q = qw(is if unless and or err last next redo return);
@is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
+ # These block types may have text between the keyword and opening
+ # curly. Note: 'else' does not, but must be included to allow trailing
+ # if/elsif text to be appended.
+ # patch for SWITCH/CASE: added 'case' and 'when'
+ @q = qw(if elsif else unless while until for foreach case when catch);
+ @is_if_elsif_else_unless_while_until_for_foreach{@q} =
+ (1) x scalar(@q);
+
+ @q = qw(if unless while until for);
+ @is_if_unless_while_until_for{@q} =
+ (1) x scalar(@q);
+
@q = qw(last next redo return);
@is_last_next_redo_return{@q} = (1) x scalar(@q);
@q = qw(sort map grep eval);
@is_sort_map_grep_eval{@q} = (1) x scalar(@q);
- @q = qw(sort map grep eval do);
- @is_sort_map_grep_eval_do{@q} = (1) x scalar(@q);
-
@q = qw(if unless);
@is_if_unless{@q} = (1) x scalar(@q);
@q = qw(&& || and or : ? . + - * /);
@is_chain_operator{@q} = (1) x scalar(@q);
+ # Operators that the user can request break before or after.
+ # Note that some are keywords
+ @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
+ = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
+ . : ? && || and or err xor
+ );
+
# We can remove semicolons after blocks preceded by these keywords
@q =
qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
@q = qw< } ) ] >;
@is_closing_token{@q} = (1) x scalar(@q);
- # Patterns for standardizing matches to block types for regular subs and
- # anonymous subs. Examples
- # 'sub process' is a named sub
- # 'sub ::m' is a named sub
- # 'sub' is an anonymous sub
- # 'sub:' is a label, not a sub
- # 'substr' is a keyword
- $SUB_PATTERN = '^sub\s+(::|\w)';
- $ASUB_PATTERN = '^sub$';
-}
+ @q = qw< { ( [ ? >;
+ @is_opening_sequence_token{@q} = (1) x scalar(@q);
-# whitespace codes
-use constant WS_YES => 1;
-use constant WS_OPTIONAL => 0;
-use constant WS_NO => -1;
+ @q = qw< } ) ] : >;
+ @is_closing_sequence_token{@q} = (1) x scalar(@q);
-# Token bond strengths.
-use constant NO_BREAK => 10000;
-use constant VERY_STRONG => 100;
-use constant STRONG => 2.1;
-use constant NOMINAL => 1.1;
-use constant WEAK => 0.8;
-use constant VERY_WEAK => 0.55;
+ # a hash needed by sub scan_list for labeling containers
+ @q = qw( k => && || ? : . );
+ @is_container_label_type{@q} = (1) x scalar(@q);
-# values for testing indexes in output array
-use constant UNDEFINED_INDEX => -1;
+ # Braces -bbht etc must follow these. Note: experimentation with
+ # including a simple comma shows that it adds little and can lead
+ # to poor formatting in complex lists.
+ @q = qw( = => );
+ @is_equal_or_fat_comma{@q} = (1) x scalar(@q);
-# Maximum number of little messages; probably need not be changed.
-use constant MAX_NAG_MESSAGES => 6;
+ @q = qw( => ; h f );
+ push @q, ',';
+ @is_counted_type{@q} = (1) x scalar(@q);
-# increment between sequence numbers for each type
-# For example, ?: pairs might have numbers 7,11,15,...
-use constant TYPE_SEQUENCE_INCREMENT => 4;
+ # These block types can take ci. This is used by the -xci option.
+ # Note that the 'sub' in this list is an anonymous sub. To be more correct
+ # we could remove sub and use ASUB pattern to also handle a
+ # prototype/signature. But that would slow things down and would probably
+ # never be useful.
+ @q = qw( do sub eval sort map grep );
+ @is_block_with_ci{@q} = (1) x scalar(@q);
-{
+}
+
+{ ## begin closure to count instanes
# methods to count instances
my $_count = 0;
sub get_count { return $_count; }
sub _increment_count { return ++$_count }
sub _decrement_count { return --$_count }
-}
-
-sub trim {
-
- # trim leading and trailing whitespace from a string
- my $str = shift;
- $str =~ s/\s+$//;
- $str =~ s/^\s+//;
- return $str;
-}
-
-sub max {
- my @vals = @_;
- my $max = shift @vals;
- foreach my $val (@vals) {
- $max = ( $max < $val ) ? $val : $max;
- }
- return $max;
-}
-
-sub min {
- my @vals = @_;
- my $min = shift @vals;
- foreach my $val (@vals) {
- $min = ( $min > $val ) ? $val : $min;
- }
- return $min;
-}
-
-sub split_words {
-
- # given a string containing words separated by whitespace,
- # return the list of words
- my ($str) = @_;
- return unless $str;
- $str =~ s/\s+$//;
- $str =~ s/^\s+//;
- return split( /\s+/, $str );
-}
-
-sub check_keys {
- my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
-
- # Check the keys of a hash:
- # $rtest = ref to hash to test
- # $rvalid = ref to hash with valid keys
-
- # $msg = a message to write in case of error
- # $exact_match defines the type of check:
- # = false: test hash must not have unknown key
- # = true: test hash must have exactly same keys as known hash
- my @unknown_keys =
- grep { !exists $rvalid->{$_} } keys %{$rtest};
- my @missing_keys =
- grep { !exists $rtest->{$_} } keys %{$rvalid};
- my $error = @unknown_keys;
- if ($exact_match) { $error ||= @missing_keys }
- if ($error) {
- local $" = ')(';
- my @expected_keys = sort keys %{$rvalid};
- @unknown_keys = sort @unknown_keys;
- Die(<<EOM);
-------------------------------------------------------------------------
-Program error detected checking hash keys
-Message is: '$msg'
-Expected keys: (@expected_keys)
-Unknown key(s): (@unknown_keys)
-Missing key(s): (@missing_keys)
-------------------------------------------------------------------------
-EOM
- }
- return;
-}
-
-# interface to Perl::Tidy::Logger routines
-sub warning {
- my ($msg) = @_;
- if ($logger_object) { $logger_object->warning($msg); }
- return;
-}
-
-sub complain {
- my ($msg) = @_;
- if ($logger_object) {
- $logger_object->complain($msg);
- }
- return;
-}
-
-sub write_logfile_entry {
- my @msg = @_;
- if ($logger_object) {
- $logger_object->write_logfile_entry(@msg);
- }
- return;
-}
-
-sub black_box {
- my @msg = @_;
- if ($logger_object) { $logger_object->black_box(@msg); }
- return;
-}
-
-sub report_definite_bug {
- if ($logger_object) {
- $logger_object->report_definite_bug();
- }
- return;
-}
-
-sub get_saw_brace_error {
- if ($logger_object) {
- return $logger_object->get_saw_brace_error();
- }
- return;
-}
-
-sub we_are_at_the_last_line {
- if ($logger_object) {
- $logger_object->we_are_at_the_last_line();
- }
- return;
-}
-
-# interface to Perl::Tidy::Diagnostics routine
-sub write_diagnostics {
- my $msg = shift;
- if ($diagnostics_object) { $diagnostics_object->write_diagnostics($msg); }
- return;
-}
-
-sub get_added_semicolon_count {
- my $self = shift;
- return $added_semicolon_count;
-}
-
-sub DESTROY {
- my $self = shift;
- $self->_decrement_count();
- return;
-}
-
-sub get_output_line_number {
- return $vertical_aligner_object->get_output_line_number();
-}
+} ## end closure to count instanes
sub new {
sink_object => undef,
diagnostics_object => undef,
logger_object => undef,
+ length_function => sub { return length( $_[0] ) },
+ is_encoded_data => "",
+ fh_tee => undef,
);
my %args = ( %defaults, @args );
- $logger_object = $args{logger_object};
- $diagnostics_object = $args{diagnostics_object};
+ my $length_function = $args{length_function};
+ my $is_encoded_data = $args{is_encoded_data};
+ my $fh_tee = $args{fh_tee};
+ my $logger_object = $args{logger_object};
+ my $diagnostics_object = $args{diagnostics_object};
# we create another object with a get_line() and peek_ahead() method
my $sink_object = $args{sink_object};
- $file_writer_object =
+ my $file_writer_object =
Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
- # initialize the leading whitespace stack to negative levels
- # so that we can never run off the end of the stack
- $peak_batch_size = 0; # flag to determine if we have output code
- $gnu_position_predictor = 0; # where the current token is predicted to be
- $max_gnu_stack_index = 0;
- $max_gnu_item_index = -1;
- $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
- @gnu_item_list = ();
- $last_output_indentation = 0;
- $last_indentation_written = 0;
- $last_unadjusted_indentation = 0;
- $last_leading_token = "";
- $last_output_short_opening_token = 0;
-
- $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
- $saw_END_or_DATA_ = 0;
-
- @block_type_to_go = ();
- @type_sequence_to_go = ();
- @container_environment_to_go = ();
- @bond_strength_to_go = ();
- @forced_breakpoint_to_go = ();
- @summed_lengths_to_go = (); # line length to start of ith token
- @token_lengths_to_go = ();
- @levels_to_go = ();
- @mate_index_to_go = ();
- @ci_levels_to_go = ();
- @nesting_depth_to_go = (0);
- @nobreak_to_go = ();
- @old_breakpoint_to_go = ();
- @tokens_to_go = ();
- @K_to_go = ();
- @types_to_go = ();
- @leading_spaces_to_go = ();
- @reduced_spaces_to_go = ();
- @inext_to_go = ();
- @iprev_to_go = ();
-
- @whitespace_level_stack = ();
- $whitespace_last_level = -1;
-
- @dont_align = ();
- @has_broken_sublist = ();
- @want_comma_break = ();
-
- @ci_stack = ("");
- $first_tabbing_disagreement = 0;
- $last_tabbing_disagreement = 0;
- $tabbing_disagreement_count = 0;
- $in_tabbing_disagreement = 0;
- $input_line_tabbing = undef;
-
- $last_last_line_leading_level = 0;
- $last_line_leading_level = 0;
- $last_line_leading_type = '#';
-
- $last_nonblank_token = ';';
- $last_nonblank_type = ';';
- $last_last_nonblank_token = ';';
- $last_last_nonblank_type = ';';
- $last_nonblank_block_type = "";
- $last_output_level = 0;
- $looking_for_else = 0;
- $embedded_tab_count = 0;
- $first_embedded_tab_at = 0;
- $last_embedded_tab_at = 0;
- $deleted_semicolon_count = 0;
- $first_deleted_semicolon_at = 0;
- $last_deleted_semicolon_at = 0;
- $added_semicolon_count = 0;
- $first_added_semicolon_at = 0;
- $last_added_semicolon_at = 0;
- $is_static_block_comment = 0;
- %postponed_breakpoint = ();
-
- # variables for adding side comments
- %block_leading_text = ();
- %block_opening_line_number = ();
- $csc_new_statement_ok = 1;
- %csc_block_label = ();
-
- %saved_opening_indentation = ();
-
- reset_block_text_accumulator();
-
- prepare_for_new_input_lines();
-
- $vertical_aligner_object =
- Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
- $logger_object, $diagnostics_object );
+ # initialize closure variables...
+ set_logger_object($logger_object);
+ set_diagnostics_object($diagnostics_object);
+ initialize_gnu_vars();
+ initialize_csc_vars();
+ initialize_scan_list();
+ initialize_saved_opening_indentation();
+ initialize_undo_ci();
+ initialize_process_line_of_CODE();
+ initialize_grind_batch_of_CODE();
+ initialize_adjusted_indentation();
+ initialize_postponed_breakpoint();
+ initialize_batch_variables();
+ initialize_forced_breakpoint_vars();
+ initialize_gnu_batch_vars();
+ initialize_write_line();
+
+ my $vertical_aligner_object = Perl::Tidy::VerticalAligner->new(
+ rOpts => $rOpts,
+ file_writer_object => $file_writer_object,
+ logger_object => $logger_object,
+ diagnostics_object => $diagnostics_object,
+ length_function => $length_function
+ );
+
+ write_logfile_entry("\nStarting tokenization pass...\n");
if ( $rOpts->{'entab-leading-whitespace'} ) {
write_logfile_entry(
"Indentation will be with $rOpts->{'indent-columns'} spaces\n");
}
- # This hash holds the main data structures for formatting
- # All hash keys must be defined here.
- $formatter_self = {
- rlines => [], # = ref to array of lines of the file
- rlines_new => [], # = ref to array of output lines
- # (FOR FUTURE DEVELOPMENT)
- rLL => [], # = ref to array with all tokens
- # in the file. LL originally meant
- # 'Linked List'. Linked lists were a
- # bad idea but LL is easy to type.
- Klimit => undef, # = maximum K index for rLL. This is
- # needed to catch any autovivification
- # problems.
- rnested_pairs => [], # for welding decisions
- K_opening_container => {}, # for quickly traversing structure
- K_closing_container => {}, # for quickly traversing structure
- K_opening_ternary => {}, # for quickly traversing structure
- K_closing_ternary => {}, # for quickly traversing structure
- rcontainer_map => {}, # hierarchical map of containers
- rK_phantom_semicolons =>
- undef, # for undoing phantom semicolons if iterating
- rpaired_to_inner_container => {},
- rbreak_container => {}, # prevent one-line blocks
- rshort_nested => {}, # blocks not forced open
- rvalid_self_keys => [], # for checking
- valign_batch_count => 0,
- };
- my @valid_keys = keys %{$formatter_self};
- $formatter_self->{rvalid_self_keys} = \@valid_keys;
-
- bless $formatter_self, $class;
+ # Initialize the $self array reference.
+ # To add an item, first add a constant index in the BEGIN block above.
+ my $self = [];
+
+ # Basic data structures...
+ $self->[_rlines_] = []; # = ref to array of lines of the file
+ $self->[_rlines_new_] = []; # = ref to array of output lines
+
+ # 'rLL' = reference to the liner array of all tokens in the file.
+ # 'LL' stands for 'Linked List'. Using a linked list was a disaster, but
+ # 'LL' stuck because it is easy to type.
+ $self->[_rLL_] = [];
+ $self->[_Klimit_] = undef; # = maximum K index for rLL.
+
+ # Arrays for quickly traversing the structures
+ $self->[_K_opening_container_] = {};
+ $self->[_K_closing_container_] = {};
+ $self->[_K_opening_ternary_] = {};
+ $self->[_K_closing_ternary_] = {};
+ $self->[_K_first_seq_item_] = undef; # K of first token with a sequence #
+
+ # Array of phantom semicolons, in case we ever need to undo them
+ $self->[_rK_phantom_semicolons_] = undef;
+
+ # Mostly list characteristics and processing flags
+ $self->[_rtype_count_by_seqno_] = {};
+ $self->[_ris_function_call_paren_] = {};
+ $self->[_rlec_count_by_seqno_] = {};
+ $self->[_ris_broken_container_] = {};
+ $self->[_ris_permanently_broken_] = {};
+ $self->[_rhas_list_] = {};
+ $self->[_rhas_broken_list_] = {};
+ $self->[_rhas_broken_list_with_lec_] = {};
+ $self->[_rhas_code_block_] = {};
+ $self->[_rhas_broken_code_block_] = {};
+ $self->[_rhas_ternary_] = {};
+ $self->[_ris_excluded_lp_container_] = {};
+ $self->[_rwant_reduced_ci_] = {};
+ $self->[_rno_xci_by_seqno_] = {};
+ $self->[_ris_bli_container_] = {};
+ $self->[_rparent_of_seqno_] = {};
+ $self->[_rchildren_of_seqno_] = {};
+ $self->[_ris_list_by_seqno_] = {};
+
+ $self->[_rbreak_container_] = {}; # prevent one-line blocks
+ $self->[_rshort_nested_] = {}; # blocks not forced open
+ $self->[_length_function_] = $length_function;
+ $self->[_is_encoded_data_] = $is_encoded_data;
+
+ # Some objects...
+ $self->[_fh_tee_] = $fh_tee;
+ $self->[_sink_object_] = $sink_object;
+ $self->[_file_writer_object_] = $file_writer_object;
+ $self->[_vertical_aligner_object_] = $vertical_aligner_object;
+ $self->[_logger_object_] = $logger_object;
+
+ # Reference to the batch being processed
+ $self->[_this_batch_] = [];
+
+ # Memory of processed text...
+ $self->[_last_last_line_leading_level_] = 0;
+ $self->[_last_line_leading_level_] = 0;
+ $self->[_last_line_leading_type_] = '#';
+ $self->[_last_output_short_opening_token_] = 0;
+ $self->[_added_semicolon_count_] = 0;
+ $self->[_first_added_semicolon_at_] = 0;
+ $self->[_last_added_semicolon_at_] = 0;
+ $self->[_deleted_semicolon_count_] = 0;
+ $self->[_first_deleted_semicolon_at_] = 0;
+ $self->[_last_deleted_semicolon_at_] = 0;
+ $self->[_embedded_tab_count_] = 0;
+ $self->[_first_embedded_tab_at_] = 0;
+ $self->[_last_embedded_tab_at_] = 0;
+ $self->[_first_tabbing_disagreement_] = 0;
+ $self->[_last_tabbing_disagreement_] = 0;
+ $self->[_tabbing_disagreement_count_] = 0;
+ $self->[_in_tabbing_disagreement_] = 0;
+ $self->[_saw_VERSION_in_this_file_] = !$rOpts->{'pass-version-line'};
+ $self->[_saw_END_or_DATA_] = 0;
+
+ # Hashes related to container welding...
+ $self->[_radjusted_levels_] = [];
+
+ # Weld data structures
+ $self->[_rK_weld_left_] = {};
+ $self->[_rK_weld_right_] = {};
+ $self->[_rweld_len_right_at_K_] = {};
+
+ # -xci stuff
+ $self->[_rseqno_controlling_my_ci_] = {};
+ $self->[_ris_seqno_controlling_ci_] = {};
+
+ $self->[_rspecial_side_comment_type_] = {};
+ $self->[_maximum_level_] = 0;
+
+ $self->[_rKrange_code_without_comments_] = [];
+ $self->[_rbreak_before_Kfirst_] = {};
+ $self->[_rbreak_after_Klast_] = {};
+ $self->[_rwant_container_open_] = {};
+ $self->[_converged_] = 0;
+
+ # qw stuff
+ $self->[_rstarting_multiline_qw_seqno_by_K_] = {};
+ $self->[_rending_multiline_qw_seqno_by_K_] = {};
+ $self->[_rKrange_multiline_qw_by_seqno_] = {};
+ $self->[_rmultiline_qw_has_extra_level_] = {};
+
+ $self->[_rbreak_before_container_by_seqno_] = {};
+ $self->[_ris_essential_old_breakpoint_] = {};
+ $self->[_roverride_cab3_] = {};
+ $self->[_ris_assigned_structure_] = {};
+
+ # This flag will be updated later by a call to get_save_logfile()
+ $self->[_save_logfile_] = defined($logger_object);
+
+ bless $self, $class;
# Safety check..this is not a class yet
if ( _increment_count() > 1 ) {
confess
"Attempt to create more than 1 object in $class, which is not a true class yet\n";
}
- return $formatter_self;
+ return $self;
}
-# Future routines for storing new lines
-sub push_line {
- my ( $self, $rline ) = @_;
+######################################
+# CODE SECTION 2: Some Basic Utilities
+######################################
- # my $rline = $rlines->[$index_old];
- # push @{$rlines_new}, $rline;
- return;
-}
+{ ## begin closure for logger routines
+ my $logger_object;
-sub push_old_line {
- my ( $self, $index_old ) = @_;
+ # Called once per file to initialize the logger object
+ sub set_logger_object {
+ $logger_object = shift;
+ return;
+ }
- # TODO: This will copy line with index $index_old to the new line array
- # my $rlines = $self->{rlines};
- # my $rline = $rlines->[$index_old];
- # $self->push_line($rline);
- return;
-}
+ sub get_logger_object {
+ return $logger_object;
+ }
-sub push_blank_line {
- my ($self) = @_;
+ sub get_input_stream_name {
+ my $input_stream_name = "";
+ if ($logger_object) {
+ $input_stream_name = $logger_object->get_input_stream_name();
+ }
+ return $input_stream_name;
+ }
- # my $rline = ...
- # $self->push_line($rline);
- return;
-}
+ # interface to Perl::Tidy::Logger routines
+ sub warning {
+ my ($msg) = @_;
+ if ($logger_object) { $logger_object->warning($msg); }
+ return;
+ }
-sub push_CODE_line {
- my ( $self, $Kmin, $Kmax ) = @_;
+ sub complain {
+ my ($msg) = @_;
+ if ($logger_object) {
+ $logger_object->complain($msg);
+ }
+ return;
+ }
- # TODO: This will store the values for one new line of CODE
- # CHECK TOKEN RANGE HERE
- # $self->push_line($rline);
- return;
-}
+ sub write_logfile_entry {
+ my @msg = @_;
+ if ($logger_object) {
+ $logger_object->write_logfile_entry(@msg);
+ }
+ return;
+ }
-sub increment_valign_batch_count {
- my ($self) = shift;
- return ++$self->{valign_batch_count};
-}
+ sub report_definite_bug {
+ if ($logger_object) {
+ $logger_object->report_definite_bug();
+ }
+ return;
+ }
-sub get_valign_batch_count {
- my ($self) = shift;
- return $self->{valign_batch_count};
-}
+ sub get_saw_brace_error {
+ if ($logger_object) {
+ return $logger_object->get_saw_brace_error();
+ }
+ return;
+ }
-sub Fault {
- my ($msg) = @_;
+ sub we_are_at_the_last_line {
+ if ($logger_object) {
+ $logger_object->we_are_at_the_last_line();
+ }
+ return;
+ }
- # This routine is called for errors that really should not occur
- # except if there has been a bug introduced by a recent program change
- my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
- my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
- my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
- my $input_stream_name = $logger_object->get_input_stream_name();
+} ## end closure for logger routines
- Die(<<EOM);
-==============================================================================
-While operating on input stream with name: '$input_stream_name'
-A fault was detected at line $line0 of sub '$subroutine1'
-in file '$filename1'
-which was called from line $line1 of sub '$subroutine2'
-Message: '$msg'
-This is probably an error introduced by a recent programming change.
-==============================================================================
-EOM
+{ ## begin closure for diagnostics routines
+ my $diagnostics_object;
- # This is for Perl-Critic
- return;
+ # Called once per file to initialize the diagnostics object
+ sub set_diagnostics_object {
+ $diagnostics_object = shift;
+ return;
+ }
+
+ sub write_diagnostics {
+ my ($msg) = @_;
+ if ($diagnostics_object) {
+ $diagnostics_object->write_diagnostics($msg);
+ }
+ return;
+ }
+} ## end closure for diagnostics routines
+
+sub get_convergence_check {
+ my ($self) = @_;
+ return $self->[_converged_];
}
-sub check_self_hash {
- my $self = shift;
- my @valid_self_keys = @{ $self->{rvalid_self_keys} };
- my %valid_self_hash;
- @valid_self_hash{@valid_self_keys} = (1) x scalar(@valid_self_keys);
- check_keys( $self, \%valid_self_hash, "Checkpoint: self error", 1 );
- return;
+sub get_added_semicolon_count {
+ my $self = shift;
+ return $self->[_added_semicolon_count_];
+}
+
+sub get_output_line_number {
+ my ($self) = @_;
+ my $vao = $self->[_vertical_aligner_object_];
+ return $vao->get_output_line_number();
}
sub check_token_array {
my $self = shift;
- # Check for errors in the array of tokens
- # Uses package variable $NVARS
- $self->check_self_hash();
- my $rLL = $self->{rLL};
+ # Check for errors in the array of tokens. This is only called now
+ # when the DEVEL_MODE flag is set, so this Fault will only occur
+ # during code development.
+ my $rLL = $self->[_rLL_];
for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
my $nvars = @{ $rLL->[$KK] };
- if ( $nvars != $NVARS ) {
- my $type = $rLL->[$KK]->[_TYPE_];
+ if ( $nvars != _NVARS ) {
+ my $NVARS = _NVARS;
+ my $type = $rLL->[$KK]->[_TYPE_];
$type = '*' unless defined($type);
+
+ # The number of variables per token node is _NVARS and was set when
+ # the array indexes were generated. So if the number of variables
+ # is different we have done something wrong, like not store all of
+ # them in sub 'write_line' when they were received from the
+ # tokenizer.
Fault(
"number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
);
foreach my $var ( _TOKEN_, _TYPE_ ) {
if ( !defined( $rLL->[$KK]->[$var] ) ) {
my $iline = $rLL->[$KK]->[_LINE_INDEX_];
+
+ # This is a simple check that each token has some basic
+ # variables. In other words, that there are no holes in the
+ # array of tokens. Sub 'write_line' pushes tokens into the
+ # $rLL array, so this should guarantee no gaps.
Fault("Undefined variable $var for K=$KK, line=$iline\n");
}
}
return;
}
-sub set_rLL_max_index {
+sub want_blank_line {
my $self = shift;
+ $self->flush();
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->want_blank_line();
+ return;
+}
- # Set the limit of the rLL array, assuming that it is correct.
- # This should only be called by routines after they make changes
- # to tokenization
- my $rLL = $self->{rLL};
- if ( !defined($rLL) ) {
+sub write_unindented_line {
+ my ( $self, $line ) = @_;
+ $self->flush();
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->write_line($line);
+ return;
+}
- # Shouldn't happen because rLL was initialized to be an array ref
- Fault("Undefined Memory rLL");
- }
- my $Klimit_old = $self->{Klimit};
- my $num = @{$rLL};
- my $Klimit;
- if ( $num > 0 ) { $Klimit = $num - 1 }
- $self->{Klimit} = $Klimit;
- return ($Klimit);
+sub consecutive_nonblank_lines {
+ my ($self) = @_;
+ my $file_writer_object = $self->[_file_writer_object_];
+ my $vao = $self->[_vertical_aligner_object_];
+ return $file_writer_object->get_consecutive_nonblank_lines() +
+ $vao->get_cached_line_count();
}
-sub get_rLL_max_index {
- my $self = shift;
+sub trim {
- # the memory location $rLL and number of tokens should be obtained
- # from this routine so that any autovivication can be immediately caught.
- my $rLL = $self->{rLL};
- my $Klimit = $self->{Klimit};
- if ( !defined($rLL) ) {
+ # trim leading and trailing whitespace from a string
+ my $str = shift;
+ $str =~ s/\s+$//;
+ $str =~ s/^\s+//;
+ return $str;
+}
- # Shouldn't happen because rLL was initialized to be an array ref
- Fault("Undefined Memory rLL");
- }
- my $num = @{$rLL};
- if ( $num == 0 && defined($Klimit)
- || $num > 0 && !defined($Klimit)
- || $num > 0 && $Klimit != $num - 1 )
- {
+sub max {
+ my (@vals) = @_;
+ my $max = shift @vals;
+ for (@vals) { $max = $_ > $max ? $_ : $max }
+ return $max;
+}
- # Possible autovivification problem...
- if ( !defined($Klimit) ) { $Klimit = '*' }
- Fault("Error getting rLL: Memory items=$num and Klimit=$Klimit");
- }
- return ($Klimit);
+sub min {
+ my (@vals) = @_;
+ my $min = shift @vals;
+ for (@vals) { $min = $_ < $min ? $_ : $min }
+ return $min;
}
-sub prepare_for_new_input_lines {
-
- # Remember the largest batch size processed. This is needed
- # by the pad routine to avoid padding the first nonblank token
- if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) {
- $peak_batch_size = $max_index_to_go;
- }
-
- $gnu_sequence_number++; # increment output batch counter
- %last_gnu_equals = ();
- %gnu_comma_count = ();
- %gnu_arrow_count = ();
- $line_start_index_to_go = 0;
- $max_gnu_item_index = UNDEFINED_INDEX;
- $index_max_forced_break = UNDEFINED_INDEX;
- $max_index_to_go = UNDEFINED_INDEX;
- $last_nonblank_index_to_go = UNDEFINED_INDEX;
- $last_nonblank_type_to_go = '';
- $last_nonblank_token_to_go = '';
- $last_last_nonblank_index_to_go = UNDEFINED_INDEX;
- $last_last_nonblank_type_to_go = '';
- $last_last_nonblank_token_to_go = '';
- $forced_breakpoint_count = 0;
- $forced_breakpoint_undo_count = 0;
- $rbrace_follower = undef;
- $summed_lengths_to_go[0] = 0;
- $comma_count_in_batch = 0;
- $starting_in_quote = 0;
+sub split_words {
- destroy_one_line_block();
- return;
+ # given a string containing words separated by whitespace,
+ # return the list of words
+ my ($str) = @_;
+ return unless $str;
+ $str =~ s/\s+$//;
+ $str =~ s/^\s+//;
+ return split( /\s+/, $str );
}
-sub keyword_group_scan {
- my $self = shift;
+###########################################
+# CODE SECTION 3: Check and process options
+###########################################
- # Manipulate blank lines around keyword groups (kgb* flags)
- # Scan all lines looking for runs of consecutive lines beginning with
- # selected keywords. Example keywords are 'my', 'our', 'local', ... but
- # they may be anything. We will set flags requesting that blanks be
- # inserted around and within them according to input parameters. Note
- # that we are scanning the lines as they came in in the input stream, so
- # they are not necessarily well formatted.
+sub check_options {
- # The output of this sub is a return hash ref whose keys are the indexes of
- # lines after which we desire a blank line. For line index i:
- # $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i
- # $rhash_of_desires->{$i} = 2 means we want blank line $i removed
- my $rhash_of_desires = {};
+ # This routine is called to check the user-supplied run parameters
+ # and to configure the control hashes to them.
+ $rOpts = shift;
- my $Opt_blanks_before = $rOpts->{'keyword-group-blanks-before'}; # '-kgbb'
- my $Opt_blanks_after = $rOpts->{'keyword-group-blanks-after'}; # '-kgba'
- my $Opt_blanks_inside = $rOpts->{'keyword-group-blanks-inside'}; # '-kgbi'
- my $Opt_blanks_delete = $rOpts->{'keyword-group-blanks-delete'}; # '-kgbd'
- my $Opt_size = $rOpts->{'keyword-group-blanks-size'}; # '-kgbs'
+ initialize_whitespace_hashes();
+ initialize_bond_strength_hashes();
- # A range of sizes can be input with decimal notation like 'min.max' with
- # any number of dots between the two numbers. Examples:
- # string => min max matches
- # 1.1 1 1 exactly 1
- # 1.3 1 3 1,2, or 3
- # 1..3 1 3 1,2, or 3
- # 5 5 - 5 or more
- # 6. 6 - 6 or more
- # .2 - 2 up to 2
- # 1.0 1 0 nothing
- my ( $Opt_size_min, $Opt_size_max ) = split /\.+/, $Opt_size;
- if ( $Opt_size_min && $Opt_size_min !~ /^\d+$/
- || $Opt_size_max && $Opt_size_max !~ /^\d+$/ )
- {
- Warn(<<EOM);
-Unexpected value for -kgbs: '$Opt_size'; expecting 'min' or 'min.max';
-ignoring all -kgb flags
-EOM
- return $rhash_of_desires;
- }
- $Opt_size_min = 1 unless ($Opt_size_min);
+ # Make needed regex patterns for matching text.
+ # NOTE: sub_matching_patterns must be made first because later patterns use
+ # them; see RT #133130.
+ make_sub_matching_pattern();
+ make_static_block_comment_pattern();
+ make_static_side_comment_pattern();
+ make_closing_side_comment_prefix();
+ make_closing_side_comment_list_pattern();
+ $format_skipping_pattern_begin =
+ make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
+ $format_skipping_pattern_end =
+ make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
+ make_non_indenting_brace_pattern();
- if ( $Opt_size_max && $Opt_size_max < $Opt_size_min ) {
- return $rhash_of_desires;
+ # If closing side comments ARE selected, then we can safely
+ # delete old closing side comments unless closing side comment
+ # warnings are requested. This is a good idea because it will
+ # eliminate any old csc's which fall below the line count threshold.
+ # We cannot do this if warnings are turned on, though, because we
+ # might delete some text which has been added. So that must
+ # be handled when comments are created. And we cannot do this
+ # with -io because -csc will be skipped altogether.
+ if ( $rOpts->{'closing-side-comments'} ) {
+ if ( !$rOpts->{'closing-side-comment-warnings'}
+ && !$rOpts->{'indent-only'} )
+ {
+ $rOpts->{'delete-closing-side-comments'} = 1;
+ }
}
- # codes for $Opt_blanks_before and $Opt_blanks_after:
- # 0 = never (delete if exist)
- # 1 = stable (keep unchanged)
- # 2 = always (insert if missing)
-
- return $rhash_of_desires
- unless $Opt_size_min > 0
- && ( $Opt_blanks_before != 1
- || $Opt_blanks_after != 1
- || $Opt_blanks_inside
- || $Opt_blanks_delete );
-
- my $Opt_pattern = $keyword_group_list_pattern;
- my $Opt_comment_pattern = $keyword_group_list_comment_pattern;
- my $Opt_repeat_count =
- $rOpts->{'keyword-group-blanks-repeat-count'}; # '-kgbr'
+ # If closing side comments ARE NOT selected, but warnings ARE
+ # selected and we ARE DELETING csc's, then we will pretend to be
+ # adding with a huge interval. This will force the comments to be
+ # generated for comparison with the old comments, but not added.
+ elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
+ if ( $rOpts->{'delete-closing-side-comments'} ) {
+ $rOpts->{'delete-closing-side-comments'} = 0;
+ $rOpts->{'closing-side-comments'} = 1;
+ $rOpts->{'closing-side-comment-interval'} = 100000000;
+ }
+ }
- my $rlines = $self->{rlines};
- my $rLL = $self->{rLL};
- my $K_closing_container = $self->{K_closing_container};
+ make_bli_pattern();
+ make_block_brace_vertical_tightness_pattern();
+ make_blank_line_pattern();
+ make_keyword_group_list_pattern();
- # variables for the current group and subgroups:
- my ( $ibeg, $iend, $count, $level_beg, $K_closing, @iblanks, @group,
- @subgroup );
+ # Make initial list of desired one line block types
+ # They will be modified by 'prepare_cuddled_block_types'
+ %want_one_line_block = %is_sort_map_grep_eval;
- # Definitions:
- # ($ibeg, $iend) = starting and ending line indexes of this entire group
- # $count = total number of keywords seen in this entire group
- # $level_beg = indententation level of this group
- # @group = [ $i, $token, $count ] =list of all keywords & blanks
- # @subgroup = $j, index of group where token changes
- # @iblanks = line indexes of blank lines in input stream in this group
- # where i=starting line index
- # token (the keyword)
- # count = number of this token in this subgroup
- # j = index in group where token changes
- #
- # These vars will contain values for the most recently seen line:
- my ( $line_type, $CODE_type, $K_first, $K_last );
+ # Default is to exclude one-line block types from -bl formatting
+ # FIXME: Eventually a flag should be added to modify this.
+ %is_braces_left_exclude_block = %is_sort_map_grep_eval;
- my $number_of_groups_seen = 0;
+ prepare_cuddled_block_types();
+ if ( $rOpts->{'dump-cuddled-block-list'} ) {
+ dump_cuddled_block_list(*STDOUT);
+ Exit(0);
+ }
- ####################
- # helper subroutines
- ####################
+ if ( $rOpts->{'line-up-parentheses'} ) {
- my $insert_blank_after = sub {
- my ($i) = @_;
- $rhash_of_desires->{$i} = 1;
- my $ip = $i + 1;
- if ( defined( $rhash_of_desires->{$ip} )
- && $rhash_of_desires->{$ip} == 2 )
+ if ( $rOpts->{'indent-only'}
+ || !$rOpts->{'add-newlines'}
+ || !$rOpts->{'delete-old-newlines'} )
{
- $rhash_of_desires->{$ip} = 0;
+ Warn(<<EOM);
+-----------------------------------------------------------------------
+Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
+
+The -lp indentation logic requires that perltidy be able to coordinate
+arbitrarily large numbers of line breakpoints. This isn't possible
+with these flags.
+-----------------------------------------------------------------------
+EOM
+ $rOpts->{'line-up-parentheses'} = 0;
}
- return;
- };
-
- my $split_into_sub_groups = sub {
- # place blanks around long sub-groups of keywords
- # ...if requested
- return unless ($Opt_blanks_inside);
+ if ( $rOpts->{'whitespace-cycle'} ) {
+ Warn(<<EOM);
+Conflict: -wc cannot currently be used with the -lp option; ignoring -wc
+EOM
+ $rOpts->{'whitespace-cycle'} = 0;
+ }
+ }
- # loop over sub-groups, index k
- push @subgroup, scalar @group;
- my $kbeg = 1;
- my $kend = @subgroup - 1;
- for ( my $k = $kbeg ; $k <= $kend ; $k++ ) {
+ # At present, tabs are not compatible with the line-up-parentheses style
+ # (it would be possible to entab the total leading whitespace
+ # just prior to writing the line, if desired).
+ if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
+ Warn(<<EOM);
+Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
+EOM
+ $rOpts->{'tabs'} = 0;
+ }
- # index j runs through all keywords found
- my $j_b = $subgroup[ $k - 1 ];
- my $j_e = $subgroup[$k] - 1;
+ # Likewise, tabs are not compatible with outdenting..
+ if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
+ Warn(<<EOM);
+Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
+EOM
+ $rOpts->{'tabs'} = 0;
+ }
- # index i is the actual line number of a keyword
- my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] };
- my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] };
- my $num = $count_e - $count_b + 1;
+ if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
+ Warn(<<EOM);
+Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
+EOM
+ $rOpts->{'tabs'} = 0;
+ }
- # This subgroup runs from line $ib to line $ie-1, but may contain
- # blank lines
- if ( $num >= $Opt_size_min ) {
+ if ( !$rOpts->{'space-for-semicolon'} ) {
+ $want_left_space{'f'} = -1;
+ }
- # if there are blank lines, we require that at least $num lines
- # be non-blank up to the boundary with the next subgroup.
- my $nog_b = my $nog_e = 1;
- if ( @iblanks && !$Opt_blanks_delete ) {
- my $j_bb = $j_b + $num - 1;
- my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] };
- $nog_b = $count_bb - $count_b + 1 == $num;
+ if ( $rOpts->{'space-terminal-semicolon'} ) {
+ $want_left_space{';'} = 1;
+ }
- my $j_ee = $j_e - ( $num - 1 );
- my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] };
- $nog_e = $count_e - $count_ee + 1 == $num;
- }
- if ( $nog_b && $k > $kbeg ) {
- $insert_blank_after->( $i_b - 1 );
- }
- if ( $nog_e && $k < $kend ) {
- my ( $i_ep, $tok_ep, $count_ep ) = @{ $group[ $j_e + 1 ] };
- $insert_blank_after->( $i_ep - 1 );
- }
- }
+ # We should put an upper bound on any -sil=n value. Otherwise enormous
+ # files could be created by mistake.
+ for ( $rOpts->{'starting-indentation-level'} ) {
+ if ( $_ && $_ > 100 ) {
+ Warn(<<EOM);
+The value --starting-indentation-level=$_ is very large; a mistake? resetting to 0;
+EOM
+ $_ = 0;
}
- };
+ }
- my $delete_if_blank = sub {
- my ($i) = @_;
+ # implement outdenting preferences for keywords
+ %outdent_keyword = ();
+ my @okw = split_words( $rOpts->{'outdent-keyword-list'} );
+ unless (@okw) {
+ @okw = qw(next last redo goto return); # defaults
+ }
- # delete line $i if it is blank
- return unless ( $i >= 0 && $i < @{$rlines} );
- my $line_type = $rlines->[$i]->{_line_type};
- return if ( $line_type ne 'CODE' );
- my $code_type = $rlines->[$i]->{_code_type};
- if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; }
- return;
- };
-
- my $delete_inner_blank_lines = sub {
-
- # always remove unwanted trailing blank lines from our list
- return unless (@iblanks);
- while ( my $ibl = pop(@iblanks) ) {
- if ( $ibl < $iend ) { push @iblanks, $ibl; last }
- $iend = $ibl;
+ # FUTURE: if not a keyword, assume that it is an identifier
+ foreach (@okw) {
+ if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
+ $outdent_keyword{$_} = 1;
}
+ else {
+ Warn("ignoring '$_' in -okwl list; not a perl keyword");
+ }
+ }
- # now mark mark interior blank lines for deletion if requested
- return unless ($Opt_blanks_delete);
-
- while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 }
-
- };
-
- my $end_group = sub {
+ # setup hash for -kpit option
+ %keyword_paren_inner_tightness = ();
+ my $kpit_value = $rOpts->{'keyword-paren-inner-tightness'};
+ if ( defined($kpit_value) && $kpit_value != 1 ) {
+ my @kpit =
+ split_words( $rOpts->{'keyword-paren-inner-tightness-list'} );
+ unless (@kpit) {
+ @kpit = qw(if elsif unless while until for foreach); # defaults
+ }
- # end a group of keywords
- my ($bad_ending) = @_;
- if ( defined($ibeg) && $ibeg >= 0 ) {
+ # we will allow keywords and user-defined identifiers
+ foreach (@kpit) {
+ $keyword_paren_inner_tightness{$_} = $kpit_value;
+ }
+ }
- # then handle sufficiently large groups
- if ( $count >= $Opt_size_min ) {
+ # implement user whitespace preferences
+ if ( my @q = split_words( $rOpts->{'want-left-space'} ) ) {
+ @want_left_space{@q} = (1) x scalar(@q);
+ }
- $number_of_groups_seen++;
+ if ( my @q = split_words( $rOpts->{'want-right-space'} ) ) {
+ @want_right_space{@q} = (1) x scalar(@q);
+ }
- # do any blank deletions regardless of the count
- $delete_inner_blank_lines->();
+ if ( my @q = split_words( $rOpts->{'nowant-left-space'} ) ) {
+ @want_left_space{@q} = (-1) x scalar(@q);
+ }
- if ( $ibeg > 0 ) {
- my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type};
+ if ( my @q = split_words( $rOpts->{'nowant-right-space'} ) ) {
+ @want_right_space{@q} = (-1) x scalar(@q);
+ }
+ if ( $rOpts->{'dump-want-left-space'} ) {
+ dump_want_left_space(*STDOUT);
+ Exit(0);
+ }
- # patch for hash bang line which is not currently marked as
- # a comment; mark it as a comment
- if ( $ibeg == 1 && !$code_type ) {
- my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text};
- $code_type = 'BC'
- if ( $line_text && $line_text =~ /^#/ );
- }
+ if ( $rOpts->{'dump-want-right-space'} ) {
+ dump_want_right_space(*STDOUT);
+ Exit(0);
+ }
- # Do not insert a blank after a comment
- # (this could be subject to a flag in the future)
- if ( $code_type !~ /(BC|SBC|SBCX)/ ) {
- if ( $Opt_blanks_before == INSERT ) {
- $insert_blank_after->( $ibeg - 1 );
+ # default keywords for which space is introduced before an opening paren
+ # (at present, including them messes up vertical alignment)
+ my @sak = qw(my local our and or xor err eq ne if else elsif until
+ unless while for foreach return switch case given when catch);
+ %space_after_keyword = map { $_ => 1 } @sak;
- }
- elsif ( $Opt_blanks_before == DELETE ) {
- $delete_if_blank->( $ibeg - 1 );
- }
- }
- }
+ # first remove any or all of these if desired
+ if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
- # We will only put blanks before code lines. We could loosen
- # this rule a little, but we have to be very careful because
- # for example we certainly don't want to drop a blank line
- # after a line like this:
- # my $var = <<EOM;
- if ( $line_type eq 'CODE' && defined($K_first) ) {
+ # -nsak='*' selects all the above keywords
+ if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) }
+ @space_after_keyword{@q} = (0) x scalar(@q);
+ }
- # - Do not put a blank before a line of different level
- # - Do not put a blank line if we ended the search badly
- # - Do not put a blank at the end of the file
- # - Do not put a blank line before a hanging side comment
- my $level = $rLL->[$K_first]->[_LEVEL_];
- my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
+ # then allow user to add to these defaults
+ if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) {
+ @space_after_keyword{@q} = (1) x scalar(@q);
+ }
- if ( $level == $level_beg
- && $ci_level == 0
- && !$bad_ending
- && $iend < @{$rlines}
- && $CODE_type ne 'HSC' )
- {
- if ( $Opt_blanks_after == INSERT ) {
- $insert_blank_after->($iend);
- }
- elsif ( $Opt_blanks_after == DELETE ) {
- $delete_if_blank->( $iend + 1 );
- }
- }
- }
+ # implement user break preferences
+ my $break_after = sub {
+ my @toks = @_;
+ foreach my $tok (@toks) {
+ if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
+ my $lbs = $left_bond_strength{$tok};
+ my $rbs = $right_bond_strength{$tok};
+ if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
+ ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
+ ( $lbs, $rbs );
}
- $split_into_sub_groups->();
}
-
- # reset for another group
- $ibeg = -1;
- $iend = undef;
- $level_beg = -1;
- $K_closing = undef;
- @group = ();
- @subgroup = ();
- @iblanks = ();
};
- my $find_container_end = sub {
-
- # If the keyword lines ends with an open token, find the closing token
- # '$K_closing' so that we can easily skip past the contents of the
- # container.
- return if ( $K_last <= $K_first );
- my $KK = $K_last;
- my $type_last = $rLL->[$KK]->[_TYPE_];
- my $tok_last = $rLL->[$KK]->[_TOKEN_];
- if ( $type_last eq '#' ) {
- $KK = $self->K_previous_nonblank($KK);
- $tok_last = $rLL->[$KK]->[_TOKEN_];
- }
- if ( $KK > $K_first && $tok_last =~ /^[\(\{\[]$/ ) {
-
- my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
- my $lev = $rLL->[$KK]->[_LEVEL_];
- if ( $lev == $level_beg ) {
- $K_closing = $K_closing_container->{$type_sequence};
+ my $break_before = sub {
+ my @toks = @_;
+ foreach my $tok (@toks) {
+ my $lbs = $left_bond_strength{$tok};
+ my $rbs = $right_bond_strength{$tok};
+ if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
+ ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
+ ( $lbs, $rbs );
}
}
};
- my $add_to_group = sub {
- my ( $i, $token, $level ) = @_;
+ $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
+ $break_before->(@all_operators)
+ if ( $rOpts->{'break-before-all-operators'} );
- # End the previous group if we have reached the maximum
- # group size
- if ( $Opt_size_max && @group >= $Opt_size_max ) {
- $end_group->();
- }
+ $break_after->( split_words( $rOpts->{'want-break-after'} ) );
+ $break_before->( split_words( $rOpts->{'want-break-before'} ) );
- if ( @group == 0 ) {
- $ibeg = $i;
- $level_beg = $level;
- $count = 0;
- }
+ # make note if breaks are before certain key types
+ %want_break_before = ();
+ foreach my $tok ( @all_operators, ',' ) {
+ $want_break_before{$tok} =
+ $left_bond_strength{$tok} < $right_bond_strength{$tok};
+ }
- $count++;
- $iend = $i;
+ # Coordinate ?/: breaks, which must be similar
+ if ( !$want_break_before{':'} ) {
+ $want_break_before{'?'} = $want_break_before{':'};
+ $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
+ $left_bond_strength{'?'} = NO_BREAK;
+ }
- # New sub-group?
- if ( !@group || $token ne $group[-1]->[1] ) {
- push @subgroup, scalar(@group);
- }
- push @group, [ $i, $token, $count ];
+ # Only make a hash entry for the next parameters if values are defined.
+ # That allows a quick check to be made later.
+ %break_before_container_types = ();
+ for ( $rOpts->{'break-before-hash-brace'} ) {
+ $break_before_container_types{'{'} = $_ if $_ && $_ > 0;
+ }
+ for ( $rOpts->{'break-before-square-bracket'} ) {
+ $break_before_container_types{'['} = $_ if $_ && $_ > 0;
+ }
+ for ( $rOpts->{'break-before-paren'} ) {
+ $break_before_container_types{'('} = $_ if $_ && $_ > 0;
+ }
- # remember if this line ends in an open container
- $find_container_end->();
+ %container_indentation_options = ();
+ foreach my $pair (
+ [ 'break-before-hash-brace-and-indent', '{' ],
+ [ 'break-before-square-bracket-and-indent', '[' ],
+ [ 'break-before-paren-and-indent', '(' ],
+ )
+ {
+ my ( $key, $tok ) = @{$pair};
+ my $opt = $rOpts->{$key};
+ if ( defined($opt) && $opt > 0 && $break_before_container_types{$tok} )
+ {
- return;
- };
+ # (1) -lp is not compatable with opt=2, silently set to opt=0
+ # (2) opt=0 and 2 give same result if -i=-ci; but opt=0 is faster
+ if ( $opt == 2 ) {
+ if ( $rOpts->{'line-up-parentheses'}
+ || $rOpts->{'indent-columns'} ==
+ $rOpts->{'continuation-indentation'} )
+ {
+ $opt = 0;
+ }
+ }
+ $container_indentation_options{$tok} = $opt;
+ }
+ }
- ###################################
- # loop over all lines of the source
- ###################################
- $end_group->();
- my $i = -1;
- foreach my $line_of_tokens ( @{$rlines} ) {
+ # Define here tokens which may follow the closing brace of a do statement
+ # on the same line, as in:
+ # } while ( $something);
+ my @dof = qw(until while unless if ; : );
+ push @dof, ',';
+ @is_do_follower{@dof} = (1) x scalar(@dof);
- $i++;
- last
- if ( $Opt_repeat_count > 0
- && $number_of_groups_seen >= $Opt_repeat_count );
+ # What tokens may follow the closing brace of an if or elsif block?
+ # Not used. Previously used for cuddled else, but no longer needed.
+ %is_if_brace_follower = ();
- $CODE_type = "";
- $K_first = undef;
- $K_last = undef;
- $line_type = $line_of_tokens->{_line_type};
+ # nothing can follow the closing curly of an else { } block:
+ %is_else_brace_follower = ();
- # always end a group at non-CODE
- if ( $line_type ne 'CODE' ) { $end_group->(); next }
+ # what can follow a multi-line anonymous sub definition closing curly:
+ my @asf = qw# ; : => or and && || ~~ !~~ ) #;
+ push @asf, ',';
+ @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf);
- $CODE_type = $line_of_tokens->{_code_type};
+ # what can follow a one-line anonymous sub closing curly:
+ # one-line anonymous subs also have ']' here...
+ # see tk3.t and PP.pm
+ my @asf1 = qw# ; : => or and && || ) ] ~~ !~~ #;
+ push @asf1, ',';
+ @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1);
- # end any group at a format skipping line
- if ( $CODE_type && $CODE_type eq 'FS' ) {
- $end_group->();
- next;
- }
+ # What can follow a closing curly of a block
+ # which is not an if/elsif/else/do/sort/map/grep/eval/sub
+ # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
+ my @obf = qw# ; : => or and && || ) #;
+ push @obf, ',';
+ @is_other_brace_follower{@obf} = (1) x scalar(@obf);
- # continue in a verbatim (VB) type; it may be quoted text
- if ( $CODE_type eq 'VB' ) {
- if ( $ibeg >= 0 ) { $iend = $i; }
- next;
- }
+ $right_bond_strength{'{'} = WEAK;
+ $left_bond_strength{'{'} = VERY_STRONG;
- # and continue in blank (BL) types
- if ( $CODE_type eq 'BL' ) {
- if ( $ibeg >= 0 ) {
- $iend = $i;
- push @{iblanks}, $i;
+ # make -l=0 equal to -l=infinite
+ if ( !$rOpts->{'maximum-line-length'} ) {
+ $rOpts->{'maximum-line-length'} = 1000000;
+ }
- # propagate current subgroup token
- my $tok = $group[-1]->[1];
- push @group, [ $i, $tok, $count ];
+ # make -lbl=0 equal to -lbl=infinite
+ if ( !$rOpts->{'long-block-line-count'} ) {
+ $rOpts->{'long-block-line-count'} = 1000000;
+ }
+
+ my $ole = $rOpts->{'output-line-ending'};
+ if ($ole) {
+ my %endings = (
+ dos => "\015\012",
+ win => "\015\012",
+ mac => "\015",
+ unix => "\012",
+ );
+
+ # Patch for RT #99514, a memoization issue.
+ # Normally, the user enters one of 'dos', 'win', etc, and we change the
+ # value in the options parameter to be the corresponding line ending
+ # character. But, if we are using memoization, on later passes through
+ # here the option parameter will already have the desired ending
+ # character rather than the keyword 'dos', 'win', etc. So
+ # we must check to see if conversion has already been done and, if so,
+ # bypass the conversion step.
+ my %endings_inverted = (
+ "\015\012" => 'dos',
+ "\015\012" => 'win',
+ "\015" => 'mac',
+ "\012" => 'unix',
+ );
+
+ if ( defined( $endings_inverted{$ole} ) ) {
+
+ # we already have valid line ending, nothing more to do
+ }
+ else {
+ $ole = lc $ole;
+ unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
+ my $str = join " ", keys %endings;
+ Die(<<EOM);
+Unrecognized line ending '$ole'; expecting one of: $str
+EOM
+ }
+ if ( $rOpts->{'preserve-line-endings'} ) {
+ Warn("Ignoring -ple; conflicts with -ole\n");
+ $rOpts->{'preserve-line-endings'} = undef;
}
- next;
}
+ }
- # examine the first token of this line
- my $rK_range = $line_of_tokens->{_rK_range};
- ( $K_first, $K_last ) = @{$rK_range};
- if ( !defined($K_first) ) {
+ # hashes used to simplify setting whitespace
+ %tightness = (
+ '{' => $rOpts->{'brace-tightness'},
+ '}' => $rOpts->{'brace-tightness'},
+ '(' => $rOpts->{'paren-tightness'},
+ ')' => $rOpts->{'paren-tightness'},
+ '[' => $rOpts->{'square-bracket-tightness'},
+ ']' => $rOpts->{'square-bracket-tightness'},
+ );
+ %matching_token = (
+ '{' => '}',
+ '(' => ')',
+ '[' => ']',
+ '?' => ':',
+ );
- # Unexpected blank line..shouldn't happen
- # $rK_range should be defined for line type CODE
- Warn(
-"Programming Error: Unexpected Blank Line in sub 'keyword_group_scan'. Ignoring"
+ # note any requested old line breaks to keep
+ %keep_break_before_type = ();
+ %keep_break_after_type = ();
+ if ( !$rOpts->{'ignore-old-breakpoints'} ) {
+
+ # FIXME: could check for valid types here.
+ # Invalid types are harmless but probably not intended.
+ my @types;
+ @types = ( split_words( $rOpts->{'keep-old-breakpoints-before'} ) );
+ @keep_break_before_type{@types} = (1) x scalar(@types);
+ @types = ( split_words( $rOpts->{'keep-old-breakpoints-after'} ) );
+ @keep_break_after_type{@types} = (1) x scalar(@types);
+ }
+ else {
+ if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
+ Warn("Conflicting parameters: -iob and -bom; -bom will be ignored\n"
);
- return $rhash_of_desires;
+ $rOpts->{'break-at-old-method-breakpoints'} = 0;
+ }
+ if ( $rOpts->{'break-at-old-comma-breakpoints'} ) {
+ Warn("Conflicting parameters: -iob and -boc; -boc will be ignored\n"
+ );
+ $rOpts->{'break-at-old-comma-breakpoints'} = 0;
+ }
+ if ( $rOpts->{'break-at-old-semicolon-breakpoints'} ) {
+ Warn("Conflicting parameters: -iob and -bos; -bos will be ignored\n"
+ );
+ $rOpts->{'break-at-old-semicolon-breakpoints'} = 0;
+ }
+ if ( $rOpts->{'keep-old-breakpoints-before'} ) {
+ Warn("Conflicting parameters: -iob and -kbb; -kbb will be ignored\n"
+ );
+ $rOpts->{'keep-old-breakpoints-before'} = "";
+ }
+ if ( $rOpts->{'keep-old-breakpoints-after'} ) {
+ Warn("Conflicting parameters: -iob and -kba; -kba will be ignored\n"
+ );
+ $rOpts->{'keep-old-breakpoints-after'} = "";
}
- my $level = $rLL->[$K_first]->[_LEVEL_];
- my $type = $rLL->[$K_first]->[_TYPE_];
- my $token = $rLL->[$K_first]->[_TOKEN_];
- my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
+ # Note: These additional parameters are made inactive by -iob.
+ # They are silently turned off here because they are on by default.
+ # We would generate unexpected warnings if we issued a warning.
+ $rOpts->{'break-at-old-keyword-breakpoints'} = 0;
+ $rOpts->{'break-at-old-logical-breakpoints'} = 0;
+ $rOpts->{'break-at-old-ternary-breakpoints'} = 0;
+ $rOpts->{'break-at-old-attribute-breakpoints'} = 0;
+ }
- # see if this is a code type we seek (i.e. comment)
- if ( $CODE_type
- && $Opt_comment_pattern
- && $CODE_type =~ /$Opt_comment_pattern/o )
- {
+ #############################################################
+ # Make global vars for frequently used options for efficiency
+ #############################################################
- my $tok = $CODE_type;
+ $rOpts_closing_side_comment_maximum_text =
+ $rOpts->{'closing-side-comment-maximum-text'};
+ $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
+ $rOpts_indent_columns = $rOpts->{'indent-columns'};
+ $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
+ $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
+ $rOpts_variable_maximum_line_length =
+ $rOpts->{'variable-maximum-line-length'};
+ $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
+ $rOpts_block_brace_vertical_tightness =
+ $rOpts->{'block-brace-vertical-tightness'};
+ $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
+ $rOpts_maximum_consecutive_blank_lines =
+ $rOpts->{'maximum-consecutive-blank-lines'};
+ $rOpts_recombine = $rOpts->{'recombine'};
+ $rOpts_add_newlines = $rOpts->{'add-newlines'};
+ $rOpts_break_at_old_comma_breakpoints =
+ $rOpts->{'break-at-old-comma-breakpoints'};
+ $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
+ $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
+ $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
+ $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
+ $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
+ $rOpts_break_at_old_semicolon_breakpoints =
+ $rOpts->{'break-at-old-semicolon-breakpoints'};
+
+ $rOpts_tee_side_comments = $rOpts->{'tee-side-comments'};
+ $rOpts_tee_block_comments = $rOpts->{'tee-block-comments'};
+ $rOpts_tee_pod = $rOpts->{'tee-pod'};
+ $rOpts_delete_side_comments = $rOpts->{'delete-side-comments'};
+ $rOpts_delete_closing_side_comments =
+ $rOpts->{'delete-closing-side-comments'};
+ $rOpts_format_skipping = $rOpts->{'format-skipping'};
+ $rOpts_indent_only = $rOpts->{'indent-only'};
+ $rOpts_static_block_comments = $rOpts->{'static-block-comments'};
- # Continuing a group
- if ( $ibeg >= 0 && $level == $level_beg ) {
- $add_to_group->( $i, $tok, $level );
- }
+ $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
+ $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
+ $rOpts_freeze_whitespace = $rOpts->{'freeze-whitespace'};
- # Start new group
- else {
+ $rOpts_function_paren_vertical_alignment =
+ $rOpts->{'function-paren-vertical-alignment'};
+ $rOpts_ignore_side_comment_lengths =
+ $rOpts->{'ignore-side-comment-lengths'};
- # first end old group if any; we might be starting new
- # keywords at different level
- if ( $ibeg > 0 ) { $end_group->(); }
- $add_to_group->( $i, $tok, $level );
+ $rOpts_break_at_old_attribute_breakpoints =
+ $rOpts->{'break-at-old-attribute-breakpoints'};
+ $rOpts_break_at_old_keyword_breakpoints =
+ $rOpts->{'break-at-old-keyword-breakpoints'};
+ $rOpts_break_at_old_logical_breakpoints =
+ $rOpts->{'break-at-old-logical-breakpoints'};
+ $rOpts_break_at_old_ternary_breakpoints =
+ $rOpts->{'break-at-old-ternary-breakpoints'};
+ $rOpts_short_concatenation_item_length =
+ $rOpts->{'short-concatenation-item-length'};
+ $rOpts_closing_side_comment_else_flag =
+ $rOpts->{'closing-side-comment-else-flag'};
+ $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
+
+ # Note that both opening and closing tokens can access the opening
+ # and closing flags of their container types.
+ %opening_vertical_tightness = (
+ '(' => $rOpts->{'paren-vertical-tightness'},
+ '{' => $rOpts->{'brace-vertical-tightness'},
+ '[' => $rOpts->{'square-bracket-vertical-tightness'},
+ ')' => $rOpts->{'paren-vertical-tightness'},
+ '}' => $rOpts->{'brace-vertical-tightness'},
+ ']' => $rOpts->{'square-bracket-vertical-tightness'},
+ );
+
+ %closing_vertical_tightness = (
+ '(' => $rOpts->{'paren-vertical-tightness-closing'},
+ '{' => $rOpts->{'brace-vertical-tightness-closing'},
+ '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
+ ')' => $rOpts->{'paren-vertical-tightness-closing'},
+ '}' => $rOpts->{'brace-vertical-tightness-closing'},
+ ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
+ );
+
+ # assume flag for '>' same as ')' for closing qw quotes
+ %closing_token_indentation = (
+ ')' => $rOpts->{'closing-paren-indentation'},
+ '}' => $rOpts->{'closing-brace-indentation'},
+ ']' => $rOpts->{'closing-square-bracket-indentation'},
+ '>' => $rOpts->{'closing-paren-indentation'},
+ );
+
+ # flag indicating if any closing tokens are indented
+ $some_closing_token_indentation =
+ $rOpts->{'closing-paren-indentation'}
+ || $rOpts->{'closing-brace-indentation'}
+ || $rOpts->{'closing-square-bracket-indentation'}
+ || $rOpts->{'indent-closing-brace'};
+
+ %opening_token_right = (
+ '(' => $rOpts->{'opening-paren-right'},
+ '{' => $rOpts->{'opening-hash-brace-right'},
+ '[' => $rOpts->{'opening-square-bracket-right'},
+ );
+
+ %stack_opening_token = (
+ '(' => $rOpts->{'stack-opening-paren'},
+ '{' => $rOpts->{'stack-opening-hash-brace'},
+ '[' => $rOpts->{'stack-opening-square-bracket'},
+ );
+
+ %stack_closing_token = (
+ ')' => $rOpts->{'stack-closing-paren'},
+ '}' => $rOpts->{'stack-closing-hash-brace'},
+ ']' => $rOpts->{'stack-closing-square-bracket'},
+ );
+
+ # Create a table of maximum line length vs level for later efficient use.
+ # We will make the tables very long to be sure it will not be exceeded.
+ # But we have to choose a fixed length. A check will be made at the start
+ # of sub 'finish_formatting' to be sure it is not exceeded. Note, some of
+ # my standard test problems have indentation levels of about 150, so this
+ # should be fairly large. If the choice of a maximum level ever becomes
+ # an issue then these table values could be returned in a sub with a simple
+ # memoization scheme.
+
+ # Also create a table of the maximum spaces available for text due to the
+ # level only. If a line has continuation indentation, then that space must
+ # be subtracted from the table value. This table is used for preliminary
+ # estimates in welding, extended_ci, BBX, and marking short blocks.
+ my $level_max = 1000;
+
+ # The basic scheme:
+ foreach my $level ( 0 .. $level_max ) {
+ my $indent = $level * $rOpts_indent_columns;
+ $maximum_line_length_at_level[$level] = $rOpts_maximum_line_length;
+ $maximum_text_length_at_level[$level] =
+ $rOpts_maximum_line_length - $indent;
+ }
+
+ # Correct the maximum_text_length table if the -wc=n flag is used
+ $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
+ if ($rOpts_whitespace_cycle) {
+ if ( $rOpts_whitespace_cycle > 0 ) {
+ foreach my $level ( 0 .. $level_max ) {
+ my $level_mod = $level % $rOpts_whitespace_cycle;
+ my $indent = $level_mod * $rOpts_indent_columns;
+ $maximum_text_length_at_level[$level] =
+ $rOpts_maximum_line_length - $indent;
}
- next;
}
+ else {
+ $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'} = 0;
+ }
+ }
- # See if it is a keyword we seek, but never start a group in a
- # continuation line; the code may be badly formatted.
- if ( $ci_level == 0
- && $type eq 'k'
- && $token =~ /$Opt_pattern/o )
- {
+ # Correct the tables if the -vmll flag is used. These values override the
+ # previous values.
+ if ($rOpts_variable_maximum_line_length) {
+ foreach my $level ( 0 .. $level_max ) {
+ $maximum_text_length_at_level[$level] = $rOpts_maximum_line_length;
+ $maximum_line_length_at_level[$level] =
+ $rOpts_maximum_line_length + $level * $rOpts_indent_columns;
+ }
+ }
- # Continuing a keyword group
- if ( $ibeg >= 0 && $level == $level_beg ) {
- $add_to_group->( $i, $token, $level );
- }
+ initialize_weld_nested_exclusion_rules($rOpts);
+ initialize_line_up_parentheses_exclusion_rules($rOpts);
+ return;
+}
- # Start new keyword group
- else {
+sub initialize_weld_nested_exclusion_rules {
+ my ($rOpts) = @_;
+ %weld_nested_exclusion_rules = ();
- # first end old group if any; we might be starting new
- # keywords at different level
- if ( $ibeg > 0 ) { $end_group->(); }
- $add_to_group->( $i, $token, $level );
- }
+ my $opt_name = 'weld-nested-exclusion-list';
+ my $str = $rOpts->{$opt_name};
+ return unless ($str);
+ $str =~ s/^\s+//;
+ $str =~ s/\s+$//;
+ return unless ($str);
+
+ # There are four container tokens.
+ my %token_keys = (
+ '(' => '(',
+ '[' => '[',
+ '{' => '{',
+ 'q' => 'q',
+ );
+
+ # We are parsing an exclusion list for nested welds. The list is a string
+ # with spaces separating any number of items. Each item consists of three
+ # pieces of information:
+ # <optional position> <optional type> <type of container>
+ # < ^ or . > < k or K > < ( [ { >
+
+ # The last character is the required container type and must be one of:
+ # ( = paren
+ # [ = square bracket
+ # { = brace
+
+ # An optional leading position indicator:
+ # ^ means the leading token position in the weld
+ # . means a secondary token position in the weld
+ # no position indicator means all positions match
+
+ # An optional alphanumeric character between the position and container
+ # token selects to which the rule applies:
+ # k = any keyword
+ # K = any non-keyword
+ # f = function call
+ # F = not a function call
+ # w = function or keyword
+ # W = not a function or keyword
+ # no letter means any preceding type matches
+
+ # Examples:
+ # ^( - the weld must not start with a paren
+ # .( - the second and later tokens may not be parens
+ # ( - no parens in weld
+ # ^K( - exclude a leading paren not preceded by a keyword
+ # .k( - exclude a secondary paren preceded by a keyword
+ # [ { - exclude all brackets and braces
+
+ my @items = split /\s+/, $str;
+ my $msg1;
+ my $msg2;
+ foreach my $item (@items) {
+ my $item_save = $item;
+ my $tok = chop($item);
+ my $key = $token_keys{$tok};
+ if ( !defined($key) ) {
+ $msg1 .= " '$item_save'";
next;
}
+ if ( !defined( $weld_nested_exclusion_rules{$key} ) ) {
+ $weld_nested_exclusion_rules{$key} = [];
+ }
+ my $rflags = $weld_nested_exclusion_rules{$key};
- # This is not one of our keywords, but we are in a keyword group
- # so see if we should continue or quit
- elsif ( $ibeg >= 0 ) {
+ # A 'q' means do not weld quotes
+ if ( $tok eq 'q' ) {
+ $rflags->[0] = '*';
+ $rflags->[1] = '*';
+ next;
+ }
- # - bail out on a large level change; we may have walked into a
- # data structure or anoymous sub code.
- if ( $level > $level_beg + 1 || $level < $level_beg ) {
- $end_group->();
+ my $pos = '*';
+ my $select = '*';
+ if ($item) {
+ if ( $item =~ /^([\^\.])?([kKfFwW])?$/ ) {
+ $pos = $1 if ($1);
+ $select = $2 if ($2);
+ }
+ else {
+ $msg1 .= " '$item_save'";
next;
}
+ }
- # - keep going on a continuation line of the same level, since
- # it is probably a continuation of our previous keyword,
- # - and keep going past hanging side comments because we never
- # want to interrupt them.
- if ( ( ( $level == $level_beg ) && $ci_level > 0 )
- || $CODE_type eq 'HSC' )
- {
- $iend = $i;
- next;
+ my $err;
+ if ( $pos eq '^' || $pos eq '*' ) {
+ if ( defined( $rflags->[0] ) && $rflags->[0] ne $select ) {
+ $err = 1;
}
-
- # - continue if if we are within in a container which started with
- # the line of the previous keyword.
- if ( defined($K_closing) && $K_first <= $K_closing ) {
-
- # continue if entire line is within container
- if ( $K_last <= $K_closing ) { $iend = $i; next }
-
- # continue at ); or }; or ];
- my $KK = $K_closing + 1;
- if ( $rLL->[$KK]->[_TYPE_] eq ';' ) {
- if ( $KK < $K_last ) {
- if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK }
- if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' ) {
- $end_group->(1);
- next;
- }
- }
- $iend = $i;
- next;
- }
-
- $end_group->(1);
- next;
+ $rflags->[0] = $select;
+ }
+ if ( $pos eq '.' || $pos eq '*' ) {
+ if ( defined( $rflags->[1] ) && $rflags->[1] ne $select ) {
+ $err = 1;
}
-
- # - end the group if none of the above
- $end_group->();
- next;
+ $rflags->[1] = $select;
}
-
- # not in a keyword group; continue
- else { next }
+ if ($err) { $msg2 .= " '$item_save'"; }
}
-
- # end of loop over all lines
- $end_group->();
- return $rhash_of_desires;
+ if ($msg1) {
+ Warn(<<EOM);
+Unexpecting symbol(s) encountered in --$opt_name will be ignored:
+$msg1
+EOM
+ }
+ if ($msg2) {
+ Warn(<<EOM);
+Multiple specifications were encountered in the --weld-nested-exclusion-list for:
+$msg2
+Only the last will be used.
+EOM
+ }
+ return;
}
-sub break_lines {
-
- # Loop over old lines to set new line break points
-
- my $self = shift;
- my $rlines = $self->{rlines};
-
- # Note for RT#118553, leave only one newline at the end of a file.
- # Example code to do this is in comments below:
- # my $Opt_trim_ending_blank_lines = 0;
- # if ($Opt_trim_ending_blank_lines) {
- # while ( my $line_of_tokens = pop @{$rlines} ) {
- # my $line_type = $line_of_tokens->{_line_type};
- # if ( $line_type eq 'CODE' ) {
- # my $CODE_type = $line_of_tokens->{_code_type};
- # next if ( $CODE_type eq 'BL' );
- # }
- # push @{$rlines}, $line_of_tokens;
- # last;
- # }
- # }
-
- # But while this would be a trivial update, it would have very undesirable
- # side effects when perltidy is run from within an editor on a small snippet.
- # So this is best done with a separate filter, such
- # as 'delete_ending_blank_lines.pl' in the examples folder.
-
- # Flag to prevent blank lines when POD occurs in a format skipping sect.
- my $in_format_skipping_section;
-
- # set locations for blanks around long runs of keywords
- my $rwant_blank_line_after = $self->keyword_group_scan();
-
- my $line_type = "";
- my $i = -1;
- foreach my $line_of_tokens ( @{$rlines} ) {
- $i++;
-
- # insert blank lines requested for keyword sequences
- if ( $i > 0
- && defined( $rwant_blank_line_after->{ $i - 1 } )
- && $rwant_blank_line_after->{ $i - 1 } == 1 )
- {
- $self->want_blank_line();
+sub initialize_line_up_parentheses_exclusion_rules {
+ my ($rOpts) = @_;
+ %line_up_parentheses_exclusion_rules = ();
+ my $opt_name = 'line-up-parentheses-exclusion-list';
+ my $str = $rOpts->{$opt_name};
+ return unless ($str);
+ $str =~ s/^\s+//;
+ $str =~ s/\s+$//;
+ return unless ($str);
+
+ # The format is space separated items, where each item must consist of a
+ # string with a token type preceded by an optional text token and followed
+ # by an integer:
+ # For example:
+ # W(1
+ # = (flag1)(key)(flag2), where
+ # flag1 = 'W'
+ # key = '('
+ # flag2 = '1'
+
+ my @items = split /\s+/, $str;
+ my $msg1;
+ my $msg2;
+ foreach my $item (@items) {
+ my $item_save = $item;
+ my ( $flag1, $key, $flag2 );
+ if ( $item =~ /^([^\(\]\{]*)?([\(\{\[])(\d)?$/ ) {
+ $flag1 = $1 if $1;
+ $key = $2 if $2;
+ $flag2 = $3 if $3;
}
-
- my $last_line_type = $line_type;
- $line_type = $line_of_tokens->{_line_type};
- my $input_line = $line_of_tokens->{_line_text};
-
- # _line_type codes are:
- # SYSTEM - system-specific code before hash-bang line
- # CODE - line of perl code (including comments)
- # POD_START - line starting pod, such as '=head'
- # POD - pod documentation text
- # POD_END - last line of pod section, '=cut'
- # HERE - text of here-document
- # HERE_END - last line of here-doc (target word)
- # FORMAT - format section
- # FORMAT_END - last line of format section, '.'
- # DATA_START - __DATA__ line
- # DATA - unidentified text following __DATA__
- # END_START - __END__ line
- # END - unidentified text following __END__
- # ERROR - we are in big trouble, probably not a perl script
-
- # put a blank line after an =cut which comes before __END__ and __DATA__
- # (required by podchecker)
- if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
- $file_writer_object->reset_consecutive_blank_lines();
- if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
- $self->want_blank_line();
- }
+ else {
+ $msg1 .= " '$item_save'";
+ next;
}
- # handle line of code..
- if ( $line_type eq 'CODE' ) {
-
- my $CODE_type = $line_of_tokens->{_code_type};
- $in_format_skipping_section = $CODE_type eq 'FS';
-
- # Handle blank lines
- if ( $CODE_type eq 'BL' ) {
-
- # If keep-old-blank-lines is zero, we delete all
- # old blank lines and let the blank line rules generate any
- # needed blanks.
-
- # We also delete lines requested by the keyword-group logic
- my $kgb_keep = !( defined( $rwant_blank_line_after->{$i} )
- && $rwant_blank_line_after->{$i} == 2 );
-
- # But the keep-old-blank-lines flag has priority over kgb flags
- $kgb_keep = 1 if ( $rOpts_keep_old_blank_lines == 2 );
-
- if ( $rOpts_keep_old_blank_lines && $kgb_keep ) {
- $self->flush();
- $file_writer_object->write_blank_code_line(
- $rOpts_keep_old_blank_lines == 2 );
- $last_line_leading_type = 'b';
- }
- next;
- }
- else {
-
- # let logger see all non-blank lines of code
- my $output_line_number = get_output_line_number();
- black_box( $line_of_tokens, $output_line_number );
- }
-
- # Handle Format Skipping (FS) and Verbatim (VB) Lines
- if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
- $self->write_unindented_line("$input_line");
- $file_writer_object->reset_consecutive_blank_lines();
- next;
- }
-
- # Handle block comment to be deleted
- elsif ( $CODE_type eq 'DEL' ) {
- $self->flush();
- next;
- }
-
- # Handle all other lines of code
- $self->print_line_of_tokens($line_of_tokens);
+ if ( !defined($key) ) {
+ $msg1 .= " '$item_save'";
+ next;
}
- # handle line of non-code..
- else {
-
- # set special flags
- my $skip_line = 0;
- my $tee_line = 0;
- if ( $line_type =~ /^POD/ ) {
+ # Check for valid flag1
+ if ( !defined($flag1) ) { $flag1 = '*' }
+ elsif ( $flag1 !~ /^[kKfFwW\*]$/ ) {
+ $msg1 .= " '$item_save'";
+ next;
+ }
- # Pod docs should have a preceding blank line. But stay
- # out of __END__ and __DATA__ sections, because
- # the user may be using this section for any purpose whatsoever
- if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
- if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; }
- if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
- if ( !$skip_line
- && !$in_format_skipping_section
- && $line_type eq 'POD_START'
- && !$saw_END_or_DATA_ )
- {
- $self->want_blank_line();
- }
- }
+ # Check for valid flag2
+ # 0 or blank: ignore container contents
+ # 1 all containers with sublists match
+ # 2 all containers with sublists, code blocks or ternary operators match
+ # ... this could be extended in the future
+ if ( !defined($flag2) ) { $flag2 = 0 }
+ elsif ( $flag2 !~ /^[012]$/ ) {
+ $msg1 .= " '$item_save'";
+ next;
+ }
- # leave the blank counters in a predictable state
- # after __END__ or __DATA__
- elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
- $file_writer_object->reset_consecutive_blank_lines();
- $saw_END_or_DATA_ = 1;
- }
+ if ( !defined( $line_up_parentheses_exclusion_rules{$key} ) ) {
+ $line_up_parentheses_exclusion_rules{$key} = [ $flag1, $flag2 ];
+ next;
+ }
- # write unindented non-code line
- if ( !$skip_line ) {
- if ($tee_line) { $file_writer_object->tee_on() }
- $self->write_unindented_line($input_line);
- if ($tee_line) { $file_writer_object->tee_off() }
- }
+ # check for multiple conflicting specifications
+ my $rflags = $line_up_parentheses_exclusion_rules{$key};
+ my $err;
+ if ( defined( $rflags->[0] ) && $rflags->[0] ne $flag1 ) {
+ $err = 1;
+ $rflags->[0] = $flag1;
+ }
+ if ( defined( $rflags->[1] ) && $rflags->[1] ne $flag2 ) {
+ $err = 1;
+ $rflags->[1] = $flag2;
}
+ $msg2 .= " '$item_save'" if ($err);
+ next;
}
- return;
-}
-
-{ ## Beginning of routine to check line hashes
-
- my %valid_line_hash;
-
- BEGIN {
-
- # These keys are defined for each line in the formatter
- # Each line must have exactly these quantities
- my @valid_line_keys = qw(
- _curly_brace_depth
- _ending_in_quote
- _guessed_indentation_level
- _line_number
- _line_text
- _line_type
- _paren_depth
- _quote_character
- _rK_range
- _square_bracket_depth
- _starting_in_quote
- _ended_in_blank_token
- _code_type
-
- _ci_level_0
- _level_0
- _nesting_blocks_0
- _nesting_tokens_0
- );
-
- @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys);
+ if ($msg1) {
+ Warn(<<EOM);
+Unexpecting symbol(s) encountered in --$opt_name will be ignored:
+$msg1
+EOM
}
-
- sub check_line_hashes {
- my $self = shift;
- $self->check_self_hash();
- my $rlines = $self->{rlines};
- foreach my $rline ( @{$rlines} ) {
- my $iline = $rline->{_line_number};
- my $line_type = $rline->{_line_type};
- check_keys( $rline, \%valid_line_hash,
- "Checkpoint: line number =$iline, line_type=$line_type", 1 );
- }
- return;
+ if ($msg2) {
+ Warn(<<EOM);
+Multiple specifications were encountered in the $opt_name at:
+$msg2
+Only the last will be used.
+EOM
}
-} ## End check line hashes
-
-sub write_line {
-
- # We are caching tokenized lines as they arrive and converting them to the
- # format needed for the final formatting.
- my ( $self, $line_of_tokens_old ) = @_;
- my $rLL = $self->{rLL};
- my $Klimit = $self->{Klimit};
- my $rlines_new = $self->{rlines};
-
- my $Kfirst;
- my $line_of_tokens = {};
- foreach my $key (
- qw(
- _curly_brace_depth
- _ending_in_quote
- _guessed_indentation_level
- _line_number
- _line_text
- _line_type
- _paren_depth
- _quote_character
- _square_bracket_depth
- _starting_in_quote
- )
- )
- {
- $line_of_tokens->{$key} = $line_of_tokens_old->{$key};
- }
-
- # Data needed by Logger
- $line_of_tokens->{_level_0} = 0;
- $line_of_tokens->{_ci_level_0} = 0;
- $line_of_tokens->{_nesting_blocks_0} = "";
- $line_of_tokens->{_nesting_tokens_0} = "";
-
- # Needed to avoid trimming quotes
- $line_of_tokens->{_ended_in_blank_token} = undef;
-
- my $line_type = $line_of_tokens_old->{_line_type};
- my $input_line_no = $line_of_tokens_old->{_line_number} - 1;
- if ( $line_type eq 'CODE' ) {
-
- my $rtokens = $line_of_tokens_old->{_rtokens};
- my $rtoken_type = $line_of_tokens_old->{_rtoken_type};
- my $rblock_type = $line_of_tokens_old->{_rblock_type};
- my $rcontainer_type = $line_of_tokens_old->{_rcontainer_type};
- my $rcontainer_environment =
- $line_of_tokens_old->{_rcontainer_environment};
- my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence};
- my $rlevels = $line_of_tokens_old->{_rlevels};
- my $rslevels = $line_of_tokens_old->{_rslevels};
- my $rci_levels = $line_of_tokens_old->{_rci_levels};
- my $rnesting_blocks = $line_of_tokens_old->{_rnesting_blocks};
- my $rnesting_tokens = $line_of_tokens_old->{_rnesting_tokens};
-
- my $jmax = @{$rtokens} - 1;
- if ( $jmax >= 0 ) {
- $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
- foreach my $j ( 0 .. $jmax ) {
-
- # Clip negative nesting depths to zero to avoid problems.
- # Negative values can occur in files with unbalanced containers
- my $slevel = $rslevels->[$j];
- if ( $slevel < 0 ) { $slevel = 0 }
-
- my @tokary;
- @tokary[
- _TOKEN_, _TYPE_,
- _BLOCK_TYPE_, _CONTAINER_TYPE_,
- _CONTAINER_ENVIRONMENT_, _TYPE_SEQUENCE_,
- _LEVEL_, _LEVEL_TRUE_,
- _SLEVEL_, _CI_LEVEL_,
- _LINE_INDEX_,
- ]
- = (
- $rtokens->[$j], $rtoken_type->[$j],
- $rblock_type->[$j], $rcontainer_type->[$j],
- $rcontainer_environment->[$j], $rtype_sequence->[$j],
- $rlevels->[$j], $rlevels->[$j],
- $slevel, $rci_levels->[$j],
- $input_line_no,
- );
- push @{$rLL}, \@tokary;
- }
-
- $Klimit = @{$rLL} - 1;
-
- # Need to remember if we can trim the input line
- $line_of_tokens->{_ended_in_blank_token} =
- $rtoken_type->[$jmax] eq 'b';
-
- $line_of_tokens->{_level_0} = $rlevels->[0];
- $line_of_tokens->{_ci_level_0} = $rci_levels->[0];
- $line_of_tokens->{_nesting_blocks_0} = $rnesting_blocks->[0];
- $line_of_tokens->{_nesting_tokens_0} = $rnesting_tokens->[0];
+ # Possible speedup: we could turn off -lp if it is not actually used
+ my $all_off = 1;
+ foreach my $key (qw# ( { [ #) {
+ my $rflags = $line_up_parentheses_exclusion_rules{$key};
+ if ( defined($rflags) ) {
+ my ( $flag1, $flag2 ) = @{$rflags};
+ if ( $flag1 && $flag1 ne '*' ) { $all_off = 0; last }
+ if ($flag2) { $all_off = 0; last }
}
}
+ if ($all_off) {
- $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ];
- $line_of_tokens->{_code_type} = "";
- $self->{Klimit} = $Klimit;
+ # FIXME: This speedup works but is currently deactivated because at
+ # present users of -lp could see some discontinuities in formatting,
+ # such as those involving the choice of breaks at '='. Only if/when
+ # these issues have been checked and resolved it should be reactivated
+ # as a speedup.
+ ## $rOpts->{'line-up-parentheses'} = "";
+ }
- push @{$rlines_new}, $line_of_tokens;
return;
}
sub initialize_whitespace_hashes {
- # initialize these global hashes, which control the use of
- # whitespace around tokens:
+ # This is called once before formatting begins to initialize these global
+ # hashes, which control the use of whitespace around tokens:
#
# %binary_ws_rules
# %want_left_space
>;
push( @spaces_right_side, ',' ); # avoids warning message
- # Note that we are in a BEGIN block here. Later in processing
+ %want_left_space = ();
+ %want_right_space = ();
+ %binary_ws_rules = ();
+
+ # Note that we setting defaults here. Later in processing
# the values of %want_left_space and %want_right_space
# may be overridden by any user settings specified by the
# -wls and -wrs parameters. However the binary_whitespace_rules
$binary_ws_rules{'R'}{'{'} = WS_NO;
$binary_ws_rules{'t'}{'L'} = WS_NO;
$binary_ws_rules{'t'}{'{'} = WS_NO;
+ $binary_ws_rules{'t'}{'='} = WS_OPTIONAL; # for signatures; fixes b1123
$binary_ws_rules{'}'}{'L'} = WS_NO;
$binary_ws_rules{'}'}{'{'} = WS_OPTIONAL; # RT#129850; was WS_NO
$binary_ws_rules{'$'}{'L'} = WS_NO;
$binary_ws_rules{'i'}{'Q'} = WS_YES;
$binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
- # FIXME: we could to split 'i' into variables and functions
- # and have no space for functions but space for variables. For now,
- # I have a special patch in the special rules below
$binary_ws_rules{'i'}{'('} = WS_NO;
$binary_ws_rules{'w'}{'('} = WS_NO;
sub set_whitespace_flags {
- # This routine examines each pair of nonblank tokens and
- # sets a flag indicating if white space is needed.
+ # This routine is called once per file to set whitespace flags for that
+ # file. This routine examines each pair of nonblank tokens and sets a flag
+ # indicating if white space is needed.
#
- # $rwhitespace_flags->[$j] is a flag indicating whether a white space
- # BEFORE token $j is needed, with the following values:
+ # $rwhitespace_flags->[$j] is a flag indicating whether a white space
+ # BEFORE token $j is needed, with the following values:
#
- # WS_NO = -1 do not want a space before token $j
+ # WS_NO = -1 do not want a space BEFORE token $j
# WS_OPTIONAL= 0 optional space or $j is a whitespace
- # WS_YES = 1 want a space before token $j
+ # WS_YES = 1 want a space BEFORE token $j
#
my $self = shift;
- my $rLL = $self->{rLL};
+ my $rLL = $self->[_rLL_];
+ use constant DEBUG_WHITE => 0;
+
+ my $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
+ my $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
+ my $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
+
+ my $rwhitespace_flags = [];
+ my $ris_function_call_paren = {};
- my $rwhitespace_flags = [];
+ my %is_for_foreach = ( 'for' => 1, 'foreach' => 1 );
+
+ my ( $token, $type, $block_type, $seqno, $input_line_no );
+ my (
+ $last_token, $last_type, $last_block_type,
+ $last_seqno, $last_input_line_no
+ );
- my ( $last_token, $last_type, $last_block_type, $last_input_line_no,
- $token, $type, $block_type, $input_line_no );
my $j_tight_closing_paren = -1;
$token = ' ';
$type = 'b';
$block_type = '';
+ $seqno = '';
$input_line_no = 0;
$last_token = ' ';
$last_type = 'b';
$last_block_type = '';
+ $last_seqno = '';
$last_input_line_no = 0;
my $jmax = @{$rLL} - 1;
&& $last_token eq '{'
&& $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
+ # Patch to count a sign separated from a number as a single token, as
+ # in the following line. Otherwise, it takes two steps to converge:
+ # deg2rad(- 0.5)
+ if ( ( $type eq 'm' || $type eq 'p' )
+ && $j < $jmax + 1
+ && $rLL->[ $j + 1 ]->[_TYPE_] eq 'b'
+ && $rLL->[ $j + 2 ]->[_TYPE_] eq 'n'
+ && $rLL->[ $j + 2 ]->[_TOKEN_] =~ /^\d/ )
+ {
+ $j_here = $j + 2;
+ }
+
# $j_next is where a closing token should be if
# the container has a single token
if ( $j_here + 1 > $jmax ) { return (WS_NO) }
return (WS_YES);
};
+ # Local hashes to set spaces around container tokens according to their
+ # sequence numbers. These are set as keywords are examined.
+ # They are controlled by the -kpit and -kpitl flags.
+ my %opening_container_inside_ws;
+ my %closing_container_inside_ws;
+ my $set_container_ws_by_keyword = sub {
+
+ return unless (%keyword_paren_inner_tightness);
+
+ my ( $word, $sequence_number ) = @_;
+
+ # We just saw a keyword (or other function name) followed by an opening
+ # paren. Now check to see if the following paren should have special
+ # treatment for its inside space. If so we set a hash value using the
+ # sequence number as key.
+ if ( $word && $sequence_number ) {
+ my $tightness = $keyword_paren_inner_tightness{$word};
+ if ( defined($tightness) && $tightness != 1 ) {
+ my $ws_flag = $tightness == 0 ? WS_YES : WS_NO;
+ $opening_container_inside_ws{$sequence_number} = $ws_flag;
+ $closing_container_inside_ws{$sequence_number} = $ws_flag;
+ }
+ }
+ };
+
+ my $ws_opening_container_override = sub {
+ my ( $ws, $sequence_number ) = @_;
+ return $ws unless (%opening_container_inside_ws);
+ if ($sequence_number) {
+ my $ws_override = $opening_container_inside_ws{$sequence_number};
+ if ($ws_override) { $ws = $ws_override }
+ }
+ return $ws;
+ };
+
+ my $ws_closing_container_override = sub {
+ my ( $ws, $sequence_number ) = @_;
+ return $ws unless (%closing_container_inside_ws);
+ if ($sequence_number) {
+ my $ws_override = $closing_container_inside_ws{$sequence_number};
+ if ($ws_override) { $ws = $ws_override }
+ }
+ return $ws;
+ };
+
# main loop over all tokens to define the whitespace flags
for ( my $j = 0 ; $j <= $jmax ; $j++ ) {
$last_token = $token;
$last_type = $type;
$last_block_type = $block_type;
+ $last_seqno = $seqno;
$last_input_line_no = $input_line_no;
$token = $rtokh->[_TOKEN_];
$type = $rtokh->[_TYPE_];
$block_type = $rtokh->[_BLOCK_TYPE_];
+ $seqno = $rtokh->[_TYPE_SEQUENCE_];
$input_line_no = $rtokh->[_LINE_INDEX_];
#---------------------------------------------------------------
$ws = $ws_in_container->($j);
}
}
+
+ # check for special cases which override the above rules
+ $ws = $ws_opening_container_override->( $ws, $last_seqno );
+
} # end setting space flag inside opening tokens
my $ws_1;
$ws_1 = $ws
- if FORMATTER_DEBUG_FLAG_WHITE;
+ if DEBUG_WHITE;
#---------------------------------------------------------------
# Whitespace Rules Section 2:
$ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
}
}
+
+ # check for special cases which override the above rules
+ $ws = $ws_closing_container_override->( $ws, $seqno );
+
} # end setting space flag inside closing tokens
my $ws_2;
$ws_2 = $ws
- if FORMATTER_DEBUG_FLAG_WHITE;
+ if DEBUG_WHITE;
#---------------------------------------------------------------
# Whitespace Rules Section 3:
}
my $ws_3;
$ws_3 = $ws
- if FORMATTER_DEBUG_FLAG_WHITE;
+ if DEBUG_WHITE;
#---------------------------------------------------------------
# Whitespace Rules Section 4:
# &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
# At present, the above & block is marked as type L/R so this case
# won't go through here.
- if ( $last_type eq '}' ) { $ws = WS_YES }
+ if ( $last_type eq '}' && $last_token ne ')' ) { $ws = WS_YES }
# NOTE: some older versions of Perl had occasional problems if
# spaces are introduced between keywords or functions and opening
$ws = WS_NO
unless ( $rOpts_space_keyword_paren
|| $space_after_keyword{$last_token} );
+
+ # Set inside space flag if requested
+ $set_container_ws_by_keyword->( $last_token, $seqno );
}
# Space between function and '('
# 'w' and 'i' checks for something like:
# myfun( &myfun( ->myfun(
# -----------------------------------------------------
- elsif (( $last_type =~ /^[wUG]$/ )
- || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
+
+ # Note that at this point an identifier may still have a leading
+ # arrow, but the arrow will be split off during token respacing.
+ # After that, the token may become a bare word without leading
+ # arrow. The point is, it is best to mark function call parens
+ # right here before that happens.
+ # Patch: added 'C' to prevent blinker, case b934, i.e. 'pi()'
+ # NOTE: this would be the place to allow spaces between repeated
+ # parens, like () () (), as in case c017, but I decided that would
+ # not be a good idea.
+ elsif (( $last_type =~ /^[wCUG]$/ )
+ || ( $last_type =~ /^[wi]$/ && $last_token =~ /^([\&]|->)/ ) )
{
- $ws = WS_NO unless ($rOpts_space_function_paren);
+ $ws = $rOpts_space_function_paren ? WS_YES : WS_NO;
+ $set_container_ws_by_keyword->( $last_token, $seqno );
+ $ris_function_call_paren->{$seqno} = 1;
}
# space between something like $i and ( in <<snippets/space2.in>>
# for $i ( 0 .. 20 ) {
- # FIXME: eventually, type 'i' needs to be split into multiple
+ # FIXME: eventually, type 'i' could be split into multiple
# token types so this can be a hardwired rule.
elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
$ws = WS_YES;
elsif ( $type eq 'i' ) {
# never a space before ->
- if ( $token =~ /^\-\>/ ) {
+ if ( substr( $token, 0, 2 ) eq '->' ) {
$ws = WS_NO;
}
}
$ws = WS_OPTIONAL if $last_type eq '-';
# never a space before ->
- if ( $token =~ /^\-\>/ ) {
+ if ( substr( $token, 0, 2 ) eq '->' ) {
$ws = WS_NO;
}
}
$ws = WS_NO;
}
}
+ elsif ( $type eq 'k' ) {
+
+ # Keywords 'for', 'foreach' are special cases for -kpit since the
+ # opening paren does not always immediately follow the keyword. So
+ # we have to search forward for the paren in this case. I have
+ # limited the search to 10 tokens ahead, just in case somebody
+ # has a big file and no opening paren. This should be enough for
+ # all normal code.
+ if ( $is_for_foreach{$token}
+ && %keyword_paren_inner_tightness
+ && defined( $keyword_paren_inner_tightness{$token} )
+ && $j < $jmax )
+ {
+ my $jp = $j;
+ for ( my $inc = 1 ; $inc < 10 ; $inc++ ) {
+ $jp++;
+ last if ( $jp > $jmax );
+ next unless ( $rLL->[$jp]->[_TOKEN_] eq '(' );
+ my $seqno = $rLL->[$jp]->[_TYPE_SEQUENCE_];
+ $set_container_ws_by_keyword->( $token, $seqno );
+ last;
+ }
+ }
+ }
my $ws_4;
$ws_4 = $ws
- if FORMATTER_DEBUG_FLAG_WHITE;
+ if DEBUG_WHITE;
#---------------------------------------------------------------
# Whitespace Rules Section 5:
# -data => $data;
if ( $ws == 0 && $input_line_no != $last_input_line_no ) { $ws = 1 }
- if ( ( $ws == 0 )
- && $j > 0
- && $j < $jmax
- && ( $last_type !~ /^[Zh]$/ ) )
- {
-
- # If this happens, we have a non-fatal but undesirable
- # hole in the above rules which should be patched.
- write_diagnostics(
- "WS flag is zero for tokens $last_token $token\n");
- }
-
$rwhitespace_flags->[$j] = $ws;
- FORMATTER_DEBUG_FLAG_WHITE && do {
+ DEBUG_WHITE && do {
my $str = substr( $last_token, 0, 15 );
$str .= ' ' x ( 16 - length($str) );
if ( !defined($ws_1) ) { $ws_1 = "*" }
if ( $rOpts->{'tight-secret-operators'} ) {
new_secret_operator_whitespace( $rLL, $rwhitespace_flags );
}
+ $self->[_ris_function_call_paren_] = $ris_function_call_paren;
return $rwhitespace_flags;
-} ## end sub set_whitespace_flags
-sub respace_tokens {
+} ## end sub set_whitespace_flags
- my $self = shift;
- return if $rOpts->{'indent-only'};
+sub dump_want_left_space {
+ my $fh = shift;
+ local $" = "\n";
+ $fh->print(<<EOM);
+These values are the main control of whitespace to the left of a token type;
+They may be altered with the -wls parameter.
+For a list of token types, use perltidy --dump-token-types (-dtt)
+ 1 means the token wants a space to its left
+-1 means the token does not want a space to its left
+------------------------------------------------------------------------
+EOM
+ foreach my $key ( sort keys %want_left_space ) {
+ $fh->print("$key\t$want_left_space{$key}\n");
+ }
+ return;
+}
- # This routine makes all necessary changes to the tokenization after the
- # file has been read. This consists mostly of inserting and deleting spaces
- # according to the selected parameters. In a few cases non-space characters
- # are added, deleted or modified.
+sub dump_want_right_space {
+ my $fh = shift;
+ local $" = "\n";
+ $fh->print(<<EOM);
+These values are the main control of whitespace to the right of a token type;
+They may be altered with the -wrs parameter.
+For a list of token types, use perltidy --dump-token-types (-dtt)
+ 1 means the token wants a space to its right
+-1 means the token does not want a space to its right
+------------------------------------------------------------------------
+EOM
+ foreach my $key ( sort keys %want_right_space ) {
+ $fh->print("$key\t$want_right_space{$key}\n");
+ }
+ return;
+}
- # The old tokens are copied one-by-one, with changes, from the old
- # linear storage array to a new array.
+{ ## begin closure is_essential_whitespace
- my $rLL = $self->{rLL};
- my $Klimit_old = $self->{Klimit};
- my $rlines = $self->{rlines};
- my $rpaired_to_inner_container = $self->{rpaired_to_inner_container};
+ my %is_sort_grep_map;
+ my %is_for_foreach;
+ my %is_digraph;
+ my %is_trigraph;
+ my %essential_whitespace_filter_l1;
+ my %essential_whitespace_filter_r1;
+ my %essential_whitespace_filter_l2;
+ my %essential_whitespace_filter_r2;
+ my %is_type_with_space_before_bareword;
- my $rLL_new = []; # This is the new array
- my $KK = 0;
- my $rtoken_vars;
- my $Kmax = @{$rLL} - 1;
+ BEGIN {
- # Set the whitespace flags, which indicate the token spacing preference.
- my $rwhitespace_flags = $self->set_whitespace_flags();
+ my @q;
+ @q = qw(sort grep map);
+ @is_sort_grep_map{@q} = (1) x scalar(@q);
- # we will be setting token lengths as we go
- my $cumulative_length = 0;
+ @q = qw(for foreach);
+ @is_for_foreach{@q} = (1) x scalar(@q);
- # We also define these hash indexes giving container token array indexes
- # as a function of the container sequence numbers. For example,
- my $K_opening_container = {}; # opening [ { or (
- my $K_closing_container = {}; # closing ] } or )
- my $K_opening_ternary = {}; # opening ? of ternary
- my $K_closing_ternary = {}; # closing : of ternary
+ @q = qw(
+ .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
+ <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
+ );
+ @is_digraph{@q} = (1) x scalar(@q);
- # List of new K indexes of phantom semicolons
- # This will be needed if we want to undo them for iterations
- my $rK_phantom_semicolons = [];
+ @q = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~);
+ @is_trigraph{@q} = (1) x scalar(@q);
- # Temporary hashes for adding semicolons
- ##my $rKfirst_new = {};
+ # These are used as a speedup filters for sub is_essential_whitespace.
- # a sub to link preceding nodes forward to a new node type
- my $link_back = sub {
- my ( $Ktop, $key ) = @_;
+ # Filter 1:
+ # These left side token types USUALLY do not require a space:
+ @q = qw( ; { } [ ] L R );
+ push @q, ',';
+ push @q, ')';
+ push @q, '(';
+ @essential_whitespace_filter_l1{@q} = (1) x scalar(@q);
- my $Kprev = $Ktop - 1;
- while ( $Kprev >= 0
- && !defined( $rLL_new->[$Kprev]->[$key] ) )
- {
- $rLL_new->[$Kprev]->[$key] = $Ktop;
- $Kprev -= 1;
- }
- };
+ # BUT some might if followed by these right token types
+ @q = qw( pp mm << <<= h );
+ @essential_whitespace_filter_r1{@q} = (1) x scalar(@q);
- # A sub to store one token in the new array
- # All new tokens must be stored by this sub so that it can update
- # all data structures on the fly.
- my $last_nonblank_type = ';';
- my $last_nonblank_token = ';';
- my $last_nonblank_block_type = '';
- my $store_token = sub {
- my ($item) = @_;
+ # Filter 2:
+ # These right side filters usually do not require a space
+ @q = qw( ; ] R } );
+ push @q, ',';
+ push @q, ')';
+ @essential_whitespace_filter_r2{@q} = (1) x scalar(@q);
- # This will be the index of this item in the new array
- my $KK_new = @{$rLL_new};
+ # BUT some might if followed by these left token types
+ @q = qw( h Z );
+ @essential_whitespace_filter_l2{@q} = (1) x scalar(@q);
- # check for a sequenced item (i.e., container or ?/:)
- my $type_sequence = $item->[_TYPE_SEQUENCE_];
- if ($type_sequence) {
+ # Keep a space between certain types and any bareword:
+ # Q: keep a space between a quote and a bareword to prevent the
+ # bareword from becoming a quote modifier.
+ # &: do not remove space between an '&' and a bare word because
+ # it may turn into a function evaluation, like here
+ # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
+ # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
+ @q = qw( Q & );
+ @is_type_with_space_before_bareword{@q} = (1) x scalar(@q);
- $link_back->( $KK_new, _KNEXT_SEQ_ITEM_ );
+ }
- my $token = $item->[_TOKEN_];
- if ( $is_opening_token{$token} ) {
+ sub is_essential_whitespace {
- $K_opening_container->{$type_sequence} = $KK_new;
- }
- elsif ( $is_closing_token{$token} ) {
+ # Essential whitespace means whitespace which cannot be safely deleted
+ # without risking the introduction of a syntax error.
+ # We are given three tokens and their types:
+ # ($tokenl, $typel) is the token to the left of the space in question
+ # ($tokenr, $typer) is the token to the right of the space in question
+ # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
+ #
+ # Note1: This routine should almost never need to be changed. It is
+ # for avoiding syntax problems rather than for formatting.
- $K_closing_container->{$type_sequence} = $KK_new;
- }
+ # Note2: The -mangle option causes large numbers of calls to this
+ # routine and therefore is a good test. So if a change is made, be sure
+ # to run a large number of files with the -mangle option and check for
+ # differences.
- # These are not yet used but could be useful
- else {
- if ( $token eq '?' ) {
- $K_opening_ternary->{$type_sequence} = $KK_new;
- }
- elsif ( $token eq ':' ) {
- $K_closing_ternary->{$type_sequence} = $KK_new;
- }
- else {
- # shouldn't happen
- Fault("Ugh: shouldn't happen");
- }
- }
- }
+ my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
- # find the length of this token
- my $token_length = length( $item->[_TOKEN_] );
+ # This is potentially a very slow routine but the following quick
+ # filters typically catch and handle over 90% of the calls.
- # and update the cumulative length
- $cumulative_length += $token_length;
+ # Filter 1: usually no space required after common types ; , [ ] { } ( )
+ return
+ if ( $essential_whitespace_filter_l1{$typel}
+ && !$essential_whitespace_filter_r1{$typer} );
- # Save the length sum to just AFTER this token
- $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
+ # Filter 2: usually no space before common types ; ,
+ return
+ if ( $essential_whitespace_filter_r2{$typer}
+ && !$essential_whitespace_filter_l2{$typel} );
- my $type = $item->[_TYPE_];
+ # Filter 3: Handle side comments: a space is only essential if the left
+ # token ends in '$' For example, we do not want to create $#foo below:
- # trim side comments
- if ( $type eq '#' ) {
- $item->[_TOKEN_] =~ s/\s*$//;
- }
+ # sub t086
+ # ( #foo)))
+ # $ #foo)))
+ # a #foo)))
+ # ) #foo)))
+ # { ... }
- if ( $type && $type ne 'b' && $type ne '#' ) {
- $last_nonblank_type = $type;
- $last_nonblank_token = $item->[_TOKEN_];
- $last_nonblank_block_type = $item->[_BLOCK_TYPE_];
+ # Also, I prefer not to put a ? and # together because ? used to be
+ # a pattern delmiter and spacing was used if guessing was needed.
+
+ if ( $typer eq '#' ) {
+
+ return 1
+ if ( $tokenl
+ && ( $typel eq '?' || substr( $tokenl, -1 ) eq '$' ) );
+ return;
}
- # and finally, add this item to the new array
- push @{$rLL_new}, $item;
- };
+ my $tokenr_is_bareword = $tokenr =~ /^\w/ && $tokenr !~ /^\d/;
+ my $tokenr_is_open_paren = $tokenr eq '(';
+ my $token_joined = $tokenl . $tokenr;
+ my $tokenl_is_dash = $tokenl eq '-';
- my $store_token_and_space = sub {
- my ( $item, $want_space ) = @_;
+ my $result =
- # store a token with preceding space if requested and needed
+ # never combine two bare words or numbers
+ # examples: and ::ok(1)
+ # return ::spw(...)
+ # for bla::bla:: abc
+ # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
+ # $input eq"quit" to make $inputeq"quit"
+ # my $size=-s::SINK if $file; <==OK but we won't do it
+ # don't join something like: for bla::bla:: abc
+ # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
+ ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
+ && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
- # First store the space
- if ( $want_space
- && @{$rLL_new}
- && $rLL_new->[-1]->[_TYPE_] ne 'b'
- && $rOpts_add_whitespace )
- {
- my $rcopy = copy_token_as_type( $item, 'b', ' ' );
- $rcopy->[_LINE_INDEX_] =
- $rLL_new->[-1]->[_LINE_INDEX_];
- $store_token->($rcopy);
- }
+ # do not combine a number with a concatenation dot
+ # example: pom.caputo:
+ # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
+ || $typel eq 'n' && $tokenr eq '.'
+ || $typer eq 'n'
+ && $tokenl eq '.'
- # then the token
- $store_token->($item);
- };
+ # cases of a space before a bareword...
+ || (
+ $tokenr_is_bareword && (
+
+ # do not join a minus with a bare word, because you might form
+ # a file test operator. Example from Complex.pm:
+ # if (CORE::abs($z - i) < $eps);
+ # "z-i" would be taken as a file test.
+ $tokenl_is_dash && length($tokenr) == 1
+
+ # and something like this could become ambiguous without space
+ # after the '-':
+ # use constant III=>1;
+ # $a = $b - III;
+ # and even this:
+ # $a = - III;
+ || $tokenl_is_dash && $typer =~ /^[wC]$/
+
+ # keep space between types Q & and a bareword
+ || $is_type_with_space_before_bareword{$typel}
+
+ # +-: binary plus and minus before a bareword could get
+ # converted into unary plus and minus on next pass through the
+ # tokenizer. This can lead to blinkers: cases b660 b670 b780
+ # b781 b787 b788 b790 So we keep a space unless the +/- clearly
+ # follows an operator
+ || ( ( $typel eq '+' || $typel eq '-' )
+ && $typell !~ /^[niC\)\}\]R]$/ )
+
+ # keep a space between a token ending in '$' and any word;
+ # this caused trouble: "die @$ if $@"
+ || $typel eq 'i' && $tokenl =~ /\$$/
+
+ # don't combine $$ or $# with any alphanumeric
+ # (testfile mangle.t with --mangle)
+ || $tokenl =~ /^\$[\$\#]$/
- my $K_end_q = sub {
- my ($KK) = @_;
- my $K_end = $KK;
- my $Kn = $self->K_next_nonblank($KK);
- while ( defined($Kn) && $rLL->[$Kn]->[_TYPE_] eq 'q' ) {
- $K_end = $Kn;
- $Kn = $self->K_next_nonblank($Kn);
- }
- return $K_end;
- };
+ )
+ ) ## end $tokenr_is_bareword
- my $add_phantom_semicolon = sub {
+ # OLD, not used
+ # '= -' should not become =- or you will get a warning
+ # about reversed -=
+ # || ($tokenr eq '-')
- my ($KK) = @_;
+ # do not join a bare word with a minus, like between 'Send' and
+ # '-recipients' here <<snippets/space3.in>>
+ # my $msg = new Fax::Send
+ # -recipients => $to,
+ # -data => $data;
+ # This is the safest thing to do. If we had the token to the right of
+ # the minus we could do a better check.
+ #
+ # And do not combine a bareword and a quote, like this:
+ # oops "Your login, $Bad_Login, is not valid";
+ # It can cause a syntax error if oops is a sub
+ || $typel eq 'w' && ( $tokenr eq '-' || $typer eq 'Q' )
- my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
- return unless ( defined($Kp) );
+ # perl is very fussy about spaces before <<
+ || $tokenr =~ /^\<\</
- # we are only adding semicolons for certain block types
- my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
- return
- unless ( $ok_to_add_semicolon_for_block_type{$block_type}
- || $block_type =~ /^(sub|package)/
- || $block_type =~ /^\w+\:$/ );
+ # avoid combining tokens to create new meanings. Example:
+ # $a+ +$b must not become $a++$b
+ || ( $is_digraph{$token_joined} )
+ || $is_trigraph{$token_joined}
- my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+ # another example: do not combine these two &'s:
+ # allow_options & &OPT_EXECCGI
+ || $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) }
- my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
- my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
+ # retain any space after possible filehandle
+ # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
+ || $typel eq 'Z'
- # Do not add a semicolon if...
- return
- if (
+ # Added 'Y' here 16 Jan 2021 to prevent -mangle option from removing
+ # space after type Y. Otherwise, it will get parsed as type 'Z' later
+ # and any space would have to be added back manually if desired.
+ || $typel eq 'Y'
- # it would follow a comment (and be isolated)
- $previous_nonblank_type eq '#'
+ # Perl is sensitive to whitespace after the + here:
+ # $b = xvals $a + 0.1 * yvals $a;
+ || $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/
- # it follows a code block ( because they are not always wanted
- # there and may add clutter)
- || $rLL_new->[$Kp]->[_BLOCK_TYPE_]
+ || (
+ $tokenr_is_open_paren && (
- # it would follow a label
- || $previous_nonblank_type eq 'J'
+ # keep paren separate in 'use Foo::Bar ()'
+ ( $typel eq 'w' && $typell eq 'k' && $tokenll eq 'use' )
- # it would be inside a 'format' statement (and cause syntax error)
- || ( $previous_nonblank_type eq 'k'
- && $previous_nonblank_token =~ /format/ )
+ # OLD: keep any space between filehandle and paren:
+ # file mangle.t with --mangle:
+ # NEW: this test is no longer necessary here (moved above)
+ ## || $typel eq 'Y'
- # if it would prevent welding two containers
- || $rpaired_to_inner_container->{$type_sequence}
+ # must have space between grep and left paren; "grep(" will fail
+ || $is_sort_grep_map{$tokenl}
- );
+ # don't stick numbers next to left parens, as in:
+ #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
+ || $typel eq 'n'
+ )
+ ) ## end $tokenr_is_open_paren
- # We will insert an empty semicolon here as a placeholder. Later, if
- # it becomes the last token on a line, we will bring it to life. The
- # advantage of doing this is that (1) we just have to check line
- # endings, and (2) the phantom semicolon has zero width and therefore
- # won't cause needless breaks of one-line blocks.
- my $Ktop = -1;
- if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
- && $want_left_space{';'} == WS_NO )
- {
+ # retain any space after here doc operator ( hereerr.t)
+ || $typel eq 'h'
- # convert the blank into a semicolon..
- # be careful: we are working on the new stack top
- # on a token which has been stored.
- my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', ' ' );
+ # be careful with a space around ++ and --, to avoid ambiguity as to
+ # which token it applies
+ || $typer =~ /^(pp|mm)$/ && $tokenl !~ /^[\;\{\(\[]/
+ || $typel =~ /^(\+\+|\-\-)$/
+ && $tokenr !~ /^[\;\}\)\]]/
- # Convert the existing blank to:
- # a phantom semicolon for one_line_block option = 0 or 1
- # a real semicolon for one_line_block option = 2
- my $tok = $rOpts_one_line_block_semicolons == 2 ? ';' : '';
+ # need space after foreach my; for example, this will fail in
+ # older versions of Perl:
+ # foreach my$ft(@filetypes)...
+ || (
+ $tokenl eq 'my'
- $rLL_new->[$Ktop]->[_TOKEN_] = $tok; # zero length if phantom
- $rLL_new->[$Ktop]->[_TYPE_] = ';';
- $rLL_new->[$Ktop]->[_SLEVEL_] =
- $rLL->[$KK]->[_SLEVEL_];
+ # /^(for|foreach)$/
+ && $is_for_foreach{$tokenll}
+ && $tokenr =~ /^\$/
+ )
- push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
+ # We must be sure that a space between a ? and a quoted string
+ # remains if the space before the ? remains. [Loca.pm, lockarea]
+ # ie,
+ # $b=join $comma ? ',' : ':', @_; # ok
+ # $b=join $comma?',' : ':', @_; # ok!
+ # $b=join $comma ?',' : ':', @_; # error!
+ # Not really required:
+ ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
- # Then store a new blank
- $store_token->($rcopy);
- }
- else {
+ # Space stacked labels...
+ # Not really required: Perl seems to accept non-spaced labels.
+ ## || $typel eq 'J' && $typer eq 'J'
- # insert a new token
- my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', '' );
- $rcopy->[_SLEVEL_] = $rLL->[$KK]->[_SLEVEL_];
- $store_token->($rcopy);
- push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
- }
- };
+ ; # the value of this long logic sequence is the result we want
+ return $result;
+ }
+} ## end closure is_essential_whitespace
- my $check_Q = sub {
+{ ## begin closure new_secret_operator_whitespace
- # Check that a quote looks okay
- # This sub works but needs to by sync'd with the log file output
- # before it can be used.
- my ( $KK, $Kfirst ) = @_;
- my $token = $rLL->[$KK]->[_TOKEN_];
- note_embedded_tab() if ( $token =~ "\t" );
+ my %secret_operators;
+ my %is_leading_secret_token;
- my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
- return unless ( defined($Kp) );
- my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
- my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
-
- my $previous_nonblank_type_2 = 'b';
- my $previous_nonblank_token_2 = "";
- my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
- if ( defined($Kpp) ) {
- $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_];
- $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
- }
-
- my $Kn = $self->K_next_nonblank($KK);
- my $next_nonblank_token = "";
- if ( defined($Kn) ) {
- $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
- }
-
- my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
- my $type_0 = $rLL->[$Kfirst]->[_TYPE_];
-
- # make note of something like '$var = s/xxx/yyy/;'
- # in case it should have been '$var =~ s/xxx/yyy/;'
- if (
- $token =~ /^(s|tr|y|m|\/)/
- && $previous_nonblank_token =~ /^(=|==|!=)$/
+ BEGIN {
- # preceded by simple scalar
- && $previous_nonblank_type_2 eq 'i'
- && $previous_nonblank_token_2 =~ /^\$/
+ # token lists for perl secret operators as compiled by Philippe Bruhat
+ # at: https://metacpan.org/module/perlsecret
+ %secret_operators = (
+ 'Goatse' => [qw#= ( ) =#], #=( )=
+ 'Venus1' => [qw#0 +#], # 0+
+ 'Venus2' => [qw#+ 0#], # +0
+ 'Enterprise' => [qw#) x ! !#], # ()x!!
+ 'Kite1' => [qw#~ ~ <>#], # ~~<>
+ 'Kite2' => [qw#~~ <>#], # ~~<>
+ 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
+ 'Bang bang ' => [qw#! !#], # !!
+ );
- # followed by some kind of termination
- # (but give complaint if we can not see far enough ahead)
- && $next_nonblank_token =~ /^[; \)\}]$/
+ # The following operators and constants are not included because they
+ # are normally kept tight by perltidy:
+ # ~~ <~>
+ #
- # scalar is not declared
- && !( $type_0 eq 'k' && $token_0 =~ /^(my|our|local)$/ )
- )
- {
- my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
- complain(
-"Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
- );
+ # Make a lookup table indexed by the first token of each operator:
+ # first token => [list, list, ...]
+ foreach my $value ( values(%secret_operators) ) {
+ my $tok = $value->[0];
+ push @{ $is_leading_secret_token{$tok} }, $value;
}
- };
+ }
- # Main loop over all lines of the file
- my $last_K_out;
- my $CODE_type = "";
- my $line_type = "";
+ sub new_secret_operator_whitespace {
- # Testing option to break qw. Do not use; it can make a mess.
- my $ALLOW_BREAK_MULTILINE_QW = 0;
- my $in_multiline_qw;
- foreach my $line_of_tokens ( @{$rlines} ) {
+ my ( $rlong_array, $rwhitespace_flags ) = @_;
- $input_line_number = $line_of_tokens->{_line_number};
- my $last_line_type = $line_type;
- $line_type = $line_of_tokens->{_line_type};
- next unless ( $line_type eq 'CODE' );
- my $last_CODE_type = $CODE_type;
- $CODE_type = $line_of_tokens->{_code_type};
- my $rK_range = $line_of_tokens->{_rK_range};
- my ( $Kfirst, $Klast ) = @{$rK_range};
- next unless defined($Kfirst);
+ # Loop over all tokens in this line
+ my ( $token, $type );
+ my $jmax = @{$rlong_array} - 1;
+ foreach my $j ( 0 .. $jmax ) {
- # Check for correct sequence of token indexes...
- # An error here means that sub write_line() did not correctly
- # package the tokenized lines as it received them.
- if ( defined($last_K_out) ) {
- if ( $Kfirst != $last_K_out + 1 ) {
- Fault(
- "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
- );
- }
- }
- else {
- if ( $Kfirst != 0 ) {
- Fault("Program Bug: first K is $Kfirst but should be 0");
- }
- }
- $last_K_out = $Klast;
+ $token = $rlong_array->[$j]->[_TOKEN_];
+ $type = $rlong_array->[$j]->[_TYPE_];
- # Handle special lines of code
- if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
+ # Skip unless this token might start a secret operator
+ next if ( $type eq 'b' );
+ next unless ( $is_leading_secret_token{$token} );
- # CODE_types are as follows.
- # 'BL' = Blank Line
- # 'VB' = Verbatim - line goes out verbatim
- # 'FS' = Format Skipping - line goes out verbatim, no blanks
- # 'IO' = Indent Only - only indentation may be changed
- # 'NIN' = No Internal Newlines - line does not get broken
- # 'HSC'=Hanging Side Comment - fix this hanging side comment
- # 'BC'=Block Comment - an ordinary full line comment
- # 'SBC'=Static Block Comment - a block comment which does not get
- # indented
- # 'SBCX'=Static Block Comment Without Leading Space
- # 'DEL'=Delete this line
- # 'VER'=VERSION statement
- # '' or (undefined) - no restructions
+ # Loop over all secret operators with this leading token
+ foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
+ my $jend = $j - 1;
+ foreach my $tok ( @{$rpattern} ) {
+ $jend++;
+ $jend++
- # For a hanging side comment we insert an empty quote before
- # the comment so that it becomes a normal side comment and
- # will be aligned by the vertical aligner
- if ( $CODE_type eq 'HSC' ) {
+ if ( $jend <= $jmax
+ && $rlong_array->[$jend]->[_TYPE_] eq 'b' );
+ if ( $jend > $jmax
+ || $tok ne $rlong_array->[$jend]->[_TOKEN_] )
+ {
+ $jend = undef;
+ last;
+ }
+ }
- # Safety Check: This must be a line with one token (a comment)
- my $rtoken_vars = $rLL->[$Kfirst];
- if ( $Kfirst == $Klast && $rtoken_vars->[_TYPE_] eq '#' ) {
+ if ($jend) {
- # Note that even if the flag 'noadd-whitespace' is set, we
- # will make an exception here and allow a blank to be
- # inserted to push the comment to the right. We can think
- # of this as an adjustment of indentation rather than
- # whitespace between tokens. This will also prevent the
- # hanging side comment from getting converted to a block
- # comment if whitespace gets deleted, as for example with
- # the -extrude and -mangle options.
- my $rcopy = copy_token_as_type( $rtoken_vars, 'q', '' );
- $store_token->($rcopy);
- $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' );
- $store_token->($rcopy);
- $store_token->($rtoken_vars);
- next;
+ # set flags to prevent spaces within this operator
+ foreach my $jj ( $j + 1 .. $jend ) {
+ $rwhitespace_flags->[$jj] = WS_NO;
+ }
+ $j = $jend;
+ last;
}
- else {
+ } ## End Loop over all operators
+ } ## End loop over all tokens
+ return;
+ } # End sub
+} ## end closure new_secret_operator_whitespace
- # This line was mis-marked by sub scan_comment
- Fault(
- "Program bug. A hanging side comment has been mismarked"
- );
- }
- }
+{ ## begin closure set_bond_strengths
- # Copy tokens unchanged
- foreach my $KK ( $Kfirst .. $Klast ) {
- $store_token->( $rLL->[$KK] );
- }
- next;
- }
+ # These routines and variables are involved in deciding where to break very
+ # long lines.
- # Handle normal line..
+ my %is_good_keyword_breakpoint;
+ my %is_lt_gt_le_ge;
+ my %is_container_token;
- # Insert any essential whitespace between lines
- # if last line was normal CODE.
- # Patch for rt #125012: use K_previous_code rather than '_nonblank'
- # because comments may disappear.
- my $type_next = $rLL->[$Kfirst]->[_TYPE_];
- my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
- my $Kp = $self->K_previous_code( undef, $rLL_new );
- if ( $last_line_type eq 'CODE'
- && $type_next ne 'b'
- && defined($Kp) )
- {
- my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
- my $type_p = $rLL_new->[$Kp]->[_TYPE_];
+ my %binary_bond_strength_nospace;
+ my %binary_bond_strength;
+ my %nobreak_lhs;
+ my %nobreak_rhs;
- my ( $token_pp, $type_pp );
- my $Kpp = $self->K_previous_code( $Kp, $rLL_new );
- if ( defined($Kpp) ) {
- $token_pp = $rLL_new->[$Kpp]->[_TOKEN_];
- $type_pp = $rLL_new->[$Kpp]->[_TYPE_];
- }
- else {
- $token_pp = ";";
- $type_pp = ';';
- }
+ my @bias_tokens;
+ my %bias_hash;
+ my %bias;
+ my $delta_bias;
- if (
- is_essential_whitespace(
- $token_pp, $type_pp, $token_p,
- $type_p, $token_next, $type_next,
- )
- )
- {
+ sub initialize_bond_strength_hashes {
- # Copy this first token as blank, but use previous line number
- my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', ' ' );
- $rcopy->[_LINE_INDEX_] =
- $rLL_new->[-1]->[_LINE_INDEX_];
- $store_token->($rcopy);
- }
- }
+ my @q;
+ @q = qw(if unless while until for foreach);
+ @is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
- # loop to copy all tokens on this line, with any changes
- my $type_sequence;
- for ( my $KK = $Kfirst ; $KK <= $Klast ; $KK++ ) {
- $rtoken_vars = $rLL->[$KK];
- my $token = $rtoken_vars->[_TOKEN_];
- my $type = $rtoken_vars->[_TYPE_];
- my $last_type_sequence = $type_sequence;
- $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+ @q = qw(lt gt le ge);
+ @is_lt_gt_le_ge{@q} = (1) x scalar(@q);
- # Handle a blank space ...
- if ( $type eq 'b' ) {
+ @q = qw/ ( [ { } ] ) /;
+ @is_container_token{@q} = (1) x scalar(@q);
- # Delete it if not wanted by whitespace rules
- # or we are deleting all whitespace
- # Note that whitespace flag is a flag indicating whether a
- # white space BEFORE the token is needed
- next if ( $KK >= $Klast ); # skip terminal blank
- my $Knext = $KK + 1;
- my $ws = $rwhitespace_flags->[$Knext];
- if ( $ws == -1
- || $rOpts_delete_old_whitespace )
- {
+ # The decision about where to break a line depends upon a "bond
+ # strength" between tokens. The LOWER the bond strength, the MORE
+ # likely a break. A bond strength may be any value but to simplify
+ # things there are several pre-defined strength levels:
- # FIXME: maybe switch to using _new
- my $Kp = $self->K_previous_nonblank($KK);
- next unless defined($Kp);
- my $token_p = $rLL->[$Kp]->[_TOKEN_];
- my $type_p = $rLL->[$Kp]->[_TYPE_];
+ # NO_BREAK => 10000;
+ # VERY_STRONG => 100;
+ # STRONG => 2.1;
+ # NOMINAL => 1.1;
+ # WEAK => 0.8;
+ # VERY_WEAK => 0.55;
- my ( $token_pp, $type_pp );
+ # The strength values are based on trial-and-error, and need to be
+ # tweaked occasionally to get desired results. Some comments:
+ #
+ # 1. Only relative strengths are important. small differences
+ # in strengths can make big formatting differences.
+ # 2. Each indentation level adds one unit of bond strength.
+ # 3. A value of NO_BREAK makes an unbreakable bond
+ # 4. A value of VERY_WEAK is the strength of a ','
+ # 5. Values below NOMINAL are considered ok break points.
+ # 6. Values above NOMINAL are considered poor break points.
+ #
+ # The bond strengths should roughly follow precedence order where
+ # possible. If you make changes, please check the results very
+ # carefully on a variety of scripts. Testing with the -extrude
+ # options is particularly helpful in exercising all of the rules.
- #my $Kpp = $K_previous_nonblank->($Kp);
- my $Kpp = $self->K_previous_nonblank($Kp);
- if ( defined($Kpp) ) {
- $token_pp = $rLL->[$Kpp]->[_TOKEN_];
- $type_pp = $rLL->[$Kpp]->[_TYPE_];
- }
- else {
- $token_pp = ";";
- $type_pp = ';';
- }
- my $token_next = $rLL->[$Knext]->[_TOKEN_];
- my $type_next = $rLL->[$Knext]->[_TYPE_];
+ # Wherever possible, bond strengths are defined in the following
+ # tables. There are two main stages to setting bond strengths and
+ # two types of tables:
+ #
+ # The first stage involves looking at each token individually and
+ # defining left and right bond strengths, according to if we want
+ # to break to the left or right side, and how good a break point it
+ # is. For example tokens like =, ||, && make good break points and
+ # will have low strengths, but one might want to break on either
+ # side to put them at the end of one line or beginning of the next.
+ #
+ # The second stage involves looking at certain pairs of tokens and
+ # defining a bond strength for that particular pair. This second
+ # stage has priority.
- my $do_not_delete = is_essential_whitespace(
- $token_pp, $type_pp, $token_p,
- $type_p, $token_next, $type_next,
- );
+ #---------------------------------------------------------------
+ # Bond Strength BEGIN Section 1.
+ # Set left and right bond strengths of individual tokens.
+ #---------------------------------------------------------------
- next unless ($do_not_delete);
- }
+ # NOTE: NO_BREAK's set in this section first are HINTS which will
+ # probably not be honored. Essential NO_BREAKS's should be set in
+ # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
+ # of this subroutine.
- # make it just one character if allowed
- if ($rOpts_add_whitespace) {
- $rtoken_vars->[_TOKEN_] = ' ';
- }
- $store_token->($rtoken_vars);
- next;
- }
+ # Note that we are setting defaults in this section. The user
+ # cannot change bond strengths but can cause the left and right
+ # bond strengths of any token type to be swapped through the use of
+ # the -wba and -wbb flags. In this way the user can determine if a
+ # breakpoint token should appear at the end of one line or the
+ # beginning of the next line.
- # Handle a nonblank token...
+ %right_bond_strength = ();
+ %left_bond_strength = ();
+ %binary_bond_strength_nospace = ();
+ %binary_bond_strength = ();
+ %nobreak_lhs = ();
+ %nobreak_rhs = ();
- # check for a qw quote
- if ( $type eq 'q' ) {
+ # The hash keys in this section are token types, plus the text of
+ # certain keywords like 'or', 'and'.
- # trim blanks from right of qw quotes
- # (To avoid trimming qw quotes use -ntqw; the tokenizer handles
- # this)
- $token =~ s/\s*$//;
- $rtoken_vars->[_TOKEN_] = $token;
- note_embedded_tab() if ( $token =~ "\t" );
+ # no break around possible filehandle
+ $left_bond_strength{'Z'} = NO_BREAK;
+ $right_bond_strength{'Z'} = NO_BREAK;
- if ($in_multiline_qw) {
+ # never put a bare word on a new line:
+ # example print (STDERR, "bla"); will fail with break after (
+ $left_bond_strength{'w'} = NO_BREAK;
- # If we are at the end of a multiline qw ..
- if ( $in_multiline_qw == $KK ) {
+ # blanks always have infinite strength to force breaks after
+ # real tokens
+ $right_bond_strength{'b'} = NO_BREAK;
- # Split off the closing delimiter character
- # so that the formatter can put a line break there if necessary
- my $part1 = $token;
- my $part2 = substr( $part1, -1, 1, "" );
+ # try not to break on exponentation
+ @q = qw# ** .. ... <=> #;
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} = (STRONG) x scalar(@q);
- if ($part1) {
- my $rcopy =
- copy_token_as_type( $rtoken_vars, 'q', $part1 );
- $store_token->($rcopy);
- $token = $part2;
- $rtoken_vars->[_TOKEN_] = $token;
+ # The comma-arrow has very low precedence but not a good break point
+ $left_bond_strength{'=>'} = NO_BREAK;
+ $right_bond_strength{'=>'} = NOMINAL;
- }
- $in_multiline_qw = undef;
+ # ok to break after label
+ $left_bond_strength{'J'} = NO_BREAK;
+ $right_bond_strength{'J'} = NOMINAL;
+ $left_bond_strength{'j'} = STRONG;
+ $right_bond_strength{'j'} = STRONG;
+ $left_bond_strength{'A'} = STRONG;
+ $right_bond_strength{'A'} = STRONG;
- # store without preceding blank
- $store_token->($rtoken_vars);
- next;
- }
- else {
- # continuing a multiline qw
- $store_token->($rtoken_vars);
- next;
- }
- }
+ $left_bond_strength{'->'} = STRONG;
+ $right_bond_strength{'->'} = VERY_STRONG;
- else {
+ $left_bond_strength{'CORE::'} = NOMINAL;
+ $right_bond_strength{'CORE::'} = NO_BREAK;
- # we are encountered new qw token...see if multiline
- my $K_end = $K_end_q->($KK);
- if ( $ALLOW_BREAK_MULTILINE_QW && $K_end != $KK ) {
+ # breaking AFTER modulus operator is ok:
+ @q = qw< % >;
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} =
+ ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
- # Starting multiline qw...
- # set flag equal to the ending K
- $in_multiline_qw = $K_end;
+ # Break AFTER math operators * and /
+ @q = qw< * / x >;
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
- # Split off the leading part
- # so that the formatter can put a line break there if necessary
- if ( $token =~ /^(qw\s*.)(.*)$/ ) {
- my $part1 = $1;
- my $part2 = $2;
- if ($part2) {
- my $rcopy =
- copy_token_as_type( $rtoken_vars, 'q',
- $part1 );
- $store_token_and_space->(
- $rcopy, $rwhitespace_flags->[$KK] == WS_YES
- );
- $token = $part2;
- $rtoken_vars->[_TOKEN_] = $token;
+ # Break AFTER weakest math operators + and -
+ # Make them weaker than * but a bit stronger than '.'
+ @q = qw< + - >;
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} =
+ ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
- # Second part goes without intermediate blank
- $store_token->($rtoken_vars);
- next;
- }
- }
- }
- else {
+ # Define left strength of unary plus and minus (fixes case b511)
+ $left_bond_strength{p} = $left_bond_strength{'+'};
+ $left_bond_strength{m} = $left_bond_strength{'-'};
- # this is a new single token qw -
- # store with possible preceding blank
- $store_token_and_space->(
- $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
- );
- next;
- }
- }
- } ## end if ( $type eq 'q' )
+ # And make right strength of unary plus and minus very high.
+ # Fixes cases b670 b790
+ $right_bond_strength{p} = NO_BREAK;
+ $right_bond_strength{m} = NO_BREAK;
- # Modify certain tokens here for whitespace
- # The following is not yet done, but could be:
- # sub (x x x)
- elsif ( $type =~ /^[wit]$/ ) {
+ # breaking BEFORE these is just ok:
+ @q = qw# >> << #;
+ @right_bond_strength{@q} = (STRONG) x scalar(@q);
+ @left_bond_strength{@q} = (NOMINAL) x scalar(@q);
- # Examples: <<snippets/space1.in>>
- # change '$ var' to '$var' etc
- # '-> new' to '->new'
- if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
- $token =~ s/\s*//g;
- $rtoken_vars->[_TOKEN_] = $token;
- }
+ # breaking before the string concatenation operator seems best
+ # because it can be hard to see at the end of a line
+ $right_bond_strength{'.'} = STRONG;
+ $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
- # Split identifiers with leading arrows, inserting blanks if
- # necessary. It is easier and safer here than in the
- # tokenizer. For example '->new' becomes two tokens, '->' and
- # 'new' with a possible blank between.
- #
- # Note: there is a related patch in sub set_whitespace_flags
- if ( $token =~ /^\-\>(.*)$/ && $1 ) {
- my $token_save = $1;
- my $type_save = $type;
+ @q = qw< } ] ) R >;
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
- # store a blank to left of arrow if necessary
- my $Kprev = $self->K_previous_nonblank($KK);
- if ( defined($Kprev)
- && $rLL->[$Kprev]->[_TYPE_] ne 'b'
- && $rOpts_add_whitespace
- && $want_left_space{'->'} == WS_YES )
- {
- my $rcopy =
- copy_token_as_type( $rtoken_vars, 'b', ' ' );
- $store_token->($rcopy);
- }
+ # make these a little weaker than nominal so that they get
+ # favored for end-of-line characters
+ @q = qw< != == =~ !~ ~~ !~~ >;
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} =
+ ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
- # then store the arrow
- my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' );
- $store_token->($rcopy);
+ # break AFTER these
+ @q = qw# < > | & >= <= #;
+ @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
+ @right_bond_strength{@q} =
+ ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
- # then reset the current token to be the remainder,
- # and reset the whitespace flag according to the arrow
- $token = $rtoken_vars->[_TOKEN_] = $token_save;
- $type = $rtoken_vars->[_TYPE_] = $type_save;
- $store_token->($rtoken_vars);
- next;
- }
+ # breaking either before or after a quote is ok
+ # but bias for breaking before a quote
+ $left_bond_strength{'Q'} = NOMINAL;
+ $right_bond_strength{'Q'} = NOMINAL + 0.02;
+ $left_bond_strength{'q'} = NOMINAL;
+ $right_bond_strength{'q'} = NOMINAL;
- if ( $token =~ /$SUB_PATTERN/ ) {
+ # starting a line with a keyword is usually ok
+ $left_bond_strength{'k'} = NOMINAL;
- # -spp = 0 : no space before opening prototype paren
- # -spp = 1 : stable (follow input spacing)
- # -spp = 2 : always space before opening prototype paren
- my $spp = $rOpts->{'space-prototype-paren'};
- if ( defined($spp) ) {
- if ( $spp == 0 ) { $token =~ s/\s+\(/\(/; }
- elsif ( $spp == 2 ) { $token =~ s/\(/ (/; }
- }
+ # we usually want to bond a keyword strongly to what immediately
+ # follows, rather than leaving it stranded at the end of a line
+ $right_bond_strength{'k'} = STRONG;
- # one space max, and no tabs
- $token =~ s/\s+/ /g;
- $rtoken_vars->[_TOKEN_] = $token;
- }
+ $left_bond_strength{'G'} = NOMINAL;
+ $right_bond_strength{'G'} = STRONG;
- # trim identifiers of trailing blanks which can occur
- # under some unusual circumstances, such as if the
- # identifier 'witch' has trailing blanks on input here:
- #
- # sub
- # witch
- # () # prototype may be on new line ...
- # ...
- if ( $type eq 'i' ) {
- $token =~ s/\s+$//g;
- $rtoken_vars->[_TOKEN_] = $token;
- }
- }
+ # assignment operators
+ @q = qw(
+ = **= += *= &= <<= &&=
+ -= /= |= >>= ||= //=
+ .= %= ^=
+ x=
+ );
- # change 'LABEL :' to 'LABEL:'
- elsif ( $type eq 'J' ) {
- $token =~ s/\s+//g;
- $rtoken_vars->[_TOKEN_] = $token;
- }
+ # Default is to break AFTER various assignment operators
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} =
+ ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
- # patch to add space to something like "x10"
- # This avoids having to split this token in the pre-tokenizer
- elsif ( $type eq 'n' ) {
- if ( $token =~ /^x\d+/ ) {
- $token =~ s/x/x /;
- $rtoken_vars->[_TOKEN_] = $token;
- }
- }
+ # Default is to break BEFORE '&&' and '||' and '//'
+ # set strength of '||' to same as '=' so that chains like
+ # $a = $b || $c || $d will break before the first '||'
+ $right_bond_strength{'||'} = NOMINAL;
+ $left_bond_strength{'||'} = $right_bond_strength{'='};
- # check a quote for problems
- elsif ( $type eq 'Q' ) {
- $check_Q->( $KK, $Kfirst );
- }
+ # same thing for '//'
+ $right_bond_strength{'//'} = NOMINAL;
+ $left_bond_strength{'//'} = $right_bond_strength{'='};
- # handle semicolons
- elsif ( $type eq ';' ) {
+ # set strength of && a little higher than ||
+ $right_bond_strength{'&&'} = NOMINAL;
+ $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
- # Remove unnecessary semicolons, but not after bare
- # blocks, where it could be unsafe if the brace is
- # mistokenized.
- if (
- $rOpts->{'delete-semicolons'}
- && (
- (
- $last_nonblank_type eq '}'
- && (
- $is_block_without_semicolon{
- $last_nonblank_block_type}
- || $last_nonblank_block_type =~ /$SUB_PATTERN/
- || $last_nonblank_block_type =~ /^\w+:$/ )
- )
- || $last_nonblank_type eq ';'
- )
- )
- {
+ $left_bond_strength{';'} = VERY_STRONG;
+ $right_bond_strength{';'} = VERY_WEAK;
+ $left_bond_strength{'f'} = VERY_STRONG;
- # This looks like a deletable semicolon, but even if a
- # semicolon can be deleted it is necessarily best to do so.
- # We apply these additional rules for deletion:
- # - Always ok to delete a ';' at the end of a line
- # - Never delete a ';' before a '#' because it would
- # promote it to a block comment.
- # - If a semicolon is not at the end of line, then only
- # delete if it is followed by another semicolon or closing
- # token. This includes the comment rule. It may take
- # two passes to get to a final state, but it is a little
- # safer. For example, keep the first semicolon here:
- # eval { sub bubba { ok(0) }; ok(0) } || ok(1);
- # It is not required but adds some clarity.
- my $ok_to_delete = 1;
- if ( $KK < $Klast ) {
- my $Kn = $self->K_next_nonblank($KK);
- if ( defined($Kn) && $Kn <= $Klast ) {
- my $next_nonblank_token_type =
- $rLL->[$Kn]->[_TYPE_];
- $ok_to_delete = $next_nonblank_token_type eq ';'
- || $next_nonblank_token_type eq '}';
- }
- }
+ # make right strength of for ';' a little less than '='
+ # to make for contents break after the ';' to avoid this:
+ # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
+ # $number_of_fields )
+ # and make it weaker than ',' and 'and' too
+ $right_bond_strength{'f'} = VERY_WEAK - 0.03;
- if ($ok_to_delete) {
- note_deleted_semicolon();
- next;
- }
- else {
- write_logfile_entry("Extra ';'\n");
- }
- }
- }
+ # The strengths of ?/: should be somewhere between
+ # an '=' and a quote (NOMINAL),
+ # make strength of ':' slightly less than '?' to help
+ # break long chains of ? : after the colons
+ $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
+ $right_bond_strength{':'} = NO_BREAK;
+ $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
+ $right_bond_strength{'?'} = NO_BREAK;
- elsif ($type_sequence) {
+ $left_bond_strength{','} = VERY_STRONG;
+ $right_bond_strength{','} = VERY_WEAK;
- # if ( $is_opening_token{$token} ) {
- # }
+ # remaining digraphs and trigraphs not defined above
+ @q = qw( :: <> ++ --);
+ @left_bond_strength{@q} = (WEAK) x scalar(@q);
+ @right_bond_strength{@q} = (STRONG) x scalar(@q);
- if ( $is_closing_token{$token} ) {
+ # Set bond strengths of certain keywords
+ # make 'or', 'err', 'and' slightly weaker than a ','
+ $left_bond_strength{'and'} = VERY_WEAK - 0.01;
+ $left_bond_strength{'or'} = VERY_WEAK - 0.02;
+ $left_bond_strength{'err'} = VERY_WEAK - 0.02;
+ $left_bond_strength{'xor'} = VERY_WEAK - 0.01;
+ $right_bond_strength{'and'} = NOMINAL;
+ $right_bond_strength{'or'} = NOMINAL;
+ $right_bond_strength{'err'} = NOMINAL;
+ $right_bond_strength{'xor'} = NOMINAL;
- # Insert a tentative missing semicolon if the next token is
- # a closing block brace
- if (
- $type eq '}'
- && $token eq '}'
+ #---------------------------------------------------------------
+ # Bond Strength BEGIN Section 2.
+ # Set binary rules for bond strengths between certain token types.
+ #---------------------------------------------------------------
- # not preceded by a ';'
- && $last_nonblank_type ne ';'
+ # We have a little problem making tables which apply to the
+ # container tokens. Here is a list of container tokens and
+ # their types:
+ #
+ # type tokens // meaning
+ # { {, [, ( // indent
+ # } }, ], ) // outdent
+ # [ [ // left non-structural [ (enclosing an array index)
+ # ] ] // right non-structural square bracket
+ # ( ( // left non-structural paren
+ # ) ) // right non-structural paren
+ # L { // left non-structural curly brace (enclosing a key)
+ # R } // right non-structural curly brace
+ #
+ # Some rules apply to token types and some to just the token
+ # itself. We solve the problem by combining type and token into a
+ # new hash key for the container types.
+ #
+ # If a rule applies to a token 'type' then we need to make rules
+ # for each of these 'type.token' combinations:
+ # Type Type.Token
+ # { {{, {[, {(
+ # [ [[
+ # ( ((
+ # L L{
+ # } }}, }], })
+ # ] ]]
+ # ) ))
+ # R R}
+ #
+ # If a rule applies to a token then we need to make rules for
+ # these 'type.token' combinations:
+ # Token Type.Token
+ # { {{, L{
+ # [ {[, [[
+ # ( {(, ((
+ # } }}, R}
+ # ] }], ]]
+ # ) }), ))
- # and this is not a VERSION stmt (is all one line, we are not
- # inserting semicolons on one-line blocks)
- && $CODE_type ne 'VER'
+ # allow long lines before final { in an if statement, as in:
+ # if (..........
+ # ..........)
+ # {
+ #
+ # Otherwise, the line before the { tends to be too short.
- # and we are allowed to add semicolons
- && $rOpts->{'add-semicolons'}
- )
- {
- $add_phantom_semicolon->($KK);
- }
- }
- }
+ $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
+ $binary_bond_strength{'(('}{'{{'} = NOMINAL;
- # Store this token with possible previous blank
- $store_token_and_space->(
- $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
- );
+ # break on something like '} (', but keep this stronger than a ','
+ # example is in 'howe.pl'
+ $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
+ $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
- } # End token loop
- } # End line loop
+ # keep matrix and hash indices together
+ # but make them a little below STRONG to allow breaking open
+ # something like {'some-word'}{'some-very-long-word'} at the }{
+ # (bracebrk.t)
+ $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
+ $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
+ $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
+ $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
- # Reset memory to be the new array
- $self->{rLL} = $rLL_new;
- $self->set_rLL_max_index();
- $self->{K_opening_container} = $K_opening_container;
- $self->{K_closing_container} = $K_closing_container;
- $self->{K_opening_ternary} = $K_opening_ternary;
- $self->{K_closing_ternary} = $K_closing_ternary;
- $self->{rK_phantom_semicolons} = $rK_phantom_semicolons;
+ # increase strength to the point where a break in the following
+ # will be after the opening paren rather than at the arrow:
+ # $a->$b($c);
+ $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
- # make sure the new array looks okay
- $self->check_token_array();
+ # Note that the following alternative strength would make the break at the
+ # '->' rather than opening the '('. Both have advantages and disadvantages.
+ # $binary_bond_strength{'i'}{'->'} = 0.5*STRONG + 0.5 * NOMINAL; #
- # reset the token limits of each line
- $self->resync_lines_and_tokens();
+ $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
- return;
-}
+ $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
+ $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
+ $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
+ $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
-{ # scan_comments
+ #---------------------------------------------------------------
+ # Binary NO_BREAK rules
+ #---------------------------------------------------------------
- my $Last_line_had_side_comment;
- my $In_format_skipping_section;
- my $Saw_VERSION_in_this_file;
+ # use strict requires that bare word and => not be separated
+ $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
+ $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
- sub scan_comments {
- my $self = shift;
- my $rlines = $self->{rlines};
+ # Never break between a bareword and a following paren because
+ # perl may give an error. For example, if a break is placed
+ # between 'to_filehandle' and its '(' the following line will
+ # give a syntax error [Carp.pm]: my( $no) =fileno(
+ # to_filehandle( $in)) ;
+ $binary_bond_strength{'C'}{'(('} = NO_BREAK;
+ $binary_bond_strength{'C'}{'{('} = NO_BREAK;
+ $binary_bond_strength{'U'}{'(('} = NO_BREAK;
+ $binary_bond_strength{'U'}{'{('} = NO_BREAK;
- $Last_line_had_side_comment = undef;
- $In_format_skipping_section = undef;
- $Saw_VERSION_in_this_file = undef;
+ # use strict requires that bare word within braces not start new
+ # line
+ $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
- # Loop over all lines
- foreach my $line_of_tokens ( @{$rlines} ) {
- my $line_type = $line_of_tokens->{_line_type};
- next unless ( $line_type eq 'CODE' );
- my $CODE_type = $self->get_CODE_type($line_of_tokens);
- $line_of_tokens->{_code_type} = $CODE_type;
- }
- return;
- }
+ $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
- sub get_CODE_type {
- my ( $self, $line_of_tokens ) = @_;
-
- # We are looking at a line of code and setting a flag to
- # describe any special processing that it requires
+ # The following two rules prevent a syntax error caused by breaking up
+ # a construction like '{-y}'. The '-' quotes the 'y' and prevents
+ # it from being taken as a transliteration. We have to keep
+ # token types 'L m w' together to prevent this error.
+ $binary_bond_strength{'L{'}{'m'} = NO_BREAK;
+ $binary_bond_strength_nospace{'m'}{'w'} = NO_BREAK;
- # Possible CODE_types are as follows.
- # 'BL' = Blank Line
- # 'VB' = Verbatim - line goes out verbatim
- # 'IO' = Indent Only - line goes out unchanged except for indentation
- # 'NIN' = No Internal Newlines - line does not get broken
- # 'HSC'=Hanging Side Comment - fix this hanging side comment
- # 'BC'=Block Comment - an ordinary full line comment
- # 'SBC'=Static Block Comment - a block comment which does not get
- # indented
- # 'SBCX'=Static Block Comment Without Leading Space
- # 'DEL'=Delete this line
- # 'VER'=VERSION statement
- # '' or (undefined) - no restructions
-
- my $rLL = $self->{rLL};
- my $Klimit = $self->{Klimit};
+ # keep 'bareword-' together, but only if there is no space between
+ # the word and dash. Do not keep together if there is a space.
+ # example 'use perl6-alpha'
+ $binary_bond_strength_nospace{'w'}{'m'} = NO_BREAK;
- my $CODE_type = $rOpts->{'indent-only'} ? 'IO' : "";
- my $no_internal_newlines = 1 - $rOpts_add_newlines;
- if ( !$CODE_type && $no_internal_newlines ) { $CODE_type = 'NIN' }
+ # use strict requires that bare word and => not be separated
+ $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
- # extract what we need for this line..
+ # use strict does not allow separating type info from trailing { }
+ # testfile is readmail.pl
+ $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
+ $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
- # Global value for error messages:
- $input_line_number = $line_of_tokens->{_line_number};
+ # As a defensive measure, do not break between a '(' and a
+ # filehandle. In some cases, this can cause an error. For
+ # example, the following program works:
+ # my $msg="hi!\n";
+ # print
+ # ( STDOUT
+ # $msg
+ # );
+ #
+ # But this program fails:
+ # my $msg="hi!\n";
+ # print
+ # (
+ # STDOUT
+ # $msg
+ # );
+ #
+ # This is normally only a problem with the 'extrude' option
+ $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
+ $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
- my $rK_range = $line_of_tokens->{_rK_range};
- my ( $Kfirst, $Klast ) = @{$rK_range};
- my $jmax = -1;
- if ( defined($Kfirst) ) { $jmax = $Klast - $Kfirst }
- my $input_line = $line_of_tokens->{_line_text};
- my $in_continued_quote = my $starting_in_quote =
- $line_of_tokens->{_starting_in_quote};
- my $in_quote = $line_of_tokens->{_ending_in_quote};
- my $ending_in_quote = $in_quote;
- my $guessed_indentation_level =
- $line_of_tokens->{_guessed_indentation_level};
+ # never break between sub name and opening paren
+ $binary_bond_strength{'w'}{'(('} = NO_BREAK;
+ $binary_bond_strength{'w'}{'{('} = NO_BREAK;
- my $is_static_block_comment = 0;
+ # keep '}' together with ';'
+ $binary_bond_strength{'}}'}{';'} = NO_BREAK;
- # Handle a continued quote..
- if ($in_continued_quote) {
+ # Breaking before a ++ can cause perl to guess wrong. For
+ # example the following line will cause a syntax error
+ # with -extrude if we break between '$i' and '++' [fixstyle2]
+ # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
+ $nobreak_lhs{'++'} = NO_BREAK;
- # A line which is entirely a quote or pattern must go out
- # verbatim. Note: the \n is contained in $input_line.
- if ( $jmax <= 0 ) {
- if ( ( $input_line =~ "\t" ) ) {
- note_embedded_tab();
- }
- $Last_line_had_side_comment = 0;
- return 'VB';
- }
- }
+ # Do not break before a possible file handle
+ $nobreak_lhs{'Z'} = NO_BREAK;
- my $is_block_comment =
- ( $jmax == 0 && $rLL->[$Kfirst]->[_TYPE_] eq '#' );
+ # use strict hates bare words on any new line. For
+ # example, a break before the underscore here provokes the
+ # wrath of use strict:
+ # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
+ $nobreak_rhs{'F'} = NO_BREAK;
+ $nobreak_rhs{'CORE::'} = NO_BREAK;
- # Write line verbatim if we are in a formatting skip section
- if ($In_format_skipping_section) {
- $Last_line_had_side_comment = 0;
+ # To prevent the tokenizer from switching between types 'w' and 'G' we
+ # need to avoid breaking between type 'G' and the following code block
+ # brace. Fixes case b929.
+ $nobreak_rhs{G} = NO_BREAK;
- # Note: extra space appended to comment simplifies pattern matching
- if ( $is_block_comment
- && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
- /$format_skipping_pattern_end/o )
- {
- $In_format_skipping_section = 0;
- write_logfile_entry("Exiting formatting skip section\n");
- }
- return 'FS';
- }
+ #---------------------------------------------------------------
+ # Bond Strength BEGIN Section 3.
+ # Define tables and values for applying a small bias to the above
+ # values.
+ #---------------------------------------------------------------
+ # Adding a small 'bias' to strengths is a simple way to make a line
+ # break at the first of a sequence of identical terms. For
+ # example, to force long string of conditional operators to break
+ # with each line ending in a ':', we can add a small number to the
+ # bond strength of each ':' (colon.t)
+ @bias_tokens = qw( : && || f and or . ); # tokens which get bias
+ %bias_hash = map { $_ => 0 } @bias_tokens;
+ $delta_bias = 0.0001; # a very small strength level
+ return;
- # See if we are entering a formatting skip section
- if ( $rOpts_format_skipping
- && $is_block_comment
- && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
- /$format_skipping_pattern_begin/o )
- {
- $In_format_skipping_section = 1;
- write_logfile_entry("Entering formatting skip section\n");
- $Last_line_had_side_comment = 0;
- return 'FS';
- }
+ } ## end sub initialize_bond_strength_hashes
- # ignore trailing blank tokens (they will get deleted later)
- if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
- $jmax--;
- }
+ use constant DEBUG_BOND => 0;
- # Handle a blank line..
- if ( $jmax < 0 ) {
- $Last_line_had_side_comment = 0;
- return 'BL';
- }
+ sub set_bond_strengths {
- # see if this is a static block comment (starts with ## by default)
- my $is_static_block_comment_without_leading_space = 0;
- if ( $is_block_comment
- && $rOpts->{'static-block-comments'}
- && $input_line =~ /$static_block_comment_pattern/o )
- {
- $is_static_block_comment = 1;
- $is_static_block_comment_without_leading_space =
- substr( $input_line, 0, 1 ) eq '#';
- }
+ my ($self) = @_;
- # Check for comments which are line directives
- # Treat exactly as static block comments without leading space
- # reference: perlsyn, near end, section Plain Old Comments (Not!)
- # example: '# line 42 "new_filename.plx"'
- if (
- $is_block_comment
- && $input_line =~ /^\# \s*
- line \s+ (\d+) \s*
- (?:\s("?)([^"]+)\2)? \s*
- $/x
- )
- {
- $is_static_block_comment = 1;
- $is_static_block_comment_without_leading_space = 1;
- }
+ my $rK_weld_right = $self->[_rK_weld_right_];
+ my $rK_weld_left = $self->[_rK_weld_left_];
- # look for hanging side comment
- if (
- $is_block_comment
- && $Last_line_had_side_comment # last line had side comment
- && $input_line =~ /^\s/ # there is some leading space
- && !$is_static_block_comment # do not make static comment hanging
- && $rOpts->{'hanging-side-comments'} # user is allowing
- # hanging side comments
- # like this
- )
- {
- $Last_line_had_side_comment = 1;
- return 'HSC';
- }
+ # patch-its always ok to break at end of line
+ $nobreak_to_go[$max_index_to_go] = 0;
- # remember if this line has a side comment
- $Last_line_had_side_comment =
- ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq '#' );
+ # we start a new set of bias values for each line
+ %bias = %bias_hash;
- # Handle a block (full-line) comment..
- if ($is_block_comment) {
+ my $code_bias = -.01; # bias for closing block braces
- if ( $rOpts->{'delete-block-comments'} ) { return 'DEL' }
+ my $type = 'b';
+ my $token = ' ';
+ my $token_length = 1;
+ my $last_type;
+ my $last_nonblank_type = $type;
+ my $last_nonblank_token = $token;
+ my $list_str = $left_bond_strength{'?'};
- # TRIM COMMENTS -- This could be turned off as a option
- $rLL->[$Kfirst]->[_TOKEN_] =~ s/\s*$//; # trim right end
+ my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
+ $next_nonblank_type, $next_token, $next_type,
+ $total_nesting_depth, );
- if ($is_static_block_comment_without_leading_space) {
- return 'SBCX';
- }
- elsif ($is_static_block_comment) {
- return 'SBC';
- }
- else {
- return 'BC';
+ # main loop to compute bond strengths between each pair of tokens
+ foreach my $i ( 0 .. $max_index_to_go ) {
+ $last_type = $type;
+ if ( $type ne 'b' ) {
+ $last_nonblank_type = $type;
+ $last_nonblank_token = $token;
}
- }
+ $type = $types_to_go[$i];
- # Patch needed for MakeMaker. Do not break a statement
- # in which $VERSION may be calculated. See MakeMaker.pm;
- # this is based on the coding in it.
- # The first line of a file that matches this will be eval'd:
- # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
- # Examples:
- # *VERSION = \'1.01';
- # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
- # We will pass such a line straight through without breaking
- # it unless -npvl is used.
+ # strength on both sides of a blank is the same
+ if ( $type eq 'b' && $last_type ne 'b' ) {
+ $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
+ next;
+ }
- # Patch for problem reported in RT #81866, where files
- # had been flattened into a single line and couldn't be
- # tidied without -npvl. There are two parts to this patch:
- # First, it is not done for a really long line (80 tokens for now).
- # Second, we will only allow up to one semicolon
- # before the VERSION. We need to allow at least one semicolon
- # for statements like this:
- # require Exporter; our $VERSION = $Exporter::VERSION;
- # where both statements must be on a single line for MakeMaker
+ $token = $tokens_to_go[$i];
+ $token_length = $token_lengths_to_go[$i];
+ $block_type = $block_type_to_go[$i];
+ $i_next = $i + 1;
+ $next_type = $types_to_go[$i_next];
+ $next_token = $tokens_to_go[$i_next];
+ $total_nesting_depth = $nesting_depth_to_go[$i_next];
+ $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
+ $next_nonblank_type = $types_to_go[$i_next_nonblank];
+ $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
- my $is_VERSION_statement = 0;
- if ( !$Saw_VERSION_in_this_file
- && $jmax < 80
- && $input_line =~
- /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
- {
- $Saw_VERSION_in_this_file = 1;
- write_logfile_entry("passing VERSION line; -npvl deactivates\n");
- $CODE_type = 'VER';
- }
- return $CODE_type;
- }
-}
+ my $seqno = $type_sequence_to_go[$i];
+ my $next_nonblank_seqno = $type_sequence_to_go[$i_next_nonblank];
-sub find_nested_pairs {
- my $self = shift;
+ # We are computing the strength of the bond between the current
+ # token and the NEXT token.
- my $rLL = $self->{rLL};
- return unless ( defined($rLL) && @{$rLL} );
+ #---------------------------------------------------------------
+ # Bond Strength Section 1:
+ # First Approximation.
+ # Use minimum of individual left and right tabulated bond
+ # strengths.
+ #---------------------------------------------------------------
+ my $bsr = $right_bond_strength{$type};
+ my $bsl = $left_bond_strength{$next_nonblank_type};
- # We define an array of pairs of nested containers
- my @nested_pairs;
+ # define right bond strengths of certain keywords
+ if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
+ $bsr = $right_bond_strength{$token};
+ }
+ elsif ( $token eq 'ne' or $token eq 'eq' ) {
+ $bsr = NOMINAL;
+ }
- # We also set the following hash values to identify container pairs for
- # which the opening and closing tokens are adjacent in the token stream:
- # $rpaired_to_inner_container->{$seqno_out}=$seqno_in where $seqno_out and
- # $seqno_in are the seqence numbers of the outer and inner containers of
- # the pair We need these later to decide if we can insert a missing
- # semicolon
- my $rpaired_to_inner_container = {};
+ # set terminal bond strength to the nominal value
+ # this will cause good preceding breaks to be retained
+ if ( $i_next_nonblank > $max_index_to_go ) {
+ $bsl = NOMINAL;
+ }
- # This local hash remembers if an outer container has a close following
- # inner container;
- # The key is the outer sequence number
- # The value is the token_hash of the inner container
+ # define right bond strengths of certain keywords
+ if ( $next_nonblank_type eq 'k'
+ && defined( $left_bond_strength{$next_nonblank_token} ) )
+ {
+ $bsl = $left_bond_strength{$next_nonblank_token};
+ }
+ elsif ($next_nonblank_token eq 'ne'
+ or $next_nonblank_token eq 'eq' )
+ {
+ $bsl = NOMINAL;
+ }
+ elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
+ $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
+ }
- my %has_close_following_opening;
+ # Use the minimum of the left and right strengths. Note: it might
+ # seem that we would want to keep a NO_BREAK if either token has
+ # this value. This didn't work, for example because in an arrow
+ # list, it prevents the comma from separating from the following
+ # bare word (which is probably quoted by its arrow). So necessary
+ # NO_BREAK's have to be handled as special cases in the final
+ # section.
+ if ( !defined($bsr) ) { $bsr = VERY_STRONG }
+ if ( !defined($bsl) ) { $bsl = VERY_STRONG }
+ my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
+ my $bond_str_1 = $bond_str;
- # Names of calling routines can either be marked as 'i' or 'w',
- # and they may invoke a sub call with an '->'. We will consider
- # any consecutive string of such types as a single unit when making
- # weld decisions. We also allow a leading !
- my $is_name_type = {
- 'i' => 1,
- 'w' => 1,
- 'U' => 1,
- '->' => 1,
- '!' => 1,
- };
-
- my $is_name = sub {
- my $type = shift;
- return $type && $is_name_type->{$type};
- };
+ #---------------------------------------------------------------
+ # Bond Strength Section 2:
+ # Apply hardwired rules..
+ #---------------------------------------------------------------
- my $last_container;
- my $last_last_container;
- my $last_nonblank_token_vars;
- my $last_count;
+ # Patch to put terminal or clauses on a new line: Weaken the bond
+ # at an || followed by die or similar keyword to make the terminal
+ # or clause fall on a new line, like this:
+ #
+ # my $class = shift
+ # || die "Cannot add broadcast: No class identifier found";
+ #
+ # Otherwise the break will be at the previous '=' since the || and
+ # = have the same starting strength and the or is biased, like
+ # this:
+ #
+ # my $class =
+ # shift || die "Cannot add broadcast: No class identifier found";
+ #
+ # In any case if the user places a break at either the = or the ||
+ # it should remain there.
+ if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
+ if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) {
+ if ( $want_break_before{$token} && $i > 0 ) {
+ $bond_strength_to_go[ $i - 1 ] -= $delta_bias;
- my $nonblank_token_count = 0;
+ # keep bond strength of a token and its following blank
+ # the same
+ if ( $types_to_go[ $i - 1 ] eq 'b' && $i > 2 ) {
+ $bond_strength_to_go[ $i - 2 ] -= $delta_bias;
+ }
+ }
+ else {
+ $bond_str -= $delta_bias;
+ }
+ }
+ }
- # loop over all tokens
- foreach my $rtoken_vars ( @{$rLL} ) {
+ # good to break after end of code blocks
+ if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
- my $type = $rtoken_vars->[_TYPE_];
+ $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
+ $code_bias += $delta_bias;
+ }
- next if ( $type eq 'b' );
+ if ( $type eq 'k' ) {
- # long identifier-like items are counted as a single item
- $nonblank_token_count++
- unless ( $is_name->($type)
- && $is_name->( $last_nonblank_token_vars->[_TYPE_] ) );
+ # allow certain control keywords to stand out
+ if ( $next_nonblank_type eq 'k'
+ && $is_last_next_redo_return{$token} )
+ {
+ $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
+ }
- my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
- if ($type_sequence) {
+ # Don't break after keyword my. This is a quick fix for a
+ # rare problem with perl. An example is this line from file
+ # Container.pm:
- my $token = $rtoken_vars->[_TOKEN_];
+ # foreach my $question( Debian::DebConf::ConfigDb::gettree(
+ # $this->{'question'} ) )
- if ( $is_opening_token{$token} ) {
+ if ( $token eq 'my' ) {
+ $bond_str = NO_BREAK;
+ }
- # following previous opening token ...
- if ( $last_container
- && $is_opening_token{ $last_container->[_TOKEN_] } )
- {
+ }
- # adjacent to this one
- my $tok_diff = $nonblank_token_count - $last_count;
+ # good to break before 'if', 'unless', etc
+ if ( $is_if_brace_follower{$next_nonblank_token} ) {
+ $bond_str = VERY_WEAK;
+ }
- my $last_tok = $last_nonblank_token_vars->[_TOKEN_];
+ if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
- if ( $tok_diff == 1
- || $tok_diff == 2 && $last_container->[_TOKEN_] eq '(' )
- {
+ if ( $is_keyword_returning_list{$next_nonblank_token} ) {
+ $bond_str = $list_str if ( $bond_str > $list_str );
+ }
- # remember this pair...
- my $outer_seqno = $last_container->[_TYPE_SEQUENCE_];
- my $inner_seqno = $type_sequence;
- $has_close_following_opening{$outer_seqno} =
- $rtoken_vars;
- }
+ # keywords like 'unless', 'if', etc, within statements
+ # make good breaks
+ if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
+ $bond_str = VERY_WEAK / 1.05;
}
}
- elsif ( $is_closing_token{$token} ) {
+ # try not to break before a comma-arrow
+ elsif ( $next_nonblank_type eq '=>' ) {
+ if ( $bond_str < STRONG ) { $bond_str = STRONG }
+ }
- # if the corresponding opening token had an adjacent opening
- if ( $has_close_following_opening{$type_sequence}
- && $is_closing_token{ $last_container->[_TOKEN_] }
- && $has_close_following_opening{$type_sequence}
- ->[_TYPE_SEQUENCE_] == $last_container->[_TYPE_SEQUENCE_] )
- {
+ #---------------------------------------------------------------
+ # Additional hardwired NOBREAK rules
+ #---------------------------------------------------------------
+
+ # map1.t -- correct for a quirk in perl
+ if ( $token eq '('
+ && $next_nonblank_type eq 'i'
+ && $last_nonblank_type eq 'k'
+ && $is_sort_map_grep{$last_nonblank_token} )
+
+ # /^(sort|map|grep)$/ )
+ {
+ $bond_str = NO_BREAK;
+ }
+
+ # extrude.t: do not break before paren at:
+ # -l pid_filename(
+ if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
+ $bond_str = NO_BREAK;
+ }
- # The closing weld tokens must be adjacent
- # NOTE: so intermediate commas and semicolons
- # can currently block a weld. This is something
- # that could be fixed in the future by including
- # a flag to delete un-necessary commas and semicolons.
- my $tok_diff = $nonblank_token_count - $last_count;
+ # in older version of perl, use strict can cause problems with
+ # breaks before bare words following opening parens. For example,
+ # this will fail under older versions if a break is made between
+ # '(' and 'MAIL': use strict; open( MAIL, "a long filename or
+ # command"); close MAIL;
+ if ( $type eq '{' ) {
- if ( $tok_diff == 1 ) {
+ if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
- # This is a closely nested pair ..
- my $inner_seqno = $last_container->[_TYPE_SEQUENCE_];
- my $outer_seqno = $type_sequence;
- $rpaired_to_inner_container->{$outer_seqno} =
- $inner_seqno;
+ # but it's fine to break if the word is followed by a '=>'
+ # or if it is obviously a sub call
+ my $i_next_next_nonblank = $i_next_nonblank + 1;
+ my $next_next_type = $types_to_go[$i_next_next_nonblank];
+ if ( $next_next_type eq 'b'
+ && $i_next_nonblank < $max_index_to_go )
+ {
+ $i_next_next_nonblank++;
+ $next_next_type = $types_to_go[$i_next_next_nonblank];
+ }
- push @nested_pairs, [ $inner_seqno, $outer_seqno ];
+ # We'll check for an old breakpoint and keep a leading
+ # bareword if it was that way in the input file.
+ # Presumably it was ok that way. For example, the
+ # following would remain unchanged:
+ #
+ # @months = (
+ # January, February, March, April,
+ # May, June, July, August,
+ # September, October, November, December,
+ # );
+ #
+ # This should be sufficient:
+ if (
+ !$old_breakpoint_to_go[$i]
+ && ( $next_next_type eq ','
+ || $next_next_type eq '}' )
+ )
+ {
+ $bond_str = NO_BREAK;
}
}
}
- $last_last_container = $last_container;
- $last_container = $rtoken_vars;
- $last_count = $nonblank_token_count;
- }
- $last_nonblank_token_vars = $rtoken_vars;
- }
- $self->{rnested_pairs} = \@nested_pairs;
- $self->{rpaired_to_inner_container} = $rpaired_to_inner_container;
- return;
-}
+ # Do not break between a possible filehandle and a ? or / and do
+ # not introduce a break after it if there is no blank
+ # (extrude.t)
+ elsif ( $type eq 'Z' ) {
-sub dump_tokens {
+ # don't break..
+ if (
- # a debug routine, not normally used
- my ( $self, $msg ) = @_;
- my $rLL = $self->{rLL};
- my $nvars = @{$rLL};
- print STDERR "$msg\n";
- print STDERR "ntokens=$nvars\n";
- print STDERR "K\t_TOKEN_\t_TYPE_\n";
- my $K = 0;
+ # if there is no blank and we do not want one. Examples:
+ # print $x++ # do not break after $x
+ # print HTML"HELLO" # break ok after HTML
+ (
+ $next_type ne 'b'
+ && defined( $want_left_space{$next_type} )
+ && $want_left_space{$next_type} == WS_NO
+ )
- foreach my $item ( @{$rLL} ) {
- print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n";
- $K++;
- }
- return;
-}
+ # or we might be followed by the start of a quote,
+ # and this is not an existing breakpoint; fixes c039.
+ || !$old_breakpoint_to_go[$i]
+ && substr( $next_nonblank_token, 0, 1 ) eq '/'
-sub get_old_line_index {
- my ( $self, $K ) = @_;
- my $rLL = $self->{rLL};
- return 0 unless defined($K);
- return $rLL->[$K]->[_LINE_INDEX_];
-}
+ )
+ {
+ $bond_str = NO_BREAK;
+ }
+ }
-sub get_old_line_count {
- my ( $self, $Kbeg, $Kend ) = @_;
- my $rLL = $self->{rLL};
- return 0 unless defined($Kbeg);
- return 0 unless defined($Kend);
- return $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_] + 1;
-}
+ # Breaking before a ? before a quote can cause trouble if
+ # they are not separated by a blank.
+ # Example: a syntax error occurs if you break before the ? here
+ # my$logic=join$all?' && ':' || ',@regexps;
+ # From: Professional_Perl_Programming_Code/multifind.pl
+ if ( $next_nonblank_type eq '?' ) {
+ $bond_str = NO_BREAK
+ if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
+ }
-sub K_next_code {
- my ( $self, $KK, $rLL ) = @_;
+ # Breaking before a . followed by a number
+ # can cause trouble if there is no intervening space
+ # Example: a syntax error occurs if you break before the .2 here
+ # $str .= pack($endian.2, ensurrogate($ord));
+ # From: perl58/Unicode.pm
+ elsif ( $next_nonblank_type eq '.' ) {
+ $bond_str = NO_BREAK
+ if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
+ }
- # return the index K of the next nonblank, non-comment token
- return unless ( defined($KK) && $KK >= 0 );
+ # Fix for c039
+ elsif ( $type eq 'w' ) {
+ $bond_str = NO_BREAK
+ if ( !$old_breakpoint_to_go[$i]
+ && substr( $next_nonblank_token, 0, 1 ) eq '/' );
+ }
- # use the standard array unless given otherwise
- $rLL = $self->{rLL} unless ( defined($rLL) );
- my $Num = @{$rLL};
- my $Knnb = $KK + 1;
- while ( $Knnb < $Num ) {
- if ( !defined( $rLL->[$Knnb] ) ) {
- Fault("Undefined entry for k=$Knnb");
- }
- if ( $rLL->[$Knnb]->[_TYPE_] ne 'b'
- && $rLL->[$Knnb]->[_TYPE_] ne '#' )
- {
- return $Knnb;
- }
- $Knnb++;
- }
- return;
-}
+ my $bond_str_2 = $bond_str;
-sub K_next_nonblank {
- my ( $self, $KK, $rLL ) = @_;
+ #---------------------------------------------------------------
+ # End of hardwired rules
+ #---------------------------------------------------------------
- # return the index K of the next nonblank token
- return unless ( defined($KK) && $KK >= 0 );
+ #---------------------------------------------------------------
+ # Bond Strength Section 3:
+ # Apply table rules. These have priority over the above
+ # hardwired rules.
+ #---------------------------------------------------------------
- # use the standard array unless given otherwise
- $rLL = $self->{rLL} unless ( defined($rLL) );
- my $Num = @{$rLL};
- my $Knnb = $KK + 1;
- while ( $Knnb < $Num ) {
- if ( !defined( $rLL->[$Knnb] ) ) {
- Fault("Undefined entry for k=$Knnb");
- }
- if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
- $Knnb++;
- }
- return;
-}
+ my $tabulated_bond_str;
+ my $ltype = $type;
+ my $rtype = $next_nonblank_type;
+ if ( $seqno && $is_container_token{$token} ) {
+ $ltype = $type . $token;
+ }
-sub K_previous_code {
+ if ( $next_nonblank_seqno
+ && $is_container_token{$next_nonblank_token} )
+ {
+ $rtype = $next_nonblank_type . $next_nonblank_token;
+ }
- # return the index K of the previous nonblank, non-comment token
- # Call with $KK=undef to start search at the top of the array
- my ( $self, $KK, $rLL ) = @_;
+ # apply binary rules which apply regardless of space between tokens
+ if ( $binary_bond_strength{$ltype}{$rtype} ) {
+ $bond_str = $binary_bond_strength{$ltype}{$rtype};
+ $tabulated_bond_str = $bond_str;
+ }
- # use the standard array unless given otherwise
- $rLL = $self->{rLL} unless ( defined($rLL) );
- my $Num = @{$rLL};
- if ( !defined($KK) ) { $KK = $Num }
- elsif ( $KK > $Num ) {
+ # apply binary rules which apply only if no space between tokens
+ if ( $binary_bond_strength_nospace{$ltype}{$next_type} ) {
+ $bond_str = $binary_bond_strength{$ltype}{$next_type};
+ $tabulated_bond_str = $bond_str;
+ }
- # The caller should make the first call with KK_new=undef to
- # avoid this error
- Fault(
-"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
- );
- }
- my $Kpnb = $KK - 1;
- while ( $Kpnb >= 0 ) {
- if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b'
- && $rLL->[$Kpnb]->[_TYPE_] ne '#' )
- {
- return $Kpnb;
- }
- $Kpnb--;
- }
- return;
-}
+ if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
+ $bond_str = NO_BREAK;
+ $tabulated_bond_str = $bond_str;
+ }
+ my $bond_str_3 = $bond_str;
-sub K_previous_nonblank {
+ # If the hardwired rules conflict with the tabulated bond
+ # strength then there is an inconsistency that should be fixed
+ DEBUG_BOND
+ && $tabulated_bond_str
+ && $bond_str_1
+ && $bond_str_1 != $bond_str_2
+ && $bond_str_2 != $tabulated_bond_str
+ && do {
+ print STDERR
+"BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
+ };
- # return index of previous nonblank token before item K;
- # Call with $KK=undef to start search at the top of the array
- my ( $self, $KK, $rLL ) = @_;
+ #-----------------------------------------------------------------
+ # Bond Strength Section 4:
+ # Modify strengths of certain tokens which often occur in sequence
+ # by adding a small bias to each one in turn so that the breaks
+ # occur from left to right.
+ #
+ # Note that we only changing strengths by small amounts here,
+ # and usually increasing, so we should not be altering any NO_BREAKs.
+ # Other routines which check for NO_BREAKs will use a tolerance
+ # of one to avoid any problem.
+ #-----------------------------------------------------------------
- # use the standard array unless given otherwise
- $rLL = $self->{rLL} unless ( defined($rLL) );
- my $Num = @{$rLL};
- if ( !defined($KK) ) { $KK = $Num }
- elsif ( $KK > $Num ) {
+ # The bias tables use special keys:
+ # $type - if not keyword
+ # $token - if keyword, but map some keywords together
+ my $left_key =
+ $type eq 'k' ? $token eq 'err' ? 'or' : $token : $type;
+ my $right_key =
+ $next_nonblank_type eq 'k'
+ ? $next_nonblank_token eq 'err'
+ ? 'or'
+ : $next_nonblank_token
+ : $next_nonblank_type;
- # The caller should make the first call with KK_new=undef to
- # avoid this error
- Fault(
-"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
- );
- }
- my $Kpnb = $KK - 1;
- while ( $Kpnb >= 0 ) {
- if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
- $Kpnb--;
- }
- return;
-}
+ if ( $type eq ',' ) {
-sub map_containers {
+ # add any bias set by sub scan_list at old comma break points
+ $bond_str += $bond_strength_to_go[$i];
- # Maps the container hierarchy
- my $self = shift;
- my $rLL = $self->{rLL};
- return unless ( defined($rLL) && @{$rLL} );
+ }
- my $K_opening_container = $self->{K_opening_container};
- my $K_closing_container = $self->{K_closing_container};
- my $rcontainer_map = $self->{rcontainer_map};
+ # bias left token
+ elsif ( defined( $bias{$left_key} ) ) {
+ if ( !$want_break_before{$left_key} ) {
+ $bias{$left_key} += $delta_bias;
+ $bond_str += $bias{$left_key};
+ }
+ }
- # loop over containers
- my @stack; # stack of container sequence numbers
- my $KNEXT = 0;
- while ( defined($KNEXT) ) {
- my $KK = $KNEXT;
- $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
- my $rtoken_vars = $rLL->[$KK];
- my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
- if ( !$type_sequence ) {
- next if ( $KK == 0 ); # first token in file may not be container
- Fault("sequence = $type_sequence not defined at K=$KK");
- }
+ # bias right token
+ if ( defined( $bias{$right_key} ) ) {
+ if ( $want_break_before{$right_key} ) {
- my $token = $rtoken_vars->[_TOKEN_];
- if ( $is_opening_token{$token} ) {
- if (@stack) {
- $rcontainer_map->{$type_sequence} = $stack[-1];
+ # for leading '.' align all but 'short' quotes; the idea
+ # is to not place something like "\n" on a single line.
+ if ( $right_key eq '.' ) {
+ unless (
+ $last_nonblank_type eq '.'
+ && ( $token_length <=
+ $rOpts_short_concatenation_item_length )
+ && ( !$is_closing_token{$token} )
+ )
+ {
+ $bias{$right_key} += $delta_bias;
+ }
+ }
+ else {
+ $bias{$right_key} += $delta_bias;
+ }
+ $bond_str += $bias{$right_key};
+ }
}
- push @stack, $type_sequence;
- }
- if ( $is_closing_token{$token} ) {
- if (@stack) {
- my $seqno = pop @stack;
- if ( $seqno != $type_sequence ) {
+ my $bond_str_4 = $bond_str;
+
+ #---------------------------------------------------------------
+ # Bond Strength Section 5:
+ # Fifth Approximation.
+ # Take nesting depth into account by adding the nesting depth
+ # to the bond strength.
+ #---------------------------------------------------------------
+ my $strength;
- # shouldn't happen unless file is garbage
+ if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
+ if ( $total_nesting_depth > 0 ) {
+ $strength = $bond_str + $total_nesting_depth;
+ }
+ else {
+ $strength = $bond_str;
}
}
- }
- }
+ else {
+ $strength = NO_BREAK;
- # the stack should be empty for a good file
- if (@stack) {
+ # For critical code such as lines with here targets we must
+ # be absolutely sure that we do not allow a break. So for
+ # these the nobreak flag exceeds 1 as a signal. Otherwise we
+ # can run into trouble when small tolerances are added.
+ $strength += 1 if ( $nobreak_to_go[$i] > 1 );
+ }
- # unbalanced containers; file probably bad
- }
- else {
- # ok
- }
- return;
-}
+ #---------------------------------------------------------------
+ # Bond Strength Section 6:
+ # Sixth Approximation. Welds.
+ #---------------------------------------------------------------
-sub mark_short_nested_blocks {
+ # Do not allow a break within welds
+ if ( $total_weld_count && $seqno ) {
+ my $KK = $K_to_go[$i];
+ if ( $rK_weld_right->{$KK} ) {
+ $strength = NO_BREAK;
+ }
- # This routine looks at the entire file and marks any short nested blocks
- # which should not be broken. The results are stored in the hash
- # $rshort_nested->{$type_sequence}
- # which will be true if the container should remain intact.
- #
- # For example, consider the following line:
+ # But encourage breaking after opening welded tokens
+ elsif ($rK_weld_left->{$KK}
+ && $is_opening_token{$token} )
+ {
+ $strength -= 1;
+ }
+ }
- # sub cxt_two { sort { $a <=> $b } test_if_list() }
+ # always break after side comment
+ if ( $type eq '#' ) { $strength = 0 }
- # The 'sort' block is short and nested within an outer sub block.
- # Normally, the existance of the 'sort' block will force the sub block to
- # break open, but this is not always desirable. Here we will set a flag for
- # the sort block to prevent this. To give the user control, we will
- # follow the input file formatting. If either of the blocks is broken in
- # the input file then we will allow it to remain broken. Otherwise we will
- # set a flag to keep it together in later formatting steps.
+ $bond_strength_to_go[$i] = $strength;
- # The flag which is set here will be checked in two places:
- # 'sub print_line_of_tokens' and 'sub starting_one_line_block'
+ # Fix for case c001: be sure NO_BREAK's are enforced by later
+ # routines, except at a '?' because '?' as quote delimiter is
+ # deprecated.
+ if ( $strength >= NO_BREAK && $next_nonblank_type ne '?' ) {
+ $nobreak_to_go[$i] ||= 1;
+ }
- my $self = shift;
- my $rLL = $self->{rLL};
- return unless ( defined($rLL) && @{$rLL} );
+ DEBUG_BOND && do {
+ my $str = substr( $token, 0, 15 );
+ $str .= ' ' x ( 16 - length($str) );
+ print STDOUT
+"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
+ };
+ } ## end main loop
+ return;
+ } ## end sub set_bond_strengths
+} ## end closure set_bond_strengths
- return unless ( $rOpts->{'one-line-block-nesting'} );
+sub bad_pattern {
- my $K_opening_container = $self->{K_opening_container};
- my $K_closing_container = $self->{K_closing_container};
- my $rbreak_container = $self->{rbreak_container};
- my $rshort_nested = $self->{rshort_nested};
- my $rcontainer_map = $self->{rcontainer_map};
- my $rlines = $self->{rlines};
+ # See if a pattern will compile. We have to use a string eval here,
+ # but it should be safe because the pattern has been constructed
+ # by this program.
+ my ($pattern) = @_;
+ eval "'##'=~/$pattern/";
+ return $@;
+}
- # Variables needed for estimating line lengths
- my $starting_indent;
- my $starting_lentot;
- my $length_tol = 1;
+{ ## begin closure prepare_cuddled_block_types
- my $excess_length_to_K = sub {
- my ($K) = @_;
+ my %no_cuddle;
- # Estimate the length from the line start to a given token
- my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
- my $excess_length =
- $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
- return ($excess_length);
- };
+ # Add keywords here which really should not be cuddled
+ BEGIN {
+ my @q = qw(if unless for foreach while);
+ @no_cuddle{@q} = (1) x scalar(@q);
+ }
- my $is_broken_block = sub {
+ sub prepare_cuddled_block_types {
- # a block is broken if the input line numbers of the braces differ
- my ($seqno) = @_;
- my $K_opening = $K_opening_container->{$seqno};
- return unless ( defined($K_opening) );
- my $K_closing = $K_closing_container->{$seqno};
- return unless ( defined($K_closing) );
- return $rbreak_container->{$seqno}
- || $rLL->[$K_closing]->[_LINE_INDEX_] !=
- $rLL->[$K_opening]->[_LINE_INDEX_];
- };
+ # the cuddled-else style, if used, is controlled by a hash that
+ # we construct here
- # loop over all containers
- my @open_block_stack;
- my $iline = -1;
- my $KNEXT = 0;
- while ( defined($KNEXT) ) {
- my $KK = $KNEXT;
- $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
- my $rtoken_vars = $rLL->[$KK];
- my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
- if ( !$type_sequence ) {
- next if ( $KK == 0 ); # first token in file may not be container
+ # Include keywords here which should not be cuddled
- # an error here is most likely due to a recent programming change
- Fault("sequence = $type_sequence not defined at K=$KK");
- }
+ my $cuddled_string = "";
+ if ( $rOpts->{'cuddled-else'} ) {
- # We are just looking at code blocks
- my $token = $rtoken_vars->[_TOKEN_];
- my $type = $rtoken_vars->[_TYPE_];
- next unless ( $type eq $token );
- my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
- next unless ($block_type);
+ # set the default
+ $cuddled_string = 'elsif else continue catch finally'
+ unless ( $rOpts->{'cuddled-block-list-exclusive'} );
- # Keep a stack of all acceptable block braces seen.
- # Only consider blocks entirely on one line so dump the stack when line
- # changes.
- my $iline_last = $iline;
- $iline = $rLL->[$KK]->[_LINE_INDEX_];
- if ( $iline != $iline_last ) { @open_block_stack = () }
+ # This is the old equivalent but more complex version
+ # $cuddled_string = 'if-elsif-else unless-elsif-else -continue ';
+
+ # Add users other blocks to be cuddled
+ my $cuddled_block_list = $rOpts->{'cuddled-block-list'};
+ if ($cuddled_block_list) {
+ $cuddled_string .= " " . $cuddled_block_list;
+ }
- if ( $token eq '}' ) {
- if (@open_block_stack) { pop @open_block_stack }
}
- next unless ( $token eq '{' );
- # block must be balanced (bad scripts may be unbalanced)
- my $K_opening = $K_opening_container->{$type_sequence};
- my $K_closing = $K_closing_container->{$type_sequence};
- next unless ( defined($K_opening) && defined($K_closing) );
+ # If we have a cuddled string of the form
+ # 'try-catch-finally'
- # require that this block be entirely on one line
- next if ( $is_broken_block->($type_sequence) );
+ # we want to prepare a hash of the form
- # See if this block fits on one line of allowed length (which may
- # be different from the input script)
- $starting_lentot =
- $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
- $starting_indent = 0;
- if ( !$rOpts_variable_maximum_line_length ) {
- my $level = $rLL->[$KK]->[_LEVEL_];
- $starting_indent = $rOpts_indent_columns * $level;
- }
+ # $rcuddled_block_types = {
+ # 'try' => {
+ # 'catch' => 1,
+ # 'finally' => 1
+ # },
+ # };
- # Dump the stack if block is too long and skip this block
- if ( $excess_length_to_K->($K_closing) > 0 ) {
- @open_block_stack = ();
- next;
- }
+ # use -dcbl to dump this hash
- # OK, Block passes tests, remember it
- push @open_block_stack, $type_sequence;
+ # Multiple such strings are input as a space or comma separated list
- # We are only marking nested code blocks,
- # so check for a previous block on the stack
- next unless ( @open_block_stack > 1 );
+ # If we get two lists with the same leading type, such as
+ # -cbl = "-try-catch-finally -try-catch-otherwise"
+ # then they will get merged as follows:
+ # $rcuddled_block_types = {
+ # 'try' => {
+ # 'catch' => 1,
+ # 'finally' => 2,
+ # 'otherwise' => 1,
+ # },
+ # };
+ # This will allow either type of chain to be followed.
- # Looks OK, mark this as a short nested block
- $rshort_nested->{$type_sequence} = 1;
+ $cuddled_string =~ s/,/ /g; # allow space or comma separated lists
+ my @cuddled_strings = split /\s+/, $cuddled_string;
- }
- return;
-}
+ $rcuddled_block_types = {};
-sub weld_containers {
+ # process each dash-separated string...
+ my $string_count = 0;
+ foreach my $string (@cuddled_strings) {
+ next unless $string;
+ my @words = split /-+/, $string; # allow multiple dashes
- # do any welding operations
- my $self = shift;
+ # we could look for and report possible errors here...
+ next unless ( @words > 0 );
- # initialize weld length hashes needed later for checking line lengths
- # TODO: These should eventually be stored in $self rather than be package vars
- %weld_len_left_closing = ();
- %weld_len_right_closing = ();
- %weld_len_left_opening = ();
- %weld_len_right_opening = ();
+ # allow either '-continue' or *-continue' for arbitrary starting type
+ my $start = '*';
- return if ( $rOpts->{'indent-only'} );
- return unless ($rOpts_add_newlines);
+ # a single word without dashes is a secondary block type
+ if ( @words > 1 ) {
+ $start = shift @words;
+ }
- if ( $rOpts->{'weld-nested-containers'} ) {
+ # always make an entry for the leading word. If none follow, this
+ # will still prevent a wildcard from matching this word.
+ if ( !defined( $rcuddled_block_types->{$start} ) ) {
+ $rcuddled_block_types->{$start} = {};
+ }
- # if called, weld_nested_containers must be called before other weld
- # operations. # This is because weld_nested_containers could overwrite
- # hash values written by weld_cuddled_blocks and weld_nested_quotes.
- $self->weld_nested_containers();
+ # The count gives the original word order in case we ever want it.
+ $string_count++;
+ my $word_count = 0;
+ foreach my $word (@words) {
+ next unless $word;
+ if ( $no_cuddle{$word} ) {
+ Warn(
+"## Ignoring keyword '$word' in -cbl; does not seem right\n"
+ );
+ next;
+ }
+ $word_count++;
+ $rcuddled_block_types->{$start}->{$word} =
+ 1; #"$string_count.$word_count";
- $self->weld_nested_quotes();
+ # git#9: Remove this word from the list of desired one-line
+ # blocks
+ $want_one_line_block{$word} = 0;
+ }
+ }
+ return;
}
+} ## begin closure prepare_cuddled_block_types
- # Note that weld_nested_containers() changes the _LEVEL_ values, so
- # weld_cuddled_blocks must use the _TRUE_LEVEL_ values instead.
+sub dump_cuddled_block_list {
+ my ($fh) = @_;
- # Here is a good test case to Be sure that both cuddling and welding
- # are working and not interfering with each other: <<snippets/ce_wn1.in>>
+ # ORIGINAL METHOD: Here is the format of the cuddled block type hash
+ # which controls this routine
+ # my $rcuddled_block_types = {
+ # 'if' => {
+ # 'else' => 1,
+ # 'elsif' => 1
+ # },
+ # 'try' => {
+ # 'catch' => 1,
+ # 'finally' => 1
+ # },
+ # };
- # perltidy -wn -ce
+ # SIMPLFIED METHOD: the simplified method uses a wildcard for
+ # the starting block type and puts all cuddled blocks together:
+ # my $rcuddled_block_types = {
+ # '*' => {
+ # 'else' => 1,
+ # 'elsif' => 1
+ # 'catch' => 1,
+ # 'finally' => 1
+ # },
+ # };
- # if ($BOLD_MATH) { (
- # $labels, $comment,
- # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
- # ) } else { (
- # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
- # $after
- # ) }
+ # Both methods work, but the simplified method has proven to be adequate and
+ # easier to manage.
+
+ my $cuddled_string = $rOpts->{'cuddled-block-list'};
+ $cuddled_string = '' unless $cuddled_string;
+
+ my $flags = "";
+ $flags .= "-ce" if ( $rOpts->{'cuddled-else'} );
+ $flags .= " -cbl='$cuddled_string'";
+
+ unless ( $rOpts->{'cuddled-else'} ) {
+ $flags .= "\nNote: You must specify -ce to generate a cuddled hash";
+ }
+
+ $fh->print(<<EOM);
+------------------------------------------------------------------------
+Hash of cuddled block types prepared for a run with these parameters:
+ $flags
+------------------------------------------------------------------------
+EOM
- $self->weld_cuddled_blocks();
+ use Data::Dumper;
+ $fh->print( Dumper($rcuddled_block_types) );
+ $fh->print(<<EOM);
+------------------------------------------------------------------------
+EOM
return;
}
-sub cumulative_length_before_K {
- my ( $self, $KK ) = @_;
- my $rLL = $self->{rLL};
- return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
+sub make_static_block_comment_pattern {
+
+ # create the pattern used to identify static block comments
+ $static_block_comment_pattern = '^\s*##';
+
+ # allow the user to change it
+ if ( $rOpts->{'static-block-comment-prefix'} ) {
+ my $prefix = $rOpts->{'static-block-comment-prefix'};
+ $prefix =~ s/^\s*//;
+ my $pattern = $prefix;
+
+ # user may give leading caret to force matching left comments only
+ if ( $prefix !~ /^\^#/ ) {
+ if ( $prefix !~ /^#/ ) {
+ Die(
+"ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"
+ );
+ }
+ $pattern = '^\s*' . $prefix;
+ }
+ if ( bad_pattern($pattern) ) {
+ Die(
+"ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"
+ );
+ }
+ $static_block_comment_pattern = $pattern;
+ }
+ return;
}
-sub cumulative_length_after_K {
- my ( $self, $KK ) = @_;
- my $rLL = $self->{rLL};
- return $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
+sub make_format_skipping_pattern {
+ my ( $opt_name, $default ) = @_;
+ my $param = $rOpts->{$opt_name};
+ unless ($param) { $param = $default }
+ $param =~ s/^\s*//;
+ if ( $param !~ /^#/ ) {
+ Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
+ }
+ my $pattern = '^' . $param . '\s';
+ if ( bad_pattern($pattern) ) {
+ Die(
+"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
+ );
+ }
+ return $pattern;
}
-sub weld_cuddled_blocks {
- my $self = shift;
+sub make_non_indenting_brace_pattern {
- # This routine implements the -cb flag by finding the appropriate
- # closing and opening block braces and welding them together.
- return unless ( %{$rcuddled_block_types} );
+ # Create the pattern used to identify static side comments.
+ # Note that we are ending the pattern in a \s. This will allow
+ # the pattern to be followed by a space and some text, or a newline.
+ # The pattern is used in sub 'non_indenting_braces'
+ $non_indenting_brace_pattern = '^#<<<\s';
- my $rLL = $self->{rLL};
- return unless ( defined($rLL) && @{$rLL} );
- my $rbreak_container = $self->{rbreak_container};
+ # allow the user to change it
+ if ( $rOpts->{'non-indenting-brace-prefix'} ) {
+ my $prefix = $rOpts->{'non-indenting-brace-prefix'};
+ $prefix =~ s/^\s*//;
+ if ( $prefix !~ /^#/ ) {
+ Die("ERROR: the -nibp parameter '$prefix' must begin with '#'\n");
+ }
+ my $pattern = '^' . $prefix . '\s';
+ if ( bad_pattern($pattern) ) {
+ Die(
+"ERROR: the -nibp prefix '$prefix' causes the invalid regex '$pattern'\n"
+ );
+ }
+ $non_indenting_brace_pattern = $pattern;
+ }
+ return;
+}
- my $K_opening_container = $self->{K_opening_container};
- my $K_closing_container = $self->{K_closing_container};
+sub make_closing_side_comment_list_pattern {
- my $length_to_opening_seqno = sub {
- my ($seqno) = @_;
- my $KK = $K_opening_container->{$seqno};
- my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
- return $lentot;
- };
- my $length_to_closing_seqno = sub {
- my ($seqno) = @_;
- my $KK = $K_closing_container->{$seqno};
- my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
- return $lentot;
- };
+ # turn any input list into a regex for recognizing selected block types
+ $closing_side_comment_list_pattern = '^\w+';
+ if ( defined( $rOpts->{'closing-side-comment-list'} )
+ && $rOpts->{'closing-side-comment-list'} )
+ {
+ $closing_side_comment_list_pattern =
+ make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
+ }
+ return;
+}
- my $is_broken_block = sub {
+sub make_sub_matching_pattern {
- # a block is broken if the input line numbers of the braces differ
- # we can only cuddle between broken blocks
- my ($seqno) = @_;
- my $K_opening = $K_opening_container->{$seqno};
- return unless ( defined($K_opening) );
- my $K_closing = $K_closing_container->{$seqno};
- return unless ( defined($K_closing) );
- return $rbreak_container->{$seqno}
- || $rLL->[$K_closing]->[_LINE_INDEX_] !=
- $rLL->[$K_opening]->[_LINE_INDEX_];
- };
+ # Patterns for standardizing matches to block types for regular subs and
+ # anonymous subs. Examples
+ # 'sub process' is a named sub
+ # 'sub ::m' is a named sub
+ # 'sub' is an anonymous sub
+ # 'sub:' is a label, not a sub
+ # 'substr' is a keyword
+ $SUB_PATTERN = '^sub\s+(::|\w)'; # match normal sub
+ $ASUB_PATTERN = '^sub$'; # match anonymous sub
+ $ANYSUB_PATTERN = '^sub\b'; # match either type of sub
- # A stack to remember open chains at all levels:
- # $in_chain[$level] = [$chain_type, $type_sequence];
- my @in_chain;
- my $CBO = $rOpts->{'cuddled-break-option'};
+ # Note (see also RT #133130): These patterns are used by
+ # sub make_block_pattern, which is used for making most patterns.
+ # So this sub needs to be called before other pattern-making routines.
- # loop over structure items to find cuddled pairs
- my $level = 0;
- my $KNEXT = 0;
- while ( defined($KNEXT) ) {
- my $KK = $KNEXT;
- $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
- my $rtoken_vars = $rLL->[$KK];
- my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
- if ( !$type_sequence ) {
- next if ( $KK == 0 ); # first token in file may not be container
- Fault("sequence = $type_sequence not defined at K=$KK");
- }
+ if ( $rOpts->{'sub-alias-list'} ) {
- # We use the original levels because they get changed by sub
- # 'weld_nested_containers'. So if this were to be called before that
- # routine, the levels would be wrong and things would go bad.
- my $last_level = $level;
- $level = $rtoken_vars->[_LEVEL_TRUE_];
+ # Note that any 'sub-alias-list' has been preprocessed to
+ # be a trimmed, space-separated list which includes 'sub'
+ # for example, it might be 'sub method fun'
+ my $sub_alias_list = $rOpts->{'sub-alias-list'};
+ $sub_alias_list =~ s/\s+/\|/g;
+ $SUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
+ $ASUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
+ $ANYSUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
+ }
+ return;
+}
- if ( $level < $last_level ) { $in_chain[$last_level] = undef }
- elsif ( $level > $last_level ) { $in_chain[$level] = undef }
+sub make_bli_pattern {
- # We are only looking at code blocks
- my $token = $rtoken_vars->[_TOKEN_];
- my $type = $rtoken_vars->[_TYPE_];
- next unless ( $type eq $token );
+ # default list of block types for which -bli would apply
+ my $bli_list_string = 'if else elsif unless while for foreach do : sub';
- if ( $token eq '{' ) {
+ if ( defined( $rOpts->{'brace-left-and-indent-list'} )
+ && $rOpts->{'brace-left-and-indent-list'} )
+ {
+ $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
+ }
- my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
- if ( !$block_type ) {
+ $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
+ return;
+}
- # patch for unrecognized block types which may not be labeled
- my $Kp = $self->K_previous_nonblank($KK);
- while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
- $Kp = $self->K_previous_nonblank($Kp);
- }
- next unless $Kp;
- $block_type = $rLL->[$Kp]->[_TOKEN_];
+sub make_keyword_group_list_pattern {
+
+ # turn any input list into a regex for recognizing selected block types.
+ # Here are the defaults:
+ $keyword_group_list_pattern = '^(our|local|my|use|require|)$';
+ $keyword_group_list_comment_pattern = '';
+ if ( defined( $rOpts->{'keyword-group-blanks-list'} )
+ && $rOpts->{'keyword-group-blanks-list'} )
+ {
+ my @words = split /\s+/, $rOpts->{'keyword-group-blanks-list'};
+ my @keyword_list;
+ my @comment_list;
+ foreach my $word (@words) {
+ if ( $word =~ /^(BC|SBC)$/ ) {
+ push @comment_list, $word;
+ if ( $word eq 'SBC' ) { push @comment_list, 'SBCX' }
}
- if ( $in_chain[$level] ) {
+ else {
+ push @keyword_list, $word;
+ }
+ }
+ $keyword_group_list_pattern =
+ make_block_pattern( '-kgbl', $rOpts->{'keyword-group-blanks-list'} );
+ $keyword_group_list_comment_pattern =
+ make_block_pattern( '-kgbl', join( ' ', @comment_list ) );
+ }
+ return;
+}
- # we are in a chain and are at an opening block brace.
- # See if we are welding this opening brace with the previous
- # block brace. Get their identification numbers:
- my $closing_seqno = $in_chain[$level]->[1];
- my $opening_seqno = $type_sequence;
+sub make_block_brace_vertical_tightness_pattern {
- # The preceding block must be on multiple lines so that its
- # closing brace will start a new line.
- if ( !$is_broken_block->($closing_seqno) ) {
- next unless ( $CBO == 2 );
- $rbreak_container->{$closing_seqno} = 1;
- }
+ # turn any input list into a regex for recognizing selected block types
+ $block_brace_vertical_tightness_pattern =
+ '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
+ if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
+ && $rOpts->{'block-brace-vertical-tightness-list'} )
+ {
+ $block_brace_vertical_tightness_pattern =
+ make_block_pattern( '-bbvtl',
+ $rOpts->{'block-brace-vertical-tightness-list'} );
+ }
+ return;
+}
- # we will let the trailing block be either broken or intact
- ## && $is_broken_block->($opening_seqno);
+sub make_blank_line_pattern {
- # We can weld the closing brace to its following word ..
- my $Ko = $K_closing_container->{$closing_seqno};
- my $Kon = $self->K_next_nonblank($Ko);
+ $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
+ my $key = 'blank-lines-before-closing-block-list';
+ if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
+ $blank_lines_before_closing_block_pattern =
+ make_block_pattern( '-blbcl', $rOpts->{$key} );
+ }
- # ..unless it is a comment
- if ( $rLL->[$Kon]->[_TYPE_] ne '#' ) {
- my $dlen =
- $rLL->[$Kon]->[_CUMULATIVE_LENGTH_] -
- $rLL->[ $Ko - 1 ]->[_CUMULATIVE_LENGTH_];
- $weld_len_right_closing{$closing_seqno} = $dlen;
+ $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
+ $key = 'blank-lines-after-opening-block-list';
+ if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
+ $blank_lines_after_opening_block_pattern =
+ make_block_pattern( '-blaol', $rOpts->{$key} );
+ }
+ return;
+}
- # Set flag that we want to break the next container
- # so that the cuddled line is balanced.
- $rbreak_container->{$opening_seqno} = 1
- if ($CBO);
- }
+sub make_block_pattern {
- }
- else {
+ # given a string of block-type keywords, return a regex to match them
+ # The only tricky part is that labels are indicated with a single ':'
+ # and the 'sub' token text may have additional text after it (name of
+ # sub).
+ #
+ # Example:
+ #
+ # input string: "if else elsif unless while for foreach do : sub";
+ # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
- # We are not in a chain. Start a new chain if we see the
- # starting block type.
- if ( $rcuddled_block_types->{$block_type} ) {
- $in_chain[$level] = [ $block_type, $type_sequence ];
- }
- else {
- $block_type = '*';
- $in_chain[$level] = [ $block_type, $type_sequence ];
- }
- }
+ # Minor Update:
+ #
+ # To distinguish between anonymous subs and named subs, use 'sub' to
+ # indicate a named sub, and 'asub' to indicate an anonymous sub
+
+ my ( $abbrev, $string ) = @_;
+ my @list = split_words($string);
+ my @words = ();
+ my %seen;
+ for my $i (@list) {
+ if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
+ next if $seen{$i};
+ $seen{$i} = 1;
+ if ( $i eq 'sub' ) {
}
- elsif ( $token eq '}' ) {
- if ( $in_chain[$level] ) {
+ elsif ( $i eq 'asub' ) {
+ }
+ elsif ( $i eq ';' ) {
+ push @words, ';';
+ }
+ elsif ( $i eq '{' ) {
+ push @words, '\{';
+ }
+ elsif ( $i eq ':' ) {
+ push @words, '\w+:';
+ }
+ elsif ( $i =~ /^\w/ ) {
+ push @words, $i;
+ }
+ else {
+ Warn("unrecognized block type $i after $abbrev, ignoring\n");
+ }
+ }
+ my $pattern = '(' . join( '|', @words ) . ')$';
+ my $sub_patterns = "";
+ if ( $seen{'sub'} ) {
+ $sub_patterns .= '|' . $SUB_PATTERN;
+ }
+ if ( $seen{'asub'} ) {
+ $sub_patterns .= '|' . $ASUB_PATTERN;
+ }
+ if ($sub_patterns) {
+ $pattern = '(' . $pattern . $sub_patterns . ')';
+ }
+ $pattern = '^' . $pattern;
+ return $pattern;
+}
- # We are in a chain at a closing brace. See if this chain
- # continues..
- my $Knn = $self->K_next_code($KK);
- next unless $Knn;
+sub make_static_side_comment_pattern {
- my $chain_type = $in_chain[$level]->[0];
- my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
- if (
- $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
- )
- {
+ # create the pattern used to identify static side comments
+ $static_side_comment_pattern = '^##';
- # Note that we do not weld yet because we must wait until
- # we we are sure that an opening brace for this follows.
- $in_chain[$level]->[1] = $type_sequence;
- }
- else { $in_chain[$level] = undef }
- }
+ # allow the user to change it
+ if ( $rOpts->{'static-side-comment-prefix'} ) {
+ my $prefix = $rOpts->{'static-side-comment-prefix'};
+ $prefix =~ s/^\s*//;
+ my $pattern = '^' . $prefix;
+ if ( bad_pattern($pattern) ) {
+ Die(
+"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"
+ );
}
+ $static_side_comment_pattern = $pattern;
}
-
return;
}
-sub weld_nested_containers {
- my $self = shift;
+sub make_closing_side_comment_prefix {
- # This routine implements the -wn flag by "welding together"
- # the nested closing and opening tokens which were previously
- # identified by sub 'find_nested_pairs'. "welding" simply
- # involves setting certain hash values which will be checked
- # later during formatting.
+ # Be sure we have a valid closing side comment prefix
+ my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
+ my $csc_prefix_pattern;
+ if ( !defined($csc_prefix) ) {
+ $csc_prefix = '## end';
+ $csc_prefix_pattern = '^##\s+end';
+ }
+ else {
+ my $test_csc_prefix = $csc_prefix;
+ if ( $test_csc_prefix !~ /^#/ ) {
+ $test_csc_prefix = '#' . $test_csc_prefix;
+ }
- my $rLL = $self->{rLL};
- my $Klimit = $self->get_rLL_max_index();
- my $rnested_pairs = $self->{rnested_pairs};
- my $rlines = $self->{rlines};
- my $K_opening_container = $self->{K_opening_container};
- my $K_closing_container = $self->{K_closing_container};
+ # make a regex to recognize the prefix
+ my $test_csc_prefix_pattern = $test_csc_prefix;
- # Return unless there are nested pairs to weld
- return unless defined($rnested_pairs) && @{$rnested_pairs};
+ # escape any special characters
+ $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
- # This array will hold the sequence numbers of the tokens to be welded.
- my @welds;
-
- # Variables needed for estimating line lengths
- my $starting_indent;
- my $starting_lentot;
+ $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
- # A tolerance to the length for length estimates. In some rare cases
- # this can avoid problems where a final weld slightly exceeds the
- # line length and gets broken in a bad spot.
- my $length_tol = 1;
+ # allow exact number of intermediate spaces to vary
+ $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
- my $excess_length_to_K = sub {
- my ($K) = @_;
+ # make sure we have a good pattern
+ # if we fail this we probably have an error in escaping
+ # characters.
- # Estimate the length from the line start to a given token
- my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
- my $excess_length =
- $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
- return ($excess_length);
- };
+ if ( bad_pattern($test_csc_prefix_pattern) ) {
- my $length_to_opening_seqno = sub {
- my ($seqno) = @_;
- my $KK = $K_opening_container->{$seqno};
- my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
- return $lentot;
- };
+ # shouldn't happen..must have screwed up escaping, above
+ report_definite_bug();
+ Warn(
+"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"
+ );
- my $length_to_closing_seqno = sub {
- my ($seqno) = @_;
- my $KK = $K_closing_container->{$seqno};
- my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
- return $lentot;
- };
+ # just warn and keep going with defaults
+ Warn("Please consider using a simpler -cscp prefix\n");
+ Warn("Using default -cscp instead; please check output\n");
+ }
+ else {
+ $csc_prefix = $test_csc_prefix;
+ $csc_prefix_pattern = $test_csc_prefix_pattern;
+ }
+ }
+ $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
+ $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
+ return;
+}
- # Abbreviations:
- # _oo=outer opening, i.e. first of { {
- # _io=inner opening, i.e. second of { {
- # _oc=outer closing, i.e. second of } {
- # _ic=inner closing, i.e. first of } }
+##################################################
+# CODE SECTION 4: receive lines from the tokenizer
+##################################################
- my $previous_pair;
+{ ## begin closure write_line
- # We are working from outermost to innermost pairs so that
- # level changes will be complete when we arrive at the inner pairs.
+ my $Last_line_had_side_comment;
+ my $In_format_skipping_section;
+ my $Saw_VERSION_in_this_file;
- while ( my $item = pop( @{$rnested_pairs} ) ) {
- my ( $inner_seqno, $outer_seqno ) = @{$item};
+ sub initialize_write_line {
- my $Kouter_opening = $K_opening_container->{$outer_seqno};
- my $Kinner_opening = $K_opening_container->{$inner_seqno};
- my $Kouter_closing = $K_closing_container->{$outer_seqno};
- my $Kinner_closing = $K_closing_container->{$inner_seqno};
+ $Last_line_had_side_comment = 0;
+ $In_format_skipping_section = 0;
+ $Saw_VERSION_in_this_file = 0;
- my $outer_opening = $rLL->[$Kouter_opening];
- my $inner_opening = $rLL->[$Kinner_opening];
- my $outer_closing = $rLL->[$Kouter_closing];
- my $inner_closing = $rLL->[$Kinner_closing];
+ return;
+ }
- my $iline_oo = $outer_opening->[_LINE_INDEX_];
- my $iline_io = $inner_opening->[_LINE_INDEX_];
+ sub write_line {
+
+ # This routine originally received lines of code and immediately processed
+ # them. That was efficient when memory was limited, but now it just saves
+ # the lines it receives. They get processed all together after the last
+ # line is received.
+
+ # As tokenized lines are received they are converted to the format needed
+ # for the final formatting.
+ my ( $self, $line_of_tokens_old ) = @_;
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
+ my $rlines_new = $self->[_rlines_];
+ my $maximum_level = $self->[_maximum_level_];
+
+ my $Kfirst;
+ my $line_of_tokens = {};
+ foreach my $key (
+ qw(
+ _curly_brace_depth
+ _ending_in_quote
+ _guessed_indentation_level
+ _line_number
+ _line_text
+ _line_type
+ _paren_depth
+ _quote_character
+ _square_bracket_depth
+ _starting_in_quote
+ )
+ )
+ {
+ $line_of_tokens->{$key} = $line_of_tokens_old->{$key};
+ }
- # Set flag saying if this pair starts a new weld
- my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
+ # Data needed by Logger
+ $line_of_tokens->{_level_0} = 0;
+ $line_of_tokens->{_ci_level_0} = 0;
+ $line_of_tokens->{_nesting_blocks_0} = "";
+ $line_of_tokens->{_nesting_tokens_0} = "";
- # Set flag saying if this pair is adjacent to the previous nesting pair
- # (even if previous pair was rejected as a weld)
- my $touch_previous_pair =
- defined($previous_pair) && $outer_seqno == $previous_pair->[0];
- $previous_pair = $item;
+ # Needed to avoid trimming quotes
+ $line_of_tokens->{_ended_in_blank_token} = undef;
- # Set a flag if we should not weld. It sometimes looks best not to weld
- # when the opening and closing tokens are very close. However, there
- # is a danger that we will create a "blinker", which oscillates between
- # two semi-stable states, if we do not weld. So the rules for
- # not welding have to be carefully defined and tested.
- my $do_not_weld;
- if ( !$touch_previous_pair ) {
+ my $line_type = $line_of_tokens_old->{_line_type};
+ my $input_line_no = $line_of_tokens_old->{_line_number};
+ my $CODE_type = "";
+ my $tee_output;
- # If this pair is not adjacent to the previous pair (skipped or
- # not), then measure lengths from the start of line of oo
+ # Handle line of non-code
+ if ( $line_type ne 'CODE' ) {
+ $tee_output ||= $rOpts_tee_pod
+ && substr( $line_type, 0, 3 ) eq 'POD';
+ }
- my $rK_range = $rlines->[$iline_oo]->{_rK_range};
- my ( $Kfirst, $Klast ) = @{$rK_range};
- $starting_lentot =
- $Kfirst <= 0 ? 0 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
- $starting_indent = 0;
- if ( !$rOpts_variable_maximum_line_length ) {
- my $level = $rLL->[$Kfirst]->[_LEVEL_];
- $starting_indent = $rOpts_indent_columns * $level;
- }
+ # Handle line of code
+ else {
- # DO-NOT-WELD RULE 1:
- # Do not weld something that looks like the start of a two-line
- # function call, like this: <<snippets/wn6.in>>
- # $trans->add_transformation(
- # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
- # We will look for a semicolon after the closing paren.
+ my $rtokens = $line_of_tokens_old->{_rtokens};
+ my $rtoken_type = $line_of_tokens_old->{_rtoken_type};
+ my $rblock_type = $line_of_tokens_old->{_rblock_type};
+ my $rcontainer_type = $line_of_tokens_old->{_rcontainer_type};
+ my $rcontainer_environment =
+ $line_of_tokens_old->{_rcontainer_environment};
+ my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence};
+ my $rlevels = $line_of_tokens_old->{_rlevels};
+ my $rslevels = $line_of_tokens_old->{_rslevels};
+ my $rci_levels = $line_of_tokens_old->{_rci_levels};
+ my $rnesting_blocks = $line_of_tokens_old->{_rnesting_blocks};
+ my $rnesting_tokens = $line_of_tokens_old->{_rnesting_tokens};
+
+ my $jmax = @{$rtokens} - 1;
+ if ( $jmax >= 0 ) {
+ $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
+ foreach my $j ( 0 .. $jmax ) {
+
+ # Clip negative nesting depths to zero to avoid problems.
+ # Negative values can occur in files with unbalanced containers
+ my $slevel = $rslevels->[$j];
+ if ( $slevel < 0 ) { $slevel = 0 }
+
+ if ( $rlevels->[$j] > $maximum_level ) {
+ $maximum_level = $rlevels->[$j];
+ }
- # We want to weld something complex, like this though
- # my $compass = uc( opposite_direction( line_to_canvas_direction(
- # @{ $coords[0] }, @{ $coords[1] } ) ) );
- # Otherwise we will get a 'blinker'
+ # But do not clip the 'level' variable yet. We will do this
+ # later, in sub 'store_token_to_go'. The reason is that in
+ # files with level errors, the logic in 'weld_cuddled_else'
+ # uses a stack logic that will give bad welds if we clip
+ # levels here.
+ ## if ( $rlevels->[$j] < 0 ) { $rlevels->[$j] = 0 }
+
+ my @tokary;
+ @tokary[
+ _TOKEN_, _TYPE_, _BLOCK_TYPE_,
+ _TYPE_SEQUENCE_, _LEVEL_, _SLEVEL_,
+ _CI_LEVEL_, _LINE_INDEX_,
+ ]
+ = (
+ $rtokens->[$j], $rtoken_type->[$j],
+ $rblock_type->[$j], $rtype_sequence->[$j],
+ $rlevels->[$j], $slevel,
+ $rci_levels->[$j], $input_line_no - 1,
+ );
+ push @{$rLL}, \@tokary;
+ } ## end foreach my $j ( 0 .. $jmax )
+
+ $Klimit = @{$rLL} - 1;
+
+ # Need to remember if we can trim the input line
+ $line_of_tokens->{_ended_in_blank_token} =
+ $rtoken_type->[$jmax] eq 'b';
+
+ $line_of_tokens->{_level_0} = $rlevels->[0];
+ $line_of_tokens->{_ci_level_0} = $rci_levels->[0];
+ $line_of_tokens->{_nesting_blocks_0} = $rnesting_blocks->[0];
+ $line_of_tokens->{_nesting_tokens_0} = $rnesting_tokens->[0];
+ } ## end if ( $jmax >= 0 )
+
+ $CODE_type =
+ $self->get_CODE_type( $line_of_tokens, $Kfirst, $Klimit,
+ $input_line_no );
+
+ $tee_output ||=
+ $rOpts_tee_block_comments
+ && $jmax == 0
+ && $rLL->[$Kfirst]->[_TYPE_] eq '#';
+
+ $tee_output ||=
+ $rOpts_tee_side_comments
+ && defined($Kfirst)
+ && $Klimit > $Kfirst
+ && $rLL->[$Klimit]->[_TYPE_] eq '#';
+
+ # Handle any requested side comment deletions. It is easier to get
+ # this done here rather than farther down the pipeline because IO
+ # lines take a different route, and because lines with deleted HSC
+ # become BL lines. An since we are deleting now, we have to also
+ # handle any tee- requests before the side comments vanish.
+ my $delete_side_comment =
+ $rOpts_delete_side_comments
+ && defined($Kfirst)
+ && $rLL->[$Klimit]->[_TYPE_] eq '#'
+ && ( $Klimit > $Kfirst || $CODE_type eq 'HSC' )
+ && (!$CODE_type
+ || $CODE_type eq 'HSC'
+ || $CODE_type eq 'IO'
+ || $CODE_type eq 'NIN' );
- my $iline_oc = $outer_closing->[_LINE_INDEX_];
- if ( $iline_oc <= $iline_oo + 1 ) {
+ if (
+ $rOpts_delete_closing_side_comments
+ && !$delete_side_comment
+ && defined($Kfirst)
+ && $Klimit > $Kfirst
+ && $rLL->[$Klimit]->[_TYPE_] eq '#'
+ && ( !$CODE_type
+ || $CODE_type eq 'HSC'
+ || $CODE_type eq 'IO'
+ || $CODE_type eq 'NIN' )
+ )
+ {
+ my $token = $rLL->[$Klimit]->[_TOKEN_];
+ my $K_m = $Klimit - 1;
+ my $type_m = $rLL->[$K_m]->[_TYPE_];
+ if ( $type_m eq 'b' && $K_m > $Kfirst ) { $K_m-- }
+ my $last_nonblank_block_type = $rLL->[$K_m]->[_BLOCK_TYPE_];
+ if ( $token =~ /$closing_side_comment_prefix_pattern/
+ && $last_nonblank_block_type =~
+ /$closing_side_comment_list_pattern/ )
+ {
+ $delete_side_comment = 1;
+ }
+ } ## end if ( $rOpts_delete_closing_side_comments...)
- # Look for following semicolon...
- my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
- my $next_nonblank_type =
- defined($Knext_nonblank)
- ? $rLL->[$Knext_nonblank]->[_TYPE_]
- : 'b';
- if ( $next_nonblank_type eq ';' ) {
+ if ($delete_side_comment) {
+ pop @{$rLL};
+ $Klimit -= 1;
+ if ( $Klimit > $Kfirst
+ && $rLL->[$Klimit]->[_TYPE_] eq 'b' )
+ {
+ pop @{$rLL};
+ $Klimit -= 1;
+ }
- # Then do not weld if no other containers between inner
- # opening and closing.
- my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
- if ( $Knext_seq_item == $Kinner_closing ) {
- $do_not_weld ||= 1;
+ # The -io option outputs the line text, so we have to update
+ # the line text so that the comment does not reappear.
+ if ( $CODE_type eq 'IO' ) {
+ my $line = "";
+ foreach my $KK ( $Kfirst .. $Klimit ) {
+ $line .= $rLL->[$KK]->[_TOKEN_];
}
+ $line_of_tokens->{_line_text} = $line . "\n";
}
- }
- }
-
- my $iline_ic = $inner_closing->[_LINE_INDEX_];
-
- # DO-NOT-WELD RULE 2:
- # Do not weld an opening paren to an inner one line brace block
- # We will just use old line numbers for this test and require
- # iterations if necessary for convergence
-
- # For example, otherwise we could cause the opening paren
- # in the following example to separate from the caller name
- # as here:
-
- # $_[0]->code_handler
- # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
-
- # Here is another example where we do not want to weld:
- # $wrapped->add_around_modifier(
- # sub { push @tracelog => 'around 1'; $_[0]->(); } );
- # If the one line sub block gets broken due to length or by the
- # user, then we can weld. The result will then be:
- # $wrapped->add_around_modifier( sub {
- # push @tracelog => 'around 1';
- # $_[0]->();
- # } );
+ # If we delete a hanging side comment the line becomes blank.
+ if ( $CODE_type eq 'HSC' ) { $CODE_type = 'BL' }
+ }
- if ( $iline_ic == $iline_io ) {
+ } ## end if ( $line_type eq 'CODE')
- my $token_oo = $outer_opening->[_TOKEN_];
- my $block_type_io = $inner_opening->[_BLOCK_TYPE_];
- my $token_io = $inner_opening->[_TOKEN_];
- $do_not_weld ||= $token_oo eq '(' && $token_io eq '{';
+ # Finish storing line variables
+ if ($tee_output) {
+ my $fh_tee = $self->[_fh_tee_];
+ my $line_text = $line_of_tokens_old->{_line_text};
+ $fh_tee->print($line_text) if ($fh_tee);
}
- # DO-NOT-WELD RULE 3:
- # Do not weld if this makes our line too long
- $do_not_weld ||= $excess_length_to_K->($Kinner_opening) > 0;
-
- # DO-NOT-WELD RULE 4; implemented for git#10:
- # Do not weld an opening -ce brace if the next container is on a single
- # line, different from the opening brace. (This is very rare). For
- # example, given the following with -ce, we will avoid joining the {
- # and [
-
- # } else {
- # [ $_, length($_) ]
- # }
-
- # because this would produce a terminal one-line block:
+ $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ];
+ $line_of_tokens->{_code_type} = $CODE_type;
+ $self->[_Klimit_] = $Klimit;
+ $self->[_maximum_level_] = $maximum_level;
- # } else { [ $_, length($_) ] }
+ push @{$rlines_new}, $line_of_tokens;
+ return;
+ }
- # which may not be what is desired. But given this input:
+ sub get_CODE_type {
+ my ( $self, $line_of_tokens, $Kfirst, $Klast, $input_line_no ) = @_;
- # } else { [ $_, length($_) ] }
+ # We are looking at a line of code and setting a flag to
+ # describe any special processing that it requires
- # then we will do the weld and retain the one-line block
- if ( $rOpts->{'cuddled-else'} ) {
- my $block_type = $rLL->[$Kouter_opening]->[_BLOCK_TYPE_];
- if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) {
- my $io_line = $inner_opening->[_LINE_INDEX_];
- my $ic_line = $inner_closing->[_LINE_INDEX_];
- my $oo_line = $outer_opening->[_LINE_INDEX_];
- $do_not_weld ||=
- ( $oo_line < $io_line && $ic_line == $io_line );
- }
- }
+ # Possible CODE_types
+ # 'VB' = Verbatim - line goes out verbatim (a quote)
+ # 'FS' = Format Skipping - line goes out verbatim
+ # 'BL' = Blank Line
+ # 'HSC' = Hanging Side Comment - fix this hanging side comment
+ # 'SBCX'= Static Block Comment Without Leading Space
+ # 'SBC' = Static Block Comment
+ # 'BC' = Block Comment - an ordinary full line comment
+ # 'IO' = Indent Only - line goes out unchanged except for indentation
+ # 'NIN' = No Internal Newlines - line does not get broken
+ # 'VER' = VERSION statement
+ # '' = ordinary line of code with no restructions
- if ($do_not_weld) {
+ my $rLL = $self->[_rLL_];
- # After neglecting a pair, we start measuring from start of point io
- $starting_lentot =
- $self->cumulative_length_before_K($Kinner_opening);
- $starting_indent = 0;
- if ( !$rOpts_variable_maximum_line_length ) {
- my $level = $inner_opening->[_LEVEL_];
- $starting_indent = $rOpts_indent_columns * $level;
- }
+ my $CODE_type = "";
+ my $input_line = $line_of_tokens->{_line_text};
+ my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1;
- # Normally, a broken pair should not decrease indentation of
- # intermediate tokens:
- ## if ( $last_pair_broken ) { next }
- # However, for long strings of welded tokens, such as '{{{{{{...'
- # we will allow broken pairs to also remove indentation.
- # This will keep very long strings of opening and closing
- # braces from marching off to the right. We will do this if the
- # number of tokens in a weld before the broken weld is 4 or more.
- # This rule will mainly be needed for test scripts, since typical
- # welds have fewer than about 4 welded tokens.
- if ( !@welds || @{ $welds[-1] } < 4 ) { next }
- }
+ my $is_block_comment = 0;
+ my $has_side_comment = 0;
- # otherwise start new weld ...
- elsif ($starting_new_weld) {
- push @welds, $item;
+ if ( $jmax >= 0 && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
+ if ( $jmax == 0 ) { $is_block_comment = 1; }
+ else { $has_side_comment = 1 }
}
- # ... or extend current weld
- else {
- unshift @{ $welds[-1] }, $inner_seqno;
- }
+ # Write line verbatim if we are in a formatting skip section
+ if ($In_format_skipping_section) {
- # After welding, reduce the indentation level if all intermediate tokens
- my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
- if ( $dlevel != 0 ) {
- my $Kstart = $Kinner_opening;
- my $Kstop = $Kinner_closing;
- for ( my $KK = $Kstart ; $KK <= $Kstop ; $KK++ ) {
- $rLL->[$KK]->[_LEVEL_] += $dlevel;
+ # Note: extra space appended to comment simplifies pattern matching
+ if ( $is_block_comment
+ && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
+ /$format_skipping_pattern_end/ )
+ {
+ $In_format_skipping_section = 0;
+ write_logfile_entry(
+ "Line $input_line_no: Exiting format-skipping section\n");
}
+ $CODE_type = 'FS';
+ goto RETURN;
}
- }
- # Define weld lengths needed later to set line breaks
- foreach my $item (@welds) {
-
- # sweep from inner to outer
-
- my $inner_seqno;
- my $len_close = 0;
- my $len_open = 0;
- foreach my $outer_seqno ( @{$item} ) {
- if ($inner_seqno) {
-
- my $dlen_opening =
- $length_to_opening_seqno->($inner_seqno) -
- $length_to_opening_seqno->($outer_seqno);
-
- my $dlen_closing =
- $length_to_closing_seqno->($outer_seqno) -
- $length_to_closing_seqno->($inner_seqno);
-
- $len_open += $dlen_opening;
- $len_close += $dlen_closing;
+ # Check for a continued quote..
+ if ( $line_of_tokens->{_starting_in_quote} ) {
+ # A line which is entirely a quote or pattern must go out
+ # verbatim. Note: the \n is contained in $input_line.
+ if ( $jmax <= 0 ) {
+ if ( ( $input_line =~ "\t" ) ) {
+ my $input_line_number = $line_of_tokens->{_line_number};
+ $self->note_embedded_tab($input_line_number);
+ }
+ $CODE_type = 'VB';
+ goto RETURN;
}
+ }
- $weld_len_left_closing{$outer_seqno} = $len_close;
- $weld_len_right_opening{$outer_seqno} = $len_open;
-
- $inner_seqno = $outer_seqno;
+ # See if we are entering a formatting skip section
+ if ( $rOpts_format_skipping
+ && $is_block_comment
+ && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
+ /$format_skipping_pattern_begin/ )
+ {
+ $In_format_skipping_section = 1;
+ write_logfile_entry(
+ "Line $input_line_no: Entering format-skipping section\n");
+ $CODE_type = 'FS';
+ goto RETURN;
}
- # sweep from outer to inner
- foreach my $seqno ( reverse @{$item} ) {
- $weld_len_right_closing{$seqno} =
- $len_close - $weld_len_left_closing{$seqno};
- $weld_len_left_opening{$seqno} =
- $len_open - $weld_len_right_opening{$seqno};
+ # ignore trailing blank tokens (they will get deleted later)
+ if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
+ $jmax--;
}
- }
- #####################################
- # DEBUG
- #####################################
- if (0) {
- my $count = 0;
- local $" = ')(';
- foreach my $weld (@welds) {
- print "\nWeld number $count has seq: (@{$weld})\n";
- foreach my $seq ( @{$weld} ) {
- print <<EOM;
- seq=$seq
- left_opening=$weld_len_left_opening{$seq};
- right_opening=$weld_len_right_opening{$seq};
- left_closing=$weld_len_left_closing{$seq};
- right_closing=$weld_len_right_closing{$seq};
-EOM
- }
+ # blank line..
+ if ( $jmax < 0 ) {
+ $CODE_type = 'BL';
+ goto RETURN;
+ }
- $count++;
+ # see if this is a static block comment (starts with ## by default)
+ my $is_static_block_comment = 0;
+ my $is_static_block_comment_without_leading_space = 0;
+ if ( $is_block_comment
+ && $rOpts->{'static-block-comments'}
+ && $input_line =~ /$static_block_comment_pattern/ )
+ {
+ $is_static_block_comment = 1;
+ $is_static_block_comment_without_leading_space =
+ substr( $input_line, 0, 1 ) eq '#';
}
- }
- return;
-}
-sub weld_nested_quotes {
- my $self = shift;
+ # Check for comments which are line directives
+ # Treat exactly as static block comments without leading space
+ # reference: perlsyn, near end, section Plain Old Comments (Not!)
+ # example: '# line 42 "new_filename.plx"'
+ if (
+ $is_block_comment
+ && $input_line =~ /^\# \s*
+ line \s+ (\d+) \s*
+ (?:\s("?)([^"]+)\2)? \s*
+ $/x
+ )
+ {
+ $is_static_block_comment = 1;
+ $is_static_block_comment_without_leading_space = 1;
+ }
- my $rLL = $self->{rLL};
- return unless ( defined($rLL) && @{$rLL} );
+ # look for hanging side comment
+ if (
+ $is_block_comment
+ && $Last_line_had_side_comment # last line had side comment
+ && $input_line =~ /^\s/ # there is some leading space
+ && !$is_static_block_comment # do not make static comment hanging
+ && $rOpts->{'hanging-side-comments'} # user is allowing
+ # hanging side comments
+ # like this
+ )
+ {
+ $has_side_comment = 1;
+ $CODE_type = 'HSC';
+ goto RETURN;
+ }
- my $K_opening_container = $self->{K_opening_container};
- my $K_closing_container = $self->{K_closing_container};
- my $rlines = $self->{rlines};
+ # Handle a block (full-line) comment..
+ if ($is_block_comment) {
- my $is_single_quote = sub {
- my ( $Kbeg, $Kend, $quote_type ) = @_;
- foreach my $K ( $Kbeg .. $Kend ) {
- my $test_type = $rLL->[$K]->[_TYPE_];
- next if ( $test_type eq 'b' );
- return if ( $test_type ne $quote_type );
+ if ($is_static_block_comment_without_leading_space) {
+ $CODE_type = 'SBCX';
+ goto RETURN;
+ }
+ elsif ($is_static_block_comment) {
+ $CODE_type = 'SBC';
+ goto RETURN;
+ }
+ elsif ($Last_line_had_side_comment
+ && !$rOpts_maximum_consecutive_blank_lines
+ && $rLL->[$Kfirst]->[_LEVEL_] > 0 )
+ {
+ # Emergency fix to keep a block comment from becoming a hanging
+ # side comment. This fix is for the case that blank lines
+ # cannot be inserted. There is related code in sub
+ # 'process_line_of_CODE'
+ $CODE_type = 'SBCX';
+ goto RETURN;
+ }
+ else {
+ $CODE_type = 'BC';
+ goto RETURN;
+ }
}
- return 1;
- };
- my $excess_line_length = sub {
- my ( $KK, $Ktest ) = @_;
+ # End of comments. Handle a line of normal code:
- # what is the excess length if we add token $Ktest to the line with $KK?
- my $iline = $rLL->[$KK]->[_LINE_INDEX_];
- my $rK_range = $rlines->[$iline]->{_rK_range};
- my ( $Kfirst, $Klast ) = @{$rK_range};
- my $starting_lentot =
- $Kfirst <= 0 ? 0 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
- my $starting_indent = 0;
- my $length_tol = 1;
- if ( !$rOpts_variable_maximum_line_length ) {
- my $level = $rLL->[$Kfirst]->[_LEVEL_];
- $starting_indent = $rOpts_indent_columns * $level;
- }
-
- my $length = $rLL->[$Ktest]->[_CUMULATIVE_LENGTH_] - $starting_lentot;
- my $excess_length =
- $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
- return $excess_length;
- };
+ if ($rOpts_indent_only) {
+ $CODE_type = 'IO';
+ goto RETURN;
+ }
- # look for single qw quotes nested in containers
- my $KNEXT = 0;
- while ( defined($KNEXT) ) {
- my $KK = $KNEXT;
- $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
- my $rtoken_vars = $rLL->[$KK];
- my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
- if ( !$outer_seqno ) {
- next if ( $KK == 0 ); # first token in file may not be container
- Fault("sequence = $outer_seqno not defined at K=$KK");
+ if ( !$rOpts_add_newlines ) {
+ $CODE_type = 'NIN';
+ goto RETURN;
}
- my $token = $rtoken_vars->[_TOKEN_];
- if ( $is_opening_token{$token} ) {
+ # Patch needed for MakeMaker. Do not break a statement
+ # in which $VERSION may be calculated. See MakeMaker.pm;
+ # this is based on the coding in it.
+ # The first line of a file that matches this will be eval'd:
+ # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
+ # Examples:
+ # *VERSION = \'1.01';
+ # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
+ # We will pass such a line straight through without breaking
+ # it unless -npvl is used.
- # see if the next token is a quote of some type
- my $Kn = $self->K_next_nonblank($KK);
- next unless $Kn;
- my $next_token = $rLL->[$Kn]->[_TOKEN_];
- my $next_type = $rLL->[$Kn]->[_TYPE_];
- next
- unless ( ( $next_type eq 'q' || $next_type eq 'Q' )
- && $next_token =~ /^q/ );
+ # Patch for problem reported in RT #81866, where files
+ # had been flattened into a single line and couldn't be
+ # tidied without -npvl. There are two parts to this patch:
+ # First, it is not done for a really long line (80 tokens for now).
+ # Second, we will only allow up to one semicolon
+ # before the VERSION. We need to allow at least one semicolon
+ # for statements like this:
+ # require Exporter; our $VERSION = $Exporter::VERSION;
+ # where both statements must be on a single line for MakeMaker
- # The token before the closing container must also be a quote
- my $K_closing = $K_closing_container->{$outer_seqno};
- my $Kt_end = $self->K_previous_nonblank($K_closing);
- next unless $rLL->[$Kt_end]->[_TYPE_] eq $next_type;
+ my $is_VERSION_statement = 0;
+ if ( !$Saw_VERSION_in_this_file
+ && $jmax < 80
+ && $input_line =~
+ /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
+ {
+ $Saw_VERSION_in_this_file = 1;
+ write_logfile_entry("passing VERSION line; -npvl deactivates\n");
- # Do not weld to single-line quotes. Nothing is gained, and it may
- # look bad.
- next if ( $Kt_end == $Kn );
+ # This code type has lower priority than others
+ $CODE_type = 'VER';
+ goto RETURN;
+ }
- # Only weld to quotes delimited with container tokens. This is
- # because welding to arbitrary quote delimiters can produce code
- # which is less readable than without welding.
- my $closing_delimiter = substr( $rLL->[$Kt_end]->[_TOKEN_], -1, 1 );
- next
- unless ( $is_closing_token{$closing_delimiter}
- || $closing_delimiter eq '>' );
+ RETURN:
+ $Last_line_had_side_comment = $has_side_comment;
+ return $CODE_type;
+ }
- # Now make sure that there is just a single quote in the container
- next
- unless ( $is_single_quote->( $Kn + 1, $Kt_end - 1, $next_type ) );
+} ## end closure write_line
- # If welded, the line must not exceed allowed line length
- # Assume old line breaks for this estimate.
- next if ( $excess_line_length->( $KK, $Kn ) > 0 );
+#############################################
+# CODE SECTION 5: Pre-process the entire file
+#############################################
- # OK to weld
- # FIXME: Are these always correct?
- $weld_len_left_closing{$outer_seqno} = 1;
- $weld_len_right_opening{$outer_seqno} = 2;
+sub finish_formatting {
- # QW PATCH 1 (Testing)
- # undo CI for welded quotes
- foreach my $K ( $Kn .. $Kt_end ) {
- $rLL->[$K]->[_CI_LEVEL_] = 0;
- }
+ my ( $self, $severe_error ) = @_;
- # Change the level of a closing qw token to be that of the outer
- # containing token. This will allow -lp indentation to function
- # correctly in the vertical aligner.
- $rLL->[$Kt_end]->[_LEVEL_] = $rLL->[$K_closing]->[_LEVEL_];
- }
+ # The file has been tokenized and is ready to be formatted.
+ # All of the relevant data is stored in $self, ready to go.
+
+ # Check the maximum level. If it is extremely large we will
+ # give up and output the file verbatim.
+ my $maximum_level = $self->[_maximum_level_];
+ my $maximum_table_index = $#maximum_line_length_at_level;
+ if ( !$severe_error && $maximum_level > $maximum_table_index ) {
+ $severe_error ||= 1;
+ Warn(<<EOM);
+The maximum indentation level, $maximum_level, exceeds the builtin limit of $maximum_table_index.
+Something may be wrong; formatting will be skipped.
+EOM
+ }
+
+ # output file verbatim if severe error or no formatting requested
+ if ( $severe_error || $rOpts->{notidy} ) {
+ $self->dump_verbatim();
+ $self->wrapup();
+ return;
}
- return;
-}
-sub weld_len_left {
+ # Update the 'save_logfile' flag based to include any tokenization errors.
+ # We can save time by skipping logfile calls if it is not going to be saved.
+ my $logger_object = $self->[_logger_object_];
+ if ($logger_object) {
+ $self->[_save_logfile_] = $logger_object->get_save_logfile();
+ }
+
+ # Make a pass through all tokens, adding or deleting any whitespace as
+ # required. Also make any other changes, such as adding semicolons.
+ # All token changes must be made here so that the token data structure
+ # remains fixed for the rest of this iteration.
+ $self->respace_tokens();
- my ( $seqno, $type_or_tok ) = @_;
+ $self->find_multiline_qw();
- # Given the sequence number of a token, and the token or its type,
- # return the length of any weld to its left
+ $self->keep_old_line_breaks();
- my $weld_len;
- if ($seqno) {
- if ( $is_closing_type{$type_or_tok} ) {
- $weld_len = $weld_len_left_closing{$seqno};
- }
- elsif ( $is_opening_type{$type_or_tok} ) {
- $weld_len = $weld_len_left_opening{$seqno};
- }
- }
- if ( !defined($weld_len) ) { $weld_len = 0 }
- return $weld_len;
-}
+ # Implement any welding needed for the -wn or -cb options
+ $self->weld_containers();
-sub weld_len_right {
+ # Locate small nested blocks which should not be broken
+ $self->mark_short_nested_blocks();
- my ( $seqno, $type_or_tok ) = @_;
+ $self->adjust_indentation_levels();
- # Given the sequence number of a token, and the token or its type,
- # return the length of any weld to its right
+ $self->set_excluded_lp_containers();
- my $weld_len;
- if ($seqno) {
- if ( $is_closing_type{$type_or_tok} ) {
- $weld_len = $weld_len_right_closing{$seqno};
- }
- elsif ( $is_opening_type{$type_or_tok} ) {
- $weld_len = $weld_len_right_opening{$seqno};
- }
+ # Finishes formatting and write the result to the line sink.
+ # Eventually this call should just change the 'rlines' data according to the
+ # new line breaks and then return so that we can do an internal iteration
+ # before continuing with the next stages of formatting.
+ $self->process_all_lines();
+
+ # A final routine to tie up any loose ends
+ $self->wrapup();
+ return;
+}
+
+sub dump_verbatim {
+ my $self = shift;
+ my $rlines = $self->[_rlines_];
+ foreach my $line ( @{$rlines} ) {
+ my $input_line = $line->{_line_text};
+ $self->write_unindented_line($input_line);
}
- if ( !defined($weld_len) ) { $weld_len = 0 }
- return $weld_len;
+ return;
}
-sub weld_len_left_to_go {
- my ($i) = @_;
+my %wU;
+my %wiq;
+my %is_nonlist_keyword;
+my %is_nonlist_type;
- # Given the index of a token in the 'to_go' array
- # return the length of any weld to its left
- return if ( $i < 0 );
- my $weld_len =
- weld_len_left( $type_sequence_to_go[$i], $types_to_go[$i] );
- return $weld_len;
-}
+BEGIN {
-sub weld_len_right_to_go {
- my ($i) = @_;
+ # added 'U' to fix cases b1125 b1126 b1127
+ my @q = qw(w U);
+ @{wU}{@q} = (1) x scalar(@q);
+
+ @q = qw(w i q Q G C Z);
+ @{wiq}{@q} = (1) x scalar(@q);
+
+ # Parens following these keywords will not be marked as lists. Note that
+ # 'for' is not included and is handled separately, by including 'f' in the
+ # hash %is_counted_type, since it may or may not be a c-style for loop.
+ @q = qw( if elsif unless and or );
+ @is_nonlist_keyword{@q} = (1) x scalar(@q);
+
+ # Parens following these types will not be marked as lists
+ @q = qw( && || );
+ @is_nonlist_type{@q} = (1) x scalar(@q);
- # Given the index of a token in the 'to_go' array
- # return the length of any weld to its right
- return if ( $i < 0 );
- if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
- my $weld_len =
- weld_len_right( $type_sequence_to_go[$i], $types_to_go[$i] );
- return $weld_len;
}
-sub link_sequence_items {
+sub respace_tokens {
- # This has been merged into 'respace_tokens' but retained for reference
- my $self = shift;
- my $rlines = $self->{rlines};
- my $rLL = $self->{rLL};
-
- # We walk the token list and make links to the next sequence item.
- # We also define these hashes to container tokens using sequence number as
- # the key:
- my $K_opening_container = {}; # opening [ { or (
- my $K_closing_container = {}; # closing ] } or )
- my $K_opening_ternary = {}; # opening ? of ternary
- my $K_closing_ternary = {}; # closing : of ternary
-
- # sub to link preceding nodes forward to a new node type
- my $link_back = sub {
- my ( $Ktop, $key ) = @_;
-
- my $Kprev = $Ktop - 1;
- while ( $Kprev >= 0
- && !defined( $rLL->[$Kprev]->[$key] ) )
- {
- $rLL->[$Kprev]->[$key] = $Ktop;
- $Kprev -= 1;
+ my $self = shift;
+ return if $rOpts->{'indent-only'};
+
+ # This routine is called once per file to do as much formatting as possible
+ # before new line breaks are set.
+
+ # This routine makes all necessary and possible changes to the tokenization
+ # after the initial tokenization of the file. This is a tedious routine,
+ # but basically it consists of inserting and deleting whitespace between
+ # nonblank tokens according to the selected parameters. In a few cases
+ # non-space characters are added, deleted or modified.
+
+ # The goal of this routine is to create a new token array which only needs
+ # the definition of new line breaks and padding to complete formatting. In
+ # a few cases we have to cheat a little to achieve this goal. In
+ # particular, we may not know if a semicolon will be needed, because it
+ # depends on how the line breaks go. To handle this, we include the
+ # semicolon as a 'phantom' which can be displayed as normal or as an empty
+ # string.
+
+ # Method: The old tokens are copied one-by-one, with changes, from the old
+ # linear storage array $rLL to a new array $rLL_new.
+
+ my $rLL = $self->[_rLL_];
+ my $Klimit_old = $self->[_Klimit_];
+ my $rlines = $self->[_rlines_];
+ my $length_function = $self->[_length_function_];
+ my $is_encoded_data = $self->[_is_encoded_data_];
+
+ my $rLL_new = []; # This is the new array
+ my $rtoken_vars;
+ my $Ktoken_vars; # the old K value of $rtoken_vars
+ my ( $Kfirst_old, $Klast_old ); # Range of old line
+ my $Klast_old_code; # K of last token if side comment
+ my $Kmax = @{$rLL} - 1;
+
+ my $CODE_type = "";
+ my $line_type = "";
+
+ # Set the whitespace flags, which indicate the token spacing preference.
+ my $rwhitespace_flags = $self->set_whitespace_flags();
+
+ # we will be setting token lengths as we go
+ my $cumulative_length = 0;
+
+ my %seqno_stack;
+ my %K_old_opening_by_seqno = (); # Note: old K index
+ my $depth_next = 0;
+ my $depth_next_max = 0;
+
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $K_closing_ternary = $self->[_K_closing_ternary_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_opening_ternary = $self->[_K_opening_ternary_];
+ my $rK_phantom_semicolons = $self->[_rK_phantom_semicolons_];
+ my $rchildren_of_seqno = $self->[_rchildren_of_seqno_];
+ my $rhas_broken_code_block = $self->[_rhas_broken_code_block_];
+ my $rhas_broken_list = $self->[_rhas_broken_list_];
+ my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
+ my $rhas_code_block = $self->[_rhas_code_block_];
+ my $rhas_list = $self->[_rhas_list_];
+ my $rhas_ternary = $self->[_rhas_ternary_];
+ my $ris_assigned_structure = $self->[_ris_assigned_structure_];
+ my $ris_broken_container = $self->[_ris_broken_container_];
+ my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
+ my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
+ my $ris_permanently_broken = $self->[_ris_permanently_broken_];
+ my $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
+ my $roverride_cab3 = $self->[_roverride_cab3_];
+ my $rparent_of_seqno = $self->[_rparent_of_seqno_];
+ my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
+
+ my $last_nonblank_type = ';';
+ my $last_nonblank_token = ';';
+ my $last_nonblank_block_type = '';
+ my $nonblank_token_count = 0;
+ my $last_nonblank_token_lx = 0;
+
+ my %K_first_here_doc_by_seqno;
+
+ my $set_permanently_broken = sub {
+ my ($seqno) = @_;
+ while ( defined($seqno) ) {
+ $ris_permanently_broken->{$seqno} = 1;
+ $seqno = $rparent_of_seqno->{$seqno};
}
+ return;
};
+ my $store_token = sub {
+ my ($item) = @_;
- for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
-
- $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] = undef;
+ # This will be the index of this item in the new array
+ my $KK_new = @{$rLL_new};
- my $type = $rLL->[$KK]->[_TYPE_];
+ my $type = $item->[_TYPE_];
+ my $is_blank = $type eq 'b';
- next if ( $type eq 'b' );
+ # Do not output consecutive blanks. This should not happen, but
+ # is worth checking because later routines make this assumption.
+ if ( $is_blank && $KK_new && $rLL_new->[-1]->[_TYPE_] eq 'b' ) {
+ return;
+ }
- my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+ # check for a sequenced item (i.e., container or ?/:)
+ my $type_sequence = $item->[_TYPE_SEQUENCE_];
if ($type_sequence) {
- $link_back->( $KK, _KNEXT_SEQ_ITEM_ );
-
- my $token = $rLL->[$KK]->[_TOKEN_];
+ my $token = $item->[_TOKEN_];
if ( $is_opening_token{$token} ) {
- $K_opening_container->{$type_sequence} = $KK;
+ $K_opening_container->{$type_sequence} = $KK_new;
+
+ # Fix for case b1100: Count a line ending in ', [' as having
+ # a line-ending comma. Otherwise, these commas can be hidden
+ # with something like --opening-square-bracket-right
+ if ( $last_nonblank_type eq ','
+ && $Ktoken_vars == $Klast_old_code
+ && $Ktoken_vars > $Kfirst_old )
+ {
+ $rlec_count_by_seqno->{$type_sequence}++;
+ }
+
+ if ( $last_nonblank_type eq '='
+ || $last_nonblank_type eq '=>' )
+ {
+ $ris_assigned_structure->{$type_sequence} =
+ $last_nonblank_type;
+ }
+
+ my $seqno_parent = $seqno_stack{ $depth_next - 1 };
+ $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
+ push @{ $rchildren_of_seqno->{$seqno_parent} }, $type_sequence;
+ $rparent_of_seqno->{$type_sequence} = $seqno_parent;
+ $seqno_stack{$depth_next} = $type_sequence;
+ $K_old_opening_by_seqno{$type_sequence} = $Ktoken_vars;
+ $depth_next++;
+
+ if ( $depth_next > $depth_next_max ) {
+ $depth_next_max = $depth_next;
+ }
}
elsif ( $is_closing_token{$token} ) {
- $K_closing_container->{$type_sequence} = $KK;
- }
+ $K_closing_container->{$type_sequence} = $KK_new;
+
+ # Do not include terminal commas in counts
+ if ( $last_nonblank_type eq ','
+ || $last_nonblank_type eq '=>' )
+ {
+ my $seqno = $seqno_stack{ $depth_next - 1 };
+ if ($seqno) {
+ $rtype_count_by_seqno->{$seqno}->{$last_nonblank_type}
+ --;
+
+ if ( $Ktoken_vars == $Kfirst_old
+ && $last_nonblank_type eq ','
+ && $rlec_count_by_seqno->{$seqno} )
+ {
+ $rlec_count_by_seqno->{$seqno}--;
+ }
+ }
+ }
- # These are not yet used but could be useful
+ # Update the stack...
+ $depth_next--;
+ }
else {
+
+ # For ternary, note parent but do not include as child
+ my $seqno_parent = $seqno_stack{ $depth_next - 1 };
+ $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
+ $rparent_of_seqno->{$type_sequence} = $seqno_parent;
+
+ # These are not yet used but could be useful
if ( $token eq '?' ) {
- $K_opening_ternary->{$type_sequence} = $KK;
+ $K_opening_ternary->{$type_sequence} = $KK_new;
}
elsif ( $token eq ':' ) {
- $K_closing_ternary->{$type_sequence} = $KK;
+ $K_closing_ternary->{$type_sequence} = $KK_new;
}
else {
- Fault(<<EOM);
-Unknown sequenced token type '$type'. Expecting one of '{[(?:)]}'
-EOM
+
+ # We really shouldn't arrive here, just being cautious:
+ # The only sequenced types output by the tokenizer are the
+ # opening & closing containers and the ternary types. Each
+ # of those was checked above. So we would only get here
+ # if the tokenizer has been changed to mark some other
+ # tokens with sequence numbers.
+ my $type = $item->[_TYPE_];
+ Fault(
+"Unexpected token type with sequence number: type='$type', seqno='$type_sequence'"
+ );
}
}
}
- }
- $self->{K_opening_container} = $K_opening_container;
- $self->{K_closing_container} = $K_closing_container;
- $self->{K_opening_ternary} = $K_opening_ternary;
- $self->{K_closing_ternary} = $K_closing_ternary;
- return;
-}
+ # Find the length of this token. Later it may be adjusted if phantom
+ # or ignoring side comment lengths.
+ my $token_length =
+ $is_encoded_data
+ ? $length_function->( $item->[_TOKEN_] )
+ : length( $item->[_TOKEN_] );
-sub sum_token_lengths {
- my $self = shift;
+ # handle comments
+ my $is_comment = $type eq '#';
+ if ($is_comment) {
- # This has been merged into 'respace_tokens' but retained for reference
- my $rLL = $self->{rLL};
- my $cumulative_length = 0;
- for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
+ # trim comments if necessary
+ if ( $item->[_TOKEN_] =~ s/\s+$// ) {
+ $token_length = $length_function->( $item->[_TOKEN_] );
+ }
- # now set the length of this token
- my $token_length = length( $rLL->[$KK]->[_TOKEN_] );
+ # Mark length of side comments as just 1 if sc lengths are ignored
+ if ( $rOpts_ignore_side_comment_lengths
+ && ( !$CODE_type || $CODE_type eq 'HSC' ) )
+ {
+ $token_length = 1;
+ }
+ my $seqno = $seqno_stack{ $depth_next - 1 };
+ if ( defined($seqno)
+ && !$ris_permanently_broken->{$seqno} )
+ {
+ $set_permanently_broken->($seqno);
+ }
- $cumulative_length += $token_length;
+ }
- # Save the length sum to just AFTER this token
- $rLL->[$KK]->[_CUMULATIVE_LENGTH_] = $cumulative_length;
+ $item->[_TOKEN_LENGTH_] = $token_length;
- }
- return;
-}
+ # and update the cumulative length
+ $cumulative_length += $token_length;
-sub resync_lines_and_tokens {
+ # Save the length sum to just AFTER this token
+ $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
- my $self = shift;
- my $rLL = $self->{rLL};
- my $Klimit = $self->{Klimit};
- my $rlines = $self->{rlines};
+ if ( !$is_blank && !$is_comment ) {
+ $last_nonblank_type = $type;
+ $last_nonblank_token = $item->[_TOKEN_];
+ $last_nonblank_block_type = $item->[_BLOCK_TYPE_];
+ $last_nonblank_token_lx = $item->[_LINE_INDEX_];
+ $nonblank_token_count++;
+
+ # count selected types
+ if ( $is_counted_type{$type} ) {
+ my $seqno = $seqno_stack{ $depth_next - 1 };
+ if ( defined($seqno) ) {
+ $rtype_count_by_seqno->{$seqno}->{$type}++;
+
+ # Count line-ending commas for -bbx
+ if ( $type eq ',' && $Ktoken_vars == $Klast_old_code ) {
+ $rlec_count_by_seqno->{$seqno}++;
+ }
- # Re-construct the arrays of tokens associated with the original input lines
- # since they have probably changed due to inserting and deleting blanks
- # and a few other tokens.
+ # Remember index of first here doc target
+ if ( $type eq 'h' && !$K_first_here_doc_by_seqno{$seqno} ) {
+ $K_first_here_doc_by_seqno{$seqno} = $KK_new;
+ }
+ }
+ }
+ }
- my $Kmax = -1;
+ # For reference, here is how to get the parent sequence number.
+ # This is not used because it is slower than finding it on the fly
+ # in sub parent_seqno_by_K:
- # This is the next token and its line index:
- my $Knext = 0;
- my $inext;
- if ( defined($rLL) && @{$rLL} ) {
- $Kmax = @{$rLL} - 1;
- $inext = $rLL->[$Knext]->[_LINE_INDEX_];
- }
+ # my $seqno_parent =
+ # $type_sequence && $is_opening_token{$token}
+ # ? $seqno_stack{ $depth_next - 2 }
+ # : $seqno_stack{ $depth_next - 1 };
+ # my $KK = @{$rLL_new};
+ # $rseqno_of_parent_by_K->{$KK} = $seqno_parent;
- my $get_inext = sub {
- if ( $Knext < 0 || $Knext > $Kmax ) { $inext = undef }
- else {
- $inext = $rLL->[$Knext]->[_LINE_INDEX_];
- }
- return $inext;
+ # and finally, add this item to the new array
+ push @{$rLL_new}, $item;
};
- # Remember the most recently output token index
- my $Klast_out;
-
- my $iline = -1;
- foreach my $line_of_tokens ( @{$rlines} ) {
- $iline++;
- my $line_type = $line_of_tokens->{_line_type};
- if ( $line_type eq 'CODE' ) {
-
- my @K_array;
- my $rK_range;
- $inext = $get_inext->();
- while ( defined($inext) && $inext <= $iline ) {
- push @{K_array}, $Knext;
- $Knext += 1;
- $inext = $get_inext->();
- }
+ my $store_token_and_space = sub {
+ my ( $item, $want_space ) = @_;
- # Delete any terminal blank token
- if (@K_array) {
- if ( $rLL->[ $K_array[-1] ]->[_TYPE_] eq 'b' ) {
- pop @K_array;
- }
- }
+ # store a token with preceding space if requested and needed
- # Define the range of K indexes for the line:
- # $Kfirst = index of first token on line
- # $Klast_out = index of last token on line
- my ( $Kfirst, $Klast );
- if (@K_array) {
- $Kfirst = $K_array[0];
- $Klast = $K_array[-1];
- $Klast_out = $Klast;
- }
+ # First store the space
+ if ( $want_space
+ && @{$rLL_new}
+ && $rLL_new->[-1]->[_TYPE_] ne 'b'
+ && $rOpts_add_whitespace )
+ {
+ my $rcopy = copy_token_as_type( $item, 'b', ' ' );
+ $rcopy->[_LINE_INDEX_] =
+ $rLL_new->[-1]->[_LINE_INDEX_];
- # It is only safe to trim the actual line text if the input
- # line had a terminal blank token. Otherwise, we may be
- # in a quote.
- if ( $line_of_tokens->{_ended_in_blank_token} ) {
- $line_of_tokens->{_line_text} =~ s/\s+$//;
- }
- $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
+ # Patch 23-Jan-2021 to fix -lp blinkers:
+ # The level and ci_level of newly created spaces should be the same
+ # as the previous token. Otherwise the coding for the -lp option,
+ # in sub set_leading_whitespace, can create a blinking state in
+ # some rare cases.
+ $rcopy->[_LEVEL_] =
+ $rLL_new->[-1]->[_LEVEL_];
+ $rcopy->[_CI_LEVEL_] =
+ $rLL_new->[-1]->[_CI_LEVEL_];
- # Deleting semicolons can create new empty code lines
- # which should be marked as blank
- if ( !defined($Kfirst) ) {
- my $code_type = $line_of_tokens->{_code_type};
- if ( !$code_type ) {
- $line_of_tokens->{_code_type} = 'BL';
- }
- }
+ $store_token->($rcopy);
}
- }
-
- # There shouldn't be any nodes beyond the last one unless we start
- # allowing 'link_after' calls
- if ( defined($inext) ) {
- Fault("unexpected tokens at end of file when reconstructing lines");
- }
+ # then the token
+ $store_token->($item);
+ };
- return;
-}
+ my $K_end_q = sub {
+ my ($KK) = @_;
+ my $K_end = $KK;
-sub dump_verbatim {
- my $self = shift;
- my $rlines = $self->{rlines};
- foreach my $line ( @{$rlines} ) {
- my $input_line = $line->{_line_text};
- $self->write_unindented_line($input_line);
- }
- return;
-}
+ my $Kn = $KK + 1;
+ if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
-sub finish_formatting {
+ while ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'q' ) {
+ $K_end = $Kn;
- my ( $self, $severe_error ) = @_;
+ $Kn += 1;
+ if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
+ }
- # The file has been tokenized and is ready to be formatted.
- # All of the relevant data is stored in $self, ready to go.
+ return $K_end;
+ };
- # output file verbatim if severe error or no formatting requested
- if ( $severe_error || $rOpts->{notidy} ) {
- $self->dump_verbatim();
- $self->wrapup();
- return;
- }
+ my $add_phantom_semicolon = sub {
- # Make a pass through the lines, looking at lines of CODE and identifying
- # special processing needs, such format skipping sections marked by
- # special comments
- $self->scan_comments();
+ my ($KK) = @_;
- # Find nested pairs of container tokens for any welding. This information
- # is also needed for adding semicolons, so it is split apart from the
- # welding step.
- $self->find_nested_pairs();
+ my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
+ return unless ( defined($Kp) );
- # Make sure everything looks good
- $self->check_line_hashes();
+ # we are only adding semicolons for certain block types
+ my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
+ return
+ unless ( $ok_to_add_semicolon_for_block_type{$block_type}
+ || $block_type =~ /^(sub|package)/
+ || $block_type =~ /^\w+\:$/ );
- # Future: Place to Begin future Iteration Loop
- # foreach my $it_count(1..$maxit) {
+ my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
- # Future: We must reset some things after the first iteration.
- # This includes:
- # - resetting levels if there was any welding
- # - resetting any phantom semicolons
- # - dealing with any line numbering issues so we can relate final lines
- # line numbers with input line numbers.
- #
- # If ($it_count>1) {
- # Copy {level_raw} to [_LEVEL_] if ($it_count>1)
- # Renumber lines
- # }
+ my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
+ my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
- # Make a pass through all tokens, adding or deleting any whitespace as
- # required. Also make any other changes, such as adding semicolons.
- # All token changes must be made here so that the token data structure
- # remains fixed for the rest of this iteration.
- $self->respace_tokens();
+ # Do not add a semicolon if...
+ return
+ if (
- # Make a hierarchical map of the containers
- $self->map_containers();
+ # it would follow a comment (and be isolated)
+ $previous_nonblank_type eq '#'
- # Implement any welding needed for the -wn or -cb options
- $self->weld_containers();
+ # it follows a code block ( because they are not always wanted
+ # there and may add clutter)
+ || $rLL_new->[$Kp]->[_BLOCK_TYPE_]
- # Locate small nested blocks which should not be broken
- $self->mark_short_nested_blocks();
+ # it would follow a label
+ || $previous_nonblank_type eq 'J'
- # Finishes formatting and write the result to the line sink.
- # Eventually this call should just change the 'rlines' data according to the
- # new line breaks and then return so that we can do an internal iteration
- # before continuing with the next stages of formatting.
- $self->break_lines();
+ # it would be inside a 'format' statement (and cause syntax error)
+ || ( $previous_nonblank_type eq 'k'
+ && $previous_nonblank_token =~ /format/ )
- ############################################################
- # A possible future decomposition of 'break_lines()' follows.
- # Benefits:
- # - allow perltidy to do an internal iteration which eliminates
- # many unnecessary steps, such as re-parsing and vertical alignment.
- # This will allow iterations to be automatic.
- # - consolidate all length calculations to allow utf8 alignment
- ############################################################
+ );
- # Future: Check for convergence of beginning tokens on CODE lines
+ # Do not add a semicolon if it would impede a weld with an immediately
+ # following closing token...like this
+ # { ( some code ) }
+ # ^--No semicolon can go here
- # Future: End of Iteration Loop
+ # look at the previous token... note use of the _NEW rLL array here,
+ # but sequence numbers are invariant.
+ my $seqno_inner = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
- # Future: add_padding($rargs);
+ # If it is also a CLOSING token we have to look closer...
+ if (
+ $seqno_inner
+ && $is_closing_token{$previous_nonblank_token}
- # Future: add_closing_side_comments($rargs);
+ # we only need to look if there is just one inner container..
+ && defined( $rchildren_of_seqno->{$type_sequence} )
+ && @{ $rchildren_of_seqno->{$type_sequence} } == 1
+ )
+ {
- # Future: vertical_alignment($rargs);
+ # Go back and see if the corresponding two OPENING tokens are also
+ # together. Note that we are using the OLD K indexing here:
+ my $K_outer_opening = $K_old_opening_by_seqno{$type_sequence};
+ if ( defined($K_outer_opening) ) {
+ my $K_nxt = $self->K_next_nonblank($K_outer_opening);
+ if ( defined($K_nxt) ) {
+ my $seqno_nxt = $rLL->[$K_nxt]->[_TYPE_SEQUENCE_];
+
+ # Is the next token after the outer opening the same as
+ # our inner closing (i.e. same sequence number)?
+ # If so, do not insert a semicolon here.
+ return if ( $seqno_nxt && $seqno_nxt == $seqno_inner );
+ }
+ }
+ }
- # Future: output results
+ # We will insert an empty semicolon here as a placeholder. Later, if
+ # it becomes the last token on a line, we will bring it to life. The
+ # advantage of doing this is that (1) we just have to check line
+ # endings, and (2) the phantom semicolon has zero width and therefore
+ # won't cause needless breaks of one-line blocks.
+ my $Ktop = -1;
+ if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
+ && $want_left_space{';'} == WS_NO )
+ {
- # A final routine to tie up any loose ends
- $self->wrapup();
- return;
-}
+ # convert the blank into a semicolon..
+ # be careful: we are working on the new stack top
+ # on a token which has been stored.
+ my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', ' ' );
-sub create_one_line_block {
- ( $index_start_one_line_block, $semicolons_before_block_self_destruct ) =
- @_;
- return;
-}
+ # Convert the existing blank to:
+ # a phantom semicolon for one_line_block option = 0 or 1
+ # a real semicolon for one_line_block option = 2
+ my $tok = '';
+ my $len_tok = 0;
+ if ( $rOpts_one_line_block_semicolons == 2 ) {
+ $tok = ';';
+ $len_tok = 1;
+ }
-sub destroy_one_line_block {
- $index_start_one_line_block = UNDEFINED_INDEX;
- $semicolons_before_block_self_destruct = 0;
- return;
-}
+ $rLL_new->[$Ktop]->[_TOKEN_] = $tok;
+ $rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = $len_tok;
+ $rLL_new->[$Ktop]->[_TYPE_] = ';';
+ $rLL_new->[$Ktop]->[_SLEVEL_] =
+ $rLL->[$KK]->[_SLEVEL_];
-sub leading_spaces_to_go {
+ # Save list of new K indexes of phantom semicolons.
+ # This will be needed if we want to undo them for iterations in
+ # future coding.
+ push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
- # return the number of indentation spaces for a token in the output stream;
- # these were previously stored by 'set_leading_whitespace'.
+ # Then store a new blank
+ $store_token->($rcopy);
+ }
+ else {
- my $ii = shift;
- if ( $ii < 0 ) { $ii = 0 }
- return get_spaces( $leading_spaces_to_go[$ii] );
+ # insert a new token
+ my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', '' );
+ $rcopy->[_SLEVEL_] = $rLL->[$KK]->[_SLEVEL_];
+ $store_token->($rcopy);
+ push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
+ }
+ };
-}
+ my $check_Q = sub {
-sub get_spaces {
+ # Check that a quote looks okay
+ # This sub works but needs to by sync'd with the log file output
+ # before it can be used.
+ my ( $KK, $Kfirst, $line_number ) = @_;
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ $self->note_embedded_tab($line_number) if ( $token =~ "\t" );
- # return the number of leading spaces associated with an indentation
- # variable $indentation is either a constant number of spaces or an object
- # with a get_spaces method.
- my $indentation = shift;
- return ref($indentation) ? $indentation->get_spaces() : $indentation;
-}
+ my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
+ return unless ( defined($Kp) );
+ my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
+ my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
-sub get_recoverable_spaces {
+ my $previous_nonblank_type_2 = 'b';
+ my $previous_nonblank_token_2 = "";
+ my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
+ if ( defined($Kpp) ) {
+ $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_];
+ $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
+ }
- # return the number of spaces (+ means shift right, - means shift left)
- # that we would like to shift a group of lines with the same indentation
- # to get them to line up with their opening parens
- my $indentation = shift;
- return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
-}
+ my $next_nonblank_token = "";
+ my $Kn = $KK + 1;
+ if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
+ if ( $Kn <= $Kmax ) {
+ $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
+ }
-sub get_available_spaces_to_go {
+ my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
+ my $type_0 = $rLL->[$Kfirst]->[_TYPE_];
- my $ii = shift;
- my $item = $leading_spaces_to_go[$ii];
+ # make note of something like '$var = s/xxx/yyy/;'
+ # in case it should have been '$var =~ s/xxx/yyy/;'
+ if (
+ $token =~ /^(s|tr|y|m|\/)/
+ && $previous_nonblank_token =~ /^(=|==|!=)$/
- # return the number of available leading spaces associated with an
- # indentation variable. $indentation is either a constant number of
- # spaces or an object with a get_available_spaces method.
- return ref($item) ? $item->get_available_spaces() : 0;
-}
-
-sub new_lp_indentation_item {
-
- # this is an interface to the IndentationItem class
- my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
-
- # A negative level implies not to store the item in the item_list
- my $index = 0;
- if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
+ # preceded by simple scalar
+ && $previous_nonblank_type_2 eq 'i'
+ && $previous_nonblank_token_2 =~ /^\$/
- my $item = Perl::Tidy::IndentationItem->new(
- $spaces, $level,
- $ci_level, $available_spaces,
- $index, $gnu_sequence_number,
- $align_paren, $max_gnu_stack_index,
- $line_start_index_to_go,
- );
+ # followed by some kind of termination
+ # (but give complaint if we can not see far enough ahead)
+ && $next_nonblank_token =~ /^[; \)\}]$/
- if ( $level >= 0 ) {
- $gnu_item_list[$max_gnu_item_index] = $item;
- }
+ # scalar is not declared
+ && !( $type_0 eq 'k' && $token_0 =~ /^(my|our|local)$/ )
+ )
+ {
+ my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
+ complain(
+"Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
+ );
+ }
+ };
- return $item;
-}
+ ############################################
+ # Main loop to respace all lines of the file
+ ############################################
+ my $last_K_out;
-sub set_leading_whitespace {
+ # Testing option to break qw. Do not use; it can make a mess.
+ my $ALLOW_BREAK_MULTILINE_QW = 0;
+ my $in_multiline_qw;
+ foreach my $line_of_tokens ( @{$rlines} ) {
- # This routine defines leading whitespace
- # given: the level and continuation_level of a token,
- # define: space count of leading string which would apply if it
- # were the first token of a new line.
+ my $input_line_number = $line_of_tokens->{_line_number};
+ my $last_line_type = $line_type;
+ $line_type = $line_of_tokens->{_line_type};
+ next unless ( $line_type eq 'CODE' );
+ my $last_CODE_type = $CODE_type;
+ $CODE_type = $line_of_tokens->{_code_type};
+ my $rK_range = $line_of_tokens->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ next unless defined($Kfirst);
+ ( $Kfirst_old, $Klast_old ) = ( $Kfirst, $Klast );
+ $Klast_old_code = $Klast_old;
- my ( $level_abs, $ci_level, $in_continued_quote ) = @_;
+ # Be sure an old K value is defined for sub $store_token
+ $Ktoken_vars = $Kfirst;
- # Adjust levels if necessary to recycle whitespace:
- # given $level_abs, the absolute level
- # define $level, a possibly reduced level for whitespace
- my $level = $level_abs;
- if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) {
- if ( $level_abs < $whitespace_last_level ) {
- pop(@whitespace_level_stack);
- }
- if ( !@whitespace_level_stack ) {
- push @whitespace_level_stack, $level_abs;
+ # Check for correct sequence of token indexes...
+ # An error here means that sub write_line() did not correctly
+ # package the tokenized lines as it received them. If we
+ # get a fault here it has not output a continuous sequence
+ # of K values. Or a line of CODE may have been mismarked as
+ # something else.
+ if ( defined($last_K_out) ) {
+ if ( $Kfirst != $last_K_out + 1 ) {
+ Fault(
+ "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
+ );
+ }
}
- elsif ( $level_abs > $whitespace_last_level ) {
- $level = $whitespace_level_stack[-1] +
- ( $level_abs - $whitespace_last_level );
-
- if (
- # 1 Try to break at a block brace
- (
- $level > $rOpts_whitespace_cycle
- && $last_nonblank_type eq '{'
- && $last_nonblank_token eq '{'
- )
-
- # 2 Then either a brace or bracket
- || ( $level > $rOpts_whitespace_cycle + 1
- && $last_nonblank_token =~ /^[\{\[]$/ )
+ else {
- # 3 Then a paren too
- || $level > $rOpts_whitespace_cycle + 2
- )
- {
- $level = 1;
+ # The first token should always have been given index 0 by sub
+ # write_line()
+ if ( $Kfirst != 0 ) {
+ Fault("Program Bug: first K is $Kfirst but should be 0");
}
- push @whitespace_level_stack, $level;
}
- $level = $whitespace_level_stack[-1];
- }
- $whitespace_last_level = $level_abs;
-
- # modify for -bli, which adds one continuation indentation for
- # opening braces
- if ( $rOpts_brace_left_and_indent
- && $max_index_to_go == 0
- && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
- {
- $ci_level++;
- }
-
- # patch to avoid trouble when input file has negative indentation.
- # other logic should catch this error.
- if ( $level < 0 ) { $level = 0 }
+ $last_K_out = $Klast;
- #-------------------------------------------
- # handle the standard indentation scheme
- #-------------------------------------------
- unless ($rOpts_line_up_parentheses) {
- my $space_count =
- $ci_level * $rOpts_continuation_indentation +
- $level * $rOpts_indent_columns;
- my $ci_spaces =
- ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
+ # Handle special lines of code
+ if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
- if ($in_continued_quote) {
- $space_count = 0;
- $ci_spaces = 0;
- }
- $leading_spaces_to_go[$max_index_to_go] = $space_count;
- $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
- return;
- }
+ # CODE_types are as follows.
+ # 'BL' = Blank Line
+ # 'VB' = Verbatim - line goes out verbatim
+ # 'FS' = Format Skipping - line goes out verbatim, no blanks
+ # 'IO' = Indent Only - only indentation may be changed
+ # 'NIN' = No Internal Newlines - line does not get broken
+ # 'HSC'=Hanging Side Comment - fix this hanging side comment
+ # 'BC'=Block Comment - an ordinary full line comment
+ # 'SBC'=Static Block Comment - a block comment which does not get
+ # indented
+ # 'SBCX'=Static Block Comment Without Leading Space
+ # 'VER'=VERSION statement
+ # '' or (undefined) - no restructions
- #-------------------------------------------------------------
- # handle case of -lp indentation..
- #-------------------------------------------------------------
+ # For a hanging side comment we insert an empty quote before
+ # the comment so that it becomes a normal side comment and
+ # will be aligned by the vertical aligner
+ if ( $CODE_type eq 'HSC' ) {
- # The continued_quote flag means that this is the first token of a
- # line, and it is the continuation of some kind of multi-line quote
- # or pattern. It requires special treatment because it must have no
- # added leading whitespace. So we create a special indentation item
- # which is not in the stack.
- if ($in_continued_quote) {
- my $space_count = 0;
- my $available_space = 0;
- $level = -1; # flag to prevent storing in item_list
- $leading_spaces_to_go[$max_index_to_go] =
- $reduced_spaces_to_go[$max_index_to_go] =
- new_lp_indentation_item( $space_count, $level, $ci_level,
- $available_space, 0 );
- return;
- }
+ # Safety Check: This must be a line with one token (a comment)
+ my $rtoken_vars = $rLL->[$Kfirst];
+ if ( $Kfirst == $Klast && $rtoken_vars->[_TYPE_] eq '#' ) {
- # get the top state from the stack
- my $space_count = $gnu_stack[$max_gnu_stack_index]->get_spaces();
- my $current_level = $gnu_stack[$max_gnu_stack_index]->get_level();
- my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
+ # Note that even if the flag 'noadd-whitespace' is set, we
+ # will make an exception here and allow a blank to be
+ # inserted to push the comment to the right. We can think
+ # of this as an adjustment of indentation rather than
+ # whitespace between tokens. This will also prevent the
+ # hanging side comment from getting converted to a block
+ # comment if whitespace gets deleted, as for example with
+ # the -extrude and -mangle options.
+ my $rcopy = copy_token_as_type( $rtoken_vars, 'q', '' );
+ $store_token->($rcopy);
+ $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' );
+ $store_token->($rcopy);
+ $store_token->($rtoken_vars);
+ next;
+ }
+ else {
- my $type = $types_to_go[$max_index_to_go];
- my $token = $tokens_to_go[$max_index_to_go];
- my $total_depth = $nesting_depth_to_go[$max_index_to_go];
+ # This line was mis-marked by sub scan_comment
+ Fault(
+ "Program bug. A hanging side comment has been mismarked"
+ );
+ }
+ }
- if ( $type eq '{' || $type eq '(' ) {
+ if ( $CODE_type eq 'BL' ) {
+ my $seqno = $seqno_stack{ $depth_next - 1 };
+ if ( defined($seqno)
+ && !$ris_permanently_broken->{$seqno}
+ && $rOpts_maximum_consecutive_blank_lines )
+ {
+ $set_permanently_broken->($seqno);
+ }
+ }
- $gnu_comma_count{ $total_depth + 1 } = 0;
- $gnu_arrow_count{ $total_depth + 1 } = 0;
+ # Copy tokens unchanged
+ foreach my $KK ( $Kfirst .. $Klast ) {
+ $Ktoken_vars = $KK;
+ $store_token->( $rLL->[$KK] );
+ }
+ next;
+ }
- # If we come to an opening token after an '=' token of some type,
- # see if it would be helpful to 'break' after the '=' to save space
- my $last_equals = $last_gnu_equals{$total_depth};
- if ( $last_equals && $last_equals > $line_start_index_to_go ) {
+ # Handle normal line..
- # find the position if we break at the '='
- my $i_test = $last_equals;
- if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
+ # Define index of last token before any side comment for comma counts
+ my $type_end = $rLL->[$Klast_old_code]->[_TYPE_];
+ if ( ( $type_end eq '#' || $type_end eq 'b' )
+ && $Klast_old_code > $Kfirst_old )
+ {
+ $Klast_old_code--;
+ if ( $rLL->[$Klast_old_code]->[_TYPE_] eq 'b'
+ && $Klast_old_code > $Kfirst_old )
+ {
+ $Klast_old_code--;
+ }
+ }
- # TESTING
- ##my $too_close = ($i_test==$max_index_to_go-1);
+ # Insert any essential whitespace between lines
+ # if last line was normal CODE.
+ # Patch for rt #125012: use K_previous_code rather than '_nonblank'
+ # because comments may disappear.
+ my $type_next = $rLL->[$Kfirst]->[_TYPE_];
+ my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
+ my $Kp = $self->K_previous_code( undef, $rLL_new );
+ if ( $last_line_type eq 'CODE'
+ && $type_next ne 'b'
+ && defined($Kp) )
+ {
+ my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
+ my $type_p = $rLL_new->[$Kp]->[_TYPE_];
- my $test_position = total_line_length( $i_test, $max_index_to_go );
- my $mll = maximum_line_length($i_test);
+ my ( $token_pp, $type_pp );
+ my $Kpp = $self->K_previous_code( $Kp, $rLL_new );
+ if ( defined($Kpp) ) {
+ $token_pp = $rLL_new->[$Kpp]->[_TOKEN_];
+ $type_pp = $rLL_new->[$Kpp]->[_TYPE_];
+ }
+ else {
+ $token_pp = ";";
+ $type_pp = ';';
+ }
if (
- # the equals is not just before an open paren (testing)
- ##!$too_close &&
-
- # if we are beyond the midpoint
- $gnu_position_predictor > $mll - $rOpts_maximum_line_length / 2
-
- # or we are beyond the 1/4 point and there was an old
- # break at the equals
- || (
- $gnu_position_predictor >
- $mll - $rOpts_maximum_line_length * 3 / 4
- && (
- $old_breakpoint_to_go[$last_equals]
- || ( $last_equals > 0
- && $old_breakpoint_to_go[ $last_equals - 1 ] )
- || ( $last_equals > 1
- && $types_to_go[ $last_equals - 1 ] eq 'b'
- && $old_breakpoint_to_go[ $last_equals - 2 ] )
- )
+ is_essential_whitespace(
+ $token_pp, $type_pp, $token_p,
+ $type_p, $token_next, $type_next,
)
)
{
- # then make the switch -- note that we do not set a real
- # breakpoint here because we may not really need one; sub
- # scan_list will do that if necessary
- $line_start_index_to_go = $i_test + 1;
- $gnu_position_predictor = $test_position;
- }
- }
- }
+ # Copy this first token as blank, but use previous line number
+ my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', ' ' );
+ $rcopy->[_LINE_INDEX_] =
+ $rLL_new->[-1]->[_LINE_INDEX_];
- my $halfway =
- maximum_line_length_for_level($level) - $rOpts_maximum_line_length / 2;
+ # The level and ci_level of newly created spaces should be the
+ # same as the previous token. Otherwise blinking states can
+ # be created if the -lp mode is used. See similar coding in
+ # sub 'store_token_and_space'. Fixes cases b1109 b1110.
+ $rcopy->[_LEVEL_] =
+ $rLL_new->[-1]->[_LEVEL_];
+ $rcopy->[_CI_LEVEL_] =
+ $rLL_new->[-1]->[_CI_LEVEL_];
- # Check for decreasing depth ..
- # Note that one token may have both decreasing and then increasing
- # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
- # in this example we would first go back to (1,0) then up to (2,0)
- # in a single call.
- if ( $level < $current_level || $ci_level < $current_ci_level ) {
+ $store_token->($rcopy);
+ }
+ }
- # loop to find the first entry at or completely below this level
- my ( $lev, $ci_lev );
- while (1) {
- if ($max_gnu_stack_index) {
+ ########################################################
+ # Loop to copy all tokens on this line, with any changes
+ ########################################################
+ my $type_sequence;
+ for ( my $KK = $Kfirst ; $KK <= $Klast ; $KK++ ) {
+ $Ktoken_vars = $KK;
+ $rtoken_vars = $rLL->[$KK];
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $type = $rtoken_vars->[_TYPE_];
+ my $last_type_sequence = $type_sequence;
+ $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
- # save index of token which closes this level
- $gnu_stack[$max_gnu_stack_index]->set_closed($max_index_to_go);
+ # Handle a blank space ...
+ if ( $type eq 'b' ) {
- # Undo any extra indentation if we saw no commas
- my $available_spaces =
- $gnu_stack[$max_gnu_stack_index]->get_available_spaces();
+ # Delete it if not wanted by whitespace rules
+ # or we are deleting all whitespace
+ # Note that whitespace flag is a flag indicating whether a
+ # white space BEFORE the token is needed
+ next if ( $KK >= $Klast ); # skip terminal blank
+ my $Knext = $KK + 1;
- my $comma_count = 0;
- my $arrow_count = 0;
- if ( $type eq '}' || $type eq ')' ) {
- $comma_count = $gnu_comma_count{$total_depth};
- $arrow_count = $gnu_arrow_count{$total_depth};
- $comma_count = 0 unless $comma_count;
- $arrow_count = 0 unless $arrow_count;
+ if ($rOpts_freeze_whitespace) {
+ $store_token->($rtoken_vars);
+ next;
}
- $gnu_stack[$max_gnu_stack_index]->set_comma_count($comma_count);
- $gnu_stack[$max_gnu_stack_index]->set_arrow_count($arrow_count);
-
- if ( $available_spaces > 0 ) {
- if ( $comma_count <= 0 || $arrow_count > 0 ) {
+ my $ws = $rwhitespace_flags->[$Knext];
+ if ( $ws == -1
+ || $rOpts_delete_old_whitespace )
+ {
- my $i = $gnu_stack[$max_gnu_stack_index]->get_index();
- my $seqno =
- $gnu_stack[$max_gnu_stack_index]
- ->get_sequence_number();
+ my $Kp = $self->K_previous_nonblank($KK);
+ next unless defined($Kp);
+ my $token_p = $rLL->[$Kp]->[_TOKEN_];
+ my $type_p = $rLL->[$Kp]->[_TYPE_];
- # Be sure this item was created in this batch. This
- # should be true because we delete any available
- # space from open items at the end of each batch.
- if ( $gnu_sequence_number != $seqno
- || $i > $max_gnu_item_index )
- {
- warning(
-"Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
- );
- report_definite_bug();
- }
+ my ( $token_pp, $type_pp );
- else {
- if ( $arrow_count == 0 ) {
- $gnu_item_list[$i]
- ->permanently_decrease_available_spaces(
- $available_spaces);
- }
- else {
- $gnu_item_list[$i]
- ->tentatively_decrease_available_spaces(
- $available_spaces);
- }
- foreach my $j ( $i + 1 .. $max_gnu_item_index ) {
- $gnu_item_list[$j]
- ->decrease_SPACES($available_spaces);
- }
- }
+ my $Kpp = $self->K_previous_nonblank($Kp);
+ if ( defined($Kpp) ) {
+ $token_pp = $rLL->[$Kpp]->[_TOKEN_];
+ $type_pp = $rLL->[$Kpp]->[_TYPE_];
}
- }
+ else {
+ $token_pp = ";";
+ $type_pp = ';';
+ }
+ my $token_next = $rLL->[$Knext]->[_TOKEN_];
+ my $type_next = $rLL->[$Knext]->[_TYPE_];
- # go down one level
- --$max_gnu_stack_index;
- $lev = $gnu_stack[$max_gnu_stack_index]->get_level();
- $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
+ my $do_not_delete = is_essential_whitespace(
+ $token_pp, $type_pp, $token_p,
+ $type_p, $token_next, $type_next,
+ );
- # stop when we reach a level at or below the current level
- if ( $lev <= $level && $ci_lev <= $ci_level ) {
- $space_count =
- $gnu_stack[$max_gnu_stack_index]->get_spaces();
- $current_level = $lev;
- $current_ci_level = $ci_lev;
- last;
+ next unless ($do_not_delete);
}
- }
- # reached bottom of stack .. should never happen because
- # only negative levels can get here, and $level was forced
- # to be positive above.
- else {
- warning(
-"program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
- );
- report_definite_bug();
- last;
+ # make it just one character
+ $rtoken_vars->[_TOKEN_] = ' ';
+ $store_token->($rtoken_vars);
+ next;
}
- }
- }
-
- # handle increasing depth
- if ( $level > $current_level || $ci_level > $current_ci_level ) {
-
- # Compute the standard incremental whitespace. This will be
- # the minimum incremental whitespace that will be used. This
- # choice results in a smooth transition between the gnu-style
- # and the standard style.
- my $standard_increment =
- ( $level - $current_level ) * $rOpts_indent_columns +
- ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
-
- # Now we have to define how much extra incremental space
- # ("$available_space") we want. This extra space will be
- # reduced as necessary when long lines are encountered or when
- # it becomes clear that we do not have a good list.
- my $available_space = 0;
- my $align_paren = 0;
- my $excess = 0;
- # initialization on empty stack..
- if ( $max_gnu_stack_index == 0 ) {
- $space_count = $level * $rOpts_indent_columns;
- }
+ # Handle a nonblank token...
- # if this is a BLOCK, add the standard increment
- elsif ($last_nonblank_block_type) {
- $space_count += $standard_increment;
- }
+ if ($type_sequence) {
- # if last nonblank token was not structural indentation,
- # just use standard increment
- elsif ( $last_nonblank_type ne '{' ) {
- $space_count += $standard_increment;
- }
+ if ( $is_closing_token{$token} ) {
- # otherwise use the space to the first non-blank level change token
- else {
+ # Insert a tentative missing semicolon if the next token is
+ # a closing block brace
+ if (
+ $type eq '}'
+ && $token eq '}'
- $space_count = $gnu_position_predictor;
+ # not preceded by a ';'
+ && $last_nonblank_type ne ';'
- my $min_gnu_indentation =
- $gnu_stack[$max_gnu_stack_index]->get_spaces();
+ # and this is not a VERSION stmt (is all one line, we
+ # are not inserting semicolons on one-line blocks)
+ && $CODE_type ne 'VER'
- $available_space = $space_count - $min_gnu_indentation;
- if ( $available_space >= $standard_increment ) {
- $min_gnu_indentation += $standard_increment;
- }
- elsif ( $available_space > 1 ) {
- $min_gnu_indentation += $available_space + 1;
- }
- elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
- if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
- $min_gnu_indentation += 2;
- }
- else {
- $min_gnu_indentation += 1;
+ # and we are allowed to add semicolons
+ && $rOpts->{'add-semicolons'}
+ )
+ {
+ $add_phantom_semicolon->($KK);
+ }
}
}
- else {
- $min_gnu_indentation += $standard_increment;
- }
- $available_space = $space_count - $min_gnu_indentation;
-
- if ( $available_space < 0 ) {
- $space_count = $min_gnu_indentation;
- $available_space = 0;
- }
- $align_paren = 1;
- }
-
- # update state, but not on a blank token
- if ( $types_to_go[$max_index_to_go] ne 'b' ) {
-
- $gnu_stack[$max_gnu_stack_index]->set_have_child(1);
- ++$max_gnu_stack_index;
- $gnu_stack[$max_gnu_stack_index] =
- new_lp_indentation_item( $space_count, $level, $ci_level,
- $available_space, $align_paren );
+ # Modify certain tokens here for whitespace
+ # The following is not yet done, but could be:
+ # sub (x x x)
+ elsif ( $type =~ /^[wit]$/ ) {
- # If the opening paren is beyond the half-line length, then
- # we will use the minimum (standard) indentation. This will
- # help avoid problems associated with running out of space
- # near the end of a line. As a result, in deeply nested
- # lists, there will be some indentations which are limited
- # to this minimum standard indentation. But the most deeply
- # nested container will still probably be able to shift its
- # parameters to the right for proper alignment, so in most
- # cases this will not be noticeable.
- if ( $available_space > 0 && $space_count > $halfway ) {
- $gnu_stack[$max_gnu_stack_index]
- ->tentatively_decrease_available_spaces($available_space);
- }
- }
- }
+ # Examples: <<snippets/space1.in>>
+ # change '$ var' to '$var' etc
+ # change '@ ' to '@'
+ my ( $sigil, $word ) = split /\s+/, $token, 2;
+ if ( length($sigil) == 1
+ && $sigil =~ /^[\$\&\%\*\@]$/ )
+ {
+ $token = $sigil;
+ $token .= $word if ($word);
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
- # Count commas and look for non-list characters. Once we see a
- # non-list character, we give up and don't look for any more commas.
- if ( $type eq '=>' ) {
- $gnu_arrow_count{$total_depth}++;
+ # Split identifiers with leading arrows, inserting blanks if
+ # necessary. It is easier and safer here than in the
+ # tokenizer. For example '->new' becomes two tokens, '->' and
+ # 'new' with a possible blank between.
+ #
+ # Note: there is a related patch in sub set_whitespace_flags
+ if ( substr( $token, 0, 1 ) eq '-'
+ && $token =~ /^\-\>(.*)$/
+ && $1 )
+ {
- # tentatively treating '=>' like '=' for estimating breaks
- # TODO: this could use some experimentation
- $last_gnu_equals{$total_depth} = $max_index_to_go;
- }
+ my $token_save = $1;
+ my $type_save = $type;
- elsif ( $type eq ',' ) {
- $gnu_comma_count{$total_depth}++;
- }
+ # Change '-> new' to '->new'
+ $token_save =~ s/^\s+//g;
- elsif ( $is_assignment{$type} ) {
- $last_gnu_equals{$total_depth} = $max_index_to_go;
- }
+ # store a blank to left of arrow if necessary
+ my $Kprev = $self->K_previous_nonblank($KK);
+ if ( defined($Kprev)
+ && $rLL->[$Kprev]->[_TYPE_] ne 'b'
+ && $rOpts_add_whitespace
+ && $want_left_space{'->'} == WS_YES )
+ {
+ my $rcopy =
+ copy_token_as_type( $rtoken_vars, 'b', ' ' );
+ $store_token->($rcopy);
+ }
- # this token might start a new line
- # if this is a non-blank..
- if ( $type ne 'b' ) {
+ # then store the arrow
+ my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' );
+ $store_token->($rcopy);
- # and if ..
- if (
+ # store a blank after the arrow if requested
+ # added for issue git #33
+ if ( $want_right_space{'->'} == WS_YES ) {
+ my $rcopy =
+ copy_token_as_type( $rtoken_vars, 'b', ' ' );
+ $store_token->($rcopy);
+ }
- # this is the first nonblank token of the line
- $max_index_to_go == 1 && $types_to_go[0] eq 'b'
+ # then reset the current token to be the remainder,
+ # and reset the whitespace flag according to the arrow
+ $token = $rtoken_vars->[_TOKEN_] = $token_save;
+ $type = $rtoken_vars->[_TYPE_] = $type_save;
+ $store_token->($rtoken_vars);
+ next;
+ }
- # or previous character was one of these:
- || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/
+ if ( $token =~ /$ANYSUB_PATTERN/ ) {
- # or previous character was opening and this does not close it
- || ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
- || ( $last_nonblank_type_to_go eq '(' and $type ne ')' )
+ # -spp = 0 : no space before opening prototype paren
+ # -spp = 1 : stable (follow input spacing)
+ # -spp = 2 : always space before opening prototype paren
+ my $spp = $rOpts->{'space-prototype-paren'};
+ if ( defined($spp) ) {
+ if ( $spp == 0 ) { $token =~ s/\s+\(/\(/; }
+ elsif ( $spp == 2 ) { $token =~ s/\(/ (/; }
+ }
- # or this token is one of these:
- || $type =~ /^([\.]|\|\||\&\&)$/
+ # one space max, and no tabs
+ $token =~ s/\s+/ /g;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
- # or this is a closing structure
- || ( $last_nonblank_type_to_go eq '}'
- && $last_nonblank_token_to_go eq $last_nonblank_type_to_go )
+ # clean up spaces in package identifiers, like
+ # "package Bob::Dog;"
+ if ( $token =~ /^package\s/ ) {
+ $token =~ s/\s+/ /g;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
- # or previous token was keyword 'return'
- || ( $last_nonblank_type_to_go eq 'k'
- && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )
+ # trim identifiers of trailing blanks which can occur
+ # under some unusual circumstances, such as if the
+ # identifier 'witch' has trailing blanks on input here:
+ #
+ # sub
+ # witch
+ # () # prototype may be on new line ...
+ # ...
+ if ( $type eq 'i' ) {
+ $token =~ s/\s+$//g;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
+ }
- # or starting a new line at certain keywords is fine
- || ( $type eq 'k'
- && $is_if_unless_and_or_last_next_redo_return{$token} )
+ # handle semicolons
+ elsif ( $type eq ';' ) {
- # or this is after an assignment after a closing structure
- || (
- $is_assignment{$last_nonblank_type_to_go}
- && (
- $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
+ # Remove unnecessary semicolons, but not after bare
+ # blocks, where it could be unsafe if the brace is
+ # mistokenized.
+ if (
+ $rOpts->{'delete-semicolons'}
+ && (
+ (
+ $last_nonblank_type eq '}'
+ && (
+ $is_block_without_semicolon{
+ $last_nonblank_block_type}
+ || $last_nonblank_block_type =~ /$SUB_PATTERN/
+ || $last_nonblank_block_type =~ /^\w+:$/
+ )
+ )
+ || $last_nonblank_type eq ';'
+ )
+ )
+ {
- # and it is significantly to the right
- || $gnu_position_predictor > $halfway
- )
- )
- )
- {
- check_for_long_gnu_style_lines();
- $line_start_index_to_go = $max_index_to_go;
+ # This looks like a deletable semicolon, but even if a
+ # semicolon can be deleted it is necessarily best to do so.
+ # We apply these additional rules for deletion:
+ # - Always ok to delete a ';' at the end of a line
+ # - Never delete a ';' before a '#' because it would
+ # promote it to a block comment.
+ # - If a semicolon is not at the end of line, then only
+ # delete if it is followed by another semicolon or closing
+ # token. This includes the comment rule. It may take
+ # two passes to get to a final state, but it is a little
+ # safer. For example, keep the first semicolon here:
+ # eval { sub bubba { ok(0) }; ok(0) } || ok(1);
+ # It is not required but adds some clarity.
+ my $ok_to_delete = 1;
+ if ( $KK < $Klast ) {
+ my $Kn = $self->K_next_nonblank($KK);
+ if ( defined($Kn) && $Kn <= $Klast ) {
+ my $next_nonblank_token_type =
+ $rLL->[$Kn]->[_TYPE_];
+ $ok_to_delete = $next_nonblank_token_type eq ';'
+ || $next_nonblank_token_type eq '}';
+ }
+ }
- # back up 1 token if we want to break before that type
- # otherwise, we may strand tokens like '?' or ':' on a line
- if ( $line_start_index_to_go > 0 ) {
- if ( $last_nonblank_type_to_go eq 'k' ) {
+ # do not delete only nonblank token in a file
+ else {
+ my $Kn = $self->K_next_nonblank($KK);
+ $ok_to_delete = defined($Kn) || $nonblank_token_count;
+ }
- if ( $want_break_before{$last_nonblank_token_to_go} ) {
- $line_start_index_to_go--;
+ if ($ok_to_delete) {
+ $self->note_deleted_semicolon($input_line_number);
+ next;
+ }
+ else {
+ write_logfile_entry("Extra ';'\n");
}
- }
- elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
- $line_start_index_to_go--;
}
}
- }
- }
-
- # remember the predicted position of this token on the output line
- if ( $max_index_to_go > $line_start_index_to_go ) {
- $gnu_position_predictor =
- total_line_length( $line_start_index_to_go, $max_index_to_go );
- }
- else {
- $gnu_position_predictor =
- $space_count + $token_lengths_to_go[$max_index_to_go];
- }
-
- # store the indentation object for this token
- # this allows us to manipulate the leading whitespace
- # (in case we have to reduce indentation to fit a line) without
- # having to change any token values
- $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
- $reduced_spaces_to_go[$max_index_to_go] =
- ( $max_gnu_stack_index > 0 && $ci_level )
- ? $gnu_stack[ $max_gnu_stack_index - 1 ]
- : $gnu_stack[$max_gnu_stack_index];
- return;
-}
-sub check_for_long_gnu_style_lines {
+ # patch to add space to something like "x10"
+ # This avoids having to split this token in the pre-tokenizer
+ elsif ( $type eq 'n' ) {
+ if ( $token =~ /^x\d+/ ) {
+ $token =~ s/x/x /;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
+ }
- # look at the current estimated maximum line length, and
- # remove some whitespace if it exceeds the desired maximum
+ # check for a qw quote
+ elsif ( $type eq 'q' ) {
- # this is only for the '-lp' style
- return unless ($rOpts_line_up_parentheses);
+ # trim blanks from right of qw quotes
+ # (To avoid trimming qw quotes use -ntqw; the tokenizer handles
+ # this)
+ $token =~ s/\s*$//;
+ $rtoken_vars->[_TOKEN_] = $token;
+ $self->note_embedded_tab($input_line_number)
+ if ( $token =~ "\t" );
- # nothing can be done if no stack items defined for this line
- return if ( $max_gnu_item_index == UNDEFINED_INDEX );
+ if ($in_multiline_qw) {
- # see if we have exceeded the maximum desired line length
- # keep 2 extra free because they are needed in some cases
- # (result of trial-and-error testing)
- my $spaces_needed =
- $gnu_position_predictor - maximum_line_length($max_index_to_go) + 2;
+ # If we are at the end of a multiline qw ..
+ if ( $in_multiline_qw == $KK ) {
- return if ( $spaces_needed <= 0 );
+ # Split off the closing delimiter character
+ # so that the formatter can put a line break there if necessary
+ my $part1 = $token;
+ my $part2 = substr( $part1, -1, 1, "" );
- # We are over the limit, so try to remove a requested number of
- # spaces from leading whitespace. We are only allowed to remove
- # from whitespace items created on this batch, since others have
- # already been used and cannot be undone.
- my @candidates = ();
- my $i;
+ if ($part1) {
+ my $rcopy =
+ copy_token_as_type( $rtoken_vars, 'q', $part1 );
+ $store_token->($rcopy);
+ $token = $part2;
+ $rtoken_vars->[_TOKEN_] = $token;
- # loop over all whitespace items created for the current batch
- for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
- my $item = $gnu_item_list[$i];
+ }
+ $in_multiline_qw = undef;
- # item must still be open to be a candidate (otherwise it
- # cannot influence the current token)
- next if ( $item->get_closed() >= 0 );
+ # store without preceding blank
+ $store_token->($rtoken_vars);
+ next;
+ }
+ else {
+ # continuing a multiline qw
+ $store_token->($rtoken_vars);
+ next;
+ }
+ }
- my $available_spaces = $item->get_available_spaces();
+ else {
- if ( $available_spaces > 0 ) {
- push( @candidates, [ $i, $available_spaces ] );
- }
- }
+ # we are encountered new qw token...see if multiline
+ if ($ALLOW_BREAK_MULTILINE_QW) {
+ my $K_end = $K_end_q->($KK);
+ if ( $K_end != $KK ) {
+
+ # Starting multiline qw...
+ # set flag equal to the ending K
+ $in_multiline_qw = $K_end;
+
+ # Split off the leading part so that the formatter can
+ # put a line break there if necessary
+ if ( $token =~ /^(qw\s*.)(.*)$/ ) {
+ my $part1 = $1;
+ my $part2 = $2;
+ if ($part2) {
+ my $rcopy =
+ copy_token_as_type( $rtoken_vars, 'q',
+ $part1 );
+ $store_token_and_space->(
+ $rcopy,
+ $rwhitespace_flags->[$KK] == WS_YES
+ );
+ $token = $part2;
+ $rtoken_vars->[_TOKEN_] = $token;
+
+ # Second part goes without intermediate blank
+ $store_token->($rtoken_vars);
+ next;
+ }
+ }
+ }
+ }
+ else {
- return unless (@candidates);
+ # this is a new single token qw -
+ # store with possible preceding blank
+ $store_token_and_space->(
+ $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
+ );
+ next;
+ }
+ }
+ } ## end if ( $type eq 'q' )
- # sort by available whitespace so that we can remove whitespace
- # from the maximum available first
- @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
+ # change 'LABEL :' to 'LABEL:'
+ elsif ( $type eq 'J' ) {
+ $token =~ s/\s+//g;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
- # keep removing whitespace until we are done or have no more
- foreach my $candidate (@candidates) {
- my ( $i, $available_spaces ) = @{$candidate};
- my $deleted_spaces =
- ( $available_spaces > $spaces_needed )
- ? $spaces_needed
- : $available_spaces;
+ # check a quote for problems
+ elsif ( $type eq 'Q' ) {
+ $check_Q->( $KK, $Kfirst, $input_line_number );
+ }
- # remove the incremental space from this item
- $gnu_item_list[$i]->decrease_available_spaces($deleted_spaces);
+ # Store this token with possible previous blank
+ $store_token_and_space->(
+ $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
+ );
- my $i_debug = $i;
+ } # End token loop
+ } # End line loop
- # update the leading whitespace of this item and all items
- # that came after it
- for ( ; $i <= $max_gnu_item_index ; $i++ ) {
+ # Walk backwards through the tokens, making forward links to sequence items.
+ if ( @{$rLL_new} ) {
+ my $KNEXT;
+ for ( my $KK = @{$rLL_new} - 1 ; $KK >= 0 ; $KK-- ) {
+ $rLL_new->[$KK]->[_KNEXT_SEQ_ITEM_] = $KNEXT;
+ if ( $rLL_new->[$KK]->[_TYPE_SEQUENCE_] ) { $KNEXT = $KK }
+ }
+ $self->[_K_first_seq_item_] = $KNEXT;
+ }
- my $old_spaces = $gnu_item_list[$i]->get_spaces();
- if ( $old_spaces >= $deleted_spaces ) {
- $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
- }
+ # Find and remember lists by sequence number
+ foreach my $seqno ( keys %{$K_opening_container} ) {
+ my $K_opening = $K_opening_container->{$seqno};
+ next unless defined($K_opening);
- # shouldn't happen except for code bug:
- else {
- my $level = $gnu_item_list[$i_debug]->get_level();
- my $ci_level = $gnu_item_list[$i_debug]->get_ci_level();
- my $old_level = $gnu_item_list[$i]->get_level();
- my $old_ci_level = $gnu_item_list[$i]->get_ci_level();
- warning(
-"program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n"
- );
- report_definite_bug();
+ # code errors may leave undefined closing tokens
+ my $K_closing = $K_closing_container->{$seqno};
+ next unless defined($K_closing);
+
+ my $lx_open = $rLL_new->[$K_opening]->[_LINE_INDEX_];
+ my $lx_close = $rLL_new->[$K_closing]->[_LINE_INDEX_];
+ my $line_diff = $lx_close - $lx_open;
+ $ris_broken_container->{$seqno} = $line_diff;
+
+ # See if this is a list
+ my $is_list;
+ my $rtype_count = $rtype_count_by_seqno->{$seqno};
+ if ($rtype_count) {
+ my $comma_count = $rtype_count->{','};
+ my $fat_comma_count = $rtype_count->{'=>'};
+ my $semicolon_count = $rtype_count->{';'} || $rtype_count->{'f'};
+
+ # We will define a list to be a container with one or more commas
+ # and no semicolons. Note that we have included the semicolons
+ # in a 'for' container in the simicolon count to keep c-style for
+ # statements from being formatted as lists.
+ if ( ( $comma_count || $fat_comma_count ) && !$semicolon_count ) {
+ $is_list = 1;
+
+ # We need to do one more check for a perenthesized list:
+ # At an opening paren following certain tokens, such as 'if',
+ # we do not want to format the contents as a list.
+ if ( $rLL_new->[$K_opening]->[_TOKEN_] eq '(' ) {
+ my $Kp = $self->K_previous_code( $K_opening, $rLL_new );
+ if ( defined($Kp) ) {
+ my $type_p = $rLL_new->[$Kp]->[_TYPE_];
+ if ( $type_p eq 'k' ) {
+ my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
+ $is_list = 0 if ( $is_nonlist_keyword{$token_p} );
+ }
+ else {
+ $is_list = 0 if ( $is_nonlist_type{$type_p} );
+ }
+ }
+ }
}
}
- $gnu_position_predictor -= $deleted_spaces;
- $spaces_needed -= $deleted_spaces;
- last unless ( $spaces_needed > 0 );
- }
- return;
-}
-sub finish_lp_batch {
+ # Look for a block brace marked as uncertain. If the tokenizer thinks
+ # its guess is uncertain for the type of a brace following an unknown
+ # bareword then it adds a trailing space as a signal. We can fix the
+ # type here now that we have had a better look at the contents of the
+ # container. This fixes case b1085. To find the corresponding code in
+ # Tokenizer.pm search for 'b1085' with an editor.
+ my $block_type = $rLL_new->[$K_opening]->[_BLOCK_TYPE_];
+ if ( $block_type && substr( $block_type, -1, 1 ) eq ' ' ) {
- # This routine is called once after each output stream batch is
- # finished to undo indentation for all incomplete -lp
- # indentation levels. It is too risky to leave a level open,
- # because then we can't backtrack in case of a long line to follow.
- # This means that comments and blank lines will disrupt this
- # indentation style. But the vertical aligner may be able to
- # get the space back if there are side comments.
+ # Always remove the trailing space
+ $block_type =~ s/\s+$//;
- # this is only for the 'lp' style
- return unless ($rOpts_line_up_parentheses);
+ # Try to filter out parenless sub calls
+ my ( $Knn1, $Knn2 );
+ my ( $type_nn1, $type_nn2 ) = ( 'b', 'b' );
+ $Knn1 = $self->K_next_nonblank( $K_opening, $rLL_new );
+ $Knn2 = $self->K_next_nonblank( $Knn1, $rLL_new ) if defined($Knn1);
+ $type_nn1 = $rLL_new->[$Knn1]->[_TYPE_] if ( defined($Knn1) );
+ $type_nn2 = $rLL_new->[$Knn2]->[_TYPE_] if ( defined($Knn2) );
- # nothing can be done if no stack items defined for this line
- return if ( $max_gnu_item_index == UNDEFINED_INDEX );
+ # if ( $type_nn1 =~ /^[wU]$/ && $type_nn2 =~ /^[wiqQGCZ]$/ ) {
+ if ( $wU{$type_nn1} && $wiq{$type_nn2} ) {
+ $is_list = 0;
+ }
- # loop over all whitespace items created for the current batch
- foreach my $i ( 0 .. $max_gnu_item_index ) {
- my $item = $gnu_item_list[$i];
+ # Convert to a hash brace if it looks like it holds a list
+ if ($is_list) {
- # only look for open items
- next if ( $item->get_closed() >= 0 );
+ $block_type = "";
- # Tentatively remove all of the available space
- # (The vertical aligner will try to get it back later)
- my $available_spaces = $item->get_available_spaces();
- if ( $available_spaces > 0 ) {
+ $rLL_new->[$K_opening]->[_CI_LEVEL_] = 1;
+ $rLL_new->[$K_closing]->[_CI_LEVEL_] = 1;
+ }
- # delete incremental space for this item
- $gnu_item_list[$i]
- ->tentatively_decrease_available_spaces($available_spaces);
+ $rLL_new->[$K_opening]->[_BLOCK_TYPE_] = $block_type;
+ $rLL_new->[$K_closing]->[_BLOCK_TYPE_] = $block_type;
+ }
- # Reduce the total indentation space of any nodes that follow
- # Note that any such nodes must necessarily be dependents
- # of this node.
- foreach ( $i + 1 .. $max_gnu_item_index ) {
- $gnu_item_list[$_]->decrease_SPACES($available_spaces);
+ # Handle a list container
+ if ( $is_list && !$block_type ) {
+ $ris_list_by_seqno->{$seqno} = $seqno;
+ my $seqno_parent = $rparent_of_seqno->{$seqno};
+ my $depth = 0;
+ while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
+ $depth++;
+
+ # for $rhas_list we need to save the minimum depth
+ if ( !$rhas_list->{$seqno_parent}
+ || $rhas_list->{$seqno_parent} > $depth )
+ {
+ $rhas_list->{$seqno_parent} = $depth;
+ }
+
+ if ($line_diff) {
+ $rhas_broken_list->{$seqno_parent} = 1;
+
+ # Patch1: We need to mark broken lists with non-terminal
+ # line-ending commas for the -bbx=2 parameter. This insures
+ # that the list will stay broken. Otherwise the flag
+ # -bbx=2 can be unstable. This fixes case b789 and b938.
+
+ # Patch2: Updated to also require either one fat comma or
+ # one more line-ending comma. Fixes cases b1069 b1070
+ # b1072 b1076.
+ if (
+ $rlec_count_by_seqno->{$seqno}
+ && ( $rlec_count_by_seqno->{$seqno} > 1
+ || $rtype_count_by_seqno->{$seqno}->{'=>'} )
+ )
+ {
+ $rhas_broken_list_with_lec->{$seqno_parent} = 1;
+ }
+ }
+ $seqno_parent = $rparent_of_seqno->{$seqno_parent};
+ }
+ }
+
+ # Handle code blocks ...
+ # The -lp option needs to know if a container holds a code block
+ elsif ( $block_type && $rOpts_line_up_parentheses ) {
+ my $seqno_parent = $rparent_of_seqno->{$seqno};
+ while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
+ $rhas_code_block->{$seqno_parent} = 1;
+ $rhas_broken_code_block->{$seqno_parent} = $line_diff;
+ $seqno_parent = $rparent_of_seqno->{$seqno_parent};
}
}
}
- return;
-}
-sub reduce_lp_indentation {
+ # Find containers with ternaries, needed for -lp formatting.
+ foreach my $seqno ( keys %{$K_opening_ternary} ) {
+ my $seqno_parent = $rparent_of_seqno->{$seqno};
+ while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
+ $rhas_ternary->{$seqno_parent} = 1;
+ $seqno_parent = $rparent_of_seqno->{$seqno_parent};
+ }
+ }
- # reduce the leading whitespace at token $i if possible by $spaces_needed
- # (a large value of $spaces_needed will remove all excess space)
- # NOTE: to be called from scan_list only for a sequence of tokens
- # contained between opening and closing parens/braces/brackets
+ # Turn off -lp for containers with here-docs with text within a container,
+ # since they have their own fixed indentation. Fixes case b1081.
+ if ($rOpts_line_up_parentheses) {
+ foreach my $seqno ( keys %K_first_here_doc_by_seqno ) {
+ my $Kh = $K_first_here_doc_by_seqno{$seqno};
+ my $Kc = $K_closing_container->{$seqno};
+ my $line_Kh = $rLL_new->[$Kh]->[_LINE_INDEX_];
+ my $line_Kc = $rLL_new->[$Kc]->[_LINE_INDEX_];
+ next if ( $line_Kh == $line_Kc );
+ $ris_excluded_lp_container->{$seqno} = 1;
+ }
+ }
- my ( $i, $spaces_wanted ) = @_;
- my $deleted_spaces = 0;
+ # Set a flag to turn off -cab=3 in complex structures. Otherwise,
+ # instability can occur. When it is overridden the behavior of the closest
+ # match, -cab=2, will be used instead. This fixes cases b1096 b1113.
+ if ( $rOpts_comma_arrow_breakpoints == 3 ) {
+ foreach my $seqno ( keys %{$K_opening_container} ) {
- my $item = $leading_spaces_to_go[$i];
- my $available_spaces = $item->get_available_spaces();
+ my $rtype_count = $rtype_count_by_seqno->{$seqno};
+ next unless ( $rtype_count && $rtype_count->{'=>'} );
- if (
- $available_spaces > 0
- && ( ( $spaces_wanted <= $available_spaces )
- || !$item->get_have_child() )
- )
- {
+ # override -cab=3 if this contains a sub-list
+ if ( $rhas_list->{$seqno} ) {
+ $roverride_cab3->{$seqno} = 1;
+ }
- # we'll remove these spaces, but mark them as recoverable
- $deleted_spaces =
- $item->tentatively_decrease_available_spaces($spaces_wanted);
+ # or if this is a sub-list of its parent container
+ else {
+ my $seqno_parent = $rparent_of_seqno->{$seqno};
+ if ( defined($seqno_parent)
+ && $ris_list_by_seqno->{$seqno_parent} )
+ {
+ $roverride_cab3->{$seqno} = 1;
+ }
+ }
+ }
}
- return $deleted_spaces;
-}
-
-sub token_sequence_length {
+ # Reset memory to be the new array
+ $self->[_rLL_] = $rLL_new;
+ my $Klimit;
+ if ( @{$rLL_new} ) { $Klimit = @{$rLL_new} - 1 }
+ $self->[_Klimit_] = $Klimit;
- # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
- # returns 0 if $ibeg > $iend (shouldn't happen)
- my ( $ibeg, $iend ) = @_;
- return 0 if ( $iend < 0 || $ibeg > $iend );
- return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 );
- return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
-}
+ # DEBUG OPTION: make sure the new array looks okay.
+ # This is no longer needed but should be retained for future development.
+ DEVEL_MODE && $self->check_token_array();
-sub total_line_length {
+ # reset the token limits of each line
+ $self->resync_lines_and_tokens();
- # return length of a line of tokens ($ibeg .. $iend)
- my ( $ibeg, $iend ) = @_;
- return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
+ return;
}
-sub maximum_line_length_for_level {
+sub copy_token_as_type {
- # return maximum line length for line starting with a given level
- my $maximum_line_length = $rOpts_maximum_line_length;
+ # This provides a quick way to create a new token by
+ # slightly modifying an existing token.
+ my ( $rold_token, $type, $token ) = @_;
+ if ( $type eq 'b' ) {
+ $token = " " unless defined($token);
+ }
+ elsif ( $type eq 'q' ) {
+ $token = '' unless defined($token);
+ }
+ elsif ( $type eq '->' ) {
+ $token = '->' unless defined($token);
+ }
+ elsif ( $type eq ';' ) {
+ $token = ';' unless defined($token);
+ }
+ else {
- # Modify if -vmll option is selected
- if ($rOpts_variable_maximum_line_length) {
- my $level = shift;
- if ( $level < 0 ) { $level = 0 }
- $maximum_line_length += $level * $rOpts_indent_columns;
+ # This sub assumes it will be called with just two types, 'b' or 'q'
+ Fault(
+"Programming error: copy_token_as has type $type but should be 'b' or 'q'"
+ );
}
- return $maximum_line_length;
+
+ my @rnew_token = @{$rold_token};
+ $rnew_token[_TYPE_] = $type;
+ $rnew_token[_TOKEN_] = $token;
+ $rnew_token[_BLOCK_TYPE_] = '';
+ $rnew_token[_TYPE_SEQUENCE_] = '';
+ return \@rnew_token;
}
-sub maximum_line_length {
+sub Debug_dump_tokens {
+
+ # a debug routine, not normally used
+ my ( $self, $msg ) = @_;
+ my $rLL = $self->[_rLL_];
+ my $nvars = @{$rLL};
+ print STDERR "$msg\n";
+ print STDERR "ntokens=$nvars\n";
+ print STDERR "K\t_TOKEN_\t_TYPE_\n";
+ my $K = 0;
- # return maximum line length for line starting with the token at given index
- my $ii = shift;
- return maximum_line_length_for_level( $levels_to_go[$ii] );
+ foreach my $item ( @{$rLL} ) {
+ print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n";
+ $K++;
+ }
+ return;
}
-sub excess_line_length {
+sub K_next_code {
+ my ( $self, $KK, $rLL ) = @_;
- # return number of characters by which a line of tokens ($ibeg..$iend)
- # exceeds the allowable line length.
- my ( $ibeg, $iend, $ignore_left_weld, $ignore_right_weld ) = @_;
+ # return the index K of the next nonblank, non-comment token
+ return unless ( defined($KK) && $KK >= 0 );
- # Include left and right weld lengths unless requested not to
- my $wl = $ignore_left_weld ? 0 : weld_len_left_to_go($iend);
- my $wr = $ignore_right_weld ? 0 : weld_len_right_to_go($iend);
+ # use the standard array unless given otherwise
+ $rLL = $self->[_rLL_] unless ( defined($rLL) );
+ my $Num = @{$rLL};
+ my $Knnb = $KK + 1;
+ while ( $Knnb < $Num ) {
+ if ( !defined( $rLL->[$Knnb] ) ) {
- return total_line_length( $ibeg, $iend ) + $wl + $wr -
- maximum_line_length($ibeg);
+ # We seem to have encountered a gap in our array.
+ # This shouldn't happen because sub write_line() pushed
+ # items into the $rLL array.
+ Fault("Undefined entry for k=$Knnb");
+ }
+ if ( $rLL->[$Knnb]->[_TYPE_] ne 'b'
+ && $rLL->[$Knnb]->[_TYPE_] ne '#' )
+ {
+ return $Knnb;
+ }
+ $Knnb++;
+ }
+ return;
}
-sub wrapup {
+sub K_next_nonblank {
+ my ( $self, $KK, $rLL ) = @_;
- # flush buffer and write any informative messages
- my $self = shift;
+ # return the index K of the next nonblank token, or
+ # return undef if none
+ return unless ( defined($KK) && $KK >= 0 );
- $self->flush();
- $file_writer_object->decrement_output_line_number()
- ; # fix up line number since it was incremented
- we_are_at_the_last_line();
- if ( $added_semicolon_count > 0 ) {
- my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
- my $what =
- ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
- write_logfile_entry("$added_semicolon_count $what added:\n");
- write_logfile_entry(
- " $first at input line $first_added_semicolon_at\n");
+ # The third arg allows this routine to be used on any array. This is
+ # useful in sub respace_tokens when we are copying tokens from an old $rLL
+ # to a new $rLL array. But usually the third arg will not be given and we
+ # will just use the $rLL array in $self.
+ $rLL = $self->[_rLL_] unless ( defined($rLL) );
+ my $Num = @{$rLL};
+ my $Knnb = $KK + 1;
+ return unless ( $Knnb < $Num );
+ return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
+ return unless ( ++$Knnb < $Num );
+ return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
+
+ # Backup loop. Very unlikely to get here; it means we have neighboring
+ # blanks in the token stream.
+ $Knnb++;
+ while ( $Knnb < $Num ) {
- if ( $added_semicolon_count > 1 ) {
- write_logfile_entry(
- " Last at input line $last_added_semicolon_at\n");
+ # Safety check, this fault shouldn't happen: The $rLL array is the
+ # main array of tokens, so all entries should be used. It is
+ # initialized in sub write_line, and then re-initialized by sub
+ # $store_token() within sub respace_tokens. Tokens are pushed on
+ # so there shouldn't be any gaps.
+ if ( !defined( $rLL->[$Knnb] ) ) {
+ Fault("Undefined entry for k=$Knnb");
}
- write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
- write_logfile_entry("\n");
+ if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
+ $Knnb++;
}
+ return;
+}
- if ( $deleted_semicolon_count > 0 ) {
- my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
- my $what =
- ( $deleted_semicolon_count > 1 )
- ? "semicolons were"
- : "semicolon was";
- write_logfile_entry(
- "$deleted_semicolon_count unnecessary $what deleted:\n");
- write_logfile_entry(
- " $first at input line $first_deleted_semicolon_at\n");
+sub K_previous_code {
- if ( $deleted_semicolon_count > 1 ) {
- write_logfile_entry(
- " Last at input line $last_deleted_semicolon_at\n");
- }
- write_logfile_entry(" (Use -ndsm to prevent semicolon deletion)\n");
- write_logfile_entry("\n");
- }
+ # return the index K of the previous nonblank, non-comment token
+ # Call with $KK=undef to start search at the top of the array
+ my ( $self, $KK, $rLL ) = @_;
- if ( $embedded_tab_count > 0 ) {
- my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
- my $what =
- ( $embedded_tab_count > 1 )
- ? "quotes or patterns"
- : "quote or pattern";
- write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
- write_logfile_entry(
-"This means the display of this script could vary with device or software\n"
- );
- write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
+ # use the standard array unless given otherwise
+ $rLL = $self->[_rLL_] unless ( defined($rLL) );
+ my $Num = @{$rLL};
+ if ( !defined($KK) ) { $KK = $Num }
+ elsif ( $KK > $Num ) {
- if ( $embedded_tab_count > 1 ) {
- write_logfile_entry(
- " Last at input line $last_embedded_tab_at\n");
+ # This fault can be caused by a programming error in which a bad $KK is
+ # given. The caller should make the first call with KK_new=undef to
+ # avoid this error.
+ Fault(
+"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
+ );
+ }
+ my $Kpnb = $KK - 1;
+ while ( $Kpnb >= 0 ) {
+ if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b'
+ && $rLL->[$Kpnb]->[_TYPE_] ne '#' )
+ {
+ return $Kpnb;
}
- write_logfile_entry("\n");
+ $Kpnb--;
}
+ return;
+}
- if ($first_tabbing_disagreement) {
- write_logfile_entry(
-"First indentation disagreement seen at input line $first_tabbing_disagreement\n"
- );
- }
+sub K_previous_nonblank {
- if ($in_tabbing_disagreement) {
- write_logfile_entry(
-"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
- );
- }
- else {
+ # return index of previous nonblank token before item K;
+ # Call with $KK=undef to start search at the top of the array
+ my ( $self, $KK, $rLL ) = @_;
- if ($last_tabbing_disagreement) {
+ # use the standard array unless given otherwise
+ $rLL = $self->[_rLL_] unless ( defined($rLL) );
+ my $Num = @{$rLL};
+ if ( !defined($KK) ) { $KK = $Num }
+ elsif ( $KK > $Num ) {
- write_logfile_entry(
-"Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
- );
- }
- else {
- write_logfile_entry("No indentation disagreement seen\n");
- }
- }
- if ($first_tabbing_disagreement) {
- write_logfile_entry(
-"Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
+ # This fault can be caused by a programming error in which a bad $KK is
+ # given. The caller should make the first call with KK_new=undef to
+ # avoid this error.
+ Fault(
+"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
);
}
- write_logfile_entry("\n");
-
- $vertical_aligner_object->report_anything_unusual();
+ my $Kpnb = $KK - 1;
+ return unless ( $Kpnb >= 0 );
+ return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
+ return unless ( --$Kpnb >= 0 );
+ return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
+
+ # Backup loop. We should not get here unless some routine
+ # slipped repeated blanks into the token stream.
+ return unless ( --$Kpnb >= 0 );
+ while ( $Kpnb >= 0 ) {
+ if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
+ $Kpnb--;
+ }
+ return;
+}
- $file_writer_object->report_line_length_errors();
+sub get_old_line_index {
- return;
+ # return index of the original line that token K was on
+ my ( $self, $K ) = @_;
+ my $rLL = $self->[_rLL_];
+ return 0 unless defined($K);
+ return $rLL->[$K]->[_LINE_INDEX_];
}
-sub check_options {
+sub get_old_line_count {
- # This routine is called to check the Opts hash after it is defined
- $rOpts = shift;
+ # return number of input lines separating two tokens
+ my ( $self, $Kbeg, $Kend ) = @_;
+ my $rLL = $self->[_rLL_];
+ return 0 unless defined($Kbeg);
+ return 0 unless defined($Kend);
+ return $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_] + 1;
+}
- initialize_whitespace_hashes();
- initialize_bond_strength_hashes();
+sub parent_seqno_by_K {
- make_static_block_comment_pattern();
- make_static_side_comment_pattern();
- make_closing_side_comment_prefix();
- make_closing_side_comment_list_pattern();
- $format_skipping_pattern_begin =
- make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
- $format_skipping_pattern_end =
- make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
+ # Return the sequence number of the parent container of token K, if any.
- # If closing side comments ARE selected, then we can safely
- # delete old closing side comments unless closing side comment
- # warnings are requested. This is a good idea because it will
- # eliminate any old csc's which fall below the line count threshold.
- # We cannot do this if warnings are turned on, though, because we
- # might delete some text which has been added. So that must
- # be handled when comments are created.
- if ( $rOpts->{'closing-side-comments'} ) {
- if ( !$rOpts->{'closing-side-comment-warnings'} ) {
- $rOpts->{'delete-closing-side-comments'} = 1;
+ my ( $self, $KK ) = @_;
+ return unless defined($KK);
+
+ # Note: This routine is relatively slow. I tried replacing it with a hash
+ # which is easily created in sub respace_tokens. But the total time with a
+ # hash was greater because this routine is called once per line whereas a
+ # hash must be created token-by-token.
+
+ my $rLL = $self->[_rLL_];
+ my $KNEXT = $KK;
+
+ # For example, consider the following with seqno=5 of the '[' and ']'
+ # being called with index K of the first token of each line:
+
+ # # result
+ # push @tests, # -
+ # [ # -
+ # sub { 99 }, 'do {&{%s} for 1,2}', # 5
+ # '(&{})(&{})', undef, # 5
+ # [ 2, 2, 0 ], 0 # 5
+ # ]; # -
+
+ # NOTE: The ending parent will be SEQ_ROOT for a balanced file. For
+ # unbalanced files, last sequence number will either be undefined or it may
+ # be at a deeper level. In either case we will just return SEQ_ROOT to
+ # have a defined value and allow formatting to proceed.
+ my $parent_seqno = SEQ_ROOT;
+ while ( defined($KNEXT) ) {
+ my $Kt = $KNEXT;
+ $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
+ my $rtoken_vars = $rLL->[$Kt];
+ my $type = $rtoken_vars->[_TYPE_];
+ my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+
+ # if next container token is closing, it is the parent seqno
+ if ( $is_closing_type{$type} ) {
+ if ( $Kt > $KK ) {
+ $parent_seqno = $type_sequence;
+ }
+ else {
+ $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
+ }
+ last;
}
- }
- # If closing side comments ARE NOT selected, but warnings ARE
- # selected and we ARE DELETING csc's, then we will pretend to be
- # adding with a huge interval. This will force the comments to be
- # generated for comparison with the old comments, but not added.
- elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
- if ( $rOpts->{'delete-closing-side-comments'} ) {
- $rOpts->{'delete-closing-side-comments'} = 0;
- $rOpts->{'closing-side-comments'} = 1;
- $rOpts->{'closing-side-comment-interval'} = 100000000;
+ # if next container token is opening, we want its parent container
+ elsif ( $is_opening_type{$type} ) {
+ $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
+ last;
}
+
+ # not a container - must be ternary - keep going
}
- make_sub_matching_pattern();
- make_bli_pattern();
- make_block_brace_vertical_tightness_pattern();
- make_blank_line_pattern();
- make_keyword_group_list_pattern();
+ $parent_seqno = SEQ_ROOT unless ( defined($parent_seqno) );
+ return $parent_seqno;
+}
- # Make initial list of desired one line block types
- # They will be modified by 'prepare_cuddled_block_types'
- %want_one_line_block = %is_sort_map_grep_eval;
+sub is_in_block_by_i {
+ my ( $self, $i ) = @_;
- prepare_cuddled_block_types();
- if ( $rOpts->{'dump-cuddled-block-list'} ) {
- dump_cuddled_block_list(*STDOUT);
- Exit(0);
- }
+ # returns true if
+ # token at i is contained in a BLOCK
+ # or is at root level
+ # or there is some kind of error (i.e. unbalanced file)
+ # returns false otherwise
+ my $seqno = $parent_seqno_to_go[$i];
+ return 1 if ( !$seqno || $seqno eq SEQ_ROOT );
+ my $Kopening = $self->[_K_opening_container_]->{$seqno};
+ return 1 unless defined($Kopening);
+ my $rLL = $self->[_rLL_];
+ return 1 if $rLL->[$Kopening]->[_BLOCK_TYPE_];
+ return;
+}
- if ( $rOpts->{'line-up-parentheses'} ) {
+sub is_in_list_by_i {
+ my ( $self, $i ) = @_;
- if ( $rOpts->{'indent-only'}
- || !$rOpts->{'add-newlines'}
- || !$rOpts->{'delete-old-newlines'} )
- {
- Warn(<<EOM);
------------------------------------------------------------------------
-Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
-
-The -lp indentation logic requires that perltidy be able to coordinate
-arbitrarily large numbers of line breakpoints. This isn't possible
-with these flags. Sometimes an acceptable workaround is to use -wocb=3
------------------------------------------------------------------------
-EOM
- $rOpts->{'line-up-parentheses'} = 0;
- }
+ # returns true if token at i is contained in a LIST
+ # returns false otherwise
+ my $seqno = $parent_seqno_to_go[$i];
+ return unless ( $seqno && $seqno ne SEQ_ROOT );
+ if ( $self->[_ris_list_by_seqno_]->{$seqno} ) {
+ return 1;
}
+ return;
+}
- # At present, tabs are not compatible with the line-up-parentheses style
- # (it would be possible to entab the total leading whitespace
- # just prior to writing the line, if desired).
- if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
- Warn(<<EOM);
-Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
-EOM
- $rOpts->{'tabs'} = 0;
- }
+sub is_list_by_K {
- # Likewise, tabs are not compatible with outdenting..
- if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
- Warn(<<EOM);
-Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
-EOM
- $rOpts->{'tabs'} = 0;
- }
+ # Return true if token K is in a list
+ my ( $self, $KK ) = @_;
- if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
- Warn(<<EOM);
-Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
-EOM
- $rOpts->{'tabs'} = 0;
- }
+ my $parent_seqno = $self->parent_seqno_by_K($KK);
+ return unless defined($parent_seqno);
+ return $self->[_ris_list_by_seqno_]->{$parent_seqno};
+}
- if ( !$rOpts->{'space-for-semicolon'} ) {
- $want_left_space{'f'} = -1;
- }
+sub is_list_by_seqno {
- if ( $rOpts->{'space-terminal-semicolon'} ) {
- $want_left_space{';'} = 1;
- }
+ # Return true if the immediate contents of a container appears to be a
+ # list.
+ my ( $self, $seqno ) = @_;
+ return unless defined($seqno);
+ return $self->[_ris_list_by_seqno_]->{$seqno};
+}
- # implement outdenting preferences for keywords
- %outdent_keyword = ();
- my @okw = split_words( $rOpts->{'outdent-keyword-okl'} );
- unless (@okw) {
- @okw = qw(next last redo goto return); # defaults
- }
+sub resync_lines_and_tokens {
- # FUTURE: if not a keyword, assume that it is an identifier
- foreach (@okw) {
- if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
- $outdent_keyword{$_} = 1;
- }
- else {
- Warn("ignoring '$_' in -okwl list; not a perl keyword");
- }
- }
+ my $self = shift;
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
+ my $rlines = $self->[_rlines_];
+ my @Krange_code_without_comments;
+ my @Klast_valign_code;
- # implement user whitespace preferences
- if ( my @q = split_words( $rOpts->{'want-left-space'} ) ) {
- @want_left_space{@q} = (1) x scalar(@q);
- }
+ # Re-construct the arrays of tokens associated with the original input lines
+ # since they have probably changed due to inserting and deleting blanks
+ # and a few other tokens.
- if ( my @q = split_words( $rOpts->{'want-right-space'} ) ) {
- @want_right_space{@q} = (1) x scalar(@q);
- }
+ my $Kmax = -1;
- if ( my @q = split_words( $rOpts->{'nowant-left-space'} ) ) {
- @want_left_space{@q} = (-1) x scalar(@q);
+ # This is the next token and its line index:
+ my $Knext = 0;
+ my $inext;
+ if ( defined($rLL) && @{$rLL} ) {
+ $Kmax = @{$rLL} - 1;
+ $inext = $rLL->[$Knext]->[_LINE_INDEX_];
}
- if ( my @q = split_words( $rOpts->{'nowant-right-space'} ) ) {
- @want_right_space{@q} = (-1) x scalar(@q);
- }
- if ( $rOpts->{'dump-want-left-space'} ) {
- dump_want_left_space(*STDOUT);
- Exit(0);
- }
+ # Remember the most recently output token index
+ my $Klast_out;
- if ( $rOpts->{'dump-want-right-space'} ) {
- dump_want_right_space(*STDOUT);
- Exit(0);
- }
+ my $iline = -1;
+ foreach my $line_of_tokens ( @{$rlines} ) {
+ $iline++;
+ my $line_type = $line_of_tokens->{_line_type};
+ my $CODE_type = $line_of_tokens->{_code_type};
+ if ( $line_type eq 'CODE' ) {
- # default keywords for which space is introduced before an opening paren
- # (at present, including them messes up vertical alignment)
- my @sak = qw(my local our and or err eq ne if else elsif until
- unless while for foreach return switch case given when catch);
- @space_after_keyword{@sak} = (1) x scalar(@sak);
+ my @K_array;
+ my $rK_range;
+ if ( $Knext <= $Kmax ) {
+ $inext = $rLL->[$Knext]->[_LINE_INDEX_];
+ while ( $inext <= $iline ) {
+ push @K_array, $Knext;
+ $Knext += 1;
+ if ( $Knext > $Kmax ) {
+ $inext = undef;
+ last;
+ }
+ $inext = $rLL->[$Knext]->[_LINE_INDEX_];
+ }
+ }
- # first remove any or all of these if desired
- if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
+ # Delete any terminal blank token
+ if (@K_array) {
+ if ( $rLL->[ $K_array[-1] ]->[_TYPE_] eq 'b' ) {
+ pop @K_array;
+ }
+ }
- # -nsak='*' selects all the above keywords
- if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) }
- @space_after_keyword{@q} = (0) x scalar(@q);
- }
+ # Define the range of K indexes for the line:
+ # $Kfirst = index of first token on line
+ # $Klast_out = index of last token on line
+ my ( $Kfirst, $Klast );
+ if (@K_array) {
+ $Kfirst = $K_array[0];
+ $Klast = $K_array[-1];
+ $Klast_out = $Klast;
- # then allow user to add to these defaults
- if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) {
- @space_after_keyword{@q} = (1) x scalar(@q);
- }
+ if ( defined($Kfirst) ) {
- # implement user break preferences
- my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
- = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
- . : ? && || and or err xor
- );
+ # Save ranges of non-comment code. This will be used by
+ # sub keep_old_line_breaks.
+ if ( $rLL->[$Kfirst]->[_TYPE_] ne '#' ) {
+ push @Krange_code_without_comments, [ $Kfirst, $Klast ];
+ }
- my $break_after = sub {
- my @toks = @_;
- foreach my $tok (@toks) {
- if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
- my $lbs = $left_bond_strength{$tok};
- my $rbs = $right_bond_strength{$tok};
- if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
- ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
- ( $lbs, $rbs );
+ # Only save ending K indexes of code types which are blank
+ # or 'VER'. These will be used for a convergence check.
+ # See related code in sub 'send_lines_to_vertical_aligner'.
+ if ( !$CODE_type
+ || $CODE_type eq 'VER' )
+ {
+ push @Klast_valign_code, $Klast;
+ }
+ }
}
- }
- };
- my $break_before = sub {
- my @toks = @_;
- foreach my $tok (@toks) {
- my $lbs = $left_bond_strength{$tok};
- my $rbs = $right_bond_strength{$tok};
- if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
- ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
- ( $lbs, $rbs );
+ # It is only safe to trim the actual line text if the input
+ # line had a terminal blank token. Otherwise, we may be
+ # in a quote.
+ if ( $line_of_tokens->{_ended_in_blank_token} ) {
+ $line_of_tokens->{_line_text} =~ s/\s+$//;
}
- }
- };
+ $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
- $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
- $break_before->(@all_operators)
- if ( $rOpts->{'break-before-all-operators'} );
+ # Deleting semicolons can create new empty code lines
+ # which should be marked as blank
+ if ( !defined($Kfirst) ) {
+ my $code_type = $line_of_tokens->{_code_type};
+ if ( !$code_type ) {
+ $line_of_tokens->{_code_type} = 'BL';
+ }
+ }
+ }
+ }
- $break_after->( split_words( $rOpts->{'want-break-after'} ) );
- $break_before->( split_words( $rOpts->{'want-break-before'} ) );
+ # There shouldn't be any nodes beyond the last one. This routine is
+ # relinking lines and tokens after the tokens have been respaced. A fault
+ # here indicates some kind of bug has been introduced into the above loops.
+ if ( defined($inext) ) {
- # make note if breaks are before certain key types
- %want_break_before = ();
- foreach my $tok ( @all_operators, ',' ) {
- $want_break_before{$tok} =
- $left_bond_strength{$tok} < $right_bond_strength{$tok};
+ Fault("unexpected tokens at end of file when reconstructing lines");
}
+ $self->[_rKrange_code_without_comments_] = \@Krange_code_without_comments;
+
+ # Setup the convergence test in the FileWriter based on line-ending indexes
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->setup_convergence_test( \@Klast_valign_code );
+
+ # Mark essential old breakpoints if combination -iob -lp is used. These
+ # two options do not work well together, but we can avoid turning -iob off
+ # by ignoring -iob at certain essential line breaks.
+ # Fixes cases b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058
+ if ( $rOpts_ignore_old_breakpoints && $rOpts_line_up_parentheses ) {
+ my %is_assignment_or_fat_comma = %is_assignment;
+ $is_assignment_or_fat_comma{'=>'} = 1;
+ my $ris_essential_old_breakpoint =
+ $self->[_ris_essential_old_breakpoint_];
+ my $iline = -1;
+ my ( $Kfirst, $Klast );
+ foreach my $line_of_tokens ( @{$rlines} ) {
+ $iline++;
+ my $line_type = $line_of_tokens->{_line_type};
+ if ( $line_type ne 'CODE' ) {
+ ( $Kfirst, $Klast ) = ( undef, undef );
+ next;
+ }
+ my ( $Kfirst_prev, $Klast_prev ) = ( $Kfirst, $Klast );
+ ( $Kfirst, $Klast ) = @{ $line_of_tokens->{_rK_range} };
- # Coordinate ?/: breaks, which must be similar
- if ( !$want_break_before{':'} ) {
- $want_break_before{'?'} = $want_break_before{':'};
- $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
- $left_bond_strength{'?'} = NO_BREAK;
+ next unless defined($Klast_prev);
+ next unless defined($Kfirst);
+ my $type_last = $rLL->[$Klast_prev]->[_TOKEN_];
+ my $type_first = $rLL->[$Kfirst]->[_TOKEN_];
+ next
+ unless ( $is_assignment_or_fat_comma{$type_last}
+ || $is_assignment_or_fat_comma{$type_first} );
+ $ris_essential_old_breakpoint->{$Klast_prev} = 1;
+ }
}
- # Define here tokens which may follow the closing brace of a do statement
- # on the same line, as in:
- # } while ( $something);
- my @dof = qw(until while unless if ; : );
- push @dof, ',';
- @is_do_follower{@dof} = (1) x scalar(@dof);
+ return;
+}
- # What tokens may follow the closing brace of an if or elsif block?
- # Not used. Previously used for cuddled else, but no longer needed.
- %is_if_brace_follower = ();
-
- # nothing can follow the closing curly of an else { } block:
- %is_else_brace_follower = ();
-
- # what can follow a multi-line anonymous sub definition closing curly:
- my @asf = qw# ; : => or and && || ~~ !~~ ) #;
- push @asf, ',';
- @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf);
-
- # what can follow a one-line anonymous sub closing curly:
- # one-line anonymous subs also have ']' here...
- # see tk3.t and PP.pm
- my @asf1 = qw# ; : => or and && || ) ] ~~ !~~ #;
- push @asf1, ',';
- @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1);
-
- # What can follow a closing curly of a block
- # which is not an if/elsif/else/do/sort/map/grep/eval/sub
- # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
- my @obf = qw# ; : => or and && || ) #;
- push @obf, ',';
- @is_other_brace_follower{@obf} = (1) x scalar(@obf);
-
- $right_bond_strength{'{'} = WEAK;
- $left_bond_strength{'{'} = VERY_STRONG;
-
- # make -l=0 equal to -l=infinite
- if ( !$rOpts->{'maximum-line-length'} ) {
- $rOpts->{'maximum-line-length'} = 1000000;
- }
-
- # make -lbl=0 equal to -lbl=infinite
- if ( !$rOpts->{'long-block-line-count'} ) {
- $rOpts->{'long-block-line-count'} = 1000000;
- }
-
- my $enc = $rOpts->{'character-encoding'};
- if ( $enc && $enc !~ /^(none|utf8)$/i ) {
- Die(<<EOM);
-Unrecognized character-encoding '$enc'; expecting one of: (none, utf8)
-EOM
- }
+sub keep_old_line_breaks {
- my $ole = $rOpts->{'output-line-ending'};
- if ($ole) {
- my %endings = (
- dos => "\015\012",
- win => "\015\012",
- mac => "\015",
- unix => "\012",
- );
+ # Called once per file to find and mark any old line breaks which
+ # should be kept. We will be translating the input hashes into
+ # token indexes.
- # Patch for RT #99514, a memoization issue.
- # Normally, the user enters one of 'dos', 'win', etc, and we change the
- # value in the options parameter to be the corresponding line ending
- # character. But, if we are using memoization, on later passes through
- # here the option parameter will already have the desired ending
- # character rather than the keyword 'dos', 'win', etc. So
- # we must check to see if conversion has already been done and, if so,
- # bypass the conversion step.
- my %endings_inverted = (
- "\015\012" => 'dos',
- "\015\012" => 'win',
- "\015" => 'mac',
- "\012" => 'unix',
- );
+ # A flag is set as follows:
+ # = 1 make a hard break (flush the current batch)
+ # best for something like leading commas (-kbb=',')
+ # = 2 make a soft break (keep building current batch)
+ # best for something like leading ->
- if ( defined( $endings_inverted{$ole} ) ) {
+ my ($self) = @_;
- # we already have valid line ending, nothing more to do
- }
- else {
- $ole = lc $ole;
- unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
- my $str = join " ", keys %endings;
- Die(<<EOM);
-Unrecognized line ending '$ole'; expecting one of: $str
-EOM
+ my $rLL = $self->[_rLL_];
+ my $rKrange_code_without_comments =
+ $self->[_rKrange_code_without_comments_];
+ my $rbreak_before_Kfirst = $self->[_rbreak_before_Kfirst_];
+ my $rbreak_after_Klast = $self->[_rbreak_after_Klast_];
+ my $rwant_container_open = $self->[_rwant_container_open_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $ris_broken_container = $self->[_ris_broken_container_];
+ my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
+
+ # This code moved here from sub scan_list to fix b1120
+ if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
+ foreach my $item ( @{$rKrange_code_without_comments} ) {
+ my ( $Kfirst, $Klast ) = @{$item};
+ my $type = $rLL->[$Kfirst]->[_TYPE_];
+ my $token = $rLL->[$Kfirst]->[_TOKEN_];
+
+ # leading '->' use a value of 2 which causes a soft
+ # break rather than a hard break
+ if ( $type eq '->' ) {
+ $rbreak_before_Kfirst->{$Kfirst} = 2;
}
- if ( $rOpts->{'preserve-line-endings'} ) {
- Warn("Ignoring -ple; conflicts with -ole\n");
- $rOpts->{'preserve-line-endings'} = undef;
+
+ # leading ')->' use a special flag to insure that both
+ # opening and closing parens get opened
+ # Fix for b1120: only for parens, not braces
+ elsif ( $token eq ')' ) {
+ my $Kn = $self->K_next_nonblank($Kfirst);
+ next
+ unless ( defined($Kn)
+ && $Kn <= $Klast
+ && $rLL->[$Kn]->[_TYPE_] eq '->' );
+ my $seqno = $rLL->[$Kfirst]->[_TYPE_SEQUENCE_];
+ next unless ($seqno);
+
+ # Patch to avoid blinkers: but do not do this unless the
+ # container holds a list, or the opening and closing parens are
+ # separated by more than one line.
+ # Fixes case b977.
+ next
+ if (
+ !$ris_list_by_seqno->{$seqno}
+ && ( !$ris_broken_container->{$seqno}
+ || $ris_broken_container->{$seqno} <= 1 )
+ );
+ $rwant_container_open->{$seqno} = 1;
}
}
}
- # hashes used to simplify setting whitespace
- %tightness = (
- '{' => $rOpts->{'brace-tightness'},
- '}' => $rOpts->{'brace-tightness'},
- '(' => $rOpts->{'paren-tightness'},
- ')' => $rOpts->{'paren-tightness'},
- '[' => $rOpts->{'square-bracket-tightness'},
- ']' => $rOpts->{'square-bracket-tightness'},
- );
- %matching_token = (
- '{' => '}',
- '(' => ')',
- '[' => ']',
- '?' => ':',
- );
+ return unless ( %keep_break_before_type || %keep_break_after_type );
- if ( $rOpts->{'ignore-old-breakpoints'} ) {
- if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
- Warn("Conflicting parameters: -iob and -bom; -bom will be ignored\n"
- );
- }
- if ( $rOpts->{'break-at-old-comma-breakpoints'} ) {
- Warn("Conflicting parameters: -iob and -boc; -boc will be ignored\n"
- );
+ foreach my $item ( @{$rKrange_code_without_comments} ) {
+ my ( $Kfirst, $Klast ) = @{$item};
+
+ my $type_first = $rLL->[$Kfirst]->[_TYPE_];
+ if ( $keep_break_before_type{$type_first} ) {
+ $rbreak_before_Kfirst->{$Kfirst} = 1;
}
- # Note: there are additional parameters that can be made inactive by
- # -iob, but they are on by default so we would generate excessive
- # warnings if we noted them. They are:
- # $rOpts->{'break-at-old-keyword-breakpoints'}
- # $rOpts->{'break-at-old-logical-breakpoints'}
- # $rOpts->{'break-at-old-ternary-breakpoints'}
- # $rOpts->{'break-at-old-attribute-breakpoints'}
+ my $type_last = $rLL->[$Klast]->[_TYPE_];
+ if ( $keep_break_after_type{$type_last} ) {
+ $rbreak_after_Klast->{$Klast} = 1;
+ }
}
+ return;
+}
- # frequently used parameters
- $rOpts_add_newlines = $rOpts->{'add-newlines'};
- $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
- $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
- $rOpts_block_brace_vertical_tightness =
- $rOpts->{'block-brace-vertical-tightness'};
- $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'};
- $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
- $rOpts_break_at_old_ternary_breakpoints =
- $rOpts->{'break-at-old-ternary-breakpoints'};
- $rOpts_break_at_old_attribute_breakpoints =
- $rOpts->{'break-at-old-attribute-breakpoints'};
- $rOpts_break_at_old_comma_breakpoints =
- $rOpts->{'break-at-old-comma-breakpoints'};
- $rOpts_break_at_old_keyword_breakpoints =
- $rOpts->{'break-at-old-keyword-breakpoints'};
- $rOpts_break_at_old_logical_breakpoints =
- $rOpts->{'break-at-old-logical-breakpoints'};
- $rOpts_break_at_old_method_breakpoints =
- $rOpts->{'break-at-old-method-breakpoints'};
- $rOpts_closing_side_comment_else_flag =
- $rOpts->{'closing-side-comment-else-flag'};
- $rOpts_closing_side_comment_maximum_text =
- $rOpts->{'closing-side-comment-maximum-text'};
- $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
- $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
- $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
- $rOpts_indent_columns = $rOpts->{'indent-columns'};
- $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
- $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
- $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
- $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
- $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
-
- $rOpts_variable_maximum_line_length =
- $rOpts->{'variable-maximum-line-length'};
- $rOpts_short_concatenation_item_length =
- $rOpts->{'short-concatenation-item-length'};
-
- $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
- $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
- $rOpts_format_skipping = $rOpts->{'format-skipping'};
- $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
- $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
- $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
- $rOpts_ignore_side_comment_lengths =
- $rOpts->{'ignore-side-comment-lengths'};
+sub weld_containers {
- # Note that both opening and closing tokens can access the opening
- # and closing flags of their container types.
- %opening_vertical_tightness = (
- '(' => $rOpts->{'paren-vertical-tightness'},
- '{' => $rOpts->{'brace-vertical-tightness'},
- '[' => $rOpts->{'square-bracket-vertical-tightness'},
- ')' => $rOpts->{'paren-vertical-tightness'},
- '}' => $rOpts->{'brace-vertical-tightness'},
- ']' => $rOpts->{'square-bracket-vertical-tightness'},
- );
+ # Called once per file to do any welding operations requested by --weld*
+ # flags.
+ my ($self) = @_;
- %closing_vertical_tightness = (
- '(' => $rOpts->{'paren-vertical-tightness-closing'},
- '{' => $rOpts->{'brace-vertical-tightness-closing'},
- '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
- ')' => $rOpts->{'paren-vertical-tightness-closing'},
- '}' => $rOpts->{'brace-vertical-tightness-closing'},
- ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
- );
+ # This count is used to eliminate needless calls for weld checks elsewere
+ $total_weld_count = 0;
- # assume flag for '>' same as ')' for closing qw quotes
- %closing_token_indentation = (
- ')' => $rOpts->{'closing-paren-indentation'},
- '}' => $rOpts->{'closing-brace-indentation'},
- ']' => $rOpts->{'closing-square-bracket-indentation'},
- '>' => $rOpts->{'closing-paren-indentation'},
- );
+ return if ( $rOpts->{'indent-only'} );
+ return unless ($rOpts_add_newlines);
- # flag indicating if any closing tokens are indented
- $some_closing_token_indentation =
- $rOpts->{'closing-paren-indentation'}
- || $rOpts->{'closing-brace-indentation'}
- || $rOpts->{'closing-square-bracket-indentation'}
- || $rOpts->{'indent-closing-brace'};
+ # Important: sub 'weld_cuddled_blocks' must be called before
+ # sub 'weld_nested_containers'. This is because the cuddled option needs to
+ # use the original _LEVEL_ values of containers, but the weld nested
+ # containers changes _LEVEL_ of welded containers.
- %opening_token_right = (
- '(' => $rOpts->{'opening-paren-right'},
- '{' => $rOpts->{'opening-hash-brace-right'},
- '[' => $rOpts->{'opening-square-bracket-right'},
- );
+ # Here is a good test case to be sure that both cuddling and welding
+ # are working and not interfering with each other: <<snippets/ce_wn1.in>>
- %stack_opening_token = (
- '(' => $rOpts->{'stack-opening-paren'},
- '{' => $rOpts->{'stack-opening-hash-brace'},
- '[' => $rOpts->{'stack-opening-square-bracket'},
- );
+ # perltidy -wn -ce
- %stack_closing_token = (
- ')' => $rOpts->{'stack-closing-paren'},
- '}' => $rOpts->{'stack-closing-hash-brace'},
- ']' => $rOpts->{'stack-closing-square-bracket'},
- );
- $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
- $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
- return;
-}
+ # if ($BOLD_MATH) { (
+ # $labels, $comment,
+ # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
+ # ) } else { (
+ # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
+ # $after
+ # ) }
-sub bad_pattern {
+ $self->weld_cuddled_blocks() if ( %{$rcuddled_block_types} );
- # See if a pattern will compile. We have to use a string eval here,
- # but it should be safe because the pattern has been constructed
- # by this program.
- my ($pattern) = @_;
- eval "'##'=~/$pattern/";
- return $@;
-}
+ if ( $rOpts->{'weld-nested-containers'} ) {
-{
- my %no_cuddle;
+ $self->weld_nested_containers();
- # Add keywords here which really should not be cuddled
- BEGIN {
- my @q = qw(if unless for foreach while);
- @no_cuddle{@q} = (1) x scalar(@q);
+ $self->weld_nested_quotes();
}
- sub prepare_cuddled_block_types {
+ ##############################################################
+ # All welding is done. Finish setting up weld data structures.
+ ##############################################################
- # the cuddled-else style, if used, is controlled by a hash that
- # we construct here
+ my $rLL = $self->[_rLL_];
+ my $rK_weld_left = $self->[_rK_weld_left_];
+ my $rK_weld_right = $self->[_rK_weld_right_];
+ my $rweld_len_right_at_K = $self->[_rweld_len_right_at_K_];
- # Include keywords here which should not be cuddled
+ my @K_multi_weld;
+ my @keys = keys %{$rK_weld_right};
+ $total_weld_count = @keys;
- my $cuddled_string = "";
- if ( $rOpts->{'cuddled-else'} ) {
+ # Note that this loop is processed in unsorted order for efficiency
+ foreach my $Kstart (@keys) {
+ my $Kend = $rK_weld_right->{$Kstart};
- # set the default
- $cuddled_string = 'elsif else continue catch finally'
- unless ( $rOpts->{'cuddled-block-list-exclusive'} );
+ # An error here would be due to an incorrect initialization introduced
+ # in one of the above weld routines, like sub weld_nested.
+ if ( $Kend <= $Kstart ) {
+ Fault("Bad weld link: Kend=$Kend <= Kstart=$Kstart\n");
+ }
- # This is the old equivalent but more complex version
- # $cuddled_string = 'if-elsif-else unless-elsif-else -continue ';
+ $rweld_len_right_at_K->{$Kstart} =
+ $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[$Kstart]->[_CUMULATIVE_LENGTH_];
- # Add users other blocks to be cuddled
- my $cuddled_block_list = $rOpts->{'cuddled-block-list'};
- if ($cuddled_block_list) {
- $cuddled_string .= " " . $cuddled_block_list;
+ $rK_weld_left->{$Kend} = $Kstart; # fix in case of missing left link
+
+ # Remember the leftmost index of welds which continue to the right
+ if ( defined( $rK_weld_right->{$Kend} )
+ && !defined( $rK_weld_left->{$Kstart} ) )
+ {
+ push @K_multi_weld, $Kstart;
+ }
+ }
+
+ # Update the end index and lengths of any long welds to extend to the far
+ # end. This has to be processed in sorted order.
+ # Left links added for b1173.
+ my $Kend = -1;
+ foreach my $Kstart ( sort { $a <=> $b } @K_multi_weld ) {
+
+ # skip any interior K which was originally missing a left link
+ next if ( $Kstart <= $Kend );
+
+ my @Klist;
+ push @Klist, $Kstart;
+ $Kend = $rK_weld_right->{$Kstart};
+ $rK_weld_left->{$Kend} = $Kstart;
+ my $Knext = $rK_weld_right->{$Kend};
+ while ( defined($Knext) ) {
+ push @Klist, $Kend;
+ $Kend = $Knext;
+ $rK_weld_left->{$Kend} = $Kstart;
+ $Knext = $rK_weld_right->{$Kend};
+ }
+ pop @Klist; # values for last entry are already correct
+ foreach my $KK (@Klist) {
+
+ # Ending indexes must only be shifted to the right for long welds.
+ # An error here would be due to a programming error introduced in
+ # the code immediately above.
+ my $Kend_old = $rK_weld_right->{$KK};
+ if ( !defined($Kend_old) || $Kend < $Kend_old ) {
+ Fault(
+"Bad weld link at K=$KK, old end is K=$Kend_old, new end is $Kend\n"
+ );
}
+ $rK_weld_right->{$KK} = $Kend;
+ $rweld_len_right_at_K->{$KK} =
+ $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
}
+ }
- # If we have a cuddled string of the form
- # 'try-catch-finally'
+ return;
+}
- # we want to prepare a hash of the form
+sub cumulative_length_before_K {
+ my ( $self, $KK ) = @_;
+ my $rLL = $self->[_rLL_];
+ return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
+}
- # $rcuddled_block_types = {
- # 'try' => {
- # 'catch' => 1,
- # 'finally' => 1
- # },
- # };
+sub weld_cuddled_blocks {
+ my ($self) = @_;
- # use -dcbl to dump this hash
+ # Called once per file to handle cuddled formatting
- # Multiple such strings are input as a space or comma separated list
-
- # If we get two lists with the same leading type, such as
- # -cbl = "-try-catch-finally -try-catch-otherwise"
- # then they will get merged as follows:
- # $rcuddled_block_types = {
- # 'try' => {
- # 'catch' => 1,
- # 'finally' => 2,
- # 'otherwise' => 1,
- # },
- # };
- # This will allow either type of chain to be followed.
+ my $rK_weld_left = $self->[_rK_weld_left_];
+ my $rK_weld_right = $self->[_rK_weld_right_];
- $cuddled_string =~ s/,/ /g; # allow space or comma separated lists
- my @cuddled_strings = split /\s+/, $cuddled_string;
+ # This routine implements the -cb flag by finding the appropriate
+ # closing and opening block braces and welding them together.
+ return unless ( %{$rcuddled_block_types} );
- $rcuddled_block_types = {};
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
+ my $rbreak_container = $self->[_rbreak_container_];
- # process each dash-separated string...
- my $string_count = 0;
- foreach my $string (@cuddled_strings) {
- next unless $string;
- my @words = split /-+/, $string; # allow multiple dashes
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
- # we could look for and report possible errors here...
- next unless ( @words > 0 );
+ my $length_to_opening_seqno = sub {
+ my ($seqno) = @_;
+ my $KK = $K_opening_container->{$seqno};
+ my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
+ return $lentot;
+ };
+ my $length_to_closing_seqno = sub {
+ my ($seqno) = @_;
+ my $KK = $K_closing_container->{$seqno};
+ my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
+ return $lentot;
+ };
- # allow either '-continue' or *-continue' for arbitrary starting type
- my $start = '*';
+ my $is_broken_block = sub {
- # a single word without dashes is a secondary block type
- if ( @words > 1 ) {
- $start = shift @words;
- }
+ # a block is broken if the input line numbers of the braces differ
+ # we can only cuddle between broken blocks
+ my ($seqno) = @_;
+ my $K_opening = $K_opening_container->{$seqno};
+ return unless ( defined($K_opening) );
+ my $K_closing = $K_closing_container->{$seqno};
+ return unless ( defined($K_closing) );
+ return $rbreak_container->{$seqno}
+ || $rLL->[$K_closing]->[_LINE_INDEX_] !=
+ $rLL->[$K_opening]->[_LINE_INDEX_];
+ };
- # always make an entry for the leading word. If none follow, this
- # will still prevent a wildcard from matching this word.
- if ( !defined( $rcuddled_block_types->{$start} ) ) {
- $rcuddled_block_types->{$start} = {};
- }
+ # A stack to remember open chains at all levels: This is a hash rather than
+ # an array for safety because negative levels can occur in files with
+ # errors. This allows us to keep processing with negative levels.
+ # $in_chain{$level} = [$chain_type, $type_sequence];
+ my %in_chain;
+ my $CBO = $rOpts->{'cuddled-break-option'};
- # The count gives the original word order in case we ever want it.
- $string_count++;
- my $word_count = 0;
- foreach my $word (@words) {
- next unless $word;
- if ( $no_cuddle{$word} ) {
- Warn(
-"## Ignoring keyword '$word' in -cbl; does not seem right\n"
- );
- next;
- }
- $word_count++;
- $rcuddled_block_types->{$start}->{$word} =
- 1; #"$string_count.$word_count";
+ # loop over structure items to find cuddled pairs
+ my $level = 0;
+ my $KNEXT = $self->[_K_first_seq_item_];
+ while ( defined($KNEXT) ) {
+ my $KK = $KNEXT;
+ $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
+ my $rtoken_vars = $rLL->[$KK];
+ my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+ if ( !$type_sequence ) {
+ next if ( $KK == 0 ); # first token in file may not be container
- # git#9: Remove this word from the list of desired one-line
- # blocks
- $want_one_line_block{$word} = 0;
- }
+ # A fault here implies that an error was made in the little loop at
+ # the bottom of sub 'respace_tokens' which set the values of
+ # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
+ # loop control lines above.
+ Fault("sequence = $type_sequence not defined at K=$KK");
}
- return;
- }
-}
-sub dump_cuddled_block_list {
- my ($fh) = @_;
+ # NOTE: we must use the original levels here. They can get changed
+ # by sub 'weld_nested_containers', so this routine must be called
+ # before sub 'weld_nested_containers'.
+ my $last_level = $level;
+ $level = $rtoken_vars->[_LEVEL_];
- # ORIGINAL METHOD: Here is the format of the cuddled block type hash
- # which controls this routine
- # my $rcuddled_block_types = {
- # 'if' => {
- # 'else' => 1,
- # 'elsif' => 1
- # },
- # 'try' => {
- # 'catch' => 1,
- # 'finally' => 1
- # },
- # };
+ if ( $level < $last_level ) { $in_chain{$last_level} = undef }
+ elsif ( $level > $last_level ) { $in_chain{$level} = undef }
- # SIMPLFIED METHOD: the simplified method uses a wildcard for
- # the starting block type and puts all cuddled blocks together:
- # my $rcuddled_block_types = {
- # '*' => {
- # 'else' => 1,
- # 'elsif' => 1
- # 'catch' => 1,
- # 'finally' => 1
- # },
- # };
+ # We are only looking at code blocks
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $type = $rtoken_vars->[_TYPE_];
+ next unless ( $type eq $token );
- # Both methods work, but the simplified method has proven to be adequate and
- # easier to manage.
+ if ( $token eq '{' ) {
- my $cuddled_string = $rOpts->{'cuddled-block-list'};
- $cuddled_string = '' unless $cuddled_string;
+ my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
+ if ( !$block_type ) {
- my $flags = "";
- $flags .= "-ce" if ( $rOpts->{'cuddled-else'} );
- $flags .= " -cbl='$cuddled_string'";
+ # patch for unrecognized block types which may not be labeled
+ my $Kp = $self->K_previous_nonblank($KK);
+ while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
+ $Kp = $self->K_previous_nonblank($Kp);
+ }
+ next unless $Kp;
+ $block_type = $rLL->[$Kp]->[_TOKEN_];
- unless ( $rOpts->{'cuddled-else'} ) {
- $flags .= "\nNote: You must specify -ce to generate a cuddled hash";
- }
+ }
+ if ( $in_chain{$level} ) {
- $fh->print(<<EOM);
-------------------------------------------------------------------------
-Hash of cuddled block types prepared for a run with these parameters:
- $flags
-------------------------------------------------------------------------
-EOM
+ # we are in a chain and are at an opening block brace.
+ # See if we are welding this opening brace with the previous
+ # block brace. Get their identification numbers:
+ my $closing_seqno = $in_chain{$level}->[1];
+ my $opening_seqno = $type_sequence;
- use Data::Dumper;
- $fh->print( Dumper($rcuddled_block_types) );
+ # The preceding block must be on multiple lines so that its
+ # closing brace will start a new line.
+ if ( !$is_broken_block->($closing_seqno) ) {
+ next unless ( $CBO == 2 );
+ $rbreak_container->{$closing_seqno} = 1;
+ }
- $fh->print(<<EOM);
-------------------------------------------------------------------------
-EOM
- return;
-}
+ # we will let the trailing block be either broken or intact
+ ## && $is_broken_block->($opening_seqno);
-sub make_static_block_comment_pattern {
+ # We can weld the closing brace to its following word ..
+ my $Ko = $K_closing_container->{$closing_seqno};
+ my $Kon;
+ if ( defined($Ko) ) {
+ $Kon = $self->K_next_nonblank($Ko);
+ }
- # create the pattern used to identify static block comments
- $static_block_comment_pattern = '^\s*##';
+ # ..unless it is a comment
+ if ( defined($Kon) && $rLL->[$Kon]->[_TYPE_] ne '#' ) {
- # allow the user to change it
- if ( $rOpts->{'static-block-comment-prefix'} ) {
- my $prefix = $rOpts->{'static-block-comment-prefix'};
- $prefix =~ s/^\s*//;
- my $pattern = $prefix;
+ # OK to weld these two tokens...
+ $rK_weld_right->{$Ko} = $Kon;
+ $rK_weld_left->{$Kon} = $Ko;
+
+ # Set flag that we want to break the next container
+ # so that the cuddled line is balanced.
+ $rbreak_container->{$opening_seqno} = 1
+ if ($CBO);
+ }
- # user may give leading caret to force matching left comments only
- if ( $prefix !~ /^\^#/ ) {
- if ( $prefix !~ /^#/ ) {
- Die(
-"ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"
- );
}
- $pattern = '^\s*' . $prefix;
- }
- if ( bad_pattern($pattern) ) {
- Die(
-"ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"
- );
+ else {
+
+ # We are not in a chain. Start a new chain if we see the
+ # starting block type.
+ if ( $rcuddled_block_types->{$block_type} ) {
+ $in_chain{$level} = [ $block_type, $type_sequence ];
+ }
+ else {
+ $block_type = '*';
+ $in_chain{$level} = [ $block_type, $type_sequence ];
+ }
+ }
}
- $static_block_comment_pattern = $pattern;
- }
- return;
-}
+ elsif ( $token eq '}' ) {
+ if ( $in_chain{$level} ) {
-sub make_format_skipping_pattern {
- my ( $opt_name, $default ) = @_;
- my $param = $rOpts->{$opt_name};
- unless ($param) { $param = $default }
- $param =~ s/^\s*//;
- if ( $param !~ /^#/ ) {
- Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
- }
- my $pattern = '^' . $param . '\s';
- if ( bad_pattern($pattern) ) {
- Die(
-"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
- );
- }
- return $pattern;
-}
+ # We are in a chain at a closing brace. See if this chain
+ # continues..
+ my $Knn = $self->K_next_code($KK);
+ next unless $Knn;
-sub make_closing_side_comment_list_pattern {
+ my $chain_type = $in_chain{$level}->[0];
+ my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
+ if (
+ $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
+ )
+ {
- # turn any input list into a regex for recognizing selected block types
- $closing_side_comment_list_pattern = '^\w+';
- if ( defined( $rOpts->{'closing-side-comment-list'} )
- && $rOpts->{'closing-side-comment-list'} )
- {
- $closing_side_comment_list_pattern =
- make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
+ # Note that we do not weld yet because we must wait until
+ # we we are sure that an opening brace for this follows.
+ $in_chain{$level}->[1] = $type_sequence;
+ }
+ else { $in_chain{$level} = undef }
+ }
+ }
}
return;
}
-sub make_sub_matching_pattern {
-
- $SUB_PATTERN = '^sub\s+(::|\w)';
- $ASUB_PATTERN = '^sub$';
+sub find_nested_pairs {
+ my $self = shift;
- if ( $rOpts->{'sub-alias-list'} ) {
+ # This routine is called once per file to do preliminary work needed for
+ # the --weld-nested option. This information is also needed for adding
+ # semicolons.
- # Note that any 'sub-alias-list' has been preprocessed to
- # be a trimmed, space-separated list which includes 'sub'
- # for example, it might be 'sub method fun'
- my $sub_alias_list = $rOpts->{'sub-alias-list'};
- $sub_alias_list =~ s/\s+/\|/g;
- $SUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
- $ASUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
- }
- return;
-}
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
+ my $Num = @{$rLL};
-sub make_bli_pattern {
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
- if ( defined( $rOpts->{'brace-left-and-indent-list'} )
- && $rOpts->{'brace-left-and-indent-list'} )
- {
- $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
- }
+ # We define an array of pairs of nested containers
+ my @nested_pairs;
- $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
- return;
-}
+ # Names of calling routines can either be marked as 'i' or 'w',
+ # and they may invoke a sub call with an '->'. We will consider
+ # any consecutive string of such types as a single unit when making
+ # weld decisions. We also allow a leading !
+ my $is_name_type = {
+ 'i' => 1,
+ 'w' => 1,
+ 'U' => 1,
+ '->' => 1,
+ '!' => 1,
+ };
-sub make_keyword_group_list_pattern {
+ # Loop over all closing container tokens
+ foreach my $inner_seqno ( keys %{$K_closing_container} ) {
+ my $K_inner_closing = $K_closing_container->{$inner_seqno};
+
+ # See if it is immediately followed by another, outer closing token
+ my $K_outer_closing = $K_inner_closing + 1;
+ $K_outer_closing += 1
+ if ( $K_outer_closing < $Num
+ && $rLL->[$K_outer_closing]->[_TYPE_] eq 'b' );
+
+ next unless ( $K_outer_closing < $Num );
+ my $outer_seqno = $rLL->[$K_outer_closing]->[_TYPE_SEQUENCE_];
+ next unless ($outer_seqno);
+ my $token_outer_closing = $rLL->[$K_outer_closing]->[_TOKEN_];
+ next unless ( $is_closing_token{$token_outer_closing} );
+
+ # Now we have to check the opening tokens.
+ my $K_outer_opening = $K_opening_container->{$outer_seqno};
+ my $K_inner_opening = $K_opening_container->{$inner_seqno};
+ next unless defined($K_outer_opening) && defined($K_inner_opening);
+
+ # Verify that the inner opening token is the next container after the
+ # outer opening token.
+ my $K_io_check = $rLL->[$K_outer_opening]->[_KNEXT_SEQ_ITEM_];
+ next unless defined($K_io_check);
+ if ( $K_io_check != $K_inner_opening ) {
+
+ # The inner opening container does not immediately follow the outer
+ # opening container, but we may still allow a weld if they are
+ # separated by a sub signature. For example, we may have something
+ # like this, where $K_io_check may be at the first 'x' instead of
+ # 'io'. So we need to hop over the signature and see if we arrive
+ # at 'io'.
+
+ # oo io
+ # | x x |
+ # $obj->then( sub ( $code ) {
+ # ...
+ # return $c->render(text => '', status => $code);
+ # } );
+ # | |
+ # ic oc
+
+ next if $rLL->[$K_inner_opening]->[_BLOCK_TYPE_] ne 'sub';
+ next if $rLL->[$K_io_check]->[_TOKEN_] ne '(';
+ my $seqno_signature = $rLL->[$K_io_check]->[_TYPE_SEQUENCE_];
+ next unless defined($seqno_signature);
+ my $K_signature_closing = $K_closing_container->{$seqno_signature};
+ next unless defined($K_signature_closing);
+ my $K_test = $rLL->[$K_signature_closing]->[_KNEXT_SEQ_ITEM_];
+ next
+ unless ( defined($K_test) && $K_test == $K_inner_opening );
+
+ # OK, we have arrived at 'io' in the above diagram. We should put
+ # a limit on the length or complexity of the signature here. There
+ # is no perfect way to do this, one way is to put a limit on token
+ # count. For consistency with older versions, we should allow a
+ # signature with a single variable to weld, but not with
+ # multiple variables. A single variable as in 'sub ($code) {' can
+ # have a $Kdiff of 2 to 4, depending on spacing.
+
+ # But two variables like 'sub ($v1,$v2) {' can have a diff of 4 to
+ # 7, depending on spacing. So to keep formatting consistent with
+ # previous versions, we will also avoid welding if there is a comma
+ # in the signature.
+
+ my $Kdiff = $K_signature_closing - $K_io_check;
+ next if ( $Kdiff > 4 );
+
+ my $saw_comma;
+ foreach my $KK ( $K_io_check + 1 .. $K_signature_closing - 1 ) {
+ if ( $rLL->[$KK]->[_TYPE_] eq ',' ) { $saw_comma = 1; last }
+ }
+ next if ($saw_comma);
+ }
+
+ # Yes .. this is a possible nesting pair.
+ # They can be separated by a small amount.
+ my $K_diff = $K_inner_opening - $K_outer_opening;
+
+ # Count nonblank characters separating them.
+ if ( $K_diff < 0 ) { next } # Shouldn't happen
+ my $Kn = $K_outer_opening;
+ my $nonblank_count = 0;
+ my $type;
+ my $is_name;
+
+ # Here is an example of a long identifier chain which counts as a
+ # single nonblank here (this spans about 10 K indexes):
+ # if ( !Boucherot::SetOfConnections->new->handler->execute(
+ # ^--K_o_o ^--K_i_o
+ # @array) )
+ my $Kn_first = $K_outer_opening;
+ my $Kn_last_nonblank;
+ for (
+ my $Kn = $K_outer_opening + 1 ;
+ $Kn <= $K_inner_opening ;
+ $Kn += 1
+ )
+ {
+ next if ( $rLL->[$Kn]->[_TYPE_] eq 'b' );
+ if ( !$nonblank_count ) { $Kn_first = $Kn }
+ if ( $Kn eq $K_inner_opening ) { $nonblank_count++; last; }
+ $Kn_last_nonblank = $Kn;
+
+ # skip chain of identifier tokens
+ my $last_type = $type;
+ my $last_is_name = $is_name;
+ $type = $rLL->[$Kn]->[_TYPE_];
+ $is_name = $is_name_type->{$type};
+ next if ( $is_name && $last_is_name );
+
+ $nonblank_count++;
+ last if ( $nonblank_count > 2 );
+ }
+
+ # Patch for b1104: do not weld to a paren preceded by sort/map/grep
+ # because the special line break rules may cause a blinking state
+ if ( defined($Kn_last_nonblank)
+ && $rLL->[$K_inner_opening]->[_TOKEN_] eq '('
+ && $rLL->[$Kn_last_nonblank]->[_TYPE_] eq 'k' )
+ {
+ my $token = $rLL->[$Kn_last_nonblank]->[_TOKEN_];
- # turn any input list into a regex for recognizing selected block types.
- # Here are the defaults:
- $keyword_group_list_pattern = '^(our|local|my|use|require|)$';
- $keyword_group_list_comment_pattern = '';
- if ( defined( $rOpts->{'keyword-group-blanks-list'} )
- && $rOpts->{'keyword-group-blanks-list'} )
- {
- my @words = split /\s+/, $rOpts->{'keyword-group-blanks-list'};
- my @keyword_list;
- my @comment_list;
- foreach my $word (@words) {
- if ( $word =~ /^(BC|SBC)$/ ) {
- push @comment_list, $word;
- if ( $word eq 'SBC' ) { push @comment_list, 'SBCX' }
- }
- else {
- push @keyword_list, $word;
- }
+ # Turn off welding at sort/map/grep (
+ if ( $is_sort_map_grep{$token} ) { $nonblank_count = 10 }
}
- $keyword_group_list_pattern =
- make_block_pattern( '-kgbl', $rOpts->{'keyword-group-blanks-list'} );
- $keyword_group_list_comment_pattern =
- make_block_pattern( '-kgbl', join( ' ', @comment_list ) );
- }
- return;
-}
-sub make_block_brace_vertical_tightness_pattern {
+ if (
- # turn any input list into a regex for recognizing selected block types
- $block_brace_vertical_tightness_pattern =
- '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
- if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
- && $rOpts->{'block-brace-vertical-tightness-list'} )
- {
- $block_brace_vertical_tightness_pattern =
- make_block_pattern( '-bbvtl',
- $rOpts->{'block-brace-vertical-tightness-list'} );
+ # adjacent opening containers, like: do {{
+ $nonblank_count == 1
+
+ # short item following opening paren, like: fun( yyy (
+ || ( $nonblank_count == 2
+ && $rLL->[$K_outer_opening]->[_TOKEN_] eq '(' )
+
+ # anonymous sub + prototype or sig: )->then( sub ($code) {
+ # ... but it seems best not to stack two structural blocks, like
+ # this
+ # sub make_anon_with_my_sub { sub {
+ # because it probably hides the structure a little too much.
+ || ( $rLL->[$K_inner_opening]->[_BLOCK_TYPE_] eq 'sub'
+ && $rLL->[$Kn_first]->[_TOKEN_] eq 'sub'
+ && !$rLL->[$K_outer_opening]->[_BLOCK_TYPE_] )
+ )
+ {
+ push @nested_pairs,
+ [ $inner_seqno, $outer_seqno, $K_inner_closing ];
+ }
+ next;
}
- return;
-}
-sub make_blank_line_pattern {
+ # The weld routine expects the pairs in order in the form
+ # [$seqno_inner, $seqno_outer]
+ # And they must be in the same order as the inner closing tokens
+ # (otherwise, welds of three or more adjacent tokens will not work). The K
+ # value of this inner closing token has temporarily been stored for
+ # sorting.
+ @nested_pairs =
- $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
- my $key = 'blank-lines-before-closing-block-list';
- if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
- $blank_lines_before_closing_block_pattern =
- make_block_pattern( '-blbcl', $rOpts->{$key} );
- }
+ # Drop the K index after sorting (it would cause trouble downstream)
+ map { [ $_->[0], $_->[1] ] }
- $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
- $key = 'blank-lines-after-opening-block-list';
- if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
- $blank_lines_after_opening_block_pattern =
- make_block_pattern( '-blaol', $rOpts->{$key} );
- }
- return;
+ # Sort on the K values
+ sort { $a->[2] <=> $b->[2] } @nested_pairs;
+
+ return \@nested_pairs;
}
-sub make_block_pattern {
+sub is_excluded_weld {
+
+ # decide if this weld is excluded by user request
+ my ( $self, $KK, $is_leading ) = @_;
+ my $rLL = $self->[_rLL_];
+ my $rtoken_vars = $rLL->[$KK];
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $rflags = $weld_nested_exclusion_rules{$token};
+ return 0 unless ( defined($rflags) );
+ my $flag = $is_leading ? $rflags->[0] : $rflags->[1];
+ return 0 unless ( defined($flag) );
+ return 1 if $flag eq '*';
+
+ my ( $is_f, $is_k, $is_w );
+ my $Kp = $self->K_previous_nonblank($KK);
+ if ( defined($Kp) ) {
+ my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
+ my $type_p = $rLL->[$Kp]->[_TYPE_];
+
+ # keyword?
+ $is_k = $type_p eq 'k';
+
+ # function call?
+ $is_f = $self->[_ris_function_call_paren_]->{$seqno};
+
+ # either keyword or function call?
+ $is_w = $is_k || $is_f;
+ }
+
+ my $match;
+ if ( $flag eq 'k' ) { $match = $is_k }
+ elsif ( $flag eq 'K' ) { $match = !$is_k }
+ elsif ( $flag eq 'f' ) { $match = $is_f }
+ elsif ( $flag eq 'F' ) { $match = !$is_f }
+ elsif ( $flag eq 'w' ) { $match = $is_w }
+ elsif ( $flag eq 'W' ) { $match = !$is_w }
+ return $match;
+}
- # given a string of block-type keywords, return a regex to match them
- # The only tricky part is that labels are indicated with a single ':'
- # and the 'sub' token text may have additional text after it (name of
- # sub).
- #
- # Example:
- #
- # input string: "if else elsif unless while for foreach do : sub";
- # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
+# types needed for welding RULE 6
+my %type_ok_after_bareword;
- # Minor Update:
- #
- # To distinguish between anonymous subs and named subs, use 'sub' to
- # indicate a named sub, and 'asub' to indicate an anonymous sub
+BEGIN {
- my ( $abbrev, $string ) = @_;
- my @list = split_words($string);
- my @words = ();
- my %seen;
- for my $i (@list) {
- if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
- next if $seen{$i};
- $seen{$i} = 1;
- if ( $i eq 'sub' ) {
- }
- elsif ( $i eq 'asub' ) {
- }
- elsif ( $i eq ';' ) {
- push @words, ';';
- }
- elsif ( $i eq '{' ) {
- push @words, '\{';
- }
- elsif ( $i eq ':' ) {
- push @words, '\w+:';
- }
- elsif ( $i =~ /^\w/ ) {
- push @words, $i;
- }
- else {
- Warn("unrecognized block type $i after $abbrev, ignoring\n");
- }
- }
- my $pattern = '(' . join( '|', @words ) . ')$';
- my $sub_patterns = "";
- if ( $seen{'sub'} ) {
- $sub_patterns .= '|' . $SUB_PATTERN;
- }
- if ( $seen{'asub'} ) {
- $sub_patterns .= '|' . $ASUB_PATTERN;
- }
- if ($sub_patterns) {
- $pattern = '(' . $pattern . $sub_patterns . ')';
- }
- $pattern = '^' . $pattern;
- return $pattern;
+ my @q = qw# => -> { ( [ #;
+ @type_ok_after_bareword{@q} = (1) x scalar(@q);
}
-sub make_static_side_comment_pattern {
+use constant DEBUG_WELD => 0;
- # create the pattern used to identify static side comments
- $static_side_comment_pattern = '^##';
+sub setup_new_weld_measurements {
- # allow the user to change it
- if ( $rOpts->{'static-side-comment-prefix'} ) {
- my $prefix = $rOpts->{'static-side-comment-prefix'};
- $prefix =~ s/^\s*//;
- my $pattern = '^' . $prefix;
- if ( bad_pattern($pattern) ) {
- Die(
-"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"
- );
- }
- $static_side_comment_pattern = $pattern;
- }
- return;
-}
+ # Define quantities to check for excess line lengths when welded.
+ # Called by sub 'weld_nested_containers' and sub 'weld_nested_quotes'
-sub make_closing_side_comment_prefix {
+ my ( $self, $Kouter_opening, $Kinner_opening ) = @_;
- # Be sure we have a valid closing side comment prefix
- my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
- my $csc_prefix_pattern;
- if ( !defined($csc_prefix) ) {
- $csc_prefix = '## end';
- $csc_prefix_pattern = '^##\s+end';
- }
- else {
- my $test_csc_prefix = $csc_prefix;
- if ( $test_csc_prefix !~ /^#/ ) {
- $test_csc_prefix = '#' . $test_csc_prefix;
- }
+ # Given indexes of outer and inner opening containers to be welded:
+ # $Kouter_opening, $Kinner_opening
- # make a regex to recognize the prefix
- my $test_csc_prefix_pattern = $test_csc_prefix;
+ # Returns these variables:
+ # $new_weld_ok = true (new weld ok) or false (do not start new weld)
+ # $starting_indent = starting indentation
+ # $starting_lentot = starting cumulative length
+ # $msg = diagnostic message for debugging
- # escape any special characters
- $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
+ my $rLL = $self->[_rLL_];
+ my $rlines = $self->[_rlines_];
- $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
+ my $starting_level;
+ my $starting_ci;
+ my $starting_lentot;
+ my $maximum_text_length;
+ my $msg = "";
+
+ my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
+ my $rK_range = $rlines->[$iline_oo]->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+
+ # Define a reference index from which to start measuring
+ my $Kref = $Kfirst;
+ my $Kprev = $self->K_previous_nonblank($Kfirst);
+ if ( defined($Kprev) ) {
+
+ # The -iob and -wn flags do not work well together. To avoid
+ # blinking states we have to override -iob at certain key line
+ # breaks.
+ $self->[_ris_essential_old_breakpoint_]->{$Kprev} = 1;
+
+ # Back up and count length from a token like '=' or '=>' if -lp
+ # is used (this fixes b520)
+ # ...or if a break is wanted before there
+ my $type_prev = $rLL->[$Kprev]->[_TYPE_];
+ if ( $rOpts_line_up_parentheses
+ || $want_break_before{$type_prev} )
+ {
+ if ( substr( $type_prev, 0, 1 ) eq '=' ) {
+ $Kref = $Kprev;
+
+ # Fix for b1144 and b1112: backup to the first nonblank
+ # character before the =>, or to the start of its line.
+ if ( $type_prev eq '=>' ) {
+ my $iline_prev = $rLL->[$Kprev]->[_LINE_INDEX_];
+ my $rK_range = $rlines->[$iline_prev]->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ for ( my $KK = $Kref - 1 ; $KK >= $Kfirst ; $KK-- ) {
+ next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
+ $Kref = $KK;
+ last;
+ }
+ }
+ }
+ }
+ }
- # allow exact number of intermediate spaces to vary
- $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
+ # Define the starting measurements we will need
+ $starting_lentot =
+ $Kref <= 0 ? 0 : $rLL->[ $Kref - 1 ]->[_CUMULATIVE_LENGTH_];
+ $starting_level = $rLL->[$Kref]->[_LEVEL_];
+ $starting_ci = $rLL->[$Kref]->[_CI_LEVEL_];
- # make sure we have a good pattern
- # if we fail this we probably have an error in escaping
- # characters.
+ $maximum_text_length = $maximum_text_length_at_level[$starting_level] -
+ $starting_ci * $rOpts_continuation_indentation;
- if ( bad_pattern($test_csc_prefix_pattern) ) {
+ # Now fix these if necessary to avoid known problems...
- # shouldn't happen..must have screwed up escaping, above
- report_definite_bug();
- Warn(
-"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"
- );
+ # FIX1: Switch to using the outer opening token as the reference
+ # point if a line break before it would make a longer line.
+ # Fixes case b1055 and is also an alternate fix for b1065.
+ my $starting_level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
+ if ( $Kref < $Kouter_opening ) {
+ my $starting_ci_oo = $rLL->[$Kouter_opening]->[_CI_LEVEL_];
+ my $lentot_oo = $rLL->[ $Kouter_opening - 1 ]->[_CUMULATIVE_LENGTH_];
+ my $maximum_text_length_oo =
+ $maximum_text_length_at_level[$starting_level_oo] -
+ $starting_ci_oo * $rOpts_continuation_indentation;
- # just warn and keep going with defaults
- Warn("Please consider using a simpler -cscp prefix\n");
- Warn("Using default -cscp instead; please check output\n");
- }
- else {
- $csc_prefix = $test_csc_prefix;
- $csc_prefix_pattern = $test_csc_prefix_pattern;
+ # The excess length to any cumulative length K = lenK is either
+ # $excess = $lenk - ($lentot + $maximum_text_length), or
+ # $excess = $lenk - ($lentot_oo + $maximum_text_length_oo),
+ # so the worst case (maximum excess) corresponds to the configuration
+ # with minimum value of the sum: $lentot + $maximum_text_length
+ if ( $lentot_oo + $maximum_text_length_oo <
+ $starting_lentot + $maximum_text_length )
+ {
+ $Kref = $Kouter_opening;
+ $starting_level = $starting_level_oo;
+ $starting_ci = $starting_ci_oo;
+ $starting_lentot = $lentot_oo;
+ $maximum_text_length = $maximum_text_length_oo;
+ }
+ }
+
+ my $new_weld_ok = 1;
+
+ # FIX2 for b1020: Avoid problem areas with the -wn -lp combination. The
+ # combination -wn -lp -dws -naws does not work well and can cause blinkers.
+ # It will probably only occur in stress testing. For this situation we
+ # will only start a new weld if we start at a 'good' location.
+ # - Added 'if' to fix case b1032.
+ # - Require blank before certain previous characters to fix b1111.
+ # - Add ';' to fix case b1139
+ # - Convert from '$ok_to_weld' to '$new_weld_ok' to fix b1162.
+ if ( $starting_ci
+ && $rOpts_line_up_parentheses
+ && $rOpts_delete_old_whitespace
+ && !$rOpts_add_whitespace
+ && defined($Kprev) )
+ {
+ my $type_first = $rLL->[$Kfirst]->[_TYPE_];
+ my $token_first = $rLL->[$Kfirst]->[_TOKEN_];
+ my $type_prev = $rLL->[$Kprev]->[_TYPE_];
+ my $type_pp = 'b';
+ if ( $Kprev >= 0 ) { $type_pp = $rLL->[ $Kprev - 1 ]->[_TYPE_] }
+ unless (
+ $type_prev =~ /^[\,\.\;]/
+ || $type_prev =~ /^[=\{\[\(\L]/ && $type_pp eq 'b'
+ || $type_first =~ /^[=\,\.\;\{\[\(\L]/
+ || $type_first eq '||'
+ || ( $type_first eq 'k' && $token_first eq 'if'
+ || $token_first eq 'or' )
+ )
+ {
+ $msg =
+"Skipping weld: poor break with -lp and ci at type_first='$type_first' type_prev='$type_prev'\n";
+ $new_weld_ok = 0;
}
}
- $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
- $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
- return;
-}
-sub dump_want_left_space {
- my $fh = shift;
- local $" = "\n";
- print $fh <<EOM;
-These values are the main control of whitespace to the left of a token type;
-They may be altered with the -wls parameter.
-For a list of token types, use perltidy --dump-token-types (-dtt)
- 1 means the token wants a space to its left
--1 means the token does not want a space to its left
-------------------------------------------------------------------------
-EOM
- foreach my $key ( sort keys %want_left_space ) {
- print $fh "$key\t$want_left_space{$key}\n";
- }
- return;
+ return ( $new_weld_ok, $maximum_text_length, $starting_lentot, $msg );
}
-sub dump_want_right_space {
- my $fh = shift;
- local $" = "\n";
- print $fh <<EOM;
-These values are the main control of whitespace to the right of a token type;
-They may be altered with the -wrs parameter.
-For a list of token types, use perltidy --dump-token-types (-dtt)
- 1 means the token wants a space to its right
--1 means the token does not want a space to its right
-------------------------------------------------------------------------
-EOM
- foreach my $key ( sort keys %want_right_space ) {
- print $fh "$key\t$want_right_space{$key}\n";
+sub excess_line_length_for_Krange {
+ my ( $self, $Kfirst, $Klast ) = @_;
+
+ # returns $excess_length =
+ # by how many characters a line composed of tokens $Kfirst .. $Klast will
+ # exceed the allowed line length
+
+ my $rLL = $self->[_rLL_];
+ my $length_before_Kfirst =
+ $Kfirst <= 0
+ ? 0
+ : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
+
+ # backup before a side comment if necessary
+ my $Kend = $Klast;
+ if ( $rOpts_ignore_side_comment_lengths
+ && $rLL->[$Klast]->[_TYPE_] eq '#' )
+ {
+ my $Kprev = $self->K_previous_nonblank($Klast);
+ if ( defined($Kprev) && $Kprev >= $Kfirst ) { $Kend = $Kprev }
}
- return;
-}
-{ # begin is_essential_whitespace
+ # get the length of the text
+ my $length = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - $length_before_Kfirst;
- my %is_sort_grep_map;
- my %is_for_foreach;
+ # get the size of the text window
+ my $level = $rLL->[$Kfirst]->[_LEVEL_];
+ my $ci_level = $rLL->[$Kfirst]->[_CI_LEVEL_];
+ my $max_text_length = $maximum_text_length_at_level[$level] -
+ $ci_level * $rOpts_continuation_indentation;
- BEGIN {
+ my $excess_length = $length - $max_text_length;
- my @q;
- @q = qw(sort grep map);
- @is_sort_grep_map{@q} = (1) x scalar(@q);
-
- @q = qw(for foreach);
- @is_for_foreach{@q} = (1) x scalar(@q);
-
- }
-
- sub is_essential_whitespace {
-
- # Essential whitespace means whitespace which cannot be safely deleted
- # without risking the introduction of a syntax error.
- # We are given three tokens and their types:
- # ($tokenl, $typel) is the token to the left of the space in question
- # ($tokenr, $typer) is the token to the right of the space in question
- # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
- #
- # This is a slow routine but is not needed too often except when -mangle
- # is used.
- #
- # Note: This routine should almost never need to be changed. It is
- # for avoiding syntax problems rather than for formatting.
- my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
-
- my $result =
-
- # never combine two bare words or numbers
- # examples: and ::ok(1)
- # return ::spw(...)
- # for bla::bla:: abc
- # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
- # $input eq"quit" to make $inputeq"quit"
- # my $size=-s::SINK if $file; <==OK but we won't do it
- # don't join something like: for bla::bla:: abc
- # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
- ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
- && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
-
- # do not combine a number with a concatenation dot
- # example: pom.caputo:
- # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
- || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
- || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )
-
- # do not join a minus with a bare word, because you might form
- # a file test operator. Example from Complex.pm:
- # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
- || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )
+ DEBUG_WELD
+ && print
+"Kfirst=$Kfirst, Klast=$Klast, Kend=$Kend, level=$level, ci=$ci_level, max_text_length=$max_text_length, length=$length\n";
+ return ($excess_length);
+}
- # do not join a bare word with a minus, like between 'Send' and
- # '-recipients' here <<snippets/space3.in>>
- # my $msg = new Fax::Send
- # -recipients => $to,
- # -data => $data;
- # This is the safest thing to do. If we had the token to the right of
- # the minus we could do a better check.
- || ( ( $tokenr eq '-' ) && ( $typel eq 'w' ) )
+sub weld_nested_containers {
+ my ($self) = @_;
- # and something like this could become ambiguous without space
- # after the '-':
- # use constant III=>1;
- # $a = $b - III;
- # and even this:
- # $a = - III;
- || ( ( $tokenl eq '-' )
- && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )
+ # Called once per file for option '--weld-nested-containers'
- # '= -' should not become =- or you will get a warning
- # about reversed -=
- # || ($tokenr eq '-')
+ my $rK_weld_left = $self->[_rK_weld_left_];
+ my $rK_weld_right = $self->[_rK_weld_right_];
- # keep a space between a quote and a bareword to prevent the
- # bareword from becoming a quote modifier.
- || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
+ # This routine implements the -wn flag by "welding together"
+ # the nested closing and opening tokens which were previously
+ # identified by sub 'find_nested_pairs'. "welding" simply
+ # involves setting certain hash values which will be checked
+ # later during formatting.
- # keep a space between a token ending in '$' and any word;
- # this caused trouble: "die @$ if $@"
- || ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
- && ( $tokenr =~ /^[a-zA-Z_]/ ) )
+ my $rLL = $self->[_rLL_];
+ my $rlines = $self->[_rlines_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
- # perl is very fussy about spaces before <<
- || ( $tokenr =~ /^\<\</ )
+ # Find nested pairs of container tokens for any welding.
+ my $rnested_pairs = $self->find_nested_pairs();
- # avoid combining tokens to create new meanings. Example:
- # $a+ +$b must not become $a++$b
- || ( $is_digraph{ $tokenl . $tokenr } )
- || ( $is_trigraph{ $tokenl . $tokenr } )
+ # Return unless there are nested pairs to weld
+ return unless defined($rnested_pairs) && @{$rnested_pairs};
- # another example: do not combine these two &'s:
- # allow_options & &OPT_EXECCGI
- || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )
+ my $rOpts_break_at_old_method_breakpoints =
+ $rOpts->{'break-at-old-method-breakpoints'};
- # don't combine $$ or $# with any alphanumeric
- # (testfile mangle.t with --mangle)
- || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )
+ # This array will hold the sequence numbers of the tokens to be welded.
+ my @welds;
- # retain any space after possible filehandle
- # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
- || ( $typel eq 'Z' )
+ # Variables needed for estimating line lengths
+ my $maximum_text_length; # maximum spaces available for text
+ my $starting_lentot; # cumulative text to start of current line
- # Perl is sensitive to whitespace after the + here:
- # $b = xvals $a + 0.1 * yvals $a;
- || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )
+ my $iline_outer_opening = -1;
+ my $weld_count_this_start = 0;
- # keep paren separate in 'use Foo::Bar ()'
- || ( $tokenr eq '('
- && $typel eq 'w'
- && $typell eq 'k'
- && $tokenll eq 'use' )
+ my $multiline_tol =
+ 1 + max( $rOpts_indent_columns, $rOpts_continuation_indentation );
- # keep any space between filehandle and paren:
- # file mangle.t with --mangle:
- || ( $typel eq 'Y' && $tokenr eq '(' )
+ my $length_to_opening_seqno = sub {
+ my ($seqno) = @_;
+ my $KK = $K_opening_container->{$seqno};
+ my $lentot = defined($KK)
+ && $KK > 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0;
+ return $lentot;
+ };
- # retain any space after here doc operator ( hereerr.t)
- || ( $typel eq 'h' )
+ my $length_to_closing_seqno = sub {
+ my ($seqno) = @_;
+ my $KK = $K_closing_container->{$seqno};
+ my $lentot = defined($KK)
+ && $KK > 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0;
+ return $lentot;
+ };
- # be careful with a space around ++ and --, to avoid ambiguity as to
- # which token it applies
- || ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) )
- || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
+ # Abbreviations:
+ # _oo=outer opening, i.e. first of { {
+ # _io=inner opening, i.e. second of { {
+ # _oc=outer closing, i.e. second of } {
+ # _ic=inner closing, i.e. first of } }
- # need space after foreach my; for example, this will fail in
- # older versions of Perl:
- # foreach my$ft(@filetypes)...
- || (
- $tokenl eq 'my'
+ my $previous_pair;
- # /^(for|foreach)$/
- && $is_for_foreach{$tokenll}
- && $tokenr =~ /^\$/
- )
+ # Main loop over nested pairs...
+ # We are working from outermost to innermost pairs so that
+ # level changes will be complete when we arrive at the inner pairs.
+ while ( my $item = pop( @{$rnested_pairs} ) ) {
+ my ( $inner_seqno, $outer_seqno ) = @{$item};
- # must have space between grep and left paren; "grep(" will fail
- || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
+ my $Kouter_opening = $K_opening_container->{$outer_seqno};
+ my $Kinner_opening = $K_opening_container->{$inner_seqno};
+ my $Kouter_closing = $K_closing_container->{$outer_seqno};
+ my $Kinner_closing = $K_closing_container->{$inner_seqno};
- # don't stick numbers next to left parens, as in:
- #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
- || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
+ # RULE: do not weld if inner container has <= 3 tokens unless the next
+ # token is a heredoc (so we know there will be multiple lines)
+ if ( $Kinner_closing - $Kinner_opening <= 4 ) {
+ my $Knext_nonblank = $self->K_next_nonblank($Kinner_opening);
+ next unless defined($Knext_nonblank);
+ my $type = $rLL->[$Knext_nonblank]->[_TYPE_];
+ next unless ( $type eq 'h' );
+ }
- # We must be sure that a space between a ? and a quoted string
- # remains if the space before the ? remains. [Loca.pm, lockarea]
- # ie,
- # $b=join $comma ? ',' : ':', @_; # ok
- # $b=join $comma?',' : ':', @_; # ok!
- # $b=join $comma ?',' : ':', @_; # error!
- # Not really required:
- ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
+ my $outer_opening = $rLL->[$Kouter_opening];
+ my $inner_opening = $rLL->[$Kinner_opening];
+ my $outer_closing = $rLL->[$Kouter_closing];
+ my $inner_closing = $rLL->[$Kinner_closing];
- # do not remove space between an '&' and a bare word because
- # it may turn into a function evaluation, like here
- # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
- # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
- || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
+ # RULE: do not weld to a hash brace. The reason is that it has a very
+ # strong bond strength to the next token, so a line break after it
+ # may not work. Previously we allowed welding to something like @{
+ # but that caused blinking states (cases b751, b779).
+ if ( $inner_opening->[_TYPE_] eq 'L' ) {
+ next;
+ }
- # space stacked labels (TODO: check if really necessary)
- || ( $typel eq 'J' && $typer eq 'J' )
+ # RULE: do not weld to a square bracket which does not contain commas
+ if ( $inner_opening->[_TYPE_] eq '[' ) {
+ my $rtype_count = $self->[_rtype_count_by_seqno_]->{$inner_seqno};
+ next unless ($rtype_count);
+ my $comma_count = $rtype_count->{','};
+ next unless ($comma_count);
- ; # the value of this long logic sequence is the result we want
-##if ($typel eq 'j') {print STDERR "typel=$typel typer=$typer result='$result'\n"}
- return $result;
- }
-}
+ # Do not weld if there is text before a '[' such as here:
+ # curr_opt ( @beg [2,5] )
+ # It will not break into the desired sandwich structure.
+ # This fixes case b109, 110.
+ my $Kdiff = $Kinner_opening - $Kouter_opening;
+ next if ( $Kdiff > 2 );
+ next
+ if ( $Kdiff == 2
+ && $rLL->[ $Kouter_opening + 1 ]->[_TYPE_] ne 'b' );
-{
- my %secret_operators;
- my %is_leading_secret_token;
+ }
- BEGIN {
+ # Set flag saying if this pair starts a new weld
+ my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
- # token lists for perl secret operators as compiled by Philippe Bruhat
- # at: https://metacpan.org/module/perlsecret
- %secret_operators = (
- 'Goatse' => [qw#= ( ) =#], #=( )=
- 'Venus1' => [qw#0 +#], # 0+
- 'Venus2' => [qw#+ 0#], # +0
- 'Enterprise' => [qw#) x ! !#], # ()x!!
- 'Kite1' => [qw#~ ~ <>#], # ~~<>
- 'Kite2' => [qw#~~ <>#], # ~~<>
- 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
- 'Bang bang ' => [qw#! !#], # !!
- );
+ # Set flag saying if this pair is adjacent to the previous nesting pair
+ # (even if previous pair was rejected as a weld)
+ my $touch_previous_pair =
+ defined($previous_pair) && $outer_seqno == $previous_pair->[0];
+ $previous_pair = $item;
- # The following operators and constants are not included because they
- # are normally kept tight by perltidy:
- # ~~ <~>
- #
+ my $do_not_weld_rule = 0;
+ my $Msg = "";
+ my $is_one_line_weld;
- # Make a lookup table indexed by the first token of each operator:
- # first token => [list, list, ...]
- foreach my $value ( values(%secret_operators) ) {
- my $tok = $value->[0];
- push @{ $is_leading_secret_token{$tok} }, $value;
+ my $iline_oo = $outer_opening->[_LINE_INDEX_];
+ my $iline_io = $inner_opening->[_LINE_INDEX_];
+ my $iline_ic = $inner_closing->[_LINE_INDEX_];
+ my $iline_oc = $outer_closing->[_LINE_INDEX_];
+ my $token_oo = $outer_opening->[_TOKEN_];
+
+ my $is_multiline_weld =
+ $iline_oo == $iline_io
+ && $iline_ic == $iline_oc
+ && $iline_io != $iline_ic;
+
+ if (DEBUG_WELD) {
+ my $token_io = $rLL->[$Kinner_opening]->[_TOKEN_];
+ my $len_oo = $rLL->[$Kouter_opening]->[_CUMULATIVE_LENGTH_];
+ my $len_io = $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_];
+ $Msg .= <<EOM;
+Pair seqo=$outer_seqno seqi=$inner_seqno lines: loo=$iline_oo lio=$iline_io lic=$iline_ic loc=$iline_oc
+Koo=$Kouter_opening Kio=$Kinner_opening Kic=$Kinner_closing Koc=$Kouter_closing lenoo=$len_oo lenio=$len_io
+tokens '$token_oo' .. '$token_io'
+EOM
}
- }
- sub new_secret_operator_whitespace {
+ # If this pair is not adjacent to the previous pair (skipped or not),
+ # then measure lengths from the start of line of oo.
+ if (
+ !$touch_previous_pair
- my ( $rlong_array, $rwhitespace_flags ) = @_;
+ # Also do this if restarting at a new line; fixes case b965, s001
+ || ( !$weld_count_this_start && $iline_oo > $iline_outer_opening )
+ )
+ {
- # Loop over all tokens in this line
- my ( $token, $type );
- my $jmax = @{$rlong_array} - 1;
- foreach my $j ( 0 .. $jmax ) {
+ # Remember the line we are using as a reference
+ $iline_outer_opening = $iline_oo;
+ $weld_count_this_start = 0;
- $token = $rlong_array->[$j]->[_TOKEN_];
- $type = $rlong_array->[$j]->[_TYPE_];
+ ( my $new_weld_ok, $maximum_text_length, $starting_lentot, my $msg )
+ = $self->setup_new_weld_measurements( $Kouter_opening,
+ $Kinner_opening );
- # Skip unless this token might start a secret operator
- next if ( $type eq 'b' );
- next unless ( $is_leading_secret_token{$token} );
+ if (
+ !$new_weld_ok
+ && ( $iline_oo != $iline_io
+ || $iline_ic != $iline_oc )
+ )
+ {
+ if (DEBUG_WELD) { print $msg}
+ next;
+ }
- # Loop over all secret operators with this leading token
- foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
- my $jend = $j - 1;
- foreach my $tok ( @{$rpattern} ) {
- $jend++;
- $jend++
+ my $rK_range = $rlines->[$iline_oo]->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
- if ( $jend <= $jmax
- && $rlong_array->[$jend]->[_TYPE_] eq 'b' );
- if ( $jend > $jmax
- || $tok ne $rlong_array->[$jend]->[_TOKEN_] )
+ # An existing one-line weld is a line in which
+ # (1) the containers are all on one line, and
+ # (2) the line does not exceed the allowable length, and
+ # This flag is used to avoid creating blinkers.
+ # FIX1: Changed 'excess_length_to_K' to 'excess_length_of_line'
+ # to get exact lengths and fix b604 b605.
+ if ( $iline_oo == $iline_oc ) {
+
+ # All the tokens are on one line, now check their length
+ my $excess =
+ $self->excess_line_length_for_Krange( $Kfirst, $Klast );
+ if ( $excess <= 0 ) {
+
+ # All tokens are on one line and fit. This is a valid
+ # existing one-line weld except for some edge cases
+ # involving -lp:
+
+ # FIX2: Patch for b1114: add a tolerance of one level if
+ # this line has an unbalanced start. This helps prevent
+ # blinkers in unusual cases for lines near the length limit
+ # by making it more likely that RULE 2 will prevent a weld.
+ # FIX3: for b1131: only use level difference in -lp mode.
+ # FIX4: for b1141, b1142: reduce the tolerance for longer
+ # leading tokens
+ if ( $rOpts_line_up_parentheses
+ && $outer_opening->[_LEVEL_] -
+ $rLL->[$Kfirst]->[_LEVEL_] )
{
- $jend = undef;
- last;
- }
- }
-
- if ($jend) {
- # set flags to prevent spaces within this operator
- foreach my $jj ( $j + 1 .. $jend ) {
- $rwhitespace_flags->[$jj] = WS_NO;
+ # We only need a tolerance if the leading text before
+ # the first opening token is shorter than the
+ # indentation length. For simplicity we just use the
+ # length of the first token here. If necessary, we
+ # could be more exact in the future and find the
+ # total length up to the first opening token.
+ # See cases b1114, b1141, b1142.
+ my $tolx = max( 0,
+ $rOpts_indent_columns -
+ $rLL->[$Kfirst]->[_TOKEN_LENGTH_] );
+
+ if ( $excess + $tolx <= 0 ) {
+ $is_one_line_weld = 1;
+ }
+ }
+ else {
+ $is_one_line_weld = 1;
}
- $j = $jend;
- last;
}
- } ## End Loop over all operators
- } ## End loop over all tokens
- return;
- } # End sub
-}
+ }
-{ # begin print_line_of_tokens
+ # DO-NOT-WELD RULE 1:
+ # Do not weld something that looks like the start of a two-line
+ # function call, like this: <<snippets/wn6.in>>
+ # $trans->add_transformation(
+ # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
+ # We will look for a semicolon after the closing paren.
- my $rinput_token_array; # Current working array
- my $rinput_K_array; # Future working array
+ # We want to weld something complex, like this though
+ # my $compass = uc( opposite_direction( line_to_canvas_direction(
+ # @{ $coords[0] }, @{ $coords[1] } ) ) );
+ # Otherwise we will get a 'blinker'. For example, the following
+ # would become a blinker without this rule:
+ # $Self->_Add( $SortOrderDisplay{ $Field
+ # ->GenerateFieldForSelectSQL() } );
+ # But it is okay to weld a two-line statement if it looks like
+ # it was already welded, meaning that the two opening containers are
+ # on a different line that the two closing containers. This is
+ # necessary to prevent blinking of something like this with
+ # perltidy -wn -pbp (starting indentation two levels deep):
+
+ # $top_label->set_text( gettext(
+ # "Unable to create personal directory - check permissions.") );
+
+ if ( $iline_oc == $iline_oo + 1
+ && $iline_io == $iline_ic
+ && $token_oo eq '(' )
+ {
- my $in_quote;
- my $guessed_indentation_level;
+ # Look for following semicolon...
+ my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
+ my $next_nonblank_type =
+ defined($Knext_nonblank)
+ ? $rLL->[$Knext_nonblank]->[_TYPE_]
+ : 'b';
+ if ( $next_nonblank_type eq ';' ) {
- # This should be a return variable from extract_token
- # These local token variables are stored by store_token_to_go:
- my $Ktoken_vars;
- my $block_type;
- my $ci_level;
- my $container_environment;
- my $container_type;
- my $in_continued_quote;
- my $level;
- my $no_internal_newlines;
- my $slevel;
- my $token;
- my $type;
- my $type_sequence;
+ # Then do not weld if no other containers between inner
+ # opening and closing.
+ my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
+ if ( $Knext_seq_item == $Kinner_closing ) {
+ $do_not_weld_rule = 1;
+ }
+ }
+ }
+ } ## end starting new weld sequence
- # routine to pull the jth token from the line of tokens
- sub extract_token {
- my ( $self, $j ) = @_;
+ # DO-NOT-WELD RULE 2:
+ # Do not weld an opening paren to an inner one line brace block
+ # We will just use old line numbers for this test and require
+ # iterations if necessary for convergence
- my $rLL = $self->{rLL};
- $Ktoken_vars = $rinput_K_array->[$j];
- if ( !defined($Ktoken_vars) ) {
+ # For example, otherwise we could cause the opening paren
+ # in the following example to separate from the caller name
+ # as here:
- # Shouldn't happen: an error here would be due to a recent program change
- Fault("undefined index K for j=$j");
- }
- my $rtoken_vars = $rLL->[$Ktoken_vars];
+ # $_[0]->code_handler
+ # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
- if ( $rtoken_vars->[_TOKEN_] ne $rLL->[$Ktoken_vars]->[_TOKEN_] ) {
+ # Here is another example where we do not want to weld:
+ # $wrapped->add_around_modifier(
+ # sub { push @tracelog => 'around 1'; $_[0]->(); } );
- # Shouldn't happen: an error here would be due to a recent program change
- Fault(<<EOM);
- j=$j, K=$Ktoken_vars, '$rtoken_vars->[_TOKEN_]' ne '$rLL->[$Ktoken_vars]'
-EOM
- }
-
- #########################################################
- # these are now redundant and can eventually be eliminated
-
- $token = $rtoken_vars->[_TOKEN_];
- $type = $rtoken_vars->[_TYPE_];
- $block_type = $rtoken_vars->[_BLOCK_TYPE_];
- $container_type = $rtoken_vars->[_CONTAINER_TYPE_];
- $container_environment = $rtoken_vars->[_CONTAINER_ENVIRONMENT_];
- $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
- $level = $rtoken_vars->[_LEVEL_];
- $slevel = $rtoken_vars->[_SLEVEL_];
- $ci_level = $rtoken_vars->[_CI_LEVEL_];
- #########################################################
+ # If the one line sub block gets broken due to length or by the
+ # user, then we can weld. The result will then be:
+ # $wrapped->add_around_modifier( sub {
+ # push @tracelog => 'around 1';
+ # $_[0]->();
+ # } );
- return;
- }
+ # Updated to fix cases b1082 b1102 b1106 b1115:
+ # Also, do not weld to an intact inner block if the outer opening token
+ # is on a different line. For example, this prevents oscillation
+ # between these two states in case b1106:
- {
- my @saved_token;
+ # return map{
+ # ($_,[$self->$_(@_[1..$#_])])
+ # }@every;
- sub save_current_token {
+ # return map { (
+ # $_, [ $self->$_( @_[ 1 .. $#_ ] ) ]
+ # ) } @every;
- @saved_token = (
- $block_type, $ci_level,
- $container_environment, $container_type,
- $in_continued_quote, $level,
- $no_internal_newlines, $slevel,
- $token, $type,
- $type_sequence, $Ktoken_vars,
- );
- return;
- }
+ # The effect of this change on typical code is very minimal. Sometimes
+ # it may take a second iteration to converge, but this gives protection
+ # against blinking.
- sub restore_current_token {
- (
- $block_type, $ci_level,
- $container_environment, $container_type,
- $in_continued_quote, $level,
- $no_internal_newlines, $slevel,
- $token, $type,
- $type_sequence, $Ktoken_vars,
- ) = @saved_token;
- return;
+ if ( !$do_not_weld_rule
+ && !$is_one_line_weld
+ && $iline_ic == $iline_io )
+ {
+ $do_not_weld_rule = 2
+ if ( $token_oo eq '(' || $iline_oo != $iline_io );
}
- }
- sub token_length {
+ # DO-NOT-WELD RULE 3:
+ # Do not weld if this makes our line too long.
+ # Use a tolerance which depends on if the old tokens were welded
+ # (fixes cases b746 b748 b749 b750 b752 b753 b754 b755 b756 b758 b759)
+ if ( !$do_not_weld_rule ) {
+
+ # Measure to a little beyond the inner opening token if it is
+ # followed by a bare word, which may have unusual line break rules.
+
+ # NOTE: Originally this was OLD RULE 6: do not weld to a container
+ # which is followed on the same line by an unknown bareword token.
+ # This can cause blinkers (cases b626, b611). But OK to weld one
+ # line welds to fix cases b1057 b1064. For generality, OLD RULE 6
+ # has been merged into RULE 3 here to also fix cases b1078 b1091.
+
+ my $K_for_length = $Kinner_opening;
+ my $Knext_io = $self->K_next_nonblank($Kinner_opening);
+ next unless ( defined($Knext_io) ); # shouldn't happen
+ my $type_io_next = $rLL->[$Knext_io]->[_TYPE_];
+
+ # Note: may need to eventually also include other types here,
+ # such as 'Z' and 'Y': if ($type_io_next =~ /^[ZYw]$/) {
+ if ( $type_io_next eq 'w' ) {
+ my $Knext_io2 = $self->K_next_nonblank($Knext_io);
+ next unless ( defined($Knext_io2) );
+ my $type_io_next2 = $rLL->[$Knext_io2]->[_TYPE_];
+ if ( !$type_ok_after_bareword{$type_io_next2} ) {
+ $K_for_length = $Knext_io2;
+ }
+ }
- # Returns the length of a token, given:
- # $token=text of the token
- # $type = type
- # $not_first_token = should be TRUE if this is not the first token of
- # the line. It might the index of this token in an array. It is
- # used to test for a side comment vs a block comment.
- # Note: Eventually this should be the only routine determining the
- # length of a token in this package.
- my ( $token, $type, $not_first_token ) = @_;
- my $token_length = length($token);
+ # Use a tolerance for welds over multiple lines to avoid blinkers.
+ # We can use zero tolerance if it looks like we are working on an
+ # existing weld.
+ my $tol =
+ $is_one_line_weld || $is_multiline_weld
+ ? 0
+ : $multiline_tol;
- # We mark lengths of side comments as just 1 if we are
- # ignoring their lengths when setting line breaks.
- $token_length = 1
- if ( $rOpts_ignore_side_comment_lengths
- && $not_first_token
- && $type eq '#' );
- return $token_length;
- }
+ # By how many characters does this exceed the text window?
+ my $excess =
+ $self->cumulative_length_before_K($K_for_length) -
+ $starting_lentot + 1 + $tol -
+ $maximum_text_length;
- sub rtoken_length {
+ # Old patch: Use '>=0' instead of '> 0' here to fix cases b995 b998
+ # b1000 b1001 b1007 b1008 b1009 b1010 b1011 b1012 b1016 b1017 b1018
+ # Revised patch: New tolerance definition allows going back to '> 0'
+ # here. This fixes case b1124. See also cases b1087 and b1087a.
+ if ( $excess > 0 ) { $do_not_weld_rule = 3 }
- # return length of ith token in @{$rtokens}
- my ($i) = @_;
- return token_length( $rinput_token_array->[$i]->[_TOKEN_],
- $rinput_token_array->[$i]->[_TYPE_], $i );
- }
+ if (DEBUG_WELD) {
+ $Msg .=
+"RULE 3 test: excess length to K=$Kinner_opening is $excess > 0 with tol= $tol ?) \n";
+ }
+ }
- # Routine to place the current token into the output stream.
- # Called once per output token.
- sub store_token_to_go {
+ # DO-NOT-WELD RULE 4; implemented for git#10:
+ # Do not weld an opening -ce brace if the next container is on a single
+ # line, different from the opening brace. (This is very rare). For
+ # example, given the following with -ce, we will avoid joining the {
+ # and [
- my ( $self, $side_comment_follows ) = @_;
+ # } else {
+ # [ $_, length($_) ]
+ # }
- my $flag = $side_comment_follows ? 1 : $no_internal_newlines;
+ # because this would produce a terminal one-line block:
- ++$max_index_to_go;
- $K_to_go[$max_index_to_go] = $Ktoken_vars;
- $tokens_to_go[$max_index_to_go] = $token;
- $types_to_go[$max_index_to_go] = $type;
- $nobreak_to_go[$max_index_to_go] = $flag;
- $old_breakpoint_to_go[$max_index_to_go] = 0;
- $forced_breakpoint_to_go[$max_index_to_go] = 0;
- $block_type_to_go[$max_index_to_go] = $block_type;
- $type_sequence_to_go[$max_index_to_go] = $type_sequence;
- $container_environment_to_go[$max_index_to_go] = $container_environment;
- $ci_levels_to_go[$max_index_to_go] = $ci_level;
- $mate_index_to_go[$max_index_to_go] = -1;
- $bond_strength_to_go[$max_index_to_go] = 0;
-
- # Note: negative levels are currently retained as a diagnostic so that
- # the 'final indentation level' is correctly reported for bad scripts.
- # But this means that every use of $level as an index must be checked.
- # If this becomes too much of a problem, we might give up and just clip
- # them at zero.
- ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
- $levels_to_go[$max_index_to_go] = $level;
- $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
-
- # link the non-blank tokens
- my $iprev = $max_index_to_go - 1;
- $iprev-- if ( $iprev >= 0 && $types_to_go[$iprev] eq 'b' );
- $iprev_to_go[$max_index_to_go] = $iprev;
- $inext_to_go[$iprev] = $max_index_to_go
- if ( $iprev >= 0 && $type ne 'b' );
- $inext_to_go[$max_index_to_go] = $max_index_to_go + 1;
-
- $token_lengths_to_go[$max_index_to_go] =
- token_length( $token, $type, $max_index_to_go );
+ # } else { [ $_, length($_) ] }
- # We keep a running sum of token lengths from the start of this batch:
- # summed_lengths_to_go[$i] = total length to just before token $i
- # summed_lengths_to_go[$i+1] = total length to just after token $i
- $summed_lengths_to_go[ $max_index_to_go + 1 ] =
- $summed_lengths_to_go[$max_index_to_go] +
- $token_lengths_to_go[$max_index_to_go];
+ # which may not be what is desired. But given this input:
- # Define the indentation that this token would have if it started
- # a new line. We have to do this now because we need to know this
- # when considering one-line blocks.
- set_leading_whitespace( $level, $ci_level, $in_continued_quote );
+ # } else { [ $_, length($_) ] }
- # remember previous nonblank tokens seen
- if ( $type ne 'b' ) {
- $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
- $last_last_nonblank_type_to_go = $last_nonblank_type_to_go;
- $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
- $last_nonblank_index_to_go = $max_index_to_go;
- $last_nonblank_type_to_go = $type;
- $last_nonblank_token_to_go = $token;
- if ( $type eq ',' ) {
- $comma_count_in_batch++;
+ # then we will do the weld and retain the one-line block
+ if ( !$do_not_weld_rule && $rOpts->{'cuddled-else'} ) {
+ my $block_type = $rLL->[$Kouter_opening]->[_BLOCK_TYPE_];
+ if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) {
+ my $io_line = $inner_opening->[_LINE_INDEX_];
+ my $ic_line = $inner_closing->[_LINE_INDEX_];
+ my $oo_line = $outer_opening->[_LINE_INDEX_];
+ if ( $oo_line < $io_line && $ic_line == $io_line ) {
+ $do_not_weld_rule = 4;
+ }
}
}
- FORMATTER_DEBUG_FLAG_STORE && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
-"STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
- };
- return;
- }
+ # DO-NOT-WELD RULE 5: do not include welds excluded by user
+ if (
+ !$do_not_weld_rule
+ && %weld_nested_exclusion_rules
+ && ( $self->is_excluded_weld( $Kouter_opening, $starting_new_weld )
+ || $self->is_excluded_weld( $Kinner_opening, 0 ) )
+ )
+ {
+ $do_not_weld_rule = 5;
+ }
- sub copy_hash {
- my ($rold_token_hash) = @_;
- my %new_token_hash =
- map { ( $_, $rold_token_hash->{$_} ) } keys %{$rold_token_hash};
- return \%new_token_hash;
- }
+ # DO-NOT-WELD RULE 6: This has been merged into RULE 3 above.
- sub copy_array {
- my ($rold) = @_;
- my @new = map { $_ } @{$rold};
- return \@new;
- }
+ # DO-NOT-WELD RULE 7: Do not weld if this conflicts with -bom
+ # (case b973)
+ if ( !$do_not_weld_rule
+ && $rOpts_break_at_old_method_breakpoints
+ && $iline_io > $iline_oo )
+ {
- sub copy_token_as_type {
- my ( $rold_token, $type, $token ) = @_;
- if ( $type eq 'b' ) {
- $token = " " unless defined($token);
- }
- elsif ( $type eq 'q' ) {
- $token = '' unless defined($token);
+ foreach my $iline ( $iline_oo + 1 .. $iline_io ) {
+ my $rK_range = $rlines->[$iline]->{_rK_range};
+ next unless defined($rK_range);
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ next unless defined($Kfirst);
+ if ( $rLL->[$Kfirst]->[_TYPE_] eq '->' ) {
+ $do_not_weld_rule = 7;
+ last;
+ }
+ }
}
- elsif ( $type eq '->' ) {
- $token = '->' unless defined($token);
+
+ if ($do_not_weld_rule) {
+
+ # After neglecting a pair, we start measuring from start of point io
+ my $starting_level = $inner_opening->[_LEVEL_];
+ my $starting_ci_level = $inner_opening->[_CI_LEVEL_];
+ $starting_lentot =
+ $self->cumulative_length_before_K($Kinner_opening);
+ $maximum_text_length =
+ $maximum_text_length_at_level[$starting_level] -
+ $starting_ci_level * $rOpts_continuation_indentation;
+
+ if (DEBUG_WELD) {
+ $Msg .= "Not welding due to RULE $do_not_weld_rule\n";
+ print $Msg;
+ }
+
+ # Normally, a broken pair should not decrease indentation of
+ # intermediate tokens:
+ ## if ( $last_pair_broken ) { next }
+ # However, for long strings of welded tokens, such as '{{{{{{...'
+ # we will allow broken pairs to also remove indentation.
+ # This will keep very long strings of opening and closing
+ # braces from marching off to the right. We will do this if the
+ # number of tokens in a weld before the broken weld is 4 or more.
+ # This rule will mainly be needed for test scripts, since typical
+ # welds have fewer than about 4 welded tokens.
+ if ( !@welds || @{ $welds[-1] } < 4 ) { next }
}
- elsif ( $type eq ';' ) {
- $token = ';' unless defined($token);
+
+ # otherwise start new weld ...
+ elsif ($starting_new_weld) {
+ $weld_count_this_start++;
+ if (DEBUG_WELD) {
+ $Msg .= "Starting new weld\n";
+ print $Msg;
+ }
+ push @welds, $item;
+
+ $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
+ $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
+
+ $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
+ $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
}
+
+ # ... or extend current weld
else {
- Fault(
-"Programming error: copy_token_as has type $type but should be 'b' or 'q'"
- );
+ $weld_count_this_start++;
+ if (DEBUG_WELD) {
+ $Msg .= "Extending current weld\n";
+ print $Msg;
+ }
+ unshift @{ $welds[-1] }, $inner_seqno;
+ $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
+ $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
+
+ $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
+ $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
}
- my $rnew_token = copy_array($rold_token);
- $rnew_token->[_TYPE_] = $type;
- $rnew_token->[_TOKEN_] = $token;
- $rnew_token->[_BLOCK_TYPE_] = '';
- $rnew_token->[_CONTAINER_TYPE_] = '';
- $rnew_token->[_CONTAINER_ENVIRONMENT_] = '';
- $rnew_token->[_TYPE_SEQUENCE_] = '';
- return $rnew_token;
- }
- sub boolean_equals {
- my ( $val1, $val2 ) = @_;
- return ( $val1 && $val2 || !$val1 && !$val2 );
+ # After welding, reduce the indentation level if all intermediate tokens
+ my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
+ if ( $dlevel != 0 ) {
+ my $Kstart = $Kinner_opening;
+ my $Kstop = $Kinner_closing;
+ for ( my $KK = $Kstart ; $KK <= $Kstop ; $KK++ ) {
+ $rLL->[$KK]->[_LEVEL_] += $dlevel;
+ }
+
+ # Copy opening ci level to help break at = for -lp mode (case b1124)
+ $rLL->[$Kinner_opening]->[_CI_LEVEL_] =
+ $rLL->[$Kouter_opening]->[_CI_LEVEL_];
+
+ # But do not copy the closing ci level ... it can give poor results
+ ## $rLL->[$Kinner_closing]->[_CI_LEVEL_] =
+ ## $rLL->[$Kouter_closing]->[_CI_LEVEL_];
+ }
}
- sub print_line_of_tokens {
+ return;
+}
- my ( $self, $line_of_tokens ) = @_;
+sub weld_nested_quotes {
- # This routine is called once per input line to process all of
- # the tokens on that line. This is the first stage of
- # beautification.
- #
- # Full-line comments and blank lines may be processed immediately.
- #
- # For normal lines of code, the tokens are stored one-by-one,
- # via calls to 'sub store_token_to_go', until a known line break
- # point is reached. Then, the batch of collected tokens is
- # passed along to 'sub output_line_to_go' for further
- # processing. This routine decides if there should be
- # whitespace between each pair of non-white tokens, so later
- # routines only need to decide on any additional line breaks.
- # Any whitespace is initially a single space character. Later,
- # the vertical aligner may expand that to be multiple space
- # characters if necessary for alignment.
-
- $input_line_number = $line_of_tokens->{_line_number};
- my $input_line = $line_of_tokens->{_line_text};
- my $CODE_type = $line_of_tokens->{_code_type};
+ # Called once per file for option '--weld-nested-containers'. This
+ # does welding on qw quotes.
- my $rK_range = $line_of_tokens->{_rK_range};
- my ( $K_first, $K_last ) = @{$rK_range};
+ my $self = shift;
- my $rLL = $self->{rLL};
- my $rbreak_container = $self->{rbreak_container};
- my $rshort_nested = $self->{rshort_nested};
+ # See if quotes are excluded from welding
+ my $rflags = $weld_nested_exclusion_rules{'q'};
+ return if ( defined($rflags) && defined( $rflags->[1] ) );
- if ( !defined($K_first) ) {
+ my $rK_weld_left = $self->[_rK_weld_left_];
+ my $rK_weld_right = $self->[_rK_weld_right_];
- # Empty line: This can happen if tokens are deleted, for example
- # with the -mangle parameter
- return;
- }
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
+ my $Num = @{$rLL};
- $no_internal_newlines = 1 - $rOpts_add_newlines;
- my $is_comment =
- ( $K_first == $K_last && $rLL->[$K_first]->[_TYPE_] eq '#' );
- my $is_static_block_comment_without_leading_space =
- $CODE_type eq 'SBCX';
- $is_static_block_comment =
- $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space;
- my $is_hanging_side_comment = $CODE_type eq 'HSC';
- my $is_VERSION_statement = $CODE_type eq 'VER';
- if ($is_VERSION_statement) {
- $saw_VERSION_in_this_file = 1;
- $no_internal_newlines = 1;
- }
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $rlines = $self->[_rlines_];
- # Add interline blank if any
- my $last_old_nonblank_type = "b";
- my $first_new_nonblank_type = "b";
- my $first_new_nonblank_token = " ";
- if ( $max_index_to_go >= 0 ) {
- $last_old_nonblank_type = $types_to_go[$max_index_to_go];
- $first_new_nonblank_type = $rLL->[$K_first]->[_TYPE_];
- $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
- if ( !$is_comment
- && $types_to_go[$max_index_to_go] ne 'b'
- && $K_first > 0
- && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' )
- {
- $K_first -= 1;
- }
+ my $starting_lentot;
+ my $maximum_text_length;
+
+ my $is_single_quote = sub {
+ my ( $Kbeg, $Kend, $quote_type ) = @_;
+ foreach my $K ( $Kbeg .. $Kend ) {
+ my $test_type = $rLL->[$K]->[_TYPE_];
+ next if ( $test_type eq 'b' );
+ return if ( $test_type ne $quote_type );
}
+ return 1;
+ };
- # Copy the tokens into local arrays
- $rinput_token_array = [];
- $rinput_K_array = [];
- $rinput_K_array = [ ( $K_first .. $K_last ) ];
- $rinput_token_array = [ map { $rLL->[$_] } @{$rinput_K_array} ];
- my $jmax = @{$rinput_K_array} - 1;
+ # Length tolerance - same as previously used for sub weld_nested
+ my $multiline_tol =
+ 1 + max( $rOpts_indent_columns, $rOpts_continuation_indentation );
- $in_continued_quote = $starting_in_quote =
- $line_of_tokens->{_starting_in_quote};
- $in_quote = $line_of_tokens->{_ending_in_quote};
- $ending_in_quote = $in_quote;
- $guessed_indentation_level =
- $line_of_tokens->{_guessed_indentation_level};
+ # look for single qw quotes nested in containers
+ my $KNEXT = $self->[_K_first_seq_item_];
+ while ( defined($KNEXT) ) {
+ my $KK = $KNEXT;
+ $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
+ my $rtoken_vars = $rLL->[$KK];
+ my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
+ if ( !$outer_seqno ) {
+ next if ( $KK == 0 ); # first token in file may not be container
- my $j_next;
- my $next_nonblank_token;
- my $next_nonblank_token_type;
+ # A fault here implies that an error was made in the little loop at
+ # the bottom of sub 'respace_tokens' which set the values of
+ # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
+ # loop control lines above.
+ Fault("sequence = $outer_seqno not defined at K=$KK");
+ }
- $block_type = "";
- $container_type = "";
- $container_environment = "";
- $type_sequence = "";
+ my $token = $rtoken_vars->[_TOKEN_];
+ if ( $is_opening_token{$token} ) {
- ######################################
- # Handle a block (full-line) comment..
- ######################################
- if ($is_comment) {
+ # see if the next token is a quote of some type
+ my $Kn = $KK + 1;
+ $Kn += 1
+ if ( $Kn < $Num && $rLL->[$Kn]->[_TYPE_] eq 'b' );
+ next unless ( $Kn < $Num );
- if ( $rOpts->{'tee-block-comments'} ) {
- $file_writer_object->tee_on();
- }
+ my $next_token = $rLL->[$Kn]->[_TOKEN_];
+ my $next_type = $rLL->[$Kn]->[_TYPE_];
+ next
+ unless ( ( $next_type eq 'q' || $next_type eq 'Q' )
+ && $next_token =~ /^q/ );
- destroy_one_line_block();
- $self->output_line_to_go();
+ # The token before the closing container must also be a quote
+ my $Kouter_closing = $K_closing_container->{$outer_seqno};
+ my $Kinner_closing = $self->K_previous_nonblank($Kouter_closing);
+ next unless $rLL->[$Kinner_closing]->[_TYPE_] eq $next_type;
- # output a blank line before block comments
- if (
- # unless we follow a blank or comment line
- $last_line_leading_type !~ /^[#b]$/
+ # This is an inner opening container
+ my $Kinner_opening = $Kn;
- # only if allowed
- && $rOpts->{'blanks-before-comments'}
+ # Do not weld to single-line quotes. Nothing is gained, and it may
+ # look bad.
+ next if ( $Kinner_closing == $Kinner_opening );
- # if this is NOT an empty comment line
- && $rinput_token_array->[0]->[_TOKEN_] ne '#'
+ # Only weld to quotes delimited with container tokens. This is
+ # because welding to arbitrary quote delimiters can produce code
+ # which is less readable than without welding.
+ my $closing_delimiter =
+ substr( $rLL->[$Kinner_closing]->[_TOKEN_], -1, 1 );
+ next
+ unless ( $is_closing_token{$closing_delimiter}
+ || $closing_delimiter eq '>' );
- # not after a short line ending in an opening token
- # because we already have space above this comment.
- # Note that the first comment in this if block, after
- # the 'if (', does not get a blank line because of this.
- && !$last_output_short_opening_token
+ # Now make sure that there is just a single quote in the container
+ next
+ unless (
+ $is_single_quote->(
+ $Kinner_opening + 1,
+ $Kinner_closing - 1,
+ $next_type
+ )
+ );
- # never before static block comments
- && !$is_static_block_comment
- )
- {
- $self->flush(); # switching to new output stream
- $file_writer_object->write_blank_code_line();
- $last_line_leading_type = 'b';
+ # OK: This is a candidate for welding
+ my $Msg = "";
+ my $do_not_weld;
+
+ my $Kouter_opening = $K_opening_container->{$outer_seqno};
+ my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
+ my $iline_io = $rLL->[$Kinner_opening]->[_LINE_INDEX_];
+ my $iline_oc = $rLL->[$Kouter_closing]->[_LINE_INDEX_];
+ my $iline_ic = $rLL->[$Kinner_closing]->[_LINE_INDEX_];
+ my $is_old_weld =
+ ( $iline_oo == $iline_io && $iline_ic == $iline_oc );
+
+ # If welded, the line must not exceed allowed line length
+ ( my $ok_to_weld, $maximum_text_length, $starting_lentot, my $msg )
+ = $self->setup_new_weld_measurements( $Kouter_opening,
+ $Kinner_opening );
+ if ( !$ok_to_weld ) {
+ if (DEBUG_WELD) { print $msg}
+ next;
}
- # TRIM COMMENTS -- This could be turned off as a option
- $rinput_token_array->[0]->[_TOKEN_] =~ s/\s*$//; # trim right end
+ my $length =
+ $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_] - $starting_lentot;
+ my $excess = $length + $multiline_tol - $maximum_text_length;
- if (
- $rOpts->{'indent-block-comments'}
- && ( !$rOpts->{'indent-spaced-block-comments'}
- || $input_line =~ /^\s+/ )
- && !$is_static_block_comment_without_leading_space
- )
- {
- $self->extract_token(0);
- $self->store_token_to_go();
- $self->output_line_to_go();
+ my $excess_max = ( $is_old_weld ? $multiline_tol : 0 );
+ if ( $excess >= $excess_max ) {
+ $do_not_weld = 1;
}
- else {
- $self->flush(); # switching to new output stream
- $file_writer_object->write_code_line(
- $rinput_token_array->[0]->[_TOKEN_] . "\n" );
- $last_line_leading_type = '#';
+
+ if (DEBUG_WELD) {
+ if ( !$is_old_weld ) { $is_old_weld = "" }
+ $Msg .=
+"excess=$excess>=$excess_max, multiline_tol=$multiline_tol, is_old_weld='$is_old_weld'\n";
}
- if ( $rOpts->{'tee-block-comments'} ) {
- $file_writer_object->tee_off();
+
+ # Check weld exclusion rules for outer container
+ if ( !$do_not_weld ) {
+ my $is_leading = !defined( $rK_weld_left->{$Kouter_opening} );
+ if ( $self->is_excluded_weld( $KK, $is_leading ) ) {
+ if (DEBUG_WELD) {
+ $Msg .=
+"No qw weld due to weld exclusion rules for outer container\n";
+ }
+ $do_not_weld = 1;
+ }
}
- return;
- }
- # compare input/output indentation except for continuation lines
- # (because they have an unknown amount of initial blank space)
- # and lines which are quotes (because they may have been outdented)
- my $structural_indentation_level = $rinput_token_array->[0]->[_LEVEL_];
- compare_indentation_levels( $guessed_indentation_level,
- $structural_indentation_level )
- unless ( $is_hanging_side_comment
- || $rinput_token_array->[0]->[_CI_LEVEL_] > 0
- || $guessed_indentation_level == 0
- && $rinput_token_array->[0]->[_TYPE_] eq 'Q' );
+ # Check the length of the last line (fixes case b1039)
+ if ( !$do_not_weld ) {
+ my $rK_range_ic = $rlines->[$iline_ic]->{_rK_range};
+ my ( $Kfirst_ic, $Klast_ic ) = @{$rK_range_ic};
+ my $excess_ic =
+ $self->excess_line_length_for_Krange( $Kfirst_ic,
+ $Kouter_closing );
+
+ # Allow extra space for additional welded closing container(s)
+ # and a space and comma or semicolon.
+ # NOTE: weld len has not been computed yet. Use 2 spaces
+ # for now, correct for a single weld. This estimate could
+ # be made more accurate if necessary.
+ my $weld_len =
+ defined( $rK_weld_right->{$Kouter_closing} ) ? 2 : 0;
+ if ( $excess_ic + $weld_len + 2 > 0 ) {
+ if (DEBUG_WELD) {
+ $Msg .=
+"No qw weld due to excess ending line length=$excess_ic + $weld_len + 2 > 0\n";
+ }
+ $do_not_weld = 1;
+ }
+ }
- ##########################
- # Handle indentation-only
- ##########################
+ if ($do_not_weld) {
+ if (DEBUG_WELD) {
+ $Msg .= "Not Welding QW\n";
+ print $Msg;
+ }
+ next;
+ }
- # NOTE: In previous versions we sent all qw lines out immediately here.
- # No longer doing this: also write a line which is entirely a 'qw' list
- # to allow stacking of opening and closing tokens. Note that interior
- # qw lines will still go out at the end of this routine.
- if ( $CODE_type eq 'IO' ) {
- $self->flush();
- my $line = $input_line;
+ # OK to weld
+ if (DEBUG_WELD) {
+ $Msg .= "Welding QW\n";
+ print $Msg;
+ }
- # delete side comments if requested with -io, but
- # we will not allow deleting of closing side comments with -io
- # because the coding would be more complex
- if ( $rOpts->{'delete-side-comments'}
- && $rinput_token_array->[$jmax]->[_TYPE_] eq '#' )
+ $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
+ $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
+
+ $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
+ $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
+
+ # Undo one indentation level if an extra level was added to this
+ # multiline quote
+ my $qw_seqno =
+ $self->[_rstarting_multiline_qw_seqno_by_K_]->{$Kinner_opening};
+ if ( $qw_seqno
+ && $self->[_rmultiline_qw_has_extra_level_]->{$qw_seqno} )
{
+ foreach my $K ( $Kinner_opening + 1 .. $Kinner_closing - 1 ) {
+ $rLL->[$K]->[_LEVEL_] -= 1;
+ }
+ $rLL->[$Kinner_opening]->[_CI_LEVEL_] = 0;
+ $rLL->[$Kinner_closing]->[_CI_LEVEL_] = 0;
+ }
- $line = "";
- foreach my $jj ( 0 .. $jmax - 1 ) {
- $line .= $rinput_token_array->[$jj]->[_TOKEN_];
+ # undo CI for other welded quotes
+ else {
+
+ foreach my $K ( $Kinner_opening .. $Kinner_closing ) {
+ $rLL->[$K]->[_CI_LEVEL_] = 0;
}
}
- # Fix for rt #125506 Unexpected string formating
- # in which leading space of a terminal quote was removed
- $line =~ s/\s+$//;
- $line =~ s/^\s+// unless ($in_continued_quote);
-
- $self->extract_token(0);
- $token = $line;
- $type = 'q';
- $block_type = "";
- $container_type = "";
- $container_environment = "";
- $type_sequence = "";
- $self->store_token_to_go();
- $self->output_line_to_go();
- return;
+ # Change the level of a closing qw token to be that of the outer
+ # containing token. This will allow -lp indentation to function
+ # correctly in the vertical aligner.
+ # Patch to fix c002: but not if it contains text
+ if ( length( $rLL->[$Kinner_closing]->[_TOKEN_] ) == 1 ) {
+ $rLL->[$Kinner_closing]->[_LEVEL_] =
+ $rLL->[$Kouter_closing]->[_LEVEL_];
+ }
}
+ }
+ return;
+}
- ############################
- # Handle all other lines ...
- ############################
+sub is_welded_right_at_i {
+ my ( $self, $i ) = @_;
+ return unless ( $total_weld_count && $i >= 0 );
- #######################################################
- # FIXME: this should become unnecessary
- # making $j+2 valid simplifies coding
- my $rnew_blank =
- copy_token_as_type( $rinput_token_array->[$jmax], 'b' );
- push @{$rinput_token_array}, $rnew_blank;
- push @{$rinput_token_array}, $rnew_blank;
- #######################################################
+ # Back up at a blank. This routine is sometimes called at blanks.
+ # TODO: this routine can eventually be eliminated by setting the weld flags
+ # for all K indexes between the start and end of a weld, not just at
+ # sequenced items.
+ if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
+ return defined( $self->[_rK_weld_right_]->{ $K_to_go[$i] } );
+}
- # If we just saw the end of an elsif block, write nag message
- # if we do not see another elseif or an else.
- if ($looking_for_else) {
+sub is_welded_at_seqno {
- unless ( $rinput_token_array->[0]->[_TOKEN_] =~ /^(elsif|else)$/ ) {
- write_logfile_entry("(No else block)\n");
- }
- $looking_for_else = 0;
- }
+ my ( $self, $seqno ) = @_;
- # This is a good place to kill incomplete one-line blocks
- if (
- (
- ( $semicolons_before_block_self_destruct == 0 )
- && ( $max_index_to_go >= 0 )
- && ( $last_old_nonblank_type eq ';' )
- && ( $first_new_nonblank_token ne '}' )
- )
+ # given a sequence number:
+ # return true if it is welded either left or right
+ # return false otherwise
+ return unless ( $total_weld_count && defined($seqno) );
+ my $KK_o = $self->[_K_opening_container_]->{$seqno};
+ return unless defined($KK_o);
+ return defined( $self->[_rK_weld_left_]->{$KK_o} )
+ || defined( $self->[_rK_weld_right_]->{$KK_o} );
+}
- # Patch for RT #98902. Honor request to break at old commas.
- || ( $rOpts_break_at_old_comma_breakpoints
- && $max_index_to_go >= 0
- && $last_old_nonblank_type eq ',' )
- )
- {
- $forced_breakpoint_to_go[$max_index_to_go] = 1
- if ($rOpts_break_at_old_comma_breakpoints);
- destroy_one_line_block();
- $self->output_line_to_go();
- }
+sub mark_short_nested_blocks {
- # loop to process the tokens one-by-one
- $type = 'b';
- $token = "";
+ # This routine looks at the entire file and marks any short nested blocks
+ # which should not be broken. The results are stored in the hash
+ # $rshort_nested->{$type_sequence}
+ # which will be true if the container should remain intact.
+ #
+ # For example, consider the following line:
- # We do not want a leading blank if the previous batch just got output
- my $jmin = 0;
- if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
- $jmin = 1;
- }
+ # sub cxt_two { sort { $a <=> $b } test_if_list() }
- foreach my $j ( $jmin .. $jmax ) {
+ # The 'sort' block is short and nested within an outer sub block.
+ # Normally, the existence of the 'sort' block will force the sub block to
+ # break open, but this is not always desirable. Here we will set a flag for
+ # the sort block to prevent this. To give the user control, we will
+ # follow the input file formatting. If either of the blocks is broken in
+ # the input file then we will allow it to remain broken. Otherwise we will
+ # set a flag to keep it together in later formatting steps.
- # pull out the local values for this token
- $self->extract_token($j);
+ # The flag which is set here will be checked in two places:
+ # 'sub process_line_of_CODE' and 'sub starting_one_line_block'
- if ( $type eq '#' ) {
+ my $self = shift;
+ return if $rOpts->{'indent-only'};
- if (
- $rOpts->{'delete-side-comments'}
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
- # delete closing side comments if necessary
- || ( $rOpts->{'delete-closing-side-comments'}
- && $token =~ /$closing_side_comment_prefix_pattern/o
- && $last_nonblank_block_type =~
- /$closing_side_comment_list_pattern/o )
- )
- {
- if ( $types_to_go[$max_index_to_go] eq 'b' ) {
- unstore_token_to_go();
- }
- last;
- }
- }
+ return unless ( $rOpts->{'one-line-block-nesting'} );
- # If we are continuing after seeing a right curly brace, flush
- # buffer unless we see what we are looking for, as in
- # } else ...
- if ( $rbrace_follower && $type ne 'b' ) {
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $rbreak_container = $self->[_rbreak_container_];
+ my $rshort_nested = $self->[_rshort_nested_];
+ my $rlines = $self->[_rlines_];
- unless ( $rbrace_follower->{$token} ) {
- $self->output_line_to_go();
- }
- $rbrace_follower = undef;
- }
+ # Variables needed for estimating line lengths
+ my $maximum_text_length;
+ my $starting_lentot;
+ my $length_tol = 1;
- $j_next =
- ( $rinput_token_array->[ $j + 1 ]->[_TYPE_] eq 'b' )
- ? $j + 2
- : $j + 1;
- $next_nonblank_token = $rinput_token_array->[$j_next]->[_TOKEN_];
- $next_nonblank_token_type =
- $rinput_token_array->[$j_next]->[_TYPE_];
+ my $excess_length_to_K = sub {
+ my ($K) = @_;
- # Do not allow breaks which would promote a side comment to a
- # block comment. In order to allow a break before an opening
- # or closing BLOCK, followed by a side comment, those sections
- # of code will handle this flag separately.
- my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
- my $is_opening_BLOCK =
- ( $type eq '{'
- && $token eq '{'
- && $block_type
- && !$rshort_nested->{$type_sequence}
- && $block_type ne 't' );
- my $is_closing_BLOCK =
- ( $type eq '}'
- && $token eq '}'
- && $block_type
- && !$rshort_nested->{$type_sequence}
- && $block_type ne 't' );
+ # Estimate the length from the line start to a given token
+ my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
+ my $excess_length = $length + $length_tol - $maximum_text_length;
+ return ($excess_length);
+ };
- if ( $side_comment_follows
- && !$is_opening_BLOCK
- && !$is_closing_BLOCK )
- {
- $no_internal_newlines = 1;
- }
+ my $is_broken_block = sub {
- # We're only going to handle breaking for code BLOCKS at this
- # (top) level. Other indentation breaks will be handled by
- # sub scan_list, which is better suited to dealing with them.
- if ($is_opening_BLOCK) {
+ # a block is broken if the input line numbers of the braces differ
+ my ($seqno) = @_;
+ my $K_opening = $K_opening_container->{$seqno};
+ return unless ( defined($K_opening) );
+ my $K_closing = $K_closing_container->{$seqno};
+ return unless ( defined($K_closing) );
+ return $rbreak_container->{$seqno}
+ || $rLL->[$K_closing]->[_LINE_INDEX_] !=
+ $rLL->[$K_opening]->[_LINE_INDEX_];
+ };
- # Tentatively output this token. This is required before
- # calling starting_one_line_block. We may have to unstore
- # it, though, if we have to break before it.
- $self->store_token_to_go($side_comment_follows);
+ # loop over all containers
+ my @open_block_stack;
+ my $iline = -1;
+ my $KNEXT = $self->[_K_first_seq_item_];
+ while ( defined($KNEXT) ) {
+ my $KK = $KNEXT;
+ $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
+ my $rtoken_vars = $rLL->[$KK];
+ my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+ if ( !$type_sequence ) {
+ next if ( $KK == 0 ); # first token in file may not be container
- # Look ahead to see if we might form a one-line block..
- my $too_long =
- $self->starting_one_line_block( $j, $jmax, $level, $slevel,
- $ci_level, $rinput_token_array );
- clear_breakpoint_undo_stack();
+ # A fault here implies that an error was made in the little loop at
+ # the bottom of sub 'respace_tokens' which set the values of
+ # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
+ # loop control lines above.
+ Fault("sequence = $type_sequence not defined at K=$KK");
+ }
- # to simplify the logic below, set a flag to indicate if
- # this opening brace is far from the keyword which introduces it
- my $keyword_on_same_line = 1;
- if ( ( $max_index_to_go >= 0 )
- && ( $last_nonblank_type eq ')' )
- && ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long ) )
- {
- $keyword_on_same_line = 0;
- }
+ # Patch: do not mark short blocks with welds.
+ # In some cases blinkers can form (case b690).
+ if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence) ) {
+ next;
+ }
- # decide if user requested break before '{'
- my $want_break =
+ # We are just looking at code blocks
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $type = $rtoken_vars->[_TYPE_];
+ next unless ( $type eq $token );
+ my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
+ next unless ($block_type);
- # use -bl flag if not a sub block of any type
- $block_type !~ /^sub\b/
- ? $rOpts->{'opening-brace-on-new-line'}
+ # Keep a stack of all acceptable block braces seen.
+ # Only consider blocks entirely on one line so dump the stack when line
+ # changes.
+ my $iline_last = $iline;
+ $iline = $rLL->[$KK]->[_LINE_INDEX_];
+ if ( $iline != $iline_last ) { @open_block_stack = () }
- # use -sbl flag for a named sub block
- : $block_type !~ /$ASUB_PATTERN/
- ? $rOpts->{'opening-sub-brace-on-new-line'}
+ if ( $token eq '}' ) {
+ if (@open_block_stack) { pop @open_block_stack }
+ }
+ next unless ( $token eq '{' );
- # use -asbl flag for an anonymous sub block
- : $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
+ # block must be balanced (bad scripts may be unbalanced)
+ my $K_opening = $K_opening_container->{$type_sequence};
+ my $K_closing = $K_closing_container->{$type_sequence};
+ next unless ( defined($K_opening) && defined($K_closing) );
- # Do not break if this token is welded to the left
- if ( weld_len_left( $type_sequence, $token ) ) {
- $want_break = 0;
- }
+ # require that this block be entirely on one line
+ next if ( $is_broken_block->($type_sequence) );
- # Break before an opening '{' ...
- if (
+ # See if this block fits on one line of allowed length (which may
+ # be different from the input script)
+ $starting_lentot =
+ $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
+ my $level = $rLL->[$KK]->[_LEVEL_];
+ my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
+ $maximum_text_length =
+ $maximum_text_length_at_level[$level] -
+ $ci_level * $rOpts_continuation_indentation;
- # if requested
- $want_break
+ # Dump the stack if block is too long and skip this block
+ if ( $excess_length_to_K->($K_closing) > 0 ) {
+ @open_block_stack = ();
+ next;
+ }
- # and we were unable to start looking for a block,
- && $index_start_one_line_block == UNDEFINED_INDEX
+ # OK, Block passes tests, remember it
+ push @open_block_stack, $type_sequence;
- # or if it will not be on same line as its keyword, so that
- # it will be outdented (eval.t, overload.t), and the user
- # has not insisted on keeping it on the right
- || ( !$keyword_on_same_line
- && !$rOpts->{'opening-brace-always-on-right'} )
+ # We are only marking nested code blocks,
+ # so check for a previous block on the stack
+ next unless ( @open_block_stack > 1 );
- )
- {
+ # Looks OK, mark this as a short nested block
+ $rshort_nested->{$type_sequence} = 1;
- # but only if allowed
- unless ($no_internal_newlines) {
+ }
+ return;
+}
- # since we already stored this token, we must unstore it
- $self->unstore_token_to_go();
+sub adjust_indentation_levels {
- # then output the line
- $self->output_line_to_go();
+ my ($self) = @_;
- # and now store this token at the start of a new line
- $self->store_token_to_go($side_comment_follows);
- }
- }
+ # Called once per file to do special indentation adjustments.
+ # These routines adjust levels either by changing _CI_LEVEL_ directly or
+ # by setting modified levels in the array $self->[_radjusted_levels_].
- # Now update for side comment
- if ($side_comment_follows) { $no_internal_newlines = 1 }
+ # Initialize the adjusted levels. These will be the levels actually used
+ # for computing indentation.
- # now output this line
- unless ($no_internal_newlines) {
- $self->output_line_to_go();
- }
- }
+ # NOTE: This routine is called after the weld routines, which may have
+ # already adjusted _LEVEL_, so we are making adjustments on top of those
+ # levels. It would be much nicer to have the weld routines also use this
+ # adjustment, but that gets complicated when we combine -gnu -wn and have
+ # some welded quotes.
+ my $radjusted_levels = $self->[_radjusted_levels_];
+ my $rLL = $self->[_rLL_];
+ foreach my $KK ( 0 .. @{$rLL} - 1 ) {
+ $radjusted_levels->[$KK] = $rLL->[$KK]->[_LEVEL_];
+ }
- elsif ($is_closing_BLOCK) {
+ # First set adjusted levels for any non-indenting braces.
+ $self->non_indenting_braces();
- # If there is a pending one-line block ..
- if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
+ # Adjust breaks and indentation list containers
+ $self->break_before_list_opening_containers();
- # we have to terminate it if..
- if (
+ # Set adjusted levels for the whitespace cycle option.
+ $self->whitespace_cycle_adjustment();
- # it is too long (final length may be different from
- # initial estimate). note: must allow 1 space for this
- # token
- excess_line_length( $index_start_one_line_block,
- $max_index_to_go ) >= 0
-
- # or if it has too many semicolons
- || ( $semicolons_before_block_self_destruct == 0
- && $last_nonblank_type ne ';' )
- )
- {
- destroy_one_line_block();
- }
- }
-
- # put a break before this closing curly brace if appropriate
- unless ( $no_internal_newlines
- || $index_start_one_line_block != UNDEFINED_INDEX )
- {
+ # Adjust continuation indentation if -bli is set
+ $self->bli_adjustment();
- # write out everything before this closing curly brace
- $self->output_line_to_go();
- }
+ $self->extended_ci()
+ if ( $rOpts->{'extended-continuation-indentation'} );
- # Now update for side comment
- if ($side_comment_follows) { $no_internal_newlines = 1 }
+ # Now clip any adjusted levels to be non-negative
+ $self->clip_adjusted_levels();
- # store the closing curly brace
- $self->store_token_to_go();
+ return;
+}
- # ok, we just stored a closing curly brace. Often, but
- # not always, we want to end the line immediately.
- # So now we have to check for special cases.
+sub clip_adjusted_levels {
- # if this '}' successfully ends a one-line block..
- my $is_one_line_block = 0;
- my $keep_going = 0;
- if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
+ # Replace any negative adjusted levels with zero.
+ # Negative levels can occur in files with brace errors.
+ my ($self) = @_;
+ my $radjusted_levels = $self->[_radjusted_levels_];
+ return unless defined($radjusted_levels) && @{$radjusted_levels};
+ foreach ( @{$radjusted_levels} ) { $_ = 0 if ( $_ < 0 ) }
+ return;
+}
- # Remember the type of token just before the
- # opening brace. It would be more general to use
- # a stack, but this will work for one-line blocks.
- $is_one_line_block =
- $types_to_go[$index_start_one_line_block];
+sub non_indenting_braces {
- # we have to actually make it by removing tentative
- # breaks that were set within it
- undo_forced_breakpoint_stack(0);
- set_nobreaks( $index_start_one_line_block,
- $max_index_to_go - 1 );
+ # Called once per file to handle the --non-indenting-braces parameter.
+ # Remove indentation within marked braces if requested
+ my ($self) = @_;
+ return unless ( $rOpts->{'non-indenting-braces'} );
- # then re-initialize for the next one-line block
- destroy_one_line_block();
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
- # then decide if we want to break after the '}' ..
- # We will keep going to allow certain brace followers as in:
- # do { $ifclosed = 1; last } unless $losing;
- #
- # But make a line break if the curly ends a
- # significant block:
- if (
- (
- $is_block_without_semicolon{$block_type}
+ my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
- # Follow users break point for
- # one line block types U & G, such as a 'try' block
- || $is_one_line_block =~ /^[UG]$/ && $j == $jmax
- )
+ my $radjusted_levels = $self->[_radjusted_levels_];
+ my $Kmax = @{$rLL} - 1;
+ my @seqno_stack;
- # if needless semicolon follows we handle it later
- && $next_nonblank_token ne ';'
- )
- {
- $self->output_line_to_go()
- unless ($no_internal_newlines);
- }
- }
+ my $is_non_indenting_brace = sub {
+ my ($KK) = @_;
- # set string indicating what we need to look for brace follower
- # tokens
- if ( $block_type eq 'do' ) {
- $rbrace_follower = \%is_do_follower;
- }
- elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
- $rbrace_follower = \%is_if_brace_follower;
- }
- elsif ( $block_type eq 'else' ) {
- $rbrace_follower = \%is_else_brace_follower;
- }
+ # looking for an opening block brace
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
+ return unless ( $token eq '{' && $block_type );
+
+ # followed by a comment
+ my $K_sc = $KK + 1;
+ $K_sc += 1
+ if ( $K_sc <= $Kmax && $rLL->[$K_sc]->[_TYPE_] eq 'b' );
+ return unless ( $K_sc <= $Kmax );
+ my $type_sc = $rLL->[$K_sc]->[_TYPE_];
+ return unless ( $type_sc eq '#' );
+
+ # on the same line
+ my $line_index = $rLL->[$KK]->[_LINE_INDEX_];
+ my $line_index_sc = $rLL->[$K_sc]->[_LINE_INDEX_];
+ return unless ( $line_index_sc == $line_index );
+
+ # get the side comment text
+ my $token_sc = $rLL->[$K_sc]->[_TOKEN_];
+
+ # The pattern ends in \s but we have removed the newline, so
+ # we added it back for the match. That way we require an exact
+ # match to the special string and also allow additional text.
+ $token_sc .= "\n";
+ my $is_nib = ( $token_sc =~ /$non_indenting_brace_pattern/ );
+ if ($is_nib) { $rspecial_side_comment_type->{$K_sc} = 'NIB' }
+ return $is_nib;
+ };
- # added eval for borris.t
- elsif ($is_sort_map_grep_eval{$block_type}
- || $is_one_line_block eq 'G' )
- {
- $rbrace_follower = undef;
- $keep_going = 1;
- }
+ foreach my $KK ( 0 .. $Kmax ) {
+ my $num = @seqno_stack;
+ my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+ if ($seqno) {
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ if ( $token eq '{' && $is_non_indenting_brace->($KK) ) {
+ push @seqno_stack, $seqno;
+ }
+ if ( $token eq '}' && @seqno_stack && $seqno_stack[-1] == $seqno ) {
+ pop @seqno_stack;
+ $num -= 1;
+ }
+ }
+ next unless $num;
+ $radjusted_levels->[$KK] -= $num;
+ }
+ return;
+}
- # anonymous sub
- elsif ( $block_type =~ /$ASUB_PATTERN/ ) {
+sub whitespace_cycle_adjustment {
- if ($is_one_line_block) {
- $rbrace_follower = \%is_anon_sub_1_brace_follower;
- }
- else {
- $rbrace_follower = \%is_anon_sub_brace_follower;
- }
- }
+ my $self = shift;
- # None of the above: specify what can follow a closing
- # brace of a block which is not an
- # if/elsif/else/do/sort/map/grep/eval
- # Testfiles:
- # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
- else {
- $rbrace_follower = \%is_other_brace_follower;
- }
+ # Called once per file to implement the --whitespace-cycle option
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
+ my $radjusted_levels = $self->[_radjusted_levels_];
- # See if an elsif block is followed by another elsif or else;
- # complain if not.
- if ( $block_type eq 'elsif' ) {
+ my $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
+ if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) {
- if ( $next_nonblank_token_type eq 'b' ) { # end of line?
- $looking_for_else = 1; # ok, check on next line
- }
- else {
+ my $Kmax = @{$rLL} - 1;
- unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
- write_logfile_entry("No else block :(\n");
- }
- }
- }
+ my $whitespace_last_level = -1;
+ my @whitespace_level_stack = ();
+ my $last_nonblank_type = 'b';
+ my $last_nonblank_token = '';
+ foreach my $KK ( 0 .. $Kmax ) {
+ my $level_abs = $radjusted_levels->[$KK];
+ my $level = $level_abs;
+ if ( $level_abs < $whitespace_last_level ) {
+ pop(@whitespace_level_stack);
+ }
+ if ( !@whitespace_level_stack ) {
+ push @whitespace_level_stack, $level_abs;
+ }
+ elsif ( $level_abs > $whitespace_last_level ) {
+ $level = $whitespace_level_stack[-1] +
+ ( $level_abs - $whitespace_last_level );
- # keep going after certain block types (map,sort,grep,eval)
- # added eval for borris.t
- if ($keep_going) {
+ if (
+ # 1 Try to break at a block brace
+ (
+ $level > $rOpts_whitespace_cycle
+ && $last_nonblank_type eq '{'
+ && $last_nonblank_token eq '{'
+ )
- # keep going
- }
+ # 2 Then either a brace or bracket
+ || ( $level > $rOpts_whitespace_cycle + 1
+ && $last_nonblank_token =~ /^[\{\[]$/ )
- # if no more tokens, postpone decision until re-entring
- elsif ( ( $next_nonblank_token_type eq 'b' )
- && $rOpts_add_newlines )
+ # 3 Then a paren too
+ || $level > $rOpts_whitespace_cycle + 2
+ )
{
- unless ($rbrace_follower) {
- $self->output_line_to_go()
- unless ($no_internal_newlines);
- }
+ $level = 1;
}
+ push @whitespace_level_stack, $level;
+ }
+ $level = $whitespace_level_stack[-1];
+ $radjusted_levels->[$KK] = $level;
- elsif ($rbrace_follower) {
+ $whitespace_last_level = $level_abs;
+ my $type = $rLL->[$KK]->[_TYPE_];
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ if ( $type ne 'b' ) {
+ $last_nonblank_type = $type;
+ $last_nonblank_token = $token;
+ }
+ }
+ }
+ return;
+}
- unless ( $rbrace_follower->{$next_nonblank_token} ) {
- $self->output_line_to_go()
- unless ($no_internal_newlines);
- }
- $rbrace_follower = undef;
- }
+use constant DEBUG_BBX => 0;
- else {
- $self->output_line_to_go() unless ($no_internal_newlines);
- }
+sub break_before_list_opening_containers {
- } # end treatment of closing block token
+ my ($self) = @_;
- # handle semicolon
- elsif ( $type eq ';' ) {
+ # This routine is called once per batch to implement parameters
+ # --break-before-hash-brace=n and similar -bbx=n flags
+ # and their associated indentation flags:
+ # --break-before-hash-brace-and-indent and similar -bbxi=n
- # kill one-line blocks with too many semicolons
- $semicolons_before_block_self_destruct--;
- if (
- ( $semicolons_before_block_self_destruct < 0 )
- || ( $semicolons_before_block_self_destruct == 0
- && $next_nonblank_token_type !~ /^[b\}]$/ )
- )
- {
- destroy_one_line_block();
- }
+ # Nothing to do if none of the -bbx=n parameters has been set
+ return unless %break_before_container_types;
- $self->store_token_to_go();
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
- $self->output_line_to_go()
- unless ( $no_internal_newlines
- || ( $rOpts_keep_interior_semicolons && $j < $jmax )
- || ( $next_nonblank_token eq '}' ) );
+ # Loop over all opening container tokens
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $ris_broken_container = $self->[_ris_broken_container_];
+ my $ris_permanently_broken = $self->[_ris_permanently_broken_];
+ my $rhas_list = $self->[_rhas_list_];
+ my $rhas_broken_list = $self->[_rhas_broken_list_];
+ my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
+ my $radjusted_levels = $self->[_radjusted_levels_];
+ my $rparent_of_seqno = $self->[_rparent_of_seqno_];
+ my $rlines = $self->[_rlines_];
+ my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
+ my $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
+ my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
+ my $rK_weld_right = $self->[_rK_weld_right_];
+
+ my $length_tol =
+ max( 1, $rOpts_continuation_indentation, $rOpts_indent_columns );
+ if ($rOpts_ignore_old_breakpoints) {
+ $length_tol += $rOpts_maximum_line_length;
+ }
+
+ my $rbreak_before_container_by_seqno = {};
+ my $rwant_reduced_ci = {};
+ foreach my $seqno ( keys %{$K_opening_container} ) {
+
+ #################################################################
+ # Part 1: Examine any -bbx=n flags
+ #################################################################
+
+ my $KK = $K_opening_container->{$seqno};
+ next if ( $rLL->[$KK]->[_BLOCK_TYPE_] );
+
+ # This must be a list or contain a list.
+ # Note1: switched from 'has_broken_list' to 'has_list' to fix b1024.
+ # Note2: 'has_list' holds the depth to the sub-list. We will require
+ # a depth of just 1
+ my $is_list = $self->is_list_by_seqno($seqno);
+ my $has_list = $rhas_list->{$seqno};
+
+ # Fix for b1173: if welded opening container, use flag of innermost
+ # seqno. Otherwise, the restriction $has_list==1 prevents triple and
+ # higher welds from following the -BBX parameters.
+ if ($total_weld_count) {
+ my $KK_test = $rK_weld_right->{$KK};
+ if ( defined($KK_test) ) {
+ my $seqno_inner = $rLL->[$KK_test]->[_TYPE_SEQUENCE_];
+ $is_list ||= $self->is_list_by_seqno($seqno_inner);
+ $has_list = $rhas_list->{$seqno_inner};
+ }
+ }
+
+ next unless ( $is_list || $has_list && $has_list == 1 );
+
+ my $has_broken_list = $rhas_broken_list->{$seqno};
+ my $has_list_with_lec = $rhas_broken_list_with_lec->{$seqno};
+
+ # Only for types of container tokens with a non-default break option
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ my $break_option = $break_before_container_types{$token};
+ next unless ($break_option);
+
+ # Require previous nonblank to be '=' or '=>'
+ my $Kprev = $KK - 1;
+ next if ( $Kprev < 0 );
+ my $prev_type = $rLL->[$Kprev]->[_TYPE_];
+ if ( $prev_type eq 'b' ) {
+ $Kprev--;
+ next if ( $Kprev < 0 );
+ $prev_type = $rLL->[$Kprev]->[_TYPE_];
+ }
+ next unless ( $is_equal_or_fat_comma{$prev_type} );
+
+ my $ci = $rLL->[$KK]->[_CI_LEVEL_];
+
+ DEBUG_BBX
+ && print STDOUT
+"BBX: Looking at seqno=$seqno, token = $token with option=$break_option\n";
- }
+ # -bbx=1 = stable, try to follow input
+ if ( $break_option == 1 ) {
- # handle here_doc target string
- elsif ( $type eq 'h' ) {
+ my $iline = $rLL->[$KK]->[_LINE_INDEX_];
+ my $rK_range = $rlines->[$iline]->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ next unless ( $KK == $Kfirst );
+ }
- # no newlines after seeing here-target
- $no_internal_newlines = 1;
- destroy_one_line_block();
- $self->store_token_to_go();
+ # -bbx=2 => apply this style only for a 'complex' list
+ elsif ( $break_option == 2 ) {
+
+ # break if this list contains a broken list with line-ending comma
+ my $ok_to_break;
+ my $Msg = "";
+ if ($has_list_with_lec) {
+ $ok_to_break = 1;
+ DEBUG_BBX && do { $Msg = "has list with lec;" };
}
- # handle all other token types
- else {
+ if ( !$ok_to_break ) {
+
+ # Turn off -xci if -bbx=2 and this container has a sublist but
+ # not a broken sublist. This avoids creating blinkers. The
+ # problem is that -xci can cause one-line lists to break open,
+ # and thereby creating formatting instability.
+ # This fixes cases b1033 b1036 b1037 b1038 b1042 b1043 b1044
+ # b1045 b1046 b1047 b1051 b1052 b1061.
+ if ($has_list) { $rno_xci_by_seqno->{$seqno} = 1 }
- $self->store_token_to_go();
+ my $parent = $rparent_of_seqno->{$seqno};
+ if ( $self->is_list_by_seqno($parent) ) {
+ DEBUG_BBX && do { $Msg = "parent is list" };
+ $ok_to_break = 1;
+ }
}
- # remember two previous nonblank OUTPUT tokens
- if ( $type ne '#' && $type ne 'b' ) {
- $last_last_nonblank_token = $last_nonblank_token;
- $last_last_nonblank_type = $last_nonblank_type;
- $last_nonblank_token = $token;
- $last_nonblank_type = $type;
- $last_nonblank_block_type = $block_type;
+ # Patch to fix b1099 for -lp
+ # ok in -lp mode if this is a list which contains a list
+ if ( !$ok_to_break && $rOpts_line_up_parentheses ) {
+ if ( $is_list && $has_list ) {
+ $ok_to_break = 1;
+ DEBUG_BBX && do { $Msg = "is list or has list" };
+ }
}
- # unset the continued-quote flag since it only applies to the
- # first token, and we want to resume normal formatting if
- # there are additional tokens on the line
- $in_continued_quote = 0;
+ if ( !$ok_to_break ) {
+ DEBUG_BBX
+ && print STDOUT "Not breaking at seqno=$seqno: $Msg\n";
+ next;
+ }
- } # end of loop over all tokens in this 'line_of_tokens'
+ DEBUG_BBX
+ && print STDOUT "OK to break at seqno=$seqno: $Msg\n";
- # we have to flush ..
- if (
+ # Patch: turn off -xci if -bbx=2 and -lp
+ # This fixes cases b1090 b1095 b1101 b1116 b1118 b1121 b1122
+ $rno_xci_by_seqno->{$seqno} = 1 if ($rOpts_line_up_parentheses);
+ }
- # if there is a side comment
- ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
+ # -bbx=3 = always break
+ elsif ( $break_option == 3 ) {
- # if this line ends in a quote
- # NOTE: This is critically important for insuring that quoted lines
- # do not get processed by things like -sot and -sct
- || $in_quote
+ # ok to break
+ }
- # if this is a VERSION statement
- || $is_VERSION_statement
+ # Shouldn't happen! Bad flag, but make behavior same as 3
+ else {
+ # ok to break
+ }
- # to keep a label at the end of a line
- || $type eq 'J'
+ # Set a flag for actual implementation later in
+ # sub insert_breaks_before_list_opening_containers
+ $rbreak_before_container_by_seqno->{$seqno} = 1;
+ DEBUG_BBX
+ && print STDOUT "BBX: ok to break at seqno=$seqno\n";
- # if we are instructed to keep all old line breaks
- || !$rOpts->{'delete-old-newlines'}
- )
- {
- destroy_one_line_block();
- $self->output_line_to_go();
- }
+ # -bbxi=0: Nothing more to do if the ci value remains unchanged
+ my $ci_flag = $container_indentation_options{$token};
+ next unless ($ci_flag);
- # mark old line breakpoints in current output stream
- if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
- my $jobp = $max_index_to_go;
- if ( $types_to_go[$max_index_to_go] eq 'b' && $max_index_to_go > 0 )
- {
- $jobp--;
- }
- $old_breakpoint_to_go[$jobp] = 1;
+ # -bbxi=1: This option removes ci and is handled in
+ # later sub set_adjusted_indentation
+ if ( $ci_flag == 1 ) {
+ $rwant_reduced_ci->{$seqno} = 1;
+ next;
}
- return;
- } ## end sub print_line_of_tokens
-} ## end block print_line_of_tokens
-
-sub consecutive_nonblank_lines {
- return $file_writer_object->get_consecutive_nonblank_lines() +
- $vertical_aligner_object->get_cached_line_count();
-}
-# sub output_line_to_go sends one logical line of tokens on down the
-# pipeline to the VerticalAligner package, breaking the line into continuation
-# lines as necessary. The line of tokens is ready to go in the "to_go"
-# arrays.
-sub output_line_to_go {
+ # -bbxi=2 ...
+
+ #################################################################
+ # Part 2: Perform tests before committing to changing ci and level
+ #################################################################
+
+ # Before changing the ci level of the opening container, we need
+ # to be sure that the container will be broken in the later stages of
+ # formatting. We have to do this because we are working early in the
+ # formatting pipeline. A problem can occur if we change the ci or
+ # level of the opening token but do not actually break the container
+ # open as expected. In most cases it wouldn't make any difference if
+ # we changed ci or not, but there are some edge cases where this
+ # can cause blinking states, so we need to try to only change ci if
+ # the container will really be broken.
+
+ # Only consider containers already broken
+ next if ( !$ris_broken_container->{$seqno} );
+
+ # Always ok to change ci for permanently broken containers
+ if ( $ris_permanently_broken->{$seqno} ) {
+ goto OK;
+ }
+
+ # Always OK if this list contains a broken sub-container with
+ # a non-terminal line-ending comma
+ if ($has_list_with_lec) { goto OK }
+
+ # From here on we are considering a single container...
+
+ # A single container must have at least 1 line-ending comma:
+ next unless ( $rlec_count_by_seqno->{$seqno} );
+
+ # Since it has a line-ending comma, it will stay broken if the -boc
+ # flag is set
+ if ($rOpts_break_at_old_comma_breakpoints) { goto OK }
+
+ # OK if the container contains multiple fat commas
+ # Better: multiple lines with fat commas
+ if ( !$rOpts_ignore_old_breakpoints ) {
+ my $rtype_count = $rtype_count_by_seqno->{$seqno};
+ next unless ($rtype_count);
+ my $fat_comma_count = $rtype_count->{'=>'};
+ DEBUG_BBX
+ && print STDOUT "BBX: fat comma count=$fat_comma_count\n";
+ if ( $fat_comma_count && $fat_comma_count >= 2 ) { goto OK }
+ }
+
+ # The last check we can make is to see if this container could fit on a
+ # single line. Use the least possble indentation in the estmate (ci=0),
+ # so we are not subtracting $ci * $rOpts_continuation_indentation from
+ # tablulated $maximum_text_length value.
+ my $level = $rLL->[$KK]->[_LEVEL_];
+ my $maximum_text_length = $maximum_text_length_at_level[$level];
+ my $K_closing = $K_closing_container->{$seqno};
+ my $length = $self->cumulative_length_before_K($K_closing) -
+ $self->cumulative_length_before_K($KK);
+ my $excess_length = $length - $maximum_text_length;
+ DEBUG_BBX
+ && print STDOUT
+"BBX: excess=$excess_length: maximum_text_length=$maximum_text_length, length=$length, ci=$ci\n";
- my $self = shift;
- my $rLL = $self->{rLL};
+ # OK if the net container definitely breaks on length
+ if ( $excess_length > $length_tol ) {
+ DEBUG_BBX
+ && print STDOUT "BBX: excess_length=$excess_length\n";
+ goto OK;
+ }
- # debug stuff; this routine can be called from many points
- FORMATTER_DEBUG_FLAG_OUTPUT && do {
- my ( $a, $b, $c ) = caller;
- write_diagnostics(
-"OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
- );
- my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
- write_diagnostics("$output_str\n");
- };
+ # Otherwise skip it
+ next;
- # Do not end line in a weld
- return if ( weld_len_right_to_go($max_index_to_go) );
+ #################################################################
+ # Part 3: Looks OK: apply -bbx=n and any related -bbxi=n flag
+ #################################################################
- # just set a tentative breakpoint if we might be in a one-line block
- if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
- set_forced_breakpoint($max_index_to_go);
- return;
- }
+ OK:
- my $comma_arrow_count_contained = match_opening_and_closing_tokens();
+ DEBUG_BBX && print STDOUT "BBX: OK to break\n";
- # tell the -lp option we are outputting a batch so it can close
- # any unfinished items in its stack
- finish_lp_batch();
+ # -bbhbi=n
+ # -bbsbi=n
+ # -bbpi=n
- # If this line ends in a code block brace, set breaks at any
- # previous closing code block braces to breakup a chain of code
- # blocks on one line. This is very rare but can happen for
- # user-defined subs. For example we might be looking at this:
- # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
- my $saw_good_break = 0; # flag to force breaks even if short line
- if (
+ # where:
- # looking for opening or closing block brace
- $block_type_to_go[$max_index_to_go]
+ # n=0 default indentation (usually one ci)
+ # n=1 outdent one ci
+ # n=2 indent one level (minus one ci)
+ # n=3 indent one extra ci [This may be dropped]
- # but not one of these which are never duplicated on a line:
- # until|while|for|if|elsif|else
- && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
- )
- {
- my $lev = $nesting_depth_to_go[$max_index_to_go];
+ # NOTE: We are adjusting indentation of the opening container. The
+ # closing container will normally follow the indentation of the opening
+ # container automatically, so this is not currently done.
+ next unless ($ci);
- # Walk backwards from the end and
- # set break at any closing block braces at the same level.
- # But quit if we are not in a chain of blocks.
- for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
- last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
- next if ( $levels_to_go[$i] > $lev ); # skip past higher level
+ # option 1: outdent
+ if ( $ci_flag == 1 ) {
+ $ci -= 1;
+ }
- if ( $block_type_to_go[$i] ) {
- if ( $tokens_to_go[$i] eq '}' ) {
- set_forced_breakpoint($i);
- $saw_good_break = 1;
- }
- }
+ # option 2: indent one level
+ elsif ( $ci_flag == 2 ) {
+ $ci -= 1;
+ $radjusted_levels->[$KK] += 1;
+ }
- # quit if we see anything besides words, function, blanks
- # at this level
- elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
+ # unknown option
+ else {
+ # Shouldn't happen - leave ci unchanged
}
+
+ $rLL->[$KK]->[_CI_LEVEL_] = $ci if ( $ci >= 0 );
}
- my $imin = 0;
- my $imax = $max_index_to_go;
+ $self->[_rbreak_before_container_by_seqno_] =
+ $rbreak_before_container_by_seqno;
+ $self->[_rwant_reduced_ci_] = $rwant_reduced_ci;
+ return;
+}
- # trim any blank tokens
- if ( $max_index_to_go >= 0 ) {
- if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
- if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
- }
+use constant DEBUG_XCI => 0;
- # anything left to write?
- if ( $imin <= $imax ) {
+sub extended_ci {
- # add a blank line before certain key types but not after a comment
- if ( $last_line_leading_type !~ /^[#]/ ) {
- my $want_blank = 0;
- my $leading_token = $tokens_to_go[$imin];
- my $leading_type = $types_to_go[$imin];
+ # This routine implements the -xci (--extended-continuation-indentation)
+ # flag. We add CI to interior tokens of a container which itself has CI but
+ # only if a token does not already have CI.
- # blank lines before subs except declarations and one-liners
- if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
- $want_blank = $rOpts->{'blank-lines-before-subs'}
- if ( $self->terminal_type_i( $imin, $imax ) !~ /^[\;\}]$/ );
- }
+ # To do this, we will locate opening tokens which themselves have
+ # continuation indentation (CI). We track them with their sequence
+ # numbers. These sequence numbers are called 'controlling sequence
+ # numbers'. They apply continuation indentation to the tokens that they
+ # contain. These inner tokens remember their controlling sequence numbers.
+ # Later, when these inner tokens are output, they have to see if the output
+ # lines with their controlling tokens were output with CI or not. If not,
+ # then they must remove their CI too.
- # break before all package declarations
- elsif ($leading_token =~ /^(package\s)/
- && $leading_type eq 'i' )
- {
- $want_blank = $rOpts->{'blank-lines-before-packages'};
- }
+ # The controlling CI concept works hierarchically. But CI itself is not
+ # hierarchical; it is either on or off. There are some rare instances where
+ # it would be best to have hierarchical CI too, but not enough to be worth
+ # the programming effort.
- # break before certain key blocks except one-liners
- if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
- $want_blank = $rOpts->{'blank-lines-before-subs'}
- if ( $self->terminal_type_i( $imin, $imax ) ne '}' );
- }
+ # The operations to remove unwanted CI are done in sub 'undo_ci'.
- # Break before certain block types if we haven't had a
- # break at this level for a while. This is the
- # difficult decision..
- elsif ($leading_type eq 'k'
- && $last_line_leading_type ne 'b'
- && $leading_token =~ /^(unless|if|while|until|for|foreach)$/ )
- {
- my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
- if ( !defined($lc) ) { $lc = 0 }
+ my ($self) = @_;
- # patch for RT #128216: no blank line inserted at a level change
- if ( $levels_to_go[$imin] != $last_line_leading_level ) {
- $lc = 0;
- }
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
- $want_blank =
- $rOpts->{'blanks-before-blocks'}
- && $lc >= $rOpts->{'long-block-line-count'}
- && consecutive_nonblank_lines() >=
- $rOpts->{'long-block-line-count'}
- && $self->terminal_type_i( $imin, $imax ) ne '}';
- }
+ my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
+ my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
+ my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
+ my $rlines = $self->[_rlines_];
+ my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
+ my $ris_bli_container = $self->[_ris_bli_container_];
+
+ my %available_space;
+
+ # Loop over all opening container tokens
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $ris_broken_container = $self->[_ris_broken_container_];
+ my @seqno_stack;
+ my $seqno_top;
+ my $KLAST;
+ my $KNEXT = $self->[_K_first_seq_item_];
+
+ # The following variable can be used to allow a little extra space to
+ # avoid blinkers. A value $len_tol = 20 fixed the following
+ # fixes cases: b1025 b1026 b1027 b1028 b1029 b1030 but NOT b1031.
+ # It turned out that the real problem was misparsing a list brace as
+ # a code block in a 'use' statement when the line length was extremely
+ # small. A value of 0 works now, but a slightly larger value can
+ # be used to minimize the chance of a blinker.
+ my $len_tol = 0;
+
+ while ( defined($KNEXT) ) {
- # Check for blank lines wanted before a closing brace
- if ( $leading_token eq '}' ) {
- if ( $rOpts->{'blank-lines-before-closing-block'}
- && $block_type_to_go[$imin]
- && $block_type_to_go[$imin] =~
- /$blank_lines_before_closing_block_pattern/ )
+ # Fix all tokens up to the next sequence item if we are changing CI
+ if ($seqno_top) {
+
+ my $is_list = $ris_list_by_seqno->{$seqno_top};
+ my $space = $available_space{$seqno_top};
+ my $length = $rLL->[$KLAST]->[_CUMULATIVE_LENGTH_];
+ my $count = 0;
+ for ( my $Kt = $KLAST + 1 ; $Kt < $KNEXT ; $Kt++ ) {
+
+ # But do not include tokens which might exceed the line length
+ # and are not in a list.
+ # ... This fixes case b1031
+ my $length_before = $length;
+ $length = $rLL->[$Kt]->[_CUMULATIVE_LENGTH_];
+ if (
+ !$rLL->[$Kt]->[_CI_LEVEL_]
+ && ( $is_list
+ || $length - $length_before < $space
+ || $rLL->[$Kt]->[_TYPE_] eq '#' )
+ )
{
- my $nblanks = $rOpts->{'blank-lines-before-closing-block'};
- if ( $nblanks > $want_blank ) {
- $want_blank = $nblanks;
- }
+ $rLL->[$Kt]->[_CI_LEVEL_] = 1;
+ $rseqno_controlling_my_ci->{$Kt} = $seqno_top;
+ $count++;
}
}
+ $ris_seqno_controlling_ci->{$seqno_top} += $count;
+ }
+
+ $KLAST = $KNEXT;
+ my $KK = $KNEXT;
+ $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
- if ($want_blank) {
+ my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+ my $K_opening = $K_opening_container->{$seqno};
- # future: send blank line down normal path to VerticalAligner
- Perl::Tidy::VerticalAligner::flush();
- $file_writer_object->require_blank_code_lines($want_blank);
- }
+ # see if we have reached the end of the current controlling container
+ if ( $seqno_top && $seqno == $seqno_top ) {
+ $seqno_top = pop @seqno_stack;
}
- # update blank line variables and count number of consecutive
- # non-blank, non-comment lines at this level
- $last_last_line_leading_level = $last_line_leading_level;
- $last_line_leading_level = $levels_to_go[$imin];
- if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
- $last_line_leading_type = $types_to_go[$imin];
- if ( $last_line_leading_level == $last_last_line_leading_level
- && $last_line_leading_type ne 'b'
- && $last_line_leading_type ne '#'
- && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
- {
- $nonblank_lines_at_depth[$last_line_leading_level]++;
+ # Patch to fix some block types...
+ # Certain block types arrive from the tokenizer without CI but should
+ # have it for this option. These include anonymous subs and
+ # do sort map grep eval
+ my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
+ if ( $block_type && $is_block_with_ci{$block_type} ) {
+ $rLL->[$KK]->[_CI_LEVEL_] = 1;
+ if ($seqno_top) {
+ $rseqno_controlling_my_ci->{$KK} = $seqno_top;
+ $ris_seqno_controlling_ci->{$seqno_top}++;
+ }
}
- else {
- $nonblank_lines_at_depth[$last_line_leading_level] = 1;
+
+ # If this does not have ci, update ci if necessary and continue looking
+ if ( !$rLL->[$KK]->[_CI_LEVEL_] ) {
+ if ($seqno_top) {
+ $rLL->[$KK]->[_CI_LEVEL_] = 1;
+ $rseqno_controlling_my_ci->{$KK} = $seqno_top;
+ $ris_seqno_controlling_ci->{$seqno_top}++;
+ }
+ next;
}
- FORMATTER_DEBUG_FLAG_FLUSH && do {
- my ( $package, $file, $line ) = caller;
- print STDOUT
-"FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
- };
+ # Skip if requested by -bbx to avoid blinkers
+ if ( $rno_xci_by_seqno->{$seqno} ) {
+ next;
+ }
- # add a couple of extra terminal blank tokens
- pad_array_to_go();
+ # Skip if this is a -bli container (this fixes case b1065) Note: case
+ # b1065 is also fixed by the update for b1055, so this update is not
+ # essential now. But there does not seem to be a good reason to add
+ # xci and bli together, so the update is retained.
+ if ( $ris_bli_container->{$seqno} ) {
+ next;
+ }
- # set all forced breakpoints for good list formatting
- my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
+ # We are looking for opening container tokens with ci
+ next unless ( defined($K_opening) && $KK == $K_opening );
- my $old_line_count_in_batch =
- $self->get_old_line_count( $K_to_go[0], $K_to_go[$max_index_to_go] );
+ # Make sure there is a corresponding closing container
+ # (could be missing if the script has a brace error)
+ my $K_closing = $K_closing_container->{$seqno};
+ next unless defined($K_closing);
+ # Require different input lines. This will filter out a large number
+ # of small hash braces and array brackets. If we accidentally filter
+ # out an important container, it will get fixed on the next pass.
if (
- $is_long_line
- || $old_line_count_in_batch > 1
-
- # must always call scan_list() with unbalanced batches because it
- # is maintaining some stacks
- || is_unbalanced_batch()
-
- # call scan_list if we might want to break at commas
- || (
- $comma_count_in_batch
- && ( $rOpts_maximum_fields_per_table > 0
- || $rOpts_comma_arrow_breakpoints == 0 )
- )
-
- # call scan_list if user may want to break open some one-line
- # hash references
- || ( $comma_arrow_count_contained
- && $rOpts_comma_arrow_breakpoints != 3 )
+ $rLL->[$K_opening]->[_LINE_INDEX_] ==
+ $rLL->[$K_closing]->[_LINE_INDEX_]
+ && ( $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[$K_opening]->[_CUMULATIVE_LENGTH_] >
+ $rOpts_maximum_line_length )
)
{
- ## This caused problems in one version of perl for unknown reasons:
- ## $saw_good_break ||= scan_list();
- my $sgb = scan_list();
- $saw_good_break ||= $sgb;
+ DEBUG_XCI
+ && print "XCI: Skipping seqno=$seqno, require different lines\n";
+ next;
}
- # let $ri_first and $ri_last be references to lists of
- # first and last tokens of line fragments to output..
- my ( $ri_first, $ri_last );
+ # Do not apply -xci if adding extra ci will put the container contents
+ # beyond the line length limit (fixes cases b899 b935)
+ my $level = $rLL->[$K_opening]->[_LEVEL_];
+ my $ci_level = $rLL->[$K_opening]->[_CI_LEVEL_];
+ my $maximum_text_length =
+ $maximum_text_length_at_level[$level] -
+ $ci_level * $rOpts_continuation_indentation;
- # write a single line if..
- if (
-
- # we aren't allowed to add any newlines
- !$rOpts_add_newlines
+ # remember how much space is available for patch b1031 above
+ my $space =
+ $maximum_text_length - $len_tol - $rOpts_continuation_indentation;
- # or, we don't already have an interior breakpoint
- # and we didn't see a good breakpoint
- || (
- !$forced_breakpoint_count
- && !$saw_good_break
-
- # and this line is 'short'
- && !$is_long_line
- )
- )
- {
- @{$ri_first} = ($imin);
- @{$ri_last} = ($imax);
+ if ( $space < 0 ) {
+ DEBUG_XCI && print "XCI: Skipping seqno=$seqno, space=$space\n";
+ next;
}
+ DEBUG_XCI && print "XCI: OK seqno=$seqno, space=$space\n";
- # otherwise use multiple lines
- else {
+ $available_space{$seqno} = $space;
- ( $ri_first, $ri_last, my $colon_count ) =
- $self->set_continuation_breaks($saw_good_break);
+ # This becomes the next controlling container
+ push @seqno_stack, $seqno_top if ($seqno_top);
+ $seqno_top = $seqno;
+ }
+ return;
+}
- $self->break_all_chain_tokens( $ri_first, $ri_last );
+sub bli_adjustment {
- break_equals( $ri_first, $ri_last );
+ # Called once per file to implement the --brace-left-and-indent option.
+ # If -bli is set, adds one continuation indentation for certain braces
+ my $self = shift;
+ return unless ( $rOpts->{'brace-left-and-indent'} );
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
+ my $ris_bli_container = $self->[_ris_bli_container_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $KNEXT = $self->[_K_first_seq_item_];
- # now we do a correction step to clean this up a bit
- # (The only time we would not do this is for debugging)
- if ( $rOpts->{'recombine'} ) {
- ( $ri_first, $ri_last ) =
- recombine_breakpoints( $ri_first, $ri_last );
+ while ( defined($KNEXT) ) {
+ my $KK = $KNEXT;
+ $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
+ my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
+ if ( $block_type && $block_type =~ /$bli_pattern/ ) {
+ my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+ my $K_opening = $K_opening_container->{$seqno};
+ if ( defined($K_opening) ) {
+ if ( $KK eq $K_opening ) {
+ $rLL->[$KK]->[_CI_LEVEL_]++;
+ $ris_bli_container->{$seqno} = 1;
+ }
+ else {
+ $rLL->[$KK]->[_CI_LEVEL_] =
+ $rLL->[$K_opening]->[_CI_LEVEL_];
+ }
}
-
- $self->insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
}
+ }
+ return;
+}
- # do corrector step if -lp option is used
- my $do_not_pad = 0;
- if ($rOpts_line_up_parentheses) {
- $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
- }
- $self->unmask_phantom_semicolons( $ri_first, $ri_last );
- if ( $rOpts_one_line_block_semicolons == 0 ) {
- $self->delete_one_line_semicolons( $ri_first, $ri_last );
- }
+sub find_multiline_qw {
- # The line breaks for this batch of code have been finalized. Now we
- # can to package the results for further processing. We will switch
- # from the local '_to_go' buffer arrays (i-index) back to the global
- # token arrays (K-index) at this point.
- my $rlines_K;
- my $index_error;
- for ( my $n = 0 ; $n < @{$ri_first} ; $n++ ) {
- my $ibeg = $ri_first->[$n];
- my $Kbeg = $K_to_go[$ibeg];
- my $iend = $ri_last->[$n];
- my $Kend = $K_to_go[$iend];
- if ( $iend - $ibeg != $Kend - $Kbeg ) {
- $index_error = $n unless defined($index_error);
- }
- push @{$rlines_K},
- [ $Kbeg, $Kend, $forced_breakpoint_to_go[$iend] ];
- }
+ my $self = shift;
- # Check correctness of the mapping between the i and K token indexes
- if ( defined($index_error) ) {
+ # Multiline qw quotes are not sequenced items like containers { [ (
+ # but behave in some respects in a similar way. So this routine finds them
+ # and creates a separate sequence number system for later use.
- # Temporary debug code - should never get here
- for ( my $n = 0 ; $n < @{$ri_first} ; $n++ ) {
- my $ibeg = $ri_first->[$n];
- my $Kbeg = $K_to_go[$ibeg];
- my $iend = $ri_last->[$n];
- my $Kend = $K_to_go[$iend];
- my $idiff = $iend - $ibeg;
- my $Kdiff = $Kend - $Kbeg;
- print STDERR <<EOM;
-line $n, irange $ibeg-$iend = $idiff, Krange $Kbeg-$Kend = $Kdiff;
-EOM
- }
- Fault("Index error at line $index_error; i and K ranges differ");
- }
+ # This is straightforward because they always begin at the end of one line
+ # and and at the beginning of a later line. This is true no matter how we
+ # finally make our line breaks, so we can find them before deciding on new
+ # line breaks.
- my $rbatch_hash = {
- rlines_K => $rlines_K,
- do_not_pad => $do_not_pad,
- ibeg0 => $ri_first->[0],
- };
+ my $rstarting_multiline_qw_seqno_by_K = {};
+ my $rending_multiline_qw_seqno_by_K = {};
+ my $rKrange_multiline_qw_by_seqno = {};
+ my $rmultiline_qw_has_extra_level = {};
- $self->send_lines_to_vertical_aligner($rbatch_hash);
+ my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
- # Insert any requested blank lines after an opening brace. We have to
- # skip back before any side comment to find the terminal token
- my $iterm;
- for ( $iterm = $imax ; $iterm >= $imin ; $iterm-- ) {
- next if $types_to_go[$iterm] eq '#';
- next if $types_to_go[$iterm] eq 'b';
- last;
- }
+ my $rlines = $self->[_rlines_];
+ my $rLL = $self->[_rLL_];
+ my $qw_seqno;
+ my $num_qw_seqno = 0;
+ my $K_start_multiline_qw;
- # write requested number of blank lines after an opening block brace
- if ( $iterm >= $imin && $types_to_go[$iterm] eq '{' ) {
- if ( $rOpts->{'blank-lines-after-opening-block'}
- && $block_type_to_go[$iterm]
- && $block_type_to_go[$iterm] =~
- /$blank_lines_after_opening_block_pattern/ )
- {
- my $nblanks = $rOpts->{'blank-lines-after-opening-block'};
- Perl::Tidy::VerticalAligner::flush();
- $file_writer_object->require_blank_code_lines($nblanks);
+ foreach my $line_of_tokens ( @{$rlines} ) {
+
+ my $line_type = $line_of_tokens->{_line_type};
+ next unless ( $line_type eq 'CODE' );
+ my $rK_range = $line_of_tokens->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ next unless ( defined($Kfirst) && defined($Klast) ); # skip blank line
+ if ( defined($K_start_multiline_qw) ) {
+ my $type = $rLL->[$Kfirst]->[_TYPE_];
+
+ # shouldn't happen
+ if ( $type ne 'q' ) {
+ DEVEL_MODE && print STDERR <<EOM;
+STRANGE: started multiline qw at K=$K_start_multiline_qw but didn't see q qw at K=$Kfirst\n";
+EOM
+ $K_start_multiline_qw = undef;
+ next;
+ }
+ my $Kprev = $self->K_previous_nonblank($Kfirst);
+ my $Knext = $self->K_next_nonblank($Kfirst);
+ my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
+ my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
+ if ( $type_m eq 'q' && $type_p ne 'q' ) {
+ $rending_multiline_qw_seqno_by_K->{$Kfirst} = $qw_seqno;
+ $rKrange_multiline_qw_by_seqno->{$qw_seqno} =
+ [ $K_start_multiline_qw, $Kfirst ];
+ $K_start_multiline_qw = undef;
+ $qw_seqno = undef;
+ }
+ }
+ if ( !defined($K_start_multiline_qw)
+ && $rLL->[$Klast]->[_TYPE_] eq 'q' )
+ {
+ my $Kprev = $self->K_previous_nonblank($Klast);
+ my $Knext = $self->K_next_nonblank($Klast);
+ my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
+ my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
+ if ( $type_m ne 'q' && $type_p eq 'q' ) {
+ $num_qw_seqno++;
+ $qw_seqno = 'q' . $num_qw_seqno;
+ $K_start_multiline_qw = $Klast;
+ $rstarting_multiline_qw_seqno_by_K->{$Klast} = $qw_seqno;
}
}
}
- prepare_for_new_input_lines();
-
- return;
-}
-
-sub note_added_semicolon {
- my ($line_number) = @_;
- $last_added_semicolon_at = $line_number;
- if ( $added_semicolon_count == 0 ) {
- $first_added_semicolon_at = $last_added_semicolon_at;
- }
- $added_semicolon_count++;
- write_logfile_entry("Added ';' here\n");
- return;
-}
-
-sub note_deleted_semicolon {
- $last_deleted_semicolon_at = $input_line_number;
- if ( $deleted_semicolon_count == 0 ) {
- $first_deleted_semicolon_at = $last_deleted_semicolon_at;
- }
- $deleted_semicolon_count++;
- write_logfile_entry("Deleted unnecessary ';' at line $input_line_number\n");
- return;
-}
+ # Give multiline qw lists extra indentation instead of CI. This option
+ # works well but is currently only activated when the -xci flag is set.
+ # The reason is to avoid unexpected changes in formatting.
+ if ( $rOpts->{'extended-continuation-indentation'} ) {
+ while ( my ( $qw_seqno, $rKrange ) =
+ each %{$rKrange_multiline_qw_by_seqno} )
+ {
+ my ( $Kbeg, $Kend ) = @{$rKrange};
-sub note_embedded_tab {
- $embedded_tab_count++;
- $last_embedded_tab_at = $input_line_number;
- if ( !$first_embedded_tab_at ) {
- $first_embedded_tab_at = $last_embedded_tab_at;
- }
+ # require isolated closing token
+ my $token_end = $rLL->[$Kend]->[_TOKEN_];
+ next
+ unless ( length($token_end) == 1
+ && ( $is_closing_token{$token_end} || $token_end eq '>' ) );
- if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
- write_logfile_entry("Embedded tabs in quote or pattern\n");
- }
- return;
-}
+ # require isolated opening token
+ my $token_beg = $rLL->[$Kbeg]->[_TOKEN_];
-sub starting_one_line_block {
+ # allow space(s) after the qw
+ if ( length($token_beg) > 3 && substr( $token_beg, 2, 1 ) eq ' ' ) {
+ $token_beg =~ s/\s+//;
+ }
- # after seeing an opening curly brace, look for the closing brace
- # and see if the entire block will fit on a line. This routine is
- # not always right because it uses the old whitespace, so a check
- # is made later (at the closing brace) to make sure we really
- # have a one-line block. We have to do this preliminary check,
- # though, because otherwise we would always break at a semicolon
- # within a one-line block if the block contains multiple statements.
+ next unless ( length($token_beg) == 3 );
- my ( $self, $j, $jmax, $level, $slevel, $ci_level, $rtoken_array ) = @_;
- my $rbreak_container = $self->{rbreak_container};
- my $rshort_nested = $self->{rshort_nested};
+ foreach my $KK ( $Kbeg + 1 .. $Kend - 1 ) {
+ $rLL->[$KK]->[_LEVEL_]++;
+ $rLL->[$KK]->[_CI_LEVEL_] = 0;
+ }
- my $jmax_check = @{$rtoken_array};
- if ( $jmax_check < $jmax ) {
- Fault("jmax=$jmax > $jmax_check");
+ # set flag for -wn option, which will remove the level
+ $rmultiline_qw_has_extra_level->{$qw_seqno} = 1;
+ }
}
- # kill any current block - we can only go 1 deep
- destroy_one_line_block();
-
- # return value:
- # 1=distance from start of block to opening brace exceeds line length
- # 0=otherwise
+ # For the -lp option we need to mark all parent containers of
+ # multiline quotes
+ if ($rOpts_line_up_parentheses) {
- my $i_start = 0;
+ while ( my ( $qw_seqno, $rKrange ) =
+ each %{$rKrange_multiline_qw_by_seqno} )
+ {
+ my ( $Kbeg, $Kend ) = @{$rKrange};
+ my $parent_seqno = $self->parent_seqno_by_K($Kend);
+ next unless ($parent_seqno);
+
+ # If the parent container exactly surrounds this qw, then -lp
+ # formatting seems to work so we will not mark it.
+ my $is_tightly_contained;
+ my $Kn = $self->K_next_nonblank($Kend);
+ my $seqno_n = defined($Kn) ? $rLL->[$Kn]->[_TYPE_SEQUENCE_] : undef;
+ if ( defined($seqno_n) && $seqno_n eq $parent_seqno ) {
+
+ my $Kp = $self->K_previous_nonblank($Kbeg);
+ my $seqno_p =
+ defined($Kp) ? $rLL->[$Kp]->[_TYPE_SEQUENCE_] : undef;
+ if ( defined($seqno_p) && $seqno_p eq $parent_seqno ) {
+ $is_tightly_contained = 1;
+ }
+ }
- # shouldn't happen: there must have been a prior call to
- # store_token_to_go to put the opening brace in the output stream
- if ( $max_index_to_go < 0 ) {
- Fault("program bug: store_token_to_go called incorrectly\n");
- }
+ $ris_excluded_lp_container->{$parent_seqno} = 1
+ unless ($is_tightly_contained);
- # return if block should be broken
- my $type_sequence = $rtoken_array->[$j]->[_TYPE_SEQUENCE_];
- if ( $rbreak_container->{$type_sequence} ) {
- return 0;
+ # continue up the tree marking parent containers
+ while (1) {
+ $parent_seqno = $self->[_rparent_of_seqno_]->{$parent_seqno};
+ last
+ unless ( defined($parent_seqno)
+ && $parent_seqno ne SEQ_ROOT );
+ $ris_excluded_lp_container->{$parent_seqno} = 1;
+ }
+ }
}
- my $block_type = $rtoken_array->[$j]->[_BLOCK_TYPE_];
+ $self->[_rstarting_multiline_qw_seqno_by_K_] =
+ $rstarting_multiline_qw_seqno_by_K;
+ $self->[_rending_multiline_qw_seqno_by_K_] =
+ $rending_multiline_qw_seqno_by_K;
+ $self->[_rKrange_multiline_qw_by_seqno_] = $rKrange_multiline_qw_by_seqno;
+ $self->[_rmultiline_qw_has_extra_level_] = $rmultiline_qw_has_extra_level;
- # find the starting keyword for this block (such as 'if', 'else', ...)
+ return;
+}
- if ( $block_type =~ /^[\{\}\;\:]$/ || $block_type =~ /^package/ ) {
- $i_start = $max_index_to_go;
- }
+sub is_excluded_lp {
- # the previous nonblank token should start these block types
- elsif (( $last_last_nonblank_token_to_go eq $block_type )
- || ( $block_type =~ /^sub\b/ )
- || $block_type =~ /\(\)/ )
- {
- $i_start = $last_last_nonblank_index_to_go;
+ # decide if this container is excluded by user request
+ # returns true if this token is excluded (i.e., may not use -lp)
+ # returns false otherwise
- # For signatures and extended syntax ...
- # If this brace follows a parenthesized list, we should look back to
- # find the keyword before the opening paren because otherwise we might
- # form a one line block which stays intack, and cause the parenthesized
- # expression to break open. That looks bad. However, actually
- # searching for the opening paren is slow and tedius.
- # The actual keyword is often at the start of a line, but might not be.
- # For example, we might have an anonymous sub with signature list
- # following a =>. It is safe to mark the start anywhere before the
- # opening paren, so we just go back to the prevoious break (or start of
- # the line) if that is before the opening paren. The minor downside is
- # that we may very occasionally break open a block unnecessarily.
- if ( $tokens_to_go[$i_start] eq ')' ) {
- $i_start = $index_max_forced_break + 1;
- if ( $types_to_go[$i_start] eq 'b' ) { $i_start++; }
- my $lev = $levels_to_go[$i_start];
- if ( $lev > $level ) { return 0 }
+ # note similarity with sub 'is_excluded_weld'
+ my ( $self, $KK ) = @_;
+ my $rLL = $self->[_rLL_];
+ my $rtoken_vars = $rLL->[$KK];
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $rflags = $line_up_parentheses_exclusion_rules{$token};
+ return 0 unless ( defined($rflags) );
+ my ( $flag1, $flag2 ) = @{$rflags};
+
+ # There are two flags:
+ # flag1 excludes based on the preceding nonblank word
+ # flag2 excludes based on the contents of the container
+ return 0 unless ( defined($flag1) );
+ return 1 if $flag1 eq '*';
+
+ # Find the previous token
+ my ( $is_f, $is_k, $is_w );
+ my $Kp = $self->K_previous_nonblank($KK);
+ if ( defined($Kp) ) {
+ my $type_p = $rLL->[$Kp]->[_TYPE_];
+ my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
+
+ # keyword?
+ $is_k = $type_p eq 'k';
+
+ # function call?
+ $is_f = $self->[_ris_function_call_paren_]->{$seqno};
+
+ # either keyword or function call?
+ $is_w = $is_k || $is_f;
+ }
+
+ # Check for exclusion based on flag1 and the previous token:
+ my $match;
+ if ( $flag1 eq 'k' ) { $match = $is_k }
+ elsif ( $flag1 eq 'K' ) { $match = !$is_k }
+ elsif ( $flag1 eq 'f' ) { $match = $is_f }
+ elsif ( $flag1 eq 'F' ) { $match = !$is_f }
+ elsif ( $flag1 eq 'w' ) { $match = $is_w }
+ elsif ( $flag1 eq 'W' ) { $match = !$is_w }
+ return $match if ($match);
+
+ # Check for exclusion based on flag2 and the container contents
+ # Current options to filter on contents:
+ # 0 or blank: ignore container contents
+ # 1 exclude non-lists or lists with sublists
+ # 2 same as 1 but also exclude lists with code blocks
+
+ # Note:
+ # Containers with multiline-qw containers are automatically
+ # excluded so do not need to be checked.
+ if ($flag2) {
+
+ my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
+
+ my $is_list = $self->[_ris_list_by_seqno_]->{$seqno};
+ my $has_list = $self->[_rhas_list_]->{$seqno};
+ my $has_code_block = $self->[_rhas_code_block_]->{$seqno};
+ my $has_ternary = $self->[_rhas_ternary_]->{$seqno};
+ if ( !$is_list
+ || $has_list
+ || $flag2 eq '2' && ( $has_code_block || $has_ternary ) )
+ {
+ $match = 1;
}
}
+ return $match;
+}
- elsif ( $last_last_nonblank_token_to_go eq ')' ) {
+sub set_excluded_lp_containers {
- # For something like "if (xxx) {", the keyword "if" will be
- # just after the most recent break. This will be 0 unless
- # we have just killed a one-line block and are starting another.
- # (doif.t)
- # Note: cannot use inext_index_to_go[] here because that array
- # is still being constructed.
- $i_start = $index_max_forced_break + 1;
- if ( $types_to_go[$i_start] eq 'b' ) {
- $i_start++;
- }
+ my ($self) = @_;
+ return unless ($rOpts_line_up_parentheses);
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
- # Patch to avoid breaking short blocks defined with extended_syntax:
- # Strip off any trailing () which was added in the parser to mark
- # the opening keyword. For example, in the following
- # create( TypeFoo $e) {$bubba}
- # the blocktype would be marked as create()
- my $stripped_block_type = $block_type;
- $stripped_block_type =~ s/\(\)$//;
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
- unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
- return 0;
- }
- }
+ foreach my $seqno ( keys %{$K_opening_container} ) {
+ my $KK = $K_opening_container->{$seqno};
+ next unless defined($KK);
- # patch for SWITCH/CASE to retain one-line case/when blocks
- elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
+ # code blocks are always excluded by the -lp coding so we can skip them
+ next if ( $rLL->[$KK]->[_BLOCK_TYPE_] );
- # Note: cannot use inext_index_to_go[] here because that array
- # is still being constructed.
- $i_start = $index_max_forced_break + 1;
- if ( $types_to_go[$i_start] eq 'b' ) {
- $i_start++;
- }
- unless ( $tokens_to_go[$i_start] eq $block_type ) {
- return 0;
+ # see if a user exclusion rule turns off -lp for this container
+ if ( $self->is_excluded_lp($KK) ) {
+ $ris_excluded_lp_container->{$seqno} = 1;
}
}
+ return;
+}
- else {
- return 1;
- }
+######################################
+# CODE SECTION 6: Process line-by-line
+######################################
- my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
+sub process_all_lines {
- # see if length is too long to even start
- if ( $pos > maximum_line_length($i_start) ) {
- return 1;
- }
+ # Main loop over all lines of a file.
+ # Lines are processed according to type.
- foreach my $i ( $j + 1 .. $jmax ) {
+ my $self = shift;
+ my $rlines = $self->[_rlines_];
+ my $sink_object = $self->[_sink_object_];
+ my $fh_tee = $self->[_fh_tee_];
+ my $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
+ my $file_writer_object = $self->[_file_writer_object_];
+ my $logger_object = $self->[_logger_object_];
+ my $vertical_aligner_object = $self->[_vertical_aligner_object_];
+ my $save_logfile = $self->[_save_logfile_];
- # old whitespace could be arbitrarily large, so don't use it
- if ( $rtoken_array->[$i]->[_TYPE_] eq 'b' ) { $pos += 1 }
- else { $pos += rtoken_length($i) }
+ # Note for RT#118553, leave only one newline at the end of a file.
+ # Example code to do this is in comments below:
+ # my $Opt_trim_ending_blank_lines = 0;
+ # if ($Opt_trim_ending_blank_lines) {
+ # while ( my $line_of_tokens = pop @{$rlines} ) {
+ # my $line_type = $line_of_tokens->{_line_type};
+ # if ( $line_type eq 'CODE' ) {
+ # my $CODE_type = $line_of_tokens->{_code_type};
+ # next if ( $CODE_type eq 'BL' );
+ # }
+ # push @{$rlines}, $line_of_tokens;
+ # last;
+ # }
+ # }
- # ignore some small blocks
- my $type_sequence = $rtoken_array->[$i]->[_TYPE_SEQUENCE_];
- my $nobreak = $rshort_nested->{$type_sequence};
+ # But while this would be a trivial update, it would have very undesirable
+ # side effects when perltidy is run from within an editor on a small snippet.
+ # So this is best done with a separate filter, such
+ # as 'delete_ending_blank_lines.pl' in the examples folder.
- # Return false result if we exceed the maximum line length,
- if ( $pos > maximum_line_length($i_start) ) {
- return 0;
- }
+ # Flag to prevent blank lines when POD occurs in a format skipping sect.
+ my $in_format_skipping_section;
- # keep going for non-containers
- elsif ( !$type_sequence ) {
+ # set locations for blanks around long runs of keywords
+ my $rwant_blank_line_after = $self->keyword_group_scan();
- }
+ my $line_type = "";
+ my $i_last_POD_END = -10;
+ my $i = -1;
+ foreach my $line_of_tokens ( @{$rlines} ) {
+ $i++;
- # return if we encounter another opening brace before finding the
- # closing brace.
- elsif ($rtoken_array->[$i]->[_TOKEN_] eq '{'
- && $rtoken_array->[$i]->[_TYPE_] eq '{'
- && $rtoken_array->[$i]->[_BLOCK_TYPE_]
- && !$nobreak )
+ # insert blank lines requested for keyword sequences
+ if ( $i > 0
+ && defined( $rwant_blank_line_after->{ $i - 1 } )
+ && $rwant_blank_line_after->{ $i - 1 } == 1 )
{
- return 0;
+ $self->want_blank_line();
}
- # if we find our closing brace..
- elsif ($rtoken_array->[$i]->[_TOKEN_] eq '}'
- && $rtoken_array->[$i]->[_TYPE_] eq '}'
- && $rtoken_array->[$i]->[_BLOCK_TYPE_]
- && !$nobreak )
- {
+ my $last_line_type = $line_type;
+ $line_type = $line_of_tokens->{_line_type};
+ my $input_line = $line_of_tokens->{_line_text};
- # be sure any trailing comment also fits on the line
- my $i_nonblank =
- ( $rtoken_array->[ $i + 1 ]->[_TYPE_] eq 'b' ) ? $i + 2 : $i + 1;
+ # _line_type codes are:
+ # SYSTEM - system-specific code before hash-bang line
+ # CODE - line of perl code (including comments)
+ # POD_START - line starting pod, such as '=head'
+ # POD - pod documentation text
+ # POD_END - last line of pod section, '=cut'
+ # HERE - text of here-document
+ # HERE_END - last line of here-doc (target word)
+ # FORMAT - format section
+ # FORMAT_END - last line of format section, '.'
+ # DATA_START - __DATA__ line
+ # DATA - unidentified text following __DATA__
+ # END_START - __END__ line
+ # END - unidentified text following __END__
+ # ERROR - we are in big trouble, probably not a perl script
- # Patch for one-line sort/map/grep/eval blocks with side comments:
- # We will ignore the side comment length for sort/map/grep/eval
- # because this can lead to statements which change every time
- # perltidy is run. Here is an example from Denis Moskowitz which
- # oscillates between these two states without this patch:
+ # put a blank line after an =cut which comes before __END__ and __DATA__
+ # (required by podchecker)
+ if ( $last_line_type eq 'POD_END' && !$self->[_saw_END_or_DATA_] ) {
+ $i_last_POD_END = $i;
+ $file_writer_object->reset_consecutive_blank_lines();
+ if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
+ $self->want_blank_line();
+ }
+ }
-## --------
-## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
-## @baz;
-##
-## grep {
-## $_->foo ne 'bar'
-## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
-## @baz;
-## --------
+ # handle line of code..
+ if ( $line_type eq 'CODE' ) {
- # When the first line is input it gets broken apart by the main
- # line break logic in sub print_line_of_tokens.
- # When the second line is input it gets recombined by
- # print_line_of_tokens and passed to the output routines. The
- # output routines (set_continuation_breaks) do not break it apart
- # because the bond strengths are set to the highest possible value
- # for grep/map/eval/sort blocks, so the first version gets output.
- # It would be possible to fix this by changing bond strengths,
- # but they are high to prevent errors in older versions of perl.
+ my $CODE_type = $line_of_tokens->{_code_type};
+ $in_format_skipping_section = $CODE_type eq 'FS';
- if ( $rtoken_array->[$i_nonblank]->[_TYPE_] eq '#'
- && !$is_sort_map_grep{$block_type} )
- {
+ # Handle blank lines
+ if ( $CODE_type eq 'BL' ) {
- $pos += rtoken_length($i_nonblank);
+ # Keep this blank? Start with the flag -kbl=n, where
+ # n=0 ignore all old blank lines
+ # n=1 stable: keep old blanks, but limited by -mbl=n
+ # n=2 keep all old blank lines, regardless of -mbl=n
+ # If n=0 we delete all old blank lines and let blank line
+ # rules generate any needed blank lines.
+ my $kgb_keep = $rOpts_keep_old_blank_lines;
+
+ # Then delete lines requested by the keyword-group logic if
+ # allowed
+ if ( $kgb_keep == 1
+ && defined( $rwant_blank_line_after->{$i} )
+ && $rwant_blank_line_after->{$i} == 2 )
+ {
+ $kgb_keep = 0;
+ }
- if ( $i_nonblank > $i + 1 ) {
+ # But always keep a blank line following an =cut
+ if ( $i - $i_last_POD_END < 3 && !$kgb_keep ) {
+ $kgb_keep = 1;
+ }
- # source whitespace could be anything, assume
- # at least one space before the hash on output
- if ( $rtoken_array->[ $i + 1 ]->[_TYPE_] eq 'b' ) {
- $pos += 1;
- }
- else { $pos += rtoken_length( $i + 1 ) }
+ if ($kgb_keep) {
+ $self->flush($CODE_type);
+ $file_writer_object->write_blank_code_line(
+ $rOpts_keep_old_blank_lines == 2 );
+ $self->[_last_line_leading_type_] = 'b';
}
+ next;
+ }
+ else {
- if ( $pos >= maximum_line_length($i_start) ) {
- return 0;
+ # Let logger see all non-blank lines of code. This is a slow operation
+ # so we avoid it if it is not going to be saved.
+ if ( $save_logfile && $logger_object ) {
+ $logger_object->black_box( $line_of_tokens,
+ $vertical_aligner_object->get_output_line_number );
}
}
- # ok, it's a one-line block
- create_one_line_block( $i_start, 20 );
- return 0;
- }
+ # Handle Format Skipping (FS) and Verbatim (VB) Lines
+ if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
+ $self->write_unindented_line("$input_line");
+ $file_writer_object->reset_consecutive_blank_lines();
+ next;
+ }
- # just keep going for other characters
- else {
+ # Handle all other lines of code
+ $self->process_line_of_CODE($line_of_tokens);
}
- }
-
- # Allow certain types of new one-line blocks to form by joining
- # input lines. These can be safely done, but for other block types,
- # we keep old one-line blocks but do not form new ones. It is not
- # always a good idea to make as many one-line blocks as possible,
- # so other types are not done. The user can always use -mangle.
- if ( $want_one_line_block{$block_type} ) {
- create_one_line_block( $i_start, 1 );
- }
- return 0;
-}
-
-sub unstore_token_to_go {
-
- # remove most recent token from output stream
- my $self = shift;
- if ( $max_index_to_go > 0 ) {
- $max_index_to_go--;
- }
- else {
- $max_index_to_go = UNDEFINED_INDEX;
- }
- return;
-}
-
-sub want_blank_line {
- my $self = shift;
- $self->flush();
- $file_writer_object->want_blank_line();
- return;
-}
-
-sub write_unindented_line {
- my ( $self, $line ) = @_;
- $self->flush();
- $file_writer_object->write_line($line);
- return;
-}
-
-sub undo_ci {
-
- # Undo continuation indentation in certain sequences
- # For example, we can undo continuation indentation in sort/map/grep chains
- # my $dat1 = pack( "n*",
- # map { $_, $lookup->{$_} }
- # sort { $a <=> $b }
- # grep { $lookup->{$_} ne $default } keys %$lookup );
- # To align the map/sort/grep keywords like this:
- # my $dat1 = pack( "n*",
- # map { $_, $lookup->{$_} }
- # sort { $a <=> $b }
- # grep { $lookup->{$_} ne $default } keys %$lookup );
- my ( $self, $ri_first, $ri_last ) = @_;
- my ( $line_1, $line_2, $lev_last );
- my $this_line_is_semicolon_terminated;
- my $max_line = @{$ri_first} - 1;
- # looking at each line of this batch..
- # We are looking at leading tokens and looking for a sequence
- # all at the same level and higher level than enclosing lines.
- foreach my $line ( 0 .. $max_line ) {
-
- my $ibeg = $ri_first->[$line];
- my $lev = $levels_to_go[$ibeg];
- if ( $line > 0 ) {
-
- # if we have started a chain..
- if ($line_1) {
-
- # see if it continues..
- if ( $lev == $lev_last ) {
- if ( $types_to_go[$ibeg] eq 'k'
- && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
- {
-
- # chain continues...
- # check for chain ending at end of a statement
- if ( $line == $max_line ) {
-
- # see of this line ends a statement
- my $iend = $ri_last->[$line];
- $this_line_is_semicolon_terminated =
- $types_to_go[$iend] eq ';'
-
- # with possible side comment
- || ( $types_to_go[$iend] eq '#'
- && $iend - $ibeg >= 2
- && $types_to_go[ $iend - 2 ] eq ';'
- && $types_to_go[ $iend - 1 ] eq 'b' );
- }
- $line_2 = $line if ($this_line_is_semicolon_terminated);
- }
- else {
-
- # kill chain
- $line_1 = undef;
- }
- }
- elsif ( $lev < $lev_last ) {
-
- # chain ends with previous line
- $line_2 = $line - 1;
- }
- elsif ( $lev > $lev_last ) {
+ # handle line of non-code..
+ else {
- # kill chain
- $line_1 = undef;
- }
+ # set special flags
+ my $skip_line = 0;
+ if ( substr( $line_type, 0, 3 ) eq 'POD' ) {
- # undo the continuation indentation if a chain ends
- if ( defined($line_2) && defined($line_1) ) {
- my $continuation_line_count = $line_2 - $line_1 + 1;
- @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ] =
- (0) x ($continuation_line_count)
- if ( $continuation_line_count >= 0 );
- @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ]
- = @reduced_spaces_to_go[ @{$ri_first}
- [ $line_1 .. $line_2 ] ];
- $line_1 = undef;
+ # Pod docs should have a preceding blank line. But stay
+ # out of __END__ and __DATA__ sections, because
+ # the user may be using this section for any purpose whatsoever
+ if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
+ if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
+ if ( !$skip_line
+ && !$in_format_skipping_section
+ && $line_type eq 'POD_START'
+ && !$self->[_saw_END_or_DATA_] )
+ {
+ $self->want_blank_line();
}
}
- # not in a chain yet..
- else {
+ # leave the blank counters in a predictable state
+ # after __END__ or __DATA__
+ elsif ( $line_type eq 'END_START' || $line_type eq 'DATA_START' ) {
+ $file_writer_object->reset_consecutive_blank_lines();
+ $self->[_saw_END_or_DATA_] = 1;
+ }
- # look for start of a new sort/map/grep chain
- if ( $lev > $lev_last ) {
- if ( $types_to_go[$ibeg] eq 'k'
- && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
- {
- $line_1 = $line;
- }
- }
+ # write unindented non-code line
+ if ( !$skip_line ) {
+ $self->write_unindented_line($input_line);
}
}
- $lev_last = $lev;
}
return;
-}
-sub undo_lp_ci {
+} ## end sub process_all_lines
- # If there is a single, long parameter within parens, like this:
- #
- # $self->command( "/msg "
- # . $infoline->chan
- # . " You said $1, but did you know that it's square was "
- # . $1 * $1 . " ?" );
- #
- # we can remove the continuation indentation of the 2nd and higher lines
- # to achieve this effect, which is more pleasing:
- #
- # $self->command("/msg "
- # . $infoline->chan
- # . " You said $1, but did you know that it's square was "
- # . $1 * $1 . " ?");
+sub keyword_group_scan {
+ my $self = shift;
- my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
- my $max_line = @{$ri_first} - 1;
+ # Called once per file to process the --keyword-group-blanks-* parameters.
- # must be multiple lines
- return unless $max_line > $line_open;
+ # Manipulate blank lines around keyword groups (kgb* flags)
+ # Scan all lines looking for runs of consecutive lines beginning with
+ # selected keywords. Example keywords are 'my', 'our', 'local', ... but
+ # they may be anything. We will set flags requesting that blanks be
+ # inserted around and within them according to input parameters. Note
+ # that we are scanning the lines as they came in in the input stream, so
+ # they are not necessarily well formatted.
- my $lev_start = $levels_to_go[$i_start];
- my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
+ # The output of this sub is a return hash ref whose keys are the indexes of
+ # lines after which we desire a blank line. For line index i:
+ # $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i
+ # $rhash_of_desires->{$i} = 2 means we want blank line $i removed
+ my $rhash_of_desires = {};
- # see if all additional lines in this container have continuation
- # indentation
- my $n;
- my $line_1 = 1 + $line_open;
- for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
- my $ibeg = $ri_first->[$n];
- my $iend = $ri_last->[$n];
- if ( $ibeg eq $closing_index ) { $n--; last }
- return if ( $lev_start != $levels_to_go[$ibeg] );
- return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
- last if ( $closing_index <= $iend );
+ # Nothing to do if no blanks can be output. This test added to fix
+ # case b760.
+ if ( !$rOpts_maximum_consecutive_blank_lines ) {
+ return $rhash_of_desires;
}
- # we can reduce the indentation of all continuation lines
- my $continuation_line_count = $n - $line_open;
- @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
- (0) x ($continuation_line_count);
- @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
- @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
- return;
-}
+ my $Opt_blanks_before = $rOpts->{'keyword-group-blanks-before'}; # '-kgbb'
+ my $Opt_blanks_after = $rOpts->{'keyword-group-blanks-after'}; # '-kgba'
+ my $Opt_blanks_inside = $rOpts->{'keyword-group-blanks-inside'}; # '-kgbi'
+ my $Opt_blanks_delete = $rOpts->{'keyword-group-blanks-delete'}; # '-kgbd'
+ my $Opt_size = $rOpts->{'keyword-group-blanks-size'}; # '-kgbs'
-sub pad_token {
+ # A range of sizes can be input with decimal notation like 'min.max' with
+ # any number of dots between the two numbers. Examples:
+ # string => min max matches
+ # 1.1 1 1 exactly 1
+ # 1.3 1 3 1,2, or 3
+ # 1..3 1 3 1,2, or 3
+ # 5 5 - 5 or more
+ # 6. 6 - 6 or more
+ # .2 - 2 up to 2
+ # 1.0 1 0 nothing
+ my ( $Opt_size_min, $Opt_size_max ) = split /\.+/, $Opt_size;
+ if ( $Opt_size_min && $Opt_size_min !~ /^\d+$/
+ || $Opt_size_max && $Opt_size_max !~ /^\d+$/ )
+ {
+ Warn(<<EOM);
+Unexpected value for -kgbs: '$Opt_size'; expecting 'min' or 'min.max';
+ignoring all -kgb flags
+EOM
- # insert $pad_spaces before token number $ipad
- my ( $self, $ipad, $pad_spaces ) = @_;
- my $rLL = $self->{rLL};
- if ( $pad_spaces > 0 ) {
- $tokens_to_go[$ipad] = ' ' x $pad_spaces . $tokens_to_go[$ipad];
- }
- elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) {
- $tokens_to_go[$ipad] = "";
+ # Turn this option off so that this message does not keep repeating
+ # during iterations and other files.
+ $rOpts->{'keyword-group-blanks-size'} = "";
+ return $rhash_of_desires;
}
- else {
+ $Opt_size_min = 1 unless ($Opt_size_min);
- # shouldn't happen
- return;
+ if ( $Opt_size_max && $Opt_size_max < $Opt_size_min ) {
+ return $rhash_of_desires;
}
- # Keep token arrays in sync
- $self->sync_token_K($ipad);
+ # codes for $Opt_blanks_before and $Opt_blanks_after:
+ # 0 = never (delete if exist)
+ # 1 = stable (keep unchanged)
+ # 2 = always (insert if missing)
- $token_lengths_to_go[$ipad] += $pad_spaces;
- foreach my $i ( $ipad .. $max_index_to_go ) {
- $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
- }
- return;
-}
+ return $rhash_of_desires
+ unless $Opt_size_min > 0
+ && ( $Opt_blanks_before != 1
+ || $Opt_blanks_after != 1
+ || $Opt_blanks_inside
+ || $Opt_blanks_delete );
-{
- my %is_math_op;
+ my $Opt_pattern = $keyword_group_list_pattern;
+ my $Opt_comment_pattern = $keyword_group_list_comment_pattern;
+ my $Opt_repeat_count =
+ $rOpts->{'keyword-group-blanks-repeat-count'}; # '-kgbr'
- BEGIN {
+ my $rlines = $self->[_rlines_];
+ my $rLL = $self->[_rLL_];
+ my $K_closing_container = $self->[_K_closing_container_];
- my @q = qw( + - * / );
- @is_math_op{@q} = (1) x scalar(@q);
- }
+ # variables for the current group and subgroups:
+ my ( $ibeg, $iend, $count, $level_beg, $K_closing, @iblanks, @group,
+ @subgroup );
- sub set_logical_padding {
+ # Definitions:
+ # ($ibeg, $iend) = starting and ending line indexes of this entire group
+ # $count = total number of keywords seen in this entire group
+ # $level_beg = indententation level of this group
+ # @group = [ $i, $token, $count ] =list of all keywords & blanks
+ # @subgroup = $j, index of group where token changes
+ # @iblanks = line indexes of blank lines in input stream in this group
+ # where i=starting line index
+ # token (the keyword)
+ # count = number of this token in this subgroup
+ # j = index in group where token changes
+ #
+ # These vars will contain values for the most recently seen line:
+ my ( $line_type, $CODE_type, $K_first, $K_last );
- # Look at a batch of lines and see if extra padding can improve the
- # alignment when there are certain leading operators. Here is an
- # example, in which some extra space is introduced before
- # '( $year' to make it line up with the subsequent lines:
- #
- # if ( ( $Year < 1601 )
- # || ( $Year > 2899 )
- # || ( $EndYear < 1601 )
- # || ( $EndYear > 2899 ) )
- # {
- # &Error_OutOfRange;
- # }
- #
- my ( $self, $ri_first, $ri_last ) = @_;
- my $max_line = @{$ri_first} - 1;
+ my $number_of_groups_seen = 0;
- # FIXME: move these declarations below
- my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces,
- $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
+ ####################
+ # helper subroutines
+ ####################
- # looking at each line of this batch..
- foreach my $line ( 0 .. $max_line - 1 ) {
+ my $insert_blank_after = sub {
+ my ($i) = @_;
+ $rhash_of_desires->{$i} = 1;
+ my $ip = $i + 1;
+ if ( defined( $rhash_of_desires->{$ip} )
+ && $rhash_of_desires->{$ip} == 2 )
+ {
+ $rhash_of_desires->{$ip} = 0;
+ }
+ return;
+ };
- # see if the next line begins with a logical operator
- $ibeg = $ri_first->[$line];
- $iend = $ri_last->[$line];
- $ibeg_next = $ri_first->[ $line + 1 ];
- $tok_next = $tokens_to_go[$ibeg_next];
- $type_next = $types_to_go[$ibeg_next];
+ my $split_into_sub_groups = sub {
- $has_leading_op_next = ( $tok_next =~ /^\w/ )
- ? $is_chain_operator{$tok_next} # + - * / : ? && ||
- : $is_chain_operator{$type_next}; # and, or
+ # place blanks around long sub-groups of keywords
+ # ...if requested
+ return unless ($Opt_blanks_inside);
- next unless ($has_leading_op_next);
+ # loop over sub-groups, index k
+ push @subgroup, scalar @group;
+ my $kbeg = 1;
+ my $kend = @subgroup - 1;
+ for ( my $k = $kbeg ; $k <= $kend ; $k++ ) {
- # next line must not be at lesser depth
- next
- if ( $nesting_depth_to_go[$ibeg] >
- $nesting_depth_to_go[$ibeg_next] );
+ # index j runs through all keywords found
+ my $j_b = $subgroup[ $k - 1 ];
+ my $j_e = $subgroup[$k] - 1;
- # identify the token in this line to be padded on the left
- $ipad = undef;
+ # index i is the actual line number of a keyword
+ my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] };
+ my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] };
+ my $num = $count_e - $count_b + 1;
- # handle lines at same depth...
- if ( $nesting_depth_to_go[$ibeg] ==
- $nesting_depth_to_go[$ibeg_next] )
- {
+ # This subgroup runs from line $ib to line $ie-1, but may contain
+ # blank lines
+ if ( $num >= $Opt_size_min ) {
- # if this is not first line of the batch ...
- if ( $line > 0 ) {
+ # if there are blank lines, we require that at least $num lines
+ # be non-blank up to the boundary with the next subgroup.
+ my $nog_b = my $nog_e = 1;
+ if ( @iblanks && !$Opt_blanks_delete ) {
+ my $j_bb = $j_b + $num - 1;
+ my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] };
+ $nog_b = $count_bb - $count_b + 1 == $num;
- # and we have leading operator..
- next if $has_leading_op;
+ my $j_ee = $j_e - ( $num - 1 );
+ my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] };
+ $nog_e = $count_e - $count_ee + 1 == $num;
+ }
+ if ( $nog_b && $k > $kbeg ) {
+ $insert_blank_after->( $i_b - 1 );
+ }
+ if ( $nog_e && $k < $kend ) {
+ my ( $i_ep, $tok_ep, $count_ep ) = @{ $group[ $j_e + 1 ] };
+ $insert_blank_after->( $i_ep - 1 );
+ }
+ }
+ }
+ };
- # Introduce padding if..
- # 1. the previous line is at lesser depth, or
- # 2. the previous line ends in an assignment
- # 3. the previous line ends in a 'return'
- # 4. the previous line ends in a comma
- # Example 1: previous line at lesser depth
- # if ( ( $Year < 1601 ) # <- we are here but
- # || ( $Year > 2899 ) # list has not yet
- # || ( $EndYear < 1601 ) # collapsed vertically
- # || ( $EndYear > 2899 ) )
- # {
- #
- # Example 2: previous line ending in assignment:
- # $leapyear =
- # $year % 4 ? 0 # <- We are here
- # : $year % 100 ? 1
- # : $year % 400 ? 0
- # : 1;
- #
- # Example 3: previous line ending in comma:
- # push @expr,
- # /test/ ? undef
- # : eval($_) ? 1
- # : eval($_) ? 1
- # : 0;
+ my $delete_if_blank = sub {
+ my ($i) = @_;
- # be sure levels agree (do not indent after an indented 'if')
- next
- if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
+ # delete line $i if it is blank
+ return unless ( $i >= 0 && $i < @{$rlines} );
+ my $line_type = $rlines->[$i]->{_line_type};
+ return if ( $line_type ne 'CODE' );
+ my $code_type = $rlines->[$i]->{_code_type};
+ if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; }
+ return;
+ };
- # allow padding on first line after a comma but only if:
- # (1) this is line 2 and
- # (2) there are at more than three lines and
- # (3) lines 3 and 4 have the same leading operator
- # These rules try to prevent padding within a long
- # comma-separated list.
- my $ok_comma;
- if ( $types_to_go[$iendm] eq ','
- && $line == 1
- && $max_line > 2 )
- {
- my $ibeg_next_next = $ri_first->[ $line + 2 ];
- my $tok_next_next = $tokens_to_go[$ibeg_next_next];
- $ok_comma = $tok_next_next eq $tok_next;
- }
+ my $delete_inner_blank_lines = sub {
- next
- unless (
- $is_assignment{ $types_to_go[$iendm] }
- || $ok_comma
- || ( $nesting_depth_to_go[$ibegm] <
- $nesting_depth_to_go[$ibeg] )
- || ( $types_to_go[$iendm] eq 'k'
- && $tokens_to_go[$iendm] eq 'return' )
- );
+ # always remove unwanted trailing blank lines from our list
+ return unless (@iblanks);
+ while ( my $ibl = pop(@iblanks) ) {
+ if ( $ibl < $iend ) { push @iblanks, $ibl; last }
+ $iend = $ibl;
+ }
- # we will add padding before the first token
- $ipad = $ibeg;
- }
+ # now mark mark interior blank lines for deletion if requested
+ return unless ($Opt_blanks_delete);
- # for first line of the batch..
- else {
+ while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 }
- # WARNING: Never indent if first line is starting in a
- # continued quote, which would change the quote.
- next if $starting_in_quote;
+ };
- # if this is text after closing '}'
- # then look for an interior token to pad
- if ( $types_to_go[$ibeg] eq '}' ) {
+ my $end_group = sub {
- }
+ # end a group of keywords
+ my ($bad_ending) = @_;
+ if ( defined($ibeg) && $ibeg >= 0 ) {
- # otherwise, we might pad if it looks really good
- else {
+ # then handle sufficiently large groups
+ if ( $count >= $Opt_size_min ) {
- # we might pad token $ibeg, so be sure that it
- # is at the same depth as the next line.
- next
- if ( $nesting_depth_to_go[$ibeg] !=
- $nesting_depth_to_go[$ibeg_next] );
+ $number_of_groups_seen++;
- # We can pad on line 1 of a statement if at least 3
- # lines will be aligned. Otherwise, it
- # can look very confusing.
+ # do any blank deletions regardless of the count
+ $delete_inner_blank_lines->();
- # We have to be careful not to pad if there are too few
- # lines. The current rule is:
- # (1) in general we require at least 3 consecutive lines
- # with the same leading chain operator token,
- # (2) but an exception is that we only require two lines
- # with leading colons if there are no more lines. For example,
- # the first $i in the following snippet would get padding
- # by the second rule:
- #
- # $i == 1 ? ( "First", "Color" )
- # : $i == 2 ? ( "Then", "Rarity" )
- # : ( "Then", "Name" );
+ if ( $ibeg > 0 ) {
+ my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type};
- if ( $max_line > 1 ) {
- my $leading_token = $tokens_to_go[$ibeg_next];
- my $tokens_differ;
+ # patch for hash bang line which is not currently marked as
+ # a comment; mark it as a comment
+ if ( $ibeg == 1 && !$code_type ) {
+ my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text};
+ $code_type = 'BC'
+ if ( $line_text && $line_text =~ /^#/ );
+ }
- # never indent line 1 of a '.' series because
- # previous line is most likely at same level.
- # TODO: we should also look at the leasing_spaces
- # of the last output line and skip if it is same
- # as this line.
- next if ( $leading_token eq '.' );
+ # Do not insert a blank after a comment
+ # (this could be subject to a flag in the future)
+ if ( $code_type !~ /(BC|SBC|SBCX)/ ) {
+ if ( $Opt_blanks_before == INSERT ) {
+ $insert_blank_after->( $ibeg - 1 );
- my $count = 1;
- foreach my $l ( 2 .. 3 ) {
- last if ( $line + $l > $max_line );
- my $ibeg_next_next = $ri_first->[ $line + $l ];
- if ( $tokens_to_go[$ibeg_next_next] ne
- $leading_token )
- {
- $tokens_differ = 1;
- last;
- }
- $count++;
- }
- next if ($tokens_differ);
- next if ( $count < 3 && $leading_token ne ':' );
- $ipad = $ibeg;
}
- else {
- next;
+ elsif ( $Opt_blanks_before == DELETE ) {
+ $delete_if_blank->( $ibeg - 1 );
}
}
}
- }
- # find interior token to pad if necessary
- if ( !defined($ipad) ) {
-
- for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
+ # We will only put blanks before code lines. We could loosen
+ # this rule a little, but we have to be very careful because
+ # for example we certainly don't want to drop a blank line
+ # after a line like this:
+ # my $var = <<EOM;
+ if ( $line_type eq 'CODE' && defined($K_first) ) {
- # find any unclosed container
- next
- unless ( $type_sequence_to_go[$i]
- && $self->mate_index_to_go($i) > $iend );
+ # - Do not put a blank before a line of different level
+ # - Do not put a blank line if we ended the search badly
+ # - Do not put a blank at the end of the file
+ # - Do not put a blank line before a hanging side comment
+ my $level = $rLL->[$K_first]->[_LEVEL_];
+ my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
- # find next nonblank token to pad
- $ipad = $inext_to_go[$i];
- last if ( $ipad > $iend );
+ if ( $level == $level_beg
+ && $ci_level == 0
+ && !$bad_ending
+ && $iend < @{$rlines}
+ && $CODE_type ne 'HSC' )
+ {
+ if ( $Opt_blanks_after == INSERT ) {
+ $insert_blank_after->($iend);
+ }
+ elsif ( $Opt_blanks_after == DELETE ) {
+ $delete_if_blank->( $iend + 1 );
+ }
+ }
}
- last unless $ipad;
}
+ $split_into_sub_groups->();
+ }
- # We cannot pad the first leading token of a file because
- # it could cause a bug in which the starting indentation
- # level is guessed incorrectly each time the code is run
- # though perltidy, thus causing the code to march off to
- # the right. For example, the following snippet would have
- # this problem:
-
-## ov_method mycan( $package, '(""' ), $package
-## or ov_method mycan( $package, '(0+' ), $package
-## or ov_method mycan( $package, '(bool' ), $package
-## or ov_method mycan( $package, '(nomethod' ), $package;
-
- # If this snippet is within a block this won't happen
- # unless the user just processes the snippet alone within
- # an editor. In that case either the user will see and
- # fix the problem or it will be corrected next time the
- # entire file is processed with perltidy.
- next if ( $ipad == 0 && $peak_batch_size <= 1 );
-
-## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
-## IT DID MORE HARM THAN GOOD
-## ceil(
-## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
-## / $upem
-## ),
-##? # do not put leading padding for just 2 lines of math
-##? if ( $ipad == $ibeg
-##? && $line > 0
-##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
-##? && $is_math_op{$type_next}
-##? && $line + 2 <= $max_line )
-##? {
-##? my $ibeg_next_next = $ri_first->[ $line + 2 ];
-##? my $type_next_next = $types_to_go[$ibeg_next_next];
-##? next if !$is_math_op{$type_next_next};
-##? }
+ # reset for another group
+ $ibeg = -1;
+ $iend = undef;
+ $level_beg = -1;
+ $K_closing = undef;
+ @group = ();
+ @subgroup = ();
+ @iblanks = ();
+ };
- # next line must not be at greater depth
- my $iend_next = $ri_last->[ $line + 1 ];
- next
- if ( $nesting_depth_to_go[ $iend_next + 1 ] >
- $nesting_depth_to_go[$ipad] );
+ my $find_container_end = sub {
- # lines must be somewhat similar to be padded..
- my $inext_next = $inext_to_go[$ibeg_next];
- my $type = $types_to_go[$ipad];
- my $type_next = $types_to_go[ $ipad + 1 ];
+ # If the keyword lines ends with an open token, find the closing token
+ # '$K_closing' so that we can easily skip past the contents of the
+ # container.
+ return if ( $K_last <= $K_first );
+ my $KK = $K_last;
+ my $type_last = $rLL->[$KK]->[_TYPE_];
+ my $tok_last = $rLL->[$KK]->[_TOKEN_];
+ if ( $type_last eq '#' ) {
+ $KK = $self->K_previous_nonblank($KK);
+ $tok_last = $rLL->[$KK]->[_TOKEN_];
+ }
+ if ( $KK > $K_first && $tok_last =~ /^[\(\{\[]$/ ) {
- # see if there are multiple continuation lines
- my $logical_continuation_lines = 1;
- if ( $line + 2 <= $max_line ) {
- my $leading_token = $tokens_to_go[$ibeg_next];
- my $ibeg_next_next = $ri_first->[ $line + 2 ];
- if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
- && $nesting_depth_to_go[$ibeg_next] eq
- $nesting_depth_to_go[$ibeg_next_next] )
- {
- $logical_continuation_lines++;
- }
+ my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+ my $lev = $rLL->[$KK]->[_LEVEL_];
+ if ( $lev == $level_beg ) {
+ $K_closing = $K_closing_container->{$type_sequence};
}
+ }
+ };
- # see if leading types match
- my $types_match = $types_to_go[$inext_next] eq $type;
- my $matches_without_bang;
+ my $add_to_group = sub {
+ my ( $i, $token, $level ) = @_;
- # if first line has leading ! then compare the following token
- if ( !$types_match && $type eq '!' ) {
- $types_match = $matches_without_bang =
- $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
- }
+ # End the previous group if we have reached the maximum
+ # group size
+ if ( $Opt_size_max && @group >= $Opt_size_max ) {
+ $end_group->();
+ }
- if (
+ if ( @group == 0 ) {
+ $ibeg = $i;
+ $level_beg = $level;
+ $count = 0;
+ }
- # either we have multiple continuation lines to follow
- # and we are not padding the first token
- ( $logical_continuation_lines > 1 && $ipad > 0 )
+ $count++;
+ $iend = $i;
- # or..
- || (
+ # New sub-group?
+ if ( !@group || $token ne $group[-1]->[1] ) {
+ push @subgroup, scalar(@group);
+ }
+ push @group, [ $i, $token, $count ];
- # types must match
- $types_match
+ # remember if this line ends in an open container
+ $find_container_end->();
- # and keywords must match if keyword
- && !(
- $type eq 'k'
- && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
- )
- )
- )
- {
+ return;
+ };
- #----------------------begin special checks--------------
- #
- # SPECIAL CHECK 1:
- # A check is needed before we can make the pad.
- # If we are in a list with some long items, we want each
- # item to stand out. So in the following example, the
- # first line beginning with '$casefold->' would look good
- # padded to align with the next line, but then it
- # would be indented more than the last line, so we
- # won't do it.
- #
- # ok(
- # $casefold->{code} eq '0041'
- # && $casefold->{status} eq 'C'
- # && $casefold->{mapping} eq '0061',
- # 'casefold 0x41'
- # );
- #
- # Note:
- # It would be faster, and almost as good, to use a comma
- # count, and not pad if comma_count > 1 and the previous
- # line did not end with a comma.
- #
- my $ok_to_pad = 1;
+ ###################################
+ # loop over all lines of the source
+ ###################################
+ $end_group->();
+ my $i = -1;
+ foreach my $line_of_tokens ( @{$rlines} ) {
- my $ibg = $ri_first->[ $line + 1 ];
- my $depth = $nesting_depth_to_go[ $ibg + 1 ];
+ $i++;
+ last
+ if ( $Opt_repeat_count > 0
+ && $number_of_groups_seen >= $Opt_repeat_count );
- # just use simplified formula for leading spaces to avoid
- # needless sub calls
- my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
+ $CODE_type = "";
+ $K_first = undef;
+ $K_last = undef;
+ $line_type = $line_of_tokens->{_line_type};
- # look at each line beyond the next ..
- my $l = $line + 1;
- foreach my $ltest ( $line + 2 .. $max_line ) {
- $l = $ltest;
- my $ibg = $ri_first->[$l];
+ # always end a group at non-CODE
+ if ( $line_type ne 'CODE' ) { $end_group->(); next }
- # quit looking at the end of this container
- last
- if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
- || ( $nesting_depth_to_go[$ibg] < $depth );
+ $CODE_type = $line_of_tokens->{_code_type};
- # cannot do the pad if a later line would be
- # outdented more
- if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
- $ok_to_pad = 0;
- last;
- }
- }
+ # end any group at a format skipping line
+ if ( $CODE_type && $CODE_type eq 'FS' ) {
+ $end_group->();
+ next;
+ }
- # don't pad if we end in a broken list
- if ( $l == $max_line ) {
- my $i2 = $ri_last->[$l];
- if ( $types_to_go[$i2] eq '#' ) {
- my $i1 = $ri_first->[$l];
- next if $self->terminal_type_i( $i1, $i2 ) eq ',';
- }
- }
+ # continue in a verbatim (VB) type; it may be quoted text
+ if ( $CODE_type eq 'VB' ) {
+ if ( $ibeg >= 0 ) { $iend = $i; }
+ next;
+ }
- # SPECIAL CHECK 2:
- # a minus may introduce a quoted variable, and we will
- # add the pad only if this line begins with a bare word,
- # such as for the word 'Button' here:
- # [
- # Button => "Print letter \"~$_\"",
- # -command => [ sub { print "$_[0]\n" }, $_ ],
- # -accelerator => "Meta+$_"
- # ];
- #
- # On the other hand, if 'Button' is quoted, it looks best
- # not to pad:
- # [
- # 'Button' => "Print letter \"~$_\"",
- # -command => [ sub { print "$_[0]\n" }, $_ ],
- # -accelerator => "Meta+$_"
- # ];
- if ( $types_to_go[$ibeg_next] eq 'm' ) {
- $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
- }
+ # and continue in blank (BL) types
+ if ( $CODE_type eq 'BL' ) {
+ if ( $ibeg >= 0 ) {
+ $iend = $i;
+ push @{iblanks}, $i;
- next unless $ok_to_pad;
+ # propagate current subgroup token
+ my $tok = $group[-1]->[1];
+ push @group, [ $i, $tok, $count ];
+ }
+ next;
+ }
- #----------------------end special check---------------
+ # examine the first token of this line
+ my $rK_range = $line_of_tokens->{_rK_range};
+ ( $K_first, $K_last ) = @{$rK_range};
+ if ( !defined($K_first) ) {
- my $length_1 = total_line_length( $ibeg, $ipad - 1 );
- my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
- $pad_spaces = $length_2 - $length_1;
+ # Somewhat unexpected blank line..
+ # $rK_range is normally defined for line type CODE, but this can
+ # happen for example if the input line was a single semicolon which
+ # is being deleted. In that case there was code in the input
+ # file but it is not being retained. So we can silently return.
+ return $rhash_of_desires;
+ }
- # If the first line has a leading ! and the second does
- # not, then remove one space to try to align the next
- # leading characters, which are often the same. For example:
- # if ( !$ts
- # || $ts == $self->Holder
- # || $self->Holder->Type eq "Arena" )
- #
- # This usually helps readability, but if there are subsequent
- # ! operators things will still get messed up. For example:
- #
- # if ( !exists $Net::DNS::typesbyname{$qtype}
- # && exists $Net::DNS::classesbyname{$qtype}
- # && !exists $Net::DNS::classesbyname{$qclass}
- # && exists $Net::DNS::typesbyname{$qclass} )
- # We can't fix that.
- if ($matches_without_bang) { $pad_spaces-- }
+ # This is not for keywords in lists ( keyword 'my' can occur in lists,
+ # see case b760)
+ next if ( $self->is_list_by_K($K_first) );
- # make sure this won't change if -lp is used
- my $indentation_1 = $leading_spaces_to_go[$ibeg];
- if ( ref($indentation_1) ) {
- if ( $indentation_1->get_recoverable_spaces() == 0 ) {
- my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
- unless ( $indentation_2->get_recoverable_spaces() == 0 )
- {
- $pad_spaces = 0;
- }
- }
- }
+ my $level = $rLL->[$K_first]->[_LEVEL_];
+ my $type = $rLL->[$K_first]->[_TYPE_];
+ my $token = $rLL->[$K_first]->[_TOKEN_];
+ my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
- # we might be able to handle a pad of -1 by removing a blank
- # token
- if ( $pad_spaces < 0 ) {
+ # see if this is a code type we seek (i.e. comment)
+ if ( $CODE_type
+ && $Opt_comment_pattern
+ && $CODE_type =~ /$Opt_comment_pattern/ )
+ {
- if ( $pad_spaces == -1 ) {
- if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' )
- {
- $self->pad_token( $ipad - 1, $pad_spaces );
- }
- }
- $pad_spaces = 0;
- }
+ my $tok = $CODE_type;
- # now apply any padding for alignment
- if ( $ipad >= 0 && $pad_spaces ) {
+ # Continuing a group
+ if ( $ibeg >= 0 && $level == $level_beg ) {
+ $add_to_group->( $i, $tok, $level );
+ }
- my $length_t = total_line_length( $ibeg, $iend );
- if ( $pad_spaces + $length_t <= maximum_line_length($ibeg) )
- {
- $self->pad_token( $ipad, $pad_spaces );
- }
- }
+ # Start new group
+ else {
+
+ # first end old group if any; we might be starting new
+ # keywords at different level
+ if ( $ibeg > 0 ) { $end_group->(); }
+ $add_to_group->( $i, $tok, $level );
}
+ next;
}
- continue {
- $iendm = $iend;
- $ibegm = $ibeg;
- $has_leading_op = $has_leading_op_next;
- } # end of loop over lines
- return;
- }
-}
-sub correct_lp_indentation {
+ # See if it is a keyword we seek, but never start a group in a
+ # continuation line; the code may be badly formatted.
+ if ( $ci_level == 0
+ && $type eq 'k'
+ && $token =~ /$Opt_pattern/ )
+ {
- # When the -lp option is used, we need to make a last pass through
- # each line to correct the indentation positions in case they differ
- # from the predictions. This is necessary because perltidy uses a
- # predictor/corrector method for aligning with opening parens. The
- # predictor is usually good, but sometimes stumbles. The corrector
- # tries to patch things up once the actual opening paren locations
- # are known.
- my ( $ri_first, $ri_last ) = @_;
- my $do_not_pad = 0;
+ # Continuing a keyword group
+ if ( $ibeg >= 0 && $level == $level_beg ) {
+ $add_to_group->( $i, $token, $level );
+ }
- # Note on flag '$do_not_pad':
- # We want to avoid a situation like this, where the aligner inserts
- # whitespace before the '=' to align it with a previous '=', because
- # otherwise the parens might become mis-aligned in a situation like
- # this, where the '=' has become aligned with the previous line,
- # pushing the opening '(' forward beyond where we want it.
- #
- # $mkFloor::currentRoom = '';
- # $mkFloor::c_entry = $c->Entry(
- # -width => '10',
- # -relief => 'sunken',
- # ...
- # );
- #
- # We leave it to the aligner to decide how to do this.
+ # Start new keyword group
+ else {
- # first remove continuation indentation if appropriate
- my $max_line = @{$ri_first} - 1;
+ # first end old group if any; we might be starting new
+ # keywords at different level
+ if ( $ibeg > 0 ) { $end_group->(); }
+ $add_to_group->( $i, $token, $level );
+ }
+ next;
+ }
- # looking at each line of this batch..
- my ( $ibeg, $iend );
- foreach my $line ( 0 .. $max_line ) {
- $ibeg = $ri_first->[$line];
- $iend = $ri_last->[$line];
+ # This is not one of our keywords, but we are in a keyword group
+ # so see if we should continue or quit
+ elsif ( $ibeg >= 0 ) {
- # looking at each token in this output line..
- foreach my $i ( $ibeg .. $iend ) {
+ # - bail out on a large level change; we may have walked into a
+ # data structure or anoymous sub code.
+ if ( $level > $level_beg + 1 || $level < $level_beg ) {
+ $end_group->();
+ next;
+ }
- # How many space characters to place before this token
- # for special alignment. Actual padding is done in the
- # continue block.
+ # - keep going on a continuation line of the same level, since
+ # it is probably a continuation of our previous keyword,
+ # - and keep going past hanging side comments because we never
+ # want to interrupt them.
+ if ( ( ( $level == $level_beg ) && $ci_level > 0 )
+ || $CODE_type eq 'HSC' )
+ {
+ $iend = $i;
+ next;
+ }
- # looking for next unvisited indentation item
- my $indentation = $leading_spaces_to_go[$i];
- if ( !$indentation->get_marked() ) {
- $indentation->set_marked(1);
+ # - continue if if we are within in a container which started with
+ # the line of the previous keyword.
+ if ( defined($K_closing) && $K_first <= $K_closing ) {
- # looking for indentation item for which we are aligning
- # with parens, braces, and brackets
- next unless ( $indentation->get_align_paren() );
+ # continue if entire line is within container
+ if ( $K_last <= $K_closing ) { $iend = $i; next }
- # skip closed container on this line
- if ( $i > $ibeg ) {
- my $im = max( $ibeg, $iprev_to_go[$i] );
- if ( $type_sequence_to_go[$im]
- && $mate_index_to_go[$im] <= $iend )
- {
- next;
+ # continue at ); or }; or ];
+ my $KK = $K_closing + 1;
+ if ( $rLL->[$KK]->[_TYPE_] eq ';' ) {
+ if ( $KK < $K_last ) {
+ if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK }
+ if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' ) {
+ $end_group->(1);
+ next;
+ }
}
+ $iend = $i;
+ next;
}
- if ( $line == 1 && $i == $ibeg ) {
- $do_not_pad = 1;
- }
+ $end_group->(1);
+ next;
+ }
- # Ok, let's see what the error is and try to fix it
- my $actual_pos;
- my $predicted_pos = $indentation->get_spaces();
- if ( $i > $ibeg ) {
+ # - end the group if none of the above
+ $end_group->();
+ next;
+ }
- # token is mid-line - use length to previous token
- $actual_pos = total_line_length( $ibeg, $i - 1 );
+ # not in a keyword group; continue
+ else { next }
+ }
- # for mid-line token, we must check to see if all
- # additional lines have continuation indentation,
- # and remove it if so. Otherwise, we do not get
- # good alignment.
- my $closing_index = $indentation->get_closed();
- if ( $closing_index > $iend ) {
- my $ibeg_next = $ri_first->[ $line + 1 ];
- if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
- undo_lp_ci( $line, $i, $closing_index, $ri_first,
- $ri_last );
- }
- }
- }
- elsif ( $line > 0 ) {
+ # end of loop over all lines
+ $end_group->();
+ return $rhash_of_desires;
- # handle case where token starts a new line;
- # use length of previous line
- my $ibegm = $ri_first->[ $line - 1 ];
- my $iendm = $ri_last->[ $line - 1 ];
- $actual_pos = total_line_length( $ibegm, $iendm );
+} ## end sub keyword_group_scan
- # follow -pt style
- ++$actual_pos
- if ( $types_to_go[ $iendm + 1 ] eq 'b' );
- }
- else {
+#######################################
+# CODE SECTION 7: Process lines of code
+#######################################
- # token is first character of first line of batch
- $actual_pos = $predicted_pos;
- }
+{ ## begin closure process_line_of_CODE
- my $move_right = $actual_pos - $predicted_pos;
+ # The routines in this closure receive lines of code and combine them into
+ # 'batches' and send them along. A 'batch' is the unit of code which can be
+ # processed further as a unit. It has the property that it is the largest
+ # amount of code into which which perltidy is free to place one or more
+ # line breaks within it without violating any constraints.
- # done if no error to correct (gnu2.t)
- if ( $move_right == 0 ) {
- $indentation->set_recoverable_spaces($move_right);
- next;
- }
+ # When a new batch is formed it is sent to sub 'grind_batch_of_code'.
- # if we have not seen closure for this indentation in
- # this batch, we can only pass on a request to the
- # vertical aligner
- my $closing_index = $indentation->get_closed();
+ # flags needed by the store routine
+ my $line_of_tokens;
+ my $no_internal_newlines;
+ my $side_comment_follows;
+ my $CODE_type;
- if ( $closing_index < 0 ) {
- $indentation->set_recoverable_spaces($move_right);
- next;
- }
+ # range of K of tokens for the current line
+ my ( $K_first, $K_last );
- # If necessary, look ahead to see if there is really any
- # leading whitespace dependent on this whitespace, and
- # also find the longest line using this whitespace.
- # Since it is always safe to move left if there are no
- # dependents, we only need to do this if we may have
- # dependent nodes or need to move right.
+ my ( $rLL, $radjusted_levels );
- my $right_margin = 0;
- my $have_child = $indentation->get_have_child();
+ # past stored nonblank tokens
+ my (
+ $last_last_nonblank_token, $last_last_nonblank_type,
+ $last_nonblank_token, $last_nonblank_type,
+ $last_nonblank_block_type, $K_last_nonblank_code,
+ $K_last_last_nonblank_code, $looking_for_else,
+ $is_static_block_comment, $batch_CODE_type,
+ $last_line_had_side_comment,
+ );
- my %saw_indentation;
- my $line_count = 1;
- $saw_indentation{$indentation} = $indentation;
+ # Called once at the start of a new file
+ sub initialize_process_line_of_CODE {
+ $last_nonblank_token = ';';
+ $last_nonblank_type = ';';
+ $last_last_nonblank_token = ';';
+ $last_last_nonblank_type = ';';
+ $last_nonblank_block_type = "";
+ $K_last_nonblank_code = undef;
+ $K_last_last_nonblank_code = undef;
+ $looking_for_else = 0;
+ $is_static_block_comment = 0;
+ $batch_CODE_type = "";
+ $last_line_had_side_comment = 0;
+ return;
+ }
- if ( $have_child || $move_right > 0 ) {
- $have_child = 0;
- my $max_length = 0;
- if ( $i == $ibeg ) {
- $max_length = total_line_length( $ibeg, $iend );
- }
+ # Batch variables: these describe the current batch of code being formed
+ # and sent down the pipeline. They are initialized in the next
+ # sub.
+ my ( $rbrace_follower, $index_start_one_line_block,
+ $semicolons_before_block_self_destruct,
+ $starting_in_quote, $ending_in_quote, );
+
+ # Called before the start of each new batch
+ sub initialize_batch_variables {
+
+ $max_index_to_go = UNDEFINED_INDEX;
+ @summed_lengths_to_go = @nesting_depth_to_go = (0);
+
+ # The initialization code for the remaining batch arrays is as follows
+ # and can be activated for testing. But profiling shows that it is
+ # time-consuming to re-initialize the batch arrays and is not necessary
+ # because the maximum valid token, $max_index_to_go, is carefully
+ # controlled. This means however that it is not possible to do any
+ # type of filter or map operation directly on these arrays. And it is
+ # not possible to use negative indexes. As a precaution against program
+ # changes which might do this, sub pad_array_to_go adds some undefs at
+ # the end of the current batch of data.
+
+ # So 'long story short': this is a waste of time
+ 0 && do { #<<<
+ @block_type_to_go = ();
+ @type_sequence_to_go = ();
+ @bond_strength_to_go = ();
+ @forced_breakpoint_to_go = ();
+ @token_lengths_to_go = ();
+ @levels_to_go = ();
+ @mate_index_to_go = ();
+ @ci_levels_to_go = ();
+ @nobreak_to_go = ();
+ @old_breakpoint_to_go = ();
+ @tokens_to_go = ();
+ @K_to_go = ();
+ @types_to_go = ();
+ @leading_spaces_to_go = ();
+ @reduced_spaces_to_go = ();
+ @inext_to_go = ();
+ @iprev_to_go = ();
+ @parent_seqno_to_go = ();
+ };
- # look ahead at the rest of the lines of this batch..
- foreach my $line_t ( $line + 1 .. $max_line ) {
- my $ibeg_t = $ri_first->[$line_t];
- my $iend_t = $ri_last->[$line_t];
- last if ( $closing_index <= $ibeg_t );
+ $rbrace_follower = undef;
+ $ending_in_quote = 0;
+ destroy_one_line_block();
+ return;
+ }
- # remember all different indentation objects
- my $indentation_t = $leading_spaces_to_go[$ibeg_t];
- $saw_indentation{$indentation_t} = $indentation_t;
- $line_count++;
+ sub leading_spaces_to_go {
- # remember longest line in the group
- my $length_t = total_line_length( $ibeg_t, $iend_t );
- if ( $length_t > $max_length ) {
- $max_length = $length_t;
- }
- }
- $right_margin = maximum_line_length($ibeg) - $max_length;
- if ( $right_margin < 0 ) { $right_margin = 0 }
- }
+ # return the number of indentation spaces for a token in the output
+ # stream; these were previously stored by 'set_leading_whitespace'.
- my $first_line_comma_count =
- grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
- my $comma_count = $indentation->get_comma_count();
- my $arrow_count = $indentation->get_arrow_count();
+ my ($ii) = @_;
+ return 0 if ( $ii < 0 );
+ my $indentation = $leading_spaces_to_go[$ii];
+ return ref($indentation) ? $indentation->get_spaces() : $indentation;
+ }
- # This is a simple approximate test for vertical alignment:
- # if we broke just after an opening paren, brace, bracket,
- # and there are 2 or more commas in the first line,
- # and there are no '=>'s,
- # then we are probably vertically aligned. We could set
- # an exact flag in sub scan_list, but this is good
- # enough.
- my $indentation_count = keys %saw_indentation;
- my $is_vertically_aligned =
- ( $i == $ibeg
- && $first_line_comma_count > 1
- && $indentation_count == 1
- && ( $arrow_count == 0 || $arrow_count == $line_count ) );
+ sub create_one_line_block {
+ ( $index_start_one_line_block, $semicolons_before_block_self_destruct )
+ = @_;
+ return;
+ }
- # Make the move if possible ..
- if (
+ sub destroy_one_line_block {
+ $index_start_one_line_block = UNDEFINED_INDEX;
+ $semicolons_before_block_self_destruct = 0;
+ return;
+ }
- # we can always move left
- $move_right < 0
+ # Routine to place the current token into the output stream.
+ # Called once per output token.
- # but we should only move right if we are sure it will
- # not spoil vertical alignment
- || ( $comma_count == 0 )
- || ( $comma_count > 0 && !$is_vertically_aligned )
- )
- {
- my $move =
- ( $move_right <= $right_margin )
- ? $move_right
- : $right_margin;
+ use constant DEBUG_STORE => 0;
- foreach ( keys %saw_indentation ) {
- $saw_indentation{$_}
- ->permanently_decrease_available_spaces( -$move );
- }
- }
+ sub store_token_to_go {
- # Otherwise, record what we want and the vertical aligner
- # will try to recover it.
- else {
- $indentation->set_recoverable_spaces($move_right);
- }
- }
- }
- }
- return $do_not_pad;
-}
+ my ( $self, $Ktoken_vars, $rtoken_vars ) = @_;
-# flush is called to output any tokens in the pipeline, so that
-# an alternate source of lines can be written in the correct order
+ # Add one token to the next batch.
+ # $Ktoken_vars = the index K in the global token array
+ # $rtoken_vars = $rLL->[$Ktoken_vars] = the corresponding token values
+ # unless they are temporarily being overridden
-sub flush {
- my $self = shift;
- destroy_one_line_block();
- $self->output_line_to_go();
- Perl::Tidy::VerticalAligner::flush();
- return;
-}
+ # NOTE: This routine needs to be coded efficiently because it is called
+ # once per token. I have gotten it down from the second slowest to the
+ # eighth slowest, but that still seems rather slow for what it does.
-sub reset_block_text_accumulator {
+ # This closure variable has already been defined, for efficiency:
+ # my $radjusted_levels = $self->[_radjusted_levels_];
- # save text after 'if' and 'elsif' to append after 'else'
- if ($accumulating_text_for_block) {
+ my $type = $rtoken_vars->[_TYPE_];
+
+ # Check for emergency flush...
+ # The K indexes in the batch must always be a continuous sequence of
+ # the global token array. The batch process programming assumes this.
+ # If storing this token would cause this relation to fail we must dump
+ # the current batch before storing the new token. It is extremely rare
+ # for this to happen. One known example is the following two-line
+ # snippet when run with parameters
+ # --noadd-newlines --space-terminal-semicolon:
+ # if ( $_ =~ /PENCIL/ ) { $pencil_flag= 1 } ; ;
+ # $yy=1;
+ if ( $max_index_to_go >= 0 ) {
+ my $Klast = $K_to_go[$max_index_to_go];
+ if ( $Ktoken_vars != $Klast + 1 ) {
+ $self->flush_batch_of_CODE();
+ }
- if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
- push @{$rleading_block_if_elsif_text}, $leading_block_text;
+ # Do not output consecutive blank tokens ... this should not
+ # happen, but it is worth checking. Later code can then make the
+ # simplifying assumption that blank tokens are not consecutive.
+ elsif ( $type eq 'b' && $types_to_go[$max_index_to_go] eq 'b' ) {
+ return;
+ }
}
- }
- $accumulating_text_for_block = "";
- $leading_block_text = "";
- $leading_block_text_level = 0;
- $leading_block_text_length_exceeded = 0;
- $leading_block_text_line_number = 0;
- $leading_block_text_line_length = 0;
- return;
-}
-sub set_block_text_accumulator {
- my $i = shift;
- $accumulating_text_for_block = $tokens_to_go[$i];
- if ( $accumulating_text_for_block !~ /^els/ ) {
- $rleading_block_if_elsif_text = [];
- }
- $leading_block_text = "";
- $leading_block_text_level = $levels_to_go[$i];
- $leading_block_text_line_number = get_output_line_number();
- $leading_block_text_length_exceeded = 0;
-
- # this will contain the column number of the last character
- # of the closing side comment
- $leading_block_text_line_length =
- length($csc_last_label) +
- length($accumulating_text_for_block) +
- length( $rOpts->{'closing-side-comment-prefix'} ) +
- $leading_block_text_level * $rOpts_indent_columns + 3;
- return;
-}
+ # Do not start a batch with a blank token.
+ # Fixes cases b149 b888 b984 b985 b986 b987
+ else {
+ if ( $type eq 'b' ) { return }
+ }
-sub accumulate_block_text {
- my $i = shift;
+ ++$max_index_to_go;
+ $batch_CODE_type = $CODE_type;
+ $K_to_go[$max_index_to_go] = $Ktoken_vars;
+ $types_to_go[$max_index_to_go] = $type;
- # accumulate leading text for -csc, ignoring any side comments
- if ( $accumulating_text_for_block
- && !$leading_block_text_length_exceeded
- && $types_to_go[$i] ne '#' )
- {
+ $old_breakpoint_to_go[$max_index_to_go] = 0;
+ $forced_breakpoint_to_go[$max_index_to_go] = 0;
+ $mate_index_to_go[$max_index_to_go] = -1;
- my $added_length = $token_lengths_to_go[$i];
- $added_length += 1 if $i == 0;
- my $new_line_length = $leading_block_text_line_length + $added_length;
+ my $token = $tokens_to_go[$max_index_to_go] = $rtoken_vars->[_TOKEN_];
+ my $ci_level = $ci_levels_to_go[$max_index_to_go] =
+ $rtoken_vars->[_CI_LEVEL_];
- # we can add this text if we don't exceed some limits..
- if (
+ # Clip levels to zero if there are level errors in the file.
+ # We had to wait until now for reasons explained in sub 'write_line'.
+ my $level = $rtoken_vars->[_LEVEL_];
+ if ( $level < 0 ) { $level = 0 }
+ $levels_to_go[$max_index_to_go] = $level;
- # we must not have already exceeded the text length limit
- length($leading_block_text) <
- $rOpts_closing_side_comment_maximum_text
+ $nesting_depth_to_go[$max_index_to_go] = $rtoken_vars->[_SLEVEL_];
+ $block_type_to_go[$max_index_to_go] = $rtoken_vars->[_BLOCK_TYPE_];
+ $type_sequence_to_go[$max_index_to_go] =
+ $rtoken_vars->[_TYPE_SEQUENCE_];
- # and either:
- # the new total line length must be below the line length limit
- # or the new length must be below the text length limit
- # (ie, we may allow one token to exceed the text length limit)
- && (
- $new_line_length <
- maximum_line_length_for_level($leading_block_text_level)
+ $nobreak_to_go[$max_index_to_go] =
+ $side_comment_follows ? 2 : $no_internal_newlines;
- || length($leading_block_text) + $added_length <
- $rOpts_closing_side_comment_maximum_text
- )
+ my $length = $rtoken_vars->[_TOKEN_LENGTH_];
- # UNLESS: we are adding a closing paren before the brace we seek.
- # This is an attempt to avoid situations where the ... to be
- # added are longer than the omitted right paren, as in:
+ # Safety check that length is defined. Should not be needed now.
+ # Former patch for indent-only, in which the entire set of tokens is
+ # turned into type 'q'. Lengths may have not been defined because sub
+ # 'respace_tokens' is bypassed. We do not need lengths in this case,
+ # but we will use the character count to have a defined value. In the
+ # future, it would be nicer to have 'respace_tokens' convert the lines
+ # to quotes and get correct lengths.
+ if ( !defined($length) ) { $length = length($token) }
- # foreach my $item (@a_rather_long_variable_name_here) {
- # &whatever;
- # } ## end foreach my $item (@a_rather_long_variable_name_here...
+ $token_lengths_to_go[$max_index_to_go] = $length;
- || (
- $tokens_to_go[$i] eq ')'
- && (
- (
- $i + 1 <= $max_index_to_go
- && $block_type_to_go[ $i + 1 ] eq
- $accumulating_text_for_block
- )
- || ( $i + 2 <= $max_index_to_go
- && $block_type_to_go[ $i + 2 ] eq
- $accumulating_text_for_block )
- )
- )
- )
- {
+ # We keep a running sum of token lengths from the start of this batch:
+ # summed_lengths_to_go[$i] = total length to just before token $i
+ # summed_lengths_to_go[$i+1] = total length to just after token $i
+ $summed_lengths_to_go[ $max_index_to_go + 1 ] =
+ $summed_lengths_to_go[$max_index_to_go] + $length;
- # add an extra space at each newline
- if ( $i == 0 ) { $leading_block_text .= ' ' }
+ my $in_continued_quote =
+ ( $Ktoken_vars == $K_first ) && $line_of_tokens->{_starting_in_quote};
+ if ( $max_index_to_go == 0 ) {
+ $starting_in_quote = $in_continued_quote;
+ }
- # add the token text
- $leading_block_text .= $tokens_to_go[$i];
- $leading_block_text_line_length = $new_line_length;
+ # Define the indentation that this token will have in two cases:
+ # Without CI = reduced_spaces_to_go
+ # With CI = leading_spaces_to_go
+ if ($in_continued_quote) {
+ $leading_spaces_to_go[$max_index_to_go] = 0;
+ $reduced_spaces_to_go[$max_index_to_go] = 0;
+ }
+ else {
+ $reduced_spaces_to_go[$max_index_to_go] = my $reduced_spaces =
+ $rOpts_indent_columns * $radjusted_levels->[$Ktoken_vars];
+ $leading_spaces_to_go[$max_index_to_go] =
+ $reduced_spaces + $rOpts_continuation_indentation * $ci_level;
}
- # show that text was truncated if necessary
- elsif ( $types_to_go[$i] ne 'b' ) {
- $leading_block_text_length_exceeded = 1;
- $leading_block_text .= '...';
+ # Correct these values if -lp is used
+ if ($rOpts_line_up_parentheses) {
+ $self->set_leading_whitespace( $Ktoken_vars, $K_last_nonblank_code,
+ $K_last_last_nonblank_code, $level, $ci_level,
+ $in_continued_quote );
}
+
+ DEBUG_STORE && do {
+ my ( $a, $b, $c ) = caller();
+ print STDOUT
+"STORE: from $a $c: storing token $token type $type lev=$level at $max_index_to_go\n";
+ };
+ return;
}
- return;
-}
-{
- my %is_if_elsif_else_unless_while_until_for_foreach;
+ sub flush_batch_of_CODE {
- BEGIN {
+ # Finish any batch packaging and call the process routine.
+ # This must be the only call to grind_batch_of_CODE()
+ my ($self) = @_;
- # These block types may have text between the keyword and opening
- # curly. Note: 'else' does not, but must be included to allow trailing
- # if/elsif text to be appended.
- # patch for SWITCH/CASE: added 'case' and 'when'
- my @q =
- qw(if elsif else unless while until for foreach case when catch);
- @is_if_elsif_else_unless_while_until_for_foreach{@q} =
- (1) x scalar(@q);
- }
+ return unless ( $max_index_to_go >= 0 );
- sub accumulate_csc_text {
+ # Create an array to hold variables for this batch
+ my $this_batch = [];
+ $this_batch->[_starting_in_quote_] = $starting_in_quote;
+ $this_batch->[_ending_in_quote_] = $ending_in_quote;
+ $this_batch->[_max_index_to_go_] = $max_index_to_go;
+ $this_batch->[_rK_to_go_] = \@K_to_go;
+ $this_batch->[_batch_CODE_type_] = $batch_CODE_type;
- # called once per output buffer when -csc is used. Accumulates
- # the text placed after certain closing block braces.
- # Defines and returns the following for this buffer:
+ # The flag $is_static_block_comment applies to the line which just
+ # arrived. So it only applies if we are outputting that line.
+ $this_batch->[_is_static_block_comment_] =
+ defined($K_first)
+ && $max_index_to_go == 0
+ && $K_to_go[0] == $K_first ? $is_static_block_comment : 0;
- my $block_leading_text = ""; # the leading text of the last '}'
- my $rblock_leading_if_elsif_text;
- my $i_block_leading_text =
- -1; # index of token owning block_leading_text
- my $block_line_count = 100; # how many lines the block spans
- my $terminal_type = 'b'; # type of last nonblank token
- my $i_terminal = 0; # index of last nonblank token
- my $terminal_block_type = "";
+ $self->[_this_batch_] = $this_batch;
- # update most recent statement label
- $csc_last_label = "" unless ($csc_last_label);
- if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
- my $block_label = $csc_last_label;
+ $last_line_had_side_comment =
+ $max_index_to_go > 0 && $types_to_go[$max_index_to_go] eq '#';
- # Loop over all tokens of this batch
- for my $i ( 0 .. $max_index_to_go ) {
- my $type = $types_to_go[$i];
- my $block_type = $block_type_to_go[$i];
- my $token = $tokens_to_go[$i];
+ $self->grind_batch_of_CODE();
- # remember last nonblank token type
- if ( $type ne '#' && $type ne 'b' ) {
- $terminal_type = $type;
- $terminal_block_type = $block_type;
- $i_terminal = $i;
- }
+ # Done .. this batch is history
+ $self->[_this_batch_] = [];
- my $type_sequence = $type_sequence_to_go[$i];
- if ( $block_type && $type_sequence ) {
+ initialize_batch_variables();
+ initialize_forced_breakpoint_vars();
+ initialize_gnu_batch_vars()
+ if $rOpts_line_up_parentheses;
- if ( $token eq '}' ) {
+ return;
+ }
- # restore any leading text saved when we entered this block
- if ( defined( $block_leading_text{$type_sequence} ) ) {
- ( $block_leading_text, $rblock_leading_if_elsif_text )
- = @{ $block_leading_text{$type_sequence} };
- $i_block_leading_text = $i;
- delete $block_leading_text{$type_sequence};
- $rleading_block_if_elsif_text =
- $rblock_leading_if_elsif_text;
- }
+ sub end_batch {
- if ( defined( $csc_block_label{$type_sequence} ) ) {
- $block_label = $csc_block_label{$type_sequence};
- delete $csc_block_label{$type_sequence};
- }
+ # end the current batch, EXCEPT for a few special cases
+ my ($self) = @_;
- # if we run into a '}' then we probably started accumulating
- # at something like a trailing 'if' clause..no harm done.
- if ( $accumulating_text_for_block
- && $levels_to_go[$i] <= $leading_block_text_level )
- {
- my $lev = $levels_to_go[$i];
- reset_block_text_accumulator();
- }
+ # Exception 1: Do not end line in a weld
+ return
+ if ( $total_weld_count
+ && $self->is_welded_right_at_i($max_index_to_go) );
- if ( defined( $block_opening_line_number{$type_sequence} ) )
- {
- my $output_line_number = get_output_line_number();
- $block_line_count =
- $output_line_number -
- $block_opening_line_number{$type_sequence} + 1;
- delete $block_opening_line_number{$type_sequence};
- }
- else {
+ # Exception 2: just set a tentative breakpoint if we might be in a
+ # one-line block
+ if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
+ $self->set_forced_breakpoint($max_index_to_go);
+ return;
+ }
- # Error: block opening line undefined for this line..
- # This shouldn't be possible, but it is not a
- # significant problem.
- }
- }
+ $self->flush_batch_of_CODE();
+ return;
+ }
- elsif ( $token eq '{' ) {
-
- my $line_number = get_output_line_number();
- $block_opening_line_number{$type_sequence} = $line_number;
+ sub flush_vertical_aligner {
+ my ($self) = @_;
+ my $vao = $self->[_vertical_aligner_object_];
+ $vao->flush();
+ return;
+ }
- # set a label for this block, except for
- # a bare block which already has the label
- # A label can only be used on the next {
- if ( $block_type =~ /:$/ ) { $csc_last_label = "" }
- $csc_block_label{$type_sequence} = $csc_last_label;
- $csc_last_label = "";
+ # flush is called to output any tokens in the pipeline, so that
+ # an alternate source of lines can be written in the correct order
+ sub flush {
+ my ( $self, $CODE_type ) = @_;
- if ( $accumulating_text_for_block
- && $levels_to_go[$i] == $leading_block_text_level )
- {
+ # end the current batch with 1 exception
- if ( $accumulating_text_for_block eq $block_type ) {
+ destroy_one_line_block();
- # save any leading text before we enter this block
- $block_leading_text{$type_sequence} = [
- $leading_block_text,
- $rleading_block_if_elsif_text
- ];
- $block_opening_line_number{$type_sequence} =
- $leading_block_text_line_number;
- reset_block_text_accumulator();
- }
- else {
+ # Exception: if we are flushing within the code stream only to insert
+ # blank line(s), then we can keep the batch intact at a weld. This
+ # improves formatting of -ce. See test 'ce1.ce'
+ if ( $CODE_type && $CODE_type eq 'BL' ) { $self->end_batch() }
- # shouldn't happen, but not a serious error.
- # We were accumulating -csc text for block type
- # $accumulating_text_for_block and unexpectedly
- # encountered a '{' for block type $block_type.
- }
- }
- }
- }
+ # otherwise, we have to shut things down completely.
+ else { $self->flush_batch_of_CODE() }
- if ( $type eq 'k'
- && $csc_new_statement_ok
- && $is_if_elsif_else_unless_while_until_for_foreach{$token}
- && $token =~ /$closing_side_comment_list_pattern/o )
- {
- set_block_text_accumulator($i);
- }
- else {
+ $self->flush_vertical_aligner();
+ return;
+ }
- # note: ignoring type 'q' because of tricks being played
- # with 'q' for hanging side comments
- if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
- $csc_new_statement_ok =
- ( $block_type || $type eq 'J' || $type eq ';' );
- }
- if ( $type eq ';'
- && $accumulating_text_for_block
- && $levels_to_go[$i] == $leading_block_text_level )
- {
- reset_block_text_accumulator();
- }
- else {
- accumulate_block_text($i);
- }
- }
- }
+ sub process_line_of_CODE {
- # Treat an 'else' block specially by adding preceding 'if' and
- # 'elsif' text. Otherwise, the 'end else' is not helpful,
- # especially for cuddled-else formatting.
- if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
- $block_leading_text =
- make_else_csc_text( $i_terminal, $terminal_block_type,
- $block_leading_text, $rblock_leading_if_elsif_text );
- }
+ my ( $self, $my_line_of_tokens ) = @_;
- # if this line ends in a label then remember it for the next pass
- $csc_last_label = "";
- if ( $terminal_type eq 'J' ) {
- $csc_last_label = $tokens_to_go[$i_terminal];
- }
+ # This routine is called once per INPUT line to process all of the
+ # tokens on that line.
- return ( $terminal_type, $i_terminal, $i_block_leading_text,
- $block_leading_text, $block_line_count, $block_label );
- }
-}
+ # It outputs full-line comments and blank lines immediately.
-sub make_else_csc_text {
+ # The tokens are copied one-by-one from the global token array $rLL to
+ # a set of '_to_go' arrays which collect batches of tokens for a
+ # further processing via calls to 'sub store_token_to_go', until a well
+ # defined 'structural' break point* or 'forced' breakpoint* is reached.
+ # Then, the batch of collected '_to_go' tokens is passed along to 'sub
+ # grind_batch_of_CODE' for further processing.
- # create additional -csc text for an 'else' and optionally 'elsif',
- # depending on the value of switch
- # $rOpts_closing_side_comment_else_flag:
- #
- # = 0 add 'if' text to trailing else
- # = 1 same as 0 plus:
- # add 'if' to 'elsif's if can fit in line length
- # add last 'elsif' to trailing else if can fit in one line
- # = 2 same as 1 but do not check if exceed line length
- #
- # $rif_elsif_text = a reference to a list of all previous closing
- # side comments created for this if block
- #
- my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
- my $csc_text = $block_leading_text;
+ # * 'structural' break points are basically line breaks corresponding
+ # to code blocks. An example is a chain of if-elsif-else statements,
+ # which should typically be broken at the opening and closing braces.
- if ( $block_type eq 'elsif'
- && $rOpts_closing_side_comment_else_flag == 0 )
- {
- return $csc_text;
- }
+ # * 'forced' break points are breaks required by side comments or by
+ # special user controls.
- my $count = @{$rif_elsif_text};
- return $csc_text unless ($count);
+ # So this routine is just making an initial set of required line
+ # breaks, basically regardless of the maximum requested line length.
+ # The subsequent stage of formating make additional line breaks
+ # appropriate for lists and logical structures, and to keep line
+ # lengths below the requested maximum line length.
- my $if_text = '[ if' . $rif_elsif_text->[0];
+ $line_of_tokens = $my_line_of_tokens;
+ $CODE_type = $line_of_tokens->{_code_type};
+ my $input_line_number = $line_of_tokens->{_line_number};
+ my $input_line = $line_of_tokens->{_line_text};
- # always show the leading 'if' text on 'else'
- if ( $block_type eq 'else' ) {
- $csc_text .= $if_text;
- }
+ # initialize closure variables
+ my $rK_range = $line_of_tokens->{_rK_range};
+ ( $K_first, $K_last ) = @{$rK_range};
- # see if that's all
- if ( $rOpts_closing_side_comment_else_flag == 0 ) {
- return $csc_text;
- }
+ # remember original starting index in case it changes
+ my $K_first_true = $K_first;
- my $last_elsif_text = "";
- if ( $count > 1 ) {
- $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
- if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
- }
+ $rLL = $self->[_rLL_];
+ $radjusted_levels = $self->[_radjusted_levels_];
- # tentatively append one more item
- my $saved_text = $csc_text;
- if ( $block_type eq 'else' ) {
- $csc_text .= $last_elsif_text;
- }
- else {
- $csc_text .= ' ' . $if_text;
- }
+ my $file_writer_object = $self->[_file_writer_object_];
+ my $rbreak_container = $self->[_rbreak_container_];
+ my $rshort_nested = $self->[_rshort_nested_];
+ my $sink_object = $self->[_sink_object_];
+ my $fh_tee = $self->[_fh_tee_];
+ my $ris_bli_container = $self->[_ris_bli_container_];
+ my $rK_weld_left = $self->[_rK_weld_left_];
- # all done if no length checks requested
- if ( $rOpts_closing_side_comment_else_flag == 2 ) {
- return $csc_text;
- }
+ if ( !defined($K_first) ) {
- # undo it if line length exceeded
- my $length =
- length($csc_text) +
- length($block_type) +
- length( $rOpts->{'closing-side-comment-prefix'} ) +
- $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
- if ( $length > maximum_line_length_for_level($leading_block_text_level) ) {
- $csc_text = $saved_text;
- }
- return $csc_text;
-}
+ # Empty line: This can happen if tokens are deleted, for example
+ # with the -mangle parameter
+ return;
+ }
-{ # sub balance_csc_text
+ $no_internal_newlines = 0;
+ if ( !$rOpts_add_newlines || $CODE_type eq 'NIN' ) {
+ $no_internal_newlines = 2;
+ }
- my %matching_char;
+ $side_comment_follows = 0;
+ my $is_comment =
+ ( $K_first == $K_last && $rLL->[$K_first]->[_TYPE_] eq '#' );
+ my $is_static_block_comment_without_leading_space =
+ $CODE_type eq 'SBCX';
+ $is_static_block_comment =
+ $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space;
+ my $is_hanging_side_comment = $CODE_type eq 'HSC';
+ my $is_VERSION_statement = $CODE_type eq 'VER';
- BEGIN {
- %matching_char = (
- '{' => '}',
- '(' => ')',
- '[' => ']',
- '}' => '{',
- ')' => '(',
- ']' => '[',
- );
- }
+ if ($is_VERSION_statement) {
+ $self->[_saw_VERSION_in_this_file_] = 1;
+ $no_internal_newlines = 2;
+ }
- sub balance_csc_text {
+ # Add interline blank if any
+ my $last_old_nonblank_type = "b";
+ my $first_new_nonblank_token = "";
+ if ( $max_index_to_go >= 0 ) {
+ $last_old_nonblank_type = $types_to_go[$max_index_to_go];
+ $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
+ if ( !$is_comment
+ && $types_to_go[$max_index_to_go] ne 'b'
+ && $K_first > 0
+ && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' )
+ {
+ $K_first -= 1;
+ }
+ }
- # Append characters to balance a closing side comment so that editors
- # such as vim can correctly jump through code.
- # Simple Example:
- # input = ## end foreach my $foo ( sort { $b ...
- # output = ## end foreach my $foo ( sort { $b ...})
+ my $rtok_first = $rLL->[$K_first];
- # NOTE: This routine does not currently filter out structures within
- # quoted text because the bounce algorithms in text editors do not
- # necessarily do this either (a version of vim was checked and
- # did not do this).
+ my $in_quote = $line_of_tokens->{_ending_in_quote};
+ $ending_in_quote = $in_quote;
+ my $guessed_indentation_level =
+ $line_of_tokens->{_guessed_indentation_level};
- # Some complex examples which will cause trouble for some editors:
- # while ( $mask_string =~ /\{[^{]*?\}/g ) {
- # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
- # if ( $1 eq '{' ) {
- # test file test1/braces.pl has many such examples.
+ ######################################
+ # Handle a block (full-line) comment..
+ ######################################
+ if ($is_comment) {
- my ($csc) = @_;
+ if ( $rOpts->{'delete-block-comments'} ) {
+ $self->flush();
+ return;
+ }
- # loop to examine characters one-by-one, RIGHT to LEFT and
- # build a balancing ending, LEFT to RIGHT.
- for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
+ destroy_one_line_block();
+ $self->end_batch();
- my $char = substr( $csc, $pos, 1 );
+ # output a blank line before block comments
+ if (
+ # unless we follow a blank or comment line
+ $self->[_last_line_leading_type_] ne '#'
+ && $self->[_last_line_leading_type_] ne 'b'
- # ignore everything except structural characters
- next unless ( $matching_char{$char} );
+ # only if allowed
+ && $rOpts->{'blanks-before-comments'}
- # pop most recently appended character
- my $top = chop($csc);
+ # if this is NOT an empty comment, unless it follows a side
+ # comment and could become a hanging side comment.
+ && (
+ $rtok_first->[_TOKEN_] ne '#'
+ || ( $last_line_had_side_comment
+ && $rLL->[$K_first]->[_LEVEL_] > 0 )
+ )
- # push it back plus the mate to the newest character
- # unless they balance each other.
- $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
- }
+ # not after a short line ending in an opening token
+ # because we already have space above this comment.
+ # Note that the first comment in this if block, after
+ # the 'if (', does not get a blank line because of this.
+ && !$self->[_last_output_short_opening_token_]
- # return the balanced string
- return $csc;
- }
-}
+ # never before static block comments
+ && !$is_static_block_comment
+ )
+ {
+ $self->flush(); # switching to new output stream
+ $file_writer_object->write_blank_code_line();
+ $self->[_last_line_leading_type_] = 'b';
+ }
-sub add_closing_side_comment {
+ if (
+ $rOpts->{'indent-block-comments'}
+ && ( !$rOpts->{'indent-spaced-block-comments'}
+ || $input_line =~ /^\s+/ )
+ && !$is_static_block_comment_without_leading_space
+ )
+ {
+ my $Ktoken_vars = $K_first;
+ my $rtoken_vars = $rLL->[$Ktoken_vars];
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
+ $self->end_batch();
+ }
+ else {
- my $self = shift;
+ # switching to new output stream
+ $self->flush();
- # add closing side comments after closing block braces if -csc used
- my ( $closing_side_comment, $cscw_block_comment );
+ # Note that last arg in call here is 'undef' for comments
+ $file_writer_object->write_code_line(
+ $rtok_first->[_TOKEN_] . "\n", undef );
+ $self->[_last_line_leading_type_] = '#';
+ }
+ return;
+ }
- #---------------------------------------------------------------
- # Step 1: loop through all tokens of this line to accumulate
- # the text needed to create the closing side comments. Also see
- # how the line ends.
- #---------------------------------------------------------------
+ # compare input/output indentation except for continuation lines
+ # (because they have an unknown amount of initial blank space)
+ # and lines which are quotes (because they may have been outdented)
+ $self->compare_indentation_levels( $K_first, $guessed_indentation_level,
+ $input_line_number )
+ unless ( $is_hanging_side_comment
+ || $rtok_first->[_CI_LEVEL_] > 0
+ || $guessed_indentation_level == 0
+ && $rtok_first->[_TYPE_] eq 'Q' );
- my ( $terminal_type, $i_terminal, $i_block_leading_text,
- $block_leading_text, $block_line_count, $block_label )
- = accumulate_csc_text();
+ ##########################
+ # Handle indentation-only
+ ##########################
- #---------------------------------------------------------------
- # Step 2: make the closing side comment if this ends a block
- #---------------------------------------------------------------
- my $have_side_comment = $types_to_go[$max_index_to_go] eq '#';
+ # NOTE: In previous versions we sent all qw lines out immediately here.
+ # No longer doing this: also write a line which is entirely a 'qw' list
+ # to allow stacking of opening and closing tokens. Note that interior
+ # qw lines will still go out at the end of this routine.
+ if ( $CODE_type eq 'IO' ) {
+ $self->flush();
+ my $line = $input_line;
- # if this line might end in a block closure..
- if (
- $terminal_type eq '}'
+ # Fix for rt #125506 Unexpected string formating
+ # in which leading space of a terminal quote was removed
+ $line =~ s/\s+$//;
+ $line =~ s/^\s+// unless ( $line_of_tokens->{_starting_in_quote} );
- # ..and either
- && (
+ my $Ktoken_vars = $K_first;
- # the block is long enough
- ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
+ # We work with a copy of the token variables and change the
+ # first token to be the entire line as a quote variable
+ my $rtoken_vars = $rLL->[$Ktoken_vars];
+ $rtoken_vars = copy_token_as_type( $rtoken_vars, 'q', $line );
- # or there is an existing comment to check
- || ( $have_side_comment
- && $rOpts->{'closing-side-comment-warnings'} )
- )
+ # Patch: length is not really important here
+ $rtoken_vars->[_TOKEN_LENGTH_] = length($line);
- # .. and if this is one of the types of interest
- && $block_type_to_go[$i_terminal] =~
- /$closing_side_comment_list_pattern/o
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
+ $self->end_batch();
+ return;
+ }
- # .. but not an anonymous sub
- # These are not normally of interest, and their closing braces are
- # often followed by commas or semicolons anyway. This also avoids
- # possible erratic output due to line numbering inconsistencies
- # in the cases where their closing braces terminate a line.
- && $block_type_to_go[$i_terminal] ne 'sub'
-
- # ..and the corresponding opening brace must is not in this batch
- # (because we do not need to tag one-line blocks, although this
- # should also be caught with a positive -csci value)
- && $self->mate_index_to_go($i_terminal) < 0
+ ############################
+ # Handle all other lines ...
+ ############################
- # ..and either
- && (
+ # If we just saw the end of an elsif block, write nag message
+ # if we do not see another elseif or an else.
+ if ($looking_for_else) {
- # this is the last token (line doesn't have a side comment)
- !$have_side_comment
+ unless ( $rLL->[$K_first]->[_TOKEN_] =~ /^(elsif|else)$/ ) {
+ write_logfile_entry("(No else block)\n");
+ }
+ $looking_for_else = 0;
+ }
- # or the old side comment is a closing side comment
- || $tokens_to_go[$max_index_to_go] =~
- /$closing_side_comment_prefix_pattern/o
- )
- )
- {
+ # This is a good place to kill incomplete one-line blocks
+ if (
+ (
+ ( $semicolons_before_block_self_destruct == 0 )
+ && ( $max_index_to_go >= 0 )
+ && ( $last_old_nonblank_type eq ';' )
+ && ( $first_new_nonblank_token ne '}' )
+ )
- # then make the closing side comment text
- if ($block_label) { $block_label .= " " }
- my $token =
-"$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
+ # Patch for RT #98902. Honor request to break at old commas.
+ || ( $rOpts_break_at_old_comma_breakpoints
+ && $max_index_to_go >= 0
+ && $last_old_nonblank_type eq ',' )
+ )
+ {
+ $forced_breakpoint_to_go[$max_index_to_go] = 1
+ if ($rOpts_break_at_old_comma_breakpoints);
+ destroy_one_line_block();
+ $self->end_batch();
+ }
- # append any extra descriptive text collected above
- if ( $i_block_leading_text == $i_terminal ) {
- $token .= $block_leading_text;
+ # Keep any requested breaks before this line. Note that we have to
+ # use the original K_first because it may have been reduced above
+ # to add a blank. The value of the flag is as follows:
+ # 1 => hard break, flush the batch
+ # 2 => soft break, set breakpoint and continue building the batch
+ if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} ) {
+ destroy_one_line_block();
+ if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} == 2 ) {
+ $self->set_forced_breakpoint($max_index_to_go);
+ }
+ else {
+ $self->end_batch();
+ }
}
- $token = balance_csc_text($token)
- if $rOpts->{'closing-side-comments-balanced'};
+ # loop to process the tokens one-by-one
- $token =~ s/\s*$//; # trim any trailing whitespace
+ # We do not want a leading blank if the previous batch just got output
+ if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
+ $K_first++;
+ }
- # handle case of existing closing side comment
- if ($have_side_comment) {
+ foreach my $Ktoken_vars ( $K_first .. $K_last ) {
- # warn if requested and tokens differ significantly
- if ( $rOpts->{'closing-side-comment-warnings'} ) {
- my $old_csc = $tokens_to_go[$max_index_to_go];
- my $new_csc = $token;
- $new_csc =~ s/\s+//g; # trim all whitespace
- $old_csc =~ s/\s+//g; # trim all whitespace
- $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
- $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
- $new_csc =~ s/(\.\.\.)$//; # trim trailing '...'
- my $new_trailing_dots = $1;
- $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
+ my $rtoken_vars = $rLL->[$Ktoken_vars];
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $type = $rtoken_vars->[_TYPE_];
+ my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
+ my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
- # Patch to handle multiple closing side comments at
- # else and elsif's. These have become too complicated
- # to check, so if we see an indication of
- # '[ if' or '[ # elsif', then assume they were made
- # by perltidy.
- if ( $block_type_to_go[$i_terminal] eq 'else' ) {
- if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
- }
- elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
- if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
- }
+ # If we are continuing after seeing a right curly brace, flush
+ # buffer unless we see what we are looking for, as in
+ # } else ...
+ if ( $rbrace_follower && $type ne 'b' ) {
- # if old comment is contained in new comment,
- # only compare the common part.
- if ( length($new_csc) > length($old_csc) ) {
- $new_csc = substr( $new_csc, 0, length($old_csc) );
+ unless ( $rbrace_follower->{$token} ) {
+ $self->end_batch();
}
+ $rbrace_follower = undef;
+ }
- # if the new comment is shorter and has been limited,
- # only compare the common part.
- if ( length($new_csc) < length($old_csc)
- && $new_trailing_dots )
+ # Get next nonblank on this line
+ my $next_nonblank_token = '';
+ my $next_nonblank_token_type = 'b';
+ if ( $Ktoken_vars < $K_last ) {
+ my $Knnb = $Ktoken_vars + 1;
+ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b'
+ && $Knnb < $K_last )
{
- $old_csc = substr( $old_csc, 0, length($new_csc) );
+ $Knnb++;
}
+ $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
+ $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
+ }
- # any remaining difference?
- if ( $new_csc ne $old_csc ) {
+ # Do not allow breaks which would promote a side comment to a
+ # block comment. In order to allow a break before an opening
+ # or closing BLOCK, followed by a side comment, those sections
+ # of code will handle this flag separately.
+ $side_comment_follows = ( $next_nonblank_token_type eq '#' );
+ my $is_opening_BLOCK =
+ ( $type eq '{'
+ && $token eq '{'
+ && $block_type
+ && !$rshort_nested->{$type_sequence}
+ && $block_type ne 't' );
+ my $is_closing_BLOCK =
+ ( $type eq '}'
+ && $token eq '}'
+ && $block_type
+ && !$rshort_nested->{$type_sequence}
+ && $block_type ne 't' );
- # just leave the old comment if we are below the threshold
- # for creating side comments
- if ( $block_line_count <
- $rOpts->{'closing-side-comment-interval'} )
- {
- $token = undef;
- }
+ if ( $side_comment_follows
+ && !$is_opening_BLOCK
+ && !$is_closing_BLOCK )
+ {
+ $no_internal_newlines = 1;
+ }
- # otherwise we'll make a note of it
- else {
+ # We're only going to handle breaking for code BLOCKS at this
+ # (top) level. Other indentation breaks will be handled by
+ # sub scan_list, which is better suited to dealing with them.
+ if ($is_opening_BLOCK) {
- warning(
-"perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
- );
+ # Tentatively output this token. This is required before
+ # calling starting_one_line_block. We may have to unstore
+ # it, though, if we have to break before it.
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
- # save the old side comment in a new trailing block
- # comment
- my $timestamp = "";
- if ( $rOpts->{'timestamp'} ) {
- my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
- $year += 1900;
- $month += 1;
- $timestamp = "$year-$month-$day";
- }
- $cscw_block_comment =
-"## perltidy -cscw $timestamp: $tokens_to_go[$max_index_to_go]";
-## "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
- }
- }
- else {
+ # Look ahead to see if we might form a one-line block..
+ my $too_long =
+ $self->starting_one_line_block( $Ktoken_vars,
+ $K_last_nonblank_code, $K_last );
+ $self->clear_breakpoint_undo_stack();
- # No differences.. we can safely delete old comment if we
- # are below the threshold
- if ( $block_line_count <
- $rOpts->{'closing-side-comment-interval'} )
- {
- $token = undef;
- $self->unstore_token_to_go()
- if ( $types_to_go[$max_index_to_go] eq '#' );
- $self->unstore_token_to_go()
- if ( $types_to_go[$max_index_to_go] eq 'b' );
- }
+ # to simplify the logic below, set a flag to indicate if
+ # this opening brace is far from the keyword which introduces it
+ my $keyword_on_same_line = 1;
+ if (
+ $max_index_to_go >= 0
+ && $last_nonblank_type eq ')'
+ && ( ( $rtoken_vars->[_SLEVEL_] < $nesting_depth_to_go[0] )
+ || $too_long )
+ )
+ {
+ $keyword_on_same_line = 0;
}
- }
- # switch to the new csc (unless we deleted it!)
- if ($token) {
- $tokens_to_go[$max_index_to_go] = $token;
- $self->sync_token_K($max_index_to_go);
- }
- }
+ # decide if user requested break before '{'
+ my $want_break =
- # handle case of NO existing closing side comment
- else {
+ # This test was added to minimize changes in -bl formatting
+ # caused by other changes to fix cases b562 .. b983
+ # Previously, the -bl flag was being applied almost randomly
+ # to sort/map/grep/eval blocks, depending on if they were
+ # flagged as possible one-line blocks. usually time they
+ # were not given -bl formatting. The following flag was
+ # added to minimize changes to existing formatting.
+ $is_braces_left_exclude_block{$block_type}
+ ? 0
- # To avoid inserting a new token in the token arrays, we
- # will just return the new side comment so that it can be
- # inserted just before it is needed in the call to the
- # vertical aligner.
- $closing_side_comment = $token;
- }
- }
- return ( $closing_side_comment, $cscw_block_comment );
-}
+ # use -bl flag if not a sub block of any type
+ : $block_type !~ /$ANYSUB_PATTERN/
+ ? $rOpts->{'opening-brace-on-new-line'}
-sub previous_nonblank_token {
- my ($i) = @_;
- my $name = "";
- my $im = $i - 1;
- return "" if ( $im < 0 );
- if ( $types_to_go[$im] eq 'b' ) { $im--; }
- return "" if ( $im < 0 );
- $name = $tokens_to_go[$im];
+ # use -sbl flag for a named sub block
+ : $block_type !~ /$ASUB_PATTERN/
+ ? $rOpts->{'opening-sub-brace-on-new-line'}
- # prepend any sub name to an isolated -> to avoid unwanted alignments
- # [test case is test8/penco.pl]
- if ( $name eq '->' ) {
- $im--;
- if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
- $name = $tokens_to_go[$im] . $name;
- }
- }
- return $name;
-}
+ # use -asbl flag for an anonymous sub block
+ : $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
-sub send_lines_to_vertical_aligner {
+ # Break if requested with -bli flag
+ $want_break ||= $ris_bli_container->{$type_sequence};
- my ( $self, $rbatch_hash ) = @_;
+ # Do not break if this token is welded to the left
+ if ( $total_weld_count
+ && defined( $rK_weld_left->{$Ktoken_vars} ) )
+ {
+ $want_break = 0;
+ }
- # This routine receives a batch of code for which the final line breaks
- # have been defined. Here we prepare the lines for passing to the vertical
- # aligner. We do the following tasks:
- # - mark certain vertical alignment tokens tokens, such as '=', in each line.
- # - make minor indentation adjustments
- # - insert extra blank spaces to help display certain logical constructions
+ # Break before an opening '{' ...
+ if (
- my $rlines_K = $rbatch_hash->{rlines_K};
- if ( !@{$rlines_K} ) {
- Fault("Unexpected call with no lines");
- return;
- }
- my $n_last_line = @{$rlines_K} - 1;
- my $do_not_pad = $rbatch_hash->{do_not_pad};
+ # if requested
+ $want_break
- my $rLL = $self->{rLL};
- my $Klimit = $self->{Klimit};
+ # and we were unable to start looking for a block,
+ && $index_start_one_line_block == UNDEFINED_INDEX
- my ( $Kbeg_next, $Kend_next ) = @{ $rlines_K->[0] };
- my $type_beg_next = $rLL->[$Kbeg_next]->[_TYPE_];
- my $token_beg_next = $rLL->[$Kbeg_next]->[_TOKEN_];
- my $type_end_next = $rLL->[$Kend_next]->[_TYPE_];
+ # or if it will not be on same line as its keyword, so that
+ # it will be outdented (eval.t, overload.t), and the user
+ # has not insisted on keeping it on the right
+ || ( !$keyword_on_same_line
+ && !$rOpts->{'opening-brace-always-on-right'} )
+ )
+ {
- # Construct indexes to the global_to_go arrays so that called routines can
- # still access those arrays. This might eventually be removed
- # when all called routines have been converted to access token values
- # in the rLL array instead.
- my $ibeg0 = $rbatch_hash->{ibeg0};
- my $Kbeg0 = $Kbeg_next;
- my ( $ri_first, $ri_last );
- foreach my $rline ( @{$rlines_K} ) {
- my ( $Kbeg, $Kend ) = @{$rline};
- my $ibeg = $ibeg0 + $Kbeg - $Kbeg0;
- my $iend = $ibeg0 + $Kend - $Kbeg0;
- push @{$ri_first}, $ibeg;
- push @{$ri_last}, $iend;
- }
- #####################################################################
-
- my $valign_batch_number = $self->increment_valign_batch_count();
-
- my ( $cscw_block_comment, $closing_side_comment );
- if ( $rOpts->{'closing-side-comments'} ) {
- ( $closing_side_comment, $cscw_block_comment ) =
- $self->add_closing_side_comment();
- }
+ # but only if allowed
+ unless ($no_internal_newlines) {
- my $rindentation_list = [0]; # ref to indentations for each line
+ # since we already stored this token, we must unstore it
+ $self->unstore_token_to_go();
- # define the array @{$ralignment_type_to_go} for the output tokens
- # which will be non-blank for each special token (such as =>)
- # for which alignment is required.
- my $ralignment_type_to_go =
- $self->set_vertical_alignment_markers( $ri_first, $ri_last );
+ # then output the line
+ $self->end_batch();
- # flush before a long if statement to avoid unwanted alignment
- if ( $n_last_line > 0
- && $type_beg_next eq 'k'
- && $token_beg_next =~ /^(if|unless)$/ )
- {
- Perl::Tidy::VerticalAligner::flush();
- }
+ # and now store this token at the start of a new line
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
+ }
+ }
- $self->undo_ci( $ri_first, $ri_last );
+ # Now update for side comment
+ if ($side_comment_follows) { $no_internal_newlines = 1 }
- $self->set_logical_padding( $ri_first, $ri_last );
+ # now output this line
+ unless ($no_internal_newlines) {
+ $self->end_batch();
+ }
+ }
- # loop to prepare each line for shipment
- my $in_comma_list;
- my ( $Kbeg, $type_beg, $token_beg );
- my ( $Kend, $type_end );
- for my $n ( 0 .. $n_last_line ) {
+ elsif ($is_closing_BLOCK) {
- my $ibeg = $ri_first->[$n];
- my $iend = $ri_last->[$n];
- my $rline = $rlines_K->[$n];
- my $forced_breakpoint = $rline->[2];
+ # If there is a pending one-line block ..
+ if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
- # we may need to look at variables on three consecutive lines ...
+ # we have to terminate it if..
+ if (
- # Some vars on line [n-1], if any:
- my $Kbeg_last = $Kbeg;
- my $type_beg_last = $type_beg;
- my $token_beg_last = $token_beg;
- my $Kend_last = $Kend;
- my $type_end_last = $type_end;
+ # it is too long (final length may be different from
+ # initial estimate). note: must allow 1 space for this
+ # token
+ $self->excess_line_length( $index_start_one_line_block,
+ $max_index_to_go ) >= 0
- # Some vars on line [n]:
- $Kbeg = $Kbeg_next;
- $type_beg = $type_beg_next;
- $token_beg = $token_beg_next;
- $Kend = $Kend_next;
- $type_end = $type_end_next;
+ # or if it has too many semicolons
+ || ( $semicolons_before_block_self_destruct == 0
+ && $last_nonblank_type ne ';' )
+ )
+ {
+ destroy_one_line_block();
+ }
+ }
- # We use two slightly different definitions of level jump at the end
- # of line:
- # $ljump is the level jump needed by 'sub set_adjusted_indentation'
- # $level_jump is the level jump needed by the vertical aligner.
- my $ljump = 0; # level jump at end of line
+ # put a break before this closing curly brace if appropriate
+ unless ( $no_internal_newlines
+ || $index_start_one_line_block != UNDEFINED_INDEX )
+ {
- # Get some vars on line [n+1], if any:
- if ( $n < $n_last_line ) {
- ( $Kbeg_next, $Kend_next ) =
- @{ $rlines_K->[ $n + 1 ] };
- $type_beg_next = $rLL->[$Kbeg_next]->[_TYPE_];
- $token_beg_next = $rLL->[$Kbeg_next]->[_TOKEN_];
- $type_end_next = $rLL->[$Kend_next]->[_TYPE_];
- $ljump = $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
- }
+ # write out everything before this closing curly brace
+ $self->end_batch();
+ }
- # level jump at end of line for the vertical aligner:
- my $level_jump =
- $Kend >= $Klimit
- ? 0
- : $rLL->[ $Kend + 1 ]->[_SLEVEL_] - $rLL->[$Kbeg]->[_SLEVEL_];
+ # Now update for side comment
+ if ($side_comment_follows) { $no_internal_newlines = 1 }
- $self->delete_needless_alignments( $ibeg, $iend,
- $ralignment_type_to_go );
+ # store the closing curly brace
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
- my ( $rtokens, $rfields, $rpatterns ) =
- $self->make_alignment_patterns( $ibeg, $iend,
- $ralignment_type_to_go );
+ # ok, we just stored a closing curly brace. Often, but
+ # not always, we want to end the line immediately.
+ # So now we have to check for special cases.
- my ( $indentation, $lev, $level_end, $terminal_type,
- $is_semicolon_terminated, $is_outdented_line )
- = $self->set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
- $ri_first, $ri_last, $rindentation_list, $ljump );
+ # if this '}' successfully ends a one-line block..
+ my $is_one_line_block = 0;
+ my $keep_going = 0;
+ if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
- # we will allow outdenting of long lines..
- my $outdent_long_lines = (
+ # Remember the type of token just before the
+ # opening brace. It would be more general to use
+ # a stack, but this will work for one-line blocks.
+ $is_one_line_block =
+ $types_to_go[$index_start_one_line_block];
- # which are long quotes, if allowed
- ( $type_beg eq 'Q' && $rOpts->{'outdent-long-quotes'} )
+ # we have to actually make it by removing tentative
+ # breaks that were set within it
+ $self->undo_forced_breakpoint_stack(0);
+ $self->set_nobreaks( $index_start_one_line_block,
+ $max_index_to_go - 1 );
- # which are long block comments, if allowed
- || (
- $type_beg eq '#'
- && $rOpts->{'outdent-long-comments'}
+ # then re-initialize for the next one-line block
+ destroy_one_line_block();
- # but not if this is a static block comment
- && !$is_static_block_comment
- )
- );
+ # then decide if we want to break after the '}' ..
+ # We will keep going to allow certain brace followers as in:
+ # do { $ifclosed = 1; last } unless $losing;
+ #
+ # But make a line break if the curly ends a
+ # significant block:
+ if (
+ (
+ $is_block_without_semicolon{$block_type}
- my $rvertical_tightness_flags =
- $self->set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
- $ri_first, $ri_last );
+ # Follow users break point for
+ # one line block types U & G, such as a 'try' block
+ || $is_one_line_block =~ /^[UG]$/
+ && $Ktoken_vars == $K_last
+ )
- # flush an outdented line to avoid any unwanted vertical alignment
- Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
+ # if needless semicolon follows we handle it later
+ && $next_nonblank_token ne ';'
+ )
+ {
+ $self->end_batch()
+ unless ($no_internal_newlines);
+ }
+ }
- # Set a flag at the final ':' of a ternary chain to request
- # vertical alignment of the final term. Here is a
- # slightly complex example:
- #
- # $self->{_text} = (
- # !$section ? ''
- # : $type eq 'item' ? "the $section entry"
- # : "the section on $section"
- # )
- # . (
- # $page
- # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
- # : ' elsewhere in this document'
- # );
- #
- my $is_terminal_ternary = 0;
+ # set string indicating what we need to look for brace follower
+ # tokens
+ if ( $block_type eq 'do' ) {
+ $rbrace_follower = \%is_do_follower;
+ if ( $self->tight_paren_follows( $K_to_go[0], $Ktoken_vars )
+ )
+ {
+ $rbrace_follower = { ')' => 1 };
+ }
+ }
+ elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
+ $rbrace_follower = \%is_if_brace_follower;
+ }
+ elsif ( $block_type eq 'else' ) {
+ $rbrace_follower = \%is_else_brace_follower;
+ }
- if ( $type_beg eq ':' || $n > 0 && $type_end_last eq ':' ) {
- my $last_leading_type = $n > 0 ? $type_beg_last : ':';
- if ( $terminal_type ne ';'
- && $n_last_line > $n
- && $level_end == $lev )
- {
- $level_end = $rLL->[$Kbeg_next]->[_LEVEL_];
- $terminal_type = $rLL->[$Kbeg_next]->[_TYPE_];
- }
- if (
- $last_leading_type eq ':'
- && ( ( $terminal_type eq ';' && $level_end <= $lev )
- || ( $terminal_type ne ':' && $level_end < $lev ) )
- )
- {
+ # added eval for borris.t
+ elsif ($is_sort_map_grep_eval{$block_type}
+ || $is_one_line_block eq 'G' )
+ {
+ $rbrace_follower = undef;
+ $keep_going = 1;
+ }
- # the terminal term must not contain any ternary terms, as in
- # my $ECHO = (
- # $Is_MSWin32 ? ".\\echo$$"
- # : $Is_MacOS ? ":echo$$"
- # : ( $Is_NetWare ? "echo$$" : "./echo$$" )
- # );
- $is_terminal_ternary = 1;
+ # anonymous sub
+ elsif ( $block_type =~ /$ASUB_PATTERN/ ) {
- my $KP = $rLL->[$Kbeg]->[_KNEXT_SEQ_ITEM_];
- while ( defined($KP) && $KP <= $Kend ) {
- my $type_KP = $rLL->[$KP]->[_TYPE_];
- if ( $type_KP eq '?' || $type_KP eq ':' ) {
- $is_terminal_ternary = 0;
- last;
+ if ($is_one_line_block) {
+ $rbrace_follower = \%is_anon_sub_1_brace_follower;
+ }
+ else {
+ $rbrace_follower = \%is_anon_sub_brace_follower;
}
- $KP = $rLL->[$KP]->[_KNEXT_SEQ_ITEM_];
}
- }
- }
-
- # add any new closing side comment to the last line
- if ( $closing_side_comment && $n == $n_last_line && @{$rfields} ) {
- $rfields->[-1] .= " $closing_side_comment";
- }
- # send this new line down the pipe
- my $rvalign_hash = {};
- $rvalign_hash->{level} = $lev;
- $rvalign_hash->{level_end} = $level_end;
- $rvalign_hash->{indentation} = $indentation;
- $rvalign_hash->{is_forced_break} = $forced_breakpoint || $in_comma_list;
- $rvalign_hash->{outdent_long_lines} = $outdent_long_lines;
- $rvalign_hash->{is_terminal_ternary} = $is_terminal_ternary;
- $rvalign_hash->{is_terminal_statement} = $is_semicolon_terminated;
- $rvalign_hash->{do_not_pad} = $do_not_pad;
- $rvalign_hash->{rvertical_tightness_flags} = $rvertical_tightness_flags;
- $rvalign_hash->{level_jump} = $level_jump;
+ # None of the above: specify what can follow a closing
+ # brace of a block which is not an
+ # if/elsif/else/do/sort/map/grep/eval
+ # Testfiles:
+ # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
+ else {
+ $rbrace_follower = \%is_other_brace_follower;
+ }
- $rvalign_hash->{valign_batch_number} = $valign_batch_number;
+ # See if an elsif block is followed by another elsif or else;
+ # complain if not.
+ if ( $block_type eq 'elsif' ) {
- Perl::Tidy::VerticalAligner::valign_input( $rvalign_hash, $rfields,
- $rtokens, $rpatterns );
+ if ( $next_nonblank_token_type eq 'b' ) { # end of line?
+ $looking_for_else = 1; # ok, check on next line
+ }
+ else {
- $in_comma_list = $type_end eq ',' && $forced_breakpoint;
+ unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
+ write_logfile_entry("No else block :(\n");
+ }
+ }
+ }
- # flush an outdented line to avoid any unwanted vertical alignment
- Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
+ # keep going after certain block types (map,sort,grep,eval)
+ # added eval for borris.t
+ if ($keep_going) {
- $do_not_pad = 0;
+ # keep going
+ }
- # Set flag indicating if this line ends in an opening
- # token and is very short, so that a blank line is not
- # needed if the subsequent line is a comment.
- # Examples of what we are looking for:
- # {
- # && (
- # BEGIN {
- # default {
- # sub {
- $last_output_short_opening_token
+ # if no more tokens, postpone decision until re-entring
+ elsif ( ( $next_nonblank_token_type eq 'b' )
+ && $rOpts_add_newlines )
+ {
+ unless ($rbrace_follower) {
+ $self->end_batch()
+ unless ($no_internal_newlines);
+ }
+ }
- # line ends in opening token
- = $type_end =~ /^[\{\(\[L]$/
+ elsif ($rbrace_follower) {
- # and either
- && (
- # line has either single opening token
- $Kend == $Kbeg
+ unless ( $rbrace_follower->{$next_nonblank_token} ) {
+ $self->end_batch()
+ unless ($no_internal_newlines);
+ }
+ $rbrace_follower = undef;
+ }
- # or is a single token followed by opening token.
- # Note that sub identifiers have blanks like 'sub doit'
- || ( $Kend - $Kbeg <= 2 && $token_beg !~ /\s+/ )
- )
+ else {
+ $self->end_batch()
+ unless ($no_internal_newlines);
+ }
- # and limit total to 10 character widths
- && token_sequence_length( $ibeg, $iend ) <= 10;
+ } # end treatment of closing block token
- } # end of loop to output each line
+ # handle semicolon
+ elsif ( $type eq ';' ) {
- # remember indentation of lines containing opening containers for
- # later use by sub set_adjusted_indentation
- $self->save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
+ my $break_before_semicolon = ( $Ktoken_vars == $K_first )
+ && $rOpts_break_at_old_semicolon_breakpoints;
- # output any new -cscw block comment
- if ($cscw_block_comment) {
- Perl::Tidy::VerticalAligner::flush();
- $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
- }
- return;
-}
+ # kill one-line blocks with too many semicolons
+ $semicolons_before_block_self_destruct--;
+ if (
+ $break_before_semicolon
+ || ( $semicolons_before_block_self_destruct < 0 )
+ || ( $semicolons_before_block_self_destruct == 0
+ && $next_nonblank_token_type !~ /^[b\}]$/ )
+ )
+ {
+ destroy_one_line_block();
+ $self->end_batch() if ($break_before_semicolon);
+ }
-{ # begin make_alignment_patterns
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
- my %block_type_map;
- my %keyword_map;
- my %operator_map;
+ $self->end_batch()
+ unless (
+ $no_internal_newlines
+ || ( $rOpts_keep_interior_semicolons
+ && $Ktoken_vars < $K_last )
+ || ( $next_nonblank_token eq '}' )
+ );
- BEGIN {
+ }
- # map related block names into a common name to
- # allow alignment
- %block_type_map = (
- 'unless' => 'if',
- 'else' => 'if',
- 'elsif' => 'if',
- 'when' => 'if',
- 'default' => 'if',
- 'case' => 'if',
- 'sort' => 'map',
- 'grep' => 'map',
- );
+ # handle here_doc target string
+ elsif ( $type eq 'h' ) {
- # map certain keywords to the same 'if' class to align
- # long if/elsif sequences. [elsif.pl]
- %keyword_map = (
- 'unless' => 'if',
- 'else' => 'if',
- 'elsif' => 'if',
- 'when' => 'given',
- 'default' => 'given',
- 'case' => 'switch',
+ # no newlines after seeing here-target
+ $no_internal_newlines = 2;
+ ## destroy_one_line_block(); # deleted to fix case b529
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
+ }
- # treat an 'undef' similar to numbers and quotes
- 'undef' => 'Q',
- );
+ # handle all other token types
+ else {
- # map certain operators to the same class for pattern matching
- %operator_map = (
- '!~' => '=~',
- '+=' => '+=',
- '-=' => '+=',
- '*=' => '+=',
- '/=' => '+=',
- );
- }
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
+ }
- sub delete_needless_alignments {
- my ( $self, $ibeg, $iend, $ralignment_type_to_go ) = @_;
+ # remember two previous nonblank OUTPUT tokens
+ if ( $type ne '#' && $type ne 'b' ) {
+ $last_last_nonblank_token = $last_nonblank_token;
+ $last_last_nonblank_type = $last_nonblank_type;
+ $last_nonblank_token = $token;
+ $last_nonblank_type = $type;
+ $last_nonblank_block_type = $block_type;
+ $K_last_last_nonblank_code = $K_last_nonblank_code;
+ $K_last_nonblank_code = $Ktoken_vars;
+ }
- # Remove unwanted alignments. This routine is a place to remove
- # alignments which might cause problems at later stages. There are
- # currently two types of fixes:
+ } # end of loop over all tokens in this 'line_of_tokens'
- # 1. Remove excess parens
- # 2. Remove alignments within 'elsif' conditions
+ my $type = $rLL->[$K_last]->[_TYPE_];
+ my $break_flag = $self->[_rbreak_after_Klast_]->{$K_last};
- # Patch #1: Excess alignment of parens can prevent other good
- # alignments. For example, note the parens in the first two rows of
- # the following snippet. They would normally get marked for alignment
- # and aligned as follows:
+ # we have to flush ..
+ if (
- # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
- # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
- # my $img = new Gimp::Image( $w, $h, RGB );
+ # if there is a side comment...
+ $type eq '#'
- # This causes unnecessary paren alignment and prevents the third equals
- # from aligning. If we remove the unwanted alignments we get:
+ # if this line ends in a quote
+ # NOTE: This is critically important for insuring that quoted lines
+ # do not get processed by things like -sot and -sct
+ || $in_quote
- # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
- # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
- # my $img = new Gimp::Image( $w, $h, RGB );
+ # if this is a VERSION statement
+ || $is_VERSION_statement
- # A rule for doing this which works well is to remove alignment of
- # parens whose containers do not contain other aligning tokens, with
- # the exception that we always keep alignment of the first opening
- # paren on a line (for things like 'if' and 'elsif' statements).
+ # to keep a label at the end of a line
+ || $type eq 'J'
- # Setup needed constants
- my $i_good_paren = -1;
- my $imin_match = $iend + 1;
- my $i_elsif_close = $ibeg - 1;
- my $i_elsif_open = $iend + 1;
- if ( $iend > $ibeg ) {
- if ( $types_to_go[$ibeg] eq 'k' ) {
+ # if we have a hard break request
+ || $break_flag && $break_flag != 2
- # Paren patch: mark a location of a paren we should keep, such
- # as one following something like a leading 'if', 'elsif',..
- $i_good_paren = $ibeg + 1;
- if ( $types_to_go[$i_good_paren] eq 'b' ) {
- $i_good_paren++;
- }
+ # if we are instructed to keep all old line breaks
+ || !$rOpts->{'delete-old-newlines'}
- # 'elsif' patch: remember the range of the parens of an elsif,
- # and do not make alignments within them because this can cause
- # loss of padding and overall brace alignment in the vertical
- # aligner.
- if ( $tokens_to_go[$ibeg] eq 'elsif'
- && $i_good_paren < $iend
- && $tokens_to_go[$i_good_paren] eq '(' )
- {
- $i_elsif_open = $i_good_paren;
- $i_elsif_close = $self->mate_index_to_go($i_good_paren);
- }
- }
+ # if this is a line of the form 'use overload'. A break here
+ # in the input file is a good break because it will allow
+ # the operators which follow to be formatted well. Without
+ # this break the formatting with -ci=4 -xci is poor, for example.
+
+ # use overload
+ # '+' => sub {
+ # print length $_[2], "\n";
+ # my ( $x, $y ) = _order(@_);
+ # Number::Roman->new( int $x + $y );
+ # },
+ # '-' => sub {
+ # my ( $x, $y ) = _order(@_);
+ # Number::Roman->new( int $x - $y );
+ # };
+ || ( $max_index_to_go == 2
+ && $types_to_go[0] eq 'k'
+ && $tokens_to_go[0] eq 'use'
+ && $tokens_to_go[$max_index_to_go] eq 'overload' )
+ )
+ {
+ destroy_one_line_block();
+ $self->end_batch();
}
- # Loop to make the fixes on this line
- my @imatch_list;
- for my $i ( $ibeg .. $iend ) {
-
- if ( $ralignment_type_to_go->[$i] ne '' ) {
-
- # Patch #2: undo alignment within elsif parens
- if ( $i > $i_elsif_open && $i < $i_elsif_close ) {
- $ralignment_type_to_go->[$i] = '';
- next;
- }
- push @imatch_list, $i;
-
- }
- if ( $tokens_to_go[$i] eq ')' ) {
+ # Check for a soft break request
+ if ( $max_index_to_go >= 0 && $break_flag && $break_flag == 2 ) {
+ $self->set_forced_breakpoint($max_index_to_go);
+ }
- # Patch #1: undo the corresponding opening paren if:
- # - it is at the top of the stack
- # - and not the first overall opening paren
- # - does not follow a leading keyword on this line
- my $imate = $self->mate_index_to_go($i);
- if ( @imatch_list
- && $imatch_list[-1] eq $imate
- && ( $ibeg > 1 || @imatch_list > 1 )
- && $imate > $i_good_paren )
- {
- $ralignment_type_to_go->[$imate] = '';
- pop @imatch_list;
- }
+ # mark old line breakpoints in current output stream
+ if (
+ $max_index_to_go >= 0
+ && ( !$rOpts_ignore_old_breakpoints
+ || $self->[_ris_essential_old_breakpoint_]->{$K_last} )
+ )
+ {
+ my $jobp = $max_index_to_go;
+ if ( $types_to_go[$max_index_to_go] eq 'b' && $max_index_to_go > 0 )
+ {
+ $jobp--;
}
+ $old_breakpoint_to_go[$jobp] = 1;
}
return;
+ } ## end sub process_line_of_CODE
+} ## end closure process_line_of_CODE
+
+sub tight_paren_follows {
+
+ my ( $self, $K_to_go_0, $K_ic ) = @_;
+
+ # Input parameters:
+ # $K_to_go_0 = first token index K of this output batch (=K_to_go[0])
+ # $K_ic = index of the closing do brace (=K_to_go[$max_index_to_go])
+ # Return parameter:
+ # false if we want a break after the closing do brace
+ # true if we do not want a break after the closing do brace
+
+ # We are at the closing brace of a 'do' block. See if this brace is
+ # followed by a closing paren, and if so, set a flag which indicates
+ # that we do not want a line break between the '}' and ')'.
+
+ # xxxxx ( ...... do { ... } ) {
+ # ^-------looking at this brace, K_ic
+
+ # Subscript notation:
+ # _i = inner container (braces in this case)
+ # _o = outer container (parens in this case)
+ # _io = inner opening = '{'
+ # _ic = inner closing = '}'
+ # _oo = outer opening = '('
+ # _oc = outer closing = ')'
+
+ # |--K_oo |--K_oc = outer container
+ # xxxxx ( ...... do { ...... } ) {
+ # |--K_io |--K_ic = inner container
+
+ # In general, the safe thing to do is return a 'false' value
+ # if the statement appears to be complex. This will have
+ # the downstream side-effect of opening up outer containers
+ # to help make complex code readable. But for simpler
+ # do blocks it can be preferable to keep the code compact
+ # by returning a 'true' value.
+
+ return unless defined($K_ic);
+ my $rLL = $self->[_rLL_];
+
+ # we should only be called at a closing block
+ my $seqno_i = $rLL->[$K_ic]->[_TYPE_SEQUENCE_];
+ return unless ($seqno_i); # shouldn't happen;
+
+ # This only applies if the next nonblank is a ')'
+ my $K_oc = $self->K_next_nonblank($K_ic);
+ return unless defined($K_oc);
+ my $token_next = $rLL->[$K_oc]->[_TOKEN_];
+ return unless ( $token_next eq ')' );
+
+ my $seqno_o = $rLL->[$K_oc]->[_TYPE_SEQUENCE_];
+ my $K_io = $self->[_K_opening_container_]->{$seqno_i};
+ my $K_oo = $self->[_K_opening_container_]->{$seqno_o};
+ return unless ( defined($K_io) && defined($K_oo) );
+
+ # RULE 1: Do not break before a closing signature paren
+ # (regardless of complexity). This is a fix for issue git#22.
+ # Looking for something like:
+ # sub xxx ( ... do { ... } ) {
+ # ^----- next block_type
+ my $K_test = $self->K_next_nonblank($K_oc);
+ if ( defined($K_test) ) {
+ my $block_type = $rLL->[$K_test]->[_BLOCK_TYPE_];
+ if ( $block_type
+ && $rLL->[$K_test]->[_TYPE_] eq '{'
+ && $block_type =~ /$ANYSUB_PATTERN/ )
+ {
+ return 1;
+ }
}
- sub make_alignment_patterns {
+ # RULE 2: Break if the contents within braces appears to be 'complex'. We
+ # base this decision on the number of tokens between braces.
- # Here we do some important preliminary work for the
- # vertical aligner. We create three arrays for one
- # output line. These arrays contain strings that can
- # be tested by the vertical aligner to see if
- # consecutive lines can be aligned vertically.
- #
- # The three arrays are indexed on the vertical
- # alignment fields and are:
- # @tokens - a list of any vertical alignment tokens for this line.
- # These are tokens, such as '=' '&&' '#' etc which
- # we want to might align vertically. These are
- # decorated with various information such as
- # nesting depth to prevent unwanted vertical
- # alignment matches.
- # @fields - the actual text of the line between the vertical alignment
- # tokens.
- # @patterns - a modified list of token types, one for each alignment
- # field. These should normally each match before alignment is
- # allowed, even when the alignment tokens match.
- my ( $self, $ibeg, $iend, $ralignment_type_to_go ) = @_;
- my @tokens = ();
- my @fields = ();
- my @patterns = ();
- my $i_start = $ibeg;
+ # xxxxx ( ... do { ... } ) {
+ # ^^^^^^
- my $depth = 0;
- my @container_name = ("");
- my @multiple_comma_arrows = (undef);
+ # Although very simple, it has the advantages of (1) being insensitive to
+ # changes in lengths of identifier names, (2) easy to understand, implement
+ # and test. A test case for this is 't/snippets/long_line.in'.
- my $j = 0; # field index
+ # Example: $K_ic - $K_oo = 9 [Pass Rule 2]
+ # if ( do { $2 !~ /&/ } ) { ... }
- $patterns[0] = "";
- my %token_count;
- for my $i ( $ibeg .. $iend ) {
+ # Example: $K_ic - $K_oo = 10 [Pass Rule 2]
+ # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
- # Keep track of containers balanced on this line only.
- # These are used below to prevent unwanted cross-line alignments.
- # Unbalanced containers already avoid aligning across
- # container boundaries.
- my $tok = $tokens_to_go[$i];
- if ( $tok =~ /^[\(\{\[]/ ) { #'(' ) {
+ # Example: $K_ic - $K_oo = 20 [Fail Rule 2]
+ # test_zero_args( "do-returned list slice", do { ( 10, 11 )[ 2, 3 ]; });
- # if container is balanced on this line...
- my $i_mate = $self->mate_index_to_go($i);
- if ( $i_mate > $i && $i_mate <= $iend ) {
- $depth++;
- my $seqno = $type_sequence_to_go[$i];
- my $count = comma_arrow_count($seqno);
- $multiple_comma_arrows[$depth] = $count && $count > 1;
-
- # Append the previous token name to make the container name
- # more unique. This name will also be given to any commas
- # within this container, and it helps avoid undesirable
- # alignments of different types of containers.
-
- # Containers beginning with { and [ are given those names
- # for uniqueness. That way commas in different containers
- # will not match. Here is an example of what this prevents:
- # a => [ 1, 2, 3 ],
- # b => { b1 => 4, b2 => 5 },
- # Here is another example of what we avoid by labeling the
- # commas properly:
- # is_d( [ $a, $a ], [ $b, $c ] );
- # is_d( { foo => $a, bar => $a }, { foo => $b, bar => $c } );
- # is_d( [ \$a, \$a ], [ \$b, \$c ] );
-
- my $name = $tok;
- if ( $tok eq '(' ) {
- $name = previous_nonblank_token($i);
- $name =~ s/^->//;
- }
- $container_name[$depth] = "+" . $name;
-
- # Make the container name even more unique if necessary.
- # If we are not vertically aligning this opening paren,
- # append a character count to avoid bad alignment because
- # it usually looks bad to align commas within containers
- # for which the opening parens do not align. Here
- # is an example very BAD alignment of commas (because
- # the atan2 functions are not all aligned):
- # $XY =
- # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
- # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
- # $X * atan2( $X, 1 ) -
- # $Y * atan2( $Y, 1 );
- #
- # On the other hand, it is usually okay to align commas if
- # opening parens align, such as:
- # glVertex3d( $cx + $s * $xs, $cy, $z );
- # glVertex3d( $cx, $cy + $s * $ys, $z );
- # glVertex3d( $cx - $s * $xs, $cy, $z );
- # glVertex3d( $cx, $cy - $s * $ys, $z );
- #
- # To distinguish between these situations, we will
- # append the length of the line from the previous matching
- # token, or beginning of line, to the function name. This
- # will allow the vertical aligner to reject undesirable
- # matches.
-
- # if we are not aligning on this paren...
- if ( $ralignment_type_to_go->[$i] eq '' ) {
-
- # Sum length from previous alignment
- my $len = token_sequence_length( $i_start, $i - 1 );
- if ( $i_start == $ibeg ) {
-
- # For first token, use distance from start of line
- # but subtract off the indentation due to level.
- # Otherwise, results could vary with indentation.
- $len += leading_spaces_to_go($ibeg) -
- $levels_to_go[$i_start] * $rOpts_indent_columns;
- if ( $len < 0 ) { $len = 0 }
- }
+ return if ( $K_ic - $K_io > 16 );
- # tack this length onto the container name to try
- # to make a unique token name
- $container_name[$depth] .= "-" . $len;
- }
- }
- }
- elsif ( $tokens_to_go[$i] =~ /^[\)\}\]]/ ) {
- $depth-- if $depth > 0;
- }
+ # RULE 3: break if the code between the opening '(' and the '{' is 'complex'
+ # As with the previous rule, we decide based on the token count
- # if we find a new synchronization token, we are done with
- # a field
- if ( $i > $i_start && $ralignment_type_to_go->[$i] ne '' ) {
+ # xxxxx ( ... do { ... } ) {
+ # ^^^^^^^^
- my $tok = my $raw_tok = $ralignment_type_to_go->[$i];
+ # Example: $K_ic - $K_oo = 9 [Pass Rule 2]
+ # $K_io - $K_oo = 4 [Pass Rule 3]
+ # if ( do { $2 !~ /&/ } ) { ... }
- # map similar items
- my $tok_map = $operator_map{$tok};
- $tok = $tok_map if ($tok_map);
+ # Example: $K_ic - $K_oo = 10 [Pass rule 2]
+ # $K_io - $K_oo = 9 [Pass rule 3]
+ # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
- # make separators in different nesting depths unique
- # by appending the nesting depth digit.
- if ( $raw_tok ne '#' ) {
- $tok .= "$nesting_depth_to_go[$i]";
- }
+ return if ( $K_io - $K_oo > 9 );
- # also decorate commas with any container name to avoid
- # unwanted cross-line alignments.
- if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
- if ( $container_name[$depth] ) {
- $tok .= $container_name[$depth];
- }
- }
+ # RULE 4: Break if we have already broken this batch of output tokens
+ return if ( $K_oo < $K_to_go_0 );
- # Patch to avoid aligning leading and trailing if, unless.
- # Mark trailing if, unless statements with container names.
- # This makes them different from leading if, unless which
- # are not so marked at present. If we ever need to name
- # them too, we could use ci to distinguish them.
- # Example problem to avoid:
- # return ( 2, "DBERROR" )
- # if ( $retval == 2 );
- # if ( scalar @_ ) {
- # my ( $a, $b, $c, $d, $e, $f ) = @_;
- # }
- if ( $raw_tok eq '(' ) {
- my $ci = $ci_levels_to_go[$ibeg];
- if ( $container_name[$depth] =~ /^\+(if|unless)/
- && $ci )
- {
- $tok .= $container_name[$depth];
- }
- }
+ # RULE 5: Break if input is not on one line
+ # For example, we will set the flag for the following expression
+ # written in one line:
- # Decorate block braces with block types to avoid
- # unwanted alignments such as the following:
- # foreach ( @{$routput_array} ) { $fh->print($_) }
- # eval { $fh->close() };
- if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
- my $block_type = $block_type_to_go[$i];
+ # This has: $K_ic - $K_oo = 10 [Pass rule 2]
+ # $K_io - $K_oo = 8 [Pass rule 3]
+ # $self->debug( 'Error: ' . do { local $/; <$err> } );
- # map certain related block types to allow
- # else blocks to align
- $block_type = $block_type_map{$block_type}
- if ( defined( $block_type_map{$block_type} ) );
+ # but we break after the brace if it is on multiple lines on input, since
+ # the user may prefer it on multiple lines:
- # remove sub names to allow one-line sub braces to align
- # regardless of name
- #if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
- if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
+ # [Fail rule 5]
+ # $self->debug(
+ # 'Error: ' . do { local $/; <$err> }
+ # );
- # allow all control-type blocks to align
- if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
+ if ( !$rOpts_ignore_old_breakpoints ) {
+ my $iline_oo = $rLL->[$K_oo]->[_LINE_INDEX_];
+ my $iline_oc = $rLL->[$K_oc]->[_LINE_INDEX_];
+ return if ( $iline_oo != $iline_oc );
+ }
- $tok .= $block_type;
- }
+ # OK to keep the paren tight
+ return 1;
+}
- # Mark multiple copies of certain tokens with the copy number
- # This will allow the aligner to decide if they are matched.
- # For now, only do this for equals. For example, the two
- # equals on the next line will be labeled '=0' and '=0.2'.
- # Later, the '=0.2' will be ignored in alignment because it
- # has no match.
+sub starting_one_line_block {
- # $| = $debug = 1 if $opt_d;
- # $full_index = 1 if $opt_i;
+ # after seeing an opening curly brace, look for the closing brace and see
+ # if the entire block will fit on a line. This routine is not always right
+ # so a check is made later (at the closing brace) to make sure we really
+ # have a one-line block. We have to do this preliminary check, though,
+ # because otherwise we would always break at a semicolon within a one-line
+ # block if the block contains multiple statements.
- if ( $raw_tok eq '=' || $raw_tok eq '=>' ) {
- $token_count{$tok}++;
- if ( $token_count{$tok} > 1 ) {
- $tok .= '.' . $token_count{$tok};
- }
- }
+ my ( $self, $Kj, $K_last_nonblank, $K_last ) = @_;
- # concatenate the text of the consecutive tokens to form
- # the field
- push( @fields,
- join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
+ my $rbreak_container = $self->[_rbreak_container_];
+ my $rshort_nested = $self->[_rshort_nested_];
+ my $rLL = $self->[_rLL_];
+ my $K_opening_container = $self->[_K_opening_container_];
- # store the alignment token for this field
- push( @tokens, $tok );
+ # kill any current block - we can only go 1 deep
+ destroy_one_line_block();
- # get ready for the next batch
- $i_start = $i;
- $j++;
- $patterns[$j] = "";
- }
+ # return value:
+ # 1=distance from start of block to opening brace exceeds line length
+ # 0=otherwise
- # continue accumulating tokens
- # handle non-keywords..
- if ( $types_to_go[$i] ne 'k' ) {
- my $type = $types_to_go[$i];
+ my $i_start = 0;
- # Mark most things before arrows as a quote to
- # get them to line up. Testfile: mixed.pl.
- if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
- my $next_type = $types_to_go[ $i + 1 ];
- my $i_next_nonblank =
- ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
+ # This routine should not have been called if there are no tokens in the
+ # 'to_go' arrays of previously stored tokens. A previous call to
+ # 'store_token_to_go' should have stored an opening brace. An error here
+ # indicates that a programming change may have caused a flush operation to
+ # clean out the previously stored tokens.
+ if ( !defined($max_index_to_go) || $max_index_to_go < 0 ) {
+ Fault("program bug: store_token_to_go called incorrectly\n");
+ }
- if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
- $type = 'Q';
+ # Return if block should be broken
+ my $type_sequence = $rLL->[$Kj]->[_TYPE_SEQUENCE_];
+ if ( $rbreak_container->{$type_sequence} ) {
+ return 0;
+ }
- # Patch to ignore leading minus before words,
- # by changing pattern 'mQ' into just 'Q',
- # so that we can align things like this:
- # Button => "Print letter \"~$_\"",
- # -command => [ sub { print "$_[0]\n" }, $_ ],
- if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
- }
- }
+ my $ris_bli_container = $self->[_ris_bli_container_];
+ my $is_bli = $ris_bli_container->{$type_sequence};
- # Convert a bareword within braces into a quote for matching. This will
- # allow alignment of expressions like this:
- # local ( $SIG{'INT'} ) = IGNORE;
- # local ( $SIG{ALRM} ) = 'POSTMAN';
- if ( $type eq 'w'
- && $i > $ibeg
- && $i < $iend
- && $types_to_go[ $i - 1 ] eq 'L'
- && $types_to_go[ $i + 1 ] eq 'R' )
- {
- $type = 'Q';
- }
+ my $block_type = $rLL->[$Kj]->[_BLOCK_TYPE_];
+ my $index_max_forced_break = get_index_max_forced_break();
- # patch to make numbers and quotes align
- if ( $type eq 'n' ) { $type = 'Q' }
+ my $previous_nonblank_token = '';
+ my $i_last_nonblank = -1;
+ if ( defined($K_last_nonblank) ) {
+ $i_last_nonblank = $K_last_nonblank - $K_to_go[0];
+ if ( $i_last_nonblank >= 0 ) {
+ $previous_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
+ }
+ }
- # patch to ignore any ! in patterns
- if ( $type eq '!' ) { $type = '' }
+ # find the starting keyword for this block (such as 'if', 'else', ...)
+ if ( $max_index_to_go == 0
+ || $block_type =~ /^[\{\}\;\:]$/
+ || $block_type =~ /^package/ )
+ {
+ $i_start = $max_index_to_go;
+ }
- $patterns[$j] .= $type;
- }
+ # the previous nonblank token should start these block types
+ elsif (
+ $i_last_nonblank >= 0
+ && ( $previous_nonblank_token eq $block_type
+ || $block_type =~ /$ANYSUB_PATTERN/
+ || $block_type =~ /\(\)/ )
+ )
+ {
+ $i_start = $i_last_nonblank;
- # for keywords we have to use the actual text
- else {
+ # For signatures and extended syntax ...
+ # If this brace follows a parenthesized list, we should look back to
+ # find the keyword before the opening paren because otherwise we might
+ # form a one line block which stays intack, and cause the parenthesized
+ # expression to break open. That looks bad.
+ if ( $tokens_to_go[$i_start] eq ')' ) {
+
+ # Find the opening paren
+ my $K_start = $K_to_go[$i_start];
+ return 0 unless defined($K_start);
+ my $seqno = $type_sequence_to_go[$i_start];
+ return 0 unless ($seqno);
+ my $K_opening = $K_opening_container->{$seqno};
+ return 0 unless defined($K_opening);
+ my $i_opening = $i_start + ( $K_opening - $K_start );
+
+ # give up if not on this line
+ return 0 unless ( $i_opening >= 0 );
+ $i_start = $i_opening; ##$index_max_forced_break + 1;
+
+ # go back one token before the opening paren
+ if ( $i_start > 0 ) { $i_start-- }
+ if ( $types_to_go[$i_start] eq 'b' && $i_start > 0 ) { $i_start--; }
+ my $lev = $levels_to_go[$i_start];
+ if ( $lev > $rLL->[$Kj]->[_LEVEL_] ) { return 0 }
+ }
+ }
- my $tok = $tokens_to_go[$i];
+ elsif ( $previous_nonblank_token eq ')' ) {
- # but map certain keywords to a common string to allow
- # alignment.
- $tok = $keyword_map{$tok}
- if ( defined( $keyword_map{$tok} ) );
- $patterns[$j] .= $tok;
- }
+ # For something like "if (xxx) {", the keyword "if" will be
+ # just after the most recent break. This will be 0 unless
+ # we have just killed a one-line block and are starting another.
+ # (doif.t)
+ # Note: cannot use inext_index_to_go[] here because that array
+ # is still being constructed.
+ $i_start = $index_max_forced_break + 1;
+ if ( $types_to_go[$i_start] eq 'b' ) {
+ $i_start++;
}
- # done with this line .. join text of tokens to make the last field
- push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
- return ( \@tokens, \@fields, \@patterns );
- }
+ # Patch to avoid breaking short blocks defined with extended_syntax:
+ # Strip off any trailing () which was added in the parser to mark
+ # the opening keyword. For example, in the following
+ # create( TypeFoo $e) {$bubba}
+ # the blocktype would be marked as create()
+ my $stripped_block_type = $block_type;
+ $stripped_block_type =~ s/\(\)$//;
-} # end make_alignment_patterns
+ unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
+ return 0;
+ }
+ }
-{ # begin unmatched_indexes
+ # patch for SWITCH/CASE to retain one-line case/when blocks
+ elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
- # closure to keep track of unbalanced containers.
- # arrays shared by the routines in this block:
- my @unmatched_opening_indexes_in_this_batch;
- my @unmatched_closing_indexes_in_this_batch;
- my %comma_arrow_count;
+ # Note: cannot use inext_index_to_go[] here because that array
+ # is still being constructed.
+ $i_start = $index_max_forced_break + 1;
+ if ( $types_to_go[$i_start] eq 'b' ) {
+ $i_start++;
+ }
+ unless ( $tokens_to_go[$i_start] eq $block_type ) {
+ return 0;
+ }
+ }
- sub is_unbalanced_batch {
- return @unmatched_opening_indexes_in_this_batch +
- @unmatched_closing_indexes_in_this_batch;
+ else {
+ return 1;
}
- sub comma_arrow_count {
- my $seqno = shift;
- return $comma_arrow_count{$seqno};
+ my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
+
+ my $maximum_line_length =
+ $maximum_line_length_at_level[ $levels_to_go[$i_start] ];
+
+ # see if block starting location is too great to even start
+ if ( $pos > $maximum_line_length ) {
+ return 1;
}
- sub match_opening_and_closing_tokens {
+ # See if everything to the closing token will fit on one line
+ # This is part of an update to fix cases b562 .. b983
+ my $K_closing = $self->[_K_closing_container_]->{$type_sequence};
+ return 0 unless ( defined($K_closing) );
+ my $container_length = $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[$Kj]->[_CUMULATIVE_LENGTH_];
- # Match up indexes of opening and closing braces, etc, in this batch.
- # This has to be done after all tokens are stored because unstoring
- # of tokens would otherwise cause trouble.
+ my $excess = $pos + 1 + $container_length - $maximum_line_length;
- @unmatched_opening_indexes_in_this_batch = ();
- @unmatched_closing_indexes_in_this_batch = ();
- %comma_arrow_count = ();
- my $comma_arrow_count_contained = 0;
+ # Add a small tolerance for welded tokens (case b901)
+ if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence) ) {
+ $excess += 2;
+ }
- foreach my $i ( 0 .. $max_index_to_go ) {
- if ( $type_sequence_to_go[$i] ) {
- my $token = $tokens_to_go[$i];
- if ( $token =~ /^[\(\[\{\?]$/ ) {
- push @unmatched_opening_indexes_in_this_batch, $i;
- }
- elsif ( $token =~ /^[\)\]\}\:]$/ ) {
+ if ( $excess > 0 ) {
- my $i_mate = pop @unmatched_opening_indexes_in_this_batch;
- if ( defined($i_mate) && $i_mate >= 0 ) {
- if ( $type_sequence_to_go[$i_mate] ==
- $type_sequence_to_go[$i] )
- {
- $mate_index_to_go[$i] = $i_mate;
- $mate_index_to_go[$i_mate] = $i;
- my $seqno = $type_sequence_to_go[$i];
- if ( $comma_arrow_count{$seqno} ) {
- $comma_arrow_count_contained +=
- $comma_arrow_count{$seqno};
- }
- }
- else {
- push @unmatched_opening_indexes_in_this_batch,
- $i_mate;
- push @unmatched_closing_indexes_in_this_batch, $i;
- }
- }
- else {
- push @unmatched_closing_indexes_in_this_batch, $i;
- }
- }
- }
- elsif ( $tokens_to_go[$i] eq '=>' ) {
- if (@unmatched_opening_indexes_in_this_batch) {
- my $j = $unmatched_opening_indexes_in_this_batch[-1];
- my $seqno = $type_sequence_to_go[$j];
- $comma_arrow_count{$seqno}++;
- }
- }
- }
- return $comma_arrow_count_contained;
+ # line is too long... there is no chance of forming a one line block
+ # if the excess is more than 1 char
+ return 0 if ( $excess > 1 );
+
+ # ... and give up if it is not a one-line block on input.
+ # note: for a one-line block on input, it may be possible to keep
+ # it as a one-line block (by removing a needless semicolon ).
+ my $K_start = $K_to_go[$i_start];
+ my $ldiff =
+ $rLL->[$K_closing]->[_LINE_INDEX_] - $rLL->[$K_start]->[_LINE_INDEX_];
+ return 0 if ($ldiff);
}
- sub save_opening_indentation {
+ foreach my $Ki ( $Kj + 1 .. $K_last ) {
- # This should be called after each batch of tokens is output. It
- # saves indentations of lines of all unmatched opening tokens.
- # These will be used by sub get_opening_indentation.
+ # old whitespace could be arbitrarily large, so don't use it
+ if ( $rLL->[$Ki]->[_TYPE_] eq 'b' ) { $pos += 1 }
+ else { $pos += $rLL->[$Ki]->[_TOKEN_LENGTH_] }
- my ( $self, $ri_first, $ri_last, $rindentation_list ) = @_;
+ # ignore some small blocks
+ my $type_sequence = $rLL->[$Ki]->[_TYPE_SEQUENCE_];
+ my $nobreak = $rshort_nested->{$type_sequence};
- # we no longer need indentations of any saved indentations which
- # are unmatched closing tokens in this batch, because we will
- # never encounter them again. So we can delete them to keep
- # the hash size down.
- foreach (@unmatched_closing_indexes_in_this_batch) {
- my $seqno = $type_sequence_to_go[$_];
- delete $saved_opening_indentation{$seqno};
+ # Return false result if we exceed the maximum line length,
+ if ( $pos > $maximum_line_length ) {
+ return 0;
}
- # we need to save indentations of any unmatched opening tokens
- # in this batch because we may need them in a subsequent batch.
- foreach (@unmatched_opening_indexes_in_this_batch) {
- my $seqno = $type_sequence_to_go[$_];
- $saved_opening_indentation{$seqno} = [
- lookup_opening_indentation(
- $_, $ri_first, $ri_last, $rindentation_list
- )
- ];
+ # keep going for non-containers
+ elsif ( !$type_sequence ) {
+
}
- return;
- }
-} # end unmatched_indexes
-sub get_opening_indentation {
+ # return if we encounter another opening brace before finding the
+ # closing brace.
+ elsif ($rLL->[$Ki]->[_TOKEN_] eq '{'
+ && $rLL->[$Ki]->[_TYPE_] eq '{'
+ && $rLL->[$Ki]->[_BLOCK_TYPE_]
+ && !$nobreak )
+ {
+ return 0;
+ }
- # get the indentation of the line which output the opening token
- # corresponding to a given closing token in the current output batch.
- #
- # given:
- # $i_closing - index in this line of a closing token ')' '}' or ']'
- #
- # $ri_first - reference to list of the first index $i for each output
- # line in this batch
- # $ri_last - reference to list of the last index $i for each output line
- # in this batch
- # $rindentation_list - reference to a list containing the indentation
- # used for each line.
- #
- # return:
- # -the indentation of the line which contained the opening token
- # which matches the token at index $i_opening
- # -and its offset (number of columns) from the start of the line
- #
- my ( $self, $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
+ # if we find our closing brace..
+ elsif ($rLL->[$Ki]->[_TOKEN_] eq '}'
+ && $rLL->[$Ki]->[_TYPE_] eq '}'
+ && $rLL->[$Ki]->[_BLOCK_TYPE_]
+ && !$nobreak )
+ {
- # first, see if the opening token is in the current batch
- my $i_opening = $mate_index_to_go[$i_closing];
- my ( $indent, $offset, $is_leading, $exists );
- $exists = 1;
- if ( $i_opening >= 0 ) {
+ # be sure any trailing comment also fits on the line
+ my $Ki_nonblank = $Ki;
+ if ( $Ki_nonblank < $K_last ) {
+ $Ki_nonblank++;
+ if ( $rLL->[$Ki_nonblank]->[_TYPE_] eq 'b'
+ && $Ki_nonblank < $K_last )
+ {
+ $Ki_nonblank++;
+ }
+ }
- # it is..look up the indentation
- ( $indent, $offset, $is_leading ) =
- lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
- $rindentation_list );
- }
+ # Patch for one-line sort/map/grep/eval blocks with side comments:
+ # We will ignore the side comment length for sort/map/grep/eval
+ # because this can lead to statements which change every time
+ # perltidy is run. Here is an example from Denis Moskowitz which
+ # oscillates between these two states without this patch:
- # if not, it should have been stored in the hash by a previous batch
- else {
- my $seqno = $type_sequence_to_go[$i_closing];
- if ($seqno) {
- if ( $saved_opening_indentation{$seqno} ) {
- ( $indent, $offset, $is_leading ) =
- @{ $saved_opening_indentation{$seqno} };
- }
+## --------
+## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
+## @baz;
+##
+## grep {
+## $_->foo ne 'bar'
+## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
+## @baz;
+## --------
- # some kind of serious error
- # (example is badfile.t)
- else {
- $indent = 0;
- $offset = 0;
- $is_leading = 0;
- $exists = 0;
+ # When the first line is input it gets broken apart by the main
+ # line break logic in sub process_line_of_CODE.
+ # When the second line is input it gets recombined by
+ # process_line_of_CODE and passed to the output routines. The
+ # output routines (set_continuation_breaks) do not break it apart
+ # because the bond strengths are set to the highest possible value
+ # for grep/map/eval/sort blocks, so the first version gets output.
+ # It would be possible to fix this by changing bond strengths,
+ # but they are high to prevent errors in older versions of perl.
+
+ if ( $Ki < $K_last
+ && $rLL->[$Ki_nonblank]->[_TYPE_] eq '#'
+ && !$is_sort_map_grep{$block_type} )
+ {
+
+ $pos += $rLL->[$Ki_nonblank]->[_TOKEN_LENGTH_];
+
+ if ( $Ki_nonblank > $Ki + 1 ) {
+
+ # source whitespace could be anything, assume
+ # at least one space before the hash on output
+ if ( $rLL->[ $Ki + 1 ]->[_TYPE_] eq 'b' ) {
+ $pos += 1;
+ }
+ else { $pos += $rLL->[ $Ki + 1 ]->[_TOKEN_LENGTH_] }
+ }
+
+ if ( $pos >= $maximum_line_length ) {
+ return 0;
+ }
}
+
+ # ok, it's a one-line block
+ create_one_line_block( $i_start, 20 );
+ return 0;
}
- # if no sequence number it must be an unbalanced container
+ # just keep going for other characters
else {
- $indent = 0;
- $offset = 0;
- $is_leading = 0;
- $exists = 0;
}
}
- return ( $indent, $offset, $is_leading, $exists );
-}
-
-sub lookup_opening_indentation {
- # get the indentation of the line in the current output batch
- # which output a selected opening token
- #
- # given:
- # $i_opening - index of an opening token in the current output batch
- # whose line indentation we need
- # $ri_first - reference to list of the first index $i for each output
- # line in this batch
- # $ri_last - reference to list of the last index $i for each output line
- # in this batch
- # $rindentation_list - reference to a list containing the indentation
- # used for each line. (NOTE: the first slot in
- # this list is the last returned line number, and this is
- # followed by the list of indentations).
- #
- # return
- # -the indentation of the line which contained token $i_opening
- # -and its offset (number of columns) from the start of the line
+ # We haven't hit the closing brace, but there is still space. So the
+ # question here is, should we keep going to look at more lines in hopes of
+ # forming a new one-line block, or should we stop right now. The problem
+ # with continuing is that we will not be able to honor breaks before the
+ # opening brace if we continue.
- my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
+ # Typically we will want to keep trying to make one-line blocks for things
+ # like sort/map/grep/eval. But it is not always a good idea to make as
+ # many one-line blocks as possible, so other types are not done. The user
+ # can always use -mangle.
- if ( !@{$ri_last} ) {
- warning("Error in opening_indentation: no lines");
- return;
+ # If we want to keep going, we will create a new one-line block.
+ # The blocks which we can keep going are in a hash, but we never want
+ # to continue if we are at a '-bli' block.
+ if ( $want_one_line_block{$block_type} && !$is_bli ) {
+ create_one_line_block( $i_start, 1 );
}
+ return 0;
+}
- my $nline = $rindentation_list->[0]; # line number of previous lookup
-
- # reset line location if necessary
- $nline = 0 if ( $i_opening < $ri_start->[$nline] );
+sub unstore_token_to_go {
- # find the correct line
- unless ( $i_opening > $ri_last->[-1] ) {
- while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
+ # remove most recent token from output stream
+ my $self = shift;
+ if ( $max_index_to_go > 0 ) {
+ $max_index_to_go--;
}
-
- # error - token index is out of bounds - shouldn't happen
else {
- warning(
-"non-fatal program bug in lookup_opening_indentation - index out of range\n"
- );
- report_definite_bug();
- $nline = $#{$ri_last};
+ $max_index_to_go = UNDEFINED_INDEX;
}
-
- $rindentation_list->[0] =
- $nline; # save line number to start looking next call
- my $ibeg = $ri_start->[$nline];
- my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
- my $is_leading = ( $ibeg == $i_opening );
- return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
+ return;
}
-{
- my %is_if_elsif_else_unless_while_until_for_foreach;
-
- BEGIN {
-
- # These block types may have text between the keyword and opening
- # curly. Note: 'else' does not, but must be included to allow trailing
- # if/elsif text to be appended.
- # patch for SWITCH/CASE: added 'case' and 'when'
- my @q = qw(if elsif else unless while until for foreach case when);
- @is_if_elsif_else_unless_while_until_for_foreach{@q} =
- (1) x scalar(@q);
- }
+sub compare_indentation_levels {
- sub set_adjusted_indentation {
+ # Check to see if output line tabbing agrees with input line
+ # this can be very useful for debugging a script which has an extra
+ # or missing brace.
- # This routine has the final say regarding the actual indentation of
- # a line. It starts with the basic indentation which has been
- # defined for the leading token, and then takes into account any
- # options that the user has set regarding special indenting and
- # outdenting.
+ my ( $self, $K_first, $guessed_indentation_level, $line_number ) = @_;
+ return unless ( defined($K_first) );
- my (
- $self, $ibeg, $iend,
- $rfields, $rpatterns, $ri_first,
- $ri_last, $rindentation_list, $level_jump
- ) = @_;
+ my $rLL = $self->[_rLL_];
- my $rLL = $self->{rLL};
+ my $structural_indentation_level = $rLL->[$K_first]->[_LEVEL_];
+ my $radjusted_levels = $self->[_radjusted_levels_];
+ if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
+ $structural_indentation_level = $radjusted_levels->[$K_first];
+ }
- # we need to know the last token of this line
- my ( $terminal_type, $i_terminal ) =
- $self->terminal_type_i( $ibeg, $iend );
+ my $is_closing_block = $rLL->[$K_first]->[_TYPE_] eq '}'
+ && $rLL->[$K_first]->[_BLOCK_TYPE_];
- my $is_outdented_line = 0;
+ if ( $guessed_indentation_level ne $structural_indentation_level ) {
+ $self->[_last_tabbing_disagreement_] = $line_number;
- my $is_semicolon_terminated = $terminal_type eq ';'
- && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
+ if ($is_closing_block) {
- # NOTE: A future improvement would be to make it semicolon terminated
- # even if it does not have a semicolon but is followed by a closing
- # block brace. This would undo ci even for something like the
- # following, in which the final paren does not have a semicolon because
- # it is a possible weld location:
+ if ( !$self->[_in_brace_tabbing_disagreement_] ) {
+ $self->[_in_brace_tabbing_disagreement_] = $line_number;
+ }
+ if ( !$self->[_first_brace_tabbing_disagreement_] ) {
+ $self->[_first_brace_tabbing_disagreement_] = $line_number;
+ }
- # if ($BOLD_MATH) {
- # (
- # $labels, $comment,
- # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
- # )
- # }
- #
+ }
- # MOJO: Set a flag if this lines begins with ')->'
- my $leading_paren_arrow = (
- $types_to_go[$ibeg] eq '}'
- && $tokens_to_go[$ibeg] eq ')'
- && (
- ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' )
- || ( $ibeg < $i_terminal - 1
- && $types_to_go[ $ibeg + 1 ] eq 'b'
- && $types_to_go[ $ibeg + 2 ] eq '->' )
- )
- );
+ if ( !$self->[_in_tabbing_disagreement_] ) {
+ $self->[_tabbing_disagreement_count_]++;
- ##########################################################
- # Section 1: set a flag and a default indentation
- #
- # Most lines are indented according to the initial token.
- # But it is common to outdent to the level just after the
- # terminal token in certain cases...
- # adjust_indentation flag:
- # 0 - do not adjust
- # 1 - outdent
- # 2 - vertically align with opening token
- # 3 - indent
- ##########################################################
- my $adjust_indentation = 0;
- my $default_adjust_indentation = $adjust_indentation;
+ if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
+ write_logfile_entry(
+"Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
+ );
+ }
+ $self->[_in_tabbing_disagreement_] = $line_number;
+ $self->[_first_tabbing_disagreement_] = $line_number
+ unless ( $self->[_first_tabbing_disagreement_] );
+ }
+ }
+ else {
- my (
- $opening_indentation, $opening_offset,
- $is_leading, $opening_exists
- );
+ $self->[_in_brace_tabbing_disagreement_] = 0 if ($is_closing_block);
- my $type_beg = $types_to_go[$ibeg];
- my $token_beg = $tokens_to_go[$ibeg];
- my $K_beg = $K_to_go[$ibeg];
- my $ibeg_weld_fix = $ibeg;
+ my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
+ if ($in_tabbing_disagreement) {
- # QW PATCH 2 (Testing)
- # At an isolated closing token of a qw quote which is welded to
- # a following closing token, we will locally change its type to
- # be the same as its token. This will allow formatting to be the
- # same as for an ordinary closing token.
+ if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
+ write_logfile_entry(
+"End indentation disagreement from input line $in_tabbing_disagreement\n"
+ );
- # For -lp formatting se use $ibeg_weld_fix to get around the problem
- # that with -lp type formatting the opening and closing tokens to not
- # have sequence numbers.
- if ( $type_beg eq 'q' && $token_beg =~ /^[\)\}\]\>]/ ) {
- my $K_next_nonblank = $self->K_next_code($K_beg);
- if ( defined($K_next_nonblank) ) {
- my $type_sequence = $rLL->[$K_next_nonblank]->[_TYPE_SEQUENCE_];
- my $token = $rLL->[$K_next_nonblank]->[_TOKEN_];
- my $welded = weld_len_left( $type_sequence, $token );
- if ($welded) {
- $ibeg_weld_fix = $ibeg + ( $K_next_nonblank - $K_beg );
- $type_beg = ')'; ##$token_beg;
+ if ( $self->[_tabbing_disagreement_count_] == MAX_NAG_MESSAGES )
+ {
+ write_logfile_entry(
+ "No further tabbing disagreements will be noted\n");
}
}
+ $self->[_in_tabbing_disagreement_] = 0;
+
}
+ }
+ return;
+}
- # if we are at a closing token of some type..
- if ( $type_beg =~ /^[\)\}\]R]$/ ) {
+###################################################
+# CODE SECTION 8: Utilities for setting breakpoints
+###################################################
- # get the indentation of the line containing the corresponding
- # opening token
- (
- $opening_indentation, $opening_offset,
- $is_leading, $opening_exists
- )
- = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first,
- $ri_last, $rindentation_list );
+{ ## begin closure set_forced_breakpoint
- # First set the default behavior:
- if (
+ my $forced_breakpoint_count;
+ my $forced_breakpoint_undo_count;
+ my @forced_breakpoint_undo_stack;
+ my $index_max_forced_break;
- # default behavior is to outdent closing lines
- # of the form: "); }; ]; )->xxx;"
- $is_semicolon_terminated
+ # Break before or after certain tokens based on user settings
+ my %break_before_or_after_token;
- # and 'cuddled parens' of the form: ")->pack("
- # Bug fix for RT #123749]: the types here were
- # incorrectly '(' and ')'. Corrected to be '{' and '}'
- || (
- $terminal_type eq '{'
- && $type_beg eq '}'
- && ( $nesting_depth_to_go[$iend] + 1 ==
- $nesting_depth_to_go[$ibeg] )
- )
+ BEGIN {
- # remove continuation indentation for any line like
- # } ... {
- # or without ending '{' and unbalanced, such as
- # such as '}->{$operator}'
- || (
- $type_beg eq '}'
+ # Updated to use all operators. This fixes case b1054
+ # Here is the previous simplified version:
+ ## my @q = qw( . : ? and or xor && || );
+ my @q = @all_operators;
- && ( $types_to_go[$iend] eq '{'
- || $levels_to_go[$iend] < $levels_to_go[$ibeg] )
- )
+ push @q, ',';
+ @break_before_or_after_token{@q} = (1) x scalar(@q);
+ }
- # and when the next line is at a lower indentation level
- # PATCH: and only if the style allows undoing continuation
- # for all closing token types. We should really wait until
- # the indentation of the next line is known and then make
- # a decision, but that would require another pass.
- || ( $level_jump < 0 && !$some_closing_token_indentation )
+ sub initialize_forced_breakpoint_vars {
+ $forced_breakpoint_count = 0;
+ $index_max_forced_break = UNDEFINED_INDEX;
+ $forced_breakpoint_undo_count = 0;
+ @forced_breakpoint_undo_stack = ();
+ return;
+ }
- # Patch for -wn=2, multiple welded closing tokens
- || ( $i_terminal > $ibeg
- && $types_to_go[$iend] =~ /^[\)\}\]R]$/ )
+ sub get_forced_breakpoint_count {
+ return $forced_breakpoint_count;
+ }
- )
- {
- $adjust_indentation = 1;
- }
+ sub get_forced_breakpoint_undo_count {
+ return $forced_breakpoint_undo_count;
+ }
- # outdent something like '),'
- if (
- $terminal_type eq ','
+ sub get_index_max_forced_break {
+ return $index_max_forced_break;
+ }
- # Removed this constraint for -wn
- # OLD: allow just one character before the comma
- # && $i_terminal == $ibeg + 1
+ sub set_fake_breakpoint {
- # require LIST environment; otherwise, we may outdent too much -
- # this can happen in calls without parentheses (overload.t);
- && $container_environment_to_go[$i_terminal] eq 'LIST'
- )
- {
- $adjust_indentation = 1;
- }
+ # Just bump up the breakpoint count as a signal that there are breaks.
+ # This is useful if we have breaks but may want to postpone deciding
+ # where to make them.
+ $forced_breakpoint_count++;
+ return;
+ }
- # undo continuation indentation of a terminal closing token if
- # it is the last token before a level decrease. This will allow
- # a closing token to line up with its opening counterpart, and
- # avoids an indentation jump larger than 1 level.
- if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
- && $i_terminal == $ibeg
- && defined($K_beg) )
- {
- my $K_next_nonblank = $self->K_next_code($K_beg);
+ use constant DEBUG_FORCE => 0;
- # Patch for RT#131115: honor -bli flag at closing brace
- my $is_bli =
- $rOpts_brace_left_and_indent
- && $block_type_to_go[$i_terminal]
- && $block_type_to_go[$i_terminal] =~ /$bli_pattern/o;
+ sub set_forced_breakpoint {
+ my ( $self, $i ) = @_;
- if ( !$is_bli && defined($K_next_nonblank) ) {
- my $lev = $rLL->[$K_beg]->[_LEVEL_];
- my $level_next = $rLL->[$K_next_nonblank]->[_LEVEL_];
- $adjust_indentation = 1 if ( $level_next < $lev );
- }
+ return unless defined $i && $i >= 0;
- # Patch for RT #96101, in which closing brace of anonymous subs
- # was not outdented. We should look ahead and see if there is
- # a level decrease at the next token (i.e., a closing token),
- # but right now we do not have that information. For now
- # we see if we are in a list, and this works well.
- # See test files 'sub*.t' for good test cases.
- if ( $block_type_to_go[$ibeg] =~ /$ASUB_PATTERN/
- && $container_environment_to_go[$i_terminal] eq 'LIST'
- && !$rOpts->{'indent-closing-brace'} )
- {
- (
- $opening_indentation, $opening_offset,
- $is_leading, $opening_exists
- )
- = $self->get_opening_indentation( $ibeg, $ri_first,
- $ri_last, $rindentation_list );
- my $indentation = $leading_spaces_to_go[$ibeg];
- if ( defined($opening_indentation)
- && get_spaces($indentation) >
- get_spaces($opening_indentation) )
- {
- $adjust_indentation = 1;
- }
- }
- }
+ # Back up at a blank in case we need an = break.
+ # This is a backup fix for cases like b932.
+ if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
- # YVES patch 1 of 2:
- # Undo ci of line with leading closing eval brace,
- # but not beyond the indention of the line with
- # the opening brace.
- if ( $block_type_to_go[$ibeg] eq 'eval'
- && !$rOpts->{'line-up-parentheses'}
- && !$rOpts->{'indent-closing-brace'} )
- {
- (
- $opening_indentation, $opening_offset,
- $is_leading, $opening_exists
- )
- = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
- $rindentation_list );
- my $indentation = $leading_spaces_to_go[$ibeg];
- if ( defined($opening_indentation)
- && get_spaces($indentation) >
- get_spaces($opening_indentation) )
- {
- $adjust_indentation = 1;
- }
- }
+ # no breaks between welded tokens
+ return if ( $total_weld_count && $self->is_welded_right_at_i($i) );
- $default_adjust_indentation = $adjust_indentation;
+ my $token = $tokens_to_go[$i];
+ my $type = $types_to_go[$i];
- # Now modify default behavior according to user request:
- # handle option to indent non-blocks of the form ); }; ];
- # But don't do special indentation to something like ')->pack('
- if ( !$block_type_to_go[$ibeg] ) {
- my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
- if ( $cti == 1 ) {
- if ( $i_terminal <= $ibeg + 1
- || $is_semicolon_terminated )
- {
- $adjust_indentation = 2;
- }
- else {
- $adjust_indentation = 0;
- }
- }
- elsif ( $cti == 2 ) {
- if ($is_semicolon_terminated) {
- $adjust_indentation = 3;
- }
- else {
- $adjust_indentation = 0;
- }
- }
- elsif ( $cti == 3 ) {
- $adjust_indentation = 3;
+ # For certain tokens, use user settings to decide if we break before or
+ # after it
+ if ( $break_before_or_after_token{$token}
+ && ( $type eq $token || $type eq 'k' ) )
+ {
+ if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
+ }
+
+ # breaks are forced before 'if' and 'unless'
+ elsif ( $is_if_unless{$token} && $type eq 'k' ) { $i-- }
+
+ if ( $i >= 0 && $i <= $max_index_to_go ) {
+ my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
+
+ DEBUG_FORCE && do {
+ my ( $a, $b, $c ) = caller();
+ print STDOUT
+"FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
+ };
+
+ ######################################################################
+ # NOTE: if we call set_closing_breakpoint below it will then call
+ # this routing back. So there is the possibility of an infinite
+ # loop if a programming error is made. As a precaution, I have
+ # added a check on the forced_breakpoint flag, so that we won't
+ # keep trying to set it. That will give additional protection
+ # against a loop.
+ ######################################################################
+
+ if ( $i_nonblank >= 0
+ && $nobreak_to_go[$i_nonblank] == 0
+ && !$forced_breakpoint_to_go[$i_nonblank] )
+ {
+ $forced_breakpoint_to_go[$i_nonblank] = 1;
+
+ if ( $i_nonblank > $index_max_forced_break ) {
+ $index_max_forced_break = $i_nonblank;
}
- }
+ $forced_breakpoint_count++;
+ $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ]
+ = $i_nonblank;
- # handle option to indent blocks
- else {
- if (
- $rOpts->{'indent-closing-brace'}
- && (
- $i_terminal == $ibeg # isolated terminal '}'
- || $is_semicolon_terminated
- )
- ) # } xxxx ;
+ # if we break at an opening container..break at the closing
+ if ( $is_opening_sequence_token{ $tokens_to_go[$i_nonblank] } )
{
- $adjust_indentation = 3;
+ $self->set_closing_breakpoint($i_nonblank);
}
}
}
+ return;
+ }
- # if at ');', '};', '>;', and '];' of a terminal qw quote
- elsif ($rpatterns->[0] =~ /^qb*;$/
- && $rfields->[0] =~ /^([\)\}\]\>]);$/ )
- {
- if ( $closing_token_indentation{$1} == 0 ) {
- $adjust_indentation = 1;
+ sub clear_breakpoint_undo_stack {
+ my ($self) = @_;
+ $forced_breakpoint_undo_count = 0;
+ return;
+ }
+
+ use constant DEBUG_UNDOBP => 0;
+
+ sub undo_forced_breakpoint_stack {
+
+ my ( $self, $i_start ) = @_;
+
+ # Given $i_start, a non-negative index the 'undo stack' of breakpoints,
+ # remove all breakpoints from the top of the 'undo stack' down to and
+ # including index $i_start.
+
+ # The 'undo stack' is a stack of all breakpoints made for a batch of
+ # code.
+
+ if ( $i_start < 0 ) {
+ $i_start = 0;
+ my ( $a, $b, $c ) = caller();
+
+ # Bad call, can only be due to a recent programming change.
+ # Better stop here.
+ Fault(
+"Program Bug: undo_forced_breakpoint_stack from $a $c has bad i=$i_start "
+ );
+ }
+
+ while ( $forced_breakpoint_undo_count > $i_start ) {
+ my $i =
+ $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
+ if ( $i >= 0 && $i <= $max_index_to_go ) {
+ $forced_breakpoint_to_go[$i] = 0;
+ $forced_breakpoint_count--;
+
+ DEBUG_UNDOBP && do {
+ my ( $a, $b, $c ) = caller();
+ print STDOUT
+"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
+ };
}
+
+ # shouldn't happen, but not a critical error
else {
- $adjust_indentation = 3;
+ DEBUG_UNDOBP && do {
+ my ( $a, $b, $c ) = caller();
+ print STDOUT
+"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
+ };
}
}
+ return;
+ }
+} ## end closure set_forced_breakpoint
- # if line begins with a ':', align it with any
- # previous line leading with corresponding ?
- elsif ( $types_to_go[$ibeg] eq ':' ) {
- (
- $opening_indentation, $opening_offset,
- $is_leading, $opening_exists
- )
- = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
- $rindentation_list );
- if ($is_leading) { $adjust_indentation = 2; }
- }
+{ ## begin closure set_closing_breakpoint
- ##########################################################
- # Section 2: set indentation according to flag set above
- #
- # Select the indentation object to define leading
- # whitespace. If we are outdenting something like '} } );'
- # then we want to use one level below the last token
- # ($i_terminal) in order to get it to fully outdent through
- # all levels.
- ##########################################################
- my $indentation;
- my $lev;
- my $level_end = $levels_to_go[$iend];
+ my %postponed_breakpoint;
- if ( $adjust_indentation == 0 ) {
- $indentation = $leading_spaces_to_go[$ibeg];
- $lev = $levels_to_go[$ibeg];
- }
- elsif ( $adjust_indentation == 1 ) {
+ sub initialize_postponed_breakpoint {
+ %postponed_breakpoint = ();
+ return;
+ }
- # Change the indentation to be that of a different token on the line
- # Previously, the indentation of the terminal token was used:
- # OLD CODING:
- # $indentation = $reduced_spaces_to_go[$i_terminal];
- # $lev = $levels_to_go[$i_terminal];
+ sub has_postponed_breakpoint {
+ my ($seqno) = @_;
+ return $postponed_breakpoint{$seqno};
+ }
- # Generalization for MOJO:
- # Use the lowest level indentation of the tokens on the line.
- # For example, here we can use the indentation of the ending ';':
- # } until ($selection > 0 and $selection < 10); # ok to use ';'
- # But this will not outdent if we use the terminal indentation:
- # )->then( sub { # use indentation of the ->, not the {
- # Warning: reduced_spaces_to_go[] may be a reference, do not
- # do numerical checks with it
+ sub set_closing_breakpoint {
- my $i_ind = $ibeg;
- $indentation = $reduced_spaces_to_go[$i_ind];
- $lev = $levels_to_go[$i_ind];
- while ( $i_ind < $i_terminal ) {
- $i_ind++;
- if ( $levels_to_go[$i_ind] < $lev ) {
- $indentation = $reduced_spaces_to_go[$i_ind];
- $lev = $levels_to_go[$i_ind];
- }
+ # set a breakpoint at a matching closing token
+ my ( $self, $i_break ) = @_;
+
+ if ( $mate_index_to_go[$i_break] >= 0 ) {
+
+ # CAUTION: infinite recursion possible here:
+ # set_closing_breakpoint calls set_forced_breakpoint, and
+ # set_forced_breakpoint call set_closing_breakpoint
+ # ( test files attrib.t, BasicLyx.pm.html).
+ # Don't reduce the '2' in the statement below
+ if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
+
+ # break before } ] and ), but sub set_forced_breakpoint will decide
+ # to break before or after a ? and :
+ my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
+ $self->set_forced_breakpoint(
+ $mate_index_to_go[$i_break] - $inc );
+ }
+ }
+ else {
+ my $type_sequence = $type_sequence_to_go[$i_break];
+ if ($type_sequence) {
+ my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
+ $postponed_breakpoint{$type_sequence} = 1;
}
}
+ return;
+ }
+} ## end closure set_closing_breakpoint
- # handle indented closing token which aligns with opening token
- elsif ( $adjust_indentation == 2 ) {
+#########################################
+# CODE SECTION 9: Process batches of code
+#########################################
- # handle option to align closing token with opening token
- $lev = $levels_to_go[$ibeg];
+{ ## begin closure grind_batch_of_CODE
- # calculate spaces needed to align with opening token
- my $space_count =
- get_spaces($opening_indentation) + $opening_offset;
+ # The routines in this closure begin the processing of a 'batch' of code.
- # Indent less than the previous line.
- #
- # Problem: For -lp we don't exactly know what it was if there
- # were recoverable spaces sent to the aligner. A good solution
- # would be to force a flush of the vertical alignment buffer, so
- # that we would know. For now, this rule is used for -lp:
- #
- # When the last line did not start with a closing token we will
- # be optimistic that the aligner will recover everything wanted.
- #
- # This rule will prevent us from breaking a hierarchy of closing
- # tokens, and in a worst case will leave a closing paren too far
- # indented, but this is better than frequently leaving it not
- # indented enough.
- my $last_spaces = get_spaces($last_indentation_written);
- if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
- $last_spaces +=
- get_recoverable_spaces($last_indentation_written);
- }
+ # A variable to keep track of consecutive nonblank lines so that we can
+ # insert occasional blanks
+ my @nonblank_lines_at_depth;
- # reset the indentation to the new space count if it works
- # only options are all or none: nothing in-between looks good
- $lev = $levels_to_go[$ibeg];
- if ( $space_count < $last_spaces ) {
- if ($rOpts_line_up_parentheses) {
- my $lev = $levels_to_go[$ibeg];
- $indentation =
- new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
- }
- else {
- $indentation = $space_count;
- }
- }
+ # A variable to remember maximum size of previous batches; this is needed
+ # by the logical padding routine
+ my $peak_batch_size;
+ my $batch_count;
- # revert to default if it doesn't work
- else {
- $space_count = leading_spaces_to_go($ibeg);
- if ( $default_adjust_indentation == 0 ) {
- $indentation = $leading_spaces_to_go[$ibeg];
- }
- elsif ( $default_adjust_indentation == 1 ) {
- $indentation = $reduced_spaces_to_go[$i_terminal];
- $lev = $levels_to_go[$i_terminal];
- }
+ sub initialize_grind_batch_of_CODE {
+ @nonblank_lines_at_depth = ();
+ $peak_batch_size = 0;
+ $batch_count = 0;
+ return;
+ }
+
+ # sub grind_batch_of_CODE receives sections of code which are the longest
+ # possible lines without a break. In other words, it receives what is left
+ # after applying all breaks forced by blank lines, block comments, side
+ # comments, pod text, and structural braces. Its job is to break this code
+ # down into smaller pieces, if necessary, which fit within the maximum
+ # allowed line length. Then it sends the resulting lines of code on down
+ # the pipeline to the VerticalAligner package, breaking the code into
+ # continuation lines as necessary. The batch of tokens are in the "to_go"
+ # arrays. The name 'grind' is slightly suggestive of a machine continually
+ # breaking down long lines of code, but mainly it is unique and easy to
+ # remember and find with an editor search.
+
+ # The two routines 'process_line_of_CODE' and 'grind_batch_of_CODE' work
+ # together in the following way:
+
+ # - 'process_line_of_CODE' receives the original INPUT lines one-by-one and
+ # combines them into the largest sequences of tokens which might form a new
+ # line.
+ # - 'grind_batch_of_CODE' determines which tokens will form the OUTPUT
+ # lines.
+
+ # So sub 'process_line_of_CODE' builds up the longest possible continouus
+ # sequences of tokens, regardless of line length, and then
+ # grind_batch_of_CODE breaks these sequences back down into the new output
+ # lines.
+
+ # Sub 'grind_batch_of_CODE' ships its output lines to the vertical aligner.
+
+ use constant DEBUG_GRIND => 0;
+
+ sub grind_batch_of_CODE {
+
+ my ($self) = @_;
+ my $file_writer_object = $self->[_file_writer_object_];
+
+ my $this_batch = $self->[_this_batch_];
+ $batch_count++;
+
+ my $starting_in_quote = $this_batch->[_starting_in_quote_];
+ my $ending_in_quote = $this_batch->[_ending_in_quote_];
+ my $is_static_block_comment = $this_batch->[_is_static_block_comment_];
+ my $rK_to_go = $this_batch->[_rK_to_go_];
+ my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
+
+ my $rLL = $self->[_rLL_];
+
+ # This routine is only called from sub flush_batch_of_code, so that
+ # routine is a better spot for debugging.
+ DEBUG_GRIND && do {
+ my $token = my $type = "";
+ if ( $max_index_to_go >= 0 ) {
+ $token = $tokens_to_go[$max_index_to_go];
+ $type = $types_to_go[$max_index_to_go];
}
+ my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
+ print STDERR <<EOM;
+grind got batch number $batch_count with $max_index_to_go tokens, last type '$type' tok='$token', text:
+$output_str
+EOM
+ };
+
+ # Safety check - shouldn't happen. The calling routine must not call
+ # here unless there are tokens in the batch to be processed. This
+ # fault can only be triggered by a recent programming change.
+ if ( $max_index_to_go < 0 ) {
+ Fault(
+"sub grind incorrectly called with max_index_to_go=$max_index_to_go"
+ );
}
- # Full indentaion of closing tokens (-icb and -icp or -cti=2)
- else {
+ # Initialize some batch variables
+ my $comma_count_in_batch = 0;
+ my $ilast_nonblank = -1;
+ my @colon_list;
+ my @ix_seqno_controlling_ci;
+ for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
+ $bond_strength_to_go[$i] = 0;
+ $iprev_to_go[$i] = $ilast_nonblank;
+ $inext_to_go[$i] = $i + 1;
- # handle -icb (indented closing code block braces)
- # Updated method for indented block braces: indent one full level if
- # there is no continuation indentation. This will occur for major
- # structures such as sub, if, else, but not for things like map
- # blocks.
- #
- # Note: only code blocks without continuation indentation are
- # handled here (if, else, unless, ..). In the following snippet,
- # the terminal brace of the sort block will have continuation
- # indentation as shown so it will not be handled by the coding
- # here. We would have to undo the continuation indentation to do
- # this, but it probably looks ok as is. This is a possible future
- # update for semicolon terminated lines.
- #
- # if ($sortby eq 'date' or $sortby eq 'size') {
- # @files = sort {
- # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
- # or $a cmp $b
- # } @files;
- # }
- #
- if ( $block_type_to_go[$ibeg]
- && $ci_levels_to_go[$i_terminal] == 0 )
- {
- my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
- $indentation = $spaces + $rOpts_indent_columns;
+ my $type = $types_to_go[$i];
+ if ( $type ne 'b' ) {
+ if ( $ilast_nonblank >= 0 ) {
+ $inext_to_go[$ilast_nonblank] = $i;
- # NOTE: for -lp we could create a new indentation object, but
- # there is probably no need to do it
- }
+ # just in case there are two blanks in a row (shouldn't
+ # happen)
+ if ( ++$ilast_nonblank < $i ) {
+ $inext_to_go[$ilast_nonblank] = $i;
+ }
+ }
+ $ilast_nonblank = $i;
- # handle -icp and any -icb block braces which fall through above
- # test such as the 'sort' block mentioned above.
- else {
+ # This is a good spot to efficiently collect information needed
+ # for breaking lines...
- # There are currently two ways to handle -icp...
- # One way is to use the indentation of the previous line:
- # $indentation = $last_indentation_written;
+ if ( $type eq ',' ) { $comma_count_in_batch++; }
- # The other way is to use the indentation that the previous line
- # would have had if it hadn't been adjusted:
- $indentation = $last_unadjusted_indentation;
+ # gather info needed by sub set_continuation_breaks
+ my $seqno = $type_sequence_to_go[$i];
+ if ($seqno) {
- # Current method: use the minimum of the two. This avoids
- # inconsistent indentation.
- if ( get_spaces($last_indentation_written) <
- get_spaces($indentation) )
- {
- $indentation = $last_indentation_written;
+ # remember indexes of any tokens controlling xci
+ # in this batch. This list is needed by sub undo_ci.
+ if ( $ris_seqno_controlling_ci->{$seqno} ) {
+ push @ix_seqno_controlling_ci, $i;
+ }
+
+ if ( $type eq '?' ) {
+ push @colon_list, $type;
+ }
+ elsif ( $type eq ':' ) {
+ push @colon_list, $type;
+ }
}
}
-
- # use previous indentation but use own level
- # to cause list to be flushed properly
- $lev = $levels_to_go[$ibeg];
}
- # remember indentation except for multi-line quotes, which get
- # no indentation
- unless ( $ibeg == 0 && $starting_in_quote ) {
- $last_indentation_written = $indentation;
- $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
- $last_leading_token = $tokens_to_go[$ibeg];
- }
+ my $comma_arrow_count_contained =
+ $self->match_opening_and_closing_tokens();
- # be sure lines with leading closing tokens are not outdented more
- # than the line which contained the corresponding opening token.
+ # tell the -lp option we are outputting a batch so it can close
+ # any unfinished items in its stack
+ finish_lp_batch();
- #############################################################
- # updated per bug report in alex_bug.pl: we must not
- # mess with the indentation of closing logical braces so
- # we must treat something like '} else {' as if it were
- # an isolated brace
- #############################################################
- my $is_isolated_block_brace = $block_type_to_go[$ibeg]
- && ( $i_terminal == $ibeg
- || $is_if_elsif_else_unless_while_until_for_foreach{
- $block_type_to_go[$ibeg]
- } );
+ # If this line ends in a code block brace, set breaks at any
+ # previous closing code block braces to breakup a chain of code
+ # blocks on one line. This is very rare but can happen for
+ # user-defined subs. For example we might be looking at this:
+ # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
+ my $saw_good_break = 0; # flag to force breaks even if short line
+ if (
- # only do this for a ':; which is aligned with its leading '?'
- my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
+ # looking for opening or closing block brace
+ $block_type_to_go[$max_index_to_go]
- if (
- defined($opening_indentation)
- && !$leading_paren_arrow # MOJO
- && !$is_isolated_block_brace
- && !$is_unaligned_colon
+ # never any good breaks if just one token
+ && $max_index_to_go > 0
+
+ # but not one of these which are never duplicated on a line:
+ # until|while|for|if|elsif|else
+ && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go]
+ }
)
{
- if ( get_spaces($opening_indentation) > get_spaces($indentation) ) {
- $indentation = $opening_indentation;
+ my $lev = $nesting_depth_to_go[$max_index_to_go];
+
+ # Walk backwards from the end and
+ # set break at any closing block braces at the same level.
+ # But quit if we are not in a chain of blocks.
+ for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
+ last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
+ next if ( $levels_to_go[$i] > $lev ); # skip past higher level
+
+ if ( $block_type_to_go[$i] ) {
+ if ( $tokens_to_go[$i] eq '}' ) {
+ $self->set_forced_breakpoint($i);
+ $saw_good_break = 1;
+ }
+ }
+
+ # quit if we see anything besides words, function, blanks
+ # at this level
+ elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
}
}
- # remember the indentation of each line of this batch
- push @{$rindentation_list}, $indentation;
-
- # outdent lines with certain leading tokens...
- if (
+ my $imin = 0;
+ my $imax = $max_index_to_go;
- # must be first word of this batch
- $ibeg == 0
+ # trim any blank tokens
+ if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
+ if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
- # and ...
- && (
+ # anything left to write?
+ if ( $imin <= $imax ) {
+
+ my $last_line_leading_type = $self->[_last_line_leading_type_];
+ my $last_line_leading_level = $self->[_last_line_leading_level_];
+ my $last_last_line_leading_level =
+ $self->[_last_last_line_leading_level_];
+
+ # add a blank line before certain key types but not after a comment
+ if ( $last_line_leading_type ne '#' ) {
+ my $want_blank = 0;
+ my $leading_token = $tokens_to_go[$imin];
+ my $leading_type = $types_to_go[$imin];
+
+ # blank lines before subs except declarations and one-liners
+ if ( $leading_type eq 'i' ) {
+ if ( $leading_token =~ /$SUB_PATTERN/ ) {
+ $want_blank = $rOpts->{'blank-lines-before-subs'}
+ if ( terminal_type_i( $imin, $imax ) !~ /^[\;\}]$/ );
+ }
- # certain leading keywords if requested
- (
- $rOpts->{'outdent-keywords'}
- && $types_to_go[$ibeg] eq 'k'
- && $outdent_keyword{ $tokens_to_go[$ibeg] }
- )
+ # break before all package declarations
+ elsif ( substr( $leading_token, 0, 8 ) eq 'package ' ) {
+ $want_blank = $rOpts->{'blank-lines-before-packages'};
+ }
+ }
- # or labels if requested
- || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
+ # break before certain key blocks except one-liners
+ if ( $leading_type eq 'k' ) {
+ if ( $leading_token eq 'BEGIN' || $leading_token eq 'END' )
+ {
+ $want_blank = $rOpts->{'blank-lines-before-subs'}
+ if ( terminal_type_i( $imin, $imax ) ne '}' );
+ }
- # or static block comments if requested
- || ( $types_to_go[$ibeg] eq '#'
- && $rOpts->{'outdent-static-block-comments'}
- && $is_static_block_comment )
- )
- )
+ # Break before certain block types if we haven't had a
+ # break at this level for a while. This is the
+ # difficult decision..
+ elsif ($last_line_leading_type ne 'b'
+ && $leading_token =~
+ /^(unless|if|while|until|for|foreach)$/ )
+ {
+ my $lc =
+ $nonblank_lines_at_depth[$last_line_leading_level];
+ if ( !defined($lc) ) { $lc = 0 }
- {
- my $space_count = leading_spaces_to_go($ibeg);
- if ( $space_count > 0 ) {
- $space_count -= $rOpts_continuation_indentation;
- $is_outdented_line = 1;
- if ( $space_count < 0 ) { $space_count = 0 }
+ # patch for RT #128216: no blank line inserted at a level
+ # change
+ if ( $levels_to_go[$imin] != $last_line_leading_level )
+ {
+ $lc = 0;
+ }
- # do not promote a spaced static block comment to non-spaced;
- # this is not normally necessary but could be for some
- # unusual user inputs (such as -ci = -i)
- if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
- $space_count = 1;
+ $want_blank =
+ $rOpts->{'blanks-before-blocks'}
+ && $lc >= $rOpts->{'long-block-line-count'}
+ && $self->consecutive_nonblank_lines() >=
+ $rOpts->{'long-block-line-count'}
+ && terminal_type_i( $imin, $imax ) ne '}';
+ }
}
- if ($rOpts_line_up_parentheses) {
- $indentation =
- new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
+ # Check for blank lines wanted before a closing brace
+ if ( $leading_token eq '}' ) {
+ if ( $rOpts->{'blank-lines-before-closing-block'}
+ && $block_type_to_go[$imin]
+ && $block_type_to_go[$imin] =~
+ /$blank_lines_before_closing_block_pattern/ )
+ {
+ my $nblanks =
+ $rOpts->{'blank-lines-before-closing-block'};
+ if ( $nblanks > $want_blank ) {
+ $want_blank = $nblanks;
+ }
+ }
}
- else {
- $indentation = $space_count;
+
+ if ($want_blank) {
+
+ # future: send blank line down normal path to VerticalAligner
+ $self->flush_vertical_aligner();
+ $file_writer_object->require_blank_code_lines($want_blank);
}
}
- }
- return ( $indentation, $lev, $level_end, $terminal_type,
- $is_semicolon_terminated, $is_outdented_line );
- }
-}
+ # update blank line variables and count number of consecutive
+ # non-blank, non-comment lines at this level
+ $last_last_line_leading_level = $last_line_leading_level;
+ $last_line_leading_level = $levels_to_go[$imin];
+ if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
+ $last_line_leading_type = $types_to_go[$imin];
+ if ( $last_line_leading_level == $last_last_line_leading_level
+ && $last_line_leading_type ne 'b'
+ && $last_line_leading_type ne '#'
+ && defined( $nonblank_lines_at_depth[$last_line_leading_level] )
+ )
+ {
+ $nonblank_lines_at_depth[$last_line_leading_level]++;
+ }
+ else {
+ $nonblank_lines_at_depth[$last_line_leading_level] = 1;
+ }
-sub mate_index_to_go {
- my ( $self, $i ) = @_;
+ $self->[_last_line_leading_type_] = $last_line_leading_type;
+ $self->[_last_line_leading_level_] = $last_line_leading_level;
+ $self->[_last_last_line_leading_level_] =
+ $last_last_line_leading_level;
- # Return the matching index of a container or ternary pair
- # This is equivalent to the array @mate_index_to_go
- my $K = $K_to_go[$i];
- my $K_mate = $self->K_mate_index($K);
- my $i_mate = -1;
- if ( defined($K_mate) ) {
- $i_mate = $i + ( $K_mate - $K );
- if ( $i_mate < 0 || $i_mate > $max_index_to_go ) {
- $i_mate = -1;
- }
- }
- my $i_mate_alt = $mate_index_to_go[$i];
-
- # Debug code to eventually be removed
- if ( 0 && $i_mate_alt != $i_mate ) {
- my $tok = $tokens_to_go[$i];
- my $type = $types_to_go[$i];
- my $tok_mate = '*';
- my $type_mate = '*';
- if ( $i_mate >= 0 && $i_mate <= $max_index_to_go ) {
- $tok_mate = $tokens_to_go[$i_mate];
- $type_mate = $types_to_go[$i_mate];
- }
- my $seq = $type_sequence_to_go[$i];
- my $file = $logger_object->get_input_stream_name();
-
- Warn(
-"mate_index: file '$file': i=$i, imate=$i_mate, should be $i_mate_alt, K=$K, K_mate=$K_mate\ntype=$type, tok=$tok, seq=$seq, max=$max_index_to_go, tok_mate=$tok_mate, type_mate=$type_mate"
- );
- }
- return $i_mate;
-}
+ # Flag to remember if we called sub 'pad_array_to_go'.
+ # Some routines (scan_list(), set_continuation_breaks() ) need some
+ # extra tokens added at the end of the batch. Most batches do not
+ # use these routines, so we will avoid calling 'pad_array_to_go'
+ # unless it is needed.
+ my $called_pad_array_to_go;
-sub K_mate_index {
+ # set all forced breakpoints for good list formatting
+ my $is_long_line = $max_index_to_go > 0
+ && $self->excess_line_length( $imin, $max_index_to_go ) > 0;
- # Given the index K of an opening or closing container, or ?/: ternary pair,
- # return the index K of the other member of the pair.
- my ( $self, $K ) = @_;
- return unless defined($K);
- my $rLL = $self->{rLL};
- my $seqno = $rLL->[$K]->[_TYPE_SEQUENCE_];
- return unless ($seqno);
+ my $old_line_count_in_batch =
+ $max_index_to_go == 0
+ ? 1
+ : $self->get_old_line_count( $K_to_go[0],
+ $K_to_go[$max_index_to_go] );
- my $K_opening = $self->{K_opening_container}->{$seqno};
- if ( defined($K_opening) ) {
- if ( $K != $K_opening ) { return $K_opening }
- return $self->{K_closing_container}->{$seqno};
- }
+ if (
+ $is_long_line
+ || $old_line_count_in_batch > 1
- $K_opening = $self->{K_opening_ternary}->{$seqno};
- if ( defined($K_opening) ) {
- if ( $K != $K_opening ) { return $K_opening }
- return $self->{K_closing_ternary}->{$seqno};
- }
- return;
-}
+ # must always call scan_list() with unbalanced batches because
+ # it is maintaining some stacks
+ || is_unbalanced_batch()
-sub set_vertical_tightness_flags {
+ # call scan_list if we might want to break at commas
+ || (
+ $comma_count_in_batch
+ && ( $rOpts_maximum_fields_per_table > 0
+ && $rOpts_maximum_fields_per_table <=
+ $comma_count_in_batch
+ || $rOpts_comma_arrow_breakpoints == 0 )
+ )
- my ( $self, $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
+ # call scan_list if user may want to break open some one-line
+ # hash references
+ || ( $comma_arrow_count_contained
+ && $rOpts_comma_arrow_breakpoints != 3 )
+ )
+ {
+ # add a couple of extra terminal blank tokens
+ $self->pad_array_to_go();
+ $called_pad_array_to_go = 1;
- # Define vertical tightness controls for the nth line of a batch.
- # We create an array of parameters which tell the vertical aligner
- # if we should combine this line with the next line to achieve the
- # desired vertical tightness. The array of parameters contains:
- #
- # [0] type: 1=opening non-block 2=closing non-block
- # 3=opening block brace 4=closing block brace
- #
- # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
- # if closing: spaces of padding to use
- # [2] sequence number of container
- # [3] valid flag: do not append if this flag is false. Will be
- # true if appropriate -vt flag is set. Otherwise, Will be
- # made true only for 2 line container in parens with -lp
- #
- # These flags are used by sub set_leading_whitespace in
- # the vertical aligner
+ ## This caused problems in one version of perl for unknown reasons:
+ ## $saw_good_break ||= scan_list();
+ my $sgb = $self->scan_list($is_long_line);
+ $saw_good_break ||= $sgb;
+ }
- my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
+ # let $ri_first and $ri_last be references to lists of
+ # first and last tokens of line fragments to output..
+ my ( $ri_first, $ri_last );
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 1:
- # Handle Lines 1 .. n-1 but not the last line
- # For non-BLOCK tokens, we will need to examine the next line
- # too, so we won't consider the last line.
- #--------------------------------------------------------------
- if ( $n < $n_last_line ) {
+ # write a single line if..
+ if (
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 1a:
- # Look for Type 1, last token of this line is a non-block opening token
- #--------------------------------------------------------------
- my $ibeg_next = $ri_first->[ $n + 1 ];
- my $token_end = $tokens_to_go[$iend];
- my $iend_next = $ri_last->[ $n + 1 ];
- if (
- $type_sequence_to_go[$iend]
- && !$block_type_to_go[$iend]
- && $is_opening_token{$token_end}
- && (
- $opening_vertical_tightness{$token_end} > 0
+ # we aren't allowed to add any newlines
+ !$rOpts_add_newlines
- # allow 2-line method call to be closed up
- || ( $rOpts_line_up_parentheses
- && $token_end eq '('
- && $iend > $ibeg
- && $types_to_go[ $iend - 1 ] ne 'b' )
- )
- )
- {
+ # or,
+ || (
- # avoid multiple jumps in nesting depth in one line if
- # requested
- my $ovt = $opening_vertical_tightness{$token_end};
- my $iend_next = $ri_last->[ $n + 1 ];
- unless (
- $ovt < 2
- && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
- $nesting_depth_to_go[$ibeg_next] )
+ # this line is 'short'
+ !$is_long_line
+
+ # and we didn't see a good breakpoint
+ && !$saw_good_break
+
+ # and we don't already have an interior breakpoint
+ && !get_forced_breakpoint_count()
+ )
)
{
-
- # If -vt flag has not been set, mark this as invalid
- # and aligner will validate it if it sees the closing paren
- # within 2 lines.
- my $valid_flag = $ovt;
- @{$rvertical_tightness_flags} =
- ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
+ @{$ri_first} = ($imin);
+ @{$ri_last} = ($imax);
}
- }
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 1b:
- # Look for Type 2, first token of next line is a non-block closing
- # token .. and be sure this line does not have a side comment
- #--------------------------------------------------------------
- my $token_next = $tokens_to_go[$ibeg_next];
- if ( $type_sequence_to_go[$ibeg_next]
- && !$block_type_to_go[$ibeg_next]
- && $is_closing_token{$token_next}
- && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen!
- {
- my $ovt = $opening_vertical_tightness{$token_next};
- my $cvt = $closing_vertical_tightness{$token_next};
- if (
+ # otherwise use multiple lines
+ else {
- # never append a trailing line like )->pack(
- # because it will throw off later alignment
- (
- $nesting_depth_to_go[$ibeg_next] ==
- $nesting_depth_to_go[ $iend_next + 1 ] + 1
- )
- && (
- $cvt == 2
- || (
- $container_environment_to_go[$ibeg_next] ne 'LIST'
- && (
- $cvt == 1
+ # add a couple of extra terminal blank tokens if we haven't
+ # already done so
+ $self->pad_array_to_go() unless ($called_pad_array_to_go);
- # allow closing up 2-line method calls
- || ( $rOpts_line_up_parentheses
- && $token_next eq ')' )
- )
- )
- )
- )
- {
+ ( $ri_first, $ri_last ) =
+ $self->set_continuation_breaks( $saw_good_break,
+ \@colon_list );
- # decide which trailing closing tokens to append..
- my $ok = 0;
- if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
- else {
- my $str = join( '',
- @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
+ $self->break_all_chain_tokens( $ri_first, $ri_last );
- # append closing token if followed by comment or ';'
- if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
- }
+ $self->break_equals( $ri_first, $ri_last );
- if ($ok) {
- my $valid_flag = $cvt;
- @{$rvertical_tightness_flags} = (
- 2,
- $tightness{$token_next} == 2 ? 0 : 1,
- $type_sequence_to_go[$ibeg_next], $valid_flag,
- );
+ # now we do a correction step to clean this up a bit
+ # (The only time we would not do this is for debugging)
+ if ($rOpts_recombine) {
+ ( $ri_first, $ri_last ) =
+ $self->recombine_breakpoints( $ri_first, $ri_last );
}
- }
- }
-
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 1c:
- # Implement the Opening Token Right flag (Type 2)..
- # If requested, move an isolated trailing opening token to the end of
- # the previous line which ended in a comma. We could do this
- # in sub recombine_breakpoints but that would cause problems
- # with -lp formatting. The problem is that indentation will
- # quickly move far to the right in nested expressions. By
- # doing it after indentation has been set, we avoid changes
- # to the indentation. Actual movement of the token takes place
- # in sub valign_output_step_B.
- #--------------------------------------------------------------
- if (
- $opening_token_right{ $tokens_to_go[$ibeg_next] }
- # previous line is not opening
- # (use -sot to combine with it)
- && !$is_opening_token{$token_end}
+ $self->insert_final_ternary_breaks( $ri_first, $ri_last )
+ if (@colon_list);
+ }
- # previous line ended in one of these
- # (add other cases if necessary; '=>' and '.' are not necessary
- && !$block_type_to_go[$ibeg_next]
+ $self->insert_breaks_before_list_opening_containers( $ri_first,
+ $ri_last )
+ if ( %break_before_container_types && $max_index_to_go > 0 );
- # this is a line with just an opening token
- && ( $iend_next == $ibeg_next
- || $iend_next == $ibeg_next + 2
- && $types_to_go[$iend_next] eq '#' )
+ # do corrector step if -lp option is used
+ my $do_not_pad = 0;
+ if ($rOpts_line_up_parentheses) {
+ $do_not_pad =
+ $self->correct_lp_indentation( $ri_first, $ri_last );
+ }
- # looks bad if we align vertically with the wrong container
- && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
- )
- {
- my $valid_flag = 1;
- my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
- @{$rvertical_tightness_flags} =
- ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
- }
+ # unmask any invisible line-ending semicolon. They were placed by
+ # sub respace_tokens but we only now know if we actually need them.
+ if ( !$tokens_to_go[$imax] && $types_to_go[$imax] eq ';' ) {
+ my $i = $imax;
+ my $tok = ';';
+ my $tok_len = 1;
+ if ( $want_left_space{';'} != WS_NO ) {
+ $tok = ' ;';
+ $tok_len = 2;
+ }
+ $tokens_to_go[$i] = $tok;
+ $token_lengths_to_go[$i] = $tok_len;
+ my $KK = $K_to_go[$i];
+ $rLL->[$KK]->[_TOKEN_] = $tok;
+ $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
+ my $line_number = 1 + $self->get_old_line_index($KK);
+ $self->note_added_semicolon($line_number);
+ }
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 1d:
- # Stacking of opening and closing tokens (Type 2)
- #--------------------------------------------------------------
- my $stackable;
- my $token_beg_next = $tokens_to_go[$ibeg_next];
+ if ( $rOpts_one_line_block_semicolons == 0 ) {
+ $self->delete_one_line_semicolons( $ri_first, $ri_last );
+ }
- # patch to make something like 'qw(' behave like an opening paren
- # (aran.t)
- if ( $types_to_go[$ibeg_next] eq 'q' ) {
- if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
- $token_beg_next = $1;
+ # The line breaks for this batch of code have been finalized. Now we
+ # can to package the results for further processing. We will switch
+ # from the local '_to_go' buffer arrays (i-index) back to the global
+ # token arrays (K-index) at this point.
+ my $rlines_K;
+ my $index_error;
+ for ( my $n = 0 ; $n < @{$ri_first} ; $n++ ) {
+ my $ibeg = $ri_first->[$n];
+ my $Kbeg = $K_to_go[$ibeg];
+ my $iend = $ri_last->[$n];
+ my $Kend = $K_to_go[$iend];
+ if ( $iend - $ibeg != $Kend - $Kbeg ) {
+ $index_error = $n unless defined($index_error);
+ }
+ push @{$rlines_K},
+ [ $Kbeg, $Kend, $forced_breakpoint_to_go[$iend] ];
+ }
+
+ # Check correctness of the mapping between the i and K token
+ # indexes. (The K index is the global index, the i index is the
+ # batch index). It is important to do this check because an error
+ # would be disastrous. The reason that we should never see an
+ # index error here is that sub 'store_token_to_go' has a check to
+ # make sure that the indexes in batches remain continuous. Since
+ # sub 'store_token_to_go' controls feeding tokens into batches,
+ # no index discrepancies should occur unless a recent programming
+ # change has introduced a bug.
+ if ( defined($index_error) ) {
+
+ # Temporary debug code - should never get here
+ for ( my $n = 0 ; $n < @{$ri_first} ; $n++ ) {
+ my $ibeg = $ri_first->[$n];
+ my $Kbeg = $K_to_go[$ibeg];
+ my $iend = $ri_last->[$n];
+ my $Kend = $K_to_go[$iend];
+ my $idiff = $iend - $ibeg;
+ my $Kdiff = $Kend - $Kbeg;
+ print STDERR <<EOM;
+line $n, irange $ibeg-$iend = $idiff, Krange $Kbeg-$Kend = $Kdiff;
+EOM
+ }
+ Fault(
+ "Index error at line $index_error; i and K ranges differ");
}
- }
- if ( $is_closing_token{$token_end}
- && $is_closing_token{$token_beg_next} )
- {
- $stackable = $stack_closing_token{$token_beg_next}
- unless ( $block_type_to_go[$ibeg_next] )
- ; # shouldn't happen; just checking
- }
- elsif ($is_opening_token{$token_end}
- && $is_opening_token{$token_beg_next} )
- {
- $stackable = $stack_opening_token{$token_beg_next}
- unless ( $block_type_to_go[$ibeg_next] )
- ; # shouldn't happen; just checking
- }
+ $this_batch->[_rlines_K_] = $rlines_K;
+ $this_batch->[_ibeg0_] = $ri_first->[0];
+ $this_batch->[_peak_batch_size_] = $peak_batch_size;
+ $this_batch->[_do_not_pad_] = $do_not_pad;
+ $this_batch->[_batch_count_] = $batch_count;
+ $this_batch->[_rix_seqno_controlling_ci_] =
+ \@ix_seqno_controlling_ci;
- if ($stackable) {
+ $self->send_lines_to_vertical_aligner();
- my $is_semicolon_terminated;
- if ( $n + 1 == $n_last_line ) {
- my ( $terminal_type, $i_terminal ) =
- $self->terminal_type_i( $ibeg_next, $iend_next );
- $is_semicolon_terminated = $terminal_type eq ';'
- && $nesting_depth_to_go[$iend_next] <
- $nesting_depth_to_go[$ibeg_next];
+ # Insert any requested blank lines after an opening brace. We have
+ # to skip back before any side comment to find the terminal token
+ my $iterm;
+ for ( $iterm = $imax ; $iterm >= $imin ; $iterm-- ) {
+ next if $types_to_go[$iterm] eq '#';
+ next if $types_to_go[$iterm] eq 'b';
+ last;
}
- # this must be a line with just an opening token
- # or end in a semicolon
- if (
- $is_semicolon_terminated
- || ( $iend_next == $ibeg_next
- || $iend_next == $ibeg_next + 2
- && $types_to_go[$iend_next] eq '#' )
- )
- {
- my $valid_flag = 1;
- my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
- @{$rvertical_tightness_flags} =
- ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
- );
+ # write requested number of blank lines after an opening block brace
+ if ( $iterm >= $imin && $types_to_go[$iterm] eq '{' ) {
+ if ( $rOpts->{'blank-lines-after-opening-block'}
+ && $block_type_to_go[$iterm]
+ && $block_type_to_go[$iterm] =~
+ /$blank_lines_after_opening_block_pattern/ )
+ {
+ my $nblanks = $rOpts->{'blank-lines-after-opening-block'};
+ $self->flush_vertical_aligner();
+ $file_writer_object->require_blank_code_lines($nblanks);
+ }
}
}
- }
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 2:
- # Handle type 3, opening block braces on last line of the batch
- # Check for a last line with isolated opening BLOCK curly
- #--------------------------------------------------------------
- elsif ($rOpts_block_brace_vertical_tightness
- && $ibeg eq $iend
- && $types_to_go[$iend] eq '{'
- && $block_type_to_go[$iend] =~
- /$block_brace_vertical_tightness_pattern/o )
- {
- @{$rvertical_tightness_flags} =
- ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
- }
+ # Remember the largest batch size processed. This is needed by the
+ # logical padding routine to avoid padding the first nonblank token
+ if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) {
+ $peak_batch_size = $max_index_to_go;
+ }
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 3:
- # Handle type 4, a closing block brace on the last line of the batch Check
- # for a last line with isolated closing BLOCK curly
- #--------------------------------------------------------------
- elsif ($rOpts_stack_closing_block_brace
- && $ibeg eq $iend
- && $block_type_to_go[$iend]
- && $types_to_go[$iend] eq '}' )
- {
- my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
- @{$rvertical_tightness_flags} =
- ( 4, $spaces, $type_sequence_to_go[$iend], 1 );
+ return;
}
+} ## end closure grind_batch_of_CODE
- # pack in the sequence numbers of the ends of this line
- $rvertical_tightness_flags->[4] = get_seqno($ibeg);
- $rvertical_tightness_flags->[5] = get_seqno($iend);
- return $rvertical_tightness_flags;
-}
+{ ## begin closure match_opening_and_closing_tokens
-sub get_seqno {
+ # closure to keep track of unbalanced containers.
+ # arrays shared by the routines in this block:
+ my %saved_opening_indentation;
+ my @unmatched_opening_indexes_in_this_batch;
+ my @unmatched_closing_indexes_in_this_batch;
+ my %comma_arrow_count;
- # get opening and closing sequence numbers of a token for the vertical
- # aligner. Assign qw quotes a value to allow qw opening and closing tokens
- # to be treated somewhat like opening and closing tokens for stacking
- # tokens by the vertical aligner.
- my ($ii) = @_;
- my $seqno = $type_sequence_to_go[$ii];
- if ( $types_to_go[$ii] eq 'q' ) {
- my $SEQ_QW = -1;
- if ( $ii > 0 ) {
- $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
- }
- else {
- if ( !$ending_in_quote ) {
- $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
- }
- }
+ sub initialize_saved_opening_indentation {
+ %saved_opening_indentation = ();
+ return;
}
- return ($seqno);
-}
-{
- my %is_vertical_alignment_type;
- my %is_not_vertical_alignment_token;
- my %is_vertical_alignment_keyword;
- my %is_terminal_alignment_type;
- my %is_low_level_alignment_token;
+ sub is_unbalanced_batch {
+ return @unmatched_opening_indexes_in_this_batch +
+ @unmatched_closing_indexes_in_this_batch;
+ }
- BEGIN {
+ sub match_opening_and_closing_tokens {
- my @q;
+ # Match up indexes of opening and closing braces, etc, in this batch.
+ # This has to be done after all tokens are stored because unstoring
+ # of tokens would otherwise cause trouble.
- # Replaced =~ and // in the list. // had been removed in RT 119588
- @q = qw#
- = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
- { ? : => && || ~~ !~~ =~ !~ //
- #;
- @is_vertical_alignment_type{@q} = (1) x scalar(@q);
+ my ($self) = @_;
+ my $rwant_container_open = $self->[_rwant_container_open_];
+ my $rparent_of_seqno = $self->[_rparent_of_seqno_];
- # These 'tokens' are not aligned. We need this to remove [
- # from the above list because it has type ='{'
- @q = qw([);
- @is_not_vertical_alignment_token{@q} = (1) x scalar(@q);
+ @unmatched_opening_indexes_in_this_batch = ();
+ @unmatched_closing_indexes_in_this_batch = ();
+ %comma_arrow_count = ();
+ my $comma_arrow_count_contained = 0;
+ my $parent_seqno = $self->parent_seqno_by_K( $K_to_go[0] );
- # these are the only types aligned at a line end
- @q = qw(&& ||);
- @is_terminal_alignment_type{@q} = (1) x scalar(@q);
+ foreach my $i ( 0 .. $max_index_to_go ) {
+ $parent_seqno_to_go[$i] = $parent_seqno;
- # these tokens only align at line level
- @q = ( '{', '(' );
- @is_low_level_alignment_token{@q} = (1) x scalar(@q);
+ my $seqno = $type_sequence_to_go[$i];
+ if ($seqno) {
+ my $token = $tokens_to_go[$i];
+ if ( $is_opening_sequence_token{$token} ) {
+ if ( $is_opening_token{$token} ) {
+ $parent_seqno = $seqno;
+ }
- # eq and ne were removed from this list to improve alignment chances
- @q = qw(if unless and or err for foreach while until);
- @is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
- }
+ if ( $rwant_container_open->{$seqno} ) {
+ $self->set_forced_breakpoint($i);
+ }
- sub set_vertical_alignment_markers {
+ push @unmatched_opening_indexes_in_this_batch, $i;
+ }
+ elsif ( $is_closing_sequence_token{$token} ) {
- # This routine takes the first step toward vertical alignment of the
- # lines of output text. It looks for certain tokens which can serve as
- # vertical alignment markers (such as an '=').
- #
- # Method: We look at each token $i in this output batch and set
- # $ralignment_type_to_go->[$i] equal to those tokens at which we would
- # accept vertical alignment.
+ if ( $is_closing_token{$token} ) {
+ $parent_seqno = $rparent_of_seqno->{$seqno};
+ $parent_seqno = SEQ_ROOT unless defined($parent_seqno);
+ $parent_seqno_to_go[$i] = $parent_seqno;
+ }
- my ( $self, $ri_first, $ri_last ) = @_;
+ if ( $rwant_container_open->{$seqno} ) {
+ $self->set_forced_breakpoint( $i - 1 );
+ }
- my $ralignment_type_to_go;
- for my $i ( 0 .. $max_index_to_go ) {
- $ralignment_type_to_go->[$i] = '';
+ my $i_mate = pop @unmatched_opening_indexes_in_this_batch;
+ if ( defined($i_mate) && $i_mate >= 0 ) {
+ if ( $type_sequence_to_go[$i_mate] ==
+ $type_sequence_to_go[$i] )
+ {
+ $mate_index_to_go[$i] = $i_mate;
+ $mate_index_to_go[$i_mate] = $i;
+ my $seqno = $type_sequence_to_go[$i];
+ if ( $comma_arrow_count{$seqno} ) {
+ $comma_arrow_count_contained +=
+ $comma_arrow_count{$seqno};
+ }
+ }
+ else {
+ push @unmatched_opening_indexes_in_this_batch,
+ $i_mate;
+ push @unmatched_closing_indexes_in_this_batch, $i;
+ }
+ }
+ else {
+ push @unmatched_closing_indexes_in_this_batch, $i;
+ }
+ }
+ }
+ elsif ( $tokens_to_go[$i] eq '=>' ) {
+ if (@unmatched_opening_indexes_in_this_batch) {
+ my $j = $unmatched_opening_indexes_in_this_batch[-1];
+ my $seqno = $type_sequence_to_go[$j];
+ $comma_arrow_count{$seqno}++;
+ }
+ }
}
- # nothing to do if we aren't allowed to change whitespace
- if ( !$rOpts_add_whitespace ) {
- return $ralignment_type_to_go;
- }
+ return $comma_arrow_count_contained;
+ }
- # remember the index of last nonblank token before any sidecomment
- my $i_terminal = $max_index_to_go;
- if ( $types_to_go[$i_terminal] eq '#' ) {
- if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
- if ( $i_terminal > 0 ) { --$i_terminal }
+ sub save_opening_indentation {
+
+ # This should be called after each batch of tokens is output. It
+ # saves indentations of lines of all unmatched opening tokens.
+ # These will be used by sub get_opening_indentation.
+
+ my ( $self, $ri_first, $ri_last, $rindentation_list ) = @_;
+
+ # QW INDENTATION PATCH 1:
+ # Also save indentation for multiline qw quotes
+ my @i_qw;
+ my $seqno_qw_opening;
+ if ( $types_to_go[$max_index_to_go] eq 'q' ) {
+ my $KK = $K_to_go[$max_index_to_go];
+ $seqno_qw_opening =
+ $self->[_rstarting_multiline_qw_seqno_by_K_]->{$KK};
+ if ($seqno_qw_opening) {
+ push @i_qw, $max_index_to_go;
}
}
- # look at each line of this batch..
- my $last_vertical_alignment_before_index;
- my $vert_last_nonblank_type;
- my $vert_last_nonblank_token;
- my $vert_last_nonblank_block_type;
- my $max_line = @{$ri_first} - 1;
+ # we need to save indentations of any unmatched opening tokens
+ # in this batch because we may need them in a subsequent batch.
+ foreach ( @unmatched_opening_indexes_in_this_batch, @i_qw ) {
- foreach my $line ( 0 .. $max_line ) {
- my $ibeg = $ri_first->[$line];
- my $iend = $ri_last->[$line];
- $last_vertical_alignment_before_index = -1;
- $vert_last_nonblank_type = '';
- $vert_last_nonblank_token = '';
- $vert_last_nonblank_block_type = '';
+ my $seqno = $type_sequence_to_go[$_];
- # look at each token in this output line..
- my $level_beg = $levels_to_go[$ibeg];
- foreach my $i ( $ibeg .. $iend ) {
- my $alignment_type = '';
- my $type = $types_to_go[$i];
- my $block_type = $block_type_to_go[$i];
- my $token = $tokens_to_go[$i];
+ if ( !$seqno ) {
+ if ( $seqno_qw_opening && $_ == $max_index_to_go ) {
+ $seqno = $seqno_qw_opening;
+ }
+ else {
- # do not align tokens at lower level then start of line
- # except for side comments
- if ( $levels_to_go[$i] < $levels_to_go[$ibeg]
- && $types_to_go[$i] ne '#' )
- {
- $ralignment_type_to_go->[$i] = '';
- next;
+ # shouldn't happen
+ $seqno = 'UNKNOWN';
}
+ }
- #--------------------------------------------------------
- # First see if we want to align BEFORE this token
- #--------------------------------------------------------
+ $saved_opening_indentation{$seqno} = [
+ lookup_opening_indentation(
+ $_, $ri_first, $ri_last, $rindentation_list
+ )
+ ];
+ }
+ return;
+ }
- # The first possible token that we can align before
- # is index 2 because: 1) it doesn't normally make sense to
- # align before the first token and 2) the second
- # token must be a blank if we are to align before
- # the third
- if ( $i < $ibeg + 2 ) { }
+ sub get_saved_opening_indentation {
+ my ($seqno) = @_;
+ my ( $indent, $offset, $is_leading, $exists ) = ( 0, 0, 0, 0 );
- # must follow a blank token
- elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
+ if ($seqno) {
+ if ( $saved_opening_indentation{$seqno} ) {
+ ( $indent, $offset, $is_leading ) =
+ @{ $saved_opening_indentation{$seqno} };
+ $exists = 1;
+ }
+ }
- # align a side comment --
- elsif ( $type eq '#' ) {
+ # some kind of serious error it doesn't exist
+ # (example is badfile.t)
- unless (
+ return ( $indent, $offset, $is_leading, $exists );
+ }
+} ## end closure match_opening_and_closing_tokens
- # it is a static side comment
- (
- $rOpts->{'static-side-comments'}
- && $token =~ /$static_side_comment_pattern/o
- )
+sub lookup_opening_indentation {
- # or a closing side comment
- || ( $vert_last_nonblank_block_type
- && $token =~
- /$closing_side_comment_prefix_pattern/o )
- )
- {
- $alignment_type = $type;
- } ## Example of a static side comment
- }
+ # get the indentation of the line in the current output batch
+ # which output a selected opening token
+ #
+ # given:
+ # $i_opening - index of an opening token in the current output batch
+ # whose line indentation we need
+ # $ri_first - reference to list of the first index $i for each output
+ # line in this batch
+ # $ri_last - reference to list of the last index $i for each output line
+ # in this batch
+ # $rindentation_list - reference to a list containing the indentation
+ # used for each line. (NOTE: the first slot in
+ # this list is the last returned line number, and this is
+ # followed by the list of indentations).
+ #
+ # return
+ # -the indentation of the line which contained token $i_opening
+ # -and its offset (number of columns) from the start of the line
- # otherwise, do not align two in a row to create a
- # blank field
- elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
+ my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
- # align before one of these keywords
- # (within a line, since $i>1)
- elsif ( $type eq 'k' ) {
+ if ( !@{$ri_last} ) {
- # /^(if|unless|and|or|eq|ne)$/
- if ( $is_vertical_alignment_keyword{$token} ) {
- $alignment_type = $token;
- }
- }
+ # An error here implies a bug introduced by a recent program change.
+ # Every batch of code has lines.
+ Fault("Error in opening_indentation: no lines");
+ return;
+ }
- # align before one of these types..
- # Note: add '.' after new vertical aligner is operational
- elsif ( $is_vertical_alignment_type{$type}
- && !$is_not_vertical_alignment_token{$token} )
- {
- $alignment_type = $token;
+ my $nline = $rindentation_list->[0]; # line number of previous lookup
- # Do not align a terminal token. Although it might
- # occasionally look ok to do this, this has been found to be
- # a good general rule. The main problems are:
- # (1) that the terminal token (such as an = or :) might get
- # moved far to the right where it is hard to see because
- # nothing follows it, and
- # (2) doing so may prevent other good alignments.
- # Current exceptions are && and ||
- if ( $i == $iend || $i >= $i_terminal ) {
- $alignment_type = ""
- unless ( $is_terminal_alignment_type{$type} );
- }
+ # reset line location if necessary
+ $nline = 0 if ( $i_opening < $ri_start->[$nline] );
- # Do not align leading ': (' or '. ('. This would prevent
- # alignment in something like the following:
- # $extra_space .=
- # ( $input_line_number < 10 ) ? " "
- # : ( $input_line_number < 100 ) ? " "
- # : "";
- # or
- # $code =
- # ( $case_matters ? $accessor : " lc($accessor) " )
- # . ( $yesno ? " eq " : " ne " )
+ # find the correct line
+ unless ( $i_opening > $ri_last->[-1] ) {
+ while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
+ }
- # Also, do not align a ( following a leading ? so we can
- # align something like this:
- # $converter{$_}->{ushortok} =
- # $PDL::IO::Pic::biggrays
- # ? ( m/GIF/ ? 0 : 1 )
- # : ( m/GIF|RAST|IFF/ ? 0 : 1 );
- if ( $i == $ibeg + 2
- && $types_to_go[$ibeg] =~ /^[\.\:\?]$/
- && $types_to_go[ $i - 1 ] eq 'b' )
- {
- $alignment_type = "";
- }
+ # Error - token index is out of bounds - shouldn't happen
+ # A program bug has been introduced in one of the calling routines.
+ # We better stop here.
+ else {
+ my $i_last_line = $ri_last->[-1];
+ Fault(<<EOM);
+Program bug in call to lookup_opening_indentation - index out of range
+ called with index i_opening=$i_opening > $i_last_line = max index of last line
+This batch has max index = $max_index_to_go,
+EOM
+ report_definite_bug(); # old coding, will not get here
+ $nline = $#{$ri_last};
+ }
- # Certain tokens only align at the same level as the
- # initial line level
- if ( $is_low_level_alignment_token{$token}
- && $levels_to_go[$i] != $level_beg )
- {
- $alignment_type = "";
- }
+ $rindentation_list->[0] =
+ $nline; # save line number to start looking next call
+ my $ibeg = $ri_start->[$nline];
+ my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
+ my $is_leading = ( $ibeg == $i_opening );
+ return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
+}
- # For a paren after keyword, only align something like this:
- # if ( $a ) { &a }
- # elsif ( $b ) { &b }
- if ( $token eq '(' ) {
+{ ## begin closure terminal_type_i
- if ( $vert_last_nonblank_type eq 'k' ) {
- $alignment_type = ""
- unless $vert_last_nonblank_token =~
- /^(if|unless|elsif)$/;
- }
- }
+ my %is_sort_map_grep_eval_do;
- # be sure the alignment tokens are unique
- # This didn't work well: reason not determined
- # if ($token ne $type) {$alignment_type .= $type}
- }
+ BEGIN {
+ my @q = qw(sort map grep eval do);
+ @is_sort_map_grep_eval_do{@q} = (1) x scalar(@q);
+ }
- # NOTE: This is deactivated because it causes the previous
- # if/elsif alignment to fail
- #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
- #{ $alignment_type = $type; }
+ sub terminal_type_i {
- if ($alignment_type) {
- $last_vertical_alignment_before_index = $i;
- }
+ # returns type of last token on this line (terminal token), as follows:
+ # returns # for a full-line comment
+ # returns ' ' for a blank line
+ # otherwise returns final token type
- #--------------------------------------------------------
- # Next see if we want to align AFTER the previous nonblank
- #--------------------------------------------------------
+ my ( $ibeg, $iend ) = @_;
- # We want to line up ',' and interior ';' tokens, with the added
- # space AFTER these tokens. (Note: interior ';' is included
- # because it may occur in short blocks).
- if (
+ # Start at the end and work backwards
+ my $i = $iend;
+ my $type_i = $types_to_go[$i];
- # we haven't already set it
- !$alignment_type
+ # Check for side comment
+ if ( $type_i eq '#' ) {
+ $i--;
+ if ( $i < $ibeg ) {
+ return wantarray ? ( $type_i, $ibeg ) : $type_i;
+ }
+ $type_i = $types_to_go[$i];
+ }
- # and its not the first token of the line
- && ( $i > $ibeg )
+ # Skip past a blank
+ if ( $type_i eq 'b' ) {
+ $i--;
+ if ( $i < $ibeg ) {
+ return wantarray ? ( $type_i, $ibeg ) : $type_i;
+ }
+ $type_i = $types_to_go[$i];
+ }
- # and it follows a blank
- && $types_to_go[ $i - 1 ] eq 'b'
+ # Found it..make sure it is a BLOCK termination,
+ # but hide a terminal } after sort/grep/map because it is not
+ # necessarily the end of the line. (terminal.t)
+ my $block_type = $block_type_to_go[$i];
+ if (
+ $type_i eq '}'
+ && ( !$block_type
+ || ( $is_sort_map_grep_eval_do{$block_type} ) )
+ )
+ {
+ $type_i = 'b';
+ }
+ return wantarray ? ( $type_i, $i ) : $type_i;
+ }
- # and previous token IS one of these:
- && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
+} ## end closure terminal_type_i
- # and it's NOT one of these
- && ( $type !~ /^[b\#\)\]\}]$/ )
+sub pad_array_to_go {
- # then go ahead and align
- )
+ # To simplify coding in scan_list and set_bond_strengths, it helps to
+ # create some extra blank tokens at the end of the arrays. We also add
+ # some undef's to help guard against using invalid data.
+ my ($self) = @_;
+ $K_to_go[ $max_index_to_go + 1 ] = undef;
+ $tokens_to_go[ $max_index_to_go + 1 ] = '';
+ $tokens_to_go[ $max_index_to_go + 2 ] = '';
+ $tokens_to_go[ $max_index_to_go + 3 ] = undef;
+ $types_to_go[ $max_index_to_go + 1 ] = 'b';
+ $types_to_go[ $max_index_to_go + 2 ] = 'b';
+ $types_to_go[ $max_index_to_go + 3 ] = undef;
+ $nesting_depth_to_go[ $max_index_to_go + 2 ] = undef;
+ $nesting_depth_to_go[ $max_index_to_go + 1 ] =
+ $nesting_depth_to_go[$max_index_to_go];
- {
- $alignment_type = $vert_last_nonblank_type;
- }
+ # /^[R\}\)\]]$/
+ if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
+ if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
- #--------------------------------------------------------
- # then store the value
- #--------------------------------------------------------
- $ralignment_type_to_go->[$i] = $alignment_type;
- if ( $type ne 'b' ) {
- $vert_last_nonblank_type = $type;
- $vert_last_nonblank_token = $token;
- $vert_last_nonblank_block_type = $block_type;
- }
+ # Nesting depths are equivalent to the _SLEVEL_ variable which is
+ # clipped to be >=0 in sub write_line, so it should not be possible
+ # to get here unless the code has a bracing error which leaves a
+ # closing brace with zero nesting depth.
+ unless ( get_saw_brace_error() ) {
+ warning(
+"Program bug in pad_array_to_go: hit nesting error which should have been caught\n"
+ );
+ report_definite_bug();
}
}
- return $ralignment_type_to_go;
+ else {
+ $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
+ }
+ }
+
+ # /^[L\{\(\[]$/
+ elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
+ $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
}
+ return;
}
-sub terminal_type_i {
+sub break_all_chain_tokens {
- # returns type of last token on this line (terminal token), as follows:
- # returns # for a full-line comment
- # returns ' ' for a blank line
- # otherwise returns final token type
+ # scan the current breakpoints looking for breaks at certain "chain
+ # operators" (. : && || + etc) which often occur repeatedly in a long
+ # statement. If we see a break at any one, break at all similar tokens
+ # within the same container.
+ #
+ my ( $self, $ri_left, $ri_right ) = @_;
- my ( $self, $ibeg, $iend ) = @_;
+ my %saw_chain_type;
+ my %left_chain_type;
+ my %right_chain_type;
+ my %interior_chain_type;
+ my $nmax = @{$ri_right} - 1;
- # Start at the end and work backwards
- my $i = $iend;
- my $type_i = $types_to_go[$i];
+ # scan the left and right end tokens of all lines
+ my $count = 0;
+ for my $n ( 0 .. $nmax ) {
+ my $il = $ri_left->[$n];
+ my $ir = $ri_right->[$n];
+ my $typel = $types_to_go[$il];
+ my $typer = $types_to_go[$ir];
+ $typel = '+' if ( $typel eq '-' ); # treat + and - the same
+ $typer = '+' if ( $typer eq '-' );
+ $typel = '*' if ( $typel eq '/' ); # treat * and / the same
+ $typer = '*' if ( $typer eq '/' );
+ my $tokenl = $tokens_to_go[$il];
+ my $tokenr = $tokens_to_go[$ir];
- # Check for side comment
- if ( $type_i eq '#' ) {
- $i--;
- if ( $i < $ibeg ) {
- return wantarray ? ( $type_i, $ibeg ) : $type_i;
+ if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
+ next if ( $typel eq '?' );
+ push @{ $left_chain_type{$typel} }, $il;
+ $saw_chain_type{$typel} = 1;
+ $count++;
+ }
+ if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
+ next if ( $typer eq '?' );
+ push @{ $right_chain_type{$typer} }, $ir;
+ $saw_chain_type{$typer} = 1;
+ $count++;
}
- $type_i = $types_to_go[$i];
}
+ return unless $count;
- # Skip past a blank
- if ( $type_i eq 'b' ) {
- $i--;
- if ( $i < $ibeg ) {
- return wantarray ? ( $type_i, $ibeg ) : $type_i;
+ # now look for any interior tokens of the same types
+ $count = 0;
+ for my $n ( 0 .. $nmax ) {
+ my $il = $ri_left->[$n];
+ my $ir = $ri_right->[$n];
+ foreach my $i ( $il + 1 .. $ir - 1 ) {
+ my $type = $types_to_go[$i];
+ $type = '+' if ( $type eq '-' );
+ $type = '*' if ( $type eq '/' );
+ if ( $saw_chain_type{$type} ) {
+ push @{ $interior_chain_type{$type} }, $i;
+ $count++;
+ }
}
- $type_i = $types_to_go[$i];
}
+ return unless $count;
- # Found it..make sure it is a BLOCK termination,
- # but hide a terminal } after sort/grep/map because it is not
- # necessarily the end of the line. (terminal.t)
- my $block_type = $block_type_to_go[$i];
- if (
- $type_i eq '}'
- && ( !$block_type
- || ( $is_sort_map_grep_eval_do{$block_type} ) )
- )
- {
- $type_i = 'b';
- }
- return wantarray ? ( $type_i, $i ) : $type_i;
-}
+ # now make a list of all new break points
+ my @insert_list;
-sub terminal_type_K {
+ # loop over all chain types
+ foreach my $type ( keys %saw_chain_type ) {
- # returns type of last token on this line (terminal token), as follows:
- # returns # for a full-line comment
- # returns ' ' for a blank line
- # otherwise returns final token type
+ # quit if just ONE continuation line with leading . For example--
+ # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
+ # . $contents;
+ last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
- my ( $self, $Kbeg, $Kend ) = @_;
- my $rLL = $self->{rLL};
+ # loop over all interior chain tokens
+ foreach my $itest ( @{ $interior_chain_type{$type} } ) {
- if ( !defined($Kend) ) {
- Fault("Error in terminal_type_K: Kbeg=$Kbeg > $Kend=Kend");
- }
+ # loop over all left end tokens of same type
+ if ( $left_chain_type{$type} ) {
+ next if $nobreak_to_go[ $itest - 1 ];
+ foreach my $i ( @{ $left_chain_type{$type} } ) {
+ next unless $self->in_same_container_i( $i, $itest );
+ push @insert_list, $itest - 1;
- # Start at the end and work backwards
- my $K = $Kend;
- my $type_K = $rLL->[$K]->[_TYPE_];
+ # Break at matching ? if this : is at a different level.
+ # For example, the ? before $THRf_DEAD in the following
+ # should get a break if its : gets a break.
+ #
+ # my $flags =
+ # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
+ # : ( $_ & 4 ) ? $THRf_R_DETACHED
+ # : $THRf_R_JOINABLE;
+ if ( $type eq ':'
+ && $levels_to_go[$i] != $levels_to_go[$itest] )
+ {
+ my $i_question = $mate_index_to_go[$itest];
+ if ( $i_question > 0 ) {
+ push @insert_list, $i_question - 1;
+ }
+ }
+ last;
+ }
+ }
- # Check for side comment
- if ( $type_K eq '#' ) {
- $K--;
- if ( $K < $Kbeg ) {
- return wantarray ? ( $type_K, $Kbeg ) : $type_K;
- }
- $type_K = $rLL->[$K]->[_TYPE_];
- }
+ # loop over all right end tokens of same type
+ if ( $right_chain_type{$type} ) {
+ next if $nobreak_to_go[$itest];
+ foreach my $i ( @{ $right_chain_type{$type} } ) {
+ next unless $self->in_same_container_i( $i, $itest );
+ push @insert_list, $itest;
- # Skip past a blank
- if ( $type_K eq 'b' ) {
- $K--;
- if ( $K < $Kbeg ) {
- return wantarray ? ( $type_K, $Kbeg ) : $type_K;
+ # break at matching ? if this : is at a different level
+ if ( $type eq ':'
+ && $levels_to_go[$i] != $levels_to_go[$itest] )
+ {
+ my $i_question = $mate_index_to_go[$itest];
+ if ( $i_question >= 0 ) {
+ push @insert_list, $i_question;
+ }
+ }
+ last;
+ }
+ }
}
- $type_K = $rLL->[$K]->[_TYPE_];
}
- # found it..make sure it is a BLOCK termination,
- # but hide a terminal } after sort/grep/map because it is not
- # necessarily the end of the line. (terminal.t)
- my $block_type = $rLL->[$K]->[_BLOCK_TYPE_];
- if (
- $type_K eq '}'
- && ( !$block_type
- || ( $is_sort_map_grep_eval_do{$block_type} ) )
- )
- {
- $type_K = 'b';
+ # insert any new break points
+ if (@insert_list) {
+ $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
}
- return wantarray ? ( $type_K, $K ) : $type_K;
-
+ return;
}
-{ # set_bond_strengths
+sub insert_additional_breaks {
- my %is_good_keyword_breakpoint;
- my %is_lt_gt_le_ge;
+ # this routine will add line breaks at requested locations after
+ # sub set_continuation_breaks has made preliminary breaks.
- my %binary_bond_strength;
- my %nobreak_lhs;
- my %nobreak_rhs;
+ my ( $self, $ri_break_list, $ri_first, $ri_last ) = @_;
+ my $i_f;
+ my $i_l;
+ my $line_number = 0;
+ foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
- my @bias_tokens;
- my $delta_bias;
+ next if ( $nobreak_to_go[$i_break_left] );
+
+ $i_f = $ri_first->[$line_number];
+ $i_l = $ri_last->[$line_number];
+ while ( $i_break_left >= $i_l ) {
+ $line_number++;
+
+ # shouldn't happen unless caller passes bad indexes
+ if ( $line_number >= @{$ri_last} ) {
+ warning(
+"Non-fatal program bug: couldn't set break at $i_break_left\n"
+ );
+ report_definite_bug();
+ return;
+ }
+ $i_f = $ri_first->[$line_number];
+ $i_l = $ri_last->[$line_number];
+ }
+
+ # Do not leave a blank at the end of a line; back up if necessary
+ if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
- sub bias_table_key {
- my ( $type, $token ) = @_;
- my $bias_table_key = $type;
- if ( $type eq 'k' ) {
- $bias_table_key = $token;
- if ( $token eq 'err' ) { $bias_table_key = 'or' }
+ my $i_break_right = $inext_to_go[$i_break_left];
+ if ( $i_break_left >= $i_f
+ && $i_break_left < $i_l
+ && $i_break_right > $i_f
+ && $i_break_right <= $i_l )
+ {
+ splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
+ splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
}
- return $bias_table_key;
}
+ return;
+}
- sub initialize_bond_strength_hashes {
+sub in_same_container_i {
- my @q;
- @q = qw(if unless while until for foreach);
- @is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
+ # check to see if tokens at i1 and i2 are in the
+ # same container, and not separated by a comma, ? or :
+ # This is an interface between the _to_go arrays to the rLL array
+ my ( $self, $i1, $i2 ) = @_;
- @q = qw(lt gt le ge);
- @is_lt_gt_le_ge{@q} = (1) x scalar(@q);
- #
- # The decision about where to break a line depends upon a "bond
- # strength" between tokens. The LOWER the bond strength, the MORE
- # likely a break. A bond strength may be any value but to simplify
- # things there are several pre-defined strength levels:
+ # quick check
+ return if ( $parent_seqno_to_go[$i1] ne $parent_seqno_to_go[$i2] );
- # NO_BREAK => 10000;
- # VERY_STRONG => 100;
- # STRONG => 2.1;
- # NOMINAL => 1.1;
- # WEAK => 0.8;
- # VERY_WEAK => 0.55;
-
- # The strength values are based on trial-and-error, and need to be
- # tweaked occasionally to get desired results. Some comments:
- #
- # 1. Only relative strengths are important. small differences
- # in strengths can make big formatting differences.
- # 2. Each indentation level adds one unit of bond strength.
- # 3. A value of NO_BREAK makes an unbreakable bond
- # 4. A value of VERY_WEAK is the strength of a ','
- # 5. Values below NOMINAL are considered ok break points.
- # 6. Values above NOMINAL are considered poor break points.
- #
- # The bond strengths should roughly follow precedence order where
- # possible. If you make changes, please check the results very
- # carefully on a variety of scripts. Testing with the -extrude
- # options is particularly helpful in exercising all of the rules.
-
- # Wherever possible, bond strengths are defined in the following
- # tables. There are two main stages to setting bond strengths and
- # two types of tables:
- #
- # The first stage involves looking at each token individually and
- # defining left and right bond strengths, according to if we want
- # to break to the left or right side, and how good a break point it
- # is. For example tokens like =, ||, && make good break points and
- # will have low strengths, but one might want to break on either
- # side to put them at the end of one line or beginning of the next.
- #
- # The second stage involves looking at certain pairs of tokens and
- # defining a bond strength for that particular pair. This second
- # stage has priority.
+ # full check
+ return $self->in_same_container_K( $K_to_go[$i1], $K_to_go[$i2] );
+}
- #---------------------------------------------------------------
- # Bond Strength BEGIN Section 1.
- # Set left and right bond strengths of individual tokens.
- #---------------------------------------------------------------
+{ ## begin closure in_same_container_K
+ my $ris_break_token;
+ my $ris_comma_token;
- # NOTE: NO_BREAK's set in this section first are HINTS which will
- # probably not be honored. Essential NO_BREAKS's should be set in
- # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
- # of this subroutine.
+ BEGIN {
- # Note that we are setting defaults in this section. The user
- # cannot change bond strengths but can cause the left and right
- # bond strengths of any token type to be swapped through the use of
- # the -wba and -wbb flags. In this way the user can determine if a
- # breakpoint token should appear at the end of one line or the
- # beginning of the next line.
+ # all cases break on seeing commas at same level
+ my @q = qw( => );
+ push @q, ',';
+ @{$ris_comma_token}{@q} = (1) x scalar(@q);
- # The hash keys in this section are token types, plus the text of
- # certain keywords like 'or', 'and'.
+ # Non-ternary text also breaks on seeing any of qw(? : || or )
+ # Example: we would not want to break at any of these .'s
+ # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
+ push @q, qw( or || ? : );
+ @{$ris_break_token}{@q} = (1) x scalar(@q);
+ }
- # no break around possible filehandle
- $left_bond_strength{'Z'} = NO_BREAK;
- $right_bond_strength{'Z'} = NO_BREAK;
+ sub in_same_container_K {
- # never put a bare word on a new line:
- # example print (STDERR, "bla"); will fail with break after (
- $left_bond_strength{'w'} = NO_BREAK;
+ # Check to see if tokens at K1 and K2 are in the same container,
+ # and not separated by certain characters: => , ? : || or
+ # This version uses the newer $rLL data structure.
- # blanks always have infinite strength to force breaks after
- # real tokens
- $right_bond_strength{'b'} = NO_BREAK;
+ my ( $self, $K1, $K2 ) = @_;
+ if ( $K2 < $K1 ) { ( $K1, $K2 ) = ( $K2, $K1 ) }
+ my $rLL = $self->[_rLL_];
+ my $depth_1 = $rLL->[$K1]->[_SLEVEL_];
+ return if ( $depth_1 < 0 );
+ return unless ( $rLL->[$K2]->[_SLEVEL_] == $depth_1 );
- # try not to break on exponentation
- @q = qw# ** .. ... <=> #;
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} = (STRONG) x scalar(@q);
+ # Select character set to scan for
+ my $type_1 = $rLL->[$K1]->[_TYPE_];
+ my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token;
- # The comma-arrow has very low precedence but not a good break point
- $left_bond_strength{'=>'} = NO_BREAK;
- $right_bond_strength{'=>'} = NOMINAL;
+ # Fast preliminary loop to verify that tokens are in the same container
+ my $KK = $K1;
+ while (1) {
+ $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
+ last if !defined($KK);
+ last if ( $KK >= $K2 );
+ my $depth_K = $rLL->[$KK]->[_SLEVEL_];
+ return if ( $depth_K < $depth_1 );
+ next if ( $depth_K > $depth_1 );
+ if ( $type_1 ne ':' ) {
+ my $tok_K = $rLL->[$KK]->[_TOKEN_];
+ return if ( $tok_K eq '?' || $tok_K eq ':' );
+ }
+ }
- # ok to break after label
- $left_bond_strength{'J'} = NO_BREAK;
- $right_bond_strength{'J'} = NOMINAL;
- $left_bond_strength{'j'} = STRONG;
- $right_bond_strength{'j'} = STRONG;
- $left_bond_strength{'A'} = STRONG;
- $right_bond_strength{'A'} = STRONG;
+ # Slow loop checking for certain characters
- $left_bond_strength{'->'} = STRONG;
- $right_bond_strength{'->'} = VERY_STRONG;
+ ###########################################################
+ # This is potentially a slow routine and not critical.
+ # For safety just give up for large differences.
+ # See test file 'infinite_loop.txt'
+ ###########################################################
+ return if ( $K2 - $K1 > 200 );
- $left_bond_strength{'CORE::'} = NOMINAL;
- $right_bond_strength{'CORE::'} = NO_BREAK;
+ foreach my $K ( $K1 + 1 .. $K2 - 1 ) {
- # breaking AFTER modulus operator is ok:
- @q = qw< % >;
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} =
- ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
+ my $depth_K = $rLL->[$K]->[_SLEVEL_];
+ next if ( $depth_K > $depth_1 );
+ return if ( $depth_K < $depth_1 ); # redundant, checked above
+ my $tok = $rLL->[$K]->[_TOKEN_];
+ return if ( $rbreak->{$tok} );
+ }
+ return 1;
+ }
+} ## end closure in_same_container_K
- # Break AFTER math operators * and /
- @q = qw< * / x >;
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
+sub break_equals {
- # Break AFTER weakest math operators + and -
- # Make them weaker than * but a bit stronger than '.'
- @q = qw< + - >;
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} =
- ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
+ # Look for assignment operators that could use a breakpoint.
+ # For example, in the following snippet
+ #
+ # $HOME = $ENV{HOME}
+ # || $ENV{LOGDIR}
+ # || $pw[7]
+ # || die "no home directory for user $<";
+ #
+ # we could break at the = to get this, which is a little nicer:
+ # $HOME =
+ # $ENV{HOME}
+ # || $ENV{LOGDIR}
+ # || $pw[7]
+ # || die "no home directory for user $<";
+ #
+ # The logic here follows the logic in set_logical_padding, which
+ # will add the padding in the second line to improve alignment.
+ #
+ my ( $self, $ri_left, $ri_right ) = @_;
+ my $nmax = @{$ri_right} - 1;
+ return unless ( $nmax >= 2 );
- # breaking BEFORE these is just ok:
- @q = qw# >> << #;
- @right_bond_strength{@q} = (STRONG) x scalar(@q);
- @left_bond_strength{@q} = (NOMINAL) x scalar(@q);
+ # scan the left ends of first two lines
+ my $tokbeg = "";
+ my $depth_beg;
+ for my $n ( 1 .. 2 ) {
+ my $il = $ri_left->[$n];
+ my $typel = $types_to_go[$il];
+ my $tokenl = $tokens_to_go[$il];
- # breaking before the string concatenation operator seems best
- # because it can be hard to see at the end of a line
- $right_bond_strength{'.'} = STRONG;
- $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
+ my $has_leading_op = ( $tokenl =~ /^\w/ )
+ ? $is_chain_operator{$tokenl} # + - * / : ? && ||
+ : $is_chain_operator{$typel}; # and, or
+ return unless ($has_leading_op);
+ if ( $n > 1 ) {
+ return
+ unless ( $tokenl eq $tokbeg
+ && $nesting_depth_to_go[$il] eq $depth_beg );
+ }
+ $tokbeg = $tokenl;
+ $depth_beg = $nesting_depth_to_go[$il];
+ }
- @q = qw< } ] ) R >;
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
+ # now look for any interior tokens of the same types
+ my $il = $ri_left->[0];
+ my $ir = $ri_right->[0];
- # make these a little weaker than nominal so that they get
- # favored for end-of-line characters
- @q = qw< != == =~ !~ ~~ !~~ >;
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} =
- ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
+ # now make a list of all new break points
+ my @insert_list;
+ for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
+ my $type = $types_to_go[$i];
+ if ( $is_assignment{$type}
+ && $nesting_depth_to_go[$i] eq $depth_beg )
+ {
+ if ( $want_break_before{$type} ) {
+ push @insert_list, $i - 1;
+ }
+ else {
+ push @insert_list, $i;
+ }
+ }
+ }
- # break AFTER these
- @q = qw# < > | & >= <= #;
- @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
- @right_bond_strength{@q} =
- ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
+ # Break after a 'return' followed by a chain of operators
+ # return ( $^O !~ /win32|dos/i )
+ # && ( $^O ne 'VMS' )
+ # && ( $^O ne 'OS2' )
+ # && ( $^O ne 'MacOS' );
+ # To give:
+ # return
+ # ( $^O !~ /win32|dos/i )
+ # && ( $^O ne 'VMS' )
+ # && ( $^O ne 'OS2' )
+ # && ( $^O ne 'MacOS' );
+ my $i = 0;
+ if ( $types_to_go[$i] eq 'k'
+ && $tokens_to_go[$i] eq 'return'
+ && $ir > $il
+ && $nesting_depth_to_go[$i] eq $depth_beg )
+ {
+ push @insert_list, $i;
+ }
- # breaking either before or after a quote is ok
- # but bias for breaking before a quote
- $left_bond_strength{'Q'} = NOMINAL;
- $right_bond_strength{'Q'} = NOMINAL + 0.02;
- $left_bond_strength{'q'} = NOMINAL;
- $right_bond_strength{'q'} = NOMINAL;
+ return unless (@insert_list);
- # starting a line with a keyword is usually ok
- $left_bond_strength{'k'} = NOMINAL;
-
- # we usually want to bond a keyword strongly to what immediately
- # follows, rather than leaving it stranded at the end of a line
- $right_bond_strength{'k'} = STRONG;
+ # One final check...
+ # scan second and third lines and be sure there are no assignments
+ # we want to avoid breaking at an = to make something like this:
+ # unless ( $icon =
+ # $html_icons{"$type-$state"}
+ # or $icon = $html_icons{$type}
+ # or $icon = $html_icons{$state} )
+ for my $n ( 1 .. 2 ) {
+ my $il = $ri_left->[$n];
+ my $ir = $ri_right->[$n];
+ foreach my $i ( $il + 1 .. $ir ) {
+ my $type = $types_to_go[$i];
+ return
+ if ( $is_assignment{$type}
+ && $nesting_depth_to_go[$i] eq $depth_beg );
+ }
+ }
- $left_bond_strength{'G'} = NOMINAL;
- $right_bond_strength{'G'} = STRONG;
+ # ok, insert any new break point
+ if (@insert_list) {
+ $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
+ }
+ return;
+}
- # assignment operators
- @q = qw(
- = **= += *= &= <<= &&=
- -= /= |= >>= ||= //=
- .= %= ^=
- x=
- );
+{ ## begin closure recombine_breakpoints
- # Default is to break AFTER various assignment operators
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} =
- ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
+ # This routine is called once per batch to see if it would be better
+ # to combine some of the lines into which the batch has been broken.
- # Default is to break BEFORE '&&' and '||' and '//'
- # set strength of '||' to same as '=' so that chains like
- # $a = $b || $c || $d will break before the first '||'
- $right_bond_strength{'||'} = NOMINAL;
- $left_bond_strength{'||'} = $right_bond_strength{'='};
+ my %is_amp_amp;
+ my %is_ternary;
+ my %is_math_op;
+ my %is_plus_minus;
+ my %is_mult_div;
- # same thing for '//'
- $right_bond_strength{'//'} = NOMINAL;
- $left_bond_strength{'//'} = $right_bond_strength{'='};
+ BEGIN {
- # set strength of && a little higher than ||
- $right_bond_strength{'&&'} = NOMINAL;
- $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
+ my @q;
+ @q = qw( && || );
+ @is_amp_amp{@q} = (1) x scalar(@q);
- $left_bond_strength{';'} = VERY_STRONG;
- $right_bond_strength{';'} = VERY_WEAK;
- $left_bond_strength{'f'} = VERY_STRONG;
+ @q = qw( ? : );
+ @is_ternary{@q} = (1) x scalar(@q);
- # make right strength of for ';' a little less than '='
- # to make for contents break after the ';' to avoid this:
- # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
- # $number_of_fields )
- # and make it weaker than ',' and 'and' too
- $right_bond_strength{'f'} = VERY_WEAK - 0.03;
+ @q = qw( + - * / );
+ @is_math_op{@q} = (1) x scalar(@q);
- # The strengths of ?/: should be somewhere between
- # an '=' and a quote (NOMINAL),
- # make strength of ':' slightly less than '?' to help
- # break long chains of ? : after the colons
- $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
- $right_bond_strength{':'} = NO_BREAK;
- $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
- $right_bond_strength{'?'} = NO_BREAK;
+ @q = qw( + - );
+ @is_plus_minus{@q} = (1) x scalar(@q);
- $left_bond_strength{','} = VERY_STRONG;
- $right_bond_strength{','} = VERY_WEAK;
+ @q = qw( * / );
+ @is_mult_div{@q} = (1) x scalar(@q);
+ }
- # remaining digraphs and trigraphs not defined above
- @q = qw( :: <> ++ --);
- @left_bond_strength{@q} = (WEAK) x scalar(@q);
- @right_bond_strength{@q} = (STRONG) x scalar(@q);
+ sub Debug_dump_breakpoints {
- # Set bond strengths of certain keywords
- # make 'or', 'err', 'and' slightly weaker than a ','
- $left_bond_strength{'and'} = VERY_WEAK - 0.01;
- $left_bond_strength{'or'} = VERY_WEAK - 0.02;
- $left_bond_strength{'err'} = VERY_WEAK - 0.02;
- $left_bond_strength{'xor'} = NOMINAL;
- $right_bond_strength{'and'} = NOMINAL;
- $right_bond_strength{'or'} = NOMINAL;
- $right_bond_strength{'err'} = NOMINAL;
- $right_bond_strength{'xor'} = STRONG;
+ # Debug routine to dump current breakpoints...not normally called
+ # We are given indexes to the current lines:
+ # $ri_beg = ref to array of BEGinning indexes of each line
+ # $ri_end = ref to array of ENDing indexes of each line
+ my ( $self, $ri_beg, $ri_end, $msg ) = @_;
+ print STDERR "----Dumping breakpoints from: $msg----\n";
+ for my $n ( 0 .. @{$ri_end} - 1 ) {
+ my $ibeg = $ri_beg->[$n];
+ my $iend = $ri_end->[$n];
+ my $text = "";
+ foreach my $i ( $ibeg .. $iend ) {
+ $text .= $tokens_to_go[$i];
+ }
+ print STDERR "$n ($ibeg:$iend) $text\n";
+ }
+ print STDERR "----\n";
+ return;
+ }
- #---------------------------------------------------------------
- # Bond Strength BEGIN Section 2.
- # Set binary rules for bond strengths between certain token types.
- #---------------------------------------------------------------
+ sub delete_one_line_semicolons {
- # We have a little problem making tables which apply to the
- # container tokens. Here is a list of container tokens and
- # their types:
- #
- # type tokens // meaning
- # { {, [, ( // indent
- # } }, ], ) // outdent
- # [ [ // left non-structural [ (enclosing an array index)
- # ] ] // right non-structural square bracket
- # ( ( // left non-structural paren
- # ) ) // right non-structural paren
- # L { // left non-structural curly brace (enclosing a key)
- # R } // right non-structural curly brace
- #
- # Some rules apply to token types and some to just the token
- # itself. We solve the problem by combining type and token into a
- # new hash key for the container types.
- #
- # If a rule applies to a token 'type' then we need to make rules
- # for each of these 'type.token' combinations:
- # Type Type.Token
- # { {{, {[, {(
- # [ [[
- # ( ((
- # L L{
- # } }}, }], })
- # ] ]]
- # ) ))
- # R R}
- #
- # If a rule applies to a token then we need to make rules for
- # these 'type.token' combinations:
- # Token Type.Token
- # { {{, L{
- # [ {[, [[
- # ( {(, ((
- # } }}, R}
- # ] }], ]]
- # ) }), ))
+ my ( $self, $ri_beg, $ri_end ) = @_;
+ my $rLL = $self->[_rLL_];
+ my $K_opening_container = $self->[_K_opening_container_];
- # allow long lines before final { in an if statement, as in:
- # if (..........
- # ..........)
- # {
- #
- # Otherwise, the line before the { tends to be too short.
+ # Walk down the lines of this batch and delete any semicolons
+ # terminating one-line blocks;
+ my $nmax = @{$ri_end} - 1;
- $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
- $binary_bond_strength{'(('}{'{{'} = NOMINAL;
+ foreach my $n ( 0 .. $nmax ) {
+ my $i_beg = $ri_beg->[$n];
+ my $i_e = $ri_end->[$n];
+ my $K_beg = $K_to_go[$i_beg];
+ my $K_e = $K_to_go[$i_e];
+ my $K_end = $K_e;
+ my $type_end = $rLL->[$K_end]->[_TYPE_];
+ if ( $type_end eq '#' ) {
+ $K_end = $self->K_previous_nonblank($K_end);
+ if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; }
+ }
- # break on something like '} (', but keep this stronger than a ','
- # example is in 'howe.pl'
- $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
- $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
+ # we are looking for a line ending in closing brace
+ next
+ unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' );
- # keep matrix and hash indices together
- # but make them a little below STRONG to allow breaking open
- # something like {'some-word'}{'some-very-long-word'} at the }{
- # (bracebrk.t)
- $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
- $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
- $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
- $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
+ # ...and preceded by a semicolon on the same line
+ my $K_semicolon = $self->K_previous_nonblank($K_end);
+ next unless defined($K_semicolon);
+ my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg );
+ next if ( $i_semicolon <= $i_beg );
+ next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' );
- # increase strength to the point where a break in the following
- # will be after the opening paren rather than at the arrow:
- # $a->$b($c);
- $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
+ # Safety check - shouldn't happen - not critical
+ # This is not worth throwing a Fault, except in DEVEL_MODE
+ if ( $types_to_go[$i_semicolon] ne ';' ) {
+ DEVEL_MODE
+ && Fault("unexpected type looking for semicolon");
+ next;
+ }
- $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
- $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
- $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
- $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
- $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
- $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ # ... with the corresponding opening brace on the same line
+ my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_];
+ my $K_opening = $K_opening_container->{$type_sequence};
+ next unless ( defined($K_opening) );
+ my $i_opening = $i_beg + ( $K_opening - $K_beg );
+ next if ( $i_opening < $i_beg );
- $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
- $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
- $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
- $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
+ # ... and only one semicolon between these braces
+ my $semicolon_count = 0;
+ foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) {
+ if ( $rLL->[$K]->[_TYPE_] eq ';' ) {
+ $semicolon_count++;
+ last;
+ }
+ }
+ next if ($semicolon_count);
- #---------------------------------------------------------------
- # Binary NO_BREAK rules
- #---------------------------------------------------------------
+ # ...ok, then make the semicolon invisible
+ $tokens_to_go[$i_semicolon] = "";
+ $token_lengths_to_go[$i_semicolon] = 0;
+ $rLL->[$K_semicolon]->[_TOKEN_] = "";
+ $rLL->[$K_semicolon]->[_TOKEN_LENGTH_] = 0;
+ }
+ return;
+ }
- # use strict requires that bare word and => not be separated
- $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
- $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
+ use constant DEBUG_RECOMBINE => 0;
- # Never break between a bareword and a following paren because
- # perl may give an error. For example, if a break is placed
- # between 'to_filehandle' and its '(' the following line will
- # give a syntax error [Carp.pm]: my( $no) =fileno(
- # to_filehandle( $in)) ;
- $binary_bond_strength{'C'}{'(('} = NO_BREAK;
- $binary_bond_strength{'C'}{'{('} = NO_BREAK;
- $binary_bond_strength{'U'}{'(('} = NO_BREAK;
- $binary_bond_strength{'U'}{'{('} = NO_BREAK;
+ sub recombine_breakpoints {
- # use strict requires that bare word within braces not start new
- # line
- $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
+ # sub set_continuation_breaks is very liberal in setting line breaks
+ # for long lines, always setting breaks at good breakpoints, even
+ # when that creates small lines. Sometimes small line fragments
+ # are produced which would look better if they were combined.
+ # That's the task of this routine.
+ #
+ # We are given indexes to the current lines:
+ # $ri_beg = ref to array of BEGinning indexes of each line
+ # $ri_end = ref to array of ENDing indexes of each line
+ my ( $self, $ri_beg, $ri_end ) = @_;
- $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
+ my $rK_weld_right = $self->[_rK_weld_right_];
+ my $rK_weld_left = $self->[_rK_weld_left_];
- # use strict requires that bare word and => not be separated
- $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
+ # Make a list of all good joining tokens between the lines
+ # n-1 and n.
+ my @joint;
+ my $nmax = @{$ri_end} - 1;
+ for my $n ( 1 .. $nmax ) {
+ my $ibeg_1 = $ri_beg->[ $n - 1 ];
+ my $iend_1 = $ri_end->[ $n - 1 ];
+ my $iend_2 = $ri_end->[$n];
+ my $ibeg_2 = $ri_beg->[$n];
- # use strict does not allow separating type info from trailing { }
- # testfile is readmail.pl
- $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
- $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
+ my ( $itok, $itokp, $itokm );
- # As a defensive measure, do not break between a '(' and a
- # filehandle. In some cases, this can cause an error. For
- # example, the following program works:
- # my $msg="hi!\n";
- # print
- # ( STDOUT
- # $msg
- # );
- #
- # But this program fails:
- # my $msg="hi!\n";
- # print
- # (
- # STDOUT
- # $msg
- # );
- #
- # This is normally only a problem with the 'extrude' option
- $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
- $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
+ foreach my $itest ( $iend_1, $ibeg_2 ) {
+ my $type = $types_to_go[$itest];
+ if ( $is_math_op{$type}
+ || $is_amp_amp{$type}
+ || $is_assignment{$type}
+ || $type eq ':' )
+ {
+ $itok = $itest;
+ }
+ }
+ $joint[$n] = [$itok];
+ }
- # never break between sub name and opening paren
- $binary_bond_strength{'w'}{'(('} = NO_BREAK;
- $binary_bond_strength{'w'}{'{('} = NO_BREAK;
+ my $more_to_do = 1;
- # keep '}' together with ';'
- $binary_bond_strength{'}}'}{';'} = NO_BREAK;
+ # We keep looping over all of the lines of this batch
+ # until there are no more possible recombinations
+ my $nmax_last = @{$ri_end};
+ my $reverse = 0;
+ while ($more_to_do) {
+ my $n_best = 0;
+ my $bs_best;
+ my $nmax = @{$ri_end} - 1;
- # Breaking before a ++ can cause perl to guess wrong. For
- # example the following line will cause a syntax error
- # with -extrude if we break between '$i' and '++' [fixstyle2]
- # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
- $nobreak_lhs{'++'} = NO_BREAK;
+ # Safety check for infinite loop
+ unless ( $nmax < $nmax_last ) {
- # Do not break before a possible file handle
- $nobreak_lhs{'Z'} = NO_BREAK;
+ # Shouldn't happen because splice below decreases nmax on each
+ # iteration. An error can only be due to a recent programming
+ # change.
+ Fault("Program bug-infinite loop in recombine breakpoints\n");
+ }
+ $nmax_last = $nmax;
+ $more_to_do = 0;
+ my $skip_Section_3;
+ my $leading_amp_count = 0;
+ my $this_line_is_semicolon_terminated;
- # use strict hates bare words on any new line. For
- # example, a break before the underscore here provokes the
- # wrath of use strict:
- # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
- $nobreak_rhs{'F'} = NO_BREAK;
- $nobreak_rhs{'CORE::'} = NO_BREAK;
+ # loop over all remaining lines in this batch
+ for my $iter ( 1 .. $nmax ) {
- #---------------------------------------------------------------
- # Bond Strength BEGIN Section 3.
- # Define tables and values for applying a small bias to the above
- # values.
- #---------------------------------------------------------------
- # Adding a small 'bias' to strengths is a simple way to make a line
- # break at the first of a sequence of identical terms. For
- # example, to force long string of conditional operators to break
- # with each line ending in a ':', we can add a small number to the
- # bond strength of each ':' (colon.t)
- @bias_tokens = qw( : && || f and or . ); # tokens which get bias
- $delta_bias = 0.0001; # a very small strength level
- return;
+ # alternating sweep direction gives symmetric results
+ # for recombining lines which exceed the line length
+ # such as eval {{{{.... }}}}
+ my $n;
+ if ($reverse) { $n = 1 + $nmax - $iter; }
+ else { $n = $iter }
- } ## end sub initialize_bond_strength_hashes
+ #----------------------------------------------------------
+ # If we join the current pair of lines,
+ # line $n-1 will become the left part of the joined line
+ # line $n will become the right part of the joined line
+ #
+ # Here are Indexes of the endpoint tokens of the two lines:
+ #
+ # -----line $n-1--- | -----line $n-----
+ # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
+ # ^
+ # |
+ # We want to decide if we should remove the line break
+ # between the tokens at $iend_1 and $ibeg_2
+ #
+ # We will apply a number of ad-hoc tests to see if joining
+ # here will look ok. The code will just issue a 'next'
+ # command if the join doesn't look good. If we get through
+ # the gauntlet of tests, the lines will be recombined.
+ #----------------------------------------------------------
+ #
+ # beginning and ending tokens of the lines we are working on
+ my $ibeg_1 = $ri_beg->[ $n - 1 ];
+ my $iend_1 = $ri_end->[ $n - 1 ];
+ my $iend_2 = $ri_end->[$n];
+ my $ibeg_2 = $ri_beg->[$n];
+ my $ibeg_nmax = $ri_beg->[$nmax];
- sub set_bond_strengths {
+ # combined line cannot be too long
+ my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 );
+ next if ( $excess > 0 );
- # patch-its always ok to break at end of line
- $nobreak_to_go[$max_index_to_go] = 0;
+ my $type_iend_1 = $types_to_go[$iend_1];
+ my $type_iend_2 = $types_to_go[$iend_2];
+ my $type_ibeg_1 = $types_to_go[$ibeg_1];
+ my $type_ibeg_2 = $types_to_go[$ibeg_2];
- # we start a new set of bias values for each line
- my %bias;
- @bias{@bias_tokens} = (0) x scalar(@bias_tokens);
- my $code_bias = -.01; # bias for closing block braces
+ # terminal token of line 2 if any side comment is ignored:
+ my $iend_2t = $iend_2;
+ my $type_iend_2t = $type_iend_2;
- my $type = 'b';
- my $token = ' ';
- my $last_type;
- my $last_nonblank_type = $type;
- my $last_nonblank_token = $token;
- my $list_str = $left_bond_strength{'?'};
+ # some beginning indexes of other lines, which may not exist
+ my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1;
+ my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
+ my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
- my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
- $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
- );
+ my $bs_tweak = 0;
- # main loop to compute bond strengths between each pair of tokens
- foreach my $i ( 0 .. $max_index_to_go ) {
- $last_type = $type;
- if ( $type ne 'b' ) {
- $last_nonblank_type = $type;
- $last_nonblank_token = $token;
- }
- $type = $types_to_go[$i];
+ #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
+ # $nesting_depth_to_go[$ibeg_1] );
- # strength on both sides of a blank is the same
- if ( $type eq 'b' && $last_type ne 'b' ) {
- $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
- next;
- }
+ DEBUG_RECOMBINE && do {
+ print STDERR
+"RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
+ };
- $token = $tokens_to_go[$i];
- $block_type = $block_type_to_go[$i];
- $i_next = $i + 1;
- $next_type = $types_to_go[$i_next];
- $next_token = $tokens_to_go[$i_next];
- $total_nesting_depth = $nesting_depth_to_go[$i_next];
- $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
- $next_nonblank_type = $types_to_go[$i_next_nonblank];
- $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
+ # If line $n is the last line, we set some flags and
+ # do any special checks for it
+ if ( $n == $nmax ) {
- # We are computing the strength of the bond between the current
- # token and the NEXT token.
+ # a terminal '{' should stay where it is
+ # unless preceded by a fat comma
+ next if ( $type_ibeg_2 eq '{' && $type_iend_1 ne '=>' );
- #---------------------------------------------------------------
- # Bond Strength Section 1:
- # First Approximation.
- # Use minimum of individual left and right tabulated bond
- # strengths.
- #---------------------------------------------------------------
- my $bsr = $right_bond_strength{$type};
- my $bsl = $left_bond_strength{$next_nonblank_type};
+ if ( $type_iend_2 eq '#'
+ && $iend_2 - $ibeg_2 >= 2
+ && $types_to_go[ $iend_2 - 1 ] eq 'b' )
+ {
+ $iend_2t = $iend_2 - 2;
+ $type_iend_2t = $types_to_go[$iend_2t];
+ }
- # define right bond strengths of certain keywords
- if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
- $bsr = $right_bond_strength{$token};
- }
- elsif ( $token eq 'ne' or $token eq 'eq' ) {
- $bsr = NOMINAL;
- }
+ $this_line_is_semicolon_terminated = $type_iend_2t eq ';';
+ }
- # set terminal bond strength to the nominal value
- # this will cause good preceding breaks to be retained
- if ( $i_next_nonblank > $max_index_to_go ) {
- $bsl = NOMINAL;
- }
+ #----------------------------------------------------------
+ # Recombine Section 0:
+ # Examine the special token joining this line pair, if any.
+ # Put as many tests in this section to avoid duplicate code and
+ # to make formatting independent of whether breaks are to the
+ # left or right of an operator.
+ #----------------------------------------------------------
- # define right bond strengths of certain keywords
- if ( $next_nonblank_type eq 'k'
- && defined( $left_bond_strength{$next_nonblank_token} ) )
- {
- $bsl = $left_bond_strength{$next_nonblank_token};
- }
- elsif ($next_nonblank_token eq 'ne'
- or $next_nonblank_token eq 'eq' )
- {
- $bsl = NOMINAL;
- }
- elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
- $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
- }
+ my ($itok) = @{ $joint[$n] };
+ if ($itok) {
- # Use the minimum of the left and right strengths. Note: it might
- # seem that we would want to keep a NO_BREAK if either token has
- # this value. This didn't work, for example because in an arrow
- # list, it prevents the comma from separating from the following
- # bare word (which is probably quoted by its arrow). So necessary
- # NO_BREAK's have to be handled as special cases in the final
- # section.
- if ( !defined($bsr) ) { $bsr = VERY_STRONG }
- if ( !defined($bsl) ) { $bsl = VERY_STRONG }
- my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
- my $bond_str_1 = $bond_str;
+ my $type = $types_to_go[$itok];
- #---------------------------------------------------------------
- # Bond Strength Section 2:
- # Apply hardwired rules..
- #---------------------------------------------------------------
+ if ( $type eq ':' ) {
- # Patch to put terminal or clauses on a new line: Weaken the bond
- # at an || followed by die or similar keyword to make the terminal
- # or clause fall on a new line, like this:
- #
- # my $class = shift
- # || die "Cannot add broadcast: No class identifier found";
- #
- # Otherwise the break will be at the previous '=' since the || and
- # = have the same starting strength and the or is biased, like
- # this:
- #
- # my $class =
- # shift || die "Cannot add broadcast: No class identifier found";
- #
- # In any case if the user places a break at either the = or the ||
- # it should remain there.
- if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
- if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) {
- if ( $want_break_before{$token} && $i > 0 ) {
- $bond_strength_to_go[ $i - 1 ] -= $delta_bias;
- }
- else {
- $bond_str -= $delta_bias;
- }
- }
- }
+ # do not join at a colon unless it disobeys the break
+ # request
+ if ( $itok eq $iend_1 ) {
+ next unless $want_break_before{$type};
+ }
+ else {
+ $leading_amp_count++;
+ next if $want_break_before{$type};
+ }
+ } ## end if ':'
- # good to break after end of code blocks
- if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
+ # handle math operators + - * /
+ elsif ( $is_math_op{$type} ) {
- $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
- $code_bias += $delta_bias;
- }
+ # Combine these lines if this line is a single
+ # number, or if it is a short term with same
+ # operator as the previous line. For example, in
+ # the following code we will combine all of the
+ # short terms $A, $B, $C, $D, $E, $F, together
+ # instead of leaving them one per line:
+ # my $time =
+ # $A * $B * $C * $D * $E * $F *
+ # ( 2. * $eps * $sigma * $area ) *
+ # ( 1. / $tcold**3 - 1. / $thot**3 );
- if ( $type eq 'k' ) {
+ # This can be important in math-intensive code.
- # allow certain control keywords to stand out
- if ( $next_nonblank_type eq 'k'
- && $is_last_next_redo_return{$token} )
- {
- $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
- }
+ my $good_combo;
- # Don't break after keyword my. This is a quick fix for a
- # rare problem with perl. An example is this line from file
- # Container.pm:
+ my $itokp = min( $inext_to_go[$itok], $iend_2 );
+ my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
+ my $itokm = max( $iprev_to_go[$itok], $ibeg_1 );
+ my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
- # foreach my $question( Debian::DebConf::ConfigDb::gettree(
- # $this->{'question'} ) )
+ # check for a number on the right
+ if ( $types_to_go[$itokp] eq 'n' ) {
- if ( $token eq 'my' ) {
- $bond_str = NO_BREAK;
- }
+ # ok if nothing else on right
+ if ( $itokp == $iend_2 ) {
+ $good_combo = 1;
+ }
+ else {
- }
+ # look one more token to right..
+ # okay if math operator or some termination
+ $good_combo =
+ ( ( $itokpp == $iend_2 )
+ && $is_math_op{ $types_to_go[$itokpp] } )
+ || $types_to_go[$itokpp] =~ /^[#,;]$/;
+ }
+ }
- # good to break before 'if', 'unless', etc
- if ( $is_if_brace_follower{$next_nonblank_token} ) {
- $bond_str = VERY_WEAK;
- }
+ # check for a number on the left
+ if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
- if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
+ # okay if nothing else to left
+ if ( $itokm == $ibeg_1 ) {
+ $good_combo = 1;
+ }
- # FIXME: needs more testing
- if ( $is_keyword_returning_list{$next_nonblank_token} ) {
- $bond_str = $list_str if ( $bond_str > $list_str );
- }
+ # otherwise look one more token to left
+ else {
- # keywords like 'unless', 'if', etc, within statements
- # make good breaks
- if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
- $bond_str = VERY_WEAK / 1.05;
- }
- }
+ # okay if math operator, comma, or assignment
+ $good_combo = ( $itokmm == $ibeg_1 )
+ && ( $is_math_op{ $types_to_go[$itokmm] }
+ || $types_to_go[$itokmm] =~ /^[,]$/
+ || $is_assignment{ $types_to_go[$itokmm] }
+ );
+ }
+ }
- # try not to break before a comma-arrow
- elsif ( $next_nonblank_type eq '=>' ) {
- if ( $bond_str < STRONG ) { $bond_str = STRONG }
- }
+ # look for a single short token either side of the
+ # operator
+ if ( !$good_combo ) {
- #---------------------------------------------------------------
- # Additional hardwired NOBREAK rules
- #---------------------------------------------------------------
+ # Slight adjustment factor to make results
+ # independent of break before or after operator in
+ # long summed lists. (An operator and a space make
+ # two spaces).
+ my $two = ( $itok eq $iend_1 ) ? 2 : 0;
- # map1.t -- correct for a quirk in perl
- if ( $token eq '('
- && $next_nonblank_type eq 'i'
- && $last_nonblank_type eq 'k'
- && $is_sort_map_grep{$last_nonblank_token} )
+ $good_combo =
- # /^(sort|map|grep)$/ )
- {
- $bond_str = NO_BREAK;
- }
+ # numbers or id's on both sides of this joint
+ $types_to_go[$itokp] =~ /^[in]$/
+ && $types_to_go[$itokm] =~ /^[in]$/
- # extrude.t: do not break before paren at:
- # -l pid_filename(
- if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
- $bond_str = NO_BREAK;
- }
+ # one of the two lines must be short:
+ && (
+ (
+ # no more than 2 nonblank tokens right of
+ # joint
+ $itokpp == $iend_2
- # in older version of perl, use strict can cause problems with
- # breaks before bare words following opening parens. For example,
- # this will fail under older versions if a break is made between
- # '(' and 'MAIL': use strict; open( MAIL, "a long filename or
- # command"); close MAIL;
- if ( $type eq '{' ) {
+ # short
+ && token_sequence_length( $itokp, $iend_2 )
+ < $two +
+ $rOpts_short_concatenation_item_length
+ )
+ || (
+ # no more than 2 nonblank tokens left of
+ # joint
+ $itokmm == $ibeg_1
- if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
+ # short
+ && token_sequence_length( $ibeg_1, $itokm )
+ < 2 - $two +
+ $rOpts_short_concatenation_item_length
+ )
- # but it's fine to break if the word is followed by a '=>'
- # or if it is obviously a sub call
- my $i_next_next_nonblank = $i_next_nonblank + 1;
- my $next_next_type = $types_to_go[$i_next_next_nonblank];
- if ( $next_next_type eq 'b'
- && $i_next_nonblank < $max_index_to_go )
- {
- $i_next_next_nonblank++;
- $next_next_type = $types_to_go[$i_next_next_nonblank];
- }
+ )
- # We'll check for an old breakpoint and keep a leading
- # bareword if it was that way in the input file.
- # Presumably it was ok that way. For example, the
- # following would remain unchanged:
- #
- # @months = (
- # January, February, March, April,
- # May, June, July, August,
- # September, October, November, December,
- # );
- #
- # This should be sufficient:
- if (
- !$old_breakpoint_to_go[$i]
- && ( $next_next_type eq ','
- || $next_next_type eq '}' )
- )
- {
- $bond_str = NO_BREAK;
- }
- }
- }
+ # keep pure terms; don't mix +- with */
+ && !(
+ $is_plus_minus{$type}
+ && ( $is_mult_div{ $types_to_go[$itokmm] }
+ || $is_mult_div{ $types_to_go[$itokpp] } )
+ )
+ && !(
+ $is_mult_div{$type}
+ && ( $is_plus_minus{ $types_to_go[$itokmm] }
+ || $is_plus_minus{ $types_to_go[$itokpp] } )
+ )
- # Do not break between a possible filehandle and a ? or / and do
- # not introduce a break after it if there is no blank
- # (extrude.t)
- elsif ( $type eq 'Z' ) {
+ ;
+ }
- # don't break..
- if (
+ # it is also good to combine if we can reduce to 2 lines
+ if ( !$good_combo ) {
- # if there is no blank and we do not want one. Examples:
- # print $x++ # do not break after $x
- # print HTML"HELLO" # break ok after HTML
- (
- $next_type ne 'b'
- && defined( $want_left_space{$next_type} )
- && $want_left_space{$next_type} == WS_NO
- )
+ # index on other line where same token would be in a
+ # long chain.
+ my $iother =
+ ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
+
+ $good_combo =
+ $n == 2
+ && $n == $nmax
+ && $types_to_go[$iother] ne $type;
+ }
+
+ next unless ($good_combo);
+
+ } ## end math
+
+ elsif ( $is_amp_amp{$type} ) {
+ ##TBD
+ } ## end &&, ||
+
+ elsif ( $is_assignment{$type} ) {
+ ##TBD
+ } ## end assignment
+ }
- # or we might be followed by the start of a quote
- || $next_nonblank_type =~ /^[\/\?]$/
+ #----------------------------------------------------------
+ # Recombine Section 1:
+ # Join welded nested containers immediately
+ #----------------------------------------------------------
+
+ if (
+ $total_weld_count
+ && ( $type_sequence_to_go[$iend_1]
+ && defined( $rK_weld_right->{ $K_to_go[$iend_1] } )
+ || $type_sequence_to_go[$ibeg_2]
+ && defined( $rK_weld_left->{ $K_to_go[$ibeg_2] } ) )
)
{
- $bond_str = NO_BREAK;
+ $n_best = $n;
+ last;
}
- }
- # Breaking before a ? before a quote can cause trouble if
- # they are not separated by a blank.
- # Example: a syntax error occurs if you break before the ? here
- # my$logic=join$all?' && ':' || ',@regexps;
- # From: Professional_Perl_Programming_Code/multifind.pl
- if ( $next_nonblank_type eq '?' ) {
- $bond_str = NO_BREAK
- if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
- }
+ $reverse = 0;
- # Breaking before a . followed by a number
- # can cause trouble if there is no intervening space
- # Example: a syntax error occurs if you break before the .2 here
- # $str .= pack($endian.2, ensurrogate($ord));
- # From: perl58/Unicode.pm
- elsif ( $next_nonblank_type eq '.' ) {
- $bond_str = NO_BREAK
- if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
- }
+ #----------------------------------------------------------
+ # Recombine Section 2:
+ # Examine token at $iend_1 (right end of first line of pair)
+ #----------------------------------------------------------
- my $bond_str_2 = $bond_str;
+ # an isolated '}' may join with a ';' terminated segment
+ if ( $type_iend_1 eq '}' ) {
- #---------------------------------------------------------------
- # End of hardwired rules
- #---------------------------------------------------------------
+ # Check for cases where combining a semicolon terminated
+ # statement with a previous isolated closing paren will
+ # allow the combined line to be outdented. This is
+ # generally a good move. For example, we can join up
+ # the last two lines here:
+ # (
+ # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
+ # $size, $atime, $mtime, $ctime, $blksize, $blocks
+ # )
+ # = stat($file);
+ #
+ # to get:
+ # (
+ # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
+ # $size, $atime, $mtime, $ctime, $blksize, $blocks
+ # ) = stat($file);
+ #
+ # which makes the parens line up.
+ #
+ # Another example, from Joe Matarazzo, probably looks best
+ # with the 'or' clause appended to the trailing paren:
+ # $self->some_method(
+ # PARAM1 => 'foo',
+ # PARAM2 => 'bar'
+ # ) or die "Some_method didn't work";
+ #
+ # But we do not want to do this for something like the -lp
+ # option where the paren is not outdentable because the
+ # trailing clause will be far to the right.
+ #
+ # The logic here is synchronized with the logic in sub
+ # sub set_adjusted_indentation, which actually does
+ # the outdenting.
+ #
+ $skip_Section_3 ||= $this_line_is_semicolon_terminated
- #---------------------------------------------------------------
- # Bond Strength Section 3:
- # Apply table rules. These have priority over the above
- # hardwired rules.
- #---------------------------------------------------------------
+ # only one token on last line
+ && $ibeg_1 == $iend_1
- my $tabulated_bond_str;
- my $ltype = $type;
- my $rtype = $next_nonblank_type;
- if ( $token =~ /^[\(\[\{\)\]\}]/ ) { $ltype = $type . $token }
- if ( $next_nonblank_token =~ /^[\(\[\{\)\]\}]/ ) {
- $rtype = $next_nonblank_type . $next_nonblank_token;
- }
+ # must be structural paren
+ && $tokens_to_go[$iend_1] eq ')'
- if ( $binary_bond_strength{$ltype}{$rtype} ) {
- $bond_str = $binary_bond_strength{$ltype}{$rtype};
- $tabulated_bond_str = $bond_str;
- }
+ # style must allow outdenting,
+ && !$closing_token_indentation{')'}
- if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
- $bond_str = NO_BREAK;
- $tabulated_bond_str = $bond_str;
- }
- my $bond_str_3 = $bond_str;
+ # only leading '&&', '||', and ':' if no others seen
+ # (but note: our count made below could be wrong
+ # due to intervening comments)
+ && ( $leading_amp_count == 0
+ || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
- # If the hardwired rules conflict with the tabulated bond
- # strength then there is an inconsistency that should be fixed
- FORMATTER_DEBUG_FLAG_BOND_TABLES
- && $tabulated_bond_str
- && $bond_str_1
- && $bond_str_1 != $bond_str_2
- && $bond_str_2 != $tabulated_bond_str
- && do {
- print STDERR
-"BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
- };
+ # but leading colons probably line up with a
+ # previous colon or question (count could be wrong).
+ && $type_ibeg_2 ne ':'
- #-----------------------------------------------------------------
- # Bond Strength Section 4:
- # Modify strengths of certain tokens which often occur in sequence
- # by adding a small bias to each one in turn so that the breaks
- # occur from left to right.
- #
- # Note that we only changing strengths by small amounts here,
- # and usually increasing, so we should not be altering any NO_BREAKs.
- # Other routines which check for NO_BREAKs will use a tolerance
- # of one to avoid any problem.
- #-----------------------------------------------------------------
+ # only one step in depth allowed. this line must not
+ # begin with a ')' itself.
+ && ( $nesting_depth_to_go[$iend_1] ==
+ $nesting_depth_to_go[$iend_2] + 1 );
- # The bias tables use special keys
- my $left_key = bias_table_key( $type, $token );
- my $right_key =
- bias_table_key( $next_nonblank_type, $next_nonblank_token );
+ # YVES patch 2 of 2:
+ # Allow cuddled eval chains, like this:
+ # eval {
+ # #STUFF;
+ # 1; # return true
+ # } or do {
+ # #handle error
+ # };
+ # This patch works together with a patch in
+ # setting adjusted indentation (where the closing eval
+ # brace is outdented if possible).
+ # The problem is that an 'eval' block has continuation
+ # indentation and it looks better to undo it in some
+ # cases. If we do not use this patch we would get:
+ # eval {
+ # #STUFF;
+ # 1; # return true
+ # }
+ # or do {
+ # #handle error
+ # };
+ # The alternative, for uncuddled style, is to create
+ # a patch in set_adjusted_indentation which undoes
+ # the indentation of a leading line like 'or do {'.
+ # This doesn't work well with -icb through
+ if (
+ $block_type_to_go[$iend_1] eq 'eval'
+ && !$rOpts->{'line-up-parentheses'}
+ && !$rOpts->{'indent-closing-brace'}
+ && $tokens_to_go[$iend_2] eq '{'
+ && (
+ ( $type_ibeg_2 =~ /^(\&\&|\|\|)$/ )
+ || ( $type_ibeg_2 eq 'k'
+ && $is_and_or{ $tokens_to_go[$ibeg_2] } )
+ || $is_if_unless{ $tokens_to_go[$ibeg_2] }
+ )
+ )
+ {
+ $skip_Section_3 ||= 1;
+ }
- # add any bias set by sub scan_list at old comma break points.
- if ( $type eq ',' ) { $bond_str += $bond_strength_to_go[$i] }
+ next
+ unless (
+ $skip_Section_3
- # bias left token
- elsif ( defined( $bias{$left_key} ) ) {
- if ( !$want_break_before{$left_key} ) {
- $bias{$left_key} += $delta_bias;
- $bond_str += $bias{$left_key};
+ # handle '.' and '?' specially below
+ || ( $type_ibeg_2 =~ /^[\.\?]$/ )
+ );
}
- }
- # bias right token
- if ( defined( $bias{$right_key} ) ) {
- if ( $want_break_before{$right_key} ) {
+ elsif ( $type_iend_1 eq '{' ) {
- # for leading '.' align all but 'short' quotes; the idea
- # is to not place something like "\n" on a single line.
- if ( $right_key eq '.' ) {
- unless (
- $last_nonblank_type eq '.'
- && (
- length($token) <=
- $rOpts_short_concatenation_item_length )
- && ( !$is_closing_token{$token} )
- )
- {
- $bias{$right_key} += $delta_bias;
- }
- }
- else {
- $bias{$right_key} += $delta_bias;
- }
- $bond_str += $bias{$right_key};
+ # YVES
+ # honor breaks at opening brace
+ # Added to prevent recombining something like this:
+ # } || eval { package main;
+ next if $forced_breakpoint_to_go[$iend_1];
}
- }
- my $bond_str_4 = $bond_str;
-
- #---------------------------------------------------------------
- # Bond Strength Section 5:
- # Fifth Approximation.
- # Take nesting depth into account by adding the nesting depth
- # to the bond strength.
- #---------------------------------------------------------------
- my $strength;
- if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
- if ( $total_nesting_depth > 0 ) {
- $strength = $bond_str + $total_nesting_depth;
- }
- else {
- $strength = $bond_str;
+ # do not recombine lines with ending &&, ||,
+ elsif ( $is_amp_amp{$type_iend_1} ) {
+ next unless $want_break_before{$type_iend_1};
}
- }
- else {
- $strength = NO_BREAK;
- }
- #---------------------------------------------------------------
- # Bond Strength Section 6:
- # Sixth Approximation. Welds.
- #---------------------------------------------------------------
+ # Identify and recombine a broken ?/: chain
+ elsif ( $type_iend_1 eq '?' ) {
- # Do not allow a break within welds,
- if ( weld_len_right_to_go($i) ) { $strength = NO_BREAK }
+ # Do not recombine different levels
+ next
+ if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
- # But encourage breaking after opening welded tokens
- elsif ( weld_len_left_to_go($i) && $is_opening_token{$token} ) {
- $strength -= 1;
- }
+ # do not recombine unless next line ends in :
+ next unless $type_iend_2 eq ':';
+ }
- # always break after side comment
- if ( $type eq '#' ) { $strength = 0 }
+ # for lines ending in a comma...
+ elsif ( $type_iend_1 eq ',' ) {
- $bond_strength_to_go[$i] = $strength;
+ # Do not recombine at comma which is following the
+ # input bias.
+ # TODO: might be best to make a special flag
+ next if ( $old_breakpoint_to_go[$iend_1] );
- FORMATTER_DEBUG_FLAG_BOND && do {
- my $str = substr( $token, 0, 15 );
- $str .= ' ' x ( 16 - length($str) );
- print STDOUT
-"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
- };
- } ## end main loop
- return;
- } ## end sub set_bond_strengths
-}
+ # An isolated '},' may join with an identifier + ';'
+ # This is useful for the class of a 'bless' statement
+ # (bless.t)
+ if ( $type_ibeg_1 eq '}'
+ && $type_ibeg_2 eq 'i' )
+ {
+ next
+ unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
+ && ( $iend_2 == ( $ibeg_2 + 1 ) )
+ && $this_line_is_semicolon_terminated );
-sub pad_array_to_go {
+ # override breakpoint
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
- # to simplify coding in scan_list and set_bond_strengths, it helps
- # to create some extra blank tokens at the end of the arrays
- $tokens_to_go[ $max_index_to_go + 1 ] = '';
- $tokens_to_go[ $max_index_to_go + 2 ] = '';
- $types_to_go[ $max_index_to_go + 1 ] = 'b';
- $types_to_go[ $max_index_to_go + 2 ] = 'b';
- $nesting_depth_to_go[ $max_index_to_go + 1 ] =
- $nesting_depth_to_go[$max_index_to_go];
+ # but otherwise ..
+ else {
- # /^[R\}\)\]]$/
- if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
- if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
+ # do not recombine after a comma unless this will leave
+ # just 1 more line
+ next unless ( $n + 1 >= $nmax );
- # shouldn't happen:
- unless ( get_saw_brace_error() ) {
- warning(
-"Program bug in scan_list: hit nesting error which should have been caught\n"
- );
- report_definite_bug();
- }
- }
- else {
- $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
- }
- }
+ # do not recombine if there is a change in indentation depth
+ next
+ if (
+ $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
- # /^[L\{\(\[]$/
- elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
- $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
- }
- return;
-}
+ # do not recombine a "complex expression" after a
+ # comma. "complex" means no parens.
+ my $saw_paren;
+ foreach my $ii ( $ibeg_2 .. $iend_2 ) {
+ if ( $tokens_to_go[$ii] eq '(' ) {
+ $saw_paren = 1;
+ last;
+ }
+ }
+ next if $saw_paren;
+ }
+ }
-{ # begin scan_list
+ # opening paren..
+ elsif ( $type_iend_1 eq '(' ) {
- my (
- $block_type, $current_depth,
- $depth, $i,
- $i_last_nonblank_token, $last_colon_sequence_number,
- $last_nonblank_token, $last_nonblank_type,
- $last_nonblank_block_type, $last_old_breakpoint_count,
- $minimum_depth, $next_nonblank_block_type,
- $next_nonblank_token, $next_nonblank_type,
- $old_breakpoint_count, $starting_breakpoint_count,
- $starting_depth, $token,
- $type, $type_sequence,
- );
+ # No longer doing this
+ }
- my (
- @breakpoint_stack, @breakpoint_undo_stack,
- @comma_index, @container_type,
- @identifier_count_stack, @index_before_arrow,
- @interrupted_list, @item_count_stack,
- @last_comma_index, @last_dot_index,
- @last_nonblank_type, @old_breakpoint_count_stack,
- @opening_structure_index_stack, @rfor_semicolon_list,
- @has_old_logical_breakpoints, @rand_or_list,
- @i_equals,
- );
+ elsif ( $type_iend_1 eq ')' ) {
- # routine to define essential variables when we go 'up' to
- # a new depth
- sub check_for_new_minimum_depth {
- my $depth = shift;
- if ( $depth < $minimum_depth ) {
+ # No longer doing this
+ }
- $minimum_depth = $depth;
+ # keep a terminal for-semicolon
+ elsif ( $type_iend_1 eq 'f' ) {
+ next;
+ }
- # these arrays need not retain values between calls
- $breakpoint_stack[$depth] = $starting_breakpoint_count;
- $container_type[$depth] = "";
- $identifier_count_stack[$depth] = 0;
- $index_before_arrow[$depth] = -1;
- $interrupted_list[$depth] = 1;
- $item_count_stack[$depth] = 0;
- $last_nonblank_type[$depth] = "";
- $opening_structure_index_stack[$depth] = -1;
+ # if '=' at end of line ...
+ elsif ( $is_assignment{$type_iend_1} ) {
- $breakpoint_undo_stack[$depth] = undef;
- $comma_index[$depth] = undef;
- $last_comma_index[$depth] = undef;
- $last_dot_index[$depth] = undef;
- $old_breakpoint_count_stack[$depth] = undef;
- $has_old_logical_breakpoints[$depth] = 0;
- $rand_or_list[$depth] = [];
- $rfor_semicolon_list[$depth] = [];
- $i_equals[$depth] = -1;
+ # keep break after = if it was in input stream
+ # this helps prevent 'blinkers'
+ next if $old_breakpoint_to_go[$iend_1]
- # these arrays must retain values between calls
- if ( !defined( $has_broken_sublist[$depth] ) ) {
- $dont_align[$depth] = 0;
- $has_broken_sublist[$depth] = 0;
- $want_comma_break[$depth] = 0;
- }
- }
- return;
- }
+ # don't strand an isolated '='
+ && $iend_1 != $ibeg_1;
- # routine to decide which commas to break at within a container;
- # returns:
- # $bp_count = number of comma breakpoints set
- # $do_not_break_apart = a flag indicating if container need not
- # be broken open
- sub set_comma_breakpoints {
+ my $is_short_quote =
+ ( $type_ibeg_2 eq 'Q'
+ && $ibeg_2 == $iend_2
+ && token_sequence_length( $ibeg_2, $ibeg_2 ) <
+ $rOpts_short_concatenation_item_length );
+ my $is_ternary =
+ ( $type_ibeg_1 eq '?'
+ && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
- my $dd = shift;
- my $bp_count = 0;
- my $do_not_break_apart = 0;
+ # always join an isolated '=', a short quote, or if this
+ # will put ?/: at start of adjacent lines
+ if ( $ibeg_1 != $iend_1
+ && !$is_short_quote
+ && !$is_ternary )
+ {
+ next
+ unless (
+ (
- # anything to do?
- if ( $item_count_stack[$dd] ) {
+ # unless we can reduce this to two lines
+ $nmax < $n + 2
- # handle commas not in containers...
- if ( $dont_align[$dd] ) {
- do_uncontained_comma_breaks($dd);
- }
+ # or three lines, the last with a leading semicolon
+ || ( $nmax == $n + 2
+ && $types_to_go[$ibeg_nmax] eq ';' )
- # handle commas within containers...
- else {
- my $fbc = $forced_breakpoint_count;
+ # or the next line ends with a here doc
+ || $type_iend_2 eq 'h'
- # always open comma lists not preceded by keywords,
- # barewords, identifiers (that is, anything that doesn't
- # look like a function call)
- my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
+ # or the next line ends in an open paren or brace
+ # and the break hasn't been forced [dima.t]
+ || ( !$forced_breakpoint_to_go[$iend_1]
+ && $type_iend_2 eq '{' )
+ )
- set_comma_breakpoints_do(
- $dd,
- $opening_structure_index_stack[$dd],
- $i,
- $item_count_stack[$dd],
- $identifier_count_stack[$dd],
- $comma_index[$dd],
- $next_nonblank_type,
- $container_type[$dd],
- $interrupted_list[$dd],
- \$do_not_break_apart,
- $must_break_open,
- );
- $bp_count = $forced_breakpoint_count - $fbc;
- $do_not_break_apart = 0 if $must_break_open;
- }
- }
- return ( $bp_count, $do_not_break_apart );
- }
+ # do not recombine if the two lines might align well
+ # this is a very approximate test for this
+ && (
- sub do_uncontained_comma_breaks {
+ # RT#127633 - the leading tokens are not operators
+ ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] )
- # Handle commas not in containers...
- # This is a catch-all routine for commas that we
- # don't know what to do with because the don't fall
- # within containers. We will bias the bond strength
- # to break at commas which ended lines in the input
- # file. This usually works better than just trying
- # to put as many items on a line as possible. A
- # downside is that if the input file is garbage it
- # won't work very well. However, the user can always
- # prevent following the old breakpoints with the
- # -iob flag.
- my $dd = shift;
- my $bias = -.01;
- my $old_comma_break_count = 0;
- foreach my $ii ( @{ $comma_index[$dd] } ) {
- if ( $old_breakpoint_to_go[$ii] ) {
- $old_comma_break_count++;
- $bond_strength_to_go[$ii] = $bias;
+ # or they are different
+ || ( $ibeg_3 >= 0
+ && $type_ibeg_2 ne $types_to_go[$ibeg_3] )
+ )
+ );
- # reduce bias magnitude to force breaks in order
- $bias *= 0.99;
- }
- }
+ if (
- # Also put a break before the first comma if
- # (1) there was a break there in the input, and
- # (2) there was exactly one old break before the first comma break
- # (3) OLD: there are multiple old comma breaks
- # (3) NEW: there are one or more old comma breaks (see return example)
- #
- # For example, we will follow the user and break after
- # 'print' in this snippet:
- # print
- # "conformability (Not the same dimension)\n",
- # "\t", $have, " is ", text_unit($hu), "\n",
- # "\t", $want, " is ", text_unit($wu), "\n",
- # ;
- #
- # Another example, just one comma, where we will break after
- # the return:
- # return
- # $x * cos($a) - $y * sin($a),
- # $x * sin($a) + $y * cos($a);
-
- # Breaking a print statement:
- # print SAVEOUT
- # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
- # ( $? & 128 ) ? " -- core dumped" : "", "\n";
- #
- # But we will not force a break after the opening paren here
- # (causes a blinker):
- # $heap->{stream}->set_output_filter(
- # poe::filter::reference->new('myotherfreezer') ),
- # ;
- #
- my $i_first_comma = $comma_index[$dd]->[0];
- if ( $old_breakpoint_to_go[$i_first_comma] ) {
- my $level_comma = $levels_to_go[$i_first_comma];
- my $ibreak = -1;
- my $obp_count = 0;
- for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
- if ( $old_breakpoint_to_go[$ii] ) {
- $obp_count++;
- last if ( $obp_count > 1 );
- $ibreak = $ii
- if ( $levels_to_go[$ii] == $level_comma );
- }
- }
-
- # Changed rule from multiple old commas to just one here:
- if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
- {
- # Do not to break before an opening token because
- # it can lead to "blinkers".
- my $ibreakm = $ibreak;
- $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' );
- if ( $ibreakm >= 0 && $types_to_go[$ibreakm] !~ /^[\(\{\[L]$/ )
- {
- set_forced_breakpoint($ibreak);
- }
- }
- }
- return;
- }
+ # Recombine if we can make two lines
+ $nmax >= $n + 2
- my %is_logical_container;
+ # -lp users often prefer this:
+ # my $title = function($env, $env, $sysarea,
+ # "bubba Borrower Entry");
+ # so we will recombine if -lp is used we have
+ # ending comma
+ && ( !$rOpts_line_up_parentheses
+ || $type_iend_2 ne ',' )
+ )
+ {
- BEGIN {
- my @q = qw# if elsif unless while and or err not && | || ? : ! #;
- @is_logical_container{@q} = (1) x scalar(@q);
- }
+ # otherwise, scan the rhs line up to last token for
+ # complexity. Note that we are not counting the last
+ # token in case it is an opening paren.
+ my $tv = 0;
+ my $depth = $nesting_depth_to_go[$ibeg_2];
+ foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
+ if ( $nesting_depth_to_go[$i] != $depth ) {
+ $tv++;
+ last if ( $tv > 1 );
+ }
+ $depth = $nesting_depth_to_go[$i];
+ }
- sub set_for_semicolon_breakpoints {
- my $dd = shift;
- foreach ( @{ $rfor_semicolon_list[$dd] } ) {
- set_forced_breakpoint($_);
- }
- return;
- }
+ # ok to recombine if no level changes before last token
+ if ( $tv > 0 ) {
- sub set_logical_breakpoints {
- my $dd = shift;
- if (
- $item_count_stack[$dd] == 0
- && $is_logical_container{ $container_type[$dd] }
+ # otherwise, do not recombine if more than two
+ # level changes.
+ next if ( $tv > 1 );
- || $has_old_logical_breakpoints[$dd]
- )
- {
+ # check total complexity of the two adjacent lines
+ # that will occur if we do this join
+ my $istop =
+ ( $n < $nmax )
+ ? $ri_end->[ $n + 1 ]
+ : $iend_2;
+ foreach my $i ( $iend_2 .. $istop ) {
+ if ( $nesting_depth_to_go[$i] != $depth ) {
+ $tv++;
+ last if ( $tv > 2 );
+ }
+ $depth = $nesting_depth_to_go[$i];
+ }
- # Look for breaks in this order:
- # 0 1 2 3
- # or and || &&
- foreach my $i ( 0 .. 3 ) {
- if ( $rand_or_list[$dd][$i] ) {
- foreach ( @{ $rand_or_list[$dd][$i] } ) {
- set_forced_breakpoint($_);
+ # do not recombine if total is more than 2 level changes
+ next if ( $tv > 2 );
+ }
+ }
}
- # break at any 'if' and 'unless' too
- foreach ( @{ $rand_or_list[$dd][4] } ) {
- set_forced_breakpoint($_);
+ unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
+ $forced_breakpoint_to_go[$iend_1] = 0;
}
- $rand_or_list[$dd] = [];
- last;
}
- }
- }
- return;
- }
- sub is_unbreakable_container {
+ # for keywords..
+ elsif ( $type_iend_1 eq 'k' ) {
- # never break a container of one of these types
- # because bad things can happen (map1.t)
- my $dd = shift;
- return $is_sort_map_grep{ $container_type[$dd] };
- }
+ # make major control keywords stand out
+ # (recombine.t)
+ next
+ if (
- sub scan_list {
+ #/^(last|next|redo|return)$/
+ $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
- # This routine is responsible for setting line breaks for all lists,
- # so that hierarchical structure can be displayed and so that list
- # items can be vertically aligned. The output of this routine is
- # stored in the array @forced_breakpoint_to_go, which is used to set
- # final breakpoints.
+ # but only if followed by multiple lines
+ && $n < $nmax
+ );
- $starting_depth = $nesting_depth_to_go[0];
+ if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
+ next
+ unless $want_break_before{ $tokens_to_go[$iend_1] };
+ }
+ }
- $block_type = ' ';
- $current_depth = $starting_depth;
- $i = -1;
- $last_colon_sequence_number = -1;
- $last_nonblank_token = ';';
- $last_nonblank_type = ';';
- $last_nonblank_block_type = ' ';
- $last_old_breakpoint_count = 0;
- $minimum_depth = $current_depth + 1; # forces update in check below
- $old_breakpoint_count = 0;
- $starting_breakpoint_count = $forced_breakpoint_count;
- $token = ';';
- $type = ';';
- $type_sequence = '';
+ #----------------------------------------------------------
+ # Recombine Section 3:
+ # Examine token at $ibeg_2 (left end of second line of pair)
+ #----------------------------------------------------------
- my $total_depth_variation = 0;
- my $i_old_assignment_break;
- my $depth_last = $starting_depth;
+ # join lines identified above as capable of
+ # causing an outdented line with leading closing paren
+ # Note that we are skipping the rest of this section
+ # and the rest of the loop to do the join
+ if ($skip_Section_3) {
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ $n_best = $n;
+ last;
+ }
- check_for_new_minimum_depth($current_depth);
+ # handle lines with leading &&, ||
+ elsif ( $is_amp_amp{$type_ibeg_2} ) {
- my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
- my $want_previous_breakpoint = -1;
+ $leading_amp_count++;
- my $saw_good_breakpoint;
- my $i_line_end = -1;
- my $i_line_start = -1;
+ # ok to recombine if it follows a ? or :
+ # and is followed by an open paren..
+ my $ok =
+ ( $is_ternary{$type_ibeg_1}
+ && $tokens_to_go[$iend_2] eq '(' )
- # loop over all tokens in this batch
- while ( ++$i <= $max_index_to_go ) {
- if ( $type ne 'b' ) {
- $i_last_nonblank_token = $i - 1;
- $last_nonblank_type = $type;
- $last_nonblank_token = $token;
- $last_nonblank_block_type = $block_type;
- } ## end if ( $type ne 'b' )
- $type = $types_to_go[$i];
- $block_type = $block_type_to_go[$i];
- $token = $tokens_to_go[$i];
- $type_sequence = $type_sequence_to_go[$i];
- my $next_type = $types_to_go[ $i + 1 ];
- my $next_token = $tokens_to_go[ $i + 1 ];
- my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
- $next_nonblank_type = $types_to_go[$i_next_nonblank];
- $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
- $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
+ # or is followed by a ? or : at same depth
+ #
+ # We are looking for something like this. We can
+ # recombine the && line with the line above to make the
+ # structure more clear:
+ # return
+ # exists $G->{Attr}->{V}
+ # && exists $G->{Attr}->{V}->{$u}
+ # ? %{ $G->{Attr}->{V}->{$u} }
+ # : ();
+ #
+ # We should probably leave something like this alone:
+ # return
+ # exists $G->{Attr}->{E}
+ # && exists $G->{Attr}->{E}->{$u}
+ # && exists $G->{Attr}->{E}->{$u}->{$v}
+ # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
+ # : ();
+ # so that we either have all of the &&'s (or ||'s)
+ # on one line, as in the first example, or break at
+ # each one as in the second example. However, it
+ # sometimes makes things worse to check for this because
+ # it prevents multiple recombinations. So this is not done.
+ || ( $ibeg_3 >= 0
+ && $is_ternary{ $types_to_go[$ibeg_3] }
+ && $nesting_depth_to_go[$ibeg_3] ==
+ $nesting_depth_to_go[$ibeg_2] );
- # set break if flag was set
- if ( $want_previous_breakpoint >= 0 ) {
- set_forced_breakpoint($want_previous_breakpoint);
- $want_previous_breakpoint = -1;
- }
+ next if !$ok && $want_break_before{$type_ibeg_2};
+ $forced_breakpoint_to_go[$iend_1] = 0;
- $last_old_breakpoint_count = $old_breakpoint_count;
- if ( $old_breakpoint_to_go[$i] ) {
- $i_line_end = $i;
- $i_line_start = $i_next_nonblank;
+ # tweak the bond strength to give this joint priority
+ # over ? and :
+ $bs_tweak = 0.25;
+ }
- $old_breakpoint_count++;
+ # Identify and recombine a broken ?/: chain
+ elsif ( $type_ibeg_2 eq '?' ) {
- # Break before certain keywords if user broke there and
- # this is a 'safe' break point. The idea is to retain
- # any preferred breaks for sequential list operations,
- # like a schwartzian transform.
- if ($rOpts_break_at_old_keyword_breakpoints) {
- if (
- $next_nonblank_type eq 'k'
- && $is_keyword_returning_list{$next_nonblank_token}
- && ( $type =~ /^[=\)\]\}Riw]$/
- || $type eq 'k'
- && $is_keyword_returning_list{$token} )
- )
- {
+ # Do not recombine different levels
+ my $lev = $levels_to_go[$ibeg_2];
+ next if ( $lev ne $levels_to_go[$ibeg_1] );
- # we actually have to set this break next time through
- # the loop because if we are at a closing token (such
- # as '}') which forms a one-line block, this break might
- # get undone.
- $want_previous_breakpoint = $i;
- } ## end if ( $next_nonblank_type...)
- } ## end if ($rOpts_break_at_old_keyword_breakpoints)
+ # Do not recombine a '?' if either next line or
+ # previous line does not start with a ':'. The reasons
+ # are that (1) no alignment of the ? will be possible
+ # and (2) the expression is somewhat complex, so the
+ # '?' is harder to see in the interior of the line.
+ my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
+ my $precedes_colon =
+ $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
+ next unless ( $follows_colon || $precedes_colon );
- # Break before attributes if user broke there
- if ($rOpts_break_at_old_attribute_breakpoints) {
- if ( $next_nonblank_type eq 'A' ) {
- $want_previous_breakpoint = $i;
+ # we will always combining a ? line following a : line
+ if ( !$follows_colon ) {
+
+ # ...otherwise recombine only if it looks like a chain.
+ # we will just look at a few nearby lines to see if
+ # this looks like a chain.
+ my $local_count = 0;
+ foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
+ $local_count++
+ if $ii >= 0
+ && $types_to_go[$ii] eq ':'
+ && $levels_to_go[$ii] == $lev;
+ }
+ next unless ( $local_count > 1 );
}
+ $forced_breakpoint_to_go[$iend_1] = 0;
}
- # remember an = break as possible good break point
- if ( $is_assignment{$type} ) {
- $i_old_assignment_break = $i;
- }
- elsif ( $is_assignment{$next_nonblank_type} ) {
- $i_old_assignment_break = $i_next_nonblank;
- }
- } ## end if ( $old_breakpoint_to_go...)
+ # do not recombine lines with leading '.'
+ elsif ( $type_ibeg_2 eq '.' ) {
+ my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
+ next
+ unless (
- next if ( $type eq 'b' );
- $depth = $nesting_depth_to_go[ $i + 1 ];
+ # ... unless there is just one and we can reduce
+ # this to two lines if we do. For example, this
+ #
+ #
+ # $bodyA .=
+ # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
+ #
+ # looks better than this:
+ # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
+ # . '$args .= $pat;'
- $total_depth_variation += abs( $depth - $depth_last );
- $depth_last = $depth;
+ (
+ $n == 2
+ && $n == $nmax
+ && $type_ibeg_1 ne $type_ibeg_2
+ )
- # safety check - be sure we always break after a comment
- # Shouldn't happen .. an error here probably means that the
- # nobreak flag did not get turned off correctly during
- # formatting.
- if ( $type eq '#' ) {
- if ( $i != $max_index_to_go ) {
- warning(
-"Non-fatal program bug: backup logic needed to break after a comment\n"
- );
- report_definite_bug();
- $nobreak_to_go[$i] = 0;
- set_forced_breakpoint($i);
- } ## end if ( $i != $max_index_to_go)
- } ## end if ( $type eq '#' )
+ # ... or this would strand a short quote , like this
+ # . "some long quote"
+ # . "\n";
- # Force breakpoints at certain tokens in long lines.
- # Note that such breakpoints will be undone later if these tokens
- # are fully contained within parens on a line.
- if (
+ || ( $types_to_go[$i_next_nonblank] eq 'Q'
+ && $i_next_nonblank >= $iend_2 - 1
+ && $token_lengths_to_go[$i_next_nonblank] <
+ $rOpts_short_concatenation_item_length )
+ );
+ }
- # break before a keyword within a line
- $type eq 'k'
- && $i > 0
+ # handle leading keyword..
+ elsif ( $type_ibeg_2 eq 'k' ) {
- # if one of these keywords:
- && $token =~ /^(if|unless|while|until|for)$/
+ # handle leading "or"
+ if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
+ next
+ unless (
+ $this_line_is_semicolon_terminated
+ && (
+ $type_ibeg_1 eq '}'
+ || (
- # but do not break at something like '1 while'
- && ( $last_nonblank_type ne 'n' || $i > 2 )
+ # following 'if' or 'unless' or 'or'
+ $type_ibeg_1 eq 'k'
+ && $is_if_unless{ $tokens_to_go[$ibeg_1] }
- # and let keywords follow a closing 'do' brace
- && $last_nonblank_block_type ne 'do'
+ # important: only combine a very simple or
+ # statement because the step below may have
+ # combined a trailing 'and' with this or,
+ # and we do not want to then combine
+ # everything together
+ && ( $iend_2 - $ibeg_2 <= 7 )
+ )
+ )
+ );
- && (
- $is_long_line
+ #X: RT #81854
+ $forced_breakpoint_to_go[$iend_1] = 0
+ unless $old_breakpoint_to_go[$iend_1];
+ }
- # or container is broken (by side-comment, etc)
- || ( $next_nonblank_token eq '('
- && $mate_index_to_go[$i_next_nonblank] < $i )
- )
- )
- {
- set_forced_breakpoint( $i - 1 );
- } ## end if ( $type eq 'k' && $i...)
+ # handle leading 'and' and 'xor'
+ elsif ($tokens_to_go[$ibeg_2] eq 'and'
+ || $tokens_to_go[$ibeg_2] eq 'xor' )
+ {
- # remember locations of -> if this is a pre-broken method chain
- if ( $type eq '->' ) {
- if ($rOpts_break_at_old_method_breakpoints) {
+ # Decide if we will combine a single terminal 'and'
+ # after an 'if' or 'unless'.
+
+ # This looks best with the 'and' on the same
+ # line as the 'if':
+ #
+ # $a = 1
+ # if $seconds and $nu < 2;
+ #
+ # But this looks better as shown:
+ #
+ # $a = 1
+ # if !$this->{Parents}{$_}
+ # or $this->{Parents}{$_} eq $_;
+ #
+ next
+ unless (
+ $this_line_is_semicolon_terminated
+ && (
- # Case 1: look for lines with leading pointers
- if ( $i == $i_line_start ) {
- set_forced_breakpoint( $i - 1 );
+ # following 'if' or 'unless' or 'or'
+ $type_ibeg_1 eq 'k'
+ && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
+ || $tokens_to_go[$ibeg_1] eq 'or' )
+ )
+ );
}
- # Case 2: look for cuddled pointer calls
- else {
+ # handle leading "if" and "unless"
+ elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
- # look for old lines with leading ')->' or ') ->'
- # and, when found, force a break before the
- # opening paren and after the previous closing paren.
- if (
- $types_to_go[$i_line_start] eq '}'
- && ( $i == $i_line_start + 1
- || $i == $i_line_start + 2
- && $types_to_go[ $i - 1 ] eq 'b' )
- )
- {
- set_forced_breakpoint( $i_line_start - 1 );
- set_forced_breakpoint(
- $mate_index_to_go[$i_line_start] );
- }
- }
- }
- } ## end if ( $type eq '->' )
+ # Combine something like:
+ # next
+ # if ( $lang !~ /${l}$/i );
+ # into:
+ # next if ( $lang !~ /${l}$/i );
+ next
+ unless (
+ $this_line_is_semicolon_terminated
- # remember locations of '||' and '&&' for possible breaks if we
- # decide this is a long logical expression.
- elsif ( $type eq '||' ) {
- push @{ $rand_or_list[$depth][2] }, $i;
- ++$has_old_logical_breakpoints[$depth]
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints );
- } ## end elsif ( $type eq '||' )
- elsif ( $type eq '&&' ) {
- push @{ $rand_or_list[$depth][3] }, $i;
- ++$has_old_logical_breakpoints[$depth]
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints );
- } ## end elsif ( $type eq '&&' )
- elsif ( $type eq 'f' ) {
- push @{ $rfor_semicolon_list[$depth] }, $i;
- }
- elsif ( $type eq 'k' ) {
- if ( $token eq 'and' ) {
- push @{ $rand_or_list[$depth][1] }, $i;
- ++$has_old_logical_breakpoints[$depth]
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints );
- } ## end if ( $token eq 'and' )
+ # previous line begins with 'and' or 'or'
+ && $type_ibeg_1 eq 'k'
+ && $is_and_or{ $tokens_to_go[$ibeg_1] }
- # break immediately at 'or's which are probably not in a logical
- # block -- but we will break in logical breaks below so that
- # they do not add to the forced_breakpoint_count
- elsif ( $token eq 'or' ) {
- push @{ $rand_or_list[$depth][0] }, $i;
- ++$has_old_logical_breakpoints[$depth]
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints );
- if ( $is_logical_container{ $container_type[$depth] } ) {
+ );
}
+
+ # handle all other leading keywords
else {
- if ($is_long_line) { set_forced_breakpoint($i) }
- elsif ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints )
- {
- $saw_good_breakpoint = 1;
+
+ # keywords look best at start of lines,
+ # but combine things like "1 while"
+ unless ( $is_assignment{$type_iend_1} ) {
+ next
+ if ( ( $type_iend_1 ne 'k' )
+ && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
}
- } ## end else [ if ( $is_logical_container...)]
- } ## end elsif ( $token eq 'or' )
- elsif ( $token eq 'if' || $token eq 'unless' ) {
- push @{ $rand_or_list[$depth][4] }, $i;
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints )
- {
- set_forced_breakpoint($i);
}
- } ## end elsif ( $token eq 'if' ||...)
- } ## end elsif ( $type eq 'k' )
- elsif ( $is_assignment{$type} ) {
- $i_equals[$depth] = $i;
- }
+ }
- if ($type_sequence) {
+ # similar treatment of && and || as above for 'and' and 'or':
+ # NOTE: This block of code is currently bypassed because
+ # of a previous block but is retained for possible future use.
+ elsif ( $is_amp_amp{$type_ibeg_2} ) {
- # handle any postponed closing breakpoints
- if ( $token =~ /^[\)\]\}\:]$/ ) {
- if ( $type eq ':' ) {
- $last_colon_sequence_number = $type_sequence;
-
- # retain break at a ':' line break
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_ternary_breakpoints )
- {
+ # maybe looking at something like:
+ # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
- set_forced_breakpoint($i);
+ next
+ unless (
+ $this_line_is_semicolon_terminated
- # break at previous '='
- if ( $i_equals[$depth] > 0 ) {
- set_forced_breakpoint( $i_equals[$depth] );
- $i_equals[$depth] = -1;
- }
- } ## end if ( ( $i == $i_line_start...))
- } ## end if ( $type eq ':' )
- if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
- my $inc = ( $type eq ':' ) ? 0 : 1;
- set_forced_breakpoint( $i - $inc );
- delete $postponed_breakpoint{$type_sequence};
- }
- } ## end if ( $token =~ /^[\)\]\}\:]$/[{[(])
+ # previous line begins with an 'if' or 'unless' keyword
+ && $type_ibeg_1 eq 'k'
+ && $is_if_unless{ $tokens_to_go[$ibeg_1] }
- # set breaks at ?/: if they will get separated (and are
- # not a ?/: chain), or if the '?' is at the end of the
- # line
- elsif ( $token eq '?' ) {
- my $i_colon = $mate_index_to_go[$i];
- if (
- $i_colon <= 0 # the ':' is not in this batch
- || $i == 0 # this '?' is the first token of the line
- || $i ==
- $max_index_to_go # or this '?' is the last token
- )
- {
+ );
+ }
- # don't break at a '?' if preceded by ':' on
- # this line of previous ?/: pair on this line.
- # This is an attempt to preserve a chain of ?/:
- # expressions (elsif2.t). And don't break if
- # this has a side comment.
- set_forced_breakpoint($i)
- unless (
- $type_sequence == (
- $last_colon_sequence_number +
- TYPE_SEQUENCE_INCREMENT
- )
- || $tokens_to_go[$max_index_to_go] eq '#'
- );
- set_closing_breakpoint($i);
- } ## end if ( $i_colon <= 0 ||...)
- } ## end elsif ( $token eq '?' )
- } ## end if ($type_sequence)
+ # handle line with leading = or similar
+ elsif ( $is_assignment{$type_ibeg_2} ) {
+ next unless ( $n == 1 || $n == $nmax );
+ next if $old_breakpoint_to_go[$iend_1];
+ next
+ unless (
-#print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
+ # unless we can reduce this to two lines
+ $nmax == 2
- #------------------------------------------------------------
- # Handle Increasing Depth..
- #
- # prepare for a new list when depth increases
- # token $i is a '(','{', or '['
- #------------------------------------------------------------
- if ( $depth > $current_depth ) {
+ # or three lines, the last with a leading semicolon
+ || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
- $breakpoint_stack[$depth] = $forced_breakpoint_count;
- $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
- $has_broken_sublist[$depth] = 0;
- $identifier_count_stack[$depth] = 0;
- $index_before_arrow[$depth] = -1;
- $interrupted_list[$depth] = 0;
- $item_count_stack[$depth] = 0;
- $last_comma_index[$depth] = undef;
- $last_dot_index[$depth] = undef;
- $last_nonblank_type[$depth] = $last_nonblank_type;
- $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
- $opening_structure_index_stack[$depth] = $i;
- $rand_or_list[$depth] = [];
- $rfor_semicolon_list[$depth] = [];
- $i_equals[$depth] = -1;
- $want_comma_break[$depth] = 0;
- $container_type[$depth] =
- ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
- ? $last_nonblank_token
- : "";
- $has_old_logical_breakpoints[$depth] = 0;
+ # or the next line ends with a here doc
+ || $type_iend_2 eq 'h'
- # if line ends here then signal closing token to break
- if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
- {
- set_closing_breakpoint($i);
+ # or this is a short line ending in ;
+ || ( $n == $nmax && $this_line_is_semicolon_terminated )
+ );
+ $forced_breakpoint_to_go[$iend_1] = 0;
}
- # Not all lists of values should be vertically aligned..
- $dont_align[$depth] =
+ #----------------------------------------------------------
+ # Recombine Section 4:
+ # Combine the lines if we arrive here and it is possible
+ #----------------------------------------------------------
- # code BLOCKS are handled at a higher level
- ( $block_type ne "" )
+ # honor hard breakpoints
+ next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
- # certain paren lists
- || ( $type eq '(' ) && (
+ my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
- # it does not usually look good to align a list of
- # identifiers in a parameter list, as in:
- # my($var1, $var2, ...)
- # (This test should probably be refined, for now I'm just
- # testing for any keyword)
- ( $last_nonblank_type eq 'k' )
+ # Require a few extra spaces before recombining lines if we are
+ # at an old breakpoint unless this is a simple list or terminal
+ # line. The goal is to avoid oscillating between two
+ # quasi-stable end states. For example this snippet caused
+ # problems:
+## my $this =
+## bless {
+## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
+## },
+## $type;
+ next
+ if ( $old_breakpoint_to_go[$iend_1]
+ && !$this_line_is_semicolon_terminated
+ && $n < $nmax
+ && $excess + 4 > 0
+ && $type_iend_2 ne ',' );
- # a trailing '(' usually indicates a non-list
- || ( $next_nonblank_type eq '(' )
- );
+ # do not recombine if we would skip in indentation levels
+ if ( $n < $nmax ) {
+ my $if_next = $ri_beg->[ $n + 1 ];
+ next
+ if (
+ $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
+ && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
- # patch to outdent opening brace of long if/for/..
- # statements (like this one). See similar coding in
- # set_continuation breaks. We have also catch it here for
- # short line fragments which otherwise will not go through
- # set_continuation_breaks.
- if (
- $block_type
+ # but an isolated 'if (' is undesirable
+ && !(
+ $n == 1
+ && $iend_1 - $ibeg_1 <= 2
+ && $type_ibeg_1 eq 'k'
+ && $tokens_to_go[$ibeg_1] eq 'if'
+ && $tokens_to_go[$iend_1] ne '('
+ )
+ );
+ }
- # if we have the ')' but not its '(' in this batch..
- && ( $last_nonblank_token eq ')' )
- && $mate_index_to_go[$i_last_nonblank_token] < 0
+ # honor no-break's
+ next if ( $bs >= NO_BREAK - 1 );
- # and user wants brace to left
- && !$rOpts->{'opening-brace-always-on-right'}
+ # remember the pair with the greatest bond strength
+ if ( !$n_best ) {
+ $n_best = $n;
+ $bs_best = $bs;
+ }
+ else {
- && ( $type eq '{' ) # should be true
- && ( $token eq '{' ) # should be true
- )
- {
- set_forced_breakpoint( $i - 1 );
- } ## end if ( $block_type && ( ...))
- } ## end if ( $depth > $current_depth)
+ if ( $bs > $bs_best ) {
+ $n_best = $n;
+ $bs_best = $bs;
+ }
+ }
+ }
- #------------------------------------------------------------
- # Handle Decreasing Depth..
- #
- # finish off any old list when depth decreases
- # token $i is a ')','}', or ']'
- #------------------------------------------------------------
- elsif ( $depth < $current_depth ) {
+ # recombine the pair with the greatest bond strength
+ if ($n_best) {
+ splice @{$ri_beg}, $n_best, 1;
+ splice @{$ri_end}, $n_best - 1, 1;
+ splice @joint, $n_best, 1;
- check_for_new_minimum_depth($depth);
+ # keep going if we are still making progress
+ $more_to_do++;
+ }
+ }
+ return ( $ri_beg, $ri_end );
+ }
+} ## end closure recombine_breakpoints
- # force all outer logical containers to break after we see on
- # old breakpoint
- $has_old_logical_breakpoints[$depth] ||=
- $has_old_logical_breakpoints[$current_depth];
+sub insert_final_ternary_breaks {
- # Patch to break between ') {' if the paren list is broken.
- # There is similar logic in set_continuation_breaks for
- # non-broken lists.
- if ( $token eq ')'
- && $next_nonblank_block_type
- && $interrupted_list[$current_depth]
- && $next_nonblank_type eq '{'
- && !$rOpts->{'opening-brace-always-on-right'} )
- {
- set_forced_breakpoint($i);
- } ## end if ( $token eq ')' && ...
+ my ( $self, $ri_left, $ri_right ) = @_;
-#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
+ # Called once per batch to look for and do any final line breaks for
+ # long ternary chains
- # set breaks at commas if necessary
- my ( $bp_count, $do_not_break_apart ) =
- set_comma_breakpoints($current_depth);
+ my $nmax = @{$ri_right} - 1;
- my $i_opening = $opening_structure_index_stack[$current_depth];
- my $saw_opening_structure = ( $i_opening >= 0 );
+ # scan the left and right end tokens of all lines
+ my $count = 0;
+ my $i_first_colon = -1;
+ for my $n ( 0 .. $nmax ) {
+ my $il = $ri_left->[$n];
+ my $ir = $ri_right->[$n];
+ my $typel = $types_to_go[$il];
+ my $typer = $types_to_go[$ir];
+ return if ( $typel eq '?' );
+ return if ( $typer eq '?' );
+ if ( $typel eq ':' ) { $i_first_colon = $il; last; }
+ elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
+ }
- # this term is long if we had to break at interior commas..
- my $is_long_term = $bp_count > 0;
+ # For long ternary chains,
+ # if the first : we see has its ? is in the interior
+ # of a preceding line, then see if there are any good
+ # breakpoints before the ?.
+ if ( $i_first_colon > 0 ) {
+ my $i_question = $mate_index_to_go[$i_first_colon];
+ if ( $i_question > 0 ) {
+ my @insert_list;
+ for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
+ my $token = $tokens_to_go[$ii];
+ my $type = $types_to_go[$ii];
- # If this is a short container with one or more comma arrows,
- # then we will mark it as a long term to open it if requested.
- # $rOpts_comma_arrow_breakpoints =
- # 0 - open only if comma precedes closing brace
- # 1 - stable: except for one line blocks
- # 2 - try to form 1 line blocks
- # 3 - ignore =>
- # 4 - always open up if vt=0
- # 5 - stable: even for one line blocks if vt=0
- if ( !$is_long_term
- && $tokens_to_go[$i_opening] =~ /^[\(\{\[]$/
- && $index_before_arrow[ $depth + 1 ] > 0
- && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
- )
- {
- $is_long_term = $rOpts_comma_arrow_breakpoints == 4
- || ( $rOpts_comma_arrow_breakpoints == 0
- && $last_nonblank_token eq ',' )
- || ( $rOpts_comma_arrow_breakpoints == 5
- && $old_breakpoint_to_go[$i_opening] );
- } ## end if ( !$is_long_term &&...)
-
- # mark term as long if the length between opening and closing
- # parens exceeds allowed line length
- if ( !$is_long_term && $saw_opening_structure ) {
- my $i_opening_minus = find_token_starting_list($i_opening);
+ # For now, a good break is either a comma or,
+ # in a long chain, a 'return'.
+ # Patch for RT #126633: added the $nmax>1 check to avoid
+ # breaking after a return for a simple ternary. For longer
+ # chains the break after return allows vertical alignment, so
+ # it is still done. So perltidy -wba='?' will not break
+ # immediately after the return in the following statement:
+ # sub x {
+ # return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' :
+ # 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb';
+ # }
+ if (
+ (
+ $type eq ','
+ || $type eq 'k' && ( $nmax > 1 && $token eq 'return' )
+ )
+ && $self->in_same_container_i( $ii, $i_question )
+ )
+ {
+ push @insert_list, $ii;
+ last;
+ }
+ }
- # Note: we have to allow for one extra space after a
- # closing token so that we do not strand a comma or
- # semicolon, hence the '>=' here (oneline.t)
- # Note: we ignore left weld lengths here for best results
- $is_long_term =
- excess_line_length( $i_opening_minus, $i, 1 ) >= 0;
- } ## end if ( !$is_long_term &&...)
+ # insert any new break points
+ if (@insert_list) {
+ $self->insert_additional_breaks( \@insert_list, $ri_left,
+ $ri_right );
+ }
+ }
+ }
+ return;
+}
- # We've set breaks after all comma-arrows. Now we have to
- # undo them if this can be a one-line block
- # (the only breakpoints set will be due to comma-arrows)
- if (
+sub insert_breaks_before_list_opening_containers {
- # user doesn't require breaking after all comma-arrows
- ( $rOpts_comma_arrow_breakpoints != 0 )
- && ( $rOpts_comma_arrow_breakpoints != 4 )
+ my ( $self, $ri_left, $ri_right ) = @_;
- # and if the opening structure is in this batch
- && $saw_opening_structure
+ # This routine is called once per batch to implement the parameters
+ # --break-before-hash-brace, etc.
- # and either on the same old line
- && (
- $old_breakpoint_count_stack[$current_depth] ==
- $last_old_breakpoint_count
+ # Nothing to do if none of these parameters has been set
+ return unless %break_before_container_types;
- # or user wants to form long blocks with arrows
- || $rOpts_comma_arrow_breakpoints == 2
- )
+ my $nmax = @{$ri_right} - 1;
+ return unless ( $nmax >= 0 );
- # and we made some breakpoints between the opening and closing
- && ( $breakpoint_undo_stack[$current_depth] <
- $forced_breakpoint_undo_count )
+ my $rLL = $self->[_rLL_];
- # and this block is short enough to fit on one line
- # Note: use < because need 1 more space for possible comma
- && !$is_long_term
+ my $rbreak_before_container_by_seqno =
+ $self->[_rbreak_before_container_by_seqno_];
+ my $rK_weld_left = $self->[_rK_weld_left_];
- )
- {
- undo_forced_breakpoint_stack(
- $breakpoint_undo_stack[$current_depth] );
- } ## end if ( ( $rOpts_comma_arrow_breakpoints...))
+ # scan the ends of all lines
+ my @insert_list;
+ for my $n ( 0 .. $nmax ) {
+ my $il = $ri_left->[$n];
+ my $ir = $ri_right->[$n];
+ next unless ( $ir > $il );
+ my $Kl = $K_to_go[$il];
+ my $Kr = $K_to_go[$ir];
+ my $Kend = $Kr;
+ my $type_end = $rLL->[$Kr]->[_TYPE_];
- # now see if we have any comma breakpoints left
- my $has_comma_breakpoints =
- ( $breakpoint_stack[$current_depth] !=
- $forced_breakpoint_count );
+ # Backup before any side comment
+ if ( $type_end eq '#' ) {
+ $Kend = $self->K_previous_nonblank($Kr);
+ next unless defined($Kend);
+ $type_end = $rLL->[$Kend]->[_TYPE_];
+ }
- # update broken-sublist flag of the outer container
- $has_broken_sublist[$depth] =
- $has_broken_sublist[$depth]
- || $has_broken_sublist[$current_depth]
- || $is_long_term
- || $has_comma_breakpoints;
+ # Backup to the start of any weld; fix for b1173.
+ if ($total_weld_count) {
+ my $Kend_test = $rK_weld_left->{$Kend};
+ if ( defined($Kend_test) && $Kend_test > $Kl ) {
+ $Kend = $Kend_test;
+ $Kend_test = $rK_weld_left->{$Kend};
+ }
-# Having come to the closing ')', '}', or ']', now we have to decide if we
-# should 'open up' the structure by placing breaks at the opening and
-# closing containers. This is a tricky decision. Here are some of the
-# basic considerations:
-#
-# -If this is a BLOCK container, then any breakpoints will have already
-# been set (and according to user preferences), so we need do nothing here.
-#
-# -If we have a comma-separated list for which we can align the list items,
-# then we need to do so because otherwise the vertical aligner cannot
-# currently do the alignment.
-#
-# -If this container does itself contain a container which has been broken
-# open, then it should be broken open to properly show the structure.
-#
-# -If there is nothing to align, and no other reason to break apart,
-# then do not do it.
-#
-# We will not break open the parens of a long but 'simple' logical expression.
-# For example:
-#
-# This is an example of a simple logical expression and its formatting:
-#
-# if ( $bigwasteofspace1 && $bigwasteofspace2
-# || $bigwasteofspace3 && $bigwasteofspace4 )
-#
-# Most people would prefer this than the 'spacey' version:
-#
-# if (
-# $bigwasteofspace1 && $bigwasteofspace2
-# || $bigwasteofspace3 && $bigwasteofspace4
-# )
-#
-# To illustrate the rules for breaking logical expressions, consider:
-#
-# FULLY DENSE:
-# if ( $opt_excl
-# and ( exists $ids_excl_uc{$id_uc}
-# or grep $id_uc =~ /$_/, @ids_excl_uc ))
-#
-# This is on the verge of being difficult to read. The current default is to
-# open it up like this:
-#
-# DEFAULT:
-# if (
-# $opt_excl
-# and ( exists $ids_excl_uc{$id_uc}
-# or grep $id_uc =~ /$_/, @ids_excl_uc )
-# )
-#
-# This is a compromise which tries to avoid being too dense and to spacey.
-# A more spaced version would be:
-#
-# SPACEY:
-# if (
-# $opt_excl
-# and (
-# exists $ids_excl_uc{$id_uc}
-# or grep $id_uc =~ /$_/, @ids_excl_uc
-# )
-# )
-#
-# Some people might prefer the spacey version -- an option could be added. The
-# innermost expression contains a long block '( exists $ids_... ')'.
-#
-# Here is how the logic goes: We will force a break at the 'or' that the
-# innermost expression contains, but we will not break apart its opening and
-# closing containers because (1) it contains no multi-line sub-containers itself,
-# and (2) there is no alignment to be gained by breaking it open like this
-#
-# and (
-# exists $ids_excl_uc{$id_uc}
-# or grep $id_uc =~ /$_/, @ids_excl_uc
-# )
-#
-# (although this looks perfectly ok and might be good for long expressions). The
-# outer 'if' container, though, contains a broken sub-container, so it will be
-# broken open to avoid too much density. Also, since it contains no 'or's, there
-# will be a forced break at its 'and'.
+ # Do not break if we did not back up to the start of a weld
+ # (shouldn't happen)
+ next if ( defined($Kend_test) );
+ }
- # set some flags telling something about this container..
- my $is_simple_logical_expression = 0;
- if ( $item_count_stack[$current_depth] == 0
- && $saw_opening_structure
- && $tokens_to_go[$i_opening] eq '('
- && $is_logical_container{ $container_type[$current_depth] }
- )
- {
+ my $token = $rLL->[$Kend]->[_TOKEN_];
+ next unless ( $is_opening_token{$token} );
+ next unless ( $Kl < $Kend - 1 );
- # This seems to be a simple logical expression with
- # no existing breakpoints. Set a flag to prevent
- # opening it up.
- if ( !$has_comma_breakpoints ) {
- $is_simple_logical_expression = 1;
- }
+ my $seqno = $rLL->[$Kend]->[_TYPE_SEQUENCE_];
+ next unless ( defined($seqno) );
- # This seems to be a simple logical expression with
- # breakpoints (broken sublists, for example). Break
- # at all 'or's and '||'s.
- else {
- set_logical_breakpoints($current_depth);
- }
- } ## end if ( $item_count_stack...)
+ # Use the flag which was previously set
+ next unless ( $rbreak_before_container_by_seqno->{$seqno} );
- if ( $is_long_term
- && @{ $rfor_semicolon_list[$current_depth] } )
- {
- set_for_semicolon_breakpoints($current_depth);
+ # Install a break before this opening token.
+ my $Kbreak = $self->K_previous_nonblank($Kend);
+ my $ibreak = $Kbreak - $Kl + $il;
+ next if ( $ibreak < $il );
+ next if ( $nobreak_to_go[$ibreak] );
+ push @insert_list, $ibreak;
+ }
- # open up a long 'for' or 'foreach' container to allow
- # leading term alignment unless -lp is used.
- $has_comma_breakpoints = 1
- unless $rOpts_line_up_parentheses;
- } ## end if ( $is_long_term && ...)
+ # insert any new break points
+ if (@insert_list) {
+ $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
+ }
+ return;
+}
- if (
+sub note_added_semicolon {
+ my ( $self, $line_number ) = @_;
+ $self->[_last_added_semicolon_at_] = $line_number;
+ if ( $self->[_added_semicolon_count_] == 0 ) {
+ $self->[_first_added_semicolon_at_] = $line_number;
+ }
+ $self->[_added_semicolon_count_]++;
+ write_logfile_entry("Added ';' here\n");
+ return;
+}
- # breaks for code BLOCKS are handled at a higher level
- !$block_type
+sub note_deleted_semicolon {
+ my ( $self, $line_number ) = @_;
+ $self->[_last_deleted_semicolon_at_] = $line_number;
+ if ( $self->[_deleted_semicolon_count_] == 0 ) {
+ $self->[_first_deleted_semicolon_at_] = $line_number;
+ }
+ $self->[_deleted_semicolon_count_]++;
+ write_logfile_entry("Deleted unnecessary ';' at line $line_number\n");
+ return;
+}
- # we do not need to break at the top level of an 'if'
- # type expression
- && !$is_simple_logical_expression
+sub note_embedded_tab {
+ my ( $self, $line_number ) = @_;
+ $self->[_embedded_tab_count_]++;
+ $self->[_last_embedded_tab_at_] = $line_number;
+ if ( !$self->[_first_embedded_tab_at_] ) {
+ $self->[_first_embedded_tab_at_] = $line_number;
+ }
- ## modification to keep ': (' containers vertically tight;
- ## but probably better to let user set -vt=1 to avoid
- ## inconsistency with other paren types
- ## && ($container_type[$current_depth] ne ':')
+ if ( $self->[_embedded_tab_count_] <= MAX_NAG_MESSAGES ) {
+ write_logfile_entry("Embedded tabs in quote or pattern\n");
+ }
+ return;
+}
- # otherwise, we require one of these reasons for breaking:
- && (
+sub correct_lp_indentation {
- # - this term has forced line breaks
- $has_comma_breakpoints
+ # When the -lp option is used, we need to make a last pass through
+ # each line to correct the indentation positions in case they differ
+ # from the predictions. This is necessary because perltidy uses a
+ # predictor/corrector method for aligning with opening parens. The
+ # predictor is usually good, but sometimes stumbles. The corrector
+ # tries to patch things up once the actual opening paren locations
+ # are known.
+ my ( $self, $ri_first, $ri_last ) = @_;
+ my $do_not_pad = 0;
- # - the opening container is separated from this batch
- # for some reason (comment, blank line, code block)
- # - this is a non-paren container spanning multiple lines
- || !$saw_opening_structure
+ # Note on flag '$do_not_pad':
+ # We want to avoid a situation like this, where the aligner inserts
+ # whitespace before the '=' to align it with a previous '=', because
+ # otherwise the parens might become mis-aligned in a situation like
+ # this, where the '=' has become aligned with the previous line,
+ # pushing the opening '(' forward beyond where we want it.
+ #
+ # $mkFloor::currentRoom = '';
+ # $mkFloor::c_entry = $c->Entry(
+ # -width => '10',
+ # -relief => 'sunken',
+ # ...
+ # );
+ #
+ # We leave it to the aligner to decide how to do this.
- # - this is a long block contained in another breakable
- # container
- || ( $is_long_term
- && $container_environment_to_go[$i_opening] ne
- 'BLOCK' )
- )
- )
- {
+ # first remove continuation indentation if appropriate
+ my $max_line = @{$ri_first} - 1;
- # For -lp option, we must put a breakpoint before
- # the token which has been identified as starting
- # this indentation level. This is necessary for
- # proper alignment.
- if ( $rOpts_line_up_parentheses && $saw_opening_structure )
- {
- my $item = $leading_spaces_to_go[ $i_opening + 1 ];
- if ( $i_opening + 1 < $max_index_to_go
- && $types_to_go[ $i_opening + 1 ] eq 'b' )
- {
- $item = $leading_spaces_to_go[ $i_opening + 2 ];
- }
- if ( defined($item) ) {
- my $i_start_2 = $item->get_starting_index();
- if (
- defined($i_start_2)
-
- # we are breaking after an opening brace, paren,
- # so don't break before it too
- && $i_start_2 ne $i_opening
- )
- {
-
- # Only break for breakpoints at the same
- # indentation level as the opening paren
- my $test1 = $nesting_depth_to_go[$i_opening];
- my $test2 = $nesting_depth_to_go[$i_start_2];
- if ( $test2 == $test1 ) {
- set_forced_breakpoint( $i_start_2 - 1 );
- }
- } ## end if ( defined($i_start_2...))
- } ## end if ( defined($item) )
- } ## end if ( $rOpts_line_up_parentheses...)
-
- # break after opening structure.
- # note: break before closing structure will be automatic
- if ( $minimum_depth <= $current_depth ) {
-
- set_forced_breakpoint($i_opening)
- unless ( $do_not_break_apart
- || is_unbreakable_container($current_depth) );
-
- # break at ',' of lower depth level before opening token
- if ( $last_comma_index[$depth] ) {
- set_forced_breakpoint( $last_comma_index[$depth] );
- }
+ # looking at each line of this batch..
+ my ( $ibeg, $iend );
+ foreach my $line ( 0 .. $max_line ) {
+ $ibeg = $ri_first->[$line];
+ $iend = $ri_last->[$line];
- # break at '.' of lower depth level before opening token
- if ( $last_dot_index[$depth] ) {
- set_forced_breakpoint( $last_dot_index[$depth] );
- }
+ # looking at each token in this output line..
+ foreach my $i ( $ibeg .. $iend ) {
- # break before opening structure if preceded by another
- # closing structure and a comma. This is normally
- # done by the previous closing brace, but not
- # if it was a one-line block.
- if ( $i_opening > 2 ) {
- my $i_prev =
- ( $types_to_go[ $i_opening - 1 ] eq 'b' )
- ? $i_opening - 2
- : $i_opening - 1;
+ # How many space characters to place before this token
+ # for special alignment. Actual padding is done in the
+ # continue block.
- if ( $types_to_go[$i_prev] eq ','
- && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
- {
- set_forced_breakpoint($i_prev);
- }
+ # looking for next unvisited indentation item
+ my $indentation = $leading_spaces_to_go[$i];
+ if ( !$indentation->get_marked() ) {
+ $indentation->set_marked(1);
- # also break before something like ':(' or '?('
- # if appropriate.
- elsif (
- $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
- {
- my $token_prev = $tokens_to_go[$i_prev];
- if ( $want_break_before{$token_prev} ) {
- set_forced_breakpoint($i_prev);
- }
- } ## end elsif ( $types_to_go[$i_prev...])
- } ## end if ( $i_opening > 2 )
- } ## end if ( $minimum_depth <=...)
+ # looking for indentation item for which we are aligning
+ # with parens, braces, and brackets
+ next unless ( $indentation->get_align_paren() );
- # break after comma following closing structure
- if ( $next_type eq ',' ) {
- set_forced_breakpoint( $i + 1 );
+ # skip closed container on this line
+ if ( $i > $ibeg ) {
+ my $im = max( $ibeg, $iprev_to_go[$i] );
+ if ( $type_sequence_to_go[$im]
+ && $mate_index_to_go[$im] <= $iend )
+ {
+ next;
}
+ }
- # break before an '=' following closing structure
- if (
- $is_assignment{$next_nonblank_type}
- && ( $breakpoint_stack[$current_depth] !=
- $forced_breakpoint_count )
- )
- {
- set_forced_breakpoint($i);
- } ## end if ( $is_assignment{$next_nonblank_type...})
+ if ( $line == 1 && $i == $ibeg ) {
+ $do_not_pad = 1;
+ }
- # break at any comma before the opening structure Added
- # for -lp, but seems to be good in general. It isn't
- # obvious how far back to look; the '5' below seems to
- # work well and will catch the comma in something like
- # push @list, myfunc( $param, $param, ..
+ # Ok, let's see what the error is and try to fix it
+ my $actual_pos;
+ my $predicted_pos = $indentation->get_spaces();
+ if ( $i > $ibeg ) {
- my $icomma = $last_comma_index[$depth];
- if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
- unless ( $forced_breakpoint_to_go[$icomma] ) {
- set_forced_breakpoint($icomma);
+ # token is mid-line - use length to previous token
+ $actual_pos = total_line_length( $ibeg, $i - 1 );
+
+ # for mid-line token, we must check to see if all
+ # additional lines have continuation indentation,
+ # and remove it if so. Otherwise, we do not get
+ # good alignment.
+ my $closing_index = $indentation->get_closed();
+ if ( $closing_index > $iend ) {
+ my $ibeg_next = $ri_first->[ $line + 1 ];
+ if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
+ $self->undo_lp_ci( $line, $i, $closing_index,
+ $ri_first, $ri_last );
}
}
- } # end logic to open up a container
+ }
+ elsif ( $line > 0 ) {
- # Break open a logical container open if it was already open
- elsif ($is_simple_logical_expression
- && $has_old_logical_breakpoints[$current_depth] )
- {
- set_logical_breakpoints($current_depth);
+ # handle case where token starts a new line;
+ # use length of previous line
+ my $ibegm = $ri_first->[ $line - 1 ];
+ my $iendm = $ri_last->[ $line - 1 ];
+ $actual_pos = total_line_length( $ibegm, $iendm );
+
+ # follow -pt style
+ ++$actual_pos
+ if ( $types_to_go[ $iendm + 1 ] eq 'b' );
}
+ else {
- # Handle long container which does not get opened up
- elsif ($is_long_term) {
+ # token is first character of first line of batch
+ $actual_pos = $predicted_pos;
+ }
- # must set fake breakpoint to alert outer containers that
- # they are complex
- set_fake_breakpoint();
- } ## end elsif ($is_long_term)
+ my $move_right = $actual_pos - $predicted_pos;
- } ## end elsif ( $depth < $current_depth)
+ # done if no error to correct (gnu2.t)
+ if ( $move_right == 0 ) {
+ $indentation->set_recoverable_spaces($move_right);
+ next;
+ }
- #------------------------------------------------------------
- # Handle this token
- #------------------------------------------------------------
+ # if we have not seen closure for this indentation in
+ # this batch, we can only pass on a request to the
+ # vertical aligner
+ my $closing_index = $indentation->get_closed();
- $current_depth = $depth;
+ if ( $closing_index < 0 ) {
+ $indentation->set_recoverable_spaces($move_right);
+ next;
+ }
- # handle comma-arrow
- if ( $type eq '=>' ) {
- next if ( $last_nonblank_type eq '=>' );
- next if $rOpts_break_at_old_comma_breakpoints;
- next if $rOpts_comma_arrow_breakpoints == 3;
- $want_comma_break[$depth] = 1;
- $index_before_arrow[$depth] = $i_last_nonblank_token;
- next;
- } ## end if ( $type eq '=>' )
+ # If necessary, look ahead to see if there is really any
+ # leading whitespace dependent on this whitespace, and
+ # also find the longest line using this whitespace.
+ # Since it is always safe to move left if there are no
+ # dependents, we only need to do this if we may have
+ # dependent nodes or need to move right.
- elsif ( $type eq '.' ) {
- $last_dot_index[$depth] = $i;
- }
+ my $right_margin = 0;
+ my $have_child = $indentation->get_have_child();
- # Turn off alignment if we are sure that this is not a list
- # environment. To be safe, we will do this if we see certain
- # non-list tokens, such as ';', and also the environment is
- # not a list. Note that '=' could be in any of the = operators
- # (lextest.t). We can't just use the reported environment
- # because it can be incorrect in some cases.
- elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
- && $container_environment_to_go[$i] ne 'LIST' )
- {
- $dont_align[$depth] = 1;
- $want_comma_break[$depth] = 0;
- $index_before_arrow[$depth] = -1;
- } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
+ my %saw_indentation;
+ my $line_count = 1;
+ $saw_indentation{$indentation} = $indentation;
- # now just handle any commas
- next unless ( $type eq ',' );
+ if ( $have_child || $move_right > 0 ) {
+ $have_child = 0;
+ my $max_length = 0;
+ if ( $i == $ibeg ) {
+ $max_length = total_line_length( $ibeg, $iend );
+ }
- $last_dot_index[$depth] = undef;
- $last_comma_index[$depth] = $i;
+ # look ahead at the rest of the lines of this batch..
+ foreach my $line_t ( $line + 1 .. $max_line ) {
+ my $ibeg_t = $ri_first->[$line_t];
+ my $iend_t = $ri_last->[$line_t];
+ last if ( $closing_index <= $ibeg_t );
- # break here if this comma follows a '=>'
- # but not if there is a side comment after the comma
- if ( $want_comma_break[$depth] ) {
+ # remember all different indentation objects
+ my $indentation_t = $leading_spaces_to_go[$ibeg_t];
+ $saw_indentation{$indentation_t} = $indentation_t;
+ $line_count++;
- if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
- if ($rOpts_comma_arrow_breakpoints) {
- $want_comma_break[$depth] = 0;
- next;
+ # remember longest line in the group
+ my $length_t = total_line_length( $ibeg_t, $iend_t );
+ if ( $length_t > $max_length ) {
+ $max_length = $length_t;
+ }
}
+ $right_margin =
+ $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] -
+ $max_length;
+ if ( $right_margin < 0 ) { $right_margin = 0 }
}
- set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
-
- # break before the previous token if it looks safe
- # Example of something that we will not try to break before:
- # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
- # Also we don't want to break at a binary operator (like +):
- # $c->createOval(
- # $x + $R, $y +
- # $R => $x - $R,
- # $y - $R, -fill => 'black',
- # );
- my $ibreak = $index_before_arrow[$depth] - 1;
- if ( $ibreak > 0
- && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
- {
- if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
- if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
- if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
+ my $first_line_comma_count =
+ grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
+ my $comma_count = $indentation->get_comma_count();
+ my $arrow_count = $indentation->get_arrow_count();
- # don't break pointer calls, such as the following:
- # File::Spec->curdir => 1,
- # (This is tokenized as adjacent 'w' tokens)
- ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
+ # This is a simple approximate test for vertical alignment:
+ # if we broke just after an opening paren, brace, bracket,
+ # and there are 2 or more commas in the first line,
+ # and there are no '=>'s,
+ # then we are probably vertically aligned. We could set
+ # an exact flag in sub scan_list, but this is good
+ # enough.
+ my $indentation_count = keys %saw_indentation;
+ my $is_vertically_aligned =
+ ( $i == $ibeg
+ && $first_line_comma_count > 1
+ && $indentation_count == 1
+ && ( $arrow_count == 0 || $arrow_count == $line_count ) );
- # And don't break before a comma, as in the following:
- # ( LONGER_THAN,=> 1,
- # EIGHTY_CHARACTERS,=> 2,
- # CAUSES_FORMATTING,=> 3,
- # LIKE_THIS,=> 4,
- # );
- # This example is for -tso but should be general rule
- if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
- && $tokens_to_go[ $ibreak + 1 ] ne ',' )
- {
- set_forced_breakpoint($ibreak);
- }
- } ## end if ( $types_to_go[$ibreak...])
- } ## end if ( $ibreak > 0 && $tokens_to_go...)
+ # Make the move if possible ..
+ if (
- $want_comma_break[$depth] = 0;
- $index_before_arrow[$depth] = -1;
+ # we can always move left
+ $move_right < 0
- # handle list which mixes '=>'s and ','s:
- # treat any list items so far as an interrupted list
- $interrupted_list[$depth] = 1;
- next;
- } ## end if ( $want_comma_break...)
+ # but we should only move right if we are sure it will
+ # not spoil vertical alignment
+ || ( $comma_count == 0 )
+ || ( $comma_count > 0 && !$is_vertically_aligned )
+ )
+ {
+ my $move =
+ ( $move_right <= $right_margin )
+ ? $move_right
+ : $right_margin;
- # break after all commas above starting depth
- if ( $depth < $starting_depth && !$dont_align[$depth] ) {
- set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
- next;
+ foreach ( keys %saw_indentation ) {
+ $saw_indentation{$_}
+ ->permanently_decrease_available_spaces( -$move );
+ }
+ }
+
+ # Otherwise, record what we want and the vertical aligner
+ # will try to recover it.
+ else {
+ $indentation->set_recoverable_spaces($move_right);
+ }
}
+ }
+ }
+ return $do_not_pad;
+}
- # add this comma to the list..
- my $item_count = $item_count_stack[$depth];
- if ( $item_count == 0 ) {
+sub undo_lp_ci {
- # but do not form a list with no opening structure
- # for example:
+ # If there is a single, long parameter within parens, like this:
+ #
+ # $self->command( "/msg "
+ # . $infoline->chan
+ # . " You said $1, but did you know that it's square was "
+ # . $1 * $1 . " ?" );
+ #
+ # we can remove the continuation indentation of the 2nd and higher lines
+ # to achieve this effect, which is more pleasing:
+ #
+ # $self->command("/msg "
+ # . $infoline->chan
+ # . " You said $1, but did you know that it's square was "
+ # . $1 * $1 . " ?");
- # open INFILE_COPY, ">$input_file_copy"
- # or die ("very long message");
+ my ( $self, $line_open, $i_start, $closing_index, $ri_first, $ri_last ) =
+ @_;
+ my $max_line = @{$ri_first} - 1;
- if ( ( $opening_structure_index_stack[$depth] < 0 )
- && $container_environment_to_go[$i] eq 'BLOCK' )
- {
- $dont_align[$depth] = 1;
- }
- } ## end if ( $item_count == 0 )
+ # must be multiple lines
+ return unless $max_line > $line_open;
- $comma_index[$depth][$item_count] = $i;
- ++$item_count_stack[$depth];
- if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
- $identifier_count_stack[$depth]++;
- }
- } ## end while ( ++$i <= $max_index_to_go)
+ my $lev_start = $levels_to_go[$i_start];
+ my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
- #-------------------------------------------
- # end of loop over all tokens in this batch
- #-------------------------------------------
+ # see if all additional lines in this container have continuation
+ # indentation
+ my $n;
+ my $line_1 = 1 + $line_open;
+ for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
+ my $ibeg = $ri_first->[$n];
+ my $iend = $ri_last->[$n];
+ if ( $ibeg eq $closing_index ) { $n--; last }
+ return if ( $lev_start != $levels_to_go[$ibeg] );
+ return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
+ last if ( $closing_index <= $iend );
+ }
- # set breaks for any unfinished lists ..
- for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
+ # we can reduce the indentation of all continuation lines
+ my $continuation_line_count = $n - $line_open;
+ @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
+ (0) x ($continuation_line_count);
+ @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
+ @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
+ return;
+}
- $interrupted_list[$dd] = 1;
- $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
- set_comma_breakpoints($dd);
- set_logical_breakpoints($dd)
- if ( $has_old_logical_breakpoints[$dd] );
- set_for_semicolon_breakpoints($dd);
+###############################################
+# CODE SECTION 10: Code to break long statments
+###############################################
- # break open container...
- my $i_opening = $opening_structure_index_stack[$dd];
- set_forced_breakpoint($i_opening)
- unless (
- is_unbreakable_container($dd)
+sub set_continuation_breaks {
- # Avoid a break which would place an isolated ' or "
- # on a line
- || ( $type eq 'Q'
- && $i_opening >= $max_index_to_go - 2
- && $token =~ /^['"]$/ )
- );
- } ## end for ( my $dd = $current_depth...)
+ # Called once per batch to set breaks in long lines.
- # Return a flag indicating if the input file had some good breakpoints.
- # This flag will be used to force a break in a line shorter than the
- # allowed line length.
- if ( $has_old_logical_breakpoints[$current_depth] ) {
- $saw_good_breakpoint = 1;
- }
+ # Define an array of indexes for inserting newline characters to
+ # keep the line lengths below the maximum desired length. There is
+ # an implied break after the last token, so it need not be included.
- # A complex line with one break at an = has a good breakpoint.
- # This is not complex ($total_depth_variation=0):
- # $res1
- # = 10;
- #
- # This is complex ($total_depth_variation=6):
- # $res2 =
- # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
- elsif ($i_old_assignment_break
- && $total_depth_variation > 4
- && $old_breakpoint_count == 1 )
- {
- $saw_good_breakpoint = 1;
- } ## end elsif ( $i_old_assignment_break...)
+ # Method:
+ # This routine is part of series of routines which adjust line
+ # lengths. It is only called if a statement is longer than the
+ # maximum line length, or if a preliminary scanning located
+ # desirable break points. Sub scan_list has already looked at
+ # these tokens and set breakpoints (in array
+ # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
+ # after commas, after opening parens, and before closing parens).
+ # This routine will honor these breakpoints and also add additional
+ # breakpoints as necessary to keep the line length below the maximum
+ # requested. It bases its decision on where the 'bond strength' is
+ # lowest.
- return $saw_good_breakpoint;
- } ## end sub scan_list
-} # end scan_list
+ # Output: returns references to the arrays:
+ # @i_first
+ # @i_last
+ # which contain the indexes $i of the first and last tokens on each
+ # line.
-sub find_token_starting_list {
+ # In addition, the array:
+ # $forced_breakpoint_to_go[$i]
+ # may be updated to be =1 for any index $i after which there must be
+ # a break. This signals later routines not to undo the breakpoint.
- # When testing to see if a block will fit on one line, some
- # previous token(s) may also need to be on the line; particularly
- # if this is a sub call. So we will look back at least one
- # token. NOTE: This isn't perfect, but not critical, because
- # if we mis-identify a block, it will be wrapped and therefore
- # fixed the next time it is formatted.
- my $i_opening_paren = shift;
- my $i_opening_minus = $i_opening_paren;
- my $im1 = $i_opening_paren - 1;
- my $im2 = $i_opening_paren - 2;
- my $im3 = $i_opening_paren - 3;
- my $typem1 = $types_to_go[$im1];
- my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b';
+ my ( $self, $saw_good_break, $rcolon_list ) = @_;
- if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
- $i_opening_minus = $i_opening_paren;
- }
- elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
- $i_opening_minus = $im1 if $im1 >= 0;
+ # @{$rcolon_list} is a list of all the ? and : tokens in the batch, in
+ # order.
- # walk back to improve length estimate
- for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
- last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
- $i_opening_minus = $j;
- }
- if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
- }
- elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
- elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
- $i_opening_minus = $im2;
- }
- return $i_opening_minus;
-}
+ use constant DEBUG_BREAKPOINTS => 0;
-{ # begin set_comma_breakpoints_do
+ my @i_first = (); # the first index to output
+ my @i_last = (); # the last index to output
+ my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
+ if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
- my %is_keyword_with_special_leading_term;
+ $self->set_bond_strengths();
- BEGIN {
+ my $imin = 0;
+ my $imax = $max_index_to_go;
+ if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
+ if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
+ my $i_begin = $imin; # index for starting next iteration
- # These keywords have prototypes which allow a special leading item
- # followed by a list
- my @q =
- qw(formline grep kill map printf sprintf push chmod join pack unshift);
- @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
- }
+ my $leading_spaces = leading_spaces_to_go($imin);
+ my $line_count = 0;
+ my $last_break_strength = NO_BREAK;
+ my $i_last_break = -1;
+ my $max_bias = 0.001;
+ my $tiny_bias = 0.0001;
+ my $leading_alignment_token = "";
+ my $leading_alignment_type = "";
- sub set_comma_breakpoints_do {
+ # see if any ?/:'s are in order
+ my $colons_in_order = 1;
+ my $last_tok = "";
+ foreach ( @{$rcolon_list} ) {
+ if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
+ $last_tok = $_;
+ }
- # Given a list with some commas, set breakpoints at some of the
- # commas, if necessary, to make it easy to read. This list is
- # an example:
- my (
- $depth, $i_opening_paren, $i_closing_paren,
- $item_count, $identifier_count, $rcomma_index,
- $next_nonblank_type, $list_type, $interrupted,
- $rdo_not_break_apart, $must_break_open,
- ) = @_;
+ # This is a sufficient but not necessary condition for colon chain
+ my $is_colon_chain = ( $colons_in_order && @{$rcolon_list} > 2 );
- # nothing to do if no commas seen
- return if ( $item_count < 1 );
- my $i_first_comma = $rcomma_index->[0];
- my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
- my $i_last_comma = $i_true_last_comma;
- if ( $i_last_comma >= $max_index_to_go ) {
- $i_last_comma = $rcomma_index->[ --$item_count - 1 ];
- return if ( $item_count < 1 );
- }
+ my $Msg = "";
- #---------------------------------------------------------------
- # find lengths of all items in the list to calculate page layout
- #---------------------------------------------------------------
- my $comma_count = $item_count;
- my @item_lengths;
- my @i_term_begin;
- my @i_term_end;
- my @i_term_comma;
- my $i_prev_plus;
- my @max_length = ( 0, 0 );
- my $first_term_length;
- my $i = $i_opening_paren;
- my $is_odd = 1;
-
- foreach my $j ( 0 .. $comma_count - 1 ) {
- $is_odd = 1 - $is_odd;
- $i_prev_plus = $i + 1;
- $i = $rcomma_index->[$j];
+ #-------------------------------------------------------
+ # BEGINNING of main loop to set continuation breakpoints
+ # Keep iterating until we reach the end
+ #-------------------------------------------------------
+ while ( $i_begin <= $imax ) {
+ my $lowest_strength = NO_BREAK;
+ my $starting_sum = $summed_lengths_to_go[$i_begin];
+ my $i_lowest = -1;
+ my $i_test = -1;
+ my $lowest_next_token = '';
+ my $lowest_next_type = 'b';
+ my $i_lowest_next_nonblank = -1;
+ my $maximum_line_length =
+ $maximum_line_length_at_level[ $levels_to_go[$i_begin] ];
- my $i_term_end =
- ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
- my $i_term_begin =
- ( $types_to_go[$i_prev_plus] eq 'b' )
- ? $i_prev_plus + 1
- : $i_prev_plus;
- push @i_term_begin, $i_term_begin;
- push @i_term_end, $i_term_end;
- push @i_term_comma, $i;
+ #-------------------------------------------------------
+ # BEGINNING of inner loop to find the best next breakpoint
+ #-------------------------------------------------------
+ my $strength = NO_BREAK;
+ for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
+ my $type = $types_to_go[$i_test];
+ my $token = $tokens_to_go[$i_test];
+ my $next_type = $types_to_go[ $i_test + 1 ];
+ my $next_token = $tokens_to_go[ $i_test + 1 ];
+ my $i_next_nonblank = $inext_to_go[$i_test];
+ my $next_nonblank_type = $types_to_go[$i_next_nonblank];
+ my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
+ my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
- # note: currently adding 2 to all lengths (for comma and space)
- my $length =
- 2 + token_sequence_length( $i_term_begin, $i_term_end );
- push @item_lengths, $length;
+ # adjustments to the previous bond strength may have been made, and
+ # we must keep the bond strength of a token and its following blank
+ # the same;
+ my $last_strength = $strength;
+ $strength = $bond_strength_to_go[$i_test];
+ if ( $type eq 'b' ) { $strength = $last_strength }
- if ( $j == 0 ) {
- $first_term_length = $length;
- }
- else {
+ # reduce strength a bit to break ties at an old comma breakpoint ...
+ if (
- if ( $length > $max_length[$is_odd] ) {
- $max_length[$is_odd] = $length;
- }
- }
- }
+ $old_breakpoint_to_go[$i_test]
- # now we have to make a distinction between the comma count and item
- # count, because the item count will be one greater than the comma
- # count if the last item is not terminated with a comma
- my $i_b =
- ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
- ? $i_last_comma + 1
- : $i_last_comma;
- my $i_e =
- ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
- ? $i_closing_paren - 2
- : $i_closing_paren - 1;
- my $i_effective_last_comma = $i_last_comma;
+ # Patch: limited to just commas to avoid blinking states
+ && $type eq ','
- my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
+ # which is a 'good' breakpoint, meaning ...
+ # we don't want to break before it
+ && !$want_break_before{$type}
- if ( $last_item_length > 0 ) {
+ # and either we want to break before the next token
+ # or the next token is not short (i.e. not a '*', '/' etc.)
+ && $i_next_nonblank <= $imax
+ && ( $want_break_before{$next_nonblank_type}
+ || $token_lengths_to_go[$i_next_nonblank] > 2
+ || $next_nonblank_type eq ','
+ || $is_opening_type{$next_nonblank_type} )
+ )
+ {
+ $strength -= $tiny_bias;
+ DEBUG_BREAKPOINTS && do { $Msg .= " :-bias at i=$i_test" };
+ }
- # add 2 to length because other lengths include a comma and a blank
- $last_item_length += 2;
- push @item_lengths, $last_item_length;
- push @i_term_begin, $i_b + 1;
- push @i_term_end, $i_e;
- push @i_term_comma, undef;
+ # otherwise increase strength a bit if this token would be at the
+ # maximum line length. This is necessary to avoid blinking
+ # in the above example when the -iob flag is added.
+ else {
+ my $len =
+ $leading_spaces +
+ $summed_lengths_to_go[ $i_test + 1 ] -
+ $starting_sum;
+ if ( $len >= $maximum_line_length ) {
+ $strength += $tiny_bias;
+ DEBUG_BREAKPOINTS && do { $Msg .= " :+bias at i=$i_test" };
+ }
+ }
- my $i_odd = $item_count % 2;
+ my $must_break = 0;
- if ( $last_item_length > $max_length[$i_odd] ) {
- $max_length[$i_odd] = $last_item_length;
+ # Force an immediate break at certain operators
+ # with lower level than the start of the line,
+ # unless we've already seen a better break.
+ #
+ ##############################################
+ # Note on an issue with a preceding ?
+ ##############################################
+ # We don't include a ? in the above list, but there may
+ # be a break at a previous ? if the line is long.
+ # Because of this we do not want to force a break if
+ # there is a previous ? on this line. For now the best way
+ # to do this is to not break if we have seen a lower strength
+ # point, which is probably a ?.
+ #
+ # Example of unwanted breaks we are avoiding at a '.' following a ?
+ # from pod2html using perltidy -gnu:
+ # )
+ # ? "\n<A NAME=\""
+ # . $value
+ # . "\">\n$text</A>\n"
+ # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
+ if (
+ ( $strength <= $lowest_strength )
+ && ( $nesting_depth_to_go[$i_begin] >
+ $nesting_depth_to_go[$i_next_nonblank] )
+ && (
+ $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
+ || ( $next_nonblank_type eq 'k'
+ && $next_nonblank_token =~ /^(and|or)$/ )
+ )
+ )
+ {
+ $self->set_forced_breakpoint($i_next_nonblank);
+ DEBUG_BREAKPOINTS
+ && do { $Msg .= " :Forced break at i=$i_next_nonblank" };
}
- $item_count++;
- $i_effective_last_comma = $i_e + 1;
+ if (
- if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
- $identifier_count++;
- }
- }
+ # Try to put a break where requested by scan_list
+ $forced_breakpoint_to_go[$i_test]
- #---------------------------------------------------------------
- # End of length calculations
- #---------------------------------------------------------------
+ # break between ) { in a continued line so that the '{' can
+ # be outdented
+ # See similar logic in scan_list which catches instances
+ # where a line is just something like ') {'. We have to
+ # be careful because the corresponding block keyword might
+ # not be on the first line, such as 'for' here:
+ #
+ # eval {
+ # for ("a") {
+ # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
+ # }
+ # };
+ #
+ || (
+ $line_count
+ && ( $token eq ')' )
+ && ( $next_nonblank_type eq '{' )
+ && ($next_nonblank_block_type)
+ && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
- #---------------------------------------------------------------
- # Compound List Rule 1:
- # Break at (almost) every comma for a list containing a broken
- # sublist. This has higher priority than the Interrupted List
- # Rule.
- #---------------------------------------------------------------
- if ( $has_broken_sublist[$depth] ) {
+ # RT #104427: Dont break before opening sub brace because
+ # sub block breaks handled at higher level, unless
+ # it looks like the preceding list is long and broken
+ && !(
+ $next_nonblank_block_type =~ /$ANYSUB_PATTERN/
+ && ( $nesting_depth_to_go[$i_begin] ==
+ $nesting_depth_to_go[$i_next_nonblank] )
+ )
- # Break at every comma except for a comma between two
- # simple, small terms. This prevents long vertical
- # columns of, say, just 0's.
- my $small_length = 10; # 2 + actual maximum length wanted
+ && !$rOpts->{'opening-brace-always-on-right'}
+ )
- # We'll insert a break in long runs of small terms to
- # allow alignment in uniform tables.
- my $skipped_count = 0;
- my $columns = table_columns_available($i_first_comma);
- my $fields = int( $columns / $small_length );
- if ( $rOpts_maximum_fields_per_table
- && $fields > $rOpts_maximum_fields_per_table )
+ # There is an implied forced break at a terminal opening brace
+ || ( ( $type eq '{' ) && ( $i_test == $imax ) )
+ )
{
- $fields = $rOpts_maximum_fields_per_table;
- }
- my $max_skipped_count = $fields - 1;
- my $is_simple_last_term = 0;
- my $is_simple_next_term = 0;
- foreach my $j ( 0 .. $item_count ) {
- $is_simple_last_term = $is_simple_next_term;
- $is_simple_next_term = 0;
- if ( $j < $item_count
- && $i_term_end[$j] == $i_term_begin[$j]
- && $item_lengths[$j] <= $small_length )
- {
- $is_simple_next_term = 1;
- }
- next if $j == 0;
- if ( $is_simple_last_term
- && $is_simple_next_term
- && $skipped_count < $max_skipped_count )
- {
- $skipped_count++;
- }
- else {
- $skipped_count = 0;
- my $i = $i_term_comma[ $j - 1 ];
- last unless defined $i;
- set_forced_breakpoint($i);
+ # Forced breakpoints must sometimes be overridden, for example
+ # because of a side comment causing a NO_BREAK. It is easier
+ # to catch this here than when they are set.
+ if ( $strength < NO_BREAK - 1 ) {
+ $strength = $lowest_strength - $tiny_bias;
+ $must_break = 1;
+ DEBUG_BREAKPOINTS
+ && do { $Msg .= " :set must_break at i=$i_next_nonblank" };
}
}
- # always break at the last comma if this list is
- # interrupted; we wouldn't want to leave a terminal '{', for
- # example.
- if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
- return;
- }
+ # quit if a break here would put a good terminal token on
+ # the next line and we already have a possible break
+ if (
+ !$must_break
+ && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
+ && (
+ (
+ $leading_spaces +
+ $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
+ $starting_sum
+ ) > $maximum_line_length
+ )
+ )
+ {
+ if ( $i_lowest >= 0 ) {
+ DEBUG_BREAKPOINTS && do {
+ $Msg .= " :quit at good terminal='$next_nonblank_type'";
+ };
+ last;
+ }
+ }
-#my ( $a, $b, $c ) = caller();
-#print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
-#i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
-#print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
-
- #---------------------------------------------------------------
- # Interrupted List Rule:
- # A list is forced to use old breakpoints if it was interrupted
- # by side comments or blank lines, or requested by user.
- #---------------------------------------------------------------
- if ( $rOpts_break_at_old_comma_breakpoints
- || $interrupted
- || $i_opening_paren < 0 )
- {
- copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
- return;
- }
-
- #---------------------------------------------------------------
- # Looks like a list of items. We have to look at it and size it up.
- #---------------------------------------------------------------
-
- my $opening_token = $tokens_to_go[$i_opening_paren];
- my $opening_environment =
- $container_environment_to_go[$i_opening_paren];
-
- #-------------------------------------------------------------------
- # Return if this will fit on one line
- #-------------------------------------------------------------------
+ # Avoid a break which would strand a single punctuation
+ # token. For example, we do not want to strand a leading
+ # '.' which is followed by a long quoted string.
+ # But note that we do want to do this with -extrude (l=1)
+ # so please test any changes to this code on -extrude.
+ if (
+ !$must_break
+ && ( $i_test == $i_begin )
+ && ( $i_test < $imax )
+ && ( $token eq $type )
+ && (
+ (
+ $leading_spaces +
+ $summed_lengths_to_go[ $i_test + 1 ] -
+ $starting_sum
+ ) < $maximum_line_length
+ )
+ )
+ {
+ $i_test = min( $imax, $inext_to_go[$i_test] );
+ DEBUG_BREAKPOINTS && do {
+ $Msg .= " :redo at i=$i_test";
+ };
+ redo;
+ }
- my $i_opening_minus = find_token_starting_list($i_opening_paren);
- return
- unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
+ if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
+ {
- #-------------------------------------------------------------------
- # Now we know that this block spans multiple lines; we have to set
- # at least one breakpoint -- real or fake -- as a signal to break
- # open any outer containers.
- #-------------------------------------------------------------------
- set_fake_breakpoint();
+ # break at previous best break if it would have produced
+ # a leading alignment of certain common tokens, and it
+ # is different from the latest candidate break
+ if ($leading_alignment_type) {
+ DEBUG_BREAKPOINTS && do {
+ $Msg .=
+" :last at leading_alignment='$leading_alignment_type'";
+ };
+ last;
+ }
- # be sure we do not extend beyond the current list length
- if ( $i_effective_last_comma >= $max_index_to_go ) {
- $i_effective_last_comma = $max_index_to_go - 1;
- }
+ # Force at least one breakpoint if old code had good
+ # break It is only called if a breakpoint is required or
+ # desired. This will probably need some adjustments
+ # over time. A goal is to try to be sure that, if a new
+ # side comment is introduced into formatted text, then
+ # the same breakpoints will occur. scbreak.t
+ if (
+ $i_test == $imax # we are at the end
+ && !get_forced_breakpoint_count()
+ && $saw_good_break # old line had good break
+ && $type =~ /^[#;\{]$/ # and this line ends in
+ # ';' or side comment
+ && $i_last_break < 0 # and we haven't made a break
+ && $i_lowest >= 0 # and we saw a possible break
+ && $i_lowest < $imax - 1 # (but not just before this ;)
+ && $strength - $lowest_strength < 0.5 * WEAK # and it's good
+ )
+ {
- # Set a flag indicating if we need to break open to keep -lp
- # items aligned. This is necessary if any of the list terms
- # exceeds the available space after the '('.
- my $need_lp_break_open = $must_break_open;
- if ( $rOpts_line_up_parentheses && !$must_break_open ) {
- my $columns_if_unbroken =
- maximum_line_length($i_opening_minus) -
- total_line_length( $i_opening_minus, $i_opening_paren );
- $need_lp_break_open =
- ( $max_length[0] > $columns_if_unbroken )
- || ( $max_length[1] > $columns_if_unbroken )
- || ( $first_term_length > $columns_if_unbroken );
- }
+ DEBUG_BREAKPOINTS && do {
+ $Msg .= " :last at good old break\n";
+ };
+ last;
+ }
- # Specify if the list must have an even number of fields or not.
- # It is generally safest to assume an even number, because the
- # list items might be a hash list. But if we can be sure that
- # it is not a hash, then we can allow an odd number for more
- # flexibility.
- my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count
+ # Do not skip past an important break point in a short final
+ # segment. For example, without this check we would miss the
+ # break at the final / in the following code:
+ #
+ # $depth_stop =
+ # ( $tau * $mass_pellet * $q_0 *
+ # ( 1. - exp( -$t_stop / $tau ) ) -
+ # 4. * $pi * $factor * $k_ice *
+ # ( $t_melt - $t_ice ) *
+ # $r_pellet *
+ # $t_stop ) /
+ # ( $rho_ice * $Qs * $pi * $r_pellet**2 );
+ #
+ if (
+ $line_count > 2
+ && $i_lowest >= 0 # and we saw a possible break
+ && $i_lowest < $i_test
+ && $i_test > $imax - 2
+ && $nesting_depth_to_go[$i_begin] >
+ $nesting_depth_to_go[$i_lowest]
+ && $lowest_strength < $last_break_strength - .5 * WEAK
+ )
+ {
+ # Make this break for math operators for now
+ my $ir = $inext_to_go[$i_lowest];
+ my $il = $iprev_to_go[$ir];
+ if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
+ || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ )
+ {
+ DEBUG_BREAKPOINTS && do {
+ $Msg .= " :last-noskip_short";
+ };
+ last;
+ }
+ }
- if ( $identifier_count >= $item_count - 1
- || $is_assignment{$next_nonblank_type}
- || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
- )
- {
- $odd_or_even = 1;
- }
+ # Update the minimum bond strength location
+ $lowest_strength = $strength;
+ $i_lowest = $i_test;
+ $lowest_next_token = $next_nonblank_token;
+ $lowest_next_type = $next_nonblank_type;
+ $i_lowest_next_nonblank = $i_next_nonblank;
+ if ($must_break) {
+ DEBUG_BREAKPOINTS && do {
+ $Msg .= " :last-must_break";
+ };
+ last;
+ }
- # do we have a long first term which should be
- # left on a line by itself?
- my $use_separate_first_term = (
- $odd_or_even == 1 # only if we can use 1 field/line
- && $item_count > 3 # need several items
- && $first_term_length >
- 2 * $max_length[0] - 2 # need long first term
- && $first_term_length >
- 2 * $max_length[1] - 2 # need long first term
- );
+ # set flags to remember if a break here will produce a
+ # leading alignment of certain common tokens
+ if ( $line_count > 0
+ && $i_test < $imax
+ && ( $lowest_strength - $last_break_strength <= $max_bias )
+ )
+ {
+ my $i_last_end = $iprev_to_go[$i_begin];
+ my $tok_beg = $tokens_to_go[$i_begin];
+ my $type_beg = $types_to_go[$i_begin];
+ if (
- # or do we know from the type of list that the first term should
- # be placed alone?
- if ( !$use_separate_first_term ) {
- if ( $is_keyword_with_special_leading_term{$list_type} ) {
- $use_separate_first_term = 1;
+ # check for leading alignment of certain tokens
+ (
+ $tok_beg eq $next_nonblank_token
+ && $is_chain_operator{$tok_beg}
+ && ( $type_beg eq 'k'
+ || $type_beg eq $tok_beg )
+ && $nesting_depth_to_go[$i_begin] >=
+ $nesting_depth_to_go[$i_next_nonblank]
+ )
- # should the container be broken open?
- if ( $item_count < 3 ) {
- if ( $i_first_comma - $i_opening_paren < 4 ) {
- ${$rdo_not_break_apart} = 1;
+ || ( $tokens_to_go[$i_last_end] eq $token
+ && $is_chain_operator{$token}
+ && ( $type eq 'k' || $type eq $token )
+ && $nesting_depth_to_go[$i_last_end] >=
+ $nesting_depth_to_go[$i_test] )
+ )
+ {
+ $leading_alignment_token = $next_nonblank_token;
+ $leading_alignment_type = $next_nonblank_type;
}
}
- elsif ($first_term_length < 20
- && $i_first_comma - $i_opening_paren < 4 )
+ }
+
+ my $too_long = ( $i_test >= $imax );
+ if ( !$too_long ) {
+ my $next_length =
+ $leading_spaces +
+ $summed_lengths_to_go[ $i_test + 2 ] -
+ $starting_sum;
+ $too_long = $next_length > $maximum_line_length;
+
+ # To prevent blinkers we will avoid leaving a token exactly at
+ # the line length limit unless it is the last token or one of
+ # several "good" types.
+ #
+ # The following code was a blinker with -pbp before this
+ # modification:
+## $last_nonblank_token eq '('
+## && $is_indirect_object_taker{ $paren_type
+## [$paren_depth] }
+ # The issue causing the problem is that if the
+ # term [$paren_depth] gets broken across a line then
+ # the whitespace routine doesn't see both opening and closing
+ # brackets and will format like '[ $paren_depth ]'. This
+ # leads to an oscillation in length depending if we break
+ # before the closing bracket or not.
+ if ( !$too_long
+ && $i_test + 1 < $imax
+ && $next_nonblank_type ne ','
+ && !$is_closing_type{$next_nonblank_type} )
{
- my $columns = table_columns_available($i_first_comma);
- if ( $first_term_length < $columns ) {
- ${$rdo_not_break_apart} = 1;
+ $too_long = $next_length >= $maximum_line_length;
+ DEBUG_BREAKPOINTS && do {
+ $Msg .= " :too_long=$too_long" if ($too_long);
}
}
}
- }
-
- # if so,
- if ($use_separate_first_term) {
- # ..set a break and update starting values
- $use_separate_first_term = 1;
- set_forced_breakpoint($i_first_comma);
- $i_opening_paren = $i_first_comma;
- $i_first_comma = $rcomma_index->[1];
- $item_count--;
- return if $comma_count == 1;
- shift @item_lengths;
- shift @i_term_begin;
- shift @i_term_end;
- shift @i_term_comma;
- }
+ DEBUG_BREAKPOINTS && do {
+ my $ltok = $token;
+ my $rtok = $next_nonblank_token ? $next_nonblank_token : "";
+ my $i_testp2 = $i_test + 2;
+ if ( $i_testp2 > $max_index_to_go + 1 ) {
+ $i_testp2 = $max_index_to_go + 1;
+ }
+ if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
+ if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
+ print STDOUT
+"BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n";
+ };
- # if not, update the metrics to include the first term
- else {
- if ( $first_term_length > $max_length[0] ) {
- $max_length[0] = $first_term_length;
+ # allow one extra terminal token after exceeding line length
+ # if it would strand this token.
+ if ( $rOpts_fuzzy_line_length
+ && $too_long
+ && $i_lowest == $i_test
+ && $token_lengths_to_go[$i_test] > 1
+ && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
+ )
+ {
+ $too_long = 0;
+ DEBUG_BREAKPOINTS && do {
+ $Msg .= " :do_not_strand next='$next_nonblank_type'";
+ };
}
- }
- # Field width parameters
- my $pair_width = ( $max_length[0] + $max_length[1] );
- my $max_width =
- ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
-
- # Number of free columns across the page width for laying out tables
- my $columns = table_columns_available($i_first_comma);
-
- # Estimated maximum number of fields which fit this space
- # This will be our first guess
- my $number_of_fields_max =
- maximum_number_of_fields( $columns, $odd_or_even, $max_width,
- $pair_width );
- my $number_of_fields = $number_of_fields_max;
+ # we are done if...
+ if (
- # Find the best-looking number of fields
- # and make this our second guess if possible
- my ( $number_of_fields_best, $ri_ragged_break_list,
- $new_identifier_count )
- = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
- $max_width );
+ # ... no more space and we have a break
+ $too_long && $i_lowest >= 0
- if ( $number_of_fields_best != 0
- && $number_of_fields_best < $number_of_fields_max )
- {
- $number_of_fields = $number_of_fields_best;
+ # ... or no more tokens
+ || $i_test == $imax
+ )
+ {
+ DEBUG_BREAKPOINTS && do {
+ $Msg .=
+" :Done-too_long=$too_long or i_lowest=$i_lowest or $i_test==imax";
+ };
+ last;
+ }
}
- # ----------------------------------------------------------------------
- # If we are crowded and the -lp option is being used, try to
- # undo some indentation
- # ----------------------------------------------------------------------
- if (
- $rOpts_line_up_parentheses
- && (
- $number_of_fields == 0
- || ( $number_of_fields == 1
- && $number_of_fields != $number_of_fields_best )
- )
- )
- {
- my $available_spaces = get_available_spaces_to_go($i_first_comma);
- if ( $available_spaces > 0 ) {
-
- my $spaces_wanted = $max_width - $columns; # for 1 field
+ #-------------------------------------------------------
+ # END of inner loop to find the best next breakpoint
+ # Now decide exactly where to put the breakpoint
+ #-------------------------------------------------------
- if ( $number_of_fields_best == 0 ) {
- $number_of_fields_best =
- get_maximum_fields_wanted( \@item_lengths );
- }
+ # it's always ok to break at imax if no other break was found
+ if ( $i_lowest < 0 ) { $i_lowest = $imax }
- if ( $number_of_fields_best != 1 ) {
- my $spaces_wanted_2 =
- 1 + $pair_width - $columns; # for 2 fields
- if ( $available_spaces > $spaces_wanted_2 ) {
- $spaces_wanted = $spaces_wanted_2;
- }
- }
+ # semi-final index calculation
+ my $i_next_nonblank = $inext_to_go[$i_lowest];
+ my $next_nonblank_type = $types_to_go[$i_next_nonblank];
+ my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
- if ( $spaces_wanted > 0 ) {
- my $deleted_spaces =
- reduce_lp_indentation( $i_first_comma, $spaces_wanted );
+ #-------------------------------------------------------
+ # ?/: rule 1 : if a break here will separate a '?' on this
+ # line from its closing ':', then break at the '?' instead.
+ #-------------------------------------------------------
+ foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
+ next unless ( $tokens_to_go[$i] eq '?' );
- # redo the math
- if ( $deleted_spaces > 0 ) {
- $columns = table_columns_available($i_first_comma);
- $number_of_fields_max =
- maximum_number_of_fields( $columns, $odd_or_even,
- $max_width, $pair_width );
- $number_of_fields = $number_of_fields_max;
+ # do not break if probable sequence of ?/: statements
+ next if ($is_colon_chain);
- if ( $number_of_fields_best == 1
- && $number_of_fields >= 1 )
- {
- $number_of_fields = $number_of_fields_best;
- }
- }
- }
- }
- }
+ # do not break if statement is broken by side comment
+ next
+ if ( $tokens_to_go[$max_index_to_go] eq '#'
+ && terminal_type_i( 0, $max_index_to_go ) !~ /^[\;\}]$/ );
- # try for one column if two won't work
- if ( $number_of_fields <= 0 ) {
- $number_of_fields = int( $columns / $max_width );
- }
+ # no break needed if matching : is also on the line
+ next
+ if ( $mate_index_to_go[$i] >= 0
+ && $mate_index_to_go[$i] <= $i_next_nonblank );
- # The user can place an upper bound on the number of fields,
- # which can be useful for doing maintenance on tables
- if ( $rOpts_maximum_fields_per_table
- && $number_of_fields > $rOpts_maximum_fields_per_table )
- {
- $number_of_fields = $rOpts_maximum_fields_per_table;
+ $i_lowest = $i;
+ if ( $want_break_before{'?'} ) { $i_lowest-- }
+ last;
}
- # How many columns (characters) and lines would this container take
- # if no additional whitespace were added?
- my $packed_columns = token_sequence_length( $i_opening_paren + 1,
- $i_effective_last_comma + 1 );
- if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
- my $packed_lines = 1 + int( $packed_columns / $columns );
-
- # are we an item contained in an outer list?
- my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
+ #-------------------------------------------------------
+ # END of inner loop to find the best next breakpoint:
+ # Break the line after the token with index i=$i_lowest
+ #-------------------------------------------------------
- if ( $number_of_fields <= 0 ) {
+ # final index calculation
+ $i_next_nonblank = $inext_to_go[$i_lowest];
+ $next_nonblank_type = $types_to_go[$i_next_nonblank];
+ $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
-# #---------------------------------------------------------------
-# # We're in trouble. We can't find a single field width that works.
-# # There is no simple answer here; we may have a single long list
-# # item, or many.
-# #---------------------------------------------------------------
-#
-# In many cases, it may be best to not force a break if there is just one
-# comma, because the standard continuation break logic will do a better
-# job without it.
-#
-# In the common case that all but one of the terms can fit
-# on a single line, it may look better not to break open the
-# containing parens. Consider, for example
-#
-# $color =
-# join ( '/',
-# sort { $color_value{$::a} <=> $color_value{$::b}; }
-# keys %colors );
-#
-# which will look like this with the container broken:
-#
-# $color = join (
-# '/',
-# sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
-# );
-#
-# Here is an example of this rule for a long last term:
-#
-# log_message( 0, 256, 128,
-# "Number of routes in adj-RIB-in to be considered: $peercount" );
-#
-# And here is an example with a long first term:
-#
-# $s = sprintf(
-# "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
-# $r, $pu, $ps, $cu, $cs, $tt
-# )
-# if $style eq 'all';
+ DEBUG_BREAKPOINTS
+ && print STDOUT
+"BREAK: best is i = $i_lowest strength = $lowest_strength;\nReason>> $Msg\n";
+ $Msg = "";
- my $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
- my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
- my $long_first_term =
- excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
+ #-------------------------------------------------------
+ # ?/: rule 2 : if we break at a '?', then break at its ':'
+ #
+ # Note: this rule is also in sub scan_list to handle a break
+ # at the start and end of a line (in case breaks are dictated
+ # by side comments).
+ #-------------------------------------------------------
+ if ( $next_nonblank_type eq '?' ) {
+ $self->set_closing_breakpoint($i_next_nonblank);
+ }
+ elsif ( $types_to_go[$i_lowest] eq '?' ) {
+ $self->set_closing_breakpoint($i_lowest);
+ }
- # break at every comma ...
- if (
+ #-------------------------------------------------------
+ # ?/: rule 3 : if we break at a ':' then we save
+ # its location for further work below. We may need to go
+ # back and break at its '?'.
+ #-------------------------------------------------------
+ if ( $next_nonblank_type eq ':' ) {
+ push @i_colon_breaks, $i_next_nonblank;
+ }
+ elsif ( $types_to_go[$i_lowest] eq ':' ) {
+ push @i_colon_breaks, $i_lowest;
+ }
- # if requested by user or is best looking
- $number_of_fields_best == 1
+ # here we should set breaks for all '?'/':' pairs which are
+ # separated by this line
- # or if this is a sublist of a larger list
- || $in_hierarchical_list
+ $line_count++;
- # or if multiple commas and we don't have a long first or last
- # term
- || ( $comma_count > 1
- && !( $long_last_term || $long_first_term ) )
- )
- {
- foreach ( 0 .. $comma_count - 1 ) {
- set_forced_breakpoint( $rcomma_index->[$_] );
- }
- }
- elsif ($long_last_term) {
+ # save this line segment, after trimming blanks at the ends
+ push( @i_first,
+ ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
+ push( @i_last,
+ ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
- set_forced_breakpoint($i_last_comma);
- ${$rdo_not_break_apart} = 1 unless $must_break_open;
- }
- elsif ($long_first_term) {
+ # set a forced breakpoint at a container opening, if necessary, to
+ # signal a break at a closing container. Excepting '(' for now.
+ if (
+ (
+ $tokens_to_go[$i_lowest] eq '{'
+ || $tokens_to_go[$i_lowest] eq '['
+ )
+ && !$forced_breakpoint_to_go[$i_lowest]
+ )
+ {
+ $self->set_closing_breakpoint($i_lowest);
+ }
- set_forced_breakpoint($i_first_comma);
- }
- else {
+ # get ready to go again
+ $i_begin = $i_lowest + 1;
+ $last_break_strength = $lowest_strength;
+ $i_last_break = $i_lowest;
+ $leading_alignment_token = "";
+ $leading_alignment_type = "";
+ $lowest_next_token = '';
+ $lowest_next_type = 'b';
- # let breaks be defined by default bond strength logic
- }
- return;
+ if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
+ $i_begin++;
}
- # --------------------------------------------------------
- # We have a tentative field count that seems to work.
- # How many lines will this require?
- # --------------------------------------------------------
- my $formatted_lines = $item_count / ($number_of_fields);
- if ( $formatted_lines != int $formatted_lines ) {
- $formatted_lines = 1 + int $formatted_lines;
+ # update indentation size
+ if ( $i_begin <= $imax ) {
+ $leading_spaces = leading_spaces_to_go($i_begin);
+ DEBUG_BREAKPOINTS
+ && print STDOUT
+ "updating leading spaces to be $leading_spaces at i=$i_begin\n";
}
+ }
- # So far we've been trying to fill out to the right margin. But
- # compact tables are easier to read, so let's see if we can use fewer
- # fields without increasing the number of lines.
- $number_of_fields =
- compactify_table( $item_count, $number_of_fields, $formatted_lines,
- $odd_or_even );
-
- # How many spaces across the page will we fill?
- my $columns_per_line =
- ( int $number_of_fields / 2 ) * $pair_width +
- ( $number_of_fields % 2 ) * $max_width;
+ #-------------------------------------------------------
+ # END of main loop to set continuation breakpoints
+ # Now go back and make any necessary corrections
+ #-------------------------------------------------------
- my $formatted_columns;
+ #-------------------------------------------------------
+ # ?/: rule 4 -- if we broke at a ':', then break at
+ # corresponding '?' unless this is a chain of ?: expressions
+ #-------------------------------------------------------
+ if (@i_colon_breaks) {
- if ( $number_of_fields > 1 ) {
- $formatted_columns =
- ( $pair_width * ( int( $item_count / 2 ) ) +
- ( $item_count % 2 ) * $max_width );
- }
- else {
- $formatted_columns = $max_width * $item_count;
- }
- if ( $formatted_columns < $packed_columns ) {
- $formatted_columns = $packed_columns;
+ # using a simple method for deciding if we are in a ?/: chain --
+ # this is a chain if it has multiple ?/: pairs all in order;
+ # otherwise not.
+ # Note that if line starts in a ':' we count that above as a break
+ my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
+
+ unless ($is_chain) {
+ my @insert_list = ();
+ foreach (@i_colon_breaks) {
+ my $i_question = $mate_index_to_go[$_];
+ if ( $i_question >= 0 ) {
+ if ( $want_break_before{'?'} ) {
+ $i_question = $iprev_to_go[$i_question];
+ }
+
+ if ( $i_question >= 0 ) {
+ push @insert_list, $i_question;
+ }
+ }
+ $self->insert_additional_breaks( \@insert_list, \@i_first,
+ \@i_last );
+ }
}
+ }
+ return ( \@i_first, \@i_last );
+}
- my $unused_columns = $formatted_columns - $packed_columns;
+###########################################
+# CODE SECTION 11: Code to break long lists
+###########################################
- # set some empirical parameters to help decide if we should try to
- # align; high sparsity does not look good, especially with few lines
- my $sparsity = ($unused_columns) / ($formatted_columns);
- my $max_allowed_sparsity =
- ( $item_count < 3 ) ? 0.1
- : ( $packed_lines == 1 ) ? 0.15
- : ( $packed_lines == 2 ) ? 0.4
- : 0.7;
+{ ## begin closure scan_list
- # Begin check for shortcut methods, which avoid treating a list
- # as a table for relatively small parenthesized lists. These
- # are usually easier to read if not formatted as tables.
- if (
- $packed_lines <= 2 # probably can fit in 2 lines
- && $item_count < 9 # doesn't have too many items
- && $opening_environment eq 'BLOCK' # not a sub-container
- && $opening_token eq '(' # is paren list
- )
- {
+ # These routines and variables are involved in finding good
+ # places to break long lists.
- # Shortcut method 1: for -lp and just one comma:
- # This is a no-brainer, just break at the comma.
- if (
- $rOpts_line_up_parentheses # -lp
- && $item_count == 2 # two items, one comma
- && !$must_break_open
- )
- {
- my $i_break = $rcomma_index->[0];
- set_forced_breakpoint($i_break);
- ${$rdo_not_break_apart} = 1;
- return;
+ my (
+ $block_type, $current_depth,
+ $depth, $i,
+ $i_last_nonblank_token, $last_colon_sequence_number,
+ $last_nonblank_token, $last_nonblank_type,
+ $last_nonblank_block_type, $last_old_breakpoint_count,
+ $minimum_depth, $next_nonblank_block_type,
+ $next_nonblank_token, $next_nonblank_type,
+ $old_breakpoint_count, $starting_breakpoint_count,
+ $starting_depth, $token,
+ $type, $type_sequence,
+ );
- }
+ my (
+ @breakpoint_stack, @breakpoint_undo_stack,
+ @comma_index, @container_type,
+ @identifier_count_stack, @index_before_arrow,
+ @interrupted_list, @item_count_stack,
+ @last_comma_index, @last_dot_index,
+ @last_nonblank_type, @old_breakpoint_count_stack,
+ @opening_structure_index_stack, @rfor_semicolon_list,
+ @has_old_logical_breakpoints, @rand_or_list,
+ @i_equals, @override_cab3,
+ @type_sequence_stack,
+ );
- # method 2 is for most small ragged lists which might look
- # best if not displayed as a table.
- if (
- ( $number_of_fields == 2 && $item_count == 3 )
- || (
- $new_identifier_count > 0 # isn't all quotes
- && $sparsity > 0.15
- ) # would be fairly spaced gaps if aligned
- )
- {
+ # these arrays must retain values between calls
+ my ( @has_broken_sublist, @dont_align, @want_comma_break );
- my $break_count = set_ragged_breakpoints( \@i_term_comma,
- $ri_ragged_break_list );
- ++$break_count if ($use_separate_first_term);
+ my $length_tol;
+ my $length_tol_boost;
- # NOTE: we should really use the true break count here,
- # which can be greater if there are large terms and
- # little space, but usually this will work well enough.
- unless ($must_break_open) {
+ sub initialize_scan_list {
+ @dont_align = ();
+ @has_broken_sublist = ();
+ @want_comma_break = ();
- if ( $break_count <= 1 ) {
- ${$rdo_not_break_apart} = 1;
- }
- elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
- {
- ${$rdo_not_break_apart} = 1;
- }
- }
- return;
- }
+ ####################################################
+ # Set tolerances to prevent formatting instabilities
+ ####################################################
- } # end shortcut methods
+ # Define tolerances to use when checking if closed
+ # containers will fit on one line. This is necessary to avoid
+ # formatting instability. The basic tolerance is based on the
+ # following:
- # debug stuff
+ # - Always allow for at least one extra space after a closing token so
+ # that we do not strand a comma or semicolon. (oneline.t).
- FORMATTER_DEBUG_FLAG_SPARSE && do {
- print STDOUT
-"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
+ # - Use an increased line length tolerance when -ci > -i to avoid
+ # blinking states (case b923 and others).
+ $length_tol =
+ 1 + max( 0, $rOpts_continuation_indentation - $rOpts_indent_columns );
- };
+ # In addition, it may be necessary to use a few extra tolerance spaces
+ # when -lp is used and/or when -xci is used. The history of this
+ # so far is as follows:
- #---------------------------------------------------------------
- # Compound List Rule 2:
- # If this list is too long for one line, and it is an item of a
- # larger list, then we must format it, regardless of sparsity
- # (ian.t). One reason that we have to do this is to trigger
- # Compound List Rule 1, above, which causes breaks at all commas of
- # all outer lists. In this way, the structure will be properly
- # displayed.
- #---------------------------------------------------------------
+ # FIX1: At least 3 characters were been found to be required for -lp
+ # to fixes cases b1059 b1063 b1117.
- # Decide if this list is too long for one line unless broken
- my $total_columns = table_columns_available($i_opening_paren);
- my $too_long = $packed_columns > $total_columns;
+ # FIX2: Further testing showed that we need a total of 3 extra spaces
+ # when -lp is set for non-lists, and at least 2 spaces when -lp and
+ # -xci are set.
+ # Fixes cases b1063 b1103 b1134 b1135 b1136 b1138 b1140 b1143 b1144
+ # b1145 b1146 b1147 b1148 b1151 b1152 b1153 b1154 b1156 b1157 b1164
+ # b1165
- # For a paren list, include the length of the token just before the
- # '(' because this is likely a sub call, and we would have to
- # include the sub name on the same line as the list. This is still
- # imprecise, but not too bad. (steve.t)
- if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
+ # FIX3: To fix cases b1169 b1170 b1171, an update was made in sub
+ # 'find_token_starting_list' to go back before an initial blank space.
+ # This fixed these three cases, and allowed the tolerances to be
+ # reduced to continue to fix all other known cases of instability.
+ # This gives the current tolerance formulation (note that
+ # variable $length_tol_boost is always 0 now):
- $too_long = excess_line_length( $i_opening_minus,
- $i_effective_last_comma + 1 ) > 0;
- }
+ $length_tol_boost = 0;
+ if ($rOpts_line_up_parentheses) {
- # FIXME: For an item after a '=>', try to include the length of the
- # thing before the '=>'. This is crude and should be improved by
- # actually looking back token by token.
- if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
- my $i_opening_minus = $i_opening_paren - 4;
- if ( $i_opening_minus >= 0 ) {
- $too_long = excess_line_length( $i_opening_minus,
- $i_effective_last_comma + 1 ) > 0;
+ if ( $rOpts->{'extended-continuation-indentation'} ) {
+ $length_tol += 2;
+ $length_tol_boost = 0; # was 1 for FIX2, 0 for FIX3
+ }
+ else {
+ $length_tol_boost = 0; # was 3 for FIX2, 0 for FIX3
}
}
- # Always break lists contained in '[' and '{' if too long for 1 line,
- # and always break lists which are too long and part of a more complex
- # structure.
- my $must_break_open_container = $must_break_open
- || ( $too_long
- && ( $in_hierarchical_list || $opening_token ne '(' ) );
+ # The -xci option alone also needs a slightly larger tol for non-lists
+ elsif ( $rOpts->{'extended-continuation-indentation'} ) {
+ $length_tol_boost = 0; # was 1 for FIX2, 0 for FIX3
+ }
+ return;
+ }
-#print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
+ # routine to define essential variables when we go 'up' to
+ # a new depth
+ sub check_for_new_minimum_depth {
+ my $depth = shift;
+ if ( $depth < $minimum_depth ) {
- #---------------------------------------------------------------
- # The main decision:
- # Now decide if we will align the data into aligned columns. Do not
- # attempt to align columns if this is a tiny table or it would be
- # too spaced. It seems that the more packed lines we have, the
- # sparser the list that can be allowed and still look ok.
- #---------------------------------------------------------------
+ $minimum_depth = $depth;
- if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
- || ( $formatted_lines < 2 )
- || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
- )
- {
+ # these arrays need not retain values between calls
+ $breakpoint_stack[$depth] = $starting_breakpoint_count;
+ $container_type[$depth] = "";
+ $identifier_count_stack[$depth] = 0;
+ $index_before_arrow[$depth] = -1;
+ $interrupted_list[$depth] = 1;
+ $item_count_stack[$depth] = 0;
+ $last_nonblank_type[$depth] = "";
+ $opening_structure_index_stack[$depth] = -1;
- #---------------------------------------------------------------
- # too sparse: would look ugly if aligned in a table;
- #---------------------------------------------------------------
+ $breakpoint_undo_stack[$depth] = undef;
+ $comma_index[$depth] = undef;
+ $last_comma_index[$depth] = undef;
+ $last_dot_index[$depth] = undef;
+ $old_breakpoint_count_stack[$depth] = undef;
+ $has_old_logical_breakpoints[$depth] = 0;
+ $rand_or_list[$depth] = [];
+ $rfor_semicolon_list[$depth] = [];
+ $i_equals[$depth] = -1;
- # use old breakpoints if this is a 'big' list
- # FIXME: goal is to improve set_ragged_breakpoints so that
- # this is not necessary.
- if ( $packed_lines > 2 && $item_count > 10 ) {
- write_logfile_entry("List sparse: using old breakpoints\n");
- copy_old_breakpoints( $i_first_comma, $i_last_comma );
+ # these arrays must retain values between calls
+ if ( !defined( $has_broken_sublist[$depth] ) ) {
+ $dont_align[$depth] = 0;
+ $has_broken_sublist[$depth] = 0;
+ $want_comma_break[$depth] = 0;
}
+ }
+ return;
+ }
- # let the continuation logic handle it if 2 lines
+ # routine to decide which commas to break at within a container;
+ # returns:
+ # $bp_count = number of comma breakpoints set
+ # $do_not_break_apart = a flag indicating if container need not
+ # be broken open
+ sub set_comma_breakpoints {
+
+ my ( $self, $dd ) = @_;
+ my $bp_count = 0;
+ my $do_not_break_apart = 0;
+
+ # anything to do?
+ if ( $item_count_stack[$dd] ) {
+
+ # handle commas not in containers...
+ if ( $dont_align[$dd] ) {
+ $self->do_uncontained_comma_breaks($dd);
+ }
+
+ # handle commas within containers...
else {
+ my $fbc = get_forced_breakpoint_count();
- my $break_count = set_ragged_breakpoints( \@i_term_comma,
- $ri_ragged_break_list );
- ++$break_count if ($use_separate_first_term);
+ # always open comma lists not preceded by keywords,
+ # barewords, identifiers (that is, anything that doesn't
+ # look like a function call)
+ my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
- unless ($must_break_open_container) {
- if ( $break_count <= 1 ) {
- ${$rdo_not_break_apart} = 1;
- }
- elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
+ $self->set_comma_breakpoints_do(
{
- ${$rdo_not_break_apart} = 1;
+ depth => $dd,
+ i_opening_paren => $opening_structure_index_stack[$dd],
+ i_closing_paren => $i,
+ item_count => $item_count_stack[$dd],
+ identifier_count => $identifier_count_stack[$dd],
+ rcomma_index => $comma_index[$dd],
+ next_nonblank_type => $next_nonblank_type,
+ list_type => $container_type[$dd],
+ interrupted => $interrupted_list[$dd],
+ rdo_not_break_apart => \$do_not_break_apart,
+ must_break_open => $must_break_open,
+ has_broken_sublist => $has_broken_sublist[$dd],
}
- }
+ );
+ $bp_count = get_forced_breakpoint_count() - $fbc;
+ $do_not_break_apart = 0 if $must_break_open;
}
- return;
}
+ return ( $bp_count, $do_not_break_apart );
+ }
- #---------------------------------------------------------------
- # go ahead and format as a table
- #---------------------------------------------------------------
- write_logfile_entry(
- "List: auto formatting with $number_of_fields fields/row\n");
-
- my $j_first_break =
- $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
+ # These types are excluded at breakpoints to prevent blinking
+ my %is_uncontained_comma_break_excluded_type;
- for (
- my $j = $j_first_break ;
- $j < $comma_count ;
- $j += $number_of_fields
- )
- {
- my $i = $rcomma_index->[$j];
- set_forced_breakpoint($i);
- }
- return;
+ BEGIN {
+ my @q = qw< L { ( [ ? : + - >;
+ @is_uncontained_comma_break_excluded_type{@q} = (1) x scalar(@q);
}
-}
-sub study_list_complexity {
+ sub do_uncontained_comma_breaks {
- # Look for complex tables which should be formatted with one term per line.
- # Returns the following:
- #
- # \@i_ragged_break_list = list of good breakpoints to avoid lines
- # which are hard to read
- # $number_of_fields_best = suggested number of fields based on
- # complexity; = 0 if any number may be used.
- #
- my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
- my $item_count = @{$ri_term_begin};
- my $complex_item_count = 0;
- my $number_of_fields_best = $rOpts_maximum_fields_per_table;
- my $i_max = @{$ritem_lengths} - 1;
- ##my @item_complexity;
+ # Handle commas not in containers...
+ # This is a catch-all routine for commas that we
+ # don't know what to do with because the don't fall
+ # within containers. We will bias the bond strength
+ # to break at commas which ended lines in the input
+ # file. This usually works better than just trying
+ # to put as many items on a line as possible. A
+ # downside is that if the input file is garbage it
+ # won't work very well. However, the user can always
+ # prevent following the old breakpoints with the
+ # -iob flag.
+ my ( $self, $dd ) = @_;
+ my $bias = -.01;
+ my $old_comma_break_count = 0;
+ foreach my $ii ( @{ $comma_index[$dd] } ) {
+ if ( $old_breakpoint_to_go[$ii] ) {
+ $old_comma_break_count++;
+ $bond_strength_to_go[$ii] = $bias;
- my $i_last_last_break = -3;
- my $i_last_break = -2;
- my @i_ragged_break_list;
+ # reduce bias magnitude to force breaks in order
+ $bias *= 0.99;
+ }
+ }
- my $definitely_complex = 30;
- my $definitely_simple = 12;
- my $quote_count = 0;
+ # Also put a break before the first comma if
+ # (1) there was a break there in the input, and
+ # (2) there was exactly one old break before the first comma break
+ # (3) OLD: there are multiple old comma breaks
+ # (3) NEW: there are one or more old comma breaks (see return example)
+ # (4) the first comma is at the starting level ...
+ # ... fixes cases b064 b065 b068 b210 b747
+ #
+ # For example, we will follow the user and break after
+ # 'print' in this snippet:
+ # print
+ # "conformability (Not the same dimension)\n",
+ # "\t", $have, " is ", text_unit($hu), "\n",
+ # "\t", $want, " is ", text_unit($wu), "\n",
+ # ;
+ #
+ # Another example, just one comma, where we will break after
+ # the return:
+ # return
+ # $x * cos($a) - $y * sin($a),
+ # $x * sin($a) + $y * cos($a);
- for my $i ( 0 .. $i_max ) {
- my $ib = $ri_term_begin->[$i];
- my $ie = $ri_term_end->[$i];
+ # Breaking a print statement:
+ # print SAVEOUT
+ # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
+ # ( $? & 128 ) ? " -- core dumped" : "", "\n";
+ #
+ # But we will not force a break after the opening paren here
+ # (causes a blinker):
+ # $heap->{stream}->set_output_filter(
+ # poe::filter::reference->new('myotherfreezer') ),
+ # ;
+ #
+ my $i_first_comma = $comma_index[$dd]->[0];
+ my $level_comma = $levels_to_go[$i_first_comma];
+ if ( $old_breakpoint_to_go[$i_first_comma]
+ && $level_comma == $levels_to_go[0] )
+ {
+ my $ibreak = -1;
+ my $obp_count = 0;
+ for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
+ if ( $old_breakpoint_to_go[$ii] ) {
+ $obp_count++;
+ last if ( $obp_count > 1 );
+ $ibreak = $ii
+ if ( $levels_to_go[$ii] == $level_comma );
+ }
+ }
- # define complexity: start with the actual term length
- my $weighted_length = ( $ritem_lengths->[$i] - 2 );
+ # Changed rule from multiple old commas to just one here:
+ if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
+ {
+ my $ibreakm = $ibreak;
+ $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' );
+ if ( $ibreakm >= 0 ) {
- ##TBD: join types here and check for variations
- ##my $str=join "", @tokens_to_go[$ib..$ie];
+ # In order to avoid blinkers we have to be fairly
+ # restrictive:
- my $is_quote = 0;
- if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
- $is_quote = 1;
- $quote_count++;
- }
- elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
- $quote_count++;
- }
+ # Rule 1: Do not to break before an opening token
+ # Rule 2: avoid breaking at ternary operators
+ # (see b931, which is similar to the above print example)
+ # Rule 3: Do not break at chain operators to fix case b1119
+ # - The previous test was '$typem !~ /^[\(\{\[L\?\:]$/'
- if ( $ib eq $ie ) {
- if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
- $complex_item_count++;
- $weighted_length *= 2;
- }
- else {
- }
- }
- else {
- if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
- $complex_item_count++;
- $weighted_length *= 2;
- }
- if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
- $weighted_length += 4;
+ # Be sure to test any changes to these rules against runs
+ # with -l=0 such as the 'bbvt' test (perltidyrc_colin)
+ # series.
+
+ my $typem = $types_to_go[$ibreakm];
+ if ( !$is_uncontained_comma_break_excluded_type{$typem} ) {
+ $self->set_forced_breakpoint($ibreak);
+ }
+ }
}
}
+ return;
+ }
- # add weight for extra tokens.
- $weighted_length += 2 * ( $ie - $ib );
+ my %is_logical_container;
+ my %quick_filter;
-## my $BUB = join '', @tokens_to_go[$ib..$ie];
-## print "# COMPLEXITY:$weighted_length $BUB\n";
+ BEGIN {
+ my @q = qw# if elsif unless while and or err not && | || ? : ! #;
+ @is_logical_container{@q} = (1) x scalar(@q);
-##push @item_complexity, $weighted_length;
+ # This filter will allow most tokens to skip past a section of code
+ %quick_filter = %is_assignment;
+ @q = qw# => . ; < > ~ #;
+ push @q, ',';
+ @quick_filter{@q} = (1) x scalar(@q);
+ }
- # now mark a ragged break after this item it if it is 'long and
- # complex':
- if ( $weighted_length >= $definitely_complex ) {
+ sub set_for_semicolon_breakpoints {
+ my ( $self, $dd ) = @_;
+ foreach ( @{ $rfor_semicolon_list[$dd] } ) {
+ $self->set_forced_breakpoint($_);
+ }
+ return;
+ }
- # if we broke after the previous term
- # then break before it too
- if ( $i_last_break == $i - 1
- && $i > 1
- && $i_last_last_break != $i - 2 )
- {
+ sub set_logical_breakpoints {
+ my ( $self, $dd ) = @_;
+ if (
+ $item_count_stack[$dd] == 0
+ && $is_logical_container{ $container_type[$dd] }
- ## FIXME: don't strand a small term
- pop @i_ragged_break_list;
- push @i_ragged_break_list, $i - 2;
- push @i_ragged_break_list, $i - 1;
- }
+ || $has_old_logical_breakpoints[$dd]
+ )
+ {
- push @i_ragged_break_list, $i;
- $i_last_last_break = $i_last_break;
- $i_last_break = $i;
- }
+ # Look for breaks in this order:
+ # 0 1 2 3
+ # or and || &&
+ foreach my $i ( 0 .. 3 ) {
+ if ( $rand_or_list[$dd][$i] ) {
+ foreach ( @{ $rand_or_list[$dd][$i] } ) {
+ $self->set_forced_breakpoint($_);
+ }
- # don't break before a small last term -- it will
- # not look good on a line by itself.
- elsif ($i == $i_max
- && $i_last_break == $i - 1
- && $weighted_length <= $definitely_simple )
- {
- pop @i_ragged_break_list;
+ # break at any 'if' and 'unless' too
+ foreach ( @{ $rand_or_list[$dd][4] } ) {
+ $self->set_forced_breakpoint($_);
+ }
+ $rand_or_list[$dd] = [];
+ last;
+ }
+ }
}
+ return;
}
- my $identifier_count = $i_max + 1 - $quote_count;
+ sub is_unbreakable_container {
- # Need more tuning here..
- if ( $max_width > 12
- && $complex_item_count > $item_count / 2
- && $number_of_fields_best != 2 )
- {
- $number_of_fields_best = 1;
+ # never break a container of one of these types
+ # because bad things can happen (map1.t)
+ my $dd = shift;
+ return $is_sort_map_grep{ $container_type[$dd] };
}
- return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
-}
-
-sub get_maximum_fields_wanted {
+ sub scan_list {
- # Not all tables look good with more than one field of items.
- # This routine looks at a table and decides if it should be
- # formatted with just one field or not.
- # This coding is still under development.
- my ($ritem_lengths) = @_;
+ my ( $self, $is_long_line ) = @_;
- my $number_of_fields_best = 0;
+ # This routine is responsible for setting line breaks for all lists,
+ # so that hierarchical structure can be displayed and so that list
+ # items can be vertically aligned. The output of this routine is
+ # stored in the array @forced_breakpoint_to_go, which is used to set
+ # final breakpoints.
- # For just a few items, we tentatively assume just 1 field.
- my $item_count = @{$ritem_lengths};
- if ( $item_count <= 5 ) {
- $number_of_fields_best = 1;
- }
+ # It is called once per batch if the batch is a list.
+ my $rLL = $self->[_rLL_];
+ my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
+ my $ris_broken_container = $self->[_ris_broken_container_];
+ my $rbreak_before_container_by_seqno =
+ $self->[_rbreak_before_container_by_seqno_];
- # For larger tables, look at it both ways and see what looks best
- else {
+ $starting_depth = $nesting_depth_to_go[0];
- my $is_odd = 1;
- my @max_length = ( 0, 0 );
- my @last_length_2 = ( undef, undef );
- my @first_length_2 = ( undef, undef );
- my $last_length = undef;
- my $total_variation_1 = 0;
- my $total_variation_2 = 0;
- my @total_variation_2 = ( 0, 0 );
+ $block_type = ' ';
+ $current_depth = $starting_depth;
+ $i = -1;
+ $last_colon_sequence_number = -1;
+ $last_nonblank_token = ';';
+ $last_nonblank_type = ';';
+ $last_nonblank_block_type = ' ';
+ $last_old_breakpoint_count = 0;
+ $minimum_depth = $current_depth + 1; # forces update in check below
+ $old_breakpoint_count = 0;
+ $starting_breakpoint_count = get_forced_breakpoint_count();
+ $token = ';';
+ $type = ';';
+ $type_sequence = '';
- foreach my $j ( 0 .. $item_count - 1 ) {
-
- $is_odd = 1 - $is_odd;
- my $length = $ritem_lengths->[$j];
- if ( $length > $max_length[$is_odd] ) {
- $max_length[$is_odd] = $length;
- }
-
- if ( defined($last_length) ) {
- my $dl = abs( $length - $last_length );
- $total_variation_1 += $dl;
- }
- $last_length = $length;
-
- my $ll = $last_length_2[$is_odd];
- if ( defined($ll) ) {
- my $dl = abs( $length - $ll );
- $total_variation_2[$is_odd] += $dl;
- }
- else {
- $first_length_2[$is_odd] = $length;
- }
- $last_length_2[$is_odd] = $length;
- }
- $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
-
- my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
- unless ( $total_variation_2 < $factor * $total_variation_1 ) {
- $number_of_fields_best = 1;
- }
- }
- return ($number_of_fields_best);
-}
-
-sub table_columns_available {
- my $i_first_comma = shift;
- my $columns =
- maximum_line_length($i_first_comma) -
- leading_spaces_to_go($i_first_comma);
-
- # Patch: the vertical formatter does not line up lines whose lengths
- # exactly equal the available line length because of allowances
- # that must be made for side comments. Therefore, the number of
- # available columns is reduced by 1 character.
- $columns -= 1;
- return $columns;
-}
-
-sub maximum_number_of_fields {
-
- # how many fields will fit in the available space?
- my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
- my $max_pairs = int( $columns / $pair_width );
- my $number_of_fields = $max_pairs * 2;
- if ( $odd_or_even == 1
- && $max_pairs * $pair_width + $max_width <= $columns )
- {
- $number_of_fields++;
- }
- return $number_of_fields;
-}
+ my $total_depth_variation = 0;
+ my $i_old_assignment_break;
+ my $depth_last = $starting_depth;
-sub compactify_table {
+ check_for_new_minimum_depth($current_depth);
- # given a table with a certain number of fields and a certain number
- # of lines, see if reducing the number of fields will make it look
- # better.
- my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
- if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
- my $min_fields;
+ my $want_previous_breakpoint = -1;
- for (
- $min_fields = $number_of_fields ;
- $min_fields >= $odd_or_even
- && $min_fields * $formatted_lines >= $item_count ;
- $min_fields -= $odd_or_even
- )
- {
- $number_of_fields = $min_fields;
- }
- }
- return $number_of_fields;
-}
+ my $saw_good_breakpoint;
+ my $i_line_end = -1;
+ my $i_line_start = -1;
-sub set_ragged_breakpoints {
+ # loop over all tokens in this batch
+ while ( ++$i <= $max_index_to_go ) {
+ if ( $type ne 'b' ) {
+ $i_last_nonblank_token = $i - 1;
+ $last_nonblank_type = $type;
+ $last_nonblank_token = $token;
+ $last_nonblank_block_type = $block_type;
+ } ## end if ( $type ne 'b' )
+ $type = $types_to_go[$i];
+ $block_type = $block_type_to_go[$i];
+ $token = $tokens_to_go[$i];
+ $type_sequence = $type_sequence_to_go[$i];
+ my $next_type = $types_to_go[ $i + 1 ];
+ my $next_token = $tokens_to_go[ $i + 1 ];
+ my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
+ $next_nonblank_type = $types_to_go[$i_next_nonblank];
+ $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
+ $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
- # Set breakpoints in a list that cannot be formatted nicely as a
- # table.
- my ( $ri_term_comma, $ri_ragged_break_list ) = @_;
+ # set break if flag was set
+ if ( $want_previous_breakpoint >= 0 ) {
+ $self->set_forced_breakpoint($want_previous_breakpoint);
+ $want_previous_breakpoint = -1;
+ }
- my $break_count = 0;
- foreach ( @{$ri_ragged_break_list} ) {
- my $j = $ri_term_comma->[$_];
- if ($j) {
- set_forced_breakpoint($j);
- $break_count++;
- }
- }
- return $break_count;
-}
+ $last_old_breakpoint_count = $old_breakpoint_count;
-sub copy_old_breakpoints {
- my ( $i_first_comma, $i_last_comma ) = @_;
- for my $i ( $i_first_comma .. $i_last_comma ) {
- if ( $old_breakpoint_to_go[$i] ) {
- set_forced_breakpoint($i);
- }
- }
- return;
-}
+ # Fixed for case b1097 to not consider old breaks at highly
+ # stressed locations, such as types 'L' and 'R'. It might be
+ # useful to generalize this concept in the future by looking at
+ # actual bond strengths.
+ if ( $old_breakpoint_to_go[$i]
+ && $type ne 'L'
+ && $next_nonblank_type ne 'R' )
+ {
+ $i_line_end = $i;
+ $i_line_start = $i_next_nonblank;
-sub set_nobreaks {
- my ( $i, $j ) = @_;
- if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
+ $old_breakpoint_count++;
- FORMATTER_DEBUG_FLAG_NOBREAK && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
-"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
- };
+ # Break before certain keywords if user broke there and
+ # this is a 'safe' break point. The idea is to retain
+ # any preferred breaks for sequential list operations,
+ # like a schwartzian transform.
+ if ($rOpts_break_at_old_keyword_breakpoints) {
+ if (
+ $next_nonblank_type eq 'k'
+ && $is_keyword_returning_list{$next_nonblank_token}
+ && ( $type =~ /^[=\)\]\}Riw]$/
+ || $type eq 'k'
+ && $is_keyword_returning_list{$token} )
+ )
+ {
- @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
- }
+ # we actually have to set this break next time through
+ # the loop because if we are at a closing token (such
+ # as '}') which forms a one-line block, this break might
+ # get undone.
- # shouldn't happen; non-critical error
- else {
- FORMATTER_DEBUG_FLAG_NOBREAK && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
- "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
- };
- }
- return;
-}
+ # And do not do this at an equals if the user wants
+ # breaks before an equals (blinker cases b434 b903)
+ unless ( $type eq '=' && $want_break_before{$type} ) {
+ $want_previous_breakpoint = $i;
+ }
+ } ## end if ( $next_nonblank_type...)
+ } ## end if ($rOpts_break_at_old_keyword_breakpoints)
-sub set_fake_breakpoint {
+ # Break before attributes if user broke there
+ if ($rOpts_break_at_old_attribute_breakpoints) {
+ if ( $next_nonblank_type eq 'A' ) {
+ $want_previous_breakpoint = $i;
+ }
+ }
- # Just bump up the breakpoint count as a signal that there are breaks.
- # This is useful if we have breaks but may want to postpone deciding where
- # to make them.
- $forced_breakpoint_count++;
- return;
-}
+ # remember an = break as possible good break point
+ if ( $is_assignment{$type} ) {
+ $i_old_assignment_break = $i;
+ }
+ elsif ( $is_assignment{$next_nonblank_type} ) {
+ $i_old_assignment_break = $i_next_nonblank;
+ }
+ } ## end if ( $old_breakpoint_to_go...)
-sub set_forced_breakpoint {
- my $i = shift;
+ next if ( $type eq 'b' );
+ $depth = $nesting_depth_to_go[ $i + 1 ];
- return unless defined $i && $i >= 0;
+ $total_depth_variation += abs( $depth - $depth_last );
+ $depth_last = $depth;
- # no breaks between welded tokens
- return if ( weld_len_right_to_go($i) );
+ # safety check - be sure we always break after a comment
+ # Shouldn't happen .. an error here probably means that the
+ # nobreak flag did not get turned off correctly during
+ # formatting.
+ if ( $type eq '#' ) {
+ if ( $i != $max_index_to_go ) {
+ warning(
+"Non-fatal program bug: backup logic required to break after a comment\n"
+ );
+ report_definite_bug();
+ $nobreak_to_go[$i] = 0;
+ $self->set_forced_breakpoint($i);
+ } ## end if ( $i != $max_index_to_go)
+ } ## end if ( $type eq '#' )
- # when called with certain tokens, use bond strengths to decide
- # if we break before or after it
- my $token = $tokens_to_go[$i];
+ # Force breakpoints at certain tokens in long lines.
+ # Note that such breakpoints will be undone later if these tokens
+ # are fully contained within parens on a line.
+ if (
- if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
- if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
- }
+ # break before a keyword within a line
+ $type eq 'k'
+ && $i > 0
- # breaks are forced before 'if' and 'unless'
- elsif ( $is_if_unless{$token} ) { $i-- }
+ # if one of these keywords:
+ # /^(if|unless|while|until|for)$/
+ && $is_if_unless_while_until_for{$token}
- if ( $i >= 0 && $i <= $max_index_to_go ) {
- my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
+ # but do not break at something like '1 while'
+ && ( $last_nonblank_type ne 'n' || $i > 2 )
- FORMATTER_DEBUG_FLAG_FORCE && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
-"FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
- };
+ # and let keywords follow a closing 'do' brace
+ && $last_nonblank_block_type ne 'do'
- if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
- $forced_breakpoint_to_go[$i_nonblank] = 1;
+ && (
+ $is_long_line
- if ( $i_nonblank > $index_max_forced_break ) {
- $index_max_forced_break = $i_nonblank;
- }
- $forced_breakpoint_count++;
- $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
- $i_nonblank;
+ # or container is broken (by side-comment, etc)
+ || ( $next_nonblank_token eq '('
+ && $mate_index_to_go[$i_next_nonblank] < $i )
+ )
+ )
+ {
+ $self->set_forced_breakpoint( $i - 1 );
+ } ## end if ( $type eq 'k' && $i...)
- # if we break at an opening container..break at the closing
- if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
- set_closing_breakpoint($i_nonblank);
+ # remember locations of '||' and '&&' for possible breaks if we
+ # decide this is a long logical expression.
+ if ( $type eq '||' ) {
+ push @{ $rand_or_list[$depth][2] }, $i;
+ ++$has_old_logical_breakpoints[$depth]
+ if ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_logical_breakpoints );
+ } ## end elsif ( $type eq '||' )
+ elsif ( $type eq '&&' ) {
+ push @{ $rand_or_list[$depth][3] }, $i;
+ ++$has_old_logical_breakpoints[$depth]
+ if ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_logical_breakpoints );
+ } ## end elsif ( $type eq '&&' )
+ elsif ( $type eq 'f' ) {
+ push @{ $rfor_semicolon_list[$depth] }, $i;
}
- }
- }
- return;
-}
+ elsif ( $type eq 'k' ) {
+ if ( $token eq 'and' ) {
+ push @{ $rand_or_list[$depth][1] }, $i;
+ ++$has_old_logical_breakpoints[$depth]
+ if ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_logical_breakpoints );
+ } ## end if ( $token eq 'and' )
-sub clear_breakpoint_undo_stack {
- $forced_breakpoint_undo_count = 0;
- return;
-}
+ # break immediately at 'or's which are probably not in a logical
+ # block -- but we will break in logical breaks below so that
+ # they do not add to the forced_breakpoint_count
+ elsif ( $token eq 'or' ) {
+ push @{ $rand_or_list[$depth][0] }, $i;
+ ++$has_old_logical_breakpoints[$depth]
+ if ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_logical_breakpoints );
+ if ( $is_logical_container{ $container_type[$depth] } ) {
+ }
+ else {
+ if ($is_long_line) { $self->set_forced_breakpoint($i) }
+ elsif ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_logical_breakpoints )
+ {
+ $saw_good_breakpoint = 1;
+ }
+ } ## end else [ if ( $is_logical_container...)]
+ } ## end elsif ( $token eq 'or' )
+ elsif ( $token eq 'if' || $token eq 'unless' ) {
+ push @{ $rand_or_list[$depth][4] }, $i;
+ if ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_logical_breakpoints )
+ {
+ $self->set_forced_breakpoint($i);
+ }
+ } ## end elsif ( $token eq 'if' ||...)
+ } ## end elsif ( $type eq 'k' )
+ elsif ( $is_assignment{$type} ) {
+ $i_equals[$depth] = $i;
+ }
-sub undo_forced_breakpoint_stack {
+ if ($type_sequence) {
- my $i_start = shift;
- if ( $i_start < 0 ) {
- $i_start = 0;
- my ( $a, $b, $c ) = caller();
- warning(
-"Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
- );
- }
+ # handle any postponed closing breakpoints
+ if ( $is_closing_sequence_token{$token} ) {
+ if ( $type eq ':' ) {
+ $last_colon_sequence_number = $type_sequence;
- while ( $forced_breakpoint_undo_count > $i_start ) {
- my $i =
- $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
- if ( $i >= 0 && $i <= $max_index_to_go ) {
- $forced_breakpoint_to_go[$i] = 0;
- $forced_breakpoint_count--;
+ # retain break at a ':' line break
+ if ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_ternary_breakpoints )
+ {
- FORMATTER_DEBUG_FLAG_UNDOBP && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
-"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
- };
- }
+ $self->set_forced_breakpoint($i);
- # shouldn't happen, but not a critical error
- else {
- FORMATTER_DEBUG_FLAG_UNDOBP && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
-"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
- };
- }
- }
- return;
-}
+ # break at previous '='
+ if ( $i_equals[$depth] > 0 ) {
+ $self->set_forced_breakpoint(
+ $i_equals[$depth] );
+ $i_equals[$depth] = -1;
+ }
+ } ## end if ( ( $i == $i_line_start...))
+ } ## end if ( $type eq ':' )
+ if ( has_postponed_breakpoint($type_sequence) ) {
+ my $inc = ( $type eq ':' ) ? 0 : 1;
+ $self->set_forced_breakpoint( $i - $inc );
+ }
+ } ## end if ( $is_closing_sequence_token{$token} )
-sub sync_token_K {
- my ( $self, $i ) = @_;
+ # set breaks at ?/: if they will get separated (and are
+ # not a ?/: chain), or if the '?' is at the end of the
+ # line
+ elsif ( $token eq '?' ) {
+ my $i_colon = $mate_index_to_go[$i];
+ if (
+ $i_colon <= 0 # the ':' is not in this batch
+ || $i == 0 # this '?' is the first token of the line
+ || $i ==
+ $max_index_to_go # or this '?' is the last token
+ )
+ {
- # Keep tokens in the rLL array in sync with the _to_go array
- my $rLL = $self->{rLL};
- my $K = $K_to_go[$i];
- if ( defined($K) ) {
- $rLL->[$K]->[_TOKEN_] = $tokens_to_go[$i];
- }
- else {
- # shouldn't happen
- }
- return;
-}
+ # don't break at a '?' if preceded by ':' on
+ # this line of previous ?/: pair on this line.
+ # This is an attempt to preserve a chain of ?/:
+ # expressions (elsif2.t). And don't break if
+ # this has a side comment.
+ $self->set_forced_breakpoint($i)
+ unless (
+ $type_sequence == (
+ $last_colon_sequence_number +
+ TYPE_SEQUENCE_INCREMENT
+ )
+ || $tokens_to_go[$max_index_to_go] eq '#'
+ );
+ $self->set_closing_breakpoint($i);
+ } ## end if ( $i_colon <= 0 ||...)
+ } ## end elsif ( $token eq '?' )
+ } ## end if ($type_sequence)
-{ # begin recombine_breakpoints
+#print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
- my %is_amp_amp;
- my %is_ternary;
- my %is_math_op;
- my %is_plus_minus;
- my %is_mult_div;
+ #------------------------------------------------------------
+ # Handle Increasing Depth..
+ #
+ # prepare for a new list when depth increases
+ # token $i is a '(','{', or '['
+ #------------------------------------------------------------
+ if ( $depth > $current_depth ) {
- BEGIN {
+ $type_sequence_stack[$depth] = $type_sequence;
+ $override_cab3[$depth] =
+ $rOpts_comma_arrow_breakpoints == 3
+ && $type_sequence
+ && $self->[_roverride_cab3_]->{$type_sequence};
+ $breakpoint_stack[$depth] = get_forced_breakpoint_count();
+ $breakpoint_undo_stack[$depth] =
+ get_forced_breakpoint_undo_count();
+ $has_broken_sublist[$depth] = 0;
+ $identifier_count_stack[$depth] = 0;
+ $index_before_arrow[$depth] = -1;
+ $interrupted_list[$depth] = 0;
+ $item_count_stack[$depth] = 0;
+ $last_comma_index[$depth] = undef;
+ $last_dot_index[$depth] = undef;
+ $last_nonblank_type[$depth] = $last_nonblank_type;
+ $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
+ $opening_structure_index_stack[$depth] = $i;
+ $rand_or_list[$depth] = [];
+ $rfor_semicolon_list[$depth] = [];
+ $i_equals[$depth] = -1;
+ $want_comma_break[$depth] = 0;
+ $container_type[$depth] =
- my @q;
- @q = qw( && || );
- @is_amp_amp{@q} = (1) x scalar(@q);
+ # k => && || ? : .
+ $is_container_label_type{$last_nonblank_type}
+ ? $last_nonblank_token
+ : "";
+ $has_old_logical_breakpoints[$depth] = 0;
- @q = qw( ? : );
- @is_ternary{@q} = (1) x scalar(@q);
+ # if line ends here then signal closing token to break
+ if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
+ {
+ $self->set_closing_breakpoint($i);
+ }
- @q = qw( + - * / );
- @is_math_op{@q} = (1) x scalar(@q);
+ # Not all lists of values should be vertically aligned..
+ $dont_align[$depth] =
- @q = qw( + - );
- @is_plus_minus{@q} = (1) x scalar(@q);
+ # code BLOCKS are handled at a higher level
+ ( $block_type ne "" )
- @q = qw( * / );
- @is_mult_div{@q} = (1) x scalar(@q);
- }
+ # certain paren lists
+ || ( $type eq '(' ) && (
- sub DUMP_BREAKPOINTS {
+ # it does not usually look good to align a list of
+ # identifiers in a parameter list, as in:
+ # my($var1, $var2, ...)
+ # (This test should probably be refined, for now I'm just
+ # testing for any keyword)
+ ( $last_nonblank_type eq 'k' )
- # Debug routine to dump current breakpoints...not normally called
- # We are given indexes to the current lines:
- # $ri_beg = ref to array of BEGinning indexes of each line
- # $ri_end = ref to array of ENDing indexes of each line
- my ( $ri_beg, $ri_end, $msg ) = @_;
- print STDERR "----Dumping breakpoints from: $msg----\n";
- for my $n ( 0 .. @{$ri_end} - 1 ) {
- my $ibeg = $ri_beg->[$n];
- my $iend = $ri_end->[$n];
- my $text = "";
- foreach my $i ( $ibeg .. $iend ) {
- $text .= $tokens_to_go[$i];
- }
- print STDERR "$n ($ibeg:$iend) $text\n";
- }
- print STDERR "----\n";
- return;
- }
+ # a trailing '(' usually indicates a non-list
+ || ( $next_nonblank_type eq '(' )
+ );
- sub delete_one_line_semicolons {
+ # patch to outdent opening brace of long if/for/..
+ # statements (like this one). See similar coding in
+ # set_continuation breaks. We have also catch it here for
+ # short line fragments which otherwise will not go through
+ # set_continuation_breaks.
+ if (
+ $block_type
- my ( $self, $ri_beg, $ri_end ) = @_;
- my $rLL = $self->{rLL};
- my $K_opening_container = $self->{K_opening_container};
+ # if we have the ')' but not its '(' in this batch..
+ && ( $last_nonblank_token eq ')' )
+ && $mate_index_to_go[$i_last_nonblank_token] < 0
- # Walk down the lines of this batch and delete any semicolons
- # terminating one-line blocks;
- my $nmax = @{$ri_end} - 1;
+ # and user wants brace to left
+ && !$rOpts->{'opening-brace-always-on-right'}
- foreach my $n ( 0 .. $nmax ) {
- my $i_beg = $ri_beg->[$n];
- my $i_e = $ri_end->[$n];
- my $K_beg = $K_to_go[$i_beg];
- my $K_e = $K_to_go[$i_e];
- my $K_end = $K_e;
- my $type_end = $rLL->[$K_end]->[_TYPE_];
- if ( $type_end eq '#' ) {
- $K_end = $self->K_previous_nonblank($K_end);
- if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; }
- }
+ && ( $type eq '{' ) # should be true
+ && ( $token eq '{' ) # should be true
+ )
+ {
+ $self->set_forced_breakpoint( $i - 1 );
+ } ## end if ( $block_type && ( ...))
+ } ## end if ( $depth > $current_depth)
- # we are looking for a line ending in closing brace
- next
- unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' );
+ #------------------------------------------------------------
+ # Handle Decreasing Depth..
+ #
+ # finish off any old list when depth decreases
+ # token $i is a ')','}', or ']'
+ #------------------------------------------------------------
+ elsif ( $depth < $current_depth ) {
- # ...and preceded by a semicolon on the same line
- my $K_semicolon = $self->K_previous_nonblank($K_end);
- my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg );
- next if ( $i_semicolon <= $i_beg );
- next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' );
+ check_for_new_minimum_depth($depth);
- # safety check - shouldn't happen
- if ( $types_to_go[$i_semicolon] ne ';' ) {
- Fault("unexpected type looking for semicolon, ignoring");
- next;
- }
+ # force all outer logical containers to break after we see on
+ # old breakpoint
+ $has_old_logical_breakpoints[$depth] ||=
+ $has_old_logical_breakpoints[$current_depth];
- # ... with the corresponding opening brace on the same line
- my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_];
- my $K_opening = $K_opening_container->{$type_sequence};
- my $i_opening = $i_beg + ( $K_opening - $K_beg );
- next if ( $i_opening < $i_beg );
+ # Patch to break between ') {' if the paren list is broken.
+ # There is similar logic in set_continuation_breaks for
+ # non-broken lists.
+ if ( $token eq ')'
+ && $next_nonblank_block_type
+ && $interrupted_list[$current_depth]
+ && $next_nonblank_type eq '{'
+ && !$rOpts->{'opening-brace-always-on-right'} )
+ {
+ $self->set_forced_breakpoint($i);
+ } ## end if ( $token eq ')' && ...
- # ... and only one semicolon between these braces
- my $semicolon_count = 0;
- foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) {
- if ( $rLL->[$K]->[_TYPE_] eq ';' ) {
- $semicolon_count++;
- last;
- }
- }
- next if ($semicolon_count);
+#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
- # ...ok, then make the semicolon invisible
- $tokens_to_go[$i_semicolon] = "";
- }
- return;
- }
+ # set breaks at commas if necessary
+ my ( $bp_count, $do_not_break_apart ) =
+ $self->set_comma_breakpoints($current_depth);
- sub unmask_phantom_semicolons {
-
- my ( $self, $ri_beg, $ri_end ) = @_;
-
- # Walk down the lines of this batch and unmask any invisible line-ending
- # semicolons. They were placed by sub respace_tokens but we only now
- # know if we actually need them.
-
- my $nmax = @{$ri_end} - 1;
- foreach my $n ( 0 .. $nmax ) {
-
- my $i = $ri_end->[$n];
- if ( $types_to_go[$i] eq ';' && $tokens_to_go[$i] eq '' ) {
-
- $tokens_to_go[$i] = $want_left_space{';'} == WS_NO ? ';' : ' ;';
- $self->sync_token_K($i);
-
- my $line_number = 1 + $self->get_old_line_index( $K_to_go[$i] );
- note_added_semicolon($line_number);
- }
- }
- return;
- }
-
- sub recombine_breakpoints {
-
- # sub set_continuation_breaks is very liberal in setting line breaks
- # for long lines, always setting breaks at good breakpoints, even
- # when that creates small lines. Sometimes small line fragments
- # are produced which would look better if they were combined.
- # That's the task of this routine.
- #
- # We are given indexes to the current lines:
- # $ri_beg = ref to array of BEGinning indexes of each line
- # $ri_end = ref to array of ENDing indexes of each line
- my ( $ri_beg, $ri_end ) = @_;
+ my $i_opening = $opening_structure_index_stack[$current_depth];
+ my $saw_opening_structure = ( $i_opening >= 0 );
- # Make a list of all good joining tokens between the lines
- # n-1 and n.
- my @joint;
- my $nmax = @{$ri_end} - 1;
- for my $n ( 1 .. $nmax ) {
- my $ibeg_1 = $ri_beg->[ $n - 1 ];
- my $iend_1 = $ri_end->[ $n - 1 ];
- my $iend_2 = $ri_end->[$n];
- my $ibeg_2 = $ri_beg->[$n];
+ # this term is long if we had to break at interior commas..
+ my $is_long_term = $bp_count > 0;
- my ( $itok, $itokp, $itokm );
+ # If this is a short container with one or more comma arrows,
+ # then we will mark it as a long term to open it if requested.
+ # $rOpts_comma_arrow_breakpoints =
+ # 0 - open only if comma precedes closing brace
+ # 1 - stable: except for one line blocks
+ # 2 - try to form 1 line blocks
+ # 3 - ignore =>
+ # 4 - always open up if vt=0
+ # 5 - stable: even for one line blocks if vt=0
- foreach my $itest ( $iend_1, $ibeg_2 ) {
- my $type = $types_to_go[$itest];
- if ( $is_math_op{$type}
- || $is_amp_amp{$type}
- || $is_assignment{$type}
- || $type eq ':' )
- {
- $itok = $itest;
+ # PATCH: Modify the -cab flag if we are not processing a list:
+ # We only want the -cab flag to apply to list containers, so
+ # for non-lists we use the default and stable -cab=5 value.
+ # Fixes case b939a.
+ my $cab_flag = $rOpts_comma_arrow_breakpoints;
+ if ( $type_sequence && !$ris_list_by_seqno->{$type_sequence} ) {
+ $cab_flag = 5;
}
- }
- $joint[$n] = [$itok];
- }
-
- my $more_to_do = 1;
-
- # We keep looping over all of the lines of this batch
- # until there are no more possible recombinations
- my $nmax_last = @{$ri_end};
- my $reverse = 0;
- while ($more_to_do) {
- my $n_best = 0;
- my $bs_best;
- my $nmax = @{$ri_end} - 1;
-
- # Safety check for infinite loop
- unless ( $nmax < $nmax_last ) {
-
- # Shouldn't happen because splice below decreases nmax on each
- # pass.
- Fault("Program bug-infinite loop in recombine breakpoints\n");
- }
- $nmax_last = $nmax;
- $more_to_do = 0;
- my $skip_Section_3;
- my $leading_amp_count = 0;
- my $this_line_is_semicolon_terminated;
-
- # loop over all remaining lines in this batch
- for my $iter ( 1 .. $nmax ) {
-
- # alternating sweep direction gives symmetric results
- # for recombining lines which exceed the line length
- # such as eval {{{{.... }}}}
- my $n;
- if ($reverse) { $n = 1 + $nmax - $iter; }
- else { $n = $iter }
-
- #----------------------------------------------------------
- # If we join the current pair of lines,
- # line $n-1 will become the left part of the joined line
- # line $n will become the right part of the joined line
- #
- # Here are Indexes of the endpoint tokens of the two lines:
- #
- # -----line $n-1--- | -----line $n-----
- # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
- # ^
- # |
- # We want to decide if we should remove the line break
- # between the tokens at $iend_1 and $ibeg_2
- #
- # We will apply a number of ad-hoc tests to see if joining
- # here will look ok. The code will just issue a 'next'
- # command if the join doesn't look good. If we get through
- # the gauntlet of tests, the lines will be recombined.
- #----------------------------------------------------------
- #
- # beginning and ending tokens of the lines we are working on
- my $ibeg_1 = $ri_beg->[ $n - 1 ];
- my $iend_1 = $ri_end->[ $n - 1 ];
- my $iend_2 = $ri_end->[$n];
- my $ibeg_2 = $ri_beg->[$n];
- my $ibeg_nmax = $ri_beg->[$nmax];
-
- # combined line cannot be too long
- my $excess = excess_line_length( $ibeg_1, $iend_2, 1, 1 );
- next if ( $excess > 0 );
-
- my $type_iend_1 = $types_to_go[$iend_1];
- my $type_iend_2 = $types_to_go[$iend_2];
- my $type_ibeg_1 = $types_to_go[$ibeg_1];
- my $type_ibeg_2 = $types_to_go[$ibeg_2];
-
- # terminal token of line 2 if any side comment is ignored:
- my $iend_2t = $iend_2;
- my $type_iend_2t = $type_iend_2;
-
- # some beginning indexes of other lines, which may not exist
- my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1;
- my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
- my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
-
- my $bs_tweak = 0;
-
- #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
- # $nesting_depth_to_go[$ibeg_1] );
-
- FORMATTER_DEBUG_FLAG_RECOMBINE && do {
- print STDERR
-"RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
- };
- # If line $n is the last line, we set some flags and
- # do any special checks for it
- if ( $n == $nmax ) {
+ if ( !$is_long_term
+ && $saw_opening_structure
+ && $is_opening_token{ $tokens_to_go[$i_opening] }
+ && $index_before_arrow[ $depth + 1 ] > 0
+ && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
+ )
+ {
+ $is_long_term =
+ $cab_flag == 4
+ || $cab_flag == 0 && $last_nonblank_token eq ','
+ || $cab_flag == 5 && $old_breakpoint_to_go[$i_opening];
+ } ## end if ( !$is_long_term &&...)
- # a terminal '{' should stay where it is
- # unless preceded by a fat comma
- next if ( $type_ibeg_2 eq '{' && $type_iend_1 ne '=>' );
+ # mark term as long if the length between opening and closing
+ # parens exceeds allowed line length
+ if ( !$is_long_term && $saw_opening_structure ) {
- if ( $type_iend_2 eq '#'
- && $iend_2 - $ibeg_2 >= 2
- && $types_to_go[ $iend_2 - 1 ] eq 'b' )
+ my $i_opening_minus =
+ $self->find_token_starting_list($i_opening);
+
+ my $excess =
+ $self->excess_line_length( $i_opening_minus, $i );
+
+ my $tol =
+ $length_tol_boost
+ && !$ris_list_by_seqno->{$type_sequence}
+ ? $length_tol + $length_tol_boost
+ : $length_tol;
+
+ # Patch to avoid blinking with -bbxi=2 and -cab=2
+ # in which variations in -ci cause unstable formatting
+ # in edge cases. We just always add one ci level so that
+ # the formatting is independent of the -BBX results.
+ # Fixes cases b1137 b1149 b1150 b1155 b1158 b1159 b1160
+ # b1161 b1166 b1167 b1168
+ if ( !$ci_levels_to_go[$i_opening]
+ && $rbreak_before_container_by_seqno->{$type_sequence} )
{
- $iend_2t = $iend_2 - 2;
- $type_iend_2t = $types_to_go[$iend_2t];
+ $tol += $rOpts->{'continuation-indentation'};
}
- $this_line_is_semicolon_terminated = $type_iend_2t eq ';';
- }
-
- #----------------------------------------------------------
- # Recombine Section 0:
- # Examine the special token joining this line pair, if any.
- # Put as many tests in this section to avoid duplicate code and
- # to make formatting independent of whether breaks are to the
- # left or right of an operator.
- #----------------------------------------------------------
-
- my ($itok) = @{ $joint[$n] };
- if ($itok) {
+ $is_long_term = $excess + $tol > 0;
- # FIXME: Patch - may not be necessary
- my $iend_1 =
- $type_iend_1 eq 'b'
- ? $iend_1 - 1
- : $iend_1;
+ } ## end if ( !$is_long_term &&...)
- my $iend_2 =
- $type_iend_2 eq 'b'
- ? $iend_2 - 1
- : $iend_2;
- ## END PATCH
+ # We've set breaks after all comma-arrows. Now we have to
+ # undo them if this can be a one-line block
+ # (the only breakpoints set will be due to comma-arrows)
- my $type = $types_to_go[$itok];
+ if (
- if ( $type eq ':' ) {
+ # user doesn't require breaking after all comma-arrows
+ ( $cab_flag != 0 ) && ( $cab_flag != 4 )
- # do not join at a colon unless it disobeys the break request
- if ( $itok eq $iend_1 ) {
- next unless $want_break_before{$type};
- }
- else {
- $leading_amp_count++;
- next if $want_break_before{$type};
- }
- } ## end if ':'
+ # and if the opening structure is in this batch
+ && $saw_opening_structure
- # handle math operators + - * /
- elsif ( $is_math_op{$type} ) {
+ # and either on the same old line
+ && (
+ $old_breakpoint_count_stack[$current_depth] ==
+ $last_old_breakpoint_count
- # Combine these lines if this line is a single
- # number, or if it is a short term with same
- # operator as the previous line. For example, in
- # the following code we will combine all of the
- # short terms $A, $B, $C, $D, $E, $F, together
- # instead of leaving them one per line:
- # my $time =
- # $A * $B * $C * $D * $E * $F *
- # ( 2. * $eps * $sigma * $area ) *
- # ( 1. / $tcold**3 - 1. / $thot**3 );
+ # or user wants to form long blocks with arrows
+ || $cab_flag == 2
- # This can be important in math-intensive code.
+ # if -cab=3 is overridden then use -cab=2 behavior
+ || $cab_flag == 3 && $override_cab3[$current_depth]
+ )
- my $good_combo;
+ # and we made breakpoints between the opening and closing
+ && ( $breakpoint_undo_stack[$current_depth] <
+ get_forced_breakpoint_undo_count() )
- my $itokp = min( $inext_to_go[$itok], $iend_2 );
- my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
- my $itokm = max( $iprev_to_go[$itok], $ibeg_1 );
- my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
+ # and this block is short enough to fit on one line
+ # Note: use < because need 1 more space for possible comma
+ && !$is_long_term
- # check for a number on the right
- if ( $types_to_go[$itokp] eq 'n' ) {
+ )
+ {
+ $self->undo_forced_breakpoint_stack(
+ $breakpoint_undo_stack[$current_depth] );
+ } ## end if ( ( $rOpts_comma_arrow_breakpoints...))
- # ok if nothing else on right
- if ( $itokp == $iend_2 ) {
- $good_combo = 1;
- }
- else {
+ # now see if we have any comma breakpoints left
+ my $has_comma_breakpoints =
+ ( $breakpoint_stack[$current_depth] !=
+ get_forced_breakpoint_count() );
- # look one more token to right..
- # okay if math operator or some termination
- $good_combo =
- ( ( $itokpp == $iend_2 )
- && $is_math_op{ $types_to_go[$itokpp] } )
- || $types_to_go[$itokpp] =~ /^[#,;]$/;
- }
- }
+ # update broken-sublist flag of the outer container
+ $has_broken_sublist[$depth] =
+ $has_broken_sublist[$depth]
+ || $has_broken_sublist[$current_depth]
+ || $is_long_term
+ || $has_comma_breakpoints;
- # check for a number on the left
- if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
+# Having come to the closing ')', '}', or ']', now we have to decide if we
+# should 'open up' the structure by placing breaks at the opening and
+# closing containers. This is a tricky decision. Here are some of the
+# basic considerations:
+#
+# -If this is a BLOCK container, then any breakpoints will have already
+# been set (and according to user preferences), so we need do nothing here.
+#
+# -If we have a comma-separated list for which we can align the list items,
+# then we need to do so because otherwise the vertical aligner cannot
+# currently do the alignment.
+#
+# -If this container does itself contain a container which has been broken
+# open, then it should be broken open to properly show the structure.
+#
+# -If there is nothing to align, and no other reason to break apart,
+# then do not do it.
+#
+# We will not break open the parens of a long but 'simple' logical expression.
+# For example:
+#
+# This is an example of a simple logical expression and its formatting:
+#
+# if ( $bigwasteofspace1 && $bigwasteofspace2
+# || $bigwasteofspace3 && $bigwasteofspace4 )
+#
+# Most people would prefer this than the 'spacey' version:
+#
+# if (
+# $bigwasteofspace1 && $bigwasteofspace2
+# || $bigwasteofspace3 && $bigwasteofspace4
+# )
+#
+# To illustrate the rules for breaking logical expressions, consider:
+#
+# FULLY DENSE:
+# if ( $opt_excl
+# and ( exists $ids_excl_uc{$id_uc}
+# or grep $id_uc =~ /$_/, @ids_excl_uc ))
+#
+# This is on the verge of being difficult to read. The current default is to
+# open it up like this:
+#
+# DEFAULT:
+# if (
+# $opt_excl
+# and ( exists $ids_excl_uc{$id_uc}
+# or grep $id_uc =~ /$_/, @ids_excl_uc )
+# )
+#
+# This is a compromise which tries to avoid being too dense and to spacey.
+# A more spaced version would be:
+#
+# SPACEY:
+# if (
+# $opt_excl
+# and (
+# exists $ids_excl_uc{$id_uc}
+# or grep $id_uc =~ /$_/, @ids_excl_uc
+# )
+# )
+#
+# Some people might prefer the spacey version -- an option could be added. The
+# innermost expression contains a long block '( exists $ids_... ')'.
+#
+# Here is how the logic goes: We will force a break at the 'or' that the
+# innermost expression contains, but we will not break apart its opening and
+# closing containers because (1) it contains no multi-line sub-containers itself,
+# and (2) there is no alignment to be gained by breaking it open like this
+#
+# and (
+# exists $ids_excl_uc{$id_uc}
+# or grep $id_uc =~ /$_/, @ids_excl_uc
+# )
+#
+# (although this looks perfectly ok and might be good for long expressions). The
+# outer 'if' container, though, contains a broken sub-container, so it will be
+# broken open to avoid too much density. Also, since it contains no 'or's, there
+# will be a forced break at its 'and'.
- # okay if nothing else to left
- if ( $itokm == $ibeg_1 ) {
- $good_combo = 1;
- }
+ # set some flags telling something about this container..
+ my $is_simple_logical_expression = 0;
+ if ( $item_count_stack[$current_depth] == 0
+ && $saw_opening_structure
+ && $tokens_to_go[$i_opening] eq '('
+ && $is_logical_container{ $container_type[$current_depth] }
+ )
+ {
- # otherwise look one more token to left
- else {
+ # This seems to be a simple logical expression with
+ # no existing breakpoints. Set a flag to prevent
+ # opening it up.
+ if ( !$has_comma_breakpoints ) {
+ $is_simple_logical_expression = 1;
+ }
- # okay if math operator, comma, or assignment
- $good_combo = ( $itokmm == $ibeg_1 )
- && ( $is_math_op{ $types_to_go[$itokmm] }
- || $types_to_go[$itokmm] =~ /^[,]$/
- || $is_assignment{ $types_to_go[$itokmm] }
- );
- }
- }
+ # This seems to be a simple logical expression with
+ # breakpoints (broken sublists, for example). Break
+ # at all 'or's and '||'s.
+ else {
+ $self->set_logical_breakpoints($current_depth);
+ }
+ } ## end if ( $item_count_stack...)
- # look for a single short token either side of the
- # operator
- if ( !$good_combo ) {
+ if ( $is_long_term
+ && @{ $rfor_semicolon_list[$current_depth] } )
+ {
+ $self->set_for_semicolon_breakpoints($current_depth);
- # Slight adjustment factor to make results
- # independent of break before or after operator in
- # long summed lists. (An operator and a space make
- # two spaces).
- my $two = ( $itok eq $iend_1 ) ? 2 : 0;
+ # open up a long 'for' or 'foreach' container to allow
+ # leading term alignment unless -lp is used.
+ $has_comma_breakpoints = 1
+ unless $rOpts_line_up_parentheses;
+ } ## end if ( $is_long_term && ...)
- $good_combo =
+ if (
- # numbers or id's on both sides of this joint
- $types_to_go[$itokp] =~ /^[in]$/
- && $types_to_go[$itokm] =~ /^[in]$/
+ # breaks for code BLOCKS are handled at a higher level
+ !$block_type
- # one of the two lines must be short:
- && (
- (
- # no more than 2 nonblank tokens right of
- # joint
- $itokpp == $iend_2
+ # we do not need to break at the top level of an 'if'
+ # type expression
+ && !$is_simple_logical_expression
- # short
- && token_sequence_length( $itokp, $iend_2 )
- < $two +
- $rOpts_short_concatenation_item_length
- )
- || (
- # no more than 2 nonblank tokens left of
- # joint
- $itokmm == $ibeg_1
+ ## modification to keep ': (' containers vertically tight;
+ ## but probably better to let user set -vt=1 to avoid
+ ## inconsistency with other paren types
+ ## && ($container_type[$current_depth] ne ':')
- # short
- && token_sequence_length( $ibeg_1, $itokm )
- < 2 - $two +
- $rOpts_short_concatenation_item_length
- )
+ # otherwise, we require one of these reasons for breaking:
+ && (
- )
+ # - this term has forced line breaks
+ $has_comma_breakpoints
- # keep pure terms; don't mix +- with */
- && !(
- $is_plus_minus{$type}
- && ( $is_mult_div{ $types_to_go[$itokmm] }
- || $is_mult_div{ $types_to_go[$itokpp] } )
- )
- && !(
- $is_mult_div{$type}
- && ( $is_plus_minus{ $types_to_go[$itokmm] }
- || $is_plus_minus{ $types_to_go[$itokpp] } )
- )
+ # - the opening container is separated from this batch
+ # for some reason (comment, blank line, code block)
+ # - this is a non-paren container spanning multiple lines
+ || !$saw_opening_structure
- ;
+ # - this is a long block contained in another breakable
+ # container
+ || $is_long_term && !$self->is_in_block_by_i($i_opening)
+ )
+ )
+ {
+
+ # For -lp option, we must put a breakpoint before
+ # the token which has been identified as starting
+ # this indentation level. This is necessary for
+ # proper alignment.
+ if ( $rOpts_line_up_parentheses && $saw_opening_structure )
+ {
+ my $item = $leading_spaces_to_go[ $i_opening + 1 ];
+ if ( $i_opening + 1 < $max_index_to_go
+ && $types_to_go[ $i_opening + 1 ] eq 'b' )
+ {
+ $item = $leading_spaces_to_go[ $i_opening + 2 ];
}
+ if ( defined($item) ) {
+ my $i_start_2;
+ my $K_start_2 = $item->get_starting_index_K();
+ if ( defined($K_start_2) ) {
+ $i_start_2 = $K_start_2 - $K_to_go[0];
+ }
+ if (
+ defined($i_start_2)
- # it is also good to combine if we can reduce to 2 lines
- if ( !$good_combo ) {
+ # we are breaking after an opening brace, paren,
+ # so don't break before it too
+ && $i_start_2 ne $i_opening
+ && $i_start_2 >= 0
+ && $i_start_2 <= $max_index_to_go
+ )
+ {
- # index on other line where same token would be in a
- # long chain.
- my $iother =
- ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
+ # Only break for breakpoints at the same
+ # indentation level as the opening paren
+ my $test1 = $nesting_depth_to_go[$i_opening];
+ my $test2 = $nesting_depth_to_go[$i_start_2];
+ if ( $test2 == $test1 ) {
- $good_combo =
- $n == 2
- && $n == $nmax
- && $types_to_go[$iother] ne $type;
- }
+ # Back up at a blank (fixes case b932)
+ my $ibr = $i_start_2 - 1;
+ if ( $ibr > 0
+ && $types_to_go[$ibr] eq 'b' )
+ {
+ $ibr--;
+ }
- next unless ($good_combo);
+ $self->set_forced_breakpoint($ibr);
- } ## end math
+ }
+ } ## end if ( defined($i_start_2...))
+ } ## end if ( defined($item) )
+ } ## end if ( $rOpts_line_up_parentheses...)
- elsif ( $is_amp_amp{$type} ) {
- ##TBD
- } ## end &&, ||
+ # break after opening structure.
+ # note: break before closing structure will be automatic
+ if ( $minimum_depth <= $current_depth ) {
- elsif ( $is_assignment{$type} ) {
- ##TBD
- } ## end assignment
- }
+ $self->set_forced_breakpoint($i_opening)
+ unless ( $do_not_break_apart
+ || is_unbreakable_container($current_depth) );
- #----------------------------------------------------------
- # Recombine Section 1:
- # Join welded nested containers immediately
- #----------------------------------------------------------
- if ( weld_len_right_to_go($iend_1)
- || weld_len_left_to_go($ibeg_2) )
- {
- $n_best = $n;
+ # break at ',' of lower depth level before opening token
+ if ( $last_comma_index[$depth] ) {
+ $self->set_forced_breakpoint(
+ $last_comma_index[$depth] );
+ }
- # Old coding alternated sweep direction: no longer needed
- # $reverse = 1 - $reverse;
- last;
- }
- $reverse = 0;
-
- #----------------------------------------------------------
- # Recombine Section 2:
- # Examine token at $iend_1 (right end of first line of pair)
- #----------------------------------------------------------
-
- # an isolated '}' may join with a ';' terminated segment
- if ( $type_iend_1 eq '}' ) {
-
- # Check for cases where combining a semicolon terminated
- # statement with a previous isolated closing paren will
- # allow the combined line to be outdented. This is
- # generally a good move. For example, we can join up
- # the last two lines here:
- # (
- # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
- # $size, $atime, $mtime, $ctime, $blksize, $blocks
- # )
- # = stat($file);
- #
- # to get:
- # (
- # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
- # $size, $atime, $mtime, $ctime, $blksize, $blocks
- # ) = stat($file);
- #
- # which makes the parens line up.
- #
- # Another example, from Joe Matarazzo, probably looks best
- # with the 'or' clause appended to the trailing paren:
- # $self->some_method(
- # PARAM1 => 'foo',
- # PARAM2 => 'bar'
- # ) or die "Some_method didn't work";
- #
- # But we do not want to do this for something like the -lp
- # option where the paren is not outdentable because the
- # trailing clause will be far to the right.
- #
- # The logic here is synchronized with the logic in sub
- # sub set_adjusted_indentation, which actually does
- # the outdenting.
- #
- $skip_Section_3 ||= $this_line_is_semicolon_terminated
-
- # only one token on last line
- && $ibeg_1 == $iend_1
-
- # must be structural paren
- && $tokens_to_go[$iend_1] eq ')'
+ # break at '.' of lower depth level before opening token
+ if ( $last_dot_index[$depth] ) {
+ $self->set_forced_breakpoint(
+ $last_dot_index[$depth] );
+ }
- # style must allow outdenting,
- && !$closing_token_indentation{')'}
+ # break before opening structure if preceded by another
+ # closing structure and a comma. This is normally
+ # done by the previous closing brace, but not
+ # if it was a one-line block.
+ if ( $i_opening > 2 ) {
+ my $i_prev =
+ ( $types_to_go[ $i_opening - 1 ] eq 'b' )
+ ? $i_opening - 2
+ : $i_opening - 1;
- # only leading '&&', '||', and ':' if no others seen
- # (but note: our count made below could be wrong
- # due to intervening comments)
- && ( $leading_amp_count == 0
- || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
+ if (
+ $types_to_go[$i_prev] eq ','
+ && ( $types_to_go[ $i_prev - 1 ] eq ')'
+ || $types_to_go[ $i_prev - 1 ] eq '}' )
+ )
+ {
+ $self->set_forced_breakpoint($i_prev);
+ }
- # but leading colons probably line up with a
- # previous colon or question (count could be wrong).
- && $type_ibeg_2 ne ':'
+ # also break before something like ':(' or '?('
+ # if appropriate.
+ elsif (
+ $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
+ {
+ my $token_prev = $tokens_to_go[$i_prev];
+ if ( $want_break_before{$token_prev} ) {
+ $self->set_forced_breakpoint($i_prev);
+ }
+ } ## end elsif ( $types_to_go[$i_prev...])
+ } ## end if ( $i_opening > 2 )
+ } ## end if ( $minimum_depth <=...)
- # only one step in depth allowed. this line must not
- # begin with a ')' itself.
- && ( $nesting_depth_to_go[$iend_1] ==
- $nesting_depth_to_go[$iend_2] + 1 );
+ # break after comma following closing structure
+ if ( $next_type eq ',' ) {
+ $self->set_forced_breakpoint( $i + 1 );
+ }
- # YVES patch 2 of 2:
- # Allow cuddled eval chains, like this:
- # eval {
- # #STUFF;
- # 1; # return true
- # } or do {
- # #handle error
- # };
- # This patch works together with a patch in
- # setting adjusted indentation (where the closing eval
- # brace is outdented if possible).
- # The problem is that an 'eval' block has continuation
- # indentation and it looks better to undo it in some
- # cases. If we do not use this patch we would get:
- # eval {
- # #STUFF;
- # 1; # return true
- # }
- # or do {
- # #handle error
- # };
- # The alternative, for uncuddled style, is to create
- # a patch in set_adjusted_indentation which undoes
- # the indentation of a leading line like 'or do {'.
- # This doesn't work well with -icb through
+ # break before an '=' following closing structure
if (
- $block_type_to_go[$iend_1] eq 'eval'
- && !$rOpts->{'line-up-parentheses'}
- && !$rOpts->{'indent-closing-brace'}
- && $tokens_to_go[$iend_2] eq '{'
- && (
- ( $type_ibeg_2 =~ /^(|\&\&|\|\|)$/ )
- || ( $type_ibeg_2 eq 'k'
- && $is_and_or{ $tokens_to_go[$ibeg_2] } )
- || $is_if_unless{ $tokens_to_go[$ibeg_2] }
- )
+ $is_assignment{$next_nonblank_type}
+ && ( $breakpoint_stack[$current_depth] !=
+ get_forced_breakpoint_count() )
)
{
- $skip_Section_3 ||= 1;
- }
-
- next
- unless (
- $skip_Section_3
+ $self->set_forced_breakpoint($i);
+ } ## end if ( $is_assignment{$next_nonblank_type...})
- # handle '.' and '?' specially below
- || ( $type_ibeg_2 =~ /^[\.\?]$/ )
- );
- }
+ # break at any comma before the opening structure Added
+ # for -lp, but seems to be good in general. It isn't
+ # obvious how far back to look; the '5' below seems to
+ # work well and will catch the comma in something like
+ # push @list, myfunc( $param, $param, ..
- elsif ( $type_iend_1 eq '{' ) {
+ my $icomma = $last_comma_index[$depth];
+ if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
+ unless ( $forced_breakpoint_to_go[$icomma] ) {
+ $self->set_forced_breakpoint($icomma);
+ }
+ }
+ } # end logic to open up a container
- # YVES
- # honor breaks at opening brace
- # Added to prevent recombining something like this:
- # } || eval { package main;
- next if $forced_breakpoint_to_go[$iend_1];
+ # Break open a logical container open if it was already open
+ elsif ($is_simple_logical_expression
+ && $has_old_logical_breakpoints[$current_depth] )
+ {
+ $self->set_logical_breakpoints($current_depth);
}
- # do not recombine lines with ending &&, ||,
- elsif ( $is_amp_amp{$type_iend_1} ) {
- next unless $want_break_before{$type_iend_1};
- }
+ # Handle long container which does not get opened up
+ elsif ($is_long_term) {
- # Identify and recombine a broken ?/: chain
- elsif ( $type_iend_1 eq '?' ) {
+ # must set fake breakpoint to alert outer containers that
+ # they are complex
+ set_fake_breakpoint();
+ } ## end elsif ($is_long_term)
- # Do not recombine different levels
- next
- if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
+ } ## end elsif ( $depth < $current_depth)
- # do not recombine unless next line ends in :
- next unless $type_iend_2 eq ':';
- }
+ #------------------------------------------------------------
+ # Handle this token
+ #------------------------------------------------------------
- # for lines ending in a comma...
- elsif ( $type_iend_1 eq ',' ) {
+ $current_depth = $depth;
- # Do not recombine at comma which is following the
- # input bias.
- # TODO: might be best to make a special flag
- next if ( $old_breakpoint_to_go[$iend_1] );
+ # most token types can skip the rest of this loop
+ next unless ( $quick_filter{$type} );
- # an isolated '},' may join with an identifier + ';'
- # this is useful for the class of a 'bless' statement (bless.t)
- if ( $type_ibeg_1 eq '}'
- && $type_ibeg_2 eq 'i' )
- {
- next
- unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
- && ( $iend_2 == ( $ibeg_2 + 1 ) )
- && $this_line_is_semicolon_terminated );
+ # handle comma-arrow
+ if ( $type eq '=>' ) {
+ next if ( $last_nonblank_type eq '=>' );
+ next if $rOpts_break_at_old_comma_breakpoints;
+ next
+ if ( $rOpts_comma_arrow_breakpoints == 3
+ && !$override_cab3[$depth] );
+ $want_comma_break[$depth] = 1;
+ $index_before_arrow[$depth] = $i_last_nonblank_token;
+ next;
+ } ## end if ( $type eq '=>' )
- # override breakpoint
- $forced_breakpoint_to_go[$iend_1] = 0;
- }
+ elsif ( $type eq '.' ) {
+ $last_dot_index[$depth] = $i;
+ }
- # but otherwise ..
- else {
+ # Turn off alignment if we are sure that this is not a list
+ # environment. To be safe, we will do this if we see certain
+ # non-list tokens, such as ';', and also the environment is
+ # not a list. Note that '=' could be in any of the = operators
+ # (lextest.t). We can't just use the reported environment
+ # because it can be incorrect in some cases.
+ elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
+ && !$self->is_in_list_by_i($i) )
+ {
+ $dont_align[$depth] = 1;
+ $want_comma_break[$depth] = 0;
+ $index_before_arrow[$depth] = -1;
+ } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
- # do not recombine after a comma unless this will leave
- # just 1 more line
- next unless ( $n + 1 >= $nmax );
+ # now just handle any commas
+ next unless ( $type eq ',' );
- # do not recombine if there is a change in indentation depth
- next
- if (
- $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
+ $last_dot_index[$depth] = undef;
+ $last_comma_index[$depth] = $i;
- # do not recombine a "complex expression" after a
- # comma. "complex" means no parens.
- my $saw_paren;
- foreach my $ii ( $ibeg_2 .. $iend_2 ) {
- if ( $tokens_to_go[$ii] eq '(' ) {
- $saw_paren = 1;
- last;
- }
- }
- next if $saw_paren;
- }
- }
+ # break here if this comma follows a '=>'
+ # but not if there is a side comment after the comma
+ if ( $want_comma_break[$depth] ) {
- # opening paren..
- elsif ( $type_iend_1 eq '(' ) {
-
- # No longer doing this
+ if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
+ if ($rOpts_comma_arrow_breakpoints) {
+ $want_comma_break[$depth] = 0;
+ next;
+ }
}
- elsif ( $type_iend_1 eq ')' ) {
+ $self->set_forced_breakpoint($i)
+ unless ( $next_nonblank_type eq '#' );
- # No longer doing this
- }
+ # break before the previous token if it looks safe
+ # Example of something that we will not try to break before:
+ # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
+ # Also we don't want to break at a binary operator (like +):
+ # $c->createOval(
+ # $x + $R, $y +
+ # $R => $x - $R,
+ # $y - $R, -fill => 'black',
+ # );
+ my $ibreak = $index_before_arrow[$depth] - 1;
+ if ( $ibreak > 0
+ && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
+ {
+ if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
+ if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
+ if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
- # keep a terminal for-semicolon
- elsif ( $type_iend_1 eq 'f' ) {
- next;
- }
+ # don't break pointer calls, such as the following:
+ # File::Spec->curdir => 1,
+ # (This is tokenized as adjacent 'w' tokens)
+ ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
- # if '=' at end of line ...
- elsif ( $is_assignment{$type_iend_1} ) {
+ # And don't break before a comma, as in the following:
+ # ( LONGER_THAN,=> 1,
+ # EIGHTY_CHARACTERS,=> 2,
+ # CAUSES_FORMATTING,=> 3,
+ # LIKE_THIS,=> 4,
+ # );
+ # This example is for -tso but should be general rule
+ if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
+ && $tokens_to_go[ $ibreak + 1 ] ne ',' )
+ {
+ $self->set_forced_breakpoint($ibreak);
+ }
+ } ## end if ( $types_to_go[$ibreak...])
+ } ## end if ( $ibreak > 0 && $tokens_to_go...)
- # keep break after = if it was in input stream
- # this helps prevent 'blinkers'
- next if $old_breakpoint_to_go[$iend_1]
+ $want_comma_break[$depth] = 0;
+ $index_before_arrow[$depth] = -1;
- # don't strand an isolated '='
- && $iend_1 != $ibeg_1;
+ # handle list which mixes '=>'s and ','s:
+ # treat any list items so far as an interrupted list
+ $interrupted_list[$depth] = 1;
+ next;
+ } ## end if ( $want_comma_break...)
- my $is_short_quote =
- ( $type_ibeg_2 eq 'Q'
- && $ibeg_2 == $iend_2
- && token_sequence_length( $ibeg_2, $ibeg_2 ) <
- $rOpts_short_concatenation_item_length );
- my $is_ternary =
- ( $type_ibeg_1 eq '?'
- && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
+ # break after all commas above starting depth
+ if ( $depth < $starting_depth && !$dont_align[$depth] ) {
+ $self->set_forced_breakpoint($i)
+ unless ( $next_nonblank_type eq '#' );
+ next;
+ }
- # always join an isolated '=', a short quote, or if this
- # will put ?/: at start of adjacent lines
- if ( $ibeg_1 != $iend_1
- && !$is_short_quote
- && !$is_ternary )
- {
- next
- unless (
- (
+ # add this comma to the list..
+ my $item_count = $item_count_stack[$depth];
+ if ( $item_count == 0 ) {
- # unless we can reduce this to two lines
- $nmax < $n + 2
+ # but do not form a list with no opening structure
+ # for example:
- # or three lines, the last with a leading semicolon
- || ( $nmax == $n + 2
- && $types_to_go[$ibeg_nmax] eq ';' )
+ # open INFILE_COPY, ">$input_file_copy"
+ # or die ("very long message");
+ if ( ( $opening_structure_index_stack[$depth] < 0 )
+ && $self->is_in_block_by_i($i) )
+ {
+ $dont_align[$depth] = 1;
+ }
+ } ## end if ( $item_count == 0 )
- # or the next line ends with a here doc
- || $type_iend_2 eq 'h'
+ $comma_index[$depth][$item_count] = $i;
+ ++$item_count_stack[$depth];
+ if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
+ $identifier_count_stack[$depth]++;
+ }
+ } ## end while ( ++$i <= $max_index_to_go)
- # or the next line ends in an open paren or brace
- # and the break hasn't been forced [dima.t]
- || ( !$forced_breakpoint_to_go[$iend_1]
- && $type_iend_2 eq '{' )
- )
+ #-------------------------------------------
+ # end of loop over all tokens in this batch
+ #-------------------------------------------
- # do not recombine if the two lines might align well
- # this is a very approximate test for this
- && (
+ # set breaks for any unfinished lists ..
+ for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
- # RT#127633 - the leading tokens are not operators
- ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] )
+ $interrupted_list[$dd] = 1;
+ $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
+ $self->set_comma_breakpoints($dd);
+ $self->set_logical_breakpoints($dd)
+ if ( $has_old_logical_breakpoints[$dd] );
+ $self->set_for_semicolon_breakpoints($dd);
- # or they are different
- || ( $ibeg_3 >= 0
- && $type_ibeg_2 ne $types_to_go[$ibeg_3] )
- )
- );
+ # break open container...
+ my $i_opening = $opening_structure_index_stack[$dd];
+ $self->set_forced_breakpoint($i_opening)
+ unless (
+ is_unbreakable_container($dd)
- if (
+ # Avoid a break which would place an isolated ' or "
+ # on a line
+ || ( $type eq 'Q'
+ && $i_opening >= $max_index_to_go - 2
+ && ( $token eq "'" || $token eq '"' ) )
+ );
+ } ## end for ( my $dd = $current_depth...)
- # Recombine if we can make two lines
- $nmax >= $n + 2
+ # Return a flag indicating if the input file had some good breakpoints.
+ # This flag will be used to force a break in a line shorter than the
+ # allowed line length.
+ if ( $has_old_logical_breakpoints[$current_depth] ) {
+ $saw_good_breakpoint = 1;
+ }
- # -lp users often prefer this:
- # my $title = function($env, $env, $sysarea,
- # "bubba Borrower Entry");
- # so we will recombine if -lp is used we have
- # ending comma
- && ( !$rOpts_line_up_parentheses
- || $type_iend_2 ne ',' )
- )
- {
+ # A complex line with one break at an = has a good breakpoint.
+ # This is not complex ($total_depth_variation=0):
+ # $res1
+ # = 10;
+ #
+ # This is complex ($total_depth_variation=6):
+ # $res2 =
+ # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
+ elsif ($i_old_assignment_break
+ && $total_depth_variation > 4
+ && $old_breakpoint_count == 1 )
+ {
+ $saw_good_breakpoint = 1;
+ } ## end elsif ( $i_old_assignment_break...)
- # otherwise, scan the rhs line up to last token for
- # complexity. Note that we are not counting the last
- # token in case it is an opening paren.
- my $tv = 0;
- my $depth = $nesting_depth_to_go[$ibeg_2];
- foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
- if ( $nesting_depth_to_go[$i] != $depth ) {
- $tv++;
- last if ( $tv > 1 );
- }
- $depth = $nesting_depth_to_go[$i];
- }
+ return $saw_good_breakpoint;
+ } ## end sub scan_list
+} ## end closure scan_list
- # ok to recombine if no level changes before last token
- if ( $tv > 0 ) {
+my %is_kwiZ;
- # otherwise, do not recombine if more than two
- # level changes.
- next if ( $tv > 1 );
+BEGIN {
- # check total complexity of the two adjacent lines
- # that will occur if we do this join
- my $istop =
- ( $n < $nmax )
- ? $ri_end->[ $n + 1 ]
- : $iend_2;
- foreach my $i ( $iend_2 .. $istop ) {
- if ( $nesting_depth_to_go[$i] != $depth ) {
- $tv++;
- last if ( $tv > 2 );
- }
- $depth = $nesting_depth_to_go[$i];
- }
+ # Added 'w' to fix b1172
+ my @q = qw(k w i Z);
+ @is_kwiZ{@q} = (1) x scalar(@q);
+}
- # do not recombine if total is more than 2 level changes
- next if ( $tv > 2 );
- }
- }
- }
+sub find_token_starting_list {
- unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
- $forced_breakpoint_to_go[$iend_1] = 0;
- }
- }
+ # When testing to see if a block will fit on one line, some
+ # previous token(s) may also need to be on the line; particularly
+ # if this is a sub call. So we will look back at least one
+ # token.
+ my ( $self, $i_opening_paren ) = @_;
- # for keywords..
- elsif ( $type_iend_1 eq 'k' ) {
+ # This will be the return index
+ my $i_opening_minus = $i_opening_paren;
- # make major control keywords stand out
- # (recombine.t)
- next
- if (
+ goto RETURN if ( $i_opening_minus <= 0 );
- #/^(last|next|redo|return)$/
- $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
+ my $im1 = $i_opening_paren - 1;
+ my ( $iprev_nb, $type_prev_nb ) = ( $im1, $types_to_go[$im1] );
+ if ( $type_prev_nb eq 'b' && $iprev_nb > 0 ) {
+ $iprev_nb -= 1;
+ $type_prev_nb = $types_to_go[$iprev_nb];
+ }
- # but only if followed by multiple lines
- && $n < $nmax
- );
+ if ( $type_prev_nb eq ',' ) {
- if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
- next
- unless $want_break_before{ $tokens_to_go[$iend_1] };
- }
- }
+ # a previous comma is a good break point
+ # $i_opening_minus = $i_opening_paren;
+ }
+ elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
+ $i_opening_minus = $im1;
+
+ # Walk back to improve length estimate...
+ # FIX for cases b1169 b1170 b1171: start walking back
+ # at the previous nonblank. This makes the result insensitive
+ # to the flag --space-function-paren, and similar.
+ # previous loop: for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
+ for ( my $j = $iprev_nb ; $j >= 0 ; $j-- ) {
+ last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
+ $i_opening_minus = $j;
+ }
+ if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
+ }
- #----------------------------------------------------------
- # Recombine Section 3:
- # Examine token at $ibeg_2 (left end of second line of pair)
- #----------------------------------------------------------
+ # Handle non-parens
+ elsif ( $is_kwiZ{$type_prev_nb} ) { $i_opening_minus = $iprev_nb }
- # join lines identified above as capable of
- # causing an outdented line with leading closing paren
- # Note that we are skipping the rest of this section
- # and the rest of the loop to do the join
- if ($skip_Section_3) {
- $forced_breakpoint_to_go[$iend_1] = 0;
- $n_best = $n;
- last;
- }
+ RETURN:
- # handle lines with leading &&, ||
- elsif ( $is_amp_amp{$type_ibeg_2} ) {
+ return $i_opening_minus;
+}
- $leading_amp_count++;
+{ ## begin closure set_comma_breakpoints_do
- # ok to recombine if it follows a ? or :
- # and is followed by an open paren..
- my $ok =
- ( $is_ternary{$type_ibeg_1}
- && $tokens_to_go[$iend_2] eq '(' )
+ my %is_keyword_with_special_leading_term;
- # or is followed by a ? or : at same depth
- #
- # We are looking for something like this. We can
- # recombine the && line with the line above to make the
- # structure more clear:
- # return
- # exists $G->{Attr}->{V}
- # && exists $G->{Attr}->{V}->{$u}
- # ? %{ $G->{Attr}->{V}->{$u} }
- # : ();
- #
- # We should probably leave something like this alone:
- # return
- # exists $G->{Attr}->{E}
- # && exists $G->{Attr}->{E}->{$u}
- # && exists $G->{Attr}->{E}->{$u}->{$v}
- # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
- # : ();
- # so that we either have all of the &&'s (or ||'s)
- # on one line, as in the first example, or break at
- # each one as in the second example. However, it
- # sometimes makes things worse to check for this because
- # it prevents multiple recombinations. So this is not done.
- || ( $ibeg_3 >= 0
- && $is_ternary{ $types_to_go[$ibeg_3] }
- && $nesting_depth_to_go[$ibeg_3] ==
- $nesting_depth_to_go[$ibeg_2] );
+ BEGIN {
- next if !$ok && $want_break_before{$type_ibeg_2};
- $forced_breakpoint_to_go[$iend_1] = 0;
+ # These keywords have prototypes which allow a special leading item
+ # followed by a list
+ my @q =
+ qw(formline grep kill map printf sprintf push chmod join pack unshift);
+ @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
+ }
- # tweak the bond strength to give this joint priority
- # over ? and :
- $bs_tweak = 0.25;
- }
+ use constant DEBUG_SPARSE => 0;
- # Identify and recombine a broken ?/: chain
- elsif ( $type_ibeg_2 eq '?' ) {
+ sub set_comma_breakpoints_do {
- # Do not recombine different levels
- my $lev = $levels_to_go[$ibeg_2];
- next if ( $lev ne $levels_to_go[$ibeg_1] );
+ # Given a list with some commas, set breakpoints at some of the
+ # commas, if necessary, to make it easy to read.
+
+ my ( $self, $rinput_hash ) = @_;
+
+ my $depth = $rinput_hash->{depth};
+ my $i_opening_paren = $rinput_hash->{i_opening_paren};
+ my $i_closing_paren = $rinput_hash->{i_closing_paren};
+ my $item_count = $rinput_hash->{item_count};
+ my $identifier_count = $rinput_hash->{identifier_count};
+ my $rcomma_index = $rinput_hash->{rcomma_index};
+ my $next_nonblank_type = $rinput_hash->{next_nonblank_type};
+ my $list_type = $rinput_hash->{list_type};
+ my $interrupted = $rinput_hash->{interrupted};
+ my $rdo_not_break_apart = $rinput_hash->{rdo_not_break_apart};
+ my $must_break_open = $rinput_hash->{must_break_open};
+ my $has_broken_sublist = $rinput_hash->{has_broken_sublist};
- # Do not recombine a '?' if either next line or
- # previous line does not start with a ':'. The reasons
- # are that (1) no alignment of the ? will be possible
- # and (2) the expression is somewhat complex, so the
- # '?' is harder to see in the interior of the line.
- my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
- my $precedes_colon =
- $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
- next unless ( $follows_colon || $precedes_colon );
+ # nothing to do if no commas seen
+ return if ( $item_count < 1 );
- # we will always combining a ? line following a : line
- if ( !$follows_colon ) {
+ my $i_first_comma = $rcomma_index->[0];
+ my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
+ my $i_last_comma = $i_true_last_comma;
+ if ( $i_last_comma >= $max_index_to_go ) {
+ $i_last_comma = $rcomma_index->[ --$item_count - 1 ];
+ return if ( $item_count < 1 );
+ }
- # ...otherwise recombine only if it looks like a chain.
- # we will just look at a few nearby lines to see if
- # this looks like a chain.
- my $local_count = 0;
- foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
- $local_count++
- if $ii >= 0
- && $types_to_go[$ii] eq ':'
- && $levels_to_go[$ii] == $lev;
- }
- next unless ( $local_count > 1 );
- }
- $forced_breakpoint_to_go[$iend_1] = 0;
- }
+ #---------------------------------------------------------------
+ # find lengths of all items in the list to calculate page layout
+ #---------------------------------------------------------------
+ my $comma_count = $item_count;
+ my @item_lengths;
+ my @i_term_begin;
+ my @i_term_end;
+ my @i_term_comma;
+ my $i_prev_plus;
+ my @max_length = ( 0, 0 );
+ my $first_term_length;
+ my $i = $i_opening_paren;
+ my $is_odd = 1;
- # do not recombine lines with leading '.'
- elsif ( $type_ibeg_2 eq '.' ) {
- my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
- next
- unless (
+ foreach my $j ( 0 .. $comma_count - 1 ) {
+ $is_odd = 1 - $is_odd;
+ $i_prev_plus = $i + 1;
+ $i = $rcomma_index->[$j];
- # ... unless there is just one and we can reduce
- # this to two lines if we do. For example, this
- #
- #
- # $bodyA .=
- # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
- #
- # looks better than this:
- # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
- # . '$args .= $pat;'
+ my $i_term_end =
+ ( $i == 0 || $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
+ my $i_term_begin =
+ ( $types_to_go[$i_prev_plus] eq 'b' )
+ ? $i_prev_plus + 1
+ : $i_prev_plus;
+ push @i_term_begin, $i_term_begin;
+ push @i_term_end, $i_term_end;
+ push @i_term_comma, $i;
- (
- $n == 2
- && $n == $nmax
- && $type_ibeg_1 ne $type_ibeg_2
- )
+ # note: currently adding 2 to all lengths (for comma and space)
+ my $length =
+ 2 + token_sequence_length( $i_term_begin, $i_term_end );
+ push @item_lengths, $length;
- # ... or this would strand a short quote , like this
- # . "some long quote"
- # . "\n";
+ if ( $j == 0 ) {
+ $first_term_length = $length;
+ }
+ else {
- || ( $types_to_go[$i_next_nonblank] eq 'Q'
- && $i_next_nonblank >= $iend_2 - 1
- && $token_lengths_to_go[$i_next_nonblank] <
- $rOpts_short_concatenation_item_length )
- );
+ if ( $length > $max_length[$is_odd] ) {
+ $max_length[$is_odd] = $length;
}
+ }
+ }
- # handle leading keyword..
- elsif ( $type_ibeg_2 eq 'k' ) {
+ # now we have to make a distinction between the comma count and item
+ # count, because the item count will be one greater than the comma
+ # count if the last item is not terminated with a comma
+ my $i_b =
+ ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
+ ? $i_last_comma + 1
+ : $i_last_comma;
+ my $i_e =
+ ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
+ ? $i_closing_paren - 2
+ : $i_closing_paren - 1;
+ my $i_effective_last_comma = $i_last_comma;
- # handle leading "or"
- if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
- next
- unless (
- $this_line_is_semicolon_terminated
- && (
- $type_ibeg_1 eq '}'
- || (
+ my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
- # following 'if' or 'unless' or 'or'
- $type_ibeg_1 eq 'k'
- && $is_if_unless{ $tokens_to_go[$ibeg_1] }
+ if ( $last_item_length > 0 ) {
+
+ # add 2 to length because other lengths include a comma and a blank
+ $last_item_length += 2;
+ push @item_lengths, $last_item_length;
+ push @i_term_begin, $i_b + 1;
+ push @i_term_end, $i_e;
+ push @i_term_comma, undef;
+
+ my $i_odd = $item_count % 2;
+
+ if ( $last_item_length > $max_length[$i_odd] ) {
+ $max_length[$i_odd] = $last_item_length;
+ }
+
+ $item_count++;
+ $i_effective_last_comma = $i_e + 1;
+
+ if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
+ $identifier_count++;
+ }
+ }
+
+ #---------------------------------------------------------------
+ # End of length calculations
+ #---------------------------------------------------------------
+
+ #---------------------------------------------------------------
+ # Compound List Rule 1:
+ # Break at (almost) every comma for a list containing a broken
+ # sublist. This has higher priority than the Interrupted List
+ # Rule.
+ #---------------------------------------------------------------
+ if ($has_broken_sublist) {
+
+ # Break at every comma except for a comma between two
+ # simple, small terms. This prevents long vertical
+ # columns of, say, just 0's.
+ my $small_length = 10; # 2 + actual maximum length wanted
+
+ # We'll insert a break in long runs of small terms to
+ # allow alignment in uniform tables.
+ my $skipped_count = 0;
+ my $columns = table_columns_available($i_first_comma);
+ my $fields = int( $columns / $small_length );
+ if ( $rOpts_maximum_fields_per_table
+ && $fields > $rOpts_maximum_fields_per_table )
+ {
+ $fields = $rOpts_maximum_fields_per_table;
+ }
+ my $max_skipped_count = $fields - 1;
+
+ my $is_simple_last_term = 0;
+ my $is_simple_next_term = 0;
+ foreach my $j ( 0 .. $item_count ) {
+ $is_simple_last_term = $is_simple_next_term;
+ $is_simple_next_term = 0;
+ if ( $j < $item_count
+ && $i_term_end[$j] == $i_term_begin[$j]
+ && $item_lengths[$j] <= $small_length )
+ {
+ $is_simple_next_term = 1;
+ }
+ next if $j == 0;
+ if ( $is_simple_last_term
+ && $is_simple_next_term
+ && $skipped_count < $max_skipped_count )
+ {
+ $skipped_count++;
+ }
+ else {
+ $skipped_count = 0;
+ my $i = $i_term_comma[ $j - 1 ];
+ last unless defined $i;
+ $self->set_forced_breakpoint($i);
+ }
+ }
+
+ # always break at the last comma if this list is
+ # interrupted; we wouldn't want to leave a terminal '{', for
+ # example.
+ if ($interrupted) {
+ $self->set_forced_breakpoint($i_true_last_comma);
+ }
+ return;
+ }
+
+#my ( $a, $b, $c ) = caller();
+#print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
+#i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
+#print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
+
+ #---------------------------------------------------------------
+ # Interrupted List Rule:
+ # A list is forced to use old breakpoints if it was interrupted
+ # by side comments or blank lines, or requested by user.
+ #---------------------------------------------------------------
+ if ( $rOpts_break_at_old_comma_breakpoints
+ || $interrupted
+ || $i_opening_paren < 0 )
+ {
+ $self->copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
+ return;
+ }
+
+ #---------------------------------------------------------------
+ # Looks like a list of items. We have to look at it and size it up.
+ #---------------------------------------------------------------
+
+ my $opening_token = $tokens_to_go[$i_opening_paren];
+ my $opening_is_in_block = $self->is_in_block_by_i($i_opening_paren);
+
+ #-------------------------------------------------------------------
+ # Return if this will fit on one line
+ #-------------------------------------------------------------------
+
+ my $i_opening_minus = $self->find_token_starting_list($i_opening_paren);
+ return
+ unless $self->excess_line_length( $i_opening_minus, $i_closing_paren )
+ > 0;
+
+ #-------------------------------------------------------------------
+ # Now we know that this block spans multiple lines; we have to set
+ # at least one breakpoint -- real or fake -- as a signal to break
+ # open any outer containers.
+ #-------------------------------------------------------------------
+ set_fake_breakpoint();
+
+ # be sure we do not extend beyond the current list length
+ if ( $i_effective_last_comma >= $max_index_to_go ) {
+ $i_effective_last_comma = $max_index_to_go - 1;
+ }
+
+ # Set a flag indicating if we need to break open to keep -lp
+ # items aligned. This is necessary if any of the list terms
+ # exceeds the available space after the '('.
+ my $need_lp_break_open = $must_break_open;
+ if ( $rOpts_line_up_parentheses && !$must_break_open ) {
+ my $columns_if_unbroken =
+ $maximum_line_length_at_level[ $levels_to_go[$i_opening_minus] ]
+ - total_line_length( $i_opening_minus, $i_opening_paren );
+ $need_lp_break_open =
+ ( $max_length[0] > $columns_if_unbroken )
+ || ( $max_length[1] > $columns_if_unbroken )
+ || ( $first_term_length > $columns_if_unbroken );
+ }
+
+ # Specify if the list must have an even number of fields or not.
+ # It is generally safest to assume an even number, because the
+ # list items might be a hash list. But if we can be sure that
+ # it is not a hash, then we can allow an odd number for more
+ # flexibility.
+ my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count
+
+ if ( $identifier_count >= $item_count - 1
+ || $is_assignment{$next_nonblank_type}
+ || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
+ )
+ {
+ $odd_or_even = 1;
+ }
+
+ # do we have a long first term which should be
+ # left on a line by itself?
+ my $use_separate_first_term = (
+ $odd_or_even == 1 # only if we can use 1 field/line
+ && $item_count > 3 # need several items
+ && $first_term_length >
+ 2 * $max_length[0] - 2 # need long first term
+ && $first_term_length >
+ 2 * $max_length[1] - 2 # need long first term
+ );
+
+ # or do we know from the type of list that the first term should
+ # be placed alone?
+ if ( !$use_separate_first_term ) {
+ if ( $is_keyword_with_special_leading_term{$list_type} ) {
+ $use_separate_first_term = 1;
+
+ # should the container be broken open?
+ if ( $item_count < 3 ) {
+ if ( $i_first_comma - $i_opening_paren < 4 ) {
+ ${$rdo_not_break_apart} = 1;
+ }
+ }
+ elsif ($first_term_length < 20
+ && $i_first_comma - $i_opening_paren < 4 )
+ {
+ my $columns = table_columns_available($i_first_comma);
+ if ( $first_term_length < $columns ) {
+ ${$rdo_not_break_apart} = 1;
+ }
+ }
+ }
+ }
+
+ # if so,
+ if ($use_separate_first_term) {
+
+ # ..set a break and update starting values
+ $use_separate_first_term = 1;
+ $self->set_forced_breakpoint($i_first_comma);
+ $i_opening_paren = $i_first_comma;
+ $i_first_comma = $rcomma_index->[1];
+ $item_count--;
+ return if $comma_count == 1;
+ shift @item_lengths;
+ shift @i_term_begin;
+ shift @i_term_end;
+ shift @i_term_comma;
+ }
+
+ # if not, update the metrics to include the first term
+ else {
+ if ( $first_term_length > $max_length[0] ) {
+ $max_length[0] = $first_term_length;
+ }
+ }
+
+ # Field width parameters
+ my $pair_width = ( $max_length[0] + $max_length[1] );
+ my $max_width =
+ ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
+
+ # Number of free columns across the page width for laying out tables
+ my $columns = table_columns_available($i_first_comma);
+
+ # Estimated maximum number of fields which fit this space
+ # This will be our first guess
+ my $number_of_fields_max =
+ maximum_number_of_fields( $columns, $odd_or_even, $max_width,
+ $pair_width );
+ my $number_of_fields = $number_of_fields_max;
+
+ # Find the best-looking number of fields
+ # and make this our second guess if possible
+ my ( $number_of_fields_best, $ri_ragged_break_list,
+ $new_identifier_count )
+ = $self->study_list_complexity( \@i_term_begin, \@i_term_end,
+ \@item_lengths, $max_width );
+
+ if ( $number_of_fields_best != 0
+ && $number_of_fields_best < $number_of_fields_max )
+ {
+ $number_of_fields = $number_of_fields_best;
+ }
+
+ # ----------------------------------------------------------------------
+ # If we are crowded and the -lp option is being used, try to
+ # undo some indentation
+ # ----------------------------------------------------------------------
+ if (
+ $rOpts_line_up_parentheses
+ && (
+ $number_of_fields == 0
+ || ( $number_of_fields == 1
+ && $number_of_fields != $number_of_fields_best )
+ )
+ )
+ {
+ my $available_spaces =
+ $self->get_available_spaces_to_go($i_first_comma);
+ if ( $available_spaces > 0 ) {
+
+ my $spaces_wanted = $max_width - $columns; # for 1 field
+
+ if ( $number_of_fields_best == 0 ) {
+ $number_of_fields_best =
+ get_maximum_fields_wanted( \@item_lengths );
+ }
+
+ if ( $number_of_fields_best != 1 ) {
+ my $spaces_wanted_2 =
+ 1 + $pair_width - $columns; # for 2 fields
+ if ( $available_spaces > $spaces_wanted_2 ) {
+ $spaces_wanted = $spaces_wanted_2;
+ }
+ }
+
+ if ( $spaces_wanted > 0 ) {
+ my $deleted_spaces =
+ $self->reduce_lp_indentation( $i_first_comma,
+ $spaces_wanted );
+
+ # redo the math
+ if ( $deleted_spaces > 0 ) {
+ $columns = table_columns_available($i_first_comma);
+ $number_of_fields_max =
+ maximum_number_of_fields( $columns, $odd_or_even,
+ $max_width, $pair_width );
+ $number_of_fields = $number_of_fields_max;
+
+ if ( $number_of_fields_best == 1
+ && $number_of_fields >= 1 )
+ {
+ $number_of_fields = $number_of_fields_best;
+ }
+ }
+ }
+ }
+ }
+
+ # try for one column if two won't work
+ if ( $number_of_fields <= 0 ) {
+ $number_of_fields = int( $columns / $max_width );
+ }
+
+ # The user can place an upper bound on the number of fields,
+ # which can be useful for doing maintenance on tables
+ if ( $rOpts_maximum_fields_per_table
+ && $number_of_fields > $rOpts_maximum_fields_per_table )
+ {
+ $number_of_fields = $rOpts_maximum_fields_per_table;
+ }
+
+ # How many columns (characters) and lines would this container take
+ # if no additional whitespace were added?
+ my $packed_columns = token_sequence_length( $i_opening_paren + 1,
+ $i_effective_last_comma + 1 );
+ if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
+ my $packed_lines = 1 + int( $packed_columns / $columns );
+
+ # are we an item contained in an outer list?
+ my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
+
+ if ( $number_of_fields <= 0 ) {
+
+# #---------------------------------------------------------------
+# # We're in trouble. We can't find a single field width that works.
+# # There is no simple answer here; we may have a single long list
+# # item, or many.
+# #---------------------------------------------------------------
+#
+# In many cases, it may be best to not force a break if there is just one
+# comma, because the standard continuation break logic will do a better
+# job without it.
+#
+# In the common case that all but one of the terms can fit
+# on a single line, it may look better not to break open the
+# containing parens. Consider, for example
+#
+# $color =
+# join ( '/',
+# sort { $color_value{$::a} <=> $color_value{$::b}; }
+# keys %colors );
+#
+# which will look like this with the container broken:
+#
+# $color = join (
+# '/',
+# sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
+# );
+#
+# Here is an example of this rule for a long last term:
+#
+# log_message( 0, 256, 128,
+# "Number of routes in adj-RIB-in to be considered: $peercount" );
+#
+# And here is an example with a long first term:
+#
+# $s = sprintf(
+# "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
+# $r, $pu, $ps, $cu, $cs, $tt
+# )
+# if $style eq 'all';
+
+ my $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
+ my $long_last_term =
+ $self->excess_line_length( 0, $i_last_comma ) <= 0;
+ my $long_first_term =
+ $self->excess_line_length( $i_first_comma + 1, $max_index_to_go )
+ <= 0;
+
+ # break at every comma ...
+ if (
+
+ # if requested by user or is best looking
+ $number_of_fields_best == 1
+
+ # or if this is a sublist of a larger list
+ || $in_hierarchical_list
+
+ # or if multiple commas and we don't have a long first or last
+ # term
+ || ( $comma_count > 1
+ && !( $long_last_term || $long_first_term ) )
+ )
+ {
+ foreach ( 0 .. $comma_count - 1 ) {
+ $self->set_forced_breakpoint( $rcomma_index->[$_] );
+ }
+ }
+ elsif ($long_last_term) {
+
+ $self->set_forced_breakpoint($i_last_comma);
+ ${$rdo_not_break_apart} = 1 unless $must_break_open;
+ }
+ elsif ($long_first_term) {
+
+ $self->set_forced_breakpoint($i_first_comma);
+ }
+ else {
+
+ # let breaks be defined by default bond strength logic
+ }
+ return;
+ }
+
+ # --------------------------------------------------------
+ # We have a tentative field count that seems to work.
+ # How many lines will this require?
+ # --------------------------------------------------------
+ my $formatted_lines = $item_count / ($number_of_fields);
+ if ( $formatted_lines != int $formatted_lines ) {
+ $formatted_lines = 1 + int $formatted_lines;
+ }
+
+ # So far we've been trying to fill out to the right margin. But
+ # compact tables are easier to read, so let's see if we can use fewer
+ # fields without increasing the number of lines.
+ $number_of_fields =
+ compactify_table( $item_count, $number_of_fields, $formatted_lines,
+ $odd_or_even );
+
+ # How many spaces across the page will we fill?
+ my $columns_per_line =
+ ( int $number_of_fields / 2 ) * $pair_width +
+ ( $number_of_fields % 2 ) * $max_width;
+
+ my $formatted_columns;
+
+ if ( $number_of_fields > 1 ) {
+ $formatted_columns =
+ ( $pair_width * ( int( $item_count / 2 ) ) +
+ ( $item_count % 2 ) * $max_width );
+ }
+ else {
+ $formatted_columns = $max_width * $item_count;
+ }
+ if ( $formatted_columns < $packed_columns ) {
+ $formatted_columns = $packed_columns;
+ }
+
+ my $unused_columns = $formatted_columns - $packed_columns;
+
+ # set some empirical parameters to help decide if we should try to
+ # align; high sparsity does not look good, especially with few lines
+ my $sparsity = ($unused_columns) / ($formatted_columns);
+ my $max_allowed_sparsity =
+ ( $item_count < 3 ) ? 0.1
+ : ( $packed_lines == 1 ) ? 0.15
+ : ( $packed_lines == 2 ) ? 0.4
+ : 0.7;
+
+ # Begin check for shortcut methods, which avoid treating a list
+ # as a table for relatively small parenthesized lists. These
+ # are usually easier to read if not formatted as tables.
+ if (
+ $packed_lines <= 2 # probably can fit in 2 lines
+ && $item_count < 9 # doesn't have too many items
+ && $opening_is_in_block # not a sub-container
+ && $opening_token eq '(' # is paren list
+ )
+ {
+
+ # Shortcut method 1: for -lp and just one comma:
+ # This is a no-brainer, just break at the comma.
+ if (
+ $rOpts_line_up_parentheses # -lp
+ && $item_count == 2 # two items, one comma
+ && !$must_break_open
+ )
+ {
+ my $i_break = $rcomma_index->[0];
+ $self->set_forced_breakpoint($i_break);
+ ${$rdo_not_break_apart} = 1;
+ return;
+
+ }
+
+ # method 2 is for most small ragged lists which might look
+ # best if not displayed as a table.
+ if (
+ ( $number_of_fields == 2 && $item_count == 3 )
+ || (
+ $new_identifier_count > 0 # isn't all quotes
+ && $sparsity > 0.15
+ ) # would be fairly spaced gaps if aligned
+ )
+ {
+
+ my $break_count = $self->set_ragged_breakpoints( \@i_term_comma,
+ $ri_ragged_break_list );
+ ++$break_count if ($use_separate_first_term);
+
+ # NOTE: we should really use the true break count here,
+ # which can be greater if there are large terms and
+ # little space, but usually this will work well enough.
+ unless ($must_break_open) {
+
+ if ( $break_count <= 1 ) {
+ ${$rdo_not_break_apart} = 1;
+ }
+ elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
+ {
+ ${$rdo_not_break_apart} = 1;
+ }
+ }
+ return;
+ }
+
+ } # end shortcut methods
+
+ # debug stuff
+ DEBUG_SPARSE && do {
+ print STDOUT
+"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
+
+ };
+
+ #---------------------------------------------------------------
+ # Compound List Rule 2:
+ # If this list is too long for one line, and it is an item of a
+ # larger list, then we must format it, regardless of sparsity
+ # (ian.t). One reason that we have to do this is to trigger
+ # Compound List Rule 1, above, which causes breaks at all commas of
+ # all outer lists. In this way, the structure will be properly
+ # displayed.
+ #---------------------------------------------------------------
+
+ # Decide if this list is too long for one line unless broken
+ my $total_columns = table_columns_available($i_opening_paren);
+ my $too_long = $packed_columns > $total_columns;
+
+ # For a paren list, include the length of the token just before the
+ # '(' because this is likely a sub call, and we would have to
+ # include the sub name on the same line as the list. This is still
+ # imprecise, but not too bad. (steve.t)
+ if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
+
+ $too_long = $self->excess_line_length( $i_opening_minus,
+ $i_effective_last_comma + 1 ) > 0;
+ }
+
+ # FIXME: For an item after a '=>', try to include the length of the
+ # thing before the '=>'. This is crude and should be improved by
+ # actually looking back token by token.
+ if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
+ my $i_opening_minus = $i_opening_paren - 4;
+ if ( $i_opening_minus >= 0 ) {
+ $too_long = $self->excess_line_length( $i_opening_minus,
+ $i_effective_last_comma + 1 ) > 0;
+ }
+ }
+
+ # Always break lists contained in '[' and '{' if too long for 1 line,
+ # and always break lists which are too long and part of a more complex
+ # structure.
+ my $must_break_open_container = $must_break_open
+ || ( $too_long
+ && ( $in_hierarchical_list || $opening_token ne '(' ) );
+
+#print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
+
+ #---------------------------------------------------------------
+ # The main decision:
+ # Now decide if we will align the data into aligned columns. Do not
+ # attempt to align columns if this is a tiny table or it would be
+ # too spaced. It seems that the more packed lines we have, the
+ # sparser the list that can be allowed and still look ok.
+ #---------------------------------------------------------------
+
+ if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
+ || ( $formatted_lines < 2 )
+ || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
+ )
+ {
+
+ #---------------------------------------------------------------
+ # too sparse: would look ugly if aligned in a table;
+ #---------------------------------------------------------------
+
+ # use old breakpoints if this is a 'big' list
+ if ( $packed_lines > 2 && $item_count > 10 ) {
+ write_logfile_entry("List sparse: using old breakpoints\n");
+ $self->copy_old_breakpoints( $i_first_comma, $i_last_comma );
+ }
+
+ # let the continuation logic handle it if 2 lines
+ else {
+
+ my $break_count = $self->set_ragged_breakpoints( \@i_term_comma,
+ $ri_ragged_break_list );
+ ++$break_count if ($use_separate_first_term);
+
+ unless ($must_break_open_container) {
+ if ( $break_count <= 1 ) {
+ ${$rdo_not_break_apart} = 1;
+ }
+ elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
+ {
+ ${$rdo_not_break_apart} = 1;
+ }
+ }
+ }
+ return;
+ }
+
+ #---------------------------------------------------------------
+ # go ahead and format as a table
+ #---------------------------------------------------------------
+ write_logfile_entry(
+ "List: auto formatting with $number_of_fields fields/row\n");
+
+ my $j_first_break =
+ $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
+
+ for (
+ my $j = $j_first_break ;
+ $j < $comma_count ;
+ $j += $number_of_fields
+ )
+ {
+ my $i = $rcomma_index->[$j];
+ $self->set_forced_breakpoint($i);
+ }
+ return;
+ }
+} ## end closure set_comma_breakpoints_do
+
+sub study_list_complexity {
+
+ # Look for complex tables which should be formatted with one term per line.
+ # Returns the following:
+ #
+ # \@i_ragged_break_list = list of good breakpoints to avoid lines
+ # which are hard to read
+ # $number_of_fields_best = suggested number of fields based on
+ # complexity; = 0 if any number may be used.
+ #
+ my ( $self, $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
+ my $item_count = @{$ri_term_begin};
+ my $complex_item_count = 0;
+ my $number_of_fields_best = $rOpts_maximum_fields_per_table;
+ my $i_max = @{$ritem_lengths} - 1;
+ ##my @item_complexity;
+
+ my $i_last_last_break = -3;
+ my $i_last_break = -2;
+ my @i_ragged_break_list;
+
+ my $definitely_complex = 30;
+ my $definitely_simple = 12;
+ my $quote_count = 0;
+
+ for my $i ( 0 .. $i_max ) {
+ my $ib = $ri_term_begin->[$i];
+ my $ie = $ri_term_end->[$i];
+
+ # define complexity: start with the actual term length
+ my $weighted_length = ( $ritem_lengths->[$i] - 2 );
+
+ ##TBD: join types here and check for variations
+ ##my $str=join "", @tokens_to_go[$ib..$ie];
+
+ my $is_quote = 0;
+ if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
+ $is_quote = 1;
+ $quote_count++;
+ }
+ elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
+ $quote_count++;
+ }
+
+ if ( $ib eq $ie ) {
+ if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
+ $complex_item_count++;
+ $weighted_length *= 2;
+ }
+ else {
+ }
+ }
+ else {
+ if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
+ $complex_item_count++;
+ $weighted_length *= 2;
+ }
+ if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
+ $weighted_length += 4;
+ }
+ }
+
+ # add weight for extra tokens.
+ $weighted_length += 2 * ( $ie - $ib );
+
+## my $BUB = join '', @tokens_to_go[$ib..$ie];
+## print "# COMPLEXITY:$weighted_length $BUB\n";
+
+##push @item_complexity, $weighted_length;
+
+ # now mark a ragged break after this item it if it is 'long and
+ # complex':
+ if ( $weighted_length >= $definitely_complex ) {
+
+ # if we broke after the previous term
+ # then break before it too
+ if ( $i_last_break == $i - 1
+ && $i > 1
+ && $i_last_last_break != $i - 2 )
+ {
+
+ ## FIXME: don't strand a small term
+ pop @i_ragged_break_list;
+ push @i_ragged_break_list, $i - 2;
+ push @i_ragged_break_list, $i - 1;
+ }
+
+ push @i_ragged_break_list, $i;
+ $i_last_last_break = $i_last_break;
+ $i_last_break = $i;
+ }
+
+ # don't break before a small last term -- it will
+ # not look good on a line by itself.
+ elsif ($i == $i_max
+ && $i_last_break == $i - 1
+ && $weighted_length <= $definitely_simple )
+ {
+ pop @i_ragged_break_list;
+ }
+ }
+
+ my $identifier_count = $i_max + 1 - $quote_count;
+
+ # Need more tuning here..
+ if ( $max_width > 12
+ && $complex_item_count > $item_count / 2
+ && $number_of_fields_best != 2 )
+ {
+ $number_of_fields_best = 1;
+ }
+
+ return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
+}
+
+sub get_maximum_fields_wanted {
+
+ # Not all tables look good with more than one field of items.
+ # This routine looks at a table and decides if it should be
+ # formatted with just one field or not.
+ # This coding is still under development.
+ my ($ritem_lengths) = @_;
+
+ my $number_of_fields_best = 0;
+
+ # For just a few items, we tentatively assume just 1 field.
+ my $item_count = @{$ritem_lengths};
+ if ( $item_count <= 5 ) {
+ $number_of_fields_best = 1;
+ }
+
+ # For larger tables, look at it both ways and see what looks best
+ else {
+
+ my $is_odd = 1;
+ my @max_length = ( 0, 0 );
+ my @last_length_2 = ( undef, undef );
+ my @first_length_2 = ( undef, undef );
+ my $last_length = undef;
+ my $total_variation_1 = 0;
+ my $total_variation_2 = 0;
+ my @total_variation_2 = ( 0, 0 );
+
+ foreach my $j ( 0 .. $item_count - 1 ) {
+
+ $is_odd = 1 - $is_odd;
+ my $length = $ritem_lengths->[$j];
+ if ( $length > $max_length[$is_odd] ) {
+ $max_length[$is_odd] = $length;
+ }
+
+ if ( defined($last_length) ) {
+ my $dl = abs( $length - $last_length );
+ $total_variation_1 += $dl;
+ }
+ $last_length = $length;
+
+ my $ll = $last_length_2[$is_odd];
+ if ( defined($ll) ) {
+ my $dl = abs( $length - $ll );
+ $total_variation_2[$is_odd] += $dl;
+ }
+ else {
+ $first_length_2[$is_odd] = $length;
+ }
+ $last_length_2[$is_odd] = $length;
+ }
+ $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
+
+ my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
+ unless ( $total_variation_2 < $factor * $total_variation_1 ) {
+ $number_of_fields_best = 1;
+ }
+ }
+ return ($number_of_fields_best);
+}
+
+sub table_columns_available {
+ my $i_first_comma = shift;
+ my $columns =
+ $maximum_line_length_at_level[ $levels_to_go[$i_first_comma] ] -
+ leading_spaces_to_go($i_first_comma);
+
+ # Patch: the vertical formatter does not line up lines whose lengths
+ # exactly equal the available line length because of allowances
+ # that must be made for side comments. Therefore, the number of
+ # available columns is reduced by 1 character.
+ $columns -= 1;
+ return $columns;
+}
+
+sub maximum_number_of_fields {
+
+ # how many fields will fit in the available space?
+ my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
+ my $max_pairs = int( $columns / $pair_width );
+ my $number_of_fields = $max_pairs * 2;
+ if ( $odd_or_even == 1
+ && $max_pairs * $pair_width + $max_width <= $columns )
+ {
+ $number_of_fields++;
+ }
+ return $number_of_fields;
+}
+
+sub compactify_table {
+
+ # given a table with a certain number of fields and a certain number
+ # of lines, see if reducing the number of fields will make it look
+ # better.
+ my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
+ if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
+ my $min_fields;
+
+ for (
+ $min_fields = $number_of_fields ;
+ $min_fields >= $odd_or_even
+ && $min_fields * $formatted_lines >= $item_count ;
+ $min_fields -= $odd_or_even
+ )
+ {
+ $number_of_fields = $min_fields;
+ }
+ }
+ return $number_of_fields;
+}
+
+sub set_ragged_breakpoints {
+
+ # Set breakpoints in a list that cannot be formatted nicely as a
+ # table.
+ my ( $self, $ri_term_comma, $ri_ragged_break_list ) = @_;
+
+ my $break_count = 0;
+ foreach ( @{$ri_ragged_break_list} ) {
+ my $j = $ri_term_comma->[$_];
+ if ($j) {
+ $self->set_forced_breakpoint($j);
+ $break_count++;
+ }
+ }
+ return $break_count;
+}
+
+sub copy_old_breakpoints {
+ my ( $self, $i_first_comma, $i_last_comma ) = @_;
+ for my $i ( $i_first_comma .. $i_last_comma ) {
+ if ( $old_breakpoint_to_go[$i] ) {
+ $self->set_forced_breakpoint($i);
+ }
+ }
+ return;
+}
+
+sub set_nobreaks {
+ my ( $self, $i, $j ) = @_;
+ if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
+
+ 0 && do {
+ my ( $a, $b, $c ) = caller();
+ my $forced_breakpoint_count = get_forced_breakpoint_count();
+ print STDOUT
+"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
+ };
+
+ @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
+ }
+
+ # shouldn't happen; non-critical error
+ else {
+ 0 && do {
+ my ( $a, $b, $c ) = caller();
+ print STDOUT
+ "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
+ };
+ }
+ return;
+}
+
+###############################################
+# CODE SECTION 12: Code for setting indentation
+###############################################
+
+sub token_sequence_length {
+
+ # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
+ # returns 0 if $ibeg > $iend (shouldn't happen)
+ my ( $ibeg, $iend ) = @_;
+ return 0 if ( !defined($iend) || $iend < 0 || $ibeg > $iend );
+ return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 );
+ return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
+}
+
+sub total_line_length {
+
+ # return length of a line of tokens ($ibeg .. $iend)
+ my ( $ibeg, $iend ) = @_;
+
+ # original coding:
+ #return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
+
+ # this is basically sub 'leading_spaces_to_go':
+ my $indentation = $leading_spaces_to_go[$ibeg];
+ if ( ref($indentation) ) { $indentation = $indentation->get_spaces() }
+
+ return $indentation + $summed_lengths_to_go[ $iend + 1 ] -
+ $summed_lengths_to_go[$ibeg];
+}
+
+sub excess_line_length {
+
+ # return number of characters by which a line of tokens ($ibeg..$iend)
+ # exceeds the allowable line length.
+
+ # NOTE: Profiling shows that this is a critical routine for efficiency.
+ # Therefore I have eliminated additional calls to subs from it.
+ my ( $self, $ibeg, $iend, $ignore_right_weld ) = @_;
+
+ # Original expression for line length
+ ##$length = leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
+
+ # This is basically sub 'leading_spaces_to_go':
+ my $indentation = $leading_spaces_to_go[$ibeg];
+ if ( ref($indentation) ) { $indentation = $indentation->get_spaces() }
+
+ my $length =
+ $indentation +
+ $summed_lengths_to_go[ $iend + 1 ] -
+ $summed_lengths_to_go[$ibeg];
+
+ # Include right weld lengths unless requested not to.
+ if ( $total_weld_count
+ && !$ignore_right_weld
+ && $type_sequence_to_go[$iend] )
+ {
+ my $wr = $self->[_rweld_len_right_at_K_]->{ $K_to_go[$iend] };
+ $length += $wr if defined($wr);
+ }
+
+ # return the excess
+ return $length - $maximum_line_length_at_level[ $levels_to_go[$ibeg] ];
+}
+
+sub get_spaces {
+
+ # return the number of leading spaces associated with an indentation
+ # variable $indentation is either a constant number of spaces or an object
+ # with a get_spaces method.
+ my $indentation = shift;
+ return ref($indentation) ? $indentation->get_spaces() : $indentation;
+}
+
+sub get_recoverable_spaces {
+
+ # return the number of spaces (+ means shift right, - means shift left)
+ # that we would like to shift a group of lines with the same indentation
+ # to get them to line up with their opening parens
+ my $indentation = shift;
+ return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
+}
+
+sub get_available_spaces_to_go {
+
+ my ( $self, $ii ) = @_;
+ my $item = $leading_spaces_to_go[$ii];
+
+ # return the number of available leading spaces associated with an
+ # indentation variable. $indentation is either a constant number of
+ # spaces or an object with a get_available_spaces method.
+ return ref($item) ? $item->get_available_spaces() : 0;
+}
+
+{ ## begin closure set_leading_whitespace (for -lp indentation)
+
+ # These routines are called batch-by-batch to handle the -lp indentation
+ # option. The coding is rather complex, but is only for -lp.
+
+ my $gnu_position_predictor;
+ my $gnu_sequence_number;
+ my $line_start_index_to_go;
+ my $max_gnu_item_index;
+ my $max_gnu_stack_index;
+ my %gnu_arrow_count;
+ my %gnu_comma_count;
+ my %last_gnu_equals;
+ my @gnu_item_list;
+ my @gnu_stack;
+
+ sub initialize_gnu_vars {
+
+ # initialize gnu variables for a new file;
+ # must be called once at the start of a new file.
+
+ # initialize the leading whitespace stack to negative levels
+ # so that we can never run off the end of the stack
+ $gnu_position_predictor =
+ 0; # where the current token is predicted to be
+ $max_gnu_stack_index = 0;
+ $max_gnu_item_index = -1;
+ $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
+ @gnu_item_list = ();
+ return;
+ }
+
+ sub initialize_gnu_batch_vars {
+
+ # initialize gnu variables for a new batch;
+ # must be called before each new batch
+ $gnu_sequence_number++; # increment output batch counter
+ %last_gnu_equals = ();
+ %gnu_comma_count = ();
+ %gnu_arrow_count = ();
+ $line_start_index_to_go = 0;
+ $max_gnu_item_index = UNDEFINED_INDEX;
+ return;
+ }
+
+ sub new_lp_indentation_item {
+
+ # this is an interface to the IndentationItem class
+ my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
+
+ # A negative level implies not to store the item in the item_list
+ my $index = 0;
+ if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
+
+ my $starting_index_K = 0;
+ if ( defined($line_start_index_to_go)
+ && $line_start_index_to_go >= 0
+ && $line_start_index_to_go <= $max_index_to_go )
+ {
+ $starting_index_K = $K_to_go[$line_start_index_to_go];
+ }
+
+ my $item = Perl::Tidy::IndentationItem->new(
+ spaces => $spaces,
+ level => $level,
+ ci_level => $ci_level,
+ available_spaces => $available_spaces,
+ index => $index,
+ gnu_sequence_number => $gnu_sequence_number,
+ align_paren => $align_paren,
+ stack_depth => $max_gnu_stack_index,
+ starting_index_K => $starting_index_K,
+ );
+
+ if ( $level >= 0 ) {
+ $gnu_item_list[$max_gnu_item_index] = $item;
+ }
+
+ return $item;
+ }
+
+ sub set_leading_whitespace {
+
+ # This routine defines leading whitespace for the case of -lp formatting
+ # given: the level and continuation_level of a token,
+ # define: space count of leading string which would apply if it
+ # were the first token of a new line.
+
+ my ( $self, $Kj, $K_last_nonblank, $K_last_last_nonblank,
+ $level_abs, $ci_level, $in_continued_quote )
+ = @_;
+
+ return unless ($rOpts_line_up_parentheses);
+ return unless ( defined($max_index_to_go) && $max_index_to_go >= 0 );
+
+ my $rbreak_container = $self->[_rbreak_container_];
+ my $rshort_nested = $self->[_rshort_nested_];
+ my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
+ my $rLL = $self->[_rLL_];
+ my $rbreak_before_container_by_seqno =
+ $self->[_rbreak_before_container_by_seqno_];
+
+ # find needed previous nonblank tokens
+ my $last_nonblank_token = '';
+ my $last_nonblank_type = '';
+ my $last_nonblank_block_type = '';
+
+ # and previous nonblank tokens, just in this batch:
+ my $last_nonblank_token_in_batch = '';
+ my $last_nonblank_type_in_batch = '';
+ my $last_last_nonblank_type_in_batch = '';
+
+ if ( defined($K_last_nonblank) ) {
+ $last_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
+ $last_nonblank_type = $rLL->[$K_last_nonblank]->[_TYPE_];
+ $last_nonblank_block_type =
+ $rLL->[$K_last_nonblank]->[_BLOCK_TYPE_];
+
+ if ( $K_last_nonblank >= $K_to_go[0] ) {
+ $last_nonblank_token_in_batch = $last_nonblank_token;
+ $last_nonblank_type_in_batch = $last_nonblank_type;
+ if ( defined($K_last_last_nonblank)
+ && $K_last_last_nonblank > $K_to_go[0] )
+ {
+ $last_last_nonblank_type_in_batch =
+ $rLL->[$K_last_last_nonblank]->[_TYPE_];
+ }
+ }
+ }
+
+ ################################################################
+
+ # Adjust levels if necessary to recycle whitespace:
+ my $level = $level_abs;
+ my $radjusted_levels = $self->[_radjusted_levels_];
+ my $nK = @{$rLL};
+ my $nws = @{$radjusted_levels};
+ if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
+ $level = $radjusted_levels->[$Kj];
+ if ( $level < 0 ) { $level = 0 } # note: this should not happen
+ }
+
+ # The continued_quote flag means that this is the first token of a
+ # line, and it is the continuation of some kind of multi-line quote
+ # or pattern. It requires special treatment because it must have no
+ # added leading whitespace. So we create a special indentation item
+ # which is not in the stack.
+ if ($in_continued_quote) {
+ my $space_count = 0;
+ my $available_space = 0;
+ $level = -1; # flag to prevent storing in item_list
+ $leading_spaces_to_go[$max_index_to_go] =
+ $reduced_spaces_to_go[$max_index_to_go] =
+ new_lp_indentation_item( $space_count, $level, $ci_level,
+ $available_space, 0 );
+ return;
+ }
+
+ # get the top state from the stack
+ my $space_count = $gnu_stack[$max_gnu_stack_index]->get_spaces();
+ my $current_level = $gnu_stack[$max_gnu_stack_index]->get_level();
+ my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
+
+ my $type = $types_to_go[$max_index_to_go];
+ my $token = $tokens_to_go[$max_index_to_go];
+ my $total_depth = $nesting_depth_to_go[$max_index_to_go];
+
+ if ( $type eq '{' || $type eq '(' ) {
+
+ $gnu_comma_count{ $total_depth + 1 } = 0;
+ $gnu_arrow_count{ $total_depth + 1 } = 0;
+
+ # If we come to an opening token after an '=' token of some type,
+ # see if it would be helpful to 'break' after the '=' to save space
+ my $last_equals = $last_gnu_equals{$total_depth};
+ if ( $last_equals && $last_equals > $line_start_index_to_go ) {
+
+ my $seqno = $type_sequence_to_go[$max_index_to_go];
+
+ # find the position if we break at the '='
+ my $i_test = $last_equals;
+ if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
+
+ # TESTING
+ ##my $too_close = ($i_test==$max_index_to_go-1);
+
+ my $test_position =
+ total_line_length( $i_test, $max_index_to_go );
+ my $mll =
+ $maximum_line_length_at_level[ $levels_to_go[$i_test] ];
+
+ my $bbc_flag = $break_before_container_types{$token};
+
+ if (
+
+ # the equals is not just before an open paren (testing)
+ ##!$too_close &&
+
+ # if we are beyond the midpoint
+ $gnu_position_predictor >
+ $mll - $rOpts_maximum_line_length / 2
+
+ # if a -bbx flag WANTS a break before this opening token
+ || ( $seqno && $rbreak_before_container_by_seqno->{$seqno} )
+
+ # or if we MIGHT want a break (fixes case b826 b909 b989)
+ || ( $bbc_flag && $bbc_flag >= 2 )
+
+ # or we are beyond the 1/4 point and there was an old
+ # break at an assignment (not '=>') [fix for b1035]
+ || (
+ $gnu_position_predictor >
+ $mll - $rOpts_maximum_line_length * 3 / 4
+ && $types_to_go[$last_equals] ne '=>'
+ && (
+ $old_breakpoint_to_go[$last_equals]
+ || ( $last_equals > 0
+ && $old_breakpoint_to_go[ $last_equals - 1 ] )
+ || ( $last_equals > 1
+ && $types_to_go[ $last_equals - 1 ] eq 'b'
+ && $old_breakpoint_to_go[ $last_equals - 2 ] )
+ )
+ )
+ )
+ {
+
+ # then make the switch -- note that we do not set a real
+ # breakpoint here because we may not really need one; sub
+ # scan_list will do that if necessary
+ $line_start_index_to_go = $i_test + 1;
+ $gnu_position_predictor = $test_position;
+ }
+ }
+ }
+
+ my $halfway =
+ $maximum_line_length_at_level[$level] -
+ $rOpts_maximum_line_length / 2;
+
+ # Check for decreasing depth ..
+ # Note that one token may have both decreasing and then increasing
+ # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
+ # in this example we would first go back to (1,0) then up to (2,0)
+ # in a single call.
+ if ( $level < $current_level || $ci_level < $current_ci_level ) {
+
+ # loop to find the first entry at or completely below this level
+ my ( $lev, $ci_lev );
+ while (1) {
+ if ($max_gnu_stack_index) {
+
+ # save index of token which closes this level
+ $gnu_stack[$max_gnu_stack_index]
+ ->set_closed($max_index_to_go);
+
+ # Undo any extra indentation if we saw no commas
+ my $available_spaces =
+ $gnu_stack[$max_gnu_stack_index]->get_available_spaces();
+
+ my $comma_count = 0;
+ my $arrow_count = 0;
+ if ( $type eq '}' || $type eq ')' ) {
+ $comma_count = $gnu_comma_count{$total_depth};
+ $arrow_count = $gnu_arrow_count{$total_depth};
+ $comma_count = 0 unless $comma_count;
+ $arrow_count = 0 unless $arrow_count;
+ }
+ $gnu_stack[$max_gnu_stack_index]
+ ->set_comma_count($comma_count);
+ $gnu_stack[$max_gnu_stack_index]
+ ->set_arrow_count($arrow_count);
+
+ if ( $available_spaces > 0 ) {
+
+ if ( $comma_count <= 0 || $arrow_count > 0 ) {
+
+ my $i =
+ $gnu_stack[$max_gnu_stack_index]->get_index();
+ my $seqno =
+ $gnu_stack[$max_gnu_stack_index]
+ ->get_sequence_number();
+
+ # Be sure this item was created in this batch. This
+ # should be true because we delete any available
+ # space from open items at the end of each batch.
+ if ( $gnu_sequence_number != $seqno
+ || $i > $max_gnu_item_index )
+ {
+ warning(
+"Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
+ );
+ report_definite_bug();
+ }
+
+ else {
+ if ( $arrow_count == 0 ) {
+ $gnu_item_list[$i]
+ ->permanently_decrease_available_spaces(
+ $available_spaces);
+ }
+ else {
+ $gnu_item_list[$i]
+ ->tentatively_decrease_available_spaces(
+ $available_spaces);
+ }
+ foreach my $j ( $i + 1 .. $max_gnu_item_index )
+ {
+ $gnu_item_list[$j]
+ ->decrease_SPACES($available_spaces);
+ }
+ }
+ }
+ }
+
+ # go down one level
+ --$max_gnu_stack_index;
+ $lev = $gnu_stack[$max_gnu_stack_index]->get_level();
+ $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
+
+ # stop when we reach a level at or below the current level
+ if ( $lev <= $level && $ci_lev <= $ci_level ) {
+ $space_count =
+ $gnu_stack[$max_gnu_stack_index]->get_spaces();
+ $current_level = $lev;
+ $current_ci_level = $ci_lev;
+ last;
+ }
+ }
+
+ # reached bottom of stack .. should never happen because
+ # only negative levels can get here, and $level was forced
+ # to be positive above.
+ else {
+ warning(
+"program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
+ );
+ report_definite_bug();
+ last;
+ }
+ }
+ }
+
+ # handle increasing depth
+ if ( $level > $current_level || $ci_level > $current_ci_level ) {
+
+ # Compute the standard incremental whitespace. This will be
+ # the minimum incremental whitespace that will be used. This
+ # choice results in a smooth transition between the gnu-style
+ # and the standard style.
+ my $standard_increment =
+ ( $level - $current_level ) *
+ $rOpts_indent_columns +
+ ( $ci_level - $current_ci_level ) *
+ $rOpts_continuation_indentation;
+
+ # Now we have to define how much extra incremental space
+ # ("$available_space") we want. This extra space will be
+ # reduced as necessary when long lines are encountered or when
+ # it becomes clear that we do not have a good list.
+ my $available_space = 0;
+ my $align_paren = 0;
+ my $excess = 0;
+
+ my $last_nonblank_seqno;
+ if ( defined($K_last_nonblank) ) {
+ $last_nonblank_seqno =
+ $rLL->[$K_last_nonblank]->[_TYPE_SEQUENCE_];
+ }
+
+ # initialization on empty stack..
+ if ( $max_gnu_stack_index == 0 ) {
+ $space_count = $level * $rOpts_indent_columns;
+ }
+
+ # if this is a BLOCK, add the standard increment
+ elsif ($last_nonblank_block_type) {
+ $space_count += $standard_increment;
+ }
+
+ # add the standard increment for containers excluded by user rules
+ # or which contain here-docs or multiline qw text
+ elsif ( defined($last_nonblank_seqno)
+ && $ris_excluded_lp_container->{$last_nonblank_seqno} )
+ {
+ $space_count += $standard_increment;
+ }
+
+ # if last nonblank token was not structural indentation,
+ # just use standard increment
+ elsif ( $last_nonblank_type ne '{' ) {
+ $space_count += $standard_increment;
+ }
+
+ # otherwise use the space to the first non-blank level change token
+ else {
+
+ $space_count = $gnu_position_predictor;
+
+ my $min_gnu_indentation =
+ $gnu_stack[$max_gnu_stack_index]->get_spaces();
+
+ $available_space = $space_count - $min_gnu_indentation;
+ if ( $available_space >= $standard_increment ) {
+ $min_gnu_indentation += $standard_increment;
+ }
+ elsif ( $available_space > 1 ) {
+ $min_gnu_indentation += $available_space + 1;
+ }
+ elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
+ if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
+ $min_gnu_indentation += 2;
+ }
+ else {
+ $min_gnu_indentation += 1;
+ }
+ }
+ else {
+ $min_gnu_indentation += $standard_increment;
+ }
+ $available_space = $space_count - $min_gnu_indentation;
+
+ if ( $available_space < 0 ) {
+ $space_count = $min_gnu_indentation;
+ $available_space = 0;
+ }
+ $align_paren = 1;
+ }
+
+ # update state, but not on a blank token
+ if ( $types_to_go[$max_index_to_go] ne 'b' ) {
+
+ $gnu_stack[$max_gnu_stack_index]->set_have_child(1);
+
+ ++$max_gnu_stack_index;
+ $gnu_stack[$max_gnu_stack_index] =
+ new_lp_indentation_item( $space_count, $level, $ci_level,
+ $available_space, $align_paren );
+
+ # If the opening paren is beyond the half-line length, then
+ # we will use the minimum (standard) indentation. This will
+ # help avoid problems associated with running out of space
+ # near the end of a line. As a result, in deeply nested
+ # lists, there will be some indentations which are limited
+ # to this minimum standard indentation. But the most deeply
+ # nested container will still probably be able to shift its
+ # parameters to the right for proper alignment, so in most
+ # cases this will not be noticeable.
+ if ( $available_space > 0 && $space_count > $halfway ) {
+ $gnu_stack[$max_gnu_stack_index]
+ ->tentatively_decrease_available_spaces($available_space);
+ }
+ }
+ }
+
+ # Count commas and look for non-list characters. Once we see a
+ # non-list character, we give up and don't look for any more commas.
+ if ( $type eq '=>' ) {
+ $gnu_arrow_count{$total_depth}++;
+
+ # remember '=>' like '=' for estimating breaks (but see above note
+ # for b1035)
+ $last_gnu_equals{$total_depth} = $max_index_to_go;
+ }
+
+ elsif ( $type eq ',' ) {
+ $gnu_comma_count{$total_depth}++;
+ }
+
+ elsif ( $is_assignment{$type} ) {
+ $last_gnu_equals{$total_depth} = $max_index_to_go;
+ }
+
+ # this token might start a new line
+ # if this is a non-blank..
+ if ( $type ne 'b' ) {
+
+ # and if ..
+ if (
+
+ # this is the first nonblank token of the line
+ $max_index_to_go == 1 && $types_to_go[0] eq 'b'
+
+ # or previous character was one of these:
+ || $last_nonblank_type_in_batch =~ /^([\:\?\,f])$/
+
+ # or previous character was opening and this does not close it
+ || ( $last_nonblank_type_in_batch eq '{' && $type ne '}' )
+ || ( $last_nonblank_type_in_batch eq '(' and $type ne ')' )
+
+ # or this token is one of these:
+ || $type =~ /^([\.]|\|\||\&\&)$/
+
+ # or this is a closing structure
+ || ( $last_nonblank_type_in_batch eq '}'
+ && $last_nonblank_token_in_batch eq
+ $last_nonblank_type_in_batch )
+
+ # or previous token was keyword 'return'
+ || (
+ $last_nonblank_type_in_batch eq 'k'
+ && ( $last_nonblank_token_in_batch eq 'return'
+ && $type ne '{' )
+ )
+
+ # or starting a new line at certain keywords is fine
+ || ( $type eq 'k'
+ && $is_if_unless_and_or_last_next_redo_return{$token} )
+
+ # or this is after an assignment after a closing structure
+ || (
+ $is_assignment{$last_nonblank_type_in_batch}
+ && (
+ $last_last_nonblank_type_in_batch =~ /^[\}\)\]]$/
+
+ # and it is significantly to the right
+ || $gnu_position_predictor > $halfway
+ )
+ )
+ )
+ {
+ check_for_long_gnu_style_lines($max_index_to_go);
+ $line_start_index_to_go = $max_index_to_go;
+
+ # back up 1 token if we want to break before that type
+ # otherwise, we may strand tokens like '?' or ':' on a line
+ if ( $line_start_index_to_go > 0 ) {
+ if ( $last_nonblank_type_in_batch eq 'k' ) {
+
+ if ( $want_break_before{$last_nonblank_token_in_batch} )
+ {
+ $line_start_index_to_go--;
+ }
+ }
+ elsif ( $want_break_before{$last_nonblank_type_in_batch} ) {
+ $line_start_index_to_go--;
+ }
+ }
+ }
+ }
+
+ # remember the predicted position of this token on the output line
+ if ( $max_index_to_go > $line_start_index_to_go ) {
+ $gnu_position_predictor =
+ total_line_length( $line_start_index_to_go, $max_index_to_go );
+ }
+ else {
+ $gnu_position_predictor =
+ $space_count + $token_lengths_to_go[$max_index_to_go];
+ }
+
+ # store the indentation object for this token
+ # this allows us to manipulate the leading whitespace
+ # (in case we have to reduce indentation to fit a line) without
+ # having to change any token values
+ $leading_spaces_to_go[$max_index_to_go] =
+ $gnu_stack[$max_gnu_stack_index];
+ $reduced_spaces_to_go[$max_index_to_go] =
+ ( $max_gnu_stack_index > 0 && $ci_level )
+ ? $gnu_stack[ $max_gnu_stack_index - 1 ]
+ : $gnu_stack[$max_gnu_stack_index];
+ return;
+ }
+
+ sub check_for_long_gnu_style_lines {
+
+ # look at the current estimated maximum line length, and
+ # remove some whitespace if it exceeds the desired maximum
+ my ($mx_index_to_go) = @_;
+
+ # this is only for the '-lp' style
+ return unless ($rOpts_line_up_parentheses);
+
+ # nothing can be done if no stack items defined for this line
+ return if ( $max_gnu_item_index == UNDEFINED_INDEX );
+
+ # see if we have exceeded the maximum desired line length
+ # keep 2 extra free because they are needed in some cases
+ # (result of trial-and-error testing)
+ my $spaces_needed =
+ $gnu_position_predictor -
+ $maximum_line_length_at_level[ $levels_to_go[$mx_index_to_go] ] + 2;
+
+ return if ( $spaces_needed <= 0 );
+
+ # We are over the limit, so try to remove a requested number of
+ # spaces from leading whitespace. We are only allowed to remove
+ # from whitespace items created on this batch, since others have
+ # already been used and cannot be undone.
+ my @candidates = ();
+ my $i;
+
+ # loop over all whitespace items created for the current batch
+ for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
+ my $item = $gnu_item_list[$i];
+
+ # item must still be open to be a candidate (otherwise it
+ # cannot influence the current token)
+ next if ( $item->get_closed() >= 0 );
+
+ my $available_spaces = $item->get_available_spaces();
+
+ if ( $available_spaces > 0 ) {
+ push( @candidates, [ $i, $available_spaces ] );
+ }
+ }
+
+ return unless (@candidates);
+
+ # sort by available whitespace so that we can remove whitespace
+ # from the maximum available first
+ @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
+
+ # keep removing whitespace until we are done or have no more
+ foreach my $candidate (@candidates) {
+ my ( $i, $available_spaces ) = @{$candidate};
+ my $deleted_spaces =
+ ( $available_spaces > $spaces_needed )
+ ? $spaces_needed
+ : $available_spaces;
+
+ # remove the incremental space from this item
+ $gnu_item_list[$i]->decrease_available_spaces($deleted_spaces);
+
+ my $i_debug = $i;
+
+ # update the leading whitespace of this item and all items
+ # that came after it
+ for ( ; $i <= $max_gnu_item_index ; $i++ ) {
+
+ my $old_spaces = $gnu_item_list[$i]->get_spaces();
+ if ( $old_spaces >= $deleted_spaces ) {
+ $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
+ }
+
+ # shouldn't happen except for code bug:
+ else {
+ my $level = $gnu_item_list[$i_debug]->get_level();
+ my $ci_level = $gnu_item_list[$i_debug]->get_ci_level();
+ my $old_level = $gnu_item_list[$i]->get_level();
+ my $old_ci_level = $gnu_item_list[$i]->get_ci_level();
+ warning(
+"program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n"
+ );
+ report_definite_bug();
+ }
+ }
+ $gnu_position_predictor -= $deleted_spaces;
+ $spaces_needed -= $deleted_spaces;
+ last unless ( $spaces_needed > 0 );
+ }
+ return;
+ }
+
+ sub finish_lp_batch {
+
+ # This routine is called once after each output stream batch is
+ # finished to undo indentation for all incomplete -lp
+ # indentation levels. It is too risky to leave a level open,
+ # because then we can't backtrack in case of a long line to follow.
+ # This means that comments and blank lines will disrupt this
+ # indentation style. But the vertical aligner may be able to
+ # get the space back if there are side comments.
+
+ # this is only for the 'lp' style
+ return unless ($rOpts_line_up_parentheses);
+
+ # nothing can be done if no stack items defined for this line
+ return if ( $max_gnu_item_index == UNDEFINED_INDEX );
+
+ # loop over all whitespace items created for the current batch
+ foreach my $i ( 0 .. $max_gnu_item_index ) {
+ my $item = $gnu_item_list[$i];
+
+ # only look for open items
+ next if ( $item->get_closed() >= 0 );
+
+ # Tentatively remove all of the available space
+ # (The vertical aligner will try to get it back later)
+ my $available_spaces = $item->get_available_spaces();
+ if ( $available_spaces > 0 ) {
+
+ # delete incremental space for this item
+ $gnu_item_list[$i]
+ ->tentatively_decrease_available_spaces($available_spaces);
+
+ # Reduce the total indentation space of any nodes that follow
+ # Note that any such nodes must necessarily be dependents
+ # of this node.
+ foreach ( $i + 1 .. $max_gnu_item_index ) {
+ $gnu_item_list[$_]->decrease_SPACES($available_spaces);
+ }
+ }
+ }
+ return;
+ }
+} ## end closure set_leading_whitespace
+
+sub reduce_lp_indentation {
+
+ # reduce the leading whitespace at token $i if possible by $spaces_needed
+ # (a large value of $spaces_needed will remove all excess space)
+ # NOTE: to be called from scan_list only for a sequence of tokens
+ # contained between opening and closing parens/braces/brackets
+
+ my ( $self, $i, $spaces_wanted ) = @_;
+ my $deleted_spaces = 0;
+
+ my $item = $leading_spaces_to_go[$i];
+ my $available_spaces = $item->get_available_spaces();
+
+ if (
+ $available_spaces > 0
+ && ( ( $spaces_wanted <= $available_spaces )
+ || !$item->get_have_child() )
+ )
+ {
+
+ # we'll remove these spaces, but mark them as recoverable
+ $deleted_spaces =
+ $item->tentatively_decrease_available_spaces($spaces_wanted);
+ }
+
+ return $deleted_spaces;
+}
+
+###########################################################
+# CODE SECTION 13: Preparing batches for vertical alignment
+###########################################################
+
+sub send_lines_to_vertical_aligner {
+
+ my ($self) = @_;
+
+ # This routine receives a batch of code for which the final line breaks
+ # have been defined. Here we prepare the lines for passing to the vertical
+ # aligner. We do the following tasks:
+ # - mark certain vertical alignment tokens, such as '=', in each line
+ # - make minor indentation adjustments
+ # - do logical padding: insert extra blank spaces to help display certain
+ # logical constructions
+
+ my $this_batch = $self->[_this_batch_];
+ my $rlines_K = $this_batch->[_rlines_K_];
+ if ( !@{$rlines_K} ) {
+
+ # This can't happen because sub grind_batch_of_CODE always receives
+ # tokens which it turns into one or more lines. If we get here it means
+ # that a programming error has caused those lines to be lost.
+ Fault("Unexpected call with no lines");
+ return;
+ }
+ my $n_last_line = @{$rlines_K} - 1;
+
+ my $do_not_pad = $this_batch->[_do_not_pad_];
+ my $peak_batch_size = $this_batch->[_peak_batch_size_];
+ my $starting_in_quote = $this_batch->[_starting_in_quote_];
+ my $ending_in_quote = $this_batch->[_ending_in_quote_];
+ my $is_static_block_comment = $this_batch->[_is_static_block_comment_];
+ my $ibeg0 = $this_batch->[_ibeg0_];
+ my $rK_to_go = $this_batch->[_rK_to_go_];
+ my $batch_count = $this_batch->[_batch_count_];
+ my $rix_seqno_controlling_ci = $this_batch->[_rix_seqno_controlling_ci_];
+
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
+
+ my ( $Kbeg_next, $Kend_next ) = @{ $rlines_K->[0] };
+ my $type_beg_next = $rLL->[$Kbeg_next]->[_TYPE_];
+ my $token_beg_next = $rLL->[$Kbeg_next]->[_TOKEN_];
+ my $type_end_next = $rLL->[$Kend_next]->[_TYPE_];
+
+ # Construct indexes to the global_to_go arrays so that called routines can
+ # still access those arrays. This might eventually be removed
+ # when all called routines have been converted to access token values
+ # in the rLL array instead.
+ my $Kbeg0 = $Kbeg_next;
+ my ( $ri_first, $ri_last );
+ foreach my $rline ( @{$rlines_K} ) {
+ my ( $Kbeg, $Kend ) = @{$rline};
+ my $ibeg = $ibeg0 + $Kbeg - $Kbeg0;
+ my $iend = $ibeg0 + $Kend - $Kbeg0;
+ push @{$ri_first}, $ibeg;
+ push @{$ri_last}, $iend;
+ }
+
+ my ( $cscw_block_comment, $closing_side_comment );
+ if ( $rOpts->{'closing-side-comments'} ) {
+ ( $closing_side_comment, $cscw_block_comment ) =
+ $self->add_closing_side_comment();
+ }
+
+ my $rindentation_list = [0]; # ref to indentations for each line
+
+ # define the array @{$ralignment_type_to_go} for the output tokens
+ # which will be non-blank for each special token (such as =>)
+ # for which alignment is required.
+ my $ralignment_type_to_go =
+ $self->set_vertical_alignment_markers( $ri_first, $ri_last );
+
+ # flush before a long if statement to avoid unwanted alignment
+ if ( $n_last_line > 0
+ && $type_beg_next eq 'k'
+ && $token_beg_next =~ /^(if|unless)$/ )
+ {
+ $self->flush_vertical_aligner();
+ }
+
+ $self->undo_ci( $ri_first, $ri_last, $rix_seqno_controlling_ci );
+
+ $self->set_logical_padding( $ri_first, $ri_last, $peak_batch_size,
+ $starting_in_quote )
+ if ( $rOpts->{'logical-padding'} );
+
+ # Resum lengths. We need accurate lengths for making alignment patterns,
+ # and we may have unmasked a semicolon which was not included at the start.
+ for ( 0 .. $max_index_to_go ) {
+ $summed_lengths_to_go[ $_ + 1 ] =
+ $summed_lengths_to_go[$_] + $token_lengths_to_go[$_];
+ }
+
+ # loop to prepare each line for shipment
+ my ( $Kbeg, $type_beg, $token_beg );
+ my ( $Kend, $type_end );
+ for my $n ( 0 .. $n_last_line ) {
+
+ my $ibeg = $ri_first->[$n];
+ my $iend = $ri_last->[$n];
+ my $rline = $rlines_K->[$n];
+ my $forced_breakpoint = $rline->[2];
+
+ # we may need to look at variables on three consecutive lines ...
+
+ # Some vars on line [n-1], if any:
+ my $Kbeg_last = $Kbeg;
+ my $type_beg_last = $type_beg;
+ my $token_beg_last = $token_beg;
+ my $Kend_last = $Kend;
+ my $type_end_last = $type_end;
+
+ # Some vars on line [n]:
+ $Kbeg = $Kbeg_next;
+ $type_beg = $type_beg_next;
+ $token_beg = $token_beg_next;
+ $Kend = $Kend_next;
+ $type_end = $type_end_next;
+
+ # Only forward ending K values of non-comments down the pipeline.
+ # This is equivalent to checking that the last CODE_type is blank or
+ # equal to 'VER'. See also sub resync_lines_and_tokens for related
+ # coding. Note that '$batch_CODE_type' is the code type of the line
+ # to which the ending token belongs.
+ my $batch_CODE_type = $this_batch->[_batch_CODE_type_];
+ my $Kend_code =
+ $batch_CODE_type && $batch_CODE_type ne 'VER' ? undef : $Kend;
+
+ # We use two slightly different definitions of level jump at the end
+ # of line:
+ # $ljump is the level jump needed by 'sub set_adjusted_indentation'
+ # $level_jump is the level jump needed by the vertical aligner.
+ my $ljump = 0; # level jump at end of line
+
+ # Get some vars on line [n+1], if any:
+ if ( $n < $n_last_line ) {
+ ( $Kbeg_next, $Kend_next ) =
+ @{ $rlines_K->[ $n + 1 ] };
+ $type_beg_next = $rLL->[$Kbeg_next]->[_TYPE_];
+ $token_beg_next = $rLL->[$Kbeg_next]->[_TOKEN_];
+ $type_end_next = $rLL->[$Kend_next]->[_TYPE_];
+ $ljump = $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
+ }
+ else {
+
+ # Patch for git #51, a bare closing qw paren was not outdented
+ # if the flag '-nodelete-old-newlines is set
+ my $Kbeg_next = $self->K_next_code($Kend);
+ if ( defined($Kbeg_next) ) {
+ $ljump =
+ $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
+ }
+ }
+
+ # level jump at end of line for the vertical aligner:
+ my $level_jump =
+ $Kend >= $Klimit
+ ? 0
+ : $rLL->[ $Kend + 1 ]->[_SLEVEL_] - $rLL->[$Kbeg]->[_SLEVEL_];
+
+ $self->delete_needless_alignments( $ibeg, $iend,
+ $ralignment_type_to_go );
+
+ my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
+ $self->make_alignment_patterns( $ibeg, $iend,
+ $ralignment_type_to_go );
+
+ my ( $indentation, $lev, $level_end, $terminal_type,
+ $terminal_block_type, $is_semicolon_terminated, $is_outdented_line )
+ = $self->set_adjusted_indentation( $ibeg, $iend, $rfields,
+ $rpatterns, $ri_first, $ri_last,
+ $rindentation_list, $ljump, $starting_in_quote,
+ $is_static_block_comment, );
+
+ # we will allow outdenting of long lines..
+ my $outdent_long_lines = (
+
+ # which are long quotes, if allowed
+ ( $type_beg eq 'Q' && $rOpts->{'outdent-long-quotes'} )
+
+ # which are long block comments, if allowed
+ || (
+ $type_beg eq '#'
+ && $rOpts->{'outdent-long-comments'}
+
+ # but not if this is a static block comment
+ && !$is_static_block_comment
+ )
+ );
+
+ my $break_alignment_before = $is_outdented_line || $do_not_pad;
+ my $break_alignment_after = $is_outdented_line;
+
+ # flush at an 'if' which follows a line with (1) terminal semicolon
+ # or (2) terminal block_type which is not an 'if'. This prevents
+ # unwanted alignment between the lines.
+ if ( $type_beg eq 'k' && $token_beg eq 'if' ) {
+ my $Km = $self->K_previous_code($Kbeg);
+ my $type_m = 'b';
+ my $block_type_m = 'b';
+ if ( defined($Km) ) {
+ $type_m = $rLL->[$Km]->[_TYPE_];
+ $block_type_m = $rLL->[$Km]->[_BLOCK_TYPE_];
+ }
+
+ # break after anything that is not if-like
+ $break_alignment_before ||= $type_m eq ';'
+ || ( $type_m eq '}'
+ && $block_type_m ne 'if'
+ && $block_type_m ne 'unless'
+ && $block_type_m ne 'elsif'
+ && $block_type_m ne 'else' );
+ }
+
+ my $rvertical_tightness_flags =
+ $self->set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
+ $ri_first, $ri_last, $ending_in_quote, $closing_side_comment );
+
+ # Set a flag at the final ':' of a ternary chain to request
+ # vertical alignment of the final term. Here is a
+ # slightly complex example:
+ #
+ # $self->{_text} = (
+ # !$section ? ''
+ # : $type eq 'item' ? "the $section entry"
+ # : "the section on $section"
+ # )
+ # . (
+ # $page
+ # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
+ # : ' elsewhere in this document'
+ # );
+ #
+ my $is_terminal_ternary = 0;
+
+ if ( $type_beg eq ':' || $n > 0 && $type_end_last eq ':' ) {
+ my $last_leading_type = $n > 0 ? $type_beg_last : ':';
+ if ( $terminal_type ne ';'
+ && $n_last_line > $n
+ && $level_end == $lev )
+ {
+ $level_end = $rLL->[$Kbeg_next]->[_LEVEL_];
+ $terminal_type = $rLL->[$Kbeg_next]->[_TYPE_];
+ }
+ if (
+ $last_leading_type eq ':'
+ && ( ( $terminal_type eq ';' && $level_end <= $lev )
+ || ( $terminal_type ne ':' && $level_end < $lev ) )
+ )
+ {
+
+ # the terminal term must not contain any ternary terms, as in
+ # my $ECHO = (
+ # $Is_MSWin32 ? ".\\echo$$"
+ # : $Is_MacOS ? ":echo$$"
+ # : ( $Is_NetWare ? "echo$$" : "./echo$$" )
+ # );
+ $is_terminal_ternary = 1;
+
+ my $KP = $rLL->[$Kbeg]->[_KNEXT_SEQ_ITEM_];
+ while ( defined($KP) && $KP <= $Kend ) {
+ my $type_KP = $rLL->[$KP]->[_TYPE_];
+ if ( $type_KP eq '?' || $type_KP eq ':' ) {
+ $is_terminal_ternary = 0;
+ last;
+ }
+ $KP = $rLL->[$KP]->[_KNEXT_SEQ_ITEM_];
+ }
+ }
+ }
+
+ my $level_adj = $lev;
+ my $radjusted_levels = $self->[_radjusted_levels_];
+ if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
+ $level_adj = $radjusted_levels->[$Kbeg];
+ if ( $level_adj < 0 ) { $level_adj = 0 }
+ }
+
+ # add any new closing side comment to the last line
+ if ( $closing_side_comment && $n == $n_last_line && @{$rfields} ) {
+ $rfields->[-1] .= " $closing_side_comment";
+
+ # NOTE: Patch for csc. We can just use 1 for the length of the csc
+ # because its length should not be a limiting factor from here on.
+ $rfield_lengths->[-1] += 2;
+ }
+
+ # Programming check: (shouldn't happen)
+ # The number of tokens which separate the fields must always be
+ # one less than the number of fields. If this is not true then
+ # an error has been introduced in sub make_alignment_patterns.
+ if ( @{$rfields} && ( @{$rtokens} != ( @{$rfields} - 1 ) ) ) {
+ my $nt = @{$rtokens};
+ my $nf = @{$rfields};
+ my $msg = <<EOM;
+Program bug in Perl::Tidy::Formatter, probably in sub 'make_alignment_patterns':
+The number of tokens = $nt should be one less than number of fields: $nf
+EOM
+ Fault($msg);
+ }
+
+ # Set flag which tells if this line is contained in a multi-line list
+ my $list_seqno = $self->is_list_by_K($Kbeg);
+
+ # send this new line down the pipe
+ my $rvalign_hash = {};
+ $rvalign_hash->{level} = $lev;
+ $rvalign_hash->{level_end} = $level_end;
+ $rvalign_hash->{level_adj} = $level_adj;
+ $rvalign_hash->{indentation} = $indentation;
+ $rvalign_hash->{list_seqno} = $list_seqno;
+ $rvalign_hash->{outdent_long_lines} = $outdent_long_lines;
+ $rvalign_hash->{is_terminal_ternary} = $is_terminal_ternary;
+ $rvalign_hash->{rvertical_tightness_flags} = $rvertical_tightness_flags;
+ $rvalign_hash->{level_jump} = $level_jump;
+ $rvalign_hash->{rfields} = $rfields;
+ $rvalign_hash->{rpatterns} = $rpatterns;
+ $rvalign_hash->{rtokens} = $rtokens;
+ $rvalign_hash->{rfield_lengths} = $rfield_lengths;
+ $rvalign_hash->{terminal_block_type} = $terminal_block_type;
+ $rvalign_hash->{batch_count} = $batch_count;
+ $rvalign_hash->{break_alignment_before} = $break_alignment_before;
+ $rvalign_hash->{break_alignment_after} = $break_alignment_after;
+ $rvalign_hash->{Kend} = $Kend_code;
+ $rvalign_hash->{ci_level} = $ci_levels_to_go[$ibeg];
+
+ my $vao = $self->[_vertical_aligner_object_];
+ $vao->valign_input($rvalign_hash);
+
+ $do_not_pad = 0;
+
+ # Set flag indicating if this line ends in an opening
+ # token and is very short, so that a blank line is not
+ # needed if the subsequent line is a comment.
+ # Examples of what we are looking for:
+ # {
+ # && (
+ # BEGIN {
+ # default {
+ # sub {
+ $self->[_last_output_short_opening_token_]
+
+ # line ends in opening token
+ # /^[\{\(\[L]$/
+ = $is_opening_type{$type_end}
+
+ # and either
+ && (
+ # line has either single opening token
+ $Kend == $Kbeg
+
+ # or is a single token followed by opening token.
+ # Note that sub identifiers have blanks like 'sub doit'
+ # $token_beg !~ /\s+/
+ || ( $Kend - $Kbeg <= 2 && index( $token_beg, ' ' ) < 0 )
+ )
+
+ # and limit total to 10 character widths
+ && token_sequence_length( $ibeg, $iend ) <= 10;
+
+ } # end of loop to output each line
+
+ # remember indentation of lines containing opening containers for
+ # later use by sub set_adjusted_indentation
+ $self->save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
+
+ # output any new -cscw block comment
+ if ($cscw_block_comment) {
+ $self->flush_vertical_aligner();
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
+ }
+ return;
+}
+
+{ ## begin closure set_vertical_alignment_markers
+ my %is_vertical_alignment_type;
+ my %is_not_vertical_alignment_token;
+ my %is_vertical_alignment_keyword;
+ my %is_terminal_alignment_type;
+ my %is_low_level_alignment_token;
+
+ BEGIN {
+
+ my @q;
+
+ # Replaced =~ and // in the list. // had been removed in RT 119588
+ @q = qw#
+ = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
+ { ? : => && || ~~ !~~ =~ !~ // <=> ->
+ #;
+ @is_vertical_alignment_type{@q} = (1) x scalar(@q);
+
+ # These 'tokens' are not aligned. We need this to remove [
+ # from the above list because it has type ='{'
+ @q = qw([);
+ @is_not_vertical_alignment_token{@q} = (1) x scalar(@q);
+
+ # these are the only types aligned at a line end
+ @q = qw(&& || =>);
+ @is_terminal_alignment_type{@q} = (1) x scalar(@q);
+
+ # these tokens only align at line level
+ @q = ( '{', '(' );
+ @is_low_level_alignment_token{@q} = (1) x scalar(@q);
+
+ # eq and ne were removed from this list to improve alignment chances
+ @q = qw(if unless and or err for foreach while until);
+ @is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
+ }
+
+ sub set_vertical_alignment_markers {
+
+ # This routine takes the first step toward vertical alignment of the
+ # lines of output text. It looks for certain tokens which can serve as
+ # vertical alignment markers (such as an '=').
+ #
+ # Method: We look at each token $i in this output batch and set
+ # $ralignment_type_to_go->[$i] equal to those tokens at which we would
+ # accept vertical alignment.
+
+ my ( $self, $ri_first, $ri_last ) = @_;
+ my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
+
+ my $ralignment_type_to_go;
+
+ # Initialize the alignment array. Note that closing side comments can
+ # insert up to 2 additional tokens beyond the original
+ # $max_index_to_go, so we need to check ri_last for the last index.
+ my $max_line = @{$ri_first} - 1;
+ my $iend = $ri_last->[$max_line];
+ if ( $iend < $max_index_to_go ) { $iend = $max_index_to_go }
+
+ # nothing to do if we aren't allowed to change whitespace
+ # or there is only 1 token
+ if ( $iend == 0 || !$rOpts_add_whitespace ) {
+ for my $i ( 0 .. $iend ) {
+ $ralignment_type_to_go->[$i] = '';
+ }
+ return $ralignment_type_to_go;
+ }
+
+ # remember the index of last nonblank token before any sidecomment
+ my $i_terminal = $max_index_to_go;
+ if ( $types_to_go[$i_terminal] eq '#' ) {
+ if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
+ if ( $i_terminal > 0 ) { --$i_terminal }
+ }
+ }
+
+ # look at each line of this batch..
+ my $last_vertical_alignment_before_index;
+ my $vert_last_nonblank_type;
+ my $vert_last_nonblank_token;
+ my $vert_last_nonblank_block_type;
+
+ foreach my $line ( 0 .. $max_line ) {
+ my $ibeg = $ri_first->[$line];
+ my $iend = $ri_last->[$line];
+ $last_vertical_alignment_before_index = -1;
+ $vert_last_nonblank_type = '';
+ $vert_last_nonblank_token = '';
+ $vert_last_nonblank_block_type = '';
+
+ # look at each token in this output line..
+ my $level_beg = $levels_to_go[$ibeg];
+ foreach my $i ( $ibeg .. $iend ) {
+ my $alignment_type = '';
+ my $type = $types_to_go[$i];
+ my $block_type = $block_type_to_go[$i];
+ my $token = $tokens_to_go[$i];
+
+ # do not align tokens at lower level then start of line
+ # except for side comments
+ if ( $levels_to_go[$i] < $levels_to_go[$ibeg]
+ && $type ne '#' )
+ {
+ $ralignment_type_to_go->[$i] = '';
+ next;
+ }
+
+ #--------------------------------------------------------
+ # First see if we want to align BEFORE this token
+ #--------------------------------------------------------
+
+ # The first possible token that we can align before
+ # is index 2 because: 1) it doesn't normally make sense to
+ # align before the first token and 2) the second
+ # token must be a blank if we are to align before
+ # the third
+ if ( $i < $ibeg + 2 ) { }
+
+ # must follow a blank token
+ elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
+
+ # align a side comment --
+ elsif ( $type eq '#' ) {
+
+ my $KK = $K_to_go[$i];
+ my $sc_type = $rspecial_side_comment_type->{$KK};
+
+ unless (
+
+ # it is any specially marked side comment
+ $sc_type
+
+ # or it is a static side comment
+ || ( $rOpts->{'static-side-comments'}
+ && $token =~ /$static_side_comment_pattern/ )
+
+ # or a closing side comment
+ || ( $vert_last_nonblank_block_type
+ && $token =~
+ /$closing_side_comment_prefix_pattern/ )
+ )
+ {
+ $alignment_type = $type;
+ } ## Example of a static side comment
+ }
+
+ # otherwise, do not align two in a row to create a
+ # blank field
+ elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
+
+ # align before one of these keywords
+ # (within a line, since $i>1)
+ elsif ( $type eq 'k' ) {
+
+ # /^(if|unless|and|or|eq|ne)$/
+ if ( $is_vertical_alignment_keyword{$token} ) {
+ $alignment_type = $token;
+ }
+ }
+
+ # align before one of these types..
+ # Note: add '.' after new vertical aligner is operational
+ elsif ( $is_vertical_alignment_type{$type}
+ && !$is_not_vertical_alignment_token{$token} )
+ {
+ $alignment_type = $token;
+
+ # Do not align a terminal token. Although it might
+ # occasionally look ok to do this, this has been found to be
+ # a good general rule. The main problems are:
+ # (1) that the terminal token (such as an = or :) might get
+ # moved far to the right where it is hard to see because
+ # nothing follows it, and
+ # (2) doing so may prevent other good alignments.
+ # Current exceptions are && and || and =>
+ if ( $i == $iend || $i >= $i_terminal ) {
+ $alignment_type = ""
+ unless ( $is_terminal_alignment_type{$type} );
+ }
+
+ # Do not align leading ': (' or '. ('. This would prevent
+ # alignment in something like the following:
+ # $extra_space .=
+ # ( $input_line_number < 10 ) ? " "
+ # : ( $input_line_number < 100 ) ? " "
+ # : "";
+ # or
+ # $code =
+ # ( $case_matters ? $accessor : " lc($accessor) " )
+ # . ( $yesno ? " eq " : " ne " )
+
+ # Also, do not align a ( following a leading ? so we can
+ # align something like this:
+ # $converter{$_}->{ushortok} =
+ # $PDL::IO::Pic::biggrays
+ # ? ( m/GIF/ ? 0 : 1 )
+ # : ( m/GIF|RAST|IFF/ ? 0 : 1 );
+ if (
+ $i == $ibeg + 2
+ && $types_to_go[ $i - 1 ] eq 'b'
+ && ( $types_to_go[$ibeg] eq '.'
+ || $types_to_go[$ibeg] eq ':'
+ || $types_to_go[$ibeg] eq '?' )
+ )
+ {
+ $alignment_type = "";
+ }
+
+ # Certain tokens only align at the same level as the
+ # initial line level
+ if ( $is_low_level_alignment_token{$token}
+ && $levels_to_go[$i] != $level_beg )
+ {
+ $alignment_type = "";
+ }
+
+ # For a paren after keyword, only align something like this:
+ # if ( $a ) { &a }
+ # elsif ( $b ) { &b }
+ if ( $token eq '(' ) {
+
+ if ( $vert_last_nonblank_type eq 'k' ) {
+ $alignment_type = ""
+ unless $vert_last_nonblank_token =~
+ /^(if|unless|elsif)$/;
+ }
+
+ # Do not align a spaced-function-paren if requested.
+ # Issue git #53. Note that $i-1 is a blank token if we
+ # get here.
+ if ( !$rOpts_function_paren_vertical_alignment
+ && $i > $ibeg + 1 )
+ {
+ my $type_m = $types_to_go[ $i - 2 ];
+ my $token_m = $tokens_to_go[ $i - 2 ];
+
+ # this is the same test as 'space-function-paren'
+ if ( $type_m =~ /^[wUG]$/
+ || $type_m eq '->'
+ || $type_m =~ /^[wi]$/
+ && $token_m =~ /^(\&|->)/ )
+ {
+ $alignment_type = "";
+ }
+ }
+ }
+
+ # be sure the alignment tokens are unique
+ # This didn't work well: reason not determined
+ # if ($token ne $type) {$alignment_type .= $type}
+ }
+
+ # NOTE: This is deactivated because it causes the previous
+ # if/elsif alignment to fail
+ #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
+ #{ $alignment_type = $type; }
+
+ if ($alignment_type) {
+ $last_vertical_alignment_before_index = $i;
+ }
+
+ #--------------------------------------------------------
+ # Next see if we want to align AFTER the previous nonblank
+ #--------------------------------------------------------
+
+ # We want to line up ',' and interior ';' tokens, with the added
+ # space AFTER these tokens. (Note: interior ';' is included
+ # because it may occur in short blocks).
+ if (
+
+ # we haven't already set it
+ !$alignment_type
+
+ # and its not the first token of the line
+ && ( $i > $ibeg )
+
+ # and it follows a blank
+ && $types_to_go[ $i - 1 ] eq 'b'
+
+ # and previous token IS one of these:
+ && ( $vert_last_nonblank_type eq ','
+ || $vert_last_nonblank_type eq ';' )
+
+ # and it's NOT one of these
+ && ( $type ne 'b'
+ && $type ne '#'
+ && !$is_closing_token{$type} )
+
+ # then go ahead and align
+ )
+
+ {
+ $alignment_type = $vert_last_nonblank_type;
+ }
+
+ #--------------------------------------------------------
+ # Undo alignment in special cases
+ #--------------------------------------------------------
+ if ($alignment_type) {
+
+ # do not align the opening brace of an anonymous sub
+ if ( $token eq '{' && $block_type =~ /$ASUB_PATTERN/ ) {
+ $alignment_type = "";
+ }
+ }
+
+ #--------------------------------------------------------
+ # then store the value
+ #--------------------------------------------------------
+ $ralignment_type_to_go->[$i] = $alignment_type;
+ if ( $type ne 'b' ) {
+ $vert_last_nonblank_type = $type;
+ $vert_last_nonblank_token = $token;
+ $vert_last_nonblank_block_type = $block_type;
+ }
+ }
+ }
+ return $ralignment_type_to_go;
+ }
+} ## end closure set_vertical_alignment_markers
+
+sub get_seqno {
+
+ # get opening and closing sequence numbers of a token for the vertical
+ # aligner. Assign qw quotes a value to allow qw opening and closing tokens
+ # to be treated somewhat like opening and closing tokens for stacking
+ # tokens by the vertical aligner.
+ my ( $self, $ii, $ending_in_quote ) = @_;
+
+ my $rLL = $self->[_rLL_];
+ my $this_batch = $self->[_this_batch_];
+ my $rK_to_go = $this_batch->[_rK_to_go_];
+
+ my $KK = $rK_to_go->[$ii];
+ my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+
+ if ( $rLL->[$KK]->[_TYPE_] eq 'q' ) {
+ my $SEQ_QW = -1;
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ if ( $ii > 0 ) {
+ $seqno = $SEQ_QW if ( $token =~ /^qw\s*[\(\{\[]/ );
+ }
+ else {
+ if ( !$ending_in_quote ) {
+ $seqno = $SEQ_QW if ( $token =~ /[\)\}\]]$/ );
+ }
+ }
+ }
+ return ($seqno);
+}
+
+{
+ my %undo_extended_ci;
+
+ sub initialize_undo_ci {
+ %undo_extended_ci = ();
+ return;
+ }
+
+ sub undo_ci {
+
+ # Undo continuation indentation in certain sequences
+ my ( $self, $ri_first, $ri_last, $rix_seqno_controlling_ci ) = @_;
+ my ( $line_1, $line_2, $lev_last );
+ my $this_line_is_semicolon_terminated;
+ my $max_line = @{$ri_first} - 1;
+
+ my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
+
+ # Prepare a list of controlling indexes for each line if required.
+ # This is used for efficient processing below. Note: this is
+ # critical for speed. In the initial implementation I just looped
+ # through the @$rix_seqno_controlling_ci list below. Using NYT_prof, I
+ # found that this routine was causing a huge run time in large lists.
+ # On a very large list test case, this new coding dropped the run time
+ # of this routine from 30 seconds to 169 milliseconds.
+ my @i_controlling_ci;
+ if ( @{$rix_seqno_controlling_ci} ) {
+ my @tmp = reverse @{$rix_seqno_controlling_ci};
+ my $ix_next = pop @tmp;
+ foreach my $line ( 0 .. $max_line ) {
+ my $iend = $ri_last->[$line];
+ while ( defined($ix_next) && $ix_next <= $iend ) {
+ push @{ $i_controlling_ci[$line] }, $ix_next;
+ $ix_next = pop @tmp;
+ }
+ }
+ }
+
+ # Loop over all lines of the batch ...
+
+ # Workaround for problem c007, in which the combination -lp -xci
+ # can produce a "Program bug" message in unusual circumstances.
+ my $skip_SECTION_1 = $rOpts_line_up_parentheses
+ && $rOpts->{'extended-continuation-indentation'};
+
+ foreach my $line ( 0 .. $max_line ) {
+
+ my $ibeg = $ri_first->[$line];
+ my $iend = $ri_last->[$line];
+ my $lev = $levels_to_go[$ibeg];
+
+ ####################################
+ # SECTION 1: Undo needless common CI
+ ####################################
+
+ # We are looking at leading tokens and looking for a sequence all
+ # at the same level and all at a higher level than enclosing lines.
+
+ # For example, we can undo continuation indentation in sort/map/grep
+ # chains
+
+ # my $dat1 = pack( "n*",
+ # map { $_, $lookup->{$_} }
+ # sort { $a <=> $b }
+ # grep { $lookup->{$_} ne $default } keys %$lookup );
+
+ # to become
+
+ # my $dat1 = pack( "n*",
+ # map { $_, $lookup->{$_} }
+ # sort { $a <=> $b }
+ # grep { $lookup->{$_} ne $default } keys %$lookup );
+
+ if ( $line > 0 && !$skip_SECTION_1 ) {
+
+ # if we have started a chain..
+ if ($line_1) {
+
+ # see if it continues..
+ if ( $lev == $lev_last ) {
+ if ( $types_to_go[$ibeg] eq 'k'
+ && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
+ {
+
+ # chain continues...
+ # check for chain ending at end of a statement
+ if ( $line == $max_line ) {
+
+ # see of this line ends a statement
+ $this_line_is_semicolon_terminated =
+ $types_to_go[$iend] eq ';'
+
+ # with possible side comment
+ || ( $types_to_go[$iend] eq '#'
+ && $iend - $ibeg >= 2
+ && $types_to_go[ $iend - 2 ] eq ';'
+ && $types_to_go[ $iend - 1 ] eq 'b' );
+ }
+ $line_2 = $line
+ if ($this_line_is_semicolon_terminated);
+ }
+ else {
+
+ # kill chain
+ $line_1 = undef;
+ }
+ }
+ elsif ( $lev < $lev_last ) {
+
+ # chain ends with previous line
+ $line_2 = $line - 1;
+ }
+ elsif ( $lev > $lev_last ) {
+
+ # kill chain
+ $line_1 = undef;
+ }
+
+ # undo the continuation indentation if a chain ends
+ if ( defined($line_2) && defined($line_1) ) {
+ my $continuation_line_count = $line_2 - $line_1 + 1;
+ @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ]
+ = (0) x ($continuation_line_count)
+ if ( $continuation_line_count >= 0 );
+ @leading_spaces_to_go[ @{$ri_first}
+ [ $line_1 .. $line_2 ] ] =
+ @reduced_spaces_to_go[ @{$ri_first}
+ [ $line_1 .. $line_2 ] ];
+ $line_1 = undef;
+ }
+ }
+
+ # not in a chain yet..
+ else {
+
+ # look for start of a new sort/map/grep chain
+ if ( $lev > $lev_last ) {
+ if ( $types_to_go[$ibeg] eq 'k'
+ && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
+ {
+ $line_1 = $line;
+ }
+ }
+ }
+ }
+
+ ######################################
+ # SECTION 2: Undo ci at cuddled blocks
+ ######################################
+
+ # Note that sub set_adjusted_indentation will be called later to
+ # actually do this, but for now we will tentatively mark cuddled
+ # lines with ci=0 so that the the -xci loop which follows will be
+ # correct at cuddles.
+ if (
+ $types_to_go[$ibeg] eq '}'
+ && ( $nesting_depth_to_go[$iend] + 1 ==
+ $nesting_depth_to_go[$ibeg] )
+ )
+ {
+ my $terminal_type = $types_to_go[$iend];
+ if ( $terminal_type eq '#' && $iend > $ibeg ) {
+ $terminal_type = $types_to_go[ $iend - 1 ];
+ if ( $terminal_type eq '#' && $iend - 1 > $ibeg ) {
+ $terminal_type = $types_to_go[ $iend - 2 ];
+ }
+ }
+ if ( $terminal_type eq '{' ) {
+ my $Kbeg = $K_to_go[$ibeg];
+ $ci_levels_to_go[$ibeg] = 0;
+ }
+ }
+
+ #########################################################
+ # SECTION 3: Undo ci set by sub extended_ci if not needed
+ #########################################################
+
+ # Undo the ci of the leading token if its controlling token
+ # went out on a previous line without ci
+ if ( $ci_levels_to_go[$ibeg] ) {
+ my $Kbeg = $K_to_go[$ibeg];
+ my $seqno = $rseqno_controlling_my_ci->{$Kbeg};
+ if ( $seqno && $undo_extended_ci{$seqno} ) {
+
+ # but do not undo ci set by the -lp flag
+ if ( !ref( $reduced_spaces_to_go[$ibeg] ) ) {
+ $ci_levels_to_go[$ibeg] = 0;
+ $leading_spaces_to_go[$ibeg] =
+ $reduced_spaces_to_go[$ibeg];
+ }
+ }
+ }
+
+ # Flag any controlling opening tokens in lines without ci. This
+ # will be used later in the above if statement to undo the ci which
+ # they added. The array i_controlling_ci[$line] was prepared at
+ # the top of this routine.
+ if ( !$ci_levels_to_go[$ibeg]
+ && defined( $i_controlling_ci[$line] ) )
+ {
+ foreach my $i ( @{ $i_controlling_ci[$line] } ) {
+ my $seqno = $type_sequence_to_go[$i];
+ $undo_extended_ci{$seqno} = 1;
+ }
+ }
+
+ $lev_last = $lev;
+ }
+
+ return;
+ }
+}
+
+{ ## begin closure set_logical_padding
+ my %is_math_op;
+
+ BEGIN {
+
+ my @q = qw( + - * / );
+ @is_math_op{@q} = (1) x scalar(@q);
+ }
+
+ sub set_logical_padding {
+
+ # Look at a batch of lines and see if extra padding can improve the
+ # alignment when there are certain leading operators. Here is an
+ # example, in which some extra space is introduced before
+ # '( $year' to make it line up with the subsequent lines:
+ #
+ # if ( ( $Year < 1601 )
+ # || ( $Year > 2899 )
+ # || ( $EndYear < 1601 )
+ # || ( $EndYear > 2899 ) )
+ # {
+ # &Error_OutOfRange;
+ # }
+ #
+ my ( $self, $ri_first, $ri_last, $peak_batch_size, $starting_in_quote )
+ = @_;
+ my $max_line = @{$ri_first} - 1;
+
+ my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces,
+ $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
+
+ # Patch to produce padding in the first line of short code blocks.
+ # This is part of an update to fix cases b562 .. b983.
+ # This is needed to compensate for a change which was made in 'sub
+ # starting_one_line_block' to prevent blinkers. Previously, that sub
+ # would not look at the total block size and rely on sub
+ # set_continuation_breaks to break up long blocks. Consequently, the
+ # first line of those batches would end in the opening block brace of a
+ # sort/map/grep/eval block. When this was changed to immediately check
+ # for blocks which were too long, the opening block brace would go out
+ # in a single batch, and the block contents would go out as the next
+ # batch. This caused the logic in this routine which decides if the
+ # first line should be padded to be incorrect. To fix this, we set a
+ # flag if the previous batch ended in an opening sort/map/grep/eval
+ # block brace, and use it to adjust the logic to compensate.
+
+ # For example, the following would have previously been a single batch
+ # but now is two batches. We want to pad the line starting in '$dir':
+ # my (@indices) = # batch n-1 (prev batch n)
+ # sort { # batch n-1 (prev batch n)
+ # $dir eq 'left' # batch n
+ # ? $cells[$a] <=> $cells[$b] # batch n
+ # : $cells[$b] <=> $cells[$a]; # batch n
+ # } ( 0 .. $#cells ); # batch n
+
+ my $rLL = $self->[_rLL_];
+ my $K0 = $K_to_go[0];
+ my $Kprev = $self->K_previous_code($K0);
+ my $is_short_block;
+ if ( defined($Kprev)
+ && $rLL->[$Kprev]->[_BLOCK_TYPE_] )
+ {
+ my $block_type = $rLL->[$Kprev]->[_BLOCK_TYPE_];
+ $is_short_block = $is_sort_map_grep_eval{$block_type};
+ $is_short_block ||= $want_one_line_block{$block_type};
+ }
+
+ # looking at each line of this batch..
+ foreach my $line ( 0 .. $max_line - 1 ) {
+
+ # see if the next line begins with a logical operator
+ $ibeg = $ri_first->[$line];
+ $iend = $ri_last->[$line];
+ $ibeg_next = $ri_first->[ $line + 1 ];
+ $tok_next = $tokens_to_go[$ibeg_next];
+ $type_next = $types_to_go[$ibeg_next];
+
+ $has_leading_op_next = ( $tok_next =~ /^\w/ )
+ ? $is_chain_operator{$tok_next} # + - * / : ? && ||
+ : $is_chain_operator{$type_next}; # and, or
+
+ next unless ($has_leading_op_next);
+
+ # next line must not be at lesser depth
+ next
+ if ( $nesting_depth_to_go[$ibeg] >
+ $nesting_depth_to_go[$ibeg_next] );
+
+ # identify the token in this line to be padded on the left
+ $ipad = undef;
+
+ # handle lines at same depth...
+ if ( $nesting_depth_to_go[$ibeg] ==
+ $nesting_depth_to_go[$ibeg_next] )
+ {
+
+ # if this is not first line of the batch ...
+ if ( $line > 0 ) {
+
+ # and we have leading operator..
+ next if $has_leading_op;
+
+ # Introduce padding if..
+ # 1. the previous line is at lesser depth, or
+ # 2. the previous line ends in an assignment
+ # 3. the previous line ends in a 'return'
+ # 4. the previous line ends in a comma
+ # Example 1: previous line at lesser depth
+ # if ( ( $Year < 1601 ) # <- we are here but
+ # || ( $Year > 2899 ) # list has not yet
+ # || ( $EndYear < 1601 ) # collapsed vertically
+ # || ( $EndYear > 2899 ) )
+ # {
+ #
+ # Example 2: previous line ending in assignment:
+ # $leapyear =
+ # $year % 4 ? 0 # <- We are here
+ # : $year % 100 ? 1
+ # : $year % 400 ? 0
+ # : 1;
+ #
+ # Example 3: previous line ending in comma:
+ # push @expr,
+ # /test/ ? undef
+ # : eval($_) ? 1
+ # : eval($_) ? 1
+ # : 0;
+
+ # be sure levels agree (do not indent after an indented 'if')
+ next
+ if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
+
+ # allow padding on first line after a comma but only if:
+ # (1) this is line 2 and
+ # (2) there are at more than three lines and
+ # (3) lines 3 and 4 have the same leading operator
+ # These rules try to prevent padding within a long
+ # comma-separated list.
+ my $ok_comma;
+ if ( $types_to_go[$iendm] eq ','
+ && $line == 1
+ && $max_line > 2 )
+ {
+ my $ibeg_next_next = $ri_first->[ $line + 2 ];
+ my $tok_next_next = $tokens_to_go[$ibeg_next_next];
+ $ok_comma = $tok_next_next eq $tok_next;
+ }
+
+ next
+ unless (
+ $is_assignment{ $types_to_go[$iendm] }
+ || $ok_comma
+ || ( $nesting_depth_to_go[$ibegm] <
+ $nesting_depth_to_go[$ibeg] )
+ || ( $types_to_go[$iendm] eq 'k'
+ && $tokens_to_go[$iendm] eq 'return' )
+ );
+
+ # we will add padding before the first token
+ $ipad = $ibeg;
+ }
+
+ # for first line of the batch..
+ else {
+
+ # WARNING: Never indent if first line is starting in a
+ # continued quote, which would change the quote.
+ next if $starting_in_quote;
+
+ # if this is text after closing '}'
+ # then look for an interior token to pad
+ if ( $types_to_go[$ibeg] eq '}' ) {
+
+ }
+
+ # otherwise, we might pad if it looks really good
+ elsif ($is_short_block) {
+ $ipad = $ibeg;
+ }
+ else {
+
+ # we might pad token $ibeg, so be sure that it
+ # is at the same depth as the next line.
+ next
+ if ( $nesting_depth_to_go[$ibeg] !=
+ $nesting_depth_to_go[$ibeg_next] );
+
+ # We can pad on line 1 of a statement if at least 3
+ # lines will be aligned. Otherwise, it
+ # can look very confusing.
+
+ # We have to be careful not to pad if there are too few
+ # lines. The current rule is:
+ # (1) in general we require at least 3 consecutive lines
+ # with the same leading chain operator token,
+ # (2) but an exception is that we only require two lines
+ # with leading colons if there are no more lines. For example,
+ # the first $i in the following snippet would get padding
+ # by the second rule:
+ #
+ # $i == 1 ? ( "First", "Color" )
+ # : $i == 2 ? ( "Then", "Rarity" )
+ # : ( "Then", "Name" );
+
+ if ( $max_line > 1 ) {
+ my $leading_token = $tokens_to_go[$ibeg_next];
+ my $tokens_differ;
+
+ # never indent line 1 of a '.' series because
+ # previous line is most likely at same level.
+ # TODO: we should also look at the leading_spaces
+ # of the last output line and skip if it is same
+ # as this line.
+ next if ( $leading_token eq '.' );
+
+ my $count = 1;
+ foreach my $l ( 2 .. 3 ) {
+ last if ( $line + $l > $max_line );
+ my $ibeg_next_next = $ri_first->[ $line + $l ];
+ if ( $tokens_to_go[$ibeg_next_next] ne
+ $leading_token )
+ {
+ $tokens_differ = 1;
+ last;
+ }
+ $count++;
+ }
+ next if ($tokens_differ);
+ next if ( $count < 3 && $leading_token ne ':' );
+ $ipad = $ibeg;
+ }
+ else {
+ next;
+ }
+ }
+ }
+ }
+
+ # find interior token to pad if necessary
+ if ( !defined($ipad) ) {
+
+ for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
+
+ # find any unclosed container
+ next
+ unless ( $type_sequence_to_go[$i]
+ && $mate_index_to_go[$i] > $iend );
+
+ # find next nonblank token to pad
+ $ipad = $inext_to_go[$i];
+ last if ( $ipad > $iend );
+ }
+ last unless $ipad;
+ }
+
+ # We cannot pad the first leading token of a file because
+ # it could cause a bug in which the starting indentation
+ # level is guessed incorrectly each time the code is run
+ # though perltidy, thus causing the code to march off to
+ # the right. For example, the following snippet would have
+ # this problem:
+
+## ov_method mycan( $package, '(""' ), $package
+## or ov_method mycan( $package, '(0+' ), $package
+## or ov_method mycan( $package, '(bool' ), $package
+## or ov_method mycan( $package, '(nomethod' ), $package;
+
+ # If this snippet is within a block this won't happen
+ # unless the user just processes the snippet alone within
+ # an editor. In that case either the user will see and
+ # fix the problem or it will be corrected next time the
+ # entire file is processed with perltidy.
+ next if ( $ipad == 0 && $peak_batch_size <= 1 );
+
+## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
+## IT DID MORE HARM THAN GOOD
+## ceil(
+## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
+## / $upem
+## ),
+##? # do not put leading padding for just 2 lines of math
+##? if ( $ipad == $ibeg
+##? && $line > 0
+##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
+##? && $is_math_op{$type_next}
+##? && $line + 2 <= $max_line )
+##? {
+##? my $ibeg_next_next = $ri_first->[ $line + 2 ];
+##? my $type_next_next = $types_to_go[$ibeg_next_next];
+##? next if !$is_math_op{$type_next_next};
+##? }
+
+ # next line must not be at greater depth
+ my $iend_next = $ri_last->[ $line + 1 ];
+ next
+ if ( $nesting_depth_to_go[ $iend_next + 1 ] >
+ $nesting_depth_to_go[$ipad] );
+
+ # lines must be somewhat similar to be padded..
+ my $inext_next = $inext_to_go[$ibeg_next];
+ my $type = $types_to_go[$ipad];
+ my $type_next = $types_to_go[ $ipad + 1 ];
+
+ # see if there are multiple continuation lines
+ my $logical_continuation_lines = 1;
+ if ( $line + 2 <= $max_line ) {
+ my $leading_token = $tokens_to_go[$ibeg_next];
+ my $ibeg_next_next = $ri_first->[ $line + 2 ];
+ if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
+ && $nesting_depth_to_go[$ibeg_next] eq
+ $nesting_depth_to_go[$ibeg_next_next] )
+ {
+ $logical_continuation_lines++;
+ }
+ }
+
+ # see if leading types match
+ my $types_match = $types_to_go[$inext_next] eq $type;
+ my $matches_without_bang;
+
+ # if first line has leading ! then compare the following token
+ if ( !$types_match && $type eq '!' ) {
+ $types_match = $matches_without_bang =
+ $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
+ }
+ if (
+
+ # either we have multiple continuation lines to follow
+ # and we are not padding the first token
+ (
+ $logical_continuation_lines > 1
+ && ( $ipad > 0 || $is_short_block )
+ )
+
+ # or..
+ || (
+
+ # types must match
+ $types_match
+
+ # and keywords must match if keyword
+ && !(
+ $type eq 'k'
+ && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
+ )
+ )
+ )
+ {
+
+ #----------------------begin special checks--------------
+ #
+ # SPECIAL CHECK 1:
+ # A check is needed before we can make the pad.
+ # If we are in a list with some long items, we want each
+ # item to stand out. So in the following example, the
+ # first line beginning with '$casefold->' would look good
+ # padded to align with the next line, but then it
+ # would be indented more than the last line, so we
+ # won't do it.
+ #
+ # ok(
+ # $casefold->{code} eq '0041'
+ # && $casefold->{status} eq 'C'
+ # && $casefold->{mapping} eq '0061',
+ # 'casefold 0x41'
+ # );
+ #
+ # Note:
+ # It would be faster, and almost as good, to use a comma
+ # count, and not pad if comma_count > 1 and the previous
+ # line did not end with a comma.
+ #
+ my $ok_to_pad = 1;
+
+ my $ibg = $ri_first->[ $line + 1 ];
+ my $depth = $nesting_depth_to_go[ $ibg + 1 ];
+
+ # just use simplified formula for leading spaces to avoid
+ # needless sub calls
+ my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
+
+ # look at each line beyond the next ..
+ my $l = $line + 1;
+ foreach my $ltest ( $line + 2 .. $max_line ) {
+ $l = $ltest;
+ my $ibg = $ri_first->[$l];
+
+ # quit looking at the end of this container
+ last
+ if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
+ || ( $nesting_depth_to_go[$ibg] < $depth );
+
+ # cannot do the pad if a later line would be
+ # outdented more
+ if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
+ $ok_to_pad = 0;
+ last;
+ }
+ }
+
+ # don't pad if we end in a broken list
+ if ( $l == $max_line ) {
+ my $i2 = $ri_last->[$l];
+ if ( $types_to_go[$i2] eq '#' ) {
+ my $i1 = $ri_first->[$l];
+ next if terminal_type_i( $i1, $i2 ) eq ',';
+ }
+ }
+
+ # SPECIAL CHECK 2:
+ # a minus may introduce a quoted variable, and we will
+ # add the pad only if this line begins with a bare word,
+ # such as for the word 'Button' here:
+ # [
+ # Button => "Print letter \"~$_\"",
+ # -command => [ sub { print "$_[0]\n" }, $_ ],
+ # -accelerator => "Meta+$_"
+ # ];
+ #
+ # On the other hand, if 'Button' is quoted, it looks best
+ # not to pad:
+ # [
+ # 'Button' => "Print letter \"~$_\"",
+ # -command => [ sub { print "$_[0]\n" }, $_ ],
+ # -accelerator => "Meta+$_"
+ # ];
+ if ( $types_to_go[$ibeg_next] eq 'm' ) {
+ $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
+ }
+
+ next unless $ok_to_pad;
+
+ #----------------------end special check---------------
+
+ my $length_1 = total_line_length( $ibeg, $ipad - 1 );
+ my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
+ $pad_spaces = $length_2 - $length_1;
+
+ # If the first line has a leading ! and the second does
+ # not, then remove one space to try to align the next
+ # leading characters, which are often the same. For example:
+ # if ( !$ts
+ # || $ts == $self->Holder
+ # || $self->Holder->Type eq "Arena" )
+ #
+ # This usually helps readability, but if there are subsequent
+ # ! operators things will still get messed up. For example:
+ #
+ # if ( !exists $Net::DNS::typesbyname{$qtype}
+ # && exists $Net::DNS::classesbyname{$qtype}
+ # && !exists $Net::DNS::classesbyname{$qclass}
+ # && exists $Net::DNS::typesbyname{$qclass} )
+ # We can't fix that.
+ if ($matches_without_bang) { $pad_spaces-- }
+
+ # make sure this won't change if -lp is used
+ my $indentation_1 = $leading_spaces_to_go[$ibeg];
+ if ( ref($indentation_1) ) {
+ if ( $indentation_1->get_recoverable_spaces() == 0 ) {
+ my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
+ unless ( $indentation_2->get_recoverable_spaces() == 0 )
+ {
+ $pad_spaces = 0;
+ }
+ }
+ }
+
+ # we might be able to handle a pad of -1 by removing a blank
+ # token
+ if ( $pad_spaces < 0 ) {
+
+ # Deactivated for -kpit due to conflict. This block deletes
+ # a space in an attempt to improve alignment in some cases,
+ # but it may conflict with user spacing requests. For now
+ # it is just deactivated if the -kpit option is used.
+ if ( $pad_spaces == -1 ) {
+ if ( $ipad > $ibeg
+ && $types_to_go[ $ipad - 1 ] eq 'b'
+ && !%keyword_paren_inner_tightness )
+ {
+ $self->pad_token( $ipad - 1, $pad_spaces );
+ }
+ }
+ $pad_spaces = 0;
+ }
+
+ # now apply any padding for alignment
+ if ( $ipad >= 0 && $pad_spaces ) {
+
+ my $length_t = total_line_length( $ibeg, $iend );
+ if ( $pad_spaces + $length_t <=
+ $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] )
+ {
+ $self->pad_token( $ipad, $pad_spaces );
+ }
+ }
+ }
+ }
+ continue {
+ $iendm = $iend;
+ $ibegm = $ibeg;
+ $has_leading_op = $has_leading_op_next;
+ } # end of loop over lines
+ return;
+ }
+} ## end closure set_logical_padding
+
+sub pad_token {
+
+ # insert $pad_spaces before token number $ipad
+ my ( $self, $ipad, $pad_spaces ) = @_;
+ my $rLL = $self->[_rLL_];
+ my $KK = $K_to_go[$ipad];
+ my $tok = $rLL->[$KK]->[_TOKEN_];
+ my $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_];
+
+ if ( $pad_spaces > 0 ) {
+ $tok = ' ' x $pad_spaces . $tok;
+ $tok_len += $pad_spaces;
+ }
+ elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) {
+ $tok = "";
+ $tok_len = 0;
+ }
+ else {
+
+ # shouldn't happen
+ return;
+ }
+
+ $tok = $rLL->[$KK]->[_TOKEN_] = $tok;
+ $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
+
+ $token_lengths_to_go[$ipad] += $pad_spaces;
+ $tokens_to_go[$ipad] = $tok;
+
+ foreach my $i ( $ipad .. $max_index_to_go ) {
+ $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
+ }
+ return;
+}
+
+{ ## begin closure make_alignment_patterns
+
+ my %block_type_map;
+ my %keyword_map;
+ my %operator_map;
+ my %is_w_n_C;
+
+ BEGIN {
+
+ # map related block names into a common name to
+ # allow alignment
+ %block_type_map = (
+ 'unless' => 'if',
+ 'else' => 'if',
+ 'elsif' => 'if',
+ 'when' => 'if',
+ 'default' => 'if',
+ 'case' => 'if',
+ 'sort' => 'map',
+ 'grep' => 'map',
+ );
+
+ # map certain keywords to the same 'if' class to align
+ # long if/elsif sequences. [elsif.pl]
+ %keyword_map = (
+ 'unless' => 'if',
+ 'else' => 'if',
+ 'elsif' => 'if',
+ 'when' => 'given',
+ 'default' => 'given',
+ 'case' => 'switch',
+
+ # treat an 'undef' similar to numbers and quotes
+ 'undef' => 'Q',
+ );
+
+ # map certain operators to the same class for pattern matching
+ %operator_map = (
+ '!~' => '=~',
+ '+=' => '+=',
+ '-=' => '+=',
+ '*=' => '+=',
+ '/=' => '+=',
+ );
+
+ %is_w_n_C = (
+ 'w' => 1,
+ 'n' => 1,
+ 'C' => 1,
+ );
+ }
+
+ sub delete_needless_alignments {
+ my ( $self, $ibeg, $iend, $ralignment_type_to_go ) = @_;
+
+ # Remove unwanted alignments. This routine is a place to remove
+ # alignments which might cause problems at later stages. There are
+ # currently two types of fixes:
+
+ # 1. Remove excess parens
+ # 2. Remove alignments within 'elsif' conditions
+
+ # Patch #1: Excess alignment of parens can prevent other good
+ # alignments. For example, note the parens in the first two rows of
+ # the following snippet. They would normally get marked for alignment
+ # and aligned as follows:
+
+ # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
+ # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
+ # my $img = new Gimp::Image( $w, $h, RGB );
+
+ # This causes unnecessary paren alignment and prevents the third equals
+ # from aligning. If we remove the unwanted alignments we get:
+
+ # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
+ # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
+ # my $img = new Gimp::Image( $w, $h, RGB );
+
+ # A rule for doing this which works well is to remove alignment of
+ # parens whose containers do not contain other aligning tokens, with
+ # the exception that we always keep alignment of the first opening
+ # paren on a line (for things like 'if' and 'elsif' statements).
+
+ # Setup needed constants
+ my $i_good_paren = -1;
+ my $imin_match = $iend + 1;
+ my $i_elsif_close = $ibeg - 1;
+ my $i_elsif_open = $iend + 1;
+ if ( $iend > $ibeg ) {
+ if ( $types_to_go[$ibeg] eq 'k' ) {
+
+ # Paren patch: mark a location of a paren we should keep, such
+ # as one following something like a leading 'if', 'elsif',..
+ $i_good_paren = $ibeg + 1;
+ if ( $types_to_go[$i_good_paren] eq 'b' ) {
+ $i_good_paren++;
+ }
+
+ # 'elsif' patch: remember the range of the parens of an elsif,
+ # and do not make alignments within them because this can cause
+ # loss of padding and overall brace alignment in the vertical
+ # aligner.
+ if ( $tokens_to_go[$ibeg] eq 'elsif'
+ && $i_good_paren < $iend
+ && $tokens_to_go[$i_good_paren] eq '(' )
+ {
+ $i_elsif_open = $i_good_paren;
+ $i_elsif_close = $mate_index_to_go[$i_good_paren];
+ }
+ }
+ }
+
+ # Loop to make the fixes on this line
+ my @imatch_list;
+ for my $i ( $ibeg .. $iend ) {
+
+ if ( $ralignment_type_to_go->[$i] ) {
+
+ # Patch #2: undo alignment within elsif parens
+ if ( $i > $i_elsif_open && $i < $i_elsif_close ) {
+ $ralignment_type_to_go->[$i] = '';
+ next;
+ }
+ push @imatch_list, $i;
+
+ }
+ if ( $tokens_to_go[$i] eq ')' ) {
+
+ # Patch #1: undo the corresponding opening paren if:
+ # - it is at the top of the stack
+ # - and not the first overall opening paren
+ # - does not follow a leading keyword on this line
+ my $imate = $mate_index_to_go[$i];
+ if ( @imatch_list
+ && $imatch_list[-1] eq $imate
+ && ( $ibeg > 1 || @imatch_list > 1 )
+ && $imate > $i_good_paren )
+ {
+ $ralignment_type_to_go->[$imate] = '';
+ pop @imatch_list;
+ }
+ }
+ }
+ return;
+ }
+
+ sub make_alignment_patterns {
+
+ # Here we do some important preliminary work for the
+ # vertical aligner. We create three arrays for one
+ # output line. These arrays contain strings that can
+ # be tested by the vertical aligner to see if
+ # consecutive lines can be aligned vertically.
+ #
+ # The three arrays are indexed on the vertical
+ # alignment fields and are:
+ # @tokens - a list of any vertical alignment tokens for this line.
+ # These are tokens, such as '=' '&&' '#' etc which
+ # we want to might align vertically. These are
+ # decorated with various information such as
+ # nesting depth to prevent unwanted vertical
+ # alignment matches.
+ # @fields - the actual text of the line between the vertical alignment
+ # tokens.
+ # @patterns - a modified list of token types, one for each alignment
+ # field. These should normally each match before alignment is
+ # allowed, even when the alignment tokens match.
+ my ( $self, $ibeg, $iend, $ralignment_type_to_go ) = @_;
+ my @tokens = ();
+ my @fields = ();
+ my @patterns = ();
+ my @field_lengths = ();
+ my $i_start = $ibeg;
+
+ # For a 'use' statement, use the module name as container name.
+ # Fixes issue rt136416.
+ my $cname = "";
+ if ( $types_to_go[$ibeg] eq 'k' && $tokens_to_go[$ibeg] eq 'use' ) {
+ my $inext = $inext_to_go[$ibeg];
+ if ( $inext <= $iend ) { $cname = $tokens_to_go[$inext] }
+ }
+
+ my $depth = 0;
+ my %container_name = ( 0 => "$cname" );
+
+ my $j = 0; # field index
+
+ $patterns[0] = "";
+ my %token_count;
+ for my $i ( $ibeg .. $iend ) {
+
+ # Keep track of containers balanced on this line only.
+ # These are used below to prevent unwanted cross-line alignments.
+ # Unbalanced containers already avoid aligning across
+ # container boundaries.
+
+ my $type = $types_to_go[$i];
+ my $token = $tokens_to_go[$i];
+ my $depth_last = $depth;
+ if ( $type_sequence_to_go[$i] ) {
+ if ( $is_opening_type{$token} ) {
+
+ # if container is balanced on this line...
+ my $i_mate = $mate_index_to_go[$i];
+ if ( $i_mate > $i && $i_mate <= $iend ) {
+ $depth++;
+
+ # Append the previous token name to make the container name
+ # more unique. This name will also be given to any commas
+ # within this container, and it helps avoid undesirable
+ # alignments of different types of containers.
+
+ # Containers beginning with { and [ are given those names
+ # for uniqueness. That way commas in different containers
+ # will not match. Here is an example of what this prevents:
+ # a => [ 1, 2, 3 ],
+ # b => { b1 => 4, b2 => 5 },
+ # Here is another example of what we avoid by labeling the
+ # commas properly:
+
+ # is_d( [ $a, $a ], [ $b, $c ] );
+ # is_d( { foo => $a, bar => $a }, { foo => $b, bar => $c } );
+ # is_d( [ \$a, \$a ], [ \$b, \$c ] );
+
+ my $name = $token;
+ if ( $token eq '(' ) {
+ $name = $self->make_paren_name($i);
+ }
+ $container_name{$depth} = "+" . $name;
+
+ # Make the container name even more unique if necessary.
+ # If we are not vertically aligning this opening paren,
+ # append a character count to avoid bad alignment because
+ # it usually looks bad to align commas within containers
+ # for which the opening parens do not align. Here
+ # is an example very BAD alignment of commas (because
+ # the atan2 functions are not all aligned):
+ # $XY =
+ # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
+ # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
+ # $X * atan2( $X, 1 ) -
+ # $Y * atan2( $Y, 1 );
+ #
+ # On the other hand, it is usually okay to align commas
+ # if opening parens align, such as:
+ # glVertex3d( $cx + $s * $xs, $cy, $z );
+ # glVertex3d( $cx, $cy + $s * $ys, $z );
+ # glVertex3d( $cx - $s * $xs, $cy, $z );
+ # glVertex3d( $cx, $cy - $s * $ys, $z );
+ #
+ # To distinguish between these situations, we will append
+ # the length of the line from the previous matching
+ # token, or beginning of line, to the function name.
+ # This will allow the vertical aligner to reject
+ # undesirable matches.
+
+ # if we are not aligning on this paren...
+ if ( !$ralignment_type_to_go->[$i] ) {
+
+ # Sum length from previous alignment
+ my $len = token_sequence_length( $i_start, $i - 1 );
+
+ # Minor patch: do not include the length of any '!'.
+ # Otherwise, commas in the following line will not
+ # match
+ # ok( 20, tapprox( ( pdl 2, 3 ), ( pdl 2, 3 ) ) );
+ # ok( 21, !tapprox( ( pdl 2, 3 ), ( pdl 2, 4 ) ) );
+ if ( grep { $_ eq '!' }
+ @types_to_go[ $i_start .. $i - 1 ] )
+ {
+ $len -= 1;
+ }
+
+ if ( $i_start == $ibeg ) {
+
+ # For first token, use distance from start of line
+ # but subtract off the indentation due to level.
+ # Otherwise, results could vary with indentation.
+ $len +=
+ leading_spaces_to_go($ibeg) -
+ $levels_to_go[$i_start] *
+ $rOpts_indent_columns;
+ if ( $len < 0 ) { $len = 0 }
+ }
+
+ # tack this length onto the container name to try
+ # to make a unique token name
+ $container_name{$depth} .= "-" . $len;
+ }
+ }
+ }
+ elsif ( $is_closing_type{$token} ) {
+ $depth-- if $depth > 0;
+ }
+ }
+
+ # if we find a new synchronization token, we are done with
+ # a field
+ if ( $i > $i_start && $ralignment_type_to_go->[$i] ) {
+
+ my $tok = my $raw_tok = $ralignment_type_to_go->[$i];
+
+ # map similar items
+ my $tok_map = $operator_map{$tok};
+ $tok = $tok_map if ($tok_map);
+
+ # make separators in different nesting depths unique
+ # by appending the nesting depth digit.
+ if ( $raw_tok ne '#' ) {
+ $tok .= "$nesting_depth_to_go[$i]";
+ }
+
+ # also decorate commas with any container name to avoid
+ # unwanted cross-line alignments.
+ if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
+
+ # If we are at an opening token which increased depth, we have
+ # to use the name from the previous depth.
+ my $depth_p =
+ ( $depth_last < $depth ? $depth_last : $depth );
+ if ( $container_name{$depth_p} ) {
+ $tok .= $container_name{$depth_p};
+ }
+ }
+
+ # Patch to avoid aligning leading and trailing if, unless.
+ # Mark trailing if, unless statements with container names.
+ # This makes them different from leading if, unless which
+ # are not so marked at present. If we ever need to name
+ # them too, we could use ci to distinguish them.
+ # Example problem to avoid:
+ # return ( 2, "DBERROR" )
+ # if ( $retval == 2 );
+ # if ( scalar @_ ) {
+ # my ( $a, $b, $c, $d, $e, $f ) = @_;
+ # }
+ if ( $raw_tok eq '(' ) {
+ if ( $ci_levels_to_go[$ibeg]
+ && $container_name{$depth} =~ /^\+(if|unless)/ )
+ {
+ $tok .= $container_name{$depth};
+ }
+ }
+
+ # Decorate block braces with block types to avoid
+ # unwanted alignments such as the following:
+ # foreach ( @{$routput_array} ) { $fh->print($_) }
+ # eval { $fh->close() };
+ if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
+ my $block_type = $block_type_to_go[$i];
+
+ # map certain related block types to allow
+ # else blocks to align
+ $block_type = $block_type_map{$block_type}
+ if ( defined( $block_type_map{$block_type} ) );
+
+ # remove sub names to allow one-line sub braces to align
+ # regardless of name
+ if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
+
+ # allow all control-type blocks to align
+ if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
+
+ $tok .= $block_type;
+ }
+
+ # Mark multiple copies of certain tokens with the copy number
+ # This will allow the aligner to decide if they are matched.
+ # For now, only do this for equals. For example, the two
+ # equals on the next line will be labeled '=0' and '=0.2'.
+ # Later, the '=0.2' will be ignored in alignment because it
+ # has no match.
+
+ # $| = $debug = 1 if $opt_d;
+ # $full_index = 1 if $opt_i;
+
+ if ( $raw_tok eq '=' || $raw_tok eq '=>' ) {
+ $token_count{$tok}++;
+ if ( $token_count{$tok} > 1 ) {
+ $tok .= '.' . $token_count{$tok};
+ }
+ }
+
+ # concatenate the text of the consecutive tokens to form
+ # the field
+ push( @fields,
+ join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
+
+ push @field_lengths,
+ $summed_lengths_to_go[$i] - $summed_lengths_to_go[$i_start];
+
+ # store the alignment token for this field
+ push( @tokens, $tok );
+
+ # get ready for the next batch
+ $i_start = $i;
+ $j++;
+ $patterns[$j] = "";
+ }
+
+ # continue accumulating tokens
+
+ # for keywords we have to use the actual text
+ if ( $type eq 'k' ) {
+
+ my $tok_fix = $tokens_to_go[$i];
+
+ # but map certain keywords to a common string to allow
+ # alignment.
+ $tok_fix = $keyword_map{$tok_fix}
+ if ( defined( $keyword_map{$tok_fix} ) );
+ $patterns[$j] .= $tok_fix;
+ }
+
+ elsif ( $type eq 'b' ) {
+ $patterns[$j] .= $type;
+ }
+
+ # handle non-keywords..
+ else {
+
+ my $type_fix = $type;
+
+ # Mark most things before arrows as a quote to
+ # get them to line up. Testfile: mixed.pl.
+ # $type =~ /^[wnC]$/
+ if ( $i < $iend - 1 && $is_w_n_C{$type} ) {
+ my $next_type = $types_to_go[ $i + 1 ];
+ my $i_next_nonblank =
+ ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
+
+ if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
+ $type_fix = 'Q';
+
+ # Patch to ignore leading minus before words,
+ # by changing pattern 'mQ' into just 'Q',
+ # so that we can align things like this:
+ # Button => "Print letter \"~$_\"",
+ # -command => [ sub { print "$_[0]\n" }, $_ ],
+ if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
+ }
+ }
+
+ # Convert a bareword within braces into a quote for matching.
+ # This will allow alignment of expressions like this:
+ # local ( $SIG{'INT'} ) = IGNORE;
+ # local ( $SIG{ALRM} ) = 'POSTMAN';
+ if ( $type eq 'w'
+ && $i > $ibeg
+ && $i < $iend
+ && $types_to_go[ $i - 1 ] eq 'L'
+ && $types_to_go[ $i + 1 ] eq 'R' )
+ {
+ $type_fix = 'Q';
+ }
+
+ # patch to make numbers and quotes align
+ if ( $type eq 'n' ) { $type_fix = 'Q' }
+
+ # patch to ignore any ! in patterns
+ if ( $type eq '!' ) { $type_fix = '' }
+
+ $patterns[$j] .= $type_fix;
+
+ # remove any zero-level name at first fat comma
+ if ( $depth == 0 && $type eq '=>' ) {
+ $container_name{$depth} = "";
+ }
+
+ }
+ }
+
+ # done with this line .. join text of tokens to make the last field
+ push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
+ push @field_lengths,
+ $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$i_start];
+
+ return ( \@tokens, \@fields, \@patterns, \@field_lengths );
+ }
+
+} ## end closure make_alignment_patterns
+
+sub make_paren_name {
+ my ( $self, $i ) = @_;
+
+ # The token at index $i is a '('.
+ # Create an alignment name for it to avoid incorrect alignments.
+
+ # Start with the name of the previous nonblank token...
+ my $name = "";
+ my $im = $i - 1;
+ return "" if ( $im < 0 );
+ if ( $types_to_go[$im] eq 'b' ) { $im--; }
+ return "" if ( $im < 0 );
+ $name = $tokens_to_go[$im];
+
+ # Prepend any sub name to an isolated -> to avoid unwanted alignments
+ # [test case is test8/penco.pl]
+ if ( $name eq '->' ) {
+ $im--;
+ if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
+ $name = $tokens_to_go[$im] . $name;
+ }
+ }
+
+ # Finally, remove any leading arrows
+ if ( substr( $name, 0, 2 ) eq '->' ) {
+ $name = substr( $name, 2 );
+ }
+ return $name;
+}
+
+{ ## begin closure set_adjusted_indentation
+
+ my ( $last_indentation_written, $last_unadjusted_indentation,
+ $last_leading_token );
+
+ sub initialize_adjusted_indentation {
+ $last_indentation_written = 0;
+ $last_unadjusted_indentation = 0;
+ $last_leading_token = "";
+ return;
+ }
+
+ sub set_adjusted_indentation {
+
+ # This routine has the final say regarding the actual indentation of
+ # a line. It starts with the basic indentation which has been
+ # defined for the leading token, and then takes into account any
+ # options that the user has set regarding special indenting and
+ # outdenting.
+
+ # This routine has to resolve a number of complex interacting issues,
+ # including:
+ # 1. The various -cti=n type flags, which contain the desired change in
+ # indentation for lines ending in commas and semicolons, should be
+ # followed,
+ # 2. qw quotes require special processing and do not fit perfectly
+ # with normal containers,
+ # 3. formatting with -wn can complicate things, especially with qw
+ # quotes,
+ # 4. formatting with the -lp option is complicated, and does not
+ # work well with qw quotes and with -wn formatting.
+ # 5. a number of special situations, such as 'cuddled' formatting.
+ # 6. This routine is mainly concerned with outdenting closing tokens
+ # but note that there is some overlap with the functions of sub
+ # undo_ci, which was processed earlier, so care has to be taken to
+ # keep them coordinated.
+
+ my (
+ $self, $ibeg,
+ $iend, $rfields,
+ $rpatterns, $ri_first,
+ $ri_last, $rindentation_list,
+ $level_jump, $starting_in_quote,
+ $is_static_block_comment,
+ ) = @_;
+
+ my $rLL = $self->[_rLL_];
+ my $ris_bli_container = $self->[_ris_bli_container_];
+ my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
+ my $rwant_reduced_ci = $self->[_rwant_reduced_ci_];
+ my $rK_weld_left = $self->[_rK_weld_left_];
+
+ # we need to know the last token of this line
+ my ( $terminal_type, $i_terminal ) = terminal_type_i( $ibeg, $iend );
+
+ my $terminal_block_type = $block_type_to_go[$i_terminal];
+ my $is_outdented_line = 0;
+
+ my $terminal_is_in_list = $self->is_in_list_by_i($i_terminal);
+
+ my $type_beg = $types_to_go[$ibeg];
+ my $token_beg = $tokens_to_go[$ibeg];
+ my $K_beg = $K_to_go[$ibeg];
+ my $ibeg_weld_fix = $ibeg;
+ my $seqno_beg = $type_sequence_to_go[$ibeg];
+ my $is_bli_beg = $seqno_beg ? $ris_bli_container->{$seqno_beg} : 0;
+
+ # QW INDENTATION PATCH 3:
+ my $seqno_qw_closing;
+ if ( $type_beg eq 'q' && $ibeg == 0 ) {
+ my $KK = $K_to_go[$ibeg];
+ $seqno_qw_closing =
+ $self->[_rending_multiline_qw_seqno_by_K_]->{$KK};
+ }
+
+ my $is_semicolon_terminated = $terminal_type eq ';'
+ && ( $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg]
+ || $seqno_qw_closing );
+
+ # NOTE: A future improvement would be to make it semicolon terminated
+ # even if it does not have a semicolon but is followed by a closing
+ # block brace. This would undo ci even for something like the
+ # following, in which the final paren does not have a semicolon because
+ # it is a possible weld location:
+
+ # if ($BOLD_MATH) {
+ # (
+ # $labels, $comment,
+ # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
+ # )
+ # }
+ #
+
+ # MOJO: Set a flag if this lines begins with ')->'
+ my $leading_paren_arrow = (
+ $types_to_go[$ibeg] eq '}'
+ && $tokens_to_go[$ibeg] eq ')'
+ && (
+ ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' )
+ || ( $ibeg < $i_terminal - 1
+ && $types_to_go[ $ibeg + 1 ] eq 'b'
+ && $types_to_go[ $ibeg + 2 ] eq '->' )
+ )
+ );
+
+ ##########################################################
+ # Section 1: set a flag and a default indentation
+ #
+ # Most lines are indented according to the initial token.
+ # But it is common to outdent to the level just after the
+ # terminal token in certain cases...
+ # adjust_indentation flag:
+ # 0 - do not adjust
+ # 1 - outdent
+ # 2 - vertically align with opening token
+ # 3 - indent
+ ##########################################################
+ my $adjust_indentation = 0;
+ my $default_adjust_indentation = $adjust_indentation;
+
+ my (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ );
+
+ # Honor any flag to reduce -ci set by the -bbxi=n option
+ if ( $seqno_beg && $rwant_reduced_ci->{$seqno_beg} ) {
+
+ # if this is an opening, it must be alone on the line ...
+ if ( $is_closing_type{$type_beg} || $ibeg == $i_terminal ) {
+ $adjust_indentation = 1;
+ }
+
+ # ... or a single welded unit (fix for b1173)
+ elsif ($total_weld_count) {
+ my $Kterm = $K_to_go[$i_terminal];
+ my $Kterm_test = $rK_weld_left->{$Kterm};
+ if ( defined($Kterm_test) && $Kterm_test >= $K_beg ) {
+ $Kterm = $Kterm_test;
+ }
+ if ( $Kterm == $K_beg ) { $adjust_indentation = 1 }
+ }
+ }
+
+ # Update the $is_bli flag as we go. It is initially 1.
+ # We note seeing a leading opening brace by setting it to 2.
+ # If we get to the closing brace without seeing the opening then we
+ # turn it off. This occurs if the opening brace did not get output
+ # at the start of a line, so we will then indent the closing brace
+ # in the default way.
+ if ( $is_bli_beg && $is_bli_beg == 1 ) {
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_opening = $K_opening_container->{$seqno_beg};
+ if ( $K_beg eq $K_opening ) {
+ $ris_bli_container->{$seqno_beg} = $is_bli_beg = 2;
+ }
+ else { $is_bli_beg = 0 }
+ }
+
+ # QW PATCH for the combination -lp -wn
+ # For -lp formatting use $ibeg_weld_fix to get around the problem
+ # that with -lp type formatting the opening and closing tokens to not
+ # have sequence numbers.
+ if ( $seqno_qw_closing && $total_weld_count ) {
+ my $K_next_nonblank = $self->K_next_code($K_beg);
+ if ( defined($K_next_nonblank)
+ && defined( $rK_weld_left->{$K_next_nonblank} ) )
+ {
+ my $itest = $ibeg + ( $K_next_nonblank - $K_beg );
+ if ( $itest <= $max_index_to_go ) {
+ $ibeg_weld_fix = $itest;
+ }
+ }
+ }
+
+ # if we are at a closing token of some type..
+ if ( $is_closing_type{$type_beg} || $seqno_qw_closing ) {
+
+ # get the indentation of the line containing the corresponding
+ # opening token
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first,
+ $ri_last, $rindentation_list, $seqno_qw_closing );
+
+ # First set the default behavior:
+ if (
+
+ # default behavior is to outdent closing lines
+ # of the form: "); }; ]; )->xxx;"
+ $is_semicolon_terminated
+
+ # and 'cuddled parens' of the form: ")->pack("
+ # Bug fix for RT #123749]: the types here were
+ # incorrectly '(' and ')'. Corrected to be '{' and '}'
+ || (
+ $terminal_type eq '{'
+ && $type_beg eq '}'
+ && ( $nesting_depth_to_go[$iend] + 1 ==
+ $nesting_depth_to_go[$ibeg] )
+ )
+
+ # remove continuation indentation for any line like
+ # } ... {
+ # or without ending '{' and unbalanced, such as
+ # such as '}->{$operator}'
+ || (
+ $type_beg eq '}'
+
+ && ( $types_to_go[$iend] eq '{'
+ || $levels_to_go[$iend] < $levels_to_go[$ibeg] )
+ )
+
+ # and when the next line is at a lower indentation level...
+
+ # PATCH #1: and only if the style allows undoing continuation
+ # for all closing token types. We should really wait until
+ # the indentation of the next line is known and then make
+ # a decision, but that would require another pass.
+
+ # PATCH #2: and not if this token is under -xci control
+ || ( $level_jump < 0
+ && !$some_closing_token_indentation
+ && !$rseqno_controlling_my_ci->{$K_beg} )
+
+ # Patch for -wn=2, multiple welded closing tokens
+ || ( $i_terminal > $ibeg
+ && $is_closing_type{ $types_to_go[$iend] } )
+
+ # Alternate Patch for git #51, isolated closing qw token not
+ # outdented if no-delete-old-newlines is set. This works, but
+ # a more general patch elsewhere fixes the real problem: ljump.
+ # || ( $seqno_qw_closing && $ibeg == $i_terminal )
+
+ )
+ {
+ $adjust_indentation = 1;
+ }
+
+ # outdent something like '),'
+ if (
+ $terminal_type eq ','
+
+ # Removed this constraint for -wn
+ # OLD: allow just one character before the comma
+ # && $i_terminal == $ibeg + 1
+
+ # require LIST environment; otherwise, we may outdent too much -
+ # this can happen in calls without parentheses (overload.t);
+ && $terminal_is_in_list
+ )
+ {
+ $adjust_indentation = 1;
+ }
+
+ # undo continuation indentation of a terminal closing token if
+ # it is the last token before a level decrease. This will allow
+ # a closing token to line up with its opening counterpart, and
+ # avoids an indentation jump larger than 1 level.
+ if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
+ && $i_terminal == $ibeg
+ && defined($K_beg) )
+ {
+ my $K_next_nonblank = $self->K_next_code($K_beg);
+
+ if ( !$is_bli_beg && defined($K_next_nonblank) ) {
+ my $lev = $rLL->[$K_beg]->[_LEVEL_];
+ my $level_next = $rLL->[$K_next_nonblank]->[_LEVEL_];
+
+ # and do not undo ci if it was set by the -xci option
+ $adjust_indentation = 1
+ if ( $level_next < $lev
+ && !$rseqno_controlling_my_ci->{$K_beg} );
+ }
+
+ # Patch for RT #96101, in which closing brace of anonymous subs
+ # was not outdented. We should look ahead and see if there is
+ # a level decrease at the next token (i.e., a closing token),
+ # but right now we do not have that information. For now
+ # we see if we are in a list, and this works well.
+ # See test files 'sub*.t' for good test cases.
+ if ( $block_type_to_go[$ibeg] =~ /$ASUB_PATTERN/
+ && $terminal_is_in_list
+ && !$rOpts->{'indent-closing-brace'} )
+ {
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = $self->get_opening_indentation( $ibeg, $ri_first,
+ $ri_last, $rindentation_list );
+ my $indentation = $leading_spaces_to_go[$ibeg];
+ if ( defined($opening_indentation)
+ && get_spaces($indentation) >
+ get_spaces($opening_indentation) )
+ {
+ $adjust_indentation = 1;
+ }
+ }
+ }
+
+ # YVES patch 1 of 2:
+ # Undo ci of line with leading closing eval brace,
+ # but not beyond the indention of the line with
+ # the opening brace.
+ if ( $block_type_to_go[$ibeg] eq 'eval'
+ && !$rOpts->{'line-up-parentheses'}
+ && !$rOpts->{'indent-closing-brace'} )
+ {
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
+ $rindentation_list );
+ my $indentation = $leading_spaces_to_go[$ibeg];
+ if ( defined($opening_indentation)
+ && get_spaces($indentation) >
+ get_spaces($opening_indentation) )
+ {
+ $adjust_indentation = 1;
+ }
+ }
+
+ # patch for issue git #40: -bli setting has priority
+ $adjust_indentation = 0 if ($is_bli_beg);
+
+ $default_adjust_indentation = $adjust_indentation;
+
+ # Now modify default behavior according to user request:
+ # handle option to indent non-blocks of the form ); }; ];
+ # But don't do special indentation to something like ')->pack('
+ if ( !$block_type_to_go[$ibeg] ) {
+
+ # Note that logical padding has already been applied, so we may
+ # need to remove some spaces to get a valid hash key.
+ my $tok = $tokens_to_go[$ibeg];
+ my $cti = $closing_token_indentation{$tok};
+
+ # Fix the value of 'cti' for an isloated non-welded closing qw
+ # delimiter.
+ if ( $seqno_qw_closing && $ibeg_weld_fix == $ibeg ) {
+
+ # A quote delimiter which is not a container will not have
+ # a cti value defined. In this case use the style of a
+ # paren. For example
+ # my @fars = (
+ # qw<
+ # far
+ # farfar
+ # farfars-far
+ # >,
+ # );
+ if ( !defined($cti) && length($tok) == 1 ) {
+
+ # something other than ')', '}', ']' ; use flag for ')'
+ $cti = $closing_token_indentation{')'};
+
+ # But for now, do not outdent non-container qw
+ # delimiters because it would would change existing
+ # formatting.
+ if ( $tok ne '>' ) { $cti = 3 }
+ }
+
+ # A non-welded closing qw cannot currently use -cti=1
+ # because that option requires a sequence number to find
+ # the opening indentation, and qw quote delimiters are not
+ # sequenced items.
+ if ( defined($cti) && $cti == 1 ) { $cti = 0 }
+ }
+
+ if ( !defined($cti) ) {
+
+ # $cti may not be defined for several reasons.
+ # -padding may have been applied so the character
+ # has a length > 1
+ # - we may have welded to a closing quote token.
+ # Here is an example (perltidy -wn):
+ # __PACKAGE__->load_components( qw(
+ # > Core
+ # >
+ # > ) );
+ $adjust_indentation = 0;
+
+ }
+ elsif ( $cti == 1 ) {
+ if ( $i_terminal <= $ibeg + 1
+ || $is_semicolon_terminated )
+ {
+ $adjust_indentation = 2;
+ }
+ else {
+ $adjust_indentation = 0;
+ }
+ }
+ elsif ( $cti == 2 ) {
+ if ($is_semicolon_terminated) {
+ $adjust_indentation = 3;
+ }
+ else {
+ $adjust_indentation = 0;
+ }
+ }
+ elsif ( $cti == 3 ) {
+ $adjust_indentation = 3;
+ }
+ }
+
+ # handle option to indent blocks
+ else {
+ if (
+ $rOpts->{'indent-closing-brace'}
+ && (
+ $i_terminal == $ibeg # isolated terminal '}'
+ || $is_semicolon_terminated
+ )
+ ) # } xxxx ;
+ {
+ $adjust_indentation = 3;
+ }
+ }
+ }
+
+ # if at ');', '};', '>;', and '];' of a terminal qw quote
+ elsif ($rpatterns->[0] =~ /^qb*;$/
+ && $rfields->[0] =~ /^([\)\}\]\>]);$/ )
+ {
+ if ( $closing_token_indentation{$1} == 0 ) {
+ $adjust_indentation = 1;
+ }
+ else {
+ $adjust_indentation = 3;
+ }
+ }
+
+ # if line begins with a ':', align it with any
+ # previous line leading with corresponding ?
+ elsif ( $types_to_go[$ibeg] eq ':' ) {
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
+ $rindentation_list );
+ if ($is_leading) { $adjust_indentation = 2; }
+ }
+
+ ##########################################################
+ # Section 2: set indentation according to flag set above
+ #
+ # Select the indentation object to define leading
+ # whitespace. If we are outdenting something like '} } );'
+ # then we want to use one level below the last token
+ # ($i_terminal) in order to get it to fully outdent through
+ # all levels.
+ ##########################################################
+ my $indentation;
+ my $lev;
+ my $level_end = $levels_to_go[$iend];
+
+ if ( $adjust_indentation == 0 ) {
+ $indentation = $leading_spaces_to_go[$ibeg];
+ $lev = $levels_to_go[$ibeg];
+ }
+ elsif ( $adjust_indentation == 1 ) {
+
+ # Change the indentation to be that of a different token on the line
+ # Previously, the indentation of the terminal token was used:
+ # OLD CODING:
+ # $indentation = $reduced_spaces_to_go[$i_terminal];
+ # $lev = $levels_to_go[$i_terminal];
+
+ # Generalization for MOJO:
+ # Use the lowest level indentation of the tokens on the line.
+ # For example, here we can use the indentation of the ending ';':
+ # } until ($selection > 0 and $selection < 10); # ok to use ';'
+ # But this will not outdent if we use the terminal indentation:
+ # )->then( sub { # use indentation of the ->, not the {
+ # Warning: reduced_spaces_to_go[] may be a reference, do not
+ # do numerical checks with it
+
+ my $i_ind = $ibeg;
+ $indentation = $reduced_spaces_to_go[$i_ind];
+ $lev = $levels_to_go[$i_ind];
+ while ( $i_ind < $i_terminal ) {
+ $i_ind++;
+ if ( $levels_to_go[$i_ind] < $lev ) {
+ $indentation = $reduced_spaces_to_go[$i_ind];
+ $lev = $levels_to_go[$i_ind];
+ }
+ }
+ }
+
+ # handle indented closing token which aligns with opening token
+ elsif ( $adjust_indentation == 2 ) {
+
+ # handle option to align closing token with opening token
+ $lev = $levels_to_go[$ibeg];
+
+ # calculate spaces needed to align with opening token
+ my $space_count =
+ get_spaces($opening_indentation) + $opening_offset;
+
+ # Indent less than the previous line.
+ #
+ # Problem: For -lp we don't exactly know what it was if there
+ # were recoverable spaces sent to the aligner. A good solution
+ # would be to force a flush of the vertical alignment buffer, so
+ # that we would know. For now, this rule is used for -lp:
+ #
+ # When the last line did not start with a closing token we will
+ # be optimistic that the aligner will recover everything wanted.
+ #
+ # This rule will prevent us from breaking a hierarchy of closing
+ # tokens, and in a worst case will leave a closing paren too far
+ # indented, but this is better than frequently leaving it not
+ # indented enough.
+ my $last_spaces = get_spaces($last_indentation_written);
+ if ( !$is_closing_token{$last_leading_token} ) {
+ $last_spaces +=
+ get_recoverable_spaces($last_indentation_written);
+ }
+
+ # reset the indentation to the new space count if it works
+ # only options are all or none: nothing in-between looks good
+ $lev = $levels_to_go[$ibeg];
+ if ( $space_count < $last_spaces ) {
+ if ($rOpts_line_up_parentheses) {
+ my $lev = $levels_to_go[$ibeg];
+ $indentation =
+ new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
+ }
+ else {
+ $indentation = $space_count;
+ }
+ }
+
+ # revert to default if it doesn't work
+ else {
+ $space_count = leading_spaces_to_go($ibeg);
+ if ( $default_adjust_indentation == 0 ) {
+ $indentation = $leading_spaces_to_go[$ibeg];
+ }
+ elsif ( $default_adjust_indentation == 1 ) {
+ $indentation = $reduced_spaces_to_go[$i_terminal];
+ $lev = $levels_to_go[$i_terminal];
+ }
+ }
+ }
+
+ # Full indentaion of closing tokens (-icb and -icp or -cti=2)
+ else {
+
+ # handle -icb (indented closing code block braces)
+ # Updated method for indented block braces: indent one full level if
+ # there is no continuation indentation. This will occur for major
+ # structures such as sub, if, else, but not for things like map
+ # blocks.
+ #
+ # Note: only code blocks without continuation indentation are
+ # handled here (if, else, unless, ..). In the following snippet,
+ # the terminal brace of the sort block will have continuation
+ # indentation as shown so it will not be handled by the coding
+ # here. We would have to undo the continuation indentation to do
+ # this, but it probably looks ok as is. This is a possible future
+ # update for semicolon terminated lines.
+ #
+ # if ($sortby eq 'date' or $sortby eq 'size') {
+ # @files = sort {
+ # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
+ # or $a cmp $b
+ # } @files;
+ # }
+ #
+ if ( $block_type_to_go[$ibeg]
+ && $ci_levels_to_go[$i_terminal] == 0 )
+ {
+ my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
+ $indentation = $spaces + $rOpts_indent_columns;
- # important: only combine a very simple or
- # statement because the step below may have
- # combined a trailing 'and' with this or,
- # and we do not want to then combine
- # everything together
- && ( $iend_2 - $ibeg_2 <= 7 )
- )
- )
- );
+ # NOTE: for -lp we could create a new indentation object, but
+ # there is probably no need to do it
+ }
- #X: RT #81854
- $forced_breakpoint_to_go[$iend_1] = 0
- unless $old_breakpoint_to_go[$iend_1];
- }
+ # handle -icp and any -icb block braces which fall through above
+ # test such as the 'sort' block mentioned above.
+ else {
- # handle leading 'and'
- elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) {
+ # There are currently two ways to handle -icp...
+ # One way is to use the indentation of the previous line:
+ # $indentation = $last_indentation_written;
- # Decide if we will combine a single terminal 'and'
- # after an 'if' or 'unless'.
+ # The other way is to use the indentation that the previous line
+ # would have had if it hadn't been adjusted:
+ $indentation = $last_unadjusted_indentation;
- # This looks best with the 'and' on the same
- # line as the 'if':
- #
- # $a = 1
- # if $seconds and $nu < 2;
- #
- # But this looks better as shown:
- #
- # $a = 1
- # if !$this->{Parents}{$_}
- # or $this->{Parents}{$_} eq $_;
- #
- next
- unless (
- $this_line_is_semicolon_terminated
- && (
+ # Current method: use the minimum of the two. This avoids
+ # inconsistent indentation.
+ if ( get_spaces($last_indentation_written) <
+ get_spaces($indentation) )
+ {
+ $indentation = $last_indentation_written;
+ }
+ }
- # following 'if' or 'unless' or 'or'
- $type_ibeg_1 eq 'k'
- && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
- || $tokens_to_go[$ibeg_1] eq 'or' )
- )
- );
- }
+ # use previous indentation but use own level
+ # to cause list to be flushed properly
+ $lev = $levels_to_go[$ibeg];
+ }
- # handle leading "if" and "unless"
- elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
+ # remember indentation except for multi-line quotes, which get
+ # no indentation
+ unless ( $ibeg == 0 && $starting_in_quote ) {
+ $last_indentation_written = $indentation;
+ $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
+ $last_leading_token = $tokens_to_go[$ibeg];
- # FIXME: This is still experimental..may not be too useful
- next
- unless (
- $this_line_is_semicolon_terminated
+ # Patch to make a line which is the end of a qw quote work with the
+ # -lp option. Make $token_beg look like a closing token as some
+ # type even if it is not. This veriable will become
+ # $last_leading_token at the end of this loop. Then, if the -lp
+ # style is selected, and the next line is also a
+ # closing token, it will not get more indentation than this line.
+ # We need to do this because qw quotes (at present) only get
+ # continuation indentation, not one level of indentation, so we
+ # need to turn off the -lp indentation.
+
+ # ... a picture is worth a thousand words:
+
+ # perltidy -wn -gnu (Without this patch):
+ # ok(defined(
+ # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
+ # 2981014)])
+ # ));
+
+ # perltidy -wn -gnu (With this patch):
+ # ok(defined(
+ # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
+ # 2981014)])
+ # ));
+ ## if ($seqno_qw_closing) { $last_leading_token = ')' }
+ if ( $seqno_qw_closing
+ && ( length($token_beg) > 1 || $token_beg eq '>' ) )
+ {
+ $last_leading_token = ')';
+ }
+ }
- # previous line begins with 'and' or 'or'
- && $type_ibeg_1 eq 'k'
- && $is_and_or{ $tokens_to_go[$ibeg_1] }
+ # be sure lines with leading closing tokens are not outdented more
+ # than the line which contained the corresponding opening token.
- );
- }
+ #############################################################
+ # updated per bug report in alex_bug.pl: we must not
+ # mess with the indentation of closing logical braces so
+ # we must treat something like '} else {' as if it were
+ # an isolated brace
+ #############################################################
+ my $is_isolated_block_brace = $block_type_to_go[$ibeg]
+ && (
+ $i_terminal == $ibeg
+ || $is_if_elsif_else_unless_while_until_for_foreach{
+ $block_type_to_go[$ibeg]
+ }
+ );
- # handle all other leading keywords
- else {
+ # only do this for a ':; which is aligned with its leading '?'
+ my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
- # keywords look best at start of lines,
- # but combine things like "1 while"
- unless ( $is_assignment{$type_iend_1} ) {
- next
- if ( ( $type_iend_1 ne 'k' )
- && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
- }
- }
- }
+ if (
+ defined($opening_indentation)
+ && !$leading_paren_arrow # MOJO
+ && !$is_isolated_block_brace
+ && !$is_unaligned_colon
+ )
+ {
+ if ( get_spaces($opening_indentation) > get_spaces($indentation) ) {
+ $indentation = $opening_indentation;
+ }
+ }
- # similar treatment of && and || as above for 'and' and 'or':
- # NOTE: This block of code is currently bypassed because
- # of a previous block but is retained for possible future use.
- elsif ( $is_amp_amp{$type_ibeg_2} ) {
+ # remember the indentation of each line of this batch
+ push @{$rindentation_list}, $indentation;
- # maybe looking at something like:
- # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
+ # outdent lines with certain leading tokens...
+ if (
- next
- unless (
- $this_line_is_semicolon_terminated
+ # must be first word of this batch
+ $ibeg == 0
- # previous line begins with an 'if' or 'unless' keyword
- && $type_ibeg_1 eq 'k'
- && $is_if_unless{ $tokens_to_go[$ibeg_1] }
+ # and ...
+ && (
- );
- }
+ # certain leading keywords if requested
+ (
+ $rOpts->{'outdent-keywords'}
+ && $types_to_go[$ibeg] eq 'k'
+ && $outdent_keyword{ $tokens_to_go[$ibeg] }
+ )
- # handle line with leading = or similar
- elsif ( $is_assignment{$type_ibeg_2} ) {
- next unless ( $n == 1 || $n == $nmax );
- next if $old_breakpoint_to_go[$iend_1];
- next
- unless (
+ # or labels if requested
+ || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
- # unless we can reduce this to two lines
- $nmax == 2
+ # or static block comments if requested
+ || ( $types_to_go[$ibeg] eq '#'
+ && $rOpts->{'outdent-static-block-comments'}
+ && $is_static_block_comment )
+ )
+ )
- # or three lines, the last with a leading semicolon
- || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
+ {
+ my $space_count = leading_spaces_to_go($ibeg);
+ if ( $space_count > 0 ) {
+ $space_count -= $rOpts_continuation_indentation;
+ $is_outdented_line = 1;
+ if ( $space_count < 0 ) { $space_count = 0 }
- # or the next line ends with a here doc
- || $type_iend_2 eq 'h'
+ # do not promote a spaced static block comment to non-spaced;
+ # this is not normally necessary but could be for some
+ # unusual user inputs (such as -ci = -i)
+ if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
+ $space_count = 1;
+ }
- # or this is a short line ending in ;
- || ( $n == $nmax && $this_line_is_semicolon_terminated )
- );
- $forced_breakpoint_to_go[$iend_1] = 0;
+ if ($rOpts_line_up_parentheses) {
+ $indentation =
+ new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
+ }
+ else {
+ $indentation = $space_count;
}
+ }
+ }
- #----------------------------------------------------------
- # Recombine Section 4:
- # Combine the lines if we arrive here and it is possible
- #----------------------------------------------------------
+ return ( $indentation, $lev, $level_end, $terminal_type,
+ $terminal_block_type, $is_semicolon_terminated,
+ $is_outdented_line );
+ }
+} ## end closure set_adjusted_indentation
- # honor hard breakpoints
- next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
+sub get_opening_indentation {
- my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
+ # get the indentation of the line which output the opening token
+ # corresponding to a given closing token in the current output batch.
+ #
+ # given:
+ # $i_closing - index in this line of a closing token ')' '}' or ']'
+ #
+ # $ri_first - reference to list of the first index $i for each output
+ # line in this batch
+ # $ri_last - reference to list of the last index $i for each output line
+ # in this batch
+ # $rindentation_list - reference to a list containing the indentation
+ # used for each line.
+ # $qw_seqno - optional sequence number to use if normal seqno not defined
+ # (TODO: would be more general to just look this up from index i)
+ #
+ # return:
+ # -the indentation of the line which contained the opening token
+ # which matches the token at index $i_opening
+ # -and its offset (number of columns) from the start of the line
+ #
+ my ( $self, $i_closing, $ri_first, $ri_last, $rindentation_list, $qw_seqno )
+ = @_;
- # Require a few extra spaces before recombining lines if we are
- # at an old breakpoint unless this is a simple list or terminal
- # line. The goal is to avoid oscillating between two
- # quasi-stable end states. For example this snippet caused
- # problems:
-## my $this =
-## bless {
-## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
-## },
-## $type;
- next
- if ( $old_breakpoint_to_go[$iend_1]
- && !$this_line_is_semicolon_terminated
- && $n < $nmax
- && $excess + 4 > 0
- && $type_iend_2 ne ',' );
+ # first, see if the opening token is in the current batch
+ my $i_opening = $mate_index_to_go[$i_closing];
+ my ( $indent, $offset, $is_leading, $exists );
+ $exists = 1;
+ if ( defined($i_opening) && $i_opening >= 0 ) {
- # do not recombine if we would skip in indentation levels
- if ( $n < $nmax ) {
- my $if_next = $ri_beg->[ $n + 1 ];
- next
- if (
- $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
- && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
+ # it is..look up the indentation
+ ( $indent, $offset, $is_leading ) =
+ lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
+ $rindentation_list );
+ }
- # but an isolated 'if (' is undesirable
- && !(
- $n == 1
- && $iend_1 - $ibeg_1 <= 2
- && $type_ibeg_1 eq 'k'
- && $tokens_to_go[$ibeg_1] eq 'if'
- && $tokens_to_go[$iend_1] ne '('
- )
- );
- }
+ # if not, it should have been stored in the hash by a previous batch
+ else {
+ my $seqno = $type_sequence_to_go[$i_closing];
+ $seqno = $qw_seqno unless ($seqno);
+ ( $indent, $offset, $is_leading, $exists ) =
+ get_saved_opening_indentation($seqno);
+ }
+ return ( $indent, $offset, $is_leading, $exists );
+}
- # honor no-break's
- next if ( $bs >= NO_BREAK - 1 );
+sub set_vertical_tightness_flags {
- # remember the pair with the greatest bond strength
- if ( !$n_best ) {
- $n_best = $n;
- $bs_best = $bs;
- }
- else {
+ my ( $self, $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last,
+ $ending_in_quote, $closing_side_comment )
+ = @_;
- if ( $bs > $bs_best ) {
- $n_best = $n;
- $bs_best = $bs;
- }
- }
- }
+ # Define vertical tightness controls for the nth line of a batch.
+ # We create an array of parameters which tell the vertical aligner
+ # if we should combine this line with the next line to achieve the
+ # desired vertical tightness. The array of parameters contains:
+ #
+ # [0] type: 1=opening non-block 2=closing non-block
+ # 3=opening block brace 4=closing block brace
+ #
+ # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
+ # if closing: spaces of padding to use
+ # [2] sequence number of container
+ # [3] valid flag: do not append if this flag is false. Will be
+ # true if appropriate -vt flag is set. Otherwise, Will be
+ # made true only for 2 line container in parens with -lp
+ #
+ # These flags are used by sub set_leading_whitespace in
+ # the vertical aligner
- # recombine the pair with the greatest bond strength
- if ($n_best) {
- splice @{$ri_beg}, $n_best, 1;
- splice @{$ri_end}, $n_best - 1, 1;
- splice @joint, $n_best, 1;
+ my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
- # keep going if we are still making progress
- $more_to_do++;
- }
- }
- return ( $ri_beg, $ri_end );
- }
-} # end recombine_breakpoints
+ # The vertical tightness mechanism can add whitespace, so whitespace can
+ # continually increase if we allowed it when the -fws flag is set.
+ # See case b499 for an example.
+ return $rvertical_tightness_flags if ($rOpts_freeze_whitespace);
+
+ # Uses these parameters:
+ # $rOpts_block_brace_tightness
+ # $rOpts_block_brace_vertical_tightness
+ # $rOpts_stack_closing_block_brace
+ # %opening_vertical_tightness
+ # %closing_vertical_tightness
+ # %opening_token_right
+ # %stack_closing_token
+ # %stack_opening_token
-sub break_all_chain_tokens {
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 1:
+ # Handle Lines 1 .. n-1 but not the last line
+ # For non-BLOCK tokens, we will need to examine the next line
+ # too, so we won't consider the last line.
+ #--------------------------------------------------------------
+ if ( $n < $n_last_line ) {
- # scan the current breakpoints looking for breaks at certain "chain
- # operators" (. : && || + etc) which often occur repeatedly in a long
- # statement. If we see a break at any one, break at all similar tokens
- # within the same container.
- #
- my ( $self, $ri_left, $ri_right ) = @_;
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 1a:
+ # Look for Type 1, last token of this line is a non-block opening token
+ #--------------------------------------------------------------
+ my $ibeg_next = $ri_first->[ $n + 1 ];
+ my $token_end = $tokens_to_go[$iend];
+ my $iend_next = $ri_last->[ $n + 1 ];
+ if (
+ $type_sequence_to_go[$iend]
+ && !$block_type_to_go[$iend]
+ && $is_opening_token{$token_end}
+ && (
+ $opening_vertical_tightness{$token_end} > 0
- my %saw_chain_type;
- my %left_chain_type;
- my %right_chain_type;
- my %interior_chain_type;
- my $nmax = @{$ri_right} - 1;
+ # allow 2-line method call to be closed up
+ || ( $rOpts_line_up_parentheses
+ && $token_end eq '('
+ && $iend > $ibeg
+ && $types_to_go[ $iend - 1 ] ne 'b' )
+ )
+ )
+ {
- # scan the left and right end tokens of all lines
- my $count = 0;
- for my $n ( 0 .. $nmax ) {
- my $il = $ri_left->[$n];
- my $ir = $ri_right->[$n];
- my $typel = $types_to_go[$il];
- my $typer = $types_to_go[$ir];
- $typel = '+' if ( $typel eq '-' ); # treat + and - the same
- $typer = '+' if ( $typer eq '-' );
- $typel = '*' if ( $typel eq '/' ); # treat * and / the same
- $typer = '*' if ( $typer eq '/' );
- my $tokenl = $tokens_to_go[$il];
- my $tokenr = $tokens_to_go[$ir];
+ # avoid multiple jumps in nesting depth in one line if
+ # requested
+ my $ovt = $opening_vertical_tightness{$token_end};
+ my $iend_next = $ri_last->[ $n + 1 ];
+ unless (
+ $ovt < 2
+ && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
+ $nesting_depth_to_go[$ibeg_next] )
+ )
+ {
- if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
- next if ( $typel eq '?' );
- push @{ $left_chain_type{$typel} }, $il;
- $saw_chain_type{$typel} = 1;
- $count++;
- }
- if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
- next if ( $typer eq '?' );
- push @{ $right_chain_type{$typer} }, $ir;
- $saw_chain_type{$typer} = 1;
- $count++;
+ # If -vt flag has not been set, mark this as invalid
+ # and aligner will validate it if it sees the closing paren
+ # within 2 lines.
+ my $valid_flag = $ovt;
+ @{$rvertical_tightness_flags} =
+ ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
+ }
}
- }
- return unless $count;
- # now look for any interior tokens of the same types
- $count = 0;
- for my $n ( 0 .. $nmax ) {
- my $il = $ri_left->[$n];
- my $ir = $ri_right->[$n];
- foreach my $i ( $il + 1 .. $ir - 1 ) {
- my $type = $types_to_go[$i];
- $type = '+' if ( $type eq '-' );
- $type = '*' if ( $type eq '/' );
- if ( $saw_chain_type{$type} ) {
- push @{ $interior_chain_type{$type} }, $i;
- $count++;
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 1b:
+ # Look for Type 2, first token of next line is a non-block closing
+ # token .. and be sure this line does not have a side comment
+ #--------------------------------------------------------------
+ my $token_next = $tokens_to_go[$ibeg_next];
+ if ( $type_sequence_to_go[$ibeg_next]
+ && !$block_type_to_go[$ibeg_next]
+ && $is_closing_token{$token_next}
+ && $types_to_go[$iend] ne '#' ) # for safety, shouldn't happen!
+ {
+ my $ovt = $opening_vertical_tightness{$token_next};
+ my $cvt = $closing_vertical_tightness{$token_next};
+
+ # Implement cvt=3: like cvt=0 for assigned structures, like cvt=1
+ # otherwise. Added for rt136417.
+ if ( $cvt == 3 ) {
+ my $seqno = $type_sequence_to_go[$ibeg_next];
+ $cvt = $self->[_ris_assigned_structure_]->{$seqno} ? 0 : 1;
}
- }
- }
- return unless $count;
- # now make a list of all new break points
- my @insert_list;
+ if (
- # loop over all chain types
- foreach my $type ( keys %saw_chain_type ) {
+ # Never append a trailing line like ')->pack(' because it
+ # will throw off later alignment. So this line must start at a
+ # deeper level than the next line (fix1 for welding, git #45).
+ (
+ $nesting_depth_to_go[$ibeg_next] >=
+ $nesting_depth_to_go[ $iend_next + 1 ] + 1
+ )
+ && (
+ $cvt == 2
+ || (
+ !$self->is_in_list_by_i($ibeg_next)
+ && (
+ $cvt == 1
- # quit if just ONE continuation line with leading . For example--
- # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
- # . $contents;
- last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
+ # allow closing up 2-line method calls
+ || ( $rOpts_line_up_parentheses
+ && $token_next eq ')' )
+ )
+ )
+ )
+ )
+ {
- # loop over all interior chain tokens
- foreach my $itest ( @{ $interior_chain_type{$type} } ) {
+ # decide which trailing closing tokens to append..
+ my $ok = 0;
+ if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
+ else {
+ my $str = join( '',
+ @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
- # loop over all left end tokens of same type
- if ( $left_chain_type{$type} ) {
- next if $nobreak_to_go[ $itest - 1 ];
- foreach my $i ( @{ $left_chain_type{$type} } ) {
- next unless $self->in_same_container_i( $i, $itest );
- push @insert_list, $itest - 1;
+ # append closing token if followed by comment or ';'
+ # or another closing token (fix2 for welding, git #45)
+ if ( $str =~ /^b?[\)\]\}R#;]/ ) { $ok = 1 }
+ }
- # Break at matching ? if this : is at a different level.
- # For example, the ? before $THRf_DEAD in the following
- # should get a break if its : gets a break.
- #
- # my $flags =
- # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
- # : ( $_ & 4 ) ? $THRf_R_DETACHED
- # : $THRf_R_JOINABLE;
- if ( $type eq ':'
- && $levels_to_go[$i] != $levels_to_go[$itest] )
- {
- my $i_question = $mate_index_to_go[$itest];
- if ( $i_question > 0 ) {
- push @insert_list, $i_question - 1;
- }
- }
- last;
+ if ($ok) {
+ my $valid_flag = $cvt;
+ @{$rvertical_tightness_flags} = (
+ 2,
+ $tightness{$token_next} == 2 ? 0 : 1,
+ $type_sequence_to_go[$ibeg_next], $valid_flag,
+ );
}
}
+ }
- # loop over all right end tokens of same type
- if ( $right_chain_type{$type} ) {
- next if $nobreak_to_go[$itest];
- foreach my $i ( @{ $right_chain_type{$type} } ) {
- next unless $self->in_same_container_i( $i, $itest );
- push @insert_list, $itest;
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 1c:
+ # Implement the Opening Token Right flag (Type 2)..
+ # If requested, move an isolated trailing opening token to the end of
+ # the previous line which ended in a comma. We could do this
+ # in sub recombine_breakpoints but that would cause problems
+ # with -lp formatting. The problem is that indentation will
+ # quickly move far to the right in nested expressions. By
+ # doing it after indentation has been set, we avoid changes
+ # to the indentation. Actual movement of the token takes place
+ # in sub valign_output_step_B.
+
+ # Note added 4 May 2021: the man page suggests that the -otr flags
+ # are mainly for opening tokens following commas. But this seems
+ # to have been generalized long ago to include other situations.
+ # I checked the coding back to 2012 and it is essentially the same
+ # as here, so it is best to leave this unchanged for now.
+ #--------------------------------------------------------------
+ if (
+ $opening_token_right{ $tokens_to_go[$ibeg_next] }
- # break at matching ? if this : is at a different level
- if ( $type eq ':'
- && $levels_to_go[$i] != $levels_to_go[$itest] )
- {
- my $i_question = $mate_index_to_go[$itest];
- if ( $i_question >= 0 ) {
- push @insert_list, $i_question;
- }
- }
- last;
- }
- }
- }
- }
+ # previous line is not opening
+ # (use -sot to combine with it)
+ && !$is_opening_token{$token_end}
- # insert any new break points
- if (@insert_list) {
- insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
- }
- return;
-}
+ # previous line ended in one of these
+ # (add other cases if necessary; '=>' and '.' are not necessary
+ && !$block_type_to_go[$ibeg_next]
-sub break_equals {
+ # this is a line with just an opening token
+ && ( $iend_next == $ibeg_next
+ || $iend_next == $ibeg_next + 2
+ && $types_to_go[$iend_next] eq '#' )
- # Look for assignment operators that could use a breakpoint.
- # For example, in the following snippet
- #
- # $HOME = $ENV{HOME}
- # || $ENV{LOGDIR}
- # || $pw[7]
- # || die "no home directory for user $<";
- #
- # we could break at the = to get this, which is a little nicer:
- # $HOME =
- # $ENV{HOME}
- # || $ENV{LOGDIR}
- # || $pw[7]
- # || die "no home directory for user $<";
- #
- # The logic here follows the logic in set_logical_padding, which
- # will add the padding in the second line to improve alignment.
- #
- my ( $ri_left, $ri_right ) = @_;
- my $nmax = @{$ri_right} - 1;
- return unless ( $nmax >= 2 );
+ # Fix for case b1060 when both -baoo and -otr are set:
+ # to avoid blinking, honor the -baoo flag over the -otr flag.
+ && $token_end ne '||' && $token_end ne '&&'
- # scan the left ends of first two lines
- my $tokbeg = "";
- my $depth_beg;
- for my $n ( 1 .. 2 ) {
- my $il = $ri_left->[$n];
- my $typel = $types_to_go[$il];
- my $tokenl = $tokens_to_go[$il];
+ # Keep break after '=' if -lp. Fixes b964 b1040 b1062 b1083 b1089.
+ && !( $token_end eq '=' && $rOpts_line_up_parentheses )
- my $has_leading_op = ( $tokenl =~ /^\w/ )
- ? $is_chain_operator{$tokenl} # + - * / : ? && ||
- : $is_chain_operator{$typel}; # and, or
- return unless ($has_leading_op);
- if ( $n > 1 ) {
- return
- unless ( $tokenl eq $tokbeg
- && $nesting_depth_to_go[$il] eq $depth_beg );
+ # looks bad if we align vertically with the wrong container
+ && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
+ )
+ {
+ my $valid_flag = 1;
+ my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
+ @{$rvertical_tightness_flags} =
+ ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
}
- $tokbeg = $tokenl;
- $depth_beg = $nesting_depth_to_go[$il];
- }
- # now look for any interior tokens of the same types
- my $il = $ri_left->[0];
- my $ir = $ri_right->[0];
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 1d:
+ # Stacking of opening and closing tokens (Type 2)
+ #--------------------------------------------------------------
+ my $stackable;
+ my $token_beg_next = $tokens_to_go[$ibeg_next];
- # now make a list of all new break points
- my @insert_list;
- for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
- my $type = $types_to_go[$i];
- if ( $is_assignment{$type}
- && $nesting_depth_to_go[$i] eq $depth_beg )
- {
- if ( $want_break_before{$type} ) {
- push @insert_list, $i - 1;
- }
- else {
- push @insert_list, $i;
+ # patch to make something like 'qw(' behave like an opening paren
+ # (aran.t)
+ if ( $types_to_go[$ibeg_next] eq 'q' ) {
+ if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
+ $token_beg_next = $1;
}
}
- }
- # Break after a 'return' followed by a chain of operators
- # return ( $^O !~ /win32|dos/i )
- # && ( $^O ne 'VMS' )
- # && ( $^O ne 'OS2' )
- # && ( $^O ne 'MacOS' );
- # To give:
- # return
- # ( $^O !~ /win32|dos/i )
- # && ( $^O ne 'VMS' )
- # && ( $^O ne 'OS2' )
- # && ( $^O ne 'MacOS' );
- my $i = 0;
- if ( $types_to_go[$i] eq 'k'
- && $tokens_to_go[$i] eq 'return'
- && $ir > $il
- && $nesting_depth_to_go[$i] eq $depth_beg )
- {
- push @insert_list, $i;
- }
+ if ( $is_closing_token{$token_end}
+ && $is_closing_token{$token_beg_next} )
+ {
+ $stackable = $stack_closing_token{$token_beg_next}
+ unless ( $block_type_to_go[$ibeg_next] )
+ ; # shouldn't happen; just checking
+ }
+ elsif ($is_opening_token{$token_end}
+ && $is_opening_token{$token_beg_next} )
+ {
+ $stackable = $stack_opening_token{$token_beg_next}
+ unless ( $block_type_to_go[$ibeg_next] )
+ ; # shouldn't happen; just checking
+ }
- return unless (@insert_list);
+ if ($stackable) {
- # One final check...
- # scan second and third lines and be sure there are no assignments
- # we want to avoid breaking at an = to make something like this:
- # unless ( $icon =
- # $html_icons{"$type-$state"}
- # or $icon = $html_icons{$type}
- # or $icon = $html_icons{$state} )
- for my $n ( 1 .. 2 ) {
- my $il = $ri_left->[$n];
- my $ir = $ri_right->[$n];
- foreach my $i ( $il + 1 .. $ir ) {
- my $type = $types_to_go[$i];
- return
- if ( $is_assignment{$type}
- && $nesting_depth_to_go[$i] eq $depth_beg );
+ my $is_semicolon_terminated;
+ if ( $n + 1 == $n_last_line ) {
+ my ( $terminal_type, $i_terminal ) =
+ terminal_type_i( $ibeg_next, $iend_next );
+ $is_semicolon_terminated = $terminal_type eq ';'
+ && $nesting_depth_to_go[$iend_next] <
+ $nesting_depth_to_go[$ibeg_next];
+ }
+
+ # this must be a line with just an opening token
+ # or end in a semicolon
+ if (
+ $is_semicolon_terminated
+ || ( $iend_next == $ibeg_next
+ || $iend_next == $ibeg_next + 2
+ && $types_to_go[$iend_next] eq '#' )
+ )
+ {
+ my $valid_flag = 1;
+ my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
+ @{$rvertical_tightness_flags} = (
+ 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
+ );
+ }
}
}
- # ok, insert any new break point
- if (@insert_list) {
- insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 2:
+ # Handle type 3, opening block braces on last line of the batch
+ # Check for a last line with isolated opening BLOCK curly
+ #--------------------------------------------------------------
+ elsif ($rOpts_block_brace_vertical_tightness
+ && $ibeg eq $iend
+ && $types_to_go[$iend] eq '{'
+ && $block_type_to_go[$iend] =~
+ /$block_brace_vertical_tightness_pattern/ )
+ {
+ @{$rvertical_tightness_flags} =
+ ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
}
- return;
-}
-sub insert_final_breaks {
-
- my ( $self, $ri_left, $ri_right ) = @_;
-
- my $nmax = @{$ri_right} - 1;
-
- # scan the left and right end tokens of all lines
- my $count = 0;
- my $i_first_colon = -1;
- for my $n ( 0 .. $nmax ) {
- my $il = $ri_left->[$n];
- my $ir = $ri_right->[$n];
- my $typel = $types_to_go[$il];
- my $typer = $types_to_go[$ir];
- return if ( $typel eq '?' );
- return if ( $typer eq '?' );
- if ( $typel eq ':' ) { $i_first_colon = $il; last; }
- elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 3:
+ # Handle type 4, a closing block brace on the last line of the batch Check
+ # for a last line with isolated closing BLOCK curly
+ # Patch: added a check for any new closing side comment which the
+ # -csc option may generate. If it exists, there will be a side comment
+ # so we cannot combine with a brace on the next line. This issue
+ # occurs for the combination -scbb and -csc is used.
+ #--------------------------------------------------------------
+ elsif ($rOpts_stack_closing_block_brace
+ && $ibeg eq $iend
+ && $block_type_to_go[$iend]
+ && $types_to_go[$iend] eq '}'
+ && ( !$closing_side_comment || $n < $n_last_line ) )
+ {
+ my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
+ @{$rvertical_tightness_flags} =
+ ( 4, $spaces, $type_sequence_to_go[$iend], 1 );
}
- # For long ternary chains,
- # if the first : we see has its ? is in the interior
- # of a preceding line, then see if there are any good
- # breakpoints before the ?.
- if ( $i_first_colon > 0 ) {
- my $i_question = $mate_index_to_go[$i_first_colon];
- if ( $i_question > 0 ) {
- my @insert_list;
- for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
- my $token = $tokens_to_go[$ii];
- my $type = $types_to_go[$ii];
-
- # For now, a good break is either a comma or,
- # in a long chain, a 'return'.
- # Patch for RT #126633: added the $nmax>1 check to avoid
- # breaking after a return for a simple ternary. For longer
- # chains the break after return allows vertical alignment, so
- # it is still done. So perltidy -wba='?' will not break
- # immediately after the return in the following statement:
- # sub x {
- # return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' :
- # 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb';
- # }
- if (
- (
- $type eq ','
- || $type eq 'k' && ( $nmax > 1 && $token eq 'return' )
- )
- && $self->in_same_container_i( $ii, $i_question )
- )
- {
- push @insert_list, $ii;
- last;
- }
- }
+ # pack in the sequence numbers of the ends of this line
+ my $seqno_beg = $type_sequence_to_go[$ibeg];
+ if ( !$seqno_beg && $types_to_go[$ibeg] eq 'q' ) {
+ $seqno_beg = $self->get_seqno( $ibeg, $ending_in_quote );
+ }
+ my $seqno_end = $type_sequence_to_go[$iend];
+ if ( !$seqno_end && $types_to_go[$iend] eq 'q' ) {
+ $seqno_end = $self->get_seqno( $iend, $ending_in_quote );
+ }
+ $rvertical_tightness_flags->[4] = $seqno_beg;
+ $rvertical_tightness_flags->[5] = $seqno_end;
+ return $rvertical_tightness_flags;
+}
- # insert any new break points
- if (@insert_list) {
- insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
+##########################################################
+# CODE SECTION 14: Code for creating closing side comments
+##########################################################
+
+{ ## begin closure accumulate_csc_text
+
+# These routines are called once per batch when the --closing-side-comments flag
+# has been set.
+
+ my %block_leading_text;
+ my %block_opening_line_number;
+ my $csc_new_statement_ok;
+ my $csc_last_label;
+ my %csc_block_label;
+ my $accumulating_text_for_block;
+ my $leading_block_text;
+ my $rleading_block_if_elsif_text;
+ my $leading_block_text_level;
+ my $leading_block_text_length_exceeded;
+ my $leading_block_text_line_length;
+ my $leading_block_text_line_number;
+
+ sub initialize_csc_vars {
+ %block_leading_text = ();
+ %block_opening_line_number = ();
+ $csc_new_statement_ok = 1;
+ $csc_last_label = "";
+ %csc_block_label = ();
+ $rleading_block_if_elsif_text = [];
+ $accumulating_text_for_block = "";
+ reset_block_text_accumulator();
+ return;
+ }
+
+ sub reset_block_text_accumulator {
+
+ # save text after 'if' and 'elsif' to append after 'else'
+ if ($accumulating_text_for_block) {
+
+ if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
+ push @{$rleading_block_if_elsif_text}, $leading_block_text;
}
}
+ $accumulating_text_for_block = "";
+ $leading_block_text = "";
+ $leading_block_text_level = 0;
+ $leading_block_text_length_exceeded = 0;
+ $leading_block_text_line_number = 0;
+ $leading_block_text_line_length = 0;
+ return;
}
- return;
-}
-sub in_same_container_i {
-
- # check to see if tokens at i1 and i2 are in the
- # same container, and not separated by a comma, ? or :
- # This is an interface between the _to_go arrays to the rLL array
- my ( $self, $i1, $i2 ) = @_;
- return $self->in_same_container_K( $K_to_go[$i1], $K_to_go[$i2] );
-}
+ sub set_block_text_accumulator {
+ my ( $self, $i ) = @_;
+ $accumulating_text_for_block = $tokens_to_go[$i];
+ if ( $accumulating_text_for_block !~ /^els/ ) {
+ $rleading_block_if_elsif_text = [];
+ }
+ $leading_block_text = "";
+ $leading_block_text_level = $levels_to_go[$i];
+ $leading_block_text_line_number = $self->get_output_line_number();
+ $leading_block_text_length_exceeded = 0;
+
+ # this will contain the column number of the last character
+ # of the closing side comment
+ $leading_block_text_line_length =
+ length($csc_last_label) +
+ length($accumulating_text_for_block) +
+ length( $rOpts->{'closing-side-comment-prefix'} ) +
+ $leading_block_text_level * $rOpts_indent_columns + 3;
+ return;
+ }
-{ # sub in_same_container_K
- my $ris_break_token;
- my $ris_comma_token;
+ sub accumulate_block_text {
+ my ( $self, $i ) = @_;
- BEGIN {
+ # accumulate leading text for -csc, ignoring any side comments
+ if ( $accumulating_text_for_block
+ && !$leading_block_text_length_exceeded
+ && $types_to_go[$i] ne '#' )
+ {
- # all cases break on seeing commas at same level
- my @q = qw( => );
- push @q, ',';
- @{$ris_comma_token}{@q} = (1) x scalar(@q);
+ my $added_length = $token_lengths_to_go[$i];
+ $added_length += 1 if $i == 0;
+ my $new_line_length =
+ $leading_block_text_line_length + $added_length;
- # Non-ternary text also breaks on seeing any of qw(? : || or )
- # Example: we would not want to break at any of these .'s
- # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
- push @q, qw( or || ? : );
- @{$ris_break_token}{@q} = (1) x scalar(@q);
- }
+ # we can add this text if we don't exceed some limits..
+ if (
- sub in_same_container_K {
+ # we must not have already exceeded the text length limit
+ length($leading_block_text) <
+ $rOpts_closing_side_comment_maximum_text
- # Check to see if tokens at K1 and K2 are in the same container,
- # and not separated by certain characters: => , ? : || or
- # This version uses the newer $rLL data structure
+ # and either:
+ # the new total line length must be below the line length limit
+ # or the new length must be below the text length limit
+ # (ie, we may allow one token to exceed the text length limit)
+ && (
+ $new_line_length <
+ $maximum_line_length_at_level[$leading_block_text_level]
- my ( $self, $K1, $K2 ) = @_;
- if ( $K2 < $K1 ) { ( $K1, $K2 ) = ( $K2, $K1 ) }
- my $rLL = $self->{rLL};
- my $depth_1 = $rLL->[$K1]->[_SLEVEL_];
- return if ( $depth_1 < 0 );
- return unless ( $rLL->[$K2]->[_SLEVEL_] == $depth_1 );
+ || length($leading_block_text) + $added_length <
+ $rOpts_closing_side_comment_maximum_text
+ )
- # Select character set to scan for
- my $type_1 = $rLL->[$K1]->[_TYPE_];
- my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token;
+ # UNLESS: we are adding a closing paren before the brace we seek.
+ # This is an attempt to avoid situations where the ... to be
+ # added are longer than the omitted right paren, as in:
- # Fast preliminary loop to verify that tokens are in the same container
- my $KK = $K1;
- while (1) {
- $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
- last if !defined($KK);
- last if ( $KK >= $K2 );
- my $depth_K = $rLL->[$KK]->[_SLEVEL_];
- return if ( $depth_K < $depth_1 );
- next if ( $depth_K > $depth_1 );
- if ( $type_1 ne ':' ) {
- my $tok_K = $rLL->[$KK]->[_TOKEN_];
- return if ( $tok_K eq '?' || $tok_K eq ':' );
- }
- }
+ # foreach my $item (@a_rather_long_variable_name_here) {
+ # &whatever;
+ # } ## end foreach my $item (@a_rather_long_variable_name_here...
- # Slow loop checking for certain characters
+ || (
+ $tokens_to_go[$i] eq ')'
+ && (
+ (
+ $i + 1 <= $max_index_to_go
+ && $block_type_to_go[ $i + 1 ] eq
+ $accumulating_text_for_block
+ )
+ || ( $i + 2 <= $max_index_to_go
+ && $block_type_to_go[ $i + 2 ] eq
+ $accumulating_text_for_block )
+ )
+ )
+ )
+ {
- ###########################################################
- # This is potentially a slow routine and not critical.
- # For safety just give up for large differences.
- # See test file 'infinite_loop.txt'
- ###########################################################
- return if ( $K2 - $K1 > 200 );
+ # add an extra space at each newline
+ if ( $i == 0 && $types_to_go[$i] ne 'b' ) {
+ $leading_block_text .= ' ';
+ }
- foreach my $K ( $K1 + 1 .. $K2 - 1 ) {
+ # add the token text
+ $leading_block_text .= $tokens_to_go[$i];
+ $leading_block_text_line_length = $new_line_length;
+ }
- my $depth_K = $rLL->[$K]->[_SLEVEL_];
- next if ( $depth_K > $depth_1 );
- return if ( $depth_K < $depth_1 ); # redundant, checked above
- my $tok = $rLL->[$K]->[_TOKEN_];
- return if ( $rbreak->{$tok} );
+ # show that text was truncated if necessary
+ elsif ( $types_to_go[$i] ne 'b' ) {
+ $leading_block_text_length_exceeded = 1;
+ $leading_block_text .= '...';
+ }
}
- return 1;
+ return;
}
-}
-sub set_continuation_breaks {
+ sub accumulate_csc_text {
- # Define an array of indexes for inserting newline characters to
- # keep the line lengths below the maximum desired length. There is
- # an implied break after the last token, so it need not be included.
+ my ($self) = @_;
- # Method:
- # This routine is part of series of routines which adjust line
- # lengths. It is only called if a statement is longer than the
- # maximum line length, or if a preliminary scanning located
- # desirable break points. Sub scan_list has already looked at
- # these tokens and set breakpoints (in array
- # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
- # after commas, after opening parens, and before closing parens).
- # This routine will honor these breakpoints and also add additional
- # breakpoints as necessary to keep the line length below the maximum
- # requested. It bases its decision on where the 'bond strength' is
- # lowest.
+ # called once per output buffer when -csc is used. Accumulates
+ # the text placed after certain closing block braces.
+ # Defines and returns the following for this buffer:
- # Output: returns references to the arrays:
- # @i_first
- # @i_last
- # which contain the indexes $i of the first and last tokens on each
- # line.
+ my $block_leading_text = ""; # the leading text of the last '}'
+ my $rblock_leading_if_elsif_text;
+ my $i_block_leading_text =
+ -1; # index of token owning block_leading_text
+ my $block_line_count = 100; # how many lines the block spans
+ my $terminal_type = 'b'; # type of last nonblank token
+ my $i_terminal = 0; # index of last nonblank token
+ my $terminal_block_type = "";
- # In addition, the array:
- # $forced_breakpoint_to_go[$i]
- # may be updated to be =1 for any index $i after which there must be
- # a break. This signals later routines not to undo the breakpoint.
+ # update most recent statement label
+ $csc_last_label = "" unless ($csc_last_label);
+ if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
+ my $block_label = $csc_last_label;
- my ( $self, $saw_good_break ) = @_;
- my @i_first = (); # the first index to output
- my @i_last = (); # the last index to output
- my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
- if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
+ # Loop over all tokens of this batch
+ for my $i ( 0 .. $max_index_to_go ) {
+ my $type = $types_to_go[$i];
+ my $block_type = $block_type_to_go[$i];
+ my $token = $tokens_to_go[$i];
- set_bond_strengths();
+ # remember last nonblank token type
+ if ( $type ne '#' && $type ne 'b' ) {
+ $terminal_type = $type;
+ $terminal_block_type = $block_type;
+ $i_terminal = $i;
+ }
- my $imin = 0;
- my $imax = $max_index_to_go;
- if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
- if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
- my $i_begin = $imin; # index for starting next iteration
+ my $type_sequence = $type_sequence_to_go[$i];
+ if ( $block_type && $type_sequence ) {
- my $leading_spaces = leading_spaces_to_go($imin);
- my $line_count = 0;
- my $last_break_strength = NO_BREAK;
- my $i_last_break = -1;
- my $max_bias = 0.001;
- my $tiny_bias = 0.0001;
- my $leading_alignment_token = "";
- my $leading_alignment_type = "";
+ if ( $token eq '}' ) {
- # see if any ?/:'s are in order
- my $colons_in_order = 1;
- my $last_tok = "";
- my @colon_list = grep { /^[\?\:]$/ } @types_to_go[ 0 .. $max_index_to_go ];
- my $colon_count = @colon_list;
- foreach (@colon_list) {
- if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
- $last_tok = $_;
- }
+ # restore any leading text saved when we entered this block
+ if ( defined( $block_leading_text{$type_sequence} ) ) {
+ ( $block_leading_text, $rblock_leading_if_elsif_text )
+ = @{ $block_leading_text{$type_sequence} };
+ $i_block_leading_text = $i;
+ delete $block_leading_text{$type_sequence};
+ $rleading_block_if_elsif_text =
+ $rblock_leading_if_elsif_text;
+ }
- # This is a sufficient but not necessary condition for colon chain
- my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
+ if ( defined( $csc_block_label{$type_sequence} ) ) {
+ $block_label = $csc_block_label{$type_sequence};
+ delete $csc_block_label{$type_sequence};
+ }
- #-------------------------------------------------------
- # BEGINNING of main loop to set continuation breakpoints
- # Keep iterating until we reach the end
- #-------------------------------------------------------
- while ( $i_begin <= $imax ) {
- my $lowest_strength = NO_BREAK;
- my $starting_sum = $summed_lengths_to_go[$i_begin];
- my $i_lowest = -1;
- my $i_test = -1;
- my $lowest_next_token = '';
- my $lowest_next_type = 'b';
- my $i_lowest_next_nonblank = -1;
+ # if we run into a '}' then we probably started accumulating
+ # at something like a trailing 'if' clause..no harm done.
+ if ( $accumulating_text_for_block
+ && $levels_to_go[$i] <= $leading_block_text_level )
+ {
+ my $lev = $levels_to_go[$i];
+ reset_block_text_accumulator();
+ }
- #-------------------------------------------------------
- # BEGINNING of inner loop to find the best next breakpoint
- #-------------------------------------------------------
- for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
- my $type = $types_to_go[$i_test];
- my $token = $tokens_to_go[$i_test];
- my $next_type = $types_to_go[ $i_test + 1 ];
- my $next_token = $tokens_to_go[ $i_test + 1 ];
- my $i_next_nonblank = $inext_to_go[$i_test];
- my $next_nonblank_type = $types_to_go[$i_next_nonblank];
- my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
- my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
- my $strength = $bond_strength_to_go[$i_test];
- my $maximum_line_length = maximum_line_length($i_begin);
+ if ( defined( $block_opening_line_number{$type_sequence} ) )
+ {
+ my $output_line_number =
+ $self->get_output_line_number();
+ $block_line_count =
+ $output_line_number -
+ $block_opening_line_number{$type_sequence} + 1;
+ delete $block_opening_line_number{$type_sequence};
+ }
+ else {
- # use old breaks as a tie-breaker. For example to
- # prevent blinkers with -pbp in this code:
+ # Error: block opening line undefined for this line..
+ # This shouldn't be possible, but it is not a
+ # significant problem.
+ }
+ }
-##@keywords{
-## qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/}
-## = ();
+ elsif ( $token eq '{' ) {
- # At the same time try to prevent a leading * in this code
- # with the default formatting:
- #
-## return
-## factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 )
-## * ( $x**( $a - 1 ) )
-## * ( ( 1 - $x )**( $b - 1 ) );
+ my $line_number = $self->get_output_line_number();
+ $block_opening_line_number{$type_sequence} = $line_number;
- # reduce strength a bit to break ties at an old breakpoint ...
- if (
- $old_breakpoint_to_go[$i_test]
+ # set a label for this block, except for
+ # a bare block which already has the label
+ # A label can only be used on the next {
+ if ( $block_type =~ /:$/ ) { $csc_last_label = "" }
+ $csc_block_label{$type_sequence} = $csc_last_label;
+ $csc_last_label = "";
+
+ if ( $accumulating_text_for_block
+ && $levels_to_go[$i] == $leading_block_text_level )
+ {
+
+ if ( $accumulating_text_for_block eq $block_type ) {
+
+ # save any leading text before we enter this block
+ $block_leading_text{$type_sequence} = [
+ $leading_block_text,
+ $rleading_block_if_elsif_text
+ ];
+ $block_opening_line_number{$type_sequence} =
+ $leading_block_text_line_number;
+ reset_block_text_accumulator();
+ }
+ else {
- # which is a 'good' breakpoint, meaning ...
- # we don't want to break before it
- && !$want_break_before{$type}
+ # shouldn't happen, but not a serious error.
+ # We were accumulating -csc text for block type
+ # $accumulating_text_for_block and unexpectedly
+ # encountered a '{' for block type $block_type.
+ }
+ }
+ }
+ }
- # and either we want to break before the next token
- # or the next token is not short (i.e. not a '*', '/' etc.)
- && $i_next_nonblank <= $imax
- && ( $want_break_before{$next_nonblank_type}
- || $token_lengths_to_go[$i_next_nonblank] > 2
- || $next_nonblank_type =~ /^[\,\(\[\{L]$/ )
- )
+ if ( $type eq 'k'
+ && $csc_new_statement_ok
+ && $is_if_elsif_else_unless_while_until_for_foreach{$token}
+ && $token =~ /$closing_side_comment_list_pattern/ )
{
- $strength -= $tiny_bias;
+ $self->set_block_text_accumulator($i);
}
-
- # otherwise increase strength a bit if this token would be at the
- # maximum line length. This is necessary to avoid blinking
- # in the above example when the -iob flag is added.
else {
- my $len =
- $leading_spaces +
- $summed_lengths_to_go[ $i_test + 1 ] -
- $starting_sum;
- if ( $len >= $maximum_line_length ) {
- $strength += $tiny_bias;
+
+ # note: ignoring type 'q' because of tricks being played
+ # with 'q' for hanging side comments
+ if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
+ $csc_new_statement_ok =
+ ( $block_type || $type eq 'J' || $type eq ';' );
+ }
+ if ( $type eq ';'
+ && $accumulating_text_for_block
+ && $levels_to_go[$i] == $leading_block_text_level )
+ {
+ reset_block_text_accumulator();
+ }
+ else {
+ $self->accumulate_block_text($i);
}
}
+ }
- my $must_break = 0;
+ # Treat an 'else' block specially by adding preceding 'if' and
+ # 'elsif' text. Otherwise, the 'end else' is not helpful,
+ # especially for cuddled-else formatting.
+ if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
+ $block_leading_text =
+ $self->make_else_csc_text( $i_terminal, $terminal_block_type,
+ $block_leading_text, $rblock_leading_if_elsif_text );
+ }
- # Force an immediate break at certain operators
- # with lower level than the start of the line,
- # unless we've already seen a better break.
- #
- ##############################################
- # Note on an issue with a preceding ?
- ##############################################
- # We don't include a ? in the above list, but there may
- # be a break at a previous ? if the line is long.
- # Because of this we do not want to force a break if
- # there is a previous ? on this line. For now the best way
- # to do this is to not break if we have seen a lower strength
- # point, which is probably a ?.
- #
- # Example of unwanted breaks we are avoiding at a '.' following a ?
- # from pod2html using perltidy -gnu:
- # )
- # ? "\n<A NAME=\""
- # . $value
- # . "\">\n$text</A>\n"
- # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
- if (
- (
- $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
- || ( $next_nonblank_type eq 'k'
- && $next_nonblank_token =~ /^(and|or)$/ )
- )
- && ( $nesting_depth_to_go[$i_begin] >
- $nesting_depth_to_go[$i_next_nonblank] )
- && ( $strength <= $lowest_strength )
- )
- {
- set_forced_breakpoint($i_next_nonblank);
- }
+ # if this line ends in a label then remember it for the next pass
+ $csc_last_label = "";
+ if ( $terminal_type eq 'J' ) {
+ $csc_last_label = $tokens_to_go[$i_terminal];
+ }
- if (
+ return ( $terminal_type, $i_terminal, $i_block_leading_text,
+ $block_leading_text, $block_line_count, $block_label );
+ }
- # Try to put a break where requested by scan_list
- $forced_breakpoint_to_go[$i_test]
+ sub make_else_csc_text {
- # break between ) { in a continued line so that the '{' can
- # be outdented
- # See similar logic in scan_list which catches instances
- # where a line is just something like ') {'. We have to
- # be careful because the corresponding block keyword might
- # not be on the first line, such as 'for' here:
- #
- # eval {
- # for ("a") {
- # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
- # }
- # };
- #
- || (
- $line_count
- && ( $token eq ')' )
- && ( $next_nonblank_type eq '{' )
- && ($next_nonblank_block_type)
- && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
+ # create additional -csc text for an 'else' and optionally 'elsif',
+ # depending on the value of switch
+ #
+ # = 0 add 'if' text to trailing else
+ # = 1 same as 0 plus:
+ # add 'if' to 'elsif's if can fit in line length
+ # add last 'elsif' to trailing else if can fit in one line
+ # = 2 same as 1 but do not check if exceed line length
+ #
+ # $rif_elsif_text = a reference to a list of all previous closing
+ # side comments created for this if block
+ #
+ my ( $self, $i_terminal, $block_type, $block_leading_text,
+ $rif_elsif_text )
+ = @_;
+ my $csc_text = $block_leading_text;
- # RT #104427: Dont break before opening sub brace because
- # sub block breaks handled at higher level, unless
- # it looks like the preceding list is long and broken
- && !(
- $next_nonblank_block_type =~ /^sub\b/
- && ( $nesting_depth_to_go[$i_begin] ==
- $nesting_depth_to_go[$i_next_nonblank] )
- )
+ if ( $block_type eq 'elsif'
+ && $rOpts_closing_side_comment_else_flag == 0 )
+ {
+ return $csc_text;
+ }
- && !$rOpts->{'opening-brace-always-on-right'}
- )
+ my $count = @{$rif_elsif_text};
+ return $csc_text unless ($count);
- # There is an implied forced break at a terminal opening brace
- || ( ( $type eq '{' ) && ( $i_test == $imax ) )
- )
- {
+ my $if_text = '[ if' . $rif_elsif_text->[0];
- # Forced breakpoints must sometimes be overridden, for example
- # because of a side comment causing a NO_BREAK. It is easier
- # to catch this here than when they are set.
- if ( $strength < NO_BREAK - 1 ) {
- $strength = $lowest_strength - $tiny_bias;
- $must_break = 1;
- }
- }
+ # always show the leading 'if' text on 'else'
+ if ( $block_type eq 'else' ) {
+ $csc_text .= $if_text;
+ }
- # quit if a break here would put a good terminal token on
- # the next line and we already have a possible break
- if (
- !$must_break
- && ( $next_nonblank_type =~ /^[\;\,]$/ )
- && (
- (
- $leading_spaces +
- $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
- $starting_sum
- ) > $maximum_line_length
- )
- )
- {
- last if ( $i_lowest >= 0 );
- }
+ # see if that's all
+ if ( $rOpts_closing_side_comment_else_flag == 0 ) {
+ return $csc_text;
+ }
- # Avoid a break which would strand a single punctuation
- # token. For example, we do not want to strand a leading
- # '.' which is followed by a long quoted string.
- # But note that we do want to do this with -extrude (l=1)
- # so please test any changes to this code on -extrude.
- if (
- !$must_break
- && ( $i_test == $i_begin )
- && ( $i_test < $imax )
- && ( $token eq $type )
- && (
- (
- $leading_spaces +
- $summed_lengths_to_go[ $i_test + 1 ] -
- $starting_sum
- ) < $maximum_line_length
- )
- )
- {
- $i_test = min( $imax, $inext_to_go[$i_test] );
- redo;
- }
+ my $last_elsif_text = "";
+ if ( $count > 1 ) {
+ $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
+ if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
+ }
- if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
- {
+ # tentatively append one more item
+ my $saved_text = $csc_text;
+ if ( $block_type eq 'else' ) {
+ $csc_text .= $last_elsif_text;
+ }
+ else {
+ $csc_text .= ' ' . $if_text;
+ }
- # break at previous best break if it would have produced
- # a leading alignment of certain common tokens, and it
- # is different from the latest candidate break
- last
- if ($leading_alignment_type);
+ # all done if no length checks requested
+ if ( $rOpts_closing_side_comment_else_flag == 2 ) {
+ return $csc_text;
+ }
- # Force at least one breakpoint if old code had good
- # break It is only called if a breakpoint is required or
- # desired. This will probably need some adjustments
- # over time. A goal is to try to be sure that, if a new
- # side comment is introduced into formatted text, then
- # the same breakpoints will occur. scbreak.t
- last
- if (
- $i_test == $imax # we are at the end
- && !$forced_breakpoint_count #
- && $saw_good_break # old line had good break
- && $type =~ /^[#;\{]$/ # and this line ends in
- # ';' or side comment
- && $i_last_break < 0 # and we haven't made a break
- && $i_lowest >= 0 # and we saw a possible break
- && $i_lowest < $imax - 1 # (but not just before this ;)
- && $strength - $lowest_strength < 0.5 * WEAK # and it's good
- );
+ # undo it if line length exceeded
+ my $length =
+ length($csc_text) +
+ length($block_type) +
+ length( $rOpts->{'closing-side-comment-prefix'} ) +
+ $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
+ if (
+ $length > $maximum_line_length_at_level[$leading_block_text_level] )
+ {
+ $csc_text = $saved_text;
+ }
+ return $csc_text;
+ }
+} ## end closure accumulate_csc_text
- # Do not skip past an important break point in a short final
- # segment. For example, without this check we would miss the
- # break at the final / in the following code:
- #
- # $depth_stop =
- # ( $tau * $mass_pellet * $q_0 *
- # ( 1. - exp( -$t_stop / $tau ) ) -
- # 4. * $pi * $factor * $k_ice *
- # ( $t_melt - $t_ice ) *
- # $r_pellet *
- # $t_stop ) /
- # ( $rho_ice * $Qs * $pi * $r_pellet**2 );
- #
- if ( $line_count > 2
- && $i_lowest < $i_test
- && $i_test > $imax - 2
- && $nesting_depth_to_go[$i_begin] >
- $nesting_depth_to_go[$i_lowest]
- && $lowest_strength < $last_break_strength - .5 * WEAK )
- {
- # Make this break for math operators for now
- my $ir = $inext_to_go[$i_lowest];
- my $il = $iprev_to_go[$ir];
- last
- if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
- || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ );
- }
+{ ## begin closure balance_csc_text
- # Update the minimum bond strength location
- $lowest_strength = $strength;
- $i_lowest = $i_test;
- $lowest_next_token = $next_nonblank_token;
- $lowest_next_type = $next_nonblank_type;
- $i_lowest_next_nonblank = $i_next_nonblank;
- last if $must_break;
+ # Some additional routines for handling the --closing-side-comments option
+
+ my %matching_char;
+
+ BEGIN {
+ %matching_char = (
+ '{' => '}',
+ '(' => ')',
+ '[' => ']',
+ '}' => '{',
+ ')' => '(',
+ ']' => '[',
+ );
+ }
+
+ sub balance_csc_text {
+
+ # Append characters to balance a closing side comment so that editors
+ # such as vim can correctly jump through code.
+ # Simple Example:
+ # input = ## end foreach my $foo ( sort { $b ...
+ # output = ## end foreach my $foo ( sort { $b ...})
- # set flags to remember if a break here will produce a
- # leading alignment of certain common tokens
- if ( $line_count > 0
- && $i_test < $imax
- && ( $lowest_strength - $last_break_strength <= $max_bias )
- )
- {
- my $i_last_end = $iprev_to_go[$i_begin];
- my $tok_beg = $tokens_to_go[$i_begin];
- my $type_beg = $types_to_go[$i_begin];
- if (
+ # NOTE: This routine does not currently filter out structures within
+ # quoted text because the bounce algorithms in text editors do not
+ # necessarily do this either (a version of vim was checked and
+ # did not do this).
- # check for leading alignment of certain tokens
- (
- $tok_beg eq $next_nonblank_token
- && $is_chain_operator{$tok_beg}
- && ( $type_beg eq 'k'
- || $type_beg eq $tok_beg )
- && $nesting_depth_to_go[$i_begin] >=
- $nesting_depth_to_go[$i_next_nonblank]
- )
+ # Some complex examples which will cause trouble for some editors:
+ # while ( $mask_string =~ /\{[^{]*?\}/g ) {
+ # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
+ # if ( $1 eq '{' ) {
+ # test file test1/braces.pl has many such examples.
- || ( $tokens_to_go[$i_last_end] eq $token
- && $is_chain_operator{$token}
- && ( $type eq 'k' || $type eq $token )
- && $nesting_depth_to_go[$i_last_end] >=
- $nesting_depth_to_go[$i_test] )
- )
- {
- $leading_alignment_token = $next_nonblank_token;
- $leading_alignment_type = $next_nonblank_type;
- }
- }
- }
+ my ($csc) = @_;
- my $too_long = ( $i_test >= $imax );
- if ( !$too_long ) {
- my $next_length =
- $leading_spaces +
- $summed_lengths_to_go[ $i_test + 2 ] -
- $starting_sum;
- $too_long = $next_length > $maximum_line_length;
+ # loop to examine characters one-by-one, RIGHT to LEFT and
+ # build a balancing ending, LEFT to RIGHT.
+ for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
- # To prevent blinkers we will avoid leaving a token exactly at
- # the line length limit unless it is the last token or one of
- # several "good" types.
- #
- # The following code was a blinker with -pbp before this
- # modification:
-## $last_nonblank_token eq '('
-## && $is_indirect_object_taker{ $paren_type
-## [$paren_depth] }
- # The issue causing the problem is that if the
- # term [$paren_depth] gets broken across a line then
- # the whitespace routine doesn't see both opening and closing
- # brackets and will format like '[ $paren_depth ]'. This
- # leads to an oscillation in length depending if we break
- # before the closing bracket or not.
- if ( !$too_long
- && $i_test + 1 < $imax
- && $next_nonblank_type !~ /^[,\}\]\)R]$/ )
- {
- $too_long = $next_length >= $maximum_line_length;
- }
- }
+ my $char = substr( $csc, $pos, 1 );
- FORMATTER_DEBUG_FLAG_BREAK
- && do {
- my $ltok = $token;
- my $rtok = $next_nonblank_token ? $next_nonblank_token : "";
- my $i_testp2 = $i_test + 2;
- if ( $i_testp2 > $max_index_to_go + 1 ) {
- $i_testp2 = $max_index_to_go + 1;
- }
- if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
- if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
- print STDOUT
-"BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n";
- };
+ # ignore everything except structural characters
+ next unless ( $matching_char{$char} );
- # allow one extra terminal token after exceeding line length
- # if it would strand this token.
- if ( $rOpts_fuzzy_line_length
- && $too_long
- && $i_lowest == $i_test
- && $token_lengths_to_go[$i_test] > 1
- && $next_nonblank_type =~ /^[\;\,]$/ )
- {
- $too_long = 0;
- }
+ # pop most recently appended character
+ my $top = chop($csc);
- last
- if (
- ( $i_test == $imax ) # we're done if no more tokens,
- || (
- ( $i_lowest >= 0 ) # or no more space and we have a break
- && $too_long
- )
- );
+ # push it back plus the mate to the newest character
+ # unless they balance each other.
+ $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
}
- #-------------------------------------------------------
- # END of inner loop to find the best next breakpoint
- # Now decide exactly where to put the breakpoint
- #-------------------------------------------------------
+ # return the balanced string
+ return $csc;
+ }
+} ## end closure balance_csc_text
- # it's always ok to break at imax if no other break was found
- if ( $i_lowest < 0 ) { $i_lowest = $imax }
+sub add_closing_side_comment {
- # semi-final index calculation
- my $i_next_nonblank = $inext_to_go[$i_lowest];
- my $next_nonblank_type = $types_to_go[$i_next_nonblank];
- my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
+ my $self = shift;
+ my $rLL = $self->[_rLL_];
- #-------------------------------------------------------
- # ?/: rule 1 : if a break here will separate a '?' on this
- # line from its closing ':', then break at the '?' instead.
- #-------------------------------------------------------
- foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
- next unless ( $tokens_to_go[$i] eq '?' );
+ # add closing side comments after closing block braces if -csc used
+ my ( $closing_side_comment, $cscw_block_comment );
- # do not break if probable sequence of ?/: statements
- next if ($is_colon_chain);
+ #---------------------------------------------------------------
+ # Step 1: loop through all tokens of this line to accumulate
+ # the text needed to create the closing side comments. Also see
+ # how the line ends.
+ #---------------------------------------------------------------
- # do not break if statement is broken by side comment
- next
- if ( $tokens_to_go[$max_index_to_go] eq '#'
- && $self->terminal_type_i( 0, $max_index_to_go ) !~
- /^[\;\}]$/ );
+ my ( $terminal_type, $i_terminal, $i_block_leading_text,
+ $block_leading_text, $block_line_count, $block_label )
+ = $self->accumulate_csc_text();
- # no break needed if matching : is also on the line
- next
- if ( $mate_index_to_go[$i] >= 0
- && $mate_index_to_go[$i] <= $i_next_nonblank );
+ #---------------------------------------------------------------
+ # Step 2: make the closing side comment if this ends a block
+ #---------------------------------------------------------------
+ my $have_side_comment = $types_to_go[$max_index_to_go] eq '#';
- $i_lowest = $i;
- if ( $want_break_before{'?'} ) { $i_lowest-- }
- last;
- }
+ # if this line might end in a block closure..
+ if (
+ $terminal_type eq '}'
- #-------------------------------------------------------
- # END of inner loop to find the best next breakpoint:
- # Break the line after the token with index i=$i_lowest
- #-------------------------------------------------------
+ # ..and either
+ && (
- # final index calculation
- $i_next_nonblank = $inext_to_go[$i_lowest];
- $next_nonblank_type = $types_to_go[$i_next_nonblank];
- $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
+ # the block is long enough
+ ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
- FORMATTER_DEBUG_FLAG_BREAK
- && print STDOUT
- "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
+ # or there is an existing comment to check
+ || ( $have_side_comment
+ && $rOpts->{'closing-side-comment-warnings'} )
+ )
- #-------------------------------------------------------
- # ?/: rule 2 : if we break at a '?', then break at its ':'
- #
- # Note: this rule is also in sub scan_list to handle a break
- # at the start and end of a line (in case breaks are dictated
- # by side comments).
- #-------------------------------------------------------
- if ( $next_nonblank_type eq '?' ) {
- set_closing_breakpoint($i_next_nonblank);
- }
- elsif ( $types_to_go[$i_lowest] eq '?' ) {
- set_closing_breakpoint($i_lowest);
- }
+ # .. and if this is one of the types of interest
+ && $block_type_to_go[$i_terminal] =~
+ /$closing_side_comment_list_pattern/
- #-------------------------------------------------------
- # ?/: rule 3 : if we break at a ':' then we save
- # its location for further work below. We may need to go
- # back and break at its '?'.
- #-------------------------------------------------------
- if ( $next_nonblank_type eq ':' ) {
- push @i_colon_breaks, $i_next_nonblank;
- }
- elsif ( $types_to_go[$i_lowest] eq ':' ) {
- push @i_colon_breaks, $i_lowest;
+ # .. but not an anonymous sub
+ # These are not normally of interest, and their closing braces are
+ # often followed by commas or semicolons anyway. This also avoids
+ # possible erratic output due to line numbering inconsistencies
+ # in the cases where their closing braces terminate a line.
+ && $block_type_to_go[$i_terminal] ne 'sub'
+
+ # ..and the corresponding opening brace must is not in this batch
+ # (because we do not need to tag one-line blocks, although this
+ # should also be caught with a positive -csci value)
+ && $mate_index_to_go[$i_terminal] < 0
+
+ # ..and either
+ && (
+
+ # this is the last token (line doesn't have a side comment)
+ !$have_side_comment
+
+ # or the old side comment is a closing side comment
+ || $tokens_to_go[$max_index_to_go] =~
+ /$closing_side_comment_prefix_pattern/
+ )
+ )
+ {
+
+ # then make the closing side comment text
+ if ($block_label) { $block_label .= " " }
+ my $token =
+"$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
+
+ # append any extra descriptive text collected above
+ if ( $i_block_leading_text == $i_terminal ) {
+ $token .= $block_leading_text;
}
- # here we should set breaks for all '?'/':' pairs which are
- # separated by this line
+ $token = balance_csc_text($token)
+ if $rOpts->{'closing-side-comments-balanced'};
- $line_count++;
+ $token =~ s/\s*$//; # trim any trailing whitespace
- # save this line segment, after trimming blanks at the ends
- push( @i_first,
- ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
- push( @i_last,
- ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
+ # handle case of existing closing side comment
+ if ($have_side_comment) {
+
+ # warn if requested and tokens differ significantly
+ if ( $rOpts->{'closing-side-comment-warnings'} ) {
+ my $old_csc = $tokens_to_go[$max_index_to_go];
+ my $new_csc = $token;
+ $new_csc =~ s/\s+//g; # trim all whitespace
+ $old_csc =~ s/\s+//g; # trim all whitespace
+ $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
+ $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
+ $new_csc =~ s/(\.\.\.)$//; # trim trailing '...'
+ my $new_trailing_dots = $1;
+ $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
- # set a forced breakpoint at a container opening, if necessary, to
- # signal a break at a closing container. Excepting '(' for now.
- if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
- && !$forced_breakpoint_to_go[$i_lowest] )
- {
- set_closing_breakpoint($i_lowest);
- }
+ # Patch to handle multiple closing side comments at
+ # else and elsif's. These have become too complicated
+ # to check, so if we see an indication of
+ # '[ if' or '[ # elsif', then assume they were made
+ # by perltidy.
+ if ( $block_type_to_go[$i_terminal] eq 'else' ) {
+ if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
+ }
+ elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
+ if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
+ }
- # get ready to go again
- $i_begin = $i_lowest + 1;
- $last_break_strength = $lowest_strength;
- $i_last_break = $i_lowest;
- $leading_alignment_token = "";
- $leading_alignment_type = "";
- $lowest_next_token = '';
- $lowest_next_type = 'b';
+ # if old comment is contained in new comment,
+ # only compare the common part.
+ if ( length($new_csc) > length($old_csc) ) {
+ $new_csc = substr( $new_csc, 0, length($old_csc) );
+ }
- if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
- $i_begin++;
- }
+ # if the new comment is shorter and has been limited,
+ # only compare the common part.
+ if ( length($new_csc) < length($old_csc)
+ && $new_trailing_dots )
+ {
+ $old_csc = substr( $old_csc, 0, length($new_csc) );
+ }
- # update indentation size
- if ( $i_begin <= $imax ) {
- $leading_spaces = leading_spaces_to_go($i_begin);
- }
- }
+ # any remaining difference?
+ if ( $new_csc ne $old_csc ) {
- #-------------------------------------------------------
- # END of main loop to set continuation breakpoints
- # Now go back and make any necessary corrections
- #-------------------------------------------------------
+ # just leave the old comment if we are below the threshold
+ # for creating side comments
+ if ( $block_line_count <
+ $rOpts->{'closing-side-comment-interval'} )
+ {
+ $token = undef;
+ }
- #-------------------------------------------------------
- # ?/: rule 4 -- if we broke at a ':', then break at
- # corresponding '?' unless this is a chain of ?: expressions
- #-------------------------------------------------------
- if (@i_colon_breaks) {
+ # otherwise we'll make a note of it
+ else {
- # using a simple method for deciding if we are in a ?/: chain --
- # this is a chain if it has multiple ?/: pairs all in order;
- # otherwise not.
- # Note that if line starts in a ':' we count that above as a break
- my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
+ warning(
+"perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
+ );
- unless ($is_chain) {
- my @insert_list = ();
- foreach (@i_colon_breaks) {
- my $i_question = $mate_index_to_go[$_];
- if ( $i_question >= 0 ) {
- if ( $want_break_before{'?'} ) {
- $i_question = $iprev_to_go[$i_question];
+ # save the old side comment in a new trailing block
+ # comment
+ my $timestamp = "";
+ if ( $rOpts->{'timestamp'} ) {
+ my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
+ $year += 1900;
+ $month += 1;
+ $timestamp = "$year-$month-$day";
+ }
+ $cscw_block_comment =
+"## perltidy -cscw $timestamp: $tokens_to_go[$max_index_to_go]";
+## "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
}
+ }
+ else {
- if ( $i_question >= 0 ) {
- push @insert_list, $i_question;
+ # No differences.. we can safely delete old comment if we
+ # are below the threshold
+ if ( $block_line_count <
+ $rOpts->{'closing-side-comment-interval'} )
+ {
+ $token = undef;
+ $self->unstore_token_to_go()
+ if ( $types_to_go[$max_index_to_go] eq '#' );
+ $self->unstore_token_to_go()
+ if ( $types_to_go[$max_index_to_go] eq 'b' );
}
}
- insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
}
+
+ # switch to the new csc (unless we deleted it!)
+ if ($token) {
+ $tokens_to_go[$max_index_to_go] = $token;
+ my $K = $K_to_go[$max_index_to_go];
+ $rLL->[$K]->[_TOKEN_] = $token;
+ $rLL->[$K]->[_TOKEN_LENGTH_] =
+ length($token); # NOTE: length no longer important
+ }
+ }
+
+ # handle case of NO existing closing side comment
+ else {
+
+ # To avoid inserting a new token in the token arrays, we
+ # will just return the new side comment so that it can be
+ # inserted just before it is needed in the call to the
+ # vertical aligner.
+ $closing_side_comment = $token;
}
}
- return ( \@i_first, \@i_last, $colon_count );
+ return ( $closing_side_comment, $cscw_block_comment );
}
-sub insert_additional_breaks {
+############################
+# CODE SECTION 15: Summarize
+############################
- # this routine will add line breaks at requested locations after
- # sub set_continuation_breaks has made preliminary breaks.
+sub wrapup {
- my ( $ri_break_list, $ri_first, $ri_last ) = @_;
- my $i_f;
- my $i_l;
- my $line_number = 0;
- foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
+ # This is the last routine called when a file is formatted.
+ # Flush buffer and write any informative messages
+ my $self = shift;
- $i_f = $ri_first->[$line_number];
- $i_l = $ri_last->[$line_number];
- while ( $i_break_left >= $i_l ) {
- $line_number++;
+ $self->flush();
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->decrement_output_line_number()
+ ; # fix up line number since it was incremented
+ we_are_at_the_last_line();
+ my $added_semicolon_count = $self->[_added_semicolon_count_];
+ my $first_added_semicolon_at = $self->[_first_added_semicolon_at_];
+ my $last_added_semicolon_at = $self->[_last_added_semicolon_at_];
- # shouldn't happen unless caller passes bad indexes
- if ( $line_number >= @{$ri_last} ) {
- warning(
-"Non-fatal program bug: couldn't set break at $i_break_left\n"
- );
- report_definite_bug();
- return;
- }
- $i_f = $ri_first->[$line_number];
- $i_l = $ri_last->[$line_number];
+ if ( $added_semicolon_count > 0 ) {
+ my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
+ my $what =
+ ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
+ write_logfile_entry("$added_semicolon_count $what added:\n");
+ write_logfile_entry(
+ " $first at input line $first_added_semicolon_at\n");
+
+ if ( $added_semicolon_count > 1 ) {
+ write_logfile_entry(
+ " Last at input line $last_added_semicolon_at\n");
}
+ write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
+ write_logfile_entry("\n");
+ }
- # Do not leave a blank at the end of a line; back up if necessary
- if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
+ my $deleted_semicolon_count = $self->[_deleted_semicolon_count_];
+ my $first_deleted_semicolon_at = $self->[_first_deleted_semicolon_at_];
+ my $last_deleted_semicolon_at = $self->[_last_deleted_semicolon_at_];
+ if ( $deleted_semicolon_count > 0 ) {
+ my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
+ my $what =
+ ( $deleted_semicolon_count > 1 )
+ ? "semicolons were"
+ : "semicolon was";
+ write_logfile_entry(
+ "$deleted_semicolon_count unnecessary $what deleted:\n");
+ write_logfile_entry(
+ " $first at input line $first_deleted_semicolon_at\n");
- my $i_break_right = $inext_to_go[$i_break_left];
- if ( $i_break_left >= $i_f
- && $i_break_left < $i_l
- && $i_break_right > $i_f
- && $i_break_right <= $i_l )
- {
- splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
- splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
+ if ( $deleted_semicolon_count > 1 ) {
+ write_logfile_entry(
+ " Last at input line $last_deleted_semicolon_at\n");
}
+ write_logfile_entry(" (Use -ndsm to prevent semicolon deletion)\n");
+ write_logfile_entry("\n");
}
- return;
-}
-sub set_closing_breakpoint {
+ my $embedded_tab_count = $self->[_embedded_tab_count_];
+ my $first_embedded_tab_at = $self->[_first_embedded_tab_at_];
+ my $last_embedded_tab_at = $self->[_last_embedded_tab_at_];
+ if ( $embedded_tab_count > 0 ) {
+ my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
+ my $what =
+ ( $embedded_tab_count > 1 )
+ ? "quotes or patterns"
+ : "quote or pattern";
+ write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
+ write_logfile_entry(
+"This means the display of this script could vary with device or software\n"
+ );
+ write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
+
+ if ( $embedded_tab_count > 1 ) {
+ write_logfile_entry(
+ " Last at input line $last_embedded_tab_at\n");
+ }
+ write_logfile_entry("\n");
+ }
- # set a breakpoint at a matching closing token
- # at present, this is only used to break at a ':' which matches a '?'
- my $i_break = shift;
+ my $first_tabbing_disagreement = $self->[_first_tabbing_disagreement_];
+ my $last_tabbing_disagreement = $self->[_last_tabbing_disagreement_];
+ my $tabbing_disagreement_count = $self->[_tabbing_disagreement_count_];
+ my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
- if ( $mate_index_to_go[$i_break] >= 0 ) {
+ if ($first_tabbing_disagreement) {
+ write_logfile_entry(
+"First indentation disagreement seen at input line $first_tabbing_disagreement\n"
+ );
+ }
- # CAUTION: infinite recursion possible here:
- # set_closing_breakpoint calls set_forced_breakpoint, and
- # set_forced_breakpoint call set_closing_breakpoint
- # ( test files attrib.t, BasicLyx.pm.html).
- # Don't reduce the '2' in the statement below
- if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
+ my $first_btd = $self->[_first_brace_tabbing_disagreement_];
+ if ($first_btd) {
+ my $msg =
+"First closing brace indentation disagreement started at input line $first_btd\n";
+ write_logfile_entry($msg);
- # break before } ] and ), but sub set_forced_breakpoint will decide
- # to break before or after a ? and :
- my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
- set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
- }
+ # leave a hint in the .ERR file if there was a brace error
+ if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
}
- else {
- my $type_sequence = $type_sequence_to_go[$i_break];
- if ($type_sequence) {
- my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
- $postponed_breakpoint{$type_sequence} = 1;
- }
+
+ my $in_btd = $self->[_in_brace_tabbing_disagreement_];
+ if ($in_btd) {
+ my $msg =
+"Ending with brace indentation disagreement which started at input line $in_btd\n";
+ write_logfile_entry($msg);
+
+ # leave a hint in the .ERR file if there was a brace error
+ if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
}
- return;
-}
-sub compare_indentation_levels {
+ if ($in_tabbing_disagreement) {
+ my $msg =
+"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n";
+ write_logfile_entry($msg);
+ }
+ else {
- # check to see if output line tabbing agrees with input line
- # this can be very useful for debugging a script which has an extra
- # or missing brace
- my ( $guessed_indentation_level, $structural_indentation_level ) = @_;
- if ( $guessed_indentation_level ne $structural_indentation_level ) {
- $last_tabbing_disagreement = $input_line_number;
+ if ($last_tabbing_disagreement) {
- if ($in_tabbing_disagreement) {
+ write_logfile_entry(
+"Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
+ );
}
else {
- $tabbing_disagreement_count++;
-
- if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
- write_logfile_entry(
-"Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
- );
- }
- $in_tabbing_disagreement = $input_line_number;
- $first_tabbing_disagreement = $in_tabbing_disagreement
- unless ($first_tabbing_disagreement);
+ write_logfile_entry("No indentation disagreement seen\n");
}
}
- else {
- if ($in_tabbing_disagreement) {
+ if ($first_tabbing_disagreement) {
+ write_logfile_entry(
+"Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
+ );
+ }
+ write_logfile_entry("\n");
- if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
- write_logfile_entry(
-"End indentation disagreement from input line $in_tabbing_disagreement\n"
- );
+ my $vao = $self->[_vertical_aligner_object_];
+ $vao->report_anything_unusual();
+
+ $file_writer_object->report_line_length_errors();
+
+ $self->[_converged_] = $file_writer_object->get_convergence_check()
+ || $rOpts->{'indent-only'};
- if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
- write_logfile_entry(
- "No further tabbing disagreements will be noted\n");
- }
- }
- $in_tabbing_disagreement = 0;
- }
- }
return;
}
+
+} ## end package Perl::Tidy::Formatter
1;
package Perl::Tidy::VerticalAligner;
use strict;
use warnings;
-our $VERSION = '20200110';
+our $VERSION = '20210717';
use Perl::Tidy::VerticalAligner::Alignment;
use Perl::Tidy::VerticalAligner::Line;
# attempts to line up certain common tokens, such as => and #, which are
# identified by the calling routine.
#
-# There are two main routines: valign_input and flush. Append acts as a
-# storage buffer, collecting lines into a group which can be vertically
-# aligned. When alignment is no longer possible or desirable, it dumps
-# the group to flush.
+# Usage:
+# - Initiate an object with a call to new().
+# - Write lines one-by-one with calls to valign_input().
+# - Make a final call to flush() to empty the pipeline.
#
-# valign_input -----> flush
+# The sub valign_input collects lines into groups. When a group reaches
+# the maximum possible size it is processed for alignment and output.
+# The maximum group size is reached whenerver there is a change in indentation
+# level, a blank line, a block comment, or an external flush call. The calling
+# routine may also force a break in alignment at any time.
#
-# collects writes
-# vertical one
-# groups group
+# If the calling routine needs to interrupt the output and send other text to
+# the output, it must first call flush() to empty the output pipeline. This
+# might occur for example if a block of pod text needs to be sent to the output
+# between blocks of code.
+
+# It is essential that a final call to flush() be made. Otherwise some
+# final lines of text will be lost.
+
+# Index...
+# CODE SECTION 1: Preliminary code, global definitions and sub new
+# sub new
+# CODE SECTION 2: Some Basic Utilities
+# CODE SECTION 3: Code to accept input and form groups
+# sub valign_input
+# CODE SECTION 4: Code to process comment lines
+# sub _flush_comment_lines
+# CODE SECTION 5: Code to process groups of code lines
+# sub _flush_group_lines
+# CODE SECTION 6: Output Step A
+# sub valign_output_step_A
+# CODE SECTION 7: Output Step B
+# sub valign_output_step_B
+# CODE SECTION 8: Output Step C
+# sub valign_output_step_C
+# CODE SECTION 9: Output Step D
+# sub valign_output_step_D
+# CODE SECTION 10: Summary
+# sub report_anything_unusual
+
+##################################################################
+# CODE SECTION 1: Preliminary code, global definitions and sub new
+##################################################################
+
+sub AUTOLOAD {
+
+ # Catch any undefined sub calls so that we are sure to get
+ # some diagnostic information. This sub should never be called
+ # except for a programming error.
+ our $AUTOLOAD;
+ return if ( $AUTOLOAD =~ /\bDESTROY$/ );
+ my ( $pkg, $fname, $lno ) = caller();
+ my $my_package = __PACKAGE__;
+ print STDERR <<EOM;
+======================================================================
+Error detected in package '$my_package', version $VERSION
+Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
+Called from package: '$pkg'
+Called from File '$fname' at line '$lno'
+This error is probably due to a recent programming change
+======================================================================
+EOM
+ exit 1;
+}
+
+sub DESTROY {
+
+ # required to avoid call to AUTOLOAD in some versions of perl
+}
BEGIN {
- # Caution: these debug flags produce a lot of output
- # They should all be 0 except when debugging small scripts
+ # Define the fixed indexes for variables in $self, which is an array
+ # reference. Note the convention of leading and trailing underscores to
+ # keep them unique.
+ my $i = 0;
+ use constant {
+ _file_writer_object_ => $i++,
+ _logger_object_ => $i++,
+ _diagnostics_object_ => $i++,
+ _length_function_ => $i++,
+
+ _rOpts_ => $i++,
+ _rOpts_indent_columns_ => $i++,
+ _rOpts_tabs_ => $i++,
+ _rOpts_entab_leading_whitespace_ => $i++,
+ _rOpts_fixed_position_side_comment_ => $i++,
+ _rOpts_minimum_space_to_comment_ => $i++,
+ _rOpts_maximum_line_length_ => $i++,
+ _rOpts_variable_maximum_line_length_ => $i++,
+ _rOpts_valign_ => $i++,
+
+ _last_level_written_ => $i++,
+ _last_side_comment_column_ => $i++,
+ _last_side_comment_line_number_ => $i++,
+ _last_side_comment_length_ => $i++,
+ _last_side_comment_level_ => $i++,
+ _outdented_line_count_ => $i++,
+ _first_outdented_line_at_ => $i++,
+ _last_outdented_line_at_ => $i++,
+ _consecutive_block_comments_ => $i++,
+
+ _rgroup_lines_ => $i++,
+ _group_level_ => $i++,
+ _group_type_ => $i++,
+ _zero_count_ => $i++,
+ _last_leading_space_count_ => $i++,
+ _comment_leading_space_count_ => $i++,
+ };
+
+ # Debug flag. This is a relic from the original program development
+ # looking for problems with tab characters. Caution: this debug flag can
+ # produce a lot of output It should be 0 except when debugging small
+ # scripts.
- use constant VALIGN_DEBUG_FLAG_APPEND => 0;
- use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
- use constant VALIGN_DEBUG_FLAG_TERNARY => 0;
- use constant VALIGN_DEBUG_FLAG_TABS => 0;
+ use constant DEBUG_TABS => 0;
my $debug_warning = sub {
print STDOUT "VALIGN_DEBUGGING with key $_[0]\n";
return;
};
- VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND');
- VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
- VALIGN_DEBUG_FLAG_TERNARY && $debug_warning->('TERNARY');
- VALIGN_DEBUG_FLAG_TABS && $debug_warning->('TABS');
+ DEBUG_TABS && $debug_warning->('TABS');
}
-use vars qw(
- $vertical_aligner_self
- $maximum_alignment_index
- $ralignment_list
- $maximum_jmax_seen
- $minimum_jmax_seen
- $previous_minimum_jmax_seen
- $previous_maximum_jmax_seen
- @group_lines
- $group_level
- $group_type
- $group_maximum_gap
- $marginal_match
- $last_level_written
- $last_leading_space_count
- $extra_indent_ok
- $zero_count
- $last_comment_column
- $last_side_comment_line_number
- $last_side_comment_length
- $last_side_comment_level
- $outdented_line_count
- $first_outdented_line_at
- $last_outdented_line_at
- $diagnostics_object
- $logger_object
- $file_writer_object
- @side_comment_history
- $comment_leading_space_count
- $is_matching_terminal_line
- $consecutive_block_comments
-
- $cached_line_text
- $cached_line_type
- $cached_line_flag
- $cached_seqno
- $cached_line_valid
- $cached_line_leading_space_count
- $cached_seqno_string
-
- $valign_buffer_filling
- @valign_buffer
-
- $seqno_string
- $last_nonblank_seqno_string
-
- $rOpts
-
- $rOpts_maximum_line_length
- $rOpts_variable_maximum_line_length
- $rOpts_continuation_indentation
- $rOpts_indent_columns
- $rOpts_tabs
- $rOpts_entab_leading_whitespace
- $rOpts_valign
-
- $rOpts_fixed_position_side_comment
- $rOpts_minimum_space_to_comment
-
-);
-
-sub initialize {
-
- (
- my $class, $rOpts, $file_writer_object, $logger_object,
- $diagnostics_object
- ) = @_;
-
- # variables describing the entire space group:
- $ralignment_list = [];
- $group_level = 0;
- $last_level_written = -1;
- $extra_indent_ok = 0; # can we move all lines to the right?
- $last_side_comment_length = 0;
- $maximum_jmax_seen = 0;
- $minimum_jmax_seen = 0;
- $previous_minimum_jmax_seen = 0;
- $previous_maximum_jmax_seen = 0;
-
- # variables describing each line of the group
- @group_lines = (); # list of all lines in group
-
- $outdented_line_count = 0;
- $first_outdented_line_at = 0;
- $last_outdented_line_at = 0;
- $last_side_comment_line_number = 0;
- $last_side_comment_level = -1;
- $is_matching_terminal_line = 0;
-
- # most recent 3 side comments; [ line number, column ]
- $side_comment_history[0] = [ -300, 0 ];
- $side_comment_history[1] = [ -200, 0 ];
- $side_comment_history[2] = [ -100, 0 ];
-
- # valign_output_step_B cache:
- $cached_line_text = "";
- $cached_line_type = 0;
- $cached_line_flag = 0;
- $cached_seqno = 0;
- $cached_line_valid = 0;
- $cached_line_leading_space_count = 0;
- $cached_seqno_string = "";
-
- # string of sequence numbers joined together
- $seqno_string = "";
- $last_nonblank_seqno_string = "";
-
- # frequently used parameters
- $rOpts_indent_columns = $rOpts->{'indent-columns'};
- $rOpts_tabs = $rOpts->{'tabs'};
- $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
- $rOpts_fixed_position_side_comment =
+sub new {
+
+ my ( $class, @args ) = @_;
+
+ my %defaults = (
+ rOpts => undef,
+ file_writer_object => undef,
+ logger_object => undef,
+ diagnostics_object => undef,
+ length_function => sub { return length( $_[0] ) },
+ );
+ my %args = ( %defaults, @args );
+
+ # Initialize other caches and buffers
+ initialize_step_B_cache();
+ initialize_valign_buffer();
+ initialize_leading_string_cache();
+ initialize_decode();
+
+ # Initialize all variables in $self.
+ # To add an item to $self, first define a new constant index in the BEGIN
+ # section.
+ my $self = [];
+
+ # objects
+ $self->[_file_writer_object_] = $args{file_writer_object};
+ $self->[_logger_object_] = $args{logger_object};
+ $self->[_diagnostics_object_] = $args{diagnostics_object};
+ $self->[_length_function_] = $args{length_function};
+
+ # shortcuts to user options
+ my $rOpts = $args{rOpts};
+
+ $self->[_rOpts_] = $rOpts;
+ $self->[_rOpts_indent_columns_] = $rOpts->{'indent-columns'};
+ $self->[_rOpts_tabs_] = $rOpts->{'tabs'};
+ $self->[_rOpts_entab_leading_whitespace_] =
+ $rOpts->{'entab-leading-whitespace'};
+ $self->[_rOpts_fixed_position_side_comment_] =
$rOpts->{'fixed-position-side-comment'};
- $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
- $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
- $rOpts_variable_maximum_line_length =
+ $self->[_rOpts_minimum_space_to_comment_] =
+ $rOpts->{'minimum-space-to-comment'};
+ $self->[_rOpts_maximum_line_length_] = $rOpts->{'maximum-line-length'};
+ $self->[_rOpts_variable_maximum_line_length_] =
$rOpts->{'variable-maximum-line-length'};
- $rOpts_valign = $rOpts->{'valign'};
+ $self->[_rOpts_valign_] = $rOpts->{'valign'};
+
+ # Batch of lines being collected
+ $self->[_rgroup_lines_] = [];
+ $self->[_group_level_] = 0;
+ $self->[_group_type_] = "";
+ $self->[_zero_count_] = 0;
+ $self->[_comment_leading_space_count_] = 0;
+ $self->[_last_leading_space_count_] = 0;
+
+ # Memory of what has been processed
+ $self->[_last_level_written_] = -1;
+ $self->[_last_side_comment_column_] = 0;
+ $self->[_last_side_comment_line_number_] = 0;
+ $self->[_last_side_comment_length_] = 0;
+ $self->[_last_side_comment_level_] = -1;
+ $self->[_outdented_line_count_] = 0;
+ $self->[_first_outdented_line_at_] = 0;
+ $self->[_last_outdented_line_at_] = 0;
+ $self->[_consecutive_block_comments_] = 0;
+
+ bless $self, $class;
+ return $self;
+}
+
+#################################
+# CODE SECTION 2: Basic Utilities
+#################################
+
+sub flush {
+
+ # flush() is the external call to completely empty the pipeline.
+ my ($self) = @_;
+
+ # push things out the pipline...
- $consecutive_block_comments = 0;
- forget_side_comment();
+ # push out any current group lines
+ $self->_flush_group_lines();
- initialize_for_new_group();
+ # then anything left in the cache of step_B
+ $self->_flush_cache();
- $vertical_aligner_self = {};
- bless $vertical_aligner_self, $class;
- return $vertical_aligner_self;
+ # then anything left in the buffer of step_C
+ $self->dump_valign_buffer();
+
+ return;
}
sub initialize_for_new_group {
- @group_lines = ();
- $maximum_alignment_index = -1; # alignments in current group
- $zero_count = 0; # count consecutive lines without tokens
- $group_maximum_gap = 0; # largest gap introduced
- $group_type = "";
- $marginal_match = 0;
- $comment_leading_space_count = 0;
- $last_leading_space_count = 0;
+ my ($self) = @_;
+
+ $self->[_rgroup_lines_] = [];
+ $self->[_group_type_] = "";
+ $self->[_zero_count_] = 0;
+ $self->[_comment_leading_space_count_] = 0;
+ $self->[_last_leading_space_count_] = 0;
+
+ # Note that the value for _group_level_ is
+ # handled separately in sub valign_input
return;
}
+sub group_line_count {
+ return +@{ $_[0]->[_rgroup_lines_] };
+}
+
# interface to Perl::Tidy::Diagnostics routines
+# For debugging; not currently used
sub write_diagnostics {
- my $msg = shift;
+ my ( $self, $msg ) = @_;
+ my $diagnostics_object = $self->[_diagnostics_object_];
if ($diagnostics_object) {
$diagnostics_object->write_diagnostics($msg);
}
# interface to Perl::Tidy::Logger routines
sub warning {
- my ($msg) = @_;
+ my ( $self, $msg ) = @_;
+ my $logger_object = $self->[_logger_object_];
if ($logger_object) {
$logger_object->warning($msg);
}
}
sub write_logfile_entry {
- my ($msg) = @_;
+ my ( $self, $msg ) = @_;
+ my $logger_object = $self->[_logger_object_];
if ($logger_object) {
$logger_object->write_logfile_entry($msg);
}
}
sub report_definite_bug {
+ my ( $self, $msg ) = @_;
+ my $logger_object = $self->[_logger_object_];
if ($logger_object) {
$logger_object->report_definite_bug();
}
sub get_cached_line_count {
my $self = shift;
- return @group_lines + ( $cached_line_type ? 1 : 0 );
+ return $self->group_line_count() + ( get_cached_line_type() ? 1 : 0 );
}
sub get_spaces {
return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
}
-sub get_stack_depth {
-
- my $indentation = shift;
- return ref($indentation) ? $indentation->get_stack_depth() : 0;
-}
-
-sub make_alignment {
- my ( $col, $token ) = @_;
-
- # make one new alignment at column $col which aligns token $token
- ++$maximum_alignment_index;
-
- #my $alignment = new Perl::Tidy::VerticalAligner::Alignment(
- my $nlines = @group_lines;
- my $alignment = Perl::Tidy::VerticalAligner::Alignment->new(
- column => $col,
- starting_column => $col,
- matching_token => $token,
- starting_line => $nlines - 1,
- ending_line => $nlines - 1,
- serial_number => $maximum_alignment_index,
- );
- $ralignment_list->[$maximum_alignment_index] = $alignment;
- return $alignment;
-}
-
-sub dump_alignments {
- print STDOUT
-"Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
- for my $i ( 0 .. $maximum_alignment_index ) {
- my $column = $ralignment_list->[$i]->get_column();
- my $starting_column = $ralignment_list->[$i]->get_starting_column();
- my $matching_token = $ralignment_list->[$i]->get_matching_token();
- my $starting_line = $ralignment_list->[$i]->get_starting_line();
- my $ending_line = $ralignment_list->[$i]->get_ending_line();
- print STDOUT
-"$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
- }
- return;
-}
-
-sub save_alignment_columns {
- for my $i ( 0 .. $maximum_alignment_index ) {
- $ralignment_list->[$i]->save_column();
- }
- return;
-}
-
-sub restore_alignment_columns {
- for my $i ( 0 .. $maximum_alignment_index ) {
- $ralignment_list->[$i]->restore_column();
- }
- return;
-}
-
-sub forget_side_comment {
- $last_comment_column = 0;
- return;
-}
-
sub maximum_line_length_for_level {
# return maximum line length for line starting with a given level
- my $maximum_line_length = $rOpts_maximum_line_length;
- if ($rOpts_variable_maximum_line_length) {
- my $level = shift;
+ my ( $self, $level ) = @_;
+ my $maximum_line_length = $self->[_rOpts_maximum_line_length_];
+ if ( $self->[_rOpts_variable_maximum_line_length_] ) {
if ( $level < 0 ) { $level = 0 }
- $maximum_line_length += $level * $rOpts_indent_columns;
+ $maximum_line_length += $level * $self->[_rOpts_indent_columns_];
}
return $maximum_line_length;
}
+######################################################
+# CODE SECTION 3: Code to accept input and form groups
+######################################################
+
sub push_group_line {
- my ($new_line) = @_;
- push @group_lines, $new_line;
+ my ( $self, $new_line ) = @_;
+ my $rgroup_lines = $self->[_rgroup_lines_];
+ push @{$rgroup_lines}, $new_line;
return;
}
+use constant DEBUG_VALIGN => 0;
+use constant SC_LONG_LINE_DIFF => 12;
+
sub valign_input {
# Place one line in the current vertical group.
# the matching tokens, and the last one tracks the maximum line length.
#
# Each time a new line comes in, it joins the current vertical
- # group if possible. Otherwise it causes the current group to be dumped
+ # group if possible. Otherwise it causes the current group to be flushed
# and a new group is started.
#
# For each new group member, the column locations are increased, as
# side comments. Tabs in these fields can mess up the column counting.
# The log file warns the user if there are any such tabs.
- my ( $rline_hash, $rfields, $rtokens, $rpatterns ) = @_;
+ my ( $self, $rline_hash ) = @_;
+
my $level = $rline_hash->{level};
my $level_end = $rline_hash->{level_end};
+ my $level_adj = $rline_hash->{level_adj};
my $indentation = $rline_hash->{indentation};
- my $is_forced_break = $rline_hash->{is_forced_break};
+ my $list_seqno = $rline_hash->{list_seqno};
my $outdent_long_lines = $rline_hash->{outdent_long_lines};
my $is_terminal_ternary = $rline_hash->{is_terminal_ternary};
- my $is_terminal_statement = $rline_hash->{is_terminal_statement};
- my $do_not_pad = $rline_hash->{do_not_pad};
my $rvertical_tightness_flags = $rline_hash->{rvertical_tightness_flags};
my $level_jump = $rline_hash->{level_jump};
+ my $rfields = $rline_hash->{rfields};
+ my $rtokens = $rline_hash->{rtokens};
+ my $rpatterns = $rline_hash->{rpatterns};
+ my $rfield_lengths = $rline_hash->{rfield_lengths};
+ my $terminal_block_type = $rline_hash->{terminal_block_type};
+ my $batch_count = $rline_hash->{batch_count};
+ my $break_alignment_before = $rline_hash->{break_alignment_before};
+ my $break_alignment_after = $rline_hash->{break_alignment_after};
+ my $Kend = $rline_hash->{Kend};
+ my $ci_level = $rline_hash->{ci_level};
+
+ # The index '$Kend' is a value which passed along with the line text to sub
+ # 'write_code_line' for a convergence check.
# number of fields is $jmax
# number of tokens between fields is $jmax-1
# set outdented flag to be sure we either align within statements or
# across statement boundaries, but not both.
- my $is_outdented = $last_leading_space_count > $leading_space_count;
- $last_leading_space_count = $leading_space_count;
+ my $is_outdented =
+ $self->[_last_leading_space_count_] > $leading_space_count;
+ $self->[_last_leading_space_count_] = $leading_space_count;
- # Patch: undo for hanging side comment
+ # Identify a hanging side comment. Hanging side comments have an empty
+ # initial field.
my $is_hanging_side_comment =
( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
+
+ # Undo outdented flag for a hanging side comment
$is_outdented = 0 if $is_hanging_side_comment;
- # Forget side comment alignment after seeing 2 or more block comments
- my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
+ # Identify a block comment.
+ my $is_block_comment = $jmax == 0 && substr( $rfields->[0], 0, 1 ) eq '#';
+
+ # Block comment .. update count
if ($is_block_comment) {
- $consecutive_block_comments++;
+ $self->[_consecutive_block_comments_]++;
}
+
+ # Not a block comment ..
+ # Forget side comment column if we saw 2 or more block comments,
+ # and reset the count
else {
- if ( $consecutive_block_comments > 1 ) { forget_side_comment() }
- $consecutive_block_comments = 0;
+
+ if ( $self->[_consecutive_block_comments_] > 1 ) {
+ $self->forget_side_comment();
+ }
+ $self->[_consecutive_block_comments_] = 0;
+ }
+
+ # Reset side comment location if we are entering a new block from level 0.
+ # This is intended to keep them from drifting too far to the right.
+ if ( $terminal_block_type && $level_adj == 0 && $level_end > $level ) {
+ $self->forget_side_comment();
}
- VALIGN_DEBUG_FLAG_APPEND0 && do {
- my $nlines = @group_lines;
+ my $group_level = $self->[_group_level_];
+
+ DEBUG_VALIGN && do {
+ my $nlines = $self->group_line_count();
print STDOUT
-"APPEND0: entering lines=$nlines new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break, level_jump=$level_jump, level=$level, group_level=$group_level, level_jump=$level_jump\n";
+"Entering valign_input: lines=$nlines new #fields= $jmax, leading_count=$leading_space_count, level_jump=$level_jump, level=$level, group_level=$group_level, level_jump=$level_jump\n";
};
# Validate cached line if necessary: If we can produce a container
# with just 2 lines total by combining an existing cached opening
# token with the closing token to follow, then we will mark both
# cached flags as valid.
- if ($rvertical_tightness_flags) {
- if ( @group_lines <= 1
- && $cached_line_type
- && $cached_seqno
- && $rvertical_tightness_flags->[2]
- && $rvertical_tightness_flags->[2] == $cached_seqno )
- {
- $rvertical_tightness_flags->[3] ||= 1;
- $cached_line_valid ||= 1;
+ my $cached_line_type = get_cached_line_type();
+ if ($cached_line_type) {
+ my $cached_line_flag = get_cached_line_flag();
+ if ($rvertical_tightness_flags) {
+ my $cached_seqno = get_cached_seqno();
+ if ( $cached_seqno
+ && $self->group_line_count() <= 1
+ && $rvertical_tightness_flags->[2]
+ && $rvertical_tightness_flags->[2] == $cached_seqno )
+ {
+ $rvertical_tightness_flags->[3] ||= 1;
+ set_cached_line_valid(1);
+ }
}
- }
- # do not join an opening block brace with an unbalanced line
- # unless requested with a flag value of 2
- if ( $cached_line_type == 3
- && !@group_lines
- && $cached_line_flag < 2
- && $level_jump != 0 )
- {
- $cached_line_valid = 0;
+ # do not join an opening block brace with an unbalanced line
+ # unless requested with a flag value of 2
+ if ( $cached_line_type == 3
+ && !$self->group_line_count()
+ && $cached_line_flag < 2
+ && $level_jump != 0 )
+ {
+ set_cached_line_valid(0);
+ }
}
- # patch until new aligner is finished
- if ($do_not_pad) { my_flush() }
-
# shouldn't happen:
if ( $level < 0 ) { $level = 0 }
# do not align code across indentation level changes
# or if vertical alignment is turned off for debugging
- if ( $level != $group_level || $is_outdented || !$rOpts_valign ) {
-
- # we are allowed to shift a group of lines to the right if its
- # level is greater than the previous and next group
- $extra_indent_ok =
- ( $level < $group_level && $last_level_written < $group_level );
-
- my_flush();
+ if ( $level != $group_level || $is_outdented || !$self->[_rOpts_valign_] ) {
- # If we know that this line will get flushed out by itself because
- # of level changes, we can leave the extra_indent_ok flag set.
- # That way, if we get an external flush call, we will still be
- # able to do some -lp alignment if necessary.
- $extra_indent_ok = ( $is_terminal_statement && $level > $group_level );
+ $self->_flush_group_lines( $level - $group_level );
$group_level = $level;
+ $self->[_group_level_] = $group_level;
# wait until after the above flush to get the leading space
# count because it may have been changed if the -icp flag is in
# Collect outdentable block COMMENTS
# --------------------------------------------------------------------
my $is_blank_line = "";
- if ( $group_type eq 'COMMENT' ) {
+ if ( $self->[_group_type_] eq 'COMMENT' ) {
if (
(
$is_block_comment
&& $outdent_long_lines
- && $leading_space_count == $comment_leading_space_count
+ && $leading_space_count ==
+ $self->[_comment_leading_space_count_]
)
|| $is_blank_line
)
{
- push_group_line( $rfields->[0] );
+
+ # Note that for a comment group we are not storing a line
+ # but rather just the text and its length.
+ $self->push_group_line(
+ [ $rfields->[0], $rfield_lengths->[0], $Kend ] );
return;
}
else {
- my_flush();
+ $self->_flush_group_lines();
}
}
+ my $rgroup_lines = $self->[_rgroup_lines_];
+ if ( $break_alignment_before && @{$rgroup_lines} ) {
+ $rgroup_lines->[-1]->set_end_group(1);
+ }
+
# --------------------------------------------------------------------
# add dummy fields for terminal ternary
# --------------------------------------------------------------------
my $j_terminal_match;
- if ( $is_terminal_ternary && @group_lines ) {
+ if ( $is_terminal_ternary && @{$rgroup_lines} ) {
$j_terminal_match =
- fix_terminal_ternary( $group_lines[-1], $rfields, $rtokens,
- $rpatterns );
+ fix_terminal_ternary( $rgroup_lines->[-1], $rfields, $rtokens,
+ $rpatterns, $rfield_lengths, $group_level, );
$jmax = @{$rfields} - 1;
}
# add dummy fields for else statement
# --------------------------------------------------------------------
- if ( $rfields->[0] =~ /^else\s*$/
- && @group_lines
+ # Note the trailing space after 'else' here. If there were no space between
+ # the else and the next '{' then we would not be able to do vertical
+ # alignment of the '{'.
+ if ( $rfields->[0] eq 'else '
+ && @{$rgroup_lines}
&& $level_jump == 0 )
{
$j_terminal_match =
- fix_terminal_else( $group_lines[-1], $rfields, $rtokens, $rpatterns );
+ fix_terminal_else( $rgroup_lines->[-1], $rfields, $rtokens,
+ $rpatterns, $rfield_lengths );
$jmax = @{$rfields} - 1;
}
# Handle simple line of code with no fields to match.
# --------------------------------------------------------------------
if ( $jmax <= 0 ) {
- $zero_count++;
+ $self->[_zero_count_]++;
- if ( @group_lines
- && !get_recoverable_spaces( $group_lines[0]->get_indentation() ) )
+ if ( @{$rgroup_lines}
+ && !get_recoverable_spaces( $rgroup_lines->[0]->get_indentation() )
+ )
{
# flush the current group if it has some aligned columns..
- if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() }
-
- # flush current group if we are just collecting side comments..
- elsif (
-
- # ...and we haven't seen a comment lately
- ( $zero_count > 3 )
-
- # ..or if this new line doesn't fit to the left of the comments
- || ( ( $leading_space_count + length( $rfields->[0] ) ) >
- $group_lines[0]->get_column(0) )
- )
+ # or we haven't seen a comment lately
+ if ( $rgroup_lines->[0]->get_jmax() > 1
+ || $self->[_zero_count_] > 3 )
{
- my_flush();
+ $self->_flush_group_lines();
}
}
# start new COMMENT group if this comment may be outdented
if ( $is_block_comment
&& $outdent_long_lines
- && !@group_lines )
+ && !$self->group_line_count() )
{
- $group_type = 'COMMENT';
- $comment_leading_space_count = $leading_space_count;
- push_group_line( $rfields->[0] );
+ $self->[_group_type_] = 'COMMENT';
+ $self->[_comment_leading_space_count_] = $leading_space_count;
+ $self->push_group_line(
+ [ $rfields->[0], $rfield_lengths->[0], $Kend ] );
return;
}
# just write this line directly if no current group, no side comment,
# and no space recovery is needed.
- if ( !@group_lines && !get_recoverable_spaces($indentation) ) {
- valign_output_step_B( $leading_space_count, $rfields->[0], 0,
- $outdent_long_lines, $rvertical_tightness_flags, $level );
+ if ( !$self->group_line_count()
+ && !get_recoverable_spaces($indentation) )
+ {
+
+ $self->valign_output_step_B(
+ {
+ leading_space_count => $leading_space_count,
+ line => $rfields->[0],
+ line_length => $rfield_lengths->[0],
+ side_comment_length => 0,
+ outdent_long_lines => $outdent_long_lines,
+ rvertical_tightness_flags => $rvertical_tightness_flags,
+ level => $level,
+ level_end => $level_end,
+ Kend => $Kend,
+ }
+ );
+
return;
}
}
else {
- $zero_count = 0;
+ $self->[_zero_count_] = 0;
}
- # programming check: (shouldn't happen)
- # an error here implies an incorrect call was made
- if ( @{$rfields} && ( @{$rtokens} != ( @{$rfields} - 1 ) ) ) {
- my $nt = @{$rtokens};
- my $nf = @{$rfields};
- warning(
-"Program bug in Perl::Tidy::VerticalAligner - number of tokens = $nt should be one less than number of fields: $nf)\n"
- );
- report_definite_bug();
- }
- my $maximum_line_length_for_level = maximum_line_length_for_level($level);
+ my $maximum_line_length_for_level =
+ $self->maximum_line_length_for_level($level);
# --------------------------------------------------------------------
- # create an object to hold this line
+ # It simplifies things to create a zero length side comment
+ # if none exists.
# --------------------------------------------------------------------
- my $new_line = Perl::Tidy::VerticalAligner::Line->new(
- jmax => $jmax,
- jmax_original_line => $jmax,
- rtokens => $rtokens,
- rfields => $rfields,
- rpatterns => $rpatterns,
- indentation => $indentation,
- leading_space_count => $leading_space_count,
- outdent_long_lines => $outdent_long_lines,
- list_type => "",
- is_hanging_side_comment => $is_hanging_side_comment,
- maximum_line_length => $maximum_line_length_for_level,
- rvertical_tightness_flags => $rvertical_tightness_flags,
- is_terminal_ternary => $is_terminal_ternary,
- j_terminal_match => $j_terminal_match,
- );
+ $self->make_side_comment( $rtokens, $rfields, $rpatterns, $rfield_lengths );
+ $jmax = @{$rfields} - 1;
# --------------------------------------------------------------------
- # It simplifies things to create a zero length side comment
- # if none exists.
+ # create an object to hold this line
# --------------------------------------------------------------------
- make_side_comment( $new_line, $level_end );
+ my $new_line = Perl::Tidy::VerticalAligner::Line->new(
+ {
+ jmax => $jmax,
+ rtokens => $rtokens,
+ rfields => $rfields,
+ rpatterns => $rpatterns,
+ rfield_lengths => $rfield_lengths,
+ indentation => $indentation,
+ leading_space_count => $leading_space_count,
+ outdent_long_lines => $outdent_long_lines,
+ list_seqno => $list_seqno,
+ list_type => "",
+ is_hanging_side_comment => $is_hanging_side_comment,
+ maximum_line_length => $maximum_line_length_for_level,
+ rvertical_tightness_flags => $rvertical_tightness_flags,
+ is_terminal_ternary => $is_terminal_ternary,
+ j_terminal_match => $j_terminal_match,
+ end_group => $break_alignment_after,
+ Kend => $Kend,
+ ci_level => $ci_level,
+ level => $level,
+ level_end => $level_end,
+ imax_pair => -1,
+ }
+ );
# --------------------------------------------------------------------
# Decide if this is a simple list of items.
- # There are 3 list types: none, comma, comma-arrow.
- # We use this below to be less restrictive in deciding what to align.
+ # We use this to be less restrictive in deciding what to align.
# --------------------------------------------------------------------
- if ($is_forced_break) {
- decide_if_list($new_line);
- }
+ decide_if_list($new_line) if ($list_seqno);
# --------------------------------------------------------------------
# Append this line to the current group (or start new group)
# --------------------------------------------------------------------
- if ( !@group_lines ) {
- add_to_group($new_line);
- }
- else {
- push_group_line($new_line);
- }
+
+ $self->push_group_line($new_line);
# output this group if it ends in a terminal else or ternary line
if ( defined($j_terminal_match) ) {
- my_flush();
+ $self->_flush_group_lines();
}
# Force break after jump to lower level
if ( $level_jump < 0 ) {
- my_flush();
+ $self->_flush_group_lines($level_jump);
}
# --------------------------------------------------------------------
# Some old debugging stuff
# --------------------------------------------------------------------
- VALIGN_DEBUG_FLAG_APPEND && do {
- print STDOUT "APPEND fields:";
+ DEBUG_VALIGN && do {
+ print STDOUT "exiting valign_input fields:";
dump_array( @{$rfields} );
- print STDOUT "APPEND tokens:";
+ print STDOUT "exiting valign_input tokens:";
dump_array( @{$rtokens} );
- print STDOUT "APPEND patterns:";
+ print STDOUT "exiting valign_input patterns:";
dump_array( @{$rpatterns} );
- dump_alignments();
};
return;
sub join_hanging_comment {
- my $line = shift;
- my $jmax = $line->get_jmax();
- return 0 unless $jmax == 1; # must be 2 fields
- my $rtokens = $line->get_rtokens();
- return 0 unless $rtokens->[0] eq '#'; # the second field is a comment..
- my $rfields = $line->get_rfields();
- return 0 unless $rfields->[0] =~ /^\s*$/; # the first field is empty...
- my $old_line = shift;
+ # Add dummy fields to a hanging side comment to make it look
+ # like the first line in its potential group. This simplifies
+ # the coding.
+ my ( $new_line, $old_line ) = @_;
+
+ my $jmax = $new_line->get_jmax();
+
+ # must be 2 fields
+ return 0 unless $jmax == 1;
+ my $rtokens = $new_line->get_rtokens();
+
+ # the second field must be a comment
+ return 0 unless $rtokens->[0] eq '#';
+ my $rfields = $new_line->get_rfields();
+
+ # the first field must be empty
+ return 0 unless $rfields->[0] =~ /^\s*$/;
+
+ # the current line must have fewer fields
my $maximum_field_index = $old_line->get_jmax();
return 0
- unless $maximum_field_index > $jmax; # the current line has more fields
- my $rpatterns = $line->get_rpatterns();
+ unless $maximum_field_index > $jmax;
+
+ # looks ok..
+ my $rpatterns = $new_line->get_rpatterns();
+ my $rfield_lengths = $new_line->get_rfield_lengths();
- $line->set_is_hanging_side_comment(1);
+ $new_line->set_is_hanging_side_comment(1);
$jmax = $maximum_field_index;
- $line->set_jmax($jmax);
+ $new_line->set_jmax($jmax);
$rfields->[$jmax] = $rfields->[1];
+ $rfield_lengths->[$jmax] = $rfield_lengths->[1];
$rtokens->[ $jmax - 1 ] = $rtokens->[0];
$rpatterns->[ $jmax - 1 ] = $rpatterns->[0];
foreach my $j ( 1 .. $jmax - 1 ) {
- $rfields->[$j] = " "; # NOTE: caused glitch unless 1 blank, why?
+ $rfields->[$j] = '';
+ $rfield_lengths->[$j] = 0;
$rtokens->[ $j - 1 ] = "";
$rpatterns->[ $j - 1 ] = "";
}
return 1;
}
-sub eliminate_old_fields {
-
- my $new_line = shift;
- my $jmax = $new_line->get_jmax();
- if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax }
- if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }
-
- # there must be one previous line
- return unless ( @group_lines == 1 );
-
- my $old_line = shift;
- my $maximum_field_index = $old_line->get_jmax();
-
- ###############################################
- # Moved below to allow new coding for => matches
- # return unless $maximum_field_index > $jmax;
- ###############################################
-
- # Identify specific cases where field elimination is allowed:
- # case=1: both lines have comma-separated lists, and the first
- # line has an equals
- # case=2: both lines have leading equals
-
- # case 1 is the default
- my $case = 1;
-
- # See if case 2: both lines have leading '='
- # We'll require similar leading patterns in this case
- my $old_rtokens = $old_line->get_rtokens();
- my $rtokens = $new_line->get_rtokens();
- my $rpatterns = $new_line->get_rpatterns();
- my $old_rpatterns = $old_line->get_rpatterns();
- if ( $rtokens->[0] =~ /^=>?\d*$/
- && $old_rtokens->[0] eq $rtokens->[0]
- && $old_rpatterns->[0] eq $rpatterns->[0] )
- {
- $case = 2;
- }
-
- # not too many fewer fields in new line for case 1
- return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax );
-
- # case 1 must have side comment
- my $old_rfields = $old_line->get_rfields();
- return
- if ( $case == 1
- && length( $old_rfields->[$maximum_field_index] ) == 0 );
-
- my $rfields = $new_line->get_rfields();
-
- my $hid_equals = 0;
-
- my @new_alignments = ();
- my @new_fields = ();
- my @new_matching_patterns = ();
- my @new_matching_tokens = ();
-
- my $j = 0;
- my $current_field = '';
- my $current_pattern = '';
-
- # loop over all old tokens
- my $in_match = 0;
- foreach my $k ( 0 .. $maximum_field_index - 1 ) {
- $current_field .= $old_rfields->[$k];
- $current_pattern .= $old_rpatterns->[$k];
- last if ( $j > $jmax - 1 );
-
- if ( $old_rtokens->[$k] eq $rtokens->[$j] ) {
- $in_match = 1;
- $new_fields[$j] = $current_field;
- $new_matching_patterns[$j] = $current_pattern;
- $current_field = '';
- $current_pattern = '';
- $new_matching_tokens[$j] = $old_rtokens->[$k];
- $new_alignments[$j] = $old_line->get_alignment($k);
- $j++;
- }
- else {
-
- if ( $old_rtokens->[$k] =~ /^\=\d*$/ ) {
- last if ( $case == 2 ); # avoid problems with stuff
- # like: $a=$b=$c=$d;
- $hid_equals = 1;
- }
- last
- if ( $in_match && $case == 1 )
- ; # disallow gaps in matching field types in case 1
- }
- }
-
- # Modify the current state if we are successful.
- # We must exactly reach the ends of the new list for success, and the old
- # pattern must have more fields. Here is an example where the first and
- # second lines have the same number, and we should not align:
- # my @a = map chr, 0 .. 255;
- # my @b = grep /\W/, @a;
- # my @c = grep /[^\w]/, @a;
-
- # Otherwise, we would get all of the commas aligned, which doesn't work as
- # well:
- # my @a = map chr, 0 .. 255;
- # my @b = grep /\W/, @a;
- # my @c = grep /[^\w]/, @a;
-
- if ( ( $j == $jmax )
- && ( $current_field eq '' )
- && ( $case != 1 || $hid_equals )
- && ( $maximum_field_index > $jmax ) )
- {
- my $k = $maximum_field_index;
- $current_field .= $old_rfields->[$k];
- $current_pattern .= $old_rpatterns->[$k];
- $new_fields[$j] = $current_field;
- $new_matching_patterns[$j] = $current_pattern;
-
- $new_alignments[$j] = $old_line->get_alignment($k);
- $maximum_field_index = $j;
-
- $old_line->set_alignments(@new_alignments);
- $old_line->set_jmax($jmax);
- $old_line->set_rtokens( \@new_matching_tokens );
- $old_line->set_rfields( \@new_fields );
- $old_line->set_rpatterns( \@{$rpatterns} );
- }
-
- # Dumb Down starting match if necessary:
- #
- # Consider the following two lines:
- #
- # {
- # $a => 20 > 3 ? 1 : 0,
- # $xyz => 5,
- # }
-
- # We would like to get alignment regardless of the order of the two lines.
- # If the lines come in in this order, then we will simplify the patterns of
- # the first line in sub eliminate_new_fields. If the lines come in reverse
- # order, then we achieve this with eliminate_new_fields.
-
- # This update is currently restricted to leading '=>' matches. Although we
- # could do this for both '=' and '=>', overall the results for '=' come out
- # better without this step because this step can eliminate some other good
- # matches. For example, with the '=' we get:
-
-# my @disilva = ( "di Silva", "diSilva", "di Si\x{301}lva", "diSi\x{301}lva" );
-# my @dsf = map "$_\x{FFFE}Fred", @disilva;
-# my @dsj = map "$_\x{FFFE}John", @disilva;
-# my @dsJ = map "$_ John", @disilva;
-
- # without including '=' we get:
-
-# my @disilva = ( "di Silva", "diSilva", "di Si\x{301}lva", "diSi\x{301}lva" );
-# my @dsf = map "$_\x{FFFE}Fred", @disilva;
-# my @dsj = map "$_\x{FFFE}John", @disilva;
-# my @dsJ = map "$_ John", @disilva;
- elsif (
- $case == 2
-
- && @new_matching_tokens == 1
- ##&& $new_matching_tokens[0] =~ /^=/ # see note above
- && $new_matching_tokens[0] =~ /^=>/
- && $maximum_field_index > 2
- )
- {
- my $jmaxm = $jmax - 1;
- my $kmaxm = $maximum_field_index - 1;
- my $have_side_comment = $old_rtokens->[$kmaxm] eq '#';
-
- # We need to reduce the group pattern to be just two tokens,
- # the leading equality or => and the final side comment
-
- my $mid_field = join "",
- @{$old_rfields}[ 1 .. $maximum_field_index - 1 ];
- my $mid_patterns = join "",
- @{$old_rpatterns}[ 1 .. $maximum_field_index - 1 ];
- my @new_alignments = (
- $old_line->get_alignment(0),
- $old_line->get_alignment( $maximum_field_index - 1 )
- );
- my @new_tokens =
- ( $old_rtokens->[0], $old_rtokens->[ $maximum_field_index - 1 ] );
- my @new_fields = (
- $old_rfields->[0], $mid_field, $old_rfields->[$maximum_field_index]
- );
- my @new_patterns = (
- $old_rpatterns->[0], $mid_patterns,
- $old_rpatterns->[$maximum_field_index]
- );
+sub make_side_comment {
- $maximum_field_index = 2;
- $old_line->set_jmax($maximum_field_index);
- $old_line->set_rtokens( \@new_tokens );
- $old_line->set_rfields( \@new_fields );
- $old_line->set_rpatterns( \@new_patterns );
+ # create an empty side comment if none exists
- initialize_for_new_group();
- add_to_group($old_line);
- }
- return;
-}
+ my ( $self, $rtokens, $rfields, $rpatterns, $rfield_lengths ) = @_;
-# create an empty side comment if none exists
-sub make_side_comment {
- my ( $new_line, $level_end ) = @_;
- my $jmax = $new_line->get_jmax();
- my $rtokens = $new_line->get_rtokens();
+ my $jmax = @{$rfields} - 1;
# if line does not have a side comment...
if ( ( $jmax == 0 ) || ( $rtokens->[ $jmax - 1 ] ne '#' ) ) {
- my $rfields = $new_line->get_rfields();
- my $rpatterns = $new_line->get_rpatterns();
- $rtokens->[$jmax] = '#';
- $rfields->[ ++$jmax ] = '';
- $rpatterns->[$jmax] = '#';
- $new_line->set_jmax($jmax);
- $new_line->set_jmax_original_line($jmax);
- }
-
- # line has a side comment..
- else {
-
- # don't remember old side comment location for very long
- my $line_number = $vertical_aligner_self->get_output_line_number();
- my $rfields = $new_line->get_rfields();
- if (
- $line_number - $last_side_comment_line_number > 12
-
- # and don't remember comment location across block level changes
- || ( $level_end < $last_side_comment_level
- && $rfields->[0] =~ /^}/ )
- )
- {
- forget_side_comment();
- }
- $last_side_comment_line_number = $line_number;
- $last_side_comment_level = $level_end;
- }
- return;
-}
-
-sub decide_if_list {
-
- my $line = shift;
-
- # A list will be taken to be a line with a forced break in which all
- # of the field separators are commas or comma-arrows (except for the
- # trailing #)
-
- # List separator tokens are things like ',3' or '=>2',
- # where the trailing digit is the nesting depth. Allow braces
- # to allow nested list items.
- my $rtokens = $line->get_rtokens();
- my $test_token = $rtokens->[0];
- if ( $test_token =~ /^(\,|=>)/ ) {
- my $list_type = $test_token;
- my $jmax = $line->get_jmax();
-
- foreach ( 1 .. $jmax - 2 ) {
- if ( $rtokens->[$_] !~ /^(\,|=>|\{)/ ) {
- $list_type = "";
- last;
- }
- }
- $line->set_list_type($list_type);
+ $jmax += 1;
+ $rtokens->[ $jmax - 1 ] = '#';
+ $rfields->[$jmax] = '';
+ $rfield_lengths->[$jmax] = 0;
+ $rpatterns->[$jmax] = '#';
}
return;
}
-sub eliminate_new_fields {
-
- my ( $new_line, $old_line ) = @_;
- return unless (@group_lines);
- my $jmax = $new_line->get_jmax();
-
- my $old_rtokens = $old_line->get_rtokens();
- my $rtokens = $new_line->get_rtokens();
- my $is_assignment =
- ( $rtokens->[0] =~ /^=>?\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );
-
- # must be monotonic variation
- return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );
+{ ## closure for sub decide_if_list
- # must be more fields in the new line
- my $maximum_field_index = $old_line->get_jmax();
- return unless ( $maximum_field_index < $jmax );
+ my %is_comma_token;
- unless ($is_assignment) {
- return
- unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen )
- ; # only if monotonic
+ BEGIN {
- # never combine fields of a comma list
- return
- unless ( $maximum_field_index > 1 )
- && ( $new_line->get_list_type() !~ /^,/ );
+ my @q = qw( => );
+ push @q, ',';
+ @is_comma_token{@q} = (1) x scalar(@q);
}
- my $rfields = $new_line->get_rfields();
- my $rpatterns = $new_line->get_rpatterns();
- my $old_rpatterns = $old_line->get_rpatterns();
+ sub decide_if_list {
- # loop over all OLD tokens except comment and check match
- my $match = 1;
- foreach my $k ( 0 .. $maximum_field_index - 2 ) {
- if ( ( $old_rtokens->[$k] ne $rtokens->[$k] )
- || ( $old_rpatterns->[$k] ne $rpatterns->[$k] ) )
- {
- $match = 0;
- last;
- }
- }
+ my $line = shift;
- # first tokens agree, so combine extra new tokens
- if ($match) {
- foreach my $k ( $maximum_field_index .. $jmax - 1 ) {
+ # A list will be taken to be a line with a forced break in which all
+ # of the field separators are commas or comma-arrows (except for the
+ # trailing #)
- $rfields->[ $maximum_field_index - 1 ] .= $rfields->[$k];
- $rfields->[$k] = "";
- $rpatterns->[ $maximum_field_index - 1 ] .= $rpatterns->[$k];
- $rpatterns->[$k] = "";
+ my $rtokens = $line->get_rtokens();
+ my $test_token = $rtokens->[0];
+ my ( $raw_tok, $lev, $tag, $tok_count ) =
+ decode_alignment_token($test_token);
+ if ( $is_comma_token{$raw_tok} ) {
+ my $list_type = $test_token;
+ my $jmax = $line->get_jmax();
+
+ foreach ( 1 .. $jmax - 2 ) {
+ ( $raw_tok, $lev, $tag, $tok_count ) =
+ decode_alignment_token( $rtokens->[$_] );
+ if ( !$is_comma_token{$raw_tok} ) {
+ $list_type = "";
+ last;
+ }
+ }
+ $line->set_list_type($list_type);
}
-
- $rtokens->[ $maximum_field_index - 1 ] = '#';
- $rfields->[$maximum_field_index] = $rfields->[$jmax];
- $rpatterns->[$maximum_field_index] = $rpatterns->[$jmax];
- $jmax = $maximum_field_index;
+ return;
}
- $new_line->set_jmax($jmax);
- return;
}
sub fix_terminal_ternary {
# : $year % 400 ? 0
# : 1;
#
- # returns 1 if the terminal item should be indented
+ # returns the index of the terminal question token, if any
- my ( $old_line, $rfields, $rtokens, $rpatterns ) = @_;
- return unless ($old_line);
+ my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths,
+ $group_level )
+ = @_;
-## FUTURE CODING
-## my ( $old_line, $end_line ) = @_;
-## return unless ( $old_line && $end_line );
-##
-## my $rfields = $end_line->get_rfields();
-## my $rpatterns = $end_line->get_rpatterns();
-## my $rtokens = $end_line->get_rtokens();
+ return unless ($old_line);
+ use constant EXPLAIN_TERNARY => 0;
my $jmax = @{$rfields} - 1;
my $rfields_old = $old_line->get_rfields();
# look for the question mark after the :
my ($jquestion);
my $depth_question;
- my $pad = "";
+ my $pad = "";
+ my $pad_length = 0;
foreach my $j ( 0 .. $maximum_field_index - 1 ) {
my $tok = $rtokens_old->[$j];
- if ( $tok =~ /^\?(\d+)$/ ) {
- $depth_question = $1;
+ my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
+ if ( $raw_tok eq '?' ) {
+ $depth_question = $lev;
# depth must be correct
next unless ( $depth_question eq $group_level );
$jquestion = $j;
if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
- $pad = " " x length($1);
+ $pad_length = length($1);
+ $pad = " " x $pad_length;
}
else {
return; # shouldn't happen
# Work on copies of the actual arrays in case we have
# to return due to an error
- my @fields = @{$rfields};
- my @patterns = @{$rpatterns};
- my @tokens = @{$rtokens};
+ my @fields = @{$rfields};
+ my @patterns = @{$rpatterns};
+ my @tokens = @{$rtokens};
+ my @field_lengths = @{$rfield_lengths};
- VALIGN_DEBUG_FLAG_TERNARY && do {
+ EXPLAIN_TERNARY && do {
local $" = '><';
print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n";
print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n";
# Note that this padding will remain even if the terminal value goes
# out on a separate line. This does not seem to look to bad, so no
# mechanism has been included to undo it.
- my $field1 = shift @fields;
+ my $field1 = shift @fields;
+ my $field_length1 = shift @field_lengths;
+ my $len_colon = length($colon);
unshift @fields, ( $colon, $pad . $therest );
+ unshift @field_lengths,
+ ( $len_colon, $pad_length + $field_length1 - $len_colon );
# change the leading pattern from : to ?
return unless ( $patterns[0] =~ s/^\:/?/ );
unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
# insert appropriate number of empty fields
- splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
+ splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
+ splice( @field_lengths, 1, 0, (0) x $jadd ) if $jadd;
}
# handle sub-case of first field just equal to leading colon.
$patterns[1] = "?b" . $patterns[1];
# pad the second field
- $fields[1] = $pad . $fields[1];
+ $fields[1] = $pad . $fields[1];
+ $field_lengths[1] = $pad_length + $field_lengths[1];
# install leading tokens and patterns of existing line, replacing
# leading token and inserting appropriate number of empty fields
splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
- splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
+ splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
+ splice( @field_lengths, 1, 0, (0) x $jadd ) if $jadd;
}
}
unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
# insert appropriate number of empty fields
- $jadd = $jquestion + 1;
- $fields[0] = $pad . $fields[0];
- splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
+ $jadd = $jquestion + 1;
+ $fields[0] = $pad . $fields[0];
+ $field_lengths[0] = $pad_length + $field_lengths[0];
+ splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
+ splice( @field_lengths, 0, 0, (0) x $jadd ) if $jadd;
}
- VALIGN_DEBUG_FLAG_TERNARY && do {
+ EXPLAIN_TERNARY && do {
local $" = '><';
print STDOUT "MODIFIED TOKENS=<@tokens>\n";
print STDOUT "MODIFIED PATTERNS=<@patterns>\n";
};
# all ok .. update the arrays
- @{$rfields} = @fields;
- @{$rtokens} = @tokens;
- @{$rpatterns} = @patterns;
-## FUTURE CODING
-## $end_line->set_rfields( \@fields );
-## $end_line->set_rtokens( \@tokens );
-## $end_line->set_rpatterns( \@patterns );
+ @{$rfields} = @fields;
+ @{$rtokens} = @tokens;
+ @{$rpatterns} = @patterns;
+ @{$rfield_lengths} = @field_lengths;
# force a flush after this line
return $jquestion;
#
# returns a positive value if the else block should be indented
#
- my ( $old_line, $rfields, $rtokens, $rpatterns ) = @_;
+ my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths ) = @_;
+
return unless ($old_line);
my $jmax = @{$rfields} - 1;
return unless ( $jmax > 0 );
my $jadd = $jbrace - $jparen;
splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
- splice( @{$rfields}, 1, 0, ('') x $jadd );
+ splice( @{$rfields}, 1, 0, ('') x $jadd );
+ splice( @{$rfield_lengths}, 1, 0, (0) x $jadd );
# force a flush after this line if it does not follow a case
if ( $rfields_old->[0] =~ /^case\s*$/ ) { return }
else { return $jbrace }
}
-{ # sub check_match
- my %is_good_alignment;
+my %is_closing_block_type;
- BEGIN {
+BEGIN {
+ @_ = qw< } ] >;
+ @is_closing_block_type{@_} = (1) x scalar(@_);
+}
- # Vertically aligning on certain "good" tokens is usually okay
- # so we can be less restrictive in marginal cases.
- my @q = qw( { ? => = );
- push @q, (',');
- @is_good_alignment{@q} = (1) x scalar(@q);
- }
+sub check_match {
- sub check_match {
+ # See if the current line matches the current vertical alignment group.
- # See if the current line matches the current vertical alignment group.
- # If not, flush the current group.
- my ( $new_line, $old_line ) = @_;
+ my ( $self, $new_line, $base_line, $prev_line ) = @_;
- # uses global variables:
- # $previous_minimum_jmax_seen
- # $maximum_jmax_seen
- # $marginal_match
- my $jmax = $new_line->get_jmax();
- my $maximum_field_index = $old_line->get_jmax();
+ # Given:
+ # $new_line = the line being considered for group inclusion
+ # $base_line = the first line of the current group
+ # $prev_line = the line just before $new_line
- # flush if this line has too many fields
- # variable $GoToLoc indicates goto branch point, for debugging
- my $GoToLoc = 1;
- if ( $jmax > $maximum_field_index ) { goto NO_MATCH }
+ # returns a flag and a value as follows:
+ # return (0, $imax_align) if the line does not match
+ # return (1, $imax_align) if the line matches but does not fit
+ # return (2, $imax_align) if the line matches and fits
- # flush if adding this line would make a non-monotonic field count
- if (
- ( $maximum_field_index > $jmax ) # this has too few fields
- && (
- ( $previous_minimum_jmax_seen <
- $jmax ) # and wouldn't be monotonic
- || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
- )
- )
- {
- $GoToLoc = 2;
- goto NO_MATCH;
- }
+ # Returns '$imax_align' which is the index of the maximum matching token.
+ # It will be used in the subsequent left-to-right sweep to align as many
+ # tokens as possible for lines which partially match.
+ my $imax_align = -1;
- # otherwise see if this line matches the current group
- my $jmax_original_line = $new_line->get_jmax_original_line();
- my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
- my $rtokens = $new_line->get_rtokens();
- my $rfields = $new_line->get_rfields();
- my $rpatterns = $new_line->get_rpatterns();
- my $list_type = $new_line->get_list_type();
-
- my $group_list_type = $old_line->get_list_type();
- my $old_rpatterns = $old_line->get_rpatterns();
- my $old_rtokens = $old_line->get_rtokens();
-
- my $jlimit = $jmax - 1;
- if ( $maximum_field_index > $jmax ) {
- $jlimit = $jmax_original_line;
- --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
- }
-
- # handle comma-separated lists ..
- if ( $group_list_type && ( $list_type eq $group_list_type ) ) {
- for my $j ( 0 .. $jlimit ) {
- my $old_tok = $old_rtokens->[$j];
- next unless $old_tok;
- my $new_tok = $rtokens->[$j];
- next unless $new_tok;
-
- # lists always match ...
- # unless they would align any '=>'s with ','s
- $GoToLoc = 3;
- goto NO_MATCH
- if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/
- || $new_tok =~ /^=>/ && $old_tok =~ /^,/ );
- }
- }
-
- # do detailed check for everything else except hanging side comments
- elsif ( !$is_hanging_side_comment ) {
-
- my $leading_space_count = $new_line->get_leading_space_count();
-
- my $max_pad = 0;
- my $min_pad = 0;
- my $saw_good_alignment;
-
- for my $j ( 0 .. $jlimit ) {
-
- my $old_tok = $old_rtokens->[$j];
- my $new_tok = $rtokens->[$j];
-
- # Note on encoding used for alignment tokens:
- # -------------------------------------------
- # Tokens are "decorated" with information which can help
- # prevent unwanted alignments. Consider for example the
- # following two lines:
- # local ( $xn, $xd ) = split( '/', &'rnorm(@_) );
- # local ( $i, $f ) = &'bdiv( $xn, $xd );
- # There are three alignment tokens in each line, a comma,
- # an =, and a comma. In the first line these three tokens
- # are encoded as:
- # ,4+local-18 =3 ,4+split-7
- # and in the second line they are encoded as
- # ,4+local-18 =3 ,4+&'bdiv-8
- # Tokens always at least have token name and nesting
- # depth. So in this example the ='s are at depth 3 and
- # the ,'s are at depth 4. This prevents aligning tokens
- # of different depths. Commas contain additional
- # information, as follows:
- # , {depth} + {container name} - {spaces to opening paren}
- # This allows us to reject matching the rightmost commas
- # in the above two lines, since they are for different
- # function calls. This encoding is done in
- # 'sub send_lines_to_vertical_aligner'.
-
- # Pick off actual token.
- # Everything up to the first digit is the actual token.
- my $alignment_token = $new_tok;
- if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 }
-
- # see if the decorated tokens match
- my $tokens_match = $new_tok eq $old_tok
-
- # Exception for matching terminal : of ternary statement..
- # consider containers prefixed by ? and : a match
- || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ );
-
- # No match if the alignment tokens differ...
- if ( !$tokens_match ) {
-
- # ...Unless this is a side comment
- if (
- $j == $jlimit
-
- # and there is either at least one alignment token
- # or this is a single item following a list. This
- # latter rule is required for 'December' to join
- # the following list:
- # my (@months) = (
- # '', 'January', 'February', 'March',
- # 'April', 'May', 'June', 'July',
- # 'August', 'September', 'October', 'November',
- # 'December'
- # );
- # If it doesn't then the -lp formatting will fail.
- && ( $j > 0 || $old_tok =~ /^,/ )
- )
- {
- $marginal_match = 1
- if ( $marginal_match == 0
- && @group_lines == 1 );
- last;
- }
+ # variable $GoToMsg explains reason for no match, for debugging
+ my $GoToMsg = "";
+ use constant EXPLAIN_CHECK_MATCH => 0;
- $GoToLoc = 4;
- goto NO_MATCH;
- }
+ # This is a flag for testing alignment by sub sweep_left_to_right only.
+ # This test can help find problems with the alignment logic.
+ # This flag should normally be zero.
+ use constant TEST_SWEEP_ONLY => 0;
- # Calculate amount of padding required to fit this in.
- # $pad is the number of spaces by which we must increase
- # the current field to squeeze in this field.
- my $pad =
- length( $rfields->[$j] ) - $old_line->current_field_width($j);
- if ( $j == 0 ) { $pad += $leading_space_count; }
-
- # remember max pads to limit marginal cases
- if ( $alignment_token ne '#' ) {
- if ( $pad > $max_pad ) { $max_pad = $pad }
- if ( $pad < $min_pad ) { $min_pad = $pad }
- }
- if ( $is_good_alignment{$alignment_token} ) {
- $saw_good_alignment = 1;
- }
+ my $jmax = $new_line->get_jmax();
+ my $maximum_field_index = $base_line->get_jmax();
- # If patterns don't match, we have to be careful...
- if ( $old_rpatterns->[$j] ne $rpatterns->[$j] ) {
-
- # flag this as a marginal match since patterns differ
- $marginal_match = 1
- if ( $marginal_match == 0 && @group_lines == 1 );
-
- # We have to be very careful about aligning commas
- # when the pattern's don't match, because it can be
- # worse to create an alignment where none is needed
- # than to omit one. Here's an example where the ','s
- # are not in named containers. The first line below
- # should not match the next two:
- # ( $a, $b ) = ( $b, $r );
- # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
- # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
- if ( $alignment_token eq ',' ) {
-
- # do not align commas unless they are in named containers
- $GoToLoc = 5;
- goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ );
- }
+ my $jlimit = $jmax - 2;
+ if ( $jmax > $maximum_field_index ) {
+ $jlimit = $maximum_field_index - 2;
+ }
- # do not align parens unless patterns match;
- # large ugly spaces can occur in math expressions.
- elsif ( $alignment_token eq '(' ) {
+ if ( $new_line->get_is_hanging_side_comment() ) {
- # But we can allow a match if the parens don't
- # require any padding.
- $GoToLoc = 6;
- if ( $pad != 0 ) { goto NO_MATCH }
- }
+ # HSC's can join the group if they fit
+ }
- # Handle an '=' alignment with different patterns to
- # the left.
- elsif ( $alignment_token eq '=' ) {
-
- # It is best to be a little restrictive when
- # aligning '=' tokens. Here is an example of
- # two lines that we will not align:
- # my $variable=6;
- # $bb=4;
- # The problem is that one is a 'my' declaration,
- # and the other isn't, so they're not very similar.
- # We will filter these out by comparing the first
- # letter of the pattern. This is crude, but works
- # well enough.
- if (
- substr( $old_rpatterns->[$j], 0, 1 ) ne
- substr( $rpatterns->[$j], 0, 1 ) )
- {
- $GoToLoc = 7;
- goto NO_MATCH;
- }
+ # Everything else
+ else {
- # If we pass that test, we'll call it a marginal match.
- # Here is an example of a marginal match:
- # $done{$$op} = 1;
- # $op = compile_bblock($op);
- # The left tokens are both identifiers, but
- # one accesses a hash and the other doesn't.
- # We'll let this be a tentative match and undo
- # it later if we don't find more than 2 lines
- # in the group.
- elsif ( @group_lines == 1 ) {
- $marginal_match =
- 2; # =2 prevents being undone below
- }
- }
- }
+ # A group with hanging side comments ends with the first non hanging
+ # side comment.
+ if ( $base_line->get_is_hanging_side_comment() ) {
+ $GoToMsg = "end of hanging side comments";
+ goto NO_MATCH;
+ }
- # Don't let line with fewer fields increase column widths
- # ( align3.t )
- if ( $maximum_field_index > $jmax ) {
+ # The number of tokens that this line shares with the previous line
+ # has been stored with the previous line. This value was calculated
+ # and stored by sub 'match_line_pair'.
+ $imax_align = $prev_line->get_imax_pair();
- # Exception: suspend this rule to allow last lines to join
- $GoToLoc = 8;
- if ( $pad > 0 ) { goto NO_MATCH; }
- }
- } ## end for my $j ( 0 .. $jlimit)
-
- # Turn off the "marginal match" flag in some cases...
- # A "marginal match" occurs when the alignment tokens agree
- # but there are differences in the other tokens (patterns).
- # If we leave the marginal match flag set, then the rule is that we
- # will align only if there are more than two lines in the group.
- # We will turn of the flag if we almost have a match
- # and either we have seen a good alignment token or we
- # just need a small pad (2 spaces) to fit. These rules are
- # the result of experimentation. Tokens which misaligned by just
- # one or two characters are annoying. On the other hand,
- # large gaps to less important alignment tokens are also annoying.
- if ( $marginal_match == 1
- && $jmax == $maximum_field_index
- && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) )
- )
- {
- $marginal_match = 0;
- }
- ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n";
+ if ( $imax_align != $jlimit ) {
+ $GoToMsg = "Not all tokens match: $imax_align != $jlimit\n";
+ goto NO_MATCH;
}
- # We have a match (even if marginal).
- # If the current line has fewer fields than the current group
- # but otherwise matches, copy the remaining group fields to
- # make it a perfect match.
- if ( $maximum_field_index > $jmax ) {
+ }
+
+ # The tokens match, but the lines must have identical number of
+ # tokens to join the group.
+ if ( $maximum_field_index != $jmax ) {
+ $GoToMsg = "token count differs";
+ goto NO_MATCH;
+ }
- ##########################################################
- # FIXME: The previous version had a bug which made side comments
- # become regular fields, so for now the program does not allow a
- # line with side comment to match. This should eventually be done.
- # The best test file for experimenting is 'lista.t'
- ##########################################################
+ # The tokens match. Now See if there is space for this line in the
+ # current group.
+ if ( $self->check_fit( $new_line, $base_line ) && !TEST_SWEEP_ONLY ) {
- my $comment = $rfields->[$jmax];
- $GoToLoc = 9;
- goto NO_MATCH if ($comment);
+ EXPLAIN_CHECK_MATCH
+ && print "match and fit, imax_align=$imax_align, jmax=$jmax\n";
+ return ( 2, $jlimit );
+ }
+ else {
- # Corrected loop
- for my $jj ( $jlimit .. $maximum_field_index ) {
- $rtokens->[$jj] = $old_rtokens->[$jj];
- $rfields->[ $jj + 1 ] = '';
- $rpatterns->[ $jj + 1 ] = $old_rpatterns->[ $jj + 1 ];
- }
+ EXPLAIN_CHECK_MATCH
+ && print "match but no fit, imax_align=$imax_align, jmax=$jmax\n";
+ return ( 1, $jlimit );
+ }
-## THESE DO NOT GIVE CORRECT RESULTS
-## $rfields->[$jmax] = $comment;
-## $new_line->set_jmax($jmax);
+ NO_MATCH:
- }
- return;
+ EXPLAIN_CHECK_MATCH
+ && print
+ "no match because $GoToMsg, max match index =i $imax_align, jmax=$jmax\n";
- NO_MATCH:
+ return ( 0, $imax_align );
+}
- # variable $GoToLoc is for debugging
- #print "no match from $GoToLoc\n";
+sub check_fit {
- # Make one last effort to retain a match of certain statements
- my $match = salvage_equality_matches( $new_line, $old_line );
- my_flush_code() unless ($match);
+ my ( $self, $new_line, $old_line ) = @_;
+
+ # The new line has alignments identical to the current group. Now we have
+ # to fit the new line into the group without causing a field to exceed the
+ # line length limit.
+ # return true if successful
+ # return false if not successful
+
+ my $jmax = $new_line->get_jmax();
+ my $leading_space_count = $new_line->get_leading_space_count();
+ my $rfield_lengths = $new_line->get_rfield_lengths();
+ my $padding_available = $old_line->get_available_space_on_right();
+ my $jmax_old = $old_line->get_jmax();
+
+ # Safety check ... only lines with equal array sizes should arrive here
+ # from sub check_match. So if this error occurs, look at recent changes in
+ # sub check_match. It is only supposed to check the fit of lines with
+ # identical numbers of alignment tokens.
+ if ( $jmax_old ne $jmax ) {
+
+ $self->warning(<<EOM);
+Program bug detected in Perl::Tidy::VerticalAligner sub check_fit
+unexpected difference in array lengths: $jmax != $jmax_old
+EOM
return;
}
-}
-sub salvage_equality_matches {
- my ( $new_line, $old_line ) = @_;
+ # Save current columns in case this line does not fit.
+ my @alignments = $old_line->get_alignments();
+ foreach my $alignment (@alignments) {
+ $alignment->save_column();
+ }
- # Reduce the complexity of the two lines if it will allow us to retain
- # alignment of some common alignments, including '=' and '=>'. We will
- # convert both lines to have just two matching tokens, the equality and the
- # side comment.
-
- # return 0 or undef if unsuccessful
- # return 1 if successful
-
- # Here is a very simple example of two lines where we could at least
- # align the equals:
- # $x = $class->_sub( $x, $delta );
- # $xpownm1 = $class->_pow( $class->_copy($x), $nm1 ); # x(i)^(n-1)
-
- # We will only do this if there is one old line (and one new line)
- return unless ( @group_lines == 1 );
- return if ($is_matching_terminal_line);
-
- # We are only looking for equality type statements
- my $old_rtokens = $old_line->get_rtokens();
- my $rtokens = $new_line->get_rtokens();
- my $is_equals =
- ( $rtokens->[0] =~ /=/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );
- return unless ($is_equals);
-
- # The leading patterns must match
- my $old_rpatterns = $old_line->get_rpatterns();
- my $rpatterns = $new_line->get_rpatterns();
- return if ( $old_rpatterns->[0] ne $rpatterns->[0] );
-
- # Both should have side comment fields (should always be true)
- my $jmax_old = $old_line->get_jmax();
- my $jmax_new = $new_line->get_jmax();
- my $end_tok_old = $old_rtokens->[ $jmax_old - 1 ];
- my $end_tok_new = $rtokens->[ $jmax_new - 1 ];
- my $have_side_comments =
- defined($end_tok_old)
- && $end_tok_old eq '#'
- && defined($end_tok_new)
- && $end_tok_new eq '#';
- if ( !$have_side_comments ) { return; }
-
- # Do not match if any remaining tokens in new line include '?', 'if',
- # 'unless','||', '&&'. The reason is that (1) this isn't a great match, and
- # (2) we will prevent possibly better matchs to follow. Here is an
- # example. The match of the first two lines is rejected, and this allows
- # the second and third lines to match.
- # my $type = shift || "o";
- # my $fname = ( $type eq 'oo' ? 'orte_city' : 'orte' );
- # my $suffix = ( $coord_system eq 'standard' ? '' : '-orig' );
- # This logic can cause some unwanted losses of alignments, but it can retain
- # long runs of multiple-token alignments, so overall it is worthwhile.
- # If we had a peek at the subsequent line we could make a much better
- # decision here, but for now this is not available.
- for ( my $j = 1 ; $j < $jmax_new - 1 ; $j++ ) {
- my $new_tok = $rtokens->[$j];
-
- # git#16: do not consider fat commas as good aligmnents here
- my $is_good_alignment =
- ( $new_tok =~ /^(=|\?|if|unless|\|\||\&\&)/ && $new_tok !~ /^=>/ );
- return if ($is_good_alignment);
- }
-
- my $squeeze_line = sub {
- my ($line_obj) = @_;
-
- # reduce a line down to the three fields surrounding
- # the two tokens, an '=' of some sort and a '#' at the end
-
- my $jmax = $line_obj->get_jmax();
- my $jmax_new = 2;
- return unless $jmax > $jmax_new;
- my $rfields = $line_obj->get_rfields();
- my $rpatterns = $line_obj->get_rpatterns();
- my $rtokens = $line_obj->get_rtokens();
- my $rfields_new = [
- $rfields->[0], join( '', @{$rfields}[ 1 .. $jmax - 1 ] ),
- $rfields->[$jmax]
- ];
- my $rpatterns_new = [
- $rpatterns->[0], join( '', @{$rpatterns}[ 1 .. $jmax - 1 ] ),
- $rpatterns->[$jmax]
- ];
- my $rtokens_new = [ $rtokens->[0], $rtokens->[ $jmax - 1 ] ];
- $line_obj->{_rfields} = $rfields_new;
- $line_obj->{_rpatterns} = $rpatterns_new;
- $line_obj->{_rtokens} = $rtokens_new;
- $line_obj->set_jmax($jmax_new);
- };
-
- # Okay, we will force a match at the equals-like token. We will fix both
- # lines to have just 2 tokens and 3 fields:
- $squeeze_line->($new_line);
- $squeeze_line->($old_line);
-
- # start over with a new group
- initialize_for_new_group();
- add_to_group($old_line);
- return 1;
-}
-
-sub check_fit {
-
- my ( $new_line, $old_line ) = @_;
- return unless (@group_lines);
-
- my $jmax = $new_line->get_jmax();
- my $leading_space_count = $new_line->get_leading_space_count();
- my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
- my $rtokens = $new_line->get_rtokens();
- my $rfields = $new_line->get_rfields();
- my $rpatterns = $new_line->get_rpatterns();
-
- my $group_list_type = $group_lines[0]->get_list_type();
-
- my $padding_so_far = 0;
- my $padding_available = $old_line->get_available_space_on_right();
-
- # save current columns in case this doesn't work
- save_alignment_columns();
+ my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
+ # Loop over all alignments ...
my $maximum_field_index = $old_line->get_jmax();
for my $j ( 0 .. $jmax ) {
- my $pad = length( $rfields->[$j] ) - $old_line->current_field_width($j);
+ my $pad = $rfield_lengths->[$j] - $old_line->current_field_width($j);
if ( $j == 0 ) {
$pad += $leading_space_count;
}
- # remember largest gap of the group, excluding gap to side comment
- if ( $pad < 0
- && $group_maximum_gap < -$pad
- && $j > 0
- && $j < $jmax - 1 )
- {
- $group_maximum_gap = -$pad;
- }
-
+ # Keep going if this field does not need any space.
next if $pad < 0;
- ## OLD NOTES:
- ## This patch helps sometimes, but it doesn't check to see if
- ## the line is too long even without the side comment. It needs
- ## to be reworked.
- ##don't let a long token with no trailing side comment push
- ##side comments out, or end a group. (sidecmt1.t)
- ##next if ($j==$jmax-1 && length($rfields->[$jmax])==0);
-
- # BEGIN PATCH for keith1.txt.
- # If the group began matching multiple tokens but later this got
- # reduced to a fewer number of matching tokens, then the fields
- # of the later lines will still have to fit into their corresponding
- # fields. So a large later field will "push" the other fields to
- # the right, including previous side comments, and if there is no room
- # then there is no match.
- # For example, look at the last line in the following snippet:
-
- # my $b_prod_db = ( $ENV{ORACLE_SID} =~ m/p$/ && !$testing ) ? true : false;
- # my $env = ($b_prod_db) ? "prd" : "val";
- # my $plant = ( $OPT{p} ) ? $OPT{p} : "STL";
- # my $task = $OPT{t};
- # my $fnam = "longggggggggggggggg.$record_created.$env.$plant.idash";
-
- # The long term will push the '?' to the right to fit in, and in this
- # case there is not enough room so it will not match the equals unless
- # we do something special.
-
- # Usually it looks good to keep an initial alignment of '=' going, and
- # we can do this if the long term can fit in the space taken up by the
- # remaining fields (the ? : fields here).
-
- # Allowing any matching token for now, but it could be restricted
- # to an '='-like token if necessary.
+ # See if it needs too much space.
+ if ( $pad > $padding_available ) {
- if (
- $pad > $padding_available
- && $jmax == 2 # matching one thing (plus #)
- && $j == $jmax - 1 # at last field
- && @group_lines > 1 # more than 1 line in group now
- && $jmax < $maximum_field_index # other lines have more fields
- && length( $rfields->[$jmax] ) == 0 # no side comment
-
- # Uncomment to match only equals (but this does not seem necessary)
- # && $rtokens->[0] =~ /^=\d/ # matching an equals
- )
- {
- my $extra_padding = 0;
- foreach my $jj ( $j + 1 .. $maximum_field_index - 1 ) {
- $extra_padding += $old_line->current_field_width($jj);
+ ################################################
+ # Line does not fit -- revert to starting state
+ ################################################
+ foreach my $alignment (@alignments) {
+ $alignment->restore_column();
}
-
- next if ( $pad <= $padding_available + $extra_padding );
- }
-
- # END PATCH for keith1.pl
-
- # This line will need space; lets see if we want to accept it..
- if (
-
- # not if this won't fit
- ( $pad > $padding_available )
-
- # previously, there were upper bounds placed on padding here
- # (maximum_whitespace_columns), but they were not really helpful
-
- )
- {
-
- # revert to starting state then flush; things didn't work out
- restore_alignment_columns();
- my_flush_code();
- last;
+ return;
}
- # patch to avoid excessive gaps in previous lines,
- # due to a line of fewer fields.
- # return join( ".",
- # $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"},
- # $self->{"area"}, $self->{"id"}, $self->{"sel"} );
- next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
-
- # looks ok, squeeze this field in
+ # make room for this field
$old_line->increase_field_width( $j, $pad );
$padding_available -= $pad;
-
- # remember largest gap of the group, excluding gap to side comment
- if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) {
- $group_maximum_gap = $pad;
- }
}
- return;
-}
-
-sub add_to_group {
- # The current line either starts a new alignment group or is
- # accepted into the current alignment group.
- my ($new_line) = @_;
- push_group_line($new_line);
+ ######################################
+ # The line fits, the match is accepted
+ ######################################
+ return 1;
- # initialize field lengths if starting new group
- if ( @group_lines == 1 ) {
+}
- my $jmax = $new_line->get_jmax();
- my $rfields = $new_line->get_rfields();
- my $rtokens = $new_line->get_rtokens();
- my $col = $new_line->get_leading_space_count();
+sub install_new_alignments {
- for my $j ( 0 .. $jmax ) {
- $col += length( $rfields->[$j] );
+ my ($new_line) = @_;
- # create initial alignments for the new group
- my $token = "";
- if ( $j < $jmax ) { $token = $rtokens->[$j] }
- my $alignment = make_alignment( $col, $token );
- $new_line->set_alignment( $j, $alignment );
- }
+ my $jmax = $new_line->get_jmax();
+ my $rfield_lengths = $new_line->get_rfield_lengths();
+ my $col = $new_line->get_leading_space_count();
- $maximum_jmax_seen = $jmax;
- $minimum_jmax_seen = $jmax;
- }
+ for my $j ( 0 .. $jmax ) {
+ $col += $rfield_lengths->[$j];
- # use previous alignments otherwise
- else {
- my @new_alignments = $group_lines[-2]->get_alignments();
- $new_line->set_alignments(@new_alignments);
+ # create initial alignments for the new group
+ my $alignment =
+ Perl::Tidy::VerticalAligner::Alignment->new( { column => $col } );
+ $new_line->set_alignment( $j, $alignment );
}
+ return;
+}
- # remember group jmax extremes for next call to valign_input
- $previous_minimum_jmax_seen = $minimum_jmax_seen;
- $previous_maximum_jmax_seen = $maximum_jmax_seen;
+sub copy_old_alignments {
+ my ( $new_line, $old_line ) = @_;
+ my @new_alignments = $old_line->get_alignments();
+ $new_line->set_alignments(@new_alignments);
return;
}
return;
}
-# flush() sends the current Perl::Tidy::VerticalAligner group down the
-# pipeline to Perl::Tidy::FileWriter.
-
-# This is the external flush, which also empties the buffer and cache
-sub flush {
-
- # the buffer must be emptied first, then any cached text
- dump_valign_buffer();
-
- if (@group_lines) {
- my_flush();
- }
- else {
- if ($cached_line_type) {
- $seqno_string = $cached_seqno_string;
- valign_output_step_C( $cached_line_text,
- $cached_line_leading_space_count,
- $last_level_written );
- $cached_line_type = 0;
- $cached_line_text = "";
- $cached_seqno_string = "";
- }
- }
- return;
-}
-
-sub reduce_valign_buffer_indentation {
-
- my ($diff) = @_;
- if ( $valign_buffer_filling && $diff ) {
- my $max_valign_buffer = @valign_buffer;
- foreach my $i ( 0 .. $max_valign_buffer - 1 ) {
- my ( $line, $leading_space_count, $level ) =
- @{ $valign_buffer[$i] };
- my $ws = substr( $line, 0, $diff );
- if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
- $line = substr( $line, $diff );
- }
- if ( $leading_space_count >= $diff ) {
- $leading_space_count -= $diff;
- $level = level_change( $leading_space_count, $diff, $level );
- }
- $valign_buffer[$i] = [ $line, $leading_space_count, $level ];
- }
- }
- return;
-}
-
sub level_change {
# compute decrease in level when we remove $diff spaces from the
# leading spaces
- my ( $leading_space_count, $diff, $level ) = @_;
+ my ( $self, $leading_space_count, $diff, $level ) = @_;
+
+ my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
if ($rOpts_indent_columns) {
my $olev =
int( ( $leading_space_count + $diff ) / $rOpts_indent_columns );
return $level;
}
-sub dump_valign_buffer {
- if (@valign_buffer) {
- foreach (@valign_buffer) {
- valign_output_step_D( @{$_} );
- }
- @valign_buffer = ();
- }
- $valign_buffer_filling = "";
- return;
-}
+###############################################
+# CODE SECTION 4: Code to process comment lines
+###############################################
-sub my_flush_comment {
+sub _flush_comment_lines {
- # Output a group of COMMENT lines
+ # Output a group consisting of COMMENT lines
- return unless (@group_lines);
- my $leading_space_count = $comment_leading_space_count;
- my $leading_string = get_leading_string($leading_space_count);
+ my ($self) = @_;
+ my $rgroup_lines = $self->[_rgroup_lines_];
+ return unless ( @{$rgroup_lines} );
+ my $group_level = $self->[_group_level_];
+ my $leading_space_count = $self->[_comment_leading_space_count_];
+ my $leading_string =
+ $self->get_leading_string( $leading_space_count, $group_level );
# look for excessively long lines
my $max_excess = 0;
- foreach my $str (@group_lines) {
+ foreach my $item ( @{$rgroup_lines} ) {
+ my ( $str, $str_len ) = @{$item};
my $excess =
- length($str) +
+ $str_len +
$leading_space_count -
- maximum_line_length_for_level($group_level);
+ $self->maximum_line_length_for_level($group_level);
if ( $excess > $max_excess ) {
$max_excess = $excess;
}
if ( $max_excess > 0 ) {
$leading_space_count -= $max_excess;
if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
- $last_outdented_line_at = $file_writer_object->get_output_line_number();
+ my $file_writer_object = $self->[_file_writer_object_];
+ my $last_outdented_line_at =
+ $file_writer_object->get_output_line_number();
+ $self->[_last_outdented_line_at_] = $last_outdented_line_at;
+ my $outdented_line_count = $self->[_outdented_line_count_];
unless ($outdented_line_count) {
- $first_outdented_line_at = $last_outdented_line_at;
+ $self->[_first_outdented_line_at_] = $last_outdented_line_at;
}
- my $nlines = @group_lines;
+ my $nlines = @{$rgroup_lines};
$outdented_line_count += $nlines;
+ $self->[_outdented_line_count_] = $outdented_line_count;
}
# write the lines
my $outdent_long_lines = 0;
- foreach my $line (@group_lines) {
- valign_output_step_B( $leading_space_count, $line, 0,
- $outdent_long_lines, "", $group_level );
+
+ foreach my $item ( @{$rgroup_lines} ) {
+ my ( $str, $str_len, $Kend ) = @{$item};
+ $self->valign_output_step_B(
+ {
+ leading_space_count => $leading_space_count,
+ line => $str,
+ line_length => $str_len,
+ side_comment_length => 0,
+ outdent_long_lines => $outdent_long_lines,
+ rvertical_tightness_flags => "",
+ level => $group_level,
+ level_end => $group_level,
+ Kend => $Kend,
+ }
+ );
}
- initialize_for_new_group();
+ $self->initialize_for_new_group();
return;
}
-sub my_flush_code {
+######################################################
+# CODE SECTION 5: Code to process groups of code lines
+######################################################
+
+sub _flush_group_lines {
+
+ # This is the vertical aligner internal flush, which leaves the cache
+ # intact
+ my ( $self, $level_jump ) = @_;
- # Output a group of CODE lines
+ # $level_jump = $next_level-$group_level, if known
+ # = undef if not known
- return unless (@group_lines);
+ my $rgroup_lines = $self->[_rgroup_lines_];
+ return unless ( @{$rgroup_lines} );
+ my $group_type = $self->[_group_type_];
+ my $group_level = $self->[_group_level_];
- VALIGN_DEBUG_FLAG_APPEND0
- && do {
- my $group_list_type = $group_lines[0]->get_list_type();
+ # Debug
+ 0 && do {
my ( $a, $b, $c ) = caller();
- my $nlines = @group_lines;
- my $maximum_field_index = $group_lines[0]->get_jmax();
- my $rfields_old = $group_lines[0]->get_rfields();
- my $tok = $rfields_old->[0];
+ my $nlines = @{$rgroup_lines};
print STDOUT
-"APPEND0: my_flush_code called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$nlines extra=$extra_indent_ok first tok=$tok;\n";
+"APPEND0: _flush_group_lines called from $a $b $c lines=$nlines, type=$group_type \n";
+ };
+
+ ############################################
+ # Section 1: Handle a group of COMMENT lines
+ ############################################
+ if ( $group_type eq 'COMMENT' ) {
+ $self->_flush_comment_lines();
+ return;
+ }
+
+ #########################################################################
+ # Section 2: Handle line(s) of CODE. Most of the actual work of vertical
+ # aligning happens here in the following steps:
+ #########################################################################
+
+ # STEP 1: Remove most unmatched tokens. They block good alignments.
+ my ( $max_lev_diff, $saw_side_comment ) =
+ delete_unmatched_tokens( $rgroup_lines, $group_level );
+
+ # STEP 2: Sweep top to bottom, forming subgroups of lines with exactly
+ # matching common alignments. The indexes of these subgroups are in the
+ # return variable.
+ my $rgroups = $self->sweep_top_down( $rgroup_lines, $group_level );
+
+ # STEP 3: Sweep left to right through the lines, looking for leading
+ # alignment tokens shared by groups.
+ sweep_left_to_right( $rgroup_lines, $rgroups, $group_level )
+ if ( @{$rgroups} > 1 );
+
+ # STEP 4: Move side comments to a common column if possible.
+ if ($saw_side_comment) {
+ $self->align_side_comments( $rgroup_lines, $rgroups );
+ }
- };
+ # STEP 5: For the -lp option, increase the indentation of lists
+ # to the desired amount, but do not exceed the line length limit.
- # some small groups are best left unaligned
- my $do_not_align = decide_if_aligned_pair();
+ # We are allowed to shift a group of lines to the right if:
+ # (1) its level is greater than the level of the previous group, and
+ # (2) its level is greater than the level of the next line to be written.
- # optimize side comment location
- $do_not_align = adjust_side_comment($do_not_align);
+ my $extra_indent_ok;
+ if ( $group_level > $self->[_last_level_written_] ) {
- # recover spaces for -lp option if possible
- my $extra_leading_spaces = get_extra_leading_spaces();
+ # Use the level jump to next line to come, if given
+ if ( defined($level_jump) ) {
+ $extra_indent_ok = $level_jump < 0;
+ }
+
+ # Otherwise, assume the next line has the level of the end of last line.
+ # This fixes case c008.
+ else {
+ my $level_end = $rgroup_lines->[-1]->get_level_end();
+ $extra_indent_ok = $group_level > $level_end;
+ }
+ }
- # all lines of this group have the same basic leading spacing
- my $group_leader_length = $group_lines[0]->get_leading_space_count();
+ my $extra_leading_spaces =
+ $extra_indent_ok
+ ? get_extra_leading_spaces( $rgroup_lines, $rgroups )
+ : 0;
- # add extra leading spaces if helpful
- # NOTE: Use zero; this did not work well
- my $min_ci_gap = 0;
+ # STEP 6: Output the lines.
+ # All lines in this batch have the same basic leading spacing:
+ my $group_leader_length = $rgroup_lines->[0]->get_leading_space_count();
- # output the lines
- foreach my $line (@group_lines) {
- valign_output_step_A( $line, $min_ci_gap, $do_not_align,
- $group_leader_length, $extra_leading_spaces );
+ foreach my $line ( @{$rgroup_lines} ) {
+ $self->valign_output_step_A(
+ {
+ line => $line,
+ min_ci_gap => 0,
+ do_not_align => 0,
+ group_leader_length => $group_leader_length,
+ extra_leading_spaces => $extra_leading_spaces,
+ level => $group_level,
+ }
+ );
}
- initialize_for_new_group();
+ $self->initialize_for_new_group();
return;
}
-sub my_flush {
+{ ## closure for sub sweep_top_down
- # This is the vertical aligner internal flush, which leaves the cache
- # intact
- return unless (@group_lines);
+ my $rall_lines; # all of the lines
+ my $grp_level; # level of all lines
+ my $rgroups; # describes the partition of lines we will make here
+ my $group_line_count; # number of lines in current partition
- VALIGN_DEBUG_FLAG_APPEND0 && do {
- my ( $a, $b, $c ) = caller();
- my $nlines = @group_lines;
- print STDOUT
-"APPEND0: my_flush called from $a $b $c lines=$nlines, type=$group_type \n";
- };
+ BEGIN { $rgroups = [] }
- # handle a group of COMMENT lines
- if ( $group_type eq 'COMMENT' ) { my_flush_comment() }
+ sub initialize_for_new_rgroup {
+ $group_line_count = 0;
+ return;
+ }
- # handle a single line of CODE
- elsif ( @group_lines == 1 ) { my_flush_code() }
+ sub add_to_rgroup {
- # handle group(s) of CODE lines
- else {
+ my ($jend) = @_;
+ my $rline = $rall_lines->[$jend];
- # LP FIX PART 1
- # If we are trying to add extra indentation for -lp formatting,
- # then we need to try to keep the group intact. But we have
- # to set the $extra_indent_ok flag to zero in case some lines
- # are output separately. We fix things up at the bottom.
- # NOTE: this is a workaround but is tentative; we should really look to
- # see if if extra indentation is possible.
- my $rOpt_lp = $rOpts->{'line-up-parentheses'};
- my $keep_group_intact = $rOpt_lp && $extra_indent_ok;
- my $extra_indent_ok_save = $extra_indent_ok;
- $extra_indent_ok = 0;
+ my $jbeg = $jend;
+ if ( $group_line_count == 0 ) {
+ install_new_alignments($rline);
+ }
+ else {
+ my $rvals = pop @{$rgroups};
+ $jbeg = $rvals->[0];
+ copy_old_alignments( $rline, $rall_lines->[$jbeg] );
+ }
+ push @{$rgroups}, [ $jbeg, $jend, undef ];
+ $group_line_count++;
+ return;
+ }
+
+ sub get_rgroup_jrange {
+
+ return unless @{$rgroups};
+ return unless ( $group_line_count > 0 );
+ my ( $jbeg, $jend ) = @{ $rgroups->[-1] };
+ return ( $jbeg, $jend );
+ }
- # we will rebuild alignment line group(s);
- my @new_lines = @group_lines;
- initialize_for_new_group();
+ sub end_rgroup {
+
+ my ($imax_align) = @_;
+ return unless @{$rgroups};
+ return unless ( $group_line_count > 0 );
+
+ my ( $jbeg, $jend ) = @{ pop @{$rgroups} };
+ push @{$rgroups}, [ $jbeg, $jend, $imax_align ];
+
+ # Undo some alignments of poor two-line combinations.
+ # We had to wait until now to know the line count.
+ if ( $jend - $jbeg == 1 ) {
+ my $line_0 = $rall_lines->[$jbeg];
+ my $line_1 = $rall_lines->[$jend];
+
+ my $imax_pair = $line_1->get_imax_pair();
+ if ( $imax_pair > $imax_align ) { $imax_align = $imax_pair }
+
+ ## flag for possible future use:
+ ## my $is_isolated_pair = $imax_pair < 0
+ ## && ( $jbeg == 0
+ ## || $rall_lines->[ $jbeg - 1 ]->get_imax_pair() < 0 );
+
+ my $imax_prev =
+ $jbeg > 0 ? $rall_lines->[ $jbeg - 1 ]->get_imax_pair() : -1;
+
+ my ( $is_marginal, $imax_align_fix ) =
+ is_marginal_match( $line_0, $line_1, $grp_level, $imax_align,
+ $imax_prev );
+ if ($is_marginal) {
+ combine_fields( $line_0, $line_1, $imax_align_fix );
+ }
+ }
- # remove unmatched tokens in all lines
- delete_unmatched_tokens( \@new_lines );
+ initialize_for_new_rgroup();
+ return;
+ }
- foreach my $new_line (@new_lines) {
+ sub block_penultimate_match {
- # Start a new group if necessary
- if ( !@group_lines ) {
- add_to_group($new_line);
+ # emergency reset to prevent sweep_left_to_right from trying to match a
+ # failed terminal else match
+ return unless @{$rgroups} > 1;
+ $rgroups->[-2]->[2] = -1;
+ return;
+ }
+ sub sweep_top_down {
+ my ( $self, $rlines, $group_level ) = @_;
+
+ # Partition the set of lines into final alignment subgroups
+ # and store the alignments with the lines.
+
+ # The alignment subgroups we are making here are groups of consecutive
+ # lines which have (1) identical alignment tokens and (2) do not
+ # exceed the allowable maximum line length. A later sweep from
+ # left-to-right ('sweep_lr') will handle additional alignments.
+
+ # transfer args to closure variables
+ $rall_lines = $rlines;
+ $grp_level = $group_level;
+ $rgroups = [];
+ initialize_for_new_rgroup();
+ return unless @{$rlines}; # shouldn't happen
+
+ # Unset the _end_group flag for the last line if it it set because it
+ # is not needed and can causes problems for -lp formatting
+ $rall_lines->[-1]->set_end_group(0);
+
+ # Loop over all lines ...
+ my $jline = -1;
+ foreach my $new_line ( @{$rall_lines} ) {
+ $jline++;
+
+ # Start a new subgroup if necessary
+ if ( !$group_line_count ) {
+ add_to_rgroup($jline);
+ if ( $new_line->get_end_group() ) {
+ end_rgroup(-1);
+ }
next;
}
my $j_terminal_match = $new_line->get_j_terminal_match();
- my $base_line = $group_lines[0];
+ my ( $jbeg, $jend ) = get_rgroup_jrange();
+ if ( !defined($jbeg) ) {
+
+ # safety check, shouldn't happen
+ $self->warning(<<EOM);
+Program bug detected in Perl::Tidy::VerticalAligner sub sweep_top_down
+undefined index for group line count $group_line_count
+EOM
+ $jbeg = $jline;
+ }
+ my $base_line = $rall_lines->[$jbeg];
# Initialize a global flag saying if the last line of the group
# should match end of group and also terminate the group. There
$col_matching_terminal =
$base_line->get_column($j_terminal_match);
- # set global flag for sub decide_if_aligned_pair
- $is_matching_terminal_line = 1;
+ # Ignore an undefined value as a defensive step; shouldn't
+ # normally happen.
+ $col_matching_terminal = 0
+ unless defined($col_matching_terminal);
}
# -------------------------------------------------------------
- # Allow hanging side comment to join current group, if any. This
- # will help keep side comments aligned, because otherwise we
- # will have to start a new group, making alignment less likely.
+ # Allow hanging side comment to join current group, if any. The
+ # only advantage is to keep the other tokens in the same group. For
+ # example, this would make the '=' align here:
+ # $ax = 1; # side comment
+ # # hanging side comment
+ # $boondoggle = 5; # side comment
+ # $beetle = 5; # side comment
+
+ # here is another example..
+
+ # _rtoc_name_count => {}, # hash to track ..
+ # _rpackage_stack => [], # stack to check ..
+ # # name changes
+ # _rlast_level => \$last_level, # brace indentation
+ #
+ #
+ # If this were not desired, the next step could be skipped.
# -------------------------------------------------------------
-
if ( $new_line->get_is_hanging_side_comment() ) {
join_hanging_comment( $new_line, $base_line );
}
# BEFORE this line unless both it and the previous line have side
# comments. This prevents this line from pushing side coments out
# to the right.
- elsif ( $new_line->get_jmax() == 1 && !$keep_group_intact ) {
+ elsif ( $new_line->get_jmax() == 1 ) {
# There are no matching tokens, so now check side comments.
# Programming note: accessing arrays with index -1 is
# risky in Perl, but we have verified there is at least one
# line in the group and that there is at least one field.
- my $prev_comment = $group_lines[-1]->get_rfields()->[-1];
+ my $prev_comment =
+ $rall_lines->[ $jline - 1 ]->get_rfields()->[-1];
my $side_comment = $new_line->get_rfields()->[-1];
- my_flush_code() unless ( $side_comment && $prev_comment );
-
+ end_rgroup(-1) unless ( $side_comment && $prev_comment );
}
- # -------------------------------------------------------------
- # If there is just one previous line, and it has more fields
- # than the new line, try to join fields together to get a match
- # with the new line. At the present time, only a single
- # leading '=' is allowed to be compressed out. This is useful
- # in rare cases where a table is forced to use old breakpoints
- # because of side comments,
- # and the table starts out something like this:
- # my %MonthChars = ('0', 'Jan', # side comment
- # '1', 'Feb',
- # '2', 'Mar',
- # Eliminating the '=' field will allow the remaining fields to
- # line up. This situation does not occur if there are no side
- # comments because scan_list would put a break after the
- # opening '('.
- # -------------------------------------------------------------
-
- eliminate_old_fields( $new_line, $base_line );
-
- # -------------------------------------------------------------
- # If the new line has more fields than the current group,
- # see if we can match the first fields and combine the remaining
- # fields of the new line.
- # -------------------------------------------------------------
-
- eliminate_new_fields( $new_line, $base_line );
-
- # -------------------------------------------------------------
- # Flush previous group unless all common tokens and patterns
- # match..
-
- check_match( $new_line, $base_line );
-
- # -------------------------------------------------------------
- # See if there is space for this line in the current group (if
- # any)
- # -------------------------------------------------------------
- if (@group_lines) {
- check_fit( $new_line, $base_line );
+ # See if the new line matches and fits the current group,
+ # if it still exists. Flush the current group if not.
+ my $match_code;
+ if ($group_line_count) {
+ ( $match_code, my $imax_align ) =
+ $self->check_match( $new_line, $base_line,
+ $rall_lines->[ $jline - 1 ] );
+ if ( $match_code != 2 ) { end_rgroup($imax_align) }
}
- add_to_group($new_line);
+ # Store the new line
+ add_to_rgroup($jline);
if ( defined($j_terminal_match) ) {
- # if there is only one line in the group (maybe due to failure
- # to match perfectly with previous lines), then align the ? or
- # { of this terminal line with the previous one unless that
- # would make the line too long
- if ( @group_lines == 1 ) {
- $base_line = $group_lines[0];
+ # Decide if we should fix a terminal match. We can either:
+ # 1. fix it and prevent the sweep_lr from changing it, or
+ # 2. leave it alone and let sweep_lr try to fix it.
+
+ # The current logic is to fix it if:
+ # -it has not joined to previous lines,
+ # -and either the previous subgroup has just 1 line, or
+ # -this line matched but did not fit (so sweep won't work)
+ my $fixit;
+ if ( $group_line_count == 1 ) {
+ $fixit ||= $match_code;
+ if ( !$fixit ) {
+ if ( @{$rgroups} > 1 ) {
+ my ( $jbegx, $jendx ) = @{ $rgroups->[-2] };
+ my $nlines = $jendx - $jbegx + 1;
+ $fixit ||= $nlines <= 1;
+ }
+ }
+ }
+
+ if ($fixit) {
+ $base_line = $new_line;
my $col_now = $base_line->get_column($j_terminal_match);
- my $pad = $col_matching_terminal - $col_now;
+
+ # Ignore an undefined value as a defensive step; shouldn't
+ # normally happen.
+ $col_now = 0 unless defined($col_now);
+
+ my $pad = $col_matching_terminal - $col_now;
my $padding_available =
$base_line->get_available_space_on_right();
- if ( $pad > 0 && $pad <= $padding_available ) {
+ if ( $col_now && $pad > 0 && $pad <= $padding_available ) {
$base_line->increase_field_width( $j_terminal_match,
$pad );
}
+
+ # do not let sweep_left_to_right change an isolated 'else'
+ if ( !$new_line->get_is_terminal_ternary() ) {
+ block_penultimate_match();
+ }
}
- my_flush_code();
- $is_matching_terminal_line = 0;
+ end_rgroup(-1);
}
- # Optional optimization; end the group if we know we cannot match
- # next line.
- elsif ( $new_line->{_end_group} ) {
- my_flush_code();
+ # end the group if we know we cannot match next line.
+ elsif ( $new_line->get_end_group() ) {
+ end_rgroup(-1);
}
- }
+ } ## end loop over lines
- # LP FIX PART 2
- # if we managed to keep the group intact for -lp formatting,
- # restore the flag which allows extra indentation
- if ( $keep_group_intact && @group_lines == @new_lines ) {
- $extra_indent_ok = $extra_indent_ok_save;
- }
- my_flush_code();
+ end_rgroup(-1);
+ return ($rgroups);
}
- return;
}
-sub delete_selected_tokens {
+sub two_line_pad {
- my ( $line_obj, $ridel ) = @_;
+ my ( $line_m, $line, $imax_min ) = @_;
- # remove an unused alignment token(s) to improve alignment chances
- return unless ( defined($line_obj) && defined($ridel) && @{$ridel} );
+ # Given:
+ # two isolated (list) lines
+ # imax_min = number of common alignment tokens
+ # Return:
+ # $pad_max = maximum suggested pad distnce
+ # = 0 if alignment not recommended
+ # Note that this is only for two lines which do not have alignment tokens
+ # in common with any other lines. It is intended for lists, but it might
+ # also be used for two non-list lines with a common leading '='.
- my $jmax_old = $line_obj->get_jmax();
- my $rfields_old = $line_obj->get_rfields();
- my $rpatterns_old = $line_obj->get_rpatterns();
- my $rtokens_old = $line_obj->get_rtokens();
+ # Allow alignment if the difference in the two unpadded line lengths
+ # is not more than either line length. The idea is to avoid
+ # aligning lines with very different field lengths, like these two:
- local $" = '> <';
- 0 && print <<EOM;
-delete indexes: <@{$ridel}>
-old jmax: $jmax_old
-old tokens: <@{$rtokens_old}>
-old patterns: <@{$rpatterns_old}>
-old fields: <@{$rfields_old}>
-EOM
+ # [
+ # 'VARCHAR', DBI::SQL_VARCHAR, undef, "'", "'", undef, 0, 1,
+ # 1, 0, 0, 0, undef, 0, 0
+ # ];
+ my $rfield_lengths = $line->get_rfield_lengths();
+ my $rfield_lengths_m = $line_m->get_rfield_lengths();
- my $rfields_new = [];
- my $rpatterns_new = [];
- my $rtokens_new = [];
+ # Safety check - shouldn't happen
+ return 0
+ unless $imax_min < @{$rfield_lengths} && $imax_min < @{$rfield_lengths_m};
- my $kmax = @{$ridel} - 1;
- my $k = 0;
- my $jdel_next = $ridel->[$k];
+ my $lensum_m = 0;
+ my $lensum = 0;
+ for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
+ $lensum_m += $rfield_lengths_m->[$i];
+ $lensum += $rfield_lengths->[$i];
+ }
- # FIXME:
- if ( $jdel_next < 0 ) { print STDERR "bad jdel_next=$jdel_next\n"; return }
- my $pattern = $rpatterns_old->[0];
- my $field = $rfields_old->[0];
- push @{$rfields_new}, $field;
- push @{$rpatterns_new}, $pattern;
- for ( my $j = 0 ; $j < $jmax_old ; $j++ ) {
- my $token = $rtokens_old->[$j];
- my $field = $rfields_old->[ $j + 1 ];
- my $pattern = $rpatterns_old->[ $j + 1 ];
- if ( $k > $kmax || $j < $jdel_next ) {
- push @{$rtokens_new}, $token;
- push @{$rfields_new}, $field;
- push @{$rpatterns_new}, $pattern;
- }
- elsif ( $j == $jdel_next ) {
- $rfields_new->[-1] .= $field;
- $rpatterns_new->[-1] .= $pattern;
- if ( ++$k <= $kmax ) {
- my $jdel_last = $jdel_next;
- $jdel_next = $ridel->[$k];
- if ( $jdel_next < $jdel_last ) {
-
- # FIXME:
- print STDERR "bad jdel_next=$jdel_next\n";
- return;
- }
- }
+ my ( $lenmin, $lenmax ) =
+ $lensum >= $lensum_m ? ( $lensum_m, $lensum ) : ( $lensum, $lensum_m );
+
+ my $patterns_match;
+ if ( $line_m->get_list_type() && $line->get_list_type() ) {
+ $patterns_match = 1;
+ my $rpatterns_m = $line_m->get_rpatterns();
+ my $rpatterns = $line->get_rpatterns();
+ for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
+ my $pat = $rpatterns->[$i];
+ my $pat_m = $rpatterns_m->[$i];
+ if ( $pat ne $pat_m ) { $patterns_match = 0; last }
}
}
- # ----- x ------ x ------ x ------
- #t 0 1 2 <- token indexing
- #f 0 1 2 3 <- field and pattern
+ my $pad_max = $lenmax;
+ if ( !$patterns_match && $lenmax > 2 * $lenmin ) { $pad_max = 0 }
- my $jmax_new = @{$rfields_new} - 1;
- $line_obj->set_rtokens($rtokens_new);
- $line_obj->set_rpatterns($rpatterns_new);
- $line_obj->set_rfields($rfields_new);
- $line_obj->set_jmax($jmax_new);
+ return $pad_max;
+}
- 0 && print <<EOM;
+sub sweep_left_to_right {
+
+ my ( $rlines, $rgroups, $group_level ) = @_;
+
+ # So far we have divided the lines into groups having an equal number of
+ # identical alignments. Here we are going to look for common leading
+ # alignments between the different groups and align them when possible.
+ # For example, the three lines below are in three groups because each line
+ # has a different number of commas. In this routine we will sweep from
+ # left to right, aligning the leading commas as we go, but stopping if we
+ # hit the line length limit.
+
+ # my ( $num, $numi, $numj, $xyza, $ka, $xyzb, $kb, $aff, $error );
+ # my ( $i, $j, $error, $aff, $asum, $avec );
+ # my ( $km, $area, $varea );
+
+ # nothing to do if just one group
+ my $ng_max = @{$rgroups} - 1;
+ return unless ( $ng_max > 0 );
+
+ ############################################################################
+ # Step 1: Loop over groups to find all common leading alignment tokens
+ ############################################################################
+
+ my $line;
+ my $rtokens;
+ my $imax; # index of maximum non-side-comment alignment token
+ my $istop; # an optional stopping index
+ my $jbeg; # starting line index
+ my $jend; # ending line index
+
+ my $line_m;
+ my $rtokens_m;
+ my $imax_m;
+ my $istop_m;
+ my $jbeg_m;
+ my $jend_m;
+
+ my $istop_mm;
+
+ # Look at neighboring pairs of groups and form a simple list
+ # of all common leading alignment tokens. Foreach such match we
+ # store [$i, $ng], where
+ # $i = index of the token in the line (0,1,...)
+ # $ng is the second of the two groups with this common token
+ my @icommon;
+
+ # Hash to hold the maximum alignment change for any group
+ my %max_move;
+
+ # a small number of columns
+ my $short_pad = 4;
+
+ my $ng = -1;
+ foreach my $item ( @{$rgroups} ) {
+ $ng++;
+
+ $istop_mm = $istop_m;
+
+ # save _m values of previous group
+ $line_m = $line;
+ $rtokens_m = $rtokens;
+ $imax_m = $imax;
+ $istop_m = $istop;
+ $jbeg_m = $jbeg;
+ $jend_m = $jend;
+
+ # Get values for this group. Note that we just have to use values for
+ # one of the lines of the group since all members have the same
+ # alignments.
+ ( $jbeg, $jend, $istop ) = @{$item};
+
+ $line = $rlines->[$jbeg];
+ $rtokens = $line->get_rtokens();
+ $imax = $line->get_jmax() - 2;
+ $istop = -1 unless ( defined($istop) );
+ $istop = $imax if ( $istop > $imax );
+
+ # Initialize on first group
+ next if ( $ng == 0 );
+
+ # Use the minimum index limit of the two groups
+ my $imax_min = $imax > $imax_m ? $imax_m : $imax;
+
+ # Also impose a limit if given.
+ if ( $istop_m < $imax_min ) {
+ $imax_min = $istop_m;
+ }
-new jmax: $jmax_new
-new tokens: <@{$rtokens_new}>
-new patterns: <@{$rpatterns_new}>
-new fields: <@{$rfields_new}>
-EOM
- return;
-}
+ # Special treatment of two one-line groups isolated from other lines,
+ # unless they form a simple list or a terminal match. Otherwise the
+ # alignment can look strange in some cases.
+ my $list_type = $rlines->[$jbeg]->get_list_type();
+ if (
+ $jend == $jbeg
+ && $jend_m == $jbeg_m
+ && ( $ng == 1 || $istop_mm < 0 )
+ && ( $ng == $ng_max || $istop < 0 )
+ && !$line->get_j_terminal_match()
+
+ # Only do this for imperfect matches. This is normally true except
+ # when two perfect matches cannot form a group because the line
+ # length limit would be exceeded. In that case we can still try
+ # to match as many alignments as possible.
+ && ( $imax != $imax_m || $istop_m != $imax_m )
+ )
+ {
-sub decode_alignment_token {
+ # We will just align assignments and simple lists
+ next unless ( $imax_min >= 0 );
+ next
+ unless ( $rtokens->[0] =~ /^=\d/
+ || $list_type );
+
+ # In this case we will limit padding to a short distance. This
+ # is a compromise to keep some vertical alignment but prevent large
+ # gaps, which do not look good for just two lines.
+ my $pad_max =
+ two_line_pad( $rlines->[$jbeg], $rlines->[$jbeg_m], $imax_min );
+ next unless ($pad_max);
+ my $ng_m = $ng - 1;
+ $max_move{"$ng_m"} = $pad_max;
+ $max_move{"$ng"} = $pad_max;
+ }
- # Unpack the values packed in an alignment token
- #
- # Usage:
- # my ( $raw_tok, $lev, $tag, $tok_count ) =
- # decode_alignment_token($token);
-
- # Alignment tokens have a trailing decimal level and optional tag (for
- # commas):
- # For example, the first comma in the following line
- # sub banner { crlf; report( shift, '/', shift ); crlf }
- # is decorated as follows:
- # ,2+report-6 => (tok,lev,tag) =qw( , 2 +report-6)
-
- # An optional token count may be appended with a leading dot.
- # Currently this is only done for '=' tokens but this could change.
- # For example, consider the following line:
- # $nport = $port = shift || $name;
- # The first '=' may either be '=0' or '=0.1' [level 0, first equals]
- # The second '=' will be '=0.2' [level 0, second equals]
- my ($tok) = @_;
- my ( $raw_tok, $lev, $tag, $tok_count ) = ( $tok, 0, "", 1 );
- if ( $tok =~ /^(\D+)(\d+)([^\.]*)(\.(\d+))?$/ ) {
- $raw_tok = $1;
- $lev = $2;
- $tag = $3 if ($3);
- $tok_count = $5 if ($5);
- }
- return ( $raw_tok, $lev, $tag, $tok_count );
+ # Loop to find all common leading tokens.
+ if ( $imax_min >= 0 ) {
+ foreach my $i ( 0 .. $imax_min ) {
+ my $tok = $rtokens->[$i];
+ my $tok_m = $rtokens_m->[$i];
+ last if ( $tok ne $tok_m );
+ push @icommon, [ $i, $ng, $tok ];
+ }
+ }
+ }
+ return unless @icommon;
+
+ ###########################################################
+ # Step 2: Reorder and consolidate the list into a task list
+ ###########################################################
+
+ # We have to work first from lowest token index to highest, then by group,
+ # sort our list first on token index then group number
+ @icommon = sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } @icommon;
+
+ # Make a task list of the form
+ # [$i, ng_beg, $ng_end, $tok], ..
+ # where
+ # $i is the index of the token to be aligned
+ # $ng_beg..$ng_end is the group range for this action
+ my @todo;
+ my ( $i, $ng_end, $tok );
+ foreach my $item (@icommon) {
+ my $ng_last = $ng_end;
+ my $i_last = $i;
+ ( $i, $ng_end, $tok ) = @{$item};
+ my $ng_beg = $ng_end - 1;
+ if ( defined($ng_last) && $ng_beg == $ng_last && $i == $i_last ) {
+ my $var = pop(@todo);
+ $ng_beg = $var->[1];
+ }
+ my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
+ push @todo, [ $i, $ng_beg, $ng_end, $raw_tok, $lev ];
+ }
+
+ ###############################
+ # Step 3: Execute the task list
+ ###############################
+ do_left_to_right_sweep( $rlines, $rgroups, \@todo, \%max_move, $short_pad,
+ $group_level );
+ return;
}
-{ # sub is_deletable_token
+{ ## closure for sub do_left_to_right_sweep
- my %is_deletable_equals;
+ my %is_good_alignment_token;
BEGIN {
- my @q;
- # These tokens with = may be deleted for vertical aligmnemt
- @q = qw(
- <= >= == =~ != <=>
- );
- @is_deletable_equals{@q} = (1) x scalar(@q);
+ # One of the most difficult aspects of vertical alignment is knowing
+ # when not to align. Alignment can go from looking very nice to very
+ # bad when overdone. In the sweep algorithm there are two special
+ # cases where we may need to limit padding to a '$short_pad' distance
+ # to avoid some very ugly formatting:
- }
+ # 1. Two isolated lines with partial alignment
+ # 2. A 'tail-wag-dog' situation, in which a single terminal
+ # line with partial alignment could cause a significant pad
+ # increase in many previous lines if allowed to join the alignment.
- sub is_deletable_token {
+ # For most alignment tokens, we will allow only a small pad to be
+ # introduced (the hardwired $short_pad variable) . But for some 'good'
+ # alignments we can be less restrictive.
- # Determine if a token with no match possibility can be removed to
- # improve chances of making an alignment.
- my ( $token, $i, $imax, $jline, $i_eq ) = @_;
+ # These are 'good' alignments, which are allowed more padding:
+ my @q = qw(
+ => = ? if unless or || {
+ );
+ push @q, ',';
+ @is_good_alignment_token{@q} = (0) x scalar(@q);
- my ( $raw_tok, $lev, $tag, $tok_count ) =
- decode_alignment_token($token);
+ # Promote a few of these to 'best', with essentially no pad limit:
+ $is_good_alignment_token{'='} = 1;
+ $is_good_alignment_token{'if'} = 1;
+ $is_good_alignment_token{'unless'} = 1;
+ $is_good_alignment_token{'=>'} = 1
- # okay to delete second and higher copies of a token
- if ( $tok_count > 1 ) { return 1 }
+ # Note the hash values are set so that:
+ # if ($is_good_alignment_token{$raw_tok}) => best
+ # if defined ($is_good_alignment_token{$raw_tok}) => good or best
- # only remove lower level commas
- if ( $raw_tok eq ',' ) {
+ }
- return if ( defined($i_eq) && $i < $i_eq );
- return if ( $lev <= $group_level );
- }
+ sub do_left_to_right_sweep {
+ my ( $rlines, $rgroups, $rtodo, $rmax_move, $short_pad, $group_level )
+ = @_;
+
+ # $blocking_level[$nj is the level at a match failure between groups
+ # $ng-1 and $ng
+ my @blocking_level;
+ my $group_list_type = $rlines->[0]->get_list_type();
+
+ my $move_to_common_column = sub {
+
+ # Move the alignment column of token $itok to $col_want for a
+ # sequence of groups.
+ my ( $ngb, $nge, $itok, $col_want, $raw_tok ) = @_;
+ return unless ( defined($ngb) && $nge > $ngb );
+ foreach my $ng ( $ngb .. $nge ) {
+
+ my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
+ my $line = $rlines->[$jbeg];
+ my $col = $line->get_column($itok);
+ my $avail = $line->get_available_space_on_right();
+ my $move = $col_want - $col;
+ if ( $move > 0 ) {
+
+ # limit padding increase in isolated two lines
+ next
+ if ( defined( $rmax_move->{$ng} )
+ && $move > $rmax_move->{$ng}
+ && !$is_good_alignment_token{$raw_tok} );
+
+ $line->increase_field_width( $itok, $move );
+ }
+ elsif ( $move < 0 ) {
- # most operators with an equals sign should be retained if at
- # same level as this statement
- elsif ( $raw_tok =~ /=/ ) {
- return
- unless ( $lev > $group_level || $is_deletable_equals{$raw_tok} );
- }
+ # spot to take special action on failure to move
+ }
+ }
+ };
+
+ foreach my $task ( @{$rtodo} ) {
+ my ( $itok, $ng_beg, $ng_end, $raw_tok, $lev ) = @{$task};
+
+ # Nothing to do for a single group
+ next unless ( $ng_end > $ng_beg );
+
+ my $ng_first; # index of the first group of a continuous sequence
+ my $col_want; # the common alignment column of a sequence of groups
+ my $col_limit; # maximum column before bumping into max line length
+ my $line_count_ng_m = 0;
+ my $jmax_m;
+ my $it_stop_m;
+
+ # Loop over the groups
+ # 'ix_' = index in the array of lines
+ # 'ng_' = index in the array of groups
+ # 'it_' = index in the array of tokens
+ my $ix_min = $rgroups->[$ng_beg]->[0];
+ my $ix_max = $rgroups->[$ng_end]->[1];
+ my $lines_total = $ix_max - $ix_min + 1;
+ foreach my $ng ( $ng_beg .. $ng_end ) {
+ my ( $ix_beg, $ix_end, $it_stop ) = @{ $rgroups->[$ng] };
+ my $line_count_ng = $ix_end - $ix_beg + 1;
+
+ # Important: note that since all lines in a group have a common
+ # alignments object, we just have to work on one of the lines
+ # (the first line). All of the rest will be changed
+ # automatically.
+ my $line = $rlines->[$ix_beg];
+ my $jmax = $line->get_jmax();
- # otherwise, ok to delete the token
- return 1;
- }
-}
+ # the maximum space without exceeding the line length:
+ my $avail = $line->get_available_space_on_right();
+ my $col = $line->get_column($itok);
+ my $col_max = $col + $avail;
+
+ # Initialize on first group
+ if ( !defined($col_want) ) {
+ $ng_first = $ng;
+ $col_want = $col;
+ $col_limit = $col_max;
+ $line_count_ng_m = $line_count_ng;
+ $jmax_m = $jmax;
+ $it_stop_m = $it_stop;
+ next;
+ }
-sub delete_unmatched_tokens {
- my ($rlines) = @_;
+ # RULE: Throw a blocking flag upon encountering a token level
+ # different from the level of the first blocking token. For
+ # example, in the following example, if the = matches get
+ # blocked between two groups as shown, then we want to start
+ # blocking matches at the commas, which are at deeper level, so
+ # that we do not get the big gaps shown here:
+
+ # my $unknown3 = pack( "v", -2 );
+ # my $unknown4 = pack( "v", 0x09 );
+ # my $unknown5 = pack( "VVV", 0x06, 0x00, 0x00 );
+ # my $num_bbd_blocks = pack( "V", $num_lists );
+ # my $root_startblock = pack( "V", $root_start );
+ # my $unknown6 = pack( "VV", 0x00, 0x1000 );
+
+ # On the other hand, it is okay to keep matching at the same
+ # level such as in a simple list of commas and/or fat arrors.
+
+ my $is_blocked = defined( $blocking_level[$ng] )
+ && $lev > $blocking_level[$ng];
+
+ # TAIL-WAG-DOG RULE: prevent a 'tail-wag-dog' syndrom, meaning:
+ # Do not let one or two lines with a **different number of
+ # alignments** open up a big gap in a large block. For
+ # example, we will prevent something like this, where the first
+ # line prys open the rest:
+
+ # $worksheet->write( "B7", "http://www.perl.com", undef, $format );
+ # $worksheet->write( "C7", "", $format );
+ # $worksheet->write( "D7", "", $format );
+ # $worksheet->write( "D8", "", $format );
+ # $worksheet->write( "D8", "", $format );
+
+ # We should exclude from consideration two groups which are
+ # effectively the same but separated because one does not
+ # fit in the maximum allowed line length.
+ my $is_same_group =
+ $jmax == $jmax_m && $it_stop_m == $jmax_m - 2;
+
+ my $lines_above = $ix_beg - $ix_min;
+ my $lines_below = $lines_total - $lines_above;
+
+ # Increase the tolerable gap for certain favorable factors
+ my $factor = 1;
+ my $top_level = $lev == $group_level;
+
+ # Align best top level alignment tokens like '=', 'if', ...
+ # A factor of 10 allows a gap of up to 40 spaces
+ if ( $top_level && $is_good_alignment_token{$raw_tok} ) {
+ $factor = 10;
+ }
- # This is a preliminary step in vertical alignment in which we remove as
- # many obviously un-needed alignment tokens as possible. This will prevent
- # them from interfering with the final alignment.
+ # Otherwise allow some minimal padding of good alignments
+ elsif (
- return unless @{$rlines};
- my $has_terminal_match = $rlines->[-1]->get_j_terminal_match();
+ defined( $is_good_alignment_token{$raw_tok} )
- # ignore hanging side comments in these operations
- my @filtered = grep { !$_->{_is_hanging_side_comment} } @{$rlines};
- my $rnew_lines = \@filtered;
- my @i_equals;
- my @min_levels;
+ # We have to be careful if there are just 2 lines. This
+ # two-line factor allows large gaps only for 2 lines which
+ # are simple lists with fewer items on the second line. It
+ # gives results similar to previous versions of perltidy.
+ && ( $lines_total > 2
+ || $group_list_type && $jmax < $jmax_m && $top_level )
+ )
+ {
+ $factor += 1;
+ if ($top_level) {
+ $factor += 1;
+ }
+ }
- my $jmax = @{$rnew_lines} - 1;
+ my $is_big_gap;
+ if ( !$is_same_group ) {
+ $is_big_gap ||=
+ ( $lines_above == 1
+ || $lines_above == 2 && $lines_below >= 4 )
+ && $col_want > $col + $short_pad * $factor;
+ $is_big_gap ||=
+ ( $lines_below == 1
+ || $lines_below == 2 && $lines_above >= 4 )
+ && $col > $col_want + $short_pad * $factor;
+ }
- my %is_good_tok;
+ # if match is limited by gap size, stop aligning at this level
+ if ($is_big_gap) {
+ $blocking_level[$ng] = $lev - 1;
+ }
- # create a hash of tokens for each line
- my $rline_hashes = [];
- foreach my $line ( @{$rnew_lines} ) {
- my $rhash = {};
- my $rtokens = $line->get_rtokens();
- my $i = 0;
- my $i_eq;
- my $lev_min;
- foreach my $tok ( @{$rtokens} ) {
- my ( $raw_tok, $lev, $tag, $tok_count ) =
- decode_alignment_token($tok);
- if ( !defined($lev_min) || $lev < $lev_min ) { $lev_min = $lev }
+ # quit and restart if it cannot join this batch
+ if ( $col_want > $col_max
+ || $col > $col_limit
+ || $is_big_gap
+ || $is_blocked )
+ {
- # Possible future upgrade: for multiple matches,
- # record [$i1, $i2, ..] instead of $i
- $rhash->{$tok} =
- [ $i, undef, undef, $raw_tok, $lev, $tag, $tok_count ];
+ # remember the level of the first blocking token
+ if ( !defined( $blocking_level[$ng] ) ) {
+ $blocking_level[$ng] = $lev;
+ }
- # remember the first equals at line level
- if ( !defined($i_eq) && $raw_tok eq '=' ) {
- if ( $lev eq $group_level ) { $i_eq = $i }
- }
- $i++;
- }
- push @{$rline_hashes}, $rhash;
- push @i_equals, $i_eq;
- push @min_levels, $lev_min;
- }
-
- # compare each line pair and record matches
- my $rtok_hash = {};
- my $nr = 0;
- for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) {
- my $nl = $nr;
- $nr = 0;
- my $jr = $jl + 1;
- my $rhash_l = $rline_hashes->[$jl];
- my $rhash_r = $rline_hashes->[$jr];
- my $count = 0; # UNUSED NOW?
- my $ntoks = 0;
- foreach my $tok ( keys %{$rhash_l} ) {
- $ntoks++;
- if ( defined( $rhash_r->{$tok} ) ) {
- if ( $tok ne '#' ) { $count++; }
- my $il = $rhash_l->{$tok}->[0];
- my $ir = $rhash_r->{$tok}->[0];
- $rhash_l->{$tok}->[2] = $ir;
- $rhash_r->{$tok}->[1] = $il;
- if ( $tok ne '#' ) {
- push @{ $rtok_hash->{$tok} }, ( $jl, $jr );
- $nr++;
+ $move_to_common_column->(
+ $ng_first, $ng - 1, $itok, $col_want, $raw_tok
+ );
+ $ng_first = $ng;
+ $col_want = $col;
+ $col_limit = $col_max;
+ $line_count_ng_m = $line_count_ng;
+ $jmax_m = $jmax;
+ $it_stop_m = $it_stop;
+ next;
}
- }
- }
- # Set a line break if no matching tokens between these lines
- if ( $nr == 0 && $nl > 0 ) {
- $rnew_lines->[$jl]->{_end_group} = 1;
- }
- }
+ $line_count_ng_m += $line_count_ng;
- # find subgroups
- my @subgroups;
- push @subgroups, [ 0, $jmax ];
- for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) {
- if ( $rnew_lines->[$jl]->{_end_group} ) {
- $subgroups[-1]->[1] = $jl;
- push @subgroups, [ $jl + 1, $jmax ];
- }
+ # update the common column and limit
+ if ( $col > $col_want ) { $col_want = $col }
+ if ( $col_max < $col_limit ) { $col_limit = $col_max }
+
+ } ## end loop over groups
+
+ if ( $ng_end > $ng_first ) {
+ $move_to_common_column->(
+ $ng_first, $ng_end, $itok, $col_want, $raw_tok
+ );
+ } ## end loop over groups for one task
+ } ## end loop over tasks
+
+ return;
}
+}
- # Loop to process each subgroups
- foreach my $item (@subgroups) {
- my ( $jbeg, $jend ) = @{$item};
+sub delete_selected_tokens {
- # look for complete ternary or if/elsif/else blocks
- my $nlines = $jend - $jbeg + 1;
- my %token_line_count;
- for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
- my %seen;
- my $line = $rnew_lines->[$jj];
- my $rtokens = $line->get_rtokens();
+ my ( $line_obj, $ridel ) = @_;
+
+ # $line_obj is the line to be modified
+ # $ridel is a ref to list of indexes to be deleted
+
+ # remove an unused alignment token(s) to improve alignment chances
+
+ return unless ( defined($line_obj) && defined($ridel) && @{$ridel} );
+
+ my $jmax_old = $line_obj->get_jmax();
+ my $rfields_old = $line_obj->get_rfields();
+ my $rfield_lengths_old = $line_obj->get_rfield_lengths();
+ my $rpatterns_old = $line_obj->get_rpatterns();
+ my $rtokens_old = $line_obj->get_rtokens();
+ my $j_terminal_match = $line_obj->get_j_terminal_match();
+
+ use constant EXPLAIN_DELETE_SELECTED => 0;
+
+ local $" = '> <';
+ EXPLAIN_DELETE_SELECTED && print <<EOM;
+delete indexes: <@{$ridel}>
+old jmax: $jmax_old
+old tokens: <@{$rtokens_old}>
+old patterns: <@{$rpatterns_old}>
+old fields: <@{$rfields_old}>
+old field_lengths: <@{$rfield_lengths_old}>
+EOM
+
+ my $rfields_new = [];
+ my $rpatterns_new = [];
+ my $rtokens_new = [];
+ my $rfield_lengths_new = [];
+
+ # Convert deletion list to a hash to allow any order, multiple entries,
+ # and avoid problems with index values out of range
+ my %delete_me;
+ @delete_me{ @{$ridel} } = (1) x scalar( @{$ridel} );
+
+ my $pattern = $rpatterns_old->[0];
+ my $field = $rfields_old->[0];
+ my $field_length = $rfield_lengths_old->[0];
+ push @{$rfields_new}, $field;
+ push @{$rfield_lengths_new}, $field_length;
+ push @{$rpatterns_new}, $pattern;
+
+ # Loop to either copy items or concatenate fields and patterns
+ my $jmin_del;
+ for ( my $j = 0 ; $j < $jmax_old ; $j++ ) {
+ my $token = $rtokens_old->[$j];
+ my $field = $rfields_old->[ $j + 1 ];
+ my $field_length = $rfield_lengths_old->[ $j + 1 ];
+ my $pattern = $rpatterns_old->[ $j + 1 ];
+ if ( !$delete_me{$j} ) {
+ push @{$rtokens_new}, $token;
+ push @{$rfields_new}, $field;
+ push @{$rpatterns_new}, $pattern;
+ push @{$rfield_lengths_new}, $field_length;
+ }
+ else {
+ if ( !defined($jmin_del) ) { $jmin_del = $j }
+ $rfields_new->[-1] .= $field;
+ $rfield_lengths_new->[-1] += $field_length;
+ $rpatterns_new->[-1] .= $pattern;
+ }
+ }
+
+ # ----- x ------ x ------ x ------
+ #t 0 1 2 <- token indexing
+ #f 0 1 2 3 <- field and pattern
+
+ my $jmax_new = @{$rfields_new} - 1;
+ $line_obj->set_rtokens($rtokens_new);
+ $line_obj->set_rpatterns($rpatterns_new);
+ $line_obj->set_rfields($rfields_new);
+ $line_obj->set_rfield_lengths($rfield_lengths_new);
+ $line_obj->set_jmax($jmax_new);
+
+ # The value of j_terminal_match will be incorrect if we delete tokens prior
+ # to it. We will have to give up on aligning the terminal tokens if this
+ # happens.
+ if ( defined($j_terminal_match) && $jmin_del <= $j_terminal_match ) {
+ $line_obj->set_j_terminal_match(undef);
+ }
+
+ # update list type -
+ if ( $line_obj->get_list_seqno() ) {
+
+ ## This works, but for efficiency see if we need to make a change:
+ ## decide_if_list($line_obj);
+
+ # An existing list will still be a list but with possibly different
+ # leading token
+ my $old_list_type = $line_obj->get_list_type();
+ my $new_list_type = "";
+ if ( $rtokens_new->[0] =~ /^(=>|,)/ ) {
+ $new_list_type = $rtokens_new->[0];
+ }
+ if ( !$old_list_type || $old_list_type ne $new_list_type ) {
+ decide_if_list($line_obj);
+ }
+ }
+
+ EXPLAIN_DELETE_SELECTED && print <<EOM;
+
+new jmax: $jmax_new
+new tokens: <@{$rtokens_new}>
+new patterns: <@{$rpatterns_new}>
+new fields: <@{$rfields_new}>
+EOM
+ return;
+}
+
+{ ## closure for sub decode_alignment_token
+
+ # This routine is called repeatedly for each token, so it needs to be
+ # efficient. We can speed things up by remembering the inputs and outputs
+ # in a hash.
+ my %decoded_token;
+
+ sub initialize_decode {
+
+ # We will re-initialize the hash for each file. Otherwise, there is
+ # a danger that the hash can become arbitrarily large if a very large
+ # number of files is processed at once.
+ %decoded_token = ();
+ return;
+ }
+
+ sub decode_alignment_token {
+
+ # Unpack the values packed in an alignment token
+ #
+ # Usage:
+ # my ( $raw_tok, $lev, $tag, $tok_count ) =
+ # decode_alignment_token($token);
+
+ # Alignment tokens have a trailing decimal level and optional tag (for
+ # commas):
+ # For example, the first comma in the following line
+ # sub banner { crlf; report( shift, '/', shift ); crlf }
+ # is decorated as follows:
+ # ,2+report-6 => (tok,lev,tag) =qw( , 2 +report-6)
+
+ # An optional token count may be appended with a leading dot.
+ # Currently this is only done for '=' tokens but this could change.
+ # For example, consider the following line:
+ # $nport = $port = shift || $name;
+ # The first '=' may either be '=0' or '=0.1' [level 0, first equals]
+ # The second '=' will be '=0.2' [level 0, second equals]
+ my ($tok) = @_;
+
+ if ( defined( $decoded_token{$tok} ) ) {
+ return @{ $decoded_token{$tok} };
+ }
+
+ my ( $raw_tok, $lev, $tag, $tok_count ) = ( $tok, 0, "", 1 );
+ if ( $tok =~ /^(\D+)(\d+)([^\.]*)(\.(\d+))?$/ ) {
+ $raw_tok = $1;
+ $lev = $2;
+ $tag = $3 if ($3);
+ $tok_count = $5 if ($5);
+ }
+ my @vals = ( $raw_tok, $lev, $tag, $tok_count );
+ $decoded_token{$tok} = \@vals;
+ return @vals;
+ }
+}
+
+{ ## closure for sub delete_unmatched_tokens
+
+ my %is_assignment;
+ my %keep_after_deleted_assignment;
+
+ BEGIN {
+ my @q;
+
+ @q = qw(
+ = **= += *= &= <<= &&=
+ -= /= |= >>= ||= //=
+ .= %= ^=
+ x=
+ );
+ @is_assignment{@q} = (1) x scalar(@q);
+
+ # These tokens may be kept following an = deletion
+ @q = qw(
+ if unless or ||
+ );
+ @keep_after_deleted_assignment{@q} = (1) x scalar(@q);
+
+ }
+
+ # This flag is for testing only and should normally be zero.
+ use constant TEST_DELETE_NULL => 0;
+
+ sub delete_unmatched_tokens {
+ my ( $rlines, $group_level ) = @_;
+
+ # This is a preliminary step in vertical alignment in which we remove
+ # as many obviously un-needed alignment tokens as possible. This will
+ # prevent them from interfering with the final alignment.
+
+ # These are the return values
+ my $max_lev_diff = 0; # used to avoid a call to prune_tree
+ my $saw_side_comment = 0; # used to avoid a call for side comments
+
+ # Handle no lines -- shouldn't happen
+ return unless @{$rlines};
+
+ # Handle a single line
+ if ( @{$rlines} == 1 ) {
+ my $line = $rlines->[0];
+ my $jmax = $line->get_jmax();
+ my $length = $line->get_rfield_lengths()->[$jmax];
+ $saw_side_comment = $length > 0;
+ return ( $max_lev_diff, $saw_side_comment );
+ }
+
+ my $has_terminal_match = $rlines->[-1]->get_j_terminal_match();
+
+ # ignore hanging side comments in these operations
+ my @filtered = grep { !$_->get_is_hanging_side_comment() } @{$rlines};
+ my $rnew_lines = \@filtered;
+
+ $saw_side_comment = @filtered != @{$rlines};
+ $max_lev_diff = 0;
+
+ # nothing to do if all lines were hanging side comments
+ my $jmax = @{$rnew_lines} - 1;
+ return ( $max_lev_diff, $saw_side_comment ) unless ( $jmax >= 0 );
+
+ my @equals_info;
+ my @line_info;
+ my %is_good_tok;
+
+ # create a hash of tokens for each line
+ my $rline_hashes = [];
+ foreach my $line ( @{$rnew_lines} ) {
+ my $rhash = {};
+ my $rtokens = $line->get_rtokens();
+ my $rpatterns = $line->get_rpatterns();
+ my $i = 0;
+ my ( $i_eq, $tok_eq, $pat_eq );
+ my ( $lev_min, $lev_max );
foreach my $tok ( @{$rtokens} ) {
- if ( !$seen{$tok} ) {
- $seen{$tok}++;
- $token_line_count{$tok}++;
+ my ( $raw_tok, $lev, $tag, $tok_count ) =
+ decode_alignment_token($tok);
+
+ if ( $tok ne '#' ) {
+ if ( !defined($lev_min) ) {
+ $lev_min = $lev;
+ $lev_max = $lev;
+ }
+ else {
+ if ( $lev < $lev_min ) { $lev_min = $lev }
+ if ( $lev > $lev_max ) { $lev_max = $lev }
+ }
+ }
+ else {
+ if ( !$saw_side_comment ) {
+ my $length = $line->get_rfield_lengths()->[ $i + 1 ];
+ $saw_side_comment ||= $length;
+ }
}
+
+ # Possible future upgrade: for multiple matches,
+ # record [$i1, $i2, ..] instead of $i
+ $rhash->{$tok} =
+ [ $i, undef, undef, $raw_tok, $lev, $tag, $tok_count ];
+
+ # remember the first equals at line level
+ if ( !defined($i_eq) && $raw_tok eq '=' ) {
+
+ if ( $lev eq $group_level ) {
+ $i_eq = $i;
+ $tok_eq = $tok;
+ $pat_eq = $rpatterns->[$i];
+ }
+ }
+ $i++;
+ }
+ push @{$rline_hashes}, $rhash;
+ push @equals_info, [ $i_eq, $tok_eq, $pat_eq ];
+ push @line_info, [ $lev_min, $lev_max ];
+ if ( defined($lev_min) ) {
+ my $lev_diff = $lev_max - $lev_min;
+ if ( $lev_diff > $max_lev_diff ) { $max_lev_diff = $lev_diff }
}
}
- # Look for if/else/elsif and ternary blocks
- my $is_full_block;
- foreach my $tok ( keys %token_line_count ) {
- if ( $token_line_count{$tok} == $nlines ) {
- if ( $tok =~ /^\?/ || $tok =~ /^\{\d+if/ ) {
- $is_full_block = 1;
+ # compare each line pair and record matches
+ my $rtok_hash = {};
+ my $nr = 0;
+ for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) {
+ my $nl = $nr;
+ $nr = 0;
+ my $jr = $jl + 1;
+ my $rhash_l = $rline_hashes->[$jl];
+ my $rhash_r = $rline_hashes->[$jr];
+ my $count = 0; # UNUSED NOW?
+ my $ntoks = 0;
+ foreach my $tok ( keys %{$rhash_l} ) {
+ $ntoks++;
+ if ( defined( $rhash_r->{$tok} ) ) {
+ if ( $tok ne '#' ) { $count++; }
+ my $il = $rhash_l->{$tok}->[0];
+ my $ir = $rhash_r->{$tok}->[0];
+ $rhash_l->{$tok}->[2] = $ir;
+ $rhash_r->{$tok}->[1] = $il;
+ if ( $tok ne '#' ) {
+ push @{ $rtok_hash->{$tok} }, ( $jl, $jr );
+ $nr++;
+ }
+ }
+ }
+
+ # Set a line break if no matching tokens between these lines
+ # (this is not strictly necessary now but does not hurt)
+ if ( $nr == 0 && $nl > 0 ) {
+ $rnew_lines->[$jl]->set_end_group(1);
+ }
+
+ # Also set a line break if both lines have simple equals but with
+ # different leading characters in patterns. This check is similar
+ # to one in sub check_match, and will prevent sub
+ # prune_alignment_tree from removing alignments which otherwise
+ # should be kept. This fix is rarely needed, but it can
+ # occasionally improve formatting.
+ # For example:
+ # my $name = $this->{Name};
+ # $type = $this->ctype($genlooptype) if defined $genlooptype;
+ # my $declini = ( $asgnonly ? "" : "\t$type *" );
+ # my $cast = ( $type ? "($type *)" : "" );
+ # The last two lines start with 'my' and will not match the
+ # previous line starting with $type, so we do not want
+ # prune_alignment tree to delete their ? : alignments at a deeper
+ # level.
+ my ( $i_eq_l, $tok_eq_l, $pat_eq_l ) = @{ $equals_info[$jl] };
+ my ( $i_eq_r, $tok_eq_r, $pat_eq_r ) = @{ $equals_info[$jr] };
+ if ( defined($i_eq_l) && defined($i_eq_r) ) {
+
+ # Also, do not align equals across a change in ci level
+ my $ci_jump = $rnew_lines->[$jl]->get_ci_level() !=
+ $rnew_lines->[$jr]->get_ci_level();
+
+ if (
+ $tok_eq_l eq $tok_eq_r
+ && $i_eq_l == 0
+ && $i_eq_r == 0
+ && ( substr( $pat_eq_l, 0, 1 ) ne substr( $pat_eq_r, 0, 1 )
+ || $ci_jump )
+ )
+ {
+ $rnew_lines->[$jl]->set_end_group(1);
}
}
}
- # remove unwanted alignment tokens
- for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
- my $line = $rnew_lines->[$jj];
- my $rtokens = $line->get_rtokens();
- my $rhash = $rline_hashes->[$jj];
- my $i = 0;
- my $i_eq = $i_equals[$jj];
- my @idel;
- my $imax = @{$rtokens} - 2;
- my $delete_above_level;
+ # find subgroups
+ my @subgroups;
+ push @subgroups, [ 0, $jmax ];
+ for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) {
+ if ( $rnew_lines->[$jl]->get_end_group() ) {
+ $subgroups[-1]->[1] = $jl;
+ push @subgroups, [ $jl + 1, $jmax ];
+ }
+ }
- for ( my $i = 0 ; $i <= $imax ; $i++ ) {
- my $tok = $rtokens->[$i];
- next if ( $tok eq '#' ); # shouldn't happen
- my ( $iii, $il, $ir, $raw_tok, $lev, $tag, $tok_count ) =
- @{ $rhash->{$tok} };
-
- # always remove unmatched tokens
- my $delete_me = !defined($il) && !defined($ir);
-
- # also, if this is a complete ternary or if/elsif/else block,
- # remove all alignments which are not also in every line
- $delete_me ||=
- ( $is_full_block && $token_line_count{$tok} < $nlines );
-
- # Remove all tokens above a certain level following a previous
- # deletion. For example, we have to remove tagged higher level
- # alignment tokens following a => deletion because the tags of
- # higher level tokens will now be incorrect. For example, this
- # will prevent aligning commas as follows after deleting the
- # second =>
- # $w->insert(
- # ListBox => origin => [ 270, 160 ],
- # size => [ 200, 55 ],
- # );
- if ( defined($delete_above_level) ) {
- if ( $lev > $delete_above_level ) {
- $delete_me ||= 1; #$tag;
+ # flag to allow skipping pass 2
+ my $saw_large_group;
+
+ ############################################################
+ # PASS 1 over subgroups to remove unmatched alignment tokens
+ ############################################################
+ foreach my $item (@subgroups) {
+ my ( $jbeg, $jend ) = @{$item};
+
+ my $nlines = $jend - $jbeg + 1;
+
+ ####################################################
+ # Look for complete if/elsif/else and ternary blocks
+ ####################################################
+
+ # We are looking for a common '$dividing_token' like these:
+
+ # if ( $b and $s ) { $p->{'type'} = 'a'; }
+ # elsif ($b) { $p->{'type'} = 'b'; }
+ # elsif ($s) { $p->{'type'} = 's'; }
+ # else { $p->{'type'} = ''; }
+ # ^----------- dividing_token
+
+ # my $severity =
+ # !$routine ? '[PFX]'
+ # : $routine =~ /warn.*_d\z/ ? '[DS]'
+ # : $routine =~ /ck_warn/ ? 'W'
+ # : $routine =~ /ckWARN\d*reg_d/ ? 'S'
+ # : $routine =~ /ckWARN\d*reg/ ? 'W'
+ # : $routine =~ /vWARN\d/ ? '[WDS]'
+ # : '[PFX]';
+ # ^----------- dividing_token
+
+ # Only look for groups which are more than 2 lines long. Two lines
+ # can get messed up doing this, probably due to the various
+ # two-line rules.
+
+ my $dividing_token;
+ my %token_line_count;
+ if ( $nlines > 2 ) {
+
+ for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
+ my %seen;
+ my $line = $rnew_lines->[$jj];
+ my $rtokens = $line->get_rtokens();
+ foreach my $tok ( @{$rtokens} ) {
+ if ( !$seen{$tok} ) {
+ $seen{$tok}++;
+ $token_line_count{$tok}++;
+ }
}
- else { $delete_above_level = undef }
}
- if (
- $delete_me
- && is_deletable_token( $tok, $i, $imax, $jj, $i_eq )
+ foreach my $tok ( keys %token_line_count ) {
+ if ( $token_line_count{$tok} == $nlines ) {
+ if ( substr( $tok, 0, 1 ) eq '?'
+ || substr( $tok, 0, 1 ) eq '{'
+ && $tok =~ /^\{\d+if/ )
+ {
+ $dividing_token = $tok;
+ last;
+ }
+ }
+ }
+ }
+
+ #####################################################
+ # Loop over lines to remove unwanted alignment tokens
+ #####################################################
+ for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
+ my $line = $rnew_lines->[$jj];
+ my $rtokens = $line->get_rtokens();
+ my $rhash = $rline_hashes->[$jj];
+ my $i_eq = $equals_info[$jj]->[0];
+ my @idel;
+ my $imax = @{$rtokens} - 2;
+ my $delete_above_level;
+ my $deleted_assignment_token;
+
+ my $saw_dividing_token = "";
+ $saw_large_group ||= $nlines > 2 && $imax > 1;
+
+ # Loop over all alignment tokens
+ for ( my $i = 0 ; $i <= $imax ; $i++ ) {
+ my $tok = $rtokens->[$i];
+ next if ( $tok eq '#' ); # shouldn't happen
+ my ( $iii, $il, $ir, $raw_tok, $lev, $tag, $tok_count ) =
+ @{ $rhash->{$tok} };
+
+ #######################################################
+ # Here is the basic RULE: remove an unmatched alignment
+ # which does not occur in the surrounding lines.
+ #######################################################
+ my $delete_me = !defined($il) && !defined($ir);
+
+ # But now we modify this with exceptions...
+
+ # EXCEPTION 1: If we are in a complete ternary or
+ # if/elsif/else group, and this token is not on every line
+ # of the group, should we delete it to preserve overall
+ # alignment?
+ if ($dividing_token) {
+ if ( $token_line_count{$tok} >= $nlines ) {
+ $saw_dividing_token ||= $tok eq $dividing_token;
+ }
+ else {
+
+ # For shorter runs, delete toks to save alignment.
+ # For longer runs, keep toks after the '{' or '?'
+ # to allow sub-alignments within braces. The
+ # number 5 lines is arbitrary but seems to work ok.
+ $delete_me ||=
+ ( $nlines < 5 || !$saw_dividing_token );
+ }
+ }
+
+ # EXCEPTION 2: Remove all tokens above a certain level
+ # following a previous deletion. For example, we have to
+ # remove tagged higher level alignment tokens following a
+ # '=>' deletion because the tags of higher level tokens
+ # will now be incorrect. For example, this will prevent
+ # aligning commas as follows after deleting the second '=>'
+ # $w->insert(
+ # ListBox => origin => [ 270, 160 ],
+ # size => [ 200, 55 ],
+ # );
+ if ( defined($delete_above_level) ) {
+ if ( $lev > $delete_above_level ) {
+ $delete_me ||= 1; #$tag;
+ }
+ else { $delete_above_level = undef }
+ }
+
+ # EXCEPTION 3: Remove all but certain tokens after an
+ # assignment deletion.
+ if (
+ $deleted_assignment_token
+ && ( $lev > $group_level
+ || !$keep_after_deleted_assignment{$raw_tok} )
+ )
+ {
+ $delete_me ||= 1;
+ }
- # Patch: do not touch the first line of a terminal match,
- # such as below, because j_terminal has already been set.
+ # EXCEPTION 4: Do not touch the first line of a 2 line
+ # terminal match, such as below, because j_terminal has
+ # already been set.
# if ($tag) { $tago = "<$tag>"; $tagc = "</$tag>"; }
# else { $tago = $tagc = ''; }
# But see snippets 'else1.t' and 'else2.t'
- && !( $jj == $jbeg && $has_terminal_match && $nlines == 2 )
+ $delete_me = 0
+ if ( $jj == $jbeg
+ && $has_terminal_match
+ && $nlines == 2 );
- )
- {
- push @idel, $i;
- if ( !defined($delete_above_level)
- || $lev < $delete_above_level )
- {
+ # EXCEPTION 5: misc additional rules for commas and equals
+ if ($delete_me) {
+
+ # okay to delete second and higher copies of a token
+ if ( $tok_count == 1 ) {
+
+ # for a comma...
+ if ( $raw_tok eq ',' ) {
+
+ # Do not delete commas before an equals
+ $delete_me = 0
+ if ( defined($i_eq) && $i < $i_eq );
+
+ # Do not delete line-level commas
+ $delete_me = 0 if ( $lev <= $group_level );
+ }
+
+ # For an assignment at group level..
+ if ( $is_assignment{$raw_tok}
+ && $lev == $group_level )
+ {
+
+ # Do not delete if it is the last alignment of
+ # multiple tokens; this will prevent some
+ # undesirable alignments
+ if ( $imax > 0 && $i == $imax ) {
+ $delete_me = 0;
+ }
+
+ # Otherwise, set a flag to delete most
+ # remaining tokens
+ else { $deleted_assignment_token = $raw_tok }
+ }
+ }
+ }
+
+ #####################################
+ # Add this token to the deletion list
+ #####################################
+ if ($delete_me) {
+ push @idel, $i;
+
+ # update deletion propagation flags
+ if ( !defined($delete_above_level)
+ || $lev < $delete_above_level )
+ {
- # delete all following higher level alignments
- $delete_above_level = $lev;
+ # delete all following higher level alignments
+ $delete_above_level = $lev;
- # but keep deleting after => to next lower level
- # to avoid some bizarre alignments
- if ( $raw_tok eq '=>' ) {
- $delete_above_level = $lev - 1;
+ # but keep deleting after => to next lower level
+ # to avoid some bizarre alignments
+ if ( $raw_tok eq '=>' ) {
+ $delete_above_level = $lev - 1;
+ }
}
}
+ } # End loop over alignment tokens
+
+ # Process all deletion requests for this line
+ if (@idel) {
+ delete_selected_tokens( $line, \@idel );
}
- }
+ } # End loopover lines
+ } # End loop over subgroups
+
+ #################################################
+ # PASS 2 over subgroups to remove null alignments
+ #################################################
+
+ # This pass is only used for testing. It is helping to identify
+ # alignment situations which might be improved with a future more
+ # general algorithm which adds a tail matching capability.
+ if (TEST_DELETE_NULL) {
+ delete_null_alignments( $rnew_lines, $rline_hashes, \@subgroups )
+ if ($saw_large_group);
+ }
+
+ # PASS 3: Construct a tree of matched lines and delete some small deeper
+ # levels of tokens. They also block good alignments.
+ prune_alignment_tree($rnew_lines) if ($max_lev_diff);
+
+ # PASS 4: compare all lines for common tokens
+ match_line_pairs( $rlines, $rnew_lines, \@subgroups, $group_level );
+
+ return ( $max_lev_diff, $saw_side_comment );
+ }
+}
+
+sub delete_null_alignments {
+ my ( $rnew_lines, $rline_hashes, $rsubgroups ) = @_;
+
+ # This is an optional second pass for deleting alignment tokens which can
+ # occasionally improve alignment. We look for and remove 'null
+ # alignments', which are alignments that require no padding. So we can
+ # 'cheat' and delete them. For example, notice the '=~' alignment in the
+ # first two lines of the following code:
+
+ # $sysname .= 'del' if $self->label =~ /deletion/;
+ # $sysname .= 'ins' if $self->label =~ /insertion/;
+ # $sysname .= uc $self->allele_ori->seq if $self->allele_ori->seq;
+
+ # These '=~' tokens are already aligned because they are both the same
+ # distance from the previous alignment token, the 'if'. So we can
+ # eliminate them as alignments. The advantage is that in some cases, such
+ # as this one, this will allow other tokens to be aligned. In this case we
+ # then get the 'if' tokens to align:
+
+ # $sysname .= 'del' if $self->label =~ /deletion/;
+ # $sysname .= 'ins' if $self->label =~ /insertion/;
+ # $sysname .= uc $self->allele_ori->seq if $self->allele_ori->seq;
+
+ # The following rules for limiting this operation have been found to
+ # work well and avoid problems:
+
+ # Rule 1. We only consider a sequence of lines which have the same
+ # sequence of alignment tokens.
+
+ # Rule 2. We never eliminate the first alignment token. One reason is that
+ # lines may have different leading indentation spaces, so keeping the
+ # first alignment token insures that our length measurements start at
+ # a well-defined point. Another reason is that nothing is gained because
+ # the left-to-right sweep can always handle alignment of this token.
+
+ # Rule 3. We require that the first alignment token exist in either
+ # a previous line or a subsequent line. The reason is that this avoids
+ # changing two-line matches which go through special logic.
+
+ # Rule 4. Do not delete a token which occurs in a previous or subsequent
+ # line. For example, in the above example, it was ok to eliminate the '=~'
+ # token from two lines because it did not occur in a surrounding line.
+ # If it did occur in a surrounding line, the result could be confusing
+ # or even incorrectly aligned.
+
+ # A consequence of these rules is that we only need to consider subgroups
+ # with at least 3 lines and 2 alignment tokens.
+
+ # The subgroup line index range
+ my ( $jbeg, $jend );
+
+ # Vars to keep track of the start of a current sequence of matching
+ # lines.
+ my $rtokens_match;
+ my $rfield_lengths_match;
+ my $j_match_beg;
+ my $j_match_end;
+ my $imax_match;
+ my $rneed_pad;
+
+ # Vars for a line being tested
+ my $rtokens;
+ my $rfield_lengths;
+ my $imax;
+
+ my $start_match = sub {
+ my ($jj) = @_;
+ $rtokens_match = $rtokens;
+ $rfield_lengths_match = $rfield_lengths;
+ $j_match_beg = $jj;
+ $j_match_end = $jj;
+ $imax_match = $imax;
+ $rneed_pad = [];
+ return;
+ };
+
+ my $add_to_match = sub {
+ my ($jj) = @_;
+ $j_match_end = $jj;
+
+ # Keep track of any padding that would be needed for each token
+ for ( my $i = 0 ; $i <= $imax ; $i++ ) {
+ next if ( $rneed_pad->[$i] );
+ my $length = $rfield_lengths->[$i];
+ my $length_match = $rfield_lengths_match->[$i];
+ if ( $length ne $length_match ) { $rneed_pad->[$i] = 1 }
+ }
+ };
+
+ my $end_match = sub {
+ return unless ( $j_match_end > $j_match_beg );
+ my $nlines = $j_match_end - $j_match_beg + 1;
+ my $rhash_beg = $rline_hashes->[$j_match_beg];
+ my $rhash_end = $rline_hashes->[$j_match_end];
+ my @idel;
+
+ # Do not delete unless the first token also occurs in a surrounding line
+ my $tok0 = $rtokens_match->[0];
+ return
+ unless (
+ (
+ $j_match_beg > $jbeg
+ && $rnew_lines->[ $j_match_beg - 1 ]->get_rtokens()->[0] eq
+ $tok0
+ )
+ || ( $j_match_end < $jend
+ && $rnew_lines->[ $j_match_end + 1 ]->get_rtokens()->[0] eq
+ $tok0 )
+ );
+
+ # Note that we are skipping the token at i=0
+ for ( my $i = 1 ; $i <= $imax_match ; $i++ ) {
+
+ # do not delete a token which requires padding to align
+ next if ( $rneed_pad->[$i] );
+
+ my $tok = $rtokens_match->[$i];
+
+ # Do not delete a token which occurs in a surrounding line
+ next
+ if ( $j_match_beg > $jbeg
+ && defined( $rline_hashes->[ $j_match_beg - 1 ]->{$tok} ) );
+ next
+ if ( $j_match_end < $jend
+ && defined( $rline_hashes->[ $j_match_end + 1 ]->{$tok} ) );
- if (@idel) { delete_selected_tokens( $line, \@idel ) }
+ # ok to delete
+ push @idel, $i;
+ ##print "ok to delete tok=$tok\n";
}
+ if (@idel) {
+ foreach my $j ( $j_match_beg .. $j_match_end ) {
+ delete_selected_tokens( $rnew_lines->[$j], \@idel );
+ }
+ }
+ };
+
+ foreach my $item ( @{$rsubgroups} ) {
+ ( $jbeg, $jend ) = @{$item};
+ my $nlines = $jend - $jbeg + 1;
+ next unless ( $nlines > 2 );
+
+ for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
+ my $line = $rnew_lines->[$jj];
+ $rtokens = $line->get_rtokens();
+ $rfield_lengths = $line->get_rfield_lengths();
+ $imax = @{$rtokens} - 2;
+
+ # start a new match group
+ if ( $jj == $jbeg ) {
+ $start_match->($jj);
+ next;
+ }
+
+ # see if all tokens of this line match the current group
+ my $match;
+ if ( $imax == $imax_match ) {
+ for ( my $i = 0 ; $i <= $imax ; $i++ ) {
+ my $tok = $rtokens->[$i];
+ my $tok_match = $rtokens_match->[$i];
+ last if ( $tok ne $tok_match );
+ }
+ $match = 1;
+ }
+
+ # yes, they all match
+ if ($match) {
+ $add_to_match->($jj);
+ }
+
+ # now, this line does not match
+ else {
+ $end_match->();
+ $start_match->($jj);
+ }
+ } # End loopover lines
+ $end_match->();
} # End loop over subgroups
+ return;
+} ## end sub delete_null_alignments
+
+sub match_line_pairs {
+ my ( $rlines, $rnew_lines, $rsubgroups, $group_level ) = @_;
+
+ # Compare each pair of lines and save information about common matches
+ # $rlines = list of lines including hanging side comments
+ # $rnew_lines = list of lines without any hanging side comments
+ # $rsubgroups = list of subgroups of the new lines
+
+ # TODO:
+ # Maybe change: imax_pair => pair_match_info = ref to array
+ # = [$imax_align, $rMsg, ... ]
+ # This may eventually have multi-level match info
+
+ # Previous line vars
+ my ( $line_m, $rtokens_m, $rpatterns_m, $rfield_lengths_m, $imax_m,
+ $list_type_m, $ci_level_m );
+
+ # Current line vars
+ my ( $line, $rtokens, $rpatterns, $rfield_lengths, $imax, $list_type,
+ $ci_level );
+
+ use constant EXPLAIN_COMPARE_PATTERNS => 0;
+
+ my $compare_patterns = sub {
+
+ # helper routine to decide if patterns match well enough..
+ # return code:
+ # 0 = patterns match, continue
+ # 1 = no match
+ # 2 = no match, and lines do not match at all
+
+ my ( $tok, $tok_m, $pat, $pat_m, $pad ) = @_;
+ my $GoToMsg = "";
+ my $return_code = 1;
+
+ my ( $alignment_token, $lev, $tag, $tok_count ) =
+ decode_alignment_token($tok);
+
+ # We have to be very careful about aligning commas
+ # when the pattern's don't match, because it can be
+ # worse to create an alignment where none is needed
+ # than to omit one. Here's an example where the ','s
+ # are not in named containers. The first line below
+ # should not match the next two:
+ # ( $a, $b ) = ( $b, $r );
+ # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
+ # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
+ if ( $alignment_token eq ',' ) {
+
+ # do not align commas unless they are in named
+ # containers
+ $GoToMsg = "do not align commas in unnamed containers";
+ goto NO_MATCH unless ( $tok =~ /[A-Za-z]/ );
+ }
+
+ # do not align parens unless patterns match;
+ # large ugly spaces can occur in math expressions.
+ elsif ( $alignment_token eq '(' ) {
+
+ # But we can allow a match if the parens don't
+ # require any padding.
+ $GoToMsg = "do not align '(' unless patterns match or pad=0";
+ if ( $pad != 0 ) { goto NO_MATCH }
+ }
+
+ # Handle an '=' alignment with different patterns to
+ # the left.
+ elsif ( $alignment_token eq '=' ) {
+
+ # It is best to be a little restrictive when
+ # aligning '=' tokens. Here is an example of
+ # two lines that we will not align:
+ # my $variable=6;
+ # $bb=4;
+ # The problem is that one is a 'my' declaration,
+ # and the other isn't, so they're not very similar.
+ # We will filter these out by comparing the first
+ # letter of the pattern. This is crude, but works
+ # well enough.
+ if ( substr( $pat_m, 0, 1 ) ne substr( $pat, 0, 1 ) ) {
+ $GoToMsg = "first character before equals differ";
+ goto NO_MATCH;
+ }
+
+ # The introduction of sub 'prune_alignment_tree'
+ # enabled alignment of lists left of the equals with
+ # other scalar variables. For example:
+ # my ( $D, $s, $e ) = @_;
+ # my $d = length $D;
+ # my $c = $e - $s - $d;
+
+ # But this would change formatting of a lot of scripts,
+ # so for now we prevent alignment of comma lists on the
+ # left with scalars on the left. We will also prevent
+ # any partial alignments.
+
+ # set return code 2 if the = is at line level, but
+ # set return code 1 if the = is below line level, i.e.
+ # sub new { my ( $p, $v ) = @_; bless \$v, $p }
+ # sub iter { my ($x) = @_; return undef if $$x < 0; return $$x--; }
+
+ elsif (
+ ( index( $pat_m, ',' ) >= 0 ) ne ( index( $pat, ',' ) >= 0 ) )
+ {
+ $GoToMsg = "mixed commas/no-commas before equals";
+ if ( $lev eq $group_level ) {
+ $return_code = 2;
+ }
+ goto NO_MATCH;
+ }
+ }
+
+ MATCH:
+ return ( 0, \$GoToMsg );
+
+ NO_MATCH:
+
+ EXPLAIN_COMPARE_PATTERNS
+ && print STDERR "no match because $GoToMsg\n";
+
+ return ( $return_code, \$GoToMsg );
+
+ }; ## end of $compare_patterns->()
+
+ # loop over subgroups
+ foreach my $item ( @{$rsubgroups} ) {
+ my ( $jbeg, $jend ) = @{$item};
+ my $nlines = $jend - $jbeg + 1;
+ next unless ( $nlines > 1 );
+
+ # loop over lines in a subgroup
+ for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
+
+ $line_m = $line;
+ $rtokens_m = $rtokens;
+ $rpatterns_m = $rpatterns;
+ $rfield_lengths_m = $rfield_lengths;
+ $imax_m = $imax;
+ $list_type_m = $list_type;
+ $ci_level_m = $ci_level;
+
+ $line = $rnew_lines->[$jj];
+ $rtokens = $line->get_rtokens();
+ $rpatterns = $line->get_rpatterns();
+ $rfield_lengths = $line->get_rfield_lengths();
+ $imax = @{$rtokens} - 2;
+ $list_type = $line->get_list_type();
+ $ci_level = $line->get_ci_level();
+
+ # nothing to do for first line
+ next if ( $jj == $jbeg );
+
+ my $ci_jump = $ci_level - $ci_level_m;
+
+ my $imax_min = $imax_m < $imax ? $imax_m : $imax;
+
+ my $imax_align = -1;
+
+ # find number of leading common tokens
+
+ #################################
+ # No match to hanging side comment
+ #################################
+ if ( $line->get_is_hanging_side_comment() ) {
+
+ # Should not get here; HSC's have been filtered out
+ $imax_align = -1;
+ }
+
+ ##############################
+ # Handle comma-separated lists
+ ##############################
+ elsif ( $list_type && $list_type eq $list_type_m ) {
+
+ # do not align lists across a ci jump with new list method
+ if ($ci_jump) { $imax_min = -1 }
+
+ my $i_nomatch = $imax_min + 1;
+ for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
+ my $tok = $rtokens->[$i];
+ my $tok_m = $rtokens_m->[$i];
+ if ( $tok ne $tok_m ) {
+ $i_nomatch = $i;
+ last;
+ }
+ }
+
+ $imax_align = $i_nomatch - 1;
+ }
+
+ ##################
+ # Handle non-lists
+ ##################
+ else {
+ my $i_nomatch = $imax_min + 1;
+ for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
+ my $tok = $rtokens->[$i];
+ my $tok_m = $rtokens_m->[$i];
+ if ( $tok ne $tok_m ) {
+ $i_nomatch = $i;
+ last;
+ }
+
+ my $pat = $rpatterns->[$i];
+ my $pat_m = $rpatterns_m->[$i];
+
+ # If patterns don't match, we have to be careful...
+ if ( $pat_m ne $pat ) {
+ my $pad =
+ $rfield_lengths->[$i] - $rfield_lengths_m->[$i];
+ my ( $match_code, $rmsg ) = $compare_patterns->(
+ $tok, $tok_m, $pat, $pat_m, $pad
+ );
+ if ($match_code) {
+ if ( $match_code eq 1 ) { $i_nomatch = $i }
+ elsif ( $match_code eq 2 ) { $i_nomatch = 0 }
+ last;
+ }
+ }
+ }
+ $imax_align = $i_nomatch - 1;
+ }
+
+ $line_m->set_imax_pair($imax_align);
+
+ } ## end loop over lines
+
+ # Put fence at end of subgroup
+ $line->set_imax_pair(-1);
+
+ } ## end loop over subgroups
+
+ # if there are hanging side comments, propagate the pair info down to them
+ # so that lines can just look back one line for their pair info.
+ if ( @{$rlines} > @{$rnew_lines} ) {
+ my $last_pair_info = -1;
+ foreach my $line ( @{$rlines} ) {
+ if ( $line->get_is_hanging_side_comment() ) {
+ $line->set_imax_pair($last_pair_info);
+ }
+ else {
+ $last_pair_info = $line->get_imax_pair();
+ }
+ }
+ }
+ return;
+}
+
+sub fat_comma_to_comma {
+ my ($str) = @_;
+
+ # We are changing '=>' to ',' and removing any trailing decimal count
+ # because currently fat commas have a count and commas do not.
+ # For example, we will change '=>2+{-3.2' into ',2+{-3'
+ if ( $str =~ /^=>([^\.]*)/ ) { $str = ',' . $1 }
+ return $str;
+}
+
+sub get_line_token_info {
+
+ # scan lines of tokens and return summary information about the range of
+ # levels and patterns.
+ my ($rlines) = @_;
+
+ # First scan to check monotonicity. Here is an example of several
+ # lines which are monotonic. The = is the lowest level, and
+ # the commas are all one level deeper. So this is not nonmonotonic.
+ # $$d{"weeks"} = [ "w", "wk", "wks", "week", "weeks" ];
+ # $$d{"days"} = [ "d", "day", "days" ];
+ # $$d{"hours"} = [ "h", "hr", "hrs", "hour", "hours" ];
+ my @all_token_info;
+ my $all_monotonic = 1;
+ for ( my $jj = 0 ; $jj < @{$rlines} ; $jj++ ) {
+ my ($line) = $rlines->[$jj];
+ my $rtokens = $line->get_rtokens();
+ my $last_lev;
+ my $is_monotonic = 1;
+ my $i = -1;
+ foreach my $tok ( @{$rtokens} ) {
+ $i++;
+ my ( $raw_tok, $lev, $tag, $tok_count ) =
+ decode_alignment_token($tok);
+ push @{ $all_token_info[$jj] },
+ [ $raw_tok, $lev, $tag, $tok_count ];
+ last if ( $tok eq '#' );
+ if ( $i > 0 && $lev < $last_lev ) { $is_monotonic = 0 }
+ $last_lev = $lev;
+ }
+ if ( !$is_monotonic ) { $all_monotonic = 0 }
+ }
+
+ my $rline_values = [];
+ for ( my $jj = 0 ; $jj < @{$rlines} ; $jj++ ) {
+ my ($line) = $rlines->[$jj];
+
+ my $rtokens = $line->get_rtokens();
+ my $i = -1;
+ my ( $lev_min, $lev_max );
+ my $token_pattern_max = "";
+ my %saw_level;
+ my @token_info;
+ my $is_monotonic = 1;
+
+ # find the index of the last token before the side comment
+ my $imax = @{$rtokens} - 2;
+ my $imax_true = $imax;
+
+ # If the entire group is monotonic, and the line ends in a comma list,
+ # walk it back to the first such comma. this will have the effect of
+ # making all trailing ragged comma lists match in the prune tree
+ # routine. these trailing comma lists can better be handled by later
+ # alignment rules.
+
+ # Treat fat commas the same as commas here by converting them to
+ # commas. This will improve the chance of aligning the leading parts
+ # of ragged lists.
+
+ my $tok_end = fat_comma_to_comma( $rtokens->[$imax] );
+ if ( $all_monotonic && $tok_end =~ /^,/ ) {
+ my $i = $imax - 1;
+ while ( $i >= 0
+ && fat_comma_to_comma( $rtokens->[$i] ) eq $tok_end )
+ {
+ $imax = $i;
+ $i--;
+ }
+ }
+
+ # make a first pass to find level range
+ my $last_lev;
+ foreach my $tok ( @{$rtokens} ) {
+ $i++;
+ last if ( $i > $imax );
+ last if ( $tok eq '#' );
+ my ( $raw_tok, $lev, $tag, $tok_count ) =
+ @{ $all_token_info[$jj]->[$i] };
+
+ last if ( $tok eq '#' );
+ $token_pattern_max .= $tok;
+ $saw_level{$lev}++;
+ if ( !defined($lev_min) ) {
+ $lev_min = $lev;
+ $lev_max = $lev;
+ }
+ else {
+ if ( $lev < $lev_min ) { $lev_min = $lev; }
+ if ( $lev > $lev_max ) { $lev_max = $lev; }
+ if ( $lev < $last_lev ) { $is_monotonic = 0 }
+ }
+ $last_lev = $lev;
+ }
+
+ # handle no levels
+ my $rtoken_patterns = {};
+ my $rtoken_indexes = {};
+ my @levs = sort keys %saw_level;
+ if ( !defined($lev_min) ) {
+ $lev_min = -1;
+ $lev_max = -1;
+ $levs[0] = -1;
+ $rtoken_patterns->{$lev_min} = "";
+ $rtoken_indexes->{$lev_min} = [];
+ }
+
+ # handle one level
+ elsif ( $lev_max == $lev_min ) {
+ $rtoken_patterns->{$lev_max} = $token_pattern_max;
+ $rtoken_indexes->{$lev_max} = [ ( 0 .. $imax ) ];
+ }
+
+ # handle multiple levels
+ else {
+ $rtoken_patterns->{$lev_max} = $token_pattern_max;
+ $rtoken_indexes->{$lev_max} = [ ( 0 .. $imax ) ];
+
+ my $debug = 0;
+ my $lev_top = pop @levs; # alread did max level
+ my $itok = -1;
+ foreach my $tok ( @{$rtokens} ) {
+ $itok++;
+ last if ( $itok > $imax );
+ my ( $raw_tok, $lev, $tag, $tok_count ) =
+ @{ $all_token_info[$jj]->[$itok] };
+ last if ( $raw_tok eq '#' );
+ foreach my $lev_test (@levs) {
+ next if ( $lev > $lev_test );
+ $rtoken_patterns->{$lev_test} .= $tok;
+ push @{ $rtoken_indexes->{$lev_test} }, $itok;
+ }
+ }
+ push @levs, $lev_top;
+ }
+
+ push @{$rline_values},
+ [
+ $lev_min, $lev_max, $rtoken_patterns, \@levs,
+ $rtoken_indexes, $is_monotonic, $imax_true, $imax,
+ ];
+
+ # debug
+ 0 && do {
+ local $" = ')(';
+ print "lev_min=$lev_min, lev_max=$lev_max, levels=(@levs)\n";
+ foreach my $key ( sort keys %{$rtoken_patterns} ) {
+ print "$key => $rtoken_patterns->{$key}\n";
+ print "$key => @{$rtoken_indexes->{$key}}\n";
+ }
+ };
+ } ## end loop over lines
+ return ( $rline_values, $all_monotonic );
+}
+
+sub prune_alignment_tree {
+ my ($rlines) = @_;
+ my $jmax = @{$rlines} - 1;
+ return unless $jmax > 0;
+
+ # Vertical alignment in perltidy is done as an iterative process. The
+ # starting point is to mark all possible alignment tokens ('=', ',', '=>',
+ # etc) for vertical alignment. Then we have to delete all alignments
+ # which, if actually made, would detract from overall alignment. This
+ # is done in several phases of which this is one.
+
+ # In this routine we look at the alignments of a group of lines as a
+ # hierarchical tree. We will 'prune' the tree to limited depths if that
+ # will improve overall alignment at the lower depths.
+ # For each line we will be looking at its alignment patterns down to
+ # different fixed depths. For each depth, we include all lower depths and
+ # ignore all higher depths. We want to see if we can get alignment of a
+ # larger group of lines if we ignore alignments at some lower depth.
+ # Here is an # example:
+
+ # for (
+ # [ '$var', sub { join $_, "bar" }, 0, "bar" ],
+ # [ 'CONSTANT', sub { join "foo", "bar" }, 0, "bar" ],
+ # [ 'CONSTANT', sub { join "foo", "bar", 3 }, 1, "barfoo3" ],
+ # [ '$myvar', sub { my $var; join $var, "bar" }, 0, "bar" ],
+ # );
+
+ # In the above example, all lines have three commas at the lowest depth
+ # (zero), so if there were no other alignements, these lines would all
+ # align considering only the zero depth alignment token. But some lines
+ # have additional comma alignments at the next depth, so we need to decide
+ # if we should drop those to keep the top level alignments, or keep those
+ # for some additional low level alignments at the expense losing some top
+ # level alignments. In this case we will drop the deeper level commas to
+ # keep the entire collection aligned. But in some cases the decision could
+ # go the other way.
+
+ # The tree for this example at the zero depth has one node containing
+ # all four lines, since they are identical at zero level (three commas).
+ # At depth one, there are three 'children' nodes, namely:
+ # - lines 1 and 2, which have a single comma in the 'sub' at depth 1
+ # - line 3, which has 2 commas at depth 1
+ # - line4, which has a ';' and a ',' at depth 1
+ # There are no deeper alignments in this example.
+ # so the tree structure for this example is:
+ #
+ # depth 0 depth 1 depth 2
+ # [lines 1-4] -- [line 1-2] - (empty)
+ # | [line 3] - (empty)
+ # | [line 4] - (empty)
+
+ # We can carry this to any depth, but it is not really useful to go below
+ # depth 2. To cleanly stop there, we will consider depth 2 to contain all
+ # alignments at depth >=2.
+
+ use constant EXPLAIN_PRUNE => 0;
+
+ ####################################################################
+ # Prune Tree Step 1. Start by scanning the lines and collecting info
+ ####################################################################
+
+ # Note that the caller had this info but we have to redo this now because
+ # alignment tokens may have been deleted.
+ my ( $rline_values, $all_monotonic ) = get_line_token_info($rlines);
+
+ # If all the lines have levels which increase monotonically from left to
+ # right, then the sweep-left-to-right pass can do a better job of alignment
+ # than pruning, and without deleting alignments.
+ return if ($all_monotonic);
+
+ # Contents of $rline_values
+ # [
+ # $lev_min, $lev_max, $rtoken_patterns, \@levs,
+ # $rtoken_indexes, $is_monotonic, $imax_true, $imax,
+ # ];
+
+ # We can work to any depth, but there is little advantage to working
+ # to a a depth greater than 2
+ my $MAX_DEPTH = 2;
+
+ # This arrays will hold the tree of alignment tokens at different depths
+ # for these lines.
+ my @match_tree;
+
+ # Tree nodes contain these values:
+ # $match_tree[$depth] = [$jbeg, $jend, $n_parent, $level, $pattern,
+ # $nc_beg_p, $nc_end_p, $rindexes];
+ # where
+ # $depth = 0,1,2 = index of depth of the match
+
+ # $jbeg beginning index j of the range of lines in this match
+ # $jend ending index j of the range of lines in this match
+ # $n_parent = index of the containing group at $depth-1, if it exists
+ # $level = actual level of code being matched in this group
+ # $pattern = alignment pattern being matched
+ # $nc_beg_p = first child
+ # $nc_end_p = last child
+ # $rindexes = ref to token indexes
+
+ # the patterns and levels of the current group being formed at each depth
+ my ( @token_patterns_current, @levels_current, @token_indexes_current );
+
+ # the patterns and levels of the next line being tested at each depth
+ my ( @token_patterns_next, @levels_next, @token_indexes_next );
+
+ #########################################################
+ # define a recursive worker subroutine for tree construction
+ #########################################################
+
+ # This is a recursive routine which is called if a match condition changes
+ # at any depth when a new line is encountered. It ends the match node
+ # which changed plus all deeper nodes attached to it.
+ my $end_node;
+ $end_node = sub {
+ my ( $depth, $jl, $n_parent ) = @_;
+
+ # $depth is the tree depth
+ # $jl is the index of the line
+ # $n_parent is index of the parent node of this node
+
+ return if ( $depth > $MAX_DEPTH );
+
+ # end any current group at this depth
+ if ( $jl >= 0
+ && defined( $match_tree[$depth] )
+ && @{ $match_tree[$depth] }
+ && defined( $levels_current[$depth] ) )
+ {
+ $match_tree[$depth]->[-1]->[1] = $jl;
+ }
+
+ # Define the index of the node we will create below
+ my $ng_self = 0;
+ if ( defined( $match_tree[$depth] ) ) {
+ $ng_self = @{ $match_tree[$depth] };
+ }
+
+ # end any next deeper child node(s)
+ $end_node->( $depth + 1, $jl, $ng_self );
+
+ # update the levels being matched
+ $token_patterns_current[$depth] = $token_patterns_next[$depth];
+ $token_indexes_current[$depth] = $token_indexes_next[$depth];
+ $levels_current[$depth] = $levels_next[$depth];
+
+ # Do not start a new group at this level if it is not being used
+ if ( !defined( $levels_next[$depth] )
+ || $depth > 0
+ && $levels_next[$depth] <= $levels_next[ $depth - 1 ] )
+ {
+ return;
+ }
+
+ # Create a node for the next group at this depth. We initially assume
+ # that it will continue to $jmax, and correct that later if the node
+ # ends earlier.
+ push @{ $match_tree[$depth] },
+ [
+ $jl + 1, $jmax, $n_parent, $levels_current[$depth],
+ $token_patterns_current[$depth],
+ undef, undef, $token_indexes_current[$depth],
+ ];
+
+ return;
+ }; ## end sub end_node
+
+ ######################################################
+ # Prune Tree Step 2. Loop to form the tree of matches.
+ ######################################################
+ for ( my $jp = 0 ; $jp <= $jmax ; $jp++ ) {
+
+ # working with two adjacent line indexes, 'm'=minus, 'p'=plus
+ my $jm = $jp - 1;
+
+ # Pull out needed values for the next line
+ my ( $lev_min, $lev_max, $rtoken_patterns, $rlevs, $rtoken_indexes,
+ $is_monotonic, $imax_true, $imax )
+ = @{ $rline_values->[$jp] };
+
+ # Transfer levels and patterns for this line to the working arrays.
+ # If the number of levels differs from our chosen MAX_DEPTH ...
+ # if fewer than MAX_DEPTH: leave levels at missing depths undefined
+ # if more than MAX_DEPTH: set the MAX_DEPTH level to be the maximum
+ @levels_next = @{$rlevs}[ 0 .. $MAX_DEPTH ];
+ if ( @{$rlevs} > $MAX_DEPTH ) {
+ $levels_next[$MAX_DEPTH] = $rlevs->[-1];
+ }
+ my $depth = 0;
+ foreach (@levels_next) {
+ $token_patterns_next[$depth] =
+ defined($_) ? $rtoken_patterns->{$_} : undef;
+ $token_indexes_next[$depth] =
+ defined($_) ? $rtoken_indexes->{$_} : undef;
+ $depth++;
+ }
+
+ # Look for a change in match groups...
+
+ # Initialize on the first line
+ if ( $jp == 0 ) {
+ my $n_parent;
+ $end_node->( 0, $jm, $n_parent );
+ }
+
+ # End groups if a hard flag has been set
+ elsif ( $rlines->[$jm]->get_end_group() ) {
+ my $n_parent;
+ $end_node->( 0, $jm, $n_parent );
+ }
+
+ # Continue at hanging side comment
+ elsif ( $rlines->[$jp]->get_is_hanging_side_comment() ) {
+ next;
+ }
+ # Otherwise see if anything changed and update the tree if so
+ else {
+ foreach my $depth ( 0 .. $MAX_DEPTH ) {
+
+ my $def_current = defined( $token_patterns_current[$depth] );
+ my $def_next = defined( $token_patterns_next[$depth] );
+ last unless ( $def_current || $def_next );
+ if ( !$def_current
+ || !$def_next
+ || $token_patterns_current[$depth] ne
+ $token_patterns_next[$depth] )
+ {
+ my $n_parent;
+ if ( $depth > 0 && defined( $match_tree[ $depth - 1 ] ) ) {
+ $n_parent = @{ $match_tree[ $depth - 1 ] } - 1;
+ }
+ $end_node->( $depth, $jm, $n_parent );
+ last;
+ }
+ }
+ }
+ } ## end loop to form tree of matches
+
+ ##########################################################
+ # Prune Tree Step 3. Make links from parent to child nodes
+ ##########################################################
+
+ # It seemed cleaner to do this as a separate step rather than during tree
+ # construction. The children nodes have links up to the parent node which
+ # created them. Now make links in the opposite direction, so the parents
+ # can find the children. We store the range of children nodes ($nc_beg,
+ # $nc_end) of each parent with two additional indexes in the orignal array.
+ # These will be undef if no children.
+ for ( my $depth = $MAX_DEPTH ; $depth > 0 ; $depth-- ) {
+ next unless defined( $match_tree[$depth] );
+ my $nc_max = @{ $match_tree[$depth] } - 1;
+ my $np_now;
+ foreach my $nc ( 0 .. $nc_max ) {
+ my $np = $match_tree[$depth]->[$nc]->[2];
+ if ( !defined($np) ) {
+
+ # shouldn't happen
+ #print STDERR "lost child $np at depth $depth\n";
+ next;
+ }
+ if ( !defined($np_now) || $np != $np_now ) {
+ $np_now = $np;
+ $match_tree[ $depth - 1 ]->[$np]->[5] = $nc;
+ }
+ $match_tree[ $depth - 1 ]->[$np]->[6] = $nc;
+ }
+ } ## end loop to make links down to the child nodes
+
+ EXPLAIN_PRUNE > 0 && do {
+ print "Tree complete. Found these groups:\n";
+ foreach my $depth ( 0 .. $MAX_DEPTH ) {
+ Dump_tree_groups( \@{ $match_tree[$depth] }, "depth=$depth" );
+ }
+ };
+
+ #######################################################
+ # Prune Tree Step 4. Make a list of nodes to be deleted
+ #######################################################
+
+ # list of lines with tokens to be deleted:
+ # [$jbeg, $jend, $level_keep]
+ # $jbeg..$jend is the range of line indexes,
+ # $level_keep is the minimum level to keep
+ my @delete_list;
+
+ # Groups with ending comma lists and their range of sizes:
+ # $ragged_comma_group{$id} = [ imax_group_min, imax_group_max ]
+ my %ragged_comma_group;
+
+ # Define a threshold line count for forcing a break
+ my $nlines_break = 3;
+
+ # We work with a list of nodes to visit at the next deeper depth.
+ my @todo_list;
+ if ( defined( $match_tree[0] ) ) {
+ @todo_list = ( 0 .. @{ $match_tree[0] } - 1 );
+ }
+
+ for ( my $depth = 0 ; $depth <= $MAX_DEPTH ; $depth++ ) {
+ last unless (@todo_list);
+ my @todo_next;
+ foreach my $np (@todo_list) {
+ my ( $jbeg_p, $jend_p, $np_p, $lev_p, $pat_p, $nc_beg_p, $nc_end_p,
+ $rindexes_p )
+ = @{ $match_tree[$depth]->[$np] };
+ my $nlines_p = $jend_p - $jbeg_p + 1;
+
+ # nothing to do if no children
+ next unless defined($nc_beg_p);
+
+ # Define the number of lines to either keep or delete a child node.
+ # This is the key decision we have to make. We want to delete
+ # short runs of matched lines, and keep long runs. It seems easier
+ # for the eye to follow breaks in monotonic level changes than
+ # non-monotonic level changes. For example, the following looks
+ # best if we delete the lower level alignments:
+
+ # [1] ~~ [];
+ # [ ["foo"], ["bar"] ] ~~ [ qr/o/, qr/a/ ];
+ # [ qr/o/, qr/a/ ] ~~ [ ["foo"], ["bar"] ];
+ # [ "foo", "bar" ] ~~ [ qr/o/, qr/a/ ];
+ # [ qr/o/, qr/a/ ] ~~ [ "foo", "bar" ];
+ # $deep1 ~~ $deep1;
+
+ # So we will use two thresholds.
+ my $nmin_mono = $depth + 2;
+ my $nmin_non_mono = $depth + 6;
+ if ( $nmin_mono > $nlines_p - 1 ) {
+ $nmin_mono = $nlines_p - 1;
+ }
+ if ( $nmin_non_mono > $nlines_p - 1 ) {
+ $nmin_non_mono = $nlines_p - 1;
+ }
+
+ # loop to keep or delete each child node
+ foreach my $nc ( $nc_beg_p .. $nc_end_p ) {
+ my ( $jbeg_c, $jend_c, $np_c, $lev_c, $pat_c, $nc_beg_c,
+ $nc_end_c )
+ = @{ $match_tree[ $depth + 1 ]->[$nc] };
+ my $nlines_c = $jend_c - $jbeg_c + 1;
+ my $is_monotonic = $rline_values->[$jbeg_c]->[5];
+ my $nmin = $is_monotonic ? $nmin_mono : $nmin_non_mono;
+ if ( $nlines_c < $nmin ) {
+##print "deleting child, nlines=$nlines_c, nmin=$nmin\n";
+ push @delete_list, [ $jbeg_c, $jend_c, $lev_p ];
+ }
+ else {
+##print "keeping child, nlines=$nlines_c, nmin=$nmin\n";
+ push @todo_next, $nc;
+ }
+ }
+ }
+ @todo_list = @todo_next;
+ } ## end loop to mark nodes to delete
+
+ #############################################################
+ # Prune Tree Step 5. Loop to delete selected alignment tokens
+ #############################################################
+ foreach my $item (@delete_list) {
+ my ( $jbeg, $jend, $level_keep ) = @{$item};
+ foreach my $jj ( $jbeg .. $jend ) {
+ my $line = $rlines->[$jj];
+ my @idel;
+ my $rtokens = $line->get_rtokens();
+ my $imax = @{$rtokens} - 2;
+ for ( my $i = 0 ; $i <= $imax ; $i++ ) {
+ my $tok = $rtokens->[$i];
+ my ( $raw_tok, $lev, $tag, $tok_count ) =
+ decode_alignment_token($tok);
+ if ( $lev > $level_keep ) {
+ push @idel, $i;
+ }
+ }
+ if (@idel) {
+ delete_selected_tokens( $line, \@idel );
+ }
+ }
+ } ## end loop to delete selected alignment tokens
+
+ return;
+} ## end sub prune_alignment_tree
+
+sub Dump_tree_groups {
+ my ( $rgroup, $msg ) = @_;
+ print "$msg\n";
+ local $" = ')(';
+ foreach my $item ( @{$rgroup} ) {
+ my @fix = @{$item};
+ foreach (@fix) { $_ = "undef" unless defined $_; }
+ $fix[4] = "...";
+ print "(@fix)\n";
+ }
return;
}
-{ # decide_if_aligned_pair
+{ ## closure for sub is_marginal_match
my %is_if_or;
my %is_assignment;
+ my %is_good_alignment;
+
+ # This test did not give sufficiently better results to use as an update,
+ # but the flag is worth keeping as a starting point for future testing.
+ use constant TEST_MARGINAL_EQ_ALIGNMENT => 0;
BEGIN {
my @q = qw(
- if or ||
+ if unless or ||
);
@is_if_or{@q} = (1) x scalar(@q);
x=
);
@is_assignment{@q} = (1) x scalar(@q);
+
+ # Vertically aligning on certain "good" tokens is usually okay
+ # so we can be less restrictive in marginal cases.
+ @q = qw( { ? => = );
+ push @q, (',');
+ @is_good_alignment{@q} = (1) x scalar(@q);
}
- sub decide_if_aligned_pair {
+ sub is_marginal_match {
- # Do not try to align two lines which are not really similar
- return unless ( @group_lines == 2 );
- return if ($is_matching_terminal_line);
+ my ( $line_0, $line_1, $group_level, $imax_align, $imax_prev ) = @_;
- # always align lists
- my $group_list_type = $group_lines[0]->get_list_type();
- return 0 if ($group_list_type);
+ # Decide if we should undo some or all of the common alignments of a
+ # group of just two lines.
+
+ # Given:
+ # $line_0 and $line_1 - the two lines
+ # $group_level = the indentation level of the group being processed
+ # $imax_align = the maximum index of the common alignment tokens
+ # of the two lines
+ # $imax_prev = the maximum index of the common alignment tokens
+ # with the line before $line_0 (=-1 of does not exist)
+
+ # Return:
+ # $is_marginal = true if the two lines should NOT be fully aligned
+ # = false if the two lines can remain fully aligned
+ # $imax_align = the index of the highest alignment token shared by
+ # these two lines to keep if the match is marginal.
- my $jmax0 = $group_lines[0]->get_jmax();
- my $jmax1 = $group_lines[1]->get_jmax();
- my $rtokens = $group_lines[0]->get_rtokens();
- my $leading_equals = ( $rtokens->[0] =~ /=/ );
+ # When we have an alignment group of just two lines like this, we are
+ # working in the twilight zone of what looks good and what looks bad.
+ # This routine is a collection of rules which work have been found to
+ # work fairly well, but it will need to be updated from time to time.
- # scan the tokens on the second line
- my $rtokens1 = $group_lines[1]->get_rtokens();
- my $saw_if_or; # if we saw an 'if' or 'or' at group level
+ my $is_marginal = 0;
+
+ # always keep alignments of a terminal else or ternary
+ goto RETURN if ( defined( $line_1->get_j_terminal_match() ) );
+
+ # always align lists
+ my $group_list_type = $line_0->get_list_type();
+ goto RETURN if ($group_list_type);
+
+ # always align hanging side comments
+ my $is_hanging_side_comment = $line_1->get_is_hanging_side_comment();
+ goto RETURN if ($is_hanging_side_comment);
+
+ my $jmax_0 = $line_0->get_jmax();
+ my $jmax_1 = $line_1->get_jmax();
+ my $rtokens_1 = $line_1->get_rtokens();
+ my $rtokens_0 = $line_0->get_rtokens();
+ my $rfield_lengths_0 = $line_0->get_rfield_lengths();
+ my $rfield_lengths_1 = $line_1->get_rfield_lengths();
+ my $rpatterns_0 = $line_0->get_rpatterns();
+ my $rpatterns_1 = $line_1->get_rpatterns();
+ my $imax_next = $line_1->get_imax_pair();
+
+ # We will scan the alignment tokens and set a flag '$is_marginal' if
+ # it seems that the an alignment would look bad.
+ my $max_pad = 0;
+ my $saw_good_alignment = 0;
+ my $saw_if_or; # if we saw an 'if' or 'or' at group level
my $raw_tokb = ""; # first token seen at group level
- for ( my $j = 0 ; $j < $jmax1 - 1 ; $j++ ) {
+ my $jfirst_bad;
+ my $line_ending_fat_comma; # is last token just a '=>' ?
+ my $j0_eq_pad;
+ my $j0_max_pad = 0;
+
+ for ( my $j = 0 ; $j < $jmax_1 - 1 ; $j++ ) {
my ( $raw_tok, $lev, $tag, $tok_count ) =
- decode_alignment_token( $rtokens1->[$j] );
+ decode_alignment_token( $rtokens_1->[$j] );
if ( $raw_tok && $lev == $group_level ) {
if ( !$raw_tokb ) { $raw_tokb = $raw_tok }
$saw_if_or ||= $is_if_or{$raw_tok};
}
- }
- # A marginal match is a match which has different patterns. Normally,
- # we should not allow exactly two lines to match if marginal. But
- # we can allow matching in some specific cases.
- my $is_marginal = $marginal_match;
+ # When the first of the two lines ends in a bare '=>' this will
+ # probably be marginal match. (For a bare =>, the next field length
+ # will be 2 or 3, depending on side comment)
+ $line_ending_fat_comma =
+ $j == $jmax_1 - 2
+ && $raw_tok eq '=>'
+ && $rfield_lengths_0->[ $j + 1 ] <= 3;
+
+ my $pad = $rfield_lengths_1->[$j] - $rfield_lengths_0->[$j];
+ if ( $j == 0 ) {
+ $pad += $line_1->get_leading_space_count() -
+ $line_0->get_leading_space_count();
+
+ # Remember the pad at a leading equals
+ if ( $raw_tok eq '=' && $lev == $group_level ) {
+ $j0_eq_pad = $pad;
+ $j0_max_pad =
+ 0.5 * ( $rfield_lengths_1->[0] + $rfield_lengths_0->[0] );
+ $j0_max_pad = 4 if ( $j0_max_pad < 4 );
+ }
+ }
- # lines with differing number of alignment tokens are marginal
- $is_marginal ||=
- $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
- && !$is_assignment{$raw_tokb};
+ if ( $pad < 0 ) { $pad = -$pad }
+ if ( $pad > $max_pad ) { $max_pad = $pad }
+ if ( $is_good_alignment{$raw_tok} && !$line_ending_fat_comma ) {
+ $saw_good_alignment = 1;
+ }
+ else {
+ $jfirst_bad = $j unless defined($jfirst_bad);
+ }
+ if ( $rpatterns_0->[$j] ne $rpatterns_1->[$j] ) {
+
+ # Flag this as a marginal match since patterns differ.
+ # Normally, we will not allow just two lines to match if
+ # marginal. But we can allow matching in some specific cases.
+
+ $jfirst_bad = $j if ( !defined($jfirst_bad) );
+ $is_marginal = 1 if ( $is_marginal == 0 );
+ if ( $raw_tok eq '=' ) {
+
+ # Here is an example of a marginal match:
+ # $done{$$op} = 1;
+ # $op = compile_bblock($op);
+ # The left tokens are both identifiers, but
+ # one accesses a hash and the other doesn't.
+ # We'll let this be a tentative match and undo
+ # it later if we don't find more than 2 lines
+ # in the group.
+ $is_marginal = 2;
+ }
+ }
+ }
+
+ $is_marginal = 1 if ( $is_marginal == 0 && $line_ending_fat_comma );
+
+ # Turn off the "marginal match" flag in some cases...
+ # A "marginal match" occurs when the alignment tokens agree
+ # but there are differences in the other tokens (patterns).
+ # If we leave the marginal match flag set, then the rule is that we
+ # will align only if there are more than two lines in the group.
+ # We will turn of the flag if we almost have a match
+ # and either we have seen a good alignment token or we
+ # just need a small pad (2 spaces) to fit. These rules are
+ # the result of experimentation. Tokens which misaligned by just
+ # one or two characters are annoying. On the other hand,
+ # large gaps to less important alignment tokens are also annoying.
+ if ( $is_marginal == 1
+ && ( $saw_good_alignment || $max_pad < 3 ) )
+ {
+ $is_marginal = 0;
+ }
# We will use the line endings to help decide on alignments...
# See if the lines end with semicolons...
- my $rpatterns0 = $group_lines[0]->get_rpatterns();
- my $rpatterns1 = $group_lines[1]->get_rpatterns();
my $sc_term0;
my $sc_term1;
- if ( $jmax0 < 1 || $jmax1 < 1 ) {
+ if ( $jmax_0 < 1 || $jmax_1 < 1 ) {
# shouldn't happen
}
else {
- my $pat0 = $rpatterns0->[ $jmax0 - 1 ];
- my $pat1 = $rpatterns1->[ $jmax1 - 1 ];
+ my $pat0 = $rpatterns_0->[ $jmax_0 - 1 ];
+ my $pat1 = $rpatterns_1->[ $jmax_1 - 1 ];
$sc_term0 = $pat0 =~ /;b?$/;
$sc_term1 = $pat1 =~ /;b?$/;
}
# grep { /$handles/ } $self->_get_delegate_method_list;
$is_marginal ||=
( $raw_tokb eq '(' || $raw_tokb eq '{' )
- && $jmax1 == 2
+ && $jmax_1 == 2
&& $sc_term0 ne $sc_term1;
+ ########################################
+ # return unless this is a marginal match
+ ########################################
+ goto RETURN if ( !$is_marginal );
+
# Undo the marginal match flag in certain cases,
- if ($is_marginal) {
-
- # Two lines with a leading equals-like operator are allowed to
- # align if the patterns to the left of the equals are the same.
- # For example the following two lines are a marginal match but have
- # the same left side patterns, so we will align the equals.
- # my $orig = my $format = "^<<<<< ~~\n";
- # my $abc = "abc";
- # But these have a different left pattern so they will not be
- # aligned
- # $xmldoc .= $`;
- # $self->{'leftovers'} .= "<bx-seq:seq" . $';
-
- # First line semicolon terminated but second not, usually ok:
- # my $want = "'ab', 'a', 'b'";
- # my $got = join( ", ",
- # map { defined($_) ? "'$_'" : "undef" }
- # @got );
- # First line not semicolon terminated, Not OK to match:
- # $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
- # $$href{-NUM_DIRS} = 0;
- my $pat0 = $rpatterns0->[0];
- my $pat1 = $rpatterns1->[0];
-
- ##########################################################
- # Turn off the marginal flag for some types of assignments
- ##########################################################
- if ( $is_assignment{$raw_tokb} ) {
- # undo marginal flag if first line is semicolon terminated
- # and leading patters match
- if ($sc_term0) { # && $sc_term1) {
- $is_marginal = $pat0 ne $pat1;
- }
+ # Two lines with a leading equals-like operator are allowed to
+ # align if the patterns to the left of the equals are the same.
+ # For example the following two lines are a marginal match but have
+ # the same left side patterns, so we will align the equals.
+ # my $orig = my $format = "^<<<<< ~~\n";
+ # my $abc = "abc";
+ # But these have a different left pattern so they will not be
+ # aligned
+ # $xmldoc .= $`;
+ # $self->{'leftovers'} .= "<bx-seq:seq" . $';
+
+ # First line semicolon terminated but second not, usually ok:
+ # my $want = "'ab', 'a', 'b'";
+ # my $got = join( ", ",
+ # map { defined($_) ? "'$_'" : "undef" }
+ # @got );
+ # First line not semicolon terminated, Not OK to match:
+ # $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
+ # $$href{-NUM_DIRS} = 0;
+ my $pat0 = $rpatterns_0->[0];
+ my $pat1 = $rpatterns_1->[0];
+
+ ##########################################################
+ # Turn off the marginal flag for some types of assignments
+ ##########################################################
+ if ( $is_assignment{$raw_tokb} ) {
+
+ # undo marginal flag if first line is semicolon terminated
+ # and leading patters match
+ if ($sc_term0) { # && $sc_term1) {
+ $is_marginal = $pat0 ne $pat1;
}
- elsif ( $raw_tokb eq '=>' ) {
+ }
+ elsif ( $raw_tokb eq '=>' ) {
+
+ # undo marginal flag if patterns match
+ $is_marginal = $pat0 ne $pat1 || $line_ending_fat_comma;
+ }
+ elsif ( $raw_tokb eq '=~' ) {
- # undo marginal flag if patterns match
+ # undo marginal flag if both lines are semicolon terminated
+ # and leading patters match
+ if ( $sc_term1 && $sc_term0 ) {
$is_marginal = $pat0 ne $pat1;
}
- elsif ( $raw_tokb eq '=~' ) {
+ }
- # undo marginal flag if both lines are semicolon terminated
- # and leading patters match
- if ( $sc_term1 && $sc_term0 ) {
- $is_marginal = $pat0 ne $pat1;
- }
+ ######################################################
+ # Turn off the marginal flag if we saw an 'if' or 'or'
+ ######################################################
+
+ # A trailing 'if' and 'or' often gives a good alignment
+ # For example, we can align these:
+ # return -1 if $_[0] =~ m/^CHAPT|APPENDIX/;
+ # return $1 + 0 if $_[0] =~ m/^SECT(\d*)$/;
+
+ # or
+ # $d_in_m[2] = 29 if ( &Date_LeapYear($y) );
+ # $d = $d_in_m[$m] if ( $d > $d_in_m[$m] );
+
+ if ($saw_if_or) {
+
+ # undo marginal flag if both lines are semicolon terminated
+ if ( $sc_term0 && $sc_term1 ) {
+ $is_marginal = 0;
}
+ }
+
+ # For a marginal match, only keep matches before the first 'bad' match
+ if ( $is_marginal
+ && defined($jfirst_bad)
+ && $imax_align > $jfirst_bad - 1 )
+ {
+ $imax_align = $jfirst_bad - 1;
+ }
- ######################################################
- # Turn off the marginal flag if we saw an 'if' or 'or'
- ######################################################
+ ###########################################################
+ # Allow sweep to match lines with leading '=' in some cases
+ ###########################################################
+ if ( $imax_align < 0 && defined($j0_eq_pad) ) {
+
+ if (
- # A trailing 'if' and 'or' often gives a good alignment
- # For example, we can align these:
- # return -1 if $_[0] =~ m/^CHAPT|APPENDIX/;
- # return $1 + 0 if $_[0] =~ m/^SECT(\d*)$/;
+ # If there is a following line with leading equals, or
+ # preceding line with leading equals, then let the sweep align
+ # them without restriction. For example, the first two lines
+ # here are a marginal match, but they are followed by a line
+ # with leading equals, so the sweep-lr logic can align all of
+ # the lines:
+
+ # $date[1] = $month_to_num{ $date[1] }; # <--line_0
+ # @xdate = split( /[:\/\s]/, $log->field('t') ); # <--line_1
+ # $day = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
+ # $time = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
+
+ # Likewise, if we reverse the two pairs we want the same result
+
+ # $day = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
+ # $time = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
+ # $date[1] = $month_to_num{ $date[1] }; # <--line_0
+ # @xdate = split( /[:\/\s]/, $log->field('t') ); # <--line_1
+
+ (
+ $imax_next >= 0
+ || $imax_prev >= 0
+ || TEST_MARGINAL_EQ_ALIGNMENT
+ )
+ && $j0_eq_pad >= -$j0_max_pad
+ && $j0_eq_pad <= $j0_max_pad
+ )
+ {
- # or
- # $d_in_m[2] = 29 if ( &Date_LeapYear($y) );
- # $d = $d_in_m[$m] if ( $d > $d_in_m[$m] );
+ # But do not do this if there is a comma before the '='.
+ # For example, the first two lines below have commas and
+ # therefore are not allowed to align with lines 3 & 4:
- if ($saw_if_or) {
+ # my ( $x, $y ) = $self->Size(); #<--line_0
+ # my ( $left, $top, $right, $bottom ) = $self->Window(); #<--l_1
+ # my $vx = $right - $left;
+ # my $vy = $bottom - $top;
- # undo marginal flag if both lines are semicolon terminated
- if ( $sc_term0 && $sc_term1 ) {
- $is_marginal = 0;
+ if ( $rpatterns_0->[0] !~ /,/ && $rpatterns_1->[0] !~ /,/ ) {
+ $imax_align = 0;
}
}
}
- ###############################
- # Set the return flag:
- # Don't align if still marginal
- ###############################
- my $do_not_align = $is_marginal;
+ RETURN:
+ return ( $is_marginal, $imax_align );
+ }
+}
+
+sub get_extra_leading_spaces {
- # But try to convert them into a simple comment group if the first line
- # a has side comment
- my $rfields = $group_lines[0]->get_rfields();
- my $maximum_field_index = $group_lines[0]->get_jmax();
- if ( $do_not_align
- && ( length( $rfields->[$maximum_field_index] ) > 0 ) )
- {
- combine_fields();
- $do_not_align = 0;
+ my ( $rlines, $rgroups ) = @_;
+
+ #----------------------------------------------------------
+ # Define any extra indentation space (for the -lp option).
+ # Here is why:
+ # If a list has side comments, sub scan_list must dump the
+ # list before it sees everything. When this happens, it sets
+ # the indentation to the standard scheme, but notes how
+ # many spaces it would have liked to use. We may be able
+ # to recover that space here in the event that all of the
+ # lines of a list are back together again.
+ #----------------------------------------------------------
+
+ return 0 unless ( @{$rlines} && @{$rgroups} );
+
+ my $object = $rlines->[0]->get_indentation();
+ return 0 unless ( ref($object) );
+ my $extra_leading_spaces = 0;
+ my $extra_indentation_spaces_wanted = get_recoverable_spaces($object);
+ return ($extra_leading_spaces) unless ($extra_indentation_spaces_wanted);
+
+ my $min_spaces = $extra_indentation_spaces_wanted;
+ if ( $min_spaces > 0 ) { $min_spaces = 0 }
+
+ # loop over all groups
+ my $ng = -1;
+ my $ngroups = @{$rgroups};
+ foreach my $item ( @{$rgroups} ) {
+ $ng++;
+ my ( $jbeg, $jend ) = @{$item};
+ foreach my $j ( $jbeg .. $jend ) {
+ next if ( $j == 0 );
+
+ # all indentation objects must be the same
+ if ( $object != $rlines->[$j]->get_indentation() ) {
+ return 0;
+ }
+ }
+
+ # find the maximum space without exceeding the line length for this group
+ my $avail = $rlines->[$jbeg]->get_available_space_on_right();
+ my $spaces =
+ ( $avail > $extra_indentation_spaces_wanted )
+ ? $extra_indentation_spaces_wanted
+ : $avail;
+
+ #########################################################
+ # Note: min spaces can be negative; for example with -gnu
+ # f(
+ # do { 1; !!(my $x = bless []); }
+ # );
+ #########################################################
+ # The following rule is needed to match older formatting:
+ # For multiple groups, we will keep spaces non-negative.
+ # For a single group, we will allow a negative space.
+ if ( $ngroups > 1 && $spaces < 0 ) { $spaces = 0 }
+
+ # update the minimum spacing
+ if ( $ng == 0 || $spaces < $extra_leading_spaces ) {
+ $extra_leading_spaces = $spaces;
}
- return $do_not_align;
}
+
+ # update the indentation object because with -icp the terminal
+ # ');' will use the same adjustment.
+ $object->permanently_decrease_available_spaces( -$extra_leading_spaces );
+ return $extra_leading_spaces;
+}
+
+sub forget_side_comment {
+ my ($self) = @_;
+ $self->[_last_side_comment_column_] = 0;
+ return;
}
-sub adjust_side_comment {
+sub is_good_side_comment_column {
+ my ( $self, $line, $line_number, $level, $num5 ) = @_;
+
+ # Upon encountering the first side comment of a group, decide if
+ # a previous side comment should be forgotten. This involves
+ # checking several rules.
+
+ # Return true to keep old comment location
+ # Return false to forget old comment location
+
+ my $rfields = $line->get_rfields();
+ my $is_hanging_side_comment = $line->get_is_hanging_side_comment();
+
+ # RULE1: Never forget comment before a hanging side comment
+ goto KEEP if ($is_hanging_side_comment);
+
+ # RULE2: Forget a side comment after a short line difference,
+ # where 'short line difference' is computed from a formula.
+ # Using a smooth formula helps minimize sudden large changes.
+ my $line_diff = $line_number - $self->[_last_side_comment_line_number_];
+ my $alev_diff = abs( $level - $self->[_last_side_comment_level_] );
+
+ # '$num5' is the number of comments in the first 5 lines after the first
+ # comment. It is needed to keep a compact group of side comments from
+ # being influenced by a more distant side comment.
+ $num5 = 1 unless ($num5);
+
+ # Some values:
+
+ # $adiff $num5 $short_diff
+ # 0 * 12
+ # 1 1 6
+ # 1 2 4
+ # 1 3 3
+ # 1 4 2
+ # 2 1 4
+ # 2 2 2
+ # 2 3 1
+ # 3 1 3
+ # 3 2 1
+
+ my $short_diff = SC_LONG_LINE_DIFF / ( 1 + $alev_diff * $num5 );
+
+ goto FORGET
+ if ( $line_diff > $short_diff );
+
+ # RULE3: Forget a side comment if this line is at lower level and
+ # ends a block
+ my $last_sc_level = $self->[_last_side_comment_level_];
+ goto FORGET
+ if ( $level < $last_sc_level
+ && $is_closing_block_type{ substr( $rfields->[0], 0, 1 ) } );
+
+ # RULE 4: Forget the last side comment if this comment might join a cached
+ # line ...
+ if ( my $cached_line_type = get_cached_line_type() ) {
+
+ # ... otherwise side comment alignment will get messed up.
+ # For example, in the following test script
+ # with using 'perltidy -sct -act=2', the last comment would try to
+ # align with the previous and then be in the wrong column when
+ # the lines are combined:
+
+ # foreach $line (
+ # [0, 1, 2], [3, 4, 5], [6, 7, 8], # rows
+ # [0, 3, 6], [1, 4, 7], [2, 5, 8], # columns
+ # [0, 4, 8], [2, 4, 6]
+ # ) # diagonals
+ goto FORGET
+ if ( $cached_line_type == 2 || $cached_line_type == 4 );
+ }
+
+ # Otherwise, keep it alive
+ goto KEEP;
- my $do_not_align = shift;
+ FORGET:
+ return 0;
- # let's see if we can move the side comment field out a little
- # to improve readability (the last field is always a side comment field)
- my $have_side_comment = 0;
- my $first_side_comment_line = -1;
- my $maximum_field_index = $group_lines[0]->get_jmax();
- my $i = 0;
- foreach my $line (@group_lines) {
- if ( length( $line->get_rfields()->[$maximum_field_index] ) ) {
- $have_side_comment = 1;
- $first_side_comment_line = $i;
- last;
+ KEEP:
+ return 1;
+}
+
+sub align_side_comments {
+
+ my ( $self, $rlines, $rgroups ) = @_;
+
+ # Align any side comments in this batch of lines
+
+ # Given:
+ # $rlines - the lines
+ # $rgroups - the partition of the lines into groups
+ #
+ # We will be working group-by-group because all side comments
+ # (real or fake) in each group are already aligned. So we just have
+ # to make alignments between groups wherever possible.
+
+ # An unusual aspect is that within each group we have aligned both real
+ # and fake side comments. This has the consequence that the lengths of
+ # long lines without real side comments can cause 'push' all side comments
+ # to the right. This seems unusual, but testing with and without this
+ # feature shows that it is usually better this way. Othewise, side
+ # comments can be hidden between long lines without side comments and
+ # thus be harder to read.
+
+ my $group_level = $self->[_group_level_];
+ my $continuing_sc_flow = $self->[_last_side_comment_length_] > 0
+ && $group_level == $self->[_last_level_written_];
+
+ # Find groups with side comments, and remember the first nonblank comment
+ my $j_sc_beg;
+ my @todo;
+ my $ng = -1;
+ foreach my $item ( @{$rgroups} ) {
+ $ng++;
+ my ( $jbeg, $jend ) = @{$item};
+ foreach my $j ( $jbeg .. $jend ) {
+ my $line = $rlines->[$j];
+ my $jmax = $line->get_jmax();
+ if ( $line->get_rfield_lengths()->[$jmax] ) {
+
+ # this group has a line with a side comment
+ push @todo, $ng;
+ if ( !defined($j_sc_beg) ) {
+ $j_sc_beg = $j;
+ }
+ last;
+ }
}
- $i++;
}
- my $kmax = $maximum_field_index + 1;
+ # done if no groups with side comments
+ return unless @todo;
+
+ # Count $num5 = number of comments in the 5 lines after the first comment
+ # This is an important factor in a decision formula
+ my $num5 = 1;
+ for ( my $jj = $j_sc_beg + 1 ; $jj < @{$rlines} ; $jj++ ) {
+ my $ldiff = $jj - $j_sc_beg;
+ last if ( $ldiff > 5 );
+ my $line = $rlines->[$jj];
+ my $jmax = $line->get_jmax();
+ my $sc_len = $line->get_rfield_lengths()->[$jmax];
+ next unless ($sc_len);
+ $num5++;
+ }
+
+ # Forget the old side comment location if necessary
+ my $line = $rlines->[$j_sc_beg];
+ my $lnum =
+ $j_sc_beg + $self->[_file_writer_object_]->get_output_line_number();
+ my $keep_it =
+ $self->is_good_side_comment_column( $line, $lnum, $group_level, $num5 );
+ my $last_side_comment_column =
+ $keep_it ? $self->[_last_side_comment_column_] : 0;
+
+ # If there are multiple groups we will do two passes
+ # so that we can find a common alignment for all groups.
+ my $MAX_PASS = @todo > 1 ? 2 : 1;
+
+ # Loop over passes
+ my $max_comment_column = $last_side_comment_column;
+ for ( my $PASS = 1 ; $PASS <= $MAX_PASS ; $PASS++ ) {
+
+ # If there are two passes, then on the last pass make the old column
+ # equal to the largest of the group. This will result in the comments
+ # being aligned if possible.
+ if ( $PASS == $MAX_PASS ) {
+ $last_side_comment_column = $max_comment_column;
+ }
- if ($have_side_comment) {
+ # Loop over the groups with side comments
+ my $column_limit;
+ foreach my $ng (@todo) {
+ my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
- my $line = $group_lines[0];
+ # Note that since all lines in a group have common alignments, we
+ # just have to work on one of the lines (the first line).
+ my $line = $rlines->[$jbeg];
+ my $jmax = $line->get_jmax();
+ my $is_hanging_side_comment = $line->get_is_hanging_side_comment();
+ last
+ if ( $PASS < $MAX_PASS && $is_hanging_side_comment );
- # the maximum space without exceeding the line length:
- my $avail = $line->get_available_space_on_right();
+ # the maximum space without exceeding the line length:
+ my $avail = $line->get_available_space_on_right();
- # try to use the previous comment column
- my $side_comment_column = $line->get_column( $kmax - 2 );
- my $move = $last_comment_column - $side_comment_column;
+ # try to use the previous comment column
+ my $side_comment_column = $line->get_column( $jmax - 1 );
+ my $move = $last_side_comment_column - $side_comment_column;
-## my $sc_line0 = $side_comment_history[0]->[0];
-## my $sc_col0 = $side_comment_history[0]->[1];
-## my $sc_line1 = $side_comment_history[1]->[0];
-## my $sc_col1 = $side_comment_history[1]->[1];
-## my $sc_line2 = $side_comment_history[2]->[0];
-## my $sc_col2 = $side_comment_history[2]->[1];
-##
-## # FUTURE UPDATES:
-## # Be sure to ignore 'do not align' and '} # end comments'
-## # Find first $move > 0 and $move <= $avail as follows:
-## # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12
-## # 2. try sc_col2 if (line-sc_line2) < 12
-## # 3. try min possible space, plus up to 8,
-## # 4. try min possible space
+ # Remember the maximum possible column of the first line with
+ # side comment
+ if ( !defined($column_limit) ) {
+ $column_limit = $side_comment_column + $avail;
+ }
- if ( $kmax > 0 && !$do_not_align ) {
+ next if ( $jmax <= 0 );
# but if this doesn't work, give up and use the minimum space
+ my $min_move = $self->[_rOpts_minimum_space_to_comment_] - 1;
if ( $move > $avail ) {
- $move = $rOpts_minimum_space_to_comment - 1;
+ $move = $min_move;
}
# but we want some minimum space to the comment
- my $min_move = $rOpts_minimum_space_to_comment - 1;
if ( $move >= 0
- && $last_side_comment_length > 0
- && ( $first_side_comment_line == 0 )
- && $group_level == $last_level_written )
+ && $j_sc_beg == 0
+ && $continuing_sc_flow )
{
$min_move = 0;
}
+ # remove constraints on hanging side comments
+ if ($is_hanging_side_comment) { $min_move = 0 }
+
if ( $move < $min_move ) {
$move = $min_move;
}
- # previously, an upper bound was placed on $move here,
- # (maximum_space_to_comment), but it was not helpful
-
# don't exceed the available space
if ( $move > $avail ) { $move = $avail }
- # we can only increase space, never decrease
- if ( $move > 0 ) {
- $line->increase_field_width( $maximum_field_index - 1, $move );
- }
+ # We can only increase space, never decrease.
+ if ( $move < 0 ) { $move = 0 }
- # remember this column for the next group
- $last_comment_column = $line->get_column( $kmax - 2 );
- }
- else {
+ # Discover the largest column on the preliminary pass
+ if ( $PASS < $MAX_PASS ) {
+ my $col = $line->get_column( $jmax - 1 ) + $move;
- # try to at least line up the existing side comment location
- if ( $kmax > 0 && $move > 0 && $move < $avail ) {
- $line->increase_field_width( $maximum_field_index - 1, $move );
- $do_not_align = 0;
+ # but ignore columns too large for the starting line
+ if ( $col > $max_comment_column && $col < $column_limit ) {
+ $max_comment_column = $col;
+ }
}
- # reset side comment column if we can't align
+ # Make the changes on the final pass
else {
- forget_side_comment();
+ $line->increase_field_width( $jmax - 1, $move );
+
+ # remember this column for the next group
+ $last_side_comment_column = $line->get_column( $jmax - 1 );
}
+ } ## end loop over groups
+ } ## end loop over passes
+
+ # Find the last side comment
+ my $j_sc_last;
+ my $ng_last = $todo[-1];
+ my ( $jbeg, $jend ) = @{ $rgroups->[$ng_last] };
+ for ( my $jj = $jend ; $jj >= $jbeg ; $jj-- ) {
+ my $line = $rlines->[$jj];
+ my $jmax = $line->get_jmax();
+ if ( $line->get_rfield_lengths()->[$jmax] ) {
+ $j_sc_last = $jj;
+ last;
}
}
- return $do_not_align;
+
+ # Save final side comment info for possible use by the next batch
+ if ( defined($j_sc_last) ) {
+ my $line_number =
+ $self->[_file_writer_object_]->get_output_line_number() + $j_sc_last;
+ $self->[_last_side_comment_column_] = $last_side_comment_column;
+ $self->[_last_side_comment_line_number_] = $line_number;
+ $self->[_last_side_comment_level_] = $group_level;
+ }
+ return;
}
+###############################
+# CODE SECTION 6: Output Step A
+###############################
+
sub valign_output_step_A {
###############################################################
# been found. Then it is shipped to the next step.
###############################################################
- my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
- $extra_leading_spaces )
- = @_;
+ my ( $self, $rinput_hash ) = @_;
+
+ my $line = $rinput_hash->{line};
+ my $min_ci_gap = $rinput_hash->{min_ci_gap};
+ my $do_not_align = $rinput_hash->{do_not_align};
+ my $group_leader_length = $rinput_hash->{group_leader_length};
+ my $extra_leading_spaces = $rinput_hash->{extra_leading_spaces};
+ my $level = $rinput_hash->{level};
+
my $rfields = $line->get_rfields();
+ my $rfield_lengths = $line->get_rfield_lengths();
my $leading_space_count = $line->get_leading_space_count();
my $outdent_long_lines = $line->get_outdent_long_lines();
my $maximum_field_index = $line->get_jmax();
my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
+ my $Kend = $line->get_Kend();
+ my $level_end = $line->get_level_end();
# add any extra spaces
if ( $leading_space_count > $group_leader_length ) {
$leading_space_count += $min_ci_gap;
}
- my $str = $rfields->[0];
+ my $str = $rfields->[0];
+ my $str_len = $rfield_lengths->[0];
# loop to concatenate all fields of this line and needed padding
my $total_pad_count = 0;
if (
( $j == $maximum_field_index )
&& ( !defined( $rfields->[$j] )
- || ( length( $rfields->[$j] ) == 0 ) )
+ || ( $rfield_lengths->[$j] == 0 ) )
);
# compute spaces of padding before this field
my $col = $line->get_column( $j - 1 );
- my $pad = $col - ( length($str) + $leading_space_count );
+ my $pad = $col - ( $str_len + $leading_space_count );
if ($do_not_align) {
$pad =
( $j < $maximum_field_index )
? 0
- : $rOpts_minimum_space_to_comment - 1;
+ : $self->[_rOpts_minimum_space_to_comment_] - 1;
}
# if the -fpsc flag is set, move the side comment to the selected
# column if and only if it is possible, ignoring constraints on
# line length and minimum space to comment
- if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index )
+ if ( $self->[_rOpts_fixed_position_side_comment_]
+ && $j == $maximum_field_index )
{
- my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1;
+ my $newpad =
+ $pad + $self->[_rOpts_fixed_position_side_comment_] - $col - 1;
if ( $newpad >= 0 ) { $pad = $newpad; }
}
# accumulate the padding
if ( $pad > 0 ) { $total_pad_count += $pad; }
- # add this field
- if ( !defined $rfields->[$j] ) {
- write_diagnostics("UNDEFined field at j=$j\n");
- }
-
# only add padding when we have a finite field;
# this avoids extra terminal spaces if we have empty fields
- if ( length( $rfields->[$j] ) > 0 ) {
+ if ( $rfield_lengths->[$j] > 0 ) {
$str .= ' ' x $total_pad_count;
+ $str_len += $total_pad_count;
$total_pad_count = 0;
$str .= $rfields->[$j];
+ $str_len += $rfield_lengths->[$j];
}
else {
$total_pad_count = 0;
}
-
- # update side comment history buffer
- if ( $j == $maximum_field_index ) {
- my $lineno = $file_writer_object->get_output_line_number();
- shift @side_comment_history;
- push @side_comment_history, [ $lineno, $col ];
- }
}
- my $side_comment_length = ( length( $rfields->[$maximum_field_index] ) );
+ my $side_comment_length = $rfield_lengths->[$maximum_field_index];
# ship this line off
- valign_output_step_B( $leading_space_count + $extra_leading_spaces,
- $str, $side_comment_length, $outdent_long_lines,
- $rvertical_tightness_flags, $group_level );
+ $self->valign_output_step_B(
+ {
+ leading_space_count => $leading_space_count + $extra_leading_spaces,
+ line => $str,
+ line_length => $str_len,
+ side_comment_length => $side_comment_length,
+ outdent_long_lines => $outdent_long_lines,
+ rvertical_tightness_flags => $rvertical_tightness_flags,
+ level => $level,
+ level_end => $level_end,
+ Kend => $Kend,
+ }
+ );
return;
}
-sub get_extra_leading_spaces {
+sub combine_fields {
- #----------------------------------------------------------
- # Define any extra indentation space (for the -lp option).
- # Here is why:
- # If a list has side comments, sub scan_list must dump the
- # list before it sees everything. When this happens, it sets
- # the indentation to the standard scheme, but notes how
- # many spaces it would have liked to use. We may be able
- # to recover that space here in the event that all of the
- # lines of a list are back together again.
- #----------------------------------------------------------
+ # We have a group of two lines for which we do not want to align tokens
+ # between index $imax_align and the side comment. So we will delete fields
+ # between $imax_align and the side comment. Alignments have already
+ # been set so we have to adjust them.
- my $extra_leading_spaces = 0;
- if ($extra_indent_ok) {
- my $object = $group_lines[0]->get_indentation();
- if ( ref($object) ) {
- my $extra_indentation_spaces_wanted =
- get_recoverable_spaces($object);
+ my ( $line_0, $line_1, $imax_align ) = @_;
- # all indentation objects must be the same
- for my $i ( 1 .. @group_lines - 1 ) {
- if ( $object != $group_lines[$i]->get_indentation() ) {
- $extra_indentation_spaces_wanted = 0;
- last;
- }
- }
+ if ( !defined($imax_align) ) { $imax_align = -1 }
- if ($extra_indentation_spaces_wanted) {
+ # First delete the unwanted tokens
+ my $jmax_old = $line_0->get_jmax();
+ my @old_alignments = $line_0->get_alignments();
+ my @idel = ( $imax_align + 1 .. $jmax_old - 2 );
- # the maximum space without exceeding the line length:
- my $avail = $group_lines[0]->get_available_space_on_right();
- $extra_leading_spaces =
- ( $avail > $extra_indentation_spaces_wanted )
- ? $extra_indentation_spaces_wanted
- : $avail;
+ return unless (@idel);
- # update the indentation object because with -icp the terminal
- # ');' will use the same adjustment.
- $object->permanently_decrease_available_spaces(
- -$extra_leading_spaces );
- }
- }
+ foreach my $line ( $line_0, $line_1 ) {
+ delete_selected_tokens( $line, \@idel );
}
- return $extra_leading_spaces;
-}
-
-sub combine_fields {
-
- # combine all fields except for the comment field ( sidecmt.t )
- # Uses global variables:
- # @group_lines
- my $maximum_field_index = $group_lines[0]->get_jmax();
- foreach my $line (@group_lines) {
- my $rfields = $line->get_rfields();
- foreach ( 1 .. $maximum_field_index - 1 ) {
- $rfields->[0] .= $rfields->[$_];
- }
- $rfields->[1] = $rfields->[$maximum_field_index];
-
- $line->set_jmax(1);
- $line->set_column( 0, 0 );
- $line->set_column( 1, 0 );
+ # Now adjust the alignments. Note that the side comment alignment
+ # is always at jmax-1, and there is an ending alignment at jmax.
+ my @new_alignments;
+ if ( $imax_align >= 0 ) {
+ @new_alignments[ 0 .. $imax_align ] =
+ @old_alignments[ 0 .. $imax_align ];
}
- $maximum_field_index = 1;
-
- foreach my $line (@group_lines) {
- my $rfields = $line->get_rfields();
- for my $k ( 0 .. $maximum_field_index ) {
- my $pad = length( $rfields->[$k] ) - $line->current_field_width($k);
- if ( $k == 0 ) {
- $pad += $line->get_leading_space_count();
- }
- if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) }
+ my $jmax_new = $line_0->get_jmax();
- }
- }
+ $new_alignments[ $jmax_new - 1 ] = $old_alignments[ $jmax_old - 1 ];
+ $new_alignments[$jmax_new] = $old_alignments[$jmax_old];
+ $line_0->set_alignments(@new_alignments);
+ $line_1->set_alignments(@new_alignments);
return;
}
sub get_output_line_number {
- # the output line number reported to a caller is the number of items
- # written plus the number of items in the buffer
- my $self = shift;
- my $nlines = @group_lines;
- return $nlines + $file_writer_object->get_output_line_number();
+ # The output line number reported to a caller =
+ # the number of items still in the buffer +
+ # the number of items written.
+ return $_[0]->group_line_count() +
+ $_[0]->[_file_writer_object_]->get_output_line_number();
}
-sub valign_output_step_B {
-
- ###############################################################
- # This is Step B in writing vertically aligned lines.
- # Vertical tightness is applied according to preset flags.
- # In particular this routine handles stacking of opening
- # and closing tokens.
- ###############################################################
+###############################
+# CODE SECTION 7: Output Step B
+###############################
+
+{ ## closure for sub valign_output_step_B
+
+ # These are values for a cache used by valign_output_step_B.
+ my $cached_line_text;
+ my $cached_line_text_length;
+ my $cached_line_type;
+ my $cached_line_flag;
+ my $cached_seqno;
+ my $cached_line_valid;
+ my $cached_line_leading_space_count;
+ my $cached_seqno_string;
+ my $cached_line_Kend;
+ my $seqno_string;
+ my $last_nonblank_seqno_string;
+
+ sub get_seqno_string {
+ return $seqno_string;
+ }
- my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
- $rvertical_tightness_flags, $level )
- = @_;
+ sub get_last_nonblank_seqno_string {
+ return $last_nonblank_seqno_string;
+ }
- # handle outdenting of long lines:
- if ($outdent_long_lines) {
- my $excess =
- length($str) -
- $side_comment_length +
- $leading_space_count -
- maximum_line_length_for_level($level);
- if ( $excess > 0 ) {
- $leading_space_count = 0;
- $last_outdented_line_at =
- $file_writer_object->get_output_line_number();
+ sub set_last_nonblank_seqno_string {
+ my ($val) = @_;
+ $last_nonblank_seqno_string = $val;
+ return;
+ }
- unless ($outdented_line_count) {
- $first_outdented_line_at = $last_outdented_line_at;
- }
- $outdented_line_count++;
- }
+ sub get_cached_line_flag {
+ return $cached_line_flag;
}
- # Make preliminary leading whitespace. It could get changed
- # later by entabbing, so we have to keep track of any changes
- # to the leading_space_count from here on.
- my $leading_string =
- $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
+ sub get_cached_line_type {
+ return $cached_line_type;
+ }
- # Unpack any recombination data; it was packed by
- # sub send_lines_to_vertical_aligner. Contents:
- #
- # [0] type: 1=opening non-block 2=closing non-block
- # 3=opening block brace 4=closing block brace
- # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
- # if closing: spaces of padding to use
- # [2] sequence number of container
- # [3] valid flag: do not append if this flag is false
- #
- my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
- $seqno_end );
- if ($rvertical_tightness_flags) {
- (
- $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
- $seqno_end
- ) = @{$rvertical_tightness_flags};
+ sub set_cached_line_valid {
+ my ($val) = @_;
+ $cached_line_valid = $val;
+ return;
}
- $seqno_string = $seqno_end;
+ sub get_cached_seqno {
+ return $cached_seqno;
+ }
- # handle any cached line ..
- # either append this line to it or write it out
- if ( length($cached_line_text) ) {
+ sub initialize_step_B_cache {
+
+ # valign_output_step_B cache:
+ $cached_line_text = "";
+ $cached_line_text_length = 0;
+ $cached_line_type = 0;
+ $cached_line_flag = 0;
+ $cached_seqno = 0;
+ $cached_line_valid = 0;
+ $cached_line_leading_space_count = 0;
+ $cached_seqno_string = "";
+ $cached_line_Kend = undef;
+
+ # These vars hold a string of sequence numbers joined together used by
+ # the cache
+ $seqno_string = "";
+ $last_nonblank_seqno_string = "";
+ return;
+ }
- # Dump an invalid cached line
- if ( !$cached_line_valid ) {
- valign_output_step_C( $cached_line_text,
+ sub _flush_cache {
+ my ($self) = @_;
+ if ($cached_line_type) {
+ $seqno_string = $cached_seqno_string;
+ $self->valign_output_step_C(
+ $cached_line_text,
$cached_line_leading_space_count,
- $last_level_written );
+ $self->[_last_level_written_],
+ $cached_line_Kend,
+ );
+ $cached_line_type = 0;
+ $cached_line_text = "";
+ $cached_line_text_length = 0;
+ $cached_seqno_string = "";
+ $cached_line_Kend = undef;
}
+ return;
+ }
- # Handle cached line ending in OPENING tokens
- elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
-
- my $gap = $leading_space_count - length($cached_line_text);
-
- # handle option of just one tight opening per line:
- if ( $cached_line_flag == 1 ) {
- if ( defined($open_or_close) && $open_or_close == 1 ) {
- $gap = -1;
+ sub valign_output_step_B {
+
+ ###############################################################
+ # This is Step B in writing vertically aligned lines.
+ # Vertical tightness is applied according to preset flags.
+ # In particular this routine handles stacking of opening
+ # and closing tokens.
+ ###############################################################
+
+ my ( $self, $rinput ) = @_;
+
+ my $leading_space_count = $rinput->{leading_space_count};
+ my $str = $rinput->{line};
+ my $str_length = $rinput->{line_length};
+ my $side_comment_length = $rinput->{side_comment_length};
+ my $outdent_long_lines = $rinput->{outdent_long_lines};
+ my $rvertical_tightness_flags = $rinput->{rvertical_tightness_flags};
+ my $level = $rinput->{level};
+ my $level_end = $rinput->{level_end};
+ my $Kend = $rinput->{Kend};
+
+ my $last_level_written = $self->[_last_level_written_];
+
+ # Useful -gcs test cases for wide characters are
+ # perl527/(method.t.2, reg_mesg.t, mime-header.t)
+
+ # handle outdenting of long lines:
+ my $is_outdented_line;
+ if ($outdent_long_lines) {
+ my $excess =
+ $str_length -
+ $side_comment_length +
+ $leading_space_count -
+ $self->maximum_line_length_for_level($level);
+ if ( $excess > 0 ) {
+ $leading_space_count = 0;
+ my $file_writer_object = $self->[_file_writer_object_];
+ my $last_outdented_line_at =
+ $file_writer_object->get_output_line_number();
+ $self->[_last_outdented_line_at_] = $last_outdented_line_at;
+
+ my $outdented_line_count = $self->[_outdented_line_count_];
+ unless ($outdented_line_count) {
+ $self->[_first_outdented_line_at_] =
+ $last_outdented_line_at;
}
+ $outdented_line_count++;
+ $self->[_outdented_line_count_] = $outdented_line_count;
+ $is_outdented_line = 1;
}
+ }
+
+ # Make preliminary leading whitespace. It could get changed
+ # later by entabbing, so we have to keep track of any changes
+ # to the leading_space_count from here on.
+ my $leading_string =
+ $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
+ my $leading_string_length = length($leading_string);
+
+ # Unpack any recombination data; it was packed by
+ # sub send_lines_to_vertical_aligner. Contents:
+ #
+ # [0] type: 1=opening non-block 2=closing non-block
+ # 3=opening block brace 4=closing block brace
+ # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
+ # if closing: spaces of padding to use
+ # [2] sequence number of container
+ # [3] valid flag: do not append if this flag is false
+ #
+ my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
+ $seqno_end );
+ if ($rvertical_tightness_flags) {
+ (
+ $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
+ $seqno_end
+ ) = @{$rvertical_tightness_flags};
+ }
- if ( $gap >= 0 && defined($seqno_beg) ) {
- $leading_string = $cached_line_text . ' ' x $gap;
- $leading_space_count = $cached_line_leading_space_count;
- $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
- $level = $last_level_written;
+ $seqno_string = $seqno_end;
+
+ # handle any cached line ..
+ # either append this line to it or write it out
+ # Note: the function length() is used in this next test out of caution.
+ # All testing has shown that the variable $cached_line_text_length is
+ # correct, but its calculation is complex and a loss of cached text
+ # would be a disaster.
+ if ( length($cached_line_text) ) {
+
+ # Dump an invalid cached line
+ if ( !$cached_line_valid ) {
+ $self->valign_output_step_C(
+ $cached_line_text, $cached_line_leading_space_count,
+ $last_level_written, $cached_line_Kend
+ );
}
- else {
- valign_output_step_C( $cached_line_text,
- $cached_line_leading_space_count,
- $last_level_written );
+
+ # Handle cached line ending in OPENING tokens
+ elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
+
+ my $gap = $leading_space_count - $cached_line_text_length;
+
+ # handle option of just one tight opening per line:
+ if ( $cached_line_flag == 1 ) {
+ if ( defined($open_or_close) && $open_or_close == 1 ) {
+ $gap = -1;
+ }
+ }
+
+ # Do not join the lines if this might produce a one-line
+ # container which exceeds the maximum line length. This is
+ # necessary prevent blinking, particularly with the combination
+ # -xci -pvt=2. In that case a one-line block alternately forms
+ # and breaks, causing -xci to alternately turn on and off (case
+ # b765).
+ # Patched to fix cases b656 b862 b971 b972: always do the check
+ # if -vmll is set. The reason is that the -vmll option can
+ # cause changes in the maximum line length, leading to blinkers
+ # if not checked.
+ if (
+ $gap >= 0
+ && ( $self->[_rOpts_variable_maximum_line_length_]
+ || ( defined($level_end) && $level > $level_end ) )
+ )
+ {
+ my $test_line_length =
+ $cached_line_text_length + $gap + $str_length;
+ my $maximum_line_length =
+ $self->maximum_line_length_for_level($last_level_written);
+
+ # Add a small tolerance in the length test (fixes case b862)
+ if ( $test_line_length > $maximum_line_length - 2 ) {
+ $gap = -1;
+ }
+ }
+
+ if ( $gap >= 0 && defined($seqno_beg) ) {
+ $leading_string = $cached_line_text . ' ' x $gap;
+ $leading_string_length = $cached_line_text_length + $gap;
+ $leading_space_count = $cached_line_leading_space_count;
+ $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
+ $level = $last_level_written;
+ }
+ else {
+ $self->valign_output_step_C(
+ $cached_line_text, $cached_line_leading_space_count,
+ $last_level_written, $cached_line_Kend
+ );
+ }
}
- }
- # Handle cached line ending in CLOSING tokens
- else {
- my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
- if (
+ # Handle cached line ending in CLOSING tokens
+ else {
+ my $test_line =
+ $cached_line_text . ' ' x $cached_line_flag . $str;
+ my $test_line_length =
+ $cached_line_text_length + $cached_line_flag + $str_length;
+ if (
- # The new line must start with container
- $seqno_beg
+ # The new line must start with container
+ $seqno_beg
- # The container combination must be okay..
- && (
+ # The container combination must be okay..
+ && (
- # okay to combine like types
- ( $open_or_close == $cached_line_type )
+ # okay to combine like types
+ ( $open_or_close == $cached_line_type )
- # closing block brace may append to non-block
- || ( $cached_line_type == 2 && $open_or_close == 4 )
+ # closing block brace may append to non-block
+ || ( $cached_line_type == 2 && $open_or_close == 4 )
- # something like ');'
- || ( !$open_or_close && $cached_line_type == 2 )
+ # something like ');'
+ || ( !$open_or_close && $cached_line_type == 2 )
- )
+ )
- # The combined line must fit
- && (
- length($test_line) <=
- maximum_line_length_for_level($last_level_written) )
- )
- {
+ # The combined line must fit
+ && (
+ $test_line_length <=
+ $self->maximum_line_length_for_level(
+ $last_level_written)
+ )
+ )
+ {
- $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
-
- # Patch to outdent closing tokens ending # in ');'
- # If we are joining a line like ');' to a previous stacked
- # set of closing tokens, then decide if we may outdent the
- # combined stack to the indentation of the ');'. Since we
- # should not normally outdent any of the other tokens more than
- # the indentation of the lines that contained them, we will
- # only do this if all of the corresponding opening
- # tokens were on the same line. This can happen with
- # -sot and -sct. For example, it is ok here:
- # __PACKAGE__->load_components( qw(
- # PK::Auto
- # Core
- # ));
- #
- # But, for example, we do not outdent in this example because
- # that would put the closing sub brace out farther than the
- # opening sub brace:
- #
- # perltidy -sot -sct
- # $c->Tk::bind(
- # '<Control-f>' => sub {
- # my ($c) = @_;
- # my $e = $c->XEvent;
- # itemsUnderArea $c;
- # } );
- #
- if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) {
-
- # The way to tell this is if the stacked sequence numbers
- # of this output line are the reverse of the stacked
- # sequence numbers of the previous non-blank line of
- # sequence numbers. So we can join if the previous
- # nonblank string of tokens is the mirror image. For
- # example if stack )}] is 13:8:6 then we are looking for a
- # leading stack like [{( which is 6:8:13 We only need to
- # check the two ends, because the intermediate tokens must
- # fall in order. Note on speed: having to split on colons
- # and eliminate multiple colons might appear to be slow,
- # but it's not an issue because we almost never come
- # through here. In a typical file we don't.
- $seqno_string =~ s/^:+//;
- $last_nonblank_seqno_string =~ s/^:+//;
- $seqno_string =~ s/:+/:/g;
- $last_nonblank_seqno_string =~ s/:+/:/g;
-
- # how many spaces can we outdent?
- my $diff =
- $cached_line_leading_space_count - $leading_space_count;
- if ( $diff > 0
- && length($seqno_string)
- && length($last_nonblank_seqno_string) ==
- length($seqno_string) )
+ $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
+
+ # Patch to outdent closing tokens ending # in ');' If we
+ # are joining a line like ');' to a previous stacked set of
+ # closing tokens, then decide if we may outdent the
+ # combined stack to the indentation of the ');'. Since we
+ # should not normally outdent any of the other tokens more
+ # than the indentation of the lines that contained them, we
+ # will only do this if all of the corresponding opening
+ # tokens were on the same line. This can happen with -sot
+ # and -sct.
+
+ # For example, it is ok here:
+ # __PACKAGE__->load_components( qw(
+ # PK::Auto
+ # Core
+ # ));
+ #
+ # But, for example, we do not outdent in this example
+ # because that would put the closing sub brace out farther
+ # than the opening sub brace:
+ #
+ # perltidy -sot -sct
+ # $c->Tk::bind(
+ # '<Control-f>' => sub {
+ # my ($c) = @_;
+ # my $e = $c->XEvent;
+ # itemsUnderArea $c;
+ # } );
+ #
+ if ( $str =~ /^\);/
+ && $cached_line_text =~ /^[\)\}\]\s]*$/ )
{
- my @seqno_last =
- ( split /:/, $last_nonblank_seqno_string );
- my @seqno_now = ( split /:/, $seqno_string );
- if ( @seqno_now
- && @seqno_last
- && $seqno_now[-1] == $seqno_last[0]
- && $seqno_now[0] == $seqno_last[-1] )
- {
- # OK to outdent ..
- # for absolute safety, be sure we only remove
- # whitespace
- my $ws = substr( $test_line, 0, $diff );
- if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
-
- $test_line = substr( $test_line, $diff );
- $cached_line_leading_space_count -= $diff;
- $last_level_written =
- level_change(
- $cached_line_leading_space_count,
- $diff, $last_level_written );
- reduce_valign_buffer_indentation($diff);
+ # The way to tell this is if the stacked sequence
+ # numbers of this output line are the reverse of the
+ # stacked sequence numbers of the previous non-blank
+ # line of sequence numbers. So we can join if the
+ # previous nonblank string of tokens is the mirror
+ # image. For example if stack )}] is 13:8:6 then we
+ # are looking for a leading stack like [{( which
+ # is 6:8:13. We only need to check the two ends,
+ # because the intermediate tokens must fall in order.
+ # Note on speed: having to split on colons and
+ # eliminate multiple colons might appear to be slow,
+ # but it's not an issue because we almost never come
+ # through here. In a typical file we don't.
+
+ $seqno_string =~ s/^:+//;
+ $last_nonblank_seqno_string =~ s/^:+//;
+ $seqno_string =~ s/:+/:/g;
+ $last_nonblank_seqno_string =~ s/:+/:/g;
+
+ # how many spaces can we outdent?
+ my $diff =
+ $cached_line_leading_space_count -
+ $leading_space_count;
+ if ( $diff > 0
+ && length($seqno_string)
+ && length($last_nonblank_seqno_string) ==
+ length($seqno_string) )
+ {
+ my @seqno_last =
+ ( split /:/, $last_nonblank_seqno_string );
+ my @seqno_now = ( split /:/, $seqno_string );
+ if ( @seqno_now
+ && @seqno_last
+ && $seqno_now[-1] == $seqno_last[0]
+ && $seqno_now[0] == $seqno_last[-1] )
+ {
+
+ # OK to outdent ..
+ # for absolute safety, be sure we only remove
+ # whitespace
+ my $ws = substr( $test_line, 0, $diff );
+ if ( ( length($ws) == $diff )
+ && $ws =~ /^\s+$/ )
+ {
+
+ $test_line = substr( $test_line, $diff );
+ $cached_line_leading_space_count -= $diff;
+ $last_level_written =
+ $self->level_change(
+ $cached_line_leading_space_count,
+ $diff, $last_level_written );
+ $self->reduce_valign_buffer_indentation(
+ $diff);
+ }
+
+ # shouldn't happen, but not critical:
+ ##else {
+ ## ERROR transferring indentation here
+ ##}
}
-
- # shouldn't happen, but not critical:
- ##else {
- ## ERROR transferring indentation here
- ##}
}
}
- }
- $str = $test_line;
- $leading_string = "";
- $leading_space_count = $cached_line_leading_space_count;
- $level = $last_level_written;
+ $str = $test_line;
+ $str_length = $test_line_length;
+ $leading_string = "";
+ $leading_string_length = 0;
+ $leading_space_count = $cached_line_leading_space_count;
+ $level = $last_level_written;
+ }
+ else {
+ $self->valign_output_step_C(
+ $cached_line_text, $cached_line_leading_space_count,
+ $last_level_written, $cached_line_Kend
+ );
+ }
}
- else {
- valign_output_step_C( $cached_line_text,
- $cached_line_leading_space_count,
- $last_level_written );
+ }
+ $cached_line_type = 0;
+ $cached_line_text = "";
+ $cached_line_text_length = 0;
+ $cached_line_Kend = undef;
+
+ # make the line to be written
+ my $line = $leading_string . $str;
+ my $line_length = $leading_string_length + $str_length;
+
+ # Safety check: be sure that a line to be cached as a stacked block
+ # brace line ends in the appropriate opening or closing block brace.
+ # This should always be the case if the caller set flags correctly.
+ # Code '3' is for -sobb, code '4' is for -scbb.
+ if ($open_or_close) {
+ if ( $open_or_close == 3 && $line !~ /\{\s*$/
+ || $open_or_close == 4 && $line !~ /\}\s*$/ )
+ {
+ $open_or_close = 0;
}
}
- }
- $cached_line_type = 0;
- $cached_line_text = "";
- # make the line to be written
- my $line = $leading_string . $str;
+ # write or cache this line ...
+ # fix for case b999: do not cache an outdented line
+ if ( !$open_or_close || $side_comment_length > 0 || $is_outdented_line )
+ {
+ $self->valign_output_step_C( $line, $leading_space_count, $level,
+ $Kend );
+ }
+ else {
+ $cached_line_text = $line;
+ $cached_line_text_length = $line_length;
+ $cached_line_type = $open_or_close;
+ $cached_line_flag = $tightness_flag;
+ $cached_seqno = $seqno;
+ $cached_line_valid = $valid;
+ $cached_line_leading_space_count = $leading_space_count;
+ $cached_seqno_string = $seqno_string;
+ $cached_line_Kend = $Kend;
+ }
- # write or cache this line
- if ( !$open_or_close || $side_comment_length > 0 ) {
- valign_output_step_C( $line, $leading_space_count, $level );
+ $self->[_last_level_written_] = $level;
+ $self->[_last_side_comment_length_] = $side_comment_length;
+ return;
}
- else {
- $cached_line_text = $line;
- $cached_line_type = $open_or_close;
- $cached_line_flag = $tightness_flag;
- $cached_seqno = $seqno;
- $cached_line_valid = $valid;
- $cached_line_leading_space_count = $leading_space_count;
- $cached_seqno_string = $seqno_string;
- }
-
- $last_level_written = $level;
- $last_side_comment_length = $side_comment_length;
- $extra_indent_ok = 0;
- return;
}
-sub valign_output_step_C {
+###############################
+# CODE SECTION 8: Output Step C
+###############################
- ###############################################################
- # This is Step C in writing vertically aligned lines.
- # Lines are either stored in a buffer or passed along to the next step.
- # The reason for storing lines is that we may later want to reduce their
- # indentation when -sot and -sct are both used.
- ###############################################################
- my @args = @_;
+{ ## closure for sub valign_output_step_C
- # Dump any saved lines if we see a line with an unbalanced opening or
- # closing token.
- dump_valign_buffer() if ( $seqno_string && $valign_buffer_filling );
+ # Vertical alignment buffer used by valign_output_step_C
+ my $valign_buffer_filling;
+ my @valign_buffer;
- # Either store or write this line
- if ($valign_buffer_filling) {
- push @valign_buffer, [@args];
+ sub initialize_valign_buffer {
+ @valign_buffer = ();
+ $valign_buffer_filling = "";
+ return;
}
- else {
- valign_output_step_D(@args);
+
+ sub dump_valign_buffer {
+ my ($self) = @_;
+ if (@valign_buffer) {
+ foreach (@valign_buffer) {
+ $self->valign_output_step_D( @{$_} );
+ }
+ @valign_buffer = ();
+ }
+ $valign_buffer_filling = "";
+ return;
+ }
+
+ sub reduce_valign_buffer_indentation {
+
+ my ( $self, $diff ) = @_;
+ if ( $valign_buffer_filling && $diff ) {
+ my $max_valign_buffer = @valign_buffer;
+ foreach my $i ( 0 .. $max_valign_buffer - 1 ) {
+ my ( $line, $leading_space_count, $level, $Kend ) =
+ @{ $valign_buffer[$i] };
+ my $ws = substr( $line, 0, $diff );
+ if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
+ $line = substr( $line, $diff );
+ }
+ if ( $leading_space_count >= $diff ) {
+ $leading_space_count -= $diff;
+ $level =
+ $self->level_change( $leading_space_count, $diff,
+ $level );
+ }
+ $valign_buffer[$i] =
+ [ $line, $leading_space_count, $level, $Kend ];
+ }
+ }
+ return;
}
- # For lines starting or ending with opening or closing tokens..
- if ($seqno_string) {
- $last_nonblank_seqno_string = $seqno_string;
+ sub valign_output_step_C {
- # Start storing lines when we see a line with multiple stacked opening
- # tokens.
- # patch for RT #94354, requested by Colin Williams
- if ( $seqno_string =~ /^\d+(\:+\d+)+$/ && $args[0] !~ /^[\}\)\]\:\?]/ )
- {
+ ###############################################################
+ # This is Step C in writing vertically aligned lines.
+ # Lines are either stored in a buffer or passed along to the next step.
+ # The reason for storing lines is that we may later want to reduce their
+ # indentation when -sot and -sct are both used.
+ ###############################################################
+ my ( $self, @args ) = @_;
+
+ my $seqno_string = get_seqno_string();
+ my $last_nonblank_seqno_string = get_last_nonblank_seqno_string();
+
+ # Dump any saved lines if we see a line with an unbalanced opening or
+ # closing token.
+ $self->dump_valign_buffer()
+ if ( $seqno_string && $valign_buffer_filling );
+
+ # Either store or write this line
+ if ($valign_buffer_filling) {
+ push @valign_buffer, [@args];
+ }
+ else {
+ $self->valign_output_step_D(@args);
+ }
+
+ # For lines starting or ending with opening or closing tokens..
+ if ($seqno_string) {
+ $last_nonblank_seqno_string = $seqno_string;
+ set_last_nonblank_seqno_string($seqno_string);
+
+ # Start storing lines when we see a line with multiple stacked
+ # opening tokens.
+ # patch for RT #94354, requested by Colin Williams
+ if ( $seqno_string =~ /^\d+(\:+\d+)+$/
+ && $args[0] !~ /^[\}\)\]\:\?]/ )
+ {
- # This test is efficient but a little subtle: The first test says
- # that we have multiple sequence numbers and hence multiple opening
- # or closing tokens in this line. The second part of the test
- # rejects stacked closing and ternary tokens. So if we get here
- # then we should have stacked unbalanced opening tokens.
+ # This test is efficient but a little subtle: The first test
+ # says that we have multiple sequence numbers and hence
+ # multiple opening or closing tokens in this line. The second
+ # part of the test rejects stacked closing and ternary tokens.
+ # So if we get here then we should have stacked unbalanced
+ # opening tokens.
- # Here is a complex example:
+ # Here is a complex example:
- # Foo($Bar[0], { # (side comment)
- # baz => 1,
- # });
+ # Foo($Bar[0], { # (side comment)
+ # baz => 1,
+ # });
- # The first line has sequence 6::4. It does not begin with
- # a closing token or ternary, so it passes the test and must be
- # stacked opening tokens.
+ # The first line has sequence 6::4. It does not begin with
+ # a closing token or ternary, so it passes the test and must be
+ # stacked opening tokens.
- # The last line has sequence 4:6 but is a stack of closing tokens,
- # so it gets rejected.
+ # The last line has sequence 4:6 but is a stack of closing
+ # tokens, so it gets rejected.
- # Note that the sequence number of an opening token for a qw quote
- # is a negative number and will be rejected.
- # For example, for the following line:
- # skip_symbols([qw(
- # $seqno_string='10:5:-1'. It would be okay to accept it but
- # I decided not to do this after testing.
+ # Note that the sequence number of an opening token for a qw
+ # quote is a negative number and will be rejected. For
+ # example, for the following line: skip_symbols([qw(
+ # $seqno_string='10:5:-1'. It would be okay to accept it but I
+ # decided not to do this after testing.
- $valign_buffer_filling = $seqno_string;
+ $valign_buffer_filling = $seqno_string;
+ }
}
+ return;
}
- return;
}
+###############################
+# CODE SECTION 9: Output Step D
+###############################
+
sub valign_output_step_D {
###############################################################
# This is Step D in writing vertically aligned lines.
+ # It is the end of the vertical alignment pipeline.
# Write one vertically aligned line of code to the output object.
###############################################################
- my ( $line, $leading_space_count, $level ) = @_;
+ my ( $self, $line, $leading_space_count, $level, $Kend ) = @_;
# The line is currently correct if there is no tabbing (recommended!)
# We may have to lop off some leading spaces and replace with tabs.
if ( $leading_space_count > 0 ) {
+ my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
+ my $rOpts_tabs = $self->[_rOpts_tabs_];
+ my $rOpts_entab_leading_whitespace =
+ $self->[_rOpts_entab_leading_whitespace_];
+
# Nothing to do if no tabs
if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
|| $rOpts_indent_columns <= 0 )
# Handle entab option
elsif ($rOpts_entab_leading_whitespace) {
- # Patch 12-nov-2018 based on report from Glenn. Extra padding was
- # not correctly entabbed, nor were side comments:
- # Increase leading space count for a padded line to get correct tabbing
+ # Patch 12-nov-2018 based on report from Glenn. Extra padding was
+ # not correctly entabbed, nor were side comments: Increase leading
+ # space count for a padded line to get correct tabbing
if ( $line =~ /^(\s+)(.*)$/ ) {
my $spaces = length($1);
if ( $spaces > $leading_space_count ) {
# shouldn't happen - program error counting whitespace
# - skip entabbing
- VALIGN_DEBUG_FLAG_TABS
- && warning(
+ DEBUG_TABS
+ && $self->warning(
"Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
);
}
# But it could be an outdented comment
if ( $line !~ /^\s*#/ ) {
- VALIGN_DEBUG_FLAG_TABS
- && warning(
-"Error entabbing in valign_output_step_D: for level=$group_level count=$leading_space_count\n"
+ DEBUG_TABS
+ && $self->warning(
+"Error entabbing in valign_output_step_D: for level=$level count=$leading_space_count\n"
);
}
$leading_string = ( ' ' x $leading_space_count );
# shouldn't happen - program error counting whitespace
# we'll skip entabbing
- VALIGN_DEBUG_FLAG_TABS
- && warning(
+ DEBUG_TABS
+ && $self->warning(
"Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
);
}
}
}
- $file_writer_object->write_code_line( $line . "\n" );
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->write_code_line( $line . "\n", $Kend );
+
return;
}
-{ # begin get_leading_string
+{ ## closure for sub get_leading_string
my @leading_string_cache;
+ sub initialize_leading_string_cache {
+ @leading_string_cache = ();
+ return;
+ }
+
sub get_leading_string {
# define the leading whitespace string for this line..
- my $leading_whitespace_count = shift;
+ my ( $self, $leading_whitespace_count, $group_level ) = @_;
# Handle case of zero whitespace, which includes multi-line quotes
# (which may have a finite level; this prevents tab problems)
my $leading_string;
# Handle simple case of no tabs
+ my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
+ my $rOpts_tabs = $self->[_rOpts_tabs_];
+ my $rOpts_entab_leading_whitespace =
+ $self->[_rOpts_entab_leading_whitespace_];
+
if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
|| $rOpts_indent_columns <= 0 )
{
# shouldn't happen:
if ( $space_count < 0 ) {
- VALIGN_DEBUG_FLAG_TABS
- && warning(
+ DEBUG_TABS
+ && $self->warning(
"Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n"
);
}
} # end get_leading_string
+##########################
+# CODE SECTION 10: Summary
+##########################
+
sub report_anything_unusual {
my $self = shift;
+
+ my $outdented_line_count = $self->[_outdented_line_count_];
if ( $outdented_line_count > 0 ) {
- write_logfile_entry(
+ $self->write_logfile_entry(
"$outdented_line_count long lines were outdented:\n");
- write_logfile_entry(
+ my $first_outdented_line_at = $self->[_first_outdented_line_at_];
+ $self->write_logfile_entry(
" First at output line $first_outdented_line_at\n");
if ( $outdented_line_count > 1 ) {
- write_logfile_entry(
+ my $last_outdented_line_at = $self->[_last_outdented_line_at_];
+ $self->write_logfile_entry(
" Last at output line $last_outdented_line_at\n");
}
- write_logfile_entry(
+ $self->write_logfile_entry(
" use -noll to prevent outdenting, -l=n to increase line length\n"
);
- write_logfile_entry("\n");
+ $self->write_logfile_entry("\n");
}
return;
}