# The Perl::Tidy::Formatter package adds indentation, whitespace, and
# line breaks to the token stream
#
-# WARNING: This is not a real class for speed reasons. Only one
-# Formatter may be used.
-#
#####################################################################
+# Index...
+# CODE SECTION 1: Preliminary code, global definitions and sub new
+# sub new
+# CODE SECTION 2: Some Basic Utilities
+# CODE SECTION 3: Check and process options
+# sub check_options
+# CODE SECTION 4: Receive lines from the tokenizer
+# sub write_line
+# CODE SECTION 5: Pre-process the entire file
+# sub finish_formatting
+# CODE SECTION 6: Process line-by-line
+# sub process_all_lines
+# CODE SECTION 7: Process lines of code
+# process_line_of_CODE
+# CODE SECTION 8: Utilities for setting breakpoints
+# sub set_forced_breakpoint
+# CODE SECTION 9: Process batches of code
+# sub grind_batch_of_CODE
+# CODE SECTION 10: Code to break long statments
+# sub break_long_lines
+# CODE SECTION 11: Code to break long lists
+# sub break_lists
+# CODE SECTION 12: Code for setting indentation
+# CODE SECTION 13: Preparing batch of lines for vertical alignment
+# sub convey_batch_to_vertical_aligner
+# CODE SECTION 14: Code for creating closing side comments
+# sub add_closing_side_comment
+# CODE SECTION 15: Summarize
+# sub wrapup
+
+#######################################################################
+# CODE SECTION 1: Preliminary code and global definitions up to sub new
+#######################################################################
+
package Perl::Tidy::Formatter;
use strict;
use warnings;
+
+# This flag gets switched on during automated testing for extra checking
+use constant DEVEL_MODE => 0;
+
+{ #<<< A non-indenting brace to contain all lexical variables
+
use Carp;
-our $VERSION = '20200110';
+our $VERSION = '20220217';
# The Tokenizer will be loaded with the Formatter
##use Perl::Tidy::Tokenizer; # for is_keyword()
+sub AUTOLOAD {
+
+ # Catch any undefined sub calls so that we are sure to get
+ # some diagnostic information. This sub should never be called
+ # except for a programming error.
+ our $AUTOLOAD;
+ return if ( $AUTOLOAD =~ /\bDESTROY$/ );
+ my ( $pkg, $fname, $lno ) = caller();
+ my $my_package = __PACKAGE__;
+ print STDERR <<EOM;
+======================================================================
+Error detected in package '$my_package', version $VERSION
+Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
+Called from package: '$pkg'
+Called from File '$fname' at line '$lno'
+This error is probably due to a recent programming change
+======================================================================
+EOM
+ exit 1;
+}
+
+sub DESTROY {
+ my $self = shift;
+ $self->_decrement_count();
+ return;
+}
+
sub Die {
my ($msg) = @_;
Perl::Tidy::Die($msg);
return;
}
+sub Fault {
+ my ($msg) = @_;
+
+ # This routine is called for errors that really should not occur
+ # except if there has been a bug introduced by a recent program change.
+ # Please add comments at calls to Fault to explain why the call
+ # should not occur, and where to look to fix it.
+ my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
+ my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
+ my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
+ my $input_stream_name = get_input_stream_name();
+
+ Die(<<EOM);
+==============================================================================
+While operating on input stream with name: '$input_stream_name'
+A fault was detected at line $line0 of sub '$subroutine1'
+in file '$filename1'
+which was called from line $line1 of sub '$subroutine2'
+Message: '$msg'
+This is probably an error introduced by a recent programming change.
+Perl::Tidy::Formatter.pm reports VERSION='$VERSION'.
+==============================================================================
+EOM
+
+ # We shouldn't get here, but this return is to keep Perl-Critic from
+ # complaining.
+ return;
+}
+
sub Exit {
my ($msg) = @_;
Perl::Tidy::Exit($msg);
croak "unexpected return from Perl::Tidy::Exit";
}
-BEGIN {
+# Global variables ...
+my (
+
+ #-----------------------------------------------------------------
+ # Section 1: Global variables which are either always constant or
+ # are constant after being configured by user-supplied
+ # parameters. They remain constant as a file is being processed.
+ #-----------------------------------------------------------------
+
+ # user parameters and shortcuts
+ $rOpts,
+ $rOpts_add_newlines,
+ $rOpts_add_whitespace,
+ $rOpts_blank_lines_after_opening_block,
+ $rOpts_block_brace_tightness,
+ $rOpts_block_brace_vertical_tightness,
+ $rOpts_break_after_labels,
+ $rOpts_break_at_old_attribute_breakpoints,
+ $rOpts_break_at_old_comma_breakpoints,
+ $rOpts_break_at_old_keyword_breakpoints,
+ $rOpts_break_at_old_logical_breakpoints,
+ $rOpts_break_at_old_semicolon_breakpoints,
+ $rOpts_break_at_old_ternary_breakpoints,
+ $rOpts_break_open_paren_list,
+ $rOpts_closing_side_comments,
+ $rOpts_closing_side_comment_else_flag,
+ $rOpts_closing_side_comment_maximum_text,
+ $rOpts_comma_arrow_breakpoints,
+ $rOpts_continuation_indentation,
+ $rOpts_delete_closing_side_comments,
+ $rOpts_delete_old_whitespace,
+ $rOpts_delete_side_comments,
+ $rOpts_extended_continuation_indentation,
+ $rOpts_format_skipping,
+ $rOpts_freeze_whitespace,
+ $rOpts_function_paren_vertical_alignment,
+ $rOpts_fuzzy_line_length,
+ $rOpts_ignore_old_breakpoints,
+ $rOpts_ignore_side_comment_lengths,
+ $rOpts_indent_closing_brace,
+ $rOpts_indent_columns,
+ $rOpts_indent_only,
+ $rOpts_keep_interior_semicolons,
+ $rOpts_line_up_parentheses,
+ $rOpts_logical_padding,
+ $rOpts_maximum_consecutive_blank_lines,
+ $rOpts_maximum_fields_per_table,
+ $rOpts_maximum_line_length,
+ $rOpts_one_line_block_semicolons,
+ $rOpts_opening_brace_always_on_right,
+ $rOpts_outdent_keywords,
+ $rOpts_outdent_labels,
+ $rOpts_outdent_long_comments,
+ $rOpts_outdent_long_quotes,
+ $rOpts_outdent_static_block_comments,
+ $rOpts_recombine,
+ $rOpts_short_concatenation_item_length,
+ $rOpts_stack_closing_block_brace,
+ $rOpts_static_block_comments,
+ $rOpts_sub_alias_list,
+ $rOpts_tee_block_comments,
+ $rOpts_tee_pod,
+ $rOpts_tee_side_comments,
+ $rOpts_variable_maximum_line_length,
+ $rOpts_valign,
+ $rOpts_valign_code,
+ $rOpts_valign_side_comments,
+ $rOpts_whitespace_cycle,
+ $rOpts_extended_line_up_parentheses,
+
+ # Static hashes initialized in a BEGIN block
+ %is_assignment,
+ %is_if_unless_and_or_last_next_redo_return,
+ %is_if_elsif_else_unless_while_until_for_foreach,
+ %is_if_unless_while_until_for_foreach,
+ %is_last_next_redo_return,
+ %is_if_unless,
+ %is_and_or,
+ %is_chain_operator,
+ %is_block_without_semicolon,
+ %ok_to_add_semicolon_for_block_type,
+ %is_opening_type,
+ %is_closing_type,
+ %is_opening_token,
+ %is_closing_token,
+ %is_equal_or_fat_comma,
+ %is_counted_type,
+ %is_opening_sequence_token,
+ %is_closing_sequence_token,
+ %is_container_label_type,
+
+ @all_operators,
+
+ # Initialized in check_options. These are constants and could
+ # just as well be initialized in a BEGIN block.
+ %is_do_follower,
+ %is_if_brace_follower,
+ %is_else_brace_follower,
+ %is_anon_sub_brace_follower,
+ %is_anon_sub_1_brace_follower,
+ %is_other_brace_follower,
+
+ # Initialized and re-initialized in sub initialize_grep_and_friends;
+ # These can be modified by grep-alias-list
+ %is_sort_map_grep,
+ %is_sort_map_grep_eval,
+ %is_sort_map_grep_eval_do,
+ %is_block_with_ci,
+ %is_keyword_returning_list,
+ %block_type_map,
+
+ # Initialized in sub initialize_whitespace_hashes;
+ # Some can be modified according to user parameters.
+ %binary_ws_rules,
+ %want_left_space,
+ %want_right_space,
+
+ # Configured in sub initialize_bond_strength_hashes
+ %right_bond_strength,
+ %left_bond_strength,
+
+ # Hashes for -kbb=s and -kba=s
+ %keep_break_before_type,
+ %keep_break_after_type,
+
+ # Initialized in check_options, modified by prepare_cuddled_block_types:
+ %want_one_line_block,
+
+ # Initialized in sub prepare_cuddled_block_types
+ $rcuddled_block_types,
+
+ # Initialized and configured in check_optioms
+ %outdent_keyword,
+ %keyword_paren_inner_tightness,
+
+ %want_break_before,
+
+ %break_before_container_types,
+ %container_indentation_options,
+
+ %space_after_keyword,
+
+ %tightness,
+ %matching_token,
+
+ %opening_vertical_tightness,
+ %closing_vertical_tightness,
+ %closing_token_indentation,
+ $some_closing_token_indentation,
+
+ %opening_token_right,
+ %stack_opening_token,
+ %stack_closing_token,
+
+ %weld_nested_exclusion_rules,
+ %line_up_parentheses_control_hash,
+ $line_up_parentheses_control_is_lxpl,
+
+ # regex patterns for text identification.
+ # Most are initialized in a sub make_**_pattern during configuration.
+ # Most can be configured by user parameters.
+ $SUB_PATTERN,
+ $ASUB_PATTERN,
+ $static_block_comment_pattern,
+ $static_side_comment_pattern,
+ $format_skipping_pattern_begin,
+ $format_skipping_pattern_end,
+ $non_indenting_brace_pattern,
+ $bl_exclusion_pattern,
+ $bl_pattern,
+ $bli_exclusion_pattern,
+ $bli_pattern,
+ $block_brace_vertical_tightness_pattern,
+ $blank_lines_after_opening_block_pattern,
+ $blank_lines_before_closing_block_pattern,
+ $keyword_group_list_pattern,
+ $keyword_group_list_comment_pattern,
+ $closing_side_comment_prefix_pattern,
+ $closing_side_comment_list_pattern,
+
+ # Table to efficiently find indentation and max line length
+ # from level.
+ @maximum_line_length_at_level,
+ @maximum_text_length_at_level,
+ $stress_level_alpha,
+ $stress_level_beta,
+
+ # Total number of sequence items in a weld, for quick checks
+ $total_weld_count,
+
+ #--------------------------------------------------------
+ # Section 2: Work arrays for the current batch of tokens.
+ #--------------------------------------------------------
+
+ # These are re-initialized for each batch of code
+ # in sub initialize_batch_variables.
+ $max_index_to_go,
+ @block_type_to_go,
+ @type_sequence_to_go,
+ @bond_strength_to_go,
+ @forced_breakpoint_to_go,
+ @token_lengths_to_go,
+ @summed_lengths_to_go,
+ @levels_to_go,
+ @leading_spaces_to_go,
+ @reduced_spaces_to_go,
+ @standard_spaces_to_go,
+ @mate_index_to_go,
+ @ci_levels_to_go,
+ @nesting_depth_to_go,
+ @nobreak_to_go,
+ @old_breakpoint_to_go,
+ @tokens_to_go,
+ @K_to_go,
+ @types_to_go,
+ @inext_to_go,
+ @iprev_to_go,
+ @parent_seqno_to_go,
+
+);
- # Codes for insertion and deletion of blanks
- use constant DELETE => 0;
- use constant STABLE => 1;
- use constant INSERT => 2;
+BEGIN {
- # Caution: these debug flags produce a lot of output
- # They should all be 0 except when debugging small scripts
- use constant FORMATTER_DEBUG_FLAG_RECOMBINE => 0;
- use constant FORMATTER_DEBUG_FLAG_BOND_TABLES => 0;
- use constant FORMATTER_DEBUG_FLAG_BOND => 0;
- use constant FORMATTER_DEBUG_FLAG_BREAK => 0;
- use constant FORMATTER_DEBUG_FLAG_CI => 0;
- use constant FORMATTER_DEBUG_FLAG_FLUSH => 0;
- use constant FORMATTER_DEBUG_FLAG_FORCE => 0;
- use constant FORMATTER_DEBUG_FLAG_LIST => 0;
- use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
- use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0;
- use constant FORMATTER_DEBUG_FLAG_SPARSE => 0;
- use constant FORMATTER_DEBUG_FLAG_STORE => 0;
- use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0;
- use constant FORMATTER_DEBUG_FLAG_WHITE => 0;
-
- my $debug_warning = sub {
- print STDOUT "FORMATTER_DEBUGGING with key $_[0]\n";
+ # Index names for token variables.
+ # Do not combine with other BEGIN blocks (c101).
+ my $i = 0;
+ use constant {
+ _CI_LEVEL_ => $i++,
+ _CUMULATIVE_LENGTH_ => $i++,
+ _LINE_INDEX_ => $i++,
+ _KNEXT_SEQ_ITEM_ => $i++,
+ _LEVEL_ => $i++,
+ _TOKEN_ => $i++,
+ _TOKEN_LENGTH_ => $i++,
+ _TYPE_ => $i++,
+ _TYPE_SEQUENCE_ => $i++,
+
+ # Number of token variables; must be last in list:
+ _NVARS => $i++,
};
-
- FORMATTER_DEBUG_FLAG_RECOMBINE && $debug_warning->('RECOMBINE');
- FORMATTER_DEBUG_FLAG_BOND_TABLES && $debug_warning->('BOND_TABLES');
- FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND');
- FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK');
- FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI');
- FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH');
- FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE');
- FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST');
- FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
- FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT');
- FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE');
- FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE');
- FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP');
- FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE');
}
-use vars qw{
-
- @gnu_stack
- $max_gnu_stack_index
- $gnu_position_predictor
- $line_start_index_to_go
- $last_indentation_written
- $last_unadjusted_indentation
- $last_leading_token
- $last_output_short_opening_token
- $peak_batch_size
-
- $saw_VERSION_in_this_file
- $saw_END_or_DATA_
-
- @gnu_item_list
- $max_gnu_item_index
- $gnu_sequence_number
- $last_output_indentation
- %last_gnu_equals
- %gnu_comma_count
- %gnu_arrow_count
-
- @block_type_to_go
- @type_sequence_to_go
- @container_environment_to_go
- @bond_strength_to_go
- @forced_breakpoint_to_go
- @token_lengths_to_go
- @summed_lengths_to_go
- @levels_to_go
- @leading_spaces_to_go
- @reduced_spaces_to_go
- @mate_index_to_go
- @ci_levels_to_go
- @nesting_depth_to_go
- @nobreak_to_go
- @old_breakpoint_to_go
- @tokens_to_go
- @K_to_go
- @types_to_go
- @inext_to_go
- @iprev_to_go
-
- %saved_opening_indentation
-
- $max_index_to_go
- $comma_count_in_batch
- $last_nonblank_index_to_go
- $last_nonblank_type_to_go
- $last_nonblank_token_to_go
- $last_last_nonblank_index_to_go
- $last_last_nonblank_type_to_go
- $last_last_nonblank_token_to_go
- @nonblank_lines_at_depth
- $starting_in_quote
- $ending_in_quote
- @whitespace_level_stack
- $whitespace_last_level
-
- $format_skipping_pattern_begin
- $format_skipping_pattern_end
-
- $forced_breakpoint_count
- $forced_breakpoint_undo_count
- @forced_breakpoint_undo_stack
- %postponed_breakpoint
-
- $tabbing
- $embedded_tab_count
- $first_embedded_tab_at
- $last_embedded_tab_at
- $deleted_semicolon_count
- $first_deleted_semicolon_at
- $last_deleted_semicolon_at
- $added_semicolon_count
- $first_added_semicolon_at
- $last_added_semicolon_at
- $first_tabbing_disagreement
- $last_tabbing_disagreement
- $in_tabbing_disagreement
- $tabbing_disagreement_count
- $input_line_tabbing
-
- $last_line_leading_type
- $last_line_leading_level
- $last_last_line_leading_level
-
- %block_leading_text
- %block_opening_line_number
- $csc_new_statement_ok
- $csc_last_label
- %csc_block_label
- $accumulating_text_for_block
- $leading_block_text
- $rleading_block_if_elsif_text
- $leading_block_text_level
- $leading_block_text_length_exceeded
- $leading_block_text_line_length
- $leading_block_text_line_number
- $closing_side_comment_prefix_pattern
- $closing_side_comment_list_pattern
-
- $blank_lines_after_opening_block_pattern
- $blank_lines_before_closing_block_pattern
-
- $last_nonblank_token
- $last_nonblank_type
- $last_last_nonblank_token
- $last_last_nonblank_type
- $last_nonblank_block_type
- $last_output_level
- %is_do_follower
- %is_if_brace_follower
- %space_after_keyword
- $rbrace_follower
- $looking_for_else
- %is_last_next_redo_return
- %is_other_brace_follower
- %is_else_brace_follower
- %is_anon_sub_brace_follower
- %is_anon_sub_1_brace_follower
- %is_sort_map_grep
- %is_sort_map_grep_eval
- %want_one_line_block
- %is_sort_map_grep_eval_do
- %is_block_without_semicolon
- %is_if_unless
- %is_and_or
- %is_assignment
- %is_chain_operator
- %is_if_unless_and_or_last_next_redo_return
- %ok_to_add_semicolon_for_block_type
-
- @has_broken_sublist
- @dont_align
- @want_comma_break
-
- $is_static_block_comment
- $index_start_one_line_block
- $semicolons_before_block_self_destruct
- $index_max_forced_break
- $input_line_number
- $diagnostics_object
- $vertical_aligner_object
- $logger_object
- $file_writer_object
- $formatter_self
- @ci_stack
- %want_break_before
- %outdent_keyword
- $static_block_comment_pattern
- $static_side_comment_pattern
- %opening_vertical_tightness
- %closing_vertical_tightness
- %closing_token_indentation
- $some_closing_token_indentation
-
- %opening_token_right
- %stack_opening_token
- %stack_closing_token
-
- $block_brace_vertical_tightness_pattern
- $keyword_group_list_pattern
- $keyword_group_list_comment_pattern
-
- $rOpts_add_newlines
- $rOpts_add_whitespace
- $rOpts_block_brace_tightness
- $rOpts_block_brace_vertical_tightness
- $rOpts_brace_left_and_indent
- $rOpts_comma_arrow_breakpoints
- $rOpts_break_at_old_keyword_breakpoints
- $rOpts_break_at_old_comma_breakpoints
- $rOpts_break_at_old_logical_breakpoints
- $rOpts_break_at_old_method_breakpoints
- $rOpts_break_at_old_ternary_breakpoints
- $rOpts_break_at_old_attribute_breakpoints
- $rOpts_closing_side_comment_else_flag
- $rOpts_closing_side_comment_maximum_text
- $rOpts_continuation_indentation
- $rOpts_delete_old_whitespace
- $rOpts_fuzzy_line_length
- $rOpts_indent_columns
- $rOpts_line_up_parentheses
- $rOpts_maximum_fields_per_table
- $rOpts_maximum_line_length
- $rOpts_variable_maximum_line_length
- $rOpts_short_concatenation_item_length
- $rOpts_keep_old_blank_lines
- $rOpts_ignore_old_breakpoints
- $rOpts_format_skipping
- $rOpts_space_function_paren
- $rOpts_space_keyword_paren
- $rOpts_keep_interior_semicolons
- $rOpts_ignore_side_comment_lengths
- $rOpts_stack_closing_block_brace
- $rOpts_space_backslash_quote
- $rOpts_whitespace_cycle
- $rOpts_one_line_block_semicolons
-
- %is_opening_type
- %is_closing_type
- %is_keyword_returning_list
- %tightness
- %matching_token
- $rOpts
- %right_bond_strength
- %left_bond_strength
- %binary_ws_rules
- %want_left_space
- %want_right_space
- %is_digraph
- %is_trigraph
- $bli_pattern
- $bli_list_string
- %is_closing_type
- %is_opening_type
- %is_closing_token
- %is_opening_token
-
- %weld_len_left_closing
- %weld_len_right_closing
- %weld_len_left_opening
- %weld_len_right_opening
-
- $rcuddled_block_types
-
- $SUB_PATTERN
- $ASUB_PATTERN
-
- $NVARS
-
-};
+BEGIN {
+
+ # Index names for $self variables.
+ # Do not combine with other BEGIN blocks (c101).
+ my $i = 0;
+ use constant {
+ _rlines_ => $i++,
+ _rlines_new_ => $i++,
+ _rLL_ => $i++,
+ _Klimit_ => $i++,
+ _rdepth_of_opening_seqno_ => $i++,
+ _rSS_ => $i++,
+ _Iss_opening_ => $i++,
+ _Iss_closing_ => $i++,
+ _rblock_type_of_seqno_ => $i++,
+ _ris_asub_block_ => $i++,
+ _ris_sub_block_ => $i++,
+ _K_opening_container_ => $i++,
+ _K_closing_container_ => $i++,
+ _K_opening_ternary_ => $i++,
+ _K_closing_ternary_ => $i++,
+ _K_first_seq_item_ => $i++,
+ _rK_phantom_semicolons_ => $i++,
+ _rtype_count_by_seqno_ => $i++,
+ _ris_function_call_paren_ => $i++,
+ _rlec_count_by_seqno_ => $i++,
+ _ris_broken_container_ => $i++,
+ _ris_permanently_broken_ => $i++,
+ _rhas_list_ => $i++,
+ _rhas_broken_list_ => $i++,
+ _rhas_broken_list_with_lec_ => $i++,
+ _rhas_code_block_ => $i++,
+ _rhas_broken_code_block_ => $i++,
+ _rhas_ternary_ => $i++,
+ _ris_excluded_lp_container_ => $i++,
+ _rlp_object_by_seqno_ => $i++,
+ _rwant_reduced_ci_ => $i++,
+ _rno_xci_by_seqno_ => $i++,
+ _rbrace_left_ => $i++,
+ _ris_bli_container_ => $i++,
+ _rparent_of_seqno_ => $i++,
+ _rchildren_of_seqno_ => $i++,
+ _ris_list_by_seqno_ => $i++,
+ _rbreak_container_ => $i++,
+ _rshort_nested_ => $i++,
+ _length_function_ => $i++,
+ _is_encoded_data_ => $i++,
+ _fh_tee_ => $i++,
+ _sink_object_ => $i++,
+ _file_writer_object_ => $i++,
+ _vertical_aligner_object_ => $i++,
+ _logger_object_ => $i++,
+ _radjusted_levels_ => $i++,
+ _this_batch_ => $i++,
+
+ _last_output_short_opening_token_ => $i++,
+
+ _last_line_leading_type_ => $i++,
+ _last_line_leading_level_ => $i++,
+ _last_last_line_leading_level_ => $i++,
+
+ _added_semicolon_count_ => $i++,
+ _first_added_semicolon_at_ => $i++,
+ _last_added_semicolon_at_ => $i++,
+
+ _deleted_semicolon_count_ => $i++,
+ _first_deleted_semicolon_at_ => $i++,
+ _last_deleted_semicolon_at_ => $i++,
+
+ _embedded_tab_count_ => $i++,
+ _first_embedded_tab_at_ => $i++,
+ _last_embedded_tab_at_ => $i++,
+
+ _first_tabbing_disagreement_ => $i++,
+ _last_tabbing_disagreement_ => $i++,
+ _tabbing_disagreement_count_ => $i++,
+ _in_tabbing_disagreement_ => $i++,
+ _first_brace_tabbing_disagreement_ => $i++,
+ _in_brace_tabbing_disagreement_ => $i++,
+
+ _saw_VERSION_in_this_file_ => $i++,
+ _saw_END_or_DATA_ => $i++,
+
+ _rK_weld_left_ => $i++,
+ _rK_weld_right_ => $i++,
+ _rweld_len_right_at_K_ => $i++,
+
+ _rspecial_side_comment_type_ => $i++,
+
+ _rseqno_controlling_my_ci_ => $i++,
+ _ris_seqno_controlling_ci_ => $i++,
+ _save_logfile_ => $i++,
+ _maximum_level_ => $i++,
+ _maximum_level_at_line_ => $i++,
+ _maximum_BLOCK_level_ => $i++,
+ _maximum_BLOCK_level_at_line_ => $i++,
+
+ _rKrange_code_without_comments_ => $i++,
+ _rbreak_before_Kfirst_ => $i++,
+ _rbreak_after_Klast_ => $i++,
+ _rwant_container_open_ => $i++,
+ _converged_ => $i++,
+
+ _rstarting_multiline_qw_seqno_by_K_ => $i++,
+ _rending_multiline_qw_seqno_by_K_ => $i++,
+ _rKrange_multiline_qw_by_seqno_ => $i++,
+ _rmultiline_qw_has_extra_level_ => $i++,
+
+ _rcollapsed_length_by_seqno_ => $i++,
+ _rbreak_before_container_by_seqno_ => $i++,
+ _ris_essential_old_breakpoint_ => $i++,
+ _roverride_cab3_ => $i++,
+ _ris_assigned_structure_ => $i++,
+
+ _LAST_SELF_INDEX_ => $i - 1,
+ };
+}
BEGIN {
- # Array index names for token variables
+ # Index names for batch variables.
+ # Do not combine with other BEGIN blocks (c101).
+ # These are stored in _this_batch_, which is a sub-array of $self.
my $i = 0;
use constant {
- _BLOCK_TYPE_ => $i++,
- _CI_LEVEL_ => $i++,
- _CONTAINER_ENVIRONMENT_ => $i++,
- _CONTAINER_TYPE_ => $i++,
- _CUMULATIVE_LENGTH_ => $i++,
- _LINE_INDEX_ => $i++,
- _KNEXT_SEQ_ITEM_ => $i++,
- _LEVEL_ => $i++,
- _LEVEL_TRUE_ => $i++,
- _SLEVEL_ => $i++,
- _TOKEN_ => $i++,
- _TYPE_ => $i++,
- _TYPE_SEQUENCE_ => $i++,
+ _starting_in_quote_ => $i++,
+ _ending_in_quote_ => $i++,
+ _is_static_block_comment_ => $i++,
+ _ri_first_ => $i++,
+ _ri_last_ => $i++,
+ _do_not_pad_ => $i++,
+ _peak_batch_size_ => $i++,
+ _max_index_to_go_ => $i++,
+ _batch_count_ => $i++,
+ _rix_seqno_controlling_ci_ => $i++,
+ _batch_CODE_type_ => $i++,
+ _ri_starting_one_line_block_ => $i++,
};
- $NVARS = 1 + _TYPE_SEQUENCE_;
+}
- # default list of block types for which -bli would apply
- $bli_list_string = 'if else elsif unless while for foreach do : sub';
+BEGIN {
- my @q;
+ # Sequence number assigned to the root of sequence tree.
+ # The minimum of the actual sequences numbers is 4, so we can use 1
+ use constant SEQ_ROOT => 1;
- @q = qw(
- .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
- <= >= == =~ !~ != ++ -- /= x=
- );
- @is_digraph{@q} = (1) x scalar(@q);
+ # Codes for insertion and deletion of blanks
+ use constant DELETE => 0;
+ use constant STABLE => 1;
+ use constant INSERT => 2;
+
+ # whitespace codes
+ use constant WS_YES => 1;
+ use constant WS_OPTIONAL => 0;
+ use constant WS_NO => -1;
+
+ # Token bond strengths.
+ use constant NO_BREAK => 10000;
+ use constant VERY_STRONG => 100;
+ use constant STRONG => 2.1;
+ use constant NOMINAL => 1.1;
+ use constant WEAK => 0.8;
+ use constant VERY_WEAK => 0.55;
+
+ # values for testing indexes in output array
+ use constant UNDEFINED_INDEX => -1;
+
+ # Maximum number of little messages; probably need not be changed.
+ use constant MAX_NAG_MESSAGES => 6;
- @q = qw( ... **= <<= >>= &&= ||= //= <=> <<~ );
- @is_trigraph{@q} = (1) x scalar(@q);
+ # This is the decimal range of printable characters in ASCII. It is used to
+ # make quick preliminary checks before resorting to using a regex.
+ use constant ORD_PRINTABLE_MIN => 33;
+ use constant ORD_PRINTABLE_MAX => 126;
+
+ # Initialize constant hashes ...
+ my @q;
@q = qw(
= **= += *= &= <<= &&=
);
@is_assignment{@q} = (1) x scalar(@q);
- @q = qw(
- grep
- keys
- map
- reverse
- sort
- split
- );
- @is_keyword_returning_list{@q} = (1) x scalar(@q);
-
@q = qw(is if unless and or err last next redo return);
@is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
- @q = qw(last next redo return);
- @is_last_next_redo_return{@q} = (1) x scalar(@q);
+ # These block types may have text between the keyword and opening
+ # curly. Note: 'else' does not, but must be included to allow trailing
+ # if/elsif text to be appended.
+ # patch for SWITCH/CASE: added 'case' and 'when'
+ @q = qw(if elsif else unless while until for foreach case when catch);
+ @is_if_elsif_else_unless_while_until_for_foreach{@q} =
+ (1) x scalar(@q);
- @q = qw(sort map grep);
- @is_sort_map_grep{@q} = (1) x scalar(@q);
+ @q = qw(if unless while until for foreach);
+ @is_if_unless_while_until_for_foreach{@q} =
+ (1) x scalar(@q);
- @q = qw(sort map grep eval);
- @is_sort_map_grep_eval{@q} = (1) x scalar(@q);
+ @q = qw(last next redo return);
+ @is_last_next_redo_return{@q} = (1) x scalar(@q);
- @q = qw(sort map grep eval do);
- @is_sort_map_grep_eval_do{@q} = (1) x scalar(@q);
+ # Map related block names into a common name to allow vertical alignment
+ # used by sub make_alignment_patterns. Note: this is normally unchanged,
+ # but it contains 'grep' and can be re-initized in
+ # sub initialize_grep_and_friends in a testing mode.
+ %block_type_map = (
+ 'unless' => 'if',
+ 'else' => 'if',
+ 'elsif' => 'if',
+ 'when' => 'if',
+ 'default' => 'if',
+ 'case' => 'if',
+ 'sort' => 'map',
+ 'grep' => 'map',
+ );
@q = qw(if unless);
@is_if_unless{@q} = (1) x scalar(@q);
@q = qw(&& || and or : ? . + - * /);
@is_chain_operator{@q} = (1) x scalar(@q);
+ # Operators that the user can request break before or after.
+ # Note that some are keywords
+ @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
+ = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
+ . : ? && || and or err xor
+ );
+
# We can remove semicolons after blocks preceded by these keywords
@q =
qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
@q = qw< } ) ] >;
@is_closing_token{@q} = (1) x scalar(@q);
- # Patterns for standardizing matches to block types for regular subs and
- # anonymous subs. Examples
- # 'sub process' is a named sub
- # 'sub ::m' is a named sub
- # 'sub' is an anonymous sub
- # 'sub:' is a label, not a sub
- # 'substr' is a keyword
- $SUB_PATTERN = '^sub\s+(::|\w)';
- $ASUB_PATTERN = '^sub$';
-}
+ @q = qw< { ( [ ? >;
+ @is_opening_sequence_token{@q} = (1) x scalar(@q);
-# whitespace codes
-use constant WS_YES => 1;
-use constant WS_OPTIONAL => 0;
-use constant WS_NO => -1;
+ @q = qw< } ) ] : >;
+ @is_closing_sequence_token{@q} = (1) x scalar(@q);
-# Token bond strengths.
-use constant NO_BREAK => 10000;
-use constant VERY_STRONG => 100;
-use constant STRONG => 2.1;
-use constant NOMINAL => 1.1;
-use constant WEAK => 0.8;
-use constant VERY_WEAK => 0.55;
+ # a hash needed by sub break_lists for labeling containers
+ @q = qw( k => && || ? : . );
+ @is_container_label_type{@q} = (1) x scalar(@q);
-# values for testing indexes in output array
-use constant UNDEFINED_INDEX => -1;
+ # Braces -bbht etc must follow these. Note: experimentation with
+ # including a simple comma shows that it adds little and can lead
+ # to poor formatting in complex lists.
+ @q = qw( = => );
+ @is_equal_or_fat_comma{@q} = (1) x scalar(@q);
-# Maximum number of little messages; probably need not be changed.
-use constant MAX_NAG_MESSAGES => 6;
+ @q = qw( => ; h f );
+ push @q, ',';
+ @is_counted_type{@q} = (1) x scalar(@q);
-# increment between sequence numbers for each type
-# For example, ?: pairs might have numbers 7,11,15,...
-use constant TYPE_SEQUENCE_INCREMENT => 4;
+}
-{
+{ ## begin closure to count instances
# methods to count instances
my $_count = 0;
sub get_count { return $_count; }
sub _increment_count { return ++$_count }
sub _decrement_count { return --$_count }
-}
+} ## end closure to count instances
-sub trim {
+sub new {
- # trim leading and trailing whitespace from a string
- my $str = shift;
- $str =~ s/\s+$//;
- $str =~ s/^\s+//;
- return $str;
-}
+ my ( $class, @args ) = @_;
-sub max {
- my @vals = @_;
- my $max = shift @vals;
- foreach my $val (@vals) {
- $max = ( $max < $val ) ? $val : $max;
+ # we are given an object with a write_line() method to take lines
+ my %defaults = (
+ sink_object => undef,
+ diagnostics_object => undef,
+ logger_object => undef,
+ length_function => sub { return length( $_[0] ) },
+ is_encoded_data => "",
+ fh_tee => undef,
+ );
+ my %args = ( %defaults, @args );
+
+ my $length_function = $args{length_function};
+ my $is_encoded_data = $args{is_encoded_data};
+ my $fh_tee = $args{fh_tee};
+ my $logger_object = $args{logger_object};
+ my $diagnostics_object = $args{diagnostics_object};
+
+ # we create another object with a get_line() and peek_ahead() method
+ my $sink_object = $args{sink_object};
+ my $file_writer_object =
+ Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
+
+ # initialize closure variables...
+ set_logger_object($logger_object);
+ set_diagnostics_object($diagnostics_object);
+ initialize_lp_vars();
+ initialize_csc_vars();
+ initialize_break_lists();
+ initialize_undo_ci();
+ initialize_process_line_of_CODE();
+ initialize_grind_batch_of_CODE();
+ initialize_final_indentation_adjustment();
+ initialize_postponed_breakpoint();
+ initialize_batch_variables();
+ initialize_forced_breakpoint_vars();
+ initialize_write_line();
+
+ my $vertical_aligner_object = Perl::Tidy::VerticalAligner->new(
+ rOpts => $rOpts,
+ file_writer_object => $file_writer_object,
+ logger_object => $logger_object,
+ diagnostics_object => $diagnostics_object,
+ length_function => $length_function
+ );
+
+ write_logfile_entry("\nStarting tokenization pass...\n");
+
+ if ( $rOpts->{'entab-leading-whitespace'} ) {
+ write_logfile_entry(
+"Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
+ );
+ }
+ elsif ( $rOpts->{'tabs'} ) {
+ write_logfile_entry("Indentation will be with a tab character\n");
+ }
+ else {
+ write_logfile_entry(
+ "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
}
- return $max;
-}
-sub min {
- my @vals = @_;
- my $min = shift @vals;
- foreach my $val (@vals) {
- $min = ( $min > $val ) ? $val : $min;
+ # Initialize the $self array reference.
+ # To add an item, first add a constant index in the BEGIN block above.
+ my $self = [];
+
+ # Basic data structures...
+ $self->[_rlines_] = []; # = ref to array of lines of the file
+ $self->[_rlines_new_] = []; # = ref to array of output lines
+
+ # 'rLL' = reference to the continuous liner array of all tokens in a file.
+ # 'LL' stands for 'Linked List'. Using a linked list was a disaster, but
+ # 'LL' stuck because it is easy to type. The 'rLL' array is updated
+ # by sub 'respace_tokens' during reformatting. The indexes in 'rLL' begin
+ # with '$K' by convention.
+ $self->[_rLL_] = [];
+ $self->[_Klimit_] = undef; # = maximum K index for rLL.
+
+ # Indexes into the rLL list
+ $self->[_K_opening_container_] = {};
+ $self->[_K_closing_container_] = {};
+ $self->[_K_opening_ternary_] = {};
+ $self->[_K_closing_ternary_] = {};
+ $self->[_K_first_seq_item_] = undef; # K of first token with a sequence #
+
+ # Array of phantom semicolons, in case we ever need to undo them
+ $self->[_rK_phantom_semicolons_] = undef;
+
+ # 'rSS' is the 'Signed Sequence' list, a continuous list of all sequence
+ # numbers with + or - indicating opening or closing. This list represents
+ # the entire container tree and is invariant under reformatting. It can be
+ # used to quickly travel through the tree. Indexes in the rSS array begin
+ # with '$I' by convention. The 'Iss' arrays give the indexes in this list
+ # of opening and closing sequence numbers.
+ $self->[_rSS_] = [];
+ $self->[_Iss_opening_] = [];
+ $self->[_Iss_closing_] = [];
+
+ # Arrays to help traverse the tree
+ $self->[_rdepth_of_opening_seqno_] = [];
+ $self->[_rblock_type_of_seqno_] = {};
+ $self->[_ris_asub_block_] = {};
+ $self->[_ris_sub_block_] = {};
+
+ # Mostly list characteristics and processing flags
+ $self->[_rtype_count_by_seqno_] = {};
+ $self->[_ris_function_call_paren_] = {};
+ $self->[_rlec_count_by_seqno_] = {};
+ $self->[_ris_broken_container_] = {};
+ $self->[_ris_permanently_broken_] = {};
+ $self->[_rhas_list_] = {};
+ $self->[_rhas_broken_list_] = {};
+ $self->[_rhas_broken_list_with_lec_] = {};
+ $self->[_rhas_code_block_] = {};
+ $self->[_rhas_broken_code_block_] = {};
+ $self->[_rhas_ternary_] = {};
+ $self->[_ris_excluded_lp_container_] = {};
+ $self->[_rlp_object_by_seqno_] = {};
+ $self->[_rwant_reduced_ci_] = {};
+ $self->[_rno_xci_by_seqno_] = {};
+ $self->[_rbrace_left_] = {};
+ $self->[_ris_bli_container_] = {};
+ $self->[_rparent_of_seqno_] = {};
+ $self->[_rchildren_of_seqno_] = {};
+ $self->[_ris_list_by_seqno_] = {};
+
+ $self->[_rbreak_container_] = {}; # prevent one-line blocks
+ $self->[_rshort_nested_] = {}; # blocks not forced open
+ $self->[_length_function_] = $length_function;
+ $self->[_is_encoded_data_] = $is_encoded_data;
+
+ # Some objects...
+ $self->[_fh_tee_] = $fh_tee;
+ $self->[_sink_object_] = $sink_object;
+ $self->[_file_writer_object_] = $file_writer_object;
+ $self->[_vertical_aligner_object_] = $vertical_aligner_object;
+ $self->[_logger_object_] = $logger_object;
+
+ # Reference to the batch being processed
+ $self->[_this_batch_] = [];
+
+ # Memory of processed text...
+ $self->[_last_last_line_leading_level_] = 0;
+ $self->[_last_line_leading_level_] = 0;
+ $self->[_last_line_leading_type_] = '#';
+ $self->[_last_output_short_opening_token_] = 0;
+ $self->[_added_semicolon_count_] = 0;
+ $self->[_first_added_semicolon_at_] = 0;
+ $self->[_last_added_semicolon_at_] = 0;
+ $self->[_deleted_semicolon_count_] = 0;
+ $self->[_first_deleted_semicolon_at_] = 0;
+ $self->[_last_deleted_semicolon_at_] = 0;
+ $self->[_embedded_tab_count_] = 0;
+ $self->[_first_embedded_tab_at_] = 0;
+ $self->[_last_embedded_tab_at_] = 0;
+ $self->[_first_tabbing_disagreement_] = 0;
+ $self->[_last_tabbing_disagreement_] = 0;
+ $self->[_tabbing_disagreement_count_] = 0;
+ $self->[_in_tabbing_disagreement_] = 0;
+ $self->[_saw_VERSION_in_this_file_] = !$rOpts->{'pass-version-line'};
+ $self->[_saw_END_or_DATA_] = 0;
+ $self->[_first_brace_tabbing_disagreement_] = undef;
+ $self->[_in_brace_tabbing_disagreement_] = undef;
+
+ # Hashes related to container welding...
+ $self->[_radjusted_levels_] = [];
+
+ # Weld data structures
+ $self->[_rK_weld_left_] = {};
+ $self->[_rK_weld_right_] = {};
+ $self->[_rweld_len_right_at_K_] = {};
+
+ # -xci stuff
+ $self->[_rseqno_controlling_my_ci_] = {};
+ $self->[_ris_seqno_controlling_ci_] = {};
+
+ $self->[_rspecial_side_comment_type_] = {};
+ $self->[_maximum_level_] = 0;
+ $self->[_maximum_level_at_line_] = 0;
+ $self->[_maximum_BLOCK_level_] = 0;
+ $self->[_maximum_BLOCK_level_at_line_] = 0;
+
+ $self->[_rKrange_code_without_comments_] = [];
+ $self->[_rbreak_before_Kfirst_] = {};
+ $self->[_rbreak_after_Klast_] = {};
+ $self->[_rwant_container_open_] = {};
+ $self->[_converged_] = 0;
+
+ # qw stuff
+ $self->[_rstarting_multiline_qw_seqno_by_K_] = {};
+ $self->[_rending_multiline_qw_seqno_by_K_] = {};
+ $self->[_rKrange_multiline_qw_by_seqno_] = {};
+ $self->[_rmultiline_qw_has_extra_level_] = {};
+
+ $self->[_rcollapsed_length_by_seqno_] = {};
+ $self->[_rbreak_before_container_by_seqno_] = {};
+ $self->[_ris_essential_old_breakpoint_] = {};
+ $self->[_roverride_cab3_] = {};
+ $self->[_ris_assigned_structure_] = {};
+
+ # This flag will be updated later by a call to get_save_logfile()
+ $self->[_save_logfile_] = defined($logger_object);
+
+ # Be sure all variables in $self have been initialized above. To find the
+ # correspondence of index numbers and array names, copy a list to a file
+ # and use the unix 'nl' command to number lines 1..
+ if (DEVEL_MODE) {
+ my @non_existant;
+ foreach ( 0 .. _LAST_SELF_INDEX_ ) {
+ if ( !exists( $self->[$_] ) ) {
+ push @non_existant, $_;
+ }
+ }
+ if (@non_existant) {
+ Fault("These indexes in self not initialized: (@non_existant)\n");
+ }
+ }
+
+ bless $self, $class;
+
+ # Safety check..this is not a class yet
+ if ( _increment_count() > 1 ) {
+ confess
+"Attempt to create more than 1 object in $class, which is not a true class yet\n";
}
- return $min;
+ return $self;
}
-sub split_words {
+######################################
+# CODE SECTION 2: Some Basic Utilities
+######################################
- # given a string containing words separated by whitespace,
- # return the list of words
- my ($str) = @_;
- return unless $str;
- $str =~ s/\s+$//;
- $str =~ s/^\s+//;
- return split( /\s+/, $str );
+sub check_rLL {
+
+ # Verify that the rLL array has not been auto-vivified
+ my ( $self, $msg ) = @_;
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
+ my $num = @{$rLL};
+ if ( ( defined($Klimit) && $Klimit != $num - 1 )
+ || ( !defined($Klimit) && $num > 0 ) )
+ {
+
+ # This fault can occur if the array has been accessed for an index
+ # greater than $Klimit, which is the last token index. Just accessing
+ # the array above index $Klimit, not setting a value, can cause @rLL to
+ # increase beyond $Klimit. If this occurs, the problem can be located
+ # by making calls to this routine at different locations in
+ # sub 'finish_formatting'.
+ $Klimit = 'undef' if ( !defined($Klimit) );
+ $msg = "" unless $msg;
+ Fault("$msg ERROR: rLL has num=$num but Klimit='$Klimit'\n");
+ }
+ return;
}
sub check_keys {
local $" = ')(';
my @expected_keys = sort keys %{$rvalid};
@unknown_keys = sort @unknown_keys;
- Die(<<EOM);
+ Fault(<<EOM);
------------------------------------------------------------------------
Program error detected checking hash keys
Message is: '$msg'
return;
}
-# interface to Perl::Tidy::Logger routines
-sub warning {
- my ($msg) = @_;
- if ($logger_object) { $logger_object->warning($msg); }
- return;
-}
-
-sub complain {
- my ($msg) = @_;
- if ($logger_object) {
- $logger_object->complain($msg);
- }
- return;
-}
+sub check_token_array {
+ my $self = shift;
-sub write_logfile_entry {
- my @msg = @_;
- if ($logger_object) {
- $logger_object->write_logfile_entry(@msg);
- }
- return;
-}
+ # Check for errors in the array of tokens. This is only called
+ # when the DEVEL_MODE flag is set, so this Fault will only occur
+ # during code development.
+ my $rLL = $self->[_rLL_];
+ for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
+ my $nvars = @{ $rLL->[$KK] };
+ if ( $nvars != _NVARS ) {
+ my $NVARS = _NVARS;
+ my $type = $rLL->[$KK]->[_TYPE_];
+ $type = '*' unless defined($type);
-sub black_box {
- my @msg = @_;
- if ($logger_object) { $logger_object->black_box(@msg); }
- return;
-}
+ # The number of variables per token node is _NVARS and was set when
+ # the array indexes were generated. So if the number of variables
+ # is different we have done something wrong, like not store all of
+ # them in sub 'write_line' when they were received from the
+ # tokenizer.
+ Fault(
+"number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
+ );
+ }
+ foreach my $var ( _TOKEN_, _TYPE_ ) {
+ if ( !defined( $rLL->[$KK]->[$var] ) ) {
+ my $iline = $rLL->[$KK]->[_LINE_INDEX_];
-sub report_definite_bug {
- if ($logger_object) {
- $logger_object->report_definite_bug();
+ # This is a simple check that each token has some basic
+ # variables. In other words, that there are no holes in the
+ # array of tokens. Sub 'write_line' pushes tokens into the
+ # $rLL array, so this should guarantee no gaps.
+ Fault("Undefined variable $var for K=$KK, line=$iline\n");
+ }
+ }
}
return;
}
-sub get_saw_brace_error {
- if ($logger_object) {
- return $logger_object->get_saw_brace_error();
- }
- return;
-}
+{ ## begin closure check_line_hashes
-sub we_are_at_the_last_line {
- if ($logger_object) {
- $logger_object->we_are_at_the_last_line();
- }
- return;
-}
+ # This code checks that no autovivification occurs in the 'line' hash
-# interface to Perl::Tidy::Diagnostics routine
-sub write_diagnostics {
- my $msg = shift;
- if ($diagnostics_object) { $diagnostics_object->write_diagnostics($msg); }
- return;
-}
+ my %valid_line_hash;
-sub get_added_semicolon_count {
- my $self = shift;
- return $added_semicolon_count;
-}
+ BEGIN {
-sub DESTROY {
- my $self = shift;
- $self->_decrement_count();
- return;
-}
+ # These keys are defined for each line in the formatter
+ # Each line must have exactly these quantities
+ my @valid_line_keys = qw(
+ _curly_brace_depth
+ _ending_in_quote
+ _guessed_indentation_level
+ _line_number
+ _line_text
+ _line_type
+ _paren_depth
+ _quote_character
+ _rK_range
+ _square_bracket_depth
+ _starting_in_quote
+ _ended_in_blank_token
+ _code_type
-sub get_output_line_number {
- return $vertical_aligner_object->get_output_line_number();
-}
+ _ci_level_0
+ _level_0
+ _nesting_blocks_0
+ _nesting_tokens_0
+ );
-sub new {
+ @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys);
+ }
- my ( $class, @args ) = @_;
+ sub check_line_hashes {
+ my $self = shift;
+ my $rlines = $self->[_rlines_];
+ foreach my $rline ( @{$rlines} ) {
+ my $iline = $rline->{_line_number};
+ my $line_type = $rline->{_line_type};
+ check_keys( $rline, \%valid_line_hash,
+ "Checkpoint: line number =$iline, line_type=$line_type", 1 );
+ }
+ return;
+ }
+} ## end closure check_line_hashes
- # we are given an object with a write_line() method to take lines
- my %defaults = (
- sink_object => undef,
- diagnostics_object => undef,
- logger_object => undef,
- );
- my %args = ( %defaults, @args );
+{ ## begin closure for logger routines
+ my $logger_object;
- $logger_object = $args{logger_object};
- $diagnostics_object = $args{diagnostics_object};
+ # Called once per file to initialize the logger object
+ sub set_logger_object {
+ $logger_object = shift;
+ return;
+ }
- # we create another object with a get_line() and peek_ahead() method
- my $sink_object = $args{sink_object};
- $file_writer_object =
- Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
+ sub get_logger_object {
+ return $logger_object;
+ }
- # initialize the leading whitespace stack to negative levels
- # so that we can never run off the end of the stack
- $peak_batch_size = 0; # flag to determine if we have output code
- $gnu_position_predictor = 0; # where the current token is predicted to be
- $max_gnu_stack_index = 0;
- $max_gnu_item_index = -1;
- $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
- @gnu_item_list = ();
- $last_output_indentation = 0;
- $last_indentation_written = 0;
- $last_unadjusted_indentation = 0;
- $last_leading_token = "";
- $last_output_short_opening_token = 0;
-
- $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
- $saw_END_or_DATA_ = 0;
-
- @block_type_to_go = ();
- @type_sequence_to_go = ();
- @container_environment_to_go = ();
- @bond_strength_to_go = ();
- @forced_breakpoint_to_go = ();
- @summed_lengths_to_go = (); # line length to start of ith token
- @token_lengths_to_go = ();
- @levels_to_go = ();
- @mate_index_to_go = ();
- @ci_levels_to_go = ();
- @nesting_depth_to_go = (0);
- @nobreak_to_go = ();
- @old_breakpoint_to_go = ();
- @tokens_to_go = ();
- @K_to_go = ();
- @types_to_go = ();
- @leading_spaces_to_go = ();
- @reduced_spaces_to_go = ();
- @inext_to_go = ();
- @iprev_to_go = ();
-
- @whitespace_level_stack = ();
- $whitespace_last_level = -1;
-
- @dont_align = ();
- @has_broken_sublist = ();
- @want_comma_break = ();
-
- @ci_stack = ("");
- $first_tabbing_disagreement = 0;
- $last_tabbing_disagreement = 0;
- $tabbing_disagreement_count = 0;
- $in_tabbing_disagreement = 0;
- $input_line_tabbing = undef;
-
- $last_last_line_leading_level = 0;
- $last_line_leading_level = 0;
- $last_line_leading_type = '#';
-
- $last_nonblank_token = ';';
- $last_nonblank_type = ';';
- $last_last_nonblank_token = ';';
- $last_last_nonblank_type = ';';
- $last_nonblank_block_type = "";
- $last_output_level = 0;
- $looking_for_else = 0;
- $embedded_tab_count = 0;
- $first_embedded_tab_at = 0;
- $last_embedded_tab_at = 0;
- $deleted_semicolon_count = 0;
- $first_deleted_semicolon_at = 0;
- $last_deleted_semicolon_at = 0;
- $added_semicolon_count = 0;
- $first_added_semicolon_at = 0;
- $last_added_semicolon_at = 0;
- $is_static_block_comment = 0;
- %postponed_breakpoint = ();
-
- # variables for adding side comments
- %block_leading_text = ();
- %block_opening_line_number = ();
- $csc_new_statement_ok = 1;
- %csc_block_label = ();
-
- %saved_opening_indentation = ();
-
- reset_block_text_accumulator();
-
- prepare_for_new_input_lines();
-
- $vertical_aligner_object =
- Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
- $logger_object, $diagnostics_object );
+ sub get_input_stream_name {
+ my $input_stream_name = "";
+ if ($logger_object) {
+ $input_stream_name = $logger_object->get_input_stream_name();
+ }
+ return $input_stream_name;
+ }
- if ( $rOpts->{'entab-leading-whitespace'} ) {
- write_logfile_entry(
-"Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
- );
+ # interface to Perl::Tidy::Logger routines
+ sub warning {
+ my ($msg) = @_;
+ if ($logger_object) { $logger_object->warning($msg); }
+ return;
}
- elsif ( $rOpts->{'tabs'} ) {
- write_logfile_entry("Indentation will be with a tab character\n");
+
+ sub complain {
+ my ($msg) = @_;
+ if ($logger_object) {
+ $logger_object->complain($msg);
+ }
+ return;
}
- else {
- write_logfile_entry(
- "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
+
+ sub write_logfile_entry {
+ my @msg = @_;
+ if ($logger_object) {
+ $logger_object->write_logfile_entry(@msg);
+ }
+ return;
}
- # This hash holds the main data structures for formatting
- # All hash keys must be defined here.
- $formatter_self = {
- rlines => [], # = ref to array of lines of the file
- rlines_new => [], # = ref to array of output lines
- # (FOR FUTURE DEVELOPMENT)
- rLL => [], # = ref to array with all tokens
- # in the file. LL originally meant
- # 'Linked List'. Linked lists were a
- # bad idea but LL is easy to type.
- Klimit => undef, # = maximum K index for rLL. This is
- # needed to catch any autovivification
- # problems.
- rnested_pairs => [], # for welding decisions
- K_opening_container => {}, # for quickly traversing structure
- K_closing_container => {}, # for quickly traversing structure
- K_opening_ternary => {}, # for quickly traversing structure
- K_closing_ternary => {}, # for quickly traversing structure
- rcontainer_map => {}, # hierarchical map of containers
- rK_phantom_semicolons =>
- undef, # for undoing phantom semicolons if iterating
- rpaired_to_inner_container => {},
- rbreak_container => {}, # prevent one-line blocks
- rshort_nested => {}, # blocks not forced open
- rvalid_self_keys => [], # for checking
- valign_batch_count => 0,
- };
- my @valid_keys = keys %{$formatter_self};
- $formatter_self->{rvalid_self_keys} = \@valid_keys;
+ sub get_saw_brace_error {
+ if ($logger_object) {
+ return $logger_object->get_saw_brace_error();
+ }
+ return;
+ }
- bless $formatter_self, $class;
+ sub we_are_at_the_last_line {
+ if ($logger_object) {
+ $logger_object->we_are_at_the_last_line();
+ }
+ return;
+ }
- # Safety check..this is not a class yet
- if ( _increment_count() > 1 ) {
- confess
-"Attempt to create more than 1 object in $class, which is not a true class yet\n";
+} ## end closure for logger routines
+
+{ ## begin closure for diagnostics routines
+ my $diagnostics_object;
+
+ # Called once per file to initialize the diagnostics object
+ sub set_diagnostics_object {
+ $diagnostics_object = shift;
+ return;
}
- return $formatter_self;
-}
-# Future routines for storing new lines
-sub push_line {
- my ( $self, $rline ) = @_;
+ sub write_diagnostics {
+ my ($msg) = @_;
+ if ($diagnostics_object) {
+ $diagnostics_object->write_diagnostics($msg);
+ }
+ return;
+ }
+} ## end closure for diagnostics routines
- # my $rline = $rlines->[$index_old];
- # push @{$rlines_new}, $rline;
- return;
+sub get_convergence_check {
+ my ($self) = @_;
+ return $self->[_converged_];
}
-sub push_old_line {
- my ( $self, $index_old ) = @_;
-
- # TODO: This will copy line with index $index_old to the new line array
- # my $rlines = $self->{rlines};
- # my $rline = $rlines->[$index_old];
- # $self->push_line($rline);
- return;
+sub get_added_semicolon_count {
+ my $self = shift;
+ return $self->[_added_semicolon_count_];
}
-sub push_blank_line {
+sub get_output_line_number {
my ($self) = @_;
+ my $vao = $self->[_vertical_aligner_object_];
+ return $vao->get_output_line_number();
+}
- # my $rline = ...
- # $self->push_line($rline);
+sub want_blank_line {
+ my $self = shift;
+ $self->flush();
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->want_blank_line();
return;
}
-sub push_CODE_line {
- my ( $self, $Kmin, $Kmax ) = @_;
-
- # TODO: This will store the values for one new line of CODE
- # CHECK TOKEN RANGE HERE
- # $self->push_line($rline);
+sub write_unindented_line {
+ my ( $self, $line ) = @_;
+ $self->flush();
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->write_line($line);
return;
}
-sub increment_valign_batch_count {
- my ($self) = shift;
- return ++$self->{valign_batch_count};
+sub consecutive_nonblank_lines {
+ my ($self) = @_;
+ my $file_writer_object = $self->[_file_writer_object_];
+ my $vao = $self->[_vertical_aligner_object_];
+ return $file_writer_object->get_consecutive_nonblank_lines() +
+ $vao->get_cached_line_count();
}
-sub get_valign_batch_count {
- my ($self) = shift;
- return $self->{valign_batch_count};
+sub max {
+ my (@vals) = @_;
+ my $max = shift @vals;
+ for (@vals) { $max = $_ > $max ? $_ : $max }
+ return $max;
}
-sub Fault {
- my ($msg) = @_;
-
- # This routine is called for errors that really should not occur
- # except if there has been a bug introduced by a recent program change
- my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
- my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
- my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
- my $input_stream_name = $logger_object->get_input_stream_name();
+sub min {
+ my (@vals) = @_;
+ my $min = shift @vals;
+ for (@vals) { $min = $_ < $min ? $_ : $min }
+ return $min;
+}
- Die(<<EOM);
-==============================================================================
-While operating on input stream with name: '$input_stream_name'
-A fault was detected at line $line0 of sub '$subroutine1'
-in file '$filename1'
-which was called from line $line1 of sub '$subroutine2'
-Message: '$msg'
-This is probably an error introduced by a recent programming change.
-==============================================================================
-EOM
+sub split_words {
- # This is for Perl-Critic
- return;
+ # given a string containing words separated by whitespace,
+ # return the list of words
+ my ($str) = @_;
+ return unless $str;
+ $str =~ s/\s+$//;
+ $str =~ s/^\s+//;
+ return split( /\s+/, $str );
}
-sub check_self_hash {
- my $self = shift;
- my @valid_self_keys = @{ $self->{rvalid_self_keys} };
- my %valid_self_hash;
- @valid_self_hash{@valid_self_keys} = (1) x scalar(@valid_self_keys);
- check_keys( $self, \%valid_self_hash, "Checkpoint: self error", 1 );
- return;
-}
+###########################################
+# CODE SECTION 3: Check and process options
+###########################################
-sub check_token_array {
- my $self = shift;
+sub check_options {
- # Check for errors in the array of tokens
- # Uses package variable $NVARS
- $self->check_self_hash();
- my $rLL = $self->{rLL};
- for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
- my $nvars = @{ $rLL->[$KK] };
- if ( $nvars != $NVARS ) {
- my $type = $rLL->[$KK]->[_TYPE_];
- $type = '*' unless defined($type);
- Fault(
-"number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
- );
- }
- foreach my $var ( _TOKEN_, _TYPE_ ) {
- if ( !defined( $rLL->[$KK]->[$var] ) ) {
- my $iline = $rLL->[$KK]->[_LINE_INDEX_];
- Fault("Undefined variable $var for K=$KK, line=$iline\n");
- }
- }
- }
- return;
-}
+ # This routine is called to check the user-supplied run parameters
+ # and to configure the control hashes to them.
+ $rOpts = shift;
-sub set_rLL_max_index {
- my $self = shift;
+ initialize_whitespace_hashes();
+ initialize_bond_strength_hashes();
+
+ # This function must be called early to get hashes with grep initialized
+ initialize_grep_and_friends( $rOpts->{'grep-alias-list'} );
+
+ # Make needed regex patterns for matching text.
+ # NOTE: sub_matching_patterns must be made first because later patterns use
+ # them; see RT #133130.
+ make_sub_matching_pattern();
+ make_static_block_comment_pattern();
+ make_static_side_comment_pattern();
+ make_closing_side_comment_prefix();
+ make_closing_side_comment_list_pattern();
+ $format_skipping_pattern_begin =
+ make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
+ $format_skipping_pattern_end =
+ make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
+ make_non_indenting_brace_pattern();
- # Set the limit of the rLL array, assuming that it is correct.
- # This should only be called by routines after they make changes
- # to tokenization
- my $rLL = $self->{rLL};
- if ( !defined($rLL) ) {
+ # If closing side comments ARE selected, then we can safely
+ # delete old closing side comments unless closing side comment
+ # warnings are requested. This is a good idea because it will
+ # eliminate any old csc's which fall below the line count threshold.
+ # We cannot do this if warnings are turned on, though, because we
+ # might delete some text which has been added. So that must
+ # be handled when comments are created. And we cannot do this
+ # with -io because -csc will be skipped altogether.
+ if ( $rOpts->{'closing-side-comments'} ) {
+ if ( !$rOpts->{'closing-side-comment-warnings'}
+ && !$rOpts->{'indent-only'} )
+ {
+ $rOpts->{'delete-closing-side-comments'} = 1;
+ }
+ }
- # Shouldn't happen because rLL was initialized to be an array ref
- Fault("Undefined Memory rLL");
+ # If closing side comments ARE NOT selected, but warnings ARE
+ # selected and we ARE DELETING csc's, then we will pretend to be
+ # adding with a huge interval. This will force the comments to be
+ # generated for comparison with the old comments, but not added.
+ elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
+ if ( $rOpts->{'delete-closing-side-comments'} ) {
+ $rOpts->{'delete-closing-side-comments'} = 0;
+ $rOpts->{'closing-side-comments'} = 1;
+ $rOpts->{'closing-side-comment-interval'} = 100000000;
+ }
}
- my $Klimit_old = $self->{Klimit};
- my $num = @{$rLL};
- my $Klimit;
- if ( $num > 0 ) { $Klimit = $num - 1 }
- $self->{Klimit} = $Klimit;
- return ($Klimit);
-}
-sub get_rLL_max_index {
- my $self = shift;
+ make_bli_pattern();
+ make_bl_pattern();
+ make_block_brace_vertical_tightness_pattern();
+ make_blank_line_pattern();
+ make_keyword_group_list_pattern();
- # the memory location $rLL and number of tokens should be obtained
- # from this routine so that any autovivication can be immediately caught.
- my $rLL = $self->{rLL};
- my $Klimit = $self->{Klimit};
- if ( !defined($rLL) ) {
+ # Make initial list of desired one line block types
+ # They will be modified by 'prepare_cuddled_block_types'
+ # NOTE: this line must come after is_sort_map_grep_eval is
+ # initialized in sub 'initialize_grep_and_friends'
+ %want_one_line_block = %is_sort_map_grep_eval;
- # Shouldn't happen because rLL was initialized to be an array ref
- Fault("Undefined Memory rLL");
+ prepare_cuddled_block_types();
+ if ( $rOpts->{'dump-cuddled-block-list'} ) {
+ dump_cuddled_block_list(*STDOUT);
+ Exit(0);
}
- my $num = @{$rLL};
- if ( $num == 0 && defined($Klimit)
- || $num > 0 && !defined($Klimit)
- || $num > 0 && $Klimit != $num - 1 )
- {
- # Possible autovivification problem...
- if ( !defined($Klimit) ) { $Klimit = '*' }
- Fault("Error getting rLL: Memory items=$num and Klimit=$Klimit");
+ # -xlp implies -lp
+ if ( $rOpts->{'extended-line-up-parentheses'} ) {
+ $rOpts->{'line-up-parentheses'} ||= 1;
}
- return ($Klimit);
-}
-
-sub prepare_for_new_input_lines {
-
- # Remember the largest batch size processed. This is needed
- # by the pad routine to avoid padding the first nonblank token
- if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) {
- $peak_batch_size = $max_index_to_go;
- }
-
- $gnu_sequence_number++; # increment output batch counter
- %last_gnu_equals = ();
- %gnu_comma_count = ();
- %gnu_arrow_count = ();
- $line_start_index_to_go = 0;
- $max_gnu_item_index = UNDEFINED_INDEX;
- $index_max_forced_break = UNDEFINED_INDEX;
- $max_index_to_go = UNDEFINED_INDEX;
- $last_nonblank_index_to_go = UNDEFINED_INDEX;
- $last_nonblank_type_to_go = '';
- $last_nonblank_token_to_go = '';
- $last_last_nonblank_index_to_go = UNDEFINED_INDEX;
- $last_last_nonblank_type_to_go = '';
- $last_last_nonblank_token_to_go = '';
- $forced_breakpoint_count = 0;
- $forced_breakpoint_undo_count = 0;
- $rbrace_follower = undef;
- $summed_lengths_to_go[0] = 0;
- $comma_count_in_batch = 0;
- $starting_in_quote = 0;
- destroy_one_line_block();
- return;
-}
+ if ( $rOpts->{'line-up-parentheses'} ) {
-sub keyword_group_scan {
- my $self = shift;
+ if ( $rOpts->{'indent-only'}
+ || !$rOpts->{'add-newlines'}
+ || !$rOpts->{'delete-old-newlines'} )
+ {
+ Warn(<<EOM);
+-----------------------------------------------------------------------
+Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
+
+The -lp indentation logic requires that perltidy be able to coordinate
+arbitrarily large numbers of line breakpoints. This isn't possible
+with these flags.
+-----------------------------------------------------------------------
+EOM
+ $rOpts->{'line-up-parentheses'} = 0;
+ $rOpts->{'extended-line-up-parentheses'} = 0;
+ }
- # Manipulate blank lines around keyword groups (kgb* flags)
- # Scan all lines looking for runs of consecutive lines beginning with
- # selected keywords. Example keywords are 'my', 'our', 'local', ... but
- # they may be anything. We will set flags requesting that blanks be
- # inserted around and within them according to input parameters. Note
- # that we are scanning the lines as they came in in the input stream, so
- # they are not necessarily well formatted.
+ if ( $rOpts->{'whitespace-cycle'} ) {
+ Warn(<<EOM);
+Conflict: -wc cannot currently be used with the -lp option; ignoring -wc
+EOM
+ $rOpts->{'whitespace-cycle'} = 0;
+ }
+ }
- # The output of this sub is a return hash ref whose keys are the indexes of
- # lines after which we desire a blank line. For line index i:
- # $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i
- # $rhash_of_desires->{$i} = 2 means we want blank line $i removed
- my $rhash_of_desires = {};
+ # At present, tabs are not compatible with the line-up-parentheses style
+ # (it would be possible to entab the total leading whitespace
+ # just prior to writing the line, if desired).
+ if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
+ Warn(<<EOM);
+Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
+EOM
+ $rOpts->{'tabs'} = 0;
+ }
- my $Opt_blanks_before = $rOpts->{'keyword-group-blanks-before'}; # '-kgbb'
- my $Opt_blanks_after = $rOpts->{'keyword-group-blanks-after'}; # '-kgba'
- my $Opt_blanks_inside = $rOpts->{'keyword-group-blanks-inside'}; # '-kgbi'
- my $Opt_blanks_delete = $rOpts->{'keyword-group-blanks-delete'}; # '-kgbd'
- my $Opt_size = $rOpts->{'keyword-group-blanks-size'}; # '-kgbs'
+ # Likewise, tabs are not compatible with outdenting..
+ if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
+ Warn(<<EOM);
+Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
+EOM
+ $rOpts->{'tabs'} = 0;
+ }
- # A range of sizes can be input with decimal notation like 'min.max' with
- # any number of dots between the two numbers. Examples:
- # string => min max matches
- # 1.1 1 1 exactly 1
- # 1.3 1 3 1,2, or 3
- # 1..3 1 3 1,2, or 3
- # 5 5 - 5 or more
- # 6. 6 - 6 or more
- # .2 - 2 up to 2
- # 1.0 1 0 nothing
- my ( $Opt_size_min, $Opt_size_max ) = split /\.+/, $Opt_size;
- if ( $Opt_size_min && $Opt_size_min !~ /^\d+$/
- || $Opt_size_max && $Opt_size_max !~ /^\d+$/ )
- {
+ if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
Warn(<<EOM);
-Unexpected value for -kgbs: '$Opt_size'; expecting 'min' or 'min.max';
-ignoring all -kgb flags
+Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
EOM
- return $rhash_of_desires;
+ $rOpts->{'tabs'} = 0;
}
- $Opt_size_min = 1 unless ($Opt_size_min);
- if ( $Opt_size_max && $Opt_size_max < $Opt_size_min ) {
- return $rhash_of_desires;
+ if ( !$rOpts->{'space-for-semicolon'} ) {
+ $want_left_space{'f'} = -1;
}
- # codes for $Opt_blanks_before and $Opt_blanks_after:
- # 0 = never (delete if exist)
- # 1 = stable (keep unchanged)
- # 2 = always (insert if missing)
+ if ( $rOpts->{'space-terminal-semicolon'} ) {
+ $want_left_space{';'} = 1;
+ }
- return $rhash_of_desires
- unless $Opt_size_min > 0
- && ( $Opt_blanks_before != 1
- || $Opt_blanks_after != 1
- || $Opt_blanks_inside
- || $Opt_blanks_delete );
+ # We should put an upper bound on any -sil=n value. Otherwise enormous
+ # files could be created by mistake.
+ for ( $rOpts->{'starting-indentation-level'} ) {
+ if ( $_ && $_ > 100 ) {
+ Warn(<<EOM);
+The value --starting-indentation-level=$_ is very large; a mistake? resetting to 0;
+EOM
+ $_ = 0;
+ }
+ }
- my $Opt_pattern = $keyword_group_list_pattern;
- my $Opt_comment_pattern = $keyword_group_list_comment_pattern;
- my $Opt_repeat_count =
- $rOpts->{'keyword-group-blanks-repeat-count'}; # '-kgbr'
-
- my $rlines = $self->{rlines};
- my $rLL = $self->{rLL};
- my $K_closing_container = $self->{K_closing_container};
+ # implement outdenting preferences for keywords
+ %outdent_keyword = ();
+ my @okw = split_words( $rOpts->{'outdent-keyword-list'} );
+ unless (@okw) {
+ @okw = qw(next last redo goto return); # defaults
+ }
- # variables for the current group and subgroups:
- my ( $ibeg, $iend, $count, $level_beg, $K_closing, @iblanks, @group,
- @subgroup );
+ # FUTURE: if not a keyword, assume that it is an identifier
+ foreach (@okw) {
+ if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
+ $outdent_keyword{$_} = 1;
+ }
+ else {
+ Warn("ignoring '$_' in -okwl list; not a perl keyword");
+ }
+ }
- # Definitions:
- # ($ibeg, $iend) = starting and ending line indexes of this entire group
- # $count = total number of keywords seen in this entire group
- # $level_beg = indententation level of this group
- # @group = [ $i, $token, $count ] =list of all keywords & blanks
- # @subgroup = $j, index of group where token changes
- # @iblanks = line indexes of blank lines in input stream in this group
- # where i=starting line index
- # token (the keyword)
- # count = number of this token in this subgroup
- # j = index in group where token changes
- #
- # These vars will contain values for the most recently seen line:
- my ( $line_type, $CODE_type, $K_first, $K_last );
+ # setup hash for -kpit option
+ %keyword_paren_inner_tightness = ();
+ my $kpit_value = $rOpts->{'keyword-paren-inner-tightness'};
+ if ( defined($kpit_value) && $kpit_value != 1 ) {
+ my @kpit =
+ split_words( $rOpts->{'keyword-paren-inner-tightness-list'} );
+ unless (@kpit) {
+ @kpit = qw(if elsif unless while until for foreach); # defaults
+ }
- my $number_of_groups_seen = 0;
+ # we will allow keywords and user-defined identifiers
+ foreach (@kpit) {
+ $keyword_paren_inner_tightness{$_} = $kpit_value;
+ }
+ }
- ####################
- # helper subroutines
- ####################
+ # implement user whitespace preferences
+ if ( my @q = split_words( $rOpts->{'want-left-space'} ) ) {
+ @want_left_space{@q} = (1) x scalar(@q);
+ }
- my $insert_blank_after = sub {
- my ($i) = @_;
- $rhash_of_desires->{$i} = 1;
- my $ip = $i + 1;
- if ( defined( $rhash_of_desires->{$ip} )
- && $rhash_of_desires->{$ip} == 2 )
- {
- $rhash_of_desires->{$ip} = 0;
- }
- return;
- };
+ if ( my @q = split_words( $rOpts->{'want-right-space'} ) ) {
+ @want_right_space{@q} = (1) x scalar(@q);
+ }
- my $split_into_sub_groups = sub {
+ if ( my @q = split_words( $rOpts->{'nowant-left-space'} ) ) {
+ @want_left_space{@q} = (-1) x scalar(@q);
+ }
- # place blanks around long sub-groups of keywords
- # ...if requested
- return unless ($Opt_blanks_inside);
+ if ( my @q = split_words( $rOpts->{'nowant-right-space'} ) ) {
+ @want_right_space{@q} = (-1) x scalar(@q);
+ }
+ if ( $rOpts->{'dump-want-left-space'} ) {
+ dump_want_left_space(*STDOUT);
+ Exit(0);
+ }
- # loop over sub-groups, index k
- push @subgroup, scalar @group;
- my $kbeg = 1;
- my $kend = @subgroup - 1;
- for ( my $k = $kbeg ; $k <= $kend ; $k++ ) {
+ if ( $rOpts->{'dump-want-right-space'} ) {
+ dump_want_right_space(*STDOUT);
+ Exit(0);
+ }
- # index j runs through all keywords found
- my $j_b = $subgroup[ $k - 1 ];
- my $j_e = $subgroup[$k] - 1;
+ # default keywords for which space is introduced before an opening paren
+ # (at present, including them messes up vertical alignment)
+ my @sak = qw(my local our and or xor err eq ne if else elsif until
+ unless while for foreach return switch case given when catch);
+ %space_after_keyword = map { $_ => 1 } @sak;
- # index i is the actual line number of a keyword
- my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] };
- my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] };
- my $num = $count_e - $count_b + 1;
+ # first remove any or all of these if desired
+ if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
- # This subgroup runs from line $ib to line $ie-1, but may contain
- # blank lines
- if ( $num >= $Opt_size_min ) {
+ # -nsak='*' selects all the above keywords
+ if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) }
+ @space_after_keyword{@q} = (0) x scalar(@q);
+ }
- # if there are blank lines, we require that at least $num lines
- # be non-blank up to the boundary with the next subgroup.
- my $nog_b = my $nog_e = 1;
- if ( @iblanks && !$Opt_blanks_delete ) {
- my $j_bb = $j_b + $num - 1;
- my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] };
- $nog_b = $count_bb - $count_b + 1 == $num;
+ # then allow user to add to these defaults
+ if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) {
+ @space_after_keyword{@q} = (1) x scalar(@q);
+ }
- my $j_ee = $j_e - ( $num - 1 );
- my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] };
- $nog_e = $count_e - $count_ee + 1 == $num;
- }
- if ( $nog_b && $k > $kbeg ) {
- $insert_blank_after->( $i_b - 1 );
- }
- if ( $nog_e && $k < $kend ) {
- my ( $i_ep, $tok_ep, $count_ep ) = @{ $group[ $j_e + 1 ] };
- $insert_blank_after->( $i_ep - 1 );
- }
+ # implement user break preferences
+ my $break_after = sub {
+ my @toks = @_;
+ foreach my $tok (@toks) {
+ if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
+ my $lbs = $left_bond_strength{$tok};
+ my $rbs = $right_bond_strength{$tok};
+ if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
+ ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
+ ( $lbs, $rbs );
}
}
- };
-
- my $delete_if_blank = sub {
- my ($i) = @_;
-
- # delete line $i if it is blank
- return unless ( $i >= 0 && $i < @{$rlines} );
- my $line_type = $rlines->[$i]->{_line_type};
- return if ( $line_type ne 'CODE' );
- my $code_type = $rlines->[$i]->{_code_type};
- if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; }
return;
};
- my $delete_inner_blank_lines = sub {
-
- # always remove unwanted trailing blank lines from our list
- return unless (@iblanks);
- while ( my $ibl = pop(@iblanks) ) {
- if ( $ibl < $iend ) { push @iblanks, $ibl; last }
- $iend = $ibl;
+ my $break_before = sub {
+ my @toks = @_;
+ foreach my $tok (@toks) {
+ my $lbs = $left_bond_strength{$tok};
+ my $rbs = $right_bond_strength{$tok};
+ if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
+ ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
+ ( $lbs, $rbs );
+ }
}
+ return;
+ };
- # now mark mark interior blank lines for deletion if requested
- return unless ($Opt_blanks_delete);
+ $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
+ $break_before->(@all_operators)
+ if ( $rOpts->{'break-before-all-operators'} );
- while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 }
+ $break_after->( split_words( $rOpts->{'want-break-after'} ) );
+ $break_before->( split_words( $rOpts->{'want-break-before'} ) );
- };
+ # make note if breaks are before certain key types
+ %want_break_before = ();
+ foreach my $tok ( @all_operators, ',' ) {
+ $want_break_before{$tok} =
+ $left_bond_strength{$tok} < $right_bond_strength{$tok};
+ }
- my $end_group = sub {
+ # Coordinate ?/: breaks, which must be similar
+ if ( !$want_break_before{':'} ) {
+ $want_break_before{'?'} = $want_break_before{':'};
+ $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
+ $left_bond_strength{'?'} = NO_BREAK;
+ }
- # end a group of keywords
- my ($bad_ending) = @_;
- if ( defined($ibeg) && $ibeg >= 0 ) {
+ # Only make a hash entry for the next parameters if values are defined.
+ # That allows a quick check to be made later.
+ %break_before_container_types = ();
+ for ( $rOpts->{'break-before-hash-brace'} ) {
+ $break_before_container_types{'{'} = $_ if $_ && $_ > 0;
+ }
+ for ( $rOpts->{'break-before-square-bracket'} ) {
+ $break_before_container_types{'['} = $_ if $_ && $_ > 0;
+ }
+ for ( $rOpts->{'break-before-paren'} ) {
+ $break_before_container_types{'('} = $_ if $_ && $_ > 0;
+ }
- # then handle sufficiently large groups
- if ( $count >= $Opt_size_min ) {
+ #--------------------------------------------------------------
+ # The combination -lp -iob -vmll -bbx=2 can be unstable (b1266)
+ #--------------------------------------------------------------
+ # The -vmll and -lp parameters do not really work well together.
+ # To avoid instabilities, we will change any -bbx=2 to -bbx=1 (stable).
+ # NOTE: we could make this more precise by looking at any exclusion
+ # flags for -lp, and allowing -bbx=2 for excluded types.
+ if ( $rOpts->{'variable-maximum-line-length'}
+ && $rOpts->{'ignore-old-breakpoints'}
+ && $rOpts->{'line-up-parentheses'} )
+ {
+ my @changed;
+ foreach my $key ( keys %break_before_container_types ) {
+ if ( $break_before_container_types{$key} == 2 ) {
+ $break_before_container_types{$key} = 1;
+ push @changed, $key;
+ }
+ }
+ if (@changed) {
- $number_of_groups_seen++;
+ # we could write a warning here
+ }
+ }
- # do any blank deletions regardless of the count
- $delete_inner_blank_lines->();
+ #-------------------------------------------------------------------
+ # The combination -xlp and -vmll can be unstable unless -iscl is set
+ #-------------------------------------------------------------------
+ # This is a temporary fix for issue b1302. See also b1306, b1310.
+ # FIXME: look for a better fix.
+ if ( $rOpts->{'variable-maximum-line-length'}
+ && $rOpts->{'extended-line-up-parentheses'}
+ && !$rOpts->{'ignore-side-comment-lengths'} )
+ {
+ $rOpts->{'ignore-side-comment-lengths'} = 1;
- if ( $ibeg > 0 ) {
- my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type};
+ # we could write a warning here
+ }
- # patch for hash bang line which is not currently marked as
- # a comment; mark it as a comment
- if ( $ibeg == 1 && !$code_type ) {
- my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text};
- $code_type = 'BC'
- if ( $line_text && $line_text =~ /^#/ );
- }
+ #-----------------------------------------------------------
+ # The combination -lp -vmll can be unstable if -ci<2 (b1267)
+ #-----------------------------------------------------------
+ # The -vmll and -lp parameters do not really work well together.
+ # This is a very crude fix for an unusual parameter combination.
+ if ( $rOpts->{'variable-maximum-line-length'}
+ && $rOpts->{'line-up-parentheses'}
+ && $rOpts->{'continuation-indentation'} < 2 )
+ {
+ $rOpts->{'continuation-indentation'} = 2;
+ ##Warn("Increased -ci=n to n=2 for stability with -lp and -vmll\n");
+ }
- # Do not insert a blank after a comment
- # (this could be subject to a flag in the future)
- if ( $code_type !~ /(BC|SBC|SBCX)/ ) {
- if ( $Opt_blanks_before == INSERT ) {
- $insert_blank_after->( $ibeg - 1 );
+ %container_indentation_options = ();
+ foreach my $pair (
+ [ 'break-before-hash-brace-and-indent', '{' ],
+ [ 'break-before-square-bracket-and-indent', '[' ],
+ [ 'break-before-paren-and-indent', '(' ],
+ )
+ {
+ my ( $key, $tok ) = @{$pair};
+ my $opt = $rOpts->{$key};
+ if ( defined($opt) && $opt > 0 && $break_before_container_types{$tok} )
+ {
- }
- elsif ( $Opt_blanks_before == DELETE ) {
- $delete_if_blank->( $ibeg - 1 );
- }
- }
+ # (1) -lp is not compatable with opt=2, silently set to opt=0
+ # (2) opt=0 and 2 give same result if -i=-ci; but opt=0 is faster
+ if ( $opt == 2 ) {
+ if ( $rOpts->{'line-up-parentheses'}
+ || $rOpts->{'indent-columns'} ==
+ $rOpts->{'continuation-indentation'} )
+ {
+ $opt = 0;
}
+ }
+ $container_indentation_options{$tok} = $opt;
+ }
+ }
- # We will only put blanks before code lines. We could loosen
- # this rule a little, but we have to be very careful because
- # for example we certainly don't want to drop a blank line
- # after a line like this:
- # my $var = <<EOM;
- if ( $line_type eq 'CODE' && defined($K_first) ) {
+ # Define here tokens which may follow the closing brace of a do statement
+ # on the same line, as in:
+ # } while ( $something);
+ my @dof = qw(until while unless if ; : );
+ push @dof, ',';
+ @is_do_follower{@dof} = (1) x scalar(@dof);
- # - Do not put a blank before a line of different level
- # - Do not put a blank line if we ended the search badly
- # - Do not put a blank at the end of the file
- # - Do not put a blank line before a hanging side comment
- my $level = $rLL->[$K_first]->[_LEVEL_];
- my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
+ # What tokens may follow the closing brace of an if or elsif block?
+ # Not used. Previously used for cuddled else, but no longer needed.
+ %is_if_brace_follower = ();
- if ( $level == $level_beg
- && $ci_level == 0
- && !$bad_ending
- && $iend < @{$rlines}
- && $CODE_type ne 'HSC' )
- {
- if ( $Opt_blanks_after == INSERT ) {
- $insert_blank_after->($iend);
- }
- elsif ( $Opt_blanks_after == DELETE ) {
- $delete_if_blank->( $iend + 1 );
- }
- }
- }
- }
- $split_into_sub_groups->();
- }
+ # nothing can follow the closing curly of an else { } block:
+ %is_else_brace_follower = ();
- # reset for another group
- $ibeg = -1;
- $iend = undef;
- $level_beg = -1;
- $K_closing = undef;
- @group = ();
- @subgroup = ();
- @iblanks = ();
- };
+ # what can follow a multi-line anonymous sub definition closing curly:
+ my @asf = qw# ; : => or and && || ~~ !~~ ) #;
+ push @asf, ',';
+ @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf);
- my $find_container_end = sub {
+ # what can follow a one-line anonymous sub closing curly:
+ # one-line anonymous subs also have ']' here...
+ # see tk3.t and PP.pm
+ my @asf1 = qw# ; : => or and && || ) ] ~~ !~~ #;
+ push @asf1, ',';
+ @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1);
- # If the keyword lines ends with an open token, find the closing token
- # '$K_closing' so that we can easily skip past the contents of the
- # container.
- return if ( $K_last <= $K_first );
- my $KK = $K_last;
- my $type_last = $rLL->[$KK]->[_TYPE_];
- my $tok_last = $rLL->[$KK]->[_TOKEN_];
- if ( $type_last eq '#' ) {
- $KK = $self->K_previous_nonblank($KK);
- $tok_last = $rLL->[$KK]->[_TOKEN_];
- }
- if ( $KK > $K_first && $tok_last =~ /^[\(\{\[]$/ ) {
+ # What can follow a closing curly of a block
+ # which is not an if/elsif/else/do/sort/map/grep/eval/sub
+ # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
+ my @obf = qw# ; : => or and && || ) #;
+ push @obf, ',';
+ @is_other_brace_follower{@obf} = (1) x scalar(@obf);
+
+ $right_bond_strength{'{'} = WEAK;
+ $left_bond_strength{'{'} = VERY_STRONG;
+
+ # make -l=0 equal to -l=infinite
+ if ( !$rOpts->{'maximum-line-length'} ) {
+ $rOpts->{'maximum-line-length'} = 1000000;
+ }
+
+ # make -lbl=0 equal to -lbl=infinite
+ if ( !$rOpts->{'long-block-line-count'} ) {
+ $rOpts->{'long-block-line-count'} = 1000000;
+ }
+
+ my $ole = $rOpts->{'output-line-ending'};
+ if ($ole) {
+ my %endings = (
+ dos => "\015\012",
+ win => "\015\012",
+ mac => "\015",
+ unix => "\012",
+ );
+
+ # Patch for RT #99514, a memoization issue.
+ # Normally, the user enters one of 'dos', 'win', etc, and we change the
+ # value in the options parameter to be the corresponding line ending
+ # character. But, if we are using memoization, on later passes through
+ # here the option parameter will already have the desired ending
+ # character rather than the keyword 'dos', 'win', etc. So
+ # we must check to see if conversion has already been done and, if so,
+ # bypass the conversion step.
+ my %endings_inverted = (
+ "\015\012" => 'dos',
+ "\015\012" => 'win',
+ "\015" => 'mac',
+ "\012" => 'unix',
+ );
- my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
- my $lev = $rLL->[$KK]->[_LEVEL_];
- if ( $lev == $level_beg ) {
- $K_closing = $K_closing_container->{$type_sequence};
+ if ( defined( $endings_inverted{$ole} ) ) {
+
+ # we already have valid line ending, nothing more to do
+ }
+ else {
+ $ole = lc $ole;
+ unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
+ my $str = join " ", keys %endings;
+ Die(<<EOM);
+Unrecognized line ending '$ole'; expecting one of: $str
+EOM
+ }
+ if ( $rOpts->{'preserve-line-endings'} ) {
+ Warn("Ignoring -ple; conflicts with -ole\n");
+ $rOpts->{'preserve-line-endings'} = undef;
}
}
- };
+ }
- my $add_to_group = sub {
- my ( $i, $token, $level ) = @_;
+ # hashes used to simplify setting whitespace
+ %tightness = (
+ '{' => $rOpts->{'brace-tightness'},
+ '}' => $rOpts->{'brace-tightness'},
+ '(' => $rOpts->{'paren-tightness'},
+ ')' => $rOpts->{'paren-tightness'},
+ '[' => $rOpts->{'square-bracket-tightness'},
+ ']' => $rOpts->{'square-bracket-tightness'},
+ );
+ %matching_token = (
+ '{' => '}',
+ '(' => ')',
+ '[' => ']',
+ '?' => ':',
+ );
- # End the previous group if we have reached the maximum
- # group size
- if ( $Opt_size_max && @group >= $Opt_size_max ) {
- $end_group->();
+ if ( $rOpts->{'ignore-old-breakpoints'} ) {
+
+ my @conflicts;
+ if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
+ $rOpts->{'break-at-old-method-breakpoints'} = 0;
+ push @conflicts, '--break-at-old-method-breakpoints (-bom)';
+ }
+ if ( $rOpts->{'break-at-old-comma-breakpoints'} ) {
+ $rOpts->{'break-at-old-comma-breakpoints'} = 0;
+ push @conflicts, '--break-at-old-comma-breakpoints (-boc)';
+ }
+ if ( $rOpts->{'break-at-old-semicolon-breakpoints'} ) {
+ $rOpts->{'break-at-old-semicolon-breakpoints'} = 0;
+ push @conflicts, '--break-at-old-semicolon-breakpoints (-bos)';
+ }
+ if ( $rOpts->{'keep-old-breakpoints-before'} ) {
+ $rOpts->{'keep-old-breakpoints-before'} = "";
+ push @conflicts, '--keep-old-breakpoints-before (-kbb)';
+ }
+ if ( $rOpts->{'keep-old-breakpoints-after'} ) {
+ $rOpts->{'keep-old-breakpoints-after'} = "";
+ push @conflicts, '--keep-old-breakpoints-after (-kba)';
}
- if ( @group == 0 ) {
- $ibeg = $i;
- $level_beg = $level;
- $count = 0;
+ if (@conflicts) {
+ my $msg = join( "\n ",
+" Conflict: These conflicts with --ignore-old-breakponts (-iob) will be turned off:",
+ @conflicts )
+ . "\n";
+ Warn($msg);
}
- $count++;
- $iend = $i;
+ # Note: These additional parameters are made inactive by -iob.
+ # They are silently turned off here because they are on by default.
+ # We would generate unexpected warnings if we issued a warning.
+ $rOpts->{'break-at-old-keyword-breakpoints'} = 0;
+ $rOpts->{'break-at-old-logical-breakpoints'} = 0;
+ $rOpts->{'break-at-old-ternary-breakpoints'} = 0;
+ $rOpts->{'break-at-old-attribute-breakpoints'} = 0;
+ }
- # New sub-group?
- if ( !@group || $token ne $group[-1]->[1] ) {
- push @subgroup, scalar(@group);
- }
- push @group, [ $i, $token, $count ];
+ %keep_break_before_type = ();
+ initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-before'},
+ 'kbb', \%keep_break_before_type );
- # remember if this line ends in an open container
- $find_container_end->();
+ %keep_break_after_type = ();
+ initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-after'},
+ 'kba', \%keep_break_after_type );
- return;
- };
+ #------------------------------------------------------------
+ # Make global vars for frequently used options for efficiency
+ #------------------------------------------------------------
- ###################################
- # loop over all lines of the source
- ###################################
- $end_group->();
- my $i = -1;
- foreach my $line_of_tokens ( @{$rlines} ) {
+ $rOpts_add_newlines = $rOpts->{'add-newlines'};
+ $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
+ $rOpts_blank_lines_after_opening_block =
+ $rOpts->{'blank-lines-after-opening-block'};
+ $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
+ $rOpts_block_brace_vertical_tightness =
+ $rOpts->{'block-brace-vertical-tightness'};
+ $rOpts_break_after_labels = $rOpts->{'break-after-labels'};
+ $rOpts_break_at_old_attribute_breakpoints =
+ $rOpts->{'break-at-old-attribute-breakpoints'};
+ $rOpts_break_at_old_comma_breakpoints =
+ $rOpts->{'break-at-old-comma-breakpoints'};
+ $rOpts_break_at_old_keyword_breakpoints =
+ $rOpts->{'break-at-old-keyword-breakpoints'};
+ $rOpts_break_at_old_logical_breakpoints =
+ $rOpts->{'break-at-old-logical-breakpoints'};
+ $rOpts_break_at_old_semicolon_breakpoints =
+ $rOpts->{'break-at-old-semicolon-breakpoints'};
+ $rOpts_break_at_old_ternary_breakpoints =
+ $rOpts->{'break-at-old-ternary-breakpoints'};
+ $rOpts_break_open_paren_list = $rOpts->{'break-open-paren-list'};
+ $rOpts_closing_side_comments = $rOpts->{'closing-side-comments'};
+ $rOpts_closing_side_comment_else_flag =
+ $rOpts->{'closing-side-comment-else-flag'};
+ $rOpts_closing_side_comment_maximum_text =
+ $rOpts->{'closing-side-comment-maximum-text'};
+ $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
+ $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
+ $rOpts_delete_closing_side_comments =
+ $rOpts->{'delete-closing-side-comments'};
+ $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
+ $rOpts_extended_continuation_indentation =
+ $rOpts->{'extended-continuation-indentation'};
+ $rOpts_delete_side_comments = $rOpts->{'delete-side-comments'};
+ $rOpts_format_skipping = $rOpts->{'format-skipping'};
+ $rOpts_freeze_whitespace = $rOpts->{'freeze-whitespace'};
+ $rOpts_function_paren_vertical_alignment =
+ $rOpts->{'function-paren-vertical-alignment'};
+ $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
+ $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
+ $rOpts_ignore_side_comment_lengths =
+ $rOpts->{'ignore-side-comment-lengths'};
+ $rOpts_indent_closing_brace = $rOpts->{'indent-closing-brace'};
+ $rOpts_indent_columns = $rOpts->{'indent-columns'};
+ $rOpts_indent_only = $rOpts->{'indent-only'};
+ $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
+ $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
+ $rOpts_extended_line_up_parentheses =
+ $rOpts->{'extended-line-up-parentheses'};
+ $rOpts_logical_padding = $rOpts->{'logical-padding'};
+ $rOpts_maximum_consecutive_blank_lines =
+ $rOpts->{'maximum-consecutive-blank-lines'};
+ $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
+ $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
+ $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
+ $rOpts_opening_brace_always_on_right =
+ $rOpts->{'opening-brace-always-on-right'};
+ $rOpts_outdent_keywords = $rOpts->{'outdent-keywords'};
+ $rOpts_outdent_labels = $rOpts->{'outdent-labels'};
+ $rOpts_outdent_long_comments = $rOpts->{'outdent-long-comments'};
+ $rOpts_outdent_long_quotes = $rOpts->{'outdent-long-quotes'};
+ $rOpts_outdent_static_block_comments =
+ $rOpts->{'outdent-static-block-comments'};
+ $rOpts_recombine = $rOpts->{'recombine'};
+ $rOpts_short_concatenation_item_length =
+ $rOpts->{'short-concatenation-item-length'};
+ $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
+ $rOpts_static_block_comments = $rOpts->{'static-block-comments'};
+ $rOpts_sub_alias_list = $rOpts->{'sub-alias-list'};
+ $rOpts_tee_block_comments = $rOpts->{'tee-block-comments'};
+ $rOpts_tee_pod = $rOpts->{'tee-pod'};
+ $rOpts_tee_side_comments = $rOpts->{'tee-side-comments'};
+ $rOpts_valign = $rOpts->{'valign'};
+ $rOpts_valign_code = $rOpts->{'valign-code'};
+ $rOpts_valign_side_comments = $rOpts->{'valign-side-comments'};
+ $rOpts_variable_maximum_line_length =
+ $rOpts->{'variable-maximum-line-length'};
- $i++;
- last
- if ( $Opt_repeat_count > 0
- && $number_of_groups_seen >= $Opt_repeat_count );
+ # Note that both opening and closing tokens can access the opening
+ # and closing flags of their container types.
+ %opening_vertical_tightness = (
+ '(' => $rOpts->{'paren-vertical-tightness'},
+ '{' => $rOpts->{'brace-vertical-tightness'},
+ '[' => $rOpts->{'square-bracket-vertical-tightness'},
+ ')' => $rOpts->{'paren-vertical-tightness'},
+ '}' => $rOpts->{'brace-vertical-tightness'},
+ ']' => $rOpts->{'square-bracket-vertical-tightness'},
+ );
- $CODE_type = "";
- $K_first = undef;
- $K_last = undef;
- $line_type = $line_of_tokens->{_line_type};
+ %closing_vertical_tightness = (
+ '(' => $rOpts->{'paren-vertical-tightness-closing'},
+ '{' => $rOpts->{'brace-vertical-tightness-closing'},
+ '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
+ ')' => $rOpts->{'paren-vertical-tightness-closing'},
+ '}' => $rOpts->{'brace-vertical-tightness-closing'},
+ ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
+ );
- # always end a group at non-CODE
- if ( $line_type ne 'CODE' ) { $end_group->(); next }
+ # assume flag for '>' same as ')' for closing qw quotes
+ %closing_token_indentation = (
+ ')' => $rOpts->{'closing-paren-indentation'},
+ '}' => $rOpts->{'closing-brace-indentation'},
+ ']' => $rOpts->{'closing-square-bracket-indentation'},
+ '>' => $rOpts->{'closing-paren-indentation'},
+ );
- $CODE_type = $line_of_tokens->{_code_type};
+ # flag indicating if any closing tokens are indented
+ $some_closing_token_indentation =
+ $rOpts->{'closing-paren-indentation'}
+ || $rOpts->{'closing-brace-indentation'}
+ || $rOpts->{'closing-square-bracket-indentation'}
+ || $rOpts->{'indent-closing-brace'};
- # end any group at a format skipping line
- if ( $CODE_type && $CODE_type eq 'FS' ) {
- $end_group->();
- next;
- }
+ %opening_token_right = (
+ '(' => $rOpts->{'opening-paren-right'},
+ '{' => $rOpts->{'opening-hash-brace-right'},
+ '[' => $rOpts->{'opening-square-bracket-right'},
+ );
- # continue in a verbatim (VB) type; it may be quoted text
- if ( $CODE_type eq 'VB' ) {
- if ( $ibeg >= 0 ) { $iend = $i; }
- next;
- }
+ %stack_opening_token = (
+ '(' => $rOpts->{'stack-opening-paren'},
+ '{' => $rOpts->{'stack-opening-hash-brace'},
+ '[' => $rOpts->{'stack-opening-square-bracket'},
+ );
- # and continue in blank (BL) types
- if ( $CODE_type eq 'BL' ) {
- if ( $ibeg >= 0 ) {
- $iend = $i;
- push @{iblanks}, $i;
+ %stack_closing_token = (
+ ')' => $rOpts->{'stack-closing-paren'},
+ '}' => $rOpts->{'stack-closing-hash-brace'},
+ ']' => $rOpts->{'stack-closing-square-bracket'},
+ );
- # propagate current subgroup token
- my $tok = $group[-1]->[1];
- push @group, [ $i, $tok, $count ];
+ # Create a table of maximum line length vs level for later efficient use.
+ # We will make the tables very long to be sure it will not be exceeded.
+ # But we have to choose a fixed length. A check will be made at the start
+ # of sub 'finish_formatting' to be sure it is not exceeded. Note, some of
+ # my standard test problems have indentation levels of about 150, so this
+ # should be fairly large. If the choice of a maximum level ever becomes
+ # an issue then these table values could be returned in a sub with a simple
+ # memoization scheme.
+
+ # Also create a table of the maximum spaces available for text due to the
+ # level only. If a line has continuation indentation, then that space must
+ # be subtracted from the table value. This table is used for preliminary
+ # estimates in welding, extended_ci, BBX, and marking short blocks.
+ my $level_max = 1000;
+
+ # The basic scheme:
+ foreach my $level ( 0 .. $level_max ) {
+ my $indent = $level * $rOpts_indent_columns;
+ $maximum_line_length_at_level[$level] = $rOpts_maximum_line_length;
+ $maximum_text_length_at_level[$level] =
+ $rOpts_maximum_line_length - $indent;
+ }
+
+ # Correct the maximum_text_length table if the -wc=n flag is used
+ $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
+ if ($rOpts_whitespace_cycle) {
+ if ( $rOpts_whitespace_cycle > 0 ) {
+ foreach my $level ( 0 .. $level_max ) {
+ my $level_mod = $level % $rOpts_whitespace_cycle;
+ my $indent = $level_mod * $rOpts_indent_columns;
+ $maximum_text_length_at_level[$level] =
+ $rOpts_maximum_line_length - $indent;
}
- next;
}
+ else {
+ $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'} = 0;
+ }
+ }
- # examine the first token of this line
- my $rK_range = $line_of_tokens->{_rK_range};
- ( $K_first, $K_last ) = @{$rK_range};
- if ( !defined($K_first) ) {
-
- # Unexpected blank line..shouldn't happen
- # $rK_range should be defined for line type CODE
- Warn(
-"Programming Error: Unexpected Blank Line in sub 'keyword_group_scan'. Ignoring"
- );
- return $rhash_of_desires;
+ # Correct the tables if the -vmll flag is used. These values override the
+ # previous values.
+ if ($rOpts_variable_maximum_line_length) {
+ foreach my $level ( 0 .. $level_max ) {
+ $maximum_text_length_at_level[$level] = $rOpts_maximum_line_length;
+ $maximum_line_length_at_level[$level] =
+ $rOpts_maximum_line_length + $level * $rOpts_indent_columns;
+ }
+ }
+
+ # Define two measures of indentation level, alpha and beta, at which some
+ # formatting features come under stress and need to start shutting down.
+ # Some combination of the two will be used to shut down different
+ # formatting features.
+ # Put a reasonable upper limit on stress level (say 100) in case the
+ # whitespace-cycle variable is used.
+ my $stress_level_limit = min( 100, $level_max );
+
+ # Find stress_level_alpha, targeted at very short maximum line lengths.
+ $stress_level_alpha = $stress_level_limit + 1;
+ foreach my $level_test ( 0 .. $stress_level_limit ) {
+ my $max_len = $maximum_text_length_at_level[ $level_test + 1 ];
+ my $excess_inside_space =
+ $max_len -
+ $rOpts_continuation_indentation -
+ $rOpts_indent_columns - 8;
+ if ( $excess_inside_space <= 0 ) {
+ $stress_level_alpha = $level_test;
+ last;
}
+ }
- my $level = $rLL->[$K_first]->[_LEVEL_];
- my $type = $rLL->[$K_first]->[_TYPE_];
- my $token = $rLL->[$K_first]->[_TOKEN_];
- my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
+ # Find stress level beta, a stress level targeted at formatting
+ # at deep levels near the maximum line length. We start increasing
+ # from zero and stop at the first level which shows no more space.
- # see if this is a code type we seek (i.e. comment)
- if ( $CODE_type
- && $Opt_comment_pattern
- && $CODE_type =~ /$Opt_comment_pattern/o )
- {
+ # 'const' is a fixed number of spaces for a typical variable.
+ # Cases b1197-b1204 work ok with const=12 but not with const=8
+ my $const = 16;
+ my $denom = max( 1, $rOpts_indent_columns );
+ $stress_level_beta = 0;
+ foreach my $level ( 0 .. $stress_level_limit ) {
+ my $remaining_cycles = max(
+ 0,
+ (
+ $maximum_text_length_at_level[$level] -
+ $rOpts_continuation_indentation - $const
+ ) / $denom
+ );
+ last if ( $remaining_cycles <= 3 ); # 2 does not work
+ $stress_level_beta = $level;
+ }
- my $tok = $CODE_type;
+ initialize_weld_nested_exclusion_rules($rOpts);
- # Continuing a group
- if ( $ibeg >= 0 && $level == $level_beg ) {
- $add_to_group->( $i, $tok, $level );
- }
+ %line_up_parentheses_control_hash = ();
+ $line_up_parentheses_control_is_lxpl = 1;
+ my $lpxl = $rOpts->{'line-up-parentheses-exclusion-list'};
+ my $lpil = $rOpts->{'line-up-parentheses-inclusion-list'};
+ if ( $lpxl && $lpil ) {
+ Warn( <<EOM );
+You entered values for both -lpxl=s and -lpil=s; the -lpil list will be ignored
+EOM
+ }
+ if ($lpxl) {
+ $line_up_parentheses_control_is_lxpl = 1;
+ initialize_line_up_parentheses_control_hash(
+ $rOpts->{'line-up-parentheses-exclusion-list'}, 'lpxl' );
+ }
+ elsif ($lpil) {
+ $line_up_parentheses_control_is_lxpl = 0;
+ initialize_line_up_parentheses_control_hash(
+ $rOpts->{'line-up-parentheses-inclusion-list'}, 'lpil' );
+ }
- # Start new group
- else {
+ return;
+}
- # first end old group if any; we might be starting new
- # keywords at different level
- if ( $ibeg > 0 ) { $end_group->(); }
- $add_to_group->( $i, $tok, $level );
- }
- next;
- }
+use constant ALIGN_GREP_ALIASES => 0;
- # See if it is a keyword we seek, but never start a group in a
- # continuation line; the code may be badly formatted.
- if ( $ci_level == 0
- && $type eq 'k'
- && $token =~ /$Opt_pattern/o )
- {
+sub initialize_grep_and_friends {
+ my ($str) = @_;
- # Continuing a keyword group
- if ( $ibeg >= 0 && $level == $level_beg ) {
- $add_to_group->( $i, $token, $level );
- }
+ # Initialize or re-initialize hashes with 'grep' and grep aliases. This
+ # must be done after each set of options because new grep aliases may be
+ # used.
- # Start new keyword group
- else {
+ # re-initialize the hash ... this is critical!
+ %is_sort_map_grep = ();
- # first end old group if any; we might be starting new
- # keywords at different level
- if ( $ibeg > 0 ) { $end_group->(); }
- $add_to_group->( $i, $token, $level );
- }
+ my @q = qw(sort map grep);
+ @is_sort_map_grep{@q} = (1) x scalar(@q);
+
+ # Note that any 'grep-alias-list' string has been preprocessed to be a
+ # trimmed, space-separated list.
+ my @grep_aliases = split /\s+/, $str;
+ @{is_sort_map_grep}{@grep_aliases} = (1) x scalar(@grep_aliases);
+
+ ##@q = qw(sort map grep eval);
+ %is_sort_map_grep_eval = %is_sort_map_grep;
+ $is_sort_map_grep_eval{'eval'} = 1;
+
+ ##@q = qw(sort map grep eval do);
+ %is_sort_map_grep_eval_do = %is_sort_map_grep_eval;
+ $is_sort_map_grep_eval_do{'do'} = 1;
+
+ # These block types can take ci. This is used by the -xci option.
+ # Note that the 'sub' in this list is an anonymous sub. To be more correct
+ # we could remove sub and use ASUB pattern to also handle a
+ # prototype/signature. But that would slow things down and would probably
+ # never be useful.
+ ##@q = qw( do sub eval sort map grep );
+ %is_block_with_ci = %is_sort_map_grep_eval_do;
+ $is_block_with_ci{'sub'} = 1;
+
+ %is_keyword_returning_list = ();
+ @q = qw(
+ grep
+ keys
+ map
+ reverse
+ sort
+ split
+ );
+ push @q, @grep_aliases;
+ @is_keyword_returning_list{@q} = (1) x scalar(@q);
+
+ # This code enables vertical alignment of grep aliases for testing. It has
+ # not been found to be beneficial, so it is off by default. But it is
+ # useful for precise testing of the grep alias coding.
+ if (ALIGN_GREP_ALIASES) {
+ %block_type_map = (
+ 'unless' => 'if',
+ 'else' => 'if',
+ 'elsif' => 'if',
+ 'when' => 'if',
+ 'default' => 'if',
+ 'case' => 'if',
+ 'sort' => 'map',
+ 'grep' => 'map',
+ );
+ foreach (@q) {
+ $block_type_map{$_} = 'map' unless ( $_ eq 'map' );
+ }
+ }
+ return;
+}
+
+sub initialize_weld_nested_exclusion_rules {
+ my ($rOpts) = @_;
+ %weld_nested_exclusion_rules = ();
+
+ my $opt_name = 'weld-nested-exclusion-list';
+ my $str = $rOpts->{$opt_name};
+ return unless ($str);
+ $str =~ s/^\s+//;
+ $str =~ s/\s+$//;
+ return unless ($str);
+
+ # There are four container tokens.
+ my %token_keys = (
+ '(' => '(',
+ '[' => '[',
+ '{' => '{',
+ 'q' => 'q',
+ );
+
+ # We are parsing an exclusion list for nested welds. The list is a string
+ # with spaces separating any number of items. Each item consists of three
+ # pieces of information:
+ # <optional position> <optional type> <type of container>
+ # < ^ or . > < k or K > < ( [ { >
+
+ # The last character is the required container type and must be one of:
+ # ( = paren
+ # [ = square bracket
+ # { = brace
+
+ # An optional leading position indicator:
+ # ^ means the leading token position in the weld
+ # . means a secondary token position in the weld
+ # no position indicator means all positions match
+
+ # An optional alphanumeric character between the position and container
+ # token selects to which the rule applies:
+ # k = any keyword
+ # K = any non-keyword
+ # f = function call
+ # F = not a function call
+ # w = function or keyword
+ # W = not a function or keyword
+ # no letter means any preceding type matches
+
+ # Examples:
+ # ^( - the weld must not start with a paren
+ # .( - the second and later tokens may not be parens
+ # ( - no parens in weld
+ # ^K( - exclude a leading paren not preceded by a keyword
+ # .k( - exclude a secondary paren preceded by a keyword
+ # [ { - exclude all brackets and braces
+
+ my @items = split /\s+/, $str;
+ my $msg1;
+ my $msg2;
+ foreach my $item (@items) {
+ my $item_save = $item;
+ my $tok = chop($item);
+ my $key = $token_keys{$tok};
+ if ( !defined($key) ) {
+ $msg1 .= " '$item_save'";
next;
}
+ if ( !defined( $weld_nested_exclusion_rules{$key} ) ) {
+ $weld_nested_exclusion_rules{$key} = [];
+ }
+ my $rflags = $weld_nested_exclusion_rules{$key};
- # This is not one of our keywords, but we are in a keyword group
- # so see if we should continue or quit
- elsif ( $ibeg >= 0 ) {
+ # A 'q' means do not weld quotes
+ if ( $tok eq 'q' ) {
+ $rflags->[0] = '*';
+ $rflags->[1] = '*';
+ next;
+ }
- # - bail out on a large level change; we may have walked into a
- # data structure or anoymous sub code.
- if ( $level > $level_beg + 1 || $level < $level_beg ) {
- $end_group->();
+ my $pos = '*';
+ my $select = '*';
+ if ($item) {
+ if ( $item =~ /^([\^\.])?([kKfFwW])?$/ ) {
+ $pos = $1 if ($1);
+ $select = $2 if ($2);
+ }
+ else {
+ $msg1 .= " '$item_save'";
next;
}
+ }
- # - keep going on a continuation line of the same level, since
- # it is probably a continuation of our previous keyword,
- # - and keep going past hanging side comments because we never
- # want to interrupt them.
- if ( ( ( $level == $level_beg ) && $ci_level > 0 )
- || $CODE_type eq 'HSC' )
- {
- $iend = $i;
- next;
+ my $err;
+ if ( $pos eq '^' || $pos eq '*' ) {
+ if ( defined( $rflags->[0] ) && $rflags->[0] ne $select ) {
+ $err = 1;
+ }
+ $rflags->[0] = $select;
+ }
+ if ( $pos eq '.' || $pos eq '*' ) {
+ if ( defined( $rflags->[1] ) && $rflags->[1] ne $select ) {
+ $err = 1;
}
+ $rflags->[1] = $select;
+ }
+ if ($err) { $msg2 .= " '$item_save'"; }
+ }
+ if ($msg1) {
+ Warn(<<EOM);
+Unexpecting symbol(s) encountered in --$opt_name will be ignored:
+$msg1
+EOM
+ }
+ if ($msg2) {
+ Warn(<<EOM);
+Multiple specifications were encountered in the --weld-nested-exclusion-list for:
+$msg2
+Only the last will be used.
+EOM
+ }
+ return;
+}
- # - continue if if we are within in a container which started with
- # the line of the previous keyword.
- if ( defined($K_closing) && $K_first <= $K_closing ) {
+sub initialize_line_up_parentheses_control_hash {
+ my ( $str, $opt_name ) = @_;
+ return unless ($str);
+ $str =~ s/^\s+//;
+ $str =~ s/\s+$//;
+ return unless ($str);
+
+ # The format is space separated items, where each item must consist of a
+ # string with a token type preceded by an optional text token and followed
+ # by an integer:
+ # For example:
+ # W(1
+ # = (flag1)(key)(flag2), where
+ # flag1 = 'W'
+ # key = '('
+ # flag2 = '1'
+
+ my @items = split /\s+/, $str;
+ my $msg1;
+ my $msg2;
+ foreach my $item (@items) {
+ my $item_save = $item;
+ my ( $flag1, $key, $flag2 );
+ if ( $item =~ /^([^\(\]\{]*)?([\(\{\[])(\d)?$/ ) {
+ $flag1 = $1 if $1;
+ $key = $2 if $2;
+ $flag2 = $3 if $3;
+ }
+ else {
+ $msg1 .= " '$item_save'";
+ next;
+ }
- # continue if entire line is within container
- if ( $K_last <= $K_closing ) { $iend = $i; next }
+ if ( !defined($key) ) {
+ $msg1 .= " '$item_save'";
+ next;
+ }
- # continue at ); or }; or ];
- my $KK = $K_closing + 1;
- if ( $rLL->[$KK]->[_TYPE_] eq ';' ) {
- if ( $KK < $K_last ) {
- if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK }
- if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' ) {
- $end_group->(1);
- next;
- }
- }
- $iend = $i;
- next;
- }
+ # Check for valid flag1
+ if ( !defined($flag1) ) { $flag1 = '*' }
+ elsif ( $flag1 !~ /^[kKfFwW\*]$/ ) {
+ $msg1 .= " '$item_save'";
+ next;
+ }
- $end_group->(1);
- next;
- }
+ # Check for valid flag2
+ # 0 or blank: ignore container contents
+ # 1 all containers with sublists match
+ # 2 all containers with sublists, code blocks or ternary operators match
+ # ... this could be extended in the future
+ if ( !defined($flag2) ) { $flag2 = 0 }
+ elsif ( $flag2 !~ /^[012]$/ ) {
+ $msg1 .= " '$item_save'";
+ next;
+ }
- # - end the group if none of the above
- $end_group->();
+ if ( !defined( $line_up_parentheses_control_hash{$key} ) ) {
+ $line_up_parentheses_control_hash{$key} = [ $flag1, $flag2 ];
next;
}
- # not in a keyword group; continue
- else { next }
+ # check for multiple conflicting specifications
+ my $rflags = $line_up_parentheses_control_hash{$key};
+ my $err;
+ if ( defined( $rflags->[0] ) && $rflags->[0] ne $flag1 ) {
+ $err = 1;
+ $rflags->[0] = $flag1;
+ }
+ if ( defined( $rflags->[1] ) && $rflags->[1] ne $flag2 ) {
+ $err = 1;
+ $rflags->[1] = $flag2;
+ }
+ $msg2 .= " '$item_save'" if ($err);
+ next;
+ }
+ if ($msg1) {
+ Warn(<<EOM);
+Unexpecting symbol(s) encountered in --$opt_name will be ignored:
+$msg1
+EOM
+ }
+ if ($msg2) {
+ Warn(<<EOM);
+Multiple specifications were encountered in the $opt_name at:
+$msg2
+Only the last will be used.
+EOM
}
- # end of loop over all lines
- $end_group->();
- return $rhash_of_desires;
+ # Speedup: we can turn off -lp if it is not actually used
+ if ($line_up_parentheses_control_is_lxpl) {
+ my $all_off = 1;
+ foreach my $key (qw# ( { [ #) {
+ my $rflags = $line_up_parentheses_control_hash{$key};
+ if ( defined($rflags) ) {
+ my ( $flag1, $flag2 ) = @{$rflags};
+ if ( $flag1 && $flag1 ne '*' ) { $all_off = 0; last }
+ if ($flag2) { $all_off = 0; last }
+ }
+ }
+ if ($all_off) {
+ $rOpts->{'line-up-parentheses'} = "";
+ }
+ }
+
+ return;
}
-sub break_lines {
+use constant DEBUG_KB => 0;
- # Loop over old lines to set new line break points
+sub initialize_keep_old_breakpoints {
+ my ( $str, $short_name, $rkeep_break_hash ) = @_;
+ return unless $str;
- my $self = shift;
- my $rlines = $self->{rlines};
+ my %flags = ();
+ my @list = split_words($str);
+ if ( DEBUG_KB && @list ) {
+ local $" = ' ';
+ print <<EOM;
+DEBUG_KB entering for '$short_name' with str=$str\n";
+list is: @list;
+EOM
+ }
- # Note for RT#118553, leave only one newline at the end of a file.
- # Example code to do this is in comments below:
- # my $Opt_trim_ending_blank_lines = 0;
- # if ($Opt_trim_ending_blank_lines) {
- # while ( my $line_of_tokens = pop @{$rlines} ) {
- # my $line_type = $line_of_tokens->{_line_type};
- # if ( $line_type eq 'CODE' ) {
- # my $CODE_type = $line_of_tokens->{_code_type};
- # next if ( $CODE_type eq 'BL' );
- # }
- # push @{$rlines}, $line_of_tokens;
- # last;
- # }
- # }
+ # - pull out any any leading container code, like f( or *{
+ foreach (@list) {
+ if ( $_ =~ /^( [ \w\* ] )( [ \{\(\[\}\)\] ] )$/x ) {
+ $_ = $2;
+ $flags{$2} = $1;
+ }
+ }
- # But while this would be a trivial update, it would have very undesirable
- # side effects when perltidy is run from within an editor on a small snippet.
- # So this is best done with a separate filter, such
- # as 'delete_ending_blank_lines.pl' in the examples folder.
+ my @unknown_types;
+ foreach my $type (@list) {
+ if ( !Perl::Tidy::Tokenizer::is_valid_token_type($type) ) {
+ push @unknown_types, $type;
+ }
+ }
- # Flag to prevent blank lines when POD occurs in a format skipping sect.
- my $in_format_skipping_section;
+ if (@unknown_types) {
+ my $num = @unknown_types;
+ local $" = ' ';
+ Warn(<<EOM);
+$num unrecognized token types were input with --$short_name :
+@unknown_types
+EOM
+ }
- # set locations for blanks around long runs of keywords
- my $rwant_blank_line_after = $self->keyword_group_scan();
+ @{$rkeep_break_hash}{@list} = (1) x scalar(@list);
- my $line_type = "";
- my $i = -1;
- foreach my $line_of_tokens ( @{$rlines} ) {
- $i++;
+ foreach my $key ( keys %flags ) {
+ my $flag = $flags{$key};
- # insert blank lines requested for keyword sequences
- if ( $i > 0
- && defined( $rwant_blank_line_after->{ $i - 1 } )
- && $rwant_blank_line_after->{ $i - 1 } == 1 )
- {
- $self->want_blank_line();
+ if ( length($flag) != 1 ) {
+ Warn(<<EOM);
+Multiple entries given for '$key' in '$short_name'
+EOM
+ }
+ elsif ( ( $key eq '(' || $key eq ')' ) && $flag !~ /^[kKfFwW\*]$/ ) {
+ Warn(<<EOM);
+Unknown flag '$flag' given for '$key' in '$short_name'
+EOM
+ }
+ elsif ( ( $key eq '}' || $key eq '}' ) && $flag !~ /^[bB\*]$/ ) {
+ Warn(<<EOM);
+Unknown flag '$flag' given for '$key' in '$short_name'
+EOM
}
- my $last_line_type = $line_type;
- $line_type = $line_of_tokens->{_line_type};
- my $input_line = $line_of_tokens->{_line_text};
+ $rkeep_break_hash->{$key} = $flag;
+ }
- # _line_type codes are:
- # SYSTEM - system-specific code before hash-bang line
- # CODE - line of perl code (including comments)
- # POD_START - line starting pod, such as '=head'
- # POD - pod documentation text
- # POD_END - last line of pod section, '=cut'
- # HERE - text of here-document
- # HERE_END - last line of here-doc (target word)
- # FORMAT - format section
- # FORMAT_END - last line of format section, '.'
- # DATA_START - __DATA__ line
- # DATA - unidentified text following __DATA__
- # END_START - __END__ line
- # END - unidentified text following __END__
- # ERROR - we are in big trouble, probably not a perl script
+ # Temporary patch and warning during changeover from using type to token for
+ # containers . This can be eliminated after one or two future releases.
+ if ( $rkeep_break_hash->{'{'}
+ && $rkeep_break_hash->{'{'} eq '1'
+ && !$rkeep_break_hash->{'('}
+ && !$rkeep_break_hash->{'['} )
+ {
+ $rkeep_break_hash->{'('} = 1;
+ $rkeep_break_hash->{'['} = 1;
+ Warn(<<EOM);
+Sorry, but the format for the -kbb and -kba flags is changing a little.
+You entered '{' which currently matches '{' '(' and '[',
+but in the future it will only match '{'.
+To prevent this message please do one of the following:
+ use '{ ( [' if you want to match all opening containers, or
+ use '(' or '[' to match just those containers, or
+ use '*{' to match only opening braces
+EOM
+ }
- # put a blank line after an =cut which comes before __END__ and __DATA__
- # (required by podchecker)
- if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
- $file_writer_object->reset_consecutive_blank_lines();
- if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
- $self->want_blank_line();
- }
- }
+ if ( $rkeep_break_hash->{'}'}
+ && $rkeep_break_hash->{'}'} eq '1'
+ && !$rkeep_break_hash->{')'}
+ && !$rkeep_break_hash->{']'} )
+ {
+ $rkeep_break_hash->{'('} = 1;
+ $rkeep_break_hash->{'['} = 1;
+ Warn(<<EOM);
+Sorry, but the format for the -kbb and -kba flags is changing a little.
+You entered '}' which currently matches each of '}' ')' and ']',
+but in the future it will only match '}'.
+To prevent this message please do one of the following:
+ use '} ) ]' if you want to match all closing containers, or
+ use ')' or ']' to match just those containers, or
+ use '*}' to match only closing braces
+EOM
+ }
- # handle line of code..
- if ( $line_type eq 'CODE' ) {
-
- my $CODE_type = $line_of_tokens->{_code_type};
- $in_format_skipping_section = $CODE_type eq 'FS';
-
- # Handle blank lines
- if ( $CODE_type eq 'BL' ) {
-
- # If keep-old-blank-lines is zero, we delete all
- # old blank lines and let the blank line rules generate any
- # needed blanks.
-
- # We also delete lines requested by the keyword-group logic
- my $kgb_keep = !( defined( $rwant_blank_line_after->{$i} )
- && $rwant_blank_line_after->{$i} == 2 );
-
- # But the keep-old-blank-lines flag has priority over kgb flags
- $kgb_keep = 1 if ( $rOpts_keep_old_blank_lines == 2 );
-
- if ( $rOpts_keep_old_blank_lines && $kgb_keep ) {
- $self->flush();
- $file_writer_object->write_blank_code_line(
- $rOpts_keep_old_blank_lines == 2 );
- $last_line_leading_type = 'b';
- }
- next;
- }
- else {
-
- # let logger see all non-blank lines of code
- my $output_line_number = get_output_line_number();
- black_box( $line_of_tokens, $output_line_number );
- }
-
- # Handle Format Skipping (FS) and Verbatim (VB) Lines
- if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
- $self->write_unindented_line("$input_line");
- $file_writer_object->reset_consecutive_blank_lines();
- next;
- }
-
- # Handle block comment to be deleted
- elsif ( $CODE_type eq 'DEL' ) {
- $self->flush();
- next;
- }
-
- # Handle all other lines of code
- $self->print_line_of_tokens($line_of_tokens);
- }
-
- # handle line of non-code..
- else {
-
- # set special flags
- my $skip_line = 0;
- my $tee_line = 0;
- if ( $line_type =~ /^POD/ ) {
-
- # Pod docs should have a preceding blank line. But stay
- # out of __END__ and __DATA__ sections, because
- # the user may be using this section for any purpose whatsoever
- if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
- if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; }
- if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
- if ( !$skip_line
- && !$in_format_skipping_section
- && $line_type eq 'POD_START'
- && !$saw_END_or_DATA_ )
- {
- $self->want_blank_line();
- }
- }
-
- # leave the blank counters in a predictable state
- # after __END__ or __DATA__
- elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
- $file_writer_object->reset_consecutive_blank_lines();
- $saw_END_or_DATA_ = 1;
- }
-
- # write unindented non-code line
- if ( !$skip_line ) {
- if ($tee_line) { $file_writer_object->tee_on() }
- $self->write_unindented_line($input_line);
- if ($tee_line) { $file_writer_object->tee_off() }
- }
- }
- }
- return;
-}
-
-{ ## Beginning of routine to check line hashes
-
- my %valid_line_hash;
-
- BEGIN {
-
- # These keys are defined for each line in the formatter
- # Each line must have exactly these quantities
- my @valid_line_keys = qw(
- _curly_brace_depth
- _ending_in_quote
- _guessed_indentation_level
- _line_number
- _line_text
- _line_type
- _paren_depth
- _quote_character
- _rK_range
- _square_bracket_depth
- _starting_in_quote
- _ended_in_blank_token
- _code_type
-
- _ci_level_0
- _level_0
- _nesting_blocks_0
- _nesting_tokens_0
- );
-
- @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys);
- }
-
- sub check_line_hashes {
- my $self = shift;
- $self->check_self_hash();
- my $rlines = $self->{rlines};
- foreach my $rline ( @{$rlines} ) {
- my $iline = $rline->{_line_number};
- my $line_type = $rline->{_line_type};
- check_keys( $rline, \%valid_line_hash,
- "Checkpoint: line number =$iline, line_type=$line_type", 1 );
- }
- return;
- }
-
-} ## End check line hashes
-
-sub write_line {
-
- # We are caching tokenized lines as they arrive and converting them to the
- # format needed for the final formatting.
- my ( $self, $line_of_tokens_old ) = @_;
- my $rLL = $self->{rLL};
- my $Klimit = $self->{Klimit};
- my $rlines_new = $self->{rlines};
-
- my $Kfirst;
- my $line_of_tokens = {};
- foreach my $key (
- qw(
- _curly_brace_depth
- _ending_in_quote
- _guessed_indentation_level
- _line_number
- _line_text
- _line_type
- _paren_depth
- _quote_character
- _square_bracket_depth
- _starting_in_quote
- )
- )
- {
- $line_of_tokens->{$key} = $line_of_tokens_old->{$key};
- }
-
- # Data needed by Logger
- $line_of_tokens->{_level_0} = 0;
- $line_of_tokens->{_ci_level_0} = 0;
- $line_of_tokens->{_nesting_blocks_0} = "";
- $line_of_tokens->{_nesting_tokens_0} = "";
-
- # Needed to avoid trimming quotes
- $line_of_tokens->{_ended_in_blank_token} = undef;
-
- my $line_type = $line_of_tokens_old->{_line_type};
- my $input_line_no = $line_of_tokens_old->{_line_number} - 1;
- if ( $line_type eq 'CODE' ) {
-
- my $rtokens = $line_of_tokens_old->{_rtokens};
- my $rtoken_type = $line_of_tokens_old->{_rtoken_type};
- my $rblock_type = $line_of_tokens_old->{_rblock_type};
- my $rcontainer_type = $line_of_tokens_old->{_rcontainer_type};
- my $rcontainer_environment =
- $line_of_tokens_old->{_rcontainer_environment};
- my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence};
- my $rlevels = $line_of_tokens_old->{_rlevels};
- my $rslevels = $line_of_tokens_old->{_rslevels};
- my $rci_levels = $line_of_tokens_old->{_rci_levels};
- my $rnesting_blocks = $line_of_tokens_old->{_rnesting_blocks};
- my $rnesting_tokens = $line_of_tokens_old->{_rnesting_tokens};
+ if ( DEBUG_KB && @list ) {
+ my @tmp = %flags;
+ local $" = ' ';
+ print <<EOM;
- my $jmax = @{$rtokens} - 1;
- if ( $jmax >= 0 ) {
- $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
- foreach my $j ( 0 .. $jmax ) {
-
- # Clip negative nesting depths to zero to avoid problems.
- # Negative values can occur in files with unbalanced containers
- my $slevel = $rslevels->[$j];
- if ( $slevel < 0 ) { $slevel = 0 }
-
- my @tokary;
- @tokary[
- _TOKEN_, _TYPE_,
- _BLOCK_TYPE_, _CONTAINER_TYPE_,
- _CONTAINER_ENVIRONMENT_, _TYPE_SEQUENCE_,
- _LEVEL_, _LEVEL_TRUE_,
- _SLEVEL_, _CI_LEVEL_,
- _LINE_INDEX_,
- ]
- = (
- $rtokens->[$j], $rtoken_type->[$j],
- $rblock_type->[$j], $rcontainer_type->[$j],
- $rcontainer_environment->[$j], $rtype_sequence->[$j],
- $rlevels->[$j], $rlevels->[$j],
- $slevel, $rci_levels->[$j],
- $input_line_no,
- );
- push @{$rLL}, \@tokary;
- }
-
- $Klimit = @{$rLL} - 1;
-
- # Need to remember if we can trim the input line
- $line_of_tokens->{_ended_in_blank_token} =
- $rtoken_type->[$jmax] eq 'b';
+DEBUG_KB -$short_name flag: $str
+final keys: @list
+special flags: @tmp
+EOM
- $line_of_tokens->{_level_0} = $rlevels->[0];
- $line_of_tokens->{_ci_level_0} = $rci_levels->[0];
- $line_of_tokens->{_nesting_blocks_0} = $rnesting_blocks->[0];
- $line_of_tokens->{_nesting_tokens_0} = $rnesting_tokens->[0];
- }
}
- $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ];
- $line_of_tokens->{_code_type} = "";
- $self->{Klimit} = $Klimit;
-
- push @{$rlines_new}, $line_of_tokens;
return;
+
}
sub initialize_whitespace_hashes {
- # initialize these global hashes, which control the use of
- # whitespace around tokens:
+ # This is called once before formatting begins to initialize these global
+ # hashes, which control the use of whitespace around tokens:
#
# %binary_ws_rules
# %want_left_space
>;
push( @spaces_right_side, ',' ); # avoids warning message
- # Note that we are in a BEGIN block here. Later in processing
+ %want_left_space = ();
+ %want_right_space = ();
+ %binary_ws_rules = ();
+
+ # Note that we setting defaults here. Later in processing
# the values of %want_left_space and %want_right_space
# may be overridden by any user settings specified by the
# -wls and -wrs parameters. However the binary_whitespace_rules
$binary_ws_rules{'R'}{'{'} = WS_NO;
$binary_ws_rules{'t'}{'L'} = WS_NO;
$binary_ws_rules{'t'}{'{'} = WS_NO;
+ $binary_ws_rules{'t'}{'='} = WS_OPTIONAL; # for signatures; fixes b1123
$binary_ws_rules{'}'}{'L'} = WS_NO;
$binary_ws_rules{'}'}{'{'} = WS_OPTIONAL; # RT#129850; was WS_NO
$binary_ws_rules{'$'}{'L'} = WS_NO;
$binary_ws_rules{'i'}{'Q'} = WS_YES;
$binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
- # FIXME: we could to split 'i' into variables and functions
- # and have no space for functions but space for variables. For now,
- # I have a special patch in the special rules below
$binary_ws_rules{'i'}{'('} = WS_NO;
$binary_ws_rules{'w'}{'('} = WS_NO;
} ## end initialize_whitespace_hashes
+# The following hash is used to skip over needless if tests.
+# Be sure to update it when adding new checks in its block.
+my %is_special_ws_type;
+
+BEGIN {
+ my @q = qw(k w i C m - Q);
+ push @q, '#';
+ @is_special_ws_type{@q} = (1) x scalar(@q);
+}
+
+use constant DEBUG_WHITE => 0;
+
sub set_whitespace_flags {
- # This routine examines each pair of nonblank tokens and
- # sets a flag indicating if white space is needed.
+ # This routine is called once per file to set whitespace flags for that
+ # file. This routine examines each pair of nonblank tokens and sets a flag
+ # indicating if white space is needed.
#
- # $rwhitespace_flags->[$j] is a flag indicating whether a white space
- # BEFORE token $j is needed, with the following values:
+ # $rwhitespace_flags->[$j] is a flag indicating whether a white space
+ # BEFORE token $j is needed, with the following values:
#
- # WS_NO = -1 do not want a space before token $j
+ # WS_NO = -1 do not want a space BEFORE token $j
# WS_OPTIONAL= 0 optional space or $j is a whitespace
- # WS_YES = 1 want a space before token $j
+ # WS_YES = 1 want a space BEFORE token $j
#
my $self = shift;
- my $rLL = $self->{rLL};
- my $rwhitespace_flags = [];
+ my $rLL = $self->[_rLL_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my $jmax = @{$rLL} - 1;
+
+ my $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
+ my $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
+ my $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
+
+ my $rwhitespace_flags = [];
+ my $ris_function_call_paren = {};
+
+ return $rwhitespace_flags if ( $jmax < 0 );
+
+ my %is_for_foreach = ( 'for' => 1, 'foreach' => 1 );
+
+ my ( $rtokh, $token, $type );
+ my ( $rtokh_last, $last_token, $last_type );
- my ( $last_token, $last_type, $last_block_type, $last_input_line_no,
- $token, $type, $block_type, $input_line_no );
my $j_tight_closing_paren = -1;
- $token = ' ';
- $type = 'b';
- $block_type = '';
- $input_line_no = 0;
- $last_token = ' ';
- $last_type = 'b';
- $last_block_type = '';
- $last_input_line_no = 0;
+ $rtokh = [ @{ $rLL->[0] } ];
+ $token = ' ';
+ $type = 'b';
- my $jmax = @{$rLL} - 1;
+ $rtokh->[_TOKEN_] = $token;
+ $rtokh->[_TYPE_] = $type;
+ $rtokh->[_TYPE_SEQUENCE_] = '';
+ $rtokh->[_LINE_INDEX_] = 0;
my ($ws);
&& $last_token eq '{'
&& $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
+ # Patch to count a sign separated from a number as a single token, as
+ # in the following line. Otherwise, it takes two steps to converge:
+ # deg2rad(- 0.5)
+ if ( ( $type eq 'm' || $type eq 'p' )
+ && $j < $jmax + 1
+ && $rLL->[ $j + 1 ]->[_TYPE_] eq 'b'
+ && $rLL->[ $j + 2 ]->[_TYPE_] eq 'n'
+ && $rLL->[ $j + 2 ]->[_TOKEN_] =~ /^\d/ )
+ {
+ $j_here = $j + 2;
+ }
+
# $j_next is where a closing token should be if
# the container has a single token
if ( $j_here + 1 > $jmax ) { return (WS_NO) }
return (WS_YES);
};
- # main loop over all tokens to define the whitespace flags
- for ( my $j = 0 ; $j <= $jmax ; $j++ ) {
+ # Local hashes to set spaces around container tokens according to their
+ # sequence numbers. These are set as keywords are examined.
+ # They are controlled by the -kpit and -kpitl flags.
+ my %opening_container_inside_ws;
+ my %closing_container_inside_ws;
+ my $set_container_ws_by_keyword = sub {
+
+ return unless (%keyword_paren_inner_tightness);
+
+ my ( $word, $sequence_number ) = @_;
+
+ # We just saw a keyword (or other function name) followed by an opening
+ # paren. Now check to see if the following paren should have special
+ # treatment for its inside space. If so we set a hash value using the
+ # sequence number as key.
+ if ( $word && $sequence_number ) {
+ my $tightness = $keyword_paren_inner_tightness{$word};
+ if ( defined($tightness) && $tightness != 1 ) {
+ my $ws_flag = $tightness == 0 ? WS_YES : WS_NO;
+ $opening_container_inside_ws{$sequence_number} = $ws_flag;
+ $closing_container_inside_ws{$sequence_number} = $ws_flag;
+ }
+ }
+ return;
+ };
- my $rtokh = $rLL->[$j];
+ my ( $ws_1, $ws_2, $ws_3, $ws_4 );
- # Set a default
- $rwhitespace_flags->[$j] = WS_OPTIONAL;
+ # main loop over all tokens to define the whitespace flags
+ for ( my $j = 0 ; $j <= $jmax ; $j++ ) {
- if ( $rtokh->[_TYPE_] eq 'b' ) {
+ if ( $rLL->[$j]->[_TYPE_] eq 'b' ) {
+ $rwhitespace_flags->[$j] = WS_OPTIONAL;
next;
}
- # set a default value, to be changed as needed
- $ws = undef;
- $last_token = $token;
- $last_type = $type;
- $last_block_type = $block_type;
- $last_input_line_no = $input_line_no;
- $token = $rtokh->[_TOKEN_];
- $type = $rtokh->[_TYPE_];
- $block_type = $rtokh->[_BLOCK_TYPE_];
- $input_line_no = $rtokh->[_LINE_INDEX_];
+ $rtokh_last = $rtokh;
+ $last_token = $token;
+ $last_type = $type;
+
+ $rtokh = $rLL->[$j];
+ $token = $rtokh->[_TOKEN_];
+ $type = $rtokh->[_TYPE_];
+
+ $ws = undef;
#---------------------------------------------------------------
# Whitespace Rules Section 1:
# /^[L\{\(\[]$/
if ( $is_opening_type{$last_type} ) {
+ my $seqno = $rtokh->[_TYPE_SEQUENCE_];
+ my $block_type = $rblock_type_of_seqno->{$seqno};
+ my $last_seqno = $rtokh_last->[_TYPE_SEQUENCE_];
+ my $last_block_type = $rblock_type_of_seqno->{$last_seqno};
+
$j_tight_closing_paren = -1;
# let us keep empty matched braces together: () {} []
$ws = $ws_in_container->($j);
}
}
- } # end setting space flag inside opening tokens
- my $ws_1;
- $ws_1 = $ws
- if FORMATTER_DEBUG_FLAG_WHITE;
+
+ # check for special cases which override the above rules
+ if ( %opening_container_inside_ws && $last_seqno ) {
+ my $ws_override = $opening_container_inside_ws{$last_seqno};
+ if ($ws_override) { $ws = $ws_override }
+ }
+
+ $ws_4 = $ws_3 = $ws_2 = $ws_1 = $ws
+ if DEBUG_WHITE;
+
+ } ## end setting space flag inside opening tokens
#---------------------------------------------------------------
# Whitespace Rules Section 2:
# /[\}\)\]R]/
if ( $is_closing_type{$type} ) {
+ my $seqno = $rtokh->[_TYPE_SEQUENCE_];
if ( $j == $j_tight_closing_paren ) {
$j_tight_closing_paren = -1;
if ( !defined($ws) ) {
my $tightness;
+ my $block_type = $rblock_type_of_seqno->{$seqno};
if ( $type eq '}' && $token eq '}' && $block_type ) {
$tightness = $rOpts_block_brace_tightness;
}
$ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
}
}
- } # end setting space flag inside closing tokens
- my $ws_2;
- $ws_2 = $ws
- if FORMATTER_DEBUG_FLAG_WHITE;
+ # check for special cases which override the above rules
+ if ( %closing_container_inside_ws && $seqno ) {
+ my $ws_override = $closing_container_inside_ws{$seqno};
+ if ($ws_override) { $ws = $ws_override }
+ }
- #---------------------------------------------------------------
- # Whitespace Rules Section 3:
- # Use the binary rule table.
- #---------------------------------------------------------------
- if ( !defined($ws) ) {
- $ws = $binary_ws_rules{$last_type}{$type};
- }
- my $ws_3;
- $ws_3 = $ws
- if FORMATTER_DEBUG_FLAG_WHITE;
+ $ws_4 = $ws_3 = $ws_2 = $ws
+ if DEBUG_WHITE;
+ } ## end setting space flag inside closing tokens
#---------------------------------------------------------------
- # Whitespace Rules Section 4:
+ # Whitespace Rules Section 3:
# Handle some special cases.
#---------------------------------------------------------------
- if ( $token eq '(' ) {
- # This will have to be tweaked as tokenization changes.
- # We usually want a space at '} (', for example:
- # <<snippets/space1.in>>
- # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
- #
- # But not others:
- # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
- # At present, the above & block is marked as type L/R so this case
- # won't go through here.
- if ( $last_type eq '}' ) { $ws = WS_YES }
-
- # NOTE: some older versions of Perl had occasional problems if
- # spaces are introduced between keywords or functions and opening
- # parens. So the default is not to do this except is certain
- # cases. The current Perl seems to tolerate spaces.
-
- # Space between keyword and '('
- elsif ( $last_type eq 'k' ) {
- $ws = WS_NO
- unless ( $rOpts_space_keyword_paren
- || $space_after_keyword{$last_token} );
- }
-
- # Space between function and '('
- # -----------------------------------------------------
- # 'w' and 'i' checks for something like:
- # myfun( &myfun( ->myfun(
- # -----------------------------------------------------
- elsif (( $last_type =~ /^[wUG]$/ )
- || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
- {
- $ws = WS_NO unless ($rOpts_space_function_paren);
- }
+ # /^[L\{\(\[]$/
+ elsif ( $is_opening_type{$type} ) {
+
+ if ( $token eq '(' ) {
+
+ my $seqno = $rtokh->[_TYPE_SEQUENCE_];
+
+ # This will have to be tweaked as tokenization changes.
+ # We usually want a space at '} (', for example:
+ # <<snippets/space1.in>>
+ # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
+ #
+ # But not others:
+ # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
+ # At present, the above & block is marked as type L/R so this case
+ # won't go through here.
+ if ( $last_type eq '}' && $last_token ne ')' ) { $ws = WS_YES }
+
+ # NOTE: some older versions of Perl had occasional problems if
+ # spaces are introduced between keywords or functions and opening
+ # parens. So the default is not to do this except is certain
+ # cases. The current Perl seems to tolerate spaces.
+
+ # Space between keyword and '('
+ elsif ( $last_type eq 'k' ) {
+ $ws = WS_NO
+ unless ( $rOpts_space_keyword_paren
+ || $space_after_keyword{$last_token} );
+
+ # Set inside space flag if requested
+ $set_container_ws_by_keyword->( $last_token, $seqno );
+ }
+
+ # Space between function and '('
+ # -----------------------------------------------------
+ # 'w' and 'i' checks for something like:
+ # myfun( &myfun( ->myfun(
+ # -----------------------------------------------------
+
+ # Note that at this point an identifier may still have a leading
+ # arrow, but the arrow will be split off during token respacing.
+ # After that, the token may become a bare word without leading
+ # arrow. The point is, it is best to mark function call parens
+ # right here before that happens.
+ # Patch: added 'C' to prevent blinker, case b934, i.e. 'pi()'
+ # NOTE: this would be the place to allow spaces between repeated
+ # parens, like () () (), as in case c017, but I decided that would
+ # not be a good idea.
+ elsif (
+ ( $last_type =~ /^[wCUG]$/ )
+ || ( $last_type =~ /^[wi]$/ && $last_token =~ /^([\&]|->)/ )
+ )
+ {
+ $ws = $rOpts_space_function_paren ? WS_YES : WS_NO;
+ $set_container_ws_by_keyword->( $last_token, $seqno );
+ $ris_function_call_paren->{$seqno} = 1;
+ }
+
+ # space between something like $i and ( in <<snippets/space2.in>>
+ # for $i ( 0 .. 20 ) {
+ # FIXME: eventually, type 'i' could be split into multiple
+ # token types so this can be a hardwired rule.
+ elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
+ $ws = WS_YES;
+ }
- # space between something like $i and ( in <<snippets/space2.in>>
- # for $i ( 0 .. 20 ) {
- # FIXME: eventually, type 'i' needs to be split into multiple
- # token types so this can be a hardwired rule.
- elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
- $ws = WS_YES;
+ # allow constant function followed by '()' to retain no space
+ elsif ($last_type eq 'C'
+ && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
+ {
+ $ws = WS_NO;
+ }
}
- # allow constant function followed by '()' to retain no space
- elsif ($last_type eq 'C'
- && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
- {
- $ws = WS_NO;
+ # patch for SWITCH/CASE: make space at ']{' optional
+ # since the '{' might begin a case or when block
+ elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
+ $ws = WS_OPTIONAL;
}
- }
- # patch for SWITCH/CASE: make space at ']{' optional
- # since the '{' might begin a case or when block
- elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
- $ws = WS_OPTIONAL;
- }
+ # keep space between 'sub' and '{' for anonymous sub definition
+ if ( $type eq '{' ) {
+ if ( $last_token eq 'sub' ) {
+ $ws = WS_YES;
+ }
- # keep space between 'sub' and '{' for anonymous sub definition
- if ( $type eq '{' ) {
- if ( $last_token eq 'sub' ) {
- $ws = WS_YES;
+ # this is needed to avoid no space in '){'
+ if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
+
+ # avoid any space before the brace or bracket in something like
+ # @opts{'a','b',...}
+ if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
+ $ws = WS_NO;
+ }
}
+ } ## end if ( $is_opening_type{$type} ) {
- # this is needed to avoid no space in '){'
- if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
+ # Special checks for certain other types ...
+ # the hash '%is_special_ws_type' significantly speeds up this routine,
+ # but be sure to update it if a new check is added.
+ # Currently has types: qw(k w i C m - Q #)
+ elsif ( $is_special_ws_type{$type} ) {
+ if ( $type eq 'i' ) {
- # avoid any space before the brace or bracket in something like
- # @opts{'a','b',...}
- if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
- $ws = WS_NO;
+ # never a space before ->
+ if ( substr( $token, 0, 2 ) eq '->' ) {
+ $ws = WS_NO;
+ }
}
- }
- elsif ( $type eq 'i' ) {
+ elsif ( $type eq 'k' ) {
- # never a space before ->
- if ( $token =~ /^\-\>/ ) {
- $ws = WS_NO;
+ # Keywords 'for', 'foreach' are special cases for -kpit since
+ # the opening paren does not always immediately follow the
+ # keyword. So we have to search forward for the paren in this
+ # case. I have limited the search to 10 tokens ahead, just in
+ # case somebody has a big file and no opening paren. This
+ # should be enough for all normal code. Added the level check
+ # to fix b1236.
+ if ( $is_for_foreach{$token}
+ && %keyword_paren_inner_tightness
+ && defined( $keyword_paren_inner_tightness{$token} )
+ && $j < $jmax )
+ {
+ my $level = $rLL->[$j]->[_LEVEL_];
+ my $jp = $j;
+ for ( my $inc = 1 ; $inc < 10 ; $inc++ ) {
+ $jp++;
+ last if ( $jp > $jmax );
+ last if ( $rLL->[$jp]->[_LEVEL_] != $level ); # b1236
+ next unless ( $rLL->[$jp]->[_TOKEN_] eq '(' );
+ my $seqno_p = $rLL->[$jp]->[_TYPE_SEQUENCE_];
+ $set_container_ws_by_keyword->( $token, $seqno_p );
+ last;
+ }
+ }
}
- }
- # retain any space between '-' and bare word
- elsif ( $type eq 'w' || $type eq 'C' ) {
- $ws = WS_OPTIONAL if $last_type eq '-';
+ # retain any space between '-' and bare word
+ elsif ( $type eq 'w' || $type eq 'C' ) {
+ $ws = WS_OPTIONAL if $last_type eq '-';
+
+ # never a space before ->
+ if ( substr( $token, 0, 2 ) eq '->' ) {
+ $ws = WS_NO;
+ }
+ }
- # never a space before ->
- if ( $token =~ /^\-\>/ ) {
- $ws = WS_NO;
+ # retain any space between '-' and bare word; for example
+ # avoid space between 'USER' and '-' here: <<snippets/space2.in>>
+ # $myhash{USER-NAME}='steve';
+ elsif ( $type eq 'm' || $type eq '-' ) {
+ $ws = WS_OPTIONAL if ( $last_type eq 'w' );
}
- }
- # retain any space between '-' and bare word; for example
- # avoid space between 'USER' and '-' here: <<snippets/space2.in>>
- # $myhash{USER-NAME}='steve';
- elsif ( $type eq 'm' || $type eq '-' ) {
- $ws = WS_OPTIONAL if ( $last_type eq 'w' );
- }
+ # always space before side comment
+ elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
- # always space before side comment
- elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
+ # space_backslash_quote; RT #123774 <<snippets/rt123774.in>>
+ # allow a space between a backslash and single or double quote
+ # to avoid fooling html formatters
+ elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ )
+ {
+ if ($rOpts_space_backslash_quote) {
+ if ( $rOpts_space_backslash_quote == 1 ) {
+ $ws = WS_OPTIONAL;
+ }
+ elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES }
+ else { } # shouldnt happen
+ }
+ else {
+ $ws = WS_NO;
+ }
+ }
+ } ## end elsif ( $is_special_ws_type{$type} ...
# always preserver whatever space was used after a possible
# filehandle (except _) or here doc operator
$ws = WS_OPTIONAL;
}
- # space_backslash_quote; RT #123774 <<snippets/rt123774.in>>
- # allow a space between a backslash and single or double quote
- # to avoid fooling html formatters
- elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ ) {
- if ($rOpts_space_backslash_quote) {
- if ( $rOpts_space_backslash_quote == 1 ) {
- $ws = WS_OPTIONAL;
- }
- elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES }
- else { } # shouldnt happen
- }
- else {
- $ws = WS_NO;
- }
- }
+ $ws_4 = $ws_3 = $ws
+ if DEBUG_WHITE;
- my $ws_4;
- $ws_4 = $ws
- if FORMATTER_DEBUG_FLAG_WHITE;
+ if ( !defined($ws) ) {
- #---------------------------------------------------------------
- # Whitespace Rules Section 5:
- # Apply default rules not covered above.
- #---------------------------------------------------------------
+ #---------------------------------------------------------------
+ # Whitespace Rules Section 4:
+ # Use the binary rule table.
+ #---------------------------------------------------------------
+ $ws = $binary_ws_rules{$last_type}{$type};
+ $ws_4 = $ws if DEBUG_WHITE;
- # If we fall through to here, look at the pre-defined hash tables for
- # the two tokens, and:
- # if (they are equal) use the common value
- # if (either is zero or undef) use the other
- # if (either is -1) use it
- # That is,
- # left vs right
- # 1 vs 1 --> 1
- # 0 vs 0 --> 0
- # -1 vs -1 --> -1
- #
- # 0 vs -1 --> -1
- # 0 vs 1 --> 1
- # 1 vs 0 --> 1
- # -1 vs 0 --> -1
- #
- # -1 vs 1 --> -1
- # 1 vs -1 --> -1
- if ( !defined($ws) ) {
- my $wl = $want_left_space{$type};
- my $wr = $want_right_space{$last_type};
- if ( !defined($wl) ) { $wl = 0 }
- if ( !defined($wr) ) { $wr = 0 }
- $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
- }
+ #---------------------------------------------------------------
+ # Whitespace Rules Section 5:
+ # Apply default rules not covered above.
+ #---------------------------------------------------------------
- if ( !defined($ws) ) {
- $ws = 0;
- write_diagnostics(
- "WS flag is undefined for tokens $last_token $token\n");
+ # If we fall through to here, look at the pre-defined hash tables for
+ # the two tokens, and:
+ # if (they are equal) use the common value
+ # if (either is zero or undef) use the other
+ # if (either is -1) use it
+ # That is,
+ # left vs right
+ # 1 vs 1 --> 1
+ # 0 vs 0 --> 0
+ # -1 vs -1 --> -1
+ #
+ # 0 vs -1 --> -1
+ # 0 vs 1 --> 1
+ # 1 vs 0 --> 1
+ # -1 vs 0 --> -1
+ #
+ # -1 vs 1 --> -1
+ # 1 vs -1 --> -1
+ if ( !defined($ws) ) {
+ my $wl = $want_left_space{$type};
+ my $wr = $want_right_space{$last_type};
+ if ( !defined($wl) ) {
+ $ws = defined($wr) ? $wr : 0;
+ }
+ elsif ( !defined($wr) ) {
+ $ws = $wl;
+ }
+ else {
+ $ws =
+ ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
+ }
+ }
}
# Treat newline as a whitespace. Otherwise, we might combine
# my $msg = new Fax::Send
# -recipients => $to,
# -data => $data;
- if ( $ws == 0 && $input_line_no != $last_input_line_no ) { $ws = 1 }
-
- if ( ( $ws == 0 )
- && $j > 0
- && $j < $jmax
- && ( $last_type !~ /^[Zh]$/ ) )
+ if ( $ws == 0
+ && $rtokh->[_LINE_INDEX_] != $rtokh_last->[_LINE_INDEX_] )
{
-
- # If this happens, we have a non-fatal but undesirable
- # hole in the above rules which should be patched.
- write_diagnostics(
- "WS flag is zero for tokens $last_token $token\n");
+ $ws = 1;
}
$rwhitespace_flags->[$j] = $ws;
- FORMATTER_DEBUG_FLAG_WHITE && do {
+ if (DEBUG_WHITE) {
my $str = substr( $last_token, 0, 15 );
$str .= ' ' x ( 16 - length($str) );
if ( !defined($ws_1) ) { $ws_1 = "*" }
if ( !defined($ws_4) ) { $ws_4 = "*" }
print STDOUT
"NEW WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
- };
+
+ # reset for next pass
+ $ws_1 = $ws_2 = $ws_3 = $ws_4 = undef;
+ }
} ## end main loop
if ( $rOpts->{'tight-secret-operators'} ) {
new_secret_operator_whitespace( $rLL, $rwhitespace_flags );
}
+ $self->[_ris_function_call_paren_] = $ris_function_call_paren;
return $rwhitespace_flags;
-} ## end sub set_whitespace_flags
-sub respace_tokens {
+} ## end sub set_whitespace_flags
- my $self = shift;
- return if $rOpts->{'indent-only'};
+sub dump_want_left_space {
+ my $fh = shift;
+ local $" = "\n";
+ $fh->print(<<EOM);
+These values are the main control of whitespace to the left of a token type;
+They may be altered with the -wls parameter.
+For a list of token types, use perltidy --dump-token-types (-dtt)
+ 1 means the token wants a space to its left
+-1 means the token does not want a space to its left
+------------------------------------------------------------------------
+EOM
+ foreach my $key ( sort keys %want_left_space ) {
+ $fh->print("$key\t$want_left_space{$key}\n");
+ }
+ return;
+}
- # This routine makes all necessary changes to the tokenization after the
- # file has been read. This consists mostly of inserting and deleting spaces
- # according to the selected parameters. In a few cases non-space characters
- # are added, deleted or modified.
-
- # The old tokens are copied one-by-one, with changes, from the old
- # linear storage array to a new array.
-
- my $rLL = $self->{rLL};
- my $Klimit_old = $self->{Klimit};
- my $rlines = $self->{rlines};
- my $rpaired_to_inner_container = $self->{rpaired_to_inner_container};
+sub dump_want_right_space {
+ my $fh = shift;
+ local $" = "\n";
+ $fh->print(<<EOM);
+These values are the main control of whitespace to the right of a token type;
+They may be altered with the -wrs parameter.
+For a list of token types, use perltidy --dump-token-types (-dtt)
+ 1 means the token wants a space to its right
+-1 means the token does not want a space to its right
+------------------------------------------------------------------------
+EOM
+ foreach my $key ( sort keys %want_right_space ) {
+ $fh->print("$key\t$want_right_space{$key}\n");
+ }
+ return;
+}
- my $rLL_new = []; # This is the new array
- my $KK = 0;
- my $rtoken_vars;
- my $Kmax = @{$rLL} - 1;
+{ ## begin closure is_essential_whitespace
- # Set the whitespace flags, which indicate the token spacing preference.
- my $rwhitespace_flags = $self->set_whitespace_flags();
+ my %is_sort_grep_map;
+ my %is_for_foreach;
+ my %is_digraph;
+ my %is_trigraph;
+ my %essential_whitespace_filter_l1;
+ my %essential_whitespace_filter_r1;
+ my %essential_whitespace_filter_l2;
+ my %essential_whitespace_filter_r2;
+ my %is_type_with_space_before_bareword;
+ my %is_special_variable_char;
- # we will be setting token lengths as we go
- my $cumulative_length = 0;
+ BEGIN {
- # We also define these hash indexes giving container token array indexes
- # as a function of the container sequence numbers. For example,
- my $K_opening_container = {}; # opening [ { or (
- my $K_closing_container = {}; # closing ] } or )
- my $K_opening_ternary = {}; # opening ? of ternary
- my $K_closing_ternary = {}; # closing : of ternary
+ my @q;
- # List of new K indexes of phantom semicolons
- # This will be needed if we want to undo them for iterations
- my $rK_phantom_semicolons = [];
+ # NOTE: This hash is like the global %is_sort_map_grep, but it ignores
+ # grep aliases on purpose, since here we are looking parens, not braces
+ @q = qw(sort grep map);
+ @is_sort_grep_map{@q} = (1) x scalar(@q);
- # Temporary hashes for adding semicolons
- ##my $rKfirst_new = {};
+ @q = qw(for foreach);
+ @is_for_foreach{@q} = (1) x scalar(@q);
- # a sub to link preceding nodes forward to a new node type
- my $link_back = sub {
- my ( $Ktop, $key ) = @_;
+ @q = qw(
+ .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
+ <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
+ );
+ @is_digraph{@q} = (1) x scalar(@q);
- my $Kprev = $Ktop - 1;
- while ( $Kprev >= 0
- && !defined( $rLL_new->[$Kprev]->[$key] ) )
- {
- $rLL_new->[$Kprev]->[$key] = $Ktop;
- $Kprev -= 1;
- }
- };
+ @q = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~);
+ @is_trigraph{@q} = (1) x scalar(@q);
- # A sub to store one token in the new array
- # All new tokens must be stored by this sub so that it can update
- # all data structures on the fly.
- my $last_nonblank_type = ';';
- my $last_nonblank_token = ';';
- my $last_nonblank_block_type = '';
- my $store_token = sub {
- my ($item) = @_;
+ # These are used as a speedup filters for sub is_essential_whitespace.
- # This will be the index of this item in the new array
- my $KK_new = @{$rLL_new};
+ # Filter 1:
+ # These left side token types USUALLY do not require a space:
+ @q = qw( ; { } [ ] L R );
+ push @q, ',';
+ push @q, ')';
+ push @q, '(';
+ @essential_whitespace_filter_l1{@q} = (1) x scalar(@q);
- # check for a sequenced item (i.e., container or ?/:)
- my $type_sequence = $item->[_TYPE_SEQUENCE_];
- if ($type_sequence) {
+ # BUT some might if followed by these right token types
+ @q = qw( pp mm << <<= h );
+ @essential_whitespace_filter_r1{@q} = (1) x scalar(@q);
- $link_back->( $KK_new, _KNEXT_SEQ_ITEM_ );
+ # Filter 2:
+ # These right side filters usually do not require a space
+ @q = qw( ; ] R } );
+ push @q, ',';
+ push @q, ')';
+ @essential_whitespace_filter_r2{@q} = (1) x scalar(@q);
- my $token = $item->[_TOKEN_];
- if ( $is_opening_token{$token} ) {
+ # BUT some might if followed by these left token types
+ @q = qw( h Z );
+ @essential_whitespace_filter_l2{@q} = (1) x scalar(@q);
- $K_opening_container->{$type_sequence} = $KK_new;
- }
- elsif ( $is_closing_token{$token} ) {
+ # Keep a space between certain types and any bareword:
+ # Q: keep a space between a quote and a bareword to prevent the
+ # bareword from becoming a quote modifier.
+ # &: do not remove space between an '&' and a bare word because
+ # it may turn into a function evaluation, like here
+ # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
+ # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
+ @q = qw( Q & );
+ @is_type_with_space_before_bareword{@q} = (1) x scalar(@q);
- $K_closing_container->{$type_sequence} = $KK_new;
- }
+ # These are the only characters which can (currently) form special
+ # variables, like $^W: (issue c066, c068).
+ @q =
+ qw{ ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ };
+ @{is_special_variable_char}{@q} = (1) x scalar(@q);
- # These are not yet used but could be useful
- else {
- if ( $token eq '?' ) {
- $K_opening_ternary->{$type_sequence} = $KK_new;
- }
- elsif ( $token eq ':' ) {
- $K_closing_ternary->{$type_sequence} = $KK_new;
- }
- else {
- # shouldn't happen
- Fault("Ugh: shouldn't happen");
- }
- }
- }
+ }
- # find the length of this token
- my $token_length = length( $item->[_TOKEN_] );
+ sub is_essential_whitespace {
- # and update the cumulative length
- $cumulative_length += $token_length;
+ # Essential whitespace means whitespace which cannot be safely deleted
+ # without risking the introduction of a syntax error.
+ # We are given three tokens and their types:
+ # ($tokenl, $typel) is the token to the left of the space in question
+ # ($tokenr, $typer) is the token to the right of the space in question
+ # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
+ #
+ # Note1: This routine should almost never need to be changed. It is
+ # for avoiding syntax problems rather than for formatting.
- # Save the length sum to just AFTER this token
- $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
+ # Note2: The -mangle option causes large numbers of calls to this
+ # routine and therefore is a good test. So if a change is made, be sure
+ # to use nytprof to profile with both old and reviesed coding using the
+ # -mangle option and check differences.
- my $type = $item->[_TYPE_];
+ my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
- # trim side comments
- if ( $type eq '#' ) {
- $item->[_TOKEN_] =~ s/\s*$//;
- }
+ # This is potentially a very slow routine but the following quick
+ # filters typically catch and handle over 90% of the calls.
- if ( $type && $type ne 'b' && $type ne '#' ) {
- $last_nonblank_type = $type;
- $last_nonblank_token = $item->[_TOKEN_];
- $last_nonblank_block_type = $item->[_BLOCK_TYPE_];
- }
+ # Filter 1: usually no space required after common types ; , [ ] { } ( )
+ return
+ if ( $essential_whitespace_filter_l1{$typel}
+ && !$essential_whitespace_filter_r1{$typer} );
- # and finally, add this item to the new array
- push @{$rLL_new}, $item;
- };
+ # Filter 2: usually no space before common types ; ,
+ return
+ if ( $essential_whitespace_filter_r2{$typer}
+ && !$essential_whitespace_filter_l2{$typel} );
- my $store_token_and_space = sub {
- my ( $item, $want_space ) = @_;
+ # Filter 3: Handle side comments: a space is only essential if the left
+ # token ends in '$' For example, we do not want to create $#foo below:
- # store a token with preceding space if requested and needed
+ # sub t086
+ # ( #foo)))
+ # $ #foo)))
+ # a #foo)))
+ # ) #foo)))
+ # { ... }
- # First store the space
- if ( $want_space
- && @{$rLL_new}
- && $rLL_new->[-1]->[_TYPE_] ne 'b'
- && $rOpts_add_whitespace )
- {
- my $rcopy = copy_token_as_type( $item, 'b', ' ' );
- $rcopy->[_LINE_INDEX_] =
- $rLL_new->[-1]->[_LINE_INDEX_];
- $store_token->($rcopy);
- }
+ # Also, I prefer not to put a ? and # together because ? used to be
+ # a pattern delmiter and spacing was used if guessing was needed.
- # then the token
- $store_token->($item);
- };
+ if ( $typer eq '#' ) {
- my $K_end_q = sub {
- my ($KK) = @_;
- my $K_end = $KK;
- my $Kn = $self->K_next_nonblank($KK);
- while ( defined($Kn) && $rLL->[$Kn]->[_TYPE_] eq 'q' ) {
- $K_end = $Kn;
- $Kn = $self->K_next_nonblank($Kn);
+ return 1
+ if ( $tokenl
+ && ( $typel eq '?' || substr( $tokenl, -1 ) eq '$' ) );
+ return;
}
- return $K_end;
- };
-
- my $add_phantom_semicolon = sub {
- my ($KK) = @_;
+ my $tokenr_is_bareword = $tokenr =~ /^\w/ && $tokenr !~ /^\d/;
+ my $tokenr_is_open_paren = $tokenr eq '(';
+ my $token_joined = $tokenl . $tokenr;
+ my $tokenl_is_dash = $tokenl eq '-';
- my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
- return unless ( defined($Kp) );
+ my $result =
- # we are only adding semicolons for certain block types
- my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
- return
- unless ( $ok_to_add_semicolon_for_block_type{$block_type}
- || $block_type =~ /^(sub|package)/
- || $block_type =~ /^\w+\:$/ );
+ # never combine two bare words or numbers
+ # examples: and ::ok(1)
+ # return ::spw(...)
+ # for bla::bla:: abc
+ # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
+ # $input eq"quit" to make $inputeq"quit"
+ # my $size=-s::SINK if $file; <==OK but we won't do it
+ # don't join something like: for bla::bla:: abc
+ # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
+ ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
+ && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
- my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+ # do not combine a number with a concatenation dot
+ # example: pom.caputo:
+ # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
+ || $typel eq 'n' && $tokenr eq '.'
+ || $typer eq 'n' && $tokenl eq '.'
- my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
- my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
+ # cases of a space before a bareword...
+ || (
+ $tokenr_is_bareword && (
+
+ # do not join a minus with a bare word, because you might form
+ # a file test operator. Example from Complex.pm:
+ # if (CORE::abs($z - i) < $eps);
+ # "z-i" would be taken as a file test.
+ $tokenl_is_dash && length($tokenr) == 1
+
+ # and something like this could become ambiguous without space
+ # after the '-':
+ # use constant III=>1;
+ # $a = $b - III;
+ # and even this:
+ # $a = - III;
+ || $tokenl_is_dash && $typer =~ /^[wC]$/
+
+ # keep space between types Q & and a bareword
+ || $is_type_with_space_before_bareword{$typel}
+
+ # +-: binary plus and minus before a bareword could get
+ # converted into unary plus and minus on next pass through the
+ # tokenizer. This can lead to blinkers: cases b660 b670 b780
+ # b781 b787 b788 b790 So we keep a space unless the +/- clearly
+ # follows an operator
+ || ( ( $typel eq '+' || $typel eq '-' )
+ && $typell !~ /^[niC\)\}\]R]$/ )
+
+ # keep a space between a token ending in '$' and any word;
+ # this caused trouble: "die @$ if $@"
+ ##|| $typel eq 'i' && $tokenl =~ /\$$/
+ || $typel eq 'i' && substr( $tokenl, -1, 1 ) eq '$'
+
+ # don't combine $$ or $# with any alphanumeric
+ # (testfile mangle.t with --mangle)
+ ##|| $tokenl =~ /^\$[\$\#]$/
+ || $tokenl eq '$$'
+ || $tokenl eq '$#'
- # Do not add a semicolon if...
- return
- if (
+ )
+ ) ## end $tokenr_is_bareword
- # it would follow a comment (and be isolated)
- $previous_nonblank_type eq '#'
+ # OLD, not used
+ # '= -' should not become =- or you will get a warning
+ # about reversed -=
+ # || ($tokenr eq '-')
- # it follows a code block ( because they are not always wanted
- # there and may add clutter)
- || $rLL_new->[$Kp]->[_BLOCK_TYPE_]
+ # do not join a bare word with a minus, like between 'Send' and
+ # '-recipients' here <<snippets/space3.in>>
+ # my $msg = new Fax::Send
+ # -recipients => $to,
+ # -data => $data;
+ # This is the safest thing to do. If we had the token to the right of
+ # the minus we could do a better check.
+ #
+ # And do not combine a bareword and a quote, like this:
+ # oops "Your login, $Bad_Login, is not valid";
+ # It can cause a syntax error if oops is a sub
+ || $typel eq 'w' && ( $tokenr eq '-' || $typer eq 'Q' )
- # it would follow a label
- || $previous_nonblank_type eq 'J'
+ # perl is very fussy about spaces before <<
+ || substr( $tokenr, 0, 2 ) eq '<<'
+ ##|| $tokenr =~ /^\<\</
- # it would be inside a 'format' statement (and cause syntax error)
- || ( $previous_nonblank_type eq 'k'
- && $previous_nonblank_token =~ /format/ )
+ # avoid combining tokens to create new meanings. Example:
+ # $a+ +$b must not become $a++$b
+ || ( $is_digraph{$token_joined} )
+ || $is_trigraph{$token_joined}
- # if it would prevent welding two containers
- || $rpaired_to_inner_container->{$type_sequence}
+ # another example: do not combine these two &'s:
+ # allow_options & &OPT_EXECCGI
+ || $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) }
- );
+ # retain any space after possible filehandle
+ # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
+ || $typel eq 'Z'
- # We will insert an empty semicolon here as a placeholder. Later, if
- # it becomes the last token on a line, we will bring it to life. The
- # advantage of doing this is that (1) we just have to check line
- # endings, and (2) the phantom semicolon has zero width and therefore
- # won't cause needless breaks of one-line blocks.
- my $Ktop = -1;
- if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
- && $want_left_space{';'} == WS_NO )
- {
+ # Added 'Y' here 16 Jan 2021 to prevent -mangle option from removing
+ # space after type Y. Otherwise, it will get parsed as type 'Z' later
+ # and any space would have to be added back manually if desired.
+ || $typel eq 'Y'
- # convert the blank into a semicolon..
- # be careful: we are working on the new stack top
- # on a token which has been stored.
- my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', ' ' );
+ # Perl is sensitive to whitespace after the + here:
+ # $b = xvals $a + 0.1 * yvals $a;
+ || $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/
- # Convert the existing blank to:
- # a phantom semicolon for one_line_block option = 0 or 1
- # a real semicolon for one_line_block option = 2
- my $tok = $rOpts_one_line_block_semicolons == 2 ? ';' : '';
+ || (
+ $tokenr_is_open_paren && (
- $rLL_new->[$Ktop]->[_TOKEN_] = $tok; # zero length if phantom
- $rLL_new->[$Ktop]->[_TYPE_] = ';';
- $rLL_new->[$Ktop]->[_SLEVEL_] =
- $rLL->[$KK]->[_SLEVEL_];
+ # keep paren separate in 'use Foo::Bar ()'
+ ( $typel eq 'w' && $typell eq 'k' && $tokenll eq 'use' )
- push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
+ # OLD: keep any space between filehandle and paren:
+ # file mangle.t with --mangle:
+ # NEW: this test is no longer necessary here (moved above)
+ ## || $typel eq 'Y'
- # Then store a new blank
- $store_token->($rcopy);
- }
- else {
+ # must have space between grep and left paren; "grep(" will fail
+ || $is_sort_grep_map{$tokenl}
- # insert a new token
- my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', '' );
- $rcopy->[_SLEVEL_] = $rLL->[$KK]->[_SLEVEL_];
- $store_token->($rcopy);
- push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
- }
- };
+ # don't stick numbers next to left parens, as in:
+ #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
+ || $typel eq 'n'
+ )
+ ) ## end $tokenr_is_open_paren
- my $check_Q = sub {
+ # retain any space after here doc operator ( hereerr.t)
+ || $typel eq 'h'
- # Check that a quote looks okay
- # This sub works but needs to by sync'd with the log file output
- # before it can be used.
- my ( $KK, $Kfirst ) = @_;
- my $token = $rLL->[$KK]->[_TOKEN_];
- note_embedded_tab() if ( $token =~ "\t" );
+ # be careful with a space around ++ and --, to avoid ambiguity as to
+ # which token it applies
+ ##|| $typer =~ /^(pp|mm)$/ && $tokenl !~ /^[\;\{\(\[]/
+ || ( $typer eq 'pp' || $typer eq 'mm' ) && $tokenl !~ /^[\;\{\(\[]/
+ || ( $typel eq '++' || $typel eq '--' )
+ && $tokenr !~ /^[\;\}\)\]]/
+ ##|| $typel =~ /^(\+\+|\-\-)$/ && $tokenr !~ /^[\;\}\)\]]/
- my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
- return unless ( defined($Kp) );
- my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
- my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
+ # need space after foreach my; for example, this will fail in
+ # older versions of Perl:
+ # foreach my$ft(@filetypes)...
+ || (
+ $tokenl eq 'my'
- my $previous_nonblank_type_2 = 'b';
- my $previous_nonblank_token_2 = "";
- my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
- if ( defined($Kpp) ) {
- $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_];
- $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
- }
+ && substr( $tokenr, 0, 1 ) eq '$'
+ ##&& $tokenr =~ /^\$/
- my $Kn = $self->K_next_nonblank($KK);
- my $next_nonblank_token = "";
- if ( defined($Kn) ) {
- $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
- }
+ # /^(for|foreach)$/
+ && $is_for_foreach{$tokenll}
+ )
- my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
- my $type_0 = $rLL->[$Kfirst]->[_TYPE_];
+ # Keep space after like $^ if needed to avoid forming a different
+ # special variable (issue c068). For example:
+ # my $aa = $^ ? "none" : "ok";
+ || ( $typel eq 'i'
+ && length($tokenl) == 2
+ && substr( $tokenl, 1, 1 ) eq '^'
+ && $is_special_variable_char{ substr( $tokenr, 0, 1 ) } )
- # make note of something like '$var = s/xxx/yyy/;'
- # in case it should have been '$var =~ s/xxx/yyy/;'
- if (
- $token =~ /^(s|tr|y|m|\/)/
- && $previous_nonblank_token =~ /^(=|==|!=)$/
+ # We must be sure that a space between a ? and a quoted string
+ # remains if the space before the ? remains. [Loca.pm, lockarea]
+ # ie,
+ # $b=join $comma ? ',' : ':', @_; # ok
+ # $b=join $comma?',' : ':', @_; # ok!
+ # $b=join $comma ?',' : ':', @_; # error!
+ # Not really required:
+ ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
- # preceded by simple scalar
- && $previous_nonblank_type_2 eq 'i'
- && $previous_nonblank_token_2 =~ /^\$/
+ # Space stacked labels...
+ # Not really required: Perl seems to accept non-spaced labels.
+ ## || $typel eq 'J' && $typer eq 'J'
- # followed by some kind of termination
- # (but give complaint if we can not see far enough ahead)
- && $next_nonblank_token =~ /^[; \)\}]$/
+ ; # the value of this long logic sequence is the result we want
+ return $result;
+ }
+} ## end closure is_essential_whitespace
- # scalar is not declared
- && !( $type_0 eq 'k' && $token_0 =~ /^(my|our|local)$/ )
- )
- {
- my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
- complain(
-"Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
- );
- }
- };
+{ ## begin closure new_secret_operator_whitespace
- # Main loop over all lines of the file
- my $last_K_out;
- my $CODE_type = "";
- my $line_type = "";
+ my %secret_operators;
+ my %is_leading_secret_token;
- # Testing option to break qw. Do not use; it can make a mess.
- my $ALLOW_BREAK_MULTILINE_QW = 0;
- my $in_multiline_qw;
- foreach my $line_of_tokens ( @{$rlines} ) {
+ BEGIN {
- $input_line_number = $line_of_tokens->{_line_number};
- my $last_line_type = $line_type;
- $line_type = $line_of_tokens->{_line_type};
- next unless ( $line_type eq 'CODE' );
- my $last_CODE_type = $CODE_type;
- $CODE_type = $line_of_tokens->{_code_type};
- my $rK_range = $line_of_tokens->{_rK_range};
- my ( $Kfirst, $Klast ) = @{$rK_range};
- next unless defined($Kfirst);
+ # token lists for perl secret operators as compiled by Philippe Bruhat
+ # at: https://metacpan.org/module/perlsecret
+ %secret_operators = (
+ 'Goatse' => [qw#= ( ) =#], #=( )=
+ 'Venus1' => [qw#0 +#], # 0+
+ 'Venus2' => [qw#+ 0#], # +0
+ 'Enterprise' => [qw#) x ! !#], # ()x!!
+ 'Kite1' => [qw#~ ~ <>#], # ~~<>
+ 'Kite2' => [qw#~~ <>#], # ~~<>
+ 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
+ 'Bang bang ' => [qw#! !#], # !!
+ );
- # Check for correct sequence of token indexes...
- # An error here means that sub write_line() did not correctly
- # package the tokenized lines as it received them.
- if ( defined($last_K_out) ) {
- if ( $Kfirst != $last_K_out + 1 ) {
- Fault(
- "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
- );
- }
- }
- else {
- if ( $Kfirst != 0 ) {
- Fault("Program Bug: first K is $Kfirst but should be 0");
- }
+ # The following operators and constants are not included because they
+ # are normally kept tight by perltidy:
+ # ~~ <~>
+ #
+
+ # Make a lookup table indexed by the first token of each operator:
+ # first token => [list, list, ...]
+ foreach my $value ( values(%secret_operators) ) {
+ my $tok = $value->[0];
+ push @{ $is_leading_secret_token{$tok} }, $value;
}
- $last_K_out = $Klast;
+ }
- # Handle special lines of code
- if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
+ sub new_secret_operator_whitespace {
- # CODE_types are as follows.
- # 'BL' = Blank Line
- # 'VB' = Verbatim - line goes out verbatim
- # 'FS' = Format Skipping - line goes out verbatim, no blanks
- # 'IO' = Indent Only - only indentation may be changed
- # 'NIN' = No Internal Newlines - line does not get broken
- # 'HSC'=Hanging Side Comment - fix this hanging side comment
- # 'BC'=Block Comment - an ordinary full line comment
- # 'SBC'=Static Block Comment - a block comment which does not get
- # indented
- # 'SBCX'=Static Block Comment Without Leading Space
- # 'DEL'=Delete this line
- # 'VER'=VERSION statement
- # '' or (undefined) - no restructions
+ my ( $rlong_array, $rwhitespace_flags ) = @_;
- # For a hanging side comment we insert an empty quote before
- # the comment so that it becomes a normal side comment and
- # will be aligned by the vertical aligner
- if ( $CODE_type eq 'HSC' ) {
+ # Loop over all tokens in this line
+ my ( $token, $type );
+ my $jmax = @{$rlong_array} - 1;
+ foreach my $j ( 0 .. $jmax ) {
- # Safety Check: This must be a line with one token (a comment)
- my $rtoken_vars = $rLL->[$Kfirst];
- if ( $Kfirst == $Klast && $rtoken_vars->[_TYPE_] eq '#' ) {
+ $token = $rlong_array->[$j]->[_TOKEN_];
+ $type = $rlong_array->[$j]->[_TYPE_];
- # Note that even if the flag 'noadd-whitespace' is set, we
- # will make an exception here and allow a blank to be
- # inserted to push the comment to the right. We can think
- # of this as an adjustment of indentation rather than
- # whitespace between tokens. This will also prevent the
- # hanging side comment from getting converted to a block
- # comment if whitespace gets deleted, as for example with
- # the -extrude and -mangle options.
- my $rcopy = copy_token_as_type( $rtoken_vars, 'q', '' );
- $store_token->($rcopy);
- $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' );
- $store_token->($rcopy);
- $store_token->($rtoken_vars);
- next;
- }
- else {
+ # Skip unless this token might start a secret operator
+ next if ( $type eq 'b' );
+ next unless ( $is_leading_secret_token{$token} );
- # This line was mis-marked by sub scan_comment
- Fault(
- "Program bug. A hanging side comment has been mismarked"
- );
- }
- }
+ # Loop over all secret operators with this leading token
+ foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
+ my $jend = $j - 1;
+ foreach my $tok ( @{$rpattern} ) {
+ $jend++;
+ $jend++
- # Copy tokens unchanged
- foreach my $KK ( $Kfirst .. $Klast ) {
- $store_token->( $rLL->[$KK] );
- }
- next;
- }
+ if ( $jend <= $jmax
+ && $rlong_array->[$jend]->[_TYPE_] eq 'b' );
+ if ( $jend > $jmax
+ || $tok ne $rlong_array->[$jend]->[_TOKEN_] )
+ {
+ $jend = undef;
+ last;
+ }
+ }
- # Handle normal line..
+ if ($jend) {
- # Insert any essential whitespace between lines
- # if last line was normal CODE.
- # Patch for rt #125012: use K_previous_code rather than '_nonblank'
- # because comments may disappear.
- my $type_next = $rLL->[$Kfirst]->[_TYPE_];
- my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
- my $Kp = $self->K_previous_code( undef, $rLL_new );
- if ( $last_line_type eq 'CODE'
- && $type_next ne 'b'
- && defined($Kp) )
- {
- my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
- my $type_p = $rLL_new->[$Kp]->[_TYPE_];
+ # set flags to prevent spaces within this operator
+ foreach my $jj ( $j + 1 .. $jend ) {
+ $rwhitespace_flags->[$jj] = WS_NO;
+ }
+ $j = $jend;
+ last;
+ }
+ } ## End Loop over all operators
+ } ## End loop over all tokens
+ return;
+ } # End sub
+} ## end closure new_secret_operator_whitespace
- my ( $token_pp, $type_pp );
- my $Kpp = $self->K_previous_code( $Kp, $rLL_new );
- if ( defined($Kpp) ) {
- $token_pp = $rLL_new->[$Kpp]->[_TOKEN_];
- $type_pp = $rLL_new->[$Kpp]->[_TYPE_];
- }
- else {
- $token_pp = ";";
- $type_pp = ';';
- }
+{ ## begin closure set_bond_strengths
- if (
- is_essential_whitespace(
- $token_pp, $type_pp, $token_p,
- $type_p, $token_next, $type_next,
- )
- )
- {
+ # These routines and variables are involved in deciding where to break very
+ # long lines.
- # Copy this first token as blank, but use previous line number
- my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', ' ' );
- $rcopy->[_LINE_INDEX_] =
- $rLL_new->[-1]->[_LINE_INDEX_];
- $store_token->($rcopy);
- }
- }
+ my %is_good_keyword_breakpoint;
+ my %is_lt_gt_le_ge;
+ my %is_container_token;
- # loop to copy all tokens on this line, with any changes
- my $type_sequence;
- for ( my $KK = $Kfirst ; $KK <= $Klast ; $KK++ ) {
- $rtoken_vars = $rLL->[$KK];
- my $token = $rtoken_vars->[_TOKEN_];
- my $type = $rtoken_vars->[_TYPE_];
- my $last_type_sequence = $type_sequence;
- $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+ my %binary_bond_strength_nospace;
+ my %binary_bond_strength;
+ my %nobreak_lhs;
+ my %nobreak_rhs;
- # Handle a blank space ...
- if ( $type eq 'b' ) {
+ my @bias_tokens;
+ my %bias_hash;
+ my %bias;
+ my $delta_bias;
- # Delete it if not wanted by whitespace rules
- # or we are deleting all whitespace
- # Note that whitespace flag is a flag indicating whether a
- # white space BEFORE the token is needed
- next if ( $KK >= $Klast ); # skip terminal blank
- my $Knext = $KK + 1;
- my $ws = $rwhitespace_flags->[$Knext];
- if ( $ws == -1
- || $rOpts_delete_old_whitespace )
- {
+ sub initialize_bond_strength_hashes {
- # FIXME: maybe switch to using _new
- my $Kp = $self->K_previous_nonblank($KK);
- next unless defined($Kp);
- my $token_p = $rLL->[$Kp]->[_TOKEN_];
- my $type_p = $rLL->[$Kp]->[_TYPE_];
+ my @q;
+ @q = qw(if unless while until for foreach);
+ @is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
- my ( $token_pp, $type_pp );
+ @q = qw(lt gt le ge);
+ @is_lt_gt_le_ge{@q} = (1) x scalar(@q);
- #my $Kpp = $K_previous_nonblank->($Kp);
- my $Kpp = $self->K_previous_nonblank($Kp);
- if ( defined($Kpp) ) {
- $token_pp = $rLL->[$Kpp]->[_TOKEN_];
- $type_pp = $rLL->[$Kpp]->[_TYPE_];
- }
- else {
- $token_pp = ";";
- $type_pp = ';';
- }
- my $token_next = $rLL->[$Knext]->[_TOKEN_];
- my $type_next = $rLL->[$Knext]->[_TYPE_];
+ @q = qw/ ( [ { } ] ) /;
+ @is_container_token{@q} = (1) x scalar(@q);
- my $do_not_delete = is_essential_whitespace(
- $token_pp, $type_pp, $token_p,
- $type_p, $token_next, $type_next,
- );
+ # The decision about where to break a line depends upon a "bond
+ # strength" between tokens. The LOWER the bond strength, the MORE
+ # likely a break. A bond strength may be any value but to simplify
+ # things there are several pre-defined strength levels:
- next unless ($do_not_delete);
- }
+ # NO_BREAK => 10000;
+ # VERY_STRONG => 100;
+ # STRONG => 2.1;
+ # NOMINAL => 1.1;
+ # WEAK => 0.8;
+ # VERY_WEAK => 0.55;
- # make it just one character if allowed
- if ($rOpts_add_whitespace) {
- $rtoken_vars->[_TOKEN_] = ' ';
- }
- $store_token->($rtoken_vars);
- next;
- }
+ # The strength values are based on trial-and-error, and need to be
+ # tweaked occasionally to get desired results. Some comments:
+ #
+ # 1. Only relative strengths are important. small differences
+ # in strengths can make big formatting differences.
+ # 2. Each indentation level adds one unit of bond strength.
+ # 3. A value of NO_BREAK makes an unbreakable bond
+ # 4. A value of VERY_WEAK is the strength of a ','
+ # 5. Values below NOMINAL are considered ok break points.
+ # 6. Values above NOMINAL are considered poor break points.
+ #
+ # The bond strengths should roughly follow precedence order where
+ # possible. If you make changes, please check the results very
+ # carefully on a variety of scripts. Testing with the -extrude
+ # options is particularly helpful in exercising all of the rules.
- # Handle a nonblank token...
+ # Wherever possible, bond strengths are defined in the following
+ # tables. There are two main stages to setting bond strengths and
+ # two types of tables:
+ #
+ # The first stage involves looking at each token individually and
+ # defining left and right bond strengths, according to if we want
+ # to break to the left or right side, and how good a break point it
+ # is. For example tokens like =, ||, && make good break points and
+ # will have low strengths, but one might want to break on either
+ # side to put them at the end of one line or beginning of the next.
+ #
+ # The second stage involves looking at certain pairs of tokens and
+ # defining a bond strength for that particular pair. This second
+ # stage has priority.
- # check for a qw quote
- if ( $type eq 'q' ) {
+ #---------------------------------------------------------------
+ # Bond Strength BEGIN Section 1.
+ # Set left and right bond strengths of individual tokens.
+ #---------------------------------------------------------------
- # trim blanks from right of qw quotes
- # (To avoid trimming qw quotes use -ntqw; the tokenizer handles
- # this)
- $token =~ s/\s*$//;
- $rtoken_vars->[_TOKEN_] = $token;
- note_embedded_tab() if ( $token =~ "\t" );
+ # NOTE: NO_BREAK's set in this section first are HINTS which will
+ # probably not be honored. Essential NO_BREAKS's should be set in
+ # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
+ # of this subroutine.
- if ($in_multiline_qw) {
+ # Note that we are setting defaults in this section. The user
+ # cannot change bond strengths but can cause the left and right
+ # bond strengths of any token type to be swapped through the use of
+ # the -wba and -wbb flags. In this way the user can determine if a
+ # breakpoint token should appear at the end of one line or the
+ # beginning of the next line.
- # If we are at the end of a multiline qw ..
- if ( $in_multiline_qw == $KK ) {
+ %right_bond_strength = ();
+ %left_bond_strength = ();
+ %binary_bond_strength_nospace = ();
+ %binary_bond_strength = ();
+ %nobreak_lhs = ();
+ %nobreak_rhs = ();
- # Split off the closing delimiter character
- # so that the formatter can put a line break there if necessary
- my $part1 = $token;
- my $part2 = substr( $part1, -1, 1, "" );
+ # The hash keys in this section are token types, plus the text of
+ # certain keywords like 'or', 'and'.
- if ($part1) {
- my $rcopy =
- copy_token_as_type( $rtoken_vars, 'q', $part1 );
- $store_token->($rcopy);
- $token = $part2;
- $rtoken_vars->[_TOKEN_] = $token;
-
- }
- $in_multiline_qw = undef;
-
- # store without preceding blank
- $store_token->($rtoken_vars);
- next;
- }
- else {
- # continuing a multiline qw
- $store_token->($rtoken_vars);
- next;
- }
- }
-
- else {
+ # no break around possible filehandle
+ $left_bond_strength{'Z'} = NO_BREAK;
+ $right_bond_strength{'Z'} = NO_BREAK;
- # we are encountered new qw token...see if multiline
- my $K_end = $K_end_q->($KK);
- if ( $ALLOW_BREAK_MULTILINE_QW && $K_end != $KK ) {
-
- # Starting multiline qw...
- # set flag equal to the ending K
- $in_multiline_qw = $K_end;
-
- # Split off the leading part
- # so that the formatter can put a line break there if necessary
- if ( $token =~ /^(qw\s*.)(.*)$/ ) {
- my $part1 = $1;
- my $part2 = $2;
- if ($part2) {
- my $rcopy =
- copy_token_as_type( $rtoken_vars, 'q',
- $part1 );
- $store_token_and_space->(
- $rcopy, $rwhitespace_flags->[$KK] == WS_YES
- );
- $token = $part2;
- $rtoken_vars->[_TOKEN_] = $token;
-
- # Second part goes without intermediate blank
- $store_token->($rtoken_vars);
- next;
- }
- }
- }
- else {
+ # never put a bare word on a new line:
+ # example print (STDERR, "bla"); will fail with break after (
+ $left_bond_strength{'w'} = NO_BREAK;
- # this is a new single token qw -
- # store with possible preceding blank
- $store_token_and_space->(
- $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
- );
- next;
- }
- }
- } ## end if ( $type eq 'q' )
+ # blanks always have infinite strength to force breaks after
+ # real tokens
+ $right_bond_strength{'b'} = NO_BREAK;
- # Modify certain tokens here for whitespace
- # The following is not yet done, but could be:
- # sub (x x x)
- elsif ( $type =~ /^[wit]$/ ) {
+ # try not to break on exponentation
+ @q = qw# ** .. ... <=> #;
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} = (STRONG) x scalar(@q);
- # Examples: <<snippets/space1.in>>
- # change '$ var' to '$var' etc
- # '-> new' to '->new'
- if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
- $token =~ s/\s*//g;
- $rtoken_vars->[_TOKEN_] = $token;
- }
+ # The comma-arrow has very low precedence but not a good break point
+ $left_bond_strength{'=>'} = NO_BREAK;
+ $right_bond_strength{'=>'} = NOMINAL;
- # Split identifiers with leading arrows, inserting blanks if
- # necessary. It is easier and safer here than in the
- # tokenizer. For example '->new' becomes two tokens, '->' and
- # 'new' with a possible blank between.
- #
- # Note: there is a related patch in sub set_whitespace_flags
- if ( $token =~ /^\-\>(.*)$/ && $1 ) {
- my $token_save = $1;
- my $type_save = $type;
+ # ok to break after label
+ $left_bond_strength{'J'} = NO_BREAK;
+ $right_bond_strength{'J'} = NOMINAL;
+ $left_bond_strength{'j'} = STRONG;
+ $right_bond_strength{'j'} = STRONG;
+ $left_bond_strength{'A'} = STRONG;
+ $right_bond_strength{'A'} = STRONG;
- # store a blank to left of arrow if necessary
- my $Kprev = $self->K_previous_nonblank($KK);
- if ( defined($Kprev)
- && $rLL->[$Kprev]->[_TYPE_] ne 'b'
- && $rOpts_add_whitespace
- && $want_left_space{'->'} == WS_YES )
- {
- my $rcopy =
- copy_token_as_type( $rtoken_vars, 'b', ' ' );
- $store_token->($rcopy);
- }
+ $left_bond_strength{'->'} = STRONG;
+ $right_bond_strength{'->'} = VERY_STRONG;
- # then store the arrow
- my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' );
- $store_token->($rcopy);
+ $left_bond_strength{'CORE::'} = NOMINAL;
+ $right_bond_strength{'CORE::'} = NO_BREAK;
- # then reset the current token to be the remainder,
- # and reset the whitespace flag according to the arrow
- $token = $rtoken_vars->[_TOKEN_] = $token_save;
- $type = $rtoken_vars->[_TYPE_] = $type_save;
- $store_token->($rtoken_vars);
- next;
- }
+ # breaking AFTER modulus operator is ok:
+ @q = qw< % >;
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} =
+ ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
- if ( $token =~ /$SUB_PATTERN/ ) {
+ # Break AFTER math operators * and /
+ @q = qw< * / x >;
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
- # -spp = 0 : no space before opening prototype paren
- # -spp = 1 : stable (follow input spacing)
- # -spp = 2 : always space before opening prototype paren
- my $spp = $rOpts->{'space-prototype-paren'};
- if ( defined($spp) ) {
- if ( $spp == 0 ) { $token =~ s/\s+\(/\(/; }
- elsif ( $spp == 2 ) { $token =~ s/\(/ (/; }
- }
+ # Break AFTER weakest math operators + and -
+ # Make them weaker than * but a bit stronger than '.'
+ @q = qw< + - >;
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} =
+ ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
- # one space max, and no tabs
- $token =~ s/\s+/ /g;
- $rtoken_vars->[_TOKEN_] = $token;
- }
+ # Define left strength of unary plus and minus (fixes case b511)
+ $left_bond_strength{p} = $left_bond_strength{'+'};
+ $left_bond_strength{m} = $left_bond_strength{'-'};
- # trim identifiers of trailing blanks which can occur
- # under some unusual circumstances, such as if the
- # identifier 'witch' has trailing blanks on input here:
- #
- # sub
- # witch
- # () # prototype may be on new line ...
- # ...
- if ( $type eq 'i' ) {
- $token =~ s/\s+$//g;
- $rtoken_vars->[_TOKEN_] = $token;
- }
- }
+ # And make right strength of unary plus and minus very high.
+ # Fixes cases b670 b790
+ $right_bond_strength{p} = NO_BREAK;
+ $right_bond_strength{m} = NO_BREAK;
- # change 'LABEL :' to 'LABEL:'
- elsif ( $type eq 'J' ) {
- $token =~ s/\s+//g;
- $rtoken_vars->[_TOKEN_] = $token;
- }
+ # breaking BEFORE these is just ok:
+ @q = qw# >> << #;
+ @right_bond_strength{@q} = (STRONG) x scalar(@q);
+ @left_bond_strength{@q} = (NOMINAL) x scalar(@q);
- # patch to add space to something like "x10"
- # This avoids having to split this token in the pre-tokenizer
- elsif ( $type eq 'n' ) {
- if ( $token =~ /^x\d+/ ) {
- $token =~ s/x/x /;
- $rtoken_vars->[_TOKEN_] = $token;
- }
- }
+ # breaking before the string concatenation operator seems best
+ # because it can be hard to see at the end of a line
+ $right_bond_strength{'.'} = STRONG;
+ $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
- # check a quote for problems
- elsif ( $type eq 'Q' ) {
- $check_Q->( $KK, $Kfirst );
- }
+ @q = qw< } ] ) R >;
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
- # handle semicolons
- elsif ( $type eq ';' ) {
+ # make these a little weaker than nominal so that they get
+ # favored for end-of-line characters
+ @q = qw< != == =~ !~ ~~ !~~ >;
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} =
+ ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
- # Remove unnecessary semicolons, but not after bare
- # blocks, where it could be unsafe if the brace is
- # mistokenized.
- if (
- $rOpts->{'delete-semicolons'}
- && (
- (
- $last_nonblank_type eq '}'
- && (
- $is_block_without_semicolon{
- $last_nonblank_block_type}
- || $last_nonblank_block_type =~ /$SUB_PATTERN/
- || $last_nonblank_block_type =~ /^\w+:$/ )
- )
- || $last_nonblank_type eq ';'
- )
- )
- {
+ # break AFTER these
+ @q = qw# < > | & >= <= #;
+ @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
+ @right_bond_strength{@q} =
+ ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
- # This looks like a deletable semicolon, but even if a
- # semicolon can be deleted it is necessarily best to do so.
- # We apply these additional rules for deletion:
- # - Always ok to delete a ';' at the end of a line
- # - Never delete a ';' before a '#' because it would
- # promote it to a block comment.
- # - If a semicolon is not at the end of line, then only
- # delete if it is followed by another semicolon or closing
- # token. This includes the comment rule. It may take
- # two passes to get to a final state, but it is a little
- # safer. For example, keep the first semicolon here:
- # eval { sub bubba { ok(0) }; ok(0) } || ok(1);
- # It is not required but adds some clarity.
- my $ok_to_delete = 1;
- if ( $KK < $Klast ) {
- my $Kn = $self->K_next_nonblank($KK);
- if ( defined($Kn) && $Kn <= $Klast ) {
- my $next_nonblank_token_type =
- $rLL->[$Kn]->[_TYPE_];
- $ok_to_delete = $next_nonblank_token_type eq ';'
- || $next_nonblank_token_type eq '}';
- }
- }
+ # breaking either before or after a quote is ok
+ # but bias for breaking before a quote
+ $left_bond_strength{'Q'} = NOMINAL;
+ $right_bond_strength{'Q'} = NOMINAL + 0.02;
+ $left_bond_strength{'q'} = NOMINAL;
+ $right_bond_strength{'q'} = NOMINAL;
- if ($ok_to_delete) {
- note_deleted_semicolon();
- next;
- }
- else {
- write_logfile_entry("Extra ';'\n");
- }
- }
- }
+ # starting a line with a keyword is usually ok
+ $left_bond_strength{'k'} = NOMINAL;
- elsif ($type_sequence) {
+ # we usually want to bond a keyword strongly to what immediately
+ # follows, rather than leaving it stranded at the end of a line
+ $right_bond_strength{'k'} = STRONG;
- # if ( $is_opening_token{$token} ) {
- # }
+ $left_bond_strength{'G'} = NOMINAL;
+ $right_bond_strength{'G'} = STRONG;
- if ( $is_closing_token{$token} ) {
+ # assignment operators
+ @q = qw(
+ = **= += *= &= <<= &&=
+ -= /= |= >>= ||= //=
+ .= %= ^=
+ x=
+ );
- # Insert a tentative missing semicolon if the next token is
- # a closing block brace
- if (
- $type eq '}'
- && $token eq '}'
+ # Default is to break AFTER various assignment operators
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} =
+ ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
- # not preceded by a ';'
- && $last_nonblank_type ne ';'
+ # Default is to break BEFORE '&&' and '||' and '//'
+ # set strength of '||' to same as '=' so that chains like
+ # $a = $b || $c || $d will break before the first '||'
+ $right_bond_strength{'||'} = NOMINAL;
+ $left_bond_strength{'||'} = $right_bond_strength{'='};
- # and this is not a VERSION stmt (is all one line, we are not
- # inserting semicolons on one-line blocks)
- && $CODE_type ne 'VER'
+ # same thing for '//'
+ $right_bond_strength{'//'} = NOMINAL;
+ $left_bond_strength{'//'} = $right_bond_strength{'='};
- # and we are allowed to add semicolons
- && $rOpts->{'add-semicolons'}
- )
- {
- $add_phantom_semicolon->($KK);
- }
- }
- }
+ # set strength of && a little higher than ||
+ $right_bond_strength{'&&'} = NOMINAL;
+ $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
- # Store this token with possible previous blank
- $store_token_and_space->(
- $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
- );
+ $left_bond_strength{';'} = VERY_STRONG;
+ $right_bond_strength{';'} = VERY_WEAK;
+ $left_bond_strength{'f'} = VERY_STRONG;
- } # End token loop
- } # End line loop
+ # make right strength of for ';' a little less than '='
+ # to make for contents break after the ';' to avoid this:
+ # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
+ # $number_of_fields )
+ # and make it weaker than ',' and 'and' too
+ $right_bond_strength{'f'} = VERY_WEAK - 0.03;
- # Reset memory to be the new array
- $self->{rLL} = $rLL_new;
- $self->set_rLL_max_index();
- $self->{K_opening_container} = $K_opening_container;
- $self->{K_closing_container} = $K_closing_container;
- $self->{K_opening_ternary} = $K_opening_ternary;
- $self->{K_closing_ternary} = $K_closing_ternary;
- $self->{rK_phantom_semicolons} = $rK_phantom_semicolons;
+ # The strengths of ?/: should be somewhere between
+ # an '=' and a quote (NOMINAL),
+ # make strength of ':' slightly less than '?' to help
+ # break long chains of ? : after the colons
+ $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
+ $right_bond_strength{':'} = NO_BREAK;
+ $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
+ $right_bond_strength{'?'} = NO_BREAK;
- # make sure the new array looks okay
- $self->check_token_array();
+ $left_bond_strength{','} = VERY_STRONG;
+ $right_bond_strength{','} = VERY_WEAK;
- # reset the token limits of each line
- $self->resync_lines_and_tokens();
+ # remaining digraphs and trigraphs not defined above
+ @q = qw( :: <> ++ --);
+ @left_bond_strength{@q} = (WEAK) x scalar(@q);
+ @right_bond_strength{@q} = (STRONG) x scalar(@q);
- return;
-}
+ # Set bond strengths of certain keywords
+ # make 'or', 'err', 'and' slightly weaker than a ','
+ $left_bond_strength{'and'} = VERY_WEAK - 0.01;
+ $left_bond_strength{'or'} = VERY_WEAK - 0.02;
+ $left_bond_strength{'err'} = VERY_WEAK - 0.02;
+ $left_bond_strength{'xor'} = VERY_WEAK - 0.01;
+ $right_bond_strength{'and'} = NOMINAL;
+ $right_bond_strength{'or'} = NOMINAL;
+ $right_bond_strength{'err'} = NOMINAL;
+ $right_bond_strength{'xor'} = NOMINAL;
-{ # scan_comments
+ #---------------------------------------------------------------
+ # Bond Strength BEGIN Section 2.
+ # Set binary rules for bond strengths between certain token types.
+ #---------------------------------------------------------------
- my $Last_line_had_side_comment;
- my $In_format_skipping_section;
- my $Saw_VERSION_in_this_file;
+ # We have a little problem making tables which apply to the
+ # container tokens. Here is a list of container tokens and
+ # their types:
+ #
+ # type tokens // meaning
+ # { {, [, ( // indent
+ # } }, ], ) // outdent
+ # [ [ // left non-structural [ (enclosing an array index)
+ # ] ] // right non-structural square bracket
+ # ( ( // left non-structural paren
+ # ) ) // right non-structural paren
+ # L { // left non-structural curly brace (enclosing a key)
+ # R } // right non-structural curly brace
+ #
+ # Some rules apply to token types and some to just the token
+ # itself. We solve the problem by combining type and token into a
+ # new hash key for the container types.
+ #
+ # If a rule applies to a token 'type' then we need to make rules
+ # for each of these 'type.token' combinations:
+ # Type Type.Token
+ # { {{, {[, {(
+ # [ [[
+ # ( ((
+ # L L{
+ # } }}, }], })
+ # ] ]]
+ # ) ))
+ # R R}
+ #
+ # If a rule applies to a token then we need to make rules for
+ # these 'type.token' combinations:
+ # Token Type.Token
+ # { {{, L{
+ # [ {[, [[
+ # ( {(, ((
+ # } }}, R}
+ # ] }], ]]
+ # ) }), ))
- sub scan_comments {
- my $self = shift;
- my $rlines = $self->{rlines};
+ # allow long lines before final { in an if statement, as in:
+ # if (..........
+ # ..........)
+ # {
+ #
+ # Otherwise, the line before the { tends to be too short.
- $Last_line_had_side_comment = undef;
- $In_format_skipping_section = undef;
- $Saw_VERSION_in_this_file = undef;
+ $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
+ $binary_bond_strength{'(('}{'{{'} = NOMINAL;
- # Loop over all lines
- foreach my $line_of_tokens ( @{$rlines} ) {
- my $line_type = $line_of_tokens->{_line_type};
- next unless ( $line_type eq 'CODE' );
- my $CODE_type = $self->get_CODE_type($line_of_tokens);
- $line_of_tokens->{_code_type} = $CODE_type;
- }
- return;
- }
+ # break on something like '} (', but keep this stronger than a ','
+ # example is in 'howe.pl'
+ $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
+ $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
- sub get_CODE_type {
- my ( $self, $line_of_tokens ) = @_;
+ # keep matrix and hash indices together
+ # but make them a little below STRONG to allow breaking open
+ # something like {'some-word'}{'some-very-long-word'} at the }{
+ # (bracebrk.t)
+ $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
+ $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
+ $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
+ $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
- # We are looking at a line of code and setting a flag to
- # describe any special processing that it requires
+ # increase strength to the point where a break in the following
+ # will be after the opening paren rather than at the arrow:
+ # $a->$b($c);
+ $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
- # Possible CODE_types are as follows.
- # 'BL' = Blank Line
- # 'VB' = Verbatim - line goes out verbatim
- # 'IO' = Indent Only - line goes out unchanged except for indentation
- # 'NIN' = No Internal Newlines - line does not get broken
- # 'HSC'=Hanging Side Comment - fix this hanging side comment
- # 'BC'=Block Comment - an ordinary full line comment
- # 'SBC'=Static Block Comment - a block comment which does not get
- # indented
- # 'SBCX'=Static Block Comment Without Leading Space
- # 'DEL'=Delete this line
- # 'VER'=VERSION statement
- # '' or (undefined) - no restructions
+ # Note that the following alternative strength would make the break at the
+ # '->' rather than opening the '('. Both have advantages and disadvantages.
+ # $binary_bond_strength{'i'}{'->'} = 0.5*STRONG + 0.5 * NOMINAL; #
- my $rLL = $self->{rLL};
- my $Klimit = $self->{Klimit};
+ $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
- my $CODE_type = $rOpts->{'indent-only'} ? 'IO' : "";
- my $no_internal_newlines = 1 - $rOpts_add_newlines;
- if ( !$CODE_type && $no_internal_newlines ) { $CODE_type = 'NIN' }
+ $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
+ $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
+ $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
+ $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
- # extract what we need for this line..
+ #---------------------------------------------------------------
+ # Binary NO_BREAK rules
+ #---------------------------------------------------------------
- # Global value for error messages:
- $input_line_number = $line_of_tokens->{_line_number};
+ # use strict requires that bare word and => not be separated
+ $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
+ $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
- my $rK_range = $line_of_tokens->{_rK_range};
- my ( $Kfirst, $Klast ) = @{$rK_range};
- my $jmax = -1;
- if ( defined($Kfirst) ) { $jmax = $Klast - $Kfirst }
- my $input_line = $line_of_tokens->{_line_text};
- my $in_continued_quote = my $starting_in_quote =
- $line_of_tokens->{_starting_in_quote};
- my $in_quote = $line_of_tokens->{_ending_in_quote};
- my $ending_in_quote = $in_quote;
- my $guessed_indentation_level =
- $line_of_tokens->{_guessed_indentation_level};
+ # Never break between a bareword and a following paren because
+ # perl may give an error. For example, if a break is placed
+ # between 'to_filehandle' and its '(' the following line will
+ # give a syntax error [Carp.pm]: my( $no) =fileno(
+ # to_filehandle( $in)) ;
+ $binary_bond_strength{'C'}{'(('} = NO_BREAK;
+ $binary_bond_strength{'C'}{'{('} = NO_BREAK;
+ $binary_bond_strength{'U'}{'(('} = NO_BREAK;
+ $binary_bond_strength{'U'}{'{('} = NO_BREAK;
- my $is_static_block_comment = 0;
+ # use strict requires that bare word within braces not start new
+ # line
+ $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
- # Handle a continued quote..
- if ($in_continued_quote) {
+ $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
- # A line which is entirely a quote or pattern must go out
- # verbatim. Note: the \n is contained in $input_line.
- if ( $jmax <= 0 ) {
- if ( ( $input_line =~ "\t" ) ) {
- note_embedded_tab();
- }
- $Last_line_had_side_comment = 0;
- return 'VB';
- }
- }
+ # The following two rules prevent a syntax error caused by breaking up
+ # a construction like '{-y}'. The '-' quotes the 'y' and prevents
+ # it from being taken as a transliteration. We have to keep
+ # token types 'L m w' together to prevent this error.
+ $binary_bond_strength{'L{'}{'m'} = NO_BREAK;
+ $binary_bond_strength_nospace{'m'}{'w'} = NO_BREAK;
- my $is_block_comment =
- ( $jmax == 0 && $rLL->[$Kfirst]->[_TYPE_] eq '#' );
+ # keep 'bareword-' together, but only if there is no space between
+ # the word and dash. Do not keep together if there is a space.
+ # example 'use perl6-alpha'
+ $binary_bond_strength_nospace{'w'}{'m'} = NO_BREAK;
- # Write line verbatim if we are in a formatting skip section
- if ($In_format_skipping_section) {
- $Last_line_had_side_comment = 0;
+ # use strict requires that bare word and => not be separated
+ $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
- # Note: extra space appended to comment simplifies pattern matching
- if ( $is_block_comment
- && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
- /$format_skipping_pattern_end/o )
- {
- $In_format_skipping_section = 0;
- write_logfile_entry("Exiting formatting skip section\n");
- }
- return 'FS';
- }
+ # use strict does not allow separating type info from trailing { }
+ # testfile is readmail.pl
+ $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
+ $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
- # See if we are entering a formatting skip section
- if ( $rOpts_format_skipping
- && $is_block_comment
- && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
- /$format_skipping_pattern_begin/o )
- {
- $In_format_skipping_section = 1;
- write_logfile_entry("Entering formatting skip section\n");
- $Last_line_had_side_comment = 0;
- return 'FS';
- }
+ # As a defensive measure, do not break between a '(' and a
+ # filehandle. In some cases, this can cause an error. For
+ # example, the following program works:
+ # my $msg="hi!\n";
+ # print
+ # ( STDOUT
+ # $msg
+ # );
+ #
+ # But this program fails:
+ # my $msg="hi!\n";
+ # print
+ # (
+ # STDOUT
+ # $msg
+ # );
+ #
+ # This is normally only a problem with the 'extrude' option
+ $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
+ $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
- # ignore trailing blank tokens (they will get deleted later)
- if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
- $jmax--;
- }
+ # never break between sub name and opening paren
+ $binary_bond_strength{'w'}{'(('} = NO_BREAK;
+ $binary_bond_strength{'w'}{'{('} = NO_BREAK;
- # Handle a blank line..
- if ( $jmax < 0 ) {
- $Last_line_had_side_comment = 0;
- return 'BL';
- }
+ # keep '}' together with ';'
+ $binary_bond_strength{'}}'}{';'} = NO_BREAK;
- # see if this is a static block comment (starts with ## by default)
- my $is_static_block_comment_without_leading_space = 0;
- if ( $is_block_comment
- && $rOpts->{'static-block-comments'}
- && $input_line =~ /$static_block_comment_pattern/o )
- {
- $is_static_block_comment = 1;
- $is_static_block_comment_without_leading_space =
- substr( $input_line, 0, 1 ) eq '#';
- }
+ # Breaking before a ++ can cause perl to guess wrong. For
+ # example the following line will cause a syntax error
+ # with -extrude if we break between '$i' and '++' [fixstyle2]
+ # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
+ $nobreak_lhs{'++'} = NO_BREAK;
- # Check for comments which are line directives
- # Treat exactly as static block comments without leading space
- # reference: perlsyn, near end, section Plain Old Comments (Not!)
- # example: '# line 42 "new_filename.plx"'
- if (
- $is_block_comment
- && $input_line =~ /^\# \s*
- line \s+ (\d+) \s*
- (?:\s("?)([^"]+)\2)? \s*
- $/x
- )
- {
- $is_static_block_comment = 1;
- $is_static_block_comment_without_leading_space = 1;
- }
+ # Do not break before a possible file handle
+ $nobreak_lhs{'Z'} = NO_BREAK;
- # look for hanging side comment
- if (
- $is_block_comment
- && $Last_line_had_side_comment # last line had side comment
- && $input_line =~ /^\s/ # there is some leading space
- && !$is_static_block_comment # do not make static comment hanging
- && $rOpts->{'hanging-side-comments'} # user is allowing
- # hanging side comments
- # like this
- )
- {
- $Last_line_had_side_comment = 1;
- return 'HSC';
- }
+ # use strict hates bare words on any new line. For
+ # example, a break before the underscore here provokes the
+ # wrath of use strict:
+ # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
+ $nobreak_rhs{'F'} = NO_BREAK;
+ $nobreak_rhs{'CORE::'} = NO_BREAK;
- # remember if this line has a side comment
- $Last_line_had_side_comment =
- ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq '#' );
+ # To prevent the tokenizer from switching between types 'w' and 'G' we
+ # need to avoid breaking between type 'G' and the following code block
+ # brace. Fixes case b929.
+ $nobreak_rhs{G} = NO_BREAK;
- # Handle a block (full-line) comment..
- if ($is_block_comment) {
+ #---------------------------------------------------------------
+ # Bond Strength BEGIN Section 3.
+ # Define tables and values for applying a small bias to the above
+ # values.
+ #---------------------------------------------------------------
+ # Adding a small 'bias' to strengths is a simple way to make a line
+ # break at the first of a sequence of identical terms. For
+ # example, to force long string of conditional operators to break
+ # with each line ending in a ':', we can add a small number to the
+ # bond strength of each ':' (colon.t)
+ @bias_tokens = qw( : && || f and or . ); # tokens which get bias
+ %bias_hash = map { $_ => 0 } @bias_tokens;
+ $delta_bias = 0.0001; # a very small strength level
+ return;
- if ( $rOpts->{'delete-block-comments'} ) { return 'DEL' }
+ } ## end sub initialize_bond_strength_hashes
- # TRIM COMMENTS -- This could be turned off as a option
- $rLL->[$Kfirst]->[_TOKEN_] =~ s/\s*$//; # trim right end
+ use constant DEBUG_BOND => 0;
- if ($is_static_block_comment_without_leading_space) {
- return 'SBCX';
- }
- elsif ($is_static_block_comment) {
- return 'SBC';
- }
- else {
- return 'BC';
- }
- }
+ sub set_bond_strengths {
- # Patch needed for MakeMaker. Do not break a statement
- # in which $VERSION may be calculated. See MakeMaker.pm;
- # this is based on the coding in it.
- # The first line of a file that matches this will be eval'd:
- # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
- # Examples:
- # *VERSION = \'1.01';
- # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
- # We will pass such a line straight through without breaking
- # it unless -npvl is used.
+ my ($self) = @_;
- # Patch for problem reported in RT #81866, where files
- # had been flattened into a single line and couldn't be
- # tidied without -npvl. There are two parts to this patch:
- # First, it is not done for a really long line (80 tokens for now).
- # Second, we will only allow up to one semicolon
- # before the VERSION. We need to allow at least one semicolon
- # for statements like this:
- # require Exporter; our $VERSION = $Exporter::VERSION;
- # where both statements must be on a single line for MakeMaker
+ my $rK_weld_right = $self->[_rK_weld_right_];
+ my $rK_weld_left = $self->[_rK_weld_left_];
- my $is_VERSION_statement = 0;
- if ( !$Saw_VERSION_in_this_file
- && $jmax < 80
- && $input_line =~
- /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
- {
- $Saw_VERSION_in_this_file = 1;
- write_logfile_entry("passing VERSION line; -npvl deactivates\n");
- $CODE_type = 'VER';
- }
- return $CODE_type;
- }
-}
+ # patch-its always ok to break at end of line
+ $nobreak_to_go[$max_index_to_go] = 0;
-sub find_nested_pairs {
- my $self = shift;
+ # we start a new set of bias values for each line
+ %bias = %bias_hash;
- my $rLL = $self->{rLL};
- return unless ( defined($rLL) && @{$rLL} );
+ my $code_bias = -.01; # bias for closing block braces
- # We define an array of pairs of nested containers
- my @nested_pairs;
-
- # We also set the following hash values to identify container pairs for
- # which the opening and closing tokens are adjacent in the token stream:
- # $rpaired_to_inner_container->{$seqno_out}=$seqno_in where $seqno_out and
- # $seqno_in are the seqence numbers of the outer and inner containers of
- # the pair We need these later to decide if we can insert a missing
- # semicolon
- my $rpaired_to_inner_container = {};
-
- # This local hash remembers if an outer container has a close following
- # inner container;
- # The key is the outer sequence number
- # The value is the token_hash of the inner container
-
- my %has_close_following_opening;
-
- # Names of calling routines can either be marked as 'i' or 'w',
- # and they may invoke a sub call with an '->'. We will consider
- # any consecutive string of such types as a single unit when making
- # weld decisions. We also allow a leading !
- my $is_name_type = {
- 'i' => 1,
- 'w' => 1,
- 'U' => 1,
- '->' => 1,
- '!' => 1,
- };
+ my $type = 'b';
+ my $token = ' ';
+ my $token_length = 1;
+ my $last_type;
+ my $last_nonblank_type = $type;
+ my $last_nonblank_token = $token;
+ my $list_str = $left_bond_strength{'?'};
- my $is_name = sub {
- my $type = shift;
- return $type && $is_name_type->{$type};
- };
+ my ( $bond_str_1, $bond_str_2, $bond_str_3, $bond_str_4 );
- my $last_container;
- my $last_last_container;
- my $last_nonblank_token_vars;
- my $last_count;
+ my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
+ $next_nonblank_type, $next_token, $next_type,
+ $total_nesting_depth, );
- my $nonblank_token_count = 0;
+ # main loop to compute bond strengths between each pair of tokens
+ foreach my $i ( 0 .. $max_index_to_go ) {
+ $last_type = $type;
+ if ( $type ne 'b' ) {
+ $last_nonblank_type = $type;
+ $last_nonblank_token = $token;
+ }
+ $type = $types_to_go[$i];
- # loop over all tokens
- foreach my $rtoken_vars ( @{$rLL} ) {
+ # strength on both sides of a blank is the same
+ if ( $type eq 'b' && $last_type ne 'b' ) {
+ $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
+ $nobreak_to_go[$i] ||= $nobreak_to_go[ $i - 1 ]; # fix for b1257
+ next;
+ }
- my $type = $rtoken_vars->[_TYPE_];
+ $token = $tokens_to_go[$i];
+ $token_length = $token_lengths_to_go[$i];
+ $block_type = $block_type_to_go[$i];
+ $i_next = $i + 1;
+ $next_type = $types_to_go[$i_next];
+ $next_token = $tokens_to_go[$i_next];
+ $total_nesting_depth = $nesting_depth_to_go[$i_next];
+ $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
+ $next_nonblank_type = $types_to_go[$i_next_nonblank];
+ $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
- next if ( $type eq 'b' );
+ my $seqno = $type_sequence_to_go[$i];
+ my $next_nonblank_seqno = $type_sequence_to_go[$i_next_nonblank];
- # long identifier-like items are counted as a single item
- $nonblank_token_count++
- unless ( $is_name->($type)
- && $is_name->( $last_nonblank_token_vars->[_TYPE_] ) );
+ # We are computing the strength of the bond between the current
+ # token and the NEXT token.
- my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
- if ($type_sequence) {
+ #---------------------------------------------------------------
+ # Bond Strength Section 1:
+ # First Approximation.
+ # Use minimum of individual left and right tabulated bond
+ # strengths.
+ #---------------------------------------------------------------
+ my $bsr = $right_bond_strength{$type};
+ my $bsl = $left_bond_strength{$next_nonblank_type};
- my $token = $rtoken_vars->[_TOKEN_];
+ # define right bond strengths of certain keywords
+ if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
+ $bsr = $right_bond_strength{$token};
+ }
+ elsif ( $token eq 'ne' or $token eq 'eq' ) {
+ $bsr = NOMINAL;
+ }
- if ( $is_opening_token{$token} ) {
+ # set terminal bond strength to the nominal value
+ # this will cause good preceding breaks to be retained
+ if ( $i_next_nonblank > $max_index_to_go ) {
+ $bsl = NOMINAL;
+ }
- # following previous opening token ...
- if ( $last_container
- && $is_opening_token{ $last_container->[_TOKEN_] } )
- {
+ # define right bond strengths of certain keywords
+ if ( $next_nonblank_type eq 'k'
+ && defined( $left_bond_strength{$next_nonblank_token} ) )
+ {
+ $bsl = $left_bond_strength{$next_nonblank_token};
+ }
+ elsif ($next_nonblank_token eq 'ne'
+ or $next_nonblank_token eq 'eq' )
+ {
+ $bsl = NOMINAL;
+ }
+ elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
+ $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
+ }
- # adjacent to this one
- my $tok_diff = $nonblank_token_count - $last_count;
+ # Use the minimum of the left and right strengths. Note: it might
+ # seem that we would want to keep a NO_BREAK if either token has
+ # this value. This didn't work, for example because in an arrow
+ # list, it prevents the comma from separating from the following
+ # bare word (which is probably quoted by its arrow). So necessary
+ # NO_BREAK's have to be handled as special cases in the final
+ # section.
+ if ( !defined($bsr) ) { $bsr = VERY_STRONG }
+ if ( !defined($bsl) ) { $bsl = VERY_STRONG }
+ my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
+ $bond_str_1 = $bond_str if (DEBUG_BOND);
- my $last_tok = $last_nonblank_token_vars->[_TOKEN_];
+ #---------------------------------------------------------------
+ # Bond Strength Section 2:
+ # Apply hardwired rules..
+ #---------------------------------------------------------------
- if ( $tok_diff == 1
- || $tok_diff == 2 && $last_container->[_TOKEN_] eq '(' )
- {
+ # Patch to put terminal or clauses on a new line: Weaken the bond
+ # at an || followed by die or similar keyword to make the terminal
+ # or clause fall on a new line, like this:
+ #
+ # my $class = shift
+ # || die "Cannot add broadcast: No class identifier found";
+ #
+ # Otherwise the break will be at the previous '=' since the || and
+ # = have the same starting strength and the or is biased, like
+ # this:
+ #
+ # my $class =
+ # shift || die "Cannot add broadcast: No class identifier found";
+ #
+ # In any case if the user places a break at either the = or the ||
+ # it should remain there.
+ if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
+ if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) {
+ if ( $want_break_before{$token} && $i > 0 ) {
+ $bond_strength_to_go[ $i - 1 ] -= $delta_bias;
- # remember this pair...
- my $outer_seqno = $last_container->[_TYPE_SEQUENCE_];
- my $inner_seqno = $type_sequence;
- $has_close_following_opening{$outer_seqno} =
- $rtoken_vars;
+ # keep bond strength of a token and its following blank
+ # the same
+ if ( $types_to_go[ $i - 1 ] eq 'b' && $i > 2 ) {
+ $bond_strength_to_go[ $i - 2 ] -= $delta_bias;
+ }
+ }
+ else {
+ $bond_str -= $delta_bias;
}
}
}
- elsif ( $is_closing_token{$token} ) {
+ # good to break after end of code blocks
+ if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
- # if the corresponding opening token had an adjacent opening
- if ( $has_close_following_opening{$type_sequence}
- && $is_closing_token{ $last_container->[_TOKEN_] }
- && $has_close_following_opening{$type_sequence}
- ->[_TYPE_SEQUENCE_] == $last_container->[_TYPE_SEQUENCE_] )
- {
+ $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
+ $code_bias += $delta_bias;
+ }
+
+ if ( $type eq 'k' ) {
- # The closing weld tokens must be adjacent
- # NOTE: so intermediate commas and semicolons
- # can currently block a weld. This is something
- # that could be fixed in the future by including
- # a flag to delete un-necessary commas and semicolons.
- my $tok_diff = $nonblank_token_count - $last_count;
+ # allow certain control keywords to stand out
+ if ( $next_nonblank_type eq 'k'
+ && $is_last_next_redo_return{$token} )
+ {
+ $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
+ }
- if ( $tok_diff == 1 ) {
+ # Don't break after keyword my. This is a quick fix for a
+ # rare problem with perl. An example is this line from file
+ # Container.pm:
- # This is a closely nested pair ..
- my $inner_seqno = $last_container->[_TYPE_SEQUENCE_];
- my $outer_seqno = $type_sequence;
- $rpaired_to_inner_container->{$outer_seqno} =
- $inner_seqno;
+ # foreach my $question( Debian::DebConf::ConfigDb::gettree(
+ # $this->{'question'} ) )
- push @nested_pairs, [ $inner_seqno, $outer_seqno ];
- }
+ if ( $token eq 'my' ) {
+ $bond_str = NO_BREAK;
}
+
}
- $last_last_container = $last_container;
- $last_container = $rtoken_vars;
- $last_count = $nonblank_token_count;
- }
- $last_nonblank_token_vars = $rtoken_vars;
- }
- $self->{rnested_pairs} = \@nested_pairs;
- $self->{rpaired_to_inner_container} = $rpaired_to_inner_container;
- return;
-}
+ # good to break before 'if', 'unless', etc
+ if ( $is_if_brace_follower{$next_nonblank_token} ) {
+ $bond_str = VERY_WEAK;
+ }
-sub dump_tokens {
+ if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
- # a debug routine, not normally used
- my ( $self, $msg ) = @_;
- my $rLL = $self->{rLL};
- my $nvars = @{$rLL};
- print STDERR "$msg\n";
- print STDERR "ntokens=$nvars\n";
- print STDERR "K\t_TOKEN_\t_TYPE_\n";
- my $K = 0;
+ if ( $is_keyword_returning_list{$next_nonblank_token} ) {
+ $bond_str = $list_str if ( $bond_str > $list_str );
+ }
- foreach my $item ( @{$rLL} ) {
- print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n";
- $K++;
- }
- return;
-}
+ # keywords like 'unless', 'if', etc, within statements
+ # make good breaks
+ if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
+ $bond_str = VERY_WEAK / 1.05;
+ }
+ }
-sub get_old_line_index {
- my ( $self, $K ) = @_;
- my $rLL = $self->{rLL};
- return 0 unless defined($K);
- return $rLL->[$K]->[_LINE_INDEX_];
-}
+ # try not to break before a comma-arrow
+ elsif ( $next_nonblank_type eq '=>' ) {
+ if ( $bond_str < STRONG ) { $bond_str = STRONG }
+ }
-sub get_old_line_count {
- my ( $self, $Kbeg, $Kend ) = @_;
- my $rLL = $self->{rLL};
- return 0 unless defined($Kbeg);
- return 0 unless defined($Kend);
- return $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_] + 1;
-}
+ #---------------------------------------------------------------
+ # Additional hardwired NOBREAK rules
+ #---------------------------------------------------------------
-sub K_next_code {
- my ( $self, $KK, $rLL ) = @_;
+ # map1.t -- correct for a quirk in perl
+ if ( $token eq '('
+ && $next_nonblank_type eq 'i'
+ && $last_nonblank_type eq 'k'
+ && $is_sort_map_grep{$last_nonblank_token} )
- # return the index K of the next nonblank, non-comment token
- return unless ( defined($KK) && $KK >= 0 );
+ # /^(sort|map|grep)$/ )
+ {
+ $bond_str = NO_BREAK;
+ }
- # use the standard array unless given otherwise
- $rLL = $self->{rLL} unless ( defined($rLL) );
- my $Num = @{$rLL};
- my $Knnb = $KK + 1;
- while ( $Knnb < $Num ) {
- if ( !defined( $rLL->[$Knnb] ) ) {
- Fault("Undefined entry for k=$Knnb");
- }
- if ( $rLL->[$Knnb]->[_TYPE_] ne 'b'
- && $rLL->[$Knnb]->[_TYPE_] ne '#' )
- {
- return $Knnb;
- }
- $Knnb++;
- }
- return;
-}
+ # extrude.t: do not break before paren at:
+ # -l pid_filename(
+ if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
+ $bond_str = NO_BREAK;
+ }
-sub K_next_nonblank {
- my ( $self, $KK, $rLL ) = @_;
+ # OLD COMMENT: In older version of perl, use strict can cause
+ # problems with breaks before bare words following opening parens.
+ # For example, this will fail under older versions if a break is
+ # made between '(' and 'MAIL':
- # return the index K of the next nonblank token
- return unless ( defined($KK) && $KK >= 0 );
+ # use strict; open( MAIL, "a long filename or command"); close MAIL;
- # use the standard array unless given otherwise
- $rLL = $self->{rLL} unless ( defined($rLL) );
- my $Num = @{$rLL};
- my $Knnb = $KK + 1;
- while ( $Knnb < $Num ) {
- if ( !defined( $rLL->[$Knnb] ) ) {
- Fault("Undefined entry for k=$Knnb");
- }
- if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
- $Knnb++;
- }
- return;
-}
+ # NEW COMMENT: Third fix for b1213:
+ # This option does not seem to be needed any longer, and it can
+ # cause instabilities. It can be turned off, but to minimize
+ # changes to existing formatting it is retained only in the case
+ # where the previous token was 'open' and there was no line break.
+ # Even this could eventually be removed if it causes instability.
+ if ( $type eq '{' ) {
-sub K_previous_code {
+ if ( $token eq '('
+ && $next_nonblank_type eq 'w'
+ && $last_nonblank_type eq 'k'
+ && $last_nonblank_token eq 'open'
+ && !$old_breakpoint_to_go[$i] )
+ {
+ $bond_str = NO_BREAK;
+ }
+ }
- # return the index K of the previous nonblank, non-comment token
- # Call with $KK=undef to start search at the top of the array
- my ( $self, $KK, $rLL ) = @_;
+ # Do not break between a possible filehandle and a ? or / and do
+ # not introduce a break after it if there is no blank
+ # (extrude.t)
+ elsif ( $type eq 'Z' ) {
- # use the standard array unless given otherwise
- $rLL = $self->{rLL} unless ( defined($rLL) );
- my $Num = @{$rLL};
- if ( !defined($KK) ) { $KK = $Num }
- elsif ( $KK > $Num ) {
+ # don't break..
+ if (
- # The caller should make the first call with KK_new=undef to
- # avoid this error
- Fault(
-"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
- );
- }
- my $Kpnb = $KK - 1;
- while ( $Kpnb >= 0 ) {
- if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b'
- && $rLL->[$Kpnb]->[_TYPE_] ne '#' )
- {
- return $Kpnb;
- }
- $Kpnb--;
- }
- return;
-}
+ # if there is no blank and we do not want one. Examples:
+ # print $x++ # do not break after $x
+ # print HTML"HELLO" # break ok after HTML
+ (
+ $next_type ne 'b'
+ && defined( $want_left_space{$next_type} )
+ && $want_left_space{$next_type} == WS_NO
+ )
-sub K_previous_nonblank {
+ # or we might be followed by the start of a quote,
+ # and this is not an existing breakpoint; fixes c039.
+ || !$old_breakpoint_to_go[$i]
+ && substr( $next_nonblank_token, 0, 1 ) eq '/'
- # return index of previous nonblank token before item K;
- # Call with $KK=undef to start search at the top of the array
- my ( $self, $KK, $rLL ) = @_;
+ )
+ {
+ $bond_str = NO_BREAK;
+ }
+ }
- # use the standard array unless given otherwise
- $rLL = $self->{rLL} unless ( defined($rLL) );
- my $Num = @{$rLL};
- if ( !defined($KK) ) { $KK = $Num }
- elsif ( $KK > $Num ) {
+ # Breaking before a ? before a quote can cause trouble if
+ # they are not separated by a blank.
+ # Example: a syntax error occurs if you break before the ? here
+ # my$logic=join$all?' && ':' || ',@regexps;
+ # From: Professional_Perl_Programming_Code/multifind.pl
+ if ( $next_nonblank_type eq '?' ) {
+ $bond_str = NO_BREAK
+ if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
+ }
- # The caller should make the first call with KK_new=undef to
- # avoid this error
- Fault(
-"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
- );
- }
- my $Kpnb = $KK - 1;
- while ( $Kpnb >= 0 ) {
- if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
- $Kpnb--;
- }
- return;
-}
+ # Breaking before a . followed by a number
+ # can cause trouble if there is no intervening space
+ # Example: a syntax error occurs if you break before the .2 here
+ # $str .= pack($endian.2, ensurrogate($ord));
+ # From: perl58/Unicode.pm
+ elsif ( $next_nonblank_type eq '.' ) {
+ $bond_str = NO_BREAK
+ if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
+ }
-sub map_containers {
+ # Fix for c039
+ elsif ( $type eq 'w' ) {
+ $bond_str = NO_BREAK
+ if ( !$old_breakpoint_to_go[$i]
+ && substr( $next_nonblank_token, 0, 1 ) eq '/' );
+ }
- # Maps the container hierarchy
- my $self = shift;
- my $rLL = $self->{rLL};
- return unless ( defined($rLL) && @{$rLL} );
+ $bond_str_2 = $bond_str if (DEBUG_BOND);
- my $K_opening_container = $self->{K_opening_container};
- my $K_closing_container = $self->{K_closing_container};
- my $rcontainer_map = $self->{rcontainer_map};
+ #---------------------------------------------------------------
+ # End of hardwired rules
+ #---------------------------------------------------------------
- # loop over containers
- my @stack; # stack of container sequence numbers
- my $KNEXT = 0;
- while ( defined($KNEXT) ) {
- my $KK = $KNEXT;
- $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
- my $rtoken_vars = $rLL->[$KK];
- my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
- if ( !$type_sequence ) {
- next if ( $KK == 0 ); # first token in file may not be container
- Fault("sequence = $type_sequence not defined at K=$KK");
- }
+ #---------------------------------------------------------------
+ # Bond Strength Section 3:
+ # Apply table rules. These have priority over the above
+ # hardwired rules.
+ #---------------------------------------------------------------
- my $token = $rtoken_vars->[_TOKEN_];
- if ( $is_opening_token{$token} ) {
- if (@stack) {
- $rcontainer_map->{$type_sequence} = $stack[-1];
+ my $tabulated_bond_str;
+ my $ltype = $type;
+ my $rtype = $next_nonblank_type;
+ if ( $seqno && $is_container_token{$token} ) {
+ $ltype = $type . $token;
}
- push @stack, $type_sequence;
- }
- if ( $is_closing_token{$token} ) {
- if (@stack) {
- my $seqno = pop @stack;
- if ( $seqno != $type_sequence ) {
- # shouldn't happen unless file is garbage
- }
+ if ( $next_nonblank_seqno
+ && $is_container_token{$next_nonblank_token} )
+ {
+ $rtype = $next_nonblank_type . $next_nonblank_token;
+
+ # Alternate Fix #1 for issue b1299. This version makes the
+ # decision as soon as possible. See Alternate Fix #2 also.
+ # Do not separate a bareword identifier from its paren: b1299
+ # This is currently needed for stability because if the bareword
+ # gets separated from a preceding '->' and following '(' then
+ # the tokenizer may switch from type 'i' to type 'w'. This
+ # patch will prevent this by keeping it adjacent to its '('.
+## if ( $next_nonblank_token eq '('
+## && $ltype eq 'i'
+## && substr( $token, 0, 1 ) =~ /^\w$/ )
+## {
+## $ltype = 'w';
+## }
+ }
+
+ # apply binary rules which apply regardless of space between tokens
+ if ( $binary_bond_strength{$ltype}{$rtype} ) {
+ $bond_str = $binary_bond_strength{$ltype}{$rtype};
+ $tabulated_bond_str = $bond_str;
}
- }
- }
- # the stack should be empty for a good file
- if (@stack) {
+ # apply binary rules which apply only if no space between tokens
+ if ( $binary_bond_strength_nospace{$ltype}{$next_type} ) {
+ $bond_str = $binary_bond_strength{$ltype}{$next_type};
+ $tabulated_bond_str = $bond_str;
+ }
- # unbalanced containers; file probably bad
- }
- else {
- # ok
- }
- return;
-}
+ if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
+ $bond_str = NO_BREAK;
+ $tabulated_bond_str = $bond_str;
+ }
-sub mark_short_nested_blocks {
+ $bond_str_3 = $bond_str if (DEBUG_BOND);
- # This routine looks at the entire file and marks any short nested blocks
- # which should not be broken. The results are stored in the hash
- # $rshort_nested->{$type_sequence}
- # which will be true if the container should remain intact.
- #
- # For example, consider the following line:
+ # If the hardwired rules conflict with the tabulated bond
+ # strength then there is an inconsistency that should be fixed
+ DEBUG_BOND
+ && $tabulated_bond_str
+ && $bond_str_1
+ && $bond_str_1 != $bond_str_2
+ && $bond_str_2 != $tabulated_bond_str
+ && do {
+ print STDERR
+"BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
+ };
- # sub cxt_two { sort { $a <=> $b } test_if_list() }
+ #-----------------------------------------------------------------
+ # Bond Strength Section 4:
+ # Modify strengths of certain tokens which often occur in sequence
+ # by adding a small bias to each one in turn so that the breaks
+ # occur from left to right.
+ #
+ # Note that we only changing strengths by small amounts here,
+ # and usually increasing, so we should not be altering any NO_BREAKs.
+ # Other routines which check for NO_BREAKs will use a tolerance
+ # of one to avoid any problem.
+ #-----------------------------------------------------------------
- # The 'sort' block is short and nested within an outer sub block.
- # Normally, the existance of the 'sort' block will force the sub block to
- # break open, but this is not always desirable. Here we will set a flag for
- # the sort block to prevent this. To give the user control, we will
- # follow the input file formatting. If either of the blocks is broken in
- # the input file then we will allow it to remain broken. Otherwise we will
- # set a flag to keep it together in later formatting steps.
+ # The bias tables use special keys:
+ # $type - if not keyword
+ # $token - if keyword, but map some keywords together
+ my $left_key =
+ $type eq 'k' ? $token eq 'err' ? 'or' : $token : $type;
+ my $right_key =
+ $next_nonblank_type eq 'k'
+ ? $next_nonblank_token eq 'err'
+ ? 'or'
+ : $next_nonblank_token
+ : $next_nonblank_type;
- # The flag which is set here will be checked in two places:
- # 'sub print_line_of_tokens' and 'sub starting_one_line_block'
+ if ( $type eq ',' ) {
- my $self = shift;
- my $rLL = $self->{rLL};
- return unless ( defined($rLL) && @{$rLL} );
+ # add any bias set by sub break_lists at old comma break points
+ $bond_str += $bond_strength_to_go[$i];
- return unless ( $rOpts->{'one-line-block-nesting'} );
+ }
- my $K_opening_container = $self->{K_opening_container};
- my $K_closing_container = $self->{K_closing_container};
- my $rbreak_container = $self->{rbreak_container};
- my $rshort_nested = $self->{rshort_nested};
- my $rcontainer_map = $self->{rcontainer_map};
- my $rlines = $self->{rlines};
+ # bias left token
+ elsif ( defined( $bias{$left_key} ) ) {
+ if ( !$want_break_before{$left_key} ) {
+ $bias{$left_key} += $delta_bias;
+ $bond_str += $bias{$left_key};
+ }
+ }
- # Variables needed for estimating line lengths
- my $starting_indent;
- my $starting_lentot;
- my $length_tol = 1;
+ # bias right token
+ if ( defined( $bias{$right_key} ) ) {
+ if ( $want_break_before{$right_key} ) {
- my $excess_length_to_K = sub {
- my ($K) = @_;
+ # for leading '.' align all but 'short' quotes; the idea
+ # is to not place something like "\n" on a single line.
+ if ( $right_key eq '.' ) {
+ unless (
+ $last_nonblank_type eq '.'
+ && ( $token_length <=
+ $rOpts_short_concatenation_item_length )
+ && ( !$is_closing_token{$token} )
+ )
+ {
+ $bias{$right_key} += $delta_bias;
+ }
+ }
+ else {
+ $bias{$right_key} += $delta_bias;
+ }
+ $bond_str += $bias{$right_key};
+ }
+ }
- # Estimate the length from the line start to a given token
- my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
- my $excess_length =
- $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
- return ($excess_length);
- };
+ $bond_str_4 = $bond_str if (DEBUG_BOND);
- my $is_broken_block = sub {
+ #---------------------------------------------------------------
+ # Bond Strength Section 5:
+ # Fifth Approximation.
+ # Take nesting depth into account by adding the nesting depth
+ # to the bond strength.
+ #---------------------------------------------------------------
+ my $strength;
- # a block is broken if the input line numbers of the braces differ
- my ($seqno) = @_;
- my $K_opening = $K_opening_container->{$seqno};
- return unless ( defined($K_opening) );
- my $K_closing = $K_closing_container->{$seqno};
- return unless ( defined($K_closing) );
- return $rbreak_container->{$seqno}
- || $rLL->[$K_closing]->[_LINE_INDEX_] !=
- $rLL->[$K_opening]->[_LINE_INDEX_];
- };
+ if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
+ if ( $total_nesting_depth > 0 ) {
+ $strength = $bond_str + $total_nesting_depth;
+ }
+ else {
+ $strength = $bond_str;
+ }
+ }
+ else {
+ $strength = NO_BREAK;
- # loop over all containers
- my @open_block_stack;
- my $iline = -1;
- my $KNEXT = 0;
- while ( defined($KNEXT) ) {
- my $KK = $KNEXT;
- $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
- my $rtoken_vars = $rLL->[$KK];
- my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
- if ( !$type_sequence ) {
- next if ( $KK == 0 ); # first token in file may not be container
+ # For critical code such as lines with here targets we must
+ # be absolutely sure that we do not allow a break. So for
+ # these the nobreak flag exceeds 1 as a signal. Otherwise we
+ # can run into trouble when small tolerances are added.
+ $strength += 1 if ( $nobreak_to_go[$i] > 1 );
+ }
- # an error here is most likely due to a recent programming change
- Fault("sequence = $type_sequence not defined at K=$KK");
- }
+ #---------------------------------------------------------------
+ # Bond Strength Section 6:
+ # Sixth Approximation. Welds.
+ #---------------------------------------------------------------
- # We are just looking at code blocks
- my $token = $rtoken_vars->[_TOKEN_];
- my $type = $rtoken_vars->[_TYPE_];
- next unless ( $type eq $token );
- my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
- next unless ($block_type);
+ # Do not allow a break within welds
+ if ( $total_weld_count && $seqno ) {
+ my $KK = $K_to_go[$i];
+ if ( $rK_weld_right->{$KK} ) {
+ $strength = NO_BREAK;
+ }
- # Keep a stack of all acceptable block braces seen.
- # Only consider blocks entirely on one line so dump the stack when line
- # changes.
- my $iline_last = $iline;
- $iline = $rLL->[$KK]->[_LINE_INDEX_];
- if ( $iline != $iline_last ) { @open_block_stack = () }
+ # But encourage breaking after opening welded tokens
+ elsif ($rK_weld_left->{$KK}
+ && $is_opening_token{$token} )
+ {
+ $strength -= 1;
+ }
+ }
- if ( $token eq '}' ) {
- if (@open_block_stack) { pop @open_block_stack }
- }
- next unless ( $token eq '{' );
+ # always break after side comment
+ if ( $type eq '#' ) { $strength = 0 }
- # block must be balanced (bad scripts may be unbalanced)
- my $K_opening = $K_opening_container->{$type_sequence};
- my $K_closing = $K_closing_container->{$type_sequence};
- next unless ( defined($K_opening) && defined($K_closing) );
+ $bond_strength_to_go[$i] = $strength;
- # require that this block be entirely on one line
- next if ( $is_broken_block->($type_sequence) );
+ # Fix for case c001: be sure NO_BREAK's are enforced by later
+ # routines, except at a '?' because '?' as quote delimiter is
+ # deprecated.
+ if ( $strength >= NO_BREAK && $next_nonblank_type ne '?' ) {
+ $nobreak_to_go[$i] ||= 1;
+ }
- # See if this block fits on one line of allowed length (which may
- # be different from the input script)
- $starting_lentot =
- $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
- $starting_indent = 0;
- if ( !$rOpts_variable_maximum_line_length ) {
- my $level = $rLL->[$KK]->[_LEVEL_];
- $starting_indent = $rOpts_indent_columns * $level;
- }
-
- # Dump the stack if block is too long and skip this block
- if ( $excess_length_to_K->($K_closing) > 0 ) {
- @open_block_stack = ();
- next;
- }
+ DEBUG_BOND && do {
+ my $str = substr( $token, 0, 15 );
+ $str .= ' ' x ( 16 - length($str) );
+ print STDOUT
+"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
- # OK, Block passes tests, remember it
- push @open_block_stack, $type_sequence;
+ # reset for next pass
+ $bond_str_1 = $bond_str_2 = $bond_str_3 = $bond_str_4 = undef;
+ };
- # We are only marking nested code blocks,
- # so check for a previous block on the stack
- next unless ( @open_block_stack > 1 );
+ } ## end main loop
+ return;
+ } ## end sub set_bond_strengths
+} ## end closure set_bond_strengths
- # Looks OK, mark this as a short nested block
- $rshort_nested->{$type_sequence} = 1;
+sub bad_pattern {
- }
- return;
+ # See if a pattern will compile. We have to use a string eval here,
+ # but it should be safe because the pattern has been constructed
+ # by this program.
+ my ($pattern) = @_;
+ eval "'##'=~/$pattern/";
+ return $@;
}
-sub weld_containers {
-
- # do any welding operations
- my $self = shift;
-
- # initialize weld length hashes needed later for checking line lengths
- # TODO: These should eventually be stored in $self rather than be package vars
- %weld_len_left_closing = ();
- %weld_len_right_closing = ();
- %weld_len_left_opening = ();
- %weld_len_right_opening = ();
-
- return if ( $rOpts->{'indent-only'} );
- return unless ($rOpts_add_newlines);
-
- if ( $rOpts->{'weld-nested-containers'} ) {
+{ ## begin closure prepare_cuddled_block_types
- # if called, weld_nested_containers must be called before other weld
- # operations. # This is because weld_nested_containers could overwrite
- # hash values written by weld_cuddled_blocks and weld_nested_quotes.
- $self->weld_nested_containers();
+ my %no_cuddle;
- $self->weld_nested_quotes();
+ # Add keywords here which really should not be cuddled
+ BEGIN {
+ my @q = qw(if unless for foreach while);
+ @no_cuddle{@q} = (1) x scalar(@q);
}
- # Note that weld_nested_containers() changes the _LEVEL_ values, so
- # weld_cuddled_blocks must use the _TRUE_LEVEL_ values instead.
-
- # Here is a good test case to Be sure that both cuddling and welding
- # are working and not interfering with each other: <<snippets/ce_wn1.in>>
-
- # perltidy -wn -ce
-
- # if ($BOLD_MATH) { (
- # $labels, $comment,
- # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
- # ) } else { (
- # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
- # $after
- # ) }
-
- $self->weld_cuddled_blocks();
-
- return;
-}
-
-sub cumulative_length_before_K {
- my ( $self, $KK ) = @_;
- my $rLL = $self->{rLL};
- return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
-}
-
-sub cumulative_length_after_K {
- my ( $self, $KK ) = @_;
- my $rLL = $self->{rLL};
- return $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
-}
-
-sub weld_cuddled_blocks {
- my $self = shift;
-
- # This routine implements the -cb flag by finding the appropriate
- # closing and opening block braces and welding them together.
- return unless ( %{$rcuddled_block_types} );
+ sub prepare_cuddled_block_types {
- my $rLL = $self->{rLL};
- return unless ( defined($rLL) && @{$rLL} );
- my $rbreak_container = $self->{rbreak_container};
+ # the cuddled-else style, if used, is controlled by a hash that
+ # we construct here
- my $K_opening_container = $self->{K_opening_container};
- my $K_closing_container = $self->{K_closing_container};
+ # Include keywords here which should not be cuddled
- my $length_to_opening_seqno = sub {
- my ($seqno) = @_;
- my $KK = $K_opening_container->{$seqno};
- my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
- return $lentot;
- };
- my $length_to_closing_seqno = sub {
- my ($seqno) = @_;
- my $KK = $K_closing_container->{$seqno};
- my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
- return $lentot;
- };
+ my $cuddled_string = "";
+ if ( $rOpts->{'cuddled-else'} ) {
- my $is_broken_block = sub {
+ # set the default
+ $cuddled_string = 'elsif else continue catch finally'
+ unless ( $rOpts->{'cuddled-block-list-exclusive'} );
- # a block is broken if the input line numbers of the braces differ
- # we can only cuddle between broken blocks
- my ($seqno) = @_;
- my $K_opening = $K_opening_container->{$seqno};
- return unless ( defined($K_opening) );
- my $K_closing = $K_closing_container->{$seqno};
- return unless ( defined($K_closing) );
- return $rbreak_container->{$seqno}
- || $rLL->[$K_closing]->[_LINE_INDEX_] !=
- $rLL->[$K_opening]->[_LINE_INDEX_];
- };
+ # This is the old equivalent but more complex version
+ # $cuddled_string = 'if-elsif-else unless-elsif-else -continue ';
- # A stack to remember open chains at all levels:
- # $in_chain[$level] = [$chain_type, $type_sequence];
- my @in_chain;
- my $CBO = $rOpts->{'cuddled-break-option'};
+ # Add users other blocks to be cuddled
+ my $cuddled_block_list = $rOpts->{'cuddled-block-list'};
+ if ($cuddled_block_list) {
+ $cuddled_string .= " " . $cuddled_block_list;
+ }
- # loop over structure items to find cuddled pairs
- my $level = 0;
- my $KNEXT = 0;
- while ( defined($KNEXT) ) {
- my $KK = $KNEXT;
- $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
- my $rtoken_vars = $rLL->[$KK];
- my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
- if ( !$type_sequence ) {
- next if ( $KK == 0 ); # first token in file may not be container
- Fault("sequence = $type_sequence not defined at K=$KK");
}
- # We use the original levels because they get changed by sub
- # 'weld_nested_containers'. So if this were to be called before that
- # routine, the levels would be wrong and things would go bad.
- my $last_level = $level;
- $level = $rtoken_vars->[_LEVEL_TRUE_];
-
- if ( $level < $last_level ) { $in_chain[$last_level] = undef }
- elsif ( $level > $last_level ) { $in_chain[$level] = undef }
+ # If we have a cuddled string of the form
+ # 'try-catch-finally'
- # We are only looking at code blocks
- my $token = $rtoken_vars->[_TOKEN_];
- my $type = $rtoken_vars->[_TYPE_];
- next unless ( $type eq $token );
+ # we want to prepare a hash of the form
- if ( $token eq '{' ) {
+ # $rcuddled_block_types = {
+ # 'try' => {
+ # 'catch' => 1,
+ # 'finally' => 1
+ # },
+ # };
- my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
- if ( !$block_type ) {
+ # use -dcbl to dump this hash
- # patch for unrecognized block types which may not be labeled
- my $Kp = $self->K_previous_nonblank($KK);
- while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
- $Kp = $self->K_previous_nonblank($Kp);
- }
- next unless $Kp;
- $block_type = $rLL->[$Kp]->[_TOKEN_];
- }
- if ( $in_chain[$level] ) {
+ # Multiple such strings are input as a space or comma separated list
- # we are in a chain and are at an opening block brace.
- # See if we are welding this opening brace with the previous
- # block brace. Get their identification numbers:
- my $closing_seqno = $in_chain[$level]->[1];
- my $opening_seqno = $type_sequence;
+ # If we get two lists with the same leading type, such as
+ # -cbl = "-try-catch-finally -try-catch-otherwise"
+ # then they will get merged as follows:
+ # $rcuddled_block_types = {
+ # 'try' => {
+ # 'catch' => 1,
+ # 'finally' => 2,
+ # 'otherwise' => 1,
+ # },
+ # };
+ # This will allow either type of chain to be followed.
- # The preceding block must be on multiple lines so that its
- # closing brace will start a new line.
- if ( !$is_broken_block->($closing_seqno) ) {
- next unless ( $CBO == 2 );
- $rbreak_container->{$closing_seqno} = 1;
- }
+ $cuddled_string =~ s/,/ /g; # allow space or comma separated lists
+ my @cuddled_strings = split /\s+/, $cuddled_string;
- # we will let the trailing block be either broken or intact
- ## && $is_broken_block->($opening_seqno);
+ $rcuddled_block_types = {};
- # We can weld the closing brace to its following word ..
- my $Ko = $K_closing_container->{$closing_seqno};
- my $Kon = $self->K_next_nonblank($Ko);
+ # process each dash-separated string...
+ my $string_count = 0;
+ foreach my $string (@cuddled_strings) {
+ next unless $string;
+ my @words = split /-+/, $string; # allow multiple dashes
- # ..unless it is a comment
- if ( $rLL->[$Kon]->[_TYPE_] ne '#' ) {
- my $dlen =
- $rLL->[$Kon]->[_CUMULATIVE_LENGTH_] -
- $rLL->[ $Ko - 1 ]->[_CUMULATIVE_LENGTH_];
- $weld_len_right_closing{$closing_seqno} = $dlen;
+ # we could look for and report possible errors here...
+ next unless ( @words > 0 );
- # Set flag that we want to break the next container
- # so that the cuddled line is balanced.
- $rbreak_container->{$opening_seqno} = 1
- if ($CBO);
- }
+ # allow either '-continue' or *-continue' for arbitrary starting type
+ my $start = '*';
+ # a single word without dashes is a secondary block type
+ if ( @words > 1 ) {
+ $start = shift @words;
}
- else {
- # We are not in a chain. Start a new chain if we see the
- # starting block type.
- if ( $rcuddled_block_types->{$block_type} ) {
- $in_chain[$level] = [ $block_type, $type_sequence ];
- }
- else {
- $block_type = '*';
- $in_chain[$level] = [ $block_type, $type_sequence ];
- }
+ # always make an entry for the leading word. If none follow, this
+ # will still prevent a wildcard from matching this word.
+ if ( !defined( $rcuddled_block_types->{$start} ) ) {
+ $rcuddled_block_types->{$start} = {};
}
- }
- elsif ( $token eq '}' ) {
- if ( $in_chain[$level] ) {
-
- # We are in a chain at a closing brace. See if this chain
- # continues..
- my $Knn = $self->K_next_code($KK);
- next unless $Knn;
- my $chain_type = $in_chain[$level]->[0];
- my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
- if (
- $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
- )
- {
-
- # Note that we do not weld yet because we must wait until
- # we we are sure that an opening brace for this follows.
- $in_chain[$level]->[1] = $type_sequence;
+ # The count gives the original word order in case we ever want it.
+ $string_count++;
+ my $word_count = 0;
+ foreach my $word (@words) {
+ next unless $word;
+ if ( $no_cuddle{$word} ) {
+ Warn(
+"## Ignoring keyword '$word' in -cbl; does not seem right\n"
+ );
+ next;
}
- else { $in_chain[$level] = undef }
+ $word_count++;
+ $rcuddled_block_types->{$start}->{$word} =
+ 1; #"$string_count.$word_count";
+
+ # git#9: Remove this word from the list of desired one-line
+ # blocks
+ $want_one_line_block{$word} = 0;
}
}
+ return;
}
+} ## begin closure prepare_cuddled_block_types
- return;
-}
+sub dump_cuddled_block_list {
+ my ($fh) = @_;
-sub weld_nested_containers {
- my $self = shift;
+ # ORIGINAL METHOD: Here is the format of the cuddled block type hash
+ # which controls this routine
+ # my $rcuddled_block_types = {
+ # 'if' => {
+ # 'else' => 1,
+ # 'elsif' => 1
+ # },
+ # 'try' => {
+ # 'catch' => 1,
+ # 'finally' => 1
+ # },
+ # };
- # This routine implements the -wn flag by "welding together"
- # the nested closing and opening tokens which were previously
- # identified by sub 'find_nested_pairs'. "welding" simply
- # involves setting certain hash values which will be checked
- # later during formatting.
+ # SIMPLFIED METHOD: the simplified method uses a wildcard for
+ # the starting block type and puts all cuddled blocks together:
+ # my $rcuddled_block_types = {
+ # '*' => {
+ # 'else' => 1,
+ # 'elsif' => 1
+ # 'catch' => 1,
+ # 'finally' => 1
+ # },
+ # };
- my $rLL = $self->{rLL};
- my $Klimit = $self->get_rLL_max_index();
- my $rnested_pairs = $self->{rnested_pairs};
- my $rlines = $self->{rlines};
- my $K_opening_container = $self->{K_opening_container};
- my $K_closing_container = $self->{K_closing_container};
+ # Both methods work, but the simplified method has proven to be adequate and
+ # easier to manage.
- # Return unless there are nested pairs to weld
- return unless defined($rnested_pairs) && @{$rnested_pairs};
+ my $cuddled_string = $rOpts->{'cuddled-block-list'};
+ $cuddled_string = '' unless $cuddled_string;
- # This array will hold the sequence numbers of the tokens to be welded.
- my @welds;
+ my $flags = "";
+ $flags .= "-ce" if ( $rOpts->{'cuddled-else'} );
+ $flags .= " -cbl='$cuddled_string'";
- # Variables needed for estimating line lengths
- my $starting_indent;
- my $starting_lentot;
+ unless ( $rOpts->{'cuddled-else'} ) {
+ $flags .= "\nNote: You must specify -ce to generate a cuddled hash";
+ }
- # A tolerance to the length for length estimates. In some rare cases
- # this can avoid problems where a final weld slightly exceeds the
- # line length and gets broken in a bad spot.
- my $length_tol = 1;
+ $fh->print(<<EOM);
+------------------------------------------------------------------------
+Hash of cuddled block types prepared for a run with these parameters:
+ $flags
+------------------------------------------------------------------------
+EOM
- my $excess_length_to_K = sub {
- my ($K) = @_;
+ use Data::Dumper;
+ $fh->print( Dumper($rcuddled_block_types) );
- # Estimate the length from the line start to a given token
- my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
- my $excess_length =
- $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
- return ($excess_length);
- };
+ $fh->print(<<EOM);
+------------------------------------------------------------------------
+EOM
+ return;
+}
- my $length_to_opening_seqno = sub {
- my ($seqno) = @_;
- my $KK = $K_opening_container->{$seqno};
- my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
- return $lentot;
- };
+sub make_static_block_comment_pattern {
- my $length_to_closing_seqno = sub {
- my ($seqno) = @_;
- my $KK = $K_closing_container->{$seqno};
- my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
- return $lentot;
- };
+ # create the pattern used to identify static block comments
+ $static_block_comment_pattern = '^\s*##';
- # Abbreviations:
- # _oo=outer opening, i.e. first of { {
- # _io=inner opening, i.e. second of { {
- # _oc=outer closing, i.e. second of } {
- # _ic=inner closing, i.e. first of } }
+ # allow the user to change it
+ if ( $rOpts->{'static-block-comment-prefix'} ) {
+ my $prefix = $rOpts->{'static-block-comment-prefix'};
+ $prefix =~ s/^\s*//;
+ my $pattern = $prefix;
- my $previous_pair;
+ # user may give leading caret to force matching left comments only
+ if ( $prefix !~ /^\^#/ ) {
+ if ( $prefix !~ /^#/ ) {
+ Die(
+"ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"
+ );
+ }
+ $pattern = '^\s*' . $prefix;
+ }
+ if ( bad_pattern($pattern) ) {
+ Die(
+"ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"
+ );
+ }
+ $static_block_comment_pattern = $pattern;
+ }
+ return;
+}
- # We are working from outermost to innermost pairs so that
- # level changes will be complete when we arrive at the inner pairs.
+sub make_format_skipping_pattern {
+ my ( $opt_name, $default ) = @_;
+ my $param = $rOpts->{$opt_name};
+ unless ($param) { $param = $default }
+ $param =~ s/^\s*//;
+ if ( $param !~ /^#/ ) {
+ Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
+ }
+ my $pattern = '^' . $param . '\s';
+ if ( bad_pattern($pattern) ) {
+ Die(
+"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
+ );
+ }
+ return $pattern;
+}
- while ( my $item = pop( @{$rnested_pairs} ) ) {
- my ( $inner_seqno, $outer_seqno ) = @{$item};
+sub make_non_indenting_brace_pattern {
- my $Kouter_opening = $K_opening_container->{$outer_seqno};
- my $Kinner_opening = $K_opening_container->{$inner_seqno};
- my $Kouter_closing = $K_closing_container->{$outer_seqno};
- my $Kinner_closing = $K_closing_container->{$inner_seqno};
+ # Create the pattern used to identify static side comments.
+ # Note that we are ending the pattern in a \s. This will allow
+ # the pattern to be followed by a space and some text, or a newline.
+ # The pattern is used in sub 'non_indenting_braces'
+ $non_indenting_brace_pattern = '^#<<<\s';
- my $outer_opening = $rLL->[$Kouter_opening];
- my $inner_opening = $rLL->[$Kinner_opening];
- my $outer_closing = $rLL->[$Kouter_closing];
- my $inner_closing = $rLL->[$Kinner_closing];
+ # allow the user to change it
+ if ( $rOpts->{'non-indenting-brace-prefix'} ) {
+ my $prefix = $rOpts->{'non-indenting-brace-prefix'};
+ $prefix =~ s/^\s*//;
+ if ( $prefix !~ /^#/ ) {
+ Die("ERROR: the -nibp parameter '$prefix' must begin with '#'\n");
+ }
+ my $pattern = '^' . $prefix . '\s';
+ if ( bad_pattern($pattern) ) {
+ Die(
+"ERROR: the -nibp prefix '$prefix' causes the invalid regex '$pattern'\n"
+ );
+ }
+ $non_indenting_brace_pattern = $pattern;
+ }
+ return;
+}
- my $iline_oo = $outer_opening->[_LINE_INDEX_];
- my $iline_io = $inner_opening->[_LINE_INDEX_];
+sub make_closing_side_comment_list_pattern {
- # Set flag saying if this pair starts a new weld
- my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
+ # turn any input list into a regex for recognizing selected block types
+ $closing_side_comment_list_pattern = '^\w+';
+ if ( defined( $rOpts->{'closing-side-comment-list'} )
+ && $rOpts->{'closing-side-comment-list'} )
+ {
+ $closing_side_comment_list_pattern =
+ make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
+ }
+ return;
+}
- # Set flag saying if this pair is adjacent to the previous nesting pair
- # (even if previous pair was rejected as a weld)
- my $touch_previous_pair =
- defined($previous_pair) && $outer_seqno == $previous_pair->[0];
- $previous_pair = $item;
+sub make_sub_matching_pattern {
- # Set a flag if we should not weld. It sometimes looks best not to weld
- # when the opening and closing tokens are very close. However, there
- # is a danger that we will create a "blinker", which oscillates between
- # two semi-stable states, if we do not weld. So the rules for
- # not welding have to be carefully defined and tested.
- my $do_not_weld;
- if ( !$touch_previous_pair ) {
+ # Patterns for standardizing matches to block types for regular subs and
+ # anonymous subs. Examples
+ # 'sub process' is a named sub
+ # 'sub ::m' is a named sub
+ # 'sub' is an anonymous sub
+ # 'sub:' is a label, not a sub
+ # 'sub :' is a label, not a sub ( block type will be <sub:> )
+ # sub'_ is a named sub ( block type will be <sub '_> )
+ # 'substr' is a keyword
+ # So note that named subs always have a space after 'sub'
+ $SUB_PATTERN = '^sub\s'; # match normal sub
+ $ASUB_PATTERN = '^sub$'; # match anonymous sub
- # If this pair is not adjacent to the previous pair (skipped or
- # not), then measure lengths from the start of line of oo
+ # Note (see also RT #133130): These patterns are used by
+ # sub make_block_pattern, which is used for making most patterns.
+ # So this sub needs to be called before other pattern-making routines.
- my $rK_range = $rlines->[$iline_oo]->{_rK_range};
- my ( $Kfirst, $Klast ) = @{$rK_range};
- $starting_lentot =
- $Kfirst <= 0 ? 0 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
- $starting_indent = 0;
- if ( !$rOpts_variable_maximum_line_length ) {
- my $level = $rLL->[$Kfirst]->[_LEVEL_];
- $starting_indent = $rOpts_indent_columns * $level;
- }
+ if ( $rOpts->{'sub-alias-list'} ) {
- # DO-NOT-WELD RULE 1:
- # Do not weld something that looks like the start of a two-line
- # function call, like this: <<snippets/wn6.in>>
- # $trans->add_transformation(
- # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
- # We will look for a semicolon after the closing paren.
+ # Note that any 'sub-alias-list' has been preprocessed to
+ # be a trimmed, space-separated list which includes 'sub'
+ # for example, it might be 'sub method fun'
+ my $sub_alias_list = $rOpts->{'sub-alias-list'};
+ $sub_alias_list =~ s/\s+/\|/g;
+ $SUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
+ $ASUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
+ }
+ return;
+}
- # We want to weld something complex, like this though
- # my $compass = uc( opposite_direction( line_to_canvas_direction(
- # @{ $coords[0] }, @{ $coords[1] } ) ) );
- # Otherwise we will get a 'blinker'
+sub make_bl_pattern {
- my $iline_oc = $outer_closing->[_LINE_INDEX_];
- if ( $iline_oc <= $iline_oo + 1 ) {
+ # Set defaults lists to retain historical default behavior for -bl:
+ my $bl_list_string = '*';
+ my $bl_exclusion_list_string = 'sort map grep eval asub';
- # Look for following semicolon...
- my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
- my $next_nonblank_type =
- defined($Knext_nonblank)
- ? $rLL->[$Knext_nonblank]->[_TYPE_]
- : 'b';
- if ( $next_nonblank_type eq ';' ) {
+ if ( defined( $rOpts->{'brace-left-list'} )
+ && $rOpts->{'brace-left-list'} )
+ {
+ $bl_list_string = $rOpts->{'brace-left-list'};
+ }
+ if ( $bl_list_string =~ /\bsub\b/ ) {
+ $rOpts->{'opening-sub-brace-on-new-line'} ||=
+ $rOpts->{'opening-brace-on-new-line'};
+ }
+ if ( $bl_list_string =~ /\basub\b/ ) {
+ $rOpts->{'opening-anonymous-sub-brace-on-new-line'} ||=
+ $rOpts->{'opening-brace-on-new-line'};
+ }
- # Then do not weld if no other containers between inner
- # opening and closing.
- my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
- if ( $Knext_seq_item == $Kinner_closing ) {
- $do_not_weld ||= 1;
- }
- }
- }
- }
+ $bl_pattern = make_block_pattern( '-bll', $bl_list_string );
- my $iline_ic = $inner_closing->[_LINE_INDEX_];
+ # for -bl, a list with '*' turns on -sbl and -asbl
+ if ( $bl_pattern =~ /\.\*/ ) {
+ $rOpts->{'opening-sub-brace-on-new-line'} ||=
+ $rOpts->{'opening-brace-on-new-line'};
+ $rOpts->{'opening-anonymous-sub-brace-on-new-line'} ||=
+ $rOpts->{'opening-anonymous-brace-on-new-line'};
+ }
- # DO-NOT-WELD RULE 2:
- # Do not weld an opening paren to an inner one line brace block
- # We will just use old line numbers for this test and require
- # iterations if necessary for convergence
+ if ( defined( $rOpts->{'brace-left-exclusion-list'} )
+ && $rOpts->{'brace-left-exclusion-list'} )
+ {
+ $bl_exclusion_list_string = $rOpts->{'brace-left-exclusion-list'};
+ if ( $bl_exclusion_list_string =~ /\bsub\b/ ) {
+ $rOpts->{'opening-sub-brace-on-new-line'} = 0;
+ }
+ if ( $bl_exclusion_list_string =~ /\basub\b/ ) {
+ $rOpts->{'opening-anonymous-sub-brace-on-new-line'} = 0;
+ }
+ }
- # For example, otherwise we could cause the opening paren
- # in the following example to separate from the caller name
- # as here:
+ $bl_exclusion_pattern =
+ make_block_pattern( '-blxl', $bl_exclusion_list_string );
+ return;
+}
- # $_[0]->code_handler
- # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
+sub make_bli_pattern {
- # Here is another example where we do not want to weld:
- # $wrapped->add_around_modifier(
- # sub { push @tracelog => 'around 1'; $_[0]->(); } );
+ # default list of block types for which -bli would apply
+ my $bli_list_string = 'if else elsif unless while for foreach do : sub';
+ my $bli_exclusion_list_string = ' ';
- # If the one line sub block gets broken due to length or by the
- # user, then we can weld. The result will then be:
- # $wrapped->add_around_modifier( sub {
- # push @tracelog => 'around 1';
- # $_[0]->();
- # } );
+ if ( defined( $rOpts->{'brace-left-and-indent-list'} )
+ && $rOpts->{'brace-left-and-indent-list'} )
+ {
+ $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
+ }
- if ( $iline_ic == $iline_io ) {
+ $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
- my $token_oo = $outer_opening->[_TOKEN_];
- my $block_type_io = $inner_opening->[_BLOCK_TYPE_];
- my $token_io = $inner_opening->[_TOKEN_];
- $do_not_weld ||= $token_oo eq '(' && $token_io eq '{';
- }
+ if ( defined( $rOpts->{'brace-left-and-indent-exclusion-list'} )
+ && $rOpts->{'brace-left-and-indent-exclusion-list'} )
+ {
+ $bli_exclusion_list_string =
+ $rOpts->{'brace-left-and-indent-exclusion-list'};
+ }
+ $bli_exclusion_pattern =
+ make_block_pattern( '-blixl', $bli_exclusion_list_string );
+ return;
+}
- # DO-NOT-WELD RULE 3:
- # Do not weld if this makes our line too long
- $do_not_weld ||= $excess_length_to_K->($Kinner_opening) > 0;
+sub make_keyword_group_list_pattern {
- # DO-NOT-WELD RULE 4; implemented for git#10:
- # Do not weld an opening -ce brace if the next container is on a single
- # line, different from the opening brace. (This is very rare). For
- # example, given the following with -ce, we will avoid joining the {
- # and [
+ # turn any input list into a regex for recognizing selected block types.
+ # Here are the defaults:
+ $keyword_group_list_pattern = '^(our|local|my|use|require|)$';
+ $keyword_group_list_comment_pattern = '';
+ if ( defined( $rOpts->{'keyword-group-blanks-list'} )
+ && $rOpts->{'keyword-group-blanks-list'} )
+ {
+ my @words = split /\s+/, $rOpts->{'keyword-group-blanks-list'};
+ my @keyword_list;
+ my @comment_list;
+ foreach my $word (@words) {
+ if ( $word =~ /^(BC|SBC)$/ ) {
+ push @comment_list, $word;
+ if ( $word eq 'SBC' ) { push @comment_list, 'SBCX' }
+ }
+ else {
+ push @keyword_list, $word;
+ }
+ }
+ $keyword_group_list_pattern =
+ make_block_pattern( '-kgbl', $rOpts->{'keyword-group-blanks-list'} );
+ $keyword_group_list_comment_pattern =
+ make_block_pattern( '-kgbl', join( ' ', @comment_list ) );
+ }
+ return;
+}
- # } else {
- # [ $_, length($_) ]
- # }
+sub make_block_brace_vertical_tightness_pattern {
- # because this would produce a terminal one-line block:
+ # turn any input list into a regex for recognizing selected block types
+ $block_brace_vertical_tightness_pattern =
+ '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
+ if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
+ && $rOpts->{'block-brace-vertical-tightness-list'} )
+ {
+ $block_brace_vertical_tightness_pattern =
+ make_block_pattern( '-bbvtl',
+ $rOpts->{'block-brace-vertical-tightness-list'} );
+ }
+ return;
+}
- # } else { [ $_, length($_) ] }
+sub make_blank_line_pattern {
- # which may not be what is desired. But given this input:
+ $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
+ my $key = 'blank-lines-before-closing-block-list';
+ if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
+ $blank_lines_before_closing_block_pattern =
+ make_block_pattern( '-blbcl', $rOpts->{$key} );
+ }
- # } else { [ $_, length($_) ] }
+ $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
+ $key = 'blank-lines-after-opening-block-list';
+ if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
+ $blank_lines_after_opening_block_pattern =
+ make_block_pattern( '-blaol', $rOpts->{$key} );
+ }
+ return;
+}
- # then we will do the weld and retain the one-line block
- if ( $rOpts->{'cuddled-else'} ) {
- my $block_type = $rLL->[$Kouter_opening]->[_BLOCK_TYPE_];
- if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) {
- my $io_line = $inner_opening->[_LINE_INDEX_];
- my $ic_line = $inner_closing->[_LINE_INDEX_];
- my $oo_line = $outer_opening->[_LINE_INDEX_];
- $do_not_weld ||=
- ( $oo_line < $io_line && $ic_line == $io_line );
- }
- }
+sub make_block_pattern {
- if ($do_not_weld) {
+ # given a string of block-type keywords, return a regex to match them
+ # The only tricky part is that labels are indicated with a single ':'
+ # and the 'sub' token text may have additional text after it (name of
+ # sub).
+ #
+ # Example:
+ #
+ # input string: "if else elsif unless while for foreach do : sub";
+ # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
- # After neglecting a pair, we start measuring from start of point io
- $starting_lentot =
- $self->cumulative_length_before_K($Kinner_opening);
- $starting_indent = 0;
- if ( !$rOpts_variable_maximum_line_length ) {
- my $level = $inner_opening->[_LEVEL_];
- $starting_indent = $rOpts_indent_columns * $level;
- }
+ # Minor Update:
+ #
+ # To distinguish between anonymous subs and named subs, use 'sub' to
+ # indicate a named sub, and 'asub' to indicate an anonymous sub
- # Normally, a broken pair should not decrease indentation of
- # intermediate tokens:
- ## if ( $last_pair_broken ) { next }
- # However, for long strings of welded tokens, such as '{{{{{{...'
- # we will allow broken pairs to also remove indentation.
- # This will keep very long strings of opening and closing
- # braces from marching off to the right. We will do this if the
- # number of tokens in a weld before the broken weld is 4 or more.
- # This rule will mainly be needed for test scripts, since typical
- # welds have fewer than about 4 welded tokens.
- if ( !@welds || @{ $welds[-1] } < 4 ) { next }
+ my ( $abbrev, $string ) = @_;
+ my @list = split_words($string);
+ my @words = ();
+ my %seen;
+ for my $i (@list) {
+ if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
+ next if $seen{$i};
+ $seen{$i} = 1;
+ if ( $i eq 'sub' ) {
}
-
- # otherwise start new weld ...
- elsif ($starting_new_weld) {
- push @welds, $item;
+ elsif ( $i eq 'asub' ) {
+ }
+ elsif ( $i eq ';' ) {
+ push @words, ';';
+ }
+ elsif ( $i eq '{' ) {
+ push @words, '\{';
+ }
+ elsif ( $i eq ':' ) {
+ push @words, '\w+:';
+ }
+ elsif ( $i =~ /^\w/ ) {
+ push @words, $i;
}
-
- # ... or extend current weld
else {
- unshift @{ $welds[-1] }, $inner_seqno;
+ Warn("unrecognized block type $i after $abbrev, ignoring\n");
}
+ }
- # After welding, reduce the indentation level if all intermediate tokens
- my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
- if ( $dlevel != 0 ) {
- my $Kstart = $Kinner_opening;
- my $Kstop = $Kinner_closing;
- for ( my $KK = $Kstart ; $KK <= $Kstop ; $KK++ ) {
- $rLL->[$KK]->[_LEVEL_] += $dlevel;
- }
- }
+ # Fix 2 for c091, prevent the pattern from matching an empty string
+ # '1 ' is an impossible block name.
+ if ( !@words ) { push @words, "1 " }
+
+ my $pattern = '(' . join( '|', @words ) . ')$';
+ my $sub_patterns = "";
+ if ( $seen{'sub'} ) {
+ $sub_patterns .= '|' . $SUB_PATTERN;
}
+ if ( $seen{'asub'} ) {
+ $sub_patterns .= '|' . $ASUB_PATTERN;
+ }
+ if ($sub_patterns) {
+ $pattern = '(' . $pattern . $sub_patterns . ')';
+ }
+ $pattern = '^' . $pattern;
+ return $pattern;
+}
- # Define weld lengths needed later to set line breaks
- foreach my $item (@welds) {
+sub make_static_side_comment_pattern {
- # sweep from inner to outer
+ # create the pattern used to identify static side comments
+ $static_side_comment_pattern = '^##';
- my $inner_seqno;
- my $len_close = 0;
- my $len_open = 0;
- foreach my $outer_seqno ( @{$item} ) {
- if ($inner_seqno) {
+ # allow the user to change it
+ if ( $rOpts->{'static-side-comment-prefix'} ) {
+ my $prefix = $rOpts->{'static-side-comment-prefix'};
+ $prefix =~ s/^\s*//;
+ my $pattern = '^' . $prefix;
+ if ( bad_pattern($pattern) ) {
+ Die(
+"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"
+ );
+ }
+ $static_side_comment_pattern = $pattern;
+ }
+ return;
+}
- my $dlen_opening =
- $length_to_opening_seqno->($inner_seqno) -
- $length_to_opening_seqno->($outer_seqno);
+sub make_closing_side_comment_prefix {
- my $dlen_closing =
- $length_to_closing_seqno->($outer_seqno) -
- $length_to_closing_seqno->($inner_seqno);
+ # Be sure we have a valid closing side comment prefix
+ my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
+ my $csc_prefix_pattern;
+ if ( !defined($csc_prefix) ) {
+ $csc_prefix = '## end';
+ $csc_prefix_pattern = '^##\s+end';
+ }
+ else {
+ my $test_csc_prefix = $csc_prefix;
+ if ( $test_csc_prefix !~ /^#/ ) {
+ $test_csc_prefix = '#' . $test_csc_prefix;
+ }
- $len_open += $dlen_opening;
- $len_close += $dlen_closing;
+ # make a regex to recognize the prefix
+ my $test_csc_prefix_pattern = $test_csc_prefix;
- }
+ # escape any special characters
+ $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
- $weld_len_left_closing{$outer_seqno} = $len_close;
- $weld_len_right_opening{$outer_seqno} = $len_open;
+ $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
- $inner_seqno = $outer_seqno;
- }
+ # allow exact number of intermediate spaces to vary
+ $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
- # sweep from outer to inner
- foreach my $seqno ( reverse @{$item} ) {
- $weld_len_right_closing{$seqno} =
- $len_close - $weld_len_left_closing{$seqno};
- $weld_len_left_opening{$seqno} =
- $len_open - $weld_len_right_opening{$seqno};
- }
- }
+ # make sure we have a good pattern
+ # if we fail this we probably have an error in escaping
+ # characters.
- #####################################
- # DEBUG
- #####################################
- if (0) {
- my $count = 0;
- local $" = ')(';
- foreach my $weld (@welds) {
- print "\nWeld number $count has seq: (@{$weld})\n";
- foreach my $seq ( @{$weld} ) {
- print <<EOM;
- seq=$seq
- left_opening=$weld_len_left_opening{$seq};
- right_opening=$weld_len_right_opening{$seq};
- left_closing=$weld_len_left_closing{$seq};
- right_closing=$weld_len_right_closing{$seq};
+ if ( bad_pattern($test_csc_prefix_pattern) ) {
+
+ # shouldn't happen..must have screwed up escaping, above
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'
EOM
}
- $count++;
+ # just warn and keep going with defaults
+ Warn(
+"Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"
+ );
+ Warn("Please consider using a simpler -cscp prefix\n");
+ Warn("Using default -cscp instead; please check output\n");
+ }
+ else {
+ $csc_prefix = $test_csc_prefix;
+ $csc_prefix_pattern = $test_csc_prefix_pattern;
}
}
+ $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
+ $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
return;
}
-sub weld_nested_quotes {
- my $self = shift;
+##################################################
+# CODE SECTION 4: receive lines from the tokenizer
+##################################################
- my $rLL = $self->{rLL};
- return unless ( defined($rLL) && @{$rLL} );
+{ ## begin closure write_line
- my $K_opening_container = $self->{K_opening_container};
- my $K_closing_container = $self->{K_closing_container};
- my $rlines = $self->{rlines};
+ my $nesting_depth;
- my $is_single_quote = sub {
- my ( $Kbeg, $Kend, $quote_type ) = @_;
- foreach my $K ( $Kbeg .. $Kend ) {
- my $test_type = $rLL->[$K]->[_TYPE_];
- next if ( $test_type eq 'b' );
- return if ( $test_type ne $quote_type );
- }
- return 1;
- };
+ # Variables used by sub check_sequence_numbers:
+ my $last_seqno;
+ my %saw_opening_seqno;
+ my %saw_closing_seqno;
+ my $initial_seqno;
- my $excess_line_length = sub {
- my ( $KK, $Ktest ) = @_;
+ sub initialize_write_line {
- # what is the excess length if we add token $Ktest to the line with $KK?
- my $iline = $rLL->[$KK]->[_LINE_INDEX_];
- my $rK_range = $rlines->[$iline]->{_rK_range};
- my ( $Kfirst, $Klast ) = @{$rK_range};
- my $starting_lentot =
- $Kfirst <= 0 ? 0 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
- my $starting_indent = 0;
- my $length_tol = 1;
- if ( !$rOpts_variable_maximum_line_length ) {
- my $level = $rLL->[$Kfirst]->[_LEVEL_];
- $starting_indent = $rOpts_indent_columns * $level;
- }
-
- my $length = $rLL->[$Ktest]->[_CUMULATIVE_LENGTH_] - $starting_lentot;
- my $excess_length =
- $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
- return $excess_length;
- };
+ $nesting_depth = undef;
- # look for single qw quotes nested in containers
- my $KNEXT = 0;
- while ( defined($KNEXT) ) {
- my $KK = $KNEXT;
- $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
- my $rtoken_vars = $rLL->[$KK];
- my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
- if ( !$outer_seqno ) {
- next if ( $KK == 0 ); # first token in file may not be container
- Fault("sequence = $outer_seqno not defined at K=$KK");
- }
+ $last_seqno = SEQ_ROOT;
+ %saw_opening_seqno = ();
+ %saw_closing_seqno = ();
- my $token = $rtoken_vars->[_TOKEN_];
- if ( $is_opening_token{$token} ) {
+ return;
+ }
- # see if the next token is a quote of some type
- my $Kn = $self->K_next_nonblank($KK);
- next unless $Kn;
- my $next_token = $rLL->[$Kn]->[_TOKEN_];
- my $next_type = $rLL->[$Kn]->[_TYPE_];
- next
- unless ( ( $next_type eq 'q' || $next_type eq 'Q' )
- && $next_token =~ /^q/ );
+ sub check_sequence_numbers {
- # The token before the closing container must also be a quote
- my $K_closing = $K_closing_container->{$outer_seqno};
- my $Kt_end = $self->K_previous_nonblank($K_closing);
- next unless $rLL->[$Kt_end]->[_TYPE_] eq $next_type;
+ # Routine for checking sequence numbers. This only needs to be
+ # done occasionally in DEVEL_MODE to be sure everything is working
+ # correctly.
+ my ( $rtokens, $rtoken_type, $rtype_sequence, $input_line_no ) = @_;
+ my $jmax = @{$rtokens} - 1;
+ return unless ( $jmax >= 0 );
+ foreach my $j ( 0 .. $jmax ) {
+ my $seqno = $rtype_sequence->[$j];
+ my $token = $rtokens->[$j];
+ my $type = $rtoken_type->[$j];
+ my $err_msg =
+"Error at j=$j, line number $input_line_no, seqno='$seqno', type='$type', tok='$token':\n";
+
+ if ( !$seqno ) {
+
+ # Sequence numbers are generated for opening tokens, so every opening
+ # token should be sequenced. Closing tokens will be unsequenced
+ # if they do not have a matching opening token.
+ if ( $is_opening_sequence_token{$token}
+ && $type ne 'q'
+ && $type ne 'Q' )
+ {
+ Fault(
+ <<EOM
+$err_msg Unexpected opening token without sequence number
+EOM
+ );
+ }
+ }
+ else {
- # Do not weld to single-line quotes. Nothing is gained, and it may
- # look bad.
- next if ( $Kt_end == $Kn );
-
- # Only weld to quotes delimited with container tokens. This is
- # because welding to arbitrary quote delimiters can produce code
- # which is less readable than without welding.
- my $closing_delimiter = substr( $rLL->[$Kt_end]->[_TOKEN_], -1, 1 );
- next
- unless ( $is_closing_token{$closing_delimiter}
- || $closing_delimiter eq '>' );
-
- # Now make sure that there is just a single quote in the container
- next
- unless ( $is_single_quote->( $Kn + 1, $Kt_end - 1, $next_type ) );
-
- # If welded, the line must not exceed allowed line length
- # Assume old line breaks for this estimate.
- next if ( $excess_line_length->( $KK, $Kn ) > 0 );
-
- # OK to weld
- # FIXME: Are these always correct?
- $weld_len_left_closing{$outer_seqno} = 1;
- $weld_len_right_opening{$outer_seqno} = 2;
-
- # QW PATCH 1 (Testing)
- # undo CI for welded quotes
- foreach my $K ( $Kn .. $Kt_end ) {
- $rLL->[$K]->[_CI_LEVEL_] = 0;
- }
-
- # Change the level of a closing qw token to be that of the outer
- # containing token. This will allow -lp indentation to function
- # correctly in the vertical aligner.
- $rLL->[$Kt_end]->[_LEVEL_] = $rLL->[$K_closing]->[_LEVEL_];
- }
- }
- return;
-}
-
-sub weld_len_left {
-
- my ( $seqno, $type_or_tok ) = @_;
-
- # Given the sequence number of a token, and the token or its type,
- # return the length of any weld to its left
-
- my $weld_len;
- if ($seqno) {
- if ( $is_closing_type{$type_or_tok} ) {
- $weld_len = $weld_len_left_closing{$seqno};
- }
- elsif ( $is_opening_type{$type_or_tok} ) {
- $weld_len = $weld_len_left_opening{$seqno};
- }
- }
- if ( !defined($weld_len) ) { $weld_len = 0 }
- return $weld_len;
-}
-
-sub weld_len_right {
-
- my ( $seqno, $type_or_tok ) = @_;
-
- # Given the sequence number of a token, and the token or its type,
- # return the length of any weld to its right
-
- my $weld_len;
- if ($seqno) {
- if ( $is_closing_type{$type_or_tok} ) {
- $weld_len = $weld_len_right_closing{$seqno};
- }
- elsif ( $is_opening_type{$type_or_tok} ) {
- $weld_len = $weld_len_right_opening{$seqno};
- }
- }
- if ( !defined($weld_len) ) { $weld_len = 0 }
- return $weld_len;
-}
-
-sub weld_len_left_to_go {
- my ($i) = @_;
-
- # Given the index of a token in the 'to_go' array
- # return the length of any weld to its left
- return if ( $i < 0 );
- my $weld_len =
- weld_len_left( $type_sequence_to_go[$i], $types_to_go[$i] );
- return $weld_len;
-}
-
-sub weld_len_right_to_go {
- my ($i) = @_;
-
- # Given the index of a token in the 'to_go' array
- # return the length of any weld to its right
- return if ( $i < 0 );
- if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
- my $weld_len =
- weld_len_right( $type_sequence_to_go[$i], $types_to_go[$i] );
- return $weld_len;
-}
+ # Save starting seqno to identify sequence method:
+ # New method starts with 2 and has continuous numbering
+ # Old method starts with >2 and may have gaps
+ if ( !defined($initial_seqno) ) { $initial_seqno = $seqno }
-sub link_sequence_items {
-
- # This has been merged into 'respace_tokens' but retained for reference
- my $self = shift;
- my $rlines = $self->{rlines};
- my $rLL = $self->{rLL};
-
- # We walk the token list and make links to the next sequence item.
- # We also define these hashes to container tokens using sequence number as
- # the key:
- my $K_opening_container = {}; # opening [ { or (
- my $K_closing_container = {}; # closing ] } or )
- my $K_opening_ternary = {}; # opening ? of ternary
- my $K_closing_ternary = {}; # closing : of ternary
-
- # sub to link preceding nodes forward to a new node type
- my $link_back = sub {
- my ( $Ktop, $key ) = @_;
-
- my $Kprev = $Ktop - 1;
- while ( $Kprev >= 0
- && !defined( $rLL->[$Kprev]->[$key] ) )
- {
- $rLL->[$Kprev]->[$key] = $Ktop;
- $Kprev -= 1;
- }
- };
-
- for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
-
- $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] = undef;
-
- my $type = $rLL->[$KK]->[_TYPE_];
-
- next if ( $type eq 'b' );
-
- my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
- if ($type_sequence) {
-
- $link_back->( $KK, _KNEXT_SEQ_ITEM_ );
-
- my $token = $rLL->[$KK]->[_TOKEN_];
- if ( $is_opening_token{$token} ) {
+ if ( $is_opening_sequence_token{$token} ) {
- $K_opening_container->{$type_sequence} = $KK;
- }
- elsif ( $is_closing_token{$token} ) {
+ # New method should have continuous numbering
+ if ( $initial_seqno == 2 && $seqno != $last_seqno + 1 ) {
+ Fault(
+ <<EOM
+$err_msg Unexpected opening sequence number: previous seqno=$last_seqno, but seqno= $seqno
+EOM
+ );
+ }
+ $last_seqno = $seqno;
+
+ # Numbers must be unique
+ if ( $saw_opening_seqno{$seqno} ) {
+ my $lno = $saw_opening_seqno{$seqno};
+ Fault(
+ <<EOM
+$err_msg Already saw an opening tokens at line $lno with this sequence number
+EOM
+ );
+ }
+ $saw_opening_seqno{$seqno} = $input_line_no;
+ }
- $K_closing_container->{$type_sequence} = $KK;
- }
+ # only one closing item per seqno
+ elsif ( $is_closing_sequence_token{$token} ) {
+ if ( $saw_closing_seqno{$seqno} ) {
+ my $lno = $saw_closing_seqno{$seqno};
+ Fault(
+ <<EOM
+$err_msg Already saw a closing token with this seqno at line $lno
+EOM
+ );
+ }
+ $saw_closing_seqno{$seqno} = $input_line_no;
- # These are not yet used but could be useful
- else {
- if ( $token eq '?' ) {
- $K_opening_ternary->{$type_sequence} = $KK;
- }
- elsif ( $token eq ':' ) {
- $K_closing_ternary->{$type_sequence} = $KK;
+ # Every closing seqno must have an opening seqno
+ if ( !$saw_opening_seqno{$seqno} ) {
+ Fault(
+ <<EOM
+$err_msg Saw a closing token but no opening token with this seqno
+EOM
+ );
+ }
}
+
+ # Sequenced items must be opening or closing
else {
- Fault(<<EOM);
-Unknown sequenced token type '$type'. Expecting one of '{[(?:)]}'
+ Fault(
+ <<EOM
+$err_msg Unexpected token type with a sequence number
EOM
+ );
}
}
}
+ return;
}
- $self->{K_opening_container} = $K_opening_container;
- $self->{K_closing_container} = $K_closing_container;
- $self->{K_opening_ternary} = $K_opening_ternary;
- $self->{K_closing_ternary} = $K_closing_ternary;
- return;
-}
-
-sub sum_token_lengths {
- my $self = shift;
+ sub write_line {
+
+ # This routine receives lines one-by-one from the tokenizer and stores
+ # them in a format suitable for further processing. After the last
+ # line has been sent, the tokenizer will call sub 'finish_formatting'
+ # to do the actual formatting.
+
+ my ( $self, $line_of_tokens_old ) = @_;
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
+ my $rlines_new = $self->[_rlines_];
+
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my $rSS = $self->[_rSS_];
+ my $Iss_opening = $self->[_Iss_opening_];
+ my $Iss_closing = $self->[_Iss_closing_];
+
+ my $Kfirst;
+ my $line_of_tokens = {};
+ foreach (
+ qw(
+ _curly_brace_depth
+ _ending_in_quote
+ _guessed_indentation_level
+ _line_number
+ _line_text
+ _line_type
+ _paren_depth
+ _quote_character
+ _square_bracket_depth
+ _starting_in_quote
+ )
+ )
+ {
+ $line_of_tokens->{$_} = $line_of_tokens_old->{$_};
+ }
- # This has been merged into 'respace_tokens' but retained for reference
- my $rLL = $self->{rLL};
- my $cumulative_length = 0;
- for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
+ # Data needed by Logger
+ $line_of_tokens->{_level_0} = 0;
+ $line_of_tokens->{_ci_level_0} = 0;
+ $line_of_tokens->{_nesting_blocks_0} = "";
+ $line_of_tokens->{_nesting_tokens_0} = "";
- # now set the length of this token
- my $token_length = length( $rLL->[$KK]->[_TOKEN_] );
+ # Needed to avoid trimming quotes
+ $line_of_tokens->{_ended_in_blank_token} = undef;
- $cumulative_length += $token_length;
+ my $line_type = $line_of_tokens_old->{_line_type};
+ my $line_number = $line_of_tokens_old->{_line_number};
+ my $CODE_type = "";
+ my $tee_output;
- # Save the length sum to just AFTER this token
- $rLL->[$KK]->[_CUMULATIVE_LENGTH_] = $cumulative_length;
+ # Handle line of non-code
+ if ( $line_type ne 'CODE' ) {
+ $tee_output ||= $rOpts_tee_pod
+ && substr( $line_type, 0, 3 ) eq 'POD';
+ }
- }
- return;
-}
+ # Handle line of code
+ else {
-sub resync_lines_and_tokens {
+ my $rtokens = $line_of_tokens_old->{_rtokens};
+ my $rtoken_type = $line_of_tokens_old->{_rtoken_type};
+ my $rblock_type = $line_of_tokens_old->{_rblock_type};
+ my $rcontainer_type = $line_of_tokens_old->{_rcontainer_type};
+ my $rcontainer_environment =
+ $line_of_tokens_old->{_rcontainer_environment};
+ my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence};
+ my $rlevels = $line_of_tokens_old->{_rlevels};
+ my $rslevels = $line_of_tokens_old->{_rslevels};
+ my $rci_levels = $line_of_tokens_old->{_rci_levels};
+ my $rnesting_blocks = $line_of_tokens_old->{_rnesting_blocks};
+ my $rnesting_tokens = $line_of_tokens_old->{_rnesting_tokens};
+
+ my $jmax = @{$rtokens} - 1;
+ if ( $jmax >= 0 ) {
+ $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
+
+ DEVEL_MODE
+ && check_sequence_numbers( $rtokens, $rtoken_type,
+ $rtype_sequence, $line_number );
+
+ # Find the starting nesting depth ...
+ # It must be the value of variable 'level' of the first token
+ # because the nesting depth is used as a token tag in the
+ # vertical aligner and is compared to actual levels.
+ # So vertical alignment problems will occur with any other
+ # starting value.
+ if ( !defined($nesting_depth) ) {
+ $nesting_depth = $rlevels->[0];
+ $nesting_depth = 0 if ( $nesting_depth < 0 );
+ $rdepth_of_opening_seqno->[SEQ_ROOT] = $nesting_depth - 1;
+ }
- my $self = shift;
- my $rLL = $self->{rLL};
- my $Klimit = $self->{Klimit};
- my $rlines = $self->{rlines};
+ foreach my $j ( 0 .. $jmax ) {
+
+ # Do not clip the 'level' variable yet. We will do this
+ # later, in sub 'store_token_to_go'. The reason is that in
+ # files with level errors, the logic in 'weld_cuddled_else'
+ # uses a stack logic that will give bad welds if we clip
+ # levels here.
+ ## if ( $rlevels->[$j] < 0 ) { $rlevels->[$j] = 0 }
+
+ # Handle tokens with sequence numbers ...
+ my $seqno = $rtype_sequence->[$j];
+ if ($seqno) {
+ my $token = $rtokens->[$j];
+ my $sign = 1;
+ if ( $is_opening_token{$token} ) {
+ $K_opening_container->{$seqno} = @{$rLL};
+ $rdepth_of_opening_seqno->[$seqno] = $nesting_depth;
+ $nesting_depth++;
+
+ # Save a sequenced block type at its opening token.
+ # Note that unsequenced block types can occur in
+ # unbalanced code with errors but are ignored here.
+ if ( $rblock_type->[$j] ) {
+ my $block_type = $rblock_type->[$j];
+ $rblock_type_of_seqno->{$seqno} = $block_type;
+ if ( substr( $block_type, 0, 3 ) eq 'sub'
+ || $rOpts_sub_alias_list )
+ {
+ if ( $block_type =~ /$ASUB_PATTERN/ ) {
+ $self->[_ris_asub_block_]->{$seqno} = 1;
+ }
+ elsif ( $block_type =~ /$SUB_PATTERN/ ) {
+ $self->[_ris_sub_block_]->{$seqno} = 1;
+ }
+ }
+ }
+ }
+ elsif ( $is_closing_token{$token} ) {
+
+ # The opening depth should always be defined, and
+ # it should equal $nesting_depth-1. To protect
+ # against unforseen error conditions, however, we
+ # will check this and fix things if necessary. For
+ # a test case see issue c055.
+ my $opening_depth =
+ $rdepth_of_opening_seqno->[$seqno];
+ if ( !defined($opening_depth) ) {
+ $opening_depth = $nesting_depth - 1;
+ $opening_depth = 0 if ( $opening_depth < 0 );
+ $rdepth_of_opening_seqno->[$seqno] =
+ $opening_depth;
+
+ # This is not fatal but should not happen. The
+ # tokenizer generates sequence numbers
+ # incrementally upon encountering each new
+ # opening token, so every positive sequence
+ # number should correspond to an opening token.
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+No opening token seen for closing token = '$token' at seq=$seqno at depth=$opening_depth
+EOM
+ }
+ }
+ $K_closing_container->{$seqno} = @{$rLL};
+ $nesting_depth = $opening_depth;
+ $sign = -1;
+ }
+ elsif ( $token eq '?' ) {
+ }
+ elsif ( $token eq ':' ) {
+ $sign = -1;
+ }
- # Re-construct the arrays of tokens associated with the original input lines
- # since they have probably changed due to inserting and deleting blanks
- # and a few other tokens.
+ # The only sequenced types output by the tokenizer are
+ # the opening & closing containers and the ternary
+ # types. So we would only get here if the tokenizer has
+ # been changed to mark some other tokens with sequence
+ # numbers, or if an error has been introduced in a
+ # hash such as %is_opening_container
+ else {
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+Unexpected sequenced token '$token' of type '$rtoken_type->[$j]', sequence=$seqno arrived from tokenizer.
+Expecting only opening or closing container tokens or ternary tokens with sequence numbers.
+EOM
+ }
+ }
- my $Kmax = -1;
+ if ( $sign > 0 ) {
+ $Iss_opening->[$seqno] = @{$rSS};
+
+ # For efficiency, we find the maximum level of
+ # opening tokens of any type. The actual maximum
+ # level will be that of their contents which is 1
+ # greater. That will be fixed in sub
+ # 'finish_formatting'.
+ my $level = $rlevels->[$j];
+ if ( $level > $self->[_maximum_level_] ) {
+ $self->[_maximum_level_] = $level;
+ $self->[_maximum_level_at_line_] = $line_number;
+ }
+ }
+ else { $Iss_closing->[$seqno] = @{$rSS} }
+ push @{$rSS}, $sign * $seqno;
- # This is the next token and its line index:
- my $Knext = 0;
- my $inext;
- if ( defined($rLL) && @{$rLL} ) {
- $Kmax = @{$rLL} - 1;
- $inext = $rLL->[$Knext]->[_LINE_INDEX_];
- }
+ }
- my $get_inext = sub {
- if ( $Knext < 0 || $Knext > $Kmax ) { $inext = undef }
- else {
- $inext = $rLL->[$Knext]->[_LINE_INDEX_];
- }
- return $inext;
- };
+ my @tokary;
+ @tokary[
+ _TOKEN_, _TYPE_, _TYPE_SEQUENCE_,
+ _LEVEL_, _CI_LEVEL_, _LINE_INDEX_,
+ ]
+ = (
+ $rtokens->[$j], $rtoken_type->[$j],
+ $seqno, $rlevels->[$j],
+ $rci_levels->[$j], $line_number - 1,
+ );
+ push @{$rLL}, \@tokary;
+ } ## end foreach my $j ( 0 .. $jmax )
- # Remember the most recently output token index
- my $Klast_out;
+ $Klimit = @{$rLL} - 1;
- my $iline = -1;
- foreach my $line_of_tokens ( @{$rlines} ) {
- $iline++;
- my $line_type = $line_of_tokens->{_line_type};
- if ( $line_type eq 'CODE' ) {
+ # Need to remember if we can trim the input line
+ $line_of_tokens->{_ended_in_blank_token} =
+ $rtoken_type->[$jmax] eq 'b';
- my @K_array;
- my $rK_range;
- $inext = $get_inext->();
- while ( defined($inext) && $inext <= $iline ) {
- push @{K_array}, $Knext;
- $Knext += 1;
- $inext = $get_inext->();
- }
+ $line_of_tokens->{_level_0} = $rlevels->[0];
+ $line_of_tokens->{_ci_level_0} = $rci_levels->[0];
+ $line_of_tokens->{_nesting_blocks_0} = $rnesting_blocks->[0];
+ $line_of_tokens->{_nesting_tokens_0} = $rnesting_tokens->[0];
+ } ## end if ( $jmax >= 0 )
- # Delete any terminal blank token
- if (@K_array) {
- if ( $rLL->[ $K_array[-1] ]->[_TYPE_] eq 'b' ) {
- pop @K_array;
- }
- }
+ $tee_output ||=
+ $rOpts_tee_block_comments
+ && $jmax == 0
+ && $rLL->[$Kfirst]->[_TYPE_] eq '#';
- # Define the range of K indexes for the line:
- # $Kfirst = index of first token on line
- # $Klast_out = index of last token on line
- my ( $Kfirst, $Klast );
- if (@K_array) {
- $Kfirst = $K_array[0];
- $Klast = $K_array[-1];
- $Klast_out = $Klast;
- }
+ $tee_output ||=
+ $rOpts_tee_side_comments
+ && defined($Kfirst)
+ && $Klimit > $Kfirst
+ && $rLL->[$Klimit]->[_TYPE_] eq '#';
- # It is only safe to trim the actual line text if the input
- # line had a terminal blank token. Otherwise, we may be
- # in a quote.
- if ( $line_of_tokens->{_ended_in_blank_token} ) {
- $line_of_tokens->{_line_text} =~ s/\s+$//;
- }
- $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
+ } ## end if ( $line_type eq 'CODE')
- # Deleting semicolons can create new empty code lines
- # which should be marked as blank
- if ( !defined($Kfirst) ) {
- my $code_type = $line_of_tokens->{_code_type};
- if ( !$code_type ) {
- $line_of_tokens->{_code_type} = 'BL';
- }
- }
+ # Finish storing line variables
+ if ($tee_output) {
+ my $fh_tee = $self->[_fh_tee_];
+ my $line_text = $line_of_tokens_old->{_line_text};
+ $fh_tee->print($line_text) if ($fh_tee);
}
- }
- # There shouldn't be any nodes beyond the last one unless we start
- # allowing 'link_after' calls
- if ( defined($inext) ) {
+ $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ];
+ $line_of_tokens->{_code_type} = $CODE_type;
+ $self->[_Klimit_] = $Klimit;
- Fault("unexpected tokens at end of file when reconstructing lines");
+ push @{$rlines_new}, $line_of_tokens;
+ return;
}
+} ## end closure write_line
- return;
-}
-
-sub dump_verbatim {
- my $self = shift;
- my $rlines = $self->{rlines};
- foreach my $line ( @{$rlines} ) {
- my $input_line = $line->{_line_text};
- $self->write_unindented_line($input_line);
- }
- return;
-}
+#############################################
+# CODE SECTION 5: Pre-process the entire file
+#############################################
sub finish_formatting {
# The file has been tokenized and is ready to be formatted.
# All of the relevant data is stored in $self, ready to go.
+ # Check the maximum level. If it is extremely large we will give up and
+ # output the file verbatim. Note that the actual maximum level is 1
+ # greater than the saved value, so we fix that here.
+ $self->[_maximum_level_] += 1;
+ my $maximum_level = $self->[_maximum_level_];
+ my $maximum_table_index = $#maximum_line_length_at_level;
+ if ( !$severe_error && $maximum_level >= $maximum_table_index ) {
+ $severe_error ||= 1;
+ Warn(<<EOM);
+The maximum indentation level, $maximum_level, exceeds the builtin limit of $maximum_table_index.
+Something may be wrong; formatting will be skipped.
+EOM
+ }
+
# output file verbatim if severe error or no formatting requested
if ( $severe_error || $rOpts->{notidy} ) {
$self->dump_verbatim();
return;
}
- # Make a pass through the lines, looking at lines of CODE and identifying
- # special processing needs, such format skipping sections marked by
- # special comments
- $self->scan_comments();
-
- # Find nested pairs of container tokens for any welding. This information
- # is also needed for adding semicolons, so it is split apart from the
- # welding step.
- $self->find_nested_pairs();
-
- # Make sure everything looks good
- $self->check_line_hashes();
+ # Update the 'save_logfile' flag based to include any tokenization errors.
+ # We can save time by skipping logfile calls if it is not going to be saved.
+ my $logger_object = $self->[_logger_object_];
+ if ($logger_object) {
+ $self->[_save_logfile_] = $logger_object->get_save_logfile();
+ }
- # Future: Place to Begin future Iteration Loop
- # foreach my $it_count(1..$maxit) {
+ $self->set_CODE_type();
- # Future: We must reset some things after the first iteration.
- # This includes:
- # - resetting levels if there was any welding
- # - resetting any phantom semicolons
- # - dealing with any line numbering issues so we can relate final lines
- # line numbers with input line numbers.
- #
- # If ($it_count>1) {
- # Copy {level_raw} to [_LEVEL_] if ($it_count>1)
- # Renumber lines
- # }
+ # Verify that the line hash does not have any unknown keys.
+ $self->check_line_hashes() if (DEVEL_MODE);
# Make a pass through all tokens, adding or deleting any whitespace as
# required. Also make any other changes, such as adding semicolons.
# remains fixed for the rest of this iteration.
$self->respace_tokens();
- # Make a hierarchical map of the containers
- $self->map_containers();
+ $self->set_excluded_lp_containers();
+
+ $self->find_multiline_qw();
+
+ $self->keep_old_line_breaks();
# Implement any welding needed for the -wn or -cb options
$self->weld_containers();
+ $self->collapsed_lengths()
+ if ( $rOpts_line_up_parentheses && $rOpts_extended_line_up_parentheses );
+
# Locate small nested blocks which should not be broken
$self->mark_short_nested_blocks();
+ $self->adjust_indentation_levels();
+
+ # Verify that the main token array looks OK. If this ever causes a fault
+ # then place similar checks before the sub calls above to localize the
+ # problem.
+ $self->check_rLL("Before 'process_all_lines'") if (DEVEL_MODE);
+
# Finishes formatting and write the result to the line sink.
# Eventually this call should just change the 'rlines' data according to the
# new line breaks and then return so that we can do an internal iteration
# before continuing with the next stages of formatting.
- $self->break_lines();
-
- ############################################################
- # A possible future decomposition of 'break_lines()' follows.
- # Benefits:
- # - allow perltidy to do an internal iteration which eliminates
- # many unnecessary steps, such as re-parsing and vertical alignment.
- # This will allow iterations to be automatic.
- # - consolidate all length calculations to allow utf8 alignment
- ############################################################
-
- # Future: Check for convergence of beginning tokens on CODE lines
-
- # Future: End of Iteration Loop
-
- # Future: add_padding($rargs);
-
- # Future: add_closing_side_comments($rargs);
-
- # Future: vertical_alignment($rargs);
-
- # Future: output results
+ $self->process_all_lines();
# A final routine to tie up any loose ends
$self->wrapup();
return;
}
-sub create_one_line_block {
- ( $index_start_one_line_block, $semicolons_before_block_self_destruct ) =
- @_;
- return;
-}
+sub set_CODE_type {
+ my ($self) = @_;
-sub destroy_one_line_block {
- $index_start_one_line_block = UNDEFINED_INDEX;
- $semicolons_before_block_self_destruct = 0;
- return;
-}
+ # This routine performs two tasks:
+
+ # TASK 1: Examine each line of code and set a flag '$CODE_type' to describe
+ # any special processing that it requires.
+
+ # TASK 2: Delete side comments if requested.
+
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
+ my $rlines = $self->[_rlines_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+
+ my $rOpts_format_skipping_begin = $rOpts->{'format-skipping-begin'};
+ my $rOpts_format_skipping_end = $rOpts->{'format-skipping-end'};
+ my $rOpts_static_block_comment_prefix =
+ $rOpts->{'static-block-comment-prefix'};
+
+ # Remember indexes of lines with side comments
+ my @ix_side_comments;
+
+ my $In_format_skipping_section = 0;
+ my $Saw_VERSION_in_this_file = 0;
+ my $has_side_comment = 0;
+ my ( $Kfirst, $Klast );
+ my $CODE_type;
+
+ #------------------------------
+ # TASK 1: Loop to set CODE_type
+ #------------------------------
+
+ # Possible CODE_types
+ # 'VB' = Verbatim - line goes out verbatim (a quote)
+ # 'FS' = Format Skipping - line goes out verbatim
+ # 'BL' = Blank Line
+ # 'HSC' = Hanging Side Comment - fix this hanging side comment
+ # 'SBCX'= Static Block Comment Without Leading Space
+ # 'SBC' = Static Block Comment
+ # 'BC' = Block Comment - an ordinary full line comment
+ # 'IO' = Indent Only - line goes out unchanged except for indentation
+ # 'NIN' = No Internal Newlines - line does not get broken
+ # 'VER' = VERSION statement
+ # '' = ordinary line of code with no restructions
+
+ my $ix_line = -1;
+ foreach my $line_of_tokens ( @{$rlines} ) {
+ $ix_line++;
+ my $input_line_no = $line_of_tokens->{_line_number};
+ my $line_type = $line_of_tokens->{_line_type};
-sub leading_spaces_to_go {
+ my $Last_line_had_side_comment = $has_side_comment;
+ if ($has_side_comment) {
+ push @ix_side_comments, $ix_line - 1;
+ }
+ $has_side_comment = 0;
- # return the number of indentation spaces for a token in the output stream;
- # these were previously stored by 'set_leading_whitespace'.
+ next unless ( $line_type eq 'CODE' );
- my $ii = shift;
- if ( $ii < 0 ) { $ii = 0 }
- return get_spaces( $leading_spaces_to_go[$ii] );
+ my $Klast_prev = $Klast;
-}
+ my $rK_range = $line_of_tokens->{_rK_range};
+ ( $Kfirst, $Klast ) = @{$rK_range};
-sub get_spaces {
+ my $last_CODE_type = $CODE_type;
+ $CODE_type = "";
- # return the number of leading spaces associated with an indentation
- # variable $indentation is either a constant number of spaces or an object
- # with a get_spaces method.
- my $indentation = shift;
- return ref($indentation) ? $indentation->get_spaces() : $indentation;
-}
+ my $input_line = $line_of_tokens->{_line_text};
+ my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1;
-sub get_recoverable_spaces {
+ my $is_block_comment = 0;
+ if ( $jmax >= 0 && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
+ if ( $jmax == 0 ) { $is_block_comment = 1; }
+ else { $has_side_comment = 1 }
+ }
- # return the number of spaces (+ means shift right, - means shift left)
- # that we would like to shift a group of lines with the same indentation
- # to get them to line up with their opening parens
- my $indentation = shift;
- return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
-}
+ # Write line verbatim if we are in a formatting skip section
+ if ($In_format_skipping_section) {
-sub get_available_spaces_to_go {
+ # Note: extra space appended to comment simplifies pattern matching
+ if (
+ $is_block_comment
- my $ii = shift;
- my $item = $leading_spaces_to_go[$ii];
+ # optional fast pre-check
+ && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#>>>'
+ || $rOpts_format_skipping_end )
- # return the number of available leading spaces associated with an
- # indentation variable. $indentation is either a constant number of
- # spaces or an object with a get_available_spaces method.
- return ref($item) ? $item->get_available_spaces() : 0;
-}
+ && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
+ /$format_skipping_pattern_end/
+ )
+ {
+ $In_format_skipping_section = 0;
+ write_logfile_entry(
+ "Line $input_line_no: Exiting format-skipping section\n");
+ }
+ $CODE_type = 'FS';
+ goto NEXT;
+ }
-sub new_lp_indentation_item {
+ # Check for a continued quote..
+ if ( $line_of_tokens->{_starting_in_quote} ) {
- # this is an interface to the IndentationItem class
- my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
+ # A line which is entirely a quote or pattern must go out
+ # verbatim. Note: the \n is contained in $input_line.
+ if ( $jmax <= 0 ) {
+ if ( ( $input_line =~ "\t" ) ) {
+ my $input_line_number = $line_of_tokens->{_line_number};
+ $self->note_embedded_tab($input_line_number);
+ }
+ $CODE_type = 'VB';
+ goto NEXT;
+ }
+ }
- # A negative level implies not to store the item in the item_list
- my $index = 0;
- if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
+ # See if we are entering a formatting skip section
+ if (
+ $is_block_comment
- my $item = Perl::Tidy::IndentationItem->new(
- $spaces, $level,
- $ci_level, $available_spaces,
- $index, $gnu_sequence_number,
- $align_paren, $max_gnu_stack_index,
- $line_start_index_to_go,
- );
+ # optional fast pre-check
+ && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#<<<'
+ || $rOpts_format_skipping_begin )
- if ( $level >= 0 ) {
- $gnu_item_list[$max_gnu_item_index] = $item;
- }
-
- return $item;
-}
-
-sub set_leading_whitespace {
-
- # This routine defines leading whitespace
- # given: the level and continuation_level of a token,
- # define: space count of leading string which would apply if it
- # were the first token of a new line.
-
- my ( $level_abs, $ci_level, $in_continued_quote ) = @_;
+ && $rOpts_format_skipping
+ && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
+ /$format_skipping_pattern_begin/
+ )
+ {
+ $In_format_skipping_section = 1;
+ write_logfile_entry(
+ "Line $input_line_no: Entering format-skipping section\n");
+ $CODE_type = 'FS';
+ goto NEXT;
+ }
- # Adjust levels if necessary to recycle whitespace:
- # given $level_abs, the absolute level
- # define $level, a possibly reduced level for whitespace
- my $level = $level_abs;
- if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) {
- if ( $level_abs < $whitespace_last_level ) {
- pop(@whitespace_level_stack);
+ # ignore trailing blank tokens (they will get deleted later)
+ if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
+ $jmax--;
}
- if ( !@whitespace_level_stack ) {
- push @whitespace_level_stack, $level_abs;
+
+ # blank line..
+ if ( $jmax < 0 ) {
+ $CODE_type = 'BL';
+ goto NEXT;
}
- elsif ( $level_abs > $whitespace_last_level ) {
- $level = $whitespace_level_stack[-1] +
- ( $level_abs - $whitespace_last_level );
+ # Handle comments
+ if ($is_block_comment) {
+
+ # see if this is a static block comment (starts with ## by default)
+ my $is_static_block_comment = 0;
+ my $no_leading_space = substr( $input_line, 0, 1 ) eq '#';
if (
- # 1 Try to break at a block brace
+
+ # optional fast pre-check
(
- $level > $rOpts_whitespace_cycle
- && $last_nonblank_type eq '{'
- && $last_nonblank_token eq '{'
+ substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 2 ) eq '##'
+ || $rOpts_static_block_comment_prefix
)
- # 2 Then either a brace or bracket
- || ( $level > $rOpts_whitespace_cycle + 1
- && $last_nonblank_token =~ /^[\{\[]$/ )
-
- # 3 Then a paren too
- || $level > $rOpts_whitespace_cycle + 2
+ && $rOpts_static_block_comments
+ && $input_line =~ /$static_block_comment_pattern/
)
{
- $level = 1;
+ $is_static_block_comment = 1;
}
- push @whitespace_level_stack, $level;
- }
- $level = $whitespace_level_stack[-1];
- }
- $whitespace_last_level = $level_abs;
-
- # modify for -bli, which adds one continuation indentation for
- # opening braces
- if ( $rOpts_brace_left_and_indent
- && $max_index_to_go == 0
- && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
- {
- $ci_level++;
- }
-
- # patch to avoid trouble when input file has negative indentation.
- # other logic should catch this error.
- if ( $level < 0 ) { $level = 0 }
-
- #-------------------------------------------
- # handle the standard indentation scheme
- #-------------------------------------------
- unless ($rOpts_line_up_parentheses) {
- my $space_count =
- $ci_level * $rOpts_continuation_indentation +
- $level * $rOpts_indent_columns;
- my $ci_spaces =
- ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
-
- if ($in_continued_quote) {
- $space_count = 0;
- $ci_spaces = 0;
- }
- $leading_spaces_to_go[$max_index_to_go] = $space_count;
- $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
- return;
- }
-
- #-------------------------------------------------------------
- # handle case of -lp indentation..
- #-------------------------------------------------------------
-
- # The continued_quote flag means that this is the first token of a
- # line, and it is the continuation of some kind of multi-line quote
- # or pattern. It requires special treatment because it must have no
- # added leading whitespace. So we create a special indentation item
- # which is not in the stack.
- if ($in_continued_quote) {
- my $space_count = 0;
- my $available_space = 0;
- $level = -1; # flag to prevent storing in item_list
- $leading_spaces_to_go[$max_index_to_go] =
- $reduced_spaces_to_go[$max_index_to_go] =
- new_lp_indentation_item( $space_count, $level, $ci_level,
- $available_space, 0 );
- return;
- }
-
- # get the top state from the stack
- my $space_count = $gnu_stack[$max_gnu_stack_index]->get_spaces();
- my $current_level = $gnu_stack[$max_gnu_stack_index]->get_level();
- my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
-
- my $type = $types_to_go[$max_index_to_go];
- my $token = $tokens_to_go[$max_index_to_go];
- my $total_depth = $nesting_depth_to_go[$max_index_to_go];
-
- if ( $type eq '{' || $type eq '(' ) {
-
- $gnu_comma_count{ $total_depth + 1 } = 0;
- $gnu_arrow_count{ $total_depth + 1 } = 0;
-
- # If we come to an opening token after an '=' token of some type,
- # see if it would be helpful to 'break' after the '=' to save space
- my $last_equals = $last_gnu_equals{$total_depth};
- if ( $last_equals && $last_equals > $line_start_index_to_go ) {
-
- # find the position if we break at the '='
- my $i_test = $last_equals;
- if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
-
- # TESTING
- ##my $too_close = ($i_test==$max_index_to_go-1);
-
- my $test_position = total_line_length( $i_test, $max_index_to_go );
- my $mll = maximum_line_length($i_test);
+ # Check for comments which are line directives
+ # Treat exactly as static block comments without leading space
+ # reference: perlsyn, near end, section Plain Old Comments (Not!)
+ # example: '# line 42 "new_filename.plx"'
if (
-
- # the equals is not just before an open paren (testing)
- ##!$too_close &&
-
- # if we are beyond the midpoint
- $gnu_position_predictor > $mll - $rOpts_maximum_line_length / 2
-
- # or we are beyond the 1/4 point and there was an old
- # break at the equals
- || (
- $gnu_position_predictor >
- $mll - $rOpts_maximum_line_length * 3 / 4
- && (
- $old_breakpoint_to_go[$last_equals]
- || ( $last_equals > 0
- && $old_breakpoint_to_go[ $last_equals - 1 ] )
- || ( $last_equals > 1
- && $types_to_go[ $last_equals - 1 ] eq 'b'
- && $old_breakpoint_to_go[ $last_equals - 2 ] )
- )
- )
+ $no_leading_space
+ && $input_line =~ /^\# \s*
+ line \s+ (\d+) \s*
+ (?:\s("?)([^"]+)\2)? \s*
+ $/x
)
{
-
- # then make the switch -- note that we do not set a real
- # breakpoint here because we may not really need one; sub
- # scan_list will do that if necessary
- $line_start_index_to_go = $i_test + 1;
- $gnu_position_predictor = $test_position;
+ $is_static_block_comment = 1;
}
- }
- }
-
- my $halfway =
- maximum_line_length_for_level($level) - $rOpts_maximum_line_length / 2;
-
- # Check for decreasing depth ..
- # Note that one token may have both decreasing and then increasing
- # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
- # in this example we would first go back to (1,0) then up to (2,0)
- # in a single call.
- if ( $level < $current_level || $ci_level < $current_ci_level ) {
-
- # loop to find the first entry at or completely below this level
- my ( $lev, $ci_lev );
- while (1) {
- if ($max_gnu_stack_index) {
-
- # save index of token which closes this level
- $gnu_stack[$max_gnu_stack_index]->set_closed($max_index_to_go);
- # Undo any extra indentation if we saw no commas
- my $available_spaces =
- $gnu_stack[$max_gnu_stack_index]->get_available_spaces();
+ # look for hanging side comment ...
+ if (
+ $Last_line_had_side_comment # last line had side comment
+ && !$no_leading_space # there is some leading space
+ && !
+ $is_static_block_comment # do not make static comment hanging
+ )
+ {
- my $comma_count = 0;
- my $arrow_count = 0;
- if ( $type eq '}' || $type eq ')' ) {
- $comma_count = $gnu_comma_count{$total_depth};
- $arrow_count = $gnu_arrow_count{$total_depth};
- $comma_count = 0 unless $comma_count;
- $arrow_count = 0 unless $arrow_count;
+ # continuing an existing HSC chain?
+ if ( $last_CODE_type eq 'HSC' ) {
+ $has_side_comment = 1;
+ $CODE_type = 'HSC';
+ goto NEXT;
}
- $gnu_stack[$max_gnu_stack_index]->set_comma_count($comma_count);
- $gnu_stack[$max_gnu_stack_index]->set_arrow_count($arrow_count);
-
- if ( $available_spaces > 0 ) {
- if ( $comma_count <= 0 || $arrow_count > 0 ) {
+ # starting a new HSC chain?
+ elsif (
- my $i = $gnu_stack[$max_gnu_stack_index]->get_index();
- my $seqno =
- $gnu_stack[$max_gnu_stack_index]
- ->get_sequence_number();
+ $rOpts->{'hanging-side-comments'} # user is allowing
+ # hanging side comments
+ # like this
- # Be sure this item was created in this batch. This
- # should be true because we delete any available
- # space from open items at the end of each batch.
- if ( $gnu_sequence_number != $seqno
- || $i > $max_gnu_item_index )
- {
- warning(
-"Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
- );
- report_definite_bug();
- }
+ && ( defined($Klast_prev) && $Klast_prev > 1 )
- else {
- if ( $arrow_count == 0 ) {
- $gnu_item_list[$i]
- ->permanently_decrease_available_spaces(
- $available_spaces);
- }
- else {
- $gnu_item_list[$i]
- ->tentatively_decrease_available_spaces(
- $available_spaces);
- }
- foreach my $j ( $i + 1 .. $max_gnu_item_index ) {
- $gnu_item_list[$j]
- ->decrease_SPACES($available_spaces);
- }
- }
- }
- }
+ # and the previous side comment was not static (issue c070)
+ && !(
+ $rOpts->{'static-side-comments'}
+ && $rLL->[$Klast_prev]->[_TOKEN_] =~
+ /$static_side_comment_pattern/
+ )
- # go down one level
- --$max_gnu_stack_index;
- $lev = $gnu_stack[$max_gnu_stack_index]->get_level();
- $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
+ )
+ {
- # stop when we reach a level at or below the current level
- if ( $lev <= $level && $ci_lev <= $ci_level ) {
- $space_count =
- $gnu_stack[$max_gnu_stack_index]->get_spaces();
- $current_level = $lev;
- $current_ci_level = $ci_lev;
- last;
+ # and it is not a closing side comment (issue c070).
+ my $K_penult = $Klast_prev - 1;
+ $K_penult -= 1 if ( $rLL->[$K_penult]->[_TYPE_] eq 'b' );
+ my $follows_csc =
+ ( $rLL->[$K_penult]->[_TOKEN_] eq '}'
+ && $rLL->[$K_penult]->[_TYPE_] eq '}'
+ && $rLL->[$Klast_prev]->[_TOKEN_] =~
+ /$closing_side_comment_prefix_pattern/ );
+
+ if ( !$follows_csc ) {
+ $has_side_comment = 1;
+ $CODE_type = 'HSC';
+ goto NEXT;
+ }
}
}
- # reached bottom of stack .. should never happen because
- # only negative levels can get here, and $level was forced
- # to be positive above.
+ if ($is_static_block_comment) {
+ $CODE_type = $no_leading_space ? 'SBCX' : 'SBC';
+ goto NEXT;
+ }
+ elsif ($Last_line_had_side_comment
+ && !$rOpts_maximum_consecutive_blank_lines
+ && $rLL->[$Kfirst]->[_LEVEL_] > 0 )
+ {
+ # Emergency fix to keep a block comment from becoming a hanging
+ # side comment. This fix is for the case that blank lines
+ # cannot be inserted. There is related code in sub
+ # 'process_line_of_CODE'
+ $CODE_type = 'SBCX';
+ goto NEXT;
+ }
else {
- warning(
-"program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
- );
- report_definite_bug();
- last;
+ $CODE_type = 'BC';
+ goto NEXT;
}
}
- }
-
- # handle increasing depth
- if ( $level > $current_level || $ci_level > $current_ci_level ) {
- # Compute the standard incremental whitespace. This will be
- # the minimum incremental whitespace that will be used. This
- # choice results in a smooth transition between the gnu-style
- # and the standard style.
- my $standard_increment =
- ( $level - $current_level ) * $rOpts_indent_columns +
- ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
+ # End of comments. Handle a line of normal code:
- # Now we have to define how much extra incremental space
- # ("$available_space") we want. This extra space will be
- # reduced as necessary when long lines are encountered or when
- # it becomes clear that we do not have a good list.
- my $available_space = 0;
- my $align_paren = 0;
- my $excess = 0;
-
- # initialization on empty stack..
- if ( $max_gnu_stack_index == 0 ) {
- $space_count = $level * $rOpts_indent_columns;
+ if ($rOpts_indent_only) {
+ $CODE_type = 'IO';
+ goto NEXT;
}
- # if this is a BLOCK, add the standard increment
- elsif ($last_nonblank_block_type) {
- $space_count += $standard_increment;
+ if ( !$rOpts_add_newlines ) {
+ $CODE_type = 'NIN';
+ goto NEXT;
}
- # if last nonblank token was not structural indentation,
- # just use standard increment
- elsif ( $last_nonblank_type ne '{' ) {
- $space_count += $standard_increment;
+ # Patch needed for MakeMaker. Do not break a statement
+ # in which $VERSION may be calculated. See MakeMaker.pm;
+ # this is based on the coding in it.
+ # The first line of a file that matches this will be eval'd:
+ # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
+ # Examples:
+ # *VERSION = \'1.01';
+ # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
+ # We will pass such a line straight through without breaking
+ # it unless -npvl is used.
+
+ # Patch for problem reported in RT #81866, where files
+ # had been flattened into a single line and couldn't be
+ # tidied without -npvl. There are two parts to this patch:
+ # First, it is not done for a really long line (80 tokens for now).
+ # Second, we will only allow up to one semicolon
+ # before the VERSION. We need to allow at least one semicolon
+ # for statements like this:
+ # require Exporter; our $VERSION = $Exporter::VERSION;
+ # where both statements must be on a single line for MakeMaker
+
+ my $is_VERSION_statement = 0;
+ if ( !$Saw_VERSION_in_this_file
+ && $jmax < 80
+ && $input_line =~
+ /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
+ {
+ $Saw_VERSION_in_this_file = 1;
+ write_logfile_entry("passing VERSION line; -npvl deactivates\n");
+
+ # This code type has lower priority than others
+ $CODE_type = 'VER';
+ goto NEXT;
}
- # otherwise use the space to the first non-blank level change token
- else {
+ NEXT:
+ $line_of_tokens->{_code_type} = $CODE_type;
+ }
- $space_count = $gnu_position_predictor;
+ if ($has_side_comment) {
+ push @ix_side_comments, $ix_line;
+ }
- my $min_gnu_indentation =
- $gnu_stack[$max_gnu_stack_index]->get_spaces();
+ return
+ if ( !$rOpts_delete_side_comments
+ && !$rOpts_delete_closing_side_comments );
- $available_space = $space_count - $min_gnu_indentation;
- if ( $available_space >= $standard_increment ) {
- $min_gnu_indentation += $standard_increment;
- }
- elsif ( $available_space > 1 ) {
- $min_gnu_indentation += $available_space + 1;
- }
- elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
- if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
- $min_gnu_indentation += 2;
- }
- else {
- $min_gnu_indentation += 1;
- }
- }
- else {
- $min_gnu_indentation += $standard_increment;
- }
- $available_space = $space_count - $min_gnu_indentation;
+ #-------------------------------------
+ # TASK 2: Loop to delete side comments
+ #-------------------------------------
+
+ # Handle any requested side comment deletions. It is easier to get
+ # this done here rather than farther down the pipeline because IO
+ # lines take a different route, and because lines with deleted HSC
+ # become BL lines. We have already handled any tee requests in sub
+ # getline, so it is safe to delete side comments now.
- if ( $available_space < 0 ) {
- $space_count = $min_gnu_indentation;
- $available_space = 0;
+ # Also, we can get this done efficiently here.
+
+ foreach my $ix (@ix_side_comments) {
+ my $line_of_tokens = $rlines->[$ix];
+ my $line_type = $line_of_tokens->{_line_type};
+
+ # This fault shouldn't happen because we only saved CODE lines with
+ # side comments in the TASK 1 loop above.
+ if ( $line_type ne 'CODE' ) {
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+Hit unexpected line_type = '$line_type' while deleting side comments, should be 'CODE'
+EOM
}
- $align_paren = 1;
+ next;
}
- # update state, but not on a blank token
- if ( $types_to_go[$max_index_to_go] ne 'b' ) {
+ my $CODE_type = $line_of_tokens->{_code_type};
+ my $rK_range = $line_of_tokens->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ my $delete_side_comment =
+ $rOpts_delete_side_comments
+ && defined($Kfirst)
+ && $rLL->[$Klast]->[_TYPE_] eq '#'
+ && ( $Klast > $Kfirst || $CODE_type eq 'HSC' )
+ && (!$CODE_type
+ || $CODE_type eq 'HSC'
+ || $CODE_type eq 'IO'
+ || $CODE_type eq 'NIN' );
+
+ if (
+ $rOpts_delete_closing_side_comments
+ && !$delete_side_comment
+ && defined($Kfirst)
+ && $Klast > $Kfirst
+ && $rLL->[$Klast]->[_TYPE_] eq '#'
+ && ( !$CODE_type
+ || $CODE_type eq 'HSC'
+ || $CODE_type eq 'IO'
+ || $CODE_type eq 'NIN' )
+ )
+ {
+ my $token = $rLL->[$Klast]->[_TOKEN_];
+ my $K_m = $Klast - 1;
+ my $type_m = $rLL->[$K_m]->[_TYPE_];
+ if ( $type_m eq 'b' && $K_m > $Kfirst ) { $K_m-- }
+ my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_];
+ if ($seqno_m) {
+ my $block_type_m = $rblock_type_of_seqno->{$seqno_m};
+ if ( $block_type_m
+ && $token =~ /$closing_side_comment_prefix_pattern/
+ && $block_type_m =~ /$closing_side_comment_list_pattern/ )
+ {
+ $delete_side_comment = 1;
+ }
+ }
+ } ## end if ( $rOpts_delete_closing_side_comments...)
- $gnu_stack[$max_gnu_stack_index]->set_have_child(1);
+ if ($delete_side_comment) {
- ++$max_gnu_stack_index;
- $gnu_stack[$max_gnu_stack_index] =
- new_lp_indentation_item( $space_count, $level, $ci_level,
- $available_space, $align_paren );
+ # We are actually just changing the side comment to a blank.
+ # This may produce multiple blanks in a row, but sub respace_tokens
+ # will check for this and fix it.
+ $rLL->[$Klast]->[_TYPE_] = 'b';
+ $rLL->[$Klast]->[_TOKEN_] = ' ';
- # If the opening paren is beyond the half-line length, then
- # we will use the minimum (standard) indentation. This will
- # help avoid problems associated with running out of space
- # near the end of a line. As a result, in deeply nested
- # lists, there will be some indentations which are limited
- # to this minimum standard indentation. But the most deeply
- # nested container will still probably be able to shift its
- # parameters to the right for proper alignment, so in most
- # cases this will not be noticeable.
- if ( $available_space > 0 && $space_count > $halfway ) {
- $gnu_stack[$max_gnu_stack_index]
- ->tentatively_decrease_available_spaces($available_space);
+ # The -io option outputs the line text, so we have to update
+ # the line text so that the comment does not reappear.
+ if ( $CODE_type eq 'IO' ) {
+ my $line = "";
+ foreach my $KK ( $Kfirst .. $Klast - 1 ) {
+ $line .= $rLL->[$KK]->[_TOKEN_];
+ }
+ $line =~ s/\s+$//;
+ $line_of_tokens->{_line_text} = $line . "\n";
}
+
+ # If we delete a hanging side comment the line becomes blank.
+ if ( $CODE_type eq 'HSC' ) { $line_of_tokens->{_code_type} = 'BL' }
}
}
- # Count commas and look for non-list characters. Once we see a
- # non-list character, we give up and don't look for any more commas.
- if ( $type eq '=>' ) {
- $gnu_arrow_count{$total_depth}++;
-
- # tentatively treating '=>' like '=' for estimating breaks
- # TODO: this could use some experimentation
- $last_gnu_equals{$total_depth} = $max_index_to_go;
- }
+ return;
+}
- elsif ( $type eq ',' ) {
- $gnu_comma_count{$total_depth}++;
+sub dump_verbatim {
+ my $self = shift;
+ my $rlines = $self->[_rlines_];
+ foreach my $line ( @{$rlines} ) {
+ my $input_line = $line->{_line_text};
+ $self->write_unindented_line($input_line);
}
+ return;
+}
- elsif ( $is_assignment{$type} ) {
- $last_gnu_equals{$total_depth} = $max_index_to_go;
- }
+my %wU;
+my %wiq;
+my %is_wit;
+my %is_sigil;
+my %is_nonlist_keyword;
+my %is_nonlist_type;
+my %is_special_check_type;
+my %is_s_y_m_slash;
+my %is_unexpected_equals;
- # this token might start a new line
- # if this is a non-blank..
- if ( $type ne 'b' ) {
+BEGIN {
- # and if ..
- if (
+ # added 'U' to fix cases b1125 b1126 b1127
+ my @q = qw(w U);
+ @{wU}{@q} = (1) x scalar(@q);
- # this is the first nonblank token of the line
- $max_index_to_go == 1 && $types_to_go[0] eq 'b'
+ @q = qw(w i q Q G C Z);
+ @{wiq}{@q} = (1) x scalar(@q);
- # or previous character was one of these:
- || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/
+ @q = qw(w i t);
+ @{is_wit}{@q} = (1) x scalar(@q);
- # or previous character was opening and this does not close it
- || ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
- || ( $last_nonblank_type_to_go eq '(' and $type ne ')' )
+ @q = qw($ & % * @);
+ @{is_sigil}{@q} = (1) x scalar(@q);
- # or this token is one of these:
- || $type =~ /^([\.]|\|\||\&\&)$/
+ # Parens following these keywords will not be marked as lists. Note that
+ # 'for' is not included and is handled separately, by including 'f' in the
+ # hash %is_counted_type, since it may or may not be a c-style for loop.
+ @q = qw( if elsif unless and or );
+ @is_nonlist_keyword{@q} = (1) x scalar(@q);
- # or this is a closing structure
- || ( $last_nonblank_type_to_go eq '}'
- && $last_nonblank_token_to_go eq $last_nonblank_type_to_go )
+ # Parens following these types will not be marked as lists
+ @q = qw( && || );
+ @is_nonlist_type{@q} = (1) x scalar(@q);
- # or previous token was keyword 'return'
- || ( $last_nonblank_type_to_go eq 'k'
- && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )
+ @q = qw( s y m / );
+ @is_s_y_m_slash{@q} = (1) x scalar(@q);
- # or starting a new line at certain keywords is fine
- || ( $type eq 'k'
- && $is_if_unless_and_or_last_next_redo_return{$token} )
+ @q = qw( = == != );
+ @is_unexpected_equals{@q} = (1) x scalar(@q);
- # or this is after an assignment after a closing structure
- || (
- $is_assignment{$last_nonblank_type_to_go}
- && (
- $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
+}
- # and it is significantly to the right
- || $gnu_position_predictor > $halfway
- )
- )
- )
- {
- check_for_long_gnu_style_lines();
- $line_start_index_to_go = $max_index_to_go;
+sub respace_tokens {
- # back up 1 token if we want to break before that type
- # otherwise, we may strand tokens like '?' or ':' on a line
- if ( $line_start_index_to_go > 0 ) {
- if ( $last_nonblank_type_to_go eq 'k' ) {
+ my $self = shift;
+ return if $rOpts->{'indent-only'};
- if ( $want_break_before{$last_nonblank_token_to_go} ) {
- $line_start_index_to_go--;
- }
- }
- elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
- $line_start_index_to_go--;
- }
- }
- }
- }
+ # This routine is called once per file to do as much formatting as possible
+ # before new line breaks are set.
- # remember the predicted position of this token on the output line
- if ( $max_index_to_go > $line_start_index_to_go ) {
- $gnu_position_predictor =
- total_line_length( $line_start_index_to_go, $max_index_to_go );
- }
- else {
- $gnu_position_predictor =
- $space_count + $token_lengths_to_go[$max_index_to_go];
- }
-
- # store the indentation object for this token
- # this allows us to manipulate the leading whitespace
- # (in case we have to reduce indentation to fit a line) without
- # having to change any token values
- $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
- $reduced_spaces_to_go[$max_index_to_go] =
- ( $max_gnu_stack_index > 0 && $ci_level )
- ? $gnu_stack[ $max_gnu_stack_index - 1 ]
- : $gnu_stack[$max_gnu_stack_index];
- return;
-}
+ # This routine makes all necessary and possible changes to the tokenization
+ # after the initial tokenization of the file. This is a tedious routine,
+ # but basically it consists of inserting and deleting whitespace between
+ # nonblank tokens according to the selected parameters. In a few cases
+ # non-space characters are added, deleted or modified.
-sub check_for_long_gnu_style_lines {
+ # The goal of this routine is to create a new token array which only needs
+ # the definition of new line breaks and padding to complete formatting. In
+ # a few cases we have to cheat a little to achieve this goal. In
+ # particular, we may not know if a semicolon will be needed, because it
+ # depends on how the line breaks go. To handle this, we include the
+ # semicolon as a 'phantom' which can be displayed as normal or as an empty
+ # string.
- # look at the current estimated maximum line length, and
- # remove some whitespace if it exceeds the desired maximum
+ # Method: The old tokens are copied one-by-one, with changes, from the old
+ # linear storage array $rLL to a new array $rLL_new.
- # this is only for the '-lp' style
- return unless ($rOpts_line_up_parentheses);
+ my $rLL = $self->[_rLL_];
+ my $Klimit_old = $self->[_Klimit_];
+ my $rlines = $self->[_rlines_];
+ my $length_function = $self->[_length_function_];
+ my $is_encoded_data = $self->[_is_encoded_data_];
- # nothing can be done if no stack items defined for this line
- return if ( $max_gnu_item_index == UNDEFINED_INDEX );
+ my $rLL_new = []; # This is the new array
+ my $rtoken_vars;
+ my $Ktoken_vars; # the old K value of $rtoken_vars
+ my ( $Kfirst_old, $Klast_old ); # Range of old line
+ my $Klast_old_code; # K of last token if side comment
+ my $Kmax = @{$rLL} - 1;
- # see if we have exceeded the maximum desired line length
- # keep 2 extra free because they are needed in some cases
- # (result of trial-and-error testing)
- my $spaces_needed =
- $gnu_position_predictor - maximum_line_length($max_index_to_go) + 2;
+ my $CODE_type = "";
+ my $line_type = "";
- return if ( $spaces_needed <= 0 );
+ # Set the whitespace flags, which indicate the token spacing preference.
+ my $rwhitespace_flags = $self->set_whitespace_flags();
- # We are over the limit, so try to remove a requested number of
- # spaces from leading whitespace. We are only allowed to remove
- # from whitespace items created on this batch, since others have
- # already been used and cannot be undone.
- my @candidates = ();
- my $i;
+ # we will be setting token lengths as we go
+ my $cumulative_length = 0;
- # loop over all whitespace items created for the current batch
- for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
- my $item = $gnu_item_list[$i];
+ my %seqno_stack;
+ my %K_old_opening_by_seqno = (); # Note: old K index
+ my $depth_next = 0;
+ my $depth_next_max = 0;
+
+ # Note that $K_opening_container and $K_closing_container have values
+ # defined in sub get_line() for the previous K indexes. They were needed
+ # in case option 'indent-only' was set, and we didn't get here. We no longer
+ # need those and will eliminate them now to avoid any possible mixing of
+ # old and new values.
+ my $K_opening_container = $self->[_K_opening_container_] = {};
+ my $K_closing_container = $self->[_K_closing_container_] = {};
+
+ my $K_closing_ternary = $self->[_K_closing_ternary_];
+ my $K_opening_ternary = $self->[_K_opening_ternary_];
+ my $rK_phantom_semicolons = $self->[_rK_phantom_semicolons_];
+ my $rchildren_of_seqno = $self->[_rchildren_of_seqno_];
+ my $rhas_broken_code_block = $self->[_rhas_broken_code_block_];
+ my $rhas_broken_list = $self->[_rhas_broken_list_];
+ my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
+ my $rhas_code_block = $self->[_rhas_code_block_];
+ my $rhas_list = $self->[_rhas_list_];
+ my $rhas_ternary = $self->[_rhas_ternary_];
+ my $ris_assigned_structure = $self->[_ris_assigned_structure_];
+ my $ris_broken_container = $self->[_ris_broken_container_];
+ my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
+ my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
+ my $ris_permanently_broken = $self->[_ris_permanently_broken_];
+ my $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
+ my $roverride_cab3 = $self->[_roverride_cab3_];
+ my $rparent_of_seqno = $self->[_rparent_of_seqno_];
+ my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+
+ my $last_nonblank_code_type = ';';
+ my $last_nonblank_code_token = ';';
+ my $last_nonblank_block_type = '';
+ my $last_last_nonblank_code_type = ';';
+ my $last_last_nonblank_code_token = ';';
+
+ my %K_first_here_doc_by_seqno;
+
+ my $set_permanently_broken = sub {
+ my ($seqno) = @_;
+ while ( defined($seqno) ) {
+ $ris_permanently_broken->{$seqno} = 1;
+ $seqno = $rparent_of_seqno->{$seqno};
+ }
+ return;
+ };
+ my $store_token = sub {
+ my ($item) = @_;
- # item must still be open to be a candidate (otherwise it
- # cannot influence the current token)
- next if ( $item->get_closed() >= 0 );
+ # This will be the index of this item in the new array
+ my $KK_new = @{$rLL_new};
- my $available_spaces = $item->get_available_spaces();
+ my $type = $item->[_TYPE_];
+ my $is_blank = $type eq 'b';
+ my $block_type = "";
- if ( $available_spaces > 0 ) {
- push( @candidates, [ $i, $available_spaces ] );
+ # Do not output consecutive blanks. This situation should have been
+ # prevented earlier, but it is worth checking because later routines
+ # make this assumption.
+ if ( $is_blank && $KK_new && $rLL_new->[-1]->[_TYPE_] eq 'b' ) {
+ return;
}
- }
-
- return unless (@candidates);
- # sort by available whitespace so that we can remove whitespace
- # from the maximum available first
- @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
+ # check for a sequenced item (i.e., container or ?/:)
+ my $type_sequence = $item->[_TYPE_SEQUENCE_];
+ my $token = $item->[_TOKEN_];
+ if ($type_sequence) {
- # keep removing whitespace until we are done or have no more
- foreach my $candidate (@candidates) {
- my ( $i, $available_spaces ) = @{$candidate};
- my $deleted_spaces =
- ( $available_spaces > $spaces_needed )
- ? $spaces_needed
- : $available_spaces;
+ if ( $is_opening_token{$token} ) {
- # remove the incremental space from this item
- $gnu_item_list[$i]->decrease_available_spaces($deleted_spaces);
+ $K_opening_container->{$type_sequence} = $KK_new;
+ $block_type = $rblock_type_of_seqno->{$type_sequence};
+
+ # Fix for case b1100: Count a line ending in ', [' as having
+ # a line-ending comma. Otherwise, these commas can be hidden
+ # with something like --opening-square-bracket-right
+ if ( $last_nonblank_code_type eq ','
+ && $Ktoken_vars == $Klast_old_code
+ && $Ktoken_vars > $Kfirst_old )
+ {
+ $rlec_count_by_seqno->{$type_sequence}++;
+ }
- my $i_debug = $i;
+ if ( $last_nonblank_code_type eq '='
+ || $last_nonblank_code_type eq '=>' )
+ {
+ $ris_assigned_structure->{$type_sequence} =
+ $last_nonblank_code_type;
+ }
- # update the leading whitespace of this item and all items
- # that came after it
- for ( ; $i <= $max_gnu_item_index ; $i++ ) {
+ my $seqno_parent = $seqno_stack{ $depth_next - 1 };
+ $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
+ push @{ $rchildren_of_seqno->{$seqno_parent} }, $type_sequence;
+ $rparent_of_seqno->{$type_sequence} = $seqno_parent;
+ $seqno_stack{$depth_next} = $type_sequence;
+ $K_old_opening_by_seqno{$type_sequence} = $Ktoken_vars;
+ $depth_next++;
- my $old_spaces = $gnu_item_list[$i]->get_spaces();
- if ( $old_spaces >= $deleted_spaces ) {
- $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
+ if ( $depth_next > $depth_next_max ) {
+ $depth_next_max = $depth_next;
+ }
}
+ elsif ( $is_closing_token{$token} ) {
- # shouldn't happen except for code bug:
- else {
- my $level = $gnu_item_list[$i_debug]->get_level();
- my $ci_level = $gnu_item_list[$i_debug]->get_ci_level();
- my $old_level = $gnu_item_list[$i]->get_level();
- my $old_ci_level = $gnu_item_list[$i]->get_ci_level();
- warning(
-"program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n"
- );
- report_definite_bug();
- }
- }
- $gnu_position_predictor -= $deleted_spaces;
- $spaces_needed -= $deleted_spaces;
- last unless ( $spaces_needed > 0 );
- }
- return;
-}
+ $K_closing_container->{$type_sequence} = $KK_new;
+ $block_type = $rblock_type_of_seqno->{$type_sequence};
-sub finish_lp_batch {
+ # Do not include terminal commas in counts
+ if ( $last_nonblank_code_type eq ','
+ || $last_nonblank_code_type eq '=>' )
+ {
+ my $seqno = $seqno_stack{ $depth_next - 1 };
+ if ($seqno) {
+ $rtype_count_by_seqno->{$seqno}
+ ->{$last_nonblank_code_type}--;
+
+ if ( $Ktoken_vars == $Kfirst_old
+ && $last_nonblank_code_type eq ','
+ && $rlec_count_by_seqno->{$seqno} )
+ {
+ $rlec_count_by_seqno->{$seqno}--;
+ }
+ }
+ }
- # This routine is called once after each output stream batch is
- # finished to undo indentation for all incomplete -lp
- # indentation levels. It is too risky to leave a level open,
- # because then we can't backtrack in case of a long line to follow.
- # This means that comments and blank lines will disrupt this
- # indentation style. But the vertical aligner may be able to
- # get the space back if there are side comments.
+ # Update the stack...
+ $depth_next--;
+ }
+ else {
- # this is only for the 'lp' style
- return unless ($rOpts_line_up_parentheses);
+ # For ternary, note parent but do not include as child
+ my $seqno_parent = $seqno_stack{ $depth_next - 1 };
+ $seqno_parent = SEQ_ROOT unless defined($seqno_parent);
+ $rparent_of_seqno->{$type_sequence} = $seqno_parent;
- # nothing can be done if no stack items defined for this line
- return if ( $max_gnu_item_index == UNDEFINED_INDEX );
+ # These are not yet used but could be useful
+ if ( $token eq '?' ) {
+ $K_opening_ternary->{$type_sequence} = $KK_new;
+ }
+ elsif ( $token eq ':' ) {
+ $K_closing_ternary->{$type_sequence} = $KK_new;
+ }
+ else {
- # loop over all whitespace items created for the current batch
- foreach my $i ( 0 .. $max_gnu_item_index ) {
- my $item = $gnu_item_list[$i];
+ # We really shouldn't arrive here, just being cautious:
+ # The only sequenced types output by the tokenizer are the
+ # opening & closing containers and the ternary types. Each
+ # of those was checked above. So we would only get here
+ # if the tokenizer has been changed to mark some other
+ # tokens with sequence numbers.
+ if (DEVEL_MODE) {
+ my $type = $item->[_TYPE_];
+ Fault(
+"Unexpected token type with sequence number: type='$type', seqno='$type_sequence'"
+ );
+ }
+ }
+ }
+ }
- # only look for open items
- next if ( $item->get_closed() >= 0 );
+ # Find the length of this token. Later it may be adjusted if phantom
+ # or ignoring side comment lengths.
+ my $token_length =
+ $is_encoded_data
+ ? $length_function->($token)
+ : length($token);
- # Tentatively remove all of the available space
- # (The vertical aligner will try to get it back later)
- my $available_spaces = $item->get_available_spaces();
- if ( $available_spaces > 0 ) {
+ # handle comments
+ my $is_comment = $type eq '#';
+ if ($is_comment) {
- # delete incremental space for this item
- $gnu_item_list[$i]
- ->tentatively_decrease_available_spaces($available_spaces);
+ # trim comments if necessary
+ my $ord = ord( substr( $token, -1, 1 ) );
+ if (
+ $ord > 0
+ && ( $ord < ORD_PRINTABLE_MIN
+ || $ord > ORD_PRINTABLE_MAX )
+ && $token =~ s/\s+$//
+ )
+ {
+ $token_length = $length_function->($token);
+ $item->[_TOKEN_] = $token;
+ }
- # Reduce the total indentation space of any nodes that follow
- # Note that any such nodes must necessarily be dependents
- # of this node.
- foreach ( $i + 1 .. $max_gnu_item_index ) {
- $gnu_item_list[$_]->decrease_SPACES($available_spaces);
+ # Mark length of side comments as just 1 if sc lengths are ignored
+ if ( $rOpts_ignore_side_comment_lengths
+ && ( !$CODE_type || $CODE_type eq 'HSC' ) )
+ {
+ $token_length = 1;
+ }
+ my $seqno = $seqno_stack{ $depth_next - 1 };
+ if ( defined($seqno)
+ && !$ris_permanently_broken->{$seqno} )
+ {
+ $set_permanently_broken->($seqno);
}
}
- }
- return;
-}
-sub reduce_lp_indentation {
+ $item->[_TOKEN_LENGTH_] = $token_length;
- # reduce the leading whitespace at token $i if possible by $spaces_needed
- # (a large value of $spaces_needed will remove all excess space)
- # NOTE: to be called from scan_list only for a sequence of tokens
- # contained between opening and closing parens/braces/brackets
-
- my ( $i, $spaces_wanted ) = @_;
- my $deleted_spaces = 0;
+ # and update the cumulative length
+ $cumulative_length += $token_length;
- my $item = $leading_spaces_to_go[$i];
- my $available_spaces = $item->get_available_spaces();
+ # Save the length sum to just AFTER this token
+ $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
- if (
- $available_spaces > 0
- && ( ( $spaces_wanted <= $available_spaces )
- || !$item->get_have_child() )
- )
- {
+ if ( !$is_blank && !$is_comment ) {
+
+ # Remember the most recent two non-blank, non-comment tokens.
+ # NOTE: the phantom semicolon code may change the output stack
+ # without updating these values. Phantom semicolons are considered
+ # the same as blanks for now, but future needs might change that.
+ # See the related note in sub '$add_phantom_semicolon'.
+ $last_last_nonblank_code_type = $last_nonblank_code_type;
+ $last_last_nonblank_code_token = $last_nonblank_code_token;
+
+ $last_nonblank_code_type = $type;
+ $last_nonblank_code_token = $token;
+ $last_nonblank_block_type = $block_type;
+
+ # count selected types
+ if ( $is_counted_type{$type} ) {
+ my $seqno = $seqno_stack{ $depth_next - 1 };
+ if ( defined($seqno) ) {
+ $rtype_count_by_seqno->{$seqno}->{$type}++;
+
+ # Count line-ending commas for -bbx
+ if ( $type eq ',' && $Ktoken_vars == $Klast_old_code ) {
+ $rlec_count_by_seqno->{$seqno}++;
+ }
- # we'll remove these spaces, but mark them as recoverable
- $deleted_spaces =
- $item->tentatively_decrease_available_spaces($spaces_wanted);
- }
+ # Remember index of first here doc target
+ if ( $type eq 'h' && !$K_first_here_doc_by_seqno{$seqno} ) {
+ $K_first_here_doc_by_seqno{$seqno} = $KK_new;
+ }
+ }
+ }
+ }
- return $deleted_spaces;
-}
+ # For reference, here is how to get the parent sequence number.
+ # This is not used because it is slower than finding it on the fly
+ # in sub parent_seqno_by_K:
-sub token_sequence_length {
+ # my $seqno_parent =
+ # $type_sequence && $is_opening_token{$token}
+ # ? $seqno_stack{ $depth_next - 2 }
+ # : $seqno_stack{ $depth_next - 1 };
+ # my $KK = @{$rLL_new};
+ # $rseqno_of_parent_by_K->{$KK} = $seqno_parent;
- # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
- # returns 0 if $ibeg > $iend (shouldn't happen)
- my ( $ibeg, $iend ) = @_;
- return 0 if ( $iend < 0 || $ibeg > $iend );
- return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 );
- return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
-}
+ # and finally, add this item to the new array
+ push @{$rLL_new}, $item;
+ return;
+ };
-sub total_line_length {
+ my $store_token_and_space = sub {
+ my ( $item, $want_space ) = @_;
- # return length of a line of tokens ($ibeg .. $iend)
- my ( $ibeg, $iend ) = @_;
- return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
-}
+ # store a token with preceding space if requested and needed
-sub maximum_line_length_for_level {
+ # First store the space
+ if ( $want_space
+ && @{$rLL_new}
+ && $rLL_new->[-1]->[_TYPE_] ne 'b'
+ && $rOpts_add_whitespace )
+ {
+ my $rcopy = [ @{$item} ];
+ $rcopy->[_TYPE_] = 'b';
+ $rcopy->[_TOKEN_] = ' ';
+ $rcopy->[_TYPE_SEQUENCE_] = '';
- # return maximum line length for line starting with a given level
- my $maximum_line_length = $rOpts_maximum_line_length;
+ $rcopy->[_LINE_INDEX_] =
+ $rLL_new->[-1]->[_LINE_INDEX_];
- # Modify if -vmll option is selected
- if ($rOpts_variable_maximum_line_length) {
- my $level = shift;
- if ( $level < 0 ) { $level = 0 }
- $maximum_line_length += $level * $rOpts_indent_columns;
- }
- return $maximum_line_length;
-}
+ # Patch 23-Jan-2021 to fix -lp blinkers:
+ # The level and ci_level of newly created spaces should be the same
+ # as the previous token. Otherwise the coding for the -lp option
+ # can create a blinking state in some rare cases.
+ $rcopy->[_LEVEL_] =
+ $rLL_new->[-1]->[_LEVEL_];
+ $rcopy->[_CI_LEVEL_] =
+ $rLL_new->[-1]->[_CI_LEVEL_];
-sub maximum_line_length {
+ $store_token->($rcopy);
+ }
- # return maximum line length for line starting with the token at given index
- my $ii = shift;
- return maximum_line_length_for_level( $levels_to_go[$ii] );
-}
+ # then the token
+ $store_token->($item);
+ return;
+ };
-sub excess_line_length {
+ my $add_phantom_semicolon = sub {
- # return number of characters by which a line of tokens ($ibeg..$iend)
- # exceeds the allowable line length.
- my ( $ibeg, $iend, $ignore_left_weld, $ignore_right_weld ) = @_;
+ my ($KK) = @_;
- # Include left and right weld lengths unless requested not to
- my $wl = $ignore_left_weld ? 0 : weld_len_left_to_go($iend);
- my $wr = $ignore_right_weld ? 0 : weld_len_right_to_go($iend);
+ my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
+ return unless ( defined($Kp) );
- return total_line_length( $ibeg, $iend ) + $wl + $wr -
- maximum_line_length($ibeg);
-}
+ # we are only adding semicolons for certain block types
+ my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+ return unless ($type_sequence);
+ my $block_type = $rblock_type_of_seqno->{$type_sequence};
+ return unless ($block_type);
+ return
+ unless ( $ok_to_add_semicolon_for_block_type{$block_type}
+ || $block_type =~ /^(sub|package)/
+ || $block_type =~ /^\w+\:$/ );
-sub wrapup {
+ my $type_p = $rLL_new->[$Kp]->[_TYPE_];
+ my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
+ my $type_sequence_p = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
- # flush buffer and write any informative messages
- my $self = shift;
+ # Do not add a semicolon if...
+ return
+ if (
- $self->flush();
- $file_writer_object->decrement_output_line_number()
- ; # fix up line number since it was incremented
- we_are_at_the_last_line();
- if ( $added_semicolon_count > 0 ) {
- my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
- my $what =
- ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
- write_logfile_entry("$added_semicolon_count $what added:\n");
- write_logfile_entry(
- " $first at input line $first_added_semicolon_at\n");
+ # it would follow a comment (and be isolated)
+ $type_p eq '#'
- if ( $added_semicolon_count > 1 ) {
- write_logfile_entry(
- " Last at input line $last_added_semicolon_at\n");
- }
- write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
- write_logfile_entry("\n");
- }
+ # it follows a code block ( because they are not always wanted
+ # there and may add clutter)
+ || $type_sequence_p && $rblock_type_of_seqno->{$type_sequence_p}
- if ( $deleted_semicolon_count > 0 ) {
- my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
- my $what =
- ( $deleted_semicolon_count > 1 )
- ? "semicolons were"
- : "semicolon was";
- write_logfile_entry(
- "$deleted_semicolon_count unnecessary $what deleted:\n");
- write_logfile_entry(
- " $first at input line $first_deleted_semicolon_at\n");
+ # it would follow a label
+ || $type_p eq 'J'
- if ( $deleted_semicolon_count > 1 ) {
- write_logfile_entry(
- " Last at input line $last_deleted_semicolon_at\n");
- }
- write_logfile_entry(" (Use -ndsm to prevent semicolon deletion)\n");
- write_logfile_entry("\n");
- }
+ # it would be inside a 'format' statement (and cause syntax error)
+ || ( $type_p eq 'k'
+ && $token_p =~ /format/ )
- if ( $embedded_tab_count > 0 ) {
- my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
- my $what =
- ( $embedded_tab_count > 1 )
- ? "quotes or patterns"
- : "quote or pattern";
- write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
- write_logfile_entry(
-"This means the display of this script could vary with device or software\n"
- );
- write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
+ );
- if ( $embedded_tab_count > 1 ) {
- write_logfile_entry(
- " Last at input line $last_embedded_tab_at\n");
- }
- write_logfile_entry("\n");
- }
+ # Do not add a semicolon if it would impede a weld with an immediately
+ # following closing token...like this
+ # { ( some code ) }
+ # ^--No semicolon can go here
- if ($first_tabbing_disagreement) {
- write_logfile_entry(
-"First indentation disagreement seen at input line $first_tabbing_disagreement\n"
- );
- }
+ # look at the previous token... note use of the _NEW rLL array here,
+ # but sequence numbers are invariant.
+ my $seqno_inner = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
- if ($in_tabbing_disagreement) {
- write_logfile_entry(
-"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
- );
- }
- else {
+ # If it is also a CLOSING token we have to look closer...
+ if (
+ $seqno_inner
+ && $is_closing_token{$token_p}
- if ($last_tabbing_disagreement) {
+ # we only need to look if there is just one inner container..
+ && defined( $rchildren_of_seqno->{$type_sequence} )
+ && @{ $rchildren_of_seqno->{$type_sequence} } == 1
+ )
+ {
- write_logfile_entry(
-"Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
- );
- }
- else {
- write_logfile_entry("No indentation disagreement seen\n");
+ # Go back and see if the corresponding two OPENING tokens are also
+ # together. Note that we are using the OLD K indexing here:
+ my $K_outer_opening = $K_old_opening_by_seqno{$type_sequence};
+ if ( defined($K_outer_opening) ) {
+ my $K_nxt = $self->K_next_nonblank($K_outer_opening);
+ if ( defined($K_nxt) ) {
+ my $seqno_nxt = $rLL->[$K_nxt]->[_TYPE_SEQUENCE_];
+
+ # Is the next token after the outer opening the same as
+ # our inner closing (i.e. same sequence number)?
+ # If so, do not insert a semicolon here.
+ return if ( $seqno_nxt && $seqno_nxt == $seqno_inner );
+ }
+ }
}
- }
- if ($first_tabbing_disagreement) {
- write_logfile_entry(
-"Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
- );
- }
- write_logfile_entry("\n");
- $vertical_aligner_object->report_anything_unusual();
-
- $file_writer_object->report_line_length_errors();
+ # We will insert an empty semicolon here as a placeholder. Later, if
+ # it becomes the last token on a line, we will bring it to life. The
+ # advantage of doing this is that (1) we just have to check line
+ # endings, and (2) the phantom semicolon has zero width and therefore
+ # won't cause needless breaks of one-line blocks.
+ my $Ktop = -1;
+ if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
+ && $want_left_space{';'} == WS_NO )
+ {
- return;
-}
+ # convert the blank into a semicolon..
+ # be careful: we are working on the new stack top
+ # on a token which has been stored.
+ my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', ' ' );
-sub check_options {
+ # Convert the existing blank to:
+ # a phantom semicolon for one_line_block option = 0 or 1
+ # a real semicolon for one_line_block option = 2
+ my $tok = '';
+ my $len_tok = 0;
+ if ( $rOpts_one_line_block_semicolons == 2 ) {
+ $tok = ';';
+ $len_tok = 1;
+ }
- # This routine is called to check the Opts hash after it is defined
- $rOpts = shift;
+ $rLL_new->[$Ktop]->[_TOKEN_] = $tok;
+ $rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = $len_tok;
+ $rLL_new->[$Ktop]->[_TYPE_] = ';';
- initialize_whitespace_hashes();
- initialize_bond_strength_hashes();
+ # NOTE: we are changing the output stack without updating variables
+ # $last_nonblank_code_type, etc. Future needs might require that
+ # those variables be updated here. For now, it seems ok to skip
+ # this.
- make_static_block_comment_pattern();
- make_static_side_comment_pattern();
- make_closing_side_comment_prefix();
- make_closing_side_comment_list_pattern();
- $format_skipping_pattern_begin =
- make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
- $format_skipping_pattern_end =
- make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
+ # Save list of new K indexes of phantom semicolons.
+ # This will be needed if we want to undo them for iterations in
+ # future coding.
+ push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
- # If closing side comments ARE selected, then we can safely
- # delete old closing side comments unless closing side comment
- # warnings are requested. This is a good idea because it will
- # eliminate any old csc's which fall below the line count threshold.
- # We cannot do this if warnings are turned on, though, because we
- # might delete some text which has been added. So that must
- # be handled when comments are created.
- if ( $rOpts->{'closing-side-comments'} ) {
- if ( !$rOpts->{'closing-side-comment-warnings'} ) {
- $rOpts->{'delete-closing-side-comments'} = 1;
+ # Then store a new blank
+ $store_token->($rcopy);
}
- }
+ else {
- # If closing side comments ARE NOT selected, but warnings ARE
- # selected and we ARE DELETING csc's, then we will pretend to be
- # adding with a huge interval. This will force the comments to be
- # generated for comparison with the old comments, but not added.
- elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
- if ( $rOpts->{'delete-closing-side-comments'} ) {
- $rOpts->{'delete-closing-side-comments'} = 0;
- $rOpts->{'closing-side-comments'} = 1;
- $rOpts->{'closing-side-comment-interval'} = 100000000;
+ # Patch for issue c078: keep line indexes in order. If the top
+ # token is a space that we are keeping (due to '-wls=';') then
+ # we have to check that old line indexes stay in order.
+ # In very rare
+ # instances in which side comments have been deleted and converted
+ # into blanks, we may have filtered down multiple blanks into just
+ # one. In that case the top blank may have a higher line number
+ # than the previous nonblank token. Although the line indexes of
+ # blanks are not really significant, we need to keep them in order
+ # in order to pass error checks.
+ if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' ) {
+ my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_];
+ my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_];
+ if ( $new_top_ix < $old_top_ix ) {
+ $rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix;
+ }
+ }
+
+ my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', '' );
+ $store_token->($rcopy);
+ push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
}
- }
+ return;
+ };
- make_sub_matching_pattern();
- make_bli_pattern();
- make_block_brace_vertical_tightness_pattern();
- make_blank_line_pattern();
- make_keyword_group_list_pattern();
+ my $check_Q = sub {
- # Make initial list of desired one line block types
- # They will be modified by 'prepare_cuddled_block_types'
- %want_one_line_block = %is_sort_map_grep_eval;
+ # Check that a quote looks okay
+ # This sub works but needs to by sync'd with the log file output
+ # before it can be used.
+ my ( $KK, $Kfirst, $line_number ) = @_;
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ $self->note_embedded_tab($line_number) if ( $token =~ "\t" );
- prepare_cuddled_block_types();
- if ( $rOpts->{'dump-cuddled-block-list'} ) {
- dump_cuddled_block_list(*STDOUT);
- Exit(0);
- }
+ # The remainder of this routine looks for something like
+ # '$var = s/xxx/yyy/;'
+ # in case it should have been '$var =~ s/xxx/yyy/;'
- if ( $rOpts->{'line-up-parentheses'} ) {
+ # Start by looking for a token begining with one of: s y m / tr
+ return
+ unless ( $is_s_y_m_slash{ substr( $token, 0, 1 ) }
+ || substr( $token, 0, 2 ) eq 'tr' );
- if ( $rOpts->{'indent-only'}
- || !$rOpts->{'add-newlines'}
- || !$rOpts->{'delete-old-newlines'} )
- {
- Warn(<<EOM);
------------------------------------------------------------------------
-Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
-
-The -lp indentation logic requires that perltidy be able to coordinate
-arbitrarily large numbers of line breakpoints. This isn't possible
-with these flags. Sometimes an acceptable workaround is to use -wocb=3
------------------------------------------------------------------------
-EOM
- $rOpts->{'line-up-parentheses'} = 0;
+ # ... and preceded by one of: = == !=
+ my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
+ return unless ( defined($Kp) );
+ my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
+ return unless ( $is_unexpected_equals{$previous_nonblank_type} );
+ my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
+
+ my $previous_nonblank_type_2 = 'b';
+ my $previous_nonblank_token_2 = "";
+ my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
+ if ( defined($Kpp) ) {
+ $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_];
+ $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
}
- }
- # At present, tabs are not compatible with the line-up-parentheses style
- # (it would be possible to entab the total leading whitespace
- # just prior to writing the line, if desired).
- if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
- Warn(<<EOM);
-Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
-EOM
- $rOpts->{'tabs'} = 0;
- }
+ my $next_nonblank_token = "";
+ my $Kn = $KK + 1;
+ if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
+ if ( $Kn <= $Kmax ) {
+ $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
+ }
- # Likewise, tabs are not compatible with outdenting..
- if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
- Warn(<<EOM);
-Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
-EOM
- $rOpts->{'tabs'} = 0;
- }
+ my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
+ my $type_0 = $rLL->[$Kfirst]->[_TYPE_];
- if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
- Warn(<<EOM);
-Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
-EOM
- $rOpts->{'tabs'} = 0;
- }
+ if (
+ ##$token =~ /^(s|tr|y|m|\/)/
+ ##&& $previous_nonblank_token =~ /^(=|==|!=)$/
+ 1
- if ( !$rOpts->{'space-for-semicolon'} ) {
- $want_left_space{'f'} = -1;
- }
+ # preceded by simple scalar
+ && $previous_nonblank_type_2 eq 'i'
+ && $previous_nonblank_token_2 =~ /^\$/
- if ( $rOpts->{'space-terminal-semicolon'} ) {
- $want_left_space{';'} = 1;
- }
+ # followed by some kind of termination
+ # (but give complaint if we can not see far enough ahead)
+ && $next_nonblank_token =~ /^[; \)\}]$/
- # implement outdenting preferences for keywords
- %outdent_keyword = ();
- my @okw = split_words( $rOpts->{'outdent-keyword-okl'} );
- unless (@okw) {
- @okw = qw(next last redo goto return); # defaults
- }
+ # scalar is not declared
+ && !( $type_0 eq 'k' && $token_0 =~ /^(my|our|local)$/ )
+ )
+ {
+ my $lno = 1 + $rLL_new->[$Kp]->[_LINE_INDEX_];
+ my $guess = substr( $previous_nonblank_token, 0, 1 ) . '~';
+ complain(
+"Line $lno: Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
+ );
+ }
+ return;
+ };
- # FUTURE: if not a keyword, assume that it is an identifier
- foreach (@okw) {
- if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
- $outdent_keyword{$_} = 1;
+ #-------------------------------------------
+ # Main loop to respace all lines of the file
+ #-------------------------------------------
+ my $last_K_out;
+
+ foreach my $line_of_tokens ( @{$rlines} ) {
+
+ my $input_line_number = $line_of_tokens->{_line_number};
+ my $last_line_type = $line_type;
+ $line_type = $line_of_tokens->{_line_type};
+ next unless ( $line_type eq 'CODE' );
+ my $last_CODE_type = $CODE_type;
+ $CODE_type = $line_of_tokens->{_code_type};
+ my $rK_range = $line_of_tokens->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ next unless defined($Kfirst);
+ ( $Kfirst_old, $Klast_old ) = ( $Kfirst, $Klast );
+ $Klast_old_code = $Klast_old;
+
+ # Be sure an old K value is defined for sub $store_token
+ $Ktoken_vars = $Kfirst;
+
+ # Check for correct sequence of token indexes...
+ # An error here means that sub write_line() did not correctly
+ # package the tokenized lines as it received them. If we
+ # get a fault here it has not output a continuous sequence
+ # of K values. Or a line of CODE may have been mismarked as
+ # something else. There is no good way to continue after such an
+ # error.
+ # FIXME: Calling Fault will produce zero output; it would be best to
+ # find a way to dump the input file.
+ if ( defined($last_K_out) ) {
+ if ( $Kfirst != $last_K_out + 1 ) {
+ Fault(
+ "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
+ );
+ }
}
else {
- Warn("ignoring '$_' in -okwl list; not a perl keyword");
- }
- }
-
- # implement user whitespace preferences
- if ( my @q = split_words( $rOpts->{'want-left-space'} ) ) {
- @want_left_space{@q} = (1) x scalar(@q);
- }
- if ( my @q = split_words( $rOpts->{'want-right-space'} ) ) {
- @want_right_space{@q} = (1) x scalar(@q);
- }
+ # The first token should always have been given index 0 by sub
+ # write_line()
+ if ( $Kfirst != 0 ) {
+ Fault("Program Bug: first K is $Kfirst but should be 0");
+ }
+ }
+ $last_K_out = $Klast;
- if ( my @q = split_words( $rOpts->{'nowant-left-space'} ) ) {
- @want_left_space{@q} = (-1) x scalar(@q);
- }
+ # Handle special lines of code
+ if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
- if ( my @q = split_words( $rOpts->{'nowant-right-space'} ) ) {
- @want_right_space{@q} = (-1) x scalar(@q);
- }
- if ( $rOpts->{'dump-want-left-space'} ) {
- dump_want_left_space(*STDOUT);
- Exit(0);
- }
+ # CODE_types are as follows.
+ # 'BL' = Blank Line
+ # 'VB' = Verbatim - line goes out verbatim
+ # 'FS' = Format Skipping - line goes out verbatim, no blanks
+ # 'IO' = Indent Only - only indentation may be changed
+ # 'NIN' = No Internal Newlines - line does not get broken
+ # 'HSC'=Hanging Side Comment - fix this hanging side comment
+ # 'BC'=Block Comment - an ordinary full line comment
+ # 'SBC'=Static Block Comment - a block comment which does not get
+ # indented
+ # 'SBCX'=Static Block Comment Without Leading Space
+ # 'VER'=VERSION statement
+ # '' or (undefined) - no restructions
- if ( $rOpts->{'dump-want-right-space'} ) {
- dump_want_right_space(*STDOUT);
- Exit(0);
- }
+ # For a hanging side comment we insert an empty quote before
+ # the comment so that it becomes a normal side comment and
+ # will be aligned by the vertical aligner
+ if ( $CODE_type eq 'HSC' ) {
- # default keywords for which space is introduced before an opening paren
- # (at present, including them messes up vertical alignment)
- my @sak = qw(my local our and or err eq ne if else elsif until
- unless while for foreach return switch case given when catch);
- @space_after_keyword{@sak} = (1) x scalar(@sak);
+ # Safety Check: This must be a line with one token (a comment)
+ my $rtoken_vars = $rLL->[$Kfirst];
+ if ( $Kfirst == $Klast && $rtoken_vars->[_TYPE_] eq '#' ) {
- # first remove any or all of these if desired
- if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
+ # Note that even if the flag 'noadd-whitespace' is set, we
+ # will make an exception here and allow a blank to be
+ # inserted to push the comment to the right. We can think
+ # of this as an adjustment of indentation rather than
+ # whitespace between tokens. This will also prevent the
+ # hanging side comment from getting converted to a block
+ # comment if whitespace gets deleted, as for example with
+ # the -extrude and -mangle options.
+ my $rcopy = copy_token_as_type( $rtoken_vars, 'q', '' );
+ $store_token->($rcopy);
+ $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' );
+ $store_token->($rcopy);
+ $store_token->($rtoken_vars);
+ next;
+ }
+ else {
- # -nsak='*' selects all the above keywords
- if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) }
- @space_after_keyword{@q} = (0) x scalar(@q);
- }
+ # This line was mis-marked by sub scan_comment. Catch in
+ # DEVEL_MODE, otherwise try to repair and keep going.
+ Fault(
+ "Program bug. A hanging side comment has been mismarked"
+ ) if (DEVEL_MODE);
- # then allow user to add to these defaults
- if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) {
- @space_after_keyword{@q} = (1) x scalar(@q);
- }
+ $CODE_type = "";
+ $line_of_tokens->{_code_type} = $CODE_type;
+ }
+ }
- # implement user break preferences
- my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
- = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
- . : ? && || and or err xor
- );
+ if ( $CODE_type eq 'BL' ) {
+ my $seqno = $seqno_stack{ $depth_next - 1 };
+ if ( defined($seqno)
+ && !$ris_permanently_broken->{$seqno}
+ && $rOpts_maximum_consecutive_blank_lines )
+ {
+ $set_permanently_broken->($seqno);
+ }
+ }
- my $break_after = sub {
- my @toks = @_;
- foreach my $tok (@toks) {
- if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
- my $lbs = $left_bond_strength{$tok};
- my $rbs = $right_bond_strength{$tok};
- if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
- ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
- ( $lbs, $rbs );
+ # Copy tokens unchanged
+ foreach my $KK ( $Kfirst .. $Klast ) {
+ $Ktoken_vars = $KK;
+ $store_token->( $rLL->[$KK] );
}
+ next;
}
- };
- my $break_before = sub {
- my @toks = @_;
- foreach my $tok (@toks) {
- my $lbs = $left_bond_strength{$tok};
- my $rbs = $right_bond_strength{$tok};
- if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
- ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
- ( $lbs, $rbs );
+ # Handle normal line..
+
+ # Define index of last token before any side comment for comma counts
+ my $type_end = $rLL->[$Klast_old_code]->[_TYPE_];
+ if ( ( $type_end eq '#' || $type_end eq 'b' )
+ && $Klast_old_code > $Kfirst_old )
+ {
+ $Klast_old_code--;
+ if ( $rLL->[$Klast_old_code]->[_TYPE_] eq 'b'
+ && $Klast_old_code > $Kfirst_old )
+ {
+ $Klast_old_code--;
}
}
- };
- $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
- $break_before->(@all_operators)
- if ( $rOpts->{'break-before-all-operators'} );
+ # Insert any essential whitespace between lines
+ # if last line was normal CODE.
+ # Patch for rt #125012: use K_previous_code rather than '_nonblank'
+ # because comments may disappear.
+ if ( $last_line_type eq 'CODE' ) {
+ my $type_next = $rLL->[$Kfirst]->[_TYPE_];
+ my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
+ if (
+ is_essential_whitespace(
+ $last_last_nonblank_code_token,
+ $last_last_nonblank_code_type,
+ $last_nonblank_code_token,
+ $last_nonblank_code_type,
+ $token_next,
+ $type_next,
+ )
+ )
+ {
- $break_after->( split_words( $rOpts->{'want-break-after'} ) );
- $break_before->( split_words( $rOpts->{'want-break-before'} ) );
+ # Copy this first token as blank, but use previous line number
+ my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', ' ' );
+ $rcopy->[_LINE_INDEX_] =
+ $rLL_new->[-1]->[_LINE_INDEX_];
- # make note if breaks are before certain key types
- %want_break_before = ();
- foreach my $tok ( @all_operators, ',' ) {
- $want_break_before{$tok} =
- $left_bond_strength{$tok} < $right_bond_strength{$tok};
- }
+ # The level and ci_level of newly created spaces should be the
+ # same as the previous token. Otherwise blinking states can
+ # be created if the -lp mode is used. See similar coding in
+ # sub 'store_token_and_space'. Fixes cases b1109 b1110.
+ $rcopy->[_LEVEL_] =
+ $rLL_new->[-1]->[_LEVEL_];
+ $rcopy->[_CI_LEVEL_] =
+ $rLL_new->[-1]->[_CI_LEVEL_];
- # Coordinate ?/: breaks, which must be similar
- if ( !$want_break_before{':'} ) {
- $want_break_before{'?'} = $want_break_before{':'};
- $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
- $left_bond_strength{'?'} = NO_BREAK;
- }
+ $store_token->($rcopy);
+ }
+ }
- # Define here tokens which may follow the closing brace of a do statement
- # on the same line, as in:
- # } while ( $something);
- my @dof = qw(until while unless if ; : );
- push @dof, ',';
- @is_do_follower{@dof} = (1) x scalar(@dof);
+ #-------------------------------------------------------
+ # Loop to copy all tokens on this line, with any changes
+ #-------------------------------------------------------
+ my $type_sequence;
+ for ( my $KK = $Kfirst ; $KK <= $Klast ; $KK++ ) {
+ $Ktoken_vars = $KK;
+ $rtoken_vars = $rLL->[$KK];
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $type = $rtoken_vars->[_TYPE_];
+ my $last_type_sequence = $type_sequence;
+ $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
- # What tokens may follow the closing brace of an if or elsif block?
- # Not used. Previously used for cuddled else, but no longer needed.
- %is_if_brace_follower = ();
+ # Handle a blank space ...
+ if ( $type eq 'b' ) {
- # nothing can follow the closing curly of an else { } block:
- %is_else_brace_follower = ();
+ # Delete it if not wanted by whitespace rules
+ # or we are deleting all whitespace
+ # Note that whitespace flag is a flag indicating whether a
+ # white space BEFORE the token is needed
+ next if ( $KK >= $Klast ); # skip terminal blank
+ my $Knext = $KK + 1;
- # what can follow a multi-line anonymous sub definition closing curly:
- my @asf = qw# ; : => or and && || ~~ !~~ ) #;
- push @asf, ',';
- @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf);
+ if ($rOpts_freeze_whitespace) {
+ $store_token->($rtoken_vars);
+ next;
+ }
- # what can follow a one-line anonymous sub closing curly:
- # one-line anonymous subs also have ']' here...
- # see tk3.t and PP.pm
- my @asf1 = qw# ; : => or and && || ) ] ~~ !~~ #;
- push @asf1, ',';
- @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1);
+ my $ws = $rwhitespace_flags->[$Knext];
+ if ( $ws == -1
+ || $rOpts_delete_old_whitespace )
+ {
- # What can follow a closing curly of a block
- # which is not an if/elsif/else/do/sort/map/grep/eval/sub
- # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
- my @obf = qw# ; : => or and && || ) #;
- push @obf, ',';
- @is_other_brace_follower{@obf} = (1) x scalar(@obf);
+ my $token_next = $rLL->[$Knext]->[_TOKEN_];
+ my $type_next = $rLL->[$Knext]->[_TYPE_];
- $right_bond_strength{'{'} = WEAK;
- $left_bond_strength{'{'} = VERY_STRONG;
+ my $do_not_delete = is_essential_whitespace(
+ $last_last_nonblank_code_token,
+ $last_last_nonblank_code_type,
+ $last_nonblank_code_token,
+ $last_nonblank_code_type,
+ $token_next,
+ $type_next,
+ );
- # make -l=0 equal to -l=infinite
- if ( !$rOpts->{'maximum-line-length'} ) {
- $rOpts->{'maximum-line-length'} = 1000000;
- }
+ # Note that repeated blanks will get filtered out here
+ next unless ($do_not_delete);
+ }
- # make -lbl=0 equal to -lbl=infinite
- if ( !$rOpts->{'long-block-line-count'} ) {
- $rOpts->{'long-block-line-count'} = 1000000;
- }
+ # make it just one character
+ $rtoken_vars->[_TOKEN_] = ' ';
+ $store_token->($rtoken_vars);
+ next;
+ }
- my $enc = $rOpts->{'character-encoding'};
- if ( $enc && $enc !~ /^(none|utf8)$/i ) {
- Die(<<EOM);
-Unrecognized character-encoding '$enc'; expecting one of: (none, utf8)
-EOM
- }
+ # Handle a nonblank token...
- my $ole = $rOpts->{'output-line-ending'};
- if ($ole) {
- my %endings = (
- dos => "\015\012",
- win => "\015\012",
- mac => "\015",
- unix => "\012",
- );
+ if ($type_sequence) {
- # Patch for RT #99514, a memoization issue.
- # Normally, the user enters one of 'dos', 'win', etc, and we change the
- # value in the options parameter to be the corresponding line ending
- # character. But, if we are using memoization, on later passes through
- # here the option parameter will already have the desired ending
- # character rather than the keyword 'dos', 'win', etc. So
- # we must check to see if conversion has already been done and, if so,
- # bypass the conversion step.
- my %endings_inverted = (
- "\015\012" => 'dos',
- "\015\012" => 'win',
- "\015" => 'mac',
- "\012" => 'unix',
- );
+ # Insert a tentative missing semicolon if the next token is
+ # a closing block brace
+ if (
+ $type eq '}'
+ && $token eq '}'
- if ( defined( $endings_inverted{$ole} ) ) {
+ # not preceded by a ';'
+ && $last_nonblank_code_type ne ';'
- # we already have valid line ending, nothing more to do
- }
- else {
- $ole = lc $ole;
- unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
- my $str = join " ", keys %endings;
- Die(<<EOM);
-Unrecognized line ending '$ole'; expecting one of: $str
-EOM
- }
- if ( $rOpts->{'preserve-line-endings'} ) {
- Warn("Ignoring -ple; conflicts with -ole\n");
- $rOpts->{'preserve-line-endings'} = undef;
+ # and this is not a VERSION stmt (is all one line, we
+ # are not inserting semicolons on one-line blocks)
+ && $CODE_type ne 'VER'
+
+ # and we are allowed to add semicolons
+ && $rOpts->{'add-semicolons'}
+ )
+ {
+ $add_phantom_semicolon->($KK);
+ }
}
- }
- }
- # hashes used to simplify setting whitespace
- %tightness = (
- '{' => $rOpts->{'brace-tightness'},
- '}' => $rOpts->{'brace-tightness'},
- '(' => $rOpts->{'paren-tightness'},
- ')' => $rOpts->{'paren-tightness'},
- '[' => $rOpts->{'square-bracket-tightness'},
- ']' => $rOpts->{'square-bracket-tightness'},
- );
- %matching_token = (
- '{' => '}',
- '(' => ')',
- '[' => ']',
- '?' => ':',
- );
+ # Modify certain tokens here for whitespace
+ # The following is not yet done, but could be:
+ # sub (x x x)
+ # ( $type =~ /^[wit]$/ )
+ elsif ( $is_wit{$type} ) {
- if ( $rOpts->{'ignore-old-breakpoints'} ) {
- if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
- Warn("Conflicting parameters: -iob and -bom; -bom will be ignored\n"
- );
- }
- if ( $rOpts->{'break-at-old-comma-breakpoints'} ) {
- Warn("Conflicting parameters: -iob and -boc; -boc will be ignored\n"
- );
- }
+ # change '$ var' to '$var' etc
+ # change '@ ' to '@'
+ # Examples: <<snippets/space1.in>>
+ my $ord = ord( substr( $token, 1, 1 ) );
+ if (
- # Note: there are additional parameters that can be made inactive by
- # -iob, but they are on by default so we would generate excessive
- # warnings if we noted them. They are:
- # $rOpts->{'break-at-old-keyword-breakpoints'}
- # $rOpts->{'break-at-old-logical-breakpoints'}
- # $rOpts->{'break-at-old-ternary-breakpoints'}
- # $rOpts->{'break-at-old-attribute-breakpoints'}
- }
+ # quick test for possible blank at second char
+ $ord > 0 && ( $ord < ORD_PRINTABLE_MIN
+ || $ord > ORD_PRINTABLE_MAX )
+ )
+ {
+ my ( $sigil, $word ) = split /\s+/, $token, 2;
- # frequently used parameters
- $rOpts_add_newlines = $rOpts->{'add-newlines'};
- $rOpts_add_whitespace = $rOpts->{'add-whitespace'};
- $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
- $rOpts_block_brace_vertical_tightness =
- $rOpts->{'block-brace-vertical-tightness'};
- $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'};
- $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
- $rOpts_break_at_old_ternary_breakpoints =
- $rOpts->{'break-at-old-ternary-breakpoints'};
- $rOpts_break_at_old_attribute_breakpoints =
- $rOpts->{'break-at-old-attribute-breakpoints'};
- $rOpts_break_at_old_comma_breakpoints =
- $rOpts->{'break-at-old-comma-breakpoints'};
- $rOpts_break_at_old_keyword_breakpoints =
- $rOpts->{'break-at-old-keyword-breakpoints'};
- $rOpts_break_at_old_logical_breakpoints =
- $rOpts->{'break-at-old-logical-breakpoints'};
- $rOpts_break_at_old_method_breakpoints =
- $rOpts->{'break-at-old-method-breakpoints'};
- $rOpts_closing_side_comment_else_flag =
- $rOpts->{'closing-side-comment-else-flag'};
- $rOpts_closing_side_comment_maximum_text =
- $rOpts->{'closing-side-comment-maximum-text'};
- $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
- $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
- $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
- $rOpts_indent_columns = $rOpts->{'indent-columns'};
- $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
- $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
- $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
- $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
- $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
+ # $sigil =~ /^[\$\&\%\*\@]$/ )
+ if ( $is_sigil{$sigil} ) {
+ $token = $sigil;
+ $token .= $word if ( defined($word) ); # fix c104
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
+ }
- $rOpts_variable_maximum_line_length =
- $rOpts->{'variable-maximum-line-length'};
- $rOpts_short_concatenation_item_length =
- $rOpts->{'short-concatenation-item-length'};
+ # Split identifiers with leading arrows, inserting blanks
+ # if necessary. It is easier and safer here than in the
+ # tokenizer. For example '->new' becomes two tokens, '->'
+ # and 'new' with a possible blank between.
+ #
+ # Note: there is a related patch in sub set_whitespace_flags
+ elsif (length($token) > 2
+ && substr( $token, 0, 2 ) eq '->'
+ && $token =~ /^\-\>(.*)$/
+ && $1 )
+ {
- $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
- $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
- $rOpts_format_skipping = $rOpts->{'format-skipping'};
- $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
- $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
- $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
- $rOpts_ignore_side_comment_lengths =
- $rOpts->{'ignore-side-comment-lengths'};
+ my $token_save = $1;
+ my $type_save = $type;
- # Note that both opening and closing tokens can access the opening
- # and closing flags of their container types.
- %opening_vertical_tightness = (
- '(' => $rOpts->{'paren-vertical-tightness'},
- '{' => $rOpts->{'brace-vertical-tightness'},
- '[' => $rOpts->{'square-bracket-vertical-tightness'},
- ')' => $rOpts->{'paren-vertical-tightness'},
- '}' => $rOpts->{'brace-vertical-tightness'},
- ']' => $rOpts->{'square-bracket-vertical-tightness'},
- );
+ # Change '-> new' to '->new'
+ $token_save =~ s/^\s+//g;
- %closing_vertical_tightness = (
- '(' => $rOpts->{'paren-vertical-tightness-closing'},
- '{' => $rOpts->{'brace-vertical-tightness-closing'},
- '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
- ')' => $rOpts->{'paren-vertical-tightness-closing'},
- '}' => $rOpts->{'brace-vertical-tightness-closing'},
- ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
- );
+ # store a blank to left of arrow if necessary
+ my $Kprev = $self->K_previous_nonblank($KK);
+ if ( defined($Kprev)
+ && $rLL->[$Kprev]->[_TYPE_] ne 'b'
+ && $rOpts_add_whitespace
+ && $want_left_space{'->'} == WS_YES )
+ {
+ my $rcopy =
+ copy_token_as_type( $rtoken_vars, 'b', ' ' );
+ $store_token->($rcopy);
+ }
- # assume flag for '>' same as ')' for closing qw quotes
- %closing_token_indentation = (
- ')' => $rOpts->{'closing-paren-indentation'},
- '}' => $rOpts->{'closing-brace-indentation'},
- ']' => $rOpts->{'closing-square-bracket-indentation'},
- '>' => $rOpts->{'closing-paren-indentation'},
- );
+ # then store the arrow
+ my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' );
+ $store_token->($rcopy);
- # flag indicating if any closing tokens are indented
- $some_closing_token_indentation =
- $rOpts->{'closing-paren-indentation'}
- || $rOpts->{'closing-brace-indentation'}
- || $rOpts->{'closing-square-bracket-indentation'}
- || $rOpts->{'indent-closing-brace'};
+ # store a blank after the arrow if requested
+ # added for issue git #33
+ if ( $want_right_space{'->'} == WS_YES ) {
+ my $rcopy =
+ copy_token_as_type( $rtoken_vars, 'b', ' ' );
+ $store_token->($rcopy);
+ }
- %opening_token_right = (
- '(' => $rOpts->{'opening-paren-right'},
- '{' => $rOpts->{'opening-hash-brace-right'},
- '[' => $rOpts->{'opening-square-bracket-right'},
- );
+ # then reset the current token to be the remainder,
+ # and reset the whitespace flag according to the arrow
+ $token = $rtoken_vars->[_TOKEN_] = $token_save;
+ $type = $rtoken_vars->[_TYPE_] = $type_save;
+ $store_token->($rtoken_vars);
+ next;
+ }
- %stack_opening_token = (
- '(' => $rOpts->{'stack-opening-paren'},
- '{' => $rOpts->{'stack-opening-hash-brace'},
- '[' => $rOpts->{'stack-opening-square-bracket'},
- );
+ # Trim certain spaces in identifiers
+ if ( $type eq 'i' ) {
- %stack_closing_token = (
- ')' => $rOpts->{'stack-closing-paren'},
- '}' => $rOpts->{'stack-closing-hash-brace'},
- ']' => $rOpts->{'stack-closing-square-bracket'},
- );
- $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
- $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
- return;
-}
+ if (
+ (
+ substr( $token, 0, 3 ) eq 'sub'
+ || $rOpts_sub_alias_list
+ )
+ && $token =~ /$SUB_PATTERN/
+ )
+ {
-sub bad_pattern {
+ # -spp = 0 : no space before opening prototype paren
+ # -spp = 1 : stable (follow input spacing)
+ # -spp = 2 : always space before opening prototype paren
+ my $spp = $rOpts->{'space-prototype-paren'};
+ if ( defined($spp) ) {
+ if ( $spp == 0 ) { $token =~ s/\s+\(/\(/; }
+ elsif ( $spp == 2 ) { $token =~ s/\(/ (/; }
+ }
- # See if a pattern will compile. We have to use a string eval here,
- # but it should be safe because the pattern has been constructed
- # by this program.
- my ($pattern) = @_;
- eval "'##'=~/$pattern/";
- return $@;
-}
+ # one space max, and no tabs
+ $token =~ s/\s+/ /g;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
-{
- my %no_cuddle;
+ # clean up spaces in package identifiers, like
+ # "package Bob::Dog;"
+ elsif ( substr( $token, 0, 7 ) eq 'package'
+ && $token =~ /^package\s/ )
+ {
+ $token =~ s/\s+/ /g;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
- # Add keywords here which really should not be cuddled
- BEGIN {
- my @q = qw(if unless for foreach while);
- @no_cuddle{@q} = (1) x scalar(@q);
- }
+ # trim identifiers of trailing blanks which can occur
+ # under some unusual circumstances, such as if the
+ # identifier 'witch' has trailing blanks on input here:
+ #
+ # sub
+ # witch
+ # () # prototype may be on new line ...
+ # ...
+ my $ord = ord( substr( $token, -1, 1 ) );
+ if (
- sub prepare_cuddled_block_types {
+ # quick check for possible ending space
+ $ord > 0 && ( $ord < ORD_PRINTABLE_MIN
+ || $ord > ORD_PRINTABLE_MAX )
+ )
+ {
+ $token =~ s/\s+$//g;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
+ }
+ }
- # the cuddled-else style, if used, is controlled by a hash that
- # we construct here
+ # handle semicolons
+ elsif ( $type eq ';' ) {
- # Include keywords here which should not be cuddled
+ # Remove unnecessary semicolons, but not after bare
+ # blocks, where it could be unsafe if the brace is
+ # mistokenized.
+ if (
+ $rOpts->{'delete-semicolons'}
+ && (
+ (
+ $last_nonblank_block_type
+ && $last_nonblank_code_type eq '}'
+ && (
+ $is_block_without_semicolon{
+ $last_nonblank_block_type}
+ || $last_nonblank_block_type =~ /$SUB_PATTERN/
+ || $last_nonblank_block_type =~ /^\w+:$/
+ )
+ )
+ || $last_nonblank_code_type eq ';'
+ )
+ )
+ {
- my $cuddled_string = "";
- if ( $rOpts->{'cuddled-else'} ) {
+ # This looks like a deletable semicolon, but even if a
+ # semicolon can be deleted it is not necessarily best to do
+ # so. We apply these additional rules for deletion:
+ # - Always ok to delete a ';' at the end of a line
+ # - Never delete a ';' before a '#' because it would
+ # promote it to a block comment.
+ # - If a semicolon is not at the end of line, then only
+ # delete if it is followed by another semicolon or closing
+ # token. This includes the comment rule. It may take
+ # two passes to get to a final state, but it is a little
+ # safer. For example, keep the first semicolon here:
+ # eval { sub bubba { ok(0) }; ok(0) } || ok(1);
+ # It is not required but adds some clarity.
+ my $ok_to_delete = 1;
+ if ( $KK < $Klast ) {
+ my $Kn = $self->K_next_nonblank($KK);
+ if ( defined($Kn) && $Kn <= $Klast ) {
+ my $next_nonblank_token_type =
+ $rLL->[$Kn]->[_TYPE_];
+ $ok_to_delete = $next_nonblank_token_type eq ';'
+ || $next_nonblank_token_type eq '}';
+ }
+ }
- # set the default
- $cuddled_string = 'elsif else continue catch finally'
- unless ( $rOpts->{'cuddled-block-list-exclusive'} );
+ # do not delete only nonblank token in a file
+ else {
+ my $Kp = $self->K_previous_code( undef, $rLL_new );
+ my $Kn = $self->K_next_nonblank($KK);
+ $ok_to_delete = defined($Kn) || defined($Kp);
+ }
- # This is the old equivalent but more complex version
- # $cuddled_string = 'if-elsif-else unless-elsif-else -continue ';
+ if ($ok_to_delete) {
+ $self->note_deleted_semicolon($input_line_number);
+ next;
+ }
+ else {
+ write_logfile_entry("Extra ';'\n");
+ }
+ }
+ }
- # Add users other blocks to be cuddled
- my $cuddled_block_list = $rOpts->{'cuddled-block-list'};
- if ($cuddled_block_list) {
- $cuddled_string .= " " . $cuddled_block_list;
+ # Old patch to add space to something like "x10".
+ # Note: This is now done in the Tokenizer, but this code remains
+ # for reference.
+ elsif ( $type eq 'n' ) {
+ if ( substr( $token, 0, 1 ) eq 'x' && $token =~ /^x\d+/ ) {
+ $token =~ s/x/x /;
+ $rtoken_vars->[_TOKEN_] = $token;
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+Near line $input_line_number, Unexpected need to split a token '$token' - this should now be done by the Tokenizer
+EOM
+ }
+ }
}
- }
+ # check for a qw quote
+ elsif ( $type eq 'q' ) {
- # If we have a cuddled string of the form
- # 'try-catch-finally'
+ # trim blanks from right of qw quotes
+ # (To avoid trimming qw quotes use -ntqw; the tokenizer handles
+ # this)
+ $token =~ s/\s*$//;
+ $rtoken_vars->[_TOKEN_] = $token;
+ $self->note_embedded_tab($input_line_number)
+ if ( $token =~ "\t" );
+ $store_token_and_space->(
+ $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
+ );
+ next;
+ } ## end if ( $type eq 'q' )
- # we want to prepare a hash of the form
+ # change 'LABEL :' to 'LABEL:'
+ elsif ( $type eq 'J' ) {
+ $token =~ s/\s+//g;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
- # $rcuddled_block_types = {
- # 'try' => {
- # 'catch' => 1,
- # 'finally' => 1
- # },
- # };
+ # check a quote for problems
+ elsif ( $type eq 'Q' ) {
+ $check_Q->( $KK, $Kfirst, $input_line_number );
+ }
- # use -dcbl to dump this hash
+ # Store this token with possible previous blank
+ $store_token_and_space->(
+ $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
+ );
- # Multiple such strings are input as a space or comma separated list
+ } # End token loop
+ } # End line loop
- # If we get two lists with the same leading type, such as
- # -cbl = "-try-catch-finally -try-catch-otherwise"
- # then they will get merged as follows:
- # $rcuddled_block_types = {
- # 'try' => {
- # 'catch' => 1,
- # 'finally' => 2,
- # 'otherwise' => 1,
- # },
- # };
- # This will allow either type of chain to be followed.
+ # Walk backwards through the tokens, making forward links to sequence items.
+ if ( @{$rLL_new} ) {
+ my $KNEXT;
+ for ( my $KK = @{$rLL_new} - 1 ; $KK >= 0 ; $KK-- ) {
+ $rLL_new->[$KK]->[_KNEXT_SEQ_ITEM_] = $KNEXT;
+ if ( $rLL_new->[$KK]->[_TYPE_SEQUENCE_] ) { $KNEXT = $KK }
+ }
+ $self->[_K_first_seq_item_] = $KNEXT;
+ }
- $cuddled_string =~ s/,/ /g; # allow space or comma separated lists
- my @cuddled_strings = split /\s+/, $cuddled_string;
+ # Find and remember lists by sequence number
+ foreach my $seqno ( keys %{$K_opening_container} ) {
+ my $K_opening = $K_opening_container->{$seqno};
+ next unless defined($K_opening);
- $rcuddled_block_types = {};
+ # code errors may leave undefined closing tokens
+ my $K_closing = $K_closing_container->{$seqno};
+ next unless defined($K_closing);
+
+ my $lx_open = $rLL_new->[$K_opening]->[_LINE_INDEX_];
+ my $lx_close = $rLL_new->[$K_closing]->[_LINE_INDEX_];
+ my $line_diff = $lx_close - $lx_open;
+ $ris_broken_container->{$seqno} = $line_diff;
+
+ # See if this is a list
+ my $is_list;
+ my $rtype_count = $rtype_count_by_seqno->{$seqno};
+ if ($rtype_count) {
+ my $comma_count = $rtype_count->{','};
+ my $fat_comma_count = $rtype_count->{'=>'};
+ my $semicolon_count = $rtype_count->{';'} || $rtype_count->{'f'};
+
+ # We will define a list to be a container with one or more commas
+ # and no semicolons. Note that we have included the semicolons
+ # in a 'for' container in the simicolon count to keep c-style for
+ # statements from being formatted as lists.
+ if ( ( $comma_count || $fat_comma_count ) && !$semicolon_count ) {
+ $is_list = 1;
+
+ # We need to do one more check for a perenthesized list:
+ # At an opening paren following certain tokens, such as 'if',
+ # we do not want to format the contents as a list.
+ if ( $rLL_new->[$K_opening]->[_TOKEN_] eq '(' ) {
+ my $Kp = $self->K_previous_code( $K_opening, $rLL_new );
+ if ( defined($Kp) ) {
+ my $type_p = $rLL_new->[$Kp]->[_TYPE_];
+ if ( $type_p eq 'k' ) {
+ my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
+ $is_list = 0 if ( $is_nonlist_keyword{$token_p} );
+ }
+ else {
+ $is_list = 0 if ( $is_nonlist_type{$type_p} );
+ }
+ }
+ }
+ }
+ }
- # process each dash-separated string...
- my $string_count = 0;
- foreach my $string (@cuddled_strings) {
- next unless $string;
- my @words = split /-+/, $string; # allow multiple dashes
+ # Look for a block brace marked as uncertain. If the tokenizer thinks
+ # its guess is uncertain for the type of a brace following an unknown
+ # bareword then it adds a trailing space as a signal. We can fix the
+ # type here now that we have had a better look at the contents of the
+ # container. This fixes case b1085. To find the corresponding code in
+ # Tokenizer.pm search for 'b1085' with an editor.
+ my $block_type = $rblock_type_of_seqno->{$seqno};
+ if ( $block_type && substr( $block_type, -1, 1 ) eq ' ' ) {
- # we could look for and report possible errors here...
- next unless ( @words > 0 );
+ # Always remove the trailing space
+ $block_type =~ s/\s+$//;
- # allow either '-continue' or *-continue' for arbitrary starting type
- my $start = '*';
+ # Try to filter out parenless sub calls
+ my ( $Knn1, $Knn2 );
+ my ( $type_nn1, $type_nn2 ) = ( 'b', 'b' );
+ $Knn1 = $self->K_next_nonblank( $K_opening, $rLL_new );
+ $Knn2 = $self->K_next_nonblank( $Knn1, $rLL_new ) if defined($Knn1);
+ $type_nn1 = $rLL_new->[$Knn1]->[_TYPE_] if ( defined($Knn1) );
+ $type_nn2 = $rLL_new->[$Knn2]->[_TYPE_] if ( defined($Knn2) );
- # a single word without dashes is a secondary block type
- if ( @words > 1 ) {
- $start = shift @words;
+ # if ( $type_nn1 =~ /^[wU]$/ && $type_nn2 =~ /^[wiqQGCZ]$/ ) {
+ if ( $wU{$type_nn1} && $wiq{$type_nn2} ) {
+ $is_list = 0;
}
- # always make an entry for the leading word. If none follow, this
- # will still prevent a wildcard from matching this word.
- if ( !defined( $rcuddled_block_types->{$start} ) ) {
- $rcuddled_block_types->{$start} = {};
- }
+ # Convert to a hash brace if it looks like it holds a list
+ if ($is_list) {
- # The count gives the original word order in case we ever want it.
- $string_count++;
- my $word_count = 0;
- foreach my $word (@words) {
- next unless $word;
- if ( $no_cuddle{$word} ) {
- Warn(
-"## Ignoring keyword '$word' in -cbl; does not seem right\n"
- );
- next;
- }
- $word_count++;
- $rcuddled_block_types->{$start}->{$word} =
- 1; #"$string_count.$word_count";
+ $block_type = "";
- # git#9: Remove this word from the list of desired one-line
- # blocks
- $want_one_line_block{$word} = 0;
+ $rLL_new->[$K_opening]->[_CI_LEVEL_] = 1;
+ $rLL_new->[$K_closing]->[_CI_LEVEL_] = 1;
}
+
+ $rblock_type_of_seqno->{$seqno} = $block_type;
}
- return;
- }
-}
-sub dump_cuddled_block_list {
- my ($fh) = @_;
+ # Handle a list container
+ if ( $is_list && !$block_type ) {
+ $ris_list_by_seqno->{$seqno} = $seqno;
+ my $seqno_parent = $rparent_of_seqno->{$seqno};
+ my $depth = 0;
+ while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
+ $depth++;
- # ORIGINAL METHOD: Here is the format of the cuddled block type hash
- # which controls this routine
- # my $rcuddled_block_types = {
- # 'if' => {
- # 'else' => 1,
- # 'elsif' => 1
- # },
- # 'try' => {
- # 'catch' => 1,
- # 'finally' => 1
- # },
- # };
+ # for $rhas_list we need to save the minimum depth
+ if ( !$rhas_list->{$seqno_parent}
+ || $rhas_list->{$seqno_parent} > $depth )
+ {
+ $rhas_list->{$seqno_parent} = $depth;
+ }
- # SIMPLFIED METHOD: the simplified method uses a wildcard for
- # the starting block type and puts all cuddled blocks together:
- # my $rcuddled_block_types = {
- # '*' => {
- # 'else' => 1,
- # 'elsif' => 1
- # 'catch' => 1,
- # 'finally' => 1
- # },
- # };
+ if ($line_diff) {
+ $rhas_broken_list->{$seqno_parent} = 1;
- # Both methods work, but the simplified method has proven to be adequate and
- # easier to manage.
+ # Patch1: We need to mark broken lists with non-terminal
+ # line-ending commas for the -bbx=2 parameter. This insures
+ # that the list will stay broken. Otherwise the flag
+ # -bbx=2 can be unstable. This fixes case b789 and b938.
- my $cuddled_string = $rOpts->{'cuddled-block-list'};
- $cuddled_string = '' unless $cuddled_string;
+ # Patch2: Updated to also require either one fat comma or
+ # one more line-ending comma. Fixes cases b1069 b1070
+ # b1072 b1076.
+ if (
+ $rlec_count_by_seqno->{$seqno}
+ && ( $rlec_count_by_seqno->{$seqno} > 1
+ || $rtype_count_by_seqno->{$seqno}->{'=>'} )
+ )
+ {
+ $rhas_broken_list_with_lec->{$seqno_parent} = 1;
+ }
+ }
+ $seqno_parent = $rparent_of_seqno->{$seqno_parent};
+ }
+ }
- my $flags = "";
- $flags .= "-ce" if ( $rOpts->{'cuddled-else'} );
- $flags .= " -cbl='$cuddled_string'";
+ # Handle code blocks ...
+ # The -lp option needs to know if a container holds a code block
+ elsif ( $block_type && $rOpts_line_up_parentheses ) {
+ my $seqno_parent = $rparent_of_seqno->{$seqno};
+ while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
+ $rhas_code_block->{$seqno_parent} = 1;
+ $rhas_broken_code_block->{$seqno_parent} = $line_diff;
+ $seqno_parent = $rparent_of_seqno->{$seqno_parent};
+ }
+ }
+ }
- unless ( $rOpts->{'cuddled-else'} ) {
- $flags .= "\nNote: You must specify -ce to generate a cuddled hash";
+ # Find containers with ternaries, needed for -lp formatting.
+ foreach my $seqno ( keys %{$K_opening_ternary} ) {
+ my $seqno_parent = $rparent_of_seqno->{$seqno};
+ while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) {
+ $rhas_ternary->{$seqno_parent} = 1;
+ $seqno_parent = $rparent_of_seqno->{$seqno_parent};
+ }
}
- $fh->print(<<EOM);
-------------------------------------------------------------------------
-Hash of cuddled block types prepared for a run with these parameters:
- $flags
-------------------------------------------------------------------------
-EOM
+ # Turn off -lp for containers with here-docs with text within a container,
+ # since they have their own fixed indentation. Fixes case b1081.
+ if ($rOpts_line_up_parentheses) {
+ foreach my $seqno ( keys %K_first_here_doc_by_seqno ) {
+ my $Kh = $K_first_here_doc_by_seqno{$seqno};
+ my $Kc = $K_closing_container->{$seqno};
+ my $line_Kh = $rLL_new->[$Kh]->[_LINE_INDEX_];
+ my $line_Kc = $rLL_new->[$Kc]->[_LINE_INDEX_];
+ next if ( $line_Kh == $line_Kc );
+ $ris_excluded_lp_container->{$seqno} = 1;
+ }
+ }
- use Data::Dumper;
- $fh->print( Dumper($rcuddled_block_types) );
+ # Set a flag to turn off -cab=3 in complex structures. Otherwise,
+ # instability can occur. When it is overridden the behavior of the closest
+ # match, -cab=2, will be used instead. This fixes cases b1096 b1113.
+ if ( $rOpts_comma_arrow_breakpoints == 3 ) {
+ foreach my $seqno ( keys %{$K_opening_container} ) {
+
+ my $rtype_count = $rtype_count_by_seqno->{$seqno};
+ next unless ( $rtype_count && $rtype_count->{'=>'} );
+
+ # override -cab=3 if this contains a sub-list
+ if ( $rhas_list->{$seqno} ) {
+ $roverride_cab3->{$seqno} = 1;
+ }
+
+ # or if this is a sub-list of its parent container
+ else {
+ my $seqno_parent = $rparent_of_seqno->{$seqno};
+ if ( defined($seqno_parent)
+ && $ris_list_by_seqno->{$seqno_parent} )
+ {
+ $roverride_cab3->{$seqno} = 1;
+ }
+ }
+ }
+ }
+
+ # Reset memory to be the new array
+ $self->[_rLL_] = $rLL_new;
+ my $Klimit;
+ if ( @{$rLL_new} ) { $Klimit = @{$rLL_new} - 1 }
+ $self->[_Klimit_] = $Klimit;
+
+ # During development, verify that the new array still looks okay.
+ DEVEL_MODE && $self->check_token_array();
+
+ # reset the token limits of each line
+ $self->resync_lines_and_tokens();
- $fh->print(<<EOM);
-------------------------------------------------------------------------
-EOM
return;
}
-sub make_static_block_comment_pattern {
-
- # create the pattern used to identify static block comments
- $static_block_comment_pattern = '^\s*##';
+sub copy_token_as_type {
- # allow the user to change it
- if ( $rOpts->{'static-block-comment-prefix'} ) {
- my $prefix = $rOpts->{'static-block-comment-prefix'};
- $prefix =~ s/^\s*//;
- my $pattern = $prefix;
+ # This provides a quick way to create a new token by
+ # slightly modifying an existing token.
+ my ( $rold_token, $type, $token ) = @_;
+ if ( $type eq 'b' ) {
+ $token = " " unless defined($token);
+ }
+ elsif ( $type eq 'q' ) {
+ $token = '' unless defined($token);
+ }
+ elsif ( $type eq '->' ) {
+ $token = '->' unless defined($token);
+ }
+ elsif ( $type eq ';' ) {
+ $token = ';' unless defined($token);
+ }
+ else {
- # user may give leading caret to force matching left comments only
- if ( $prefix !~ /^\^#/ ) {
- if ( $prefix !~ /^#/ ) {
- Die(
-"ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"
- );
- }
- $pattern = '^\s*' . $prefix;
+ # Unexpected type ... this sub will work as long as both $token and
+ # $type are defined, but we should catch any unexpected types during
+ # development.
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+sub 'copy_token_as_type' received token type '$type' but expects just one of: 'b' 'q' '->' or ';'
+EOM
}
- if ( bad_pattern($pattern) ) {
- Die(
-"ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"
- );
+ else {
+ # shouldn't happen
}
- $static_block_comment_pattern = $pattern;
}
- return;
+
+ my @rnew_token = @{$rold_token};
+ $rnew_token[_TYPE_] = $type;
+ $rnew_token[_TOKEN_] = $token;
+ $rnew_token[_TYPE_SEQUENCE_] = '';
+ return \@rnew_token;
}
-sub make_format_skipping_pattern {
- my ( $opt_name, $default ) = @_;
- my $param = $rOpts->{$opt_name};
- unless ($param) { $param = $default }
- $param =~ s/^\s*//;
- if ( $param !~ /^#/ ) {
- Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
- }
- my $pattern = '^' . $param . '\s';
- if ( bad_pattern($pattern) ) {
- Die(
-"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
- );
+sub Debug_dump_tokens {
+
+ # a debug routine, not normally used
+ my ( $self, $msg ) = @_;
+ my $rLL = $self->[_rLL_];
+ my $nvars = @{$rLL};
+ print STDERR "$msg\n";
+ print STDERR "ntokens=$nvars\n";
+ print STDERR "K\t_TOKEN_\t_TYPE_\n";
+ my $K = 0;
+
+ foreach my $item ( @{$rLL} ) {
+ print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n";
+ $K++;
}
- return $pattern;
+ return;
}
-sub make_closing_side_comment_list_pattern {
+sub K_next_code {
+ my ( $self, $KK, $rLL ) = @_;
- # turn any input list into a regex for recognizing selected block types
- $closing_side_comment_list_pattern = '^\w+';
- if ( defined( $rOpts->{'closing-side-comment-list'} )
- && $rOpts->{'closing-side-comment-list'} )
- {
- $closing_side_comment_list_pattern =
- make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
+ # return the index K of the next nonblank, non-comment token
+ return unless ( defined($KK) && $KK >= 0 );
+
+ # use the standard array unless given otherwise
+ $rLL = $self->[_rLL_] unless ( defined($rLL) );
+ my $Num = @{$rLL};
+ my $Knnb = $KK + 1;
+ while ( $Knnb < $Num ) {
+ if ( !defined( $rLL->[$Knnb] ) ) {
+
+ # We seem to have encountered a gap in our array.
+ # This shouldn't happen because sub write_line() pushed
+ # items into the $rLL array.
+ Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
+ return;
+ }
+ if ( $rLL->[$Knnb]->[_TYPE_] ne 'b'
+ && $rLL->[$Knnb]->[_TYPE_] ne '#' )
+ {
+ return $Knnb;
+ }
+ $Knnb++;
}
return;
}
-sub make_sub_matching_pattern {
+sub K_next_nonblank {
+ my ( $self, $KK, $rLL ) = @_;
- $SUB_PATTERN = '^sub\s+(::|\w)';
- $ASUB_PATTERN = '^sub$';
+ # return the index K of the next nonblank token, or
+ # return undef if none
+ return unless ( defined($KK) && $KK >= 0 );
- if ( $rOpts->{'sub-alias-list'} ) {
+ # The third arg allows this routine to be used on any array. This is
+ # useful in sub respace_tokens when we are copying tokens from an old $rLL
+ # to a new $rLL array. But usually the third arg will not be given and we
+ # will just use the $rLL array in $self.
+ $rLL = $self->[_rLL_] unless ( defined($rLL) );
+ my $Num = @{$rLL};
+ my $Knnb = $KK + 1;
+ return unless ( $Knnb < $Num );
+ return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
+ return unless ( ++$Knnb < $Num );
+ return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
+
+ # Backup loop. Very unlikely to get here; it means we have neighboring
+ # blanks in the token stream.
+ $Knnb++;
+ while ( $Knnb < $Num ) {
- # Note that any 'sub-alias-list' has been preprocessed to
- # be a trimmed, space-separated list which includes 'sub'
- # for example, it might be 'sub method fun'
- my $sub_alias_list = $rOpts->{'sub-alias-list'};
- $sub_alias_list =~ s/\s+/\|/g;
- $SUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
- $ASUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
+ # Safety check, this fault shouldn't happen: The $rLL array is the
+ # main array of tokens, so all entries should be used. It is
+ # initialized in sub write_line, and then re-initialized by sub
+ # $store_token() within sub respace_tokens. Tokens are pushed on
+ # so there shouldn't be any gaps.
+ if ( !defined( $rLL->[$Knnb] ) ) {
+ Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
+ return;
+ }
+ if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
+ $Knnb++;
}
return;
}
-sub make_bli_pattern {
+sub K_previous_code {
- if ( defined( $rOpts->{'brace-left-and-indent-list'} )
- && $rOpts->{'brace-left-and-indent-list'} )
- {
- $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
+ # return the index K of the previous nonblank, non-comment token
+ # Call with $KK=undef to start search at the top of the array
+ my ( $self, $KK, $rLL ) = @_;
+
+ # use the standard array unless given otherwise
+ $rLL = $self->[_rLL_] unless ( defined($rLL) );
+ my $Num = @{$rLL};
+ if ( !defined($KK) ) { $KK = $Num }
+ elsif ( $KK > $Num ) {
+
+ # This fault can be caused by a programming error in which a bad $KK is
+ # given. The caller should make the first call with KK_new=undef to
+ # avoid this error.
+ Fault(
+"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
+ ) if (DEVEL_MODE);
+ return;
}
+ my $Kpnb = $KK - 1;
+ while ( $Kpnb >= 0 ) {
+ if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b'
+ && $rLL->[$Kpnb]->[_TYPE_] ne '#' )
+ {
+ return $Kpnb;
+ }
+ $Kpnb--;
+ }
+ return;
+}
- $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
+sub K_previous_nonblank {
+
+ # return index of previous nonblank token before item K;
+ # Call with $KK=undef to start search at the top of the array
+ my ( $self, $KK, $rLL ) = @_;
+
+ # use the standard array unless given otherwise
+ $rLL = $self->[_rLL_] unless ( defined($rLL) );
+ my $Num = @{$rLL};
+ if ( !defined($KK) ) { $KK = $Num }
+ elsif ( $KK > $Num ) {
+
+ # This fault can be caused by a programming error in which a bad $KK is
+ # given. The caller should make the first call with KK_new=undef to
+ # avoid this error.
+ Fault(
+"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
+ ) if (DEVEL_MODE);
+ return;
+ }
+ my $Kpnb = $KK - 1;
+ return unless ( $Kpnb >= 0 );
+ return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
+ return unless ( --$Kpnb >= 0 );
+ return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
+
+ # Backup loop. We should not get here unless some routine
+ # slipped repeated blanks into the token stream.
+ return unless ( --$Kpnb >= 0 );
+ while ( $Kpnb >= 0 ) {
+ if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
+ $Kpnb--;
+ }
return;
}
-sub make_keyword_group_list_pattern {
+sub parent_seqno_by_K {
- # turn any input list into a regex for recognizing selected block types.
- # Here are the defaults:
- $keyword_group_list_pattern = '^(our|local|my|use|require|)$';
- $keyword_group_list_comment_pattern = '';
- if ( defined( $rOpts->{'keyword-group-blanks-list'} )
- && $rOpts->{'keyword-group-blanks-list'} )
- {
- my @words = split /\s+/, $rOpts->{'keyword-group-blanks-list'};
- my @keyword_list;
- my @comment_list;
- foreach my $word (@words) {
- if ( $word =~ /^(BC|SBC)$/ ) {
- push @comment_list, $word;
- if ( $word eq 'SBC' ) { push @comment_list, 'SBCX' }
+ # Return the sequence number of the parent container of token K, if any.
+
+ my ( $self, $KK ) = @_;
+ my $rLL = $self->[_rLL_];
+
+ # The task is to jump forward to the next container token
+ # and use the sequence number of either it or its parent.
+
+ # For example, consider the following with seqno=5 of the '[' and ']'
+ # being called with index K of the first token of each line:
+
+ # # result
+ # push @tests, # -
+ # [ # -
+ # sub { 99 }, 'do {&{%s} for 1,2}', # 5
+ # '(&{})(&{})', undef, # 5
+ # [ 2, 2, 0 ], 0 # 5
+ # ]; # -
+
+ # NOTE: The ending parent will be SEQ_ROOT for a balanced file. For
+ # unbalanced files, last sequence number will either be undefined or it may
+ # be at a deeper level. In either case we will just return SEQ_ROOT to
+ # have a defined value and allow formatting to proceed.
+ my $parent_seqno = SEQ_ROOT;
+ my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+ if ($type_sequence) {
+ $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
+ }
+ else {
+ my $Kt = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
+ if ( defined($Kt) ) {
+ $type_sequence = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
+ my $type = $rLL->[$Kt]->[_TYPE_];
+
+ # if next container token is closing, it is the parent seqno
+ if ( $is_closing_type{$type} ) {
+ $parent_seqno = $type_sequence;
}
+
+ # otherwise we want its parent container
else {
- push @keyword_list, $word;
+ $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
}
}
- $keyword_group_list_pattern =
- make_block_pattern( '-kgbl', $rOpts->{'keyword-group-blanks-list'} );
- $keyword_group_list_comment_pattern =
- make_block_pattern( '-kgbl', join( ' ', @comment_list ) );
}
+ $parent_seqno = SEQ_ROOT unless ( defined($parent_seqno) );
+ return $parent_seqno;
+}
+
+sub is_in_block_by_i {
+ my ( $self, $i ) = @_;
+
+ # returns true if
+ # token at i is contained in a BLOCK
+ # or is at root level
+ # or there is some kind of error (i.e. unbalanced file)
+ # returns false otherwise
+ return 1 if ( $i < 0 ); # shouldn't happen, bad call
+ my $seqno = $parent_seqno_to_go[$i];
+ return 1 if ( !$seqno || $seqno eq SEQ_ROOT );
+ return 1 if ( $self->[_rblock_type_of_seqno_]->{$seqno} );
return;
}
-sub make_block_brace_vertical_tightness_pattern {
+sub is_in_list_by_i {
+ my ( $self, $i ) = @_;
- # turn any input list into a regex for recognizing selected block types
- $block_brace_vertical_tightness_pattern =
- '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
- if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
- && $rOpts->{'block-brace-vertical-tightness-list'} )
- {
- $block_brace_vertical_tightness_pattern =
- make_block_pattern( '-bbvtl',
- $rOpts->{'block-brace-vertical-tightness-list'} );
+ # returns true if token at i is contained in a LIST
+ # returns false otherwise
+ my $seqno = $parent_seqno_to_go[$i];
+ return unless ( $seqno && $seqno ne SEQ_ROOT );
+ if ( $self->[_ris_list_by_seqno_]->{$seqno} ) {
+ return 1;
}
return;
}
-sub make_blank_line_pattern {
+sub is_list_by_K {
- $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
- my $key = 'blank-lines-before-closing-block-list';
- if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
- $blank_lines_before_closing_block_pattern =
- make_block_pattern( '-blbcl', $rOpts->{$key} );
- }
+ # Return true if token K is in a list
+ my ( $self, $KK ) = @_;
- $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
- $key = 'blank-lines-after-opening-block-list';
- if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
- $blank_lines_after_opening_block_pattern =
- make_block_pattern( '-blaol', $rOpts->{$key} );
- }
- return;
+ my $parent_seqno = $self->parent_seqno_by_K($KK);
+ return unless defined($parent_seqno);
+ return $self->[_ris_list_by_seqno_]->{$parent_seqno};
}
-sub make_block_pattern {
+sub is_list_by_seqno {
- # given a string of block-type keywords, return a regex to match them
- # The only tricky part is that labels are indicated with a single ':'
- # and the 'sub' token text may have additional text after it (name of
- # sub).
- #
- # Example:
- #
- # input string: "if else elsif unless while for foreach do : sub";
- # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
+ # Return true if the immediate contents of a container appears to be a
+ # list.
+ my ( $self, $seqno ) = @_;
+ return unless defined($seqno);
+ return $self->[_ris_list_by_seqno_]->{$seqno};
+}
- # Minor Update:
- #
- # To distinguish between anonymous subs and named subs, use 'sub' to
- # indicate a named sub, and 'asub' to indicate an anonymous sub
+sub resync_lines_and_tokens {
- my ( $abbrev, $string ) = @_;
- my @list = split_words($string);
- my @words = ();
- my %seen;
- for my $i (@list) {
- if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
- next if $seen{$i};
- $seen{$i} = 1;
- if ( $i eq 'sub' ) {
- }
- elsif ( $i eq 'asub' ) {
- }
- elsif ( $i eq ';' ) {
- push @words, ';';
- }
- elsif ( $i eq '{' ) {
- push @words, '\{';
- }
- elsif ( $i eq ':' ) {
- push @words, '\w+:';
- }
- elsif ( $i =~ /^\w/ ) {
- push @words, $i;
- }
- else {
- Warn("unrecognized block type $i after $abbrev, ignoring\n");
+ my $self = shift;
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
+ my $rlines = $self->[_rlines_];
+ my @Krange_code_without_comments;
+ my @Klast_valign_code;
+
+ # Re-construct the arrays of tokens associated with the original input lines
+ # since they have probably changed due to inserting and deleting blanks
+ # and a few other tokens.
+
+ # This is the next token and its line index:
+ my $Knext = 0;
+ my $Kmax = defined($Klimit) ? $Klimit : -1;
+
+ # Verify that old line indexes are in still order. If this error occurs,
+ # check locations where sub 'respace_tokens' creates new tokens (like
+ # blank spaces). It must have set a bad old line index.
+ if ( DEVEL_MODE && defined($Klimit) ) {
+ my $iline = $rLL->[0]->[_LINE_INDEX_];
+ for ( my $KK = 1 ; $KK <= $Klimit ; $KK++ ) {
+ my $iline_last = $iline;
+ $iline = $rLL->[$KK]->[_LINE_INDEX_];
+ if ( $iline < $iline_last ) {
+ my $KK_m = $KK - 1;
+ my $token_m = $rLL->[$KK_m]->[_TOKEN_];
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ my $type_m = $rLL->[$KK_m]->[_TYPE_];
+ my $type = $rLL->[$KK]->[_TYPE_];
+ Fault(<<EOM);
+Line indexes out of order at index K=$KK:
+at KK-1 =$KK_m: old line=$iline_last, type='$type_m', token='$token_m'
+at KK =$KK: old line=$iline, type='$type', token='$token',
+EOM
+ }
}
}
- my $pattern = '(' . join( '|', @words ) . ')$';
- my $sub_patterns = "";
- if ( $seen{'sub'} ) {
- $sub_patterns .= '|' . $SUB_PATTERN;
- }
- if ( $seen{'asub'} ) {
- $sub_patterns .= '|' . $ASUB_PATTERN;
- }
- if ($sub_patterns) {
- $pattern = '(' . $pattern . $sub_patterns . ')';
- }
- $pattern = '^' . $pattern;
- return $pattern;
-}
-sub make_static_side_comment_pattern {
+ my $iline = -1;
+ foreach my $line_of_tokens ( @{$rlines} ) {
+ $iline++;
+ my $line_type = $line_of_tokens->{_line_type};
+ if ( $line_type eq 'CODE' ) {
- # create the pattern used to identify static side comments
- $static_side_comment_pattern = '^##';
+ # Get the old number of tokens on this line
+ my $rK_range_old = $line_of_tokens->{_rK_range};
+ my ( $Kfirst_old, $Klast_old ) = @{$rK_range_old};
+ my $Kdiff_old = 0;
+ if ( defined($Kfirst_old) ) {
+ $Kdiff_old = $Klast_old - $Kfirst_old;
+ }
- # allow the user to change it
- if ( $rOpts->{'static-side-comment-prefix'} ) {
- my $prefix = $rOpts->{'static-side-comment-prefix'};
- $prefix =~ s/^\s*//;
- my $pattern = '^' . $prefix;
- if ( bad_pattern($pattern) ) {
- Die(
-"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"
- );
- }
- $static_side_comment_pattern = $pattern;
- }
- return;
-}
+ # Find the range of NEW K indexes for the line:
+ # $Kfirst = index of first token on line
+ # $Klast = index of last token on line
+ my ( $Kfirst, $Klast );
-sub make_closing_side_comment_prefix {
+ my $Knext_beg = $Knext; # this will be $Kfirst if we find tokens
+
+ # Optimization: Although the actual K indexes may be completely
+ # changed after respacing, the number of tokens on any given line
+ # will often be nearly unchanged. So we will see if we can start
+ # our search by guessing that the new line has the same number
+ # of tokens as the old line.
+ my $Knext_guess = $Knext + $Kdiff_old;
+ if ( $Knext_guess > $Knext
+ && $Knext_guess < $Kmax
+ && $rLL->[$Knext_guess]->[_LINE_INDEX_] <= $iline )
+ {
- # Be sure we have a valid closing side comment prefix
- my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
- my $csc_prefix_pattern;
- if ( !defined($csc_prefix) ) {
- $csc_prefix = '## end';
- $csc_prefix_pattern = '^##\s+end';
- }
- else {
- my $test_csc_prefix = $csc_prefix;
- if ( $test_csc_prefix !~ /^#/ ) {
- $test_csc_prefix = '#' . $test_csc_prefix;
- }
+ # the guess is good, so we can start our search here
+ $Knext = $Knext_guess + 1;
+ }
- # make a regex to recognize the prefix
- my $test_csc_prefix_pattern = $test_csc_prefix;
+ while ($Knext <= $Kmax
+ && $rLL->[$Knext]->[_LINE_INDEX_] <= $iline )
+ {
+ $Knext++;
+ }
- # escape any special characters
- $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
+ if ( $Knext > $Knext_beg ) {
- $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
+ $Klast = $Knext - 1;
- # allow exact number of intermediate spaces to vary
- $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
+ # Delete any terminal blank token
+ if ( $rLL->[$Klast]->[_TYPE_] eq 'b' ) { $Klast -= 1 }
- # make sure we have a good pattern
- # if we fail this we probably have an error in escaping
- # characters.
+ if ( $Klast < $Knext_beg ) {
+ $Klast = undef;
+ }
+ else {
- if ( bad_pattern($test_csc_prefix_pattern) ) {
+ $Kfirst = $Knext_beg;
- # shouldn't happen..must have screwed up escaping, above
- report_definite_bug();
- Warn(
-"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"
- );
+ # Save ranges of non-comment code. This will be used by
+ # sub keep_old_line_breaks.
+ if ( $rLL->[$Kfirst]->[_TYPE_] ne '#' ) {
+ push @Krange_code_without_comments, [ $Kfirst, $Klast ];
+ }
- # just warn and keep going with defaults
- Warn("Please consider using a simpler -cscp prefix\n");
- Warn("Using default -cscp instead; please check output\n");
- }
- else {
- $csc_prefix = $test_csc_prefix;
- $csc_prefix_pattern = $test_csc_prefix_pattern;
+ # Only save ending K indexes of code types which are blank
+ # or 'VER'. These will be used for a convergence check.
+ # See related code in sub 'convey_batch_to_vertical_aligner'
+ my $CODE_type = $line_of_tokens->{_code_type};
+ if ( !$CODE_type
+ || $CODE_type eq 'VER' )
+ {
+ push @Klast_valign_code, $Klast;
+ }
+ }
+ }
+
+ # It is only safe to trim the actual line text if the input
+ # line had a terminal blank token. Otherwise, we may be
+ # in a quote.
+ if ( $line_of_tokens->{_ended_in_blank_token} ) {
+ $line_of_tokens->{_line_text} =~ s/\s+$//;
+ }
+ $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
+
+ # Deleting semicolons can create new empty code lines
+ # which should be marked as blank
+ if ( !defined($Kfirst) ) {
+ my $CODE_type = $line_of_tokens->{_code_type};
+ if ( !$CODE_type ) {
+ $line_of_tokens->{_code_type} = 'BL';
+ }
+ }
}
}
- $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
- $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
- return;
-}
-sub dump_want_left_space {
- my $fh = shift;
- local $" = "\n";
- print $fh <<EOM;
-These values are the main control of whitespace to the left of a token type;
-They may be altered with the -wls parameter.
-For a list of token types, use perltidy --dump-token-types (-dtt)
- 1 means the token wants a space to its left
--1 means the token does not want a space to its left
-------------------------------------------------------------------------
-EOM
- foreach my $key ( sort keys %want_left_space ) {
- print $fh "$key\t$want_left_space{$key}\n";
+ # There shouldn't be any nodes beyond the last one. This routine is
+ # relinking lines and tokens after the tokens have been respaced. A fault
+ # here indicates some kind of bug has been introduced into the above loops.
+ # There is not good way to keep going; we better stop here.
+ # FIXME: This will produce zero output. it would be best to find a way to
+ # dump the input file.
+ if ( $Knext <= $Kmax ) {
+
+ Fault("unexpected tokens at end of file when reconstructing lines");
}
- return;
-}
+ $self->[_rKrange_code_without_comments_] = \@Krange_code_without_comments;
+
+ # Setup the convergence test in the FileWriter based on line-ending indexes
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->setup_convergence_test( \@Klast_valign_code );
+
+ # Mark essential old breakpoints if combination -iob -lp is used. These
+ # two options do not work well together, but we can avoid turning -iob off
+ # by ignoring -iob at certain essential line breaks.
+ # Fixes cases b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058
+ if ( $rOpts_ignore_old_breakpoints && $rOpts_line_up_parentheses ) {
+ my %is_assignment_or_fat_comma = %is_assignment;
+ $is_assignment_or_fat_comma{'=>'} = 1;
+ my $ris_essential_old_breakpoint =
+ $self->[_ris_essential_old_breakpoint_];
+ my $iline = -1;
+ my ( $Kfirst, $Klast );
+ foreach my $line_of_tokens ( @{$rlines} ) {
+ $iline++;
+ my $line_type = $line_of_tokens->{_line_type};
+ if ( $line_type ne 'CODE' ) {
+ ( $Kfirst, $Klast ) = ( undef, undef );
+ next;
+ }
+ my ( $Kfirst_prev, $Klast_prev ) = ( $Kfirst, $Klast );
+ ( $Kfirst, $Klast ) = @{ $line_of_tokens->{_rK_range} };
-sub dump_want_right_space {
- my $fh = shift;
- local $" = "\n";
- print $fh <<EOM;
-These values are the main control of whitespace to the right of a token type;
-They may be altered with the -wrs parameter.
-For a list of token types, use perltidy --dump-token-types (-dtt)
- 1 means the token wants a space to its right
--1 means the token does not want a space to its right
-------------------------------------------------------------------------
-EOM
- foreach my $key ( sort keys %want_right_space ) {
- print $fh "$key\t$want_right_space{$key}\n";
+ next unless defined($Klast_prev);
+ next unless defined($Kfirst);
+ my $type_last = $rLL->[$Klast_prev]->[_TOKEN_];
+ my $type_first = $rLL->[$Kfirst]->[_TOKEN_];
+ next
+ unless ( $is_assignment_or_fat_comma{$type_last}
+ || $is_assignment_or_fat_comma{$type_first} );
+ $ris_essential_old_breakpoint->{$Klast_prev} = 1;
+ }
}
return;
}
-{ # begin is_essential_whitespace
+sub keep_old_line_breaks {
- my %is_sort_grep_map;
- my %is_for_foreach;
+ # Called once per file to find and mark any old line breaks which
+ # should be kept. We will be translating the input hashes into
+ # token indexes.
- BEGIN {
+ # A flag is set as follows:
+ # = 1 make a hard break (flush the current batch)
+ # best for something like leading commas (-kbb=',')
+ # = 2 make a soft break (keep building current batch)
+ # best for something like leading ->
- my @q;
- @q = qw(sort grep map);
- @is_sort_grep_map{@q} = (1) x scalar(@q);
+ my ($self) = @_;
- @q = qw(for foreach);
- @is_for_foreach{@q} = (1) x scalar(@q);
+ my $rLL = $self->[_rLL_];
+ my $rKrange_code_without_comments =
+ $self->[_rKrange_code_without_comments_];
+ my $rbreak_before_Kfirst = $self->[_rbreak_before_Kfirst_];
+ my $rbreak_after_Klast = $self->[_rbreak_after_Klast_];
+ my $rwant_container_open = $self->[_rwant_container_open_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $ris_broken_container = $self->[_ris_broken_container_];
+ my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
+
+ # This code moved here from sub break_lists to fix b1120
+ if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
+ foreach my $item ( @{$rKrange_code_without_comments} ) {
+ my ( $Kfirst, $Klast ) = @{$item};
+ my $type = $rLL->[$Kfirst]->[_TYPE_];
+ my $token = $rLL->[$Kfirst]->[_TOKEN_];
+
+ # leading '->' use a value of 2 which causes a soft
+ # break rather than a hard break
+ if ( $type eq '->' ) {
+ $rbreak_before_Kfirst->{$Kfirst} = 2;
+ }
- }
+ # leading ')->' use a special flag to insure that both
+ # opening and closing parens get opened
+ # Fix for b1120: only for parens, not braces
+ elsif ( $token eq ')' ) {
+ my $Kn = $self->K_next_nonblank($Kfirst);
+ next
+ unless ( defined($Kn)
+ && $Kn <= $Klast
+ && $rLL->[$Kn]->[_TYPE_] eq '->' );
+ my $seqno = $rLL->[$Kfirst]->[_TYPE_SEQUENCE_];
+ next unless ($seqno);
- sub is_essential_whitespace {
+ # Note: in previous versions there was a fix here to avoid
+ # instability between conflicting -bom and -pvt or -pvtc flags.
+ # The fix skipped -bom for a small line difference. But this
+ # was troublesome, and instead the fix has been moved to
+ # sub set_vertical_tightness_flags where priority is given to
+ # the -bom flag over -pvt and -pvtc flags. Both opening and
+ # closing paren flags are involved because even though -bom only
+ # requests breaking before the closing paren, automated logic
+ # opens the opening paren when the closing paren opens.
+ # Relevant cases are b977, b1215, b1270, b1303
- # Essential whitespace means whitespace which cannot be safely deleted
- # without risking the introduction of a syntax error.
- # We are given three tokens and their types:
- # ($tokenl, $typel) is the token to the left of the space in question
- # ($tokenr, $typer) is the token to the right of the space in question
- # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
- #
- # This is a slow routine but is not needed too often except when -mangle
- # is used.
- #
- # Note: This routine should almost never need to be changed. It is
- # for avoiding syntax problems rather than for formatting.
- my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
+ $rwant_container_open->{$seqno} = 1;
+ }
+ }
+ }
- my $result =
+ return unless ( %keep_break_before_type || %keep_break_after_type );
- # never combine two bare words or numbers
- # examples: and ::ok(1)
- # return ::spw(...)
- # for bla::bla:: abc
- # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
- # $input eq"quit" to make $inputeq"quit"
- # my $size=-s::SINK if $file; <==OK but we won't do it
- # don't join something like: for bla::bla:: abc
- # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
- ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
- && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
+ my $check_for_break = sub {
+ my ( $KK, $rkeep_break_hash, $rbreak_hash ) = @_;
+ my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
- # do not combine a number with a concatenation dot
- # example: pom.caputo:
- # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
- || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
- || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )
+ # non-container tokens use the type as the key
+ if ( !$seqno ) {
+ my $type = $rLL->[$KK]->[_TYPE_];
+ if ( $rkeep_break_hash->{$type} ) {
+ $rbreak_hash->{$KK} = 1;
+ }
+ }
- # do not join a minus with a bare word, because you might form
- # a file test operator. Example from Complex.pm:
- # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
- || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )
+ # container tokens use the token as the key
+ else {
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ my $flag = $rkeep_break_hash->{$token};
+ if ($flag) {
- # do not join a bare word with a minus, like between 'Send' and
- # '-recipients' here <<snippets/space3.in>>
- # my $msg = new Fax::Send
- # -recipients => $to,
- # -data => $data;
- # This is the safest thing to do. If we had the token to the right of
- # the minus we could do a better check.
- || ( ( $tokenr eq '-' ) && ( $typel eq 'w' ) )
+ my $match = $flag eq '1' || $flag eq '*';
- # and something like this could become ambiguous without space
- # after the '-':
- # use constant III=>1;
- # $a = $b - III;
- # and even this:
- # $a = - III;
- || ( ( $tokenl eq '-' )
- && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )
+ # check for special matching codes
+ if ( !$match ) {
+ if ( $token eq '(' || $token eq ')' ) {
+ $match = $self->match_paren_flag( $KK, $flag );
+ }
+ elsif ( $token eq '{' || $token eq '}' ) {
+
+ # These tentative codes 'b' and 'B' for brace types are
+ # placeholders for possible future brace types. They
+ # are not documented and may be changed.
+ my $block_type =
+ $self->[_rblock_type_of_seqno_]->{$seqno};
+ if ( $flag eq 'b' ) { $match = $block_type }
+ elsif ( $flag eq 'B' ) { $match = !$block_type }
+ else {
+ # unknown code - no match
+ }
+ }
+ }
+ $rbreak_hash->{$KK} = 1 if ($match);
+ }
+ }
+ };
- # '= -' should not become =- or you will get a warning
- # about reversed -=
- # || ($tokenr eq '-')
+ foreach my $item ( @{$rKrange_code_without_comments} ) {
+ my ( $Kfirst, $Klast ) = @{$item};
+ $check_for_break->(
+ $Kfirst, \%keep_break_before_type, $rbreak_before_Kfirst
+ );
+ $check_for_break->(
+ $Klast, \%keep_break_after_type, $rbreak_after_Klast
+ );
+ }
+ return;
+}
- # keep a space between a quote and a bareword to prevent the
- # bareword from becoming a quote modifier.
- || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
+sub weld_containers {
- # keep a space between a token ending in '$' and any word;
- # this caused trouble: "die @$ if $@"
- || ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
- && ( $tokenr =~ /^[a-zA-Z_]/ ) )
+ # Called once per file to do any welding operations requested by --weld*
+ # flags.
+ my ($self) = @_;
- # perl is very fussy about spaces before <<
- || ( $tokenr =~ /^\<\</ )
+ # This count is used to eliminate needless calls for weld checks elsewere
+ $total_weld_count = 0;
- # avoid combining tokens to create new meanings. Example:
- # $a+ +$b must not become $a++$b
- || ( $is_digraph{ $tokenl . $tokenr } )
- || ( $is_trigraph{ $tokenl . $tokenr } )
+ return if ( $rOpts->{'indent-only'} );
+ return unless ($rOpts_add_newlines);
- # another example: do not combine these two &'s:
- # allow_options & &OPT_EXECCGI
- || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )
+ # Important: sub 'weld_cuddled_blocks' must be called before
+ # sub 'weld_nested_containers'. This is because the cuddled option needs to
+ # use the original _LEVEL_ values of containers, but the weld nested
+ # containers changes _LEVEL_ of welded containers.
- # don't combine $$ or $# with any alphanumeric
- # (testfile mangle.t with --mangle)
- || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )
+ # Here is a good test case to be sure that both cuddling and welding
+ # are working and not interfering with each other: <<snippets/ce_wn1.in>>
- # retain any space after possible filehandle
- # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
- || ( $typel eq 'Z' )
+ # perltidy -wn -ce
- # Perl is sensitive to whitespace after the + here:
- # $b = xvals $a + 0.1 * yvals $a;
- || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )
+ # if ($BOLD_MATH) { (
+ # $labels, $comment,
+ # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
+ # ) } else { (
+ # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
+ # $after
+ # ) }
- # keep paren separate in 'use Foo::Bar ()'
- || ( $tokenr eq '('
- && $typel eq 'w'
- && $typell eq 'k'
- && $tokenll eq 'use' )
+ $self->weld_cuddled_blocks() if ( %{$rcuddled_block_types} );
- # keep any space between filehandle and paren:
- # file mangle.t with --mangle:
- || ( $typel eq 'Y' && $tokenr eq '(' )
+ if ( $rOpts->{'weld-nested-containers'} ) {
- # retain any space after here doc operator ( hereerr.t)
- || ( $typel eq 'h' )
+ $self->weld_nested_containers();
- # be careful with a space around ++ and --, to avoid ambiguity as to
- # which token it applies
- || ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) )
- || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
+ $self->weld_nested_quotes();
+ }
- # need space after foreach my; for example, this will fail in
- # older versions of Perl:
- # foreach my$ft(@filetypes)...
- || (
- $tokenl eq 'my'
+ #-------------------------------------------------------------
+ # All welding is done. Finish setting up weld data structures.
+ #-------------------------------------------------------------
- # /^(for|foreach)$/
- && $is_for_foreach{$tokenll}
- && $tokenr =~ /^\$/
- )
+ my $rLL = $self->[_rLL_];
+ my $rK_weld_left = $self->[_rK_weld_left_];
+ my $rK_weld_right = $self->[_rK_weld_right_];
+ my $rweld_len_right_at_K = $self->[_rweld_len_right_at_K_];
+
+ my @K_multi_weld;
+ my @keys = keys %{$rK_weld_right};
+ $total_weld_count = @keys;
+
+ # First pass to process binary welds.
+ # This loop is processed in unsorted order for efficiency.
+ foreach my $Kstart (@keys) {
+ my $Kend = $rK_weld_right->{$Kstart};
+
+ # An error here would be due to an incorrect initialization introduced
+ # in one of the above weld routines, like sub weld_nested.
+ if ( $Kend <= $Kstart ) {
+ Fault("Bad weld link: Kend=$Kend <= Kstart=$Kstart\n")
+ if (DEVEL_MODE);
+ next;
+ }
- # must have space between grep and left paren; "grep(" will fail
- || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
+ # Set weld values for all tokens this welded pair
+ foreach ( $Kstart + 1 .. $Kend ) {
+ $rK_weld_left->{$_} = $Kstart;
+ }
+ foreach my $Kx ( $Kstart .. $Kend - 1 ) {
+ $rK_weld_right->{$Kx} = $Kend;
+ $rweld_len_right_at_K->{$Kx} =
+ $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
+ }
- # don't stick numbers next to left parens, as in:
- #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
- || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
+ # Remember the leftmost index of welds which continue to the right
+ if ( defined( $rK_weld_right->{$Kend} )
+ && !defined( $rK_weld_left->{$Kstart} ) )
+ {
+ push @K_multi_weld, $Kstart;
+ }
+ }
- # We must be sure that a space between a ? and a quoted string
- # remains if the space before the ? remains. [Loca.pm, lockarea]
- # ie,
- # $b=join $comma ? ',' : ':', @_; # ok
- # $b=join $comma?',' : ':', @_; # ok!
- # $b=join $comma ?',' : ':', @_; # error!
- # Not really required:
- ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
+ # Second pass to process chains of welds (these are rare).
+ # This has to be processed in sorted order.
+ if (@K_multi_weld) {
+ my $Kend = -1;
+ foreach my $Kstart ( sort { $a <=> $b } @K_multi_weld ) {
- # do not remove space between an '&' and a bare word because
- # it may turn into a function evaluation, like here
- # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
- # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
- || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
+ # Skip any interior K which was originally missing a left link
+ next if ( $Kstart <= $Kend );
- # space stacked labels (TODO: check if really necessary)
- || ( $typel eq 'J' && $typer eq 'J' )
+ # Find the end of this chain
+ $Kend = $rK_weld_right->{$Kstart};
+ my $Knext = $rK_weld_right->{$Kend};
+ while ( defined($Knext) ) {
+ $Kend = $Knext;
+ $Knext = $rK_weld_right->{$Kend};
+ }
- ; # the value of this long logic sequence is the result we want
-##if ($typel eq 'j') {print STDERR "typel=$typel typer=$typer result='$result'\n"}
- return $result;
+ # Set weld values this chain
+ foreach ( $Kstart + 1 .. $Kend ) {
+ $rK_weld_left->{$_} = $Kstart;
+ }
+ foreach my $Kx ( $Kstart .. $Kend - 1 ) {
+ $rK_weld_right->{$Kx} = $Kend;
+ $rweld_len_right_at_K->{$Kx} =
+ $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
+ }
+ }
}
-}
-{
- my %secret_operators;
- my %is_leading_secret_token;
+ return;
+}
- BEGIN {
+sub cumulative_length_before_K {
+ my ( $self, $KK ) = @_;
+ my $rLL = $self->[_rLL_];
+ return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
+}
- # token lists for perl secret operators as compiled by Philippe Bruhat
- # at: https://metacpan.org/module/perlsecret
- %secret_operators = (
- 'Goatse' => [qw#= ( ) =#], #=( )=
- 'Venus1' => [qw#0 +#], # 0+
- 'Venus2' => [qw#+ 0#], # +0
- 'Enterprise' => [qw#) x ! !#], # ()x!!
- 'Kite1' => [qw#~ ~ <>#], # ~~<>
- 'Kite2' => [qw#~~ <>#], # ~~<>
- 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
- 'Bang bang ' => [qw#! !#], # !!
- );
+sub weld_cuddled_blocks {
+ my ($self) = @_;
- # The following operators and constants are not included because they
- # are normally kept tight by perltidy:
- # ~~ <~>
- #
+ # Called once per file to handle cuddled formatting
- # Make a lookup table indexed by the first token of each operator:
- # first token => [list, list, ...]
- foreach my $value ( values(%secret_operators) ) {
- my $tok = $value->[0];
- push @{ $is_leading_secret_token{$tok} }, $value;
- }
- }
+ my $rK_weld_left = $self->[_rK_weld_left_];
+ my $rK_weld_right = $self->[_rK_weld_right_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
- sub new_secret_operator_whitespace {
+ # This routine implements the -cb flag by finding the appropriate
+ # closing and opening block braces and welding them together.
+ return unless ( %{$rcuddled_block_types} );
- my ( $rlong_array, $rwhitespace_flags ) = @_;
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
+ my $rbreak_container = $self->[_rbreak_container_];
- # Loop over all tokens in this line
- my ( $token, $type );
- my $jmax = @{$rlong_array} - 1;
- foreach my $j ( 0 .. $jmax ) {
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
- $token = $rlong_array->[$j]->[_TOKEN_];
- $type = $rlong_array->[$j]->[_TYPE_];
+ my $length_to_opening_seqno = sub {
+ my ($seqno) = @_;
+ my $KK = $K_opening_container->{$seqno};
+ my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
+ return $lentot;
+ };
+ my $length_to_closing_seqno = sub {
+ my ($seqno) = @_;
+ my $KK = $K_closing_container->{$seqno};
+ my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
+ return $lentot;
+ };
- # Skip unless this token might start a secret operator
- next if ( $type eq 'b' );
- next unless ( $is_leading_secret_token{$token} );
+ my $is_broken_block = sub {
- # Loop over all secret operators with this leading token
- foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
- my $jend = $j - 1;
- foreach my $tok ( @{$rpattern} ) {
- $jend++;
- $jend++
+ # a block is broken if the input line numbers of the braces differ
+ # we can only cuddle between broken blocks
+ my ($seqno) = @_;
+ my $K_opening = $K_opening_container->{$seqno};
+ return unless ( defined($K_opening) );
+ my $K_closing = $K_closing_container->{$seqno};
+ return unless ( defined($K_closing) );
+ return $rbreak_container->{$seqno}
+ || $rLL->[$K_closing]->[_LINE_INDEX_] !=
+ $rLL->[$K_opening]->[_LINE_INDEX_];
+ };
- if ( $jend <= $jmax
- && $rlong_array->[$jend]->[_TYPE_] eq 'b' );
- if ( $jend > $jmax
- || $tok ne $rlong_array->[$jend]->[_TOKEN_] )
- {
- $jend = undef;
- last;
- }
- }
+ # A stack to remember open chains at all levels: This is a hash rather than
+ # an array for safety because negative levels can occur in files with
+ # errors. This allows us to keep processing with negative levels.
+ # $in_chain{$level} = [$chain_type, $type_sequence];
+ my %in_chain;
+ my $CBO = $rOpts->{'cuddled-break-option'};
- if ($jend) {
+ # loop over structure items to find cuddled pairs
+ my $level = 0;
+ my $KNEXT = $self->[_K_first_seq_item_];
+ while ( defined($KNEXT) ) {
+ my $KK = $KNEXT;
+ $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
+ my $rtoken_vars = $rLL->[$KK];
+ my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+ if ( !$type_sequence ) {
+ next if ( $KK == 0 ); # first token in file may not be container
- # set flags to prevent spaces within this operator
- foreach my $jj ( $j + 1 .. $jend ) {
- $rwhitespace_flags->[$jj] = WS_NO;
- }
- $j = $jend;
- last;
- }
- } ## End Loop over all operators
- } ## End loop over all tokens
- return;
- } # End sub
-}
+ # A fault here implies that an error was made in the little loop at
+ # the bottom of sub 'respace_tokens' which set the values of
+ # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
+ # loop control lines above.
+ Fault("sequence = $type_sequence not defined at K=$KK")
+ if (DEVEL_MODE);
+ next;
+ }
-{ # begin print_line_of_tokens
+ # NOTE: we must use the original levels here. They can get changed
+ # by sub 'weld_nested_containers', so this routine must be called
+ # before sub 'weld_nested_containers'.
+ my $last_level = $level;
+ $level = $rtoken_vars->[_LEVEL_];
- my $rinput_token_array; # Current working array
- my $rinput_K_array; # Future working array
+ if ( $level < $last_level ) { $in_chain{$last_level} = undef }
+ elsif ( $level > $last_level ) { $in_chain{$level} = undef }
- my $in_quote;
- my $guessed_indentation_level;
+ # We are only looking at code blocks
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $type = $rtoken_vars->[_TYPE_];
+ next unless ( $type eq $token );
- # This should be a return variable from extract_token
- # These local token variables are stored by store_token_to_go:
- my $Ktoken_vars;
- my $block_type;
- my $ci_level;
- my $container_environment;
- my $container_type;
- my $in_continued_quote;
- my $level;
- my $no_internal_newlines;
- my $slevel;
- my $token;
- my $type;
- my $type_sequence;
+ if ( $token eq '{' ) {
- # routine to pull the jth token from the line of tokens
- sub extract_token {
- my ( $self, $j ) = @_;
+ my $block_type = $rblock_type_of_seqno->{$type_sequence};
+ if ( !$block_type ) {
- my $rLL = $self->{rLL};
- $Ktoken_vars = $rinput_K_array->[$j];
- if ( !defined($Ktoken_vars) ) {
+ # patch for unrecognized block types which may not be labeled
+ my $Kp = $self->K_previous_nonblank($KK);
+ while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
+ $Kp = $self->K_previous_nonblank($Kp);
+ }
+ next unless $Kp;
+ $block_type = $rLL->[$Kp]->[_TOKEN_];
+ }
+ if ( $in_chain{$level} ) {
- # Shouldn't happen: an error here would be due to a recent program change
- Fault("undefined index K for j=$j");
- }
- my $rtoken_vars = $rLL->[$Ktoken_vars];
+ # we are in a chain and are at an opening block brace.
+ # See if we are welding this opening brace with the previous
+ # block brace. Get their identification numbers:
+ my $closing_seqno = $in_chain{$level}->[1];
+ my $opening_seqno = $type_sequence;
- if ( $rtoken_vars->[_TOKEN_] ne $rLL->[$Ktoken_vars]->[_TOKEN_] ) {
+ # The preceding block must be on multiple lines so that its
+ # closing brace will start a new line.
+ if ( !$is_broken_block->($closing_seqno) ) {
+ next unless ( $CBO == 2 );
+ $rbreak_container->{$closing_seqno} = 1;
+ }
- # Shouldn't happen: an error here would be due to a recent program change
- Fault(<<EOM);
- j=$j, K=$Ktoken_vars, '$rtoken_vars->[_TOKEN_]' ne '$rLL->[$Ktoken_vars]'
-EOM
- }
+ # we will let the trailing block be either broken or intact
+ ## && $is_broken_block->($opening_seqno);
- #########################################################
- # these are now redundant and can eventually be eliminated
+ # We can weld the closing brace to its following word ..
+ my $Ko = $K_closing_container->{$closing_seqno};
+ my $Kon;
+ if ( defined($Ko) ) {
+ $Kon = $self->K_next_nonblank($Ko);
+ }
- $token = $rtoken_vars->[_TOKEN_];
- $type = $rtoken_vars->[_TYPE_];
- $block_type = $rtoken_vars->[_BLOCK_TYPE_];
- $container_type = $rtoken_vars->[_CONTAINER_TYPE_];
- $container_environment = $rtoken_vars->[_CONTAINER_ENVIRONMENT_];
- $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
- $level = $rtoken_vars->[_LEVEL_];
- $slevel = $rtoken_vars->[_SLEVEL_];
- $ci_level = $rtoken_vars->[_CI_LEVEL_];
- #########################################################
+ # ..unless it is a comment
+ if ( defined($Kon) && $rLL->[$Kon]->[_TYPE_] ne '#' ) {
- return;
- }
+ # OK to weld these two tokens...
+ $rK_weld_right->{$Ko} = $Kon;
+ $rK_weld_left->{$Kon} = $Ko;
- {
- my @saved_token;
+ # Set flag that we want to break the next container
+ # so that the cuddled line is balanced.
+ $rbreak_container->{$opening_seqno} = 1
+ if ($CBO);
+ }
- sub save_current_token {
+ }
+ else {
- @saved_token = (
- $block_type, $ci_level,
- $container_environment, $container_type,
- $in_continued_quote, $level,
- $no_internal_newlines, $slevel,
- $token, $type,
- $type_sequence, $Ktoken_vars,
- );
- return;
+ # We are not in a chain. Start a new chain if we see the
+ # starting block type.
+ if ( $rcuddled_block_types->{$block_type} ) {
+ $in_chain{$level} = [ $block_type, $type_sequence ];
+ }
+ else {
+ $block_type = '*';
+ $in_chain{$level} = [ $block_type, $type_sequence ];
+ }
+ }
}
+ elsif ( $token eq '}' ) {
+ if ( $in_chain{$level} ) {
- sub restore_current_token {
- (
- $block_type, $ci_level,
- $container_environment, $container_type,
- $in_continued_quote, $level,
- $no_internal_newlines, $slevel,
- $token, $type,
- $type_sequence, $Ktoken_vars,
- ) = @saved_token;
- return;
+ # We are in a chain at a closing brace. See if this chain
+ # continues..
+ my $Knn = $self->K_next_code($KK);
+ next unless $Knn;
+
+ my $chain_type = $in_chain{$level}->[0];
+ my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
+ if (
+ $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
+ )
+ {
+
+ # Note that we do not weld yet because we must wait until
+ # we we are sure that an opening brace for this follows.
+ $in_chain{$level}->[1] = $type_sequence;
+ }
+ else { $in_chain{$level} = undef }
+ }
}
}
+ return;
+}
- sub token_length {
+sub find_nested_pairs {
+ my $self = shift;
- # Returns the length of a token, given:
- # $token=text of the token
- # $type = type
- # $not_first_token = should be TRUE if this is not the first token of
- # the line. It might the index of this token in an array. It is
- # used to test for a side comment vs a block comment.
- # Note: Eventually this should be the only routine determining the
- # length of a token in this package.
- my ( $token, $type, $not_first_token ) = @_;
- my $token_length = length($token);
+ # This routine is called once per file to do preliminary work needed for
+ # the --weld-nested option. This information is also needed for adding
+ # semicolons.
- # We mark lengths of side comments as just 1 if we are
- # ignoring their lengths when setting line breaks.
- $token_length = 1
- if ( $rOpts_ignore_side_comment_lengths
- && $not_first_token
- && $type eq '#' );
- return $token_length;
- }
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
+ my $Num = @{$rLL};
- sub rtoken_length {
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
- # return length of ith token in @{$rtokens}
- my ($i) = @_;
- return token_length( $rinput_token_array->[$i]->[_TOKEN_],
- $rinput_token_array->[$i]->[_TYPE_], $i );
- }
+ # We define an array of pairs of nested containers
+ my @nested_pairs;
- # Routine to place the current token into the output stream.
- # Called once per output token.
- sub store_token_to_go {
+ # Names of calling routines can either be marked as 'i' or 'w',
+ # and they may invoke a sub call with an '->'. We will consider
+ # any consecutive string of such types as a single unit when making
+ # weld decisions. We also allow a leading !
+ my $is_name_type = {
+ 'i' => 1,
+ 'w' => 1,
+ 'U' => 1,
+ '->' => 1,
+ '!' => 1,
+ };
- my ( $self, $side_comment_follows ) = @_;
+ # Loop over all closing container tokens
+ foreach my $inner_seqno ( keys %{$K_closing_container} ) {
+ my $K_inner_closing = $K_closing_container->{$inner_seqno};
+
+ # See if it is immediately followed by another, outer closing token
+ my $K_outer_closing = $K_inner_closing + 1;
+ $K_outer_closing += 1
+ if ( $K_outer_closing < $Num
+ && $rLL->[$K_outer_closing]->[_TYPE_] eq 'b' );
+
+ next unless ( $K_outer_closing < $Num );
+ my $outer_seqno = $rLL->[$K_outer_closing]->[_TYPE_SEQUENCE_];
+ next unless ($outer_seqno);
+ my $token_outer_closing = $rLL->[$K_outer_closing]->[_TOKEN_];
+ next unless ( $is_closing_token{$token_outer_closing} );
+
+ # Now we have to check the opening tokens.
+ my $K_outer_opening = $K_opening_container->{$outer_seqno};
+ my $K_inner_opening = $K_opening_container->{$inner_seqno};
+ next unless defined($K_outer_opening) && defined($K_inner_opening);
+
+ my $inner_blocktype = $rblock_type_of_seqno->{$inner_seqno};
+ my $outer_blocktype = $rblock_type_of_seqno->{$outer_seqno};
+
+ # Verify that the inner opening token is the next container after the
+ # outer opening token.
+ my $K_io_check = $rLL->[$K_outer_opening]->[_KNEXT_SEQ_ITEM_];
+ next unless defined($K_io_check);
+ if ( $K_io_check != $K_inner_opening ) {
+
+ # The inner opening container does not immediately follow the outer
+ # opening container, but we may still allow a weld if they are
+ # separated by a sub signature. For example, we may have something
+ # like this, where $K_io_check may be at the first 'x' instead of
+ # 'io'. So we need to hop over the signature and see if we arrive
+ # at 'io'.
+
+ # oo io
+ # | x x |
+ # $obj->then( sub ( $code ) {
+ # ...
+ # return $c->render(text => '', status => $code);
+ # } );
+ # | |
+ # ic oc
+
+ next if ( !$inner_blocktype || $inner_blocktype ne 'sub' );
+ next if $rLL->[$K_io_check]->[_TOKEN_] ne '(';
+ my $seqno_signature = $rLL->[$K_io_check]->[_TYPE_SEQUENCE_];
+ next unless defined($seqno_signature);
+ my $K_signature_closing = $K_closing_container->{$seqno_signature};
+ next unless defined($K_signature_closing);
+ my $K_test = $rLL->[$K_signature_closing]->[_KNEXT_SEQ_ITEM_];
+ next
+ unless ( defined($K_test) && $K_test == $K_inner_opening );
+
+ # OK, we have arrived at 'io' in the above diagram. We should put
+ # a limit on the length or complexity of the signature here. There
+ # is no perfect way to do this, one way is to put a limit on token
+ # count. For consistency with older versions, we should allow a
+ # signature with a single variable to weld, but not with
+ # multiple variables. A single variable as in 'sub ($code) {' can
+ # have a $Kdiff of 2 to 4, depending on spacing.
+
+ # But two variables like 'sub ($v1,$v2) {' can have a diff of 4 to
+ # 7, depending on spacing. So to keep formatting consistent with
+ # previous versions, we will also avoid welding if there is a comma
+ # in the signature.
+
+ my $Kdiff = $K_signature_closing - $K_io_check;
+ next if ( $Kdiff > 4 );
+
+ my $saw_comma;
+ foreach my $KK ( $K_io_check + 1 .. $K_signature_closing - 1 ) {
+ if ( $rLL->[$KK]->[_TYPE_] eq ',' ) { $saw_comma = 1; last }
+ }
+ next if ($saw_comma);
+ }
+
+ # Yes .. this is a possible nesting pair.
+ # They can be separated by a small amount.
+ my $K_diff = $K_inner_opening - $K_outer_opening;
+
+ # Count nonblank characters separating them.
+ if ( $K_diff < 0 ) { next } # Shouldn't happen
+ my $Kn = $K_outer_opening;
+ my $nonblank_count = 0;
+ my $type;
+ my $is_name;
+
+ # Here is an example of a long identifier chain which counts as a
+ # single nonblank here (this spans about 10 K indexes):
+ # if ( !Boucherot::SetOfConnections->new->handler->execute(
+ # ^--K_o_o ^--K_i_o
+ # @array) )
+ my $Kn_first = $K_outer_opening;
+ my $Kn_last_nonblank;
+ my $saw_comment;
+ for (
+ my $Kn = $K_outer_opening + 1 ;
+ $Kn <= $K_inner_opening ;
+ $Kn += 1
+ )
+ {
+ next if ( $rLL->[$Kn]->[_TYPE_] eq 'b' );
+ if ( !$nonblank_count ) { $Kn_first = $Kn }
+ if ( $Kn eq $K_inner_opening ) { $nonblank_count++; last; }
+ $Kn_last_nonblank = $Kn;
+
+ # skip chain of identifier tokens
+ my $last_type = $type;
+ my $last_is_name = $is_name;
+ $type = $rLL->[$Kn]->[_TYPE_];
+ if ( $type eq '#' ) { $saw_comment = 1; last }
+ $is_name = $is_name_type->{$type};
+ next if ( $is_name && $last_is_name );
+
+ $nonblank_count++;
+ last if ( $nonblank_count > 2 );
+ }
+
+ # Do not weld across a comment .. fix for c058.
+ next if ($saw_comment);
+
+ # Patch for b1104: do not weld to a paren preceded by sort/map/grep
+ # because the special line break rules may cause a blinking state
+ if ( defined($Kn_last_nonblank)
+ && $rLL->[$K_inner_opening]->[_TOKEN_] eq '('
+ && $rLL->[$Kn_last_nonblank]->[_TYPE_] eq 'k' )
+ {
+ my $token = $rLL->[$Kn_last_nonblank]->[_TOKEN_];
- my $flag = $side_comment_follows ? 1 : $no_internal_newlines;
+ # Turn off welding at sort/map/grep (
+ if ( $is_sort_map_grep{$token} ) { $nonblank_count = 10 }
+ }
- ++$max_index_to_go;
- $K_to_go[$max_index_to_go] = $Ktoken_vars;
- $tokens_to_go[$max_index_to_go] = $token;
- $types_to_go[$max_index_to_go] = $type;
- $nobreak_to_go[$max_index_to_go] = $flag;
- $old_breakpoint_to_go[$max_index_to_go] = 0;
- $forced_breakpoint_to_go[$max_index_to_go] = 0;
- $block_type_to_go[$max_index_to_go] = $block_type;
- $type_sequence_to_go[$max_index_to_go] = $type_sequence;
- $container_environment_to_go[$max_index_to_go] = $container_environment;
- $ci_levels_to_go[$max_index_to_go] = $ci_level;
- $mate_index_to_go[$max_index_to_go] = -1;
- $bond_strength_to_go[$max_index_to_go] = 0;
-
- # Note: negative levels are currently retained as a diagnostic so that
- # the 'final indentation level' is correctly reported for bad scripts.
- # But this means that every use of $level as an index must be checked.
- # If this becomes too much of a problem, we might give up and just clip
- # them at zero.
- ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
- $levels_to_go[$max_index_to_go] = $level;
- $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
-
- # link the non-blank tokens
- my $iprev = $max_index_to_go - 1;
- $iprev-- if ( $iprev >= 0 && $types_to_go[$iprev] eq 'b' );
- $iprev_to_go[$max_index_to_go] = $iprev;
- $inext_to_go[$iprev] = $max_index_to_go
- if ( $iprev >= 0 && $type ne 'b' );
- $inext_to_go[$max_index_to_go] = $max_index_to_go + 1;
-
- $token_lengths_to_go[$max_index_to_go] =
- token_length( $token, $type, $max_index_to_go );
-
- # We keep a running sum of token lengths from the start of this batch:
- # summed_lengths_to_go[$i] = total length to just before token $i
- # summed_lengths_to_go[$i+1] = total length to just after token $i
- $summed_lengths_to_go[ $max_index_to_go + 1 ] =
- $summed_lengths_to_go[$max_index_to_go] +
- $token_lengths_to_go[$max_index_to_go];
-
- # Define the indentation that this token would have if it started
- # a new line. We have to do this now because we need to know this
- # when considering one-line blocks.
- set_leading_whitespace( $level, $ci_level, $in_continued_quote );
-
- # remember previous nonblank tokens seen
- if ( $type ne 'b' ) {
- $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
- $last_last_nonblank_type_to_go = $last_nonblank_type_to_go;
- $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
- $last_nonblank_index_to_go = $max_index_to_go;
- $last_nonblank_type_to_go = $type;
- $last_nonblank_token_to_go = $token;
- if ( $type eq ',' ) {
- $comma_count_in_batch++;
- }
- }
-
- FORMATTER_DEBUG_FLAG_STORE && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
-"STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
- };
- return;
- }
-
- sub copy_hash {
- my ($rold_token_hash) = @_;
- my %new_token_hash =
- map { ( $_, $rold_token_hash->{$_} ) } keys %{$rold_token_hash};
- return \%new_token_hash;
- }
-
- sub copy_array {
- my ($rold) = @_;
- my @new = map { $_ } @{$rold};
- return \@new;
- }
+ if (
- sub copy_token_as_type {
- my ( $rold_token, $type, $token ) = @_;
- if ( $type eq 'b' ) {
- $token = " " unless defined($token);
- }
- elsif ( $type eq 'q' ) {
- $token = '' unless defined($token);
- }
- elsif ( $type eq '->' ) {
- $token = '->' unless defined($token);
- }
- elsif ( $type eq ';' ) {
- $token = ';' unless defined($token);
- }
- else {
- Fault(
-"Programming error: copy_token_as has type $type but should be 'b' or 'q'"
- );
+ # adjacent opening containers, like: do {{
+ $nonblank_count == 1
+
+ # short item following opening paren, like: fun( yyy (
+ || ( $nonblank_count == 2
+ && $rLL->[$K_outer_opening]->[_TOKEN_] eq '(' )
+
+ # anonymous sub + prototype or sig: )->then( sub ($code) {
+ # ... but it seems best not to stack two structural blocks, like
+ # this
+ # sub make_anon_with_my_sub { sub {
+ # because it probably hides the structure a little too much.
+ || ( $inner_blocktype
+ && $inner_blocktype eq 'sub'
+ && $rLL->[$Kn_first]->[_TOKEN_] eq 'sub'
+ && !$outer_blocktype )
+ )
+ {
+ push @nested_pairs,
+ [ $inner_seqno, $outer_seqno, $K_inner_closing ];
}
- my $rnew_token = copy_array($rold_token);
- $rnew_token->[_TYPE_] = $type;
- $rnew_token->[_TOKEN_] = $token;
- $rnew_token->[_BLOCK_TYPE_] = '';
- $rnew_token->[_CONTAINER_TYPE_] = '';
- $rnew_token->[_CONTAINER_ENVIRONMENT_] = '';
- $rnew_token->[_TYPE_SEQUENCE_] = '';
- return $rnew_token;
- }
-
- sub boolean_equals {
- my ( $val1, $val2 ) = @_;
- return ( $val1 && $val2 || !$val1 && !$val2 );
+ next;
}
- sub print_line_of_tokens {
-
- my ( $self, $line_of_tokens ) = @_;
+ # The weld routine expects the pairs in order in the form
+ # [$seqno_inner, $seqno_outer]
+ # And they must be in the same order as the inner closing tokens
+ # (otherwise, welds of three or more adjacent tokens will not work). The K
+ # value of this inner closing token has temporarily been stored for
+ # sorting.
+ @nested_pairs =
- # This routine is called once per input line to process all of
- # the tokens on that line. This is the first stage of
- # beautification.
- #
- # Full-line comments and blank lines may be processed immediately.
- #
- # For normal lines of code, the tokens are stored one-by-one,
- # via calls to 'sub store_token_to_go', until a known line break
- # point is reached. Then, the batch of collected tokens is
- # passed along to 'sub output_line_to_go' for further
- # processing. This routine decides if there should be
- # whitespace between each pair of non-white tokens, so later
- # routines only need to decide on any additional line breaks.
- # Any whitespace is initially a single space character. Later,
- # the vertical aligner may expand that to be multiple space
- # characters if necessary for alignment.
-
- $input_line_number = $line_of_tokens->{_line_number};
- my $input_line = $line_of_tokens->{_line_text};
- my $CODE_type = $line_of_tokens->{_code_type};
-
- my $rK_range = $line_of_tokens->{_rK_range};
- my ( $K_first, $K_last ) = @{$rK_range};
-
- my $rLL = $self->{rLL};
- my $rbreak_container = $self->{rbreak_container};
- my $rshort_nested = $self->{rshort_nested};
-
- if ( !defined($K_first) ) {
+ # Drop the K index after sorting (it would cause trouble downstream)
+ map { [ $_->[0], $_->[1] ] }
- # Empty line: This can happen if tokens are deleted, for example
- # with the -mangle parameter
- return;
- }
+ # Sort on the K values
+ sort { $a->[2] <=> $b->[2] } @nested_pairs;
- $no_internal_newlines = 1 - $rOpts_add_newlines;
- my $is_comment =
- ( $K_first == $K_last && $rLL->[$K_first]->[_TYPE_] eq '#' );
- my $is_static_block_comment_without_leading_space =
- $CODE_type eq 'SBCX';
- $is_static_block_comment =
- $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space;
- my $is_hanging_side_comment = $CODE_type eq 'HSC';
- my $is_VERSION_statement = $CODE_type eq 'VER';
- if ($is_VERSION_statement) {
- $saw_VERSION_in_this_file = 1;
- $no_internal_newlines = 1;
- }
+ return \@nested_pairs;
+}
- # Add interline blank if any
- my $last_old_nonblank_type = "b";
- my $first_new_nonblank_type = "b";
- my $first_new_nonblank_token = " ";
- if ( $max_index_to_go >= 0 ) {
- $last_old_nonblank_type = $types_to_go[$max_index_to_go];
- $first_new_nonblank_type = $rLL->[$K_first]->[_TYPE_];
- $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
- if ( !$is_comment
- && $types_to_go[$max_index_to_go] ne 'b'
- && $K_first > 0
- && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' )
- {
- $K_first -= 1;
- }
- }
+sub match_paren_flag {
+
+ # Decide if this paren is excluded by user request:
+ # undef matches no parens
+ # '*' matches all parens
+ # 'k' matches only if the previous nonblank token is a perl builtin
+ # keyword (such as 'if', 'while'),
+ # 'K' matches if 'k' does not, meaning if the previous token is not a
+ # keyword.
+ # 'f' matches if the previous token is a function other than a keyword.
+ # 'F' matches if 'f' does not.
+ # 'w' matches if either 'k' or 'f' match.
+ # 'W' matches if 'w' does not.
+ my ( $self, $KK, $flag ) = @_;
+
+ return 0 unless ( defined($flag) );
+ return 0 if $flag eq '0';
+ return 1 if $flag eq '1';
+ return 1 if $flag eq '*';
+ return 0 unless ( defined($KK) );
+
+ my $rLL = $self->[_rLL_];
+ my $rtoken_vars = $rLL->[$KK];
+ my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
+ return 0 unless ($seqno);
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $K_opening = $KK;
+ if ( !$is_opening_token{$token} ) {
+ $K_opening = $self->[_K_opening_container_]->{$seqno};
+ }
+ return unless ( defined($K_opening) );
+
+ my ( $is_f, $is_k, $is_w );
+ my $Kp = $self->K_previous_nonblank($K_opening);
+ if ( defined($Kp) ) {
+ my $type_p = $rLL->[$Kp]->[_TYPE_];
+
+ # keyword?
+ $is_k = $type_p eq 'k';
+
+ # function call?
+ $is_f = $self->[_ris_function_call_paren_]->{$seqno};
+
+ # either keyword or function call?
+ $is_w = $is_k || $is_f;
+ }
+ my $match;
+ if ( $flag eq 'k' ) { $match = $is_k }
+ elsif ( $flag eq 'K' ) { $match = !$is_k }
+ elsif ( $flag eq 'f' ) { $match = $is_f }
+ elsif ( $flag eq 'F' ) { $match = !$is_f }
+ elsif ( $flag eq 'w' ) { $match = $is_w }
+ elsif ( $flag eq 'W' ) { $match = !$is_w }
+ return $match;
+}
- # Copy the tokens into local arrays
- $rinput_token_array = [];
- $rinput_K_array = [];
- $rinput_K_array = [ ( $K_first .. $K_last ) ];
- $rinput_token_array = [ map { $rLL->[$_] } @{$rinput_K_array} ];
- my $jmax = @{$rinput_K_array} - 1;
+sub is_excluded_weld {
+
+ # decide if this weld is excluded by user request
+ my ( $self, $KK, $is_leading ) = @_;
+ my $rLL = $self->[_rLL_];
+ my $rtoken_vars = $rLL->[$KK];
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $rflags = $weld_nested_exclusion_rules{$token};
+ return 0 unless ( defined($rflags) );
+ my $flag = $is_leading ? $rflags->[0] : $rflags->[1];
+ return 0 unless ( defined($flag) );
+ return 1 if $flag eq '*';
+ return $self->match_paren_flag( $KK, $flag );
+}
- $in_continued_quote = $starting_in_quote =
- $line_of_tokens->{_starting_in_quote};
- $in_quote = $line_of_tokens->{_ending_in_quote};
- $ending_in_quote = $in_quote;
- $guessed_indentation_level =
- $line_of_tokens->{_guessed_indentation_level};
+# hashes to simplify welding logic
+my %type_ok_after_bareword;
+my %is_ternary;
+my %has_tight_paren;
- my $j_next;
- my $next_nonblank_token;
- my $next_nonblank_token_type;
+BEGIN {
- $block_type = "";
- $container_type = "";
- $container_environment = "";
- $type_sequence = "";
+ # types needed for welding RULE 6
+ my @q = qw# => -> { ( [ #;
+ @type_ok_after_bareword{@q} = (1) x scalar(@q);
- ######################################
- # Handle a block (full-line) comment..
- ######################################
- if ($is_comment) {
+ @q = qw( ? : );
+ @is_ternary{@q} = (1) x scalar(@q);
- if ( $rOpts->{'tee-block-comments'} ) {
- $file_writer_object->tee_on();
- }
+ # these types do not 'like' to be separated from a following paren
+ @q = qw(w i q Q G C Z U);
+ @{has_tight_paren}{@q} = (1) x scalar(@q);
+}
- destroy_one_line_block();
- $self->output_line_to_go();
+use constant DEBUG_WELD => 0;
- # output a blank line before block comments
- if (
- # unless we follow a blank or comment line
- $last_line_leading_type !~ /^[#b]$/
+sub setup_new_weld_measurements {
- # only if allowed
- && $rOpts->{'blanks-before-comments'}
+ # Define quantities to check for excess line lengths when welded.
+ # Called by sub 'weld_nested_containers' and sub 'weld_nested_quotes'
- # if this is NOT an empty comment line
- && $rinput_token_array->[0]->[_TOKEN_] ne '#'
+ my ( $self, $Kouter_opening, $Kinner_opening ) = @_;
- # not after a short line ending in an opening token
- # because we already have space above this comment.
- # Note that the first comment in this if block, after
- # the 'if (', does not get a blank line because of this.
- && !$last_output_short_opening_token
+ # Given indexes of outer and inner opening containers to be welded:
+ # $Kouter_opening, $Kinner_opening
- # never before static block comments
- && !$is_static_block_comment
- )
- {
- $self->flush(); # switching to new output stream
- $file_writer_object->write_blank_code_line();
- $last_line_leading_type = 'b';
- }
+ # Returns these variables:
+ # $new_weld_ok = true (new weld ok) or false (do not start new weld)
+ # $starting_indent = starting indentation
+ # $starting_lentot = starting cumulative length
+ # $msg = diagnostic message for debugging
- # TRIM COMMENTS -- This could be turned off as a option
- $rinput_token_array->[0]->[_TOKEN_] =~ s/\s*$//; # trim right end
+ my $rLL = $self->[_rLL_];
+ my $rlines = $self->[_rlines_];
- if (
- $rOpts->{'indent-block-comments'}
- && ( !$rOpts->{'indent-spaced-block-comments'}
- || $input_line =~ /^\s+/ )
- && !$is_static_block_comment_without_leading_space
- )
- {
- $self->extract_token(0);
- $self->store_token_to_go();
- $self->output_line_to_go();
- }
- else {
- $self->flush(); # switching to new output stream
- $file_writer_object->write_code_line(
- $rinput_token_array->[0]->[_TOKEN_] . "\n" );
- $last_line_leading_type = '#';
- }
- if ( $rOpts->{'tee-block-comments'} ) {
- $file_writer_object->tee_off();
- }
- return;
+ my $starting_level;
+ my $starting_ci;
+ my $starting_lentot;
+ my $maximum_text_length;
+ my $msg = "";
+
+ my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
+ my $rK_range = $rlines->[$iline_oo]->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+
+ #-------------------------------------------------------------------------
+ # We now define a reference index, '$Kref', from which to start measuring
+ # This choice turns out to be critical for keeping welds stable during
+ # iterations, so we go through a number of STEPS...
+ #-------------------------------------------------------------------------
+
+ # STEP 1: Our starting guess is to use measure from the first token of the
+ # current line. This is usually a good guess.
+ my $Kref = $Kfirst;
+
+ # STEP 2: See if we should go back a little farther
+ my $Kprev = $self->K_previous_nonblank($Kfirst);
+ if ( defined($Kprev) ) {
+
+ # Avoid measuring from between an opening paren and a previous token
+ # which should stay close to it ... fixes b1185
+ my $token_oo = $rLL->[$Kouter_opening]->[_TOKEN_];
+ my $type_prev = $rLL->[$Kprev]->[_TYPE_];
+ if ( $Kouter_opening == $Kfirst
+ && $token_oo eq '('
+ && $has_tight_paren{$type_prev} )
+ {
+ $Kref = $Kprev;
}
- # compare input/output indentation except for continuation lines
- # (because they have an unknown amount of initial blank space)
- # and lines which are quotes (because they may have been outdented)
- my $structural_indentation_level = $rinput_token_array->[0]->[_LEVEL_];
- compare_indentation_levels( $guessed_indentation_level,
- $structural_indentation_level )
- unless ( $is_hanging_side_comment
- || $rinput_token_array->[0]->[_CI_LEVEL_] > 0
- || $guessed_indentation_level == 0
- && $rinput_token_array->[0]->[_TYPE_] eq 'Q' );
-
- ##########################
- # Handle indentation-only
- ##########################
+ # Back up and count length from a token like '=' or '=>' if -lp
+ # is used (this fixes b520)
+ # ...or if a break is wanted before there
+ elsif ($rOpts_line_up_parentheses
+ || $want_break_before{$type_prev} )
+ {
- # NOTE: In previous versions we sent all qw lines out immediately here.
- # No longer doing this: also write a line which is entirely a 'qw' list
- # to allow stacking of opening and closing tokens. Note that interior
- # qw lines will still go out at the end of this routine.
- if ( $CODE_type eq 'IO' ) {
- $self->flush();
- my $line = $input_line;
+ # If there are other sequence items between the start of this line
+ # and the opening token in question, then do not include tokens on
+ # the previous line in length calculations. This check added to
+ # fix case b1174 which had a '?' on the line
+ my $no_previous_seq_item = $Kref == $Kouter_opening
+ || $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_] == $Kouter_opening;
- # delete side comments if requested with -io, but
- # we will not allow deleting of closing side comments with -io
- # because the coding would be more complex
- if ( $rOpts->{'delete-side-comments'}
- && $rinput_token_array->[$jmax]->[_TYPE_] eq '#' )
+ if ( $no_previous_seq_item
+ && substr( $type_prev, 0, 1 ) eq '=' )
{
-
- $line = "";
- foreach my $jj ( 0 .. $jmax - 1 ) {
- $line .= $rinput_token_array->[$jj]->[_TOKEN_];
+ $Kref = $Kprev;
+
+ # Fix for b1144 and b1112: backup to the first nonblank
+ # character before the =>, or to the start of its line.
+ if ( $type_prev eq '=>' ) {
+ my $iline_prev = $rLL->[$Kprev]->[_LINE_INDEX_];
+ my $rK_range = $rlines->[$iline_prev]->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ for ( my $KK = $Kref - 1 ; $KK >= $Kfirst ; $KK-- ) {
+ next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
+ $Kref = $KK;
+ last;
+ }
}
}
-
- # Fix for rt #125506 Unexpected string formating
- # in which leading space of a terminal quote was removed
- $line =~ s/\s+$//;
- $line =~ s/^\s+// unless ($in_continued_quote);
-
- $self->extract_token(0);
- $token = $line;
- $type = 'q';
- $block_type = "";
- $container_type = "";
- $container_environment = "";
- $type_sequence = "";
- $self->store_token_to_go();
- $self->output_line_to_go();
- return;
}
+ }
- ############################
- # Handle all other lines ...
- ############################
-
- #######################################################
- # FIXME: this should become unnecessary
- # making $j+2 valid simplifies coding
- my $rnew_blank =
- copy_token_as_type( $rinput_token_array->[$jmax], 'b' );
- push @{$rinput_token_array}, $rnew_blank;
- push @{$rinput_token_array}, $rnew_blank;
- #######################################################
-
- # If we just saw the end of an elsif block, write nag message
- # if we do not see another elseif or an else.
- if ($looking_for_else) {
-
- unless ( $rinput_token_array->[0]->[_TOKEN_] =~ /^(elsif|else)$/ ) {
- write_logfile_entry("(No else block)\n");
+ # STEP 3: Now look ahead for a ternary and, if found, use it.
+ # This fixes case b1182.
+ # Also look for a ')' at the same level and, if found, use it.
+ # This fixes case b1224.
+ if ( $Kref < $Kouter_opening ) {
+ my $Knext = $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_];
+ my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
+ while ( $Knext < $Kouter_opening ) {
+ if ( $rLL->[$Knext]->[_LEVEL_] == $level_oo ) {
+ if ( $is_ternary{ $rLL->[$Knext]->[_TYPE_] }
+ || $rLL->[$Knext]->[_TOKEN_] eq ')' )
+ {
+ $Kref = $Knext;
+ last;
+ }
}
- $looking_for_else = 0;
- }
-
- # This is a good place to kill incomplete one-line blocks
- if (
- (
- ( $semicolons_before_block_self_destruct == 0 )
- && ( $max_index_to_go >= 0 )
- && ( $last_old_nonblank_type eq ';' )
- && ( $first_new_nonblank_token ne '}' )
+ $Knext = $rLL->[$Knext]->[_KNEXT_SEQ_ITEM_];
+ }
+ }
+
+ # Define the starting measurements we will need
+ $starting_lentot =
+ $Kref <= 0 ? 0 : $rLL->[ $Kref - 1 ]->[_CUMULATIVE_LENGTH_];
+ $starting_level = $rLL->[$Kref]->[_LEVEL_];
+ $starting_ci = $rLL->[$Kref]->[_CI_LEVEL_];
+
+ $maximum_text_length = $maximum_text_length_at_level[$starting_level] -
+ $starting_ci * $rOpts_continuation_indentation;
+
+ # STEP 4: Switch to using the outer opening token as the reference
+ # point if a line break before it would make a longer line.
+ # Fixes case b1055 and is also an alternate fix for b1065.
+ my $starting_level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
+ if ( $Kref < $Kouter_opening ) {
+ my $starting_ci_oo = $rLL->[$Kouter_opening]->[_CI_LEVEL_];
+ my $lentot_oo = $rLL->[ $Kouter_opening - 1 ]->[_CUMULATIVE_LENGTH_];
+ my $maximum_text_length_oo =
+ $maximum_text_length_at_level[$starting_level_oo] -
+ $starting_ci_oo * $rOpts_continuation_indentation;
+
+ # The excess length to any cumulative length K = lenK is either
+ # $excess = $lenk - ($lentot + $maximum_text_length), or
+ # $excess = $lenk - ($lentot_oo + $maximum_text_length_oo),
+ # so the worst case (maximum excess) corresponds to the configuration
+ # with minimum value of the sum: $lentot + $maximum_text_length
+ if ( $lentot_oo + $maximum_text_length_oo <
+ $starting_lentot + $maximum_text_length )
+ {
+ $Kref = $Kouter_opening;
+ $starting_level = $starting_level_oo;
+ $starting_ci = $starting_ci_oo;
+ $starting_lentot = $lentot_oo;
+ $maximum_text_length = $maximum_text_length_oo;
+ }
+ }
+
+ my $new_weld_ok = 1;
+
+ # STEP 5, fix b1020: Avoid problem areas with the -wn -lp combination. The
+ # combination -wn -lp -dws -naws does not work well and can cause blinkers.
+ # It will probably only occur in stress testing. For this situation we
+ # will only start a new weld if we start at a 'good' location.
+ # - Added 'if' to fix case b1032.
+ # - Require blank before certain previous characters to fix b1111.
+ # - Add ';' to fix case b1139
+ # - Convert from '$ok_to_weld' to '$new_weld_ok' to fix b1162.
+ # - relaxed constraints for b1227
+ if ( $starting_ci
+ && $rOpts_line_up_parentheses
+ && $rOpts_delete_old_whitespace
+ && !$rOpts_add_whitespace
+ && defined($Kprev) )
+ {
+ my $type_first = $rLL->[$Kfirst]->[_TYPE_];
+ my $token_first = $rLL->[$Kfirst]->[_TOKEN_];
+ my $type_prev = $rLL->[$Kprev]->[_TYPE_];
+ my $type_pp = 'b';
+ if ( $Kprev >= 0 ) { $type_pp = $rLL->[ $Kprev - 1 ]->[_TYPE_] }
+ unless (
+ $type_prev =~ /^[\,\.\;]/
+ || $type_prev =~ /^[=\{\[\(\L]/
+ && ( $type_pp eq 'b' || $type_pp eq '}' || $type_first eq 'k' )
+ || $type_first =~ /^[=\,\.\;\{\[\(\L]/
+ || $type_first eq '||'
+ || (
+ $type_first eq 'k'
+ && ( $token_first eq 'if'
+ || $token_first eq 'or' )
)
-
- # Patch for RT #98902. Honor request to break at old commas.
- || ( $rOpts_break_at_old_comma_breakpoints
- && $max_index_to_go >= 0
- && $last_old_nonblank_type eq ',' )
)
{
- $forced_breakpoint_to_go[$max_index_to_go] = 1
- if ($rOpts_break_at_old_comma_breakpoints);
- destroy_one_line_block();
- $self->output_line_to_go();
+ $msg =
+"Skipping weld: poor break with -lp and ci at type_first='$type_first' type_prev='$type_prev' type_pp=$type_pp\n";
+ $new_weld_ok = 0;
}
+ }
+ return ( $new_weld_ok, $maximum_text_length, $starting_lentot, $msg );
+}
- # loop to process the tokens one-by-one
- $type = 'b';
- $token = "";
+sub excess_line_length_for_Krange {
+ my ( $self, $Kfirst, $Klast ) = @_;
- # We do not want a leading blank if the previous batch just got output
- my $jmin = 0;
- if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
- $jmin = 1;
- }
+ # returns $excess_length =
+ # by how many characters a line composed of tokens $Kfirst .. $Klast will
+ # exceed the allowed line length
- foreach my $j ( $jmin .. $jmax ) {
+ my $rLL = $self->[_rLL_];
+ my $length_before_Kfirst =
+ $Kfirst <= 0
+ ? 0
+ : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
- # pull out the local values for this token
- $self->extract_token($j);
+ # backup before a side comment if necessary
+ my $Kend = $Klast;
+ if ( $rOpts_ignore_side_comment_lengths
+ && $rLL->[$Klast]->[_TYPE_] eq '#' )
+ {
+ my $Kprev = $self->K_previous_nonblank($Klast);
+ if ( defined($Kprev) && $Kprev >= $Kfirst ) { $Kend = $Kprev }
+ }
- if ( $type eq '#' ) {
+ # get the length of the text
+ my $length = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - $length_before_Kfirst;
- if (
- $rOpts->{'delete-side-comments'}
+ # get the size of the text window
+ my $level = $rLL->[$Kfirst]->[_LEVEL_];
+ my $ci_level = $rLL->[$Kfirst]->[_CI_LEVEL_];
+ my $max_text_length = $maximum_text_length_at_level[$level] -
+ $ci_level * $rOpts_continuation_indentation;
- # delete closing side comments if necessary
- || ( $rOpts->{'delete-closing-side-comments'}
- && $token =~ /$closing_side_comment_prefix_pattern/o
- && $last_nonblank_block_type =~
- /$closing_side_comment_list_pattern/o )
- )
- {
- if ( $types_to_go[$max_index_to_go] eq 'b' ) {
- unstore_token_to_go();
- }
- last;
- }
- }
+ my $excess_length = $length - $max_text_length;
- # If we are continuing after seeing a right curly brace, flush
- # buffer unless we see what we are looking for, as in
- # } else ...
- if ( $rbrace_follower && $type ne 'b' ) {
+ DEBUG_WELD
+ && print
+"Kfirst=$Kfirst, Klast=$Klast, Kend=$Kend, level=$level, ci=$ci_level, max_text_length=$max_text_length, length=$length\n";
+ return ($excess_length);
+}
- unless ( $rbrace_follower->{$token} ) {
- $self->output_line_to_go();
- }
- $rbrace_follower = undef;
- }
+sub weld_nested_containers {
+ my ($self) = @_;
- $j_next =
- ( $rinput_token_array->[ $j + 1 ]->[_TYPE_] eq 'b' )
- ? $j + 2
- : $j + 1;
- $next_nonblank_token = $rinput_token_array->[$j_next]->[_TOKEN_];
- $next_nonblank_token_type =
- $rinput_token_array->[$j_next]->[_TYPE_];
-
- # Do not allow breaks which would promote a side comment to a
- # block comment. In order to allow a break before an opening
- # or closing BLOCK, followed by a side comment, those sections
- # of code will handle this flag separately.
- my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
- my $is_opening_BLOCK =
- ( $type eq '{'
- && $token eq '{'
- && $block_type
- && !$rshort_nested->{$type_sequence}
- && $block_type ne 't' );
- my $is_closing_BLOCK =
- ( $type eq '}'
- && $token eq '}'
- && $block_type
- && !$rshort_nested->{$type_sequence}
- && $block_type ne 't' );
-
- if ( $side_comment_follows
- && !$is_opening_BLOCK
- && !$is_closing_BLOCK )
- {
- $no_internal_newlines = 1;
- }
+ # Called once per file for option '--weld-nested-containers'
- # We're only going to handle breaking for code BLOCKS at this
- # (top) level. Other indentation breaks will be handled by
- # sub scan_list, which is better suited to dealing with them.
- if ($is_opening_BLOCK) {
+ my $rK_weld_left = $self->[_rK_weld_left_];
+ my $rK_weld_right = $self->[_rK_weld_right_];
- # Tentatively output this token. This is required before
- # calling starting_one_line_block. We may have to unstore
- # it, though, if we have to break before it.
- $self->store_token_to_go($side_comment_follows);
+ # This routine implements the -wn flag by "welding together"
+ # the nested closing and opening tokens which were previously
+ # identified by sub 'find_nested_pairs'. "welding" simply
+ # involves setting certain hash values which will be checked
+ # later during formatting.
- # Look ahead to see if we might form a one-line block..
- my $too_long =
- $self->starting_one_line_block( $j, $jmax, $level, $slevel,
- $ci_level, $rinput_token_array );
- clear_breakpoint_undo_stack();
+ my $rLL = $self->[_rLL_];
+ my $rlines = $self->[_rlines_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
+ my $ris_asub_block = $self->[_ris_asub_block_];
+ my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
- # to simplify the logic below, set a flag to indicate if
- # this opening brace is far from the keyword which introduces it
- my $keyword_on_same_line = 1;
- if ( ( $max_index_to_go >= 0 )
- && ( $last_nonblank_type eq ')' )
- && ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long ) )
- {
- $keyword_on_same_line = 0;
- }
+ # Find nested pairs of container tokens for any welding.
+ my $rnested_pairs = $self->find_nested_pairs();
- # decide if user requested break before '{'
- my $want_break =
+ # Return unless there are nested pairs to weld
+ return unless defined($rnested_pairs) && @{$rnested_pairs};
- # use -bl flag if not a sub block of any type
- $block_type !~ /^sub\b/
- ? $rOpts->{'opening-brace-on-new-line'}
+ my $rOpts_break_at_old_method_breakpoints =
+ $rOpts->{'break-at-old-method-breakpoints'};
- # use -sbl flag for a named sub block
- : $block_type !~ /$ASUB_PATTERN/
- ? $rOpts->{'opening-sub-brace-on-new-line'}
+ # This array will hold the sequence numbers of the tokens to be welded.
+ my @welds;
- # use -asbl flag for an anonymous sub block
- : $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
+ # Variables needed for estimating line lengths
+ my $maximum_text_length; # maximum spaces available for text
+ my $starting_lentot; # cumulative text to start of current line
- # Do not break if this token is welded to the left
- if ( weld_len_left( $type_sequence, $token ) ) {
- $want_break = 0;
- }
+ my $iline_outer_opening = -1;
+ my $weld_count_this_start = 0;
- # Break before an opening '{' ...
- if (
+ # OLD: $single_line_tol added to fix cases b1180 b1181
+ # = $rOpts_continuation_indentation > $rOpts_indent_columns ? 1 : 0;
+ # NEW: $single_line_tol=0; fixes b1212 and b1180-1181 work now
+ my $single_line_tol = 0;
- # if requested
- $want_break
+ my $multiline_tol = $single_line_tol + 1 +
+ max( $rOpts_indent_columns, $rOpts_continuation_indentation );
- # and we were unable to start looking for a block,
- && $index_start_one_line_block == UNDEFINED_INDEX
+ # Define a welding cutoff level: do not start a weld if the inside
+ # container level equals or exceeds this level.
- # or if it will not be on same line as its keyword, so that
- # it will be outdented (eval.t, overload.t), and the user
- # has not insisted on keeping it on the right
- || ( !$keyword_on_same_line
- && !$rOpts->{'opening-brace-always-on-right'} )
+ # We use the minimum of two criteria, either of which may be more
+ # restrictive. The 'alpha' value is more restrictive in (b1206, b1252) and
+ # the 'beta' value is more restrictive in other cases (b1243).
- )
- {
+ my $weld_cutoff_level = min( $stress_level_alpha, $stress_level_beta + 3 );
- # but only if allowed
- unless ($no_internal_newlines) {
+ # The vertical tightness flags can throw off line length calculations.
+ # This patch was added to fix instability issue b1284.
+ # It works to always use a tol of 1 for 1 line block length tests, but
+ # this restricted value keeps test case wn6.wn working as before.
+ # It may be necessary to include '[' and '{' here in the future.
+ my $one_line_tol = $opening_vertical_tightness{'('} ? 1 : 0;
- # since we already stored this token, we must unstore it
- $self->unstore_token_to_go();
+ my $length_to_opening_seqno = sub {
+ my ($seqno) = @_;
+ my $KK = $K_opening_container->{$seqno};
+ my $lentot = defined($KK)
+ && $KK > 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0;
+ return $lentot;
+ };
- # then output the line
- $self->output_line_to_go();
+ my $length_to_closing_seqno = sub {
+ my ($seqno) = @_;
+ my $KK = $K_closing_container->{$seqno};
+ my $lentot = defined($KK)
+ && $KK > 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0;
+ return $lentot;
+ };
- # and now store this token at the start of a new line
- $self->store_token_to_go($side_comment_follows);
- }
- }
+ # Abbreviations:
+ # _oo=outer opening, i.e. first of { {
+ # _io=inner opening, i.e. second of { {
+ # _oc=outer closing, i.e. second of } {
+ # _ic=inner closing, i.e. first of } }
- # Now update for side comment
- if ($side_comment_follows) { $no_internal_newlines = 1 }
+ my $previous_pair;
- # now output this line
- unless ($no_internal_newlines) {
- $self->output_line_to_go();
- }
- }
+ # Main loop over nested pairs...
+ # We are working from outermost to innermost pairs so that
+ # level changes will be complete when we arrive at the inner pairs.
+ while ( my $item = pop( @{$rnested_pairs} ) ) {
+ my ( $inner_seqno, $outer_seqno ) = @{$item};
- elsif ($is_closing_BLOCK) {
+ my $Kouter_opening = $K_opening_container->{$outer_seqno};
+ my $Kinner_opening = $K_opening_container->{$inner_seqno};
+ my $Kouter_closing = $K_closing_container->{$outer_seqno};
+ my $Kinner_closing = $K_closing_container->{$inner_seqno};
- # If there is a pending one-line block ..
- if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
+ # RULE: do not weld if inner container has <= 3 tokens unless the next
+ # token is a heredoc (so we know there will be multiple lines)
+ if ( $Kinner_closing - $Kinner_opening <= 4 ) {
+ my $Knext_nonblank = $self->K_next_nonblank($Kinner_opening);
+ next unless defined($Knext_nonblank);
+ my $type = $rLL->[$Knext_nonblank]->[_TYPE_];
+ next unless ( $type eq 'h' );
+ }
- # we have to terminate it if..
- if (
+ my $outer_opening = $rLL->[$Kouter_opening];
+ my $inner_opening = $rLL->[$Kinner_opening];
+ my $outer_closing = $rLL->[$Kouter_closing];
+ my $inner_closing = $rLL->[$Kinner_closing];
- # it is too long (final length may be different from
- # initial estimate). note: must allow 1 space for this
- # token
- excess_line_length( $index_start_one_line_block,
- $max_index_to_go ) >= 0
+ # RULE: do not weld to a hash brace. The reason is that it has a very
+ # strong bond strength to the next token, so a line break after it
+ # may not work. Previously we allowed welding to something like @{
+ # but that caused blinking states (cases b751, b779).
+ if ( $inner_opening->[_TYPE_] eq 'L' ) {
+ next;
+ }
- # or if it has too many semicolons
- || ( $semicolons_before_block_self_destruct == 0
- && $last_nonblank_type ne ';' )
- )
- {
- destroy_one_line_block();
- }
- }
+ # RULE: do not weld to a square bracket which does not contain commas
+ if ( $inner_opening->[_TYPE_] eq '[' ) {
+ my $rtype_count = $self->[_rtype_count_by_seqno_]->{$inner_seqno};
+ next unless ($rtype_count);
+ my $comma_count = $rtype_count->{','};
+ next unless ($comma_count);
- # put a break before this closing curly brace if appropriate
- unless ( $no_internal_newlines
- || $index_start_one_line_block != UNDEFINED_INDEX )
- {
+ # Do not weld if there is text before a '[' such as here:
+ # curr_opt ( @beg [2,5] )
+ # It will not break into the desired sandwich structure.
+ # This fixes case b109, 110.
+ my $Kdiff = $Kinner_opening - $Kouter_opening;
+ next if ( $Kdiff > 2 );
+ next
+ if ( $Kdiff == 2
+ && $rLL->[ $Kouter_opening + 1 ]->[_TYPE_] ne 'b' );
- # write out everything before this closing curly brace
- $self->output_line_to_go();
- }
+ }
- # Now update for side comment
- if ($side_comment_follows) { $no_internal_newlines = 1 }
+ # RULE: Avoid welding under stress. The idea is that we need to have a
+ # little space* within a welded container to avoid instability. Note
+ # that after each weld the level values are reduced, so long multiple
+ # welds can still be made. This rule will seldom be a limiting factor
+ # in actual working code. Fixes b1206, b1243.
+ my $inner_level = $inner_opening->[_LEVEL_];
+ if ( $inner_level >= $weld_cutoff_level ) { next }
- # store the closing curly brace
- $self->store_token_to_go();
+ # Set flag saying if this pair starts a new weld
+ my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
- # ok, we just stored a closing curly brace. Often, but
- # not always, we want to end the line immediately.
- # So now we have to check for special cases.
+ # Set flag saying if this pair is adjacent to the previous nesting pair
+ # (even if previous pair was rejected as a weld)
+ my $touch_previous_pair =
+ defined($previous_pair) && $outer_seqno == $previous_pair->[0];
+ $previous_pair = $item;
- # if this '}' successfully ends a one-line block..
- my $is_one_line_block = 0;
- my $keep_going = 0;
- if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
+ my $do_not_weld_rule = 0;
+ my $Msg = "";
+ my $is_one_line_weld;
- # Remember the type of token just before the
- # opening brace. It would be more general to use
- # a stack, but this will work for one-line blocks.
- $is_one_line_block =
- $types_to_go[$index_start_one_line_block];
+ my $iline_oo = $outer_opening->[_LINE_INDEX_];
+ my $iline_io = $inner_opening->[_LINE_INDEX_];
+ my $iline_ic = $inner_closing->[_LINE_INDEX_];
+ my $iline_oc = $outer_closing->[_LINE_INDEX_];
+ my $token_oo = $outer_opening->[_TOKEN_];
+ my $token_io = $inner_opening->[_TOKEN_];
+
+ my $is_multiline_weld =
+ $iline_oo == $iline_io
+ && $iline_ic == $iline_oc
+ && $iline_io != $iline_ic;
+
+ if (DEBUG_WELD) {
+ my $len_oo = $rLL->[$Kouter_opening]->[_CUMULATIVE_LENGTH_];
+ my $len_io = $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_];
+ $Msg .= <<EOM;
+Pair seqo=$outer_seqno seqi=$inner_seqno lines: loo=$iline_oo lio=$iline_io lic=$iline_ic loc=$iline_oc
+Koo=$Kouter_opening Kio=$Kinner_opening Kic=$Kinner_closing Koc=$Kouter_closing lenoo=$len_oo lenio=$len_io
+tokens '$token_oo' .. '$token_io'
+EOM
+ }
- # we have to actually make it by removing tentative
- # breaks that were set within it
- undo_forced_breakpoint_stack(0);
- set_nobreaks( $index_start_one_line_block,
- $max_index_to_go - 1 );
+ # DO-NOT-WELD RULE 0:
+ # Avoid a new paren-paren weld if inner parens are 'sheared' (separated
+ # by one line). This can produce instabilities (fixes b1250 b1251
+ # 1256).
+ if ( !$is_multiline_weld
+ && $iline_ic == $iline_io + 1
+ && $token_oo eq '('
+ && $token_io eq '(' )
+ {
+ if (DEBUG_WELD) {
+ $Msg .= "RULE 0: Not welding due to sheared inner parens\n";
+ print $Msg;
+ }
+ next;
+ }
- # then re-initialize for the next one-line block
- destroy_one_line_block();
+ # If this pair is not adjacent to the previous pair (skipped or not),
+ # then measure lengths from the start of line of oo.
+ if (
+ !$touch_previous_pair
- # then decide if we want to break after the '}' ..
- # We will keep going to allow certain brace followers as in:
- # do { $ifclosed = 1; last } unless $losing;
- #
- # But make a line break if the curly ends a
- # significant block:
- if (
- (
- $is_block_without_semicolon{$block_type}
+ # Also do this if restarting at a new line; fixes case b965, s001
+ || ( !$weld_count_this_start && $iline_oo > $iline_outer_opening )
+ )
+ {
- # Follow users break point for
- # one line block types U & G, such as a 'try' block
- || $is_one_line_block =~ /^[UG]$/ && $j == $jmax
- )
+ # Remember the line we are using as a reference
+ $iline_outer_opening = $iline_oo;
+ $weld_count_this_start = 0;
- # if needless semicolon follows we handle it later
- && $next_nonblank_token ne ';'
+ ( my $new_weld_ok, $maximum_text_length, $starting_lentot, my $msg )
+ = $self->setup_new_weld_measurements( $Kouter_opening,
+ $Kinner_opening );
+
+ if (
+ !$new_weld_ok
+ && ( $iline_oo != $iline_io
+ || $iline_ic != $iline_oc )
+ )
+ {
+ if (DEBUG_WELD) { print $msg}
+ next;
+ }
+
+ my $rK_range = $rlines->[$iline_oo]->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+
+ # An existing one-line weld is a line in which
+ # (1) the containers are all on one line, and
+ # (2) the line does not exceed the allowable length
+ if ( $iline_oo == $iline_oc ) {
+
+ # All the tokens are on one line, now check their length.
+ # Start with the full line index range. We will reduce this
+ # in the coding below in some cases.
+ my $Kstart = $Kfirst;
+ my $Kstop = $Klast;
+
+ # Note that the following minimal choice for measuring will
+ # work and will not cause any instabilities because it is
+ # invariant:
+
+ ## my $Kstart = $Kouter_opening;
+ ## my $Kstop = $Kouter_closing;
+
+ # But that can lead to some undesirable welds. So a little
+ # more complicated method has been developed.
+
+ # We are trying to avoid creating bad two-line welds when we are
+ # working on long, previously unwelded input text, such as
+
+ # INPUT (example of a long input line weld candidate):
+ ## $mutation->transpos( $self->RNA->position($mutation->label, $atg_label));
+
+ # GOOD two-line break: (not welded; result marked too long):
+ ## $mutation->transpos(
+ ## $self->RNA->position($mutation->label, $atg_label));
+
+ # BAD two-line break: (welded; result if we weld):
+ ## $mutation->transpos($self->RNA->position(
+ ## $mutation->label, $atg_label));
+
+ # We can only get an approximate estimate of the final length,
+ # since the line breaks may change, and for -lp mode because
+ # even the indentation is not yet known.
+
+ my $level_first = $rLL->[$Kfirst]->[_LEVEL_];
+ my $level_last = $rLL->[$Klast]->[_LEVEL_];
+ my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
+ my $level_oc = $rLL->[$Kouter_closing]->[_LEVEL_];
+
+ # - measure to the end of the original line if balanced
+ # - measure to the closing container if unbalanced (fixes b1230)
+ #if ( $level_first != $level_last ) { $Kstop = $Kouter_closing }
+ if ( $level_oc > $level_last ) { $Kstop = $Kouter_closing }
+
+ # - measure from the start of the original line if balanced
+ # - measure from the most previous token with same level
+ # if unbalanced (b1232)
+ if ( $Kouter_opening > $Kfirst && $level_oo > $level_first ) {
+ $Kstart = $Kouter_opening;
+ for (
+ my $KK = $Kouter_opening - 1 ;
+ $KK > $Kfirst ;
+ $KK -= 1
)
{
- $self->output_line_to_go()
- unless ($no_internal_newlines);
+ next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
+ last if ( $rLL->[$KK]->[_LEVEL_] < $level_oo );
+ $Kstart = $KK;
}
}
- # set string indicating what we need to look for brace follower
- # tokens
- if ( $block_type eq 'do' ) {
- $rbrace_follower = \%is_do_follower;
- }
- elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
- $rbrace_follower = \%is_if_brace_follower;
- }
- elsif ( $block_type eq 'else' ) {
- $rbrace_follower = \%is_else_brace_follower;
- }
-
- # added eval for borris.t
- elsif ($is_sort_map_grep_eval{$block_type}
- || $is_one_line_block eq 'G' )
- {
- $rbrace_follower = undef;
- $keep_going = 1;
- }
-
- # anonymous sub
- elsif ( $block_type =~ /$ASUB_PATTERN/ ) {
+ my $excess =
+ $self->excess_line_length_for_Krange( $Kstart, $Kstop );
- if ($is_one_line_block) {
- $rbrace_follower = \%is_anon_sub_1_brace_follower;
- }
- else {
- $rbrace_follower = \%is_anon_sub_brace_follower;
- }
- }
+ # Coding simplified here for case b1219.
+ # Increased tol from 0 to 1 when pvt>0 to fix b1284.
+ $is_one_line_weld = $excess <= $one_line_tol;
+ }
- # None of the above: specify what can follow a closing
- # brace of a block which is not an
- # if/elsif/else/do/sort/map/grep/eval
- # Testfiles:
- # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
- else {
- $rbrace_follower = \%is_other_brace_follower;
- }
+ # DO-NOT-WELD RULE 1:
+ # Do not weld something that looks like the start of a two-line
+ # function call, like this: <<snippets/wn6.in>>
+ # $trans->add_transformation(
+ # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
+ # We will look for a semicolon after the closing paren.
- # See if an elsif block is followed by another elsif or else;
- # complain if not.
- if ( $block_type eq 'elsif' ) {
+ # We want to weld something complex, like this though
+ # my $compass = uc( opposite_direction( line_to_canvas_direction(
+ # @{ $coords[0] }, @{ $coords[1] } ) ) );
+ # Otherwise we will get a 'blinker'. For example, the following
+ # would become a blinker without this rule:
+ # $Self->_Add( $SortOrderDisplay{ $Field
+ # ->GenerateFieldForSelectSQL() } );
+ # But it is okay to weld a two-line statement if it looks like
+ # it was already welded, meaning that the two opening containers are
+ # on a different line that the two closing containers. This is
+ # necessary to prevent blinking of something like this with
+ # perltidy -wn -pbp (starting indentation two levels deep):
+
+ # $top_label->set_text( gettext(
+ # "Unable to create personal directory - check permissions.") );
+ if ( $iline_oc == $iline_oo + 1
+ && $iline_io == $iline_ic
+ && $token_oo eq '(' )
+ {
- if ( $next_nonblank_token_type eq 'b' ) { # end of line?
- $looking_for_else = 1; # ok, check on next line
- }
- else {
+ # Look for following semicolon...
+ my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
+ my $next_nonblank_type =
+ defined($Knext_nonblank)
+ ? $rLL->[$Knext_nonblank]->[_TYPE_]
+ : 'b';
+ if ( $next_nonblank_type eq ';' ) {
- unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
- write_logfile_entry("No else block :(\n");
- }
+ # Then do not weld if no other containers between inner
+ # opening and closing.
+ my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
+ if ( $Knext_seq_item == $Kinner_closing ) {
+ $do_not_weld_rule = 1;
}
}
+ }
+ } ## end starting new weld sequence
- # keep going after certain block types (map,sort,grep,eval)
- # added eval for borris.t
- if ($keep_going) {
+ else {
- # keep going
- }
+ # set the 1-line flag if continuing a weld sequence; fixes b1239
+ $is_one_line_weld = ( $iline_oo == $iline_oc );
+ }
- # if no more tokens, postpone decision until re-entring
- elsif ( ( $next_nonblank_token_type eq 'b' )
- && $rOpts_add_newlines )
- {
- unless ($rbrace_follower) {
- $self->output_line_to_go()
- unless ($no_internal_newlines);
- }
- }
+ # DO-NOT-WELD RULE 2:
+ # Do not weld an opening paren to an inner one line brace block
+ # We will just use old line numbers for this test and require
+ # iterations if necessary for convergence
- elsif ($rbrace_follower) {
+ # For example, otherwise we could cause the opening paren
+ # in the following example to separate from the caller name
+ # as here:
- unless ( $rbrace_follower->{$next_nonblank_token} ) {
- $self->output_line_to_go()
- unless ($no_internal_newlines);
- }
- $rbrace_follower = undef;
- }
+ # $_[0]->code_handler
+ # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
- else {
- $self->output_line_to_go() unless ($no_internal_newlines);
- }
+ # Here is another example where we do not want to weld:
+ # $wrapped->add_around_modifier(
+ # sub { push @tracelog => 'around 1'; $_[0]->(); } );
+
+ # If the one line sub block gets broken due to length or by the
+ # user, then we can weld. The result will then be:
+ # $wrapped->add_around_modifier( sub {
+ # push @tracelog => 'around 1';
+ # $_[0]->();
+ # } );
- } # end treatment of closing block token
+ # Updated to fix cases b1082 b1102 b1106 b1115:
+ # Also, do not weld to an intact inner block if the outer opening token
+ # is on a different line. For example, this prevents oscillation
+ # between these two states in case b1106:
+
+ # return map{
+ # ($_,[$self->$_(@_[1..$#_])])
+ # }@every;
+
+ # return map { (
+ # $_, [ $self->$_( @_[ 1 .. $#_ ] ) ]
+ # ) } @every;
+
+ # The effect of this change on typical code is very minimal. Sometimes
+ # it may take a second iteration to converge, but this gives protection
+ # against blinking.
+ if ( !$do_not_weld_rule
+ && !$is_one_line_weld
+ && $iline_ic == $iline_io )
+ {
+ $do_not_weld_rule = 2
+ if ( $token_oo eq '(' || $iline_oo != $iline_io );
+ }
- # handle semicolon
- elsif ( $type eq ';' ) {
+ # DO-NOT-WELD RULE 2A:
+ # Do not weld an opening asub brace in -lp mode if -asbl is set. This
+ # helps avoid instabilities in one-line block formation, and fixes
+ # b1241. Previously, the '$is_one_line_weld' flag was tested here
+ # instead of -asbl, and this fixed most cases. But it turns out that
+ # the real problem was the -asbl flag, and switching to this was
+ # necessary to fixe b1268. This also fixes b1269, b1277, b1278.
+ if (
+ !$do_not_weld_rule
+ ##&& $is_one_line_weld
+ && $rOpts_line_up_parentheses
+ && $rOpts_asbl
+ && $ris_asub_block->{$outer_seqno}
+ )
+ {
+ $do_not_weld_rule = '2A';
+ }
- # kill one-line blocks with too many semicolons
- $semicolons_before_block_self_destruct--;
- if (
- ( $semicolons_before_block_self_destruct < 0 )
- || ( $semicolons_before_block_self_destruct == 0
- && $next_nonblank_token_type !~ /^[b\}]$/ )
- )
- {
- destroy_one_line_block();
+ # DO-NOT-WELD RULE 3:
+ # Do not weld if this makes our line too long.
+ # Use a tolerance which depends on if the old tokens were welded
+ # (fixes cases b746 b748 b749 b750 b752 b753 b754 b755 b756 b758 b759)
+ if ( !$do_not_weld_rule ) {
+
+ # Measure to a little beyond the inner opening token if it is
+ # followed by a bare word, which may have unusual line break rules.
+
+ # NOTE: Originally this was OLD RULE 6: do not weld to a container
+ # which is followed on the same line by an unknown bareword token.
+ # This can cause blinkers (cases b626, b611). But OK to weld one
+ # line welds to fix cases b1057 b1064. For generality, OLD RULE 6
+ # has been merged into RULE 3 here to also fix cases b1078 b1091.
+
+ my $K_for_length = $Kinner_opening;
+ my $Knext_io = $self->K_next_nonblank($Kinner_opening);
+ next unless ( defined($Knext_io) ); # shouldn't happen
+ my $type_io_next = $rLL->[$Knext_io]->[_TYPE_];
+
+ # Note: may need to eventually also include other types here,
+ # such as 'Z' and 'Y': if ($type_io_next =~ /^[ZYw]$/) {
+ if ( $type_io_next eq 'w' ) {
+ my $Knext_io2 = $self->K_next_nonblank($Knext_io);
+ next unless ( defined($Knext_io2) );
+ my $type_io_next2 = $rLL->[$Knext_io2]->[_TYPE_];
+ if ( !$type_ok_after_bareword{$type_io_next2} ) {
+ $K_for_length = $Knext_io2;
}
+ }
+
+ # Use a tolerance for welds over multiple lines to avoid blinkers.
+ # We can use zero tolerance if it looks like we are working on an
+ # existing weld.
+ my $tol =
+ $is_one_line_weld || $is_multiline_weld
+ ? $single_line_tol
+ : $multiline_tol;
- $self->store_token_to_go();
+ # By how many characters does this exceed the text window?
+ my $excess =
+ $self->cumulative_length_before_K($K_for_length) -
+ $starting_lentot + 1 + $tol -
+ $maximum_text_length;
- $self->output_line_to_go()
- unless ( $no_internal_newlines
- || ( $rOpts_keep_interior_semicolons && $j < $jmax )
- || ( $next_nonblank_token eq '}' ) );
+ # Old patch: Use '>=0' instead of '> 0' here to fix cases b995 b998
+ # b1000 b1001 b1007 b1008 b1009 b1010 b1011 b1012 b1016 b1017 b1018
+ # Revised patch: New tolerance definition allows going back to '> 0'
+ # here. This fixes case b1124. See also cases b1087 and b1087a.
+ if ( $excess > 0 ) { $do_not_weld_rule = 3 }
+ if (DEBUG_WELD) {
+ $Msg .=
+"RULE 3 test: excess length to K=$Kinner_opening is $excess > 0 with tol= $tol ?) \n";
}
+ }
- # handle here_doc target string
- elsif ( $type eq 'h' ) {
+ # DO-NOT-WELD RULE 4; implemented for git#10:
+ # Do not weld an opening -ce brace if the next container is on a single
+ # line, different from the opening brace. (This is very rare). For
+ # example, given the following with -ce, we will avoid joining the {
+ # and [
- # no newlines after seeing here-target
- $no_internal_newlines = 1;
- destroy_one_line_block();
- $self->store_token_to_go();
- }
+ # } else {
+ # [ $_, length($_) ]
+ # }
- # handle all other token types
- else {
+ # because this would produce a terminal one-line block:
- $self->store_token_to_go();
- }
+ # } else { [ $_, length($_) ] }
- # remember two previous nonblank OUTPUT tokens
- if ( $type ne '#' && $type ne 'b' ) {
- $last_last_nonblank_token = $last_nonblank_token;
- $last_last_nonblank_type = $last_nonblank_type;
- $last_nonblank_token = $token;
- $last_nonblank_type = $type;
- $last_nonblank_block_type = $block_type;
- }
+ # which may not be what is desired. But given this input:
- # unset the continued-quote flag since it only applies to the
- # first token, and we want to resume normal formatting if
- # there are additional tokens on the line
- $in_continued_quote = 0;
+ # } else { [ $_, length($_) ] }
- } # end of loop over all tokens in this 'line_of_tokens'
+ # then we will do the weld and retain the one-line block
+ if ( !$do_not_weld_rule && $rOpts->{'cuddled-else'} ) {
+ my $block_type = $rblock_type_of_seqno->{$outer_seqno};
+ if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) {
+ my $io_line = $inner_opening->[_LINE_INDEX_];
+ my $ic_line = $inner_closing->[_LINE_INDEX_];
+ my $oo_line = $outer_opening->[_LINE_INDEX_];
+ if ( $oo_line < $io_line && $ic_line == $io_line ) {
+ $do_not_weld_rule = 4;
+ }
+ }
+ }
- # we have to flush ..
+ # DO-NOT-WELD RULE 5: do not include welds excluded by user
if (
+ !$do_not_weld_rule
+ && %weld_nested_exclusion_rules
+ && ( $self->is_excluded_weld( $Kouter_opening, $starting_new_weld )
+ || $self->is_excluded_weld( $Kinner_opening, 0 ) )
+ )
+ {
+ $do_not_weld_rule = 5;
+ }
- # if there is a side comment
- ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
+ # DO-NOT-WELD RULE 6: This has been merged into RULE 3 above.
- # if this line ends in a quote
- # NOTE: This is critically important for insuring that quoted lines
- # do not get processed by things like -sot and -sct
- || $in_quote
+ # DO-NOT-WELD RULE 7: Do not weld if this conflicts with -bom
+ # (case b973)
+ if ( !$do_not_weld_rule
+ && $rOpts_break_at_old_method_breakpoints
+ && $iline_io > $iline_oo )
+ {
- # if this is a VERSION statement
- || $is_VERSION_statement
-
- # to keep a label at the end of a line
- || $type eq 'J'
-
- # if we are instructed to keep all old line breaks
- || !$rOpts->{'delete-old-newlines'}
- )
- {
- destroy_one_line_block();
- $self->output_line_to_go();
- }
-
- # mark old line breakpoints in current output stream
- if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
- my $jobp = $max_index_to_go;
- if ( $types_to_go[$max_index_to_go] eq 'b' && $max_index_to_go > 0 )
- {
- $jobp--;
+ foreach my $iline ( $iline_oo + 1 .. $iline_io ) {
+ my $rK_range = $rlines->[$iline]->{_rK_range};
+ next unless defined($rK_range);
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ next unless defined($Kfirst);
+ if ( $rLL->[$Kfirst]->[_TYPE_] eq '->' ) {
+ $do_not_weld_rule = 7;
+ last;
+ }
}
- $old_breakpoint_to_go[$jobp] = 1;
}
- return;
- } ## end sub print_line_of_tokens
-} ## end block print_line_of_tokens
-
-sub consecutive_nonblank_lines {
- return $file_writer_object->get_consecutive_nonblank_lines() +
- $vertical_aligner_object->get_cached_line_count();
-}
-# sub output_line_to_go sends one logical line of tokens on down the
-# pipeline to the VerticalAligner package, breaking the line into continuation
-# lines as necessary. The line of tokens is ready to go in the "to_go"
-# arrays.
-sub output_line_to_go {
+ if ($do_not_weld_rule) {
- my $self = shift;
- my $rLL = $self->{rLL};
-
- # debug stuff; this routine can be called from many points
- FORMATTER_DEBUG_FLAG_OUTPUT && do {
- my ( $a, $b, $c ) = caller;
- write_diagnostics(
-"OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
- );
- my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
- write_diagnostics("$output_str\n");
- };
-
- # Do not end line in a weld
- return if ( weld_len_right_to_go($max_index_to_go) );
+ # After neglecting a pair, we start measuring from start of point
+ # io ... but not if previous type does not like to be separated
+ # from its container (fixes case b1184)
+ my $Kprev = $self->K_previous_nonblank($Kinner_opening);
+ my $type_prev = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'w';
+ if ( !$has_tight_paren{$type_prev} ) {
+ my $starting_level = $inner_opening->[_LEVEL_];
+ my $starting_ci_level = $inner_opening->[_CI_LEVEL_];
+ $starting_lentot =
+ $self->cumulative_length_before_K($Kinner_opening);
+ $maximum_text_length =
+ $maximum_text_length_at_level[$starting_level] -
+ $starting_ci_level * $rOpts_continuation_indentation;
+ }
- # just set a tentative breakpoint if we might be in a one-line block
- if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
- set_forced_breakpoint($max_index_to_go);
- return;
- }
+ if (DEBUG_WELD) {
+ $Msg .= "Not welding due to RULE $do_not_weld_rule\n";
+ print $Msg;
+ }
- my $comma_arrow_count_contained = match_opening_and_closing_tokens();
+ # Normally, a broken pair should not decrease indentation of
+ # intermediate tokens:
+ ## if ( $last_pair_broken ) { next }
+ # However, for long strings of welded tokens, such as '{{{{{{...'
+ # we will allow broken pairs to also remove indentation.
+ # This will keep very long strings of opening and closing
+ # braces from marching off to the right. We will do this if the
+ # number of tokens in a weld before the broken weld is 4 or more.
+ # This rule will mainly be needed for test scripts, since typical
+ # welds have fewer than about 4 welded tokens.
+ if ( !@welds || @{ $welds[-1] } < 4 ) { next }
+ }
- # tell the -lp option we are outputting a batch so it can close
- # any unfinished items in its stack
- finish_lp_batch();
+ # otherwise start new weld ...
+ elsif ($starting_new_weld) {
+ $weld_count_this_start++;
+ if (DEBUG_WELD) {
+ $Msg .= "Starting new weld\n";
+ print $Msg;
+ }
+ push @welds, $item;
- # If this line ends in a code block brace, set breaks at any
- # previous closing code block braces to breakup a chain of code
- # blocks on one line. This is very rare but can happen for
- # user-defined subs. For example we might be looking at this:
- # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
- my $saw_good_break = 0; # flag to force breaks even if short line
- if (
+ $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
+ $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
- # looking for opening or closing block brace
- $block_type_to_go[$max_index_to_go]
+ $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
+ $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
+ }
- # but not one of these which are never duplicated on a line:
- # until|while|for|if|elsif|else
- && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
- )
- {
- my $lev = $nesting_depth_to_go[$max_index_to_go];
+ # ... or extend current weld
+ else {
+ $weld_count_this_start++;
+ if (DEBUG_WELD) {
+ $Msg .= "Extending current weld\n";
+ print $Msg;
+ }
+ unshift @{ $welds[-1] }, $inner_seqno;
+ $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
+ $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
- # Walk backwards from the end and
- # set break at any closing block braces at the same level.
- # But quit if we are not in a chain of blocks.
- for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
- last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
- next if ( $levels_to_go[$i] > $lev ); # skip past higher level
+ $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
+ $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
+ }
- if ( $block_type_to_go[$i] ) {
- if ( $tokens_to_go[$i] eq '}' ) {
- set_forced_breakpoint($i);
- $saw_good_break = 1;
- }
+ # After welding, reduce the indentation level if all intermediate tokens
+ my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
+ if ( $dlevel != 0 ) {
+ my $Kstart = $Kinner_opening;
+ my $Kstop = $Kinner_closing;
+ for ( my $KK = $Kstart ; $KK <= $Kstop ; $KK++ ) {
+ $rLL->[$KK]->[_LEVEL_] += $dlevel;
}
- # quit if we see anything besides words, function, blanks
- # at this level
- elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
+ # Copy opening ci level to help break at = for -lp mode (case b1124)
+ $rLL->[$Kinner_opening]->[_CI_LEVEL_] =
+ $rLL->[$Kouter_opening]->[_CI_LEVEL_];
+
+ # But do not copy the closing ci level ... it can give poor results
+ ## $rLL->[$Kinner_closing]->[_CI_LEVEL_] =
+ ## $rLL->[$Kouter_closing]->[_CI_LEVEL_];
}
}
- my $imin = 0;
- my $imax = $max_index_to_go;
-
- # trim any blank tokens
- if ( $max_index_to_go >= 0 ) {
- if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
- if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
- }
+ return;
+}
- # anything left to write?
- if ( $imin <= $imax ) {
+sub weld_nested_quotes {
- # add a blank line before certain key types but not after a comment
- if ( $last_line_leading_type !~ /^[#]/ ) {
- my $want_blank = 0;
- my $leading_token = $tokens_to_go[$imin];
- my $leading_type = $types_to_go[$imin];
+ # Called once per file for option '--weld-nested-containers'. This
+ # does welding on qw quotes.
- # blank lines before subs except declarations and one-liners
- if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
- $want_blank = $rOpts->{'blank-lines-before-subs'}
- if ( $self->terminal_type_i( $imin, $imax ) !~ /^[\;\}]$/ );
- }
+ my $self = shift;
- # break before all package declarations
- elsif ($leading_token =~ /^(package\s)/
- && $leading_type eq 'i' )
- {
- $want_blank = $rOpts->{'blank-lines-before-packages'};
- }
+ # See if quotes are excluded from welding
+ my $rflags = $weld_nested_exclusion_rules{'q'};
+ return if ( defined($rflags) && defined( $rflags->[1] ) );
- # break before certain key blocks except one-liners
- if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
- $want_blank = $rOpts->{'blank-lines-before-subs'}
- if ( $self->terminal_type_i( $imin, $imax ) ne '}' );
- }
+ my $rK_weld_left = $self->[_rK_weld_left_];
+ my $rK_weld_right = $self->[_rK_weld_right_];
- # Break before certain block types if we haven't had a
- # break at this level for a while. This is the
- # difficult decision..
- elsif ($leading_type eq 'k'
- && $last_line_leading_type ne 'b'
- && $leading_token =~ /^(unless|if|while|until|for|foreach)$/ )
- {
- my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
- if ( !defined($lc) ) { $lc = 0 }
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
+ my $Num = @{$rLL};
- # patch for RT #128216: no blank line inserted at a level change
- if ( $levels_to_go[$imin] != $last_line_leading_level ) {
- $lc = 0;
- }
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $rlines = $self->[_rlines_];
- $want_blank =
- $rOpts->{'blanks-before-blocks'}
- && $lc >= $rOpts->{'long-block-line-count'}
- && consecutive_nonblank_lines() >=
- $rOpts->{'long-block-line-count'}
- && $self->terminal_type_i( $imin, $imax ) ne '}';
- }
+ my $starting_lentot;
+ my $maximum_text_length;
- # Check for blank lines wanted before a closing brace
- if ( $leading_token eq '}' ) {
- if ( $rOpts->{'blank-lines-before-closing-block'}
- && $block_type_to_go[$imin]
- && $block_type_to_go[$imin] =~
- /$blank_lines_before_closing_block_pattern/ )
- {
- my $nblanks = $rOpts->{'blank-lines-before-closing-block'};
- if ( $nblanks > $want_blank ) {
- $want_blank = $nblanks;
- }
- }
- }
+ my $is_single_quote = sub {
+ my ( $Kbeg, $Kend, $quote_type ) = @_;
+ foreach my $K ( $Kbeg .. $Kend ) {
+ my $test_type = $rLL->[$K]->[_TYPE_];
+ next if ( $test_type eq 'b' );
+ return if ( $test_type ne $quote_type );
+ }
+ return 1;
+ };
- if ($want_blank) {
+ # Length tolerance - same as previously used for sub weld_nested
+ my $multiline_tol =
+ 1 + max( $rOpts_indent_columns, $rOpts_continuation_indentation );
- # future: send blank line down normal path to VerticalAligner
- Perl::Tidy::VerticalAligner::flush();
- $file_writer_object->require_blank_code_lines($want_blank);
- }
- }
+ # look for single qw quotes nested in containers
+ my $KNEXT = $self->[_K_first_seq_item_];
+ while ( defined($KNEXT) ) {
+ my $KK = $KNEXT;
+ $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
+ my $rtoken_vars = $rLL->[$KK];
+ my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
+ if ( !$outer_seqno ) {
+ next if ( $KK == 0 ); # first token in file may not be container
- # update blank line variables and count number of consecutive
- # non-blank, non-comment lines at this level
- $last_last_line_leading_level = $last_line_leading_level;
- $last_line_leading_level = $levels_to_go[$imin];
- if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
- $last_line_leading_type = $types_to_go[$imin];
- if ( $last_line_leading_level == $last_last_line_leading_level
- && $last_line_leading_type ne 'b'
- && $last_line_leading_type ne '#'
- && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
- {
- $nonblank_lines_at_depth[$last_line_leading_level]++;
- }
- else {
- $nonblank_lines_at_depth[$last_line_leading_level] = 1;
+ # A fault here implies that an error was made in the little loop at
+ # the bottom of sub 'respace_tokens' which set the values of
+ # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
+ # loop control lines above.
+ Fault("sequence = $outer_seqno not defined at K=$KK")
+ if (DEVEL_MODE);
+ next;
}
- FORMATTER_DEBUG_FLAG_FLUSH && do {
- my ( $package, $file, $line ) = caller;
- print STDOUT
-"FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
- };
-
- # add a couple of extra terminal blank tokens
- pad_array_to_go();
+ my $token = $rtoken_vars->[_TOKEN_];
+ if ( $is_opening_token{$token} ) {
- # set all forced breakpoints for good list formatting
- my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
+ # see if the next token is a quote of some type
+ my $Kn = $KK + 1;
+ $Kn += 1
+ if ( $Kn < $Num && $rLL->[$Kn]->[_TYPE_] eq 'b' );
+ next unless ( $Kn < $Num );
- my $old_line_count_in_batch =
- $self->get_old_line_count( $K_to_go[0], $K_to_go[$max_index_to_go] );
+ my $next_token = $rLL->[$Kn]->[_TOKEN_];
+ my $next_type = $rLL->[$Kn]->[_TYPE_];
+ next
+ unless ( ( $next_type eq 'q' || $next_type eq 'Q' )
+ && $next_token =~ /^q/ );
- if (
- $is_long_line
- || $old_line_count_in_batch > 1
+ # The token before the closing container must also be a quote
+ my $Kouter_closing = $K_closing_container->{$outer_seqno};
+ my $Kinner_closing = $self->K_previous_nonblank($Kouter_closing);
+ next unless $rLL->[$Kinner_closing]->[_TYPE_] eq $next_type;
- # must always call scan_list() with unbalanced batches because it
- # is maintaining some stacks
- || is_unbalanced_batch()
+ # This is an inner opening container
+ my $Kinner_opening = $Kn;
- # call scan_list if we might want to break at commas
- || (
- $comma_count_in_batch
- && ( $rOpts_maximum_fields_per_table > 0
- || $rOpts_comma_arrow_breakpoints == 0 )
- )
+ # Do not weld to single-line quotes. Nothing is gained, and it may
+ # look bad.
+ next if ( $Kinner_closing == $Kinner_opening );
- # call scan_list if user may want to break open some one-line
- # hash references
- || ( $comma_arrow_count_contained
- && $rOpts_comma_arrow_breakpoints != 3 )
- )
- {
- ## This caused problems in one version of perl for unknown reasons:
- ## $saw_good_break ||= scan_list();
- my $sgb = scan_list();
- $saw_good_break ||= $sgb;
- }
+ # Only weld to quotes delimited with container tokens. This is
+ # because welding to arbitrary quote delimiters can produce code
+ # which is less readable than without welding.
+ my $closing_delimiter =
+ substr( $rLL->[$Kinner_closing]->[_TOKEN_], -1, 1 );
+ next
+ unless ( $is_closing_token{$closing_delimiter}
+ || $closing_delimiter eq '>' );
- # let $ri_first and $ri_last be references to lists of
- # first and last tokens of line fragments to output..
- my ( $ri_first, $ri_last );
+ # Now make sure that there is just a single quote in the container
+ next
+ unless (
+ $is_single_quote->(
+ $Kinner_opening + 1,
+ $Kinner_closing - 1,
+ $next_type
+ )
+ );
- # write a single line if..
- if (
+ # OK: This is a candidate for welding
+ my $Msg = "";
+ my $do_not_weld;
+
+ my $Kouter_opening = $K_opening_container->{$outer_seqno};
+ my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
+ my $iline_io = $rLL->[$Kinner_opening]->[_LINE_INDEX_];
+ my $iline_oc = $rLL->[$Kouter_closing]->[_LINE_INDEX_];
+ my $iline_ic = $rLL->[$Kinner_closing]->[_LINE_INDEX_];
+ my $is_old_weld =
+ ( $iline_oo == $iline_io && $iline_ic == $iline_oc );
+
+ # Fix for case b1189. If quote is marked as type 'Q' then only weld
+ # if the two closing tokens are on the same input line. Otherwise,
+ # the closing line will be output earlier in the pipeline than
+ # other CODE lines and welding will not actually occur. This will
+ # leave a half-welded structure with potential formatting
+ # instability. This might be fixed by adding a check for a weld on
+ # a closing Q token and sending it down the normal channel, but it
+ # would complicate the code and is potentially risky.
+ next
+ if (!$is_old_weld
+ && $next_type eq 'Q'
+ && $iline_ic != $iline_oc );
- # we aren't allowed to add any newlines
- !$rOpts_add_newlines
+ # If welded, the line must not exceed allowed line length
+ ( my $ok_to_weld, $maximum_text_length, $starting_lentot, my $msg )
+ = $self->setup_new_weld_measurements( $Kouter_opening,
+ $Kinner_opening );
+ if ( !$ok_to_weld ) {
+ if (DEBUG_WELD) { print $msg}
+ next;
+ }
- # or, we don't already have an interior breakpoint
- # and we didn't see a good breakpoint
- || (
- !$forced_breakpoint_count
- && !$saw_good_break
+ my $length =
+ $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_] - $starting_lentot;
+ my $excess = $length + $multiline_tol - $maximum_text_length;
- # and this line is 'short'
- && !$is_long_line
- )
- )
- {
- @{$ri_first} = ($imin);
- @{$ri_last} = ($imax);
- }
+ my $excess_max = ( $is_old_weld ? $multiline_tol : 0 );
+ if ( $excess >= $excess_max ) {
+ $do_not_weld = 1;
+ }
- # otherwise use multiple lines
- else {
+ if (DEBUG_WELD) {
+ if ( !$is_old_weld ) { $is_old_weld = "" }
+ $Msg .=
+"excess=$excess>=$excess_max, multiline_tol=$multiline_tol, is_old_weld='$is_old_weld'\n";
+ }
- ( $ri_first, $ri_last, my $colon_count ) =
- $self->set_continuation_breaks($saw_good_break);
+ # Check weld exclusion rules for outer container
+ if ( !$do_not_weld ) {
+ my $is_leading = !defined( $rK_weld_left->{$Kouter_opening} );
+ if ( $self->is_excluded_weld( $KK, $is_leading ) ) {
+ if (DEBUG_WELD) {
+ $Msg .=
+"No qw weld due to weld exclusion rules for outer container\n";
+ }
+ $do_not_weld = 1;
+ }
+ }
- $self->break_all_chain_tokens( $ri_first, $ri_last );
+ # Check the length of the last line (fixes case b1039)
+ if ( !$do_not_weld ) {
+ my $rK_range_ic = $rlines->[$iline_ic]->{_rK_range};
+ my ( $Kfirst_ic, $Klast_ic ) = @{$rK_range_ic};
+ my $excess_ic =
+ $self->excess_line_length_for_Krange( $Kfirst_ic,
+ $Kouter_closing );
+
+ # Allow extra space for additional welded closing container(s)
+ # and a space and comma or semicolon.
+ # NOTE: weld len has not been computed yet. Use 2 spaces
+ # for now, correct for a single weld. This estimate could
+ # be made more accurate if necessary.
+ my $weld_len =
+ defined( $rK_weld_right->{$Kouter_closing} ) ? 2 : 0;
+ if ( $excess_ic + $weld_len + 2 > 0 ) {
+ if (DEBUG_WELD) {
+ $Msg .=
+"No qw weld due to excess ending line length=$excess_ic + $weld_len + 2 > 0\n";
+ }
+ $do_not_weld = 1;
+ }
+ }
- break_equals( $ri_first, $ri_last );
+ if ($do_not_weld) {
+ if (DEBUG_WELD) {
+ $Msg .= "Not Welding QW\n";
+ print $Msg;
+ }
+ next;
+ }
- # now we do a correction step to clean this up a bit
- # (The only time we would not do this is for debugging)
- if ( $rOpts->{'recombine'} ) {
- ( $ri_first, $ri_last ) =
- recombine_breakpoints( $ri_first, $ri_last );
+ # OK to weld
+ if (DEBUG_WELD) {
+ $Msg .= "Welding QW\n";
+ print $Msg;
}
- $self->insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
- }
+ $rK_weld_right->{$Kouter_opening} = $Kinner_opening;
+ $rK_weld_left->{$Kinner_opening} = $Kouter_opening;
- # do corrector step if -lp option is used
- my $do_not_pad = 0;
- if ($rOpts_line_up_parentheses) {
- $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
- }
- $self->unmask_phantom_semicolons( $ri_first, $ri_last );
- if ( $rOpts_one_line_block_semicolons == 0 ) {
- $self->delete_one_line_semicolons( $ri_first, $ri_last );
- }
+ $rK_weld_right->{$Kinner_closing} = $Kouter_closing;
+ $rK_weld_left->{$Kouter_closing} = $Kinner_closing;
- # The line breaks for this batch of code have been finalized. Now we
- # can to package the results for further processing. We will switch
- # from the local '_to_go' buffer arrays (i-index) back to the global
- # token arrays (K-index) at this point.
- my $rlines_K;
- my $index_error;
- for ( my $n = 0 ; $n < @{$ri_first} ; $n++ ) {
- my $ibeg = $ri_first->[$n];
- my $Kbeg = $K_to_go[$ibeg];
- my $iend = $ri_last->[$n];
- my $Kend = $K_to_go[$iend];
- if ( $iend - $ibeg != $Kend - $Kbeg ) {
- $index_error = $n unless defined($index_error);
+ # Undo one indentation level if an extra level was added to this
+ # multiline quote
+ my $qw_seqno =
+ $self->[_rstarting_multiline_qw_seqno_by_K_]->{$Kinner_opening};
+ if ( $qw_seqno
+ && $self->[_rmultiline_qw_has_extra_level_]->{$qw_seqno} )
+ {
+ foreach my $K ( $Kinner_opening + 1 .. $Kinner_closing - 1 ) {
+ $rLL->[$K]->[_LEVEL_] -= 1;
+ }
+ $rLL->[$Kinner_opening]->[_CI_LEVEL_] = 0;
+ $rLL->[$Kinner_closing]->[_CI_LEVEL_] = 0;
}
- push @{$rlines_K},
- [ $Kbeg, $Kend, $forced_breakpoint_to_go[$iend] ];
- }
- # Check correctness of the mapping between the i and K token indexes
- if ( defined($index_error) ) {
+ # undo CI for other welded quotes
+ else {
- # Temporary debug code - should never get here
- for ( my $n = 0 ; $n < @{$ri_first} ; $n++ ) {
- my $ibeg = $ri_first->[$n];
- my $Kbeg = $K_to_go[$ibeg];
- my $iend = $ri_last->[$n];
- my $Kend = $K_to_go[$iend];
- my $idiff = $iend - $ibeg;
- my $Kdiff = $Kend - $Kbeg;
- print STDERR <<EOM;
-line $n, irange $ibeg-$iend = $idiff, Krange $Kbeg-$Kend = $Kdiff;
-EOM
+ foreach my $K ( $Kinner_opening .. $Kinner_closing ) {
+ $rLL->[$K]->[_CI_LEVEL_] = 0;
+ }
}
- Fault("Index error at line $index_error; i and K ranges differ");
- }
-
- my $rbatch_hash = {
- rlines_K => $rlines_K,
- do_not_pad => $do_not_pad,
- ibeg0 => $ri_first->[0],
- };
-
- $self->send_lines_to_vertical_aligner($rbatch_hash);
- # Insert any requested blank lines after an opening brace. We have to
- # skip back before any side comment to find the terminal token
- my $iterm;
- for ( $iterm = $imax ; $iterm >= $imin ; $iterm-- ) {
- next if $types_to_go[$iterm] eq '#';
- next if $types_to_go[$iterm] eq 'b';
- last;
- }
-
- # write requested number of blank lines after an opening block brace
- if ( $iterm >= $imin && $types_to_go[$iterm] eq '{' ) {
- if ( $rOpts->{'blank-lines-after-opening-block'}
- && $block_type_to_go[$iterm]
- && $block_type_to_go[$iterm] =~
- /$blank_lines_after_opening_block_pattern/ )
- {
- my $nblanks = $rOpts->{'blank-lines-after-opening-block'};
- Perl::Tidy::VerticalAligner::flush();
- $file_writer_object->require_blank_code_lines($nblanks);
+ # Change the level of a closing qw token to be that of the outer
+ # containing token. This will allow -lp indentation to function
+ # correctly in the vertical aligner.
+ # Patch to fix c002: but not if it contains text
+ if ( length( $rLL->[$Kinner_closing]->[_TOKEN_] ) == 1 ) {
+ $rLL->[$Kinner_closing]->[_LEVEL_] =
+ $rLL->[$Kouter_closing]->[_LEVEL_];
}
}
}
+ return;
+}
- prepare_for_new_input_lines();
+sub is_welded_at_seqno {
- return;
+ my ( $self, $seqno ) = @_;
+
+ # given a sequence number:
+ # return true if it is welded either left or right
+ # return false otherwise
+ return unless ( $total_weld_count && defined($seqno) );
+ my $KK_o = $self->[_K_opening_container_]->{$seqno};
+ return unless defined($KK_o);
+ return defined( $self->[_rK_weld_left_]->{$KK_o} )
+ || defined( $self->[_rK_weld_right_]->{$KK_o} );
}
-sub note_added_semicolon {
- my ($line_number) = @_;
- $last_added_semicolon_at = $line_number;
- if ( $added_semicolon_count == 0 ) {
- $first_added_semicolon_at = $last_added_semicolon_at;
- }
- $added_semicolon_count++;
- write_logfile_entry("Added ';' here\n");
- return;
-}
-
-sub note_deleted_semicolon {
- $last_deleted_semicolon_at = $input_line_number;
- if ( $deleted_semicolon_count == 0 ) {
- $first_deleted_semicolon_at = $last_deleted_semicolon_at;
- }
- $deleted_semicolon_count++;
- write_logfile_entry("Deleted unnecessary ';' at line $input_line_number\n");
- return;
-}
+sub mark_short_nested_blocks {
-sub note_embedded_tab {
- $embedded_tab_count++;
- $last_embedded_tab_at = $input_line_number;
- if ( !$first_embedded_tab_at ) {
- $first_embedded_tab_at = $last_embedded_tab_at;
- }
+ # This routine looks at the entire file and marks any short nested blocks
+ # which should not be broken. The results are stored in the hash
+ # $rshort_nested->{$type_sequence}
+ # which will be true if the container should remain intact.
+ #
+ # For example, consider the following line:
- if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
- write_logfile_entry("Embedded tabs in quote or pattern\n");
- }
- return;
-}
+ # sub cxt_two { sort { $a <=> $b } test_if_list() }
-sub starting_one_line_block {
+ # The 'sort' block is short and nested within an outer sub block.
+ # Normally, the existence of the 'sort' block will force the sub block to
+ # break open, but this is not always desirable. Here we will set a flag for
+ # the sort block to prevent this. To give the user control, we will
+ # follow the input file formatting. If either of the blocks is broken in
+ # the input file then we will allow it to remain broken. Otherwise we will
+ # set a flag to keep it together in later formatting steps.
- # after seeing an opening curly brace, look for the closing brace
- # and see if the entire block will fit on a line. This routine is
- # not always right because it uses the old whitespace, so a check
- # is made later (at the closing brace) to make sure we really
- # have a one-line block. We have to do this preliminary check,
- # though, because otherwise we would always break at a semicolon
- # within a one-line block if the block contains multiple statements.
+ # The flag which is set here will be checked in two places:
+ # 'sub process_line_of_CODE' and 'sub starting_one_line_block'
- my ( $self, $j, $jmax, $level, $slevel, $ci_level, $rtoken_array ) = @_;
- my $rbreak_container = $self->{rbreak_container};
- my $rshort_nested = $self->{rshort_nested};
+ my $self = shift;
+ return if $rOpts->{'indent-only'};
- my $jmax_check = @{$rtoken_array};
- if ( $jmax_check < $jmax ) {
- Fault("jmax=$jmax > $jmax_check");
- }
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
- # kill any current block - we can only go 1 deep
- destroy_one_line_block();
+ return unless ( $rOpts->{'one-line-block-nesting'} );
- # return value:
- # 1=distance from start of block to opening brace exceeds line length
- # 0=otherwise
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $rbreak_container = $self->[_rbreak_container_];
+ my $rshort_nested = $self->[_rshort_nested_];
+ my $rlines = $self->[_rlines_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
- my $i_start = 0;
+ # Variables needed for estimating line lengths
+ my $maximum_text_length;
+ my $starting_lentot;
+ my $length_tol = 1;
- # shouldn't happen: there must have been a prior call to
- # store_token_to_go to put the opening brace in the output stream
- if ( $max_index_to_go < 0 ) {
- Fault("program bug: store_token_to_go called incorrectly\n");
- }
+ my $excess_length_to_K = sub {
+ my ($K) = @_;
- # return if block should be broken
- my $type_sequence = $rtoken_array->[$j]->[_TYPE_SEQUENCE_];
- if ( $rbreak_container->{$type_sequence} ) {
- return 0;
- }
+ # Estimate the length from the line start to a given token
+ my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
+ my $excess_length = $length + $length_tol - $maximum_text_length;
+ return ($excess_length);
+ };
- my $block_type = $rtoken_array->[$j]->[_BLOCK_TYPE_];
+ my $is_broken_block = sub {
- # find the starting keyword for this block (such as 'if', 'else', ...)
+ # a block is broken if the input line numbers of the braces differ
+ my ($seqno) = @_;
+ my $K_opening = $K_opening_container->{$seqno};
+ return unless ( defined($K_opening) );
+ my $K_closing = $K_closing_container->{$seqno};
+ return unless ( defined($K_closing) );
+ return $rbreak_container->{$seqno}
+ || $rLL->[$K_closing]->[_LINE_INDEX_] !=
+ $rLL->[$K_opening]->[_LINE_INDEX_];
+ };
- if ( $block_type =~ /^[\{\}\;\:]$/ || $block_type =~ /^package/ ) {
- $i_start = $max_index_to_go;
- }
+ # loop over all containers
+ my @open_block_stack;
+ my $iline = -1;
+ my $KNEXT = $self->[_K_first_seq_item_];
+ while ( defined($KNEXT) ) {
+ my $KK = $KNEXT;
+ $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
+ my $rtoken_vars = $rLL->[$KK];
+ my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+ if ( !$type_sequence ) {
+ next if ( $KK == 0 ); # first token in file may not be container
- # the previous nonblank token should start these block types
- elsif (( $last_last_nonblank_token_to_go eq $block_type )
- || ( $block_type =~ /^sub\b/ )
- || $block_type =~ /\(\)/ )
- {
- $i_start = $last_last_nonblank_index_to_go;
+ # A fault here implies that an error was made in the little loop at
+ # the bottom of sub 'respace_tokens' which set the values of
+ # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
+ # loop control lines above.
+ Fault("sequence = $type_sequence not defined at K=$KK")
+ if (DEVEL_MODE);
+ next;
+ }
- # For signatures and extended syntax ...
- # If this brace follows a parenthesized list, we should look back to
- # find the keyword before the opening paren because otherwise we might
- # form a one line block which stays intack, and cause the parenthesized
- # expression to break open. That looks bad. However, actually
- # searching for the opening paren is slow and tedius.
- # The actual keyword is often at the start of a line, but might not be.
- # For example, we might have an anonymous sub with signature list
- # following a =>. It is safe to mark the start anywhere before the
- # opening paren, so we just go back to the prevoious break (or start of
- # the line) if that is before the opening paren. The minor downside is
- # that we may very occasionally break open a block unnecessarily.
- if ( $tokens_to_go[$i_start] eq ')' ) {
- $i_start = $index_max_forced_break + 1;
- if ( $types_to_go[$i_start] eq 'b' ) { $i_start++; }
- my $lev = $levels_to_go[$i_start];
- if ( $lev > $level ) { return 0 }
+ # Patch: do not mark short blocks with welds.
+ # In some cases blinkers can form (case b690).
+ if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence) ) {
+ next;
}
- }
- elsif ( $last_last_nonblank_token_to_go eq ')' ) {
+ # We are just looking at code blocks
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $type = $rtoken_vars->[_TYPE_];
+ next unless ( $type eq $token );
+ next unless ( $rblock_type_of_seqno->{$type_sequence} );
- # For something like "if (xxx) {", the keyword "if" will be
- # just after the most recent break. This will be 0 unless
- # we have just killed a one-line block and are starting another.
- # (doif.t)
- # Note: cannot use inext_index_to_go[] here because that array
- # is still being constructed.
- $i_start = $index_max_forced_break + 1;
- if ( $types_to_go[$i_start] eq 'b' ) {
- $i_start++;
+ # Keep a stack of all acceptable block braces seen.
+ # Only consider blocks entirely on one line so dump the stack when line
+ # changes.
+ my $iline_last = $iline;
+ $iline = $rLL->[$KK]->[_LINE_INDEX_];
+ if ( $iline != $iline_last ) { @open_block_stack = () }
+
+ if ( $token eq '}' ) {
+ if (@open_block_stack) { pop @open_block_stack }
}
+ next unless ( $token eq '{' );
- # Patch to avoid breaking short blocks defined with extended_syntax:
- # Strip off any trailing () which was added in the parser to mark
- # the opening keyword. For example, in the following
- # create( TypeFoo $e) {$bubba}
- # the blocktype would be marked as create()
- my $stripped_block_type = $block_type;
- $stripped_block_type =~ s/\(\)$//;
+ # block must be balanced (bad scripts may be unbalanced)
+ my $K_opening = $K_opening_container->{$type_sequence};
+ my $K_closing = $K_closing_container->{$type_sequence};
+ next unless ( defined($K_opening) && defined($K_closing) );
- unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
- return 0;
- }
- }
+ # require that this block be entirely on one line
+ next if ( $is_broken_block->($type_sequence) );
- # patch for SWITCH/CASE to retain one-line case/when blocks
- elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
+ # See if this block fits on one line of allowed length (which may
+ # be different from the input script)
+ $starting_lentot =
+ $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
+ my $level = $rLL->[$KK]->[_LEVEL_];
+ my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
+ $maximum_text_length =
+ $maximum_text_length_at_level[$level] -
+ $ci_level * $rOpts_continuation_indentation;
- # Note: cannot use inext_index_to_go[] here because that array
- # is still being constructed.
- $i_start = $index_max_forced_break + 1;
- if ( $types_to_go[$i_start] eq 'b' ) {
- $i_start++;
- }
- unless ( $tokens_to_go[$i_start] eq $block_type ) {
- return 0;
+ # Dump the stack if block is too long and skip this block
+ if ( $excess_length_to_K->($K_closing) > 0 ) {
+ @open_block_stack = ();
+ next;
}
- }
- else {
- return 1;
- }
+ # OK, Block passes tests, remember it
+ push @open_block_stack, $type_sequence;
- my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
+ # We are only marking nested code blocks,
+ # so check for a previous block on the stack
+ next unless ( @open_block_stack > 1 );
- # see if length is too long to even start
- if ( $pos > maximum_line_length($i_start) ) {
- return 1;
- }
+ # Looks OK, mark this as a short nested block
+ $rshort_nested->{$type_sequence} = 1;
- foreach my $i ( $j + 1 .. $jmax ) {
+ }
+ return;
+}
- # old whitespace could be arbitrarily large, so don't use it
- if ( $rtoken_array->[$i]->[_TYPE_] eq 'b' ) { $pos += 1 }
- else { $pos += rtoken_length($i) }
+sub adjust_indentation_levels {
- # ignore some small blocks
- my $type_sequence = $rtoken_array->[$i]->[_TYPE_SEQUENCE_];
- my $nobreak = $rshort_nested->{$type_sequence};
+ my ($self) = @_;
- # Return false result if we exceed the maximum line length,
- if ( $pos > maximum_line_length($i_start) ) {
- return 0;
- }
+ # Called once per file to do special indentation adjustments.
+ # These routines adjust levels either by changing _CI_LEVEL_ directly or
+ # by setting modified levels in the array $self->[_radjusted_levels_].
- # keep going for non-containers
- elsif ( !$type_sequence ) {
+ # Initialize the adjusted levels. These will be the levels actually used
+ # for computing indentation.
- }
+ # NOTE: This routine is called after the weld routines, which may have
+ # already adjusted _LEVEL_, so we are making adjustments on top of those
+ # levels. It would be much nicer to have the weld routines also use this
+ # adjustment, but that gets complicated when we combine -gnu -wn and have
+ # some welded quotes.
+ my $Klimit = $self->[_Klimit_];
+ my $rLL = $self->[_rLL_];
+ my $radjusted_levels = $self->[_radjusted_levels_];
- # return if we encounter another opening brace before finding the
- # closing brace.
- elsif ($rtoken_array->[$i]->[_TOKEN_] eq '{'
- && $rtoken_array->[$i]->[_TYPE_] eq '{'
- && $rtoken_array->[$i]->[_BLOCK_TYPE_]
- && !$nobreak )
- {
- return 0;
- }
+ return unless ( defined($Klimit) );
- # if we find our closing brace..
- elsif ($rtoken_array->[$i]->[_TOKEN_] eq '}'
- && $rtoken_array->[$i]->[_TYPE_] eq '}'
- && $rtoken_array->[$i]->[_BLOCK_TYPE_]
- && !$nobreak )
- {
+ foreach my $KK ( 0 .. $Klimit ) {
+ $radjusted_levels->[$KK] = $rLL->[$KK]->[_LEVEL_];
+ }
- # be sure any trailing comment also fits on the line
- my $i_nonblank =
- ( $rtoken_array->[ $i + 1 ]->[_TYPE_] eq 'b' ) ? $i + 2 : $i + 1;
+ # First set adjusted levels for any non-indenting braces.
+ $self->non_indenting_braces();
- # Patch for one-line sort/map/grep/eval blocks with side comments:
- # We will ignore the side comment length for sort/map/grep/eval
- # because this can lead to statements which change every time
- # perltidy is run. Here is an example from Denis Moskowitz which
- # oscillates between these two states without this patch:
+ # Adjust breaks and indentation list containers
+ $self->break_before_list_opening_containers();
-## --------
-## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
-## @baz;
-##
-## grep {
-## $_->foo ne 'bar'
-## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
-## @baz;
-## --------
+ # Set adjusted levels for the whitespace cycle option.
+ $self->whitespace_cycle_adjustment();
- # When the first line is input it gets broken apart by the main
- # line break logic in sub print_line_of_tokens.
- # When the second line is input it gets recombined by
- # print_line_of_tokens and passed to the output routines. The
- # output routines (set_continuation_breaks) do not break it apart
- # because the bond strengths are set to the highest possible value
- # for grep/map/eval/sort blocks, so the first version gets output.
- # It would be possible to fix this by changing bond strengths,
- # but they are high to prevent errors in older versions of perl.
+ $self->braces_left_setup();
- if ( $rtoken_array->[$i_nonblank]->[_TYPE_] eq '#'
- && !$is_sort_map_grep{$block_type} )
- {
+ # Adjust continuation indentation if -bli is set
+ $self->bli_adjustment();
- $pos += rtoken_length($i_nonblank);
+ $self->extended_ci()
+ if ($rOpts_extended_continuation_indentation);
- if ( $i_nonblank > $i + 1 ) {
+ # Now clip any adjusted levels to be non-negative
+ $self->clip_adjusted_levels();
- # source whitespace could be anything, assume
- # at least one space before the hash on output
- if ( $rtoken_array->[ $i + 1 ]->[_TYPE_] eq 'b' ) {
- $pos += 1;
- }
- else { $pos += rtoken_length( $i + 1 ) }
- }
+ return;
+}
- if ( $pos >= maximum_line_length($i_start) ) {
- return 0;
- }
- }
+sub clip_adjusted_levels {
- # ok, it's a one-line block
- create_one_line_block( $i_start, 20 );
- return 0;
- }
+ # Replace any negative adjusted levels with zero.
+ # Negative levels can occur in files with brace errors.
+ my ($self) = @_;
+ my $radjusted_levels = $self->[_radjusted_levels_];
+ return unless defined($radjusted_levels) && @{$radjusted_levels};
+ foreach ( @{$radjusted_levels} ) { $_ = 0 if ( $_ < 0 ) }
+ return;
+}
- # just keep going for other characters
- else {
- }
- }
+sub non_indenting_braces {
- # Allow certain types of new one-line blocks to form by joining
- # input lines. These can be safely done, but for other block types,
- # we keep old one-line blocks but do not form new ones. It is not
- # always a good idea to make as many one-line blocks as possible,
- # so other types are not done. The user can always use -mangle.
- if ( $want_one_line_block{$block_type} ) {
- create_one_line_block( $i_start, 1 );
- }
- return 0;
-}
+ # Called once per file to handle the --non-indenting-braces parameter.
+ # Remove indentation within marked braces if requested
+ my ($self) = @_;
+ return unless ( $rOpts->{'non-indenting-braces'} );
-sub unstore_token_to_go {
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
- # remove most recent token from output stream
- my $self = shift;
- if ( $max_index_to_go > 0 ) {
- $max_index_to_go--;
- }
- else {
- $max_index_to_go = UNDEFINED_INDEX;
+ my $Klimit = $self->[_Klimit_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
+ my $radjusted_levels = $self->[_radjusted_levels_];
+
+ # First locate all of the marked blocks
+ my @K_stack;
+ foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
+ my $KK = $K_opening_container->{$seqno};
+
+ # followed by a comment
+ my $K_sc = $KK + 1;
+ $K_sc += 1
+ if ( $K_sc <= $Klimit && $rLL->[$K_sc]->[_TYPE_] eq 'b' );
+ next unless ( $K_sc <= $Klimit );
+ my $type_sc = $rLL->[$K_sc]->[_TYPE_];
+ next unless ( $type_sc eq '#' );
+
+ # on the same line
+ my $line_index = $rLL->[$KK]->[_LINE_INDEX_];
+ my $line_index_sc = $rLL->[$K_sc]->[_LINE_INDEX_];
+ next unless ( $line_index_sc == $line_index );
+
+ # get the side comment text
+ my $token_sc = $rLL->[$K_sc]->[_TOKEN_];
+
+ # The pattern ends in \s but we have removed the newline, so
+ # we added it back for the match. That way we require an exact
+ # match to the special string and also allow additional text.
+ $token_sc .= "\n";
+ next unless ( $token_sc =~ /$non_indenting_brace_pattern/ );
+ $rspecial_side_comment_type->{$K_sc} = 'NIB';
+ push @K_stack, [ $KK, 1 ];
+ my $Kc = $K_closing_container->{$seqno};
+ push @K_stack, [ $Kc, -1 ] if ( defined($Kc) );
+ }
+ return unless (@K_stack);
+ @K_stack = sort { $a->[0] <=> $b->[0] } @K_stack;
+
+ # Then loop to remove indentation within marked blocks
+ my $KK_last = 0;
+ my $ndeep = 0;
+ foreach my $item (@K_stack) {
+ my ( $KK, $inc ) = @{$item};
+ if ( $ndeep > 0 ) {
+
+ foreach ( $KK_last + 1 .. $KK ) {
+ $radjusted_levels->[$_] -= $ndeep;
+ }
+
+ # We just subtracted the old $ndeep value, which only applies to a
+ # '{'. The new $ndeep applies to a '}', so we undo the error.
+ if ( $inc < 0 ) { $radjusted_levels->[$KK] += 1 }
+ }
+
+ $ndeep += $inc;
+ $KK_last = $KK;
}
return;
}
-sub want_blank_line {
+sub whitespace_cycle_adjustment {
+
my $self = shift;
- $self->flush();
- $file_writer_object->want_blank_line();
- return;
-}
-sub write_unindented_line {
- my ( $self, $line ) = @_;
- $self->flush();
- $file_writer_object->write_line($line);
- return;
-}
+ # Called once per file to implement the --whitespace-cycle option
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
+ my $radjusted_levels = $self->[_radjusted_levels_];
+ my $maximum_level = $self->[_maximum_level_];
-sub undo_ci {
-
- # Undo continuation indentation in certain sequences
- # For example, we can undo continuation indentation in sort/map/grep chains
- # my $dat1 = pack( "n*",
- # map { $_, $lookup->{$_} }
- # sort { $a <=> $b }
- # grep { $lookup->{$_} ne $default } keys %$lookup );
- # To align the map/sort/grep keywords like this:
- # my $dat1 = pack( "n*",
- # map { $_, $lookup->{$_} }
- # sort { $a <=> $b }
- # grep { $lookup->{$_} ne $default } keys %$lookup );
- my ( $self, $ri_first, $ri_last ) = @_;
- my ( $line_1, $line_2, $lev_last );
- my $this_line_is_semicolon_terminated;
- my $max_line = @{$ri_first} - 1;
+ if ( $rOpts_whitespace_cycle
+ && $rOpts_whitespace_cycle > 0
+ && $rOpts_whitespace_cycle < $maximum_level )
+ {
- # looking at each line of this batch..
- # We are looking at leading tokens and looking for a sequence
- # all at the same level and higher level than enclosing lines.
- foreach my $line ( 0 .. $max_line ) {
+ my $Kmax = @{$rLL} - 1;
- my $ibeg = $ri_first->[$line];
- my $lev = $levels_to_go[$ibeg];
- if ( $line > 0 ) {
+ my $whitespace_last_level = -1;
+ my @whitespace_level_stack = ();
+ my $last_nonblank_type = 'b';
+ my $last_nonblank_token = '';
+ foreach my $KK ( 0 .. $Kmax ) {
+ my $level_abs = $radjusted_levels->[$KK];
+ my $level = $level_abs;
+ if ( $level_abs < $whitespace_last_level ) {
+ pop(@whitespace_level_stack);
+ }
+ if ( !@whitespace_level_stack ) {
+ push @whitespace_level_stack, $level_abs;
+ }
+ elsif ( $level_abs > $whitespace_last_level ) {
+ $level = $whitespace_level_stack[-1] +
+ ( $level_abs - $whitespace_last_level );
+
+ if (
+ # 1 Try to break at a block brace
+ (
+ $level > $rOpts_whitespace_cycle
+ && $last_nonblank_type eq '{'
+ && $last_nonblank_token eq '{'
+ )
- # if we have started a chain..
- if ($line_1) {
+ # 2 Then either a brace or bracket
+ || ( $level > $rOpts_whitespace_cycle + 1
+ && $last_nonblank_token =~ /^[\{\[]$/ )
- # see if it continues..
- if ( $lev == $lev_last ) {
- if ( $types_to_go[$ibeg] eq 'k'
- && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
- {
+ # 3 Then a paren too
+ || $level > $rOpts_whitespace_cycle + 2
+ )
+ {
+ $level = 1;
+ }
+ push @whitespace_level_stack, $level;
+ }
+ $level = $whitespace_level_stack[-1];
+ $radjusted_levels->[$KK] = $level;
+
+ $whitespace_last_level = $level_abs;
+ my $type = $rLL->[$KK]->[_TYPE_];
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ if ( $type ne 'b' ) {
+ $last_nonblank_type = $type;
+ $last_nonblank_token = $token;
+ }
+ }
+ }
+ return;
+}
- # chain continues...
- # check for chain ending at end of a statement
- if ( $line == $max_line ) {
+use constant DEBUG_BBX => 0;
- # see of this line ends a statement
- my $iend = $ri_last->[$line];
- $this_line_is_semicolon_terminated =
- $types_to_go[$iend] eq ';'
+sub break_before_list_opening_containers {
- # with possible side comment
- || ( $types_to_go[$iend] eq '#'
- && $iend - $ibeg >= 2
- && $types_to_go[ $iend - 2 ] eq ';'
- && $types_to_go[ $iend - 1 ] eq 'b' );
- }
- $line_2 = $line if ($this_line_is_semicolon_terminated);
- }
- else {
+ my ($self) = @_;
- # kill chain
- $line_1 = undef;
- }
- }
- elsif ( $lev < $lev_last ) {
+ # This routine is called once per batch to implement parameters
+ # --break-before-hash-brace=n and similar -bbx=n flags
+ # and their associated indentation flags:
+ # --break-before-hash-brace-and-indent and similar -bbxi=n
- # chain ends with previous line
- $line_2 = $line - 1;
- }
- elsif ( $lev > $lev_last ) {
+ # Nothing to do if none of the -bbx=n parameters has been set
+ return unless %break_before_container_types;
- # kill chain
- $line_1 = undef;
- }
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
- # undo the continuation indentation if a chain ends
- if ( defined($line_2) && defined($line_1) ) {
- my $continuation_line_count = $line_2 - $line_1 + 1;
- @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ] =
- (0) x ($continuation_line_count)
- if ( $continuation_line_count >= 0 );
- @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ]
- = @reduced_spaces_to_go[ @{$ri_first}
- [ $line_1 .. $line_2 ] ];
- $line_1 = undef;
+ # Loop over all opening container tokens
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $ris_broken_container = $self->[_ris_broken_container_];
+ my $ris_permanently_broken = $self->[_ris_permanently_broken_];
+ my $rhas_list = $self->[_rhas_list_];
+ my $rhas_broken_list = $self->[_rhas_broken_list_];
+ my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
+ my $radjusted_levels = $self->[_radjusted_levels_];
+ my $rparent_of_seqno = $self->[_rparent_of_seqno_];
+ my $rlines = $self->[_rlines_];
+ my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
+ my $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
+ my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
+ my $rK_weld_right = $self->[_rK_weld_right_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+
+ my $length_tol =
+ max( 1, $rOpts_continuation_indentation, $rOpts_indent_columns );
+ if ($rOpts_ignore_old_breakpoints) {
+
+ # Patch suggested by b1231; the old tol was excessive.
+ ## $length_tol += $rOpts_maximum_line_length;
+ $length_tol *= 2;
+ }
+
+ my $rbreak_before_container_by_seqno = {};
+ my $rwant_reduced_ci = {};
+ foreach my $seqno ( keys %{$K_opening_container} ) {
+
+ #----------------------------------------------------------------
+ # Part 1: Examine any -bbx=n flags
+ #----------------------------------------------------------------
+
+ next if ( $rblock_type_of_seqno->{$seqno} );
+ my $KK = $K_opening_container->{$seqno};
+
+ # This must be a list or contain a list.
+ # Note1: switched from 'has_broken_list' to 'has_list' to fix b1024.
+ # Note2: 'has_list' holds the depth to the sub-list. We will require
+ # a depth of just 1
+ my $is_list = $self->is_list_by_seqno($seqno);
+ my $has_list = $rhas_list->{$seqno};
+
+ # Fix for b1173: if welded opening container, use flag of innermost
+ # seqno. Otherwise, the restriction $has_list==1 prevents triple and
+ # higher welds from following the -BBX parameters.
+ if ($total_weld_count) {
+ my $KK_test = $rK_weld_right->{$KK};
+ if ( defined($KK_test) ) {
+ my $seqno_inner = $rLL->[$KK_test]->[_TYPE_SEQUENCE_];
+ $is_list ||= $self->is_list_by_seqno($seqno_inner);
+ $has_list = $rhas_list->{$seqno_inner};
+ }
+ }
+
+ next unless ( $is_list || $has_list && $has_list == 1 );
+
+ my $has_broken_list = $rhas_broken_list->{$seqno};
+ my $has_list_with_lec = $rhas_broken_list_with_lec->{$seqno};
+
+ # Only for types of container tokens with a non-default break option
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ my $break_option = $break_before_container_types{$token};
+ next unless ($break_option);
+
+ # Do not use -bbx under stress for stability ... fixes b1300
+ my $level = $rLL->[$KK]->[_LEVEL_];
+ if ( $level >= $stress_level_beta ) {
+ DEBUG_BBX
+ && print
+"BBX: Switching off at $seqno: level=$level exceeds beta stress level=$stress_level_beta\n";
+ next;
+ }
+
+ # Require previous nonblank to be '=' or '=>'
+ my $Kprev = $KK - 1;
+ next if ( $Kprev < 0 );
+ my $prev_type = $rLL->[$Kprev]->[_TYPE_];
+ if ( $prev_type eq 'b' ) {
+ $Kprev--;
+ next if ( $Kprev < 0 );
+ $prev_type = $rLL->[$Kprev]->[_TYPE_];
+ }
+ next unless ( $is_equal_or_fat_comma{$prev_type} );
+
+ my $ci = $rLL->[$KK]->[_CI_LEVEL_];
+
+ #--------------------------------------------
+ # New coding for option 2 (break if complex).
+ #--------------------------------------------
+ # This new coding uses clues which are invariant under formatting to
+ # decide if a list is complex. For now it is only applied when -lp
+ # and -vmll are used, but eventually it may become the standard method.
+ # Fixes b1274, b1275, and others, including b1099.
+ if ( $break_option == 2 ) {
+
+ if ( $rOpts_line_up_parentheses
+ || $rOpts_variable_maximum_line_length )
+ {
+
+ # Start with the basic definition of a complex list...
+ my $is_complex = $is_list && $has_list;
+
+ # and it is also complex if the parent is a list
+ if ( !$is_complex ) {
+ my $parent = $rparent_of_seqno->{$seqno};
+ if ( $self->is_list_by_seqno($parent) ) {
+ $is_complex = 1;
+ }
}
- }
- # not in a chain yet..
- else {
+ # finally, we will call it complex if there are inner opening
+ # and closing container tokens, not parens, within the outer
+ # container tokens.
+ if ( !$is_complex ) {
+ my $Kp = $self->K_next_nonblank($KK);
+ my $token_p = defined($Kp) ? $rLL->[$Kp]->[_TOKEN_] : 'b';
+ if ( $is_opening_token{$token_p} && $token_p ne '(' ) {
+
+ my $Kc = $K_closing_container->{$seqno};
+ my $Km = $self->K_previous_nonblank($Kc);
+ my $token_m =
+ defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
+
+ # ignore any optional ending comma
+ if ( $token_m eq ',' ) {
+ $Km = $self->K_previous_nonblank($Km);
+ $token_m =
+ defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
+ }
- # look for start of a new sort/map/grep chain
- if ( $lev > $lev_last ) {
- if ( $types_to_go[$ibeg] eq 'k'
- && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
- {
- $line_1 = $line;
+ $is_complex ||=
+ $is_closing_token{$token_m} && $token_m ne ')';
}
}
+
+ # Convert to option 3 (always break) if complex
+ next unless ($is_complex);
+ $break_option = 3;
}
}
- $lev_last = $lev;
- }
- return;
-}
-sub undo_lp_ci {
+ # Fix for b1231: the has_list_with_lec does not cover all cases.
+ # A broken container containing a list and with line-ending commas
+ # will stay broken, so can be treated as if it had a list with lec.
+ $has_list_with_lec ||=
+ $has_list
+ && $ris_broken_container->{$seqno}
+ && $rlec_count_by_seqno->{$seqno};
- # If there is a single, long parameter within parens, like this:
- #
- # $self->command( "/msg "
- # . $infoline->chan
- # . " You said $1, but did you know that it's square was "
- # . $1 * $1 . " ?" );
- #
- # we can remove the continuation indentation of the 2nd and higher lines
- # to achieve this effect, which is more pleasing:
- #
- # $self->command("/msg "
- # . $infoline->chan
- # . " You said $1, but did you know that it's square was "
- # . $1 * $1 . " ?");
+ DEBUG_BBX
+ && print STDOUT
+"BBX: Looking at seqno=$seqno, token = $token with option=$break_option\n";
- my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
- my $max_line = @{$ri_first} - 1;
+ # -bbx=1 = stable, try to follow input
+ if ( $break_option == 1 ) {
- # must be multiple lines
- return unless $max_line > $line_open;
+ my $iline = $rLL->[$KK]->[_LINE_INDEX_];
+ my $rK_range = $rlines->[$iline]->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ next unless ( $KK == $Kfirst );
+ }
- my $lev_start = $levels_to_go[$i_start];
- my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
+ # -bbx=2 => apply this style only for a 'complex' list
+ elsif ( $break_option == 2 ) {
- # see if all additional lines in this container have continuation
- # indentation
- my $n;
- my $line_1 = 1 + $line_open;
- for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
- my $ibeg = $ri_first->[$n];
- my $iend = $ri_last->[$n];
- if ( $ibeg eq $closing_index ) { $n--; last }
- return if ( $lev_start != $levels_to_go[$ibeg] );
- return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
- last if ( $closing_index <= $iend );
- }
+ # break if this list contains a broken list with line-ending comma
+ my $ok_to_break;
+ my $Msg = "";
+ if ($has_list_with_lec) {
+ $ok_to_break = 1;
+ DEBUG_BBX && do { $Msg = "has list with lec;" };
+ }
- # we can reduce the indentation of all continuation lines
- my $continuation_line_count = $n - $line_open;
- @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
- (0) x ($continuation_line_count);
- @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
- @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
- return;
-}
+ if ( !$ok_to_break ) {
-sub pad_token {
+ # Turn off -xci if -bbx=2 and this container has a sublist but
+ # not a broken sublist. This avoids creating blinkers. The
+ # problem is that -xci can cause one-line lists to break open,
+ # and thereby creating formatting instability.
+ # This fixes cases b1033 b1036 b1037 b1038 b1042 b1043 b1044
+ # b1045 b1046 b1047 b1051 b1052 b1061.
+ if ($has_list) { $rno_xci_by_seqno->{$seqno} = 1 }
- # insert $pad_spaces before token number $ipad
- my ( $self, $ipad, $pad_spaces ) = @_;
- my $rLL = $self->{rLL};
- if ( $pad_spaces > 0 ) {
- $tokens_to_go[$ipad] = ' ' x $pad_spaces . $tokens_to_go[$ipad];
- }
- elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) {
- $tokens_to_go[$ipad] = "";
- }
- else {
+ my $parent = $rparent_of_seqno->{$seqno};
+ if ( $self->is_list_by_seqno($parent) ) {
+ DEBUG_BBX && do { $Msg = "parent is list" };
+ $ok_to_break = 1;
+ }
+ }
- # shouldn't happen
- return;
- }
+ if ( !$ok_to_break ) {
+ DEBUG_BBX
+ && print STDOUT "Not breaking at seqno=$seqno: $Msg\n";
+ next;
+ }
- # Keep token arrays in sync
- $self->sync_token_K($ipad);
+ DEBUG_BBX
+ && print STDOUT "OK to break at seqno=$seqno: $Msg\n";
- $token_lengths_to_go[$ipad] += $pad_spaces;
- foreach my $i ( $ipad .. $max_index_to_go ) {
- $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
- }
- return;
-}
+ # Patch: turn off -xci if -bbx=2 and -lp
+ # This fixes cases b1090 b1095 b1101 b1116 b1118 b1121 b1122
+ $rno_xci_by_seqno->{$seqno} = 1 if ($rOpts_line_up_parentheses);
+ }
-{
- my %is_math_op;
+ # -bbx=3 = always break
+ elsif ( $break_option == 3 ) {
- BEGIN {
+ # ok to break
+ }
- my @q = qw( + - * / );
- @is_math_op{@q} = (1) x scalar(@q);
- }
+ # Shouldn't happen! Bad flag, but make behavior same as 3
+ else {
+ # ok to break
+ }
- sub set_logical_padding {
+ # Set a flag for actual implementation later in
+ # sub insert_breaks_before_list_opening_containers
+ $rbreak_before_container_by_seqno->{$seqno} = 1;
+ DEBUG_BBX
+ && print STDOUT "BBX: ok to break at seqno=$seqno\n";
- # Look at a batch of lines and see if extra padding can improve the
- # alignment when there are certain leading operators. Here is an
- # example, in which some extra space is introduced before
- # '( $year' to make it line up with the subsequent lines:
- #
- # if ( ( $Year < 1601 )
- # || ( $Year > 2899 )
- # || ( $EndYear < 1601 )
- # || ( $EndYear > 2899 ) )
- # {
- # &Error_OutOfRange;
- # }
- #
- my ( $self, $ri_first, $ri_last ) = @_;
- my $max_line = @{$ri_first} - 1;
+ # -bbxi=0: Nothing more to do if the ci value remains unchanged
+ my $ci_flag = $container_indentation_options{$token};
+ next unless ($ci_flag);
- # FIXME: move these declarations below
- my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces,
- $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
+ # -bbxi=1: This option removes ci and is handled in
+ # later sub final_indentation_adjustment
+ if ( $ci_flag == 1 ) {
+ $rwant_reduced_ci->{$seqno} = 1;
+ next;
+ }
- # looking at each line of this batch..
- foreach my $line ( 0 .. $max_line - 1 ) {
+ # -bbxi=2 ...
+
+ #----------------------------------------------------------------
+ # Part 2: Perform tests before committing to changing ci and level
+ #----------------------------------------------------------------
+
+ # Before changing the ci level of the opening container, we need
+ # to be sure that the container will be broken in the later stages of
+ # formatting. We have to do this because we are working early in the
+ # formatting pipeline. A problem can occur if we change the ci or
+ # level of the opening token but do not actually break the container
+ # open as expected. In most cases it wouldn't make any difference if
+ # we changed ci or not, but there are some edge cases where this
+ # can cause blinking states, so we need to try to only change ci if
+ # the container will really be broken.
+
+ # Only consider containers already broken
+ next if ( !$ris_broken_container->{$seqno} );
+
+ # Patch to fix issue b1305: the combination of -naws and ci>i appears
+ # to cause an instability. It should almost never occur in practice.
+ next
+ if (!$rOpts_add_whitespace
+ && $rOpts_continuation_indentation > $rOpts_indent_columns );
+
+ # Always ok to change ci for permanently broken containers
+ if ( $ris_permanently_broken->{$seqno} ) {
+ goto OK;
+ }
+
+ # Always OK if this list contains a broken sub-container with
+ # a non-terminal line-ending comma
+ if ($has_list_with_lec) { goto OK }
+
+ # From here on we are considering a single container...
+
+ # A single container must have at least 1 line-ending comma:
+ next unless ( $rlec_count_by_seqno->{$seqno} );
+
+ # Since it has a line-ending comma, it will stay broken if the -boc
+ # flag is set
+ if ($rOpts_break_at_old_comma_breakpoints) { goto OK }
+
+ # OK if the container contains multiple fat commas
+ # Better: multiple lines with fat commas
+ if ( !$rOpts_ignore_old_breakpoints ) {
+ my $rtype_count = $rtype_count_by_seqno->{$seqno};
+ next unless ($rtype_count);
+ my $fat_comma_count = $rtype_count->{'=>'};
+ DEBUG_BBX
+ && print STDOUT "BBX: fat comma count=$fat_comma_count\n";
+ if ( $fat_comma_count && $fat_comma_count >= 2 ) { goto OK }
+ }
+
+ # The last check we can make is to see if this container could fit on a
+ # single line. Use the least possble indentation in the estmate (ci=0),
+ # so we are not subtracting $ci * $rOpts_continuation_indentation from
+ # tablulated $maximum_text_length value.
+ my $maximum_text_length = $maximum_text_length_at_level[$level];
+ my $K_closing = $K_closing_container->{$seqno};
+ my $length = $self->cumulative_length_before_K($K_closing) -
+ $self->cumulative_length_before_K($KK);
+ my $excess_length = $length - $maximum_text_length;
+ DEBUG_BBX
+ && print STDOUT
+"BBX: excess=$excess_length: maximum_text_length=$maximum_text_length, length=$length, ci=$ci\n";
- # see if the next line begins with a logical operator
- $ibeg = $ri_first->[$line];
- $iend = $ri_last->[$line];
- $ibeg_next = $ri_first->[ $line + 1 ];
- $tok_next = $tokens_to_go[$ibeg_next];
- $type_next = $types_to_go[$ibeg_next];
+ # OK if the net container definitely breaks on length
+ if ( $excess_length > $length_tol ) {
+ DEBUG_BBX
+ && print STDOUT "BBX: excess_length=$excess_length\n";
+ goto OK;
+ }
- $has_leading_op_next = ( $tok_next =~ /^\w/ )
- ? $is_chain_operator{$tok_next} # + - * / : ? && ||
- : $is_chain_operator{$type_next}; # and, or
+ # Otherwise skip it
+ next;
- next unless ($has_leading_op_next);
+ #################################################################
+ # Part 3: Looks OK: apply -bbx=n and any related -bbxi=n flag
+ #################################################################
- # next line must not be at lesser depth
- next
- if ( $nesting_depth_to_go[$ibeg] >
- $nesting_depth_to_go[$ibeg_next] );
+ OK:
- # identify the token in this line to be padded on the left
- $ipad = undef;
+ DEBUG_BBX && print STDOUT "BBX: OK to break\n";
- # handle lines at same depth...
- if ( $nesting_depth_to_go[$ibeg] ==
- $nesting_depth_to_go[$ibeg_next] )
- {
+ # -bbhbi=n
+ # -bbsbi=n
+ # -bbpi=n
- # if this is not first line of the batch ...
- if ( $line > 0 ) {
+ # where:
- # and we have leading operator..
- next if $has_leading_op;
+ # n=0 default indentation (usually one ci)
+ # n=1 outdent one ci
+ # n=2 indent one level (minus one ci)
+ # n=3 indent one extra ci [This may be dropped]
- # Introduce padding if..
- # 1. the previous line is at lesser depth, or
- # 2. the previous line ends in an assignment
- # 3. the previous line ends in a 'return'
- # 4. the previous line ends in a comma
- # Example 1: previous line at lesser depth
- # if ( ( $Year < 1601 ) # <- we are here but
- # || ( $Year > 2899 ) # list has not yet
- # || ( $EndYear < 1601 ) # collapsed vertically
- # || ( $EndYear > 2899 ) )
- # {
- #
- # Example 2: previous line ending in assignment:
- # $leapyear =
- # $year % 4 ? 0 # <- We are here
- # : $year % 100 ? 1
- # : $year % 400 ? 0
- # : 1;
- #
- # Example 3: previous line ending in comma:
- # push @expr,
- # /test/ ? undef
- # : eval($_) ? 1
- # : eval($_) ? 1
- # : 0;
+ # NOTE: We are adjusting indentation of the opening container. The
+ # closing container will normally follow the indentation of the opening
+ # container automatically, so this is not currently done.
+ next unless ($ci);
- # be sure levels agree (do not indent after an indented 'if')
- next
- if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
+ # option 1: outdent
+ if ( $ci_flag == 1 ) {
+ $ci -= 1;
+ }
- # allow padding on first line after a comma but only if:
- # (1) this is line 2 and
- # (2) there are at more than three lines and
- # (3) lines 3 and 4 have the same leading operator
- # These rules try to prevent padding within a long
- # comma-separated list.
- my $ok_comma;
- if ( $types_to_go[$iendm] eq ','
- && $line == 1
- && $max_line > 2 )
- {
- my $ibeg_next_next = $ri_first->[ $line + 2 ];
- my $tok_next_next = $tokens_to_go[$ibeg_next_next];
- $ok_comma = $tok_next_next eq $tok_next;
- }
+ # option 2: indent one level
+ elsif ( $ci_flag == 2 ) {
+ $ci -= 1;
+ $radjusted_levels->[$KK] += 1;
+ }
- next
- unless (
- $is_assignment{ $types_to_go[$iendm] }
- || $ok_comma
- || ( $nesting_depth_to_go[$ibegm] <
- $nesting_depth_to_go[$ibeg] )
- || ( $types_to_go[$iendm] eq 'k'
- && $tokens_to_go[$iendm] eq 'return' )
- );
+ # unknown option
+ else {
+ # Shouldn't happen - leave ci unchanged
+ }
- # we will add padding before the first token
- $ipad = $ibeg;
- }
+ $rLL->[$KK]->[_CI_LEVEL_] = $ci if ( $ci >= 0 );
+ }
- # for first line of the batch..
- else {
+ $self->[_rbreak_before_container_by_seqno_] =
+ $rbreak_before_container_by_seqno;
+ $self->[_rwant_reduced_ci_] = $rwant_reduced_ci;
+ return;
+}
- # WARNING: Never indent if first line is starting in a
- # continued quote, which would change the quote.
- next if $starting_in_quote;
+use constant DEBUG_XCI => 0;
- # if this is text after closing '}'
- # then look for an interior token to pad
- if ( $types_to_go[$ibeg] eq '}' ) {
+sub extended_ci {
- }
+ # This routine implements the -xci (--extended-continuation-indentation)
+ # flag. We add CI to interior tokens of a container which itself has CI but
+ # only if a token does not already have CI.
- # otherwise, we might pad if it looks really good
- else {
+ # To do this, we will locate opening tokens which themselves have
+ # continuation indentation (CI). We track them with their sequence
+ # numbers. These sequence numbers are called 'controlling sequence
+ # numbers'. They apply continuation indentation to the tokens that they
+ # contain. These inner tokens remember their controlling sequence numbers.
+ # Later, when these inner tokens are output, they have to see if the output
+ # lines with their controlling tokens were output with CI or not. If not,
+ # then they must remove their CI too.
- # we might pad token $ibeg, so be sure that it
- # is at the same depth as the next line.
- next
- if ( $nesting_depth_to_go[$ibeg] !=
- $nesting_depth_to_go[$ibeg_next] );
+ # The controlling CI concept works hierarchically. But CI itself is not
+ # hierarchical; it is either on or off. There are some rare instances where
+ # it would be best to have hierarchical CI too, but not enough to be worth
+ # the programming effort.
- # We can pad on line 1 of a statement if at least 3
- # lines will be aligned. Otherwise, it
- # can look very confusing.
+ # The operations to remove unwanted CI are done in sub 'undo_ci'.
- # We have to be careful not to pad if there are too few
- # lines. The current rule is:
- # (1) in general we require at least 3 consecutive lines
- # with the same leading chain operator token,
- # (2) but an exception is that we only require two lines
- # with leading colons if there are no more lines. For example,
- # the first $i in the following snippet would get padding
- # by the second rule:
- #
- # $i == 1 ? ( "First", "Color" )
- # : $i == 2 ? ( "Then", "Rarity" )
- # : ( "Then", "Name" );
-
- if ( $max_line > 1 ) {
- my $leading_token = $tokens_to_go[$ibeg_next];
- my $tokens_differ;
+ my ($self) = @_;
- # never indent line 1 of a '.' series because
- # previous line is most likely at same level.
- # TODO: we should also look at the leasing_spaces
- # of the last output line and skip if it is same
- # as this line.
- next if ( $leading_token eq '.' );
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
- my $count = 1;
- foreach my $l ( 2 .. 3 ) {
- last if ( $line + $l > $max_line );
- my $ibeg_next_next = $ri_first->[ $line + $l ];
- if ( $tokens_to_go[$ibeg_next_next] ne
- $leading_token )
- {
- $tokens_differ = 1;
- last;
- }
- $count++;
- }
- next if ($tokens_differ);
- next if ( $count < 3 && $leading_token ne ':' );
- $ipad = $ibeg;
- }
- else {
- next;
- }
- }
- }
- }
+ my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
+ my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
+ my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
+ my $rlines = $self->[_rlines_];
+ my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
+ my $ris_bli_container = $self->[_ris_bli_container_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+
+ my %available_space;
+
+ # Loop over all opening container tokens
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $ris_broken_container = $self->[_ris_broken_container_];
+ my @seqno_stack;
+ my $seqno_top;
+ my $KLAST;
+ my $KNEXT = $self->[_K_first_seq_item_];
+
+ # The following variable can be used to allow a little extra space to
+ # avoid blinkers. A value $len_tol = 20 fixed the following
+ # fixes cases: b1025 b1026 b1027 b1028 b1029 b1030 but NOT b1031.
+ # It turned out that the real problem was misparsing a list brace as
+ # a code block in a 'use' statement when the line length was extremely
+ # small. A value of 0 works now, but a slightly larger value can
+ # be used to minimize the chance of a blinker.
+ my $len_tol = 0;
- # find interior token to pad if necessary
- if ( !defined($ipad) ) {
+ while ( defined($KNEXT) ) {
- for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
+ # Fix all tokens up to the next sequence item if we are changing CI
+ if ($seqno_top) {
- # find any unclosed container
- next
- unless ( $type_sequence_to_go[$i]
- && $self->mate_index_to_go($i) > $iend );
+ my $is_list = $ris_list_by_seqno->{$seqno_top};
+ my $space = $available_space{$seqno_top};
+ my $length = $rLL->[$KLAST]->[_CUMULATIVE_LENGTH_];
+ my $count = 0;
+ for ( my $Kt = $KLAST + 1 ; $Kt < $KNEXT ; $Kt++ ) {
- # find next nonblank token to pad
- $ipad = $inext_to_go[$i];
- last if ( $ipad > $iend );
+ # But do not include tokens which might exceed the line length
+ # and are not in a list.
+ # ... This fixes case b1031
+ my $length_before = $length;
+ $length = $rLL->[$Kt]->[_CUMULATIVE_LENGTH_];
+ if (
+ !$rLL->[$Kt]->[_CI_LEVEL_]
+ && ( $is_list
+ || $length - $length_before < $space
+ || $rLL->[$Kt]->[_TYPE_] eq '#' )
+ )
+ {
+ $rLL->[$Kt]->[_CI_LEVEL_] = 1;
+ $rseqno_controlling_my_ci->{$Kt} = $seqno_top;
+ $count++;
}
- last unless $ipad;
}
+ $ris_seqno_controlling_ci->{$seqno_top} += $count;
+ }
- # We cannot pad the first leading token of a file because
- # it could cause a bug in which the starting indentation
- # level is guessed incorrectly each time the code is run
- # though perltidy, thus causing the code to march off to
- # the right. For example, the following snippet would have
- # this problem:
+ $KLAST = $KNEXT;
+ my $KK = $KNEXT;
+ $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
-## ov_method mycan( $package, '(""' ), $package
-## or ov_method mycan( $package, '(0+' ), $package
-## or ov_method mycan( $package, '(bool' ), $package
-## or ov_method mycan( $package, '(nomethod' ), $package;
+ my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+ my $K_opening = $K_opening_container->{$seqno};
- # If this snippet is within a block this won't happen
- # unless the user just processes the snippet alone within
- # an editor. In that case either the user will see and
- # fix the problem or it will be corrected next time the
- # entire file is processed with perltidy.
- next if ( $ipad == 0 && $peak_batch_size <= 1 );
+ # see if we have reached the end of the current controlling container
+ if ( $seqno_top && $seqno == $seqno_top ) {
+ $seqno_top = pop @seqno_stack;
+ }
-## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
-## IT DID MORE HARM THAN GOOD
-## ceil(
-## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
-## / $upem
-## ),
-##? # do not put leading padding for just 2 lines of math
-##? if ( $ipad == $ibeg
-##? && $line > 0
-##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
-##? && $is_math_op{$type_next}
-##? && $line + 2 <= $max_line )
-##? {
-##? my $ibeg_next_next = $ri_first->[ $line + 2 ];
-##? my $type_next_next = $types_to_go[$ibeg_next_next];
-##? next if !$is_math_op{$type_next_next};
-##? }
+ # Patch to fix some block types...
+ # Certain block types arrive from the tokenizer without CI but should
+ # have it for this option. These include anonymous subs and
+ # do sort map grep eval
+ my $block_type = $rblock_type_of_seqno->{$seqno};
+ if ( $block_type && $is_block_with_ci{$block_type} ) {
+ $rLL->[$KK]->[_CI_LEVEL_] = 1;
+ if ($seqno_top) {
+ $rseqno_controlling_my_ci->{$KK} = $seqno_top;
+ $ris_seqno_controlling_ci->{$seqno_top}++;
+ }
+ }
- # next line must not be at greater depth
- my $iend_next = $ri_last->[ $line + 1 ];
- next
- if ( $nesting_depth_to_go[ $iend_next + 1 ] >
- $nesting_depth_to_go[$ipad] );
+ # If this does not have ci, update ci if necessary and continue looking
+ if ( !$rLL->[$KK]->[_CI_LEVEL_] ) {
+ if ($seqno_top) {
+ $rLL->[$KK]->[_CI_LEVEL_] = 1;
+ $rseqno_controlling_my_ci->{$KK} = $seqno_top;
+ $ris_seqno_controlling_ci->{$seqno_top}++;
+ }
+ next;
+ }
- # lines must be somewhat similar to be padded..
- my $inext_next = $inext_to_go[$ibeg_next];
- my $type = $types_to_go[$ipad];
- my $type_next = $types_to_go[ $ipad + 1 ];
+ # Skip if requested by -bbx to avoid blinkers
+ if ( $rno_xci_by_seqno->{$seqno} ) {
+ next;
+ }
- # see if there are multiple continuation lines
- my $logical_continuation_lines = 1;
- if ( $line + 2 <= $max_line ) {
- my $leading_token = $tokens_to_go[$ibeg_next];
- my $ibeg_next_next = $ri_first->[ $line + 2 ];
- if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
- && $nesting_depth_to_go[$ibeg_next] eq
- $nesting_depth_to_go[$ibeg_next_next] )
- {
- $logical_continuation_lines++;
- }
- }
+ # Skip if this is a -bli container (this fixes case b1065) Note: case
+ # b1065 is also fixed by the update for b1055, so this update is not
+ # essential now. But there does not seem to be a good reason to add
+ # xci and bli together, so the update is retained.
+ if ( $ris_bli_container->{$seqno} ) {
+ next;
+ }
- # see if leading types match
- my $types_match = $types_to_go[$inext_next] eq $type;
- my $matches_without_bang;
+ # We are looking for opening container tokens with ci
+ next unless ( defined($K_opening) && $KK == $K_opening );
- # if first line has leading ! then compare the following token
- if ( !$types_match && $type eq '!' ) {
- $types_match = $matches_without_bang =
- $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
- }
+ # Make sure there is a corresponding closing container
+ # (could be missing if the script has a brace error)
+ my $K_closing = $K_closing_container->{$seqno};
+ next unless defined($K_closing);
- if (
+ # Require different input lines. This will filter out a large number
+ # of small hash braces and array brackets. If we accidentally filter
+ # out an important container, it will get fixed on the next pass.
+ if (
+ $rLL->[$K_opening]->[_LINE_INDEX_] ==
+ $rLL->[$K_closing]->[_LINE_INDEX_]
+ && ( $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[$K_opening]->[_CUMULATIVE_LENGTH_] >
+ $rOpts_maximum_line_length )
+ )
+ {
+ DEBUG_XCI
+ && print "XCI: Skipping seqno=$seqno, require different lines\n";
+ next;
+ }
- # either we have multiple continuation lines to follow
- # and we are not padding the first token
- ( $logical_continuation_lines > 1 && $ipad > 0 )
+ # Do not apply -xci if adding extra ci will put the container contents
+ # beyond the line length limit (fixes cases b899 b935)
+ my $level = $rLL->[$K_opening]->[_LEVEL_];
+ my $ci_level = $rLL->[$K_opening]->[_CI_LEVEL_];
+ my $maximum_text_length =
+ $maximum_text_length_at_level[$level] -
+ $ci_level * $rOpts_continuation_indentation;
+
+ # Fix for b1197 b1198 b1199 b1200 b1201 b1202
+ # Do not apply -xci if we are running out of space
+ if ( $level >= $stress_level_beta ) {
+ DEBUG_XCI
+ && print
+"XCI: Skipping seqno=$seqno, level=$level exceeds stress level=$stress_level_beta\n";
+ next;
+ }
- # or..
- || (
+ # remember how much space is available for patch b1031 above
+ my $space =
+ $maximum_text_length - $len_tol - $rOpts_continuation_indentation;
- # types must match
- $types_match
+ if ( $space < 0 ) {
+ DEBUG_XCI && print "XCI: Skipping seqno=$seqno, space=$space\n";
+ next;
+ }
+ DEBUG_XCI && print "XCI: OK seqno=$seqno, space=$space\n";
- # and keywords must match if keyword
- && !(
- $type eq 'k'
- && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
- )
- )
- )
- {
+ $available_space{$seqno} = $space;
- #----------------------begin special checks--------------
- #
- # SPECIAL CHECK 1:
- # A check is needed before we can make the pad.
- # If we are in a list with some long items, we want each
- # item to stand out. So in the following example, the
- # first line beginning with '$casefold->' would look good
- # padded to align with the next line, but then it
- # would be indented more than the last line, so we
- # won't do it.
- #
- # ok(
- # $casefold->{code} eq '0041'
- # && $casefold->{status} eq 'C'
- # && $casefold->{mapping} eq '0061',
- # 'casefold 0x41'
- # );
- #
- # Note:
- # It would be faster, and almost as good, to use a comma
- # count, and not pad if comma_count > 1 and the previous
- # line did not end with a comma.
- #
- my $ok_to_pad = 1;
+ # This becomes the next controlling container
+ push @seqno_stack, $seqno_top if ($seqno_top);
+ $seqno_top = $seqno;
+ }
+ return;
+}
- my $ibg = $ri_first->[ $line + 1 ];
- my $depth = $nesting_depth_to_go[ $ibg + 1 ];
+sub braces_left_setup {
- # just use simplified formula for leading spaces to avoid
- # needless sub calls
- my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
+ # Called once per file to mark all -bl, -sbl, and -asbl containers
+ my $self = shift;
- # look at each line beyond the next ..
- my $l = $line + 1;
- foreach my $ltest ( $line + 2 .. $max_line ) {
- $l = $ltest;
- my $ibg = $ri_first->[$l];
+ my $rOpts_bl = $rOpts->{'opening-brace-on-new-line'};
+ my $rOpts_sbl = $rOpts->{'opening-sub-brace-on-new-line'};
+ my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
+ return unless ( $rOpts_bl || $rOpts_sbl || $rOpts_asbl );
- # quit looking at the end of this container
- last
- if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
- || ( $nesting_depth_to_go[$ibg] < $depth );
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
- # cannot do the pad if a later line would be
- # outdented more
- if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
- $ok_to_pad = 0;
- last;
- }
- }
+ # We will turn on this hash for braces controlled by these flags:
+ my $rbrace_left = $self->[_rbrace_left_];
- # don't pad if we end in a broken list
- if ( $l == $max_line ) {
- my $i2 = $ri_last->[$l];
- if ( $types_to_go[$i2] eq '#' ) {
- my $i1 = $ri_first->[$l];
- next if $self->terminal_type_i( $i1, $i2 ) eq ',';
- }
- }
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my $ris_asub_block = $self->[_ris_asub_block_];
+ my $ris_sub_block = $self->[_ris_sub_block_];
+ foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
- # SPECIAL CHECK 2:
- # a minus may introduce a quoted variable, and we will
- # add the pad only if this line begins with a bare word,
- # such as for the word 'Button' here:
- # [
- # Button => "Print letter \"~$_\"",
- # -command => [ sub { print "$_[0]\n" }, $_ ],
- # -accelerator => "Meta+$_"
- # ];
- #
- # On the other hand, if 'Button' is quoted, it looks best
- # not to pad:
- # [
- # 'Button' => "Print letter \"~$_\"",
- # -command => [ sub { print "$_[0]\n" }, $_ ],
- # -accelerator => "Meta+$_"
- # ];
- if ( $types_to_go[$ibeg_next] eq 'm' ) {
- $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
- }
+ my $block_type = $rblock_type_of_seqno->{$seqno};
- next unless $ok_to_pad;
+ # use -asbl flag for an anonymous sub block
+ if ( $ris_asub_block->{$seqno} ) {
+ if ($rOpts_asbl) {
+ $rbrace_left->{$seqno} = 1;
+ }
+ }
- #----------------------end special check---------------
+ # use -sbl flag for a named sub
+ elsif ( $ris_sub_block->{$seqno} ) {
+ if ($rOpts_sbl) {
+ $rbrace_left->{$seqno} = 1;
+ }
+ }
- my $length_1 = total_line_length( $ibeg, $ipad - 1 );
- my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
- $pad_spaces = $length_2 - $length_1;
+ # use -bl flag if not a sub block of any type
+ else {
+ if ( $rOpts_bl
+ && $block_type =~ /$bl_pattern/
+ && $block_type !~ /$bl_exclusion_pattern/ )
+ {
+ $rbrace_left->{$seqno} = 1;
+ }
+ }
+ }
+ return;
+}
- # If the first line has a leading ! and the second does
- # not, then remove one space to try to align the next
- # leading characters, which are often the same. For example:
- # if ( !$ts
- # || $ts == $self->Holder
- # || $self->Holder->Type eq "Arena" )
- #
- # This usually helps readability, but if there are subsequent
- # ! operators things will still get messed up. For example:
- #
- # if ( !exists $Net::DNS::typesbyname{$qtype}
- # && exists $Net::DNS::classesbyname{$qtype}
- # && !exists $Net::DNS::classesbyname{$qclass}
- # && exists $Net::DNS::typesbyname{$qclass} )
- # We can't fix that.
- if ($matches_without_bang) { $pad_spaces-- }
-
- # make sure this won't change if -lp is used
- my $indentation_1 = $leading_spaces_to_go[$ibeg];
- if ( ref($indentation_1) ) {
- if ( $indentation_1->get_recoverable_spaces() == 0 ) {
- my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
- unless ( $indentation_2->get_recoverable_spaces() == 0 )
- {
- $pad_spaces = 0;
- }
- }
- }
-
- # we might be able to handle a pad of -1 by removing a blank
- # token
- if ( $pad_spaces < 0 ) {
-
- if ( $pad_spaces == -1 ) {
- if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' )
- {
- $self->pad_token( $ipad - 1, $pad_spaces );
- }
- }
- $pad_spaces = 0;
- }
+sub bli_adjustment {
- # now apply any padding for alignment
- if ( $ipad >= 0 && $pad_spaces ) {
+ # Called once per file to implement the --brace-left-and-indent option.
+ # If -bli is set, adds one continuation indentation for certain braces
+ my $self = shift;
+ return unless ( $rOpts->{'brace-left-and-indent'} );
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
- my $length_t = total_line_length( $ibeg, $iend );
- if ( $pad_spaces + $length_t <= maximum_line_length($ibeg) )
- {
- $self->pad_token( $ipad, $pad_spaces );
- }
- }
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my $ris_bli_container = $self->[_ris_bli_container_];
+ my $rbrace_left = $self->[_rbrace_left_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+
+ foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
+ my $block_type = $rblock_type_of_seqno->{$seqno};
+ if ( $block_type
+ && $block_type =~ /$bli_pattern/
+ && $block_type !~ /$bli_exclusion_pattern/ )
+ {
+ $ris_bli_container->{$seqno} = 1;
+ $rbrace_left->{$seqno} = 1;
+ my $Ko = $K_opening_container->{$seqno};
+ my $Kc = $K_closing_container->{$seqno};
+ if ( defined($Ko) && defined($Kc) ) {
+ $rLL->[$Kc]->[_CI_LEVEL_] = ++$rLL->[$Ko]->[_CI_LEVEL_];
}
}
- continue {
- $iendm = $iend;
- $ibegm = $ibeg;
- $has_leading_op = $has_leading_op_next;
- } # end of loop over lines
- return;
}
+ return;
}
-sub correct_lp_indentation {
-
- # When the -lp option is used, we need to make a last pass through
- # each line to correct the indentation positions in case they differ
- # from the predictions. This is necessary because perltidy uses a
- # predictor/corrector method for aligning with opening parens. The
- # predictor is usually good, but sometimes stumbles. The corrector
- # tries to patch things up once the actual opening paren locations
- # are known.
- my ( $ri_first, $ri_last ) = @_;
- my $do_not_pad = 0;
+sub find_multiline_qw {
- # Note on flag '$do_not_pad':
- # We want to avoid a situation like this, where the aligner inserts
- # whitespace before the '=' to align it with a previous '=', because
- # otherwise the parens might become mis-aligned in a situation like
- # this, where the '=' has become aligned with the previous line,
- # pushing the opening '(' forward beyond where we want it.
- #
- # $mkFloor::currentRoom = '';
- # $mkFloor::c_entry = $c->Entry(
- # -width => '10',
- # -relief => 'sunken',
- # ...
- # );
- #
- # We leave it to the aligner to decide how to do this.
+ my $self = shift;
- # first remove continuation indentation if appropriate
- my $max_line = @{$ri_first} - 1;
+ # Multiline qw quotes are not sequenced items like containers { [ (
+ # but behave in some respects in a similar way. So this routine finds them
+ # and creates a separate sequence number system for later use.
- # looking at each line of this batch..
- my ( $ibeg, $iend );
- foreach my $line ( 0 .. $max_line ) {
- $ibeg = $ri_first->[$line];
- $iend = $ri_last->[$line];
+ # This is straightforward because they always begin at the end of one line
+ # and and at the beginning of a later line. This is true no matter how we
+ # finally make our line breaks, so we can find them before deciding on new
+ # line breaks.
- # looking at each token in this output line..
- foreach my $i ( $ibeg .. $iend ) {
+ my $rstarting_multiline_qw_seqno_by_K = {};
+ my $rending_multiline_qw_seqno_by_K = {};
+ my $rKrange_multiline_qw_by_seqno = {};
+ my $rmultiline_qw_has_extra_level = {};
- # How many space characters to place before this token
- # for special alignment. Actual padding is done in the
- # continue block.
+ my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
- # looking for next unvisited indentation item
- my $indentation = $leading_spaces_to_go[$i];
- if ( !$indentation->get_marked() ) {
- $indentation->set_marked(1);
-
- # looking for indentation item for which we are aligning
- # with parens, braces, and brackets
- next unless ( $indentation->get_align_paren() );
-
- # skip closed container on this line
- if ( $i > $ibeg ) {
- my $im = max( $ibeg, $iprev_to_go[$i] );
- if ( $type_sequence_to_go[$im]
- && $mate_index_to_go[$im] <= $iend )
- {
- next;
- }
- }
+ my $rlines = $self->[_rlines_];
+ my $rLL = $self->[_rLL_];
+ my $qw_seqno;
+ my $num_qw_seqno = 0;
+ my $K_start_multiline_qw;
- if ( $line == 1 && $i == $ibeg ) {
- $do_not_pad = 1;
- }
+ foreach my $line_of_tokens ( @{$rlines} ) {
- # Ok, let's see what the error is and try to fix it
- my $actual_pos;
- my $predicted_pos = $indentation->get_spaces();
- if ( $i > $ibeg ) {
+ my $line_type = $line_of_tokens->{_line_type};
+ next unless ( $line_type eq 'CODE' );
+ my $rK_range = $line_of_tokens->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ next unless ( defined($Kfirst) && defined($Klast) ); # skip blank line
+ if ( defined($K_start_multiline_qw) ) {
+ my $type = $rLL->[$Kfirst]->[_TYPE_];
+
+ # shouldn't happen
+ if ( $type ne 'q' ) {
+ DEVEL_MODE && print STDERR <<EOM;
+STRANGE: started multiline qw at K=$K_start_multiline_qw but didn't see q qw at K=$Kfirst\n";
+EOM
+ $K_start_multiline_qw = undef;
+ next;
+ }
+ my $Kprev = $self->K_previous_nonblank($Kfirst);
+ my $Knext = $self->K_next_nonblank($Kfirst);
+ my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
+ my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
+ if ( $type_m eq 'q' && $type_p ne 'q' ) {
+ $rending_multiline_qw_seqno_by_K->{$Kfirst} = $qw_seqno;
+ $rKrange_multiline_qw_by_seqno->{$qw_seqno} =
+ [ $K_start_multiline_qw, $Kfirst ];
+ $K_start_multiline_qw = undef;
+ $qw_seqno = undef;
+ }
+ }
+ if ( !defined($K_start_multiline_qw)
+ && $rLL->[$Klast]->[_TYPE_] eq 'q' )
+ {
+ my $Kprev = $self->K_previous_nonblank($Klast);
+ my $Knext = $self->K_next_nonblank($Klast);
+ my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
+ my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
+ if ( $type_m ne 'q' && $type_p eq 'q' ) {
+ $num_qw_seqno++;
+ $qw_seqno = 'q' . $num_qw_seqno;
+ $K_start_multiline_qw = $Klast;
+ $rstarting_multiline_qw_seqno_by_K->{$Klast} = $qw_seqno;
+ }
+ }
+ }
- # token is mid-line - use length to previous token
- $actual_pos = total_line_length( $ibeg, $i - 1 );
+ # Give multiline qw lists extra indentation instead of CI. This option
+ # works well but is currently only activated when the -xci flag is set.
+ # The reason is to avoid unexpected changes in formatting.
+ if ($rOpts_extended_continuation_indentation) {
+ while ( my ( $qw_seqno, $rKrange ) =
+ each %{$rKrange_multiline_qw_by_seqno} )
+ {
+ my ( $Kbeg, $Kend ) = @{$rKrange};
- # for mid-line token, we must check to see if all
- # additional lines have continuation indentation,
- # and remove it if so. Otherwise, we do not get
- # good alignment.
- my $closing_index = $indentation->get_closed();
- if ( $closing_index > $iend ) {
- my $ibeg_next = $ri_first->[ $line + 1 ];
- if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
- undo_lp_ci( $line, $i, $closing_index, $ri_first,
- $ri_last );
- }
- }
- }
- elsif ( $line > 0 ) {
+ # require isolated closing token
+ my $token_end = $rLL->[$Kend]->[_TOKEN_];
+ next
+ unless ( length($token_end) == 1
+ && ( $is_closing_token{$token_end} || $token_end eq '>' ) );
- # handle case where token starts a new line;
- # use length of previous line
- my $ibegm = $ri_first->[ $line - 1 ];
- my $iendm = $ri_last->[ $line - 1 ];
- $actual_pos = total_line_length( $ibegm, $iendm );
+ # require isolated opening token
+ my $token_beg = $rLL->[$Kbeg]->[_TOKEN_];
- # follow -pt style
- ++$actual_pos
- if ( $types_to_go[ $iendm + 1 ] eq 'b' );
- }
- else {
+ # allow space(s) after the qw
+ if ( length($token_beg) > 3 && substr( $token_beg, 2, 1 ) =~ m/\s/ )
+ {
+ $token_beg =~ s/\s+//;
+ }
- # token is first character of first line of batch
- $actual_pos = $predicted_pos;
- }
+ next unless ( length($token_beg) == 3 );
- my $move_right = $actual_pos - $predicted_pos;
+ foreach my $KK ( $Kbeg + 1 .. $Kend - 1 ) {
+ $rLL->[$KK]->[_LEVEL_]++;
+ $rLL->[$KK]->[_CI_LEVEL_] = 0;
+ }
- # done if no error to correct (gnu2.t)
- if ( $move_right == 0 ) {
- $indentation->set_recoverable_spaces($move_right);
- next;
- }
+ # set flag for -wn option, which will remove the level
+ $rmultiline_qw_has_extra_level->{$qw_seqno} = 1;
+ }
+ }
- # if we have not seen closure for this indentation in
- # this batch, we can only pass on a request to the
- # vertical aligner
- my $closing_index = $indentation->get_closed();
+ # For the -lp option we need to mark all parent containers of
+ # multiline quotes
+ if ( $rOpts_line_up_parentheses && !$rOpts_extended_line_up_parentheses ) {
- if ( $closing_index < 0 ) {
- $indentation->set_recoverable_spaces($move_right);
- next;
+ while ( my ( $qw_seqno, $rKrange ) =
+ each %{$rKrange_multiline_qw_by_seqno} )
+ {
+ my ( $Kbeg, $Kend ) = @{$rKrange};
+ my $parent_seqno = $self->parent_seqno_by_K($Kend);
+ next unless ($parent_seqno);
+
+ # If the parent container exactly surrounds this qw, then -lp
+ # formatting seems to work so we will not mark it.
+ my $is_tightly_contained;
+ my $Kn = $self->K_next_nonblank($Kend);
+ my $seqno_n = defined($Kn) ? $rLL->[$Kn]->[_TYPE_SEQUENCE_] : undef;
+ if ( defined($seqno_n) && $seqno_n eq $parent_seqno ) {
+
+ my $Kp = $self->K_previous_nonblank($Kbeg);
+ my $seqno_p =
+ defined($Kp) ? $rLL->[$Kp]->[_TYPE_SEQUENCE_] : undef;
+ if ( defined($seqno_p) && $seqno_p eq $parent_seqno ) {
+ $is_tightly_contained = 1;
}
+ }
+
+ $ris_excluded_lp_container->{$parent_seqno} = 1
+ unless ($is_tightly_contained);
- # If necessary, look ahead to see if there is really any
- # leading whitespace dependent on this whitespace, and
- # also find the longest line using this whitespace.
- # Since it is always safe to move left if there are no
- # dependents, we only need to do this if we may have
- # dependent nodes or need to move right.
+ # continue up the tree marking parent containers
+ while (1) {
+ $parent_seqno = $self->[_rparent_of_seqno_]->{$parent_seqno};
+ last
+ unless ( defined($parent_seqno)
+ && $parent_seqno ne SEQ_ROOT );
+ $ris_excluded_lp_container->{$parent_seqno} = 1;
+ }
+ }
+ }
- my $right_margin = 0;
- my $have_child = $indentation->get_have_child();
+ $self->[_rstarting_multiline_qw_seqno_by_K_] =
+ $rstarting_multiline_qw_seqno_by_K;
+ $self->[_rending_multiline_qw_seqno_by_K_] =
+ $rending_multiline_qw_seqno_by_K;
+ $self->[_rKrange_multiline_qw_by_seqno_] = $rKrange_multiline_qw_by_seqno;
+ $self->[_rmultiline_qw_has_extra_level_] = $rmultiline_qw_has_extra_level;
- my %saw_indentation;
- my $line_count = 1;
- $saw_indentation{$indentation} = $indentation;
+ return;
+}
- if ( $have_child || $move_right > 0 ) {
- $have_child = 0;
- my $max_length = 0;
- if ( $i == $ibeg ) {
- $max_length = total_line_length( $ibeg, $iend );
- }
+use constant DEBUG_COLLAPSED_LENGTHS => 0;
- # look ahead at the rest of the lines of this batch..
- foreach my $line_t ( $line + 1 .. $max_line ) {
- my $ibeg_t = $ri_first->[$line_t];
- my $iend_t = $ri_last->[$line_t];
- last if ( $closing_index <= $ibeg_t );
-
- # remember all different indentation objects
- my $indentation_t = $leading_spaces_to_go[$ibeg_t];
- $saw_indentation{$indentation_t} = $indentation_t;
- $line_count++;
-
- # remember longest line in the group
- my $length_t = total_line_length( $ibeg_t, $iend_t );
- if ( $length_t > $max_length ) {
- $max_length = $length_t;
- }
- }
- $right_margin = maximum_line_length($ibeg) - $max_length;
- if ( $right_margin < 0 ) { $right_margin = 0 }
- }
-
- my $first_line_comma_count =
- grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
- my $comma_count = $indentation->get_comma_count();
- my $arrow_count = $indentation->get_arrow_count();
-
- # This is a simple approximate test for vertical alignment:
- # if we broke just after an opening paren, brace, bracket,
- # and there are 2 or more commas in the first line,
- # and there are no '=>'s,
- # then we are probably vertically aligned. We could set
- # an exact flag in sub scan_list, but this is good
- # enough.
- my $indentation_count = keys %saw_indentation;
- my $is_vertically_aligned =
- ( $i == $ibeg
- && $first_line_comma_count > 1
- && $indentation_count == 1
- && ( $arrow_count == 0 || $arrow_count == $line_count ) );
-
- # Make the move if possible ..
- if (
+# Minimum space reserved for contents of a code block. A value of 40 has given
+# reasonable results. With a large line length, say -l=120, this will not
+# normally be noticable but it will prevent making a mess in some edge cases.
+use constant MIN_BLOCK_LEN => 40;
- # we can always move left
- $move_right < 0
+my %is_handle_type;
- # but we should only move right if we are sure it will
- # not spoil vertical alignment
- || ( $comma_count == 0 )
- || ( $comma_count > 0 && !$is_vertically_aligned )
- )
- {
- my $move =
- ( $move_right <= $right_margin )
- ? $move_right
- : $right_margin;
-
- foreach ( keys %saw_indentation ) {
- $saw_indentation{$_}
- ->permanently_decrease_available_spaces( -$move );
- }
- }
+BEGIN {
+ my @q = qw( w C U G i k => );
+ @is_handle_type{@q} = (1) x scalar(@q);
- # Otherwise, record what we want and the vertical aligner
- # will try to recover it.
- else {
- $indentation->set_recoverable_spaces($move_right);
- }
- }
- }
- }
- return $do_not_pad;
+ my $i = 0;
+ use constant {
+ _max_prong_len_ => $i++,
+ _handle_len_ => $i++,
+ _seqno_o_ => $i++,
+ _iline_o_ => $i++,
+ _K_o_ => $i++,
+ _K_c_ => $i++,
+ _interrupted_list_rule_ => $i++,
+ };
}
-# flush is called to output any tokens in the pipeline, so that
-# an alternate source of lines can be written in the correct order
+sub collapsed_lengths {
-sub flush {
my $self = shift;
- destroy_one_line_block();
- $self->output_line_to_go();
- Perl::Tidy::VerticalAligner::flush();
- return;
-}
-sub reset_block_text_accumulator {
+ #----------------------------------------------------------------
+ # Define the collapsed lengths of containers for -xlp indentation
+ #----------------------------------------------------------------
- # save text after 'if' and 'elsif' to append after 'else'
- if ($accumulating_text_for_block) {
+ # We need an estimate of the minimum required line length starting at any
+ # opening container for the -xlp style. This is needed to avoid using too
+ # much indentation space for lower level containers and thereby running
+ # out of space for outer container tokens due to the maximum line length
+ # limit.
- if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
- push @{$rleading_block_if_elsif_text}, $leading_block_text;
- }
- }
- $accumulating_text_for_block = "";
- $leading_block_text = "";
- $leading_block_text_level = 0;
- $leading_block_text_length_exceeded = 0;
- $leading_block_text_line_number = 0;
- $leading_block_text_line_length = 0;
- return;
-}
+ # The basic idea is that at each node in the tree we imagine that we have a
+ # fork with a handle and collapsable prongs:
+ #
+ # |------------
+ # |--------
+ # ------------|-------
+ # handle |------------
+ # |--------
+ # prongs
+ #
+ # Each prong has a minimum collapsed length. The collapsed length at a node
+ # is the maximum of these minimum lengths, plus the handle length. Each of
+ # the prongs may itself be a tree node.
+
+ # This is just a rough calculation to get an approximate starting point for
+ # indentation. Later routines will be more precise. It is important that
+ # these estimates be independent of the line breaks of the input stream in
+ # order to avoid instabilities.
+
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
+ my $rlines = $self->[_rlines_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
+ my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
+ my $ris_permanently_broken = $self->[_ris_permanently_broken_];
+ my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
+ my $rhas_broken_list = $self->[_rhas_broken_list_];
+
+ my $K_start_multiline_qw;
+ my $level_start_multiline_qw = 0;
+ my $max_prong_len = 0;
+ my $handle_len = 0;
+ my @stack;
+ my $len = 0;
+ my $last_nonblank_type = 'b';
+ push @stack,
+ [ $max_prong_len, $handle_len, SEQ_ROOT, undef, undef, undef, undef ];
-sub set_block_text_accumulator {
- my $i = shift;
- $accumulating_text_for_block = $tokens_to_go[$i];
- if ( $accumulating_text_for_block !~ /^els/ ) {
- $rleading_block_if_elsif_text = [];
- }
- $leading_block_text = "";
- $leading_block_text_level = $levels_to_go[$i];
- $leading_block_text_line_number = get_output_line_number();
- $leading_block_text_length_exceeded = 0;
-
- # this will contain the column number of the last character
- # of the closing side comment
- $leading_block_text_line_length =
- length($csc_last_label) +
- length($accumulating_text_for_block) +
- length( $rOpts->{'closing-side-comment-prefix'} ) +
- $leading_block_text_level * $rOpts_indent_columns + 3;
- return;
-}
+ my $iline = -1;
+ foreach my $line_of_tokens ( @{$rlines} ) {
+ $iline++;
+ my $line_type = $line_of_tokens->{_line_type};
+ next if ( $line_type ne 'CODE' );
+ my $CODE_type = $line_of_tokens->{_code_type};
-sub accumulate_block_text {
- my $i = shift;
+ # Always skip blank lines
+ next if ( $CODE_type eq 'BL' );
- # accumulate leading text for -csc, ignoring any side comments
- if ( $accumulating_text_for_block
- && !$leading_block_text_length_exceeded
- && $types_to_go[$i] ne '#' )
- {
+ # Note on other line types:
+ # 'FS' (Format Skipping) lines may contain opening/closing tokens so
+ # we have to process them to keep the stack correctly sequenced.
+ # 'VB' (Verbatim) lines could be skipped, but testing shows that
+ # results look better if we include their lengths.
- my $added_length = $token_lengths_to_go[$i];
- $added_length += 1 if $i == 0;
- my $new_line_length = $leading_block_text_line_length + $added_length;
+ # Also note that we could exclude -xlp formatting of containers with
+ # 'FS' and 'VB' lines, but in testing that was not really beneficial.
- # we can add this text if we don't exceed some limits..
- if (
+ # So we process tokens in 'FS' and 'VB' lines like all the rest...
+
+ my $rK_range = $line_of_tokens->{_rK_range};
+ my ( $K_first, $K_last ) = @{$rK_range};
+ next unless ( defined($K_first) && defined($K_last) );
+
+ my $has_comment = $rLL->[$K_last]->[_TYPE_] eq '#';
+
+ # Always ignore block comments
+ next if ( $has_comment && $K_first == $K_last );
+
+ # Handle an intermediate line of a multiline qw quote. These may
+ # require including some -ci or -i spaces. See cases c098/x063.
+ # Updated to check all lines (not just $K_first==$K_last) to fix b1316
+ my $K_begin_loop = $K_first;
+ if ( $rLL->[$K_first]->[_TYPE_] eq 'q' ) {
+
+ my $KK = $K_first;
+ my $level = $rLL->[$KK]->[_LEVEL_];
+ my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
+
+ # remember the level of the start
+ if ( !defined($K_start_multiline_qw) ) {
+ $K_start_multiline_qw = $K_first;
+ $level_start_multiline_qw = $level;
+ my $seqno_qw =
+ $self->[_rstarting_multiline_qw_seqno_by_K_]
+ ->{$K_start_multiline_qw};
+ if ( !$seqno_qw ) {
+ my $Kp = $self->K_previous_nonblank($K_first);
+ if ( defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' ) {
+
+ $K_start_multiline_qw = $Kp;
+ $level_start_multiline_qw =
+ $rLL->[$K_start_multiline_qw]->[_LEVEL_];
+ }
+ }
+ }
- # we must not have already exceeded the text length limit
- length($leading_block_text) <
- $rOpts_closing_side_comment_maximum_text
+ $len = $rLL->[$KK]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
- # and either:
- # the new total line length must be below the line length limit
- # or the new length must be below the text length limit
- # (ie, we may allow one token to exceed the text length limit)
- && (
- $new_line_length <
- maximum_line_length_for_level($leading_block_text_level)
+ # We may have to add the spaces of one level or ci level ... it
+ # depends depends on the -xci flag, the -wn flag, and if the qw
+ # uses a container token as the quote delimiter.
- || length($leading_block_text) + $added_length <
- $rOpts_closing_side_comment_maximum_text
- )
+ # First rule: add ci if there is a $ci_level
+ if ($ci_level) {
+ $len += $rOpts_continuation_indentation;
+ }
- # UNLESS: we are adding a closing paren before the brace we seek.
- # This is an attempt to avoid situations where the ... to be
- # added are longer than the omitted right paren, as in:
+ # Second rule: otherwise, look for an extra indentation level
+ # from the start and add one indentation level if found.
+ elsif ( $level > $level_start_multiline_qw ) {
+ $len += $rOpts_indent_columns;
+ }
- # foreach my $item (@a_rather_long_variable_name_here) {
- # &whatever;
- # } ## end foreach my $item (@a_rather_long_variable_name_here...
+ if ( $len > $max_prong_len ) { $max_prong_len = $len }
- || (
- $tokens_to_go[$i] eq ')'
- && (
- (
- $i + 1 <= $max_index_to_go
- && $block_type_to_go[ $i + 1 ] eq
- $accumulating_text_for_block
- )
- || ( $i + 2 <= $max_index_to_go
- && $block_type_to_go[ $i + 2 ] eq
- $accumulating_text_for_block )
- )
- )
- )
- {
+ $last_nonblank_type = 'q';
- # add an extra space at each newline
- if ( $i == 0 ) { $leading_block_text .= ' ' }
+ $K_begin_loop = $K_first + 1;
+
+ # We can skip to the next line if more tokens
+ next if ( $K_begin_loop > $K_last );
- # add the token text
- $leading_block_text .= $tokens_to_go[$i];
- $leading_block_text_line_length = $new_line_length;
}
+ $K_start_multiline_qw = undef;
- # show that text was truncated if necessary
- elsif ( $types_to_go[$i] ne 'b' ) {
- $leading_block_text_length_exceeded = 1;
- $leading_block_text .= '...';
+ # Find the terminal token, before any side comment
+ my $K_terminal = $K_last;
+ if ($has_comment) {
+ $K_terminal -= 1;
+ $K_terminal -= 1
+ if ( $rLL->[$K_terminal]->[_TYPE_] eq 'b'
+ && $K_terminal > $K_first );
}
- }
- return;
-}
-{
- my %is_if_elsif_else_unless_while_until_for_foreach;
+ # Use length to terminal comma if interrupded list rule applies
+ if ( @stack && $stack[-1]->[_interrupted_list_rule_] ) {
+ my $K_c = $stack[-1]->[_K_c_];
+ if (
+ defined($K_c)
+ && $rLL->[$K_terminal]->[_TYPE_] eq ','
- BEGIN {
+ # Ignore a terminal comma, causes instability (b1297)
+ && ( $K_c - $K_terminal > 2
+ || $rLL->[ $K_terminal + 1 ]->[_TYPE_] eq 'b' )
+ )
+ {
+ my $Kend = $K_terminal;
- # These block types may have text between the keyword and opening
- # curly. Note: 'else' does not, but must be included to allow trailing
- # if/elsif text to be appended.
- # patch for SWITCH/CASE: added 'case' and 'when'
- my @q =
- qw(if elsif else unless while until for foreach case when catch);
- @is_if_elsif_else_unless_while_until_for_foreach{@q} =
- (1) x scalar(@q);
- }
+ # This caused an instability in b1311 by making the result
+ # dependent on input. It is not really necessary because the
+ # comment length is added at the end of the loop.
+ ##if ( $has_comment
+ ## && !$rOpts_ignore_side_comment_lengths )
+ ##{
+ ## $Kend = $K_last;
+ ##}
- sub accumulate_csc_text {
+ $len = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[ $K_first - 1 ]->[_CUMULATIVE_LENGTH_];
- # called once per output buffer when -csc is used. Accumulates
- # the text placed after certain closing block braces.
- # Defines and returns the following for this buffer:
+ if ( $len > $max_prong_len ) { $max_prong_len = $len }
+ }
+ }
- my $block_leading_text = ""; # the leading text of the last '}'
- my $rblock_leading_if_elsif_text;
- my $i_block_leading_text =
- -1; # index of token owning block_leading_text
- my $block_line_count = 100; # how many lines the block spans
- my $terminal_type = 'b'; # type of last nonblank token
- my $i_terminal = 0; # index of last nonblank token
- my $terminal_block_type = "";
+ # Loop over tokens on this line ...
+ foreach my $KK ( $K_begin_loop .. $K_terminal ) {
- # update most recent statement label
- $csc_last_label = "" unless ($csc_last_label);
- if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
- my $block_label = $csc_last_label;
+ my $type = $rLL->[$KK]->[_TYPE_];
+ next if ( $type eq 'b' );
- # Loop over all tokens of this batch
- for my $i ( 0 .. $max_index_to_go ) {
- my $type = $types_to_go[$i];
- my $block_type = $block_type_to_go[$i];
- my $token = $tokens_to_go[$i];
+ #------------------------
+ # Handle sequenced tokens
+ #------------------------
+ my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+ if ($seqno) {
- # remember last nonblank token type
- if ( $type ne '#' && $type ne 'b' ) {
- $terminal_type = $type;
- $terminal_block_type = $block_type;
- $i_terminal = $i;
- }
+ my $token = $rLL->[$KK]->[_TOKEN_];
- my $type_sequence = $type_sequence_to_go[$i];
- if ( $block_type && $type_sequence ) {
+ #----------------------------
+ # Entering a new container...
+ #----------------------------
+ if ( $is_opening_token{$token} ) {
- if ( $token eq '}' ) {
+ # save current prong length
+ $stack[-1]->[_max_prong_len_] = $max_prong_len;
+ $max_prong_len = 0;
- # restore any leading text saved when we entered this block
- if ( defined( $block_leading_text{$type_sequence} ) ) {
- ( $block_leading_text, $rblock_leading_if_elsif_text )
- = @{ $block_leading_text{$type_sequence} };
- $i_block_leading_text = $i;
- delete $block_leading_text{$type_sequence};
- $rleading_block_if_elsif_text =
- $rblock_leading_if_elsif_text;
- }
+ # Start new prong one level deeper
+ my $handle_len = 0;
+ if ( $rblock_type_of_seqno->{$seqno} ) {
- if ( defined( $csc_block_label{$type_sequence} ) ) {
- $block_label = $csc_block_label{$type_sequence};
- delete $csc_block_label{$type_sequence};
- }
+ # code blocks do not use -lp indentation, but behave as
+ # if they had a handle of one indentation length
+ $handle_len = $rOpts_indent_columns;
- # if we run into a '}' then we probably started accumulating
- # at something like a trailing 'if' clause..no harm done.
- if ( $accumulating_text_for_block
- && $levels_to_go[$i] <= $leading_block_text_level )
- {
- my $lev = $levels_to_go[$i];
- reset_block_text_accumulator();
+ }
+ elsif ( $is_handle_type{$last_nonblank_type} ) {
+ $handle_len = $len;
+ $handle_len += 1
+ if ( $KK > 0 && $rLL->[ $KK - 1 ]->[_TYPE_] eq 'b' );
}
- if ( defined( $block_opening_line_number{$type_sequence} ) )
+ # Set a flag if the 'Interrupted List Rule' will be applied
+ # (see sub copy_old_breakpoints).
+ # - Added check on has_broken_list to fix issue b1298
+
+ my $interrupted_list_rule =
+ $ris_permanently_broken->{$seqno}
+ && $ris_list_by_seqno->{$seqno}
+ && !$rhas_broken_list->{$seqno}
+ && !$rOpts_ignore_old_breakpoints;
+
+ # NOTES: Since we are looking at old line numbers we have
+ # to be very careful not to introduce an instability.
+
+ # This following causes instability (b1288-b1296):
+ # $interrupted_list_rule ||=
+ # $rOpts_break_at_old_comma_breakpoints;
+
+ # - We could turn off the interrupted list rule if there is
+ # a broken sublist, to follow 'Compound List Rule 1'.
+ # - We could use the _rhas_broken_list_ flag for this.
+ # - But it seems safer not to do this, to avoid
+ # instability, since the broken sublist could be
+ # temporary. It seems better to let the formatting
+ # stabilize by itself after one or two iterations.
+ # - So, not doing this for now
+
+ # Include length to a comma ending this line
+ if ( $interrupted_list_rule
+ && $rLL->[$K_terminal]->[_TYPE_] eq ',' )
{
- my $output_line_number = get_output_line_number();
- $block_line_count =
- $output_line_number -
- $block_opening_line_number{$type_sequence} + 1;
- delete $block_opening_line_number{$type_sequence};
- }
- else {
+ my $Kend = $K_terminal;
+ if ( $Kend < $K_last
+ && !$rOpts_ignore_side_comment_lengths )
+ {
+ $Kend = $K_last;
+ }
- # Error: block opening line undefined for this line..
- # This shouldn't be possible, but it is not a
- # significant problem.
+ # Measure from the next blank if any (fixes b1301)
+ my $Kbeg = $KK;
+ if ( $rLL->[ $Kbeg + 1 ]->[_TYPE_] eq 'b'
+ && $Kbeg < $Kend )
+ {
+ $Kbeg++;
+ }
+
+ my $len = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[$Kbeg]->[_CUMULATIVE_LENGTH_];
+ if ( $len > $max_prong_len ) { $max_prong_len = $len }
}
- }
- elsif ( $token eq '{' ) {
+ my $K_c = $K_closing_container->{$seqno};
- my $line_number = get_output_line_number();
- $block_opening_line_number{$type_sequence} = $line_number;
+ push @stack,
+ [
+ $max_prong_len, $handle_len,
+ $seqno, $iline,
+ $KK, $K_c,
+ $interrupted_list_rule
+ ];
+ }
- # set a label for this block, except for
- # a bare block which already has the label
- # A label can only be used on the next {
- if ( $block_type =~ /:$/ ) { $csc_last_label = "" }
- $csc_block_label{$type_sequence} = $csc_last_label;
- $csc_last_label = "";
+ #--------------------
+ # Exiting a container
+ #--------------------
+ elsif ( $is_closing_token{$token} ) {
+ if (@stack) {
+
+ # The current prong ends - get its handle
+ my $item = pop @stack;
+ my $handle_len = $item->[_handle_len_];
+ my $seqno_o = $item->[_seqno_o_];
+ my $iline_o = $item->[_iline_o_];
+ my $K_o = $item->[_K_o_];
+ my $K_c_expect = $item->[_K_c_];
+ my $collapsed_len = $max_prong_len;
+
+ if ( $seqno_o ne $seqno ) {
+
+ # Shouldn't happen - must have skipped some lines.
+ # Not fatal but -lp formatting could get messed up.
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+sequence numbers differ; at CLOSING line $iline, seq=$seqno, Kc=$KK .. at OPENING line $iline_o, seq=$seqno_o, Ko=$K_o, expecting Kc=$K_c_expect
+EOM
+ }
+ }
- if ( $accumulating_text_for_block
- && $levels_to_go[$i] == $leading_block_text_level )
- {
+ #------------------------------------------
+ # Rules to avoid scrunching code blocks ...
+ #------------------------------------------
+ # Some test cases:
+ # c098/x107 x108 x110 x112 x114 x115 x117 x118 x119
+ if ( $rblock_type_of_seqno->{$seqno} ) {
+
+ my $K_c = $KK;
+ my $block_length = MIN_BLOCK_LEN;
+ my $is_one_line_block;
+ my $level = $rLL->[$K_o]->[_LEVEL_];
+ if ( defined($K_o) && defined($K_c) ) {
+ my $block_length =
+ $rLL->[ $K_c - 1 ]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[$K_o]->[_CUMULATIVE_LENGTH_];
+ $is_one_line_block = $iline == $iline_o;
+ }
- if ( $accumulating_text_for_block eq $block_type ) {
+ # Code block rule 1: Use the total block length if
+ # it is less than the minimum.
+ if ( $block_length < MIN_BLOCK_LEN ) {
+ $collapsed_len = $block_length;
+ }
- # save any leading text before we enter this block
- $block_leading_text{$type_sequence} = [
- $leading_block_text,
- $rleading_block_if_elsif_text
- ];
- $block_opening_line_number{$type_sequence} =
- $leading_block_text_line_number;
- reset_block_text_accumulator();
+ # Code block rule 2: Use the full length of a
+ # one-line block to avoid breaking it, unless
+ # extremely long. We do not need to do a precise
+ # check here, because if it breaks then it will
+ # stay broken on later iterations.
+ elsif ($is_one_line_block
+ && $block_length <
+ $maximum_line_length_at_level[$level] )
+ {
+ $collapsed_len = $block_length;
+ }
+
+ # Code block rule 3: Otherwise the length should be
+ # at least MIN_BLOCK_LEN to avoid scrunching code
+ # blocks.
+ elsif ( $collapsed_len < MIN_BLOCK_LEN ) {
+ $collapsed_len = MIN_BLOCK_LEN;
+ }
}
- else {
- # shouldn't happen, but not a serious error.
- # We were accumulating -csc text for block type
- # $accumulating_text_for_block and unexpectedly
- # encountered a '{' for block type $block_type.
+ # Store the result. Some extra space, '2', allows for
+ # length of an opening token, inside space, comma, ...
+ # This constant has been tuned to give good overall
+ # results.
+ $collapsed_len += 2;
+ $rcollapsed_length_by_seqno->{$seqno} = $collapsed_len;
+
+ # Restart scanning the lower level prong
+ if (@stack) {
+ $max_prong_len = $stack[-1]->[_max_prong_len_];
+ $collapsed_len += $handle_len;
+ if ( $collapsed_len > $max_prong_len ) {
+ $max_prong_len = $collapsed_len;
+ }
}
}
}
+
+ # it is a ternary - no special processing for these yet
+ else {
+
+ }
+
+ $len = 0;
+ $last_nonblank_type = $type;
+ next;
}
- if ( $type eq 'k'
- && $csc_new_statement_ok
- && $is_if_elsif_else_unless_while_until_for_foreach{$token}
- && $token =~ /$closing_side_comment_list_pattern/o )
- {
- set_block_text_accumulator($i);
+ #----------------------------
+ # Handle non-container tokens
+ #----------------------------
+ my $token_length = $rLL->[$KK]->[_TOKEN_LENGTH_];
+
+ # Count lengths of things like 'xx => yy' as a single item
+ if ( $type eq '=>' ) {
+ $len += $token_length + 1;
+ if ( $len > $max_prong_len ) { $max_prong_len = $len }
}
- else {
+ elsif ( $last_nonblank_type eq '=>' ) {
+ $len += $token_length;
+ if ( $len > $max_prong_len ) { $max_prong_len = $len }
- # note: ignoring type 'q' because of tricks being played
- # with 'q' for hanging side comments
- if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
- $csc_new_statement_ok =
- ( $block_type || $type eq 'J' || $type eq ';' );
- }
- if ( $type eq ';'
- && $accumulating_text_for_block
- && $levels_to_go[$i] == $leading_block_text_level )
- {
- reset_block_text_accumulator();
- }
- else {
- accumulate_block_text($i);
- }
+ # but only include one => per item
+ if ( $last_nonblank_type eq '=>' ) { $len = $token_length }
}
- }
- # Treat an 'else' block specially by adding preceding 'if' and
- # 'elsif' text. Otherwise, the 'end else' is not helpful,
- # especially for cuddled-else formatting.
- if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
- $block_leading_text =
- make_else_csc_text( $i_terminal, $terminal_block_type,
- $block_leading_text, $rblock_leading_if_elsif_text );
- }
+ # include everthing to end of line after a here target
+ elsif ( $type eq 'h' ) {
+ $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
+ if ( $len > $max_prong_len ) { $max_prong_len = $len }
+ }
- # if this line ends in a label then remember it for the next pass
- $csc_last_label = "";
- if ( $terminal_type eq 'J' ) {
- $csc_last_label = $tokens_to_go[$i_terminal];
- }
+ # for everything else just use the token length
+ else {
+ $len = $token_length;
+ if ( $len > $max_prong_len ) { $max_prong_len = $len }
+ }
+ $last_nonblank_type = $type;
- return ( $terminal_type, $i_terminal, $i_block_leading_text,
- $block_leading_text, $block_line_count, $block_label );
- }
-}
+ } ## end loop over tokens on this line
-sub make_else_csc_text {
+ # Now take care of any side comment
+ if ($has_comment) {
+ if ($rOpts_ignore_side_comment_lengths) {
+ $len = 0;
+ }
+ else {
- # create additional -csc text for an 'else' and optionally 'elsif',
- # depending on the value of switch
- # $rOpts_closing_side_comment_else_flag:
- #
- # = 0 add 'if' text to trailing else
- # = 1 same as 0 plus:
- # add 'if' to 'elsif's if can fit in line length
- # add last 'elsif' to trailing else if can fit in one line
- # = 2 same as 1 but do not check if exceed line length
- #
- # $rif_elsif_text = a reference to a list of all previous closing
- # side comments created for this if block
- #
- my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
- my $csc_text = $block_leading_text;
+ # For a side comment when -iscl is not set, measure length from
+ # the start of the previous nonblank token
+ my $len0 =
+ $K_terminal > 0
+ ? $rLL->[ $K_terminal - 1 ]->[_CUMULATIVE_LENGTH_]
+ : 0;
+ $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] - $len0;
+ if ( $len > $max_prong_len ) { $max_prong_len = $len }
+ }
+ }
- if ( $block_type eq 'elsif'
- && $rOpts_closing_side_comment_else_flag == 0 )
- {
- return $csc_text;
+ } ## end loop over lines
+
+ if (DEBUG_COLLAPSED_LENGTHS) {
+ print "\nCollapsed lengths--\n";
+ foreach
+ my $key ( sort { $a <=> $b } keys %{$rcollapsed_length_by_seqno} )
+ {
+ my $clen = $rcollapsed_length_by_seqno->{$key};
+ print "$key -> $clen\n";
+ }
}
- my $count = @{$rif_elsif_text};
- return $csc_text unless ($count);
+ return;
+}
- my $if_text = '[ if' . $rif_elsif_text->[0];
+sub is_excluded_lp {
- # always show the leading 'if' text on 'else'
- if ( $block_type eq 'else' ) {
- $csc_text .= $if_text;
- }
+ # Decide if this container is excluded by user request:
+ # returns true if this token is excluded (i.e., may not use -lp)
+ # returns false otherwise
- # see if that's all
- if ( $rOpts_closing_side_comment_else_flag == 0 ) {
- return $csc_text;
- }
+ # The control hash can either describe:
+ # what to exclude: $line_up_parentheses_control_is_lxpl = 1, or
+ # what to include: $line_up_parentheses_control_is_lxpl = 0
- my $last_elsif_text = "";
- if ( $count > 1 ) {
- $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
- if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
- }
+ my ( $self, $KK ) = @_;
+ my $rLL = $self->[_rLL_];
+ my $rtoken_vars = $rLL->[$KK];
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $rflags = $line_up_parentheses_control_hash{$token};
- # tentatively append one more item
- my $saved_text = $csc_text;
- if ( $block_type eq 'else' ) {
- $csc_text .= $last_elsif_text;
- }
- else {
- $csc_text .= ' ' . $if_text;
- }
+ #-----------------------------------------------
+ # TEST #1: check match to listed container types
+ #-----------------------------------------------
+ if ( !defined($rflags) ) {
- # all done if no length checks requested
- if ( $rOpts_closing_side_comment_else_flag == 2 ) {
- return $csc_text;
+ # There is no entry for this container, so we are done
+ return !$line_up_parentheses_control_is_lxpl;
}
- # undo it if line length exceeded
- my $length =
- length($csc_text) +
- length($block_type) +
- length( $rOpts->{'closing-side-comment-prefix'} ) +
- $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
- if ( $length > maximum_line_length_for_level($leading_block_text_level) ) {
- $csc_text = $saved_text;
- }
- return $csc_text;
-}
+ my ( $flag1, $flag2 ) = @{$rflags};
-{ # sub balance_csc_text
+ #-----------------------------------------------------------
+ # TEST #2: check match to flag1, the preceding nonblank word
+ #-----------------------------------------------------------
+ my $match_flag1 = !defined($flag1) || $flag1 eq '*';
+ if ( !$match_flag1 ) {
- my %matching_char;
+ # Find the previous token
+ my ( $is_f, $is_k, $is_w );
+ my $Kp = $self->K_previous_nonblank($KK);
+ if ( defined($Kp) ) {
+ my $type_p = $rLL->[$Kp]->[_TYPE_];
+ my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
- BEGIN {
- %matching_char = (
- '{' => '}',
- '(' => ')',
- '[' => ']',
- '}' => '{',
- ')' => '(',
- ']' => '[',
- );
- }
+ # keyword?
+ $is_k = $type_p eq 'k';
- sub balance_csc_text {
+ # function call?
+ $is_f = $self->[_ris_function_call_paren_]->{$seqno};
- # Append characters to balance a closing side comment so that editors
- # such as vim can correctly jump through code.
- # Simple Example:
- # input = ## end foreach my $foo ( sort { $b ...
- # output = ## end foreach my $foo ( sort { $b ...})
+ # either keyword or function call?
+ $is_w = $is_k || $is_f;
+ }
- # NOTE: This routine does not currently filter out structures within
- # quoted text because the bounce algorithms in text editors do not
- # necessarily do this either (a version of vim was checked and
- # did not do this).
+ # Check for match based on flag1 and the previous token:
+ if ( $flag1 eq 'k' ) { $match_flag1 = $is_k }
+ elsif ( $flag1 eq 'K' ) { $match_flag1 = !$is_k }
+ elsif ( $flag1 eq 'f' ) { $match_flag1 = $is_f }
+ elsif ( $flag1 eq 'F' ) { $match_flag1 = !$is_f }
+ elsif ( $flag1 eq 'w' ) { $match_flag1 = $is_w }
+ elsif ( $flag1 eq 'W' ) { $match_flag1 = !$is_w }
+ }
- # Some complex examples which will cause trouble for some editors:
- # while ( $mask_string =~ /\{[^{]*?\}/g ) {
- # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
- # if ( $1 eq '{' ) {
- # test file test1/braces.pl has many such examples.
+ # See if we can exclude this based on the flag1 test...
+ if ($line_up_parentheses_control_is_lxpl) {
+ return 1 if ($match_flag1);
+ }
+ else {
+ return 1 if ( !$match_flag1 );
+ }
- my ($csc) = @_;
+ #-------------------------------------------------------------
+ # TEST #3: exclusion based on flag2 and the container contents
+ #-------------------------------------------------------------
- # loop to examine characters one-by-one, RIGHT to LEFT and
- # build a balancing ending, LEFT to RIGHT.
- for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
+ # Note that this is an exclusion test for both -lpxl or -lpil input methods
+ # The options are:
+ # 0 or blank: ignore container contents
+ # 1 exclude non-lists or lists with sublists
+ # 2 same as 1 but also exclude lists with code blocks
- my $char = substr( $csc, $pos, 1 );
+ my $match_flag2;
+ if ($flag2) {
- # ignore everything except structural characters
- next unless ( $matching_char{$char} );
+ my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
- # pop most recently appended character
- my $top = chop($csc);
+ my $is_list = $self->[_ris_list_by_seqno_]->{$seqno};
+ my $has_list = $self->[_rhas_list_]->{$seqno};
+ my $has_code_block = $self->[_rhas_code_block_]->{$seqno};
+ my $has_ternary = $self->[_rhas_ternary_]->{$seqno};
- # push it back plus the mate to the newest character
- # unless they balance each other.
- $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
+ if ( !$is_list
+ || $has_list
+ || $flag2 eq '2' && ( $has_code_block || $has_ternary ) )
+ {
+ $match_flag2 = 1;
}
-
- # return the balanced string
- return $csc;
}
+ return $match_flag2;
}
-sub add_closing_side_comment {
-
- my $self = shift;
+sub set_excluded_lp_containers {
- # add closing side comments after closing block braces if -csc used
- my ( $closing_side_comment, $cscw_block_comment );
+ my ($self) = @_;
+ return unless ($rOpts_line_up_parentheses);
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
- #---------------------------------------------------------------
- # Step 1: loop through all tokens of this line to accumulate
- # the text needed to create the closing side comments. Also see
- # how the line ends.
- #---------------------------------------------------------------
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
- my ( $terminal_type, $i_terminal, $i_block_leading_text,
- $block_leading_text, $block_line_count, $block_label )
- = accumulate_csc_text();
+ foreach my $seqno ( keys %{$K_opening_container} ) {
- #---------------------------------------------------------------
- # Step 2: make the closing side comment if this ends a block
- #---------------------------------------------------------------
- my $have_side_comment = $types_to_go[$max_index_to_go] eq '#';
+ # code blocks are always excluded by the -lp coding so we can skip them
+ next if ( $rblock_type_of_seqno->{$seqno} );
- # if this line might end in a block closure..
- if (
- $terminal_type eq '}'
+ my $KK = $K_opening_container->{$seqno};
+ next unless defined($KK);
- # ..and either
- && (
+ # see if a user exclusion rule turns off -lp for this container
+ if ( $self->is_excluded_lp($KK) ) {
+ $ris_excluded_lp_container->{$seqno} = 1;
+ }
+ }
+ return;
+}
- # the block is long enough
- ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
+######################################
+# CODE SECTION 6: Process line-by-line
+######################################
- # or there is an existing comment to check
- || ( $have_side_comment
- && $rOpts->{'closing-side-comment-warnings'} )
- )
+sub process_all_lines {
- # .. and if this is one of the types of interest
- && $block_type_to_go[$i_terminal] =~
- /$closing_side_comment_list_pattern/o
+ #----------------------------------------------------------
+ # Main loop to format all lines of a file according to type
+ #----------------------------------------------------------
- # .. but not an anonymous sub
- # These are not normally of interest, and their closing braces are
- # often followed by commas or semicolons anyway. This also avoids
- # possible erratic output due to line numbering inconsistencies
- # in the cases where their closing braces terminate a line.
- && $block_type_to_go[$i_terminal] ne 'sub'
+ my $self = shift;
+ my $rlines = $self->[_rlines_];
+ my $sink_object = $self->[_sink_object_];
+ my $fh_tee = $self->[_fh_tee_];
+ my $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
+ my $file_writer_object = $self->[_file_writer_object_];
+ my $logger_object = $self->[_logger_object_];
+ my $vertical_aligner_object = $self->[_vertical_aligner_object_];
+ my $save_logfile = $self->[_save_logfile_];
- # ..and the corresponding opening brace must is not in this batch
- # (because we do not need to tag one-line blocks, although this
- # should also be caught with a positive -csci value)
- && $self->mate_index_to_go($i_terminal) < 0
+ # Note for RT#118553, leave only one newline at the end of a file.
+ # Example code to do this is in comments below:
+ # my $Opt_trim_ending_blank_lines = 0;
+ # if ($Opt_trim_ending_blank_lines) {
+ # while ( my $line_of_tokens = pop @{$rlines} ) {
+ # my $line_type = $line_of_tokens->{_line_type};
+ # if ( $line_type eq 'CODE' ) {
+ # my $CODE_type = $line_of_tokens->{_code_type};
+ # next if ( $CODE_type eq 'BL' );
+ # }
+ # push @{$rlines}, $line_of_tokens;
+ # last;
+ # }
+ # }
- # ..and either
- && (
+ # But while this would be a trivial update, it would have very undesirable
+ # side effects when perltidy is run from within an editor on a small snippet.
+ # So this is best done with a separate filter, such
+ # as 'delete_ending_blank_lines.pl' in the examples folder.
- # this is the last token (line doesn't have a side comment)
- !$have_side_comment
+ # Flag to prevent blank lines when POD occurs in a format skipping sect.
+ my $in_format_skipping_section;
- # or the old side comment is a closing side comment
- || $tokens_to_go[$max_index_to_go] =~
- /$closing_side_comment_prefix_pattern/o
- )
- )
- {
+ # set locations for blanks around long runs of keywords
+ my $rwant_blank_line_after = $self->keyword_group_scan();
- # then make the closing side comment text
- if ($block_label) { $block_label .= " " }
- my $token =
-"$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
+ my $line_type = "";
+ my $i_last_POD_END = -10;
+ my $i = -1;
+ foreach my $line_of_tokens ( @{$rlines} ) {
+ $i++;
- # append any extra descriptive text collected above
- if ( $i_block_leading_text == $i_terminal ) {
- $token .= $block_leading_text;
+ # insert blank lines requested for keyword sequences
+ if ( $i > 0
+ && defined( $rwant_blank_line_after->{ $i - 1 } )
+ && $rwant_blank_line_after->{ $i - 1 } == 1 )
+ {
+ $self->want_blank_line();
}
- $token = balance_csc_text($token)
- if $rOpts->{'closing-side-comments-balanced'};
+ my $last_line_type = $line_type;
+ $line_type = $line_of_tokens->{_line_type};
+ my $input_line = $line_of_tokens->{_line_text};
- $token =~ s/\s*$//; # trim any trailing whitespace
+ # _line_type codes are:
+ # SYSTEM - system-specific code before hash-bang line
+ # CODE - line of perl code (including comments)
+ # POD_START - line starting pod, such as '=head'
+ # POD - pod documentation text
+ # POD_END - last line of pod section, '=cut'
+ # HERE - text of here-document
+ # HERE_END - last line of here-doc (target word)
+ # FORMAT - format section
+ # FORMAT_END - last line of format section, '.'
+ # SKIP - code skipping section
+ # SKIP_END - last line of code skipping section, '#>>V'
+ # DATA_START - __DATA__ line
+ # DATA - unidentified text following __DATA__
+ # END_START - __END__ line
+ # END - unidentified text following __END__
+ # ERROR - we are in big trouble, probably not a perl script
- # handle case of existing closing side comment
- if ($have_side_comment) {
+ # put a blank line after an =cut which comes before __END__ and __DATA__
+ # (required by podchecker)
+ if ( $last_line_type eq 'POD_END' && !$self->[_saw_END_or_DATA_] ) {
+ $i_last_POD_END = $i;
+ $file_writer_object->reset_consecutive_blank_lines();
+ if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
+ $self->want_blank_line();
+ }
+ }
- # warn if requested and tokens differ significantly
- if ( $rOpts->{'closing-side-comment-warnings'} ) {
- my $old_csc = $tokens_to_go[$max_index_to_go];
- my $new_csc = $token;
- $new_csc =~ s/\s+//g; # trim all whitespace
- $old_csc =~ s/\s+//g; # trim all whitespace
- $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
- $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
- $new_csc =~ s/(\.\.\.)$//; # trim trailing '...'
- my $new_trailing_dots = $1;
- $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
+ # handle line of code..
+ if ( $line_type eq 'CODE' ) {
- # Patch to handle multiple closing side comments at
- # else and elsif's. These have become too complicated
- # to check, so if we see an indication of
- # '[ if' or '[ # elsif', then assume they were made
- # by perltidy.
- if ( $block_type_to_go[$i_terminal] eq 'else' ) {
- if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
- }
- elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
- if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
- }
+ my $CODE_type = $line_of_tokens->{_code_type};
+ $in_format_skipping_section = $CODE_type eq 'FS';
- # if old comment is contained in new comment,
- # only compare the common part.
- if ( length($new_csc) > length($old_csc) ) {
- $new_csc = substr( $new_csc, 0, length($old_csc) );
- }
+ # Handle blank lines
+ if ( $CODE_type eq 'BL' ) {
- # if the new comment is shorter and has been limited,
- # only compare the common part.
- if ( length($new_csc) < length($old_csc)
- && $new_trailing_dots )
+ # Keep this blank? Start with the flag -kbl=n, where
+ # n=0 ignore all old blank lines
+ # n=1 stable: keep old blanks, but limited by -mbl=n
+ # n=2 keep all old blank lines, regardless of -mbl=n
+ # If n=0 we delete all old blank lines and let blank line
+ # rules generate any needed blank lines.
+ my $kgb_keep = $rOpts_keep_old_blank_lines;
+
+ # Then delete lines requested by the keyword-group logic if
+ # allowed
+ if ( $kgb_keep == 1
+ && defined( $rwant_blank_line_after->{$i} )
+ && $rwant_blank_line_after->{$i} == 2 )
{
- $old_csc = substr( $old_csc, 0, length($new_csc) );
+ $kgb_keep = 0;
}
- # any remaining difference?
- if ( $new_csc ne $old_csc ) {
-
- # just leave the old comment if we are below the threshold
- # for creating side comments
- if ( $block_line_count <
- $rOpts->{'closing-side-comment-interval'} )
- {
- $token = undef;
- }
-
- # otherwise we'll make a note of it
- else {
-
- warning(
-"perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
- );
+ # But always keep a blank line following an =cut
+ if ( $i - $i_last_POD_END < 3 && !$kgb_keep ) {
+ $kgb_keep = 1;
+ }
- # save the old side comment in a new trailing block
- # comment
- my $timestamp = "";
- if ( $rOpts->{'timestamp'} ) {
- my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
- $year += 1900;
- $month += 1;
- $timestamp = "$year-$month-$day";
- }
- $cscw_block_comment =
-"## perltidy -cscw $timestamp: $tokens_to_go[$max_index_to_go]";
-## "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
- }
+ if ($kgb_keep) {
+ $self->flush($CODE_type);
+ $file_writer_object->write_blank_code_line(
+ $rOpts_keep_old_blank_lines == 2 );
+ $self->[_last_line_leading_type_] = 'b';
}
- else {
+ next;
+ }
+ else {
- # No differences.. we can safely delete old comment if we
- # are below the threshold
- if ( $block_line_count <
- $rOpts->{'closing-side-comment-interval'} )
- {
- $token = undef;
- $self->unstore_token_to_go()
- if ( $types_to_go[$max_index_to_go] eq '#' );
- $self->unstore_token_to_go()
- if ( $types_to_go[$max_index_to_go] eq 'b' );
- }
+ # Let logger see all non-blank lines of code. This is a slow
+ # operation so we avoid it if it is not going to be saved.
+ if ( $save_logfile && $logger_object ) {
+ $logger_object->black_box( $line_of_tokens,
+ $vertical_aligner_object->get_output_line_number );
}
}
- # switch to the new csc (unless we deleted it!)
- if ($token) {
- $tokens_to_go[$max_index_to_go] = $token;
- $self->sync_token_K($max_index_to_go);
+ # Handle Format Skipping (FS) and Verbatim (VB) Lines
+ if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
+ $self->write_unindented_line("$input_line");
+ $file_writer_object->reset_consecutive_blank_lines();
+ next;
}
+
+ # Handle all other lines of code
+ $self->process_line_of_CODE($line_of_tokens);
}
- # handle case of NO existing closing side comment
+ # handle line of non-code..
else {
- # To avoid inserting a new token in the token arrays, we
- # will just return the new side comment so that it can be
- # inserted just before it is needed in the call to the
- # vertical aligner.
- $closing_side_comment = $token;
- }
- }
- return ( $closing_side_comment, $cscw_block_comment );
-}
+ # set special flags
+ my $skip_line = 0;
+ if ( substr( $line_type, 0, 3 ) eq 'POD' ) {
-sub previous_nonblank_token {
- my ($i) = @_;
- my $name = "";
- my $im = $i - 1;
- return "" if ( $im < 0 );
- if ( $types_to_go[$im] eq 'b' ) { $im--; }
- return "" if ( $im < 0 );
- $name = $tokens_to_go[$im];
+ # Pod docs should have a preceding blank line. But stay
+ # out of __END__ and __DATA__ sections, because
+ # the user may be using this section for any purpose whatsoever
+ if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
+ if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
+ if ( !$skip_line
+ && !$in_format_skipping_section
+ && $line_type eq 'POD_START'
+ && !$self->[_saw_END_or_DATA_] )
+ {
+ $self->want_blank_line();
+ }
+ }
- # prepend any sub name to an isolated -> to avoid unwanted alignments
- # [test case is test8/penco.pl]
- if ( $name eq '->' ) {
- $im--;
- if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
- $name = $tokens_to_go[$im] . $name;
+ # leave the blank counters in a predictable state
+ # after __END__ or __DATA__
+ elsif ( $line_type eq 'END_START' || $line_type eq 'DATA_START' ) {
+ $file_writer_object->reset_consecutive_blank_lines();
+ $self->[_saw_END_or_DATA_] = 1;
+ }
+
+ # Patch to avoid losing blank lines after a code-skipping block;
+ # fixes case c047.
+ elsif ( $line_type eq 'SKIP_END' ) {
+ $file_writer_object->reset_consecutive_blank_lines();
+ }
+
+ # write unindented non-code line
+ if ( !$skip_line ) {
+ $self->write_unindented_line($input_line);
+ }
}
}
- return $name;
-}
-
-sub send_lines_to_vertical_aligner {
+ return;
- my ( $self, $rbatch_hash ) = @_;
+} ## end sub process_all_lines
- # This routine receives a batch of code for which the final line breaks
- # have been defined. Here we prepare the lines for passing to the vertical
- # aligner. We do the following tasks:
- # - mark certain vertical alignment tokens tokens, such as '=', in each line.
- # - make minor indentation adjustments
- # - insert extra blank spaces to help display certain logical constructions
+sub keyword_group_scan {
+ my $self = shift;
- my $rlines_K = $rbatch_hash->{rlines_K};
- if ( !@{$rlines_K} ) {
- Fault("Unexpected call with no lines");
- return;
- }
- my $n_last_line = @{$rlines_K} - 1;
- my $do_not_pad = $rbatch_hash->{do_not_pad};
+ #-------------------------------------------------------------------------
+ # Called once per file to process any --keyword-group-blanks-* parameters.
+ #-------------------------------------------------------------------------
- my $rLL = $self->{rLL};
- my $Klimit = $self->{Klimit};
+ # Manipulate blank lines around keyword groups (kgb* flags)
+ # Scan all lines looking for runs of consecutive lines beginning with
+ # selected keywords. Example keywords are 'my', 'our', 'local', ... but
+ # they may be anything. We will set flags requesting that blanks be
+ # inserted around and within them according to input parameters. Note
+ # that we are scanning the lines as they came in in the input stream, so
+ # they are not necessarily well formatted.
- my ( $Kbeg_next, $Kend_next ) = @{ $rlines_K->[0] };
- my $type_beg_next = $rLL->[$Kbeg_next]->[_TYPE_];
- my $token_beg_next = $rLL->[$Kbeg_next]->[_TOKEN_];
- my $type_end_next = $rLL->[$Kend_next]->[_TYPE_];
+ # The output of this sub is a return hash ref whose keys are the indexes of
+ # lines after which we desire a blank line. For line index i:
+ # $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i
+ # $rhash_of_desires->{$i} = 2 means we want blank line $i removed
+ my $rhash_of_desires = {};
- # Construct indexes to the global_to_go arrays so that called routines can
- # still access those arrays. This might eventually be removed
- # when all called routines have been converted to access token values
- # in the rLL array instead.
- my $ibeg0 = $rbatch_hash->{ibeg0};
- my $Kbeg0 = $Kbeg_next;
- my ( $ri_first, $ri_last );
- foreach my $rline ( @{$rlines_K} ) {
- my ( $Kbeg, $Kend ) = @{$rline};
- my $ibeg = $ibeg0 + $Kbeg - $Kbeg0;
- my $iend = $ibeg0 + $Kend - $Kbeg0;
- push @{$ri_first}, $ibeg;
- push @{$ri_last}, $iend;
+ # Nothing to do if no blanks can be output. This test added to fix
+ # case b760.
+ if ( !$rOpts_maximum_consecutive_blank_lines ) {
+ return $rhash_of_desires;
}
- #####################################################################
-
- my $valign_batch_number = $self->increment_valign_batch_count();
- my ( $cscw_block_comment, $closing_side_comment );
- if ( $rOpts->{'closing-side-comments'} ) {
- ( $closing_side_comment, $cscw_block_comment ) =
- $self->add_closing_side_comment();
- }
+ my $Opt_blanks_before = $rOpts->{'keyword-group-blanks-before'}; # '-kgbb'
+ my $Opt_blanks_after = $rOpts->{'keyword-group-blanks-after'}; # '-kgba'
+ my $Opt_blanks_inside = $rOpts->{'keyword-group-blanks-inside'}; # '-kgbi'
+ my $Opt_blanks_delete = $rOpts->{'keyword-group-blanks-delete'}; # '-kgbd'
+ my $Opt_size = $rOpts->{'keyword-group-blanks-size'}; # '-kgbs'
- my $rindentation_list = [0]; # ref to indentations for each line
+ # A range of sizes can be input with decimal notation like 'min.max' with
+ # any number of dots between the two numbers. Examples:
+ # string => min max matches
+ # 1.1 1 1 exactly 1
+ # 1.3 1 3 1,2, or 3
+ # 1..3 1 3 1,2, or 3
+ # 5 5 - 5 or more
+ # 6. 6 - 6 or more
+ # .2 - 2 up to 2
+ # 1.0 1 0 nothing
+ my ( $Opt_size_min, $Opt_size_max ) = split /\.+/, $Opt_size;
+ if ( $Opt_size_min && $Opt_size_min !~ /^\d+$/
+ || $Opt_size_max && $Opt_size_max !~ /^\d+$/ )
+ {
+ Warn(<<EOM);
+Unexpected value for -kgbs: '$Opt_size'; expecting 'min' or 'min.max';
+ignoring all -kgb flags
+EOM
- # define the array @{$ralignment_type_to_go} for the output tokens
- # which will be non-blank for each special token (such as =>)
- # for which alignment is required.
- my $ralignment_type_to_go =
- $self->set_vertical_alignment_markers( $ri_first, $ri_last );
+ # Turn this option off so that this message does not keep repeating
+ # during iterations and other files.
+ $rOpts->{'keyword-group-blanks-size'} = "";
+ return $rhash_of_desires;
+ }
+ $Opt_size_min = 1 unless ($Opt_size_min);
- # flush before a long if statement to avoid unwanted alignment
- if ( $n_last_line > 0
- && $type_beg_next eq 'k'
- && $token_beg_next =~ /^(if|unless)$/ )
- {
- Perl::Tidy::VerticalAligner::flush();
+ if ( $Opt_size_max && $Opt_size_max < $Opt_size_min ) {
+ return $rhash_of_desires;
}
- $self->undo_ci( $ri_first, $ri_last );
+ # codes for $Opt_blanks_before and $Opt_blanks_after:
+ # 0 = never (delete if exist)
+ # 1 = stable (keep unchanged)
+ # 2 = always (insert if missing)
- $self->set_logical_padding( $ri_first, $ri_last );
+ return $rhash_of_desires
+ unless $Opt_size_min > 0
+ && ( $Opt_blanks_before != 1
+ || $Opt_blanks_after != 1
+ || $Opt_blanks_inside
+ || $Opt_blanks_delete );
- # loop to prepare each line for shipment
- my $in_comma_list;
- my ( $Kbeg, $type_beg, $token_beg );
- my ( $Kend, $type_end );
- for my $n ( 0 .. $n_last_line ) {
+ my $Opt_pattern = $keyword_group_list_pattern;
+ my $Opt_comment_pattern = $keyword_group_list_comment_pattern;
+ my $Opt_repeat_count =
+ $rOpts->{'keyword-group-blanks-repeat-count'}; # '-kgbr'
- my $ibeg = $ri_first->[$n];
- my $iend = $ri_last->[$n];
- my $rline = $rlines_K->[$n];
- my $forced_breakpoint = $rline->[2];
+ my $rlines = $self->[_rlines_];
+ my $rLL = $self->[_rLL_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $rK_weld_right = $self->[_rK_weld_right_];
- # we may need to look at variables on three consecutive lines ...
+ # variables for the current group and subgroups:
+ my ( $ibeg, $iend, $count, $level_beg, $K_closing, @iblanks, @group,
+ @subgroup );
- # Some vars on line [n-1], if any:
- my $Kbeg_last = $Kbeg;
- my $type_beg_last = $type_beg;
- my $token_beg_last = $token_beg;
- my $Kend_last = $Kend;
- my $type_end_last = $type_end;
+ # Definitions:
+ # ($ibeg, $iend) = starting and ending line indexes of this entire group
+ # $count = total number of keywords seen in this entire group
+ # $level_beg = indententation level of this group
+ # @group = [ $i, $token, $count ] =list of all keywords & blanks
+ # @subgroup = $j, index of group where token changes
+ # @iblanks = line indexes of blank lines in input stream in this group
+ # where i=starting line index
+ # token (the keyword)
+ # count = number of this token in this subgroup
+ # j = index in group where token changes
+ #
+ # These vars will contain values for the most recently seen line:
+ my ( $line_type, $CODE_type, $K_first, $K_last );
- # Some vars on line [n]:
- $Kbeg = $Kbeg_next;
- $type_beg = $type_beg_next;
- $token_beg = $token_beg_next;
- $Kend = $Kend_next;
- $type_end = $type_end_next;
+ my $number_of_groups_seen = 0;
- # We use two slightly different definitions of level jump at the end
- # of line:
- # $ljump is the level jump needed by 'sub set_adjusted_indentation'
- # $level_jump is the level jump needed by the vertical aligner.
- my $ljump = 0; # level jump at end of line
+ #-------------------
+ # helper subroutines
+ #-------------------
- # Get some vars on line [n+1], if any:
- if ( $n < $n_last_line ) {
- ( $Kbeg_next, $Kend_next ) =
- @{ $rlines_K->[ $n + 1 ] };
- $type_beg_next = $rLL->[$Kbeg_next]->[_TYPE_];
- $token_beg_next = $rLL->[$Kbeg_next]->[_TOKEN_];
- $type_end_next = $rLL->[$Kend_next]->[_TYPE_];
- $ljump = $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
+ my $insert_blank_after = sub {
+ my ($i) = @_;
+ $rhash_of_desires->{$i} = 1;
+ my $ip = $i + 1;
+ if ( defined( $rhash_of_desires->{$ip} )
+ && $rhash_of_desires->{$ip} == 2 )
+ {
+ $rhash_of_desires->{$ip} = 0;
}
+ return;
+ };
- # level jump at end of line for the vertical aligner:
- my $level_jump =
- $Kend >= $Klimit
- ? 0
- : $rLL->[ $Kend + 1 ]->[_SLEVEL_] - $rLL->[$Kbeg]->[_SLEVEL_];
+ my $split_into_sub_groups = sub {
- $self->delete_needless_alignments( $ibeg, $iend,
- $ralignment_type_to_go );
+ # place blanks around long sub-groups of keywords
+ # ...if requested
+ return unless ($Opt_blanks_inside);
- my ( $rtokens, $rfields, $rpatterns ) =
- $self->make_alignment_patterns( $ibeg, $iend,
- $ralignment_type_to_go );
+ # loop over sub-groups, index k
+ push @subgroup, scalar @group;
+ my $kbeg = 1;
+ my $kend = @subgroup - 1;
+ for ( my $k = $kbeg ; $k <= $kend ; $k++ ) {
- my ( $indentation, $lev, $level_end, $terminal_type,
- $is_semicolon_terminated, $is_outdented_line )
- = $self->set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
- $ri_first, $ri_last, $rindentation_list, $ljump );
+ # index j runs through all keywords found
+ my $j_b = $subgroup[ $k - 1 ];
+ my $j_e = $subgroup[$k] - 1;
- # we will allow outdenting of long lines..
- my $outdent_long_lines = (
+ # index i is the actual line number of a keyword
+ my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] };
+ my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] };
+ my $num = $count_e - $count_b + 1;
- # which are long quotes, if allowed
- ( $type_beg eq 'Q' && $rOpts->{'outdent-long-quotes'} )
+ # This subgroup runs from line $ib to line $ie-1, but may contain
+ # blank lines
+ if ( $num >= $Opt_size_min ) {
- # which are long block comments, if allowed
- || (
- $type_beg eq '#'
- && $rOpts->{'outdent-long-comments'}
+ # if there are blank lines, we require that at least $num lines
+ # be non-blank up to the boundary with the next subgroup.
+ my $nog_b = my $nog_e = 1;
+ if ( @iblanks && !$Opt_blanks_delete ) {
+ my $j_bb = $j_b + $num - 1;
+ my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] };
+ $nog_b = $count_bb - $count_b + 1 == $num;
- # but not if this is a static block comment
- && !$is_static_block_comment
- )
- );
+ my $j_ee = $j_e - ( $num - 1 );
+ my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] };
+ $nog_e = $count_e - $count_ee + 1 == $num;
+ }
+ if ( $nog_b && $k > $kbeg ) {
+ $insert_blank_after->( $i_b - 1 );
+ }
+ if ( $nog_e && $k < $kend ) {
+ my ( $i_ep, $tok_ep, $count_ep ) = @{ $group[ $j_e + 1 ] };
+ $insert_blank_after->( $i_ep - 1 );
+ }
+ }
+ }
+ return;
+ };
- my $rvertical_tightness_flags =
- $self->set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
- $ri_first, $ri_last );
+ my $delete_if_blank = sub {
+ my ($i) = @_;
- # flush an outdented line to avoid any unwanted vertical alignment
- Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
+ # delete line $i if it is blank
+ return unless ( $i >= 0 && $i < @{$rlines} );
+ my $line_type = $rlines->[$i]->{_line_type};
+ return if ( $line_type ne 'CODE' );
+ my $code_type = $rlines->[$i]->{_code_type};
+ if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; }
+ return;
+ };
- # Set a flag at the final ':' of a ternary chain to request
- # vertical alignment of the final term. Here is a
- # slightly complex example:
- #
- # $self->{_text} = (
- # !$section ? ''
- # : $type eq 'item' ? "the $section entry"
- # : "the section on $section"
- # )
- # . (
- # $page
- # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
- # : ' elsewhere in this document'
- # );
- #
- my $is_terminal_ternary = 0;
+ my $delete_inner_blank_lines = sub {
- if ( $type_beg eq ':' || $n > 0 && $type_end_last eq ':' ) {
- my $last_leading_type = $n > 0 ? $type_beg_last : ':';
- if ( $terminal_type ne ';'
- && $n_last_line > $n
- && $level_end == $lev )
- {
- $level_end = $rLL->[$Kbeg_next]->[_LEVEL_];
- $terminal_type = $rLL->[$Kbeg_next]->[_TYPE_];
- }
- if (
- $last_leading_type eq ':'
- && ( ( $terminal_type eq ';' && $level_end <= $lev )
- || ( $terminal_type ne ':' && $level_end < $lev ) )
- )
- {
+ # always remove unwanted trailing blank lines from our list
+ return unless (@iblanks);
+ while ( my $ibl = pop(@iblanks) ) {
+ if ( $ibl < $iend ) { push @iblanks, $ibl; last }
+ $iend = $ibl;
+ }
- # the terminal term must not contain any ternary terms, as in
- # my $ECHO = (
- # $Is_MSWin32 ? ".\\echo$$"
- # : $Is_MacOS ? ":echo$$"
- # : ( $Is_NetWare ? "echo$$" : "./echo$$" )
- # );
- $is_terminal_ternary = 1;
+ # now mark mark interior blank lines for deletion if requested
+ return unless ($Opt_blanks_delete);
- my $KP = $rLL->[$Kbeg]->[_KNEXT_SEQ_ITEM_];
- while ( defined($KP) && $KP <= $Kend ) {
- my $type_KP = $rLL->[$KP]->[_TYPE_];
- if ( $type_KP eq '?' || $type_KP eq ':' ) {
- $is_terminal_ternary = 0;
- last;
- }
- $KP = $rLL->[$KP]->[_KNEXT_SEQ_ITEM_];
- }
- }
- }
+ while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 }
- # add any new closing side comment to the last line
- if ( $closing_side_comment && $n == $n_last_line && @{$rfields} ) {
- $rfields->[-1] .= " $closing_side_comment";
- }
+ return;
+ };
- # send this new line down the pipe
- my $rvalign_hash = {};
- $rvalign_hash->{level} = $lev;
- $rvalign_hash->{level_end} = $level_end;
- $rvalign_hash->{indentation} = $indentation;
- $rvalign_hash->{is_forced_break} = $forced_breakpoint || $in_comma_list;
- $rvalign_hash->{outdent_long_lines} = $outdent_long_lines;
- $rvalign_hash->{is_terminal_ternary} = $is_terminal_ternary;
- $rvalign_hash->{is_terminal_statement} = $is_semicolon_terminated;
- $rvalign_hash->{do_not_pad} = $do_not_pad;
- $rvalign_hash->{rvertical_tightness_flags} = $rvertical_tightness_flags;
- $rvalign_hash->{level_jump} = $level_jump;
+ my $end_group = sub {
- $rvalign_hash->{valign_batch_number} = $valign_batch_number;
+ # end a group of keywords
+ my ($bad_ending) = @_;
+ if ( defined($ibeg) && $ibeg >= 0 ) {
- Perl::Tidy::VerticalAligner::valign_input( $rvalign_hash, $rfields,
- $rtokens, $rpatterns );
+ # then handle sufficiently large groups
+ if ( $count >= $Opt_size_min ) {
- $in_comma_list = $type_end eq ',' && $forced_breakpoint;
+ $number_of_groups_seen++;
- # flush an outdented line to avoid any unwanted vertical alignment
- Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
+ # do any blank deletions regardless of the count
+ $delete_inner_blank_lines->();
- $do_not_pad = 0;
+ if ( $ibeg > 0 ) {
+ my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type};
- # Set flag indicating if this line ends in an opening
- # token and is very short, so that a blank line is not
- # needed if the subsequent line is a comment.
- # Examples of what we are looking for:
- # {
- # && (
- # BEGIN {
- # default {
- # sub {
- $last_output_short_opening_token
+ # patch for hash bang line which is not currently marked as
+ # a comment; mark it as a comment
+ if ( $ibeg == 1 && !$code_type ) {
+ my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text};
+ $code_type = 'BC'
+ if ( $line_text && $line_text =~ /^#/ );
+ }
- # line ends in opening token
- = $type_end =~ /^[\{\(\[L]$/
+ # Do not insert a blank after a comment
+ # (this could be subject to a flag in the future)
+ if ( $code_type !~ /(BC|SBC|SBCX)/ ) {
+ if ( $Opt_blanks_before == INSERT ) {
+ $insert_blank_after->( $ibeg - 1 );
- # and either
- && (
- # line has either single opening token
- $Kend == $Kbeg
+ }
+ elsif ( $Opt_blanks_before == DELETE ) {
+ $delete_if_blank->( $ibeg - 1 );
+ }
+ }
+ }
- # or is a single token followed by opening token.
- # Note that sub identifiers have blanks like 'sub doit'
- || ( $Kend - $Kbeg <= 2 && $token_beg !~ /\s+/ )
- )
+ # We will only put blanks before code lines. We could loosen
+ # this rule a little, but we have to be very careful because
+ # for example we certainly don't want to drop a blank line
+ # after a line like this:
+ # my $var = <<EOM;
+ if ( $line_type eq 'CODE' && defined($K_first) ) {
- # and limit total to 10 character widths
- && token_sequence_length( $ibeg, $iend ) <= 10;
+ # - Do not put a blank before a line of different level
+ # - Do not put a blank line if we ended the search badly
+ # - Do not put a blank at the end of the file
+ # - Do not put a blank line before a hanging side comment
+ my $level = $rLL->[$K_first]->[_LEVEL_];
+ my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
- } # end of loop to output each line
+ if ( $level == $level_beg
+ && $ci_level == 0
+ && !$bad_ending
+ && $iend < @{$rlines}
+ && $CODE_type ne 'HSC' )
+ {
+ if ( $Opt_blanks_after == INSERT ) {
+ $insert_blank_after->($iend);
+ }
+ elsif ( $Opt_blanks_after == DELETE ) {
+ $delete_if_blank->( $iend + 1 );
+ }
+ }
+ }
+ }
+ $split_into_sub_groups->();
+ }
- # remember indentation of lines containing opening containers for
- # later use by sub set_adjusted_indentation
- $self->save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
+ # reset for another group
+ $ibeg = -1;
+ $iend = undef;
+ $level_beg = -1;
+ $K_closing = undef;
+ @group = ();
+ @subgroup = ();
+ @iblanks = ();
- # output any new -cscw block comment
- if ($cscw_block_comment) {
- Perl::Tidy::VerticalAligner::flush();
- $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
- }
- return;
-}
+ return;
+ };
-{ # begin make_alignment_patterns
+ my $find_container_end = sub {
- my %block_type_map;
- my %keyword_map;
- my %operator_map;
+ # If the keyword line is continued onto subsequent lines, find the
+ # closing token '$K_closing' so that we can easily skip past the
+ # contents of the container.
- BEGIN {
+ # We only set this value if we find a simple list, meaning
+ # -contents only one level deep
+ # -not welded
- # map related block names into a common name to
- # allow alignment
- %block_type_map = (
- 'unless' => 'if',
- 'else' => 'if',
- 'elsif' => 'if',
- 'when' => 'if',
- 'default' => 'if',
- 'case' => 'if',
- 'sort' => 'map',
- 'grep' => 'map',
- );
+ # First check: skip if next line is not one deeper
+ my $Knext_nonblank = $self->K_next_nonblank($K_last);
+ goto RETURN if ( !defined($Knext_nonblank) );
+ my $level_next = $rLL->[$Knext_nonblank]->[_LEVEL_];
+ goto RETURN if ( $level_next != $level_beg + 1 );
- # map certain keywords to the same 'if' class to align
- # long if/elsif sequences. [elsif.pl]
- %keyword_map = (
- 'unless' => 'if',
- 'else' => 'if',
- 'elsif' => 'if',
- 'when' => 'given',
- 'default' => 'given',
- 'case' => 'switch',
+ # Find the parent container of the first token on the next line
+ my $parent_seqno = $self->parent_seqno_by_K($Knext_nonblank);
+ goto RETURN unless ( defined($parent_seqno) );
- # treat an 'undef' similar to numbers and quotes
- 'undef' => 'Q',
- );
+ # Must not be a weld (can be unstable)
+ goto RETURN
+ if ( $total_weld_count && $self->is_welded_at_seqno($parent_seqno) );
- # map certain operators to the same class for pattern matching
- %operator_map = (
- '!~' => '=~',
- '+=' => '+=',
- '-=' => '+=',
- '*=' => '+=',
- '/=' => '+=',
- );
- }
+ # Opening container must exist and be on this line
+ my $Ko = $K_opening_container->{$parent_seqno};
+ goto RETURN unless ( defined($Ko) && $Ko > $K_first && $Ko <= $K_last );
- sub delete_needless_alignments {
- my ( $self, $ibeg, $iend, $ralignment_type_to_go ) = @_;
+ # Verify that the closing container exists and is on a later line
+ my $Kc = $K_closing_container->{$parent_seqno};
+ goto RETURN unless ( defined($Kc) && $Kc > $K_last );
- # Remove unwanted alignments. This routine is a place to remove
- # alignments which might cause problems at later stages. There are
- # currently two types of fixes:
+ # That's it
+ $K_closing = $Kc;
+ goto RETURN;
- # 1. Remove excess parens
- # 2. Remove alignments within 'elsif' conditions
+ RETURN:
+ return;
+ };
- # Patch #1: Excess alignment of parens can prevent other good
- # alignments. For example, note the parens in the first two rows of
- # the following snippet. They would normally get marked for alignment
- # and aligned as follows:
+ my $add_to_group = sub {
+ my ( $i, $token, $level ) = @_;
- # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
- # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
- # my $img = new Gimp::Image( $w, $h, RGB );
+ # End the previous group if we have reached the maximum
+ # group size
+ if ( $Opt_size_max && @group >= $Opt_size_max ) {
+ $end_group->();
+ }
- # This causes unnecessary paren alignment and prevents the third equals
- # from aligning. If we remove the unwanted alignments we get:
+ if ( @group == 0 ) {
+ $ibeg = $i;
+ $level_beg = $level;
+ $count = 0;
+ }
- # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
- # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
- # my $img = new Gimp::Image( $w, $h, RGB );
+ $count++;
+ $iend = $i;
- # A rule for doing this which works well is to remove alignment of
- # parens whose containers do not contain other aligning tokens, with
- # the exception that we always keep alignment of the first opening
- # paren on a line (for things like 'if' and 'elsif' statements).
+ # New sub-group?
+ if ( !@group || $token ne $group[-1]->[1] ) {
+ push @subgroup, scalar(@group);
+ }
+ push @group, [ $i, $token, $count ];
- # Setup needed constants
- my $i_good_paren = -1;
- my $imin_match = $iend + 1;
- my $i_elsif_close = $ibeg - 1;
- my $i_elsif_open = $iend + 1;
- if ( $iend > $ibeg ) {
- if ( $types_to_go[$ibeg] eq 'k' ) {
+ # remember if this line ends in an open container
+ $find_container_end->();
- # Paren patch: mark a location of a paren we should keep, such
- # as one following something like a leading 'if', 'elsif',..
- $i_good_paren = $ibeg + 1;
- if ( $types_to_go[$i_good_paren] eq 'b' ) {
- $i_good_paren++;
- }
+ return;
+ };
- # 'elsif' patch: remember the range of the parens of an elsif,
- # and do not make alignments within them because this can cause
- # loss of padding and overall brace alignment in the vertical
- # aligner.
- if ( $tokens_to_go[$ibeg] eq 'elsif'
- && $i_good_paren < $iend
- && $tokens_to_go[$i_good_paren] eq '(' )
- {
- $i_elsif_open = $i_good_paren;
- $i_elsif_close = $self->mate_index_to_go($i_good_paren);
- }
- }
- }
+ #----------------------------------
+ # loop over all lines of the source
+ #----------------------------------
+ $end_group->();
+ my $i = -1;
+ foreach my $line_of_tokens ( @{$rlines} ) {
- # Loop to make the fixes on this line
- my @imatch_list;
- for my $i ( $ibeg .. $iend ) {
+ $i++;
+ last
+ if ( $Opt_repeat_count > 0
+ && $number_of_groups_seen >= $Opt_repeat_count );
- if ( $ralignment_type_to_go->[$i] ne '' ) {
+ $CODE_type = "";
+ $K_first = undef;
+ $K_last = undef;
+ $line_type = $line_of_tokens->{_line_type};
- # Patch #2: undo alignment within elsif parens
- if ( $i > $i_elsif_open && $i < $i_elsif_close ) {
- $ralignment_type_to_go->[$i] = '';
- next;
- }
- push @imatch_list, $i;
+ # always end a group at non-CODE
+ if ( $line_type ne 'CODE' ) { $end_group->(); next }
- }
- if ( $tokens_to_go[$i] eq ')' ) {
+ $CODE_type = $line_of_tokens->{_code_type};
- # Patch #1: undo the corresponding opening paren if:
- # - it is at the top of the stack
- # - and not the first overall opening paren
- # - does not follow a leading keyword on this line
- my $imate = $self->mate_index_to_go($i);
- if ( @imatch_list
- && $imatch_list[-1] eq $imate
- && ( $ibeg > 1 || @imatch_list > 1 )
- && $imate > $i_good_paren )
- {
- $ralignment_type_to_go->[$imate] = '';
- pop @imatch_list;
- }
- }
+ # end any group at a format skipping line
+ if ( $CODE_type && $CODE_type eq 'FS' ) {
+ $end_group->();
+ next;
}
- return;
- }
- sub make_alignment_patterns {
+ # continue in a verbatim (VB) type; it may be quoted text
+ if ( $CODE_type eq 'VB' ) {
+ if ( $ibeg >= 0 ) { $iend = $i; }
+ next;
+ }
- # Here we do some important preliminary work for the
- # vertical aligner. We create three arrays for one
- # output line. These arrays contain strings that can
- # be tested by the vertical aligner to see if
- # consecutive lines can be aligned vertically.
- #
- # The three arrays are indexed on the vertical
- # alignment fields and are:
- # @tokens - a list of any vertical alignment tokens for this line.
- # These are tokens, such as '=' '&&' '#' etc which
- # we want to might align vertically. These are
- # decorated with various information such as
- # nesting depth to prevent unwanted vertical
- # alignment matches.
- # @fields - the actual text of the line between the vertical alignment
- # tokens.
- # @patterns - a modified list of token types, one for each alignment
- # field. These should normally each match before alignment is
- # allowed, even when the alignment tokens match.
- my ( $self, $ibeg, $iend, $ralignment_type_to_go ) = @_;
- my @tokens = ();
- my @fields = ();
- my @patterns = ();
- my $i_start = $ibeg;
+ # and continue in blank (BL) types
+ if ( $CODE_type eq 'BL' ) {
+ if ( $ibeg >= 0 ) {
+ $iend = $i;
+ push @{iblanks}, $i;
- my $depth = 0;
- my @container_name = ("");
- my @multiple_comma_arrows = (undef);
+ # propagate current subgroup token
+ my $tok = $group[-1]->[1];
+ push @group, [ $i, $tok, $count ];
+ }
+ next;
+ }
- my $j = 0; # field index
+ # examine the first token of this line
+ my $rK_range = $line_of_tokens->{_rK_range};
+ ( $K_first, $K_last ) = @{$rK_range};
+ if ( !defined($K_first) ) {
- $patterns[0] = "";
- my %token_count;
- for my $i ( $ibeg .. $iend ) {
+ # Somewhat unexpected blank line..
+ # $rK_range is normally defined for line type CODE, but this can
+ # happen for example if the input line was a single semicolon which
+ # is being deleted. In that case there was code in the input
+ # file but it is not being retained. So we can silently return.
+ return $rhash_of_desires;
+ }
- # Keep track of containers balanced on this line only.
- # These are used below to prevent unwanted cross-line alignments.
- # Unbalanced containers already avoid aligning across
- # container boundaries.
- my $tok = $tokens_to_go[$i];
- if ( $tok =~ /^[\(\{\[]/ ) { #'(' ) {
+ my $level = $rLL->[$K_first]->[_LEVEL_];
+ my $type = $rLL->[$K_first]->[_TYPE_];
+ my $token = $rLL->[$K_first]->[_TOKEN_];
+ my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
- # if container is balanced on this line...
- my $i_mate = $self->mate_index_to_go($i);
- if ( $i_mate > $i && $i_mate <= $iend ) {
- $depth++;
- my $seqno = $type_sequence_to_go[$i];
- my $count = comma_arrow_count($seqno);
- $multiple_comma_arrows[$depth] = $count && $count > 1;
-
- # Append the previous token name to make the container name
- # more unique. This name will also be given to any commas
- # within this container, and it helps avoid undesirable
- # alignments of different types of containers.
-
- # Containers beginning with { and [ are given those names
- # for uniqueness. That way commas in different containers
- # will not match. Here is an example of what this prevents:
- # a => [ 1, 2, 3 ],
- # b => { b1 => 4, b2 => 5 },
- # Here is another example of what we avoid by labeling the
- # commas properly:
- # is_d( [ $a, $a ], [ $b, $c ] );
- # is_d( { foo => $a, bar => $a }, { foo => $b, bar => $c } );
- # is_d( [ \$a, \$a ], [ \$b, \$c ] );
-
- my $name = $tok;
- if ( $tok eq '(' ) {
- $name = previous_nonblank_token($i);
- $name =~ s/^->//;
- }
- $container_name[$depth] = "+" . $name;
-
- # Make the container name even more unique if necessary.
- # If we are not vertically aligning this opening paren,
- # append a character count to avoid bad alignment because
- # it usually looks bad to align commas within containers
- # for which the opening parens do not align. Here
- # is an example very BAD alignment of commas (because
- # the atan2 functions are not all aligned):
- # $XY =
- # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
- # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
- # $X * atan2( $X, 1 ) -
- # $Y * atan2( $Y, 1 );
- #
- # On the other hand, it is usually okay to align commas if
- # opening parens align, such as:
- # glVertex3d( $cx + $s * $xs, $cy, $z );
- # glVertex3d( $cx, $cy + $s * $ys, $z );
- # glVertex3d( $cx - $s * $xs, $cy, $z );
- # glVertex3d( $cx, $cy - $s * $ys, $z );
- #
- # To distinguish between these situations, we will
- # append the length of the line from the previous matching
- # token, or beginning of line, to the function name. This
- # will allow the vertical aligner to reject undesirable
- # matches.
-
- # if we are not aligning on this paren...
- if ( $ralignment_type_to_go->[$i] eq '' ) {
-
- # Sum length from previous alignment
- my $len = token_sequence_length( $i_start, $i - 1 );
- if ( $i_start == $ibeg ) {
-
- # For first token, use distance from start of line
- # but subtract off the indentation due to level.
- # Otherwise, results could vary with indentation.
- $len += leading_spaces_to_go($ibeg) -
- $levels_to_go[$i_start] * $rOpts_indent_columns;
- if ( $len < 0 ) { $len = 0 }
- }
+ # End a group 'badly' at an unexpected level. This will prevent
+ # blank lines being incorrectly placed after the end of the group.
+ # We are looking for any deviation from two acceptable patterns:
+ # PATTERN 1: a simple list; secondary lines are at level+1
+ # PATTERN 2: a long statement; all secondary lines same level
+ # This was added as a fix for case b1177, in which a complex structure
+ # got incorrectly inserted blank lines.
+ if ( $ibeg >= 0 ) {
- # tack this length onto the container name to try
- # to make a unique token name
- $container_name[$depth] .= "-" . $len;
- }
- }
- }
- elsif ( $tokens_to_go[$i] =~ /^[\)\}\]]/ ) {
- $depth-- if $depth > 0;
+ # Check for deviation from PATTERN 1, simple list:
+ if ( defined($K_closing) && $K_first < $K_closing ) {
+ $end_group->(1) if ( $level != $level_beg + 1 );
}
- # if we find a new synchronization token, we are done with
- # a field
- if ( $i > $i_start && $ralignment_type_to_go->[$i] ne '' ) {
+ # Check for deviation from PATTERN 2, single statement:
+ elsif ( $level != $level_beg ) { $end_group->(1) }
+ }
- my $tok = my $raw_tok = $ralignment_type_to_go->[$i];
+ # Do not look for keywords in lists ( keyword 'my' can occur in lists,
+ # see case b760); fixed for c048.
+ if ( $self->is_list_by_K($K_first) ) {
+ if ( $ibeg >= 0 ) { $iend = $i }
+ next;
+ }
- # map similar items
- my $tok_map = $operator_map{$tok};
- $tok = $tok_map if ($tok_map);
+ # see if this is a code type we seek (i.e. comment)
+ if ( $CODE_type
+ && $Opt_comment_pattern
+ && $CODE_type =~ /$Opt_comment_pattern/ )
+ {
- # make separators in different nesting depths unique
- # by appending the nesting depth digit.
- if ( $raw_tok ne '#' ) {
- $tok .= "$nesting_depth_to_go[$i]";
- }
+ my $tok = $CODE_type;
- # also decorate commas with any container name to avoid
- # unwanted cross-line alignments.
- if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
- if ( $container_name[$depth] ) {
- $tok .= $container_name[$depth];
- }
- }
+ # Continuing a group
+ if ( $ibeg >= 0 && $level == $level_beg ) {
+ $add_to_group->( $i, $tok, $level );
+ }
- # Patch to avoid aligning leading and trailing if, unless.
- # Mark trailing if, unless statements with container names.
- # This makes them different from leading if, unless which
- # are not so marked at present. If we ever need to name
- # them too, we could use ci to distinguish them.
- # Example problem to avoid:
- # return ( 2, "DBERROR" )
- # if ( $retval == 2 );
- # if ( scalar @_ ) {
- # my ( $a, $b, $c, $d, $e, $f ) = @_;
- # }
- if ( $raw_tok eq '(' ) {
- my $ci = $ci_levels_to_go[$ibeg];
- if ( $container_name[$depth] =~ /^\+(if|unless)/
- && $ci )
- {
- $tok .= $container_name[$depth];
- }
- }
+ # Start new group
+ else {
- # Decorate block braces with block types to avoid
- # unwanted alignments such as the following:
- # foreach ( @{$routput_array} ) { $fh->print($_) }
- # eval { $fh->close() };
- if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
- my $block_type = $block_type_to_go[$i];
+ # first end old group if any; we might be starting new
+ # keywords at different level
+ if ( $ibeg >= 0 ) { $end_group->(); }
+ $add_to_group->( $i, $tok, $level );
+ }
+ next;
+ }
- # map certain related block types to allow
- # else blocks to align
- $block_type = $block_type_map{$block_type}
- if ( defined( $block_type_map{$block_type} ) );
+ # See if it is a keyword we seek, but never start a group in a
+ # continuation line; the code may be badly formatted.
+ if ( $ci_level == 0
+ && $type eq 'k'
+ && $token =~ /$Opt_pattern/ )
+ {
- # remove sub names to allow one-line sub braces to align
- # regardless of name
- #if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
- if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
+ # Continuing a keyword group
+ if ( $ibeg >= 0 && $level == $level_beg ) {
+ $add_to_group->( $i, $token, $level );
+ }
- # allow all control-type blocks to align
- if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
+ # Start new keyword group
+ else {
- $tok .= $block_type;
- }
+ # first end old group if any; we might be starting new
+ # keywords at different level
+ if ( $ibeg >= 0 ) { $end_group->(); }
+ $add_to_group->( $i, $token, $level );
+ }
+ next;
+ }
- # Mark multiple copies of certain tokens with the copy number
- # This will allow the aligner to decide if they are matched.
- # For now, only do this for equals. For example, the two
- # equals on the next line will be labeled '=0' and '=0.2'.
- # Later, the '=0.2' will be ignored in alignment because it
- # has no match.
+ # This is not one of our keywords, but we are in a keyword group
+ # so see if we should continue or quit
+ elsif ( $ibeg >= 0 ) {
- # $| = $debug = 1 if $opt_d;
- # $full_index = 1 if $opt_i;
+ # - bail out on a large level change; we may have walked into a
+ # data structure or anoymous sub code.
+ if ( $level > $level_beg + 1 || $level < $level_beg ) {
+ $end_group->(1);
+ next;
+ }
- if ( $raw_tok eq '=' || $raw_tok eq '=>' ) {
- $token_count{$tok}++;
- if ( $token_count{$tok} > 1 ) {
- $tok .= '.' . $token_count{$tok};
+ # - keep going on a continuation line of the same level, since
+ # it is probably a continuation of our previous keyword,
+ # - and keep going past hanging side comments because we never
+ # want to interrupt them.
+ if ( ( ( $level == $level_beg ) && $ci_level > 0 )
+ || $CODE_type eq 'HSC' )
+ {
+ $iend = $i;
+ next;
+ }
+
+ # - continue if if we are within in a container which started with
+ # the line of the previous keyword.
+ if ( defined($K_closing) && $K_first <= $K_closing ) {
+
+ # continue if entire line is within container
+ if ( $K_last <= $K_closing ) { $iend = $i; next }
+
+ # continue at ); or }; or ];
+ my $KK = $K_closing + 1;
+ if ( $rLL->[$KK]->[_TYPE_] eq ';' ) {
+ if ( $KK < $K_last ) {
+ if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK }
+ if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' ) {
+ $end_group->(1);
+ next;
+ }
}
+ $iend = $i;
+ next;
}
- # concatenate the text of the consecutive tokens to form
- # the field
- push( @fields,
- join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
+ $end_group->(1);
+ next;
+ }
- # store the alignment token for this field
- push( @tokens, $tok );
+ # - end the group if none of the above
+ $end_group->();
+ next;
+ }
- # get ready for the next batch
- $i_start = $i;
- $j++;
- $patterns[$j] = "";
- }
+ # not in a keyword group; continue
+ else { next }
+ }
- # continue accumulating tokens
- # handle non-keywords..
- if ( $types_to_go[$i] ne 'k' ) {
- my $type = $types_to_go[$i];
+ # end of loop over all lines
+ $end_group->();
+ return $rhash_of_desires;
- # Mark most things before arrows as a quote to
- # get them to line up. Testfile: mixed.pl.
- if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
- my $next_type = $types_to_go[ $i + 1 ];
- my $i_next_nonblank =
- ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
+} ## end sub keyword_group_scan
- if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
- $type = 'Q';
+#######################################
+# CODE SECTION 7: Process lines of code
+#######################################
- # Patch to ignore leading minus before words,
- # by changing pattern 'mQ' into just 'Q',
- # so that we can align things like this:
- # Button => "Print letter \"~$_\"",
- # -command => [ sub { print "$_[0]\n" }, $_ ],
- if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
- }
- }
+{ ## begin closure process_line_of_CODE
- # Convert a bareword within braces into a quote for matching. This will
- # allow alignment of expressions like this:
- # local ( $SIG{'INT'} ) = IGNORE;
- # local ( $SIG{ALRM} ) = 'POSTMAN';
- if ( $type eq 'w'
- && $i > $ibeg
- && $i < $iend
- && $types_to_go[ $i - 1 ] eq 'L'
- && $types_to_go[ $i + 1 ] eq 'R' )
- {
- $type = 'Q';
- }
+ # The routines in this closure receive lines of code and combine them into
+ # 'batches' and send them along. A 'batch' is the unit of code which can be
+ # processed further as a unit. It has the property that it is the largest
+ # amount of code into which which perltidy is free to place one or more
+ # line breaks within it without violating any constraints.
- # patch to make numbers and quotes align
- if ( $type eq 'n' ) { $type = 'Q' }
+ # When a new batch is formed it is sent to sub 'grind_batch_of_code'.
- # patch to ignore any ! in patterns
- if ( $type eq '!' ) { $type = '' }
+ # flags needed by the store routine
+ my $line_of_tokens;
+ my $no_internal_newlines;
+ my $CODE_type;
- $patterns[$j] .= $type;
- }
+ # range of K of tokens for the current line
+ my ( $K_first, $K_last );
- # for keywords we have to use the actual text
- else {
+ my ( $rLL, $radjusted_levels, $rparent_of_seqno, $rdepth_of_opening_seqno,
+ $rblock_type_of_seqno, $ri_starting_one_line_block );
+
+ # past stored nonblank tokens and flags
+ my (
+ $K_last_nonblank_code, $K_last_last_nonblank_code,
+ $looking_for_else, $is_static_block_comment,
+ $batch_CODE_type, $last_line_had_side_comment,
+ $next_parent_seqno, $next_slevel,
+ );
- my $tok = $tokens_to_go[$i];
+ # Called once at the start of a new file
+ sub initialize_process_line_of_CODE {
+ $K_last_nonblank_code = undef;
+ $K_last_last_nonblank_code = undef;
+ $looking_for_else = 0;
+ $is_static_block_comment = 0;
+ $batch_CODE_type = "";
+ $last_line_had_side_comment = 0;
+ $next_parent_seqno = SEQ_ROOT;
+ $next_slevel = undef;
+ return;
+ }
- # but map certain keywords to a common string to allow
- # alignment.
- $tok = $keyword_map{$tok}
- if ( defined( $keyword_map{$tok} ) );
- $patterns[$j] .= $tok;
- }
- }
+ # Batch variables: these describe the current batch of code being formed
+ # and sent down the pipeline. They are initialized in the next
+ # sub.
+ my ( $rbrace_follower, $index_start_one_line_block,
+ $semicolons_before_block_self_destruct,
+ $starting_in_quote, $ending_in_quote, );
+
+ # Called before the start of each new batch
+ sub initialize_batch_variables {
+
+ $max_index_to_go = UNDEFINED_INDEX;
+ @summed_lengths_to_go = @nesting_depth_to_go = (0);
+ $ri_starting_one_line_block = [];
+
+ # The initialization code for the remaining batch arrays is as follows
+ # and can be activated for testing. But profiling shows that it is
+ # time-consuming to re-initialize the batch arrays and is not necessary
+ # because the maximum valid token, $max_index_to_go, is carefully
+ # controlled. This means however that it is not possible to do any
+ # type of filter or map operation directly on these arrays. And it is
+ # not possible to use negative indexes. As a precaution against program
+ # changes which might do this, sub pad_array_to_go adds some undefs at
+ # the end of the current batch of data.
+
+ # So 'long story short': this is a waste of time
+ 0 && do { #<<<
+ @block_type_to_go = ();
+ @type_sequence_to_go = ();
+ @bond_strength_to_go = ();
+ @forced_breakpoint_to_go = ();
+ @token_lengths_to_go = ();
+ @levels_to_go = ();
+ @mate_index_to_go = ();
+ @ci_levels_to_go = ();
+ @nobreak_to_go = ();
+ @old_breakpoint_to_go = ();
+ @tokens_to_go = ();
+ @K_to_go = ();
+ @types_to_go = ();
+ @leading_spaces_to_go = ();
+ @reduced_spaces_to_go = ();
+ @inext_to_go = ();
+ @iprev_to_go = ();
+ @parent_seqno_to_go = ();
+ };
- # done with this line .. join text of tokens to make the last field
- push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
- return ( \@tokens, \@fields, \@patterns );
+ $rbrace_follower = undef;
+ $ending_in_quote = 0;
+ destroy_one_line_block();
+ return;
}
-} # end make_alignment_patterns
+ sub leading_spaces_to_go {
-{ # begin unmatched_indexes
+ # return the number of indentation spaces for a token in the output
+ # stream
- # closure to keep track of unbalanced containers.
- # arrays shared by the routines in this block:
- my @unmatched_opening_indexes_in_this_batch;
- my @unmatched_closing_indexes_in_this_batch;
- my %comma_arrow_count;
+ my ($ii) = @_;
+ return 0 if ( $ii < 0 );
+ my $indentation = $leading_spaces_to_go[$ii];
+ return ref($indentation) ? $indentation->get_spaces() : $indentation;
+ }
- sub is_unbalanced_batch {
- return @unmatched_opening_indexes_in_this_batch +
- @unmatched_closing_indexes_in_this_batch;
+ sub create_one_line_block {
+ ( $index_start_one_line_block, $semicolons_before_block_self_destruct )
+ = @_;
+ return;
}
- sub comma_arrow_count {
- my $seqno = shift;
- return $comma_arrow_count{$seqno};
+ sub destroy_one_line_block {
+ $index_start_one_line_block = UNDEFINED_INDEX;
+ $semicolons_before_block_self_destruct = 0;
+ return;
}
- sub match_opening_and_closing_tokens {
+ # Routine to place the current token into the output stream.
+ # Called once per output token.
- # Match up indexes of opening and closing braces, etc, in this batch.
- # This has to be done after all tokens are stored because unstoring
- # of tokens would otherwise cause trouble.
+ use constant DEBUG_STORE => 0;
- @unmatched_opening_indexes_in_this_batch = ();
- @unmatched_closing_indexes_in_this_batch = ();
- %comma_arrow_count = ();
- my $comma_arrow_count_contained = 0;
+ sub store_token_to_go {
- foreach my $i ( 0 .. $max_index_to_go ) {
- if ( $type_sequence_to_go[$i] ) {
- my $token = $tokens_to_go[$i];
- if ( $token =~ /^[\(\[\{\?]$/ ) {
- push @unmatched_opening_indexes_in_this_batch, $i;
+ my ( $self, $Ktoken_vars, $rtoken_vars ) = @_;
+
+ # Add one token to the next batch.
+ # $Ktoken_vars = the index K in the global token array
+ # $rtoken_vars = $rLL->[$Ktoken_vars] = the corresponding token values
+ # unless they are temporarily being overridden
+
+ my $type = $rtoken_vars->[_TYPE_];
+
+ # Check for emergency flush...
+ # The K indexes in the batch must always be a continuous sequence of
+ # the global token array. The batch process programming assumes this.
+ # If storing this token would cause this relation to fail we must dump
+ # the current batch before storing the new token. It is extremely rare
+ # for this to happen. One known example is the following two-line
+ # snippet when run with parameters
+ # --noadd-newlines --space-terminal-semicolon:
+ # if ( $_ =~ /PENCIL/ ) { $pencil_flag= 1 } ; ;
+ # $yy=1;
+ if ( $max_index_to_go >= 0 ) {
+ my $Klast = $K_to_go[$max_index_to_go];
+ if ( $Ktoken_vars != $Klast + 1 ) {
+ $self->flush_batch_of_CODE();
+ }
+
+ # Do not output consecutive blank tokens ... this should not
+ # happen, but it is worth checking. Later code can then make the
+ # simplifying assumption that blank tokens are not consecutive.
+ elsif ( $type eq 'b' && $types_to_go[$max_index_to_go] eq 'b' ) {
+
+ if (DEVEL_MODE) {
+
+ # if this happens, it is may be that consecutive blanks
+ # were inserted into the token stream in 'respace_tokens'
+ my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1;
+ Fault("consecutive blanks near line $lno; please fix");
}
- elsif ( $token =~ /^[\)\]\}\:]$/ ) {
+ return;
+ }
+ }
- my $i_mate = pop @unmatched_opening_indexes_in_this_batch;
- if ( defined($i_mate) && $i_mate >= 0 ) {
- if ( $type_sequence_to_go[$i_mate] ==
- $type_sequence_to_go[$i] )
- {
- $mate_index_to_go[$i] = $i_mate;
- $mate_index_to_go[$i_mate] = $i;
- my $seqno = $type_sequence_to_go[$i];
- if ( $comma_arrow_count{$seqno} ) {
- $comma_arrow_count_contained +=
- $comma_arrow_count{$seqno};
- }
- }
- else {
- push @unmatched_opening_indexes_in_this_batch,
- $i_mate;
- push @unmatched_closing_indexes_in_this_batch, $i;
- }
+ # Do not start a batch with a blank token.
+ # Fixes cases b149 b888 b984 b985 b986 b987
+ else {
+ if ( $type eq 'b' ) { return }
+ }
+
+ ++$max_index_to_go;
+ $batch_CODE_type = $CODE_type;
+ $K_to_go[$max_index_to_go] = $Ktoken_vars;
+ $types_to_go[$max_index_to_go] = $type;
+
+ $old_breakpoint_to_go[$max_index_to_go] = 0;
+ $forced_breakpoint_to_go[$max_index_to_go] = 0;
+ $mate_index_to_go[$max_index_to_go] = -1;
+
+ my $token = $tokens_to_go[$max_index_to_go] = $rtoken_vars->[_TOKEN_];
+ my $ci_level = $ci_levels_to_go[$max_index_to_go] =
+ $rtoken_vars->[_CI_LEVEL_];
+
+ # Clip levels to zero if there are level errors in the file.
+ # We had to wait until now for reasons explained in sub 'write_line'.
+ my $level = $rtoken_vars->[_LEVEL_];
+ if ( $level < 0 ) { $level = 0 }
+ $levels_to_go[$max_index_to_go] = $level;
+
+ my $seqno = $type_sequence_to_go[$max_index_to_go] =
+ $rtoken_vars->[_TYPE_SEQUENCE_];
+
+ if ( $max_index_to_go == 0 ) {
+
+ # Update the next parent sequence number for each new batch.
+
+ #------------------------------------------
+ # Begin coding from sub parent_seqno_from_K
+ #------------------------------------------
+
+ ## $next_parent_seqno = $self->parent_seqno_by_K($Ktoken_vars);
+ $next_parent_seqno = SEQ_ROOT;
+ if ($seqno) {
+ $next_parent_seqno = $rparent_of_seqno->{$seqno};
+ }
+ else {
+ my $Kt = $rLL->[$Ktoken_vars]->[_KNEXT_SEQ_ITEM_];
+ if ( defined($Kt) ) {
+ my $type_sequence = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
+ my $type = $rLL->[$Kt]->[_TYPE_];
+
+ # if next container token is closing, it is the parent seqno
+ if ( $is_closing_type{$type} ) {
+ $next_parent_seqno = $type_sequence;
}
+
+ # otherwise we want its parent container
else {
- push @unmatched_closing_indexes_in_this_batch, $i;
+ $next_parent_seqno =
+ $rparent_of_seqno->{$type_sequence};
}
}
}
- elsif ( $tokens_to_go[$i] eq '=>' ) {
- if (@unmatched_opening_indexes_in_this_batch) {
- my $j = $unmatched_opening_indexes_in_this_batch[-1];
- my $seqno = $type_sequence_to_go[$j];
- $comma_arrow_count{$seqno}++;
- }
+ $next_parent_seqno = SEQ_ROOT
+ unless ( defined($next_parent_seqno) );
+
+ #----------------------------------------
+ # End coding from sub parent_seqno_from_K
+ #----------------------------------------
+
+ $next_slevel = $rdepth_of_opening_seqno->[$next_parent_seqno] + 1;
+ }
+
+ # Initialize some sequence-dependent variables to their normal values
+ my $parent_seqno = $next_parent_seqno;
+ my $slevel = $next_slevel;
+ my $block_type = "";
+
+ # Then fix them at container tokens:
+ if ($seqno) {
+ if ( $is_opening_token{$token} ) {
+ $next_parent_seqno = $seqno;
+ $slevel = $rdepth_of_opening_seqno->[$seqno];
+ $next_slevel = $slevel + 1;
+ $block_type = $rblock_type_of_seqno->{$seqno};
+ }
+ elsif ( $is_closing_token{$token} ) {
+ $next_slevel = $rdepth_of_opening_seqno->[$seqno];
+ $slevel = $next_slevel + 1;
+ $block_type = $rblock_type_of_seqno->{$seqno};
+ $parent_seqno = $rparent_of_seqno->{$seqno};
+ $parent_seqno = SEQ_ROOT unless defined($parent_seqno);
+ $next_parent_seqno = $parent_seqno;
+ }
+ else {
+ # ternary token: nothing to do
}
+ $block_type = "" unless ( defined($block_type) );
}
- return $comma_arrow_count_contained;
- }
- sub save_opening_indentation {
+ $parent_seqno_to_go[$max_index_to_go] = $parent_seqno;
+ $nesting_depth_to_go[$max_index_to_go] = $slevel;
+ $block_type_to_go[$max_index_to_go] = $block_type;
+ $nobreak_to_go[$max_index_to_go] = $no_internal_newlines;
- # This should be called after each batch of tokens is output. It
- # saves indentations of lines of all unmatched opening tokens.
- # These will be used by sub get_opening_indentation.
+ my $length = $rtoken_vars->[_TOKEN_LENGTH_];
- my ( $self, $ri_first, $ri_last, $rindentation_list ) = @_;
+ # Safety check that length is defined. Should not be needed now.
+ # Former patch for indent-only, in which the entire set of tokens is
+ # turned into type 'q'. Lengths may have not been defined because sub
+ # 'respace_tokens' is bypassed. We do not need lengths in this case,
+ # but we will use the character count to have a defined value. In the
+ # future, it would be nicer to have 'respace_tokens' convert the lines
+ # to quotes and get correct lengths.
+ if ( !defined($length) ) { $length = length($token) }
- # we no longer need indentations of any saved indentations which
- # are unmatched closing tokens in this batch, because we will
- # never encounter them again. So we can delete them to keep
- # the hash size down.
- foreach (@unmatched_closing_indexes_in_this_batch) {
- my $seqno = $type_sequence_to_go[$_];
- delete $saved_opening_indentation{$seqno};
+ $token_lengths_to_go[$max_index_to_go] = $length;
+
+ # We keep a running sum of token lengths from the start of this batch:
+ # summed_lengths_to_go[$i] = total length to just before token $i
+ # summed_lengths_to_go[$i+1] = total length to just after token $i
+ $summed_lengths_to_go[ $max_index_to_go + 1 ] =
+ $summed_lengths_to_go[$max_index_to_go] + $length;
+
+ my $in_continued_quote =
+ ( $Ktoken_vars == $K_first ) && $line_of_tokens->{_starting_in_quote};
+ if ( $max_index_to_go == 0 ) {
+ $starting_in_quote = $in_continued_quote;
}
- # we need to save indentations of any unmatched opening tokens
- # in this batch because we may need them in a subsequent batch.
- foreach (@unmatched_opening_indexes_in_this_batch) {
- my $seqno = $type_sequence_to_go[$_];
- $saved_opening_indentation{$seqno} = [
- lookup_opening_indentation(
- $_, $ri_first, $ri_last, $rindentation_list
- )
- ];
+ # Define the indentation that this token will have in two cases:
+ # Without CI = reduced_spaces_to_go
+ # With CI = leading_spaces_to_go
+ if ($in_continued_quote) {
+ $leading_spaces_to_go[$max_index_to_go] = 0;
+ $reduced_spaces_to_go[$max_index_to_go] = 0;
+ }
+ else {
+ $reduced_spaces_to_go[$max_index_to_go] = my $reduced_spaces =
+ $rOpts_indent_columns * $radjusted_levels->[$Ktoken_vars];
+ $leading_spaces_to_go[$max_index_to_go] =
+ $reduced_spaces + $rOpts_continuation_indentation * $ci_level;
}
+ $standard_spaces_to_go[$max_index_to_go] =
+ $leading_spaces_to_go[$max_index_to_go];
+
+ DEBUG_STORE && do {
+ my ( $a, $b, $c ) = caller();
+ print STDOUT
+"STORE: from $a $c: storing token $token type $type lev=$level at $max_index_to_go\n";
+ };
return;
}
-} # end unmatched_indexes
-sub get_opening_indentation {
+ sub flush_batch_of_CODE {
- # get the indentation of the line which output the opening token
- # corresponding to a given closing token in the current output batch.
- #
- # given:
- # $i_closing - index in this line of a closing token ')' '}' or ']'
- #
- # $ri_first - reference to list of the first index $i for each output
- # line in this batch
- # $ri_last - reference to list of the last index $i for each output line
- # in this batch
- # $rindentation_list - reference to a list containing the indentation
- # used for each line.
- #
- # return:
- # -the indentation of the line which contained the opening token
- # which matches the token at index $i_opening
- # -and its offset (number of columns) from the start of the line
- #
- my ( $self, $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
+ # Finish any batch packaging and call the process routine.
+ # This must be the only call to grind_batch_of_CODE()
+ my ($self) = @_;
- # first, see if the opening token is in the current batch
- my $i_opening = $mate_index_to_go[$i_closing];
- my ( $indent, $offset, $is_leading, $exists );
- $exists = 1;
- if ( $i_opening >= 0 ) {
+ return unless ( $max_index_to_go >= 0 );
- # it is..look up the indentation
- ( $indent, $offset, $is_leading ) =
- lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
- $rindentation_list );
- }
+ # Create an array to hold variables for this batch
+ my $this_batch = [];
+ $this_batch->[_starting_in_quote_] = $starting_in_quote;
+ $this_batch->[_ending_in_quote_] = $ending_in_quote;
+ $this_batch->[_max_index_to_go_] = $max_index_to_go;
+ $this_batch->[_batch_CODE_type_] = $batch_CODE_type;
- # if not, it should have been stored in the hash by a previous batch
- else {
- my $seqno = $type_sequence_to_go[$i_closing];
- if ($seqno) {
- if ( $saved_opening_indentation{$seqno} ) {
- ( $indent, $offset, $is_leading ) =
- @{ $saved_opening_indentation{$seqno} };
- }
+ # The flag $is_static_block_comment applies to the line which just
+ # arrived. So it only applies if we are outputting that line.
+ $this_batch->[_is_static_block_comment_] =
+ defined($K_first)
+ && $max_index_to_go == 0
+ && $K_to_go[0] == $K_first ? $is_static_block_comment : 0;
- # some kind of serious error
- # (example is badfile.t)
- else {
- $indent = 0;
- $offset = 0;
- $is_leading = 0;
- $exists = 0;
- }
- }
+ $this_batch->[_ri_starting_one_line_block_] =
+ $ri_starting_one_line_block;
- # if no sequence number it must be an unbalanced container
- else {
- $indent = 0;
- $offset = 0;
- $is_leading = 0;
- $exists = 0;
- }
- }
- return ( $indent, $offset, $is_leading, $exists );
-}
+ $self->[_this_batch_] = $this_batch;
-sub lookup_opening_indentation {
+ $last_line_had_side_comment =
+ $max_index_to_go > 0 && $types_to_go[$max_index_to_go] eq '#';
- # get the indentation of the line in the current output batch
- # which output a selected opening token
- #
- # given:
- # $i_opening - index of an opening token in the current output batch
- # whose line indentation we need
- # $ri_first - reference to list of the first index $i for each output
- # line in this batch
- # $ri_last - reference to list of the last index $i for each output line
- # in this batch
- # $rindentation_list - reference to a list containing the indentation
- # used for each line. (NOTE: the first slot in
- # this list is the last returned line number, and this is
- # followed by the list of indentations).
- #
- # return
- # -the indentation of the line which contained token $i_opening
- # -and its offset (number of columns) from the start of the line
+ $self->grind_batch_of_CODE();
- my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
+ # Done .. this batch is history
+ $self->[_this_batch_] = [];
+
+ initialize_batch_variables();
+ initialize_forced_breakpoint_vars();
- if ( !@{$ri_last} ) {
- warning("Error in opening_indentation: no lines");
return;
}
- my $nline = $rindentation_list->[0]; # line number of previous lookup
+ sub end_batch {
- # reset line location if necessary
- $nline = 0 if ( $i_opening < $ri_start->[$nline] );
+ # end the current batch, EXCEPT for a few special cases
+ my ($self) = @_;
- # find the correct line
- unless ( $i_opening > $ri_last->[-1] ) {
- while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
- }
+ if ( $max_index_to_go < 0 ) {
- # error - token index is out of bounds - shouldn't happen
- else {
- warning(
-"non-fatal program bug in lookup_opening_indentation - index out of range\n"
- );
- report_definite_bug();
- $nline = $#{$ri_last};
- }
+ # This is harmless but should be elimintated in development
+ if (DEVEL_MODE) {
+ Fault("End batch called with nothing to do; please fix\n");
+ }
+ return;
+ }
- $rindentation_list->[0] =
- $nline; # save line number to start looking next call
- my $ibeg = $ri_start->[$nline];
- my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
- my $is_leading = ( $ibeg == $i_opening );
- return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
-}
+ # Exceptions when a line does not end with a comment... (fixes c058)
+ if ( $types_to_go[$max_index_to_go] ne '#' ) {
-{
- my %is_if_elsif_else_unless_while_until_for_foreach;
+ # Exception 1: Do not end line in a weld
+ return
+ if ( $total_weld_count
+ && $self->[_rK_weld_right_]->{ $K_to_go[$max_index_to_go] } );
- BEGIN {
+ # Exception 2: just set a tentative breakpoint if we might be in a
+ # one-line block
+ if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
+ $self->set_forced_breakpoint($max_index_to_go);
+ return;
+ }
+ }
- # These block types may have text between the keyword and opening
- # curly. Note: 'else' does not, but must be included to allow trailing
- # if/elsif text to be appended.
- # patch for SWITCH/CASE: added 'case' and 'when'
- my @q = qw(if elsif else unless while until for foreach case when);
- @is_if_elsif_else_unless_while_until_for_foreach{@q} =
- (1) x scalar(@q);
+ $self->flush_batch_of_CODE();
+ return;
}
- sub set_adjusted_indentation {
+ sub flush_vertical_aligner {
+ my ($self) = @_;
+ my $vao = $self->[_vertical_aligner_object_];
+ $vao->flush();
+ return;
+ }
- # This routine has the final say regarding the actual indentation of
- # a line. It starts with the basic indentation which has been
- # defined for the leading token, and then takes into account any
- # options that the user has set regarding special indenting and
- # outdenting.
+ # flush is called to output any tokens in the pipeline, so that
+ # an alternate source of lines can be written in the correct order
+ sub flush {
+ my ( $self, $CODE_type ) = @_;
- my (
- $self, $ibeg, $iend,
- $rfields, $rpatterns, $ri_first,
- $ri_last, $rindentation_list, $level_jump
- ) = @_;
+ # end the current batch with 1 exception
- my $rLL = $self->{rLL};
+ destroy_one_line_block();
- # we need to know the last token of this line
- my ( $terminal_type, $i_terminal ) =
- $self->terminal_type_i( $ibeg, $iend );
+ # Exception: if we are flushing within the code stream only to insert
+ # blank line(s), then we can keep the batch intact at a weld. This
+ # improves formatting of -ce. See test 'ce1.ce'
+ if ( $CODE_type && $CODE_type eq 'BL' ) {
+ $self->end_batch() if ( $max_index_to_go >= 0 );
+ }
- my $is_outdented_line = 0;
+ # otherwise, we have to shut things down completely.
+ else { $self->flush_batch_of_CODE() }
- my $is_semicolon_terminated = $terminal_type eq ';'
- && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
+ $self->flush_vertical_aligner();
+ return;
+ }
- # NOTE: A future improvement would be to make it semicolon terminated
- # even if it does not have a semicolon but is followed by a closing
- # block brace. This would undo ci even for something like the
- # following, in which the final paren does not have a semicolon because
- # it is a possible weld location:
+ sub process_line_of_CODE {
- # if ($BOLD_MATH) {
- # (
- # $labels, $comment,
- # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
- # )
- # }
- #
+ my ( $self, $my_line_of_tokens ) = @_;
- # MOJO: Set a flag if this lines begins with ')->'
- my $leading_paren_arrow = (
- $types_to_go[$ibeg] eq '}'
- && $tokens_to_go[$ibeg] eq ')'
- && (
- ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' )
- || ( $ibeg < $i_terminal - 1
- && $types_to_go[ $ibeg + 1 ] eq 'b'
- && $types_to_go[ $ibeg + 2 ] eq '->' )
- )
- );
+ #----------------------------------------------------------------
+ # This routine is called once per INPUT line to format all of the
+ # tokens on that line.
+ #----------------------------------------------------------------
- ##########################################################
- # Section 1: set a flag and a default indentation
- #
- # Most lines are indented according to the initial token.
- # But it is common to outdent to the level just after the
- # terminal token in certain cases...
- # adjust_indentation flag:
- # 0 - do not adjust
- # 1 - outdent
- # 2 - vertically align with opening token
- # 3 - indent
- ##########################################################
- my $adjust_indentation = 0;
- my $default_adjust_indentation = $adjust_indentation;
+ # It outputs full-line comments and blank lines immediately.
- my (
- $opening_indentation, $opening_offset,
- $is_leading, $opening_exists
- );
+ # The tokens are copied one-by-one from the global token array $rLL to
+ # a set of '_to_go' arrays which collect batches of tokens for a
+ # further processing via calls to 'sub store_token_to_go', until a well
+ # defined 'structural' break point* or 'forced' breakpoint* is reached.
+ # Then, the batch of collected '_to_go' tokens is passed along to 'sub
+ # grind_batch_of_CODE' for further processing.
- my $type_beg = $types_to_go[$ibeg];
- my $token_beg = $tokens_to_go[$ibeg];
- my $K_beg = $K_to_go[$ibeg];
- my $ibeg_weld_fix = $ibeg;
+ # * 'structural' break points are basically line breaks corresponding
+ # to code blocks. An example is a chain of if-elsif-else statements,
+ # which should typically be broken at the opening and closing braces.
- # QW PATCH 2 (Testing)
- # At an isolated closing token of a qw quote which is welded to
- # a following closing token, we will locally change its type to
- # be the same as its token. This will allow formatting to be the
- # same as for an ordinary closing token.
+ # * 'forced' break points are breaks required by side comments or by
+ # special user controls.
- # For -lp formatting se use $ibeg_weld_fix to get around the problem
- # that with -lp type formatting the opening and closing tokens to not
- # have sequence numbers.
- if ( $type_beg eq 'q' && $token_beg =~ /^[\)\}\]\>]/ ) {
- my $K_next_nonblank = $self->K_next_code($K_beg);
- if ( defined($K_next_nonblank) ) {
- my $type_sequence = $rLL->[$K_next_nonblank]->[_TYPE_SEQUENCE_];
- my $token = $rLL->[$K_next_nonblank]->[_TOKEN_];
- my $welded = weld_len_left( $type_sequence, $token );
- if ($welded) {
- $ibeg_weld_fix = $ibeg + ( $K_next_nonblank - $K_beg );
- $type_beg = ')'; ##$token_beg;
- }
- }
+ # So this routine is just making an initial set of required line
+ # breaks, basically regardless of the maximum requested line length.
+ # The subsequent stage of formating make additional line breaks
+ # appropriate for lists and logical structures, and to keep line
+ # lengths below the requested maximum line length.
+
+ #-----------------------------------
+ # begin initialize closure variables
+ #-----------------------------------
+ $line_of_tokens = $my_line_of_tokens;
+ $CODE_type = $line_of_tokens->{_code_type};
+ my $rK_range = $line_of_tokens->{_rK_range};
+ ( $K_first, $K_last ) = @{$rK_range};
+ if ( !defined($K_first) ) {
+
+ # Empty line: This can happen if tokens are deleted, for example
+ # with the -mangle parameter
+ return;
}
+ $rLL = $self->[_rLL_];
+ $radjusted_levels = $self->[_radjusted_levels_];
+ $rparent_of_seqno = $self->[_rparent_of_seqno_];
+ $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
+ $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
- # if we are at a closing token of some type..
- if ( $type_beg =~ /^[\)\}\]R]$/ ) {
+ #---------------------------------
+ # end initialize closure variables
+ #---------------------------------
- # get the indentation of the line containing the corresponding
- # opening token
- (
- $opening_indentation, $opening_offset,
- $is_leading, $opening_exists
- )
- = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first,
- $ri_last, $rindentation_list );
+ # This flag will become nobreak_to_go and should be set to 2 to prevent
+ # a line break AFTER the current token.
+ $no_internal_newlines = 0;
+ if ( !$rOpts_add_newlines || $CODE_type eq 'NIN' ) {
+ $no_internal_newlines = 2;
+ }
- # First set the default behavior:
- if (
+ my $input_line = $line_of_tokens->{_line_text};
- # default behavior is to outdent closing lines
- # of the form: "); }; ]; )->xxx;"
- $is_semicolon_terminated
+ my $is_comment =
+ ( $K_first == $K_last && $rLL->[$K_first]->[_TYPE_] eq '#' );
+ my $is_static_block_comment_without_leading_space =
+ $CODE_type eq 'SBCX';
+ $is_static_block_comment =
+ $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space;
+ my $is_hanging_side_comment = $CODE_type eq 'HSC';
+ my $is_VERSION_statement = $CODE_type eq 'VER';
- # and 'cuddled parens' of the form: ")->pack("
- # Bug fix for RT #123749]: the types here were
- # incorrectly '(' and ')'. Corrected to be '{' and '}'
- || (
- $terminal_type eq '{'
- && $type_beg eq '}'
- && ( $nesting_depth_to_go[$iend] + 1 ==
- $nesting_depth_to_go[$ibeg] )
- )
+ if ($is_VERSION_statement) {
+ $self->[_saw_VERSION_in_this_file_] = 1;
+ $no_internal_newlines = 2;
+ }
- # remove continuation indentation for any line like
- # } ... {
- # or without ending '{' and unbalanced, such as
- # such as '}->{$operator}'
- || (
- $type_beg eq '}'
+ # Add interline blank if any
+ my $last_old_nonblank_type = "b";
+ my $first_new_nonblank_token = "";
+ my $K_first_true = $K_first;
+ if ( $max_index_to_go >= 0 ) {
+ $last_old_nonblank_type = $types_to_go[$max_index_to_go];
+ $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
+ if ( !$is_comment
+ && $types_to_go[$max_index_to_go] ne 'b'
+ && $K_first > 0
+ && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' )
+ {
+ $K_first -= 1;
+ }
+ }
- && ( $types_to_go[$iend] eq '{'
- || $levels_to_go[$iend] < $levels_to_go[$ibeg] )
- )
+ my $rtok_first = $rLL->[$K_first];
- # and when the next line is at a lower indentation level
- # PATCH: and only if the style allows undoing continuation
- # for all closing token types. We should really wait until
- # the indentation of the next line is known and then make
- # a decision, but that would require another pass.
- || ( $level_jump < 0 && !$some_closing_token_indentation )
+ my $in_quote = $line_of_tokens->{_ending_in_quote};
+ $ending_in_quote = $in_quote;
- # Patch for -wn=2, multiple welded closing tokens
- || ( $i_terminal > $ibeg
- && $types_to_go[$iend] =~ /^[\)\}\]R]$/ )
+ #------------------------------------
+ # Handle a block (full-line) comment.
+ #------------------------------------
+ if ($is_comment) {
- )
- {
- $adjust_indentation = 1;
+ if ( $rOpts->{'delete-block-comments'} ) {
+ $self->flush();
+ return;
}
- # outdent something like '),'
+ destroy_one_line_block();
+ $self->end_batch() if ( $max_index_to_go >= 0 );
+
+ # output a blank line before block comments
if (
- $terminal_type eq ','
+ # unless we follow a blank or comment line
+ $self->[_last_line_leading_type_] ne '#'
+ && $self->[_last_line_leading_type_] ne 'b'
- # Removed this constraint for -wn
- # OLD: allow just one character before the comma
- # && $i_terminal == $ibeg + 1
+ # only if allowed
+ && $rOpts->{'blanks-before-comments'}
- # require LIST environment; otherwise, we may outdent too much -
- # this can happen in calls without parentheses (overload.t);
- && $container_environment_to_go[$i_terminal] eq 'LIST'
+ # if this is NOT an empty comment, unless it follows a side
+ # comment and could become a hanging side comment.
+ && (
+ $rtok_first->[_TOKEN_] ne '#'
+ || ( $last_line_had_side_comment
+ && $rLL->[$K_first]->[_LEVEL_] > 0 )
+ )
+
+ # not after a short line ending in an opening token
+ # because we already have space above this comment.
+ # Note that the first comment in this if block, after
+ # the 'if (', does not get a blank line because of this.
+ && !$self->[_last_output_short_opening_token_]
+
+ # never before static block comments
+ && !$is_static_block_comment
)
{
- $adjust_indentation = 1;
+ $self->flush(); # switching to new output stream
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->write_blank_code_line();
+ $self->[_last_line_leading_type_] = 'b';
}
- # undo continuation indentation of a terminal closing token if
- # it is the last token before a level decrease. This will allow
- # a closing token to line up with its opening counterpart, and
- # avoids an indentation jump larger than 1 level.
- if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
- && $i_terminal == $ibeg
- && defined($K_beg) )
+ if (
+ $rOpts->{'indent-block-comments'}
+ && ( !$rOpts->{'indent-spaced-block-comments'}
+ || $input_line =~ /^\s+/ )
+ && !$is_static_block_comment_without_leading_space
+ )
{
- my $K_next_nonblank = $self->K_next_code($K_beg);
-
- # Patch for RT#131115: honor -bli flag at closing brace
- my $is_bli =
- $rOpts_brace_left_and_indent
- && $block_type_to_go[$i_terminal]
- && $block_type_to_go[$i_terminal] =~ /$bli_pattern/o;
-
- if ( !$is_bli && defined($K_next_nonblank) ) {
- my $lev = $rLL->[$K_beg]->[_LEVEL_];
- my $level_next = $rLL->[$K_next_nonblank]->[_LEVEL_];
- $adjust_indentation = 1 if ( $level_next < $lev );
- }
-
- # Patch for RT #96101, in which closing brace of anonymous subs
- # was not outdented. We should look ahead and see if there is
- # a level decrease at the next token (i.e., a closing token),
- # but right now we do not have that information. For now
- # we see if we are in a list, and this works well.
- # See test files 'sub*.t' for good test cases.
- if ( $block_type_to_go[$ibeg] =~ /$ASUB_PATTERN/
- && $container_environment_to_go[$i_terminal] eq 'LIST'
- && !$rOpts->{'indent-closing-brace'} )
- {
- (
- $opening_indentation, $opening_offset,
- $is_leading, $opening_exists
- )
- = $self->get_opening_indentation( $ibeg, $ri_first,
- $ri_last, $rindentation_list );
- my $indentation = $leading_spaces_to_go[$ibeg];
- if ( defined($opening_indentation)
- && get_spaces($indentation) >
- get_spaces($opening_indentation) )
- {
- $adjust_indentation = 1;
- }
- }
+ my $Ktoken_vars = $K_first;
+ my $rtoken_vars = $rLL->[$Ktoken_vars];
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
+ $self->end_batch();
}
+ else {
- # YVES patch 1 of 2:
- # Undo ci of line with leading closing eval brace,
- # but not beyond the indention of the line with
- # the opening brace.
- if ( $block_type_to_go[$ibeg] eq 'eval'
- && !$rOpts->{'line-up-parentheses'}
- && !$rOpts->{'indent-closing-brace'} )
- {
- (
- $opening_indentation, $opening_offset,
- $is_leading, $opening_exists
- )
- = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
- $rindentation_list );
- my $indentation = $leading_spaces_to_go[$ibeg];
- if ( defined($opening_indentation)
- && get_spaces($indentation) >
- get_spaces($opening_indentation) )
- {
- $adjust_indentation = 1;
- }
+ # switching to new output stream
+ $self->flush();
+
+ # Note that last arg in call here is 'undef' for comments
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->write_code_line(
+ $rtok_first->[_TOKEN_] . "\n", undef );
+ $self->[_last_line_leading_type_] = '#';
}
+ return;
+ }
- $default_adjust_indentation = $adjust_indentation;
+ # compare input/output indentation except for continuation lines
+ # (because they have an unknown amount of initial blank space)
+ # and lines which are quotes (because they may have been outdented)
+ my $guessed_indentation_level =
+ $line_of_tokens->{_guessed_indentation_level};
+ unless ( $is_hanging_side_comment
+ || $rtok_first->[_CI_LEVEL_] > 0
+ || $guessed_indentation_level == 0 && $rtok_first->[_TYPE_] eq 'Q' )
+ {
+ my $input_line_number = $line_of_tokens->{_line_number};
+ $self->compare_indentation_levels( $K_first,
+ $guessed_indentation_level, $input_line_number );
+ }
- # Now modify default behavior according to user request:
- # handle option to indent non-blocks of the form ); }; ];
- # But don't do special indentation to something like ')->pack('
- if ( !$block_type_to_go[$ibeg] ) {
- my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
- if ( $cti == 1 ) {
- if ( $i_terminal <= $ibeg + 1
- || $is_semicolon_terminated )
- {
- $adjust_indentation = 2;
- }
- else {
- $adjust_indentation = 0;
- }
- }
- elsif ( $cti == 2 ) {
- if ($is_semicolon_terminated) {
- $adjust_indentation = 3;
- }
- else {
- $adjust_indentation = 0;
- }
- }
- elsif ( $cti == 3 ) {
- $adjust_indentation = 3;
- }
- }
+ #------------------------
+ # Handle indentation-only
+ #------------------------
- # handle option to indent blocks
- else {
- if (
- $rOpts->{'indent-closing-brace'}
- && (
- $i_terminal == $ibeg # isolated terminal '}'
- || $is_semicolon_terminated
- )
- ) # } xxxx ;
- {
- $adjust_indentation = 3;
- }
- }
- }
+ # NOTE: In previous versions we sent all qw lines out immediately here.
+ # No longer doing this: also write a line which is entirely a 'qw' list
+ # to allow stacking of opening and closing tokens. Note that interior
+ # qw lines will still go out at the end of this routine.
+ if ( $CODE_type eq 'IO' ) {
+ $self->flush();
+ my $line = $input_line;
- # if at ');', '};', '>;', and '];' of a terminal qw quote
- elsif ($rpatterns->[0] =~ /^qb*;$/
- && $rfields->[0] =~ /^([\)\}\]\>]);$/ )
- {
- if ( $closing_token_indentation{$1} == 0 ) {
- $adjust_indentation = 1;
- }
- else {
- $adjust_indentation = 3;
- }
- }
+ # Fix for rt #125506 Unexpected string formating
+ # in which leading space of a terminal quote was removed
+ $line =~ s/\s+$//;
+ $line =~ s/^\s+// unless ( $line_of_tokens->{_starting_in_quote} );
- # if line begins with a ':', align it with any
- # previous line leading with corresponding ?
- elsif ( $types_to_go[$ibeg] eq ':' ) {
- (
- $opening_indentation, $opening_offset,
- $is_leading, $opening_exists
- )
- = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
- $rindentation_list );
- if ($is_leading) { $adjust_indentation = 2; }
- }
+ my $Ktoken_vars = $K_first;
- ##########################################################
- # Section 2: set indentation according to flag set above
- #
- # Select the indentation object to define leading
- # whitespace. If we are outdenting something like '} } );'
- # then we want to use one level below the last token
- # ($i_terminal) in order to get it to fully outdent through
- # all levels.
- ##########################################################
- my $indentation;
- my $lev;
- my $level_end = $levels_to_go[$iend];
+ # We work with a copy of the token variables and change the
+ # first token to be the entire line as a quote variable
+ my $rtoken_vars = $rLL->[$Ktoken_vars];
+ $rtoken_vars = copy_token_as_type( $rtoken_vars, 'q', $line );
- if ( $adjust_indentation == 0 ) {
- $indentation = $leading_spaces_to_go[$ibeg];
- $lev = $levels_to_go[$ibeg];
+ # Patch: length is not really important here
+ $rtoken_vars->[_TOKEN_LENGTH_] = length($line);
+
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
+ $self->end_batch();
+ return;
}
- elsif ( $adjust_indentation == 1 ) {
- # Change the indentation to be that of a different token on the line
- # Previously, the indentation of the terminal token was used:
- # OLD CODING:
- # $indentation = $reduced_spaces_to_go[$i_terminal];
- # $lev = $levels_to_go[$i_terminal];
+ #---------------------------
+ # Handle all other lines ...
+ #---------------------------
- # Generalization for MOJO:
- # Use the lowest level indentation of the tokens on the line.
- # For example, here we can use the indentation of the ending ';':
- # } until ($selection > 0 and $selection < 10); # ok to use ';'
- # But this will not outdent if we use the terminal indentation:
- # )->then( sub { # use indentation of the ->, not the {
- # Warning: reduced_spaces_to_go[] may be a reference, do not
- # do numerical checks with it
+ # If we just saw the end of an elsif block, write nag message
+ # if we do not see another elseif or an else.
+ if ($looking_for_else) {
- my $i_ind = $ibeg;
- $indentation = $reduced_spaces_to_go[$i_ind];
- $lev = $levels_to_go[$i_ind];
- while ( $i_ind < $i_terminal ) {
- $i_ind++;
- if ( $levels_to_go[$i_ind] < $lev ) {
- $indentation = $reduced_spaces_to_go[$i_ind];
- $lev = $levels_to_go[$i_ind];
- }
+ unless ( $rLL->[$K_first]->[_TOKEN_] =~ /^(elsif|else)$/ ) {
+ write_logfile_entry("(No else block)\n");
}
+ $looking_for_else = 0;
}
- # handle indented closing token which aligns with opening token
- elsif ( $adjust_indentation == 2 ) {
-
- # handle option to align closing token with opening token
- $lev = $levels_to_go[$ibeg];
-
- # calculate spaces needed to align with opening token
- my $space_count =
- get_spaces($opening_indentation) + $opening_offset;
+ # This is a good place to kill incomplete one-line blocks
+ if ( $max_index_to_go >= 0 ) {
+ if (
+ (
+ ( $semicolons_before_block_self_destruct == 0 )
+ && ( $last_old_nonblank_type eq ';' )
+ && ( $first_new_nonblank_token ne '}' )
+ )
- # Indent less than the previous line.
- #
- # Problem: For -lp we don't exactly know what it was if there
- # were recoverable spaces sent to the aligner. A good solution
- # would be to force a flush of the vertical alignment buffer, so
- # that we would know. For now, this rule is used for -lp:
- #
- # When the last line did not start with a closing token we will
- # be optimistic that the aligner will recover everything wanted.
- #
- # This rule will prevent us from breaking a hierarchy of closing
- # tokens, and in a worst case will leave a closing paren too far
- # indented, but this is better than frequently leaving it not
- # indented enough.
- my $last_spaces = get_spaces($last_indentation_written);
- if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
- $last_spaces +=
- get_recoverable_spaces($last_indentation_written);
+ # Patch for RT #98902. Honor request to break at old commas.
+ || ( $rOpts_break_at_old_comma_breakpoints
+ && $last_old_nonblank_type eq ',' )
+ )
+ {
+ $forced_breakpoint_to_go[$max_index_to_go] = 1
+ if ($rOpts_break_at_old_comma_breakpoints);
+ destroy_one_line_block();
+ $self->end_batch();
}
- # reset the indentation to the new space count if it works
- # only options are all or none: nothing in-between looks good
- $lev = $levels_to_go[$ibeg];
- if ( $space_count < $last_spaces ) {
- if ($rOpts_line_up_parentheses) {
- my $lev = $levels_to_go[$ibeg];
- $indentation =
- new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
+ # Keep any requested breaks before this line. Note that we have to
+ # use the original K_first because it may have been reduced above
+ # to add a blank. The value of the flag is as follows:
+ # 1 => hard break, flush the batch
+ # 2 => soft break, set breakpoint and continue building the batch
+ if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} ) {
+ destroy_one_line_block();
+ if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} == 2 ) {
+ $self->set_forced_breakpoint($max_index_to_go);
}
else {
- $indentation = $space_count;
+ $self->end_batch() if ( $max_index_to_go >= 0 );
}
}
+ }
- # revert to default if it doesn't work
- else {
- $space_count = leading_spaces_to_go($ibeg);
- if ( $default_adjust_indentation == 0 ) {
- $indentation = $leading_spaces_to_go[$ibeg];
- }
- elsif ( $default_adjust_indentation == 1 ) {
- $indentation = $reduced_spaces_to_go[$i_terminal];
- $lev = $levels_to_go[$i_terminal];
- }
- }
+ #--------------------------------------
+ # loop to process the tokens one-by-one
+ #--------------------------------------
+
+ # We do not want a leading blank if the previous batch just got output
+ if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
+ $K_first++;
}
- # Full indentaion of closing tokens (-icb and -icp or -cti=2)
- else {
+ foreach my $Ktoken_vars ( $K_first .. $K_last ) {
- # handle -icb (indented closing code block braces)
- # Updated method for indented block braces: indent one full level if
- # there is no continuation indentation. This will occur for major
- # structures such as sub, if, else, but not for things like map
- # blocks.
- #
- # Note: only code blocks without continuation indentation are
- # handled here (if, else, unless, ..). In the following snippet,
- # the terminal brace of the sort block will have continuation
- # indentation as shown so it will not be handled by the coding
- # here. We would have to undo the continuation indentation to do
- # this, but it probably looks ok as is. This is a possible future
- # update for semicolon terminated lines.
- #
- # if ($sortby eq 'date' or $sortby eq 'size') {
- # @files = sort {
- # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
- # or $a cmp $b
- # } @files;
- # }
- #
- if ( $block_type_to_go[$ibeg]
- && $ci_levels_to_go[$i_terminal] == 0 )
- {
- my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
- $indentation = $spaces + $rOpts_indent_columns;
+ my $rtoken_vars = $rLL->[$Ktoken_vars];
+ my $type = $rtoken_vars->[_TYPE_];
- # NOTE: for -lp we could create a new indentation object, but
- # there is probably no need to do it
+ # If we are continuing after seeing a right curly brace, flush
+ # buffer unless we see what we are looking for, as in
+ # } else ...
+ if ( $rbrace_follower && $type ne 'b' ) {
+ my $token = $rtoken_vars->[_TOKEN_];
+ unless ( $rbrace_follower->{$token} ) {
+ $self->end_batch() if ( $max_index_to_go >= 0 );
+ }
+ $rbrace_follower = undef;
}
- # handle -icp and any -icb block braces which fall through above
- # test such as the 'sort' block mentioned above.
- else {
-
- # There are currently two ways to handle -icp...
- # One way is to use the indentation of the previous line:
- # $indentation = $last_indentation_written;
+ my (
+ $block_type, $type_sequence,
+ $is_opening_BLOCK, $is_closing_BLOCK,
+ $nobreak_BEFORE_BLOCK
+ );
+ if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) {
- # The other way is to use the indentation that the previous line
- # would have had if it hadn't been adjusted:
- $indentation = $last_unadjusted_indentation;
+ my $token = $rtoken_vars->[_TOKEN_];
+ $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+ $block_type = $rblock_type_of_seqno->{$type_sequence};
- # Current method: use the minimum of the two. This avoids
- # inconsistent indentation.
- if ( get_spaces($last_indentation_written) <
- get_spaces($indentation) )
+ if ( $block_type
+ && $token eq $type
+ && $block_type ne 't'
+ && !$self->[_rshort_nested_]->{$type_sequence} )
{
- $indentation = $last_indentation_written;
+
+ if ( $type eq '{' ) {
+ $is_opening_BLOCK = 1;
+ $nobreak_BEFORE_BLOCK = $no_internal_newlines;
+ }
+ elsif ( $type eq '}' ) {
+ $is_closing_BLOCK = 1;
+ $nobreak_BEFORE_BLOCK = $no_internal_newlines;
+ }
}
}
- # use previous indentation but use own level
- # to cause list to be flushed properly
- $lev = $levels_to_go[$ibeg];
- }
+ # Find next nonblank token on this line and look for a side comment
+ my ( $Knnb, $side_comment_follows );
- # remember indentation except for multi-line quotes, which get
- # no indentation
- unless ( $ibeg == 0 && $starting_in_quote ) {
- $last_indentation_written = $indentation;
- $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
- $last_leading_token = $tokens_to_go[$ibeg];
- }
+ # if before last token ...
+ if ( $Ktoken_vars < $K_last ) {
+ $Knnb = $Ktoken_vars + 1;
+ if ( $Knnb < $K_last
+ && $rLL->[$Knnb]->[_TYPE_] eq 'b' )
+ {
+ $Knnb++;
+ }
- # be sure lines with leading closing tokens are not outdented more
- # than the line which contained the corresponding opening token.
+ if ( $rLL->[$Knnb]->[_TYPE_] eq '#' ) {
+ $side_comment_follows = 1;
- #############################################################
- # updated per bug report in alex_bug.pl: we must not
- # mess with the indentation of closing logical braces so
- # we must treat something like '} else {' as if it were
- # an isolated brace
- #############################################################
- my $is_isolated_block_brace = $block_type_to_go[$ibeg]
- && ( $i_terminal == $ibeg
- || $is_if_elsif_else_unless_while_until_for_foreach{
- $block_type_to_go[$ibeg]
- } );
+ # Do not allow breaks which would promote a side comment to
+ # a block comment.
+ $no_internal_newlines = 2;
+ }
+ }
- # only do this for a ':; which is aligned with its leading '?'
- my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
+ # if at last token ...
+ else {
- if (
- defined($opening_indentation)
- && !$leading_paren_arrow # MOJO
- && !$is_isolated_block_brace
- && !$is_unaligned_colon
- )
- {
- if ( get_spaces($opening_indentation) > get_spaces($indentation) ) {
- $indentation = $opening_indentation;
+ #---------------------
+ # handle side comments
+ #---------------------
+ if ( $type eq '#' ) {
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
+ next;
+ }
}
- }
- # remember the indentation of each line of this batch
- push @{$rindentation_list}, $indentation;
+ #--------------
+ # handle blanks
+ #--------------
+ if ( $type eq 'b' ) {
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
+ next;
+ }
- # outdent lines with certain leading tokens...
- if (
+ # Process non-blank and non-comment tokens ...
- # must be first word of this batch
- $ibeg == 0
+ #-----------------
+ # handle semicolon
+ #-----------------
+ if ( $type eq ';' ) {
+
+ my $next_nonblank_token_type = 'b';
+ my $next_nonblank_token = '';
+ if ( defined($Knnb) ) {
+ $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
+ $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
+ }
- # and ...
- && (
+ my $break_before_semicolon = ( $Ktoken_vars == $K_first )
+ && $rOpts_break_at_old_semicolon_breakpoints;
- # certain leading keywords if requested
- (
- $rOpts->{'outdent-keywords'}
- && $types_to_go[$ibeg] eq 'k'
- && $outdent_keyword{ $tokens_to_go[$ibeg] }
- )
+ # kill one-line blocks with too many semicolons
+ $semicolons_before_block_self_destruct--;
+ if (
+ $break_before_semicolon
+ || ( $semicolons_before_block_self_destruct < 0 )
+ || ( $semicolons_before_block_self_destruct == 0
+ && $next_nonblank_token_type !~ /^[b\}]$/ )
+ )
+ {
+ destroy_one_line_block();
+ $self->end_batch()
+ if ( $break_before_semicolon
+ && $max_index_to_go >= 0 );
+ }
- # or labels if requested
- || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
- # or static block comments if requested
- || ( $types_to_go[$ibeg] eq '#'
- && $rOpts->{'outdent-static-block-comments'}
- && $is_static_block_comment )
- )
- )
+ $self->end_batch()
+ unless (
+ $no_internal_newlines
+ || ( $rOpts_keep_interior_semicolons
+ && $Ktoken_vars < $K_last )
+ || ( $next_nonblank_token eq '}' )
+ );
- {
- my $space_count = leading_spaces_to_go($ibeg);
- if ( $space_count > 0 ) {
- $space_count -= $rOpts_continuation_indentation;
- $is_outdented_line = 1;
- if ( $space_count < 0 ) { $space_count = 0 }
-
- # do not promote a spaced static block comment to non-spaced;
- # this is not normally necessary but could be for some
- # unusual user inputs (such as -ci = -i)
- if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
- $space_count = 1;
- }
-
- if ($rOpts_line_up_parentheses) {
- $indentation =
- new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
- }
- else {
- $indentation = $space_count;
- }
}
- }
-
- return ( $indentation, $lev, $level_end, $terminal_type,
- $is_semicolon_terminated, $is_outdented_line );
- }
-}
-sub mate_index_to_go {
- my ( $self, $i ) = @_;
+ #-----------
+ # handle '{'
+ #-----------
+ elsif ($is_opening_BLOCK) {
- # Return the matching index of a container or ternary pair
- # This is equivalent to the array @mate_index_to_go
- my $K = $K_to_go[$i];
- my $K_mate = $self->K_mate_index($K);
- my $i_mate = -1;
- if ( defined($K_mate) ) {
- $i_mate = $i + ( $K_mate - $K );
- if ( $i_mate < 0 || $i_mate > $max_index_to_go ) {
- $i_mate = -1;
- }
- }
- my $i_mate_alt = $mate_index_to_go[$i];
-
- # Debug code to eventually be removed
- if ( 0 && $i_mate_alt != $i_mate ) {
- my $tok = $tokens_to_go[$i];
- my $type = $types_to_go[$i];
- my $tok_mate = '*';
- my $type_mate = '*';
- if ( $i_mate >= 0 && $i_mate <= $max_index_to_go ) {
- $tok_mate = $tokens_to_go[$i_mate];
- $type_mate = $types_to_go[$i_mate];
- }
- my $seq = $type_sequence_to_go[$i];
- my $file = $logger_object->get_input_stream_name();
-
- Warn(
-"mate_index: file '$file': i=$i, imate=$i_mate, should be $i_mate_alt, K=$K, K_mate=$K_mate\ntype=$type, tok=$tok, seq=$seq, max=$max_index_to_go, tok_mate=$tok_mate, type_mate=$type_mate"
- );
- }
- return $i_mate;
-}
+ # Tentatively output this token. This is required before
+ # calling starting_one_line_block. We may have to unstore
+ # it, though, if we have to break before it.
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
-sub K_mate_index {
+ # Look ahead to see if we might form a one-line block..
+ my $too_long =
+ $self->starting_one_line_block( $Ktoken_vars,
+ $K_last_nonblank_code, $K_last );
+ $self->clear_breakpoint_undo_stack();
- # Given the index K of an opening or closing container, or ?/: ternary pair,
- # return the index K of the other member of the pair.
- my ( $self, $K ) = @_;
- return unless defined($K);
- my $rLL = $self->{rLL};
- my $seqno = $rLL->[$K]->[_TYPE_SEQUENCE_];
- return unless ($seqno);
+ # to simplify the logic below, set a flag to indicate if
+ # this opening brace is far from the keyword which introduces it
+ my $keyword_on_same_line = 1;
+ if (
+ $max_index_to_go >= 0
+ && defined($K_last_nonblank_code)
+ && $rLL->[$K_last_nonblank_code]->[_TYPE_] eq ')'
+ && ( ( $rtoken_vars->[_LEVEL_] < $levels_to_go[0] )
+ || $too_long )
+ )
+ {
+ $keyword_on_same_line = 0;
+ }
- my $K_opening = $self->{K_opening_container}->{$seqno};
- if ( defined($K_opening) ) {
- if ( $K != $K_opening ) { return $K_opening }
- return $self->{K_closing_container}->{$seqno};
- }
+ # Break before '{' if requested with -bl or -bli flag
+ my $want_break = $self->[_rbrace_left_]->{$type_sequence};
- $K_opening = $self->{K_opening_ternary}->{$seqno};
- if ( defined($K_opening) ) {
- if ( $K != $K_opening ) { return $K_opening }
- return $self->{K_closing_ternary}->{$seqno};
- }
- return;
-}
+ # But do not break if this token is welded to the left
+ if ( $total_weld_count
+ && defined( $self->[_rK_weld_left_]->{$Ktoken_vars} ) )
+ {
+ $want_break = 0;
+ }
-sub set_vertical_tightness_flags {
+ # Break BEFORE an opening '{' ...
+ if (
- my ( $self, $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
+ # if requested
+ $want_break
- # Define vertical tightness controls for the nth line of a batch.
- # We create an array of parameters which tell the vertical aligner
- # if we should combine this line with the next line to achieve the
- # desired vertical tightness. The array of parameters contains:
- #
- # [0] type: 1=opening non-block 2=closing non-block
- # 3=opening block brace 4=closing block brace
- #
- # [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
- # if closing: spaces of padding to use
- # [2] sequence number of container
- # [3] valid flag: do not append if this flag is false. Will be
- # true if appropriate -vt flag is set. Otherwise, Will be
- # made true only for 2 line container in parens with -lp
- #
- # These flags are used by sub set_leading_whitespace in
- # the vertical aligner
+ # and we were unable to start looking for a block,
+ && $index_start_one_line_block == UNDEFINED_INDEX
- my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
+ # or if it will not be on same line as its keyword, so that
+ # it will be outdented (eval.t, overload.t), and the user
+ # has not insisted on keeping it on the right
+ || ( !$keyword_on_same_line
+ && !$rOpts_opening_brace_always_on_right )
+ )
+ {
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 1:
- # Handle Lines 1 .. n-1 but not the last line
- # For non-BLOCK tokens, we will need to examine the next line
- # too, so we won't consider the last line.
- #--------------------------------------------------------------
- if ( $n < $n_last_line ) {
+ # but only if allowed
+ unless ($nobreak_BEFORE_BLOCK) {
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 1a:
- # Look for Type 1, last token of this line is a non-block opening token
- #--------------------------------------------------------------
- my $ibeg_next = $ri_first->[ $n + 1 ];
- my $token_end = $tokens_to_go[$iend];
- my $iend_next = $ri_last->[ $n + 1 ];
- if (
- $type_sequence_to_go[$iend]
- && !$block_type_to_go[$iend]
- && $is_opening_token{$token_end}
- && (
- $opening_vertical_tightness{$token_end} > 0
+ # since we already stored this token, we must unstore it
+ $self->unstore_token_to_go();
- # allow 2-line method call to be closed up
- || ( $rOpts_line_up_parentheses
- && $token_end eq '('
- && $iend > $ibeg
- && $types_to_go[ $iend - 1 ] ne 'b' )
- )
- )
- {
+ # then output the line
+ $self->end_batch() if ( $max_index_to_go >= 0 );
- # avoid multiple jumps in nesting depth in one line if
- # requested
- my $ovt = $opening_vertical_tightness{$token_end};
- my $iend_next = $ri_last->[ $n + 1 ];
- unless (
- $ovt < 2
- && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
- $nesting_depth_to_go[$ibeg_next] )
- )
- {
+ # and now store this token at the start of a new line
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
+ }
+ }
- # If -vt flag has not been set, mark this as invalid
- # and aligner will validate it if it sees the closing paren
- # within 2 lines.
- my $valid_flag = $ovt;
- @{$rvertical_tightness_flags} =
- ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
+ # now output this line
+ $self->end_batch()
+ if ( $max_index_to_go >= 0 && !$no_internal_newlines );
}
- }
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 1b:
- # Look for Type 2, first token of next line is a non-block closing
- # token .. and be sure this line does not have a side comment
- #--------------------------------------------------------------
- my $token_next = $tokens_to_go[$ibeg_next];
- if ( $type_sequence_to_go[$ibeg_next]
- && !$block_type_to_go[$ibeg_next]
- && $is_closing_token{$token_next}
- && $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen!
- {
- my $ovt = $opening_vertical_tightness{$token_next};
- my $cvt = $closing_vertical_tightness{$token_next};
- if (
+ #-----------
+ # handle '}'
+ #-----------
+ elsif ($is_closing_BLOCK) {
- # never append a trailing line like )->pack(
- # because it will throw off later alignment
- (
- $nesting_depth_to_go[$ibeg_next] ==
- $nesting_depth_to_go[ $iend_next + 1 ] + 1
- )
- && (
- $cvt == 2
- || (
- $container_environment_to_go[$ibeg_next] ne 'LIST'
- && (
- $cvt == 1
+ my $next_nonblank_token_type = 'b';
+ my $next_nonblank_token = '';
+ if ( defined($Knnb) ) {
+ $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
+ $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
+ }
- # allow closing up 2-line method calls
- || ( $rOpts_line_up_parentheses
- && $token_next eq ')' )
- )
- )
- )
- )
- {
+ # If there is a pending one-line block ..
+ if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
- # decide which trailing closing tokens to append..
- my $ok = 0;
- if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
- else {
- my $str = join( '',
- @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
+ # Fix for b1208: if a side comment follows this closing
+ # brace then we must include its length in the length test
+ # ... unless the -issl flag is set (fixes b1307-1309).
+ # Assume a minimum of 1 blank space to the comment.
+ my $added_length =
+ $side_comment_follows
+ && !$rOpts_ignore_side_comment_lengths
+ ? 1 + $rLL->[$Knnb]->[_TOKEN_LENGTH_]
+ : 0;
- # append closing token if followed by comment or ';'
- if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
- }
+ # we have to terminate it if..
+ if (
- if ($ok) {
- my $valid_flag = $cvt;
- @{$rvertical_tightness_flags} = (
- 2,
- $tightness{$token_next} == 2 ? 0 : 1,
- $type_sequence_to_go[$ibeg_next], $valid_flag,
- );
- }
- }
- }
+ # it is too long (final length may be different from
+ # initial estimate). note: must allow 1 space for this
+ # token
+ $self->excess_line_length( $index_start_one_line_block,
+ $max_index_to_go ) + $added_length >= 0
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 1c:
- # Implement the Opening Token Right flag (Type 2)..
- # If requested, move an isolated trailing opening token to the end of
- # the previous line which ended in a comma. We could do this
- # in sub recombine_breakpoints but that would cause problems
- # with -lp formatting. The problem is that indentation will
- # quickly move far to the right in nested expressions. By
- # doing it after indentation has been set, we avoid changes
- # to the indentation. Actual movement of the token takes place
- # in sub valign_output_step_B.
- #--------------------------------------------------------------
- if (
- $opening_token_right{ $tokens_to_go[$ibeg_next] }
+ # or if it has too many semicolons
+ || ( $semicolons_before_block_self_destruct == 0
+ && defined($K_last_nonblank_code)
+ && $rLL->[$K_last_nonblank_code]->[_TYPE_] ne ';' )
+ )
+ {
+ destroy_one_line_block();
+ }
+ }
- # previous line is not opening
- # (use -sot to combine with it)
- && !$is_opening_token{$token_end}
+ # put a break before this closing curly brace if appropriate
+ $self->end_batch()
+ if ( $max_index_to_go >= 0
+ && !$nobreak_BEFORE_BLOCK
+ && $index_start_one_line_block == UNDEFINED_INDEX );
- # previous line ended in one of these
- # (add other cases if necessary; '=>' and '.' are not necessary
- && !$block_type_to_go[$ibeg_next]
+ # store the closing curly brace
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
- # this is a line with just an opening token
- && ( $iend_next == $ibeg_next
- || $iend_next == $ibeg_next + 2
- && $types_to_go[$iend_next] eq '#' )
+ # ok, we just stored a closing curly brace. Often, but
+ # not always, we want to end the line immediately.
+ # So now we have to check for special cases.
- # looks bad if we align vertically with the wrong container
- && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
- )
- {
- my $valid_flag = 1;
- my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
- @{$rvertical_tightness_flags} =
- ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
- }
+ # if this '}' successfully ends a one-line block..
+ my $is_one_line_block = 0;
+ my $keep_going = 0;
+ if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 1d:
- # Stacking of opening and closing tokens (Type 2)
- #--------------------------------------------------------------
- my $stackable;
- my $token_beg_next = $tokens_to_go[$ibeg_next];
+ # Remember the type of token just before the
+ # opening brace. It would be more general to use
+ # a stack, but this will work for one-line blocks.
+ $is_one_line_block =
+ $types_to_go[$index_start_one_line_block];
- # patch to make something like 'qw(' behave like an opening paren
- # (aran.t)
- if ( $types_to_go[$ibeg_next] eq 'q' ) {
- if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
- $token_beg_next = $1;
- }
- }
+ # we have to actually make it by removing tentative
+ # breaks that were set within it
+ $self->undo_forced_breakpoint_stack(0);
+
+ # For -lp, extend the nobreak to include a trailing
+ # terminal ','. This is because the -lp indentation was
+ # not known when making one-line blocks, so we may be able
+ # to move the line back to fit. Otherwise we may create a
+ # needlessly stranded comma on the next line.
+ my $iend_nobreak = $max_index_to_go - 1;
+ if ( $rOpts_line_up_parentheses
+ && $next_nonblank_token_type eq ','
+ && $Knnb eq $K_last )
+ {
+ my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
+ my $is_excluded =
+ $self->[_ris_excluded_lp_container_]->{$p_seqno};
+ $iend_nobreak = $max_index_to_go if ( !$is_excluded );
+ }
- if ( $is_closing_token{$token_end}
- && $is_closing_token{$token_beg_next} )
- {
- $stackable = $stack_closing_token{$token_beg_next}
- unless ( $block_type_to_go[$ibeg_next] )
- ; # shouldn't happen; just checking
- }
- elsif ($is_opening_token{$token_end}
- && $is_opening_token{$token_beg_next} )
- {
- $stackable = $stack_opening_token{$token_beg_next}
- unless ( $block_type_to_go[$ibeg_next] )
- ; # shouldn't happen; just checking
- }
+ $self->set_nobreaks( $index_start_one_line_block,
+ $iend_nobreak );
- if ($stackable) {
+ # save starting block indexes so that sub correct_lp can
+ # check and adjust -lp indentation (c098)
+ push @{$ri_starting_one_line_block},
+ $index_start_one_line_block;
- my $is_semicolon_terminated;
- if ( $n + 1 == $n_last_line ) {
- my ( $terminal_type, $i_terminal ) =
- $self->terminal_type_i( $ibeg_next, $iend_next );
- $is_semicolon_terminated = $terminal_type eq ';'
- && $nesting_depth_to_go[$iend_next] <
- $nesting_depth_to_go[$ibeg_next];
- }
+ # then re-initialize for the next one-line block
+ destroy_one_line_block();
- # this must be a line with just an opening token
- # or end in a semicolon
- if (
- $is_semicolon_terminated
- || ( $iend_next == $ibeg_next
- || $iend_next == $ibeg_next + 2
- && $types_to_go[$iend_next] eq '#' )
- )
- {
- my $valid_flag = 1;
- my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
- @{$rvertical_tightness_flags} =
- ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
- );
- }
- }
- }
+ # then decide if we want to break after the '}' ..
+ # We will keep going to allow certain brace followers as in:
+ # do { $ifclosed = 1; last } unless $losing;
+ #
+ # But make a line break if the curly ends a
+ # significant block:
+ if (
+ (
+ $is_block_without_semicolon{$block_type}
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 2:
- # Handle type 3, opening block braces on last line of the batch
- # Check for a last line with isolated opening BLOCK curly
- #--------------------------------------------------------------
- elsif ($rOpts_block_brace_vertical_tightness
- && $ibeg eq $iend
- && $types_to_go[$iend] eq '{'
- && $block_type_to_go[$iend] =~
- /$block_brace_vertical_tightness_pattern/o )
- {
- @{$rvertical_tightness_flags} =
- ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
- }
+ # Follow users break point for
+ # one line block types U & G, such as a 'try' block
+ || $is_one_line_block =~ /^[UG]$/
+ && $Ktoken_vars == $K_last
+ )
- #--------------------------------------------------------------
- # Vertical Tightness Flags Section 3:
- # Handle type 4, a closing block brace on the last line of the batch Check
- # for a last line with isolated closing BLOCK curly
- #--------------------------------------------------------------
- elsif ($rOpts_stack_closing_block_brace
- && $ibeg eq $iend
- && $block_type_to_go[$iend]
- && $types_to_go[$iend] eq '}' )
- {
- my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
- @{$rvertical_tightness_flags} =
- ( 4, $spaces, $type_sequence_to_go[$iend], 1 );
- }
+ # if needless semicolon follows we handle it later
+ && $next_nonblank_token ne ';'
+ )
+ {
+ $self->end_batch()
+ unless ($no_internal_newlines);
+ }
+ }
- # pack in the sequence numbers of the ends of this line
- $rvertical_tightness_flags->[4] = get_seqno($ibeg);
- $rvertical_tightness_flags->[5] = get_seqno($iend);
- return $rvertical_tightness_flags;
-}
+ # set string indicating what we need to look for brace follower
+ # tokens
+ if ( $block_type eq 'do' ) {
+ $rbrace_follower = \%is_do_follower;
+ if (
+ $self->tight_paren_follows( $K_to_go[0], $Ktoken_vars )
+ )
+ {
+ $rbrace_follower = { ')' => 1 };
+ }
+ }
+ elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
+ $rbrace_follower = \%is_if_brace_follower;
+ }
+ elsif ( $block_type eq 'else' ) {
+ $rbrace_follower = \%is_else_brace_follower;
+ }
-sub get_seqno {
+ # added eval for borris.t
+ elsif ($is_sort_map_grep_eval{$block_type}
+ || $is_one_line_block eq 'G' )
+ {
+ $rbrace_follower = undef;
+ $keep_going = 1;
+ }
- # get opening and closing sequence numbers of a token for the vertical
- # aligner. Assign qw quotes a value to allow qw opening and closing tokens
- # to be treated somewhat like opening and closing tokens for stacking
- # tokens by the vertical aligner.
- my ($ii) = @_;
- my $seqno = $type_sequence_to_go[$ii];
- if ( $types_to_go[$ii] eq 'q' ) {
- my $SEQ_QW = -1;
- if ( $ii > 0 ) {
- $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
- }
- else {
- if ( !$ending_in_quote ) {
- $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
- }
- }
- }
- return ($seqno);
-}
+ # anonymous sub
+ elsif ( $self->[_ris_asub_block_]->{$type_sequence} ) {
+ if ($is_one_line_block) {
-{
- my %is_vertical_alignment_type;
- my %is_not_vertical_alignment_token;
- my %is_vertical_alignment_keyword;
- my %is_terminal_alignment_type;
- my %is_low_level_alignment_token;
+ $rbrace_follower = \%is_anon_sub_1_brace_follower;
- BEGIN {
+ # Exceptions to help keep -lp intact, see git #74 ...
+ # Exception 1: followed by '}' on this line
+ if ( $Ktoken_vars < $K_last
+ && $next_nonblank_token eq '}' )
+ {
+ $rbrace_follower = undef;
+ $keep_going = 1;
+ }
- my @q;
+ # Exception 2: followed by '}' on next line if -lp set.
+ # The -lp requirement allows the formatting to follow
+ # old breaks when -lp is not used, minimizing changes.
+ # Fixes issue c087.
+ elsif ($Ktoken_vars == $K_last
+ && $rOpts_line_up_parentheses )
+ {
+ my $K_closing_container =
+ $self->[_K_closing_container_];
+ my $K_opening_container =
+ $self->[_K_opening_container_];
+ my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
+ my $Kc = $K_closing_container->{$p_seqno};
+ my $is_excluded =
+ $self->[_ris_excluded_lp_container_]->{$p_seqno};
+ if ( defined($Kc)
+ && $rLL->[$Kc]->[_TOKEN_] eq '}'
+ && !$is_excluded
+ && $Kc - $Ktoken_vars <= 2 )
+ {
+ $rbrace_follower = undef;
+ $keep_going = 1;
+ }
+ }
+ }
+ else {
+ $rbrace_follower = \%is_anon_sub_brace_follower;
+ }
+ }
- # Replaced =~ and // in the list. // had been removed in RT 119588
- @q = qw#
- = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
- { ? : => && || ~~ !~~ =~ !~ //
- #;
- @is_vertical_alignment_type{@q} = (1) x scalar(@q);
+ # None of the above: specify what can follow a closing
+ # brace of a block which is not an
+ # if/elsif/else/do/sort/map/grep/eval
+ # Testfiles:
+ # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
+ else {
+ $rbrace_follower = \%is_other_brace_follower;
+ }
- # These 'tokens' are not aligned. We need this to remove [
- # from the above list because it has type ='{'
- @q = qw([);
- @is_not_vertical_alignment_token{@q} = (1) x scalar(@q);
+ # See if an elsif block is followed by another elsif or else;
+ # complain if not.
+ if ( $block_type eq 'elsif' ) {
- # these are the only types aligned at a line end
- @q = qw(&& ||);
- @is_terminal_alignment_type{@q} = (1) x scalar(@q);
+ if ( $next_nonblank_token_type eq 'b' ) { # end of line?
+ $looking_for_else = 1; # ok, check on next line
+ }
+ else {
- # these tokens only align at line level
- @q = ( '{', '(' );
- @is_low_level_alignment_token{@q} = (1) x scalar(@q);
+ unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
+ write_logfile_entry("No else block :(\n");
+ }
+ }
+ }
- # eq and ne were removed from this list to improve alignment chances
- @q = qw(if unless and or err for foreach while until);
- @is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
- }
+ # keep going after certain block types (map,sort,grep,eval)
+ # added eval for borris.t
+ if ($keep_going) {
- sub set_vertical_alignment_markers {
+ # keep going
+ }
- # This routine takes the first step toward vertical alignment of the
- # lines of output text. It looks for certain tokens which can serve as
- # vertical alignment markers (such as an '=').
- #
- # Method: We look at each token $i in this output batch and set
- # $ralignment_type_to_go->[$i] equal to those tokens at which we would
- # accept vertical alignment.
+ # if no more tokens, postpone decision until re-entring
+ elsif ( ( $next_nonblank_token_type eq 'b' )
+ && $rOpts_add_newlines )
+ {
+ unless ($rbrace_follower) {
+ $self->end_batch()
+ unless ($no_internal_newlines);
+ }
+ }
- my ( $self, $ri_first, $ri_last ) = @_;
+ elsif ($rbrace_follower) {
- my $ralignment_type_to_go;
- for my $i ( 0 .. $max_index_to_go ) {
- $ralignment_type_to_go->[$i] = '';
- }
+ unless ( $rbrace_follower->{$next_nonblank_token} ) {
+ $self->end_batch()
+ unless ( $no_internal_newlines
+ || $max_index_to_go < 0 );
+ }
+ $rbrace_follower = undef;
+ }
- # nothing to do if we aren't allowed to change whitespace
- if ( !$rOpts_add_whitespace ) {
- return $ralignment_type_to_go;
- }
+ else {
+ $self->end_batch()
+ unless ( $no_internal_newlines
+ || $max_index_to_go < 0 );
+ }
- # remember the index of last nonblank token before any sidecomment
- my $i_terminal = $max_index_to_go;
- if ( $types_to_go[$i_terminal] eq '#' ) {
- if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
- if ( $i_terminal > 0 ) { --$i_terminal }
- }
- }
+ } ## end treatment of closing block token
- # look at each line of this batch..
- my $last_vertical_alignment_before_index;
- my $vert_last_nonblank_type;
- my $vert_last_nonblank_token;
- my $vert_last_nonblank_block_type;
- my $max_line = @{$ri_first} - 1;
+ #------------------------------
+ # handle here_doc target string
+ #------------------------------
+ elsif ( $type eq 'h' ) {
- foreach my $line ( 0 .. $max_line ) {
- my $ibeg = $ri_first->[$line];
- my $iend = $ri_last->[$line];
- $last_vertical_alignment_before_index = -1;
- $vert_last_nonblank_type = '';
- $vert_last_nonblank_token = '';
- $vert_last_nonblank_block_type = '';
+ # no newlines after seeing here-target
+ $no_internal_newlines = 2;
+ ## destroy_one_line_block(); # deleted to fix case b529
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
+ }
- # look at each token in this output line..
- my $level_beg = $levels_to_go[$ibeg];
- foreach my $i ( $ibeg .. $iend ) {
- my $alignment_type = '';
- my $type = $types_to_go[$i];
- my $block_type = $block_type_to_go[$i];
- my $token = $tokens_to_go[$i];
+ #-----------------------------
+ # handle all other token types
+ #-----------------------------
+ else {
- # do not align tokens at lower level then start of line
- # except for side comments
- if ( $levels_to_go[$i] < $levels_to_go[$ibeg]
- && $types_to_go[$i] ne '#' )
- {
- $ralignment_type_to_go->[$i] = '';
- next;
+ $self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
+
+ # break after a label if requested
+ if ( $type eq 'J' && $rOpts_break_after_labels == 1 ) {
+ $self->end_batch()
+ unless ($no_internal_newlines);
}
+ }
- #--------------------------------------------------------
- # First see if we want to align BEFORE this token
- #--------------------------------------------------------
+ # remember two previous nonblank, non-comment OUTPUT tokens
+ $K_last_last_nonblank_code = $K_last_nonblank_code;
+ $K_last_nonblank_code = $Ktoken_vars;
- # The first possible token that we can align before
- # is index 2 because: 1) it doesn't normally make sense to
- # align before the first token and 2) the second
- # token must be a blank if we are to align before
- # the third
- if ( $i < $ibeg + 2 ) { }
+ } ## end of loop over all tokens in this line
- # must follow a blank token
- elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
+ my $type = $rLL->[$K_last]->[_TYPE_];
+ my $break_flag = $self->[_rbreak_after_Klast_]->{$K_last};
- # align a side comment --
- elsif ( $type eq '#' ) {
+ # we have to flush ..
+ if (
- unless (
+ # if there is a side comment...
+ $type eq '#'
- # it is a static side comment
- (
- $rOpts->{'static-side-comments'}
- && $token =~ /$static_side_comment_pattern/o
- )
+ # if this line ends in a quote
+ # NOTE: This is critically important for insuring that quoted lines
+ # do not get processed by things like -sot and -sct
+ || $in_quote
- # or a closing side comment
- || ( $vert_last_nonblank_block_type
- && $token =~
- /$closing_side_comment_prefix_pattern/o )
- )
- {
- $alignment_type = $type;
- } ## Example of a static side comment
- }
+ # if this is a VERSION statement
+ || $is_VERSION_statement
- # otherwise, do not align two in a row to create a
- # blank field
- elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
+ # to keep a label at the end of a line
+ || ( $type eq 'J' && $rOpts_break_after_labels != 2 )
- # align before one of these keywords
- # (within a line, since $i>1)
- elsif ( $type eq 'k' ) {
+ # if we have a hard break request
+ || $break_flag && $break_flag != 2
- # /^(if|unless|and|or|eq|ne)$/
- if ( $is_vertical_alignment_keyword{$token} ) {
- $alignment_type = $token;
- }
- }
+ # if we are instructed to keep all old line breaks
+ || !$rOpts->{'delete-old-newlines'}
- # align before one of these types..
- # Note: add '.' after new vertical aligner is operational
- elsif ( $is_vertical_alignment_type{$type}
- && !$is_not_vertical_alignment_token{$token} )
- {
- $alignment_type = $token;
+ # if this is a line of the form 'use overload'. A break here
+ # in the input file is a good break because it will allow
+ # the operators which follow to be formatted well. Without
+ # this break the formatting with -ci=4 -xci is poor, for example.
+
+ # use overload
+ # '+' => sub {
+ # print length $_[2], "\n";
+ # my ( $x, $y ) = _order(@_);
+ # Number::Roman->new( int $x + $y );
+ # },
+ # '-' => sub {
+ # my ( $x, $y ) = _order(@_);
+ # Number::Roman->new( int $x - $y );
+ # };
+ || ( $max_index_to_go == 2
+ && $types_to_go[0] eq 'k'
+ && $tokens_to_go[0] eq 'use'
+ && $tokens_to_go[$max_index_to_go] eq 'overload' )
+ )
+ {
+ destroy_one_line_block();
+ $self->end_batch() if ( $max_index_to_go >= 0 );
+ }
- # Do not align a terminal token. Although it might
- # occasionally look ok to do this, this has been found to be
- # a good general rule. The main problems are:
- # (1) that the terminal token (such as an = or :) might get
- # moved far to the right where it is hard to see because
- # nothing follows it, and
- # (2) doing so may prevent other good alignments.
- # Current exceptions are && and ||
- if ( $i == $iend || $i >= $i_terminal ) {
- $alignment_type = ""
- unless ( $is_terminal_alignment_type{$type} );
- }
+ # Check for a soft break request
+ if ( $max_index_to_go >= 0 && $break_flag && $break_flag == 2 ) {
+ $self->set_forced_breakpoint($max_index_to_go);
+ }
- # Do not align leading ': (' or '. ('. This would prevent
- # alignment in something like the following:
- # $extra_space .=
- # ( $input_line_number < 10 ) ? " "
- # : ( $input_line_number < 100 ) ? " "
- # : "";
- # or
- # $code =
- # ( $case_matters ? $accessor : " lc($accessor) " )
- # . ( $yesno ? " eq " : " ne " )
+ # mark old line breakpoints in current output stream
+ if (
+ $max_index_to_go >= 0
+ && ( !$rOpts_ignore_old_breakpoints
+ || $self->[_ris_essential_old_breakpoint_]->{$K_last} )
+ )
+ {
+ my $jobp = $max_index_to_go;
+ if ( $types_to_go[$max_index_to_go] eq 'b' && $max_index_to_go > 0 )
+ {
+ $jobp--;
+ }
+ $old_breakpoint_to_go[$jobp] = 1;
+ }
+ return;
+ } ## end sub process_line_of_CODE
+} ## end closure process_line_of_CODE
+
+sub tight_paren_follows {
+
+ my ( $self, $K_to_go_0, $K_ic ) = @_;
+
+ # Input parameters:
+ # $K_to_go_0 = first token index K of this output batch (=K_to_go[0])
+ # $K_ic = index of the closing do brace (=K_to_go[$max_index_to_go])
+ # Return parameter:
+ # false if we want a break after the closing do brace
+ # true if we do not want a break after the closing do brace
+
+ # We are at the closing brace of a 'do' block. See if this brace is
+ # followed by a closing paren, and if so, set a flag which indicates
+ # that we do not want a line break between the '}' and ')'.
+
+ # xxxxx ( ...... do { ... } ) {
+ # ^-------looking at this brace, K_ic
+
+ # Subscript notation:
+ # _i = inner container (braces in this case)
+ # _o = outer container (parens in this case)
+ # _io = inner opening = '{'
+ # _ic = inner closing = '}'
+ # _oo = outer opening = '('
+ # _oc = outer closing = ')'
+
+ # |--K_oo |--K_oc = outer container
+ # xxxxx ( ...... do { ...... } ) {
+ # |--K_io |--K_ic = inner container
+
+ # In general, the safe thing to do is return a 'false' value
+ # if the statement appears to be complex. This will have
+ # the downstream side-effect of opening up outer containers
+ # to help make complex code readable. But for simpler
+ # do blocks it can be preferable to keep the code compact
+ # by returning a 'true' value.
+
+ return unless defined($K_ic);
+ my $rLL = $self->[_rLL_];
+
+ # we should only be called at a closing block
+ my $seqno_i = $rLL->[$K_ic]->[_TYPE_SEQUENCE_];
+ return unless ($seqno_i); # shouldn't happen;
+
+ # This only applies if the next nonblank is a ')'
+ my $K_oc = $self->K_next_nonblank($K_ic);
+ return unless defined($K_oc);
+ my $token_next = $rLL->[$K_oc]->[_TOKEN_];
+ return unless ( $token_next eq ')' );
+
+ my $seqno_o = $rLL->[$K_oc]->[_TYPE_SEQUENCE_];
+ my $K_io = $self->[_K_opening_container_]->{$seqno_i};
+ my $K_oo = $self->[_K_opening_container_]->{$seqno_o};
+ return unless ( defined($K_io) && defined($K_oo) );
+
+ # RULE 1: Do not break before a closing signature paren
+ # (regardless of complexity). This is a fix for issue git#22.
+ # Looking for something like:
+ # sub xxx ( ... do { ... } ) {
+ # ^----- next block_type
+ my $K_test = $self->K_next_nonblank($K_oc);
+ if ( defined($K_test) && $rLL->[$K_test]->[_TYPE_] eq '{' ) {
+ my $seqno_test = $rLL->[$K_test]->[_TYPE_SEQUENCE_];
+ if ($seqno_test) {
+ if ( $self->[_ris_asub_block_]->{$seqno_test}
+ || $self->[_ris_sub_block_]->{$seqno_test} )
+ {
+ return 1;
+ }
+ }
+ }
- # Also, do not align a ( following a leading ? so we can
- # align something like this:
- # $converter{$_}->{ushortok} =
- # $PDL::IO::Pic::biggrays
- # ? ( m/GIF/ ? 0 : 1 )
- # : ( m/GIF|RAST|IFF/ ? 0 : 1 );
- if ( $i == $ibeg + 2
- && $types_to_go[$ibeg] =~ /^[\.\:\?]$/
- && $types_to_go[ $i - 1 ] eq 'b' )
- {
- $alignment_type = "";
- }
+ # RULE 2: Break if the contents within braces appears to be 'complex'. We
+ # base this decision on the number of tokens between braces.
- # Certain tokens only align at the same level as the
- # initial line level
- if ( $is_low_level_alignment_token{$token}
- && $levels_to_go[$i] != $level_beg )
- {
- $alignment_type = "";
- }
+ # xxxxx ( ... do { ... } ) {
+ # ^^^^^^
- # For a paren after keyword, only align something like this:
- # if ( $a ) { &a }
- # elsif ( $b ) { &b }
- if ( $token eq '(' ) {
+ # Although very simple, it has the advantages of (1) being insensitive to
+ # changes in lengths of identifier names, (2) easy to understand, implement
+ # and test. A test case for this is 't/snippets/long_line.in'.
- if ( $vert_last_nonblank_type eq 'k' ) {
- $alignment_type = ""
- unless $vert_last_nonblank_token =~
- /^(if|unless|elsif)$/;
- }
- }
+ # Example: $K_ic - $K_oo = 9 [Pass Rule 2]
+ # if ( do { $2 !~ /&/ } ) { ... }
- # be sure the alignment tokens are unique
- # This didn't work well: reason not determined
- # if ($token ne $type) {$alignment_type .= $type}
- }
+ # Example: $K_ic - $K_oo = 10 [Pass Rule 2]
+ # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
- # NOTE: This is deactivated because it causes the previous
- # if/elsif alignment to fail
- #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
- #{ $alignment_type = $type; }
+ # Example: $K_ic - $K_oo = 20 [Fail Rule 2]
+ # test_zero_args( "do-returned list slice", do { ( 10, 11 )[ 2, 3 ]; });
- if ($alignment_type) {
- $last_vertical_alignment_before_index = $i;
- }
+ return if ( $K_ic - $K_io > 16 );
- #--------------------------------------------------------
- # Next see if we want to align AFTER the previous nonblank
- #--------------------------------------------------------
+ # RULE 3: break if the code between the opening '(' and the '{' is 'complex'
+ # As with the previous rule, we decide based on the token count
- # We want to line up ',' and interior ';' tokens, with the added
- # space AFTER these tokens. (Note: interior ';' is included
- # because it may occur in short blocks).
- if (
+ # xxxxx ( ... do { ... } ) {
+ # ^^^^^^^^
- # we haven't already set it
- !$alignment_type
+ # Example: $K_ic - $K_oo = 9 [Pass Rule 2]
+ # $K_io - $K_oo = 4 [Pass Rule 3]
+ # if ( do { $2 !~ /&/ } ) { ... }
- # and its not the first token of the line
- && ( $i > $ibeg )
+ # Example: $K_ic - $K_oo = 10 [Pass rule 2]
+ # $K_io - $K_oo = 9 [Pass rule 3]
+ # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
- # and it follows a blank
- && $types_to_go[ $i - 1 ] eq 'b'
+ return if ( $K_io - $K_oo > 9 );
- # and previous token IS one of these:
- && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
+ # RULE 4: Break if we have already broken this batch of output tokens
+ return if ( $K_oo < $K_to_go_0 );
- # and it's NOT one of these
- && ( $type !~ /^[b\#\)\]\}]$/ )
+ # RULE 5: Break if input is not on one line
+ # For example, we will set the flag for the following expression
+ # written in one line:
- # then go ahead and align
- )
+ # This has: $K_ic - $K_oo = 10 [Pass rule 2]
+ # $K_io - $K_oo = 8 [Pass rule 3]
+ # $self->debug( 'Error: ' . do { local $/; <$err> } );
- {
- $alignment_type = $vert_last_nonblank_type;
- }
+ # but we break after the brace if it is on multiple lines on input, since
+ # the user may prefer it on multiple lines:
- #--------------------------------------------------------
- # then store the value
- #--------------------------------------------------------
- $ralignment_type_to_go->[$i] = $alignment_type;
- if ( $type ne 'b' ) {
- $vert_last_nonblank_type = $type;
- $vert_last_nonblank_token = $token;
- $vert_last_nonblank_block_type = $block_type;
- }
- }
- }
- return $ralignment_type_to_go;
+ # [Fail rule 5]
+ # $self->debug(
+ # 'Error: ' . do { local $/; <$err> }
+ # );
+
+ if ( !$rOpts_ignore_old_breakpoints ) {
+ my $iline_oo = $rLL->[$K_oo]->[_LINE_INDEX_];
+ my $iline_oc = $rLL->[$K_oc]->[_LINE_INDEX_];
+ return if ( $iline_oo != $iline_oc );
}
+
+ # OK to keep the paren tight
+ return 1;
}
-sub terminal_type_i {
+my %is_brace_semicolon_colon;
- # returns type of last token on this line (terminal token), as follows:
- # returns # for a full-line comment
- # returns ' ' for a blank line
- # otherwise returns final token type
+BEGIN {
+ my @q = qw( { } ; : );
+ @is_brace_semicolon_colon{@q} = (1) x scalar(@q);
+}
- my ( $self, $ibeg, $iend ) = @_;
+sub starting_one_line_block {
- # Start at the end and work backwards
- my $i = $iend;
- my $type_i = $types_to_go[$i];
+ # after seeing an opening curly brace, look for the closing brace and see
+ # if the entire block will fit on a line. This routine is not always right
+ # so a check is made later (at the closing brace) to make sure we really
+ # have a one-line block. We have to do this preliminary check, though,
+ # because otherwise we would always break at a semicolon within a one-line
+ # block if the block contains multiple statements.
- # Check for side comment
- if ( $type_i eq '#' ) {
- $i--;
- if ( $i < $ibeg ) {
- return wantarray ? ( $type_i, $ibeg ) : $type_i;
- }
- $type_i = $types_to_go[$i];
+ my ( $self, $Kj, $K_last_nonblank, $K_last ) = @_;
+
+ my $rbreak_container = $self->[_rbreak_container_];
+ my $rshort_nested = $self->[_rshort_nested_];
+ my $rLL = $self->[_rLL_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+
+ # kill any current block - we can only go 1 deep
+ destroy_one_line_block();
+
+ # return value:
+ # 1=distance from start of block to opening brace exceeds line length
+ # 0=otherwise
+
+ my $i_start = 0;
+
+ # This routine should not have been called if there are no tokens in the
+ # 'to_go' arrays of previously stored tokens. A previous call to
+ # 'store_token_to_go' should have stored an opening brace. An error here
+ # indicates that a programming change may have caused a flush operation to
+ # clean out the previously stored tokens.
+ if ( !defined($max_index_to_go) || $max_index_to_go < 0 ) {
+ Fault("program bug: store_token_to_go called incorrectly\n")
+ if (DEVEL_MODE);
+ return 0;
}
- # Skip past a blank
- if ( $type_i eq 'b' ) {
- $i--;
- if ( $i < $ibeg ) {
- return wantarray ? ( $type_i, $ibeg ) : $type_i;
+ # Return if block should be broken
+ my $type_sequence = $rLL->[$Kj]->[_TYPE_SEQUENCE_];
+ if ( $rbreak_container->{$type_sequence} ) {
+ return 0;
+ }
+
+ my $ris_bli_container = $self->[_ris_bli_container_];
+ my $is_bli = $ris_bli_container->{$type_sequence};
+
+ my $block_type = $rblock_type_of_seqno->{$type_sequence};
+ $block_type = "" unless ( defined($block_type) );
+ my $index_max_forced_break = get_index_max_forced_break();
+
+ my $previous_nonblank_token = '';
+ my $i_last_nonblank = -1;
+ if ( defined($K_last_nonblank) ) {
+ $i_last_nonblank = $K_last_nonblank - $K_to_go[0];
+ if ( $i_last_nonblank >= 0 ) {
+ $previous_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
}
- $type_i = $types_to_go[$i];
}
- # Found it..make sure it is a BLOCK termination,
- # but hide a terminal } after sort/grep/map because it is not
- # necessarily the end of the line. (terminal.t)
- my $block_type = $block_type_to_go[$i];
+ # find the starting keyword for this block (such as 'if', 'else', ...)
if (
- $type_i eq '}'
- && ( !$block_type
- || ( $is_sort_map_grep_eval_do{$block_type} ) )
+ $max_index_to_go == 0
+ ##|| $block_type =~ /^[\{\}\;\:]$/
+ || $is_brace_semicolon_colon{$block_type}
+ || substr( $block_type, 0, 7 ) eq 'package'
)
{
- $type_i = 'b';
+ $i_start = $max_index_to_go;
}
- return wantarray ? ( $type_i, $i ) : $type_i;
-}
-sub terminal_type_K {
-
- # returns type of last token on this line (terminal token), as follows:
- # returns # for a full-line comment
- # returns ' ' for a blank line
- # otherwise returns final token type
+ # the previous nonblank token should start these block types
+ elsif (
+ $i_last_nonblank >= 0
+ && ( $previous_nonblank_token eq $block_type
+ || $self->[_ris_asub_block_]->{$type_sequence}
+ || $self->[_ris_sub_block_]->{$type_sequence}
+ || substr( $block_type, -2, 2 ) eq '()' )
+ )
+ {
+ $i_start = $i_last_nonblank;
- my ( $self, $Kbeg, $Kend ) = @_;
- my $rLL = $self->{rLL};
+ # For signatures and extended syntax ...
+ # If this brace follows a parenthesized list, we should look back to
+ # find the keyword before the opening paren because otherwise we might
+ # form a one line block which stays intack, and cause the parenthesized
+ # expression to break open. That looks bad.
+ if ( $tokens_to_go[$i_start] eq ')' ) {
- if ( !defined($Kend) ) {
- Fault("Error in terminal_type_K: Kbeg=$Kbeg > $Kend=Kend");
+ # Find the opening paren
+ my $K_start = $K_to_go[$i_start];
+ return 0 unless defined($K_start);
+ my $seqno = $type_sequence_to_go[$i_start];
+ return 0 unless ($seqno);
+ my $K_opening = $K_opening_container->{$seqno};
+ return 0 unless defined($K_opening);
+ my $i_opening = $i_start + ( $K_opening - $K_start );
+
+ # give up if not on this line
+ return 0 unless ( $i_opening >= 0 );
+ $i_start = $i_opening; ##$index_max_forced_break + 1;
+
+ # go back one token before the opening paren
+ if ( $i_start > 0 ) { $i_start-- }
+ if ( $types_to_go[$i_start] eq 'b' && $i_start > 0 ) { $i_start--; }
+ my $lev = $levels_to_go[$i_start];
+ if ( $lev > $rLL->[$Kj]->[_LEVEL_] ) { return 0 }
+ }
}
- # Start at the end and work backwards
- my $K = $Kend;
- my $type_K = $rLL->[$K]->[_TYPE_];
+ elsif ( $previous_nonblank_token eq ')' ) {
- # Check for side comment
- if ( $type_K eq '#' ) {
- $K--;
- if ( $K < $Kbeg ) {
- return wantarray ? ( $type_K, $Kbeg ) : $type_K;
+ # For something like "if (xxx) {", the keyword "if" will be
+ # just after the most recent break. This will be 0 unless
+ # we have just killed a one-line block and are starting another.
+ # (doif.t)
+ # Note: cannot use inext_index_to_go[] here because that array
+ # is still being constructed.
+ $i_start = $index_max_forced_break + 1;
+ if ( $types_to_go[$i_start] eq 'b' ) {
+ $i_start++;
+ }
+
+ # Patch to avoid breaking short blocks defined with extended_syntax:
+ # Strip off any trailing () which was added in the parser to mark
+ # the opening keyword. For example, in the following
+ # create( TypeFoo $e) {$bubba}
+ # the blocktype would be marked as create()
+ my $stripped_block_type = $block_type;
+ if ( substr( $block_type, -2, 2 ) eq '()' ) {
+ $stripped_block_type = substr( $block_type, 0, -2 );
+ }
+ unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
+ return 0;
}
- $type_K = $rLL->[$K]->[_TYPE_];
}
- # Skip past a blank
- if ( $type_K eq 'b' ) {
- $K--;
- if ( $K < $Kbeg ) {
- return wantarray ? ( $type_K, $Kbeg ) : $type_K;
+ # patch for SWITCH/CASE to retain one-line case/when blocks
+ elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
+
+ # Note: cannot use inext_index_to_go[] here because that array
+ # is still being constructed.
+ $i_start = $index_max_forced_break + 1;
+ if ( $types_to_go[$i_start] eq 'b' ) {
+ $i_start++;
+ }
+ unless ( $tokens_to_go[$i_start] eq $block_type ) {
+ return 0;
}
- $type_K = $rLL->[$K]->[_TYPE_];
}
- # found it..make sure it is a BLOCK termination,
- # but hide a terminal } after sort/grep/map because it is not
- # necessarily the end of the line. (terminal.t)
- my $block_type = $rLL->[$K]->[_BLOCK_TYPE_];
- if (
- $type_K eq '}'
- && ( !$block_type
- || ( $is_sort_map_grep_eval_do{$block_type} ) )
- )
- {
- $type_K = 'b';
+ else {
+ return 1;
}
- return wantarray ? ( $type_K, $K ) : $type_K;
-}
+ my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
-{ # set_bond_strengths
+ my $maximum_line_length =
+ $maximum_line_length_at_level[ $levels_to_go[$i_start] ];
- my %is_good_keyword_breakpoint;
- my %is_lt_gt_le_ge;
+ # see if block starting location is too great to even start
+ if ( $pos > $maximum_line_length ) {
+ return 1;
+ }
- my %binary_bond_strength;
- my %nobreak_lhs;
- my %nobreak_rhs;
+ # See if everything to the closing token will fit on one line
+ # This is part of an update to fix cases b562 .. b983
+ my $K_closing = $self->[_K_closing_container_]->{$type_sequence};
+ return 0 unless ( defined($K_closing) );
+ my $container_length = $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[$Kj]->[_CUMULATIVE_LENGTH_];
- my @bias_tokens;
- my $delta_bias;
+ my $excess = $pos + 1 + $container_length - $maximum_line_length;
- sub bias_table_key {
- my ( $type, $token ) = @_;
- my $bias_table_key = $type;
- if ( $type eq 'k' ) {
- $bias_table_key = $token;
- if ( $token eq 'err' ) { $bias_table_key = 'or' }
- }
- return $bias_table_key;
+ # Add a small tolerance for welded tokens (case b901)
+ if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence) ) {
+ $excess += 2;
}
- sub initialize_bond_strength_hashes {
+ if ( $excess > 0 ) {
- my @q;
- @q = qw(if unless while until for foreach);
- @is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
+ # line is too long... there is no chance of forming a one line block
+ # if the excess is more than 1 char
+ return 0 if ( $excess > 1 );
- @q = qw(lt gt le ge);
- @is_lt_gt_le_ge{@q} = (1) x scalar(@q);
- #
- # The decision about where to break a line depends upon a "bond
- # strength" between tokens. The LOWER the bond strength, the MORE
- # likely a break. A bond strength may be any value but to simplify
- # things there are several pre-defined strength levels:
+ # ... and give up if it is not a one-line block on input.
+ # note: for a one-line block on input, it may be possible to keep
+ # it as a one-line block (by removing a needless semicolon ).
+ my $K_start = $K_to_go[$i_start];
+ my $ldiff =
+ $rLL->[$K_closing]->[_LINE_INDEX_] - $rLL->[$K_start]->[_LINE_INDEX_];
+ return 0 if ($ldiff);
+ }
- # NO_BREAK => 10000;
- # VERY_STRONG => 100;
- # STRONG => 2.1;
- # NOMINAL => 1.1;
- # WEAK => 0.8;
- # VERY_WEAK => 0.55;
+ foreach my $Ki ( $Kj + 1 .. $K_last ) {
- # The strength values are based on trial-and-error, and need to be
- # tweaked occasionally to get desired results. Some comments:
- #
- # 1. Only relative strengths are important. small differences
- # in strengths can make big formatting differences.
- # 2. Each indentation level adds one unit of bond strength.
- # 3. A value of NO_BREAK makes an unbreakable bond
- # 4. A value of VERY_WEAK is the strength of a ','
- # 5. Values below NOMINAL are considered ok break points.
- # 6. Values above NOMINAL are considered poor break points.
- #
- # The bond strengths should roughly follow precedence order where
- # possible. If you make changes, please check the results very
- # carefully on a variety of scripts. Testing with the -extrude
- # options is particularly helpful in exercising all of the rules.
+ # old whitespace could be arbitrarily large, so don't use it
+ if ( $rLL->[$Ki]->[_TYPE_] eq 'b' ) { $pos += 1 }
+ else { $pos += $rLL->[$Ki]->[_TOKEN_LENGTH_] }
- # Wherever possible, bond strengths are defined in the following
- # tables. There are two main stages to setting bond strengths and
- # two types of tables:
- #
- # The first stage involves looking at each token individually and
- # defining left and right bond strengths, according to if we want
- # to break to the left or right side, and how good a break point it
- # is. For example tokens like =, ||, && make good break points and
- # will have low strengths, but one might want to break on either
- # side to put them at the end of one line or beginning of the next.
- #
- # The second stage involves looking at certain pairs of tokens and
- # defining a bond strength for that particular pair. This second
- # stage has priority.
+ # ignore some small blocks
+ my $type_sequence = $rLL->[$Ki]->[_TYPE_SEQUENCE_];
+ my $nobreak = $rshort_nested->{$type_sequence};
- #---------------------------------------------------------------
- # Bond Strength BEGIN Section 1.
- # Set left and right bond strengths of individual tokens.
- #---------------------------------------------------------------
+ # Return false result if we exceed the maximum line length,
+ if ( $pos > $maximum_line_length ) {
+ return 0;
+ }
- # NOTE: NO_BREAK's set in this section first are HINTS which will
- # probably not be honored. Essential NO_BREAKS's should be set in
- # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
- # of this subroutine.
+ # keep going for non-containers
+ elsif ( !$type_sequence ) {
- # Note that we are setting defaults in this section. The user
- # cannot change bond strengths but can cause the left and right
- # bond strengths of any token type to be swapped through the use of
- # the -wba and -wbb flags. In this way the user can determine if a
- # breakpoint token should appear at the end of one line or the
- # beginning of the next line.
+ }
- # The hash keys in this section are token types, plus the text of
- # certain keywords like 'or', 'and'.
+ # return if we encounter another opening brace before finding the
+ # closing brace.
+ elsif ($rLL->[$Ki]->[_TOKEN_] eq '{'
+ && $rLL->[$Ki]->[_TYPE_] eq '{'
+ && $rblock_type_of_seqno->{$type_sequence}
+ && !$nobreak )
+ {
+ return 0;
+ }
- # no break around possible filehandle
- $left_bond_strength{'Z'} = NO_BREAK;
- $right_bond_strength{'Z'} = NO_BREAK;
+ # if we find our closing brace..
+ elsif ($rLL->[$Ki]->[_TOKEN_] eq '}'
+ && $rLL->[$Ki]->[_TYPE_] eq '}'
+ && $rblock_type_of_seqno->{$type_sequence}
+ && !$nobreak )
+ {
- # never put a bare word on a new line:
- # example print (STDERR, "bla"); will fail with break after (
- $left_bond_strength{'w'} = NO_BREAK;
+ # be sure any trailing comment also fits on the line
+ my $Ki_nonblank = $Ki;
+ if ( $Ki_nonblank < $K_last ) {
+ $Ki_nonblank++;
+ if ( $rLL->[$Ki_nonblank]->[_TYPE_] eq 'b'
+ && $Ki_nonblank < $K_last )
+ {
+ $Ki_nonblank++;
+ }
+ }
- # blanks always have infinite strength to force breaks after
- # real tokens
- $right_bond_strength{'b'} = NO_BREAK;
+ # Patch for one-line sort/map/grep/eval blocks with side comments:
+ # We will ignore the side comment length for sort/map/grep/eval
+ # because this can lead to statements which change every time
+ # perltidy is run. Here is an example from Denis Moskowitz which
+ # oscillates between these two states without this patch:
- # try not to break on exponentation
- @q = qw# ** .. ... <=> #;
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} = (STRONG) x scalar(@q);
+## --------
+## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
+## @baz;
+##
+## grep {
+## $_->foo ne 'bar'
+## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
+## @baz;
+## --------
- # The comma-arrow has very low precedence but not a good break point
- $left_bond_strength{'=>'} = NO_BREAK;
- $right_bond_strength{'=>'} = NOMINAL;
+ # When the first line is input it gets broken apart by the main
+ # line break logic in sub process_line_of_CODE.
+ # When the second line is input it gets recombined by
+ # process_line_of_CODE and passed to the output routines. The
+ # output routines (break_long_lines) do not break it apart
+ # because the bond strengths are set to the highest possible value
+ # for grep/map/eval/sort blocks, so the first version gets output.
+ # It would be possible to fix this by changing bond strengths,
+ # but they are high to prevent errors in older versions of perl.
+ # See c100 for eval test.
+ if ( $Ki < $K_last
+ && $rLL->[$K_last]->[_TYPE_] eq '#'
+ && $rLL->[$K_last]->[_LEVEL_] == $rLL->[$Ki]->[_LEVEL_]
+ && !$rOpts_ignore_side_comment_lengths
+ && !$is_sort_map_grep_eval{$block_type}
+ && $K_last - $Ki_nonblank <= 2 )
+ {
+ # Only include the side comment for if/else/elsif/unless if it
+ # immediately follows (because the current '$rbrace_follower'
+ # logic for these will give an immediate brake after these
+ # closing braces). So for example a line like this
+ # if (...) { ... } ; # very long comment......
+ # will already break like this:
+ # if (...) { ... }
+ # ; # very long comment......
+ # so we do not need to include the length of the comment, which
+ # would break the block. Project 'bioperl' has coding like this.
+ if ( $block_type !~ /^(if|else|elsif|unless)$/
+ || $K_last == $Ki_nonblank )
+ {
+ $Ki_nonblank = $K_last;
+ $pos += $rLL->[$Ki_nonblank]->[_TOKEN_LENGTH_];
- # ok to break after label
- $left_bond_strength{'J'} = NO_BREAK;
- $right_bond_strength{'J'} = NOMINAL;
- $left_bond_strength{'j'} = STRONG;
- $right_bond_strength{'j'} = STRONG;
- $left_bond_strength{'A'} = STRONG;
- $right_bond_strength{'A'} = STRONG;
+ if ( $Ki_nonblank > $Ki + 1 ) {
- $left_bond_strength{'->'} = STRONG;
- $right_bond_strength{'->'} = VERY_STRONG;
+ # source whitespace could be anything, assume
+ # at least one space before the hash on output
+ if ( $rLL->[ $Ki + 1 ]->[_TYPE_] eq 'b' ) {
+ $pos += 1;
+ }
+ else { $pos += $rLL->[ $Ki + 1 ]->[_TOKEN_LENGTH_] }
+ }
- $left_bond_strength{'CORE::'} = NOMINAL;
- $right_bond_strength{'CORE::'} = NO_BREAK;
+ if ( $pos >= $maximum_line_length ) {
+ return 0;
+ }
+ }
+ }
- # breaking AFTER modulus operator is ok:
- @q = qw< % >;
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} =
- ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
+ # ok, it's a one-line block
+ create_one_line_block( $i_start, 20 );
+ return 0;
+ }
- # Break AFTER math operators * and /
- @q = qw< * / x >;
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
+ # just keep going for other characters
+ else {
+ }
+ }
- # Break AFTER weakest math operators + and -
- # Make them weaker than * but a bit stronger than '.'
- @q = qw< + - >;
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} =
- ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
+ # We haven't hit the closing brace, but there is still space. So the
+ # question here is, should we keep going to look at more lines in hopes of
+ # forming a new one-line block, or should we stop right now. The problem
+ # with continuing is that we will not be able to honor breaks before the
+ # opening brace if we continue.
- # breaking BEFORE these is just ok:
- @q = qw# >> << #;
- @right_bond_strength{@q} = (STRONG) x scalar(@q);
- @left_bond_strength{@q} = (NOMINAL) x scalar(@q);
+ # Typically we will want to keep trying to make one-line blocks for things
+ # like sort/map/grep/eval. But it is not always a good idea to make as
+ # many one-line blocks as possible, so other types are not done. The user
+ # can always use -mangle.
- # breaking before the string concatenation operator seems best
- # because it can be hard to see at the end of a line
- $right_bond_strength{'.'} = STRONG;
- $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
+ # If we want to keep going, we will create a new one-line block.
+ # The blocks which we can keep going are in a hash, but we never want
+ # to continue if we are at a '-bli' block.
+ if ( $want_one_line_block{$block_type} && !$is_bli ) {
+ create_one_line_block( $i_start, 1 );
+ }
+ return 0;
+}
- @q = qw< } ] ) R >;
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
+sub unstore_token_to_go {
- # make these a little weaker than nominal so that they get
- # favored for end-of-line characters
- @q = qw< != == =~ !~ ~~ !~~ >;
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} =
- ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
+ # remove most recent token from output stream
+ my $self = shift;
+ if ( $max_index_to_go > 0 ) {
+ $max_index_to_go--;
+ }
+ else {
+ $max_index_to_go = UNDEFINED_INDEX;
+ }
+ return;
+}
- # break AFTER these
- @q = qw# < > | & >= <= #;
- @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
- @right_bond_strength{@q} =
- ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
+sub compare_indentation_levels {
- # breaking either before or after a quote is ok
- # but bias for breaking before a quote
- $left_bond_strength{'Q'} = NOMINAL;
- $right_bond_strength{'Q'} = NOMINAL + 0.02;
- $left_bond_strength{'q'} = NOMINAL;
- $right_bond_strength{'q'} = NOMINAL;
+ # Check to see if output line tabbing agrees with input line
+ # this can be very useful for debugging a script which has an extra
+ # or missing brace.
- # starting a line with a keyword is usually ok
- $left_bond_strength{'k'} = NOMINAL;
+ my ( $self, $K_first, $guessed_indentation_level, $line_number ) = @_;
+ return unless ( defined($K_first) );
- # we usually want to bond a keyword strongly to what immediately
- # follows, rather than leaving it stranded at the end of a line
- $right_bond_strength{'k'} = STRONG;
+ my $rLL = $self->[_rLL_];
- $left_bond_strength{'G'} = NOMINAL;
- $right_bond_strength{'G'} = STRONG;
+ my $structural_indentation_level = $rLL->[$K_first]->[_LEVEL_];
+ my $radjusted_levels = $self->[_radjusted_levels_];
+ if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) {
+ $structural_indentation_level = $radjusted_levels->[$K_first];
+ }
- # assignment operators
- @q = qw(
- = **= += *= &= <<= &&=
- -= /= |= >>= ||= //=
- .= %= ^=
- x=
- );
+ # record max structural depth for log file
+ if ( $structural_indentation_level > $self->[_maximum_BLOCK_level_] ) {
+ $self->[_maximum_BLOCK_level_] = $structural_indentation_level;
+ $self->[_maximum_BLOCK_level_at_line_] = $line_number;
+ }
- # Default is to break AFTER various assignment operators
- @left_bond_strength{@q} = (STRONG) x scalar(@q);
- @right_bond_strength{@q} =
- ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
+ my $type_sequence = $rLL->[$K_first]->[_TYPE_SEQUENCE_];
+ my $is_closing_block =
+ $type_sequence
+ && $self->[_rblock_type_of_seqno_]->{$type_sequence}
+ && $rLL->[$K_first]->[_TYPE_] eq '}';
- # Default is to break BEFORE '&&' and '||' and '//'
- # set strength of '||' to same as '=' so that chains like
- # $a = $b || $c || $d will break before the first '||'
- $right_bond_strength{'||'} = NOMINAL;
- $left_bond_strength{'||'} = $right_bond_strength{'='};
+ if ( $guessed_indentation_level ne $structural_indentation_level ) {
+ $self->[_last_tabbing_disagreement_] = $line_number;
- # same thing for '//'
- $right_bond_strength{'//'} = NOMINAL;
- $left_bond_strength{'//'} = $right_bond_strength{'='};
+ if ($is_closing_block) {
- # set strength of && a little higher than ||
- $right_bond_strength{'&&'} = NOMINAL;
- $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
+ if ( !$self->[_in_brace_tabbing_disagreement_] ) {
+ $self->[_in_brace_tabbing_disagreement_] = $line_number;
+ }
+ if ( !$self->[_first_brace_tabbing_disagreement_] ) {
+ $self->[_first_brace_tabbing_disagreement_] = $line_number;
+ }
+ }
- $left_bond_strength{';'} = VERY_STRONG;
- $right_bond_strength{';'} = VERY_WEAK;
- $left_bond_strength{'f'} = VERY_STRONG;
+ if ( !$self->[_in_tabbing_disagreement_] ) {
+ $self->[_tabbing_disagreement_count_]++;
- # make right strength of for ';' a little less than '='
- # to make for contents break after the ';' to avoid this:
- # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
- # $number_of_fields )
- # and make it weaker than ',' and 'and' too
- $right_bond_strength{'f'} = VERY_WEAK - 0.03;
+ if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
+ write_logfile_entry(
+"Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
+ );
+ }
+ $self->[_in_tabbing_disagreement_] = $line_number;
+ $self->[_first_tabbing_disagreement_] = $line_number
+ unless ( $self->[_first_tabbing_disagreement_] );
+ }
+ }
+ else {
- # The strengths of ?/: should be somewhere between
- # an '=' and a quote (NOMINAL),
- # make strength of ':' slightly less than '?' to help
- # break long chains of ? : after the colons
- $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
- $right_bond_strength{':'} = NO_BREAK;
- $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
- $right_bond_strength{'?'} = NO_BREAK;
+ $self->[_in_brace_tabbing_disagreement_] = 0 if ($is_closing_block);
- $left_bond_strength{','} = VERY_STRONG;
- $right_bond_strength{','} = VERY_WEAK;
+ my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
+ if ($in_tabbing_disagreement) {
- # remaining digraphs and trigraphs not defined above
- @q = qw( :: <> ++ --);
- @left_bond_strength{@q} = (WEAK) x scalar(@q);
- @right_bond_strength{@q} = (STRONG) x scalar(@q);
+ if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
+ write_logfile_entry(
+"End indentation disagreement from input line $in_tabbing_disagreement\n"
+ );
- # Set bond strengths of certain keywords
- # make 'or', 'err', 'and' slightly weaker than a ','
- $left_bond_strength{'and'} = VERY_WEAK - 0.01;
- $left_bond_strength{'or'} = VERY_WEAK - 0.02;
- $left_bond_strength{'err'} = VERY_WEAK - 0.02;
- $left_bond_strength{'xor'} = NOMINAL;
- $right_bond_strength{'and'} = NOMINAL;
- $right_bond_strength{'or'} = NOMINAL;
- $right_bond_strength{'err'} = NOMINAL;
- $right_bond_strength{'xor'} = STRONG;
+ if ( $self->[_tabbing_disagreement_count_] == MAX_NAG_MESSAGES )
+ {
+ write_logfile_entry(
+ "No further tabbing disagreements will be noted\n");
+ }
+ }
+ $self->[_in_tabbing_disagreement_] = 0;
- #---------------------------------------------------------------
- # Bond Strength BEGIN Section 2.
- # Set binary rules for bond strengths between certain token types.
- #---------------------------------------------------------------
+ }
+ }
+ return;
+}
- # We have a little problem making tables which apply to the
- # container tokens. Here is a list of container tokens and
- # their types:
- #
- # type tokens // meaning
- # { {, [, ( // indent
- # } }, ], ) // outdent
- # [ [ // left non-structural [ (enclosing an array index)
- # ] ] // right non-structural square bracket
- # ( ( // left non-structural paren
- # ) ) // right non-structural paren
- # L { // left non-structural curly brace (enclosing a key)
- # R } // right non-structural curly brace
- #
- # Some rules apply to token types and some to just the token
- # itself. We solve the problem by combining type and token into a
- # new hash key for the container types.
- #
- # If a rule applies to a token 'type' then we need to make rules
- # for each of these 'type.token' combinations:
- # Type Type.Token
- # { {{, {[, {(
- # [ [[
- # ( ((
- # L L{
- # } }}, }], })
- # ] ]]
- # ) ))
- # R R}
- #
- # If a rule applies to a token then we need to make rules for
- # these 'type.token' combinations:
- # Token Type.Token
- # { {{, L{
- # [ {[, [[
- # ( {(, ((
- # } }}, R}
- # ] }], ]]
- # ) }), ))
+###################################################
+# CODE SECTION 8: Utilities for setting breakpoints
+###################################################
- # allow long lines before final { in an if statement, as in:
- # if (..........
- # ..........)
- # {
- #
- # Otherwise, the line before the { tends to be too short.
+{ ## begin closure set_forced_breakpoint
- $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
- $binary_bond_strength{'(('}{'{{'} = NOMINAL;
+ my $forced_breakpoint_count;
+ my $forced_breakpoint_undo_count;
+ my @forced_breakpoint_undo_stack;
+ my $index_max_forced_break;
- # break on something like '} (', but keep this stronger than a ','
- # example is in 'howe.pl'
- $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
- $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
+ # Break before or after certain tokens based on user settings
+ my %break_before_or_after_token;
- # keep matrix and hash indices together
- # but make them a little below STRONG to allow breaking open
- # something like {'some-word'}{'some-very-long-word'} at the }{
- # (bracebrk.t)
- $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
- $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
- $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
- $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
+ BEGIN {
- # increase strength to the point where a break in the following
- # will be after the opening paren rather than at the arrow:
- # $a->$b($c);
- $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
+ # Updated to use all operators. This fixes case b1054
+ # Here is the previous simplified version:
+ ## my @q = qw( . : ? and or xor && || );
+ my @q = @all_operators;
- $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
- $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
- $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
- $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
- $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
- $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ push @q, ',';
+ @break_before_or_after_token{@q} = (1) x scalar(@q);
+ }
- $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
- $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
- $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
- $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
+ sub initialize_forced_breakpoint_vars {
+ $forced_breakpoint_count = 0;
+ $index_max_forced_break = UNDEFINED_INDEX;
+ $forced_breakpoint_undo_count = 0;
+ @forced_breakpoint_undo_stack = ();
+ return;
+ }
- #---------------------------------------------------------------
- # Binary NO_BREAK rules
- #---------------------------------------------------------------
+ sub get_forced_breakpoint_count {
+ return $forced_breakpoint_count;
+ }
- # use strict requires that bare word and => not be separated
- $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
- $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
+ sub get_forced_breakpoint_undo_count {
+ return $forced_breakpoint_undo_count;
+ }
- # Never break between a bareword and a following paren because
- # perl may give an error. For example, if a break is placed
- # between 'to_filehandle' and its '(' the following line will
- # give a syntax error [Carp.pm]: my( $no) =fileno(
- # to_filehandle( $in)) ;
- $binary_bond_strength{'C'}{'(('} = NO_BREAK;
- $binary_bond_strength{'C'}{'{('} = NO_BREAK;
- $binary_bond_strength{'U'}{'(('} = NO_BREAK;
- $binary_bond_strength{'U'}{'{('} = NO_BREAK;
+ sub get_index_max_forced_break {
+ return $index_max_forced_break;
+ }
- # use strict requires that bare word within braces not start new
- # line
- $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
+ sub set_fake_breakpoint {
- $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
+ # Just bump up the breakpoint count as a signal that there are breaks.
+ # This is useful if we have breaks but may want to postpone deciding
+ # where to make them.
+ $forced_breakpoint_count++;
+ return;
+ }
- # use strict requires that bare word and => not be separated
- $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
+ use constant DEBUG_FORCE => 0;
- # use strict does not allow separating type info from trailing { }
- # testfile is readmail.pl
- $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
- $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
+ sub set_forced_breakpoint {
+ my ( $self, $i ) = @_;
- # As a defensive measure, do not break between a '(' and a
- # filehandle. In some cases, this can cause an error. For
- # example, the following program works:
- # my $msg="hi!\n";
- # print
- # ( STDOUT
- # $msg
- # );
- #
- # But this program fails:
- # my $msg="hi!\n";
- # print
- # (
- # STDOUT
- # $msg
- # );
- #
- # This is normally only a problem with the 'extrude' option
- $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
- $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
+ # Set a breakpoint AFTER the token at index $i in the _to_go arrays.
- # never break between sub name and opening paren
- $binary_bond_strength{'w'}{'(('} = NO_BREAK;
- $binary_bond_strength{'w'}{'{('} = NO_BREAK;
+ # Exceptions:
+ # - If the token at index $i is a blank, backup to $i-1 to
+ # get to the previous nonblank token.
+ # - For certain tokens, the break may be placed BEFORE the token
+ # at index $i, depending on user break preference settings.
+ # - If a break is made after an opening token, then a break will
+ # also be made before the corresponding closing token.
- # keep '}' together with ';'
- $binary_bond_strength{'}}'}{';'} = NO_BREAK;
+ # Returns '$i_nonblank':
+ # = index of the token after which the breakpoint was actually placed
+ # = undef if breakpoint was not set.
+ my $i_nonblank;
- # Breaking before a ++ can cause perl to guess wrong. For
- # example the following line will cause a syntax error
- # with -extrude if we break between '$i' and '++' [fixstyle2]
- # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
- $nobreak_lhs{'++'} = NO_BREAK;
+ if ( !defined($i) || $i < 0 ) {
- # Do not break before a possible file handle
- $nobreak_lhs{'Z'} = NO_BREAK;
+ # Calls with bad index $i are harmless but waste time and should
+ # be caught and eliminated during code development.
+ if (DEVEL_MODE) {
+ my ( $a, $b, $c ) = caller();
+ Fault(
+"Bad call to forced breakpoint from $a $b $c ; called with i=$i; please fix\n"
+ );
+ }
+ return;
+ }
- # use strict hates bare words on any new line. For
- # example, a break before the underscore here provokes the
- # wrath of use strict:
- # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
- $nobreak_rhs{'F'} = NO_BREAK;
- $nobreak_rhs{'CORE::'} = NO_BREAK;
+ # Break after token $i
+ $i_nonblank = $self->set_forced_breakpoint_AFTER($i);
- #---------------------------------------------------------------
- # Bond Strength BEGIN Section 3.
- # Define tables and values for applying a small bias to the above
- # values.
- #---------------------------------------------------------------
- # Adding a small 'bias' to strengths is a simple way to make a line
- # break at the first of a sequence of identical terms. For
- # example, to force long string of conditional operators to break
- # with each line ending in a ':', we can add a small number to the
- # bond strength of each ':' (colon.t)
- @bias_tokens = qw( : && || f and or . ); # tokens which get bias
- $delta_bias = 0.0001; # a very small strength level
- return;
+ # If we break at an opening container..break at the closing
+ my $set_closing;
+ if ( defined($i_nonblank)
+ && $is_opening_sequence_token{ $tokens_to_go[$i_nonblank] } )
+ {
+ $set_closing = 1;
+ $self->set_closing_breakpoint($i_nonblank);
+ }
- } ## end sub initialize_bond_strength_hashes
+ DEBUG_FORCE && do {
+ my ( $a, $b, $c ) = caller();
+ my $msg =
+"FORCE $forced_breakpoint_count after call from $a $c with i=$i max=$max_index_to_go";
+ if ( !defined($i_nonblank) ) {
+ $i = "" unless defined($i);
+ $msg .= " but could not set break after i='$i'\n";
+ }
+ else {
+ $msg .= <<EOM;
+set break after $i_nonblank: tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]
+EOM
+ if ( defined($set_closing) ) {
+ $msg .=
+" Also set closing breakpoint corresponding to this token\n";
+ }
+ }
+ print STDOUT $msg;
+ };
- sub set_bond_strengths {
+ return $i_nonblank;
+ }
- # patch-its always ok to break at end of line
- $nobreak_to_go[$max_index_to_go] = 0;
+ sub set_forced_breakpoint_AFTER {
+ my ( $self, $i ) = @_;
- # we start a new set of bias values for each line
- my %bias;
- @bias{@bias_tokens} = (0) x scalar(@bias_tokens);
- my $code_bias = -.01; # bias for closing block braces
+ # This routine is only called by sub set_forced_breakpoint and
+ # sub set_closing_breakpoint.
- my $type = 'b';
- my $token = ' ';
- my $last_type;
- my $last_nonblank_type = $type;
- my $last_nonblank_token = $token;
- my $list_str = $left_bond_strength{'?'};
+ # Set a breakpoint AFTER the token at index $i in the _to_go arrays.
- my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
- $next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
- );
+ # Exceptions:
+ # - If the token at index $i is a blank, backup to $i-1 to
+ # get to the previous nonblank token.
+ # - For certain tokens, the break may be placed BEFORE the token
+ # at index $i, depending on user break preference settings.
- # main loop to compute bond strengths between each pair of tokens
- foreach my $i ( 0 .. $max_index_to_go ) {
- $last_type = $type;
- if ( $type ne 'b' ) {
- $last_nonblank_type = $type;
- $last_nonblank_token = $token;
- }
- $type = $types_to_go[$i];
+ # Returns:
+ # - the index of the token after which the break was set, or
+ # - undef if no break was set
- # strength on both sides of a blank is the same
- if ( $type eq 'b' && $last_type ne 'b' ) {
- $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
- next;
- }
+ return unless ( defined($i) && $i >= 0 );
- $token = $tokens_to_go[$i];
- $block_type = $block_type_to_go[$i];
- $i_next = $i + 1;
- $next_type = $types_to_go[$i_next];
- $next_token = $tokens_to_go[$i_next];
- $total_nesting_depth = $nesting_depth_to_go[$i_next];
- $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
- $next_nonblank_type = $types_to_go[$i_next_nonblank];
- $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
+ # Back up at a blank so we have a token to examine.
+ # This was added to fix for cases like b932 involving an '=' break.
+ if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
- # We are computing the strength of the bond between the current
- # token and the NEXT token.
+ # Never break between welded tokens
+ return
+ if ( $total_weld_count
+ && $self->[_rK_weld_right_]->{ $K_to_go[$i] } );
- #---------------------------------------------------------------
- # Bond Strength Section 1:
- # First Approximation.
- # Use minimum of individual left and right tabulated bond
- # strengths.
- #---------------------------------------------------------------
- my $bsr = $right_bond_strength{$type};
- my $bsl = $left_bond_strength{$next_nonblank_type};
+ my $token = $tokens_to_go[$i];
+ my $type = $types_to_go[$i];
- # define right bond strengths of certain keywords
- if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
- $bsr = $right_bond_strength{$token};
- }
- elsif ( $token eq 'ne' or $token eq 'eq' ) {
- $bsr = NOMINAL;
- }
+ # For certain tokens, use user settings to decide if we break before or
+ # after it
+ if ( $break_before_or_after_token{$token}
+ && ( $type eq $token || $type eq 'k' ) )
+ {
+ if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
+ }
- # set terminal bond strength to the nominal value
- # this will cause good preceding breaks to be retained
- if ( $i_next_nonblank > $max_index_to_go ) {
- $bsl = NOMINAL;
- }
+ # breaks are forced before 'if' and 'unless'
+ elsif ( $is_if_unless{$token} && $type eq 'k' ) { $i-- }
- # define right bond strengths of certain keywords
- if ( $next_nonblank_type eq 'k'
- && defined( $left_bond_strength{$next_nonblank_token} ) )
- {
- $bsl = $left_bond_strength{$next_nonblank_token};
- }
- elsif ($next_nonblank_token eq 'ne'
- or $next_nonblank_token eq 'eq' )
+ if ( $i >= 0 && $i <= $max_index_to_go ) {
+ my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
+
+ if ( $i_nonblank >= 0
+ && $nobreak_to_go[$i_nonblank] == 0
+ && !$forced_breakpoint_to_go[$i_nonblank] )
{
- $bsl = NOMINAL;
- }
- elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
- $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
- }
+ $forced_breakpoint_to_go[$i_nonblank] = 1;
- # Use the minimum of the left and right strengths. Note: it might
- # seem that we would want to keep a NO_BREAK if either token has
- # this value. This didn't work, for example because in an arrow
- # list, it prevents the comma from separating from the following
- # bare word (which is probably quoted by its arrow). So necessary
- # NO_BREAK's have to be handled as special cases in the final
- # section.
- if ( !defined($bsr) ) { $bsr = VERY_STRONG }
- if ( !defined($bsl) ) { $bsl = VERY_STRONG }
- my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
- my $bond_str_1 = $bond_str;
+ if ( $i_nonblank > $index_max_forced_break ) {
+ $index_max_forced_break = $i_nonblank;
+ }
+ $forced_breakpoint_count++;
+ $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ]
+ = $i_nonblank;
- #---------------------------------------------------------------
- # Bond Strength Section 2:
- # Apply hardwired rules..
- #---------------------------------------------------------------
+ # success
+ return $i_nonblank;
+ }
+ }
+ return;
+ }
- # Patch to put terminal or clauses on a new line: Weaken the bond
- # at an || followed by die or similar keyword to make the terminal
- # or clause fall on a new line, like this:
- #
- # my $class = shift
- # || die "Cannot add broadcast: No class identifier found";
- #
- # Otherwise the break will be at the previous '=' since the || and
- # = have the same starting strength and the or is biased, like
- # this:
- #
- # my $class =
- # shift || die "Cannot add broadcast: No class identifier found";
- #
- # In any case if the user places a break at either the = or the ||
- # it should remain there.
- if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
- if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) {
- if ( $want_break_before{$token} && $i > 0 ) {
- $bond_strength_to_go[ $i - 1 ] -= $delta_bias;
- }
- else {
- $bond_str -= $delta_bias;
- }
- }
- }
+ sub clear_breakpoint_undo_stack {
+ my ($self) = @_;
+ $forced_breakpoint_undo_count = 0;
+ return;
+ }
- # good to break after end of code blocks
- if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
+ use constant DEBUG_UNDOBP => 0;
- $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
- $code_bias += $delta_bias;
- }
+ sub undo_forced_breakpoint_stack {
- if ( $type eq 'k' ) {
+ my ( $self, $i_start ) = @_;
- # allow certain control keywords to stand out
- if ( $next_nonblank_type eq 'k'
- && $is_last_next_redo_return{$token} )
- {
- $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
- }
+ # Given $i_start, a non-negative index the 'undo stack' of breakpoints,
+ # remove all breakpoints from the top of the 'undo stack' down to and
+ # including index $i_start.
- # Don't break after keyword my. This is a quick fix for a
- # rare problem with perl. An example is this line from file
- # Container.pm:
+ # The 'undo stack' is a stack of all breakpoints made for a batch of
+ # code.
- # foreach my $question( Debian::DebConf::ConfigDb::gettree(
- # $this->{'question'} ) )
+ if ( $i_start < 0 ) {
+ $i_start = 0;
+ my ( $a, $b, $c ) = caller();
- if ( $token eq 'my' ) {
- $bond_str = NO_BREAK;
- }
+ # Bad call, can only be due to a recent programming change.
+ Fault(
+"Program Bug: undo_forced_breakpoint_stack from $a $c has bad i=$i_start "
+ ) if (DEVEL_MODE);
+ return;
+ }
+
+ while ( $forced_breakpoint_undo_count > $i_start ) {
+ my $i =
+ $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
+ if ( $i >= 0 && $i <= $max_index_to_go ) {
+ $forced_breakpoint_to_go[$i] = 0;
+ $forced_breakpoint_count--;
+ DEBUG_UNDOBP && do {
+ my ( $a, $b, $c ) = caller();
+ print STDOUT
+"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
+ };
}
- # good to break before 'if', 'unless', etc
- if ( $is_if_brace_follower{$next_nonblank_token} ) {
- $bond_str = VERY_WEAK;
+ # shouldn't happen, but not a critical error
+ else {
+ DEBUG_UNDOBP && do {
+ my ( $a, $b, $c ) = caller();
+ print STDOUT
+"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
+ };
}
+ }
+ return;
+ }
+} ## end closure set_forced_breakpoint
- if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
+{ ## begin closure set_closing_breakpoint
- # FIXME: needs more testing
- if ( $is_keyword_returning_list{$next_nonblank_token} ) {
- $bond_str = $list_str if ( $bond_str > $list_str );
- }
+ my %postponed_breakpoint;
- # keywords like 'unless', 'if', etc, within statements
- # make good breaks
- if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
- $bond_str = VERY_WEAK / 1.05;
- }
- }
+ sub initialize_postponed_breakpoint {
+ %postponed_breakpoint = ();
+ return;
+ }
- # try not to break before a comma-arrow
- elsif ( $next_nonblank_type eq '=>' ) {
- if ( $bond_str < STRONG ) { $bond_str = STRONG }
- }
+ sub has_postponed_breakpoint {
+ my ($seqno) = @_;
+ return $postponed_breakpoint{$seqno};
+ }
- #---------------------------------------------------------------
- # Additional hardwired NOBREAK rules
- #---------------------------------------------------------------
+ sub set_closing_breakpoint {
- # map1.t -- correct for a quirk in perl
- if ( $token eq '('
- && $next_nonblank_type eq 'i'
- && $last_nonblank_type eq 'k'
- && $is_sort_map_grep{$last_nonblank_token} )
+ # set a breakpoint at a matching closing token
+ my ( $self, $i_break ) = @_;
- # /^(sort|map|grep)$/ )
- {
- $bond_str = NO_BREAK;
- }
+ if ( $mate_index_to_go[$i_break] >= 0 ) {
- # extrude.t: do not break before paren at:
- # -l pid_filename(
- if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
- $bond_str = NO_BREAK;
- }
+ # Don't reduce the '2' in the statement below.
+ # Test files: attrib.t, BasicLyx.pm.html
+ if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
- # in older version of perl, use strict can cause problems with
- # breaks before bare words following opening parens. For example,
- # this will fail under older versions if a break is made between
- # '(' and 'MAIL': use strict; open( MAIL, "a long filename or
- # command"); close MAIL;
- if ( $type eq '{' ) {
+ # break before } ] and ), but sub set_forced_breakpoint will decide
+ # to break before or after a ? and :
+ my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
+ $self->set_forced_breakpoint_AFTER(
+ $mate_index_to_go[$i_break] - $inc );
+ }
+ }
+ else {
+ my $type_sequence = $type_sequence_to_go[$i_break];
+ if ($type_sequence) {
+ my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
+ $postponed_breakpoint{$type_sequence} = 1;
+ }
+ }
+ return;
+ }
+} ## end closure set_closing_breakpoint
- if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
+#########################################
+# CODE SECTION 9: Process batches of code
+#########################################
- # but it's fine to break if the word is followed by a '=>'
- # or if it is obviously a sub call
- my $i_next_next_nonblank = $i_next_nonblank + 1;
- my $next_next_type = $types_to_go[$i_next_next_nonblank];
- if ( $next_next_type eq 'b'
- && $i_next_nonblank < $max_index_to_go )
- {
- $i_next_next_nonblank++;
- $next_next_type = $types_to_go[$i_next_next_nonblank];
- }
+{ ## begin closure grind_batch_of_CODE
- # We'll check for an old breakpoint and keep a leading
- # bareword if it was that way in the input file.
- # Presumably it was ok that way. For example, the
- # following would remain unchanged:
- #
- # @months = (
- # January, February, March, April,
- # May, June, July, August,
- # September, October, November, December,
- # );
- #
- # This should be sufficient:
- if (
- !$old_breakpoint_to_go[$i]
- && ( $next_next_type eq ','
- || $next_next_type eq '}' )
- )
- {
- $bond_str = NO_BREAK;
- }
- }
- }
+ # The routines in this closure begin the processing of a 'batch' of code.
- # Do not break between a possible filehandle and a ? or / and do
- # not introduce a break after it if there is no blank
- # (extrude.t)
- elsif ( $type eq 'Z' ) {
+ # A variable to keep track of consecutive nonblank lines so that we can
+ # insert occasional blanks
+ my @nonblank_lines_at_depth;
- # don't break..
- if (
+ # A variable to remember maximum size of previous batches; this is needed
+ # by the logical padding routine
+ my $peak_batch_size;
+ my $batch_count;
- # if there is no blank and we do not want one. Examples:
- # print $x++ # do not break after $x
- # print HTML"HELLO" # break ok after HTML
- (
- $next_type ne 'b'
- && defined( $want_left_space{$next_type} )
- && $want_left_space{$next_type} == WS_NO
- )
+ # variables to keep track of unbalanced containers.
+ my %saved_opening_indentation;
+ my @unmatched_opening_indexes_in_this_batch;
- # or we might be followed by the start of a quote
- || $next_nonblank_type =~ /^[\/\?]$/
- )
- {
- $bond_str = NO_BREAK;
- }
- }
+ sub initialize_grind_batch_of_CODE {
+ @nonblank_lines_at_depth = ();
+ $peak_batch_size = 0;
+ $batch_count = 0;
+ %saved_opening_indentation = ();
+ return;
+ }
- # Breaking before a ? before a quote can cause trouble if
- # they are not separated by a blank.
- # Example: a syntax error occurs if you break before the ? here
- # my$logic=join$all?' && ':' || ',@regexps;
- # From: Professional_Perl_Programming_Code/multifind.pl
- if ( $next_nonblank_type eq '?' ) {
- $bond_str = NO_BREAK
- if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
- }
+ # sub grind_batch_of_CODE receives sections of code which are the longest
+ # possible lines without a break. In other words, it receives what is left
+ # after applying all breaks forced by blank lines, block comments, side
+ # comments, pod text, and structural braces. Its job is to break this code
+ # down into smaller pieces, if necessary, which fit within the maximum
+ # allowed line length. Then it sends the resulting lines of code on down
+ # the pipeline to the VerticalAligner package, breaking the code into
+ # continuation lines as necessary. The batch of tokens are in the "to_go"
+ # arrays. The name 'grind' is slightly suggestive of a machine continually
+ # breaking down long lines of code, but mainly it is unique and easy to
+ # remember and find with an editor search.
- # Breaking before a . followed by a number
- # can cause trouble if there is no intervening space
- # Example: a syntax error occurs if you break before the .2 here
- # $str .= pack($endian.2, ensurrogate($ord));
- # From: perl58/Unicode.pm
- elsif ( $next_nonblank_type eq '.' ) {
- $bond_str = NO_BREAK
- if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
- }
+ # The two routines 'process_line_of_CODE' and 'grind_batch_of_CODE' work
+ # together in the following way:
- my $bond_str_2 = $bond_str;
+ # - 'process_line_of_CODE' receives the original INPUT lines one-by-one and
+ # combines them into the largest sequences of tokens which might form a new
+ # line.
+ # - 'grind_batch_of_CODE' determines which tokens will form the OUTPUT
+ # lines.
- #---------------------------------------------------------------
- # End of hardwired rules
- #---------------------------------------------------------------
+ # So sub 'process_line_of_CODE' builds up the longest possible continouus
+ # sequences of tokens, regardless of line length, and then
+ # grind_batch_of_CODE breaks these sequences back down into the new output
+ # lines.
- #---------------------------------------------------------------
- # Bond Strength Section 3:
- # Apply table rules. These have priority over the above
- # hardwired rules.
- #---------------------------------------------------------------
+ # Sub 'grind_batch_of_CODE' ships its output lines to the vertical aligner.
- my $tabulated_bond_str;
- my $ltype = $type;
- my $rtype = $next_nonblank_type;
- if ( $token =~ /^[\(\[\{\)\]\}]/ ) { $ltype = $type . $token }
- if ( $next_nonblank_token =~ /^[\(\[\{\)\]\}]/ ) {
- $rtype = $next_nonblank_type . $next_nonblank_token;
- }
+ use constant DEBUG_GRIND => 0;
- if ( $binary_bond_strength{$ltype}{$rtype} ) {
- $bond_str = $binary_bond_strength{$ltype}{$rtype};
- $tabulated_bond_str = $bond_str;
- }
+ sub check_grind_input {
- if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
- $bond_str = NO_BREAK;
- $tabulated_bond_str = $bond_str;
- }
- my $bond_str_3 = $bond_str;
+ # Check for valid input to sub grind_batch_of_CODE. An error here
+ # would most likely be due to an error in 'sub store_token_to_go'.
+ my ($self) = @_;
- # If the hardwired rules conflict with the tabulated bond
- # strength then there is an inconsistency that should be fixed
- FORMATTER_DEBUG_FLAG_BOND_TABLES
- && $tabulated_bond_str
- && $bond_str_1
- && $bond_str_1 != $bond_str_2
- && $bond_str_2 != $tabulated_bond_str
- && do {
- print STDERR
-"BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
- };
+ # Be sure there are tokens in the batch
+ if ( $max_index_to_go < 0 ) {
+ Fault(<<EOM);
+sub grind incorrectly called with max_index_to_go=$max_index_to_go
+EOM
+ }
+ my $Klimit = $self->[_Klimit_];
- #-----------------------------------------------------------------
- # Bond Strength Section 4:
- # Modify strengths of certain tokens which often occur in sequence
- # by adding a small bias to each one in turn so that the breaks
- # occur from left to right.
- #
- # Note that we only changing strengths by small amounts here,
- # and usually increasing, so we should not be altering any NO_BREAKs.
- # Other routines which check for NO_BREAKs will use a tolerance
- # of one to avoid any problem.
- #-----------------------------------------------------------------
+ # The local batch tokens must be a continous part of the global token
+ # array.
+ my $KK;
+ foreach my $ii ( 0 .. $max_index_to_go ) {
- # The bias tables use special keys
- my $left_key = bias_table_key( $type, $token );
- my $right_key =
- bias_table_key( $next_nonblank_type, $next_nonblank_token );
+ my $Km = $KK;
- # add any bias set by sub scan_list at old comma break points.
- if ( $type eq ',' ) { $bond_str += $bond_strength_to_go[$i] }
+ $KK = $K_to_go[$ii];
+ if ( !defined($KK) || $KK < 0 || $KK > $Klimit ) {
+ $KK = '(undef)' unless defined($KK);
+ Fault(<<EOM);
+at batch index at i=$ii, the value of K_to_go[$ii] = '$KK' is out of the valid range (0 - $Klimit)
+EOM
+ }
- # bias left token
- elsif ( defined( $bias{$left_key} ) ) {
- if ( !$want_break_before{$left_key} ) {
- $bias{$left_key} += $delta_bias;
- $bond_str += $bias{$left_key};
- }
+ if ( $ii > 0 && $KK != $Km + 1 ) {
+ my $im = $ii - 1;
+ Fault(<<EOM);
+Non-sequential K indexes: i=$im has Km=$Km; but i=$ii has K=$KK; expecting K = Km+1
+EOM
}
+ }
+ return;
+ }
- # bias right token
- if ( defined( $bias{$right_key} ) ) {
- if ( $want_break_before{$right_key} ) {
+ sub grind_batch_of_CODE {
- # for leading '.' align all but 'short' quotes; the idea
- # is to not place something like "\n" on a single line.
- if ( $right_key eq '.' ) {
- unless (
- $last_nonblank_type eq '.'
- && (
- length($token) <=
- $rOpts_short_concatenation_item_length )
- && ( !$is_closing_token{$token} )
- )
- {
- $bias{$right_key} += $delta_bias;
- }
- }
- else {
- $bias{$right_key} += $delta_bias;
- }
- $bond_str += $bias{$right_key};
- }
- }
- my $bond_str_4 = $bond_str;
+ my ($self) = @_;
- #---------------------------------------------------------------
- # Bond Strength Section 5:
- # Fifth Approximation.
- # Take nesting depth into account by adding the nesting depth
- # to the bond strength.
- #---------------------------------------------------------------
- my $strength;
+ my $this_batch = $self->[_this_batch_];
+ $batch_count++;
- if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
- if ( $total_nesting_depth > 0 ) {
- $strength = $bond_str + $total_nesting_depth;
- }
- else {
- $strength = $bond_str;
- }
+ $self->check_grind_input() if (DEVEL_MODE);
+
+ # This routine is only called from sub flush_batch_of_code, so that
+ # routine is a better spot for debugging.
+ DEBUG_GRIND && do {
+ my $token = my $type = "";
+ if ( $max_index_to_go >= 0 ) {
+ $token = $tokens_to_go[$max_index_to_go];
+ $type = $types_to_go[$max_index_to_go];
+ }
+ my $output_str = "";
+ if ( $max_index_to_go > 20 ) {
+ my $mm = $max_index_to_go - 10;
+ $output_str = join( "", @tokens_to_go[ 0 .. 10 ] ) . " ... "
+ . join( "", @tokens_to_go[ $mm .. $max_index_to_go ] );
}
else {
- $strength = NO_BREAK;
+ $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
}
+ print STDERR <<EOM;
+grind got batch number $batch_count with $max_index_to_go tokens, last type '$type' tok='$token', text:
+$output_str
+EOM
+ };
- #---------------------------------------------------------------
- # Bond Strength Section 6:
- # Sixth Approximation. Welds.
- #---------------------------------------------------------------
-
- # Do not allow a break within welds,
- if ( weld_len_right_to_go($i) ) { $strength = NO_BREAK }
+ return if ( $max_index_to_go < 0 );
- # But encourage breaking after opening welded tokens
- elsif ( weld_len_left_to_go($i) && $is_opening_token{$token} ) {
- $strength -= 1;
- }
+ $self->set_lp_indentation()
+ if ($rOpts_line_up_parentheses);
- # always break after side comment
- if ( $type eq '#' ) { $strength = 0 }
+ #----------------------------
+ # Shortcut for block comments
+ #----------------------------
+ if (
+ $max_index_to_go == 0
+ && $types_to_go[0] eq '#'
- $bond_strength_to_go[$i] = $strength;
+ # this shortcut does not work for -lp yet
+ && !$rOpts_line_up_parentheses
+ )
+ {
+ my $ibeg = 0;
+ $this_batch->[_ri_first_] = [$ibeg];
+ $this_batch->[_ri_last_] = [$ibeg];
+ $this_batch->[_peak_batch_size_] = $peak_batch_size;
+ $this_batch->[_do_not_pad_] = 0;
+ $this_batch->[_batch_count_] = $batch_count;
+ $this_batch->[_rix_seqno_controlling_ci_] = [];
+
+ $self->convey_batch_to_vertical_aligner();
+
+ my $level = $levels_to_go[$ibeg];
+ $self->[_last_last_line_leading_level_] =
+ $self->[_last_line_leading_level_];
+ $self->[_last_line_leading_type_] = $types_to_go[$ibeg];
+ $self->[_last_line_leading_level_] = $level;
+ $nonblank_lines_at_depth[$level] = 1;
+ return;
+ }
- FORMATTER_DEBUG_FLAG_BOND && do {
- my $str = substr( $token, 0, 15 );
- $str .= ' ' x ( 16 - length($str) );
- print STDOUT
-"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
- };
- } ## end main loop
- return;
- } ## end sub set_bond_strengths
-}
+ #-------------
+ # Normal route
+ #-------------
-sub pad_array_to_go {
+ my $rLL = $self->[_rLL_];
+ my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
+ my $rwant_container_open = $self->[_rwant_container_open_];
- # to simplify coding in scan_list and set_bond_strengths, it helps
- # to create some extra blank tokens at the end of the arrays
- $tokens_to_go[ $max_index_to_go + 1 ] = '';
- $tokens_to_go[ $max_index_to_go + 2 ] = '';
- $types_to_go[ $max_index_to_go + 1 ] = 'b';
- $types_to_go[ $max_index_to_go + 2 ] = 'b';
- $nesting_depth_to_go[ $max_index_to_go + 1 ] =
- $nesting_depth_to_go[$max_index_to_go];
+ my $starting_in_quote = $this_batch->[_starting_in_quote_];
+ my $ending_in_quote = $this_batch->[_ending_in_quote_];
+ my $is_static_block_comment = $this_batch->[_is_static_block_comment_];
- # /^[R\}\)\]]$/
- if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
- if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
+ #-------------------------------------------------------
+ # Loop over the batch to initialize some batch variables
+ #-------------------------------------------------------
+ my $comma_count_in_batch = 0;
+ my $ilast_nonblank = -1;
+ my @colon_list;
+ my @ix_seqno_controlling_ci;
+ my %comma_arrow_count = ();
+ my $comma_arrow_count_contained = 0;
+ my @unmatched_closing_indexes_in_this_batch;
- # shouldn't happen:
- unless ( get_saw_brace_error() ) {
- warning(
-"Program bug in scan_list: hit nesting error which should have been caught\n"
- );
- report_definite_bug();
- }
- }
- else {
- $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
- }
- }
+ @unmatched_opening_indexes_in_this_batch = ();
- # /^[L\{\(\[]$/
- elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
- $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
- }
- return;
-}
+ for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
+ $bond_strength_to_go[$i] = 0;
+ $iprev_to_go[$i] = $ilast_nonblank;
+ $inext_to_go[$i] = $i + 1;
-{ # begin scan_list
+ my $type = $types_to_go[$i];
+ if ( $type ne 'b' ) {
+ if ( $ilast_nonblank >= 0 ) {
+ $inext_to_go[$ilast_nonblank] = $i;
- my (
- $block_type, $current_depth,
- $depth, $i,
- $i_last_nonblank_token, $last_colon_sequence_number,
- $last_nonblank_token, $last_nonblank_type,
- $last_nonblank_block_type, $last_old_breakpoint_count,
- $minimum_depth, $next_nonblank_block_type,
- $next_nonblank_token, $next_nonblank_type,
- $old_breakpoint_count, $starting_breakpoint_count,
- $starting_depth, $token,
- $type, $type_sequence,
- );
+ # just in case there are two blanks in a row (shouldn't
+ # happen)
+ if ( ++$ilast_nonblank < $i ) {
+ $inext_to_go[$ilast_nonblank] = $i;
+ }
+ }
+ $ilast_nonblank = $i;
- my (
- @breakpoint_stack, @breakpoint_undo_stack,
- @comma_index, @container_type,
- @identifier_count_stack, @index_before_arrow,
- @interrupted_list, @item_count_stack,
- @last_comma_index, @last_dot_index,
- @last_nonblank_type, @old_breakpoint_count_stack,
- @opening_structure_index_stack, @rfor_semicolon_list,
- @has_old_logical_breakpoints, @rand_or_list,
- @i_equals,
- );
+ # This is a good spot to efficiently collect information needed
+ # for breaking lines...
- # routine to define essential variables when we go 'up' to
- # a new depth
- sub check_for_new_minimum_depth {
- my $depth = shift;
- if ( $depth < $minimum_depth ) {
+ # gather info needed by sub break_long_lines
+ if ( $type_sequence_to_go[$i] ) {
+ my $seqno = $type_sequence_to_go[$i];
+ my $token = $tokens_to_go[$i];
- $minimum_depth = $depth;
+ # remember indexes of any tokens controlling xci
+ # in this batch. This list is needed by sub undo_ci.
+ if ( $ris_seqno_controlling_ci->{$seqno} ) {
+ push @ix_seqno_controlling_ci, $i;
+ }
- # these arrays need not retain values between calls
- $breakpoint_stack[$depth] = $starting_breakpoint_count;
- $container_type[$depth] = "";
- $identifier_count_stack[$depth] = 0;
- $index_before_arrow[$depth] = -1;
- $interrupted_list[$depth] = 1;
- $item_count_stack[$depth] = 0;
- $last_nonblank_type[$depth] = "";
- $opening_structure_index_stack[$depth] = -1;
+ if ( $is_opening_sequence_token{$token} ) {
+ if ( $rwant_container_open->{$seqno} ) {
+ $self->set_forced_breakpoint($i);
+ }
+ push @unmatched_opening_indexes_in_this_batch, $i;
+ if ( $type eq '?' ) {
+ push @colon_list, $type;
+ }
+ }
+ elsif ( $is_closing_sequence_token{$token} ) {
- $breakpoint_undo_stack[$depth] = undef;
- $comma_index[$depth] = undef;
- $last_comma_index[$depth] = undef;
- $last_dot_index[$depth] = undef;
- $old_breakpoint_count_stack[$depth] = undef;
- $has_old_logical_breakpoints[$depth] = 0;
- $rand_or_list[$depth] = [];
- $rfor_semicolon_list[$depth] = [];
- $i_equals[$depth] = -1;
+ if ( $i > 0 && $rwant_container_open->{$seqno} ) {
+ $self->set_forced_breakpoint( $i - 1 );
+ }
- # these arrays must retain values between calls
- if ( !defined( $has_broken_sublist[$depth] ) ) {
- $dont_align[$depth] = 0;
- $has_broken_sublist[$depth] = 0;
- $want_comma_break[$depth] = 0;
- }
- }
- return;
- }
+ my $i_mate =
+ pop @unmatched_opening_indexes_in_this_batch;
+ if ( defined($i_mate) && $i_mate >= 0 ) {
+ if ( $type_sequence_to_go[$i_mate] ==
+ $type_sequence_to_go[$i] )
+ {
+ $mate_index_to_go[$i] = $i_mate;
+ $mate_index_to_go[$i_mate] = $i;
+ my $seqno = $type_sequence_to_go[$i];
+ if ( $comma_arrow_count{$seqno} ) {
+ $comma_arrow_count_contained +=
+ $comma_arrow_count{$seqno};
+ }
+ }
+ else {
+ push @unmatched_opening_indexes_in_this_batch,
+ $i_mate;
+ push @unmatched_closing_indexes_in_this_batch,
+ $i;
+ }
+ }
+ else {
+ push @unmatched_closing_indexes_in_this_batch, $i;
+ }
+ if ( $type eq ':' ) {
+ push @colon_list, $type;
+ }
+ } ## end elsif ( $is_closing_sequence_token...)
- # routine to decide which commas to break at within a container;
- # returns:
- # $bp_count = number of comma breakpoints set
- # $do_not_break_apart = a flag indicating if container need not
- # be broken open
- sub set_comma_breakpoints {
+ } ## end if ($seqno)
- my $dd = shift;
- my $bp_count = 0;
- my $do_not_break_apart = 0;
+ elsif ( $type eq ',' ) { $comma_count_in_batch++; }
+ elsif ( $tokens_to_go[$i] eq '=>' ) {
+ if (@unmatched_opening_indexes_in_this_batch) {
+ my $j = $unmatched_opening_indexes_in_this_batch[-1];
+ my $seqno = $type_sequence_to_go[$j];
+ $comma_arrow_count{$seqno}++;
+ }
+ }
+ } ## end if ( $type ne 'b' )
+ } ## end for ( my $i = 0 ; $i <=...)
- # anything to do?
- if ( $item_count_stack[$dd] ) {
+ my $is_unbalanced_batch = @unmatched_opening_indexes_in_this_batch +
+ @unmatched_closing_indexes_in_this_batch;
- # handle commas not in containers...
- if ( $dont_align[$dd] ) {
- do_uncontained_comma_breaks($dd);
- }
+ #------------------------
+ # Set special breakpoints
+ #------------------------
+ # If this line ends in a code block brace, set breaks at any
+ # previous closing code block braces to breakup a chain of code
+ # blocks on one line. This is very rare but can happen for
+ # user-defined subs. For example we might be looking at this:
+ # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
+ my $saw_good_break = 0; # flag to force breaks even if short line
+ if (
- # handle commas within containers...
- else {
- my $fbc = $forced_breakpoint_count;
+ # looking for opening or closing block brace
+ $block_type_to_go[$max_index_to_go]
- # always open comma lists not preceded by keywords,
- # barewords, identifiers (that is, anything that doesn't
- # look like a function call)
- my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
+ # never any good breaks if just one token
+ && $max_index_to_go > 0
- set_comma_breakpoints_do(
- $dd,
- $opening_structure_index_stack[$dd],
- $i,
- $item_count_stack[$dd],
- $identifier_count_stack[$dd],
- $comma_index[$dd],
- $next_nonblank_type,
- $container_type[$dd],
- $interrupted_list[$dd],
- \$do_not_break_apart,
- $must_break_open,
- );
- $bp_count = $forced_breakpoint_count - $fbc;
- $do_not_break_apart = 0 if $must_break_open;
+ # but not one of these which are never duplicated on a line:
+ # until|while|for|if|elsif|else
+ && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go]
+ }
+ )
+ {
+ my $lev = $nesting_depth_to_go[$max_index_to_go];
+
+ # Walk backwards from the end and
+ # set break at any closing block braces at the same level.
+ # But quit if we are not in a chain of blocks.
+ for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
+ last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
+ next if ( $levels_to_go[$i] > $lev ); # skip past higher level
+
+ if ( $block_type_to_go[$i] ) {
+ if ( $tokens_to_go[$i] eq '}' ) {
+ $self->set_forced_breakpoint($i);
+ $saw_good_break = 1;
+ }
+ }
+
+ # quit if we see anything besides words, function, blanks
+ # at this level
+ elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
}
}
- return ( $bp_count, $do_not_break_apart );
- }
- sub do_uncontained_comma_breaks {
+ #-----------------------------------------------
+ # insertion of any blank lines before this batch
+ #-----------------------------------------------
- # Handle commas not in containers...
- # This is a catch-all routine for commas that we
- # don't know what to do with because the don't fall
- # within containers. We will bias the bond strength
- # to break at commas which ended lines in the input
- # file. This usually works better than just trying
- # to put as many items on a line as possible. A
- # downside is that if the input file is garbage it
- # won't work very well. However, the user can always
- # prevent following the old breakpoints with the
- # -iob flag.
- my $dd = shift;
- my $bias = -.01;
- my $old_comma_break_count = 0;
- foreach my $ii ( @{ $comma_index[$dd] } ) {
- if ( $old_breakpoint_to_go[$ii] ) {
- $old_comma_break_count++;
- $bond_strength_to_go[$ii] = $bias;
+ my $imin = 0;
+ my $imax = $max_index_to_go;
- # reduce bias magnitude to force breaks in order
- $bias *= 0.99;
+ # trim any blank tokens
+ if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
+ if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
+
+ if ( $imin > $imax ) {
+ if (DEVEL_MODE) {
+ my $K0 = $K_to_go[0];
+ my $lno = "";
+ if ( defined($K0) ) { $lno = $rLL->[$K0]->[_LINE_INDEX_] + 1 }
+ Fault(<<EOM);
+Strange: received batch containing only blanks near input line $lno: after trimming imin=$imin, imax=$imax
+EOM
}
+ return;
}
- # Also put a break before the first comma if
- # (1) there was a break there in the input, and
- # (2) there was exactly one old break before the first comma break
- # (3) OLD: there are multiple old comma breaks
- # (3) NEW: there are one or more old comma breaks (see return example)
- #
- # For example, we will follow the user and break after
- # 'print' in this snippet:
- # print
- # "conformability (Not the same dimension)\n",
- # "\t", $have, " is ", text_unit($hu), "\n",
- # "\t", $want, " is ", text_unit($wu), "\n",
- # ;
- #
- # Another example, just one comma, where we will break after
- # the return:
- # return
- # $x * cos($a) - $y * sin($a),
- # $x * sin($a) + $y * cos($a);
+ my $last_line_leading_type = $self->[_last_line_leading_type_];
+ my $last_line_leading_level = $self->[_last_line_leading_level_];
+ my $last_last_line_leading_level =
+ $self->[_last_last_line_leading_level_];
- # Breaking a print statement:
- # print SAVEOUT
- # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
- # ( $? & 128 ) ? " -- core dumped" : "", "\n";
- #
- # But we will not force a break after the opening paren here
- # (causes a blinker):
- # $heap->{stream}->set_output_filter(
- # poe::filter::reference->new('myotherfreezer') ),
- # ;
- #
- my $i_first_comma = $comma_index[$dd]->[0];
- if ( $old_breakpoint_to_go[$i_first_comma] ) {
- my $level_comma = $levels_to_go[$i_first_comma];
- my $ibreak = -1;
- my $obp_count = 0;
- for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
- if ( $old_breakpoint_to_go[$ii] ) {
- $obp_count++;
- last if ( $obp_count > 1 );
- $ibreak = $ii
- if ( $levels_to_go[$ii] == $level_comma );
- }
- }
+ # add a blank line before certain key types but not after a comment
+ if ( $last_line_leading_type ne '#' ) {
+ my $want_blank = 0;
+ my $leading_token = $tokens_to_go[$imin];
+ my $leading_type = $types_to_go[$imin];
- # Changed rule from multiple old commas to just one here:
- if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
- {
- # Do not to break before an opening token because
- # it can lead to "blinkers".
- my $ibreakm = $ibreak;
- $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' );
- if ( $ibreakm >= 0 && $types_to_go[$ibreakm] !~ /^[\(\{\[L]$/ )
+ # blank lines before subs except declarations and one-liners
+ if ( $leading_type eq 'i' ) {
+ if (
+
+ # quick check
+ (
+ substr( $leading_token, 0, 3 ) eq 'sub'
+ || $rOpts_sub_alias_list
+ )
+
+ # slow check
+ && $leading_token =~ /$SUB_PATTERN/
+ )
{
- set_forced_breakpoint($ibreak);
+ $want_blank = $rOpts->{'blank-lines-before-subs'}
+ if ( terminal_type_i( $imin, $imax ) !~ /^[\;\}\,]$/ );
}
- }
- }
- return;
- }
- my %is_logical_container;
+ # break before all package declarations
+ elsif ( substr( $leading_token, 0, 8 ) eq 'package ' ) {
+ $want_blank = $rOpts->{'blank-lines-before-packages'};
+ }
+ }
- BEGIN {
- my @q = qw# if elsif unless while and or err not && | || ? : ! #;
- @is_logical_container{@q} = (1) x scalar(@q);
- }
+ # break before certain key blocks except one-liners
+ if ( $leading_type eq 'k' ) {
+ if ( $leading_token eq 'BEGIN' || $leading_token eq 'END' ) {
+ $want_blank = $rOpts->{'blank-lines-before-subs'}
+ if ( terminal_type_i( $imin, $imax ) ne '}' );
+ }
- sub set_for_semicolon_breakpoints {
- my $dd = shift;
- foreach ( @{ $rfor_semicolon_list[$dd] } ) {
- set_forced_breakpoint($_);
- }
- return;
- }
+ # Break before certain block types if we haven't had a
+ # break at this level for a while. This is the
+ # difficult decision..
+ elsif ($last_line_leading_type ne 'b'
+ && $is_if_unless_while_until_for_foreach{$leading_token} )
+ {
+ my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
+ if ( !defined($lc) ) { $lc = 0 }
- sub set_logical_breakpoints {
- my $dd = shift;
- if (
- $item_count_stack[$dd] == 0
- && $is_logical_container{ $container_type[$dd] }
+ # patch for RT #128216: no blank line inserted at a level
+ # change
+ if ( $levels_to_go[$imin] != $last_line_leading_level ) {
+ $lc = 0;
+ }
- || $has_old_logical_breakpoints[$dd]
- )
- {
+ $want_blank =
+ $rOpts->{'blanks-before-blocks'}
+ && $lc >= $rOpts->{'long-block-line-count'}
+ && $self->consecutive_nonblank_lines() >=
+ $rOpts->{'long-block-line-count'}
+ && terminal_type_i( $imin, $imax ) ne '}';
+ }
+ }
- # Look for breaks in this order:
- # 0 1 2 3
- # or and || &&
- foreach my $i ( 0 .. 3 ) {
- if ( $rand_or_list[$dd][$i] ) {
- foreach ( @{ $rand_or_list[$dd][$i] } ) {
- set_forced_breakpoint($_);
- }
-
- # break at any 'if' and 'unless' too
- foreach ( @{ $rand_or_list[$dd][4] } ) {
- set_forced_breakpoint($_);
+ # Check for blank lines wanted before a closing brace
+ if ( $leading_token eq '}' ) {
+ if ( $rOpts->{'blank-lines-before-closing-block'}
+ && $block_type_to_go[$imin]
+ && $block_type_to_go[$imin] =~
+ /$blank_lines_before_closing_block_pattern/ )
+ {
+ my $nblanks = $rOpts->{'blank-lines-before-closing-block'};
+ if ( $nblanks > $want_blank ) {
+ $want_blank = $nblanks;
}
- $rand_or_list[$dd] = [];
- last;
}
}
+
+ if ($want_blank) {
+
+ # future: send blank line down normal path to VerticalAligner
+ $self->flush_vertical_aligner();
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->require_blank_code_lines($want_blank);
+ }
}
- return;
- }
- sub is_unbreakable_container {
+ # update blank line variables and count number of consecutive
+ # non-blank, non-comment lines at this level
+ $last_last_line_leading_level = $last_line_leading_level;
+ $last_line_leading_level = $levels_to_go[$imin];
+ if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
+ $last_line_leading_type = $types_to_go[$imin];
+ if ( $last_line_leading_level == $last_last_line_leading_level
+ && $last_line_leading_type ne 'b'
+ && $last_line_leading_type ne '#'
+ && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
+ {
+ $nonblank_lines_at_depth[$last_line_leading_level]++;
+ }
+ else {
+ $nonblank_lines_at_depth[$last_line_leading_level] = 1;
+ }
- # never break a container of one of these types
- # because bad things can happen (map1.t)
- my $dd = shift;
- return $is_sort_map_grep{ $container_type[$dd] };
- }
+ $self->[_last_line_leading_type_] = $last_line_leading_type;
+ $self->[_last_line_leading_level_] = $last_line_leading_level;
+ $self->[_last_last_line_leading_level_] = $last_last_line_leading_level;
- sub scan_list {
+ #--------------------------
+ # scan lists and long lines
+ #--------------------------
- # This routine is responsible for setting line breaks for all lists,
- # so that hierarchical structure can be displayed and so that list
- # items can be vertically aligned. The output of this routine is
- # stored in the array @forced_breakpoint_to_go, which is used to set
- # final breakpoints.
+ # Flag to remember if we called sub 'pad_array_to_go'.
+ # Some routines (break_lists(), break_long_lines() ) need some
+ # extra tokens added at the end of the batch. Most batches do not
+ # use these routines, so we will avoid calling 'pad_array_to_go'
+ # unless it is needed.
+ my $called_pad_array_to_go;
- $starting_depth = $nesting_depth_to_go[0];
+ # set all forced breakpoints for good list formatting
+ my $is_long_line = $max_index_to_go > 0
+ && $self->excess_line_length( $imin, $max_index_to_go ) > 0;
- $block_type = ' ';
- $current_depth = $starting_depth;
- $i = -1;
- $last_colon_sequence_number = -1;
- $last_nonblank_token = ';';
- $last_nonblank_type = ';';
- $last_nonblank_block_type = ' ';
- $last_old_breakpoint_count = 0;
- $minimum_depth = $current_depth + 1; # forces update in check below
- $old_breakpoint_count = 0;
- $starting_breakpoint_count = $forced_breakpoint_count;
- $token = ';';
- $type = ';';
- $type_sequence = '';
+ my $old_line_count_in_batch = 1;
+ if ( $max_index_to_go > 0 ) {
+ my $Kbeg = $K_to_go[0];
+ my $Kend = $K_to_go[$max_index_to_go];
+ $old_line_count_in_batch +=
+ $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_];
+ }
- my $total_depth_variation = 0;
- my $i_old_assignment_break;
- my $depth_last = $starting_depth;
+ if (
+ $is_long_line
+ || $old_line_count_in_batch > 1
- check_for_new_minimum_depth($current_depth);
+ # must always call break_lists() with unbalanced batches because
+ # it is maintaining some stacks
+ || $is_unbalanced_batch
- my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
- my $want_previous_breakpoint = -1;
+ # call break_lists if we might want to break at commas
+ || (
+ $comma_count_in_batch
+ && ( $rOpts_maximum_fields_per_table > 0
+ && $rOpts_maximum_fields_per_table <= $comma_count_in_batch
+ || $rOpts_comma_arrow_breakpoints == 0 )
+ )
- my $saw_good_breakpoint;
- my $i_line_end = -1;
- my $i_line_start = -1;
+ # call break_lists if user may want to break open some one-line
+ # hash references
+ || ( $comma_arrow_count_contained
+ && $rOpts_comma_arrow_breakpoints != 3 )
+ )
+ {
+ # add a couple of extra terminal blank tokens
+ $self->pad_array_to_go();
+ $called_pad_array_to_go = 1;
- # loop over all tokens in this batch
- while ( ++$i <= $max_index_to_go ) {
- if ( $type ne 'b' ) {
- $i_last_nonblank_token = $i - 1;
- $last_nonblank_type = $type;
- $last_nonblank_token = $token;
- $last_nonblank_block_type = $block_type;
- } ## end if ( $type ne 'b' )
- $type = $types_to_go[$i];
- $block_type = $block_type_to_go[$i];
- $token = $tokens_to_go[$i];
- $type_sequence = $type_sequence_to_go[$i];
- my $next_type = $types_to_go[ $i + 1 ];
- my $next_token = $tokens_to_go[ $i + 1 ];
- my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
- $next_nonblank_type = $types_to_go[$i_next_nonblank];
- $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
- $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
+ my $sgb = $self->break_lists($is_long_line);
+ $saw_good_break ||= $sgb;
+ }
- # set break if flag was set
- if ( $want_previous_breakpoint >= 0 ) {
- set_forced_breakpoint($want_previous_breakpoint);
- $want_previous_breakpoint = -1;
- }
+ # let $ri_first and $ri_last be references to lists of
+ # first and last tokens of line fragments to output..
+ my ( $ri_first, $ri_last );
- $last_old_breakpoint_count = $old_breakpoint_count;
- if ( $old_breakpoint_to_go[$i] ) {
- $i_line_end = $i;
- $i_line_start = $i_next_nonblank;
+ #-------------------------
+ # write a single line if..
+ #-------------------------
+ if (
- $old_breakpoint_count++;
+ # we aren't allowed to add any newlines
+ !$rOpts_add_newlines
- # Break before certain keywords if user broke there and
- # this is a 'safe' break point. The idea is to retain
- # any preferred breaks for sequential list operations,
- # like a schwartzian transform.
- if ($rOpts_break_at_old_keyword_breakpoints) {
- if (
- $next_nonblank_type eq 'k'
- && $is_keyword_returning_list{$next_nonblank_token}
- && ( $type =~ /^[=\)\]\}Riw]$/
- || $type eq 'k'
- && $is_keyword_returning_list{$token} )
- )
- {
+ # or,
+ || (
- # we actually have to set this break next time through
- # the loop because if we are at a closing token (such
- # as '}') which forms a one-line block, this break might
- # get undone.
- $want_previous_breakpoint = $i;
- } ## end if ( $next_nonblank_type...)
- } ## end if ($rOpts_break_at_old_keyword_breakpoints)
+ # this line is 'short'
+ !$is_long_line
- # Break before attributes if user broke there
- if ($rOpts_break_at_old_attribute_breakpoints) {
- if ( $next_nonblank_type eq 'A' ) {
- $want_previous_breakpoint = $i;
- }
- }
+ # and we didn't see a good breakpoint
+ && !$saw_good_break
- # remember an = break as possible good break point
- if ( $is_assignment{$type} ) {
- $i_old_assignment_break = $i;
- }
- elsif ( $is_assignment{$next_nonblank_type} ) {
- $i_old_assignment_break = $i_next_nonblank;
- }
- } ## end if ( $old_breakpoint_to_go...)
+ # and we don't already have an interior breakpoint
+ && !get_forced_breakpoint_count()
+ )
+ )
+ {
+ @{$ri_first} = ($imin);
+ @{$ri_last} = ($imax);
+ }
- next if ( $type eq 'b' );
- $depth = $nesting_depth_to_go[ $i + 1 ];
+ #-----------------------------
+ # otherwise use multiple lines
+ #-----------------------------
+ else {
- $total_depth_variation += abs( $depth - $depth_last );
- $depth_last = $depth;
+ # add a couple of extra terminal blank tokens if we haven't
+ # already done so
+ $self->pad_array_to_go() unless ($called_pad_array_to_go);
- # safety check - be sure we always break after a comment
- # Shouldn't happen .. an error here probably means that the
- # nobreak flag did not get turned off correctly during
- # formatting.
- if ( $type eq '#' ) {
- if ( $i != $max_index_to_go ) {
- warning(
-"Non-fatal program bug: backup logic needed to break after a comment\n"
- );
- report_definite_bug();
- $nobreak_to_go[$i] = 0;
- set_forced_breakpoint($i);
- } ## end if ( $i != $max_index_to_go)
- } ## end if ( $type eq '#' )
+ ( $ri_first, $ri_last ) =
+ $self->break_long_lines( $saw_good_break, \@colon_list );
- # Force breakpoints at certain tokens in long lines.
- # Note that such breakpoints will be undone later if these tokens
- # are fully contained within parens on a line.
- if (
+ $self->break_all_chain_tokens( $ri_first, $ri_last );
- # break before a keyword within a line
- $type eq 'k'
- && $i > 0
+ $self->break_equals( $ri_first, $ri_last );
- # if one of these keywords:
- && $token =~ /^(if|unless|while|until|for)$/
+ # now we do a correction step to clean this up a bit
+ # (The only time we would not do this is for debugging)
+ $self->recombine_breakpoints( $ri_first, $ri_last )
+ if ( $rOpts_recombine && @{$ri_first} > 1 );
- # but do not break at something like '1 while'
- && ( $last_nonblank_type ne 'n' || $i > 2 )
+ $self->insert_final_ternary_breaks( $ri_first, $ri_last )
+ if (@colon_list);
+ }
- # and let keywords follow a closing 'do' brace
- && $last_nonblank_block_type ne 'do'
+ $self->insert_breaks_before_list_opening_containers( $ri_first,
+ $ri_last )
+ if ( %break_before_container_types && $max_index_to_go > 0 );
- && (
- $is_long_line
+ #-------------------
+ # -lp corrector step
+ #-------------------
+ my $do_not_pad = 0;
+ if ($rOpts_line_up_parentheses) {
+ $do_not_pad = $self->correct_lp_indentation( $ri_first, $ri_last );
+ }
- # or container is broken (by side-comment, etc)
- || ( $next_nonblank_token eq '('
- && $mate_index_to_go[$i_next_nonblank] < $i )
- )
- )
- {
- set_forced_breakpoint( $i - 1 );
- } ## end if ( $type eq 'k' && $i...)
+ #--------------------------
+ # unmask phantom semicolons
+ #--------------------------
+ if ( !$tokens_to_go[$imax] && $types_to_go[$imax] eq ';' ) {
+ my $i = $imax;
+ my $tok = ';';
+ my $tok_len = 1;
+ if ( $want_left_space{';'} != WS_NO ) {
+ $tok = ' ;';
+ $tok_len = 2;
+ }
+ $tokens_to_go[$i] = $tok;
+ $token_lengths_to_go[$i] = $tok_len;
+ my $KK = $K_to_go[$i];
+ $rLL->[$KK]->[_TOKEN_] = $tok;
+ $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
+ my $line_number = 1 + $rLL->[$KK]->[_LINE_INDEX_];
+ $self->note_added_semicolon($line_number);
- # remember locations of -> if this is a pre-broken method chain
- if ( $type eq '->' ) {
- if ($rOpts_break_at_old_method_breakpoints) {
+ foreach ( $imax .. $max_index_to_go ) {
+ $summed_lengths_to_go[ $_ + 1 ] += $tok_len;
+ }
+ }
- # Case 1: look for lines with leading pointers
- if ( $i == $i_line_start ) {
- set_forced_breakpoint( $i - 1 );
- }
+ if ( $rOpts_one_line_block_semicolons == 0 ) {
+ $self->delete_one_line_semicolons( $ri_first, $ri_last );
+ }
- # Case 2: look for cuddled pointer calls
- else {
+ #--------------------
+ # ship this batch out
+ #--------------------
+ $this_batch->[_ri_first_] = $ri_first;
+ $this_batch->[_ri_last_] = $ri_last;
+ $this_batch->[_peak_batch_size_] = $peak_batch_size;
+ $this_batch->[_do_not_pad_] = $do_not_pad;
+ $this_batch->[_batch_count_] = $batch_count;
+ $this_batch->[_rix_seqno_controlling_ci_] = \@ix_seqno_controlling_ci;
- # look for old lines with leading ')->' or ') ->'
- # and, when found, force a break before the
- # opening paren and after the previous closing paren.
- if (
- $types_to_go[$i_line_start] eq '}'
- && ( $i == $i_line_start + 1
- || $i == $i_line_start + 2
- && $types_to_go[ $i - 1 ] eq 'b' )
- )
- {
- set_forced_breakpoint( $i_line_start - 1 );
- set_forced_breakpoint(
- $mate_index_to_go[$i_line_start] );
- }
- }
+ $self->convey_batch_to_vertical_aligner();
+
+ #-------------------------------------------------------------------
+ # Write requested number of blank lines after an opening block brace
+ #-------------------------------------------------------------------
+ if ($rOpts_blank_lines_after_opening_block) {
+ my $iterm = $imax;
+ if ( $types_to_go[$iterm] eq '#' && $iterm > $imin ) {
+ $iterm -= 1;
+ if ( $types_to_go[$iterm] eq 'b' && $iterm > $imin ) {
+ $iterm -= 1;
}
- } ## end if ( $type eq '->' )
+ }
- # remember locations of '||' and '&&' for possible breaks if we
- # decide this is a long logical expression.
- elsif ( $type eq '||' ) {
- push @{ $rand_or_list[$depth][2] }, $i;
- ++$has_old_logical_breakpoints[$depth]
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints );
- } ## end elsif ( $type eq '||' )
- elsif ( $type eq '&&' ) {
- push @{ $rand_or_list[$depth][3] }, $i;
- ++$has_old_logical_breakpoints[$depth]
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints );
- } ## end elsif ( $type eq '&&' )
- elsif ( $type eq 'f' ) {
- push @{ $rfor_semicolon_list[$depth] }, $i;
+ if ( $types_to_go[$iterm] eq '{'
+ && $block_type_to_go[$iterm]
+ && $block_type_to_go[$iterm] =~
+ /$blank_lines_after_opening_block_pattern/ )
+ {
+ my $nblanks = $rOpts_blank_lines_after_opening_block;
+ $self->flush_vertical_aligner();
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->require_blank_code_lines($nblanks);
}
- elsif ( $type eq 'k' ) {
- if ( $token eq 'and' ) {
- push @{ $rand_or_list[$depth][1] }, $i;
- ++$has_old_logical_breakpoints[$depth]
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints );
- } ## end if ( $token eq 'and' )
+ }
- # break immediately at 'or's which are probably not in a logical
- # block -- but we will break in logical breaks below so that
- # they do not add to the forced_breakpoint_count
- elsif ( $token eq 'or' ) {
- push @{ $rand_or_list[$depth][0] }, $i;
- ++$has_old_logical_breakpoints[$depth]
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints );
- if ( $is_logical_container{ $container_type[$depth] } ) {
- }
- else {
- if ($is_long_line) { set_forced_breakpoint($i) }
- elsif ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints )
- {
- $saw_good_breakpoint = 1;
- }
- } ## end else [ if ( $is_logical_container...)]
- } ## end elsif ( $token eq 'or' )
- elsif ( $token eq 'if' || $token eq 'unless' ) {
- push @{ $rand_or_list[$depth][4] }, $i;
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_logical_breakpoints )
- {
- set_forced_breakpoint($i);
- }
- } ## end elsif ( $token eq 'if' ||...)
- } ## end elsif ( $type eq 'k' )
- elsif ( $is_assignment{$type} ) {
- $i_equals[$depth] = $i;
- }
-
- if ($type_sequence) {
-
- # handle any postponed closing breakpoints
- if ( $token =~ /^[\)\]\}\:]$/ ) {
- if ( $type eq ':' ) {
- $last_colon_sequence_number = $type_sequence;
+ # Remember the largest batch size processed. This is needed by the
+ # logical padding routine to avoid padding the first nonblank token
+ if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) {
+ $peak_batch_size = $max_index_to_go;
+ }
- # retain break at a ':' line break
- if ( ( $i == $i_line_start || $i == $i_line_end )
- && $rOpts_break_at_old_ternary_breakpoints )
- {
+ return;
+ }
- set_forced_breakpoint($i);
+ sub save_opening_indentation {
- # break at previous '='
- if ( $i_equals[$depth] > 0 ) {
- set_forced_breakpoint( $i_equals[$depth] );
- $i_equals[$depth] = -1;
- }
- } ## end if ( ( $i == $i_line_start...))
- } ## end if ( $type eq ':' )
- if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
- my $inc = ( $type eq ':' ) ? 0 : 1;
- set_forced_breakpoint( $i - $inc );
- delete $postponed_breakpoint{$type_sequence};
- }
- } ## end if ( $token =~ /^[\)\]\}\:]$/[{[(])
+ # This should be called after each batch of tokens is output. It
+ # saves indentations of lines of all unmatched opening tokens.
+ # These will be used by sub get_opening_indentation.
- # set breaks at ?/: if they will get separated (and are
- # not a ?/: chain), or if the '?' is at the end of the
- # line
- elsif ( $token eq '?' ) {
- my $i_colon = $mate_index_to_go[$i];
- if (
- $i_colon <= 0 # the ':' is not in this batch
- || $i == 0 # this '?' is the first token of the line
- || $i ==
- $max_index_to_go # or this '?' is the last token
- )
- {
+ my ( $self, $ri_first, $ri_last, $rindentation_list ) = @_;
- # don't break at a '?' if preceded by ':' on
- # this line of previous ?/: pair on this line.
- # This is an attempt to preserve a chain of ?/:
- # expressions (elsif2.t). And don't break if
- # this has a side comment.
- set_forced_breakpoint($i)
- unless (
- $type_sequence == (
- $last_colon_sequence_number +
- TYPE_SEQUENCE_INCREMENT
- )
- || $tokens_to_go[$max_index_to_go] eq '#'
- );
- set_closing_breakpoint($i);
- } ## end if ( $i_colon <= 0 ||...)
- } ## end elsif ( $token eq '?' )
- } ## end if ($type_sequence)
+ # QW INDENTATION PATCH 1:
+ # Also save indentation for multiline qw quotes
+ my @i_qw;
+ my $seqno_qw_opening;
+ if ( $types_to_go[$max_index_to_go] eq 'q' ) {
+ my $KK = $K_to_go[$max_index_to_go];
+ $seqno_qw_opening =
+ $self->[_rstarting_multiline_qw_seqno_by_K_]->{$KK};
+ if ($seqno_qw_opening) {
+ push @i_qw, $max_index_to_go;
+ }
+ }
-#print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
+ # we need to save indentations of any unmatched opening tokens
+ # in this batch because we may need them in a subsequent batch.
+ foreach ( @unmatched_opening_indexes_in_this_batch, @i_qw ) {
- #------------------------------------------------------------
- # Handle Increasing Depth..
- #
- # prepare for a new list when depth increases
- # token $i is a '(','{', or '['
- #------------------------------------------------------------
- if ( $depth > $current_depth ) {
-
- $breakpoint_stack[$depth] = $forced_breakpoint_count;
- $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
- $has_broken_sublist[$depth] = 0;
- $identifier_count_stack[$depth] = 0;
- $index_before_arrow[$depth] = -1;
- $interrupted_list[$depth] = 0;
- $item_count_stack[$depth] = 0;
- $last_comma_index[$depth] = undef;
- $last_dot_index[$depth] = undef;
- $last_nonblank_type[$depth] = $last_nonblank_type;
- $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
- $opening_structure_index_stack[$depth] = $i;
- $rand_or_list[$depth] = [];
- $rfor_semicolon_list[$depth] = [];
- $i_equals[$depth] = -1;
- $want_comma_break[$depth] = 0;
- $container_type[$depth] =
- ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
- ? $last_nonblank_token
- : "";
- $has_old_logical_breakpoints[$depth] = 0;
+ my $seqno = $type_sequence_to_go[$_];
- # if line ends here then signal closing token to break
- if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
- {
- set_closing_breakpoint($i);
+ if ( !$seqno ) {
+ if ( $seqno_qw_opening && $_ == $max_index_to_go ) {
+ $seqno = $seqno_qw_opening;
}
+ else {
- # Not all lists of values should be vertically aligned..
- $dont_align[$depth] =
-
- # code BLOCKS are handled at a higher level
- ( $block_type ne "" )
+ # shouldn't happen
+ $seqno = 'UNKNOWN';
+ }
+ }
- # certain paren lists
- || ( $type eq '(' ) && (
+ $saved_opening_indentation{$seqno} = [
+ lookup_opening_indentation(
+ $_, $ri_first, $ri_last, $rindentation_list
+ )
+ ];
+ }
+ return;
+ }
- # it does not usually look good to align a list of
- # identifiers in a parameter list, as in:
- # my($var1, $var2, ...)
- # (This test should probably be refined, for now I'm just
- # testing for any keyword)
- ( $last_nonblank_type eq 'k' )
+ sub get_saved_opening_indentation {
+ my ($seqno) = @_;
+ my ( $indent, $offset, $is_leading, $exists ) = ( 0, 0, 0, 0 );
- # a trailing '(' usually indicates a non-list
- || ( $next_nonblank_type eq '(' )
- );
+ if ($seqno) {
+ if ( $saved_opening_indentation{$seqno} ) {
+ ( $indent, $offset, $is_leading ) =
+ @{ $saved_opening_indentation{$seqno} };
+ $exists = 1;
+ }
+ }
- # patch to outdent opening brace of long if/for/..
- # statements (like this one). See similar coding in
- # set_continuation breaks. We have also catch it here for
- # short line fragments which otherwise will not go through
- # set_continuation_breaks.
- if (
- $block_type
+ # some kind of serious error it doesn't exist
+ # (example is badfile.t)
- # if we have the ')' but not its '(' in this batch..
- && ( $last_nonblank_token eq ')' )
- && $mate_index_to_go[$i_last_nonblank_token] < 0
+ return ( $indent, $offset, $is_leading, $exists );
+ }
+} ## end closure grind_batch_of_CODE
- # and user wants brace to left
- && !$rOpts->{'opening-brace-always-on-right'}
+sub lookup_opening_indentation {
- && ( $type eq '{' ) # should be true
- && ( $token eq '{' ) # should be true
- )
- {
- set_forced_breakpoint( $i - 1 );
- } ## end if ( $block_type && ( ...))
- } ## end if ( $depth > $current_depth)
+ # get the indentation of the line in the current output batch
+ # which output a selected opening token
+ #
+ # given:
+ # $i_opening - index of an opening token in the current output batch
+ # whose line indentation we need
+ # $ri_first - reference to list of the first index $i for each output
+ # line in this batch
+ # $ri_last - reference to list of the last index $i for each output line
+ # in this batch
+ # $rindentation_list - reference to a list containing the indentation
+ # used for each line. (NOTE: the first slot in
+ # this list is the last returned line number, and this is
+ # followed by the list of indentations).
+ #
+ # return
+ # -the indentation of the line which contained token $i_opening
+ # -and its offset (number of columns) from the start of the line
- #------------------------------------------------------------
- # Handle Decreasing Depth..
- #
- # finish off any old list when depth decreases
- # token $i is a ')','}', or ']'
- #------------------------------------------------------------
- elsif ( $depth < $current_depth ) {
+ my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
- check_for_new_minimum_depth($depth);
+ if ( !@{$ri_last} ) {
- # force all outer logical containers to break after we see on
- # old breakpoint
- $has_old_logical_breakpoints[$depth] ||=
- $has_old_logical_breakpoints[$current_depth];
+ # An error here implies a bug introduced by a recent program change.
+ # Every batch of code has lines, so this should never happen.
+ if (DEVEL_MODE) {
+ Fault("Error in opening_indentation: no lines");
+ }
+ return ( 0, 0, 0 );
+ }
- # Patch to break between ') {' if the paren list is broken.
- # There is similar logic in set_continuation_breaks for
- # non-broken lists.
- if ( $token eq ')'
- && $next_nonblank_block_type
- && $interrupted_list[$current_depth]
- && $next_nonblank_type eq '{'
- && !$rOpts->{'opening-brace-always-on-right'} )
- {
- set_forced_breakpoint($i);
- } ## end if ( $token eq ')' && ...
+ my $nline = $rindentation_list->[0]; # line number of previous lookup
-#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
+ # reset line location if necessary
+ $nline = 0 if ( $i_opening < $ri_start->[$nline] );
- # set breaks at commas if necessary
- my ( $bp_count, $do_not_break_apart ) =
- set_comma_breakpoints($current_depth);
+ # find the correct line
+ unless ( $i_opening > $ri_last->[-1] ) {
+ while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
+ }
- my $i_opening = $opening_structure_index_stack[$current_depth];
- my $saw_opening_structure = ( $i_opening >= 0 );
+ # Error - token index is out of bounds - shouldn't happen
+ # A program bug has been introduced in one of the calling routines.
+ # We better stop here.
+ else {
+ my $i_last_line = $ri_last->[-1];
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+Program bug in call to lookup_opening_indentation - index out of range
+ called with index i_opening=$i_opening > $i_last_line = max index of last line
+This batch has max index = $max_index_to_go,
+EOM
+ }
+ $nline = $#{$ri_last};
+ }
- # this term is long if we had to break at interior commas..
- my $is_long_term = $bp_count > 0;
+ $rindentation_list->[0] =
+ $nline; # save line number to start looking next call
+ my $ibeg = $ri_start->[$nline];
+ my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
+ my $is_leading = ( $ibeg == $i_opening );
+ return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
+}
- # If this is a short container with one or more comma arrows,
- # then we will mark it as a long term to open it if requested.
- # $rOpts_comma_arrow_breakpoints =
- # 0 - open only if comma precedes closing brace
- # 1 - stable: except for one line blocks
- # 2 - try to form 1 line blocks
- # 3 - ignore =>
- # 4 - always open up if vt=0
- # 5 - stable: even for one line blocks if vt=0
- if ( !$is_long_term
- && $tokens_to_go[$i_opening] =~ /^[\(\{\[]$/
- && $index_before_arrow[ $depth + 1 ] > 0
- && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
- )
- {
- $is_long_term = $rOpts_comma_arrow_breakpoints == 4
- || ( $rOpts_comma_arrow_breakpoints == 0
- && $last_nonblank_token eq ',' )
- || ( $rOpts_comma_arrow_breakpoints == 5
- && $old_breakpoint_to_go[$i_opening] );
- } ## end if ( !$is_long_term &&...)
+sub terminal_type_i {
- # mark term as long if the length between opening and closing
- # parens exceeds allowed line length
- if ( !$is_long_term && $saw_opening_structure ) {
- my $i_opening_minus = find_token_starting_list($i_opening);
+ # returns type of last token on this line (terminal token), as follows:
+ # returns # for a full-line comment
+ # returns ' ' for a blank line
+ # otherwise returns final token type
- # Note: we have to allow for one extra space after a
- # closing token so that we do not strand a comma or
- # semicolon, hence the '>=' here (oneline.t)
- # Note: we ignore left weld lengths here for best results
- $is_long_term =
- excess_line_length( $i_opening_minus, $i, 1 ) >= 0;
- } ## end if ( !$is_long_term &&...)
+ my ( $ibeg, $iend ) = @_;
- # We've set breaks after all comma-arrows. Now we have to
- # undo them if this can be a one-line block
- # (the only breakpoints set will be due to comma-arrows)
- if (
+ # Start at the end and work backwards
+ my $i = $iend;
+ my $type_i = $types_to_go[$i];
- # user doesn't require breaking after all comma-arrows
- ( $rOpts_comma_arrow_breakpoints != 0 )
- && ( $rOpts_comma_arrow_breakpoints != 4 )
+ # Check for side comment
+ if ( $type_i eq '#' ) {
+ $i--;
+ if ( $i < $ibeg ) {
+ return wantarray ? ( $type_i, $ibeg ) : $type_i;
+ }
+ $type_i = $types_to_go[$i];
+ }
- # and if the opening structure is in this batch
- && $saw_opening_structure
+ # Skip past a blank
+ if ( $type_i eq 'b' ) {
+ $i--;
+ if ( $i < $ibeg ) {
+ return wantarray ? ( $type_i, $ibeg ) : $type_i;
+ }
+ $type_i = $types_to_go[$i];
+ }
+
+ # Found it..make sure it is a BLOCK termination,
+ # but hide a terminal } after sort/map/grep/eval/do because it is not
+ # necessarily the end of the line. (terminal.t)
+ my $block_type = $block_type_to_go[$i];
+ if (
+ $type_i eq '}'
+ && ( !$block_type
+ || $is_sort_map_grep_eval_do{$block_type} )
+ )
+ {
+ $type_i = 'b';
+ }
+ return wantarray ? ( $type_i, $i ) : $type_i;
+}
+
+sub pad_array_to_go {
+
+ # To simplify coding in break_lists and set_bond_strengths, it helps to
+ # create some extra blank tokens at the end of the arrays. We also add
+ # some undef's to help guard against using invalid data.
+ my ($self) = @_;
+ $K_to_go[ $max_index_to_go + 1 ] = undef;
+ $tokens_to_go[ $max_index_to_go + 1 ] = '';
+ $tokens_to_go[ $max_index_to_go + 2 ] = '';
+ $tokens_to_go[ $max_index_to_go + 3 ] = undef;
+ $types_to_go[ $max_index_to_go + 1 ] = 'b';
+ $types_to_go[ $max_index_to_go + 2 ] = 'b';
+ $types_to_go[ $max_index_to_go + 3 ] = undef;
+ $nesting_depth_to_go[ $max_index_to_go + 2 ] = undef;
+ $nesting_depth_to_go[ $max_index_to_go + 1 ] =
+ $nesting_depth_to_go[$max_index_to_go];
+
+ # /^[R\}\)\]]$/
+ if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
+ if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
+
+ # Nesting depths are set to be >=0 in sub write_line, so it should
+ # not be possible to get here unless the code has a bracing error
+ # which leaves a closing brace with zero nesting depth.
+ unless ( get_saw_brace_error() ) {
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+Program bug in pad_array_to_go: hit nesting error which should have been caught
+EOM
+ }
+ }
+ }
+ else {
+ $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
+ }
+ }
+
+ # /^[L\{\(\[]$/
+ elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
+ $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
+ }
+ return;
+}
+
+sub break_all_chain_tokens {
+
+ # scan the current breakpoints looking for breaks at certain "chain
+ # operators" (. : && || + etc) which often occur repeatedly in a long
+ # statement. If we see a break at any one, break at all similar tokens
+ # within the same container.
+ #
+ my ( $self, $ri_left, $ri_right ) = @_;
+
+ my %saw_chain_type;
+ my %left_chain_type;
+ my %right_chain_type;
+ my %interior_chain_type;
+ my $nmax = @{$ri_right} - 1;
+
+ # scan the left and right end tokens of all lines
+ my $count = 0;
+ for my $n ( 0 .. $nmax ) {
+ my $il = $ri_left->[$n];
+ my $ir = $ri_right->[$n];
+ my $typel = $types_to_go[$il];
+ my $typer = $types_to_go[$ir];
+ $typel = '+' if ( $typel eq '-' ); # treat + and - the same
+ $typer = '+' if ( $typer eq '-' );
+ $typel = '*' if ( $typel eq '/' ); # treat * and / the same
+ $typer = '*' if ( $typer eq '/' );
+ my $tokenl = $tokens_to_go[$il];
+ my $tokenr = $tokens_to_go[$ir];
+
+ if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
+ next if ( $typel eq '?' );
+ push @{ $left_chain_type{$typel} }, $il;
+ $saw_chain_type{$typel} = 1;
+ $count++;
+ }
+ if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
+ next if ( $typer eq '?' );
+ push @{ $right_chain_type{$typer} }, $ir;
+ $saw_chain_type{$typer} = 1;
+ $count++;
+ }
+ }
+ return unless $count;
+
+ # now look for any interior tokens of the same types
+ $count = 0;
+ for my $n ( 0 .. $nmax ) {
+ my $il = $ri_left->[$n];
+ my $ir = $ri_right->[$n];
+ foreach my $i ( $il + 1 .. $ir - 1 ) {
+ my $type = $types_to_go[$i];
+ $type = '+' if ( $type eq '-' );
+ $type = '*' if ( $type eq '/' );
+ if ( $saw_chain_type{$type} ) {
+ push @{ $interior_chain_type{$type} }, $i;
+ $count++;
+ }
+ }
+ }
+ return unless $count;
+
+ # now make a list of all new break points
+ my @insert_list;
+
+ # loop over all chain types
+ foreach my $type ( keys %saw_chain_type ) {
+
+ # quit if just ONE continuation line with leading . For example--
+ # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
+ # . $contents;
+ last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
+
+ # loop over all interior chain tokens
+ foreach my $itest ( @{ $interior_chain_type{$type} } ) {
+
+ # loop over all left end tokens of same type
+ if ( $left_chain_type{$type} ) {
+ next if $nobreak_to_go[ $itest - 1 ];
+ foreach my $i ( @{ $left_chain_type{$type} } ) {
+ next unless $self->in_same_container_i( $i, $itest );
+ push @insert_list, $itest - 1;
+
+ # Break at matching ? if this : is at a different level.
+ # For example, the ? before $THRf_DEAD in the following
+ # should get a break if its : gets a break.
+ #
+ # my $flags =
+ # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
+ # : ( $_ & 4 ) ? $THRf_R_DETACHED
+ # : $THRf_R_JOINABLE;
+ if ( $type eq ':'
+ && $levels_to_go[$i] != $levels_to_go[$itest] )
+ {
+ my $i_question = $mate_index_to_go[$itest];
+ if ( $i_question > 0 ) {
+ push @insert_list, $i_question - 1;
+ }
+ }
+ last;
+ }
+ }
+
+ # loop over all right end tokens of same type
+ if ( $right_chain_type{$type} ) {
+ next if $nobreak_to_go[$itest];
+ foreach my $i ( @{ $right_chain_type{$type} } ) {
+ next unless $self->in_same_container_i( $i, $itest );
+ push @insert_list, $itest;
+
+ # break at matching ? if this : is at a different level
+ if ( $type eq ':'
+ && $levels_to_go[$i] != $levels_to_go[$itest] )
+ {
+ my $i_question = $mate_index_to_go[$itest];
+ if ( $i_question >= 0 ) {
+ push @insert_list, $i_question;
+ }
+ }
+ last;
+ }
+ }
+ }
+ }
+
+ # insert any new break points
+ if (@insert_list) {
+ $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
+ }
+ return;
+}
+
+sub insert_additional_breaks {
+
+ # this routine will add line breaks at requested locations after
+ # sub break_long_lines has made preliminary breaks.
+
+ my ( $self, $ri_break_list, $ri_first, $ri_last ) = @_;
+ my $i_f;
+ my $i_l;
+ my $line_number = 0;
+ foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
+
+ next if ( $nobreak_to_go[$i_break_left] );
+
+ $i_f = $ri_first->[$line_number];
+ $i_l = $ri_last->[$line_number];
+ while ( $i_break_left >= $i_l ) {
+ $line_number++;
+
+ # shouldn't happen unless caller passes bad indexes
+ if ( $line_number >= @{$ri_last} ) {
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+Non-fatal program bug: couldn't set break at $i_break_left
+EOM
+ }
+ return;
+ }
+ $i_f = $ri_first->[$line_number];
+ $i_l = $ri_last->[$line_number];
+ }
+
+ # Do not leave a blank at the end of a line; back up if necessary
+ if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
+
+ my $i_break_right = $inext_to_go[$i_break_left];
+ if ( $i_break_left >= $i_f
+ && $i_break_left < $i_l
+ && $i_break_right > $i_f
+ && $i_break_right <= $i_l )
+ {
+ splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
+ splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
+ }
+ }
+ return;
+}
+
+{ ## begin closure in_same_container_i
+ my $ris_break_token;
+ my $ris_comma_token;
+
+ BEGIN {
+
+ # all cases break on seeing commas at same level
+ my @q = qw( => );
+ push @q, ',';
+ @{$ris_comma_token}{@q} = (1) x scalar(@q);
+
+ # Non-ternary text also breaks on seeing any of qw(? : || or )
+ # Example: we would not want to break at any of these .'s
+ # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
+ push @q, qw( or || ? : );
+ @{$ris_break_token}{@q} = (1) x scalar(@q);
+ }
+
+ sub in_same_container_i {
+
+ # Check to see if tokens at i1 and i2 are in the same container, and
+ # not separated by certain characters: => , ? : || or
+ # This is an interface between the _to_go arrays to the rLL array
+ my ( $self, $i1, $i2 ) = @_;
+
+ # quick check
+ my $parent_seqno_1 = $parent_seqno_to_go[$i1];
+ return if ( $parent_seqno_to_go[$i2] ne $parent_seqno_1 );
+
+ if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
+ my $K1 = $K_to_go[$i1];
+ my $K2 = $K_to_go[$i2];
+ my $rLL = $self->[_rLL_];
+
+ my $depth_1 = $nesting_depth_to_go[$i1];
+ return if ( $depth_1 < 0 );
+
+ # Shouldn't happen since i1 and i2 have same parent:
+ return unless ( $nesting_depth_to_go[$i2] == $depth_1 );
+
+ # Select character set to scan for
+ my $type_1 = $types_to_go[$i1];
+ my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token;
+
+ # Fast preliminary loop to verify that tokens are in the same container
+ my $KK = $K1;
+ while (1) {
+ $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
+ last if !defined($KK);
+ last if ( $KK >= $K2 );
+ my $ii = $i1 + $KK - $K1;
+ my $depth_i = $nesting_depth_to_go[$ii];
+ return if ( $depth_i < $depth_1 );
+ next if ( $depth_i > $depth_1 );
+ if ( $type_1 ne ':' ) {
+ my $tok_i = $tokens_to_go[$ii];
+ return if ( $tok_i eq '?' || $tok_i eq ':' );
+ }
+ }
+
+ # Slow loop checking for certain characters
+
+ #-----------------------------------------------------
+ # This is potentially a slow routine and not critical.
+ # For safety just give up for large differences.
+ # See test file 'infinite_loop.txt'
+ #-----------------------------------------------------
+ return if ( $i2 - $i1 > 200 );
+
+ foreach my $ii ( $i1 + 1 .. $i2 - 1 ) {
+
+ my $depth_i = $nesting_depth_to_go[$ii];
+ next if ( $depth_i > $depth_1 );
+ return if ( $depth_i < $depth_1 );
+ my $tok_i = $tokens_to_go[$ii];
+ return if ( $rbreak->{$tok_i} );
+ }
+ return 1;
+ }
+} ## end closure in_same_container_i
+
+sub break_equals {
+
+ # Look for assignment operators that could use a breakpoint.
+ # For example, in the following snippet
+ #
+ # $HOME = $ENV{HOME}
+ # || $ENV{LOGDIR}
+ # || $pw[7]
+ # || die "no home directory for user $<";
+ #
+ # we could break at the = to get this, which is a little nicer:
+ # $HOME =
+ # $ENV{HOME}
+ # || $ENV{LOGDIR}
+ # || $pw[7]
+ # || die "no home directory for user $<";
+ #
+ # The logic here follows the logic in set_logical_padding, which
+ # will add the padding in the second line to improve alignment.
+ #
+ my ( $self, $ri_left, $ri_right ) = @_;
+ my $nmax = @{$ri_right} - 1;
+ return unless ( $nmax >= 2 );
+
+ # scan the left ends of first two lines
+ my $tokbeg = "";
+ my $depth_beg;
+ for my $n ( 1 .. 2 ) {
+ my $il = $ri_left->[$n];
+ my $typel = $types_to_go[$il];
+ my $tokenl = $tokens_to_go[$il];
+
+ my $has_leading_op = ( $tokenl =~ /^\w/ )
+ ? $is_chain_operator{$tokenl} # + - * / : ? && ||
+ : $is_chain_operator{$typel}; # and, or
+ return unless ($has_leading_op);
+ if ( $n > 1 ) {
+ return
+ unless ( $tokenl eq $tokbeg
+ && $nesting_depth_to_go[$il] eq $depth_beg );
+ }
+ $tokbeg = $tokenl;
+ $depth_beg = $nesting_depth_to_go[$il];
+ }
+
+ # now look for any interior tokens of the same types
+ my $il = $ri_left->[0];
+ my $ir = $ri_right->[0];
+
+ # now make a list of all new break points
+ my @insert_list;
+ for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
+ my $type = $types_to_go[$i];
+ if ( $is_assignment{$type}
+ && $nesting_depth_to_go[$i] eq $depth_beg )
+ {
+ if ( $want_break_before{$type} ) {
+ push @insert_list, $i - 1;
+ }
+ else {
+ push @insert_list, $i;
+ }
+ }
+ }
+
+ # Break after a 'return' followed by a chain of operators
+ # return ( $^O !~ /win32|dos/i )
+ # && ( $^O ne 'VMS' )
+ # && ( $^O ne 'OS2' )
+ # && ( $^O ne 'MacOS' );
+ # To give:
+ # return
+ # ( $^O !~ /win32|dos/i )
+ # && ( $^O ne 'VMS' )
+ # && ( $^O ne 'OS2' )
+ # && ( $^O ne 'MacOS' );
+ my $i = 0;
+ if ( $types_to_go[$i] eq 'k'
+ && $tokens_to_go[$i] eq 'return'
+ && $ir > $il
+ && $nesting_depth_to_go[$i] eq $depth_beg )
+ {
+ push @insert_list, $i;
+ }
+
+ return unless (@insert_list);
+
+ # One final check...
+ # scan second and third lines and be sure there are no assignments
+ # we want to avoid breaking at an = to make something like this:
+ # unless ( $icon =
+ # $html_icons{"$type-$state"}
+ # or $icon = $html_icons{$type}
+ # or $icon = $html_icons{$state} )
+ for my $n ( 1 .. 2 ) {
+ my $il = $ri_left->[$n];
+ my $ir = $ri_right->[$n];
+ foreach my $i ( $il + 1 .. $ir ) {
+ my $type = $types_to_go[$i];
+ return
+ if ( $is_assignment{$type}
+ && $nesting_depth_to_go[$i] eq $depth_beg );
+ }
+ }
+
+ # ok, insert any new break point
+ if (@insert_list) {
+ $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
+ }
+ return;
+}
+
+{ ## begin closure recombine_breakpoints
+
+ # This routine is called once per batch to see if it would be better
+ # to combine some of the lines into which the batch has been broken.
+
+ my %is_amp_amp;
+ my %is_ternary;
+ my %is_math_op;
+ my %is_plus_minus;
+ my %is_mult_div;
+
+ BEGIN {
+
+ my @q;
+ @q = qw( && || );
+ @is_amp_amp{@q} = (1) x scalar(@q);
+
+ @q = qw( ? : );
+ @is_ternary{@q} = (1) x scalar(@q);
+
+ @q = qw( + - * / );
+ @is_math_op{@q} = (1) x scalar(@q);
+
+ @q = qw( + - );
+ @is_plus_minus{@q} = (1) x scalar(@q);
+
+ @q = qw( * / );
+ @is_mult_div{@q} = (1) x scalar(@q);
+ }
+
+ sub Debug_dump_breakpoints {
+
+ # Debug routine to dump current breakpoints...not normally called
+ # We are given indexes to the current lines:
+ # $ri_beg = ref to array of BEGinning indexes of each line
+ # $ri_end = ref to array of ENDing indexes of each line
+ my ( $self, $ri_beg, $ri_end, $msg ) = @_;
+ print STDERR "----Dumping breakpoints from: $msg----\n";
+ for my $n ( 0 .. @{$ri_end} - 1 ) {
+ my $ibeg = $ri_beg->[$n];
+ my $iend = $ri_end->[$n];
+ my $text = "";
+ foreach my $i ( $ibeg .. $iend ) {
+ $text .= $tokens_to_go[$i];
+ }
+ print STDERR "$n ($ibeg:$iend) $text\n";
+ }
+ print STDERR "----\n";
+ return;
+ }
+
+ sub delete_one_line_semicolons {
+
+ my ( $self, $ri_beg, $ri_end ) = @_;
+ my $rLL = $self->[_rLL_];
+ my $K_opening_container = $self->[_K_opening_container_];
+
+ # Walk down the lines of this batch and delete any semicolons
+ # terminating one-line blocks;
+ my $nmax = @{$ri_end} - 1;
+
+ foreach my $n ( 0 .. $nmax ) {
+ my $i_beg = $ri_beg->[$n];
+ my $i_e = $ri_end->[$n];
+ my $K_beg = $K_to_go[$i_beg];
+ my $K_e = $K_to_go[$i_e];
+ my $K_end = $K_e;
+ my $type_end = $rLL->[$K_end]->[_TYPE_];
+ if ( $type_end eq '#' ) {
+ $K_end = $self->K_previous_nonblank($K_end);
+ if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; }
+ }
+
+ # we are looking for a line ending in closing brace
+ next
+ unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' );
+
+ # ...and preceded by a semicolon on the same line
+ my $K_semicolon = $self->K_previous_nonblank($K_end);
+ next unless defined($K_semicolon);
+ my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg );
+ next if ( $i_semicolon <= $i_beg );
+ next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' );
+
+ # Safety check - shouldn't happen - not critical
+ # This is not worth throwing a Fault, except in DEVEL_MODE
+ if ( $types_to_go[$i_semicolon] ne ';' ) {
+ DEVEL_MODE
+ && Fault("unexpected type looking for semicolon");
+ next;
+ }
+
+ # ... with the corresponding opening brace on the same line
+ my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_];
+ my $K_opening = $K_opening_container->{$type_sequence};
+ next unless ( defined($K_opening) );
+ my $i_opening = $i_beg + ( $K_opening - $K_beg );
+ next if ( $i_opening < $i_beg );
+
+ # ... and only one semicolon between these braces
+ my $semicolon_count = 0;
+ foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) {
+ if ( $rLL->[$K]->[_TYPE_] eq ';' ) {
+ $semicolon_count++;
+ last;
+ }
+ }
+ next if ($semicolon_count);
+
+ # ...ok, then make the semicolon invisible
+ my $len = $token_lengths_to_go[$i_semicolon];
+ $tokens_to_go[$i_semicolon] = "";
+ $token_lengths_to_go[$i_semicolon] = 0;
+ $rLL->[$K_semicolon]->[_TOKEN_] = "";
+ $rLL->[$K_semicolon]->[_TOKEN_LENGTH_] = 0;
+ foreach ( $i_semicolon .. $max_index_to_go ) {
+ $summed_lengths_to_go[ $_ + 1 ] -= $len;
+ }
+ }
+ return;
+ }
+
+ use constant DEBUG_RECOMBINE => 0;
+
+ sub recombine_breakpoints {
+
+ # We are given indexes to the current lines:
+ # $ri_beg = ref to array of BEGinning indexes of each line
+ # $ri_end = ref to array of ENDing indexes of each line
+ my ( $self, $ri_beg, $ri_end ) = @_;
+
+ # sub break_long_lines is very liberal in setting line breaks
+ # for long lines, always setting breaks at good breakpoints, even
+ # when that creates small lines. Sometimes small line fragments
+ # are produced which would look better if they were combined.
+ # That's the task of this routine.
+
+ # do nothing under extreme stress
+ return if ( $stress_level_alpha < 1 && !DEVEL_MODE );
+
+ my $rK_weld_right = $self->[_rK_weld_right_];
+ my $rK_weld_left = $self->[_rK_weld_left_];
+
+ my $nmax = @{$ri_end} - 1;
+ return if ( $nmax <= 0 );
+
+ my $nmax_start = $nmax;
+
+ # Make a list of all good joining tokens between the lines
+ # n-1 and n.
+ my @joint;
+
+ # Break the total batch sub-sections with lengths short enough to
+ # recombine
+ my $rsections = [];
+ my $nbeg = 0;
+ my $nend;
+ my $nmax_section = 0;
+ foreach my $nn ( 1 .. $nmax ) {
+ my $ibeg_1 = $ri_beg->[ $nn - 1 ];
+ my $iend_1 = $ri_end->[ $nn - 1 ];
+ my $iend_2 = $ri_end->[$nn];
+ my $ibeg_2 = $ri_beg->[$nn];
+
+ # Define the joint variable
+ my ( $itok, $itokp, $itokm );
+ foreach my $itest ( $iend_1, $ibeg_2 ) {
+ my $type = $types_to_go[$itest];
+ if ( $is_math_op{$type}
+ || $is_amp_amp{$type}
+ || $is_assignment{$type}
+ || $type eq ':' )
+ {
+ $itok = $itest;
+ }
+ }
+ $joint[$nn] = [$itok];
+
+ # Update the section list
+ my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 );
+ if (
+ $excess <= 1
+
+ # The number 5 here is an arbitrary small number intended
+ # to keep most small matches in one sub-section.
+ || ( defined($nend) && ( $nn < 5 || $nmax - $nn < 5 ) )
+ )
+ {
+ $nend = $nn;
+ }
+ else {
+ if ( defined($nend) ) {
+ push @{$rsections}, [ $nbeg, $nend ];
+ my $num = $nend - $nbeg;
+ if ( $num > $nmax_section ) { $nmax_section = $num }
+ $nbeg = $nn;
+ $nend = undef;
+ }
+ $nbeg = $nn;
+ }
+ }
+ if ( defined($nend) ) {
+ push @{$rsections}, [ $nbeg, $nend ];
+ my $num = $nend - $nbeg;
+ if ( $num > $nmax_section ) { $nmax_section = $num }
+ }
+
+ my $num_sections = @{$rsections};
+
+ # This is potentially an O(n-squared) loop, but not critical, so we can
+ # put a finite limit on the total number of iterations. This is
+ # suggested by issue c118, which pushed about 5.e5 lines through here
+ # and caused an excessive run time.
+
+ # Three lines of defence have been put in place to prevent excessive
+ # run times:
+ # 1. do nothing if formatting under stress (c118 was under stress)
+ # 2. break into small sub-sections to decrease the maximum n-squared.
+ # 3. put a finite limit on the number of iterations.
+
+ # Testing shows that most batches only require one or two iterations.
+ # A very large batch which is broken into sub-sections can require one
+ # iteration per section. This suggests the limit here, which allows
+ # up to 10 iterations plus one pass per sub-section.
+ my $it_count = 0;
+ my $it_count_max =
+ 10 + int( 1000 / ( 1 + $nmax_section ) ) + $num_sections;
+
+ if ( DEBUG_RECOMBINE > 1 ) {
+ my $max = 0;
+ print STDERR "-----\n$num_sections sections found for nmax=$nmax\n";
+ foreach my $sect ( @{$rsections} ) {
+ my ( $nbeg, $nend ) = @{$sect};
+ my $num = $nend - $nbeg;
+ if ( $num > $max ) { $max = $num }
+ print STDERR "$nbeg $nend\n";
+ }
+ print STDERR "max size=$max of $nmax lines\n";
+ }
+
+ # Loop over all sub-sections. Note that we have to work backwards
+ # from the end of the batch since the sections use original line
+ # numbers, and the line numbers change as we go.
+ while ( my $section = pop @{$rsections} ) {
+ my ( $nbeg, $nend ) = @{$section};
+
+ # number of ending lines to leave untouched in this pass
+ $nmax = @{$ri_end} - 1;
+ my $num_freeze = $nmax - $nend;
+
+ my $more_to_do = 1;
+
+ # We keep looping over all of the lines of this batch
+ # until there are no more possible recombinations
+ my $nmax_last = $nmax + 1;
+ my $reverse = 0;
+
+ while ($more_to_do) {
+
+ # Safety check for excess total iterations
+ $it_count++;
+ if ( $it_count > $it_count_max ) {
+ goto RETURN;
+ }
+
+ my $n_best = 0;
+ my $bs_best;
+ my $nmax = @{$ri_end} - 1;
+
+ # Safety check for infinite loop: the line count must decrease
+ unless ( $nmax < $nmax_last ) {
+
+ # Shouldn't happen because splice below decreases nmax on
+ # each iteration. An error can only be due to a recent
+ # programming change. We better stop here.
+ if (DEVEL_MODE) {
+ Fault(
+"Program bug-infinite loop in recombine breakpoints\n"
+ );
+ }
+ $more_to_do = 0;
+ last;
+ }
+ $nmax_last = $nmax;
+ $more_to_do = 0;
+ my $skip_Section_3;
+ my $leading_amp_count = 0;
+ my $this_line_is_semicolon_terminated;
+
+ # loop over all remaining lines in this batch
+ my $nstop = $nmax - $num_freeze;
+ for my $iter ( $nbeg + 1 .. $nstop ) {
+
+ # alternating sweep direction gives symmetric results
+ # for recombining lines which exceed the line length
+ # such as eval {{{{.... }}}}
+ my $n;
+ if ($reverse) { $n = $nbeg + 1 + $nstop - $iter; }
+ else { $n = $iter }
+
+ #----------------------------------------------------------
+ # If we join the current pair of lines,
+ # line $n-1 will become the left part of the joined line
+ # line $n will become the right part of the joined line
+ #
+ # Here are Indexes of the endpoint tokens of the two lines:
+ #
+ # -----line $n-1--- | -----line $n-----
+ # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
+ # ^
+ # |
+ # We want to decide if we should remove the line break
+ # between the tokens at $iend_1 and $ibeg_2
+ #
+ # We will apply a number of ad-hoc tests to see if joining
+ # here will look ok. The code will just issue a 'next'
+ # command if the join doesn't look good. If we get through
+ # the gauntlet of tests, the lines will be recombined.
+ #----------------------------------------------------------
+ #
+ # beginning and ending tokens of the lines we are working on
+ my $ibeg_1 = $ri_beg->[ $n - 1 ];
+ my $iend_1 = $ri_end->[ $n - 1 ];
+ my $iend_2 = $ri_end->[$n];
+ my $ibeg_2 = $ri_beg->[$n];
+ my $ibeg_nmax = $ri_beg->[$nmax];
+
+ # combined line cannot be too long
+ my $excess =
+ $self->excess_line_length( $ibeg_1, $iend_2, 1 );
+ next if ( $excess > 0 );
+
+ my $type_iend_1 = $types_to_go[$iend_1];
+ my $type_iend_2 = $types_to_go[$iend_2];
+ my $type_ibeg_1 = $types_to_go[$ibeg_1];
+ my $type_ibeg_2 = $types_to_go[$ibeg_2];
+
+ # terminal token of line 2 if any side comment is ignored:
+ my $iend_2t = $iend_2;
+ my $type_iend_2t = $type_iend_2;
+
+ # some beginning indexes of other lines, which may not exist
+ my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1;
+ my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
+ my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
+
+ my $bs_tweak = 0;
+
+ #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
+ # $nesting_depth_to_go[$ibeg_1] );
+
+ DEBUG_RECOMBINE > 1 && do {
+ print STDERR
+"RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
+ };
+
+ # If line $n is the last line, we set some flags and
+ # do any special checks for it
+ if ( $n == $nmax ) {
+
+ # a terminal '{' should stay where it is
+ # unless preceded by a fat comma
+ next if ( $type_ibeg_2 eq '{' && $type_iend_1 ne '=>' );
+
+ if ( $type_iend_2 eq '#'
+ && $iend_2 - $ibeg_2 >= 2
+ && $types_to_go[ $iend_2 - 1 ] eq 'b' )
+ {
+ $iend_2t = $iend_2 - 2;
+ $type_iend_2t = $types_to_go[$iend_2t];
+ }
+
+ $this_line_is_semicolon_terminated =
+ $type_iend_2t eq ';';
+ }
+
+ #----------------------------------------------------------
+ # Recombine Section 0:
+ # Examine the special token joining this line pair, if any.
+ # Put as many tests in this section to avoid duplicate code
+ # and to make formatting independent of whether breaks are
+ # to the left or right of an operator.
+ #----------------------------------------------------------
+
+ my ($itok) = @{ $joint[$n] };
+ if ($itok) {
+
+ my $type = $types_to_go[$itok];
+
+ if ( $type eq ':' ) {
+
+ # do not join at a colon unless it disobeys the
+ # break request
+ if ( $itok eq $iend_1 ) {
+ next unless $want_break_before{$type};
+ }
+ else {
+ $leading_amp_count++;
+ next if $want_break_before{$type};
+ }
+ } ## end if ':'
+
+ # handle math operators + - * /
+ elsif ( $is_math_op{$type} ) {
+
+ # Combine these lines if this line is a single
+ # number, or if it is a short term with same
+ # operator as the previous line. For example, in
+ # the following code we will combine all of the
+ # short terms $A, $B, $C, $D, $E, $F, together
+ # instead of leaving them one per line:
+ # my $time =
+ # $A * $B * $C * $D * $E * $F *
+ # ( 2. * $eps * $sigma * $area ) *
+ # ( 1. / $tcold**3 - 1. / $thot**3 );
+
+ # This can be important in math-intensive code.
+
+ my $good_combo;
+
+ my $itokp = min( $inext_to_go[$itok], $iend_2 );
+ my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
+ my $itokm = max( $iprev_to_go[$itok], $ibeg_1 );
+ my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
+
+ # check for a number on the right
+ if ( $types_to_go[$itokp] eq 'n' ) {
+
+ # ok if nothing else on right
+ if ( $itokp == $iend_2 ) {
+ $good_combo = 1;
+ }
+ else {
+
+ # look one more token to right..
+ # okay if math operator or some termination
+ $good_combo =
+ ( ( $itokpp == $iend_2 )
+ && $is_math_op{ $types_to_go[$itokpp]
+ } )
+ || $types_to_go[$itokpp] =~ /^[#,;]$/;
+ }
+ }
+
+ # check for a number on the left
+ if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
+
+ # okay if nothing else to left
+ if ( $itokm == $ibeg_1 ) {
+ $good_combo = 1;
+ }
+
+ # otherwise look one more token to left
+ else {
+
+ # okay if math operator, comma, or assignment
+ $good_combo = ( $itokmm == $ibeg_1 )
+ && ( $is_math_op{ $types_to_go[$itokmm] }
+ || $types_to_go[$itokmm] =~ /^[,]$/
+ || $is_assignment{ $types_to_go[$itokmm]
+ } );
+ }
+ }
+
+ # look for a single short token either side of the
+ # operator
+ if ( !$good_combo ) {
+
+ # Slight adjustment factor to make results
+ # independent of break before or after operator
+ # in long summed lists. (An operator and a
+ # space make two spaces).
+ my $two = ( $itok eq $iend_1 ) ? 2 : 0;
+
+ $good_combo =
+
+ # numbers or id's on both sides of this joint
+ $types_to_go[$itokp] =~ /^[in]$/
+ && $types_to_go[$itokm] =~ /^[in]$/
+
+ # one of the two lines must be short:
+ && (
+ (
+ # no more than 2 nonblank tokens right
+ # of joint
+ $itokpp == $iend_2
+
+ # short
+ && token_sequence_length(
+ $itokp, $iend_2
+ ) < $two +
+ $rOpts_short_concatenation_item_length
+ )
+ || (
+ # no more than 2 nonblank tokens left of
+ # joint
+ $itokmm == $ibeg_1
+
+ # short
+ && token_sequence_length(
+ $ibeg_1, $itokm
+ ) < 2 - $two +
+ $rOpts_short_concatenation_item_length
+ )
+
+ )
+
+ # keep pure terms; don't mix +- with */
+ && !(
+ $is_plus_minus{$type}
+ && ( $is_mult_div{ $types_to_go[$itokmm] }
+ || $is_mult_div{ $types_to_go[$itokpp] }
+ )
+ )
+ && !(
+ $is_mult_div{$type}
+ && ( $is_plus_minus{ $types_to_go[$itokmm] }
+ || $is_plus_minus{ $types_to_go[$itokpp]
+ } )
+ )
+
+ ;
+ }
+
+ # it is also good to combine if we can reduce to 2
+ # lines
+ if ( !$good_combo ) {
+
+ # index on other line where same token would be
+ # in a long chain.
+ my $iother =
+ ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
+
+ $good_combo =
+ $n == 2
+ && $n == $nmax
+ && $types_to_go[$iother] ne $type;
+ }
+
+ next unless ($good_combo);
+
+ } ## end math
+
+ elsif ( $is_amp_amp{$type} ) {
+ ##TBD
+ } ## end &&, ||
+
+ elsif ( $is_assignment{$type} ) {
+ ##TBD
+ } ## end assignment
+ }
+
+ #----------------------------------------------------------
+ # Recombine Section 1:
+ # Join welded nested containers immediately
+ #----------------------------------------------------------
+
+ if (
+ $total_weld_count
+ && ( $type_sequence_to_go[$iend_1]
+ && defined( $rK_weld_right->{ $K_to_go[$iend_1] } )
+ || $type_sequence_to_go[$ibeg_2]
+ && defined( $rK_weld_left->{ $K_to_go[$ibeg_2] } ) )
+ )
+ {
+ $n_best = $n;
+ last;
+ }
+
+ $reverse = 0;
+
+ #----------------------------------------------------------
+ # Recombine Section 2:
+ # Examine token at $iend_1 (right end of first line of pair)
+ #----------------------------------------------------------
+
+ # an isolated '}' may join with a ';' terminated segment
+ if ( $type_iend_1 eq '}' ) {
+
+ # Check for cases where combining a semicolon terminated
+ # statement with a previous isolated closing paren will
+ # allow the combined line to be outdented. This is
+ # generally a good move. For example, we can join up
+ # the last two lines here:
+ # (
+ # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
+ # $size, $atime, $mtime, $ctime, $blksize, $blocks
+ # )
+ # = stat($file);
+ #
+ # to get:
+ # (
+ # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
+ # $size, $atime, $mtime, $ctime, $blksize, $blocks
+ # ) = stat($file);
+ #
+ # which makes the parens line up.
+ #
+ # Another example, from Joe Matarazzo, probably looks best
+ # with the 'or' clause appended to the trailing paren:
+ # $self->some_method(
+ # PARAM1 => 'foo',
+ # PARAM2 => 'bar'
+ # ) or die "Some_method didn't work";
+ #
+ # But we do not want to do this for something like the -lp
+ # option where the paren is not outdentable because the
+ # trailing clause will be far to the right.
+ #
+ # The logic here is synchronized with the logic in sub
+ # sub final_indentation_adjustment, which actually does
+ # the outdenting.
+ #
+ $skip_Section_3 ||= $this_line_is_semicolon_terminated
+
+ # only one token on last line
+ && $ibeg_1 == $iend_1
+
+ # must be structural paren
+ && $tokens_to_go[$iend_1] eq ')'
+
+ # style must allow outdenting,
+ && !$closing_token_indentation{')'}
+
+ # only leading '&&', '||', and ':' if no others seen
+ # (but note: our count made below could be wrong
+ # due to intervening comments)
+ && ( $leading_amp_count == 0
+ || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
+
+ # but leading colons probably line up with a
+ # previous colon or question (count could be wrong).
+ && $type_ibeg_2 ne ':'
+
+ # only one step in depth allowed. this line must not
+ # begin with a ')' itself.
+ && ( $nesting_depth_to_go[$iend_1] ==
+ $nesting_depth_to_go[$iend_2] + 1 );
+
+ # YVES patch 2 of 2:
+ # Allow cuddled eval chains, like this:
+ # eval {
+ # #STUFF;
+ # 1; # return true
+ # } or do {
+ # #handle error
+ # };
+ # This patch works together with a patch in
+ # setting adjusted indentation (where the closing eval
+ # brace is outdented if possible).
+ # The problem is that an 'eval' block has continuation
+ # indentation and it looks better to undo it in some
+ # cases. If we do not use this patch we would get:
+ # eval {
+ # #STUFF;
+ # 1; # return true
+ # }
+ # or do {
+ # #handle error
+ # };
+ # The alternative, for uncuddled style, is to create
+ # a patch in final_indentation_adjustment which undoes
+ # the indentation of a leading line like 'or do {'.
+ # This doesn't work well with -icb through
+ if (
+ $block_type_to_go[$iend_1] eq 'eval'
+ && !ref( $leading_spaces_to_go[$iend_1] )
+ && !$rOpts_indent_closing_brace
+ && $tokens_to_go[$iend_2] eq '{'
+ && (
+ ( $type_ibeg_2 =~ /^(\&\&|\|\|)$/ )
+ || ( $type_ibeg_2 eq 'k'
+ && $is_and_or{ $tokens_to_go[$ibeg_2] } )
+ || $is_if_unless{ $tokens_to_go[$ibeg_2] }
+ )
+ )
+ {
+ $skip_Section_3 ||= 1;
+ }
+
+ next
+ unless (
+ $skip_Section_3
+
+ # handle '.' and '?' specially below
+ || ( $type_ibeg_2 =~ /^[\.\?]$/ )
+ );
+ }
+
+ elsif ( $type_iend_1 eq '{' ) {
+
+ # YVES
+ # honor breaks at opening brace
+ # Added to prevent recombining something like this:
+ # } || eval { package main;
+ next if $forced_breakpoint_to_go[$iend_1];
+ }
+
+ # do not recombine lines with ending &&, ||,
+ elsif ( $is_amp_amp{$type_iend_1} ) {
+ next unless $want_break_before{$type_iend_1};
+ }
+
+ # Identify and recombine a broken ?/: chain
+ elsif ( $type_iend_1 eq '?' ) {
+
+ # Do not recombine different levels
+ next
+ if (
+ $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
+
+ # do not recombine unless next line ends in :
+ next unless $type_iend_2 eq ':';
+ }
+
+ # for lines ending in a comma...
+ elsif ( $type_iend_1 eq ',' ) {
+
+ # Do not recombine at comma which is following the
+ # input bias.
+ # TODO: might be best to make a special flag
+ next if ( $old_breakpoint_to_go[$iend_1] );
+
+ # An isolated '},' may join with an identifier + ';'
+ # This is useful for the class of a 'bless' statement
+ # (bless.t)
+ if ( $type_ibeg_1 eq '}'
+ && $type_ibeg_2 eq 'i' )
+ {
+ next
+ unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
+ && ( $iend_2 == ( $ibeg_2 + 1 ) )
+ && $this_line_is_semicolon_terminated );
+
+ # override breakpoint
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
+
+ # but otherwise ..
+ else {
+
+ # do not recombine after a comma unless this will
+ # leave just 1 more line
+ next unless ( $n + 1 >= $nmax );
+
+ # do not recombine if there is a change in
+ # indentation depth
+ next
+ if ( $levels_to_go[$iend_1] !=
+ $levels_to_go[$iend_2] );
+
+ # do not recombine a "complex expression" after a
+ # comma. "complex" means no parens.
+ my $saw_paren;
+ foreach my $ii ( $ibeg_2 .. $iend_2 ) {
+ if ( $tokens_to_go[$ii] eq '(' ) {
+ $saw_paren = 1;
+ last;
+ }
+ }
+ next if $saw_paren;
+ }
+ }
+
+ # opening paren..
+ elsif ( $type_iend_1 eq '(' ) {
+
+ # No longer doing this
+ }
+
+ elsif ( $type_iend_1 eq ')' ) {
+
+ # No longer doing this
+ }
+
+ # keep a terminal for-semicolon
+ elsif ( $type_iend_1 eq 'f' ) {
+ next;
+ }
+
+ # if '=' at end of line ...
+ elsif ( $is_assignment{$type_iend_1} ) {
+
+ # keep break after = if it was in input stream
+ # this helps prevent 'blinkers'
+ next
+ if (
+ $old_breakpoint_to_go[$iend_1]
+
+ # don't strand an isolated '='
+ && $iend_1 != $ibeg_1
+ );
+
+ my $is_short_quote =
+ ( $type_ibeg_2 eq 'Q'
+ && $ibeg_2 == $iend_2
+ && token_sequence_length( $ibeg_2, $ibeg_2 ) <
+ $rOpts_short_concatenation_item_length );
+ my $is_ternary = (
+ $type_ibeg_1 eq '?' && ( $ibeg_3 >= 0
+ && $types_to_go[$ibeg_3] eq ':' )
+ );
+
+ # always join an isolated '=', a short quote, or if this
+ # will put ?/: at start of adjacent lines
+ if ( $ibeg_1 != $iend_1
+ && !$is_short_quote
+ && !$is_ternary )
+ {
+ next
+ unless (
+ (
+
+ # unless we can reduce this to two lines
+ $nmax < $n + 2
+
+ # or three lines, the last with a leading
+ # semicolon
+ || ( $nmax == $n + 2
+ && $types_to_go[$ibeg_nmax] eq ';' )
+
+ # or the next line ends with a here doc
+ || $type_iend_2 eq 'h'
+
+ # or the next line ends in an open paren or
+ # brace and the break hasn't been forced
+ # [dima.t]
+ || ( !$forced_breakpoint_to_go[$iend_1]
+ && $type_iend_2 eq '{' )
+ )
+
+ # do not recombine if the two lines might align
+ # well this is a very approximate test for this
+ && (
+
+ # RT#127633 - the leading tokens are not
+ # operators
+ ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] )
+
+ # or they are different
+ || ( $ibeg_3 >= 0
+ && $type_ibeg_2 ne
+ $types_to_go[$ibeg_3] )
+ )
+ );
+
+ if (
+
+ # Recombine if we can make two lines
+ $nmax >= $n + 2
+
+ # -lp users often prefer this:
+ # my $title = function($env, $env, $sysarea,
+ # "bubba Borrower Entry");
+ # so we will recombine if -lp is used we have
+ # ending comma
+ && !(
+ $ibeg_3 > 0
+ && ref( $leading_spaces_to_go[$ibeg_3] )
+ && $type_iend_2 eq ','
+ )
+ )
+ {
+
+ # otherwise, scan the rhs line up to last token
+ # for complexity. Note that we are not
+ # counting the last token in case it is an
+ # opening paren.
+ my $tv = 0;
+ my $depth = $nesting_depth_to_go[$ibeg_2];
+ foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
+ if ( $nesting_depth_to_go[$i] != $depth ) {
+ $tv++;
+ last if ( $tv > 1 );
+ }
+ $depth = $nesting_depth_to_go[$i];
+ }
+
+ # ok to recombine if no level changes before
+ # last token
+ if ( $tv > 0 ) {
+
+ # otherwise, do not recombine if more than
+ # two level changes.
+ next if ( $tv > 1 );
+
+ # check total complexity of the two
+ # adjacent lines that will occur if we do
+ # this join
+ my $istop =
+ ( $n < $nmax )
+ ? $ri_end->[ $n + 1 ]
+ : $iend_2;
+ foreach my $i ( $iend_2 .. $istop ) {
+ if (
+ $nesting_depth_to_go[$i] != $depth )
+ {
+ $tv++;
+ last if ( $tv > 2 );
+ }
+ $depth = $nesting_depth_to_go[$i];
+ }
+
+ # do not recombine if total is more than 2
+ # level changes
+ next if ( $tv > 2 );
+ }
+ }
+ }
+
+ unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
+ }
+
+ # for keywords..
+ elsif ( $type_iend_1 eq 'k' ) {
+
+ # make major control keywords stand out
+ # (recombine.t)
+ next
+ if (
+
+ #/^(last|next|redo|return)$/
+ $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
+
+ # but only if followed by multiple lines
+ && $n < $nmax
+ );
+
+ if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
+ next
+ unless $want_break_before{ $tokens_to_go[$iend_1]
+ };
+ }
+ }
+
+ #----------------------------------------------------------
+ # Recombine Section 3:
+ # Examine token at $ibeg_2 (left end of second line of pair)
+ #----------------------------------------------------------
+
+ # join lines identified above as capable of
+ # causing an outdented line with leading closing paren
+ # Note that we are skipping the rest of this section
+ # and the rest of the loop to do the join
+ if ($skip_Section_3) {
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ $n_best = $n;
+ last;
+ }
+
+ # handle lines with leading &&, ||
+ elsif ( $is_amp_amp{$type_ibeg_2} ) {
+
+ $leading_amp_count++;
+
+ # ok to recombine if it follows a ? or :
+ # and is followed by an open paren..
+ my $ok =
+ ( $is_ternary{$type_ibeg_1}
+ && $tokens_to_go[$iend_2] eq '(' )
+
+ # or is followed by a ? or : at same depth
+ #
+ # We are looking for something like this. We can
+ # recombine the && line with the line above to make the
+ # structure more clear:
+ # return
+ # exists $G->{Attr}->{V}
+ # && exists $G->{Attr}->{V}->{$u}
+ # ? %{ $G->{Attr}->{V}->{$u} }
+ # : ();
+ #
+ # We should probably leave something like this alone:
+ # return
+ # exists $G->{Attr}->{E}
+ # && exists $G->{Attr}->{E}->{$u}
+ # && exists $G->{Attr}->{E}->{$u}->{$v}
+ # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
+ # : ();
+ # so that we either have all of the &&'s (or ||'s)
+ # on one line, as in the first example, or break at
+ # each one as in the second example. However, it
+ # sometimes makes things worse to check for this because
+ # it prevents multiple recombinations. So this is not done.
+ || ( $ibeg_3 >= 0
+ && $is_ternary{ $types_to_go[$ibeg_3] }
+ && $nesting_depth_to_go[$ibeg_3] ==
+ $nesting_depth_to_go[$ibeg_2] );
+
+ # Combine a trailing && term with an || term: fix for
+ # c060 This is rare but can happen.
+ $ok ||= 1
+ if ( $ibeg_3 < 0
+ && $type_ibeg_2 eq '&&'
+ && $type_ibeg_1 eq '||'
+ && $nesting_depth_to_go[$ibeg_2] ==
+ $nesting_depth_to_go[$ibeg_1] );
+
+ next if !$ok && $want_break_before{$type_ibeg_2};
+ $forced_breakpoint_to_go[$iend_1] = 0;
+
+ # tweak the bond strength to give this joint priority
+ # over ? and :
+ $bs_tweak = 0.25;
+ }
+
+ # Identify and recombine a broken ?/: chain
+ elsif ( $type_ibeg_2 eq '?' ) {
+
+ # Do not recombine different levels
+ my $lev = $levels_to_go[$ibeg_2];
+ next if ( $lev ne $levels_to_go[$ibeg_1] );
+
+ # Do not recombine a '?' if either next line or
+ # previous line does not start with a ':'. The reasons
+ # are that (1) no alignment of the ? will be possible
+ # and (2) the expression is somewhat complex, so the
+ # '?' is harder to see in the interior of the line.
+ my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
+ my $precedes_colon =
+ $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
+ next unless ( $follows_colon || $precedes_colon );
+
+ # we will always combining a ? line following a : line
+ if ( !$follows_colon ) {
+
+ # ...otherwise recombine only if it looks like a
+ # chain. we will just look at a few nearby lines
+ # to see if this looks like a chain.
+ my $local_count = 0;
+ foreach
+ my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 )
+ {
+ $local_count++
+ if $ii >= 0
+ && $types_to_go[$ii] eq ':'
+ && $levels_to_go[$ii] == $lev;
+ }
+ next unless ( $local_count > 1 );
+ }
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
+
+ # do not recombine lines with leading '.'
+ elsif ( $type_ibeg_2 eq '.' ) {
+ my $i_next_nonblank =
+ min( $inext_to_go[$ibeg_2], $iend_2 );
+ next
+ unless (
+
+ # ... unless there is just one and we can reduce
+ # this to two lines if we do. For example, this
+ #
+ #
+ # $bodyA .=
+ # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
+ #
+ # looks better than this:
+ # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
+ # . '$args .= $pat;'
+
+ (
+ $n == 2
+ && $n == $nmax
+ && $type_ibeg_1 ne $type_ibeg_2
+ )
+
+ # ... or this would strand a short quote , like this
+ # . "some long quote"
+ # . "\n";
+
+ || ( $types_to_go[$i_next_nonblank] eq 'Q'
+ && $i_next_nonblank >= $iend_2 - 1
+ && $token_lengths_to_go[$i_next_nonblank] <
+ $rOpts_short_concatenation_item_length )
+ );
+ }
+
+ # handle leading keyword..
+ elsif ( $type_ibeg_2 eq 'k' ) {
+
+ # handle leading "or"
+ if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
+ next
+ unless (
+ $this_line_is_semicolon_terminated
+ && (
+ $type_ibeg_1 eq '}'
+ || (
+
+ # following 'if' or 'unless' or 'or'
+ $type_ibeg_1 eq 'k'
+ && $is_if_unless{ $tokens_to_go[$ibeg_1]
+ }
+
+ # important: only combine a very simple
+ # or statement because the step below
+ # may have combined a trailing 'and'
+ # with this or, and we do not want to
+ # then combine everything together
+ && ( $iend_2 - $ibeg_2 <= 7 )
+ )
+ )
+ );
+
+ #X: RT #81854
+ $forced_breakpoint_to_go[$iend_1] = 0
+ unless ( $old_breakpoint_to_go[$iend_1] );
+ }
+
+ # handle leading 'and' and 'xor'
+ elsif ($tokens_to_go[$ibeg_2] eq 'and'
+ || $tokens_to_go[$ibeg_2] eq 'xor' )
+ {
+
+ # Decide if we will combine a single terminal 'and'
+ # after an 'if' or 'unless'.
+
+ # This looks best with the 'and' on the same
+ # line as the 'if':
+ #
+ # $a = 1
+ # if $seconds and $nu < 2;
+ #
+ # But this looks better as shown:
+ #
+ # $a = 1
+ # if !$this->{Parents}{$_}
+ # or $this->{Parents}{$_} eq $_;
+ #
+ next
+ unless (
+ $this_line_is_semicolon_terminated
+ && (
+
+ # following 'if' or 'unless' or 'or'
+ $type_ibeg_1 eq 'k'
+ && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
+ || $tokens_to_go[$ibeg_1] eq 'or' )
+ )
+ );
+ }
+
+ # handle leading "if" and "unless"
+ elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
+
+ # Combine something like:
+ # next
+ # if ( $lang !~ /${l}$/i );
+ # into:
+ # next if ( $lang !~ /${l}$/i );
+ next
+ unless (
+ $this_line_is_semicolon_terminated
+
+ # previous line begins with 'and' or 'or'
+ && $type_ibeg_1 eq 'k'
+ && $is_and_or{ $tokens_to_go[$ibeg_1] }
+
+ );
+ }
+
+ # handle all other leading keywords
+ else {
+
+ # keywords look best at start of lines,
+ # but combine things like "1 while"
+ unless ( $is_assignment{$type_iend_1} ) {
+ next
+ if ( ( $type_iend_1 ne 'k' )
+ && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
+ }
+ }
+ }
+
+ # similar treatment of && and || as above for 'and' and
+ # 'or': NOTE: This block of code is currently bypassed
+ # because of a previous block but is retained for possible
+ # future use.
+ elsif ( $is_amp_amp{$type_ibeg_2} ) {
+
+ # maybe looking at something like:
+ # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
+
+ next
+ unless (
+ $this_line_is_semicolon_terminated
+
+ # previous line begins with an 'if' or 'unless'
+ # keyword
+ && $type_ibeg_1 eq 'k'
+ && $is_if_unless{ $tokens_to_go[$ibeg_1] }
+
+ );
+ }
+
+ # handle line with leading = or similar
+ elsif ( $is_assignment{$type_ibeg_2} ) {
+ next unless ( $n == 1 || $n == $nmax );
+ next if ( $old_breakpoint_to_go[$iend_1] );
+ next
+ unless (
+
+ # unless we can reduce this to two lines
+ $nmax == 2
+
+ # or three lines, the last with a leading semicolon
+ || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
+
+ # or the next line ends with a here doc
+ || $type_iend_2 eq 'h'
+
+ # or this is a short line ending in ;
+ || ( $n == $nmax
+ && $this_line_is_semicolon_terminated )
+ );
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
+
+ #----------------------------------------------------------
+ # Recombine Section 4:
+ # Combine the lines if we arrive here and it is possible
+ #----------------------------------------------------------
+
+ # honor hard breakpoints
+ next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
+
+ my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
+
+ # Require a few extra spaces before recombining lines if we are
+ # at an old breakpoint unless this is a simple list or terminal
+ # line. The goal is to avoid oscillating between two
+ # quasi-stable end states. For example this snippet caused
+ # problems:
+## my $this =
+## bless {
+## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
+## },
+## $type;
+ next
+ if ( $old_breakpoint_to_go[$iend_1]
+ && !$this_line_is_semicolon_terminated
+ && $n < $nmax
+ && $excess + 4 > 0
+ && $type_iend_2 ne ',' );
+
+ # do not recombine if we would skip in indentation levels
+ if ( $n < $nmax ) {
+ my $if_next = $ri_beg->[ $n + 1 ];
+ next
+ if (
+ $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
+ && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
+
+ # but an isolated 'if (' is undesirable
+ && !(
+ $n == 1
+ && $iend_1 - $ibeg_1 <= 2
+ && $type_ibeg_1 eq 'k'
+ && $tokens_to_go[$ibeg_1] eq 'if'
+ && $tokens_to_go[$iend_1] ne '('
+ )
+ );
+ }
+
+ # honor no-break's
+ ## next if ( $bs >= NO_BREAK - 1 ); # removed for b1257
+
+ # remember the pair with the greatest bond strength
+ if ( !$n_best ) {
+ $n_best = $n;
+ $bs_best = $bs;
+ }
+ else {
+
+ if ( $bs > $bs_best ) {
+ $n_best = $n;
+ $bs_best = $bs;
+ }
+ }
+ }
+
+ # recombine the pair with the greatest bond strength
+ if ($n_best) {
+ splice @{$ri_beg}, $n_best, 1;
+ splice @{$ri_end}, $n_best - 1, 1;
+ splice @joint, $n_best, 1;
+
+ # keep going if we are still making progress
+ $more_to_do++;
+ }
+ } # end iteration loop
+
+ } # end loop over sections
+
+ RETURN:
+
+ if (DEBUG_RECOMBINE) {
+ my $nmax = @{$ri_end} - 1;
+ print STDERR
+"exiting recombine with $nmax lines, starting lines=$nmax_start, iterations=$it_count, max_it=$it_count_max numsec=$num_sections\n";
+ }
+ return;
+ }
+} ## end closure recombine_breakpoints
+
+sub insert_final_ternary_breaks {
+
+ my ( $self, $ri_left, $ri_right ) = @_;
+
+ # Called once per batch to look for and do any final line breaks for
+ # long ternary chains
+
+ my $nmax = @{$ri_right} - 1;
+
+ # scan the left and right end tokens of all lines
+ my $count = 0;
+ my $i_first_colon = -1;
+ for my $n ( 0 .. $nmax ) {
+ my $il = $ri_left->[$n];
+ my $ir = $ri_right->[$n];
+ my $typel = $types_to_go[$il];
+ my $typer = $types_to_go[$ir];
+ return if ( $typel eq '?' );
+ return if ( $typer eq '?' );
+ if ( $typel eq ':' ) { $i_first_colon = $il; last; }
+ elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
+ }
+
+ # For long ternary chains,
+ # if the first : we see has its ? is in the interior
+ # of a preceding line, then see if there are any good
+ # breakpoints before the ?.
+ if ( $i_first_colon > 0 ) {
+ my $i_question = $mate_index_to_go[$i_first_colon];
+ if ( $i_question > 0 ) {
+ my @insert_list;
+ for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
+ my $token = $tokens_to_go[$ii];
+ my $type = $types_to_go[$ii];
+
+ # For now, a good break is either a comma or,
+ # in a long chain, a 'return'.
+ # Patch for RT #126633: added the $nmax>1 check to avoid
+ # breaking after a return for a simple ternary. For longer
+ # chains the break after return allows vertical alignment, so
+ # it is still done. So perltidy -wba='?' will not break
+ # immediately after the return in the following statement:
+ # sub x {
+ # return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' :
+ # 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb';
+ # }
+ if (
+ (
+ $type eq ','
+ || $type eq 'k' && ( $nmax > 1 && $token eq 'return' )
+ )
+ && $self->in_same_container_i( $ii, $i_question )
+ )
+ {
+ push @insert_list, $ii;
+ last;
+ }
+ }
+
+ # insert any new break points
+ if (@insert_list) {
+ $self->insert_additional_breaks( \@insert_list, $ri_left,
+ $ri_right );
+ }
+ }
+ }
+ return;
+}
+
+sub insert_breaks_before_list_opening_containers {
+
+ my ( $self, $ri_left, $ri_right ) = @_;
+
+ # This routine is called once per batch to implement the parameters
+ # --break-before-hash-brace, etc.
+
+ # Nothing to do if none of these parameters has been set
+ return unless %break_before_container_types;
+
+ my $nmax = @{$ri_right} - 1;
+ return unless ( $nmax >= 0 );
+
+ my $rLL = $self->[_rLL_];
+
+ my $rbreak_before_container_by_seqno =
+ $self->[_rbreak_before_container_by_seqno_];
+ my $rK_weld_left = $self->[_rK_weld_left_];
+
+ # scan the ends of all lines
+ my @insert_list;
+ for my $n ( 0 .. $nmax ) {
+ my $il = $ri_left->[$n];
+ my $ir = $ri_right->[$n];
+ next unless ( $ir > $il );
+ my $Kl = $K_to_go[$il];
+ my $Kr = $K_to_go[$ir];
+ my $Kend = $Kr;
+ my $type_end = $rLL->[$Kr]->[_TYPE_];
+
+ # Backup before any side comment
+ if ( $type_end eq '#' ) {
+ $Kend = $self->K_previous_nonblank($Kr);
+ next unless defined($Kend);
+ $type_end = $rLL->[$Kend]->[_TYPE_];
+ }
+
+ # Backup to the start of any weld; fix for b1173.
+ if ($total_weld_count) {
+ my $Kend_test = $rK_weld_left->{$Kend};
+ if ( defined($Kend_test) && $Kend_test > $Kl ) {
+ $Kend = $Kend_test;
+ $Kend_test = $rK_weld_left->{$Kend};
+ }
+
+ # Do not break if we did not back up to the start of a weld
+ # (shouldn't happen)
+ next if ( defined($Kend_test) );
+ }
+
+ my $token = $rLL->[$Kend]->[_TOKEN_];
+ next unless ( $is_opening_token{$token} );
+ next unless ( $Kl < $Kend - 1 );
+
+ my $seqno = $rLL->[$Kend]->[_TYPE_SEQUENCE_];
+ next unless ( defined($seqno) );
+
+ # Use the flag which was previously set
+ next unless ( $rbreak_before_container_by_seqno->{$seqno} );
+
+ # Install a break before this opening token.
+ my $Kbreak = $self->K_previous_nonblank($Kend);
+ my $ibreak = $Kbreak - $Kl + $il;
+ next if ( $ibreak < $il );
+ next if ( $nobreak_to_go[$ibreak] );
+ push @insert_list, $ibreak;
+ }
+
+ # insert any new break points
+ if (@insert_list) {
+ $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
+ }
+ return;
+}
+
+sub note_added_semicolon {
+ my ( $self, $line_number ) = @_;
+ $self->[_last_added_semicolon_at_] = $line_number;
+ if ( $self->[_added_semicolon_count_] == 0 ) {
+ $self->[_first_added_semicolon_at_] = $line_number;
+ }
+ $self->[_added_semicolon_count_]++;
+ write_logfile_entry("Added ';' here\n");
+ return;
+}
+
+sub note_deleted_semicolon {
+ my ( $self, $line_number ) = @_;
+ $self->[_last_deleted_semicolon_at_] = $line_number;
+ if ( $self->[_deleted_semicolon_count_] == 0 ) {
+ $self->[_first_deleted_semicolon_at_] = $line_number;
+ }
+ $self->[_deleted_semicolon_count_]++;
+ write_logfile_entry("Deleted unnecessary ';' at line $line_number\n");
+ return;
+}
+
+sub note_embedded_tab {
+ my ( $self, $line_number ) = @_;
+ $self->[_embedded_tab_count_]++;
+ $self->[_last_embedded_tab_at_] = $line_number;
+ if ( !$self->[_first_embedded_tab_at_] ) {
+ $self->[_first_embedded_tab_at_] = $line_number;
+ }
+
+ if ( $self->[_embedded_tab_count_] <= MAX_NAG_MESSAGES ) {
+ write_logfile_entry("Embedded tabs in quote or pattern\n");
+ }
+ return;
+}
+
+use constant DEBUG_CORRECT_LP => 0;
+
+sub correct_lp_indentation {
+
+ # When the -lp option is used, we need to make a last pass through
+ # each line to correct the indentation positions in case they differ
+ # from the predictions. This is necessary because perltidy uses a
+ # predictor/corrector method for aligning with opening parens. The
+ # predictor is usually good, but sometimes stumbles. The corrector
+ # tries to patch things up once the actual opening paren locations
+ # are known.
+ my ( $self, $ri_first, $ri_last ) = @_;
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $do_not_pad = 0;
+
+ # Note on flag '$do_not_pad':
+ # We want to avoid a situation like this, where the aligner inserts
+ # whitespace before the '=' to align it with a previous '=', because
+ # otherwise the parens might become mis-aligned in a situation like
+ # this, where the '=' has become aligned with the previous line,
+ # pushing the opening '(' forward beyond where we want it.
+ #
+ # $mkFloor::currentRoom = '';
+ # $mkFloor::c_entry = $c->Entry(
+ # -width => '10',
+ # -relief => 'sunken',
+ # ...
+ # );
+ #
+ # We leave it to the aligner to decide how to do this.
+
+ # first remove continuation indentation if appropriate
+ my $rLL = $self->[_rLL_];
+ my $max_line = @{$ri_first} - 1;
+
+ #---------------------------------------------------------------------------
+ # PASS 1: reduce indentation if necessary at any long one-line blocks (c098)
+ #---------------------------------------------------------------------------
+
+ # The point is that sub 'starting_one_line_block' made one-line blocks based
+ # on default indentation, not -lp indentation. So some of the one-line
+ # blocks may be too long when given -lp indentation. We will fix that now
+ # if possible, using the list of these closing block indexes.
+ my $ri_starting_one_line_block =
+ $self->[_this_batch_]->[_ri_starting_one_line_block_];
+ if ( @{$ri_starting_one_line_block} ) {
+ my @ilist = @{$ri_starting_one_line_block};
+ my $inext = shift(@ilist);
+
+ # loop over lines, checking length of each with a one-line block
+ my ( $ibeg, $iend );
+ foreach my $line ( 0 .. $max_line ) {
+ $iend = $ri_last->[$line];
+ next if ( $inext > $iend );
+ $ibeg = $ri_first->[$line];
+
+ # This is just for lines with indentation objects (c098)
+ my $excess =
+ ref( $leading_spaces_to_go[$ibeg] )
+ ? $self->excess_line_length( $ibeg, $iend )
+ : 0;
+
+ if ( $excess > 0 ) {
+ my $available_spaces = $self->get_available_spaces_to_go($ibeg);
+
+ if ( $available_spaces > 0 ) {
+ my $delete_want = min( $available_spaces, $excess );
+ my $deleted_spaces =
+ $self->reduce_lp_indentation( $ibeg, $delete_want );
+ $available_spaces =
+ $self->get_available_spaces_to_go($ibeg);
+ }
+ }
+
+ # skip forward to next one-line block to check
+ while (@ilist) {
+ $inext = shift @ilist;
+ next if ( $inext <= $iend );
+ last if ( $inext > $iend );
+ }
+ last if ( $inext <= $iend );
+ }
+ }
+
+ #-------------------------------------------------------------------
+ # PASS 2: look for and fix other problems in each line of this batch
+ #-------------------------------------------------------------------
+
+ # look at each output line ...
+ my ( $ibeg, $iend );
+ foreach my $line ( 0 .. $max_line ) {
+ $ibeg = $ri_first->[$line];
+ $iend = $ri_last->[$line];
+
+ # looking at each token in this output line ...
+ foreach my $i ( $ibeg .. $iend ) {
+
+ # How many space characters to place before this token
+ # for special alignment. Actual padding is done in the
+ # continue block.
+
+ # looking for next unvisited indentation item ...
+ my $indentation = $leading_spaces_to_go[$i];
+
+ # This is just for indentation objects (c098)
+ next unless ( ref($indentation) );
+
+ # Visit each indentation object just once
+ next if ( $indentation->get_marked() );
+
+ # Mark first visit
+ $indentation->set_marked(1);
+
+ # Skip indentation objects which do not align with container tokens
+ my $align_seqno = $indentation->get_align_seqno();
+ next unless ($align_seqno);
+
+ # Skip a container which is entirely on this line
+ my $Ko = $K_opening_container->{$align_seqno};
+ my $Kc = $K_closing_container->{$align_seqno};
+ if ( defined($Ko) && defined($Kc) ) {
+ next if ( $Ko >= $K_to_go[$ibeg] && $Kc <= $K_to_go[$iend] );
+ }
+
+ if ( $line == 1 && $i == $ibeg ) {
+ $do_not_pad = 1;
+ }
+
+ #--------------------------------------------
+ # Now see what the error is and try to fix it
+ #--------------------------------------------
+ my $closing_index = $indentation->get_closed();
+ my $predicted_pos = $indentation->get_spaces();
+
+ # Find actual position:
+ my $actual_pos;
+
+ if ( $i == $ibeg ) {
+
+ # Case 1: token is first character of of batch - table lookup
+ if ( $line == 0 ) {
+
+ $actual_pos = $predicted_pos;
+
+ my ( $indent, $offset, $is_leading, $exists ) =
+ get_saved_opening_indentation($align_seqno);
+ if ( defined($indent) ) {
+
+ # FIXME: should use '1' here if no space after opening
+ # and '2' if want space; hardwired at 1 like -gnu-style
+ $actual_pos = get_spaces($indent) + $offset + 1;
+ }
+ }
+
+ # Case 2: token starts a new line - use length of previous line
+ else {
+
+ my $ibegm = $ri_first->[ $line - 1 ];
+ my $iendm = $ri_last->[ $line - 1 ];
+ $actual_pos = total_line_length( $ibegm, $iendm );
+
+ # follow -pt style
+ ++$actual_pos
+ if ( $types_to_go[ $iendm + 1 ] eq 'b' );
+
+ }
+ }
+
+ # Case 3: $i>$ibeg: token is mid-line - use length to previous token
+ else {
+
+ $actual_pos = total_line_length( $ibeg, $i - 1 );
+
+ # for mid-line token, we must check to see if all
+ # additional lines have continuation indentation,
+ # and remove it if so. Otherwise, we do not get
+ # good alignment.
+ if ( $closing_index > $iend ) {
+ my $ibeg_next = $ri_first->[ $line + 1 ];
+ if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
+ $self->undo_lp_ci( $line, $i, $closing_index,
+ $ri_first, $ri_last );
+ }
+ }
+ }
+
+ # By how many spaces (plus or minus) would we need to increase the
+ # indentation to get alignment with the opening token?
+ my $move_right = $actual_pos - $predicted_pos;
+
+ if (DEBUG_CORRECT_LP) {
+ my $tok = substr( $tokens_to_go[$i], 0, 8 );
+ my $avail = $self->get_available_spaces_to_go($ibeg);
+ print
+"CORRECT_LP for seq=$align_seqno, predicted pos=$predicted_pos actual=$actual_pos => move right=$move_right available=$avail i=$i max=$max_index_to_go tok=$tok\n";
+ }
+
+ # nothing more to do if no error to correct (gnu2.t)
+ if ( $move_right == 0 ) {
+ $indentation->set_recoverable_spaces($move_right);
+ next;
+ }
+
+ # Get any collapsed length defined for -xlp
+ my $collapsed_length =
+ $self->[_rcollapsed_length_by_seqno_]->{$align_seqno};
+ $collapsed_length = 0 unless ( defined($collapsed_length) );
+
+ if (DEBUG_CORRECT_LP) {
+ print
+"CORRECT_LP for seq=$align_seqno, collapsed length is $collapsed_length\n";
+ }
+
+ # if we have not seen closure for this indentation in this batch,
+ # and do not have a collapsed length estimate, we can only pass on
+ # a request to the vertical aligner
+ if ( $closing_index < 0 && !$collapsed_length ) {
+ $indentation->set_recoverable_spaces($move_right);
+ next;
+ }
+
+ # If necessary, look ahead to see if there is really any leading
+ # whitespace dependent on this whitespace, and also find the
+ # longest line using this whitespace. Since it is always safe to
+ # move left if there are no dependents, we only need to do this if
+ # we may have dependent nodes or need to move right.
+
+ my $have_child = $indentation->get_have_child();
+ my %saw_indentation;
+ my $line_count = 1;
+ $saw_indentation{$indentation} = $indentation;
+
+ # How far can we move right before we hit the limit?
+ # let $right_margen = the number of spaces that we can increase
+ # the current indentation before hitting the maximum line length.
+ my $right_margin = 0;
+
+ if ( $have_child || $move_right > 0 ) {
+ $have_child = 0;
+
+ # include estimated collapsed length for incomplete containers
+ my $max_length = 0;
+ if ( $Kc > $K_to_go[$max_index_to_go] ) {
+ $max_length = $collapsed_length + $predicted_pos;
+ }
+
+ if ( $i == $ibeg ) {
+ my $length = total_line_length( $ibeg, $iend );
+ if ( $length > $max_length ) { $max_length = $length }
+ }
+
+ # look ahead at the rest of the lines of this batch..
+ foreach my $line_t ( $line + 1 .. $max_line ) {
+ my $ibeg_t = $ri_first->[$line_t];
+ my $iend_t = $ri_last->[$line_t];
+ last if ( $closing_index <= $ibeg_t );
+
+ # remember all different indentation objects
+ my $indentation_t = $leading_spaces_to_go[$ibeg_t];
+ $saw_indentation{$indentation_t} = $indentation_t;
+ $line_count++;
+
+ # remember longest line in the group
+ my $length_t = total_line_length( $ibeg_t, $iend_t );
+ if ( $length_t > $max_length ) {
+ $max_length = $length_t;
+ }
+ }
+
+ $right_margin =
+ $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] -
+ $max_length;
+ if ( $right_margin < 0 ) { $right_margin = 0 }
+ }
+
+ my $first_line_comma_count =
+ grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
+ my $comma_count = $indentation->get_comma_count();
+ my $arrow_count = $indentation->get_arrow_count();
+
+ # This is a simple approximate test for vertical alignment:
+ # if we broke just after an opening paren, brace, bracket,
+ # and there are 2 or more commas in the first line,
+ # and there are no '=>'s,
+ # then we are probably vertically aligned. We could set
+ # an exact flag in sub break_lists, but this is good
+ # enough.
+ my $indentation_count = keys %saw_indentation;
+ my $is_vertically_aligned =
+ ( $i == $ibeg
+ && $first_line_comma_count > 1
+ && $indentation_count == 1
+ && ( $arrow_count == 0 || $arrow_count == $line_count ) );
+
+ # Make the move if possible ..
+ if (
+
+ # we can always move left
+ $move_right < 0
+
+ # -xlp
+
+ # incomplete container
+ || ( $rOpts_extended_line_up_parentheses
+ && $Kc > $K_to_go[$max_index_to_go] )
+ || $closing_index < 0
+
+ # but we should only move right if we are sure it will
+ # not spoil vertical alignment
+ || ( $comma_count == 0 )
+ || ( $comma_count > 0 && !$is_vertically_aligned )
+ )
+ {
+ my $move =
+ ( $move_right <= $right_margin )
+ ? $move_right
+ : $right_margin;
+
+ if (DEBUG_CORRECT_LP) {
+ print
+ "CORRECT_LP for seq=$align_seqno, moving $move spaces\n";
+ }
+
+ foreach ( keys %saw_indentation ) {
+ $saw_indentation{$_}
+ ->permanently_decrease_available_spaces( -$move );
+ }
+ }
+
+ # Otherwise, record what we want and the vertical aligner
+ # will try to recover it.
+ else {
+ $indentation->set_recoverable_spaces($move_right);
+ }
+ } ## end loop over tokens in a line
+ } ## end loop over lines
+ return $do_not_pad;
+}
+
+sub undo_lp_ci {
+
+ # If there is a single, long parameter within parens, like this:
+ #
+ # $self->command( "/msg "
+ # . $infoline->chan
+ # . " You said $1, but did you know that it's square was "
+ # . $1 * $1 . " ?" );
+ #
+ # we can remove the continuation indentation of the 2nd and higher lines
+ # to achieve this effect, which is more pleasing:
+ #
+ # $self->command("/msg "
+ # . $infoline->chan
+ # . " You said $1, but did you know that it's square was "
+ # . $1 * $1 . " ?");
+
+ my ( $self, $line_open, $i_start, $closing_index, $ri_first, $ri_last ) =
+ @_;
+ my $max_line = @{$ri_first} - 1;
+
+ # must be multiple lines
+ return unless $max_line > $line_open;
+
+ my $lev_start = $levels_to_go[$i_start];
+ my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
+
+ # see if all additional lines in this container have continuation
+ # indentation
+ my $n;
+ my $line_1 = 1 + $line_open;
+ for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
+ my $ibeg = $ri_first->[$n];
+ my $iend = $ri_last->[$n];
+ if ( $ibeg eq $closing_index ) { $n--; last }
+ return if ( $lev_start != $levels_to_go[$ibeg] );
+ return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
+ last if ( $closing_index <= $iend );
+ }
+
+ # we can reduce the indentation of all continuation lines
+ my $continuation_line_count = $n - $line_open;
+ @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
+ (0) x ($continuation_line_count);
+ @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
+ @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
+ return;
+}
+
+###############################################
+# CODE SECTION 10: Code to break long statments
+###############################################
+
+sub break_long_lines {
+
+ #-----------------------------------------------------------
+ # Break a batch of tokens into lines which do not exceed the
+ # maximum line length.
+ #-----------------------------------------------------------
+
+ # Define an array of indexes for inserting newline characters to
+ # keep the line lengths below the maximum desired length. There is
+ # an implied break after the last token, so it need not be included.
+
+ # Method:
+ # This routine is part of series of routines which adjust line
+ # lengths. It is only called if a statement is longer than the
+ # maximum line length, or if a preliminary scanning located
+ # desirable break points. Sub break_lists has already looked at
+ # these tokens and set breakpoints (in array
+ # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
+ # after commas, after opening parens, and before closing parens).
+ # This routine will honor these breakpoints and also add additional
+ # breakpoints as necessary to keep the line length below the maximum
+ # requested. It bases its decision on where the 'bond strength' is
+ # lowest.
+
+ # Output: returns references to the arrays:
+ # @i_first
+ # @i_last
+ # which contain the indexes $i of the first and last tokens on each
+ # line.
+
+ # In addition, the array:
+ # $forced_breakpoint_to_go[$i]
+ # may be updated to be =1 for any index $i after which there must be
+ # a break. This signals later routines not to undo the breakpoint.
+
+ my ( $self, $saw_good_break, $rcolon_list ) = @_;
+
+ # @{$rcolon_list} is a list of all the ? and : tokens in the batch, in
+ # order.
+
+ use constant DEBUG_BREAK_LINES => 0;
+
+ my @i_first = (); # the first index to output
+ my @i_last = (); # the last index to output
+ my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
+ if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
+
+ $self->set_bond_strengths();
+
+ my $imin = 0;
+ my $imax = $max_index_to_go;
+ if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
+ if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
+ my $i_begin = $imin; # index for starting next iteration
+
+ my $leading_spaces = leading_spaces_to_go($imin);
+ my $line_count = 0;
+ my $last_break_strength = NO_BREAK;
+ my $i_last_break = -1;
+ my $max_bias = 0.001;
+ my $tiny_bias = 0.0001;
+ my $leading_alignment_token = "";
+ my $leading_alignment_type = "";
+
+ # see if any ?/:'s are in order
+ my $colons_in_order = 1;
+ my $last_tok = "";
+ foreach ( @{$rcolon_list} ) {
+ if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
+ $last_tok = $_;
+ }
+
+ # This is a sufficient but not necessary condition for colon chain
+ my $is_colon_chain = ( $colons_in_order && @{$rcolon_list} > 2 );
+
+ my $Msg = "";
+
+ #-------------------------------------------------------
+ # BEGINNING of main loop to set continuation breakpoints
+ # Keep iterating until we reach the end
+ #-------------------------------------------------------
+ while ( $i_begin <= $imax ) {
+ my $lowest_strength = NO_BREAK;
+ my $starting_sum = $summed_lengths_to_go[$i_begin];
+ my $i_lowest = -1;
+ my $i_test = -1;
+ my $lowest_next_token = '';
+ my $lowest_next_type = 'b';
+ my $i_lowest_next_nonblank = -1;
+ my $maximum_line_length =
+ $maximum_line_length_at_level[ $levels_to_go[$i_begin] ];
+
+ # Do not separate an isolated bare word from an opening paren.
+ # Alternate Fix #2 for issue b1299. This waits as long as possible
+ # to make the decision.
+ if ( $types_to_go[$i_begin] eq 'i'
+ && substr( $tokens_to_go[$i_begin], 0, 1 ) =~ /\w/ )
+ {
+ my $i_next_nonblank = $inext_to_go[$i_begin];
+ if ( $tokens_to_go[$i_next_nonblank] eq '(' ) {
+ $bond_strength_to_go[$i_begin] = NO_BREAK;
+ }
+ }
+
+ #-------------------------------------------------------
+ # BEGINNING of inner loop to find the best next breakpoint
+ #-------------------------------------------------------
+ my $strength = NO_BREAK;
+ for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
+ my $type = $types_to_go[$i_test];
+ my $token = $tokens_to_go[$i_test];
+ my $next_type = $types_to_go[ $i_test + 1 ];
+ my $next_token = $tokens_to_go[ $i_test + 1 ];
+ my $i_next_nonblank = $inext_to_go[$i_test];
+ my $next_nonblank_type = $types_to_go[$i_next_nonblank];
+ my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
+ my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
+
+ # adjustments to the previous bond strength may have been made, and
+ # we must keep the bond strength of a token and its following blank
+ # the same;
+ my $last_strength = $strength;
+ $strength = $bond_strength_to_go[$i_test];
+ if ( $type eq 'b' ) { $strength = $last_strength }
+
+ # reduce strength a bit to break ties at an old comma breakpoint ...
+ if (
+
+ $old_breakpoint_to_go[$i_test]
+
+ # Patch: limited to just commas to avoid blinking states
+ && $type eq ','
+
+ # which is a 'good' breakpoint, meaning ...
+ # we don't want to break before it
+ && !$want_break_before{$type}
+
+ # and either we want to break before the next token
+ # or the next token is not short (i.e. not a '*', '/' etc.)
+ && $i_next_nonblank <= $imax
+ && ( $want_break_before{$next_nonblank_type}
+ || $token_lengths_to_go[$i_next_nonblank] > 2
+ || $next_nonblank_type eq ','
+ || $is_opening_type{$next_nonblank_type} )
+ )
+ {
+ $strength -= $tiny_bias;
+ DEBUG_BREAK_LINES && do { $Msg .= " :-bias at i=$i_test" };
+ }
+
+ # otherwise increase strength a bit if this token would be at the
+ # maximum line length. This is necessary to avoid blinking
+ # in the above example when the -iob flag is added.
+ else {
+ my $len =
+ $leading_spaces +
+ $summed_lengths_to_go[ $i_test + 1 ] -
+ $starting_sum;
+ if ( $len >= $maximum_line_length ) {
+ $strength += $tiny_bias;
+ DEBUG_BREAK_LINES && do { $Msg .= " :+bias at i=$i_test" };
+ }
+ }
+
+ my $must_break = 0;
+
+ # Force an immediate break at certain operators
+ # with lower level than the start of the line,
+ # unless we've already seen a better break.
+ #
+ #------------------------------------
+ # Note on an issue with a preceding ?
+ #------------------------------------
+ # We don't include a ? in the above list, but there may
+ # be a break at a previous ? if the line is long.
+ # Because of this we do not want to force a break if
+ # there is a previous ? on this line. For now the best way
+ # to do this is to not break if we have seen a lower strength
+ # point, which is probably a ?.
+ #
+ # Example of unwanted breaks we are avoiding at a '.' following a ?
+ # from pod2html using perltidy -gnu:
+ # )
+ # ? "\n<A NAME=\""
+ # . $value
+ # . "\">\n$text</A>\n"
+ # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
+ if (
+ ( $strength <= $lowest_strength )
+ && ( $nesting_depth_to_go[$i_begin] >
+ $nesting_depth_to_go[$i_next_nonblank] )
+ && (
+ $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
+ || ( $next_nonblank_type eq 'k'
+ && $next_nonblank_token =~ /^(and|or)$/ )
+ )
+ )
+ {
+ $self->set_forced_breakpoint($i_next_nonblank);
+ DEBUG_BREAK_LINES
+ && do { $Msg .= " :Forced break at i=$i_next_nonblank" };
+ }
+
+ if (
+
+ # Try to put a break where requested by break_lists
+ $forced_breakpoint_to_go[$i_test]
+
+ # break between ) { in a continued line so that the '{' can
+ # be outdented
+ # See similar logic in break_lists which catches instances
+ # where a line is just something like ') {'. We have to
+ # be careful because the corresponding block keyword might
+ # not be on the first line, such as 'for' here:
+ #
+ # eval {
+ # for ("a") {
+ # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
+ # }
+ # };
+ #
+ || (
+ $line_count
+ && ( $token eq ')' )
+ && ( $next_nonblank_type eq '{' )
+ && ($next_nonblank_block_type)
+ && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
+
+ # RT #104427: Dont break before opening sub brace because
+ # sub block breaks handled at higher level, unless
+ # it looks like the preceding list is long and broken
+ && !(
+
+ (
+ $next_nonblank_block_type =~ /$SUB_PATTERN/
+ || $next_nonblank_block_type =~ /$ASUB_PATTERN/
+ )
+ && ( $nesting_depth_to_go[$i_begin] ==
+ $nesting_depth_to_go[$i_next_nonblank] )
+ )
+
+ && !$rOpts_opening_brace_always_on_right
+ )
+
+ # There is an implied forced break at a terminal opening brace
+ || ( ( $type eq '{' ) && ( $i_test == $imax ) )
+ )
+ {
+
+ # Forced breakpoints must sometimes be overridden, for example
+ # because of a side comment causing a NO_BREAK. It is easier
+ # to catch this here than when they are set.
+ if ( $strength < NO_BREAK - 1 ) {
+ $strength = $lowest_strength - $tiny_bias;
+ $must_break = 1;
+ DEBUG_BREAK_LINES
+ && do { $Msg .= " :set must_break at i=$i_next_nonblank" };
+ }
+ }
+
+ # quit if a break here would put a good terminal token on
+ # the next line and we already have a possible break
+ if (
+ !$must_break
+ && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
+ && (
+ (
+ $leading_spaces +
+ $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
+ $starting_sum
+ ) > $maximum_line_length
+ )
+ )
+ {
+ if ( $i_lowest >= 0 ) {
+ DEBUG_BREAK_LINES && do {
+ $Msg .= " :quit at good terminal='$next_nonblank_type'";
+ };
+ last;
+ }
+ }
+
+ # Avoid a break which would strand a single punctuation
+ # token. For example, we do not want to strand a leading
+ # '.' which is followed by a long quoted string.
+ # But note that we do want to do this with -extrude (l=1)
+ # so please test any changes to this code on -extrude.
+ if (
+ !$must_break
+ && ( $i_test == $i_begin )
+ && ( $i_test < $imax )
+ && ( $token eq $type )
+ && (
+ (
+ $leading_spaces +
+ $summed_lengths_to_go[ $i_test + 1 ] -
+ $starting_sum
+ ) < $maximum_line_length
+ )
+ )
+ {
+ $i_test = min( $imax, $inext_to_go[$i_test] );
+ DEBUG_BREAK_LINES && do {
+ $Msg .= " :redo at i=$i_test";
+ };
+ redo;
+ }
+
+ if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
+ {
+
+ # break at previous best break if it would have produced
+ # a leading alignment of certain common tokens, and it
+ # is different from the latest candidate break
+ if ($leading_alignment_type) {
+ DEBUG_BREAK_LINES && do {
+ $Msg .=
+" :last at leading_alignment='$leading_alignment_type'";
+ };
+ last;
+ }
+
+ # Force at least one breakpoint if old code had good
+ # break It is only called if a breakpoint is required or
+ # desired. This will probably need some adjustments
+ # over time. A goal is to try to be sure that, if a new
+ # side comment is introduced into formatted text, then
+ # the same breakpoints will occur. scbreak.t
+ if (
+ $i_test == $imax # we are at the end
+ && !get_forced_breakpoint_count()
+ && $saw_good_break # old line had good break
+ && $type =~ /^[#;\{]$/ # and this line ends in
+ # ';' or side comment
+ && $i_last_break < 0 # and we haven't made a break
+ && $i_lowest >= 0 # and we saw a possible break
+ && $i_lowest < $imax - 1 # (but not just before this ;)
+ && $strength - $lowest_strength < 0.5 * WEAK # and it's good
+ )
+ {
+
+ DEBUG_BREAK_LINES && do {
+ $Msg .= " :last at good old break\n";
+ };
+ last;
+ }
+
+ # Do not skip past an important break point in a short final
+ # segment. For example, without this check we would miss the
+ # break at the final / in the following code:
+ #
+ # $depth_stop =
+ # ( $tau * $mass_pellet * $q_0 *
+ # ( 1. - exp( -$t_stop / $tau ) ) -
+ # 4. * $pi * $factor * $k_ice *
+ # ( $t_melt - $t_ice ) *
+ # $r_pellet *
+ # $t_stop ) /
+ # ( $rho_ice * $Qs * $pi * $r_pellet**2 );
+ #
+ if (
+ $line_count > 2
+ && $i_lowest >= 0 # and we saw a possible break
+ && $i_lowest < $i_test
+ && $i_test > $imax - 2
+ && $nesting_depth_to_go[$i_begin] >
+ $nesting_depth_to_go[$i_lowest]
+ && $lowest_strength < $last_break_strength - .5 * WEAK
+ )
+ {
+ # Make this break for math operators for now
+ my $ir = $inext_to_go[$i_lowest];
+ my $il = $iprev_to_go[$ir];
+ if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
+ || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ )
+ {
+ DEBUG_BREAK_LINES && do {
+ $Msg .= " :last-noskip_short";
+ };
+ last;
+ }
+ }
+
+ # Update the minimum bond strength location
+ $lowest_strength = $strength;
+ $i_lowest = $i_test;
+ $lowest_next_token = $next_nonblank_token;
+ $lowest_next_type = $next_nonblank_type;
+ $i_lowest_next_nonblank = $i_next_nonblank;
+ if ($must_break) {
+ DEBUG_BREAK_LINES && do {
+ $Msg .= " :last-must_break";
+ };
+ last;
+ }
+
+ # set flags to remember if a break here will produce a
+ # leading alignment of certain common tokens
+ if ( $line_count > 0
+ && $i_test < $imax
+ && ( $lowest_strength - $last_break_strength <= $max_bias )
+ )
+ {
+ my $i_last_end = $iprev_to_go[$i_begin];
+ my $tok_beg = $tokens_to_go[$i_begin];
+ my $type_beg = $types_to_go[$i_begin];
+ if (
+
+ # check for leading alignment of certain tokens
+ (
+ $tok_beg eq $next_nonblank_token
+ && $is_chain_operator{$tok_beg}
+ && ( $type_beg eq 'k'
+ || $type_beg eq $tok_beg )
+ && $nesting_depth_to_go[$i_begin] >=
+ $nesting_depth_to_go[$i_next_nonblank]
+ )
+
+ || ( $tokens_to_go[$i_last_end] eq $token
+ && $is_chain_operator{$token}
+ && ( $type eq 'k' || $type eq $token )
+ && $nesting_depth_to_go[$i_last_end] >=
+ $nesting_depth_to_go[$i_test] )
+ )
+ {
+ $leading_alignment_token = $next_nonblank_token;
+ $leading_alignment_type = $next_nonblank_type;
+ }
+ }
+ }
+
+ my $too_long = ( $i_test >= $imax );
+ if ( !$too_long ) {
+ my $next_length =
+ $leading_spaces +
+ $summed_lengths_to_go[ $i_test + 2 ] -
+ $starting_sum;
+ $too_long = $next_length > $maximum_line_length;
+
+ # To prevent blinkers we will avoid leaving a token exactly at
+ # the line length limit unless it is the last token or one of
+ # several "good" types.
+ #
+ # The following code was a blinker with -pbp before this
+ # modification:
+## $last_nonblank_token eq '('
+## && $is_indirect_object_taker{ $paren_type
+## [$paren_depth] }
+ # The issue causing the problem is that if the
+ # term [$paren_depth] gets broken across a line then
+ # the whitespace routine doesn't see both opening and closing
+ # brackets and will format like '[ $paren_depth ]'. This
+ # leads to an oscillation in length depending if we break
+ # before the closing bracket or not.
+ if ( !$too_long
+ && $i_test + 1 < $imax
+ && $next_nonblank_type ne ','
+ && !$is_closing_type{$next_nonblank_type} )
+ {
+ $too_long = $next_length >= $maximum_line_length;
+ DEBUG_BREAK_LINES && do {
+ $Msg .= " :too_long=$too_long" if ($too_long);
+ }
+ }
+ }
+
+ DEBUG_BREAK_LINES && do {
+ my $ltok = $token;
+ my $rtok = $next_nonblank_token ? $next_nonblank_token : "";
+ my $i_testp2 = $i_test + 2;
+ if ( $i_testp2 > $max_index_to_go + 1 ) {
+ $i_testp2 = $max_index_to_go + 1;
+ }
+ if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
+ if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
+ print STDOUT
+"BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n";
+ };
+
+ # allow one extra terminal token after exceeding line length
+ # if it would strand this token.
+ if ( $rOpts_fuzzy_line_length
+ && $too_long
+ && $i_lowest == $i_test
+ && $token_lengths_to_go[$i_test] > 1
+ && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
+ )
+ {
+ $too_long = 0;
+ DEBUG_BREAK_LINES && do {
+ $Msg .= " :do_not_strand next='$next_nonblank_type'";
+ };
+ }
+
+ # we are done if...
+ if (
+
+ # ... no more space and we have a break
+ $too_long && $i_lowest >= 0
+
+ # ... or no more tokens
+ || $i_test == $imax
+ )
+ {
+ DEBUG_BREAK_LINES && do {
+ $Msg .=
+" :Done-too_long=$too_long or i_lowest=$i_lowest or $i_test==imax";
+ };
+ last;
+ }
+ }
+
+ #-------------------------------------------------------
+ # END of inner loop to find the best next breakpoint
+ # Now decide exactly where to put the breakpoint
+ #-------------------------------------------------------
+
+ # it's always ok to break at imax if no other break was found
+ if ( $i_lowest < 0 ) { $i_lowest = $imax }
+
+ # semi-final index calculation
+ my $i_next_nonblank = $inext_to_go[$i_lowest];
+ my $next_nonblank_type = $types_to_go[$i_next_nonblank];
+ my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
+
+ #-------------------------------------------------------
+ # ?/: rule 1 : if a break here will separate a '?' on this
+ # line from its closing ':', then break at the '?' instead.
+ #-------------------------------------------------------
+ foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
+ next unless ( $tokens_to_go[$i] eq '?' );
+
+ # do not break if probable sequence of ?/: statements
+ next if ($is_colon_chain);
+
+ # do not break if statement is broken by side comment
+ next
+ if ( $tokens_to_go[$max_index_to_go] eq '#'
+ && terminal_type_i( 0, $max_index_to_go ) !~ /^[\;\}]$/ );
+
+ # no break needed if matching : is also on the line
+ next
+ if ( $mate_index_to_go[$i] >= 0
+ && $mate_index_to_go[$i] <= $i_next_nonblank );
+
+ $i_lowest = $i;
+ if ( $want_break_before{'?'} ) { $i_lowest-- }
+ last;
+ }
+
+ #-------------------------------------------------------
+ # END of inner loop to find the best next breakpoint:
+ # Break the line after the token with index i=$i_lowest
+ #-------------------------------------------------------
+
+ # final index calculation
+ $i_next_nonblank = $inext_to_go[$i_lowest];
+ $next_nonblank_type = $types_to_go[$i_next_nonblank];
+ $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
+
+ DEBUG_BREAK_LINES
+ && print STDOUT
+"BREAK: best is i = $i_lowest strength = $lowest_strength;\nReason>> $Msg\n";
+ $Msg = "";
+
+ #-------------------------------------------------------
+ # ?/: rule 2 : if we break at a '?', then break at its ':'
+ #
+ # Note: this rule is also in sub break_lists to handle a break
+ # at the start and end of a line (in case breaks are dictated
+ # by side comments).
+ #-------------------------------------------------------
+ if ( $next_nonblank_type eq '?' ) {
+ $self->set_closing_breakpoint($i_next_nonblank);
+ }
+ elsif ( $types_to_go[$i_lowest] eq '?' ) {
+ $self->set_closing_breakpoint($i_lowest);
+ }
+
+ #-------------------------------------------------------
+ # ?/: rule 3 : if we break at a ':' then we save
+ # its location for further work below. We may need to go
+ # back and break at its '?'.
+ #-------------------------------------------------------
+ if ( $next_nonblank_type eq ':' ) {
+ push @i_colon_breaks, $i_next_nonblank;
+ }
+ elsif ( $types_to_go[$i_lowest] eq ':' ) {
+ push @i_colon_breaks, $i_lowest;
+ }
+
+ # here we should set breaks for all '?'/':' pairs which are
+ # separated by this line
+
+ $line_count++;
+
+ # save this line segment, after trimming blanks at the ends
+ push( @i_first,
+ ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
+ push( @i_last,
+ ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
+
+ # set a forced breakpoint at a container opening, if necessary, to
+ # signal a break at a closing container. Excepting '(' for now.
+ if (
+ (
+ $tokens_to_go[$i_lowest] eq '{'
+ || $tokens_to_go[$i_lowest] eq '['
+ )
+ && !$forced_breakpoint_to_go[$i_lowest]
+ )
+ {
+ $self->set_closing_breakpoint($i_lowest);
+ }
+
+ # get ready to go again
+ $i_begin = $i_lowest + 1;
+ $last_break_strength = $lowest_strength;
+ $i_last_break = $i_lowest;
+ $leading_alignment_token = "";
+ $leading_alignment_type = "";
+ $lowest_next_token = '';
+ $lowest_next_type = 'b';
+
+ if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
+ $i_begin++;
+ }
+
+ # update indentation size
+ if ( $i_begin <= $imax ) {
+ $leading_spaces = leading_spaces_to_go($i_begin);
+ DEBUG_BREAK_LINES
+ && print STDOUT
+ "updating leading spaces to be $leading_spaces at i=$i_begin\n";
+ }
+ }
+
+ #-------------------------------------------------------
+ # END of main loop to set continuation breakpoints
+ # Now go back and make any necessary corrections
+ #-------------------------------------------------------
+
+ #-------------------------------------------------------
+ # ?/: rule 4 -- if we broke at a ':', then break at
+ # corresponding '?' unless this is a chain of ?: expressions
+ #-------------------------------------------------------
+ if (@i_colon_breaks) {
+
+ # using a simple method for deciding if we are in a ?/: chain --
+ # this is a chain if it has multiple ?/: pairs all in order;
+ # otherwise not.
+ # Note that if line starts in a ':' we count that above as a break
+ my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
+
+ unless ($is_chain) {
+ my @insert_list = ();
+ foreach (@i_colon_breaks) {
+ my $i_question = $mate_index_to_go[$_];
+ if ( $i_question >= 0 ) {
+ if ( $want_break_before{'?'} ) {
+ $i_question = $iprev_to_go[$i_question];
+ }
+
+ if ( $i_question >= 0 ) {
+ push @insert_list, $i_question;
+ }
+ }
+ $self->insert_additional_breaks( \@insert_list, \@i_first,
+ \@i_last );
+ }
+ }
+ }
+ return ( \@i_first, \@i_last );
+}
+
+###########################################
+# CODE SECTION 11: Code to break long lists
+###########################################
+
+{ ## begin closure break_lists
+
+ # These routines and variables are involved in finding good
+ # places to break long lists.
+
+ use constant DEBUG_BREAK_LISTS => 0;
+
+ my (
+ $block_type, $current_depth,
+ $depth, $i,
+ $i_last_nonblank_token, $last_nonblank_token,
+ $last_nonblank_type, $last_nonblank_block_type,
+ $last_old_breakpoint_count, $minimum_depth,
+ $next_nonblank_block_type, $next_nonblank_token,
+ $next_nonblank_type, $old_breakpoint_count,
+ $starting_breakpoint_count, $starting_depth,
+ $token, $type,
+ $type_sequence,
+ );
+
+ my (
+ @breakpoint_stack, @breakpoint_undo_stack,
+ @comma_index, @container_type,
+ @identifier_count_stack, @index_before_arrow,
+ @interrupted_list, @item_count_stack,
+ @last_comma_index, @last_dot_index,
+ @last_nonblank_type, @old_breakpoint_count_stack,
+ @opening_structure_index_stack, @rfor_semicolon_list,
+ @has_old_logical_breakpoints, @rand_or_list,
+ @i_equals, @override_cab3,
+ @type_sequence_stack,
+ );
+
+ # these arrays must retain values between calls
+ my ( @has_broken_sublist, @dont_align, @want_comma_break );
+
+ my $length_tol;
+ my $lp_tol_boost;
+ my $list_stress_level;
+
+ sub initialize_break_lists {
+ @dont_align = ();
+ @has_broken_sublist = ();
+ @want_comma_break = ();
+
+ #---------------------------------------------------
+ # Set tolerances to prevent formatting instabilities
+ #---------------------------------------------------
+
+ # Define tolerances to use when checking if closed
+ # containers will fit on one line. This is necessary to avoid
+ # formatting instability. The basic tolerance is based on the
+ # following:
+
+ # - Always allow for at least one extra space after a closing token so
+ # that we do not strand a comma or semicolon. (oneline.t).
+
+ # - Use an increased line length tolerance when -ci > -i to avoid
+ # blinking states (case b923 and others).
+ $length_tol =
+ 1 + max( 0, $rOpts_continuation_indentation - $rOpts_indent_columns );
+
+ # In addition, it may be necessary to use a few extra tolerance spaces
+ # when -lp is used and/or when -xci is used. The history of this
+ # so far is as follows:
+
+ # FIX1: At least 3 characters were been found to be required for -lp
+ # to fixes cases b1059 b1063 b1117.
+
+ # FIX2: Further testing showed that we need a total of 3 extra spaces
+ # when -lp is set for non-lists, and at least 2 spaces when -lp and
+ # -xci are set.
+ # Fixes cases b1063 b1103 b1134 b1135 b1136 b1138 b1140 b1143 b1144
+ # b1145 b1146 b1147 b1148 b1151 b1152 b1153 b1154 b1156 b1157 b1164
+ # b1165
+
+ # FIX3: To fix cases b1169 b1170 b1171, an update was made in sub
+ # 'find_token_starting_list' to go back before an initial blank space.
+ # This fixed these three cases, and allowed the tolerances to be
+ # reduced to continue to fix all other known cases of instability.
+ # This gives the current tolerance formulation.
+
+ $lp_tol_boost = 0;
+
+ if ($rOpts_line_up_parentheses) {
+
+ # boost tol for combination -lp -xci
+ if ($rOpts_extended_continuation_indentation) {
+ $lp_tol_boost = 2;
+ }
+
+ # boost tol for combination -lp and any -vtc > 0, but only for
+ # non-list containers
+ else {
+ foreach ( keys %closing_vertical_tightness ) {
+ next
+ unless ( $closing_vertical_tightness{$_} );
+ $lp_tol_boost = 1; # Fixes B1193;
+ last;
+ }
+ }
+ }
+
+ # Define a level where list formatting becomes highly stressed and
+ # needs to be simplified. Introduced for case b1262.
+ $list_stress_level = min( $stress_level_alpha, $stress_level_beta + 2 );
+
+ return;
+ }
+
+ # routine to define essential variables when we go 'up' to
+ # a new depth
+ sub check_for_new_minimum_depth {
+ my $depth = shift;
+ if ( $depth < $minimum_depth ) {
+
+ $minimum_depth = $depth;
+
+ # these arrays need not retain values between calls
+ $breakpoint_stack[$depth] = $starting_breakpoint_count;
+ $container_type[$depth] = "";
+ $identifier_count_stack[$depth] = 0;
+ $index_before_arrow[$depth] = -1;
+ $interrupted_list[$depth] = 1;
+ $item_count_stack[$depth] = 0;
+ $last_nonblank_type[$depth] = "";
+ $opening_structure_index_stack[$depth] = -1;
+
+ $breakpoint_undo_stack[$depth] = undef;
+ $comma_index[$depth] = undef;
+ $last_comma_index[$depth] = undef;
+ $last_dot_index[$depth] = undef;
+ $old_breakpoint_count_stack[$depth] = undef;
+ $has_old_logical_breakpoints[$depth] = 0;
+ $rand_or_list[$depth] = [];
+ $rfor_semicolon_list[$depth] = [];
+ $i_equals[$depth] = -1;
+
+ # these arrays must retain values between calls
+ if ( !defined( $has_broken_sublist[$depth] ) ) {
+ $dont_align[$depth] = 0;
+ $has_broken_sublist[$depth] = 0;
+ $want_comma_break[$depth] = 0;
+ }
+ }
+ return;
+ }
+
+ # routine to decide which commas to break at within a container;
+ # returns:
+ # $bp_count = number of comma breakpoints set
+ # $do_not_break_apart = a flag indicating if container need not
+ # be broken open
+ sub set_comma_breakpoints {
+
+ my ( $self, $dd ) = @_;
+ my $bp_count = 0;
+ my $do_not_break_apart = 0;
+
+ # Do not break a list unless there are some non-line-ending commas.
+ # This avoids getting different results with only non-essential commas,
+ # and fixes b1192.
+ my $seqno = $type_sequence_stack[$dd];
+ my $real_comma_count =
+ $seqno ? $self->[_rtype_count_by_seqno_]->{$seqno}->{','} : 1;
+
+ # anything to do?
+ if ( $item_count_stack[$dd] ) {
+
+ # handle commas not in containers...
+ if ( $dont_align[$dd] ) {
+ $self->do_uncontained_comma_breaks($dd);
+ }
+
+ # handle commas within containers...
+ elsif ($real_comma_count) {
+ my $fbc = get_forced_breakpoint_count();
+
+ # always open comma lists not preceded by keywords,
+ # barewords, identifiers (that is, anything that doesn't
+ # look like a function call)
+ my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
+
+ $self->set_comma_breakpoints_do(
+ {
+ depth => $dd,
+ i_opening_paren => $opening_structure_index_stack[$dd],
+ i_closing_paren => $i,
+ item_count => $item_count_stack[$dd],
+ identifier_count => $identifier_count_stack[$dd],
+ rcomma_index => $comma_index[$dd],
+ next_nonblank_type => $next_nonblank_type,
+ list_type => $container_type[$dd],
+ interrupted => $interrupted_list[$dd],
+ rdo_not_break_apart => \$do_not_break_apart,
+ must_break_open => $must_break_open,
+ has_broken_sublist => $has_broken_sublist[$dd],
+ }
+ );
+ $bp_count = get_forced_breakpoint_count() - $fbc;
+ $do_not_break_apart = 0 if $must_break_open;
+ }
+ }
+ return ( $bp_count, $do_not_break_apart );
+ }
+
+ # These types are excluded at breakpoints to prevent blinking
+ # Switched from excluded to included as part of fix for b1214
+ ##my %is_uncontained_comma_break_excluded_type;
+ my %is_uncontained_comma_break_included_type;
+
+ BEGIN {
+ ##my @q = qw< L { ( [ ? : + - =~ >;
+ ##@is_uncontained_comma_break_excluded_type{@q} = (1) x scalar(@q);
+
+ my @q = qw< k R } ) ] Y Z U w i q Q .
+ = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=>;
+ @is_uncontained_comma_break_included_type{@q} = (1) x scalar(@q);
+ }
+
+ sub do_uncontained_comma_breaks {
+
+ # Handle commas not in containers...
+ # This is a catch-all routine for commas that we
+ # don't know what to do with because the don't fall
+ # within containers. We will bias the bond strength
+ # to break at commas which ended lines in the input
+ # file. This usually works better than just trying
+ # to put as many items on a line as possible. A
+ # downside is that if the input file is garbage it
+ # won't work very well. However, the user can always
+ # prevent following the old breakpoints with the
+ # -iob flag.
+ my ( $self, $dd ) = @_;
+ my $bias = -.01;
+ my $old_comma_break_count = 0;
+ foreach my $ii ( @{ $comma_index[$dd] } ) {
+ if ( $old_breakpoint_to_go[$ii] ) {
+ $old_comma_break_count++;
+ $bond_strength_to_go[$ii] = $bias;
+
+ # reduce bias magnitude to force breaks in order
+ $bias *= 0.99;
+ }
+ }
+
+ # Also put a break before the first comma if
+ # (1) there was a break there in the input, and
+ # (2) there was exactly one old break before the first comma break
+ # (3) OLD: there are multiple old comma breaks
+ # (3) NEW: there are one or more old comma breaks (see return example)
+ # (4) the first comma is at the starting level ...
+ # ... fixes cases b064 b065 b068 b210 b747
+ # (5) the batch does not start with a ci>0 [ignore a ci change by -xci]
+ # ... fixes b1220. If ci>0 we are in the middle of a snippet,
+ # maybe because -boc has been forcing out previous lines.
+
+ # For example, we will follow the user and break after
+ # 'print' in this snippet:
+ # print
+ # "conformability (Not the same dimension)\n",
+ # "\t", $have, " is ", text_unit($hu), "\n",
+ # "\t", $want, " is ", text_unit($wu), "\n",
+ # ;
+ #
+ # Another example, just one comma, where we will break after
+ # the return:
+ # return
+ # $x * cos($a) - $y * sin($a),
+ # $x * sin($a) + $y * cos($a);
+
+ # Breaking a print statement:
+ # print SAVEOUT
+ # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
+ # ( $? & 128 ) ? " -- core dumped" : "", "\n";
+ #
+ # But we will not force a break after the opening paren here
+ # (causes a blinker):
+ # $heap->{stream}->set_output_filter(
+ # poe::filter::reference->new('myotherfreezer') ),
+ # ;
+ #
+ my $i_first_comma = $comma_index[$dd]->[0];
+ my $level_comma = $levels_to_go[$i_first_comma];
+ my $ci_start = $ci_levels_to_go[0];
+
+ # Here we want to use the value of ci before any -xci adjustment
+ if ( $ci_start && $rOpts_extended_continuation_indentation ) {
+ my $K0 = $K_to_go[0];
+ if ( $self->[_rseqno_controlling_my_ci_]->{$K0} ) { $ci_start = 0 }
+ }
+ if ( !$ci_start
+ && $old_breakpoint_to_go[$i_first_comma]
+ && $level_comma == $levels_to_go[0] )
+ {
+ my $ibreak = -1;
+ my $obp_count = 0;
+ for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
+ if ( $old_breakpoint_to_go[$ii] ) {
+ $obp_count++;
+ last if ( $obp_count > 1 );
+ $ibreak = $ii
+ if ( $levels_to_go[$ii] == $level_comma );
+ }
+ }
+
+ # Changed rule from multiple old commas to just one here:
+ if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
+ {
+ my $ibreak_m = $ibreak;
+ $ibreak_m-- if ( $types_to_go[$ibreak_m] eq 'b' );
+ if ( $ibreak_m >= 0 ) {
+
+ # In order to avoid blinkers we have to be fairly
+ # restrictive:
+
+ # OLD Rules:
+ # Rule 1: Do not to break before an opening token
+ # Rule 2: avoid breaking at ternary operators
+ # (see b931, which is similar to the above print example)
+ # Rule 3: Do not break at chain operators to fix case b1119
+ # - The previous test was '$typem !~ /^[\(\{\[L\?\:]$/'
+
+ # NEW Rule, replaced above rules after case b1214:
+ # only break at one of the included types
+
+ # Be sure to test any changes to these rules against runs
+ # with -l=0 such as the 'bbvt' test (perltidyrc_colin)
+ # series.
+ my $type_m = $types_to_go[$ibreak_m];
+
+ # Switched from excluded to included for b1214. If necessary
+ # the token could also be checked if type_m eq 'k'
+ ##if ( !$is_uncontained_comma_break_excluded_type{$type_m} ) {
+ ##my $token_m = $tokens_to_go[$ibreak_m];
+ if ( $is_uncontained_comma_break_included_type{$type_m} ) {
+ $self->set_forced_breakpoint($ibreak);
+ }
+ }
+ }
+ }
+ return;
+ }
+
+ my %is_logical_container;
+ my %quick_filter;
+
+ BEGIN {
+ my @q = qw# if elsif unless while and or err not && | || ? : ! #;
+ @is_logical_container{@q} = (1) x scalar(@q);
+
+ # This filter will allow most tokens to skip past a section of code
+ %quick_filter = %is_assignment;
+ @q = qw# => . ; < > ~ #;
+ push @q, ',';
+ @quick_filter{@q} = (1) x scalar(@q);
+ }
+
+ sub set_for_semicolon_breakpoints {
+ my ( $self, $dd ) = @_;
+ foreach ( @{ $rfor_semicolon_list[$dd] } ) {
+ $self->set_forced_breakpoint($_);
+ }
+ return;
+ }
+
+ sub set_logical_breakpoints {
+ my ( $self, $dd ) = @_;
+ if (
+ $item_count_stack[$dd] == 0
+ && $is_logical_container{ $container_type[$dd] }
+
+ || $has_old_logical_breakpoints[$dd]
+ )
+ {
+
+ # Look for breaks in this order:
+ # 0 1 2 3
+ # or and || &&
+ foreach my $i ( 0 .. 3 ) {
+ if ( $rand_or_list[$dd][$i] ) {
+ foreach ( @{ $rand_or_list[$dd][$i] } ) {
+ $self->set_forced_breakpoint($_);
+ }
+
+ # break at any 'if' and 'unless' too
+ foreach ( @{ $rand_or_list[$dd][4] } ) {
+ $self->set_forced_breakpoint($_);
+ }
+ $rand_or_list[$dd] = [];
+ last;
+ }
+ }
+ }
+ return;
+ }
+
+ sub is_unbreakable_container {
+
+ # never break a container of one of these types
+ # because bad things can happen (map1.t)
+ my $dd = shift;
+ return $is_sort_map_grep{ $container_type[$dd] };
+ }
+
+ sub break_lists {
+
+ my ( $self, $is_long_line ) = @_;
+
+ #----------------------------------------------------------------------
+ # This routine is called once per batch, if the batch is a list, to set
+ # line breaks so that hierarchical structure can be displayed and so
+ # that list items can be vertically aligned. The output of this
+ # routine is stored in the array @forced_breakpoint_to_go, which is
+ # used by sub 'break_long_lines' to set final breakpoints.
+ #----------------------------------------------------------------------
+
+ my $rLL = $self->[_rLL_];
+ my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
+ my $ris_broken_container = $self->[_ris_broken_container_];
+ my $rbreak_before_container_by_seqno =
+ $self->[_rbreak_before_container_by_seqno_];
+
+ $starting_depth = $nesting_depth_to_go[0];
+
+ $block_type = ' ';
+ $current_depth = $starting_depth;
+ $i = -1;
+ $last_nonblank_token = ';';
+ $last_nonblank_type = ';';
+ $last_nonblank_block_type = ' ';
+ $last_old_breakpoint_count = 0;
+ $minimum_depth = $current_depth + 1; # forces update in check below
+ $old_breakpoint_count = 0;
+ $starting_breakpoint_count = get_forced_breakpoint_count();
+ $token = ';';
+ $type = ';';
+ $type_sequence = '';
+
+ my $total_depth_variation = 0;
+ my $i_old_assignment_break;
+ my $depth_last = $starting_depth;
+ my $comma_follows_last_closing_token;
+
+ check_for_new_minimum_depth($current_depth);
+
+ my $want_previous_breakpoint = -1;
+
+ my $saw_good_breakpoint;
+ my $i_line_end = -1;
+ my $i_line_start = -1;
+ my $i_last_colon = -1;
+
+ #----------------------------------------
+ # Main loop over all tokens in this batch
+ #----------------------------------------
+ while ( ++$i <= $max_index_to_go ) {
+ if ( $type ne 'b' ) {
+ $i_last_nonblank_token = $i - 1;
+ $last_nonblank_type = $type;
+ $last_nonblank_token = $token;
+ $last_nonblank_block_type = $block_type;
+ } ## end if ( $type ne 'b' )
+ $type = $types_to_go[$i];
+ $block_type = $block_type_to_go[$i];
+ $token = $tokens_to_go[$i];
+ $type_sequence = $type_sequence_to_go[$i];
+ my $next_type = $types_to_go[ $i + 1 ];
+ my $next_token = $tokens_to_go[ $i + 1 ];
+ my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
+ $next_nonblank_type = $types_to_go[$i_next_nonblank];
+ $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
+ $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
+
+ # set break if flag was set
+ if ( $want_previous_breakpoint >= 0 ) {
+ $self->set_forced_breakpoint($want_previous_breakpoint);
+ $want_previous_breakpoint = -1;
+ }
+
+ $last_old_breakpoint_count = $old_breakpoint_count;
+
+ # Fixed for case b1097 to not consider old breaks at highly
+ # stressed locations, such as types 'L' and 'R'. It might be
+ # useful to generalize this concept in the future by looking at
+ # actual bond strengths.
+ if ( $old_breakpoint_to_go[$i]
+ && $type ne 'L'
+ && $next_nonblank_type ne 'R' )
+ {
+ $i_line_end = $i;
+ $i_line_start = $i_next_nonblank;
+
+ $old_breakpoint_count++;
+
+ # Break before certain keywords if user broke there and
+ # this is a 'safe' break point. The idea is to retain
+ # any preferred breaks for sequential list operations,
+ # like a schwartzian transform.
+ if ($rOpts_break_at_old_keyword_breakpoints) {
+ if (
+ $next_nonblank_type eq 'k'
+ && $is_keyword_returning_list{$next_nonblank_token}
+ && ( $type =~ /^[=\)\]\}Riw]$/
+ || $type eq 'k'
+ && $is_keyword_returning_list{$token} )
+ )
+ {
+
+ # we actually have to set this break next time through
+ # the loop because if we are at a closing token (such
+ # as '}') which forms a one-line block, this break might
+ # get undone.
+
+ # And do not do this at an equals if the user wants
+ # breaks before an equals (blinker cases b434 b903)
+ unless ( $type eq '=' && $want_break_before{$type} ) {
+ $want_previous_breakpoint = $i;
+ }
+ } ## end if ( $next_nonblank_type...)
+ } ## end if ($rOpts_break_at_old_keyword_breakpoints)
+
+ # Break before attributes if user broke there
+ if ($rOpts_break_at_old_attribute_breakpoints) {
+ if ( $next_nonblank_type eq 'A' ) {
+ $want_previous_breakpoint = $i;
+ }
+ }
+
+ # remember an = break as possible good break point
+ if ( $is_assignment{$type} ) {
+ $i_old_assignment_break = $i;
+ }
+ elsif ( $is_assignment{$next_nonblank_type} ) {
+ $i_old_assignment_break = $i_next_nonblank;
+ }
+ } ## end if ( $old_breakpoint_to_go...)
+
+ next if ( $type eq 'b' );
+ $depth = $nesting_depth_to_go[ $i + 1 ];
+
+ $total_depth_variation += abs( $depth - $depth_last );
+ $depth_last = $depth;
+
+ # safety check - be sure we always break after a comment
+ # Shouldn't happen .. an error here probably means that the
+ # nobreak flag did not get turned off correctly during
+ # formatting.
+ if ( $type eq '#' ) {
+ if ( $i != $max_index_to_go ) {
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+Non-fatal program bug: backup logic required to break after a comment
+EOM
+ }
+ $nobreak_to_go[$i] = 0;
+ $self->set_forced_breakpoint($i);
+ } ## end if ( $i != $max_index_to_go)
+ } ## end if ( $type eq '#' )
+
+ # Force breakpoints at certain tokens in long lines.
+ # Note that such breakpoints will be undone later if these tokens
+ # are fully contained within parens on a line.
+ if (
+
+ # break before a keyword within a line
+ $type eq 'k'
+ && $i > 0
+
+ # if one of these keywords:
+ && $is_if_unless_while_until_for_foreach{$token}
+
+ # but do not break at something like '1 while'
+ && ( $last_nonblank_type ne 'n' || $i > 2 )
+
+ # and let keywords follow a closing 'do' brace
+ && $last_nonblank_block_type ne 'do'
+
+ && (
+ $is_long_line
+
+ # or container is broken (by side-comment, etc)
+ || ( $next_nonblank_token eq '('
+ && $mate_index_to_go[$i_next_nonblank] < $i )
+ )
+ )
+ {
+ $self->set_forced_breakpoint( $i - 1 );
+ } ## end if ( $type eq 'k' && $i...)
+
+ # remember locations of '||' and '&&' for possible breaks if we
+ # decide this is a long logical expression.
+ if ( $type eq '||' ) {
+ push @{ $rand_or_list[$depth][2] }, $i;
+ ++$has_old_logical_breakpoints[$depth]
+ if ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_logical_breakpoints );
+ } ## end elsif ( $type eq '||' )
+ elsif ( $type eq '&&' ) {
+ push @{ $rand_or_list[$depth][3] }, $i;
+ ++$has_old_logical_breakpoints[$depth]
+ if ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_logical_breakpoints );
+ } ## end elsif ( $type eq '&&' )
+ elsif ( $type eq 'f' ) {
+ push @{ $rfor_semicolon_list[$depth] }, $i;
+ }
+ elsif ( $type eq 'k' ) {
+ if ( $token eq 'and' ) {
+ push @{ $rand_or_list[$depth][1] }, $i;
+ ++$has_old_logical_breakpoints[$depth]
+ if ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_logical_breakpoints );
+ } ## end if ( $token eq 'and' )
+
+ # break immediately at 'or's which are probably not in a logical
+ # block -- but we will break in logical breaks below so that
+ # they do not add to the forced_breakpoint_count
+ elsif ( $token eq 'or' ) {
+ push @{ $rand_or_list[$depth][0] }, $i;
+ ++$has_old_logical_breakpoints[$depth]
+ if ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_logical_breakpoints );
+ if ( $is_logical_container{ $container_type[$depth] } ) {
+ }
+ else {
+ if ($is_long_line) { $self->set_forced_breakpoint($i) }
+ elsif ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_logical_breakpoints )
+ {
+ $saw_good_breakpoint = 1;
+ }
+ } ## end else [ if ( $is_logical_container...)]
+ } ## end elsif ( $token eq 'or' )
+ elsif ( $token eq 'if' || $token eq 'unless' ) {
+ push @{ $rand_or_list[$depth][4] }, $i;
+ if ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_logical_breakpoints )
+ {
+ $self->set_forced_breakpoint($i);
+ }
+ } ## end elsif ( $token eq 'if' ||...)
+ } ## end elsif ( $type eq 'k' )
+ elsif ( $is_assignment{$type} ) {
+ $i_equals[$depth] = $i;
+ }
+
+ if ($type_sequence) {
+
+ # handle any postponed closing breakpoints
+ if ( $is_closing_sequence_token{$token} ) {
+ if ( $type eq ':' ) {
+ $i_last_colon = $i;
+
+ # retain break at a ':' line break
+ if ( ( $i == $i_line_start || $i == $i_line_end )
+ && $rOpts_break_at_old_ternary_breakpoints
+ && $levels_to_go[$i] < $list_stress_level )
+ {
+
+ $self->set_forced_breakpoint($i);
+
+ # Break at a previous '=', but only if it is before
+ # the mating '?'. Mate_index test fixes b1287.
+ my $ieq = $i_equals[$depth];
+ if ( $ieq > 0 && $ieq < $mate_index_to_go[$i] ) {
+ $self->set_forced_breakpoint(
+ $i_equals[$depth] );
+ $i_equals[$depth] = -1;
+ }
+ } ## end if ( ( $i == $i_line_start...))
+ } ## end if ( $type eq ':' )
+ if ( has_postponed_breakpoint($type_sequence) ) {
+ my $inc = ( $type eq ':' ) ? 0 : 1;
+ if ( $i >= $inc ) {
+ $self->set_forced_breakpoint( $i - $inc );
+ }
+ }
+ } ## end if ( $is_closing_sequence_token{$token} )
+
+ # set breaks at ?/: if they will get separated (and are
+ # not a ?/: chain), or if the '?' is at the end of the
+ # line
+ elsif ( $token eq '?' ) {
+ my $i_colon = $mate_index_to_go[$i];
+ if (
+ $i_colon <= 0 # the ':' is not in this batch
+ || $i == 0 # this '?' is the first token of the line
+ || $i ==
+ $max_index_to_go # or this '?' is the last token
+ )
+ {
+
+ # don't break if # this has a side comment, and
+ # don't break at a '?' if preceded by ':' on
+ # this line of previous ?/: pair on this line.
+ # This is an attempt to preserve a chain of ?/:
+ # expressions (elsif2.t).
+ if (
+ (
+ $i_last_colon < 0
+ || $parent_seqno_to_go[$i_last_colon] !=
+ $parent_seqno_to_go[$i]
+ )
+ && $tokens_to_go[$max_index_to_go] ne '#'
+ )
+ {
+ $self->set_forced_breakpoint($i);
+ }
+ $self->set_closing_breakpoint($i);
+ } ## end if ( $i_colon <= 0 ||...)
+ } ## end elsif ( $token eq '?' )
+
+ elsif ( $is_opening_token{$token} ) {
+
+ # do requeste -lp breaks at the OPENING token for BROKEN
+ # blocks. NOTE: this can be done for both -lp and -xlp,
+ # but only -xlp can really take advantage of this. So this
+ # is currently restricted to -xlp to avoid excess changes to
+ # existing -lp formatting.
+ if ( $rOpts_extended_line_up_parentheses
+ && $mate_index_to_go[$i] < 0 )
+ {
+ my $lp_object =
+ $self->[_rlp_object_by_seqno_]->{$type_sequence};
+ if ($lp_object) {
+ my $K_begin_line = $lp_object->get_K_begin_line();
+ my $i_begin_line = $K_begin_line - $K_to_go[0];
+ $self->set_forced_lp_break( $i_begin_line, $i );
+ }
+ }
+ }
+
+ } ## end if ($type_sequence)
+
+#print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
+
+ #------------------------------------------------------------
+ # Handle Increasing Depth..
+ #
+ # prepare for a new list when depth increases
+ # token $i is a '(','{', or '['
+ #------------------------------------------------------------
+ # hardened against bad input syntax: depth jump must be 1 and type
+ # must be opening..fixes c102
+ if ( $depth == $current_depth + 1 && $is_opening_type{$type} ) {
+
+ $type_sequence_stack[$depth] = $type_sequence;
+ $override_cab3[$depth] =
+ $rOpts_comma_arrow_breakpoints == 3
+ && $type_sequence
+ && $self->[_roverride_cab3_]->{$type_sequence};
+ $breakpoint_stack[$depth] = get_forced_breakpoint_count();
+ $breakpoint_undo_stack[$depth] =
+ get_forced_breakpoint_undo_count();
+ $has_broken_sublist[$depth] = 0;
+ $identifier_count_stack[$depth] = 0;
+ $index_before_arrow[$depth] = -1;
+ $interrupted_list[$depth] = 0;
+ $item_count_stack[$depth] = 0;
+ $last_comma_index[$depth] = undef;
+ $last_dot_index[$depth] = undef;
+ $last_nonblank_type[$depth] = $last_nonblank_type;
+ $old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
+ $opening_structure_index_stack[$depth] = $i;
+ $rand_or_list[$depth] = [];
+ $rfor_semicolon_list[$depth] = [];
+ $i_equals[$depth] = -1;
+ $want_comma_break[$depth] = 0;
+ $container_type[$depth] =
+
+ # k => && || ? : .
+ $is_container_label_type{$last_nonblank_type}
+ ? $last_nonblank_token
+ : "";
+ $has_old_logical_breakpoints[$depth] = 0;
+
+ # if line ends here then signal closing token to break
+ if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
+ {
+ $self->set_closing_breakpoint($i);
+ }
+
+ # Not all lists of values should be vertically aligned..
+ $dont_align[$depth] =
+
+ # code BLOCKS are handled at a higher level
+ ( $block_type ne "" )
+
+ # certain paren lists
+ || ( $type eq '(' ) && (
+
+ # it does not usually look good to align a list of
+ # identifiers in a parameter list, as in:
+ # my($var1, $var2, ...)
+ # (This test should probably be refined, for now I'm just
+ # testing for any keyword)
+ ( $last_nonblank_type eq 'k' )
+
+ # a trailing '(' usually indicates a non-list
+ || ( $next_nonblank_type eq '(' )
+ );
+
+ # patch to outdent opening brace of long if/for/..
+ # statements (like this one). See similar coding in
+ # set_continuation breaks. We have also catch it here for
+ # short line fragments which otherwise will not go through
+ # break_long_lines.
+ if (
+ $block_type
+
+ # if we have the ')' but not its '(' in this batch..
+ && ( $last_nonblank_token eq ')' )
+ && $mate_index_to_go[$i_last_nonblank_token] < 0
+
+ # and user wants brace to left
+ && !$rOpts_opening_brace_always_on_right
+
+ && ( $type eq '{' ) # should be true
+ && ( $token eq '{' ) # should be true
+ )
+ {
+ $self->set_forced_breakpoint( $i - 1 );
+ } ## end if ( $block_type && ( ...))
+ } ## end if ( $depth > $current_depth)
+
+ #------------------------------------------------------------
+ # Handle Decreasing Depth..
+ #
+ # finish off any old list when depth decreases
+ # token $i is a ')','}', or ']'
+ #------------------------------------------------------------
+ # hardened against bad input syntax: depth jump must be 1 and type
+ # must be closing .. fixes c102
+ elsif ( $depth == $current_depth - 1 && $is_closing_type{$type} ) {
+
+ check_for_new_minimum_depth($depth);
+
+ $comma_follows_last_closing_token =
+ $next_nonblank_type eq ',' || $next_nonblank_type eq '=>';
+
+ # force all outer logical containers to break after we see on
+ # old breakpoint
+ $has_old_logical_breakpoints[$depth] ||=
+ $has_old_logical_breakpoints[$current_depth];
+
+ # Patch to break between ') {' if the paren list is broken.
+ # There is similar logic in break_long_lines for
+ # non-broken lists.
+ if ( $token eq ')'
+ && $next_nonblank_block_type
+ && $interrupted_list[$current_depth]
+ && $next_nonblank_type eq '{'
+ && !$rOpts_opening_brace_always_on_right )
+ {
+ $self->set_forced_breakpoint($i);
+ } ## end if ( $token eq ')' && ...
+
+#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
+
+ # set breaks at commas if necessary
+ my ( $bp_count, $do_not_break_apart ) =
+ $self->set_comma_breakpoints($current_depth);
+
+ my $i_opening = $opening_structure_index_stack[$current_depth];
+ my $saw_opening_structure = ( $i_opening >= 0 );
+ my $lp_object;
+ if ( $rOpts_line_up_parentheses && $saw_opening_structure ) {
+ $lp_object = $self->[_rlp_object_by_seqno_]
+ ->{ $type_sequence_to_go[$i_opening] };
+ }
+
+ # this term is long if we had to break at interior commas..
+ my $is_long_term = $bp_count > 0;
+
+ # If this is a short container with one or more comma arrows,
+ # then we will mark it as a long term to open it if requested.
+ # $rOpts_comma_arrow_breakpoints =
+ # 0 - open only if comma precedes closing brace
+ # 1 - stable: except for one line blocks
+ # 2 - try to form 1 line blocks
+ # 3 - ignore =>
+ # 4 - always open up if vt=0
+ # 5 - stable: even for one line blocks if vt=0
+
+ # PATCH: Modify the -cab flag if we are not processing a list:
+ # We only want the -cab flag to apply to list containers, so
+ # for non-lists we use the default and stable -cab=5 value.
+ # Fixes case b939a.
+ my $cab_flag = $rOpts_comma_arrow_breakpoints;
+ if ( $type_sequence && !$ris_list_by_seqno->{$type_sequence} ) {
+ $cab_flag = 5;
+ }
+
+ # Ignore old breakpoints when under stress.
+ # Fixes b1203 b1204 as well as b1197-b1200.
+ # But not if -lp: fixes b1264, b1265. NOTE: rechecked with
+ # b1264 to see if this check is still required at all, and
+ # these still require a check, but at higher level beta+3
+ # instead of beta: b1193 b780
+ if ( $saw_opening_structure
+ && !$lp_object
+ && $levels_to_go[$i_opening] >= $list_stress_level )
+ {
+ $cab_flag = 2;
+
+ # Do not break hash braces under stress (fixes b1238)
+ $do_not_break_apart ||= $types_to_go[$i_opening] eq 'L';
+
+ # This option fixes b1235, b1237, b1240 with old and new -lp,
+ # but formatting is nicer with next option.
+ ## $is_long_term ||=
+ ## $levels_to_go[$i_opening] > $stress_level_beta + 1;
+
+ # This option fixes b1240 but not b1235, b1237 with new -lp,
+ # but this gives better formatting than the previous option.
+ $do_not_break_apart ||=
+ $levels_to_go[$i_opening] > $stress_level_beta;
+ }
+
+ if ( !$is_long_term
+ && $saw_opening_structure
+ && $is_opening_token{ $tokens_to_go[$i_opening] }
+ && $index_before_arrow[ $depth + 1 ] > 0
+ && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
+ )
+ {
+ $is_long_term =
+ $cab_flag == 4
+ || $cab_flag == 0 && $last_nonblank_token eq ','
+ || $cab_flag == 5 && $old_breakpoint_to_go[$i_opening];
+ } ## end if ( !$is_long_term &&...)
+
+ # mark term as long if the length between opening and closing
+ # parens exceeds allowed line length
+ if ( !$is_long_term && $saw_opening_structure ) {
+
+ my $i_opening_minus =
+ $self->find_token_starting_list($i_opening);
+
+ my $excess =
+ $self->excess_line_length( $i_opening_minus, $i );
+
+ # Use standard spaces for indentation of lists in -lp mode
+ # if it gives a longer line length. This helps to avoid an
+ # instability due to forming and breaking one-line blocks.
+ # This fixes case b1314.
+ my $indentation = $leading_spaces_to_go[$i_opening_minus];
+ if ( ref($indentation)
+ && $ris_broken_container->{$type_sequence} )
+ {
+ my $lp_spaces = $indentation->get_spaces();
+ my $std_spaces =
+ $standard_spaces_to_go[$i_opening_minus];
+ my $diff = $std_spaces - $lp_spaces;
+ if ( $diff > 0 ) { $excess += $diff }
+ }
+
+ my $tol = $length_tol;
+
+ # boost tol for an -lp container
+ if (
+ $lp_tol_boost
+ && $lp_object
+ && ( $rOpts_extended_continuation_indentation
+ || !$ris_list_by_seqno->{$type_sequence} )
+ )
+ {
+ $tol += $lp_tol_boost;
+ }
+
+ # Patch to avoid blinking with -bbxi=2 and -cab=2
+ # in which variations in -ci cause unstable formatting
+ # in edge cases. We just always add one ci level so that
+ # the formatting is independent of the -BBX results.
+ # Fixes cases b1137 b1149 b1150 b1155 b1158 b1159 b1160
+ # b1161 b1166 b1167 b1168
+ if ( !$ci_levels_to_go[$i_opening]
+ && $rbreak_before_container_by_seqno->{$type_sequence} )
+ {
+ $tol += $rOpts->{'continuation-indentation'};
+ }
+
+ $is_long_term = $excess + $tol > 0;
+
+ } ## end if ( !$is_long_term &&...)
+
+ # We've set breaks after all comma-arrows. Now we have to
+ # undo them if this can be a one-line block
+ # (the only breakpoints set will be due to comma-arrows)
+
+ if (
+
+ # user doesn't require breaking after all comma-arrows
+ ( $cab_flag != 0 ) && ( $cab_flag != 4 )
+
+ # and if the opening structure is in this batch
+ && $saw_opening_structure
# and either on the same old line
&& (
- $old_breakpoint_count_stack[$current_depth] ==
- $last_old_breakpoint_count
+ $old_breakpoint_count_stack[$current_depth] ==
+ $last_old_breakpoint_count
+
+ # or user wants to form long blocks with arrows
+ || $cab_flag == 2
+
+ # if -cab=3 is overridden then use -cab=2 behavior
+ || $cab_flag == 3 && $override_cab3[$current_depth]
+ )
+
+ # and we made breakpoints between the opening and closing
+ && ( $breakpoint_undo_stack[$current_depth] <
+ get_forced_breakpoint_undo_count() )
+
+ # and this block is short enough to fit on one line
+ # Note: use < because need 1 more space for possible comma
+ && !$is_long_term
+
+ )
+ {
+ $self->undo_forced_breakpoint_stack(
+ $breakpoint_undo_stack[$current_depth] );
+ } ## end if ( ( $rOpts_comma_arrow_breakpoints...))
+
+ # now see if we have any comma breakpoints left
+ my $has_comma_breakpoints =
+ ( $breakpoint_stack[$current_depth] !=
+ get_forced_breakpoint_count() );
+
+ # update broken-sublist flag of the outer container
+ $has_broken_sublist[$depth] =
+ $has_broken_sublist[$depth]
+ || $has_broken_sublist[$current_depth]
+ || $is_long_term
+ || $has_comma_breakpoints;
+
+# Having come to the closing ')', '}', or ']', now we have to decide if we
+# should 'open up' the structure by placing breaks at the opening and
+# closing containers. This is a tricky decision. Here are some of the
+# basic considerations:
+#
+# -If this is a BLOCK container, then any breakpoints will have already
+# been set (and according to user preferences), so we need do nothing here.
+#
+# -If we have a comma-separated list for which we can align the list items,
+# then we need to do so because otherwise the vertical aligner cannot
+# currently do the alignment.
+#
+# -If this container does itself contain a container which has been broken
+# open, then it should be broken open to properly show the structure.
+#
+# -If there is nothing to align, and no other reason to break apart,
+# then do not do it.
+#
+# We will not break open the parens of a long but 'simple' logical expression.
+# For example:
+#
+# This is an example of a simple logical expression and its formatting:
+#
+# if ( $bigwasteofspace1 && $bigwasteofspace2
+# || $bigwasteofspace3 && $bigwasteofspace4 )
+#
+# Most people would prefer this than the 'spacey' version:
+#
+# if (
+# $bigwasteofspace1 && $bigwasteofspace2
+# || $bigwasteofspace3 && $bigwasteofspace4
+# )
+#
+# To illustrate the rules for breaking logical expressions, consider:
+#
+# FULLY DENSE:
+# if ( $opt_excl
+# and ( exists $ids_excl_uc{$id_uc}
+# or grep $id_uc =~ /$_/, @ids_excl_uc ))
+#
+# This is on the verge of being difficult to read. The current default is to
+# open it up like this:
+#
+# DEFAULT:
+# if (
+# $opt_excl
+# and ( exists $ids_excl_uc{$id_uc}
+# or grep $id_uc =~ /$_/, @ids_excl_uc )
+# )
+#
+# This is a compromise which tries to avoid being too dense and to spacey.
+# A more spaced version would be:
+#
+# SPACEY:
+# if (
+# $opt_excl
+# and (
+# exists $ids_excl_uc{$id_uc}
+# or grep $id_uc =~ /$_/, @ids_excl_uc
+# )
+# )
+#
+# Some people might prefer the spacey version -- an option could be added. The
+# innermost expression contains a long block '( exists $ids_... ')'.
+#
+# Here is how the logic goes: We will force a break at the 'or' that the
+# innermost expression contains, but we will not break apart its opening and
+# closing containers because (1) it contains no multi-line sub-containers itself,
+# and (2) there is no alignment to be gained by breaking it open like this
+#
+# and (
+# exists $ids_excl_uc{$id_uc}
+# or grep $id_uc =~ /$_/, @ids_excl_uc
+# )
+#
+# (although this looks perfectly ok and might be good for long expressions). The
+# outer 'if' container, though, contains a broken sub-container, so it will be
+# broken open to avoid too much density. Also, since it contains no 'or's, there
+# will be a forced break at its 'and'.
+
+ # set some flags telling something about this container..
+ my $is_simple_logical_expression = 0;
+ if ( $item_count_stack[$current_depth] == 0
+ && $saw_opening_structure
+ && $tokens_to_go[$i_opening] eq '('
+ && $is_logical_container{ $container_type[$current_depth] }
+ )
+ {
+
+ # This seems to be a simple logical expression with
+ # no existing breakpoints. Set a flag to prevent
+ # opening it up.
+ if ( !$has_comma_breakpoints ) {
+ $is_simple_logical_expression = 1;
+ }
+
+ # This seems to be a simple logical expression with
+ # breakpoints (broken sublists, for example). Break
+ # at all 'or's and '||'s.
+ else {
+ $self->set_logical_breakpoints($current_depth);
+ }
+ } ## end if ( $item_count_stack...)
+
+ if ( $is_long_term
+ && @{ $rfor_semicolon_list[$current_depth] } )
+ {
+ $self->set_for_semicolon_breakpoints($current_depth);
+
+ # open up a long 'for' or 'foreach' container to allow
+ # leading term alignment unless -lp is used.
+ $has_comma_breakpoints = 1 unless ($lp_object);
+ } ## end if ( $is_long_term && ...)
+
+ if (
+
+ # breaks for code BLOCKS are handled at a higher level
+ !$block_type
+
+ # we do not need to break at the top level of an 'if'
+ # type expression
+ && !$is_simple_logical_expression
+
+ ## modification to keep ': (' containers vertically tight;
+ ## but probably better to let user set -vt=1 to avoid
+ ## inconsistency with other paren types
+ ## && ($container_type[$current_depth] ne ':')
+
+ # otherwise, we require one of these reasons for breaking:
+ && (
+
+ # - this term has forced line breaks
+ $has_comma_breakpoints
+
+ # - the opening container is separated from this batch
+ # for some reason (comment, blank line, code block)
+ # - this is a non-paren container spanning multiple lines
+ || !$saw_opening_structure
+
+ # - this is a long block contained in another breakable
+ # container
+ || $is_long_term && !$self->is_in_block_by_i($i_opening)
+ )
+ )
+ {
+
+ # do special -lp breaks at the CLOSING token for INTACT
+ # blocks (because we might not do them if the block does
+ # not break open)
+ if ($lp_object) {
+ my $K_begin_line = $lp_object->get_K_begin_line();
+ my $i_begin_line = $K_begin_line - $K_to_go[0];
+ $self->set_forced_lp_break( $i_begin_line, $i_opening );
+ }
+
+ # break after opening structure.
+ # note: break before closing structure will be automatic
+ if ( $minimum_depth <= $current_depth ) {
+
+ if ( $i_opening >= 0 ) {
+ $self->set_forced_breakpoint($i_opening)
+ unless ( $do_not_break_apart
+ || is_unbreakable_container($current_depth) );
+ }
+
+ # break at ',' of lower depth level before opening token
+ if ( $last_comma_index[$depth] ) {
+ $self->set_forced_breakpoint(
+ $last_comma_index[$depth] );
+ }
+
+ # break at '.' of lower depth level before opening token
+ if ( $last_dot_index[$depth] ) {
+ $self->set_forced_breakpoint(
+ $last_dot_index[$depth] );
+ }
+
+ # break before opening structure if preceded by another
+ # closing structure and a comma. This is normally
+ # done by the previous closing brace, but not
+ # if it was a one-line block.
+ if ( $i_opening > 2 ) {
+ my $i_prev =
+ ( $types_to_go[ $i_opening - 1 ] eq 'b' )
+ ? $i_opening - 2
+ : $i_opening - 1;
+
+ if (
+ $types_to_go[$i_prev] eq ','
+ && ( $types_to_go[ $i_prev - 1 ] eq ')'
+ || $types_to_go[ $i_prev - 1 ] eq '}' )
+ )
+ {
+ $self->set_forced_breakpoint($i_prev);
+ }
+
+ # also break before something like ':(' or '?('
+ # if appropriate.
+ elsif (
+ $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
+ {
+ my $token_prev = $tokens_to_go[$i_prev];
+ if ( $want_break_before{$token_prev} ) {
+ $self->set_forced_breakpoint($i_prev);
+ }
+ } ## end elsif ( $types_to_go[$i_prev...])
+ } ## end if ( $i_opening > 2 )
+ } ## end if ( $minimum_depth <=...)
+
+ # break after comma following closing structure
+ if ( $next_type eq ',' ) {
+ $self->set_forced_breakpoint( $i + 1 );
+ }
+
+ # break before an '=' following closing structure
+ if (
+ $is_assignment{$next_nonblank_type}
+ && ( $breakpoint_stack[$current_depth] !=
+ get_forced_breakpoint_count() )
+ )
+ {
+ $self->set_forced_breakpoint($i);
+ } ## end if ( $is_assignment{$next_nonblank_type...})
+
+ # break at any comma before the opening structure Added
+ # for -lp, but seems to be good in general. It isn't
+ # obvious how far back to look; the '5' below seems to
+ # work well and will catch the comma in something like
+ # push @list, myfunc( $param, $param, ..
+
+ my $icomma = $last_comma_index[$depth];
+ if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
+ unless ( $forced_breakpoint_to_go[$icomma] ) {
+ $self->set_forced_breakpoint($icomma);
+ }
+ }
+ } ## end logic to open up a container
+
+ # Break open a logical container open if it was already open
+ elsif ($is_simple_logical_expression
+ && $has_old_logical_breakpoints[$current_depth] )
+ {
+ $self->set_logical_breakpoints($current_depth);
+ }
+
+ # Handle long container which does not get opened up
+ elsif ($is_long_term) {
+
+ # must set fake breakpoint to alert outer containers that
+ # they are complex
+ set_fake_breakpoint();
+ } ## end elsif ($is_long_term)
+
+ } ## end elsif ( $depth < $current_depth)
+
+ #------------------------------------------------------------
+ # Handle this token
+ #------------------------------------------------------------
+
+ $current_depth = $depth;
+
+ # most token types can skip the rest of this loop
+ next unless ( $quick_filter{$type} );
+
+ # handle comma-arrow
+ if ( $type eq '=>' ) {
+ next if ( $last_nonblank_type eq '=>' );
+ next if $rOpts_break_at_old_comma_breakpoints;
+ next
+ if ( $rOpts_comma_arrow_breakpoints == 3
+ && !$override_cab3[$depth] );
+ $want_comma_break[$depth] = 1;
+ $index_before_arrow[$depth] = $i_last_nonblank_token;
+ next;
+ } ## end if ( $type eq '=>' )
+
+ elsif ( $type eq '.' ) {
+ $last_dot_index[$depth] = $i;
+ }
+
+ # Turn off alignment if we are sure that this is not a list
+ # environment. To be safe, we will do this if we see certain
+ # non-list tokens, such as ';', and also the environment is
+ # not a list. Note that '=' could be in any of the = operators
+ # (lextest.t). We can't just use the reported environment
+ # because it can be incorrect in some cases.
+ elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
+ && !$self->is_in_list_by_i($i) )
+ {
+ $dont_align[$depth] = 1;
+ $want_comma_break[$depth] = 0;
+ $index_before_arrow[$depth] = -1;
+ } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
+
+ # now just handle any commas
+ next unless ( $type eq ',' );
+
+ $last_dot_index[$depth] = undef;
+ $last_comma_index[$depth] = $i;
+
+ # break here if this comma follows a '=>'
+ # but not if there is a side comment after the comma
+ if ( $want_comma_break[$depth] ) {
+
+ if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
+ if ($rOpts_comma_arrow_breakpoints) {
+ $want_comma_break[$depth] = 0;
+ next;
+ }
+ }
+
+ $self->set_forced_breakpoint($i)
+ unless ( $next_nonblank_type eq '#' );
+
+ # break before the previous token if it looks safe
+ # Example of something that we will not try to break before:
+ # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
+ # Also we don't want to break at a binary operator (like +):
+ # $c->createOval(
+ # $x + $R, $y +
+ # $R => $x - $R,
+ # $y - $R, -fill => 'black',
+ # );
+ my $ibreak = $index_before_arrow[$depth] - 1;
+ if ( $ibreak > 0
+ && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
+ {
+ if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
+ if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
+ if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
+
+ # don't break pointer calls, such as the following:
+ # File::Spec->curdir => 1,
+ # (This is tokenized as adjacent 'w' tokens)
+ ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
+
+ # And don't break before a comma, as in the following:
+ # ( LONGER_THAN,=> 1,
+ # EIGHTY_CHARACTERS,=> 2,
+ # CAUSES_FORMATTING,=> 3,
+ # LIKE_THIS,=> 4,
+ # );
+ # This example is for -tso but should be general rule
+ if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
+ && $tokens_to_go[ $ibreak + 1 ] ne ',' )
+ {
+ $self->set_forced_breakpoint($ibreak);
+ }
+ } ## end if ( $types_to_go[$ibreak...])
+ } ## end if ( $ibreak > 0 && $tokens_to_go...)
+
+ $want_comma_break[$depth] = 0;
+ $index_before_arrow[$depth] = -1;
+
+ # handle list which mixes '=>'s and ','s:
+ # treat any list items so far as an interrupted list
+ $interrupted_list[$depth] = 1;
+ next;
+ } ## end if ( $want_comma_break...)
+
+ # Break after all commas above starting depth...
+ # But only if the last closing token was followed by a comma,
+ # to avoid breaking a list operator (issue c119)
+ if ( $depth < $starting_depth
+ && $comma_follows_last_closing_token
+ && !$dont_align[$depth] )
+ {
+ $self->set_forced_breakpoint($i)
+ unless ( $next_nonblank_type eq '#' );
+ next;
+ }
+
+ # add this comma to the list..
+ my $item_count = $item_count_stack[$depth];
+ if ( $item_count == 0 ) {
+
+ # but do not form a list with no opening structure
+ # for example:
+
+ # open INFILE_COPY, ">$input_file_copy"
+ # or die ("very long message");
+ if ( ( $opening_structure_index_stack[$depth] < 0 )
+ && $self->is_in_block_by_i($i) )
+ {
+ $dont_align[$depth] = 1;
+ }
+ } ## end if ( $item_count == 0 )
+
+ $comma_index[$depth][$item_count] = $i;
+ ++$item_count_stack[$depth];
+ if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
+ $identifier_count_stack[$depth]++;
+ }
+ } ## end while ( ++$i <= $max_index_to_go)
+
+ #-------------------------------------------
+ # end of loop over all tokens in this batch
+ #-------------------------------------------
+
+ # set breaks for any unfinished lists ..
+ for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
+
+ $interrupted_list[$dd] = 1;
+ $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
+ $self->set_comma_breakpoints($dd);
+ $self->set_logical_breakpoints($dd)
+ if ( $has_old_logical_breakpoints[$dd] );
+ $self->set_for_semicolon_breakpoints($dd);
+
+ # break open container...
+ my $i_opening = $opening_structure_index_stack[$dd];
+ if ( defined($i_opening) && $i_opening >= 0 ) {
+ $self->set_forced_breakpoint($i_opening)
+ unless (
+ is_unbreakable_container($dd)
+
+ # Avoid a break which would place an isolated ' or "
+ # on a line
+ || ( $type eq 'Q'
+ && $i_opening >= $max_index_to_go - 2
+ && ( $token eq "'" || $token eq '"' ) )
+ );
+ }
+ } ## end for ( my $dd = $current_depth...)
+
+ # Return a flag indicating if the input file had some good breakpoints.
+ # This flag will be used to force a break in a line shorter than the
+ # allowed line length.
+ if ( $has_old_logical_breakpoints[$current_depth] ) {
+ $saw_good_breakpoint = 1;
+ }
+
+ # A complex line with one break at an = has a good breakpoint.
+ # This is not complex ($total_depth_variation=0):
+ # $res1
+ # = 10;
+ #
+ # This is complex ($total_depth_variation=6):
+ # $res2 =
+ # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
+ elsif ($i_old_assignment_break
+ && $total_depth_variation > 4
+ && $old_breakpoint_count == 1 )
+ {
+ $saw_good_breakpoint = 1;
+ } ## end elsif ( $i_old_assignment_break...)
+
+ return $saw_good_breakpoint;
+ } ## end sub break_lists
+} ## end closure break_lists
+
+my %is_kwiZ;
+my %is_key_type;
+
+BEGIN {
+
+ # Added 'w' to fix b1172
+ my @q = qw(k w i Z ->);
+ @is_kwiZ{@q} = (1) x scalar(@q);
+
+ # added = for b1211
+ @q = qw<( [ { L R } ] ) = b>;
+ push @q, ',';
+ @is_key_type{@q} = (1) x scalar(@q);
+}
+
+use constant DEBUG_FIND_START => 0;
+
+sub find_token_starting_list {
+
+ # When testing to see if a block will fit on one line, some
+ # previous token(s) may also need to be on the line; particularly
+ # if this is a sub call. So we will look back at least one
+ # token.
+ my ( $self, $i_opening_paren ) = @_;
+
+ # This will be the return index
+ my $i_opening_minus = $i_opening_paren;
+
+ goto RETURN if ( $i_opening_minus <= 0 );
+
+ my $im1 = $i_opening_paren - 1;
+ my ( $iprev_nb, $type_prev_nb ) = ( $im1, $types_to_go[$im1] );
+ if ( $type_prev_nb eq 'b' && $iprev_nb > 0 ) {
+ $iprev_nb -= 1;
+ $type_prev_nb = $types_to_go[$iprev_nb];
+ }
+
+ if ( $type_prev_nb eq ',' ) {
+
+ # a previous comma is a good break point
+ # $i_opening_minus = $i_opening_paren;
+ }
+
+ elsif (
+ $tokens_to_go[$i_opening_paren] eq '('
+
+ # non-parens added here to fix case b1186
+ || $is_kwiZ{$type_prev_nb}
+ )
+ {
+ $i_opening_minus = $im1;
+
+ # Walk back to improve length estimate...
+ # FIX for cases b1169 b1170 b1171: start walking back
+ # at the previous nonblank. This makes the result insensitive
+ # to the flag --space-function-paren, and similar.
+ # previous loop: for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
+ for ( my $j = $iprev_nb ; $j >= 0 ; $j-- ) {
+ ##last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
+ ##last if ( $is_key_type{ $types_to_go[$j] } );
+ if ( $is_key_type{ $types_to_go[$j] } ) {
+
+ # fix for b1211
+ if ( $types_to_go[$j] eq '=' ) { $i_opening_minus = $j }
+ last;
+ }
+ $i_opening_minus = $j;
+ }
+ if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
+ }
+
+ RETURN:
+
+ DEBUG_FIND_START && print <<EOM;
+FIND_START: i=$i_opening_paren tok=$tokens_to_go[$i_opening_paren] => im=$i_opening_minus tok=$tokens_to_go[$i_opening_minus]
+EOM
+
+ return $i_opening_minus;
+}
+
+{ ## begin closure set_comma_breakpoints_do
+
+ my %is_keyword_with_special_leading_term;
+
+ BEGIN {
+
+ # These keywords have prototypes which allow a special leading item
+ # followed by a list
+ my @q =
+ qw(formline grep kill map printf sprintf push chmod join pack unshift);
+ @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
+ }
+
+ use constant DEBUG_SPARSE => 0;
+
+ sub set_comma_breakpoints_do {
+
+ # Given a list with some commas, set breakpoints at some of the
+ # commas, if necessary, to make it easy to read.
+
+ my ( $self, $rinput_hash ) = @_;
+
+ my $depth = $rinput_hash->{depth};
+ my $i_opening_paren = $rinput_hash->{i_opening_paren};
+ my $i_closing_paren = $rinput_hash->{i_closing_paren};
+ my $item_count = $rinput_hash->{item_count};
+ my $identifier_count = $rinput_hash->{identifier_count};
+ my $rcomma_index = $rinput_hash->{rcomma_index};
+ my $next_nonblank_type = $rinput_hash->{next_nonblank_type};
+ my $list_type = $rinput_hash->{list_type};
+ my $interrupted = $rinput_hash->{interrupted};
+ my $rdo_not_break_apart = $rinput_hash->{rdo_not_break_apart};
+ my $must_break_open = $rinput_hash->{must_break_open};
+ my $has_broken_sublist = $rinput_hash->{has_broken_sublist};
+
+ # nothing to do if no commas seen
+ return if ( $item_count < 1 );
+
+ my $i_first_comma = $rcomma_index->[0];
+ my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
+ my $i_last_comma = $i_true_last_comma;
+ if ( $i_last_comma >= $max_index_to_go ) {
+ $i_last_comma = $rcomma_index->[ --$item_count - 1 ];
+ return if ( $item_count < 1 );
+ }
+ my $is_lp_formatting = ref( $leading_spaces_to_go[$i_first_comma] );
+
+ #---------------------------------------------------------------
+ # find lengths of all items in the list to calculate page layout
+ #---------------------------------------------------------------
+ my $comma_count = $item_count;
+ my @item_lengths;
+ my @i_term_begin;
+ my @i_term_end;
+ my @i_term_comma;
+ my $i_prev_plus;
+ my @max_length = ( 0, 0 );
+ my $first_term_length;
+ my $i = $i_opening_paren;
+ my $is_odd = 1;
+
+ foreach my $j ( 0 .. $comma_count - 1 ) {
+ $is_odd = 1 - $is_odd;
+ $i_prev_plus = $i + 1;
+ $i = $rcomma_index->[$j];
+
+ my $i_term_end =
+ ( $i == 0 || $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
+ my $i_term_begin =
+ ( $types_to_go[$i_prev_plus] eq 'b' )
+ ? $i_prev_plus + 1
+ : $i_prev_plus;
+ push @i_term_begin, $i_term_begin;
+ push @i_term_end, $i_term_end;
+ push @i_term_comma, $i;
+
+ # note: currently adding 2 to all lengths (for comma and space)
+ my $length =
+ 2 + token_sequence_length( $i_term_begin, $i_term_end );
+ push @item_lengths, $length;
+
+ if ( $j == 0 ) {
+ $first_term_length = $length;
+ }
+ else {
+
+ if ( $length > $max_length[$is_odd] ) {
+ $max_length[$is_odd] = $length;
+ }
+ }
+ }
+
+ # now we have to make a distinction between the comma count and item
+ # count, because the item count will be one greater than the comma
+ # count if the last item is not terminated with a comma
+ my $i_b =
+ ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
+ ? $i_last_comma + 1
+ : $i_last_comma;
+ my $i_e =
+ ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
+ ? $i_closing_paren - 2
+ : $i_closing_paren - 1;
+ my $i_effective_last_comma = $i_last_comma;
+
+ my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
+
+ if ( $last_item_length > 0 ) {
+
+ # add 2 to length because other lengths include a comma and a blank
+ $last_item_length += 2;
+ push @item_lengths, $last_item_length;
+ push @i_term_begin, $i_b + 1;
+ push @i_term_end, $i_e;
+ push @i_term_comma, undef;
+
+ my $i_odd = $item_count % 2;
+
+ if ( $last_item_length > $max_length[$i_odd] ) {
+ $max_length[$i_odd] = $last_item_length;
+ }
+
+ $item_count++;
+ $i_effective_last_comma = $i_e + 1;
+
+ if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
+ $identifier_count++;
+ }
+ }
+
+ #---------------------------------------------------------------
+ # End of length calculations
+ #---------------------------------------------------------------
+
+ #---------------------------------------------------------------
+ # Compound List Rule 1:
+ # Break at (almost) every comma for a list containing a broken
+ # sublist. This has higher priority than the Interrupted List
+ # Rule.
+ #---------------------------------------------------------------
+ if ($has_broken_sublist) {
+
+ # Break at every comma except for a comma between two
+ # simple, small terms. This prevents long vertical
+ # columns of, say, just 0's.
+ my $small_length = 10; # 2 + actual maximum length wanted
+
+ # We'll insert a break in long runs of small terms to
+ # allow alignment in uniform tables.
+ my $skipped_count = 0;
+ my $columns = table_columns_available($i_first_comma);
+ my $fields = int( $columns / $small_length );
+ if ( $rOpts_maximum_fields_per_table
+ && $fields > $rOpts_maximum_fields_per_table )
+ {
+ $fields = $rOpts_maximum_fields_per_table;
+ }
+ my $max_skipped_count = $fields - 1;
+
+ my $is_simple_last_term = 0;
+ my $is_simple_next_term = 0;
+ foreach my $j ( 0 .. $item_count ) {
+ $is_simple_last_term = $is_simple_next_term;
+ $is_simple_next_term = 0;
+ if ( $j < $item_count
+ && $i_term_end[$j] == $i_term_begin[$j]
+ && $item_lengths[$j] <= $small_length )
+ {
+ $is_simple_next_term = 1;
+ }
+ next if $j == 0;
+ if ( $is_simple_last_term
+ && $is_simple_next_term
+ && $skipped_count < $max_skipped_count )
+ {
+ $skipped_count++;
+ }
+ else {
+ $skipped_count = 0;
+ my $i = $i_term_comma[ $j - 1 ];
+ last unless defined $i;
+ $self->set_forced_breakpoint($i);
+ }
+ }
+
+ # always break at the last comma if this list is
+ # interrupted; we wouldn't want to leave a terminal '{', for
+ # example.
+ if ($interrupted) {
+ $self->set_forced_breakpoint($i_true_last_comma);
+ }
+ return;
+ }
+
+#my ( $a, $b, $c ) = caller();
+#print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
+#i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
+#print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
+
+ #---------------------------------------------------------------
+ # Interrupted List Rule:
+ # A list is forced to use old breakpoints if it was interrupted
+ # by side comments or blank lines, or requested by user.
+ #---------------------------------------------------------------
+ if ( $rOpts_break_at_old_comma_breakpoints
+ || $interrupted
+ || $i_opening_paren < 0 )
+ {
+ $self->copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
+ return;
+ }
+
+ #---------------------------------------------------------------
+ # Looks like a list of items. We have to look at it and size it up.
+ #---------------------------------------------------------------
+
+ my $opening_token = $tokens_to_go[$i_opening_paren];
+ my $opening_is_in_block = $self->is_in_block_by_i($i_opening_paren);
+
+ #-------------------------------------------------------------------
+ # Return if this will fit on one line
+ #-------------------------------------------------------------------
+
+ # The -bbxi=2 parameters can add an extra hidden level of indentation;
+ # this needs a tolerance to avoid instability. Fixes b1259, 1260.
+ my $tol = 0;
+ if ( $break_before_container_types{$opening_token}
+ && $container_indentation_options{$opening_token}
+ && $container_indentation_options{$opening_token} == 2 )
+ {
+ $tol = $rOpts_indent_columns;
+ }
+
+ my $i_opening_minus = $self->find_token_starting_list($i_opening_paren);
+ return
+ unless $self->excess_line_length( $i_opening_minus, $i_closing_paren )
+ + $tol > 0;
+
+ #-------------------------------------------------------------------
+ # Now we know that this block spans multiple lines; we have to set
+ # at least one breakpoint -- real or fake -- as a signal to break
+ # open any outer containers.
+ #-------------------------------------------------------------------
+ set_fake_breakpoint();
+
+ # be sure we do not extend beyond the current list length
+ if ( $i_effective_last_comma >= $max_index_to_go ) {
+ $i_effective_last_comma = $max_index_to_go - 1;
+ }
+
+ # Set a flag indicating if we need to break open to keep -lp
+ # items aligned. This is necessary if any of the list terms
+ # exceeds the available space after the '('.
+ my $need_lp_break_open = $must_break_open;
+ if ( $is_lp_formatting && !$must_break_open ) {
+ my $columns_if_unbroken =
+ $maximum_line_length_at_level[ $levels_to_go[$i_opening_minus] ]
+ - total_line_length( $i_opening_minus, $i_opening_paren );
+ $need_lp_break_open =
+ ( $max_length[0] > $columns_if_unbroken )
+ || ( $max_length[1] > $columns_if_unbroken )
+ || ( $first_term_length > $columns_if_unbroken );
+ }
+
+ # Specify if the list must have an even number of fields or not.
+ # It is generally safest to assume an even number, because the
+ # list items might be a hash list. But if we can be sure that
+ # it is not a hash, then we can allow an odd number for more
+ # flexibility.
+ my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count
+
+ if ( $identifier_count >= $item_count - 1
+ || $is_assignment{$next_nonblank_type}
+ || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
+ )
+ {
+ $odd_or_even = 1;
+ }
+
+ # do we have a long first term which should be
+ # left on a line by itself?
+ my $use_separate_first_term = (
+ $odd_or_even == 1 # only if we can use 1 field/line
+ && $item_count > 3 # need several items
+ && $first_term_length >
+ 2 * $max_length[0] - 2 # need long first term
+ && $first_term_length >
+ 2 * $max_length[1] - 2 # need long first term
+ );
+
+ # or do we know from the type of list that the first term should
+ # be placed alone?
+ if ( !$use_separate_first_term ) {
+ if ( $is_keyword_with_special_leading_term{$list_type} ) {
+ $use_separate_first_term = 1;
+
+ # should the container be broken open?
+ if ( $item_count < 3 ) {
+ if ( $i_first_comma - $i_opening_paren < 4 ) {
+ ${$rdo_not_break_apart} = 1;
+ }
+ }
+ elsif ($first_term_length < 20
+ && $i_first_comma - $i_opening_paren < 4 )
+ {
+ my $columns = table_columns_available($i_first_comma);
+ if ( $first_term_length < $columns ) {
+ ${$rdo_not_break_apart} = 1;
+ }
+ }
+ }
+ }
+
+ # if so,
+ if ($use_separate_first_term) {
+
+ # ..set a break and update starting values
+ $use_separate_first_term = 1;
+ $self->set_forced_breakpoint($i_first_comma);
+ $i_opening_paren = $i_first_comma;
+ $i_first_comma = $rcomma_index->[1];
+ $item_count--;
+ return if $comma_count == 1;
+ shift @item_lengths;
+ shift @i_term_begin;
+ shift @i_term_end;
+ shift @i_term_comma;
+ }
+
+ # if not, update the metrics to include the first term
+ else {
+ if ( $first_term_length > $max_length[0] ) {
+ $max_length[0] = $first_term_length;
+ }
+ }
+
+ # Field width parameters
+ my $pair_width = ( $max_length[0] + $max_length[1] );
+ my $max_width =
+ ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
+
+ # Number of free columns across the page width for laying out tables
+ my $columns = table_columns_available($i_first_comma);
+
+ # Patch for b1210 and b1216-b1218 when -vmll is set. If we are unable
+ # to break after an opening paren, then the maximum line length for the
+ # first line could be less than the later lines. So we need to reduce
+ # the line length. Normally, we will get a break after an opening
+ # paren, but in some cases we might not.
+ if ( $rOpts_variable_maximum_line_length
+ && $tokens_to_go[$i_opening_paren] eq '('
+ && @i_term_begin )
+ ##&& !$old_breakpoint_to_go[$i_opening_paren] ) ## in b1210 patch
+ {
+ my $ib = $i_term_begin[0];
+ my $type = $types_to_go[$ib];
+
+ # So far, the only known instance of this problem is when
+ # a bareword follows an opening paren with -vmll
+ if ( $type eq 'w' ) {
+
+ # If a line starts with paren+space+terms, then its max length
+ # could be up to ci+2-i spaces less than if the term went out
+ # on a line after the paren. So..
+ my $tol = max( 0,
+ 2 + $rOpts_continuation_indentation -
+ $rOpts_indent_columns );
+ $columns = max( 0, $columns - $tol );
+
+ ## Here is the original b1210 fix, but it failed on b1216-b1218
+ ##my $columns2 = table_columns_available($i_opening_paren);
+ ##$columns = min( $columns, $columns2 );
+ }
+ }
+
+ # Estimated maximum number of fields which fit this space
+ # This will be our first guess
+ my $number_of_fields_max =
+ maximum_number_of_fields( $columns, $odd_or_even, $max_width,
+ $pair_width );
+ my $number_of_fields = $number_of_fields_max;
+
+ # Find the best-looking number of fields
+ # and make this our second guess if possible
+ my ( $number_of_fields_best, $ri_ragged_break_list,
+ $new_identifier_count )
+ = $self->study_list_complexity( \@i_term_begin, \@i_term_end,
+ \@item_lengths, $max_width );
+
+ if ( $number_of_fields_best != 0
+ && $number_of_fields_best < $number_of_fields_max )
+ {
+ $number_of_fields = $number_of_fields_best;
+ }
+
+ # ----------------------------------------------------------------------
+ # If we are crowded and the -lp option is being used, try to
+ # undo some indentation
+ # ----------------------------------------------------------------------
+ if (
+ $is_lp_formatting
+ && (
+ $number_of_fields == 0
+ || ( $number_of_fields == 1
+ && $number_of_fields != $number_of_fields_best )
+ )
+ )
+ {
+ my $available_spaces =
+ $self->get_available_spaces_to_go($i_first_comma);
+ if ( $available_spaces > 0 ) {
+
+ my $spaces_wanted = $max_width - $columns; # for 1 field
+
+ if ( $number_of_fields_best == 0 ) {
+ $number_of_fields_best =
+ get_maximum_fields_wanted( \@item_lengths );
+ }
+
+ if ( $number_of_fields_best != 1 ) {
+ my $spaces_wanted_2 =
+ 1 + $pair_width - $columns; # for 2 fields
+ if ( $available_spaces > $spaces_wanted_2 ) {
+ $spaces_wanted = $spaces_wanted_2;
+ }
+ }
+
+ if ( $spaces_wanted > 0 ) {
+ my $deleted_spaces =
+ $self->reduce_lp_indentation( $i_first_comma,
+ $spaces_wanted );
+
+ # redo the math
+ if ( $deleted_spaces > 0 ) {
+ $columns = table_columns_available($i_first_comma);
+ $number_of_fields_max =
+ maximum_number_of_fields( $columns, $odd_or_even,
+ $max_width, $pair_width );
+ $number_of_fields = $number_of_fields_max;
+
+ if ( $number_of_fields_best == 1
+ && $number_of_fields >= 1 )
+ {
+ $number_of_fields = $number_of_fields_best;
+ }
+ }
+ }
+ }
+ }
+
+ # try for one column if two won't work
+ if ( $number_of_fields <= 0 ) {
+ $number_of_fields = int( $columns / $max_width );
+ }
+
+ # The user can place an upper bound on the number of fields,
+ # which can be useful for doing maintenance on tables
+ if ( $rOpts_maximum_fields_per_table
+ && $number_of_fields > $rOpts_maximum_fields_per_table )
+ {
+ $number_of_fields = $rOpts_maximum_fields_per_table;
+ }
+
+ # How many columns (characters) and lines would this container take
+ # if no additional whitespace were added?
+ my $packed_columns = token_sequence_length( $i_opening_paren + 1,
+ $i_effective_last_comma + 1 );
+ if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
+ my $packed_lines = 1 + int( $packed_columns / $columns );
+
+ # are we an item contained in an outer list?
+ my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
+
+ if ( $number_of_fields <= 0 ) {
+
+# #---------------------------------------------------------------
+# # We're in trouble. We can't find a single field width that works.
+# # There is no simple answer here; we may have a single long list
+# # item, or many.
+# #---------------------------------------------------------------
+#
+# In many cases, it may be best to not force a break if there is just one
+# comma, because the standard continuation break logic will do a better
+# job without it.
+#
+# In the common case that all but one of the terms can fit
+# on a single line, it may look better not to break open the
+# containing parens. Consider, for example
+#
+# $color =
+# join ( '/',
+# sort { $color_value{$::a} <=> $color_value{$::b}; }
+# keys %colors );
+#
+# which will look like this with the container broken:
+#
+# $color = join (
+# '/',
+# sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
+# );
+#
+# Here is an example of this rule for a long last term:
+#
+# log_message( 0, 256, 128,
+# "Number of routes in adj-RIB-in to be considered: $peercount" );
+#
+# And here is an example with a long first term:
+#
+# $s = sprintf(
+# "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
+# $r, $pu, $ps, $cu, $cs, $tt
+# )
+# if $style eq 'all';
+
+ my $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
+ my $long_last_term =
+ $self->excess_line_length( 0, $i_last_comma ) <= 0;
+ my $long_first_term =
+ $self->excess_line_length( $i_first_comma + 1, $max_index_to_go )
+ <= 0;
+
+ # break at every comma ...
+ if (
+
+ # if requested by user or is best looking
+ $number_of_fields_best == 1
+
+ # or if this is a sublist of a larger list
+ || $in_hierarchical_list
+
+ # or if multiple commas and we don't have a long first or last
+ # term
+ || ( $comma_count > 1
+ && !( $long_last_term || $long_first_term ) )
+ )
+ {
+ foreach ( 0 .. $comma_count - 1 ) {
+ $self->set_forced_breakpoint( $rcomma_index->[$_] );
+ }
+ }
+ elsif ($long_last_term) {
+
+ $self->set_forced_breakpoint($i_last_comma);
+ ${$rdo_not_break_apart} = 1 unless $must_break_open;
+ }
+ elsif ($long_first_term) {
+
+ $self->set_forced_breakpoint($i_first_comma);
+ }
+ else {
+
+ # let breaks be defined by default bond strength logic
+ }
+ return;
+ }
+
+ # --------------------------------------------------------
+ # We have a tentative field count that seems to work.
+ # How many lines will this require?
+ # --------------------------------------------------------
+ my $formatted_lines = $item_count / ($number_of_fields);
+ if ( $formatted_lines != int $formatted_lines ) {
+ $formatted_lines = 1 + int $formatted_lines;
+ }
+
+ # So far we've been trying to fill out to the right margin. But
+ # compact tables are easier to read, so let's see if we can use fewer
+ # fields without increasing the number of lines.
+ $number_of_fields =
+ compactify_table( $item_count, $number_of_fields, $formatted_lines,
+ $odd_or_even );
+
+ # How many spaces across the page will we fill?
+ my $columns_per_line =
+ ( int $number_of_fields / 2 ) * $pair_width +
+ ( $number_of_fields % 2 ) * $max_width;
+
+ my $formatted_columns;
+
+ if ( $number_of_fields > 1 ) {
+ $formatted_columns =
+ ( $pair_width * ( int( $item_count / 2 ) ) +
+ ( $item_count % 2 ) * $max_width );
+ }
+ else {
+ $formatted_columns = $max_width * $item_count;
+ }
+ if ( $formatted_columns < $packed_columns ) {
+ $formatted_columns = $packed_columns;
+ }
+
+ my $unused_columns = $formatted_columns - $packed_columns;
+
+ # set some empirical parameters to help decide if we should try to
+ # align; high sparsity does not look good, especially with few lines
+ my $sparsity = ($unused_columns) / ($formatted_columns);
+ my $max_allowed_sparsity =
+ ( $item_count < 3 ) ? 0.1
+ : ( $packed_lines == 1 ) ? 0.15
+ : ( $packed_lines == 2 ) ? 0.4
+ : 0.7;
+
+ my $two_line_word_wrap_ok;
+ if ( $opening_token eq '(' ) {
+
+ # default is to allow wrapping of short paren lists
+ $two_line_word_wrap_ok = 1;
+
+ # but turn off word wrap where requested
+ if ($rOpts_break_open_paren_list) {
+
+ # This parameter is a one-character flag, as follows:
+ # '0' matches no parens -> break open NOT OK -> word wrap OK
+ # '1' matches all parens -> break open OK -> word wrap NOT OK
+ # Other values are the same as used by the weld-exclusion-list
+ my $flag = $rOpts_break_open_paren_list;
+ if ( $flag eq '*'
+ || $flag eq '1' )
+ {
+ $two_line_word_wrap_ok = 0;
+ }
+ elsif ( $flag eq '0' ) {
+ $two_line_word_wrap_ok = 1;
+ }
+ else {
+ my $KK = $K_to_go[$i_opening_paren];
+ $two_line_word_wrap_ok =
+ !$self->match_paren_flag( $KK, $flag );
+ }
+ }
+ }
+
+ # Begin check for shortcut methods, which avoid treating a list
+ # as a table for relatively small parenthesized lists. These
+ # are usually easier to read if not formatted as tables.
+ if (
+ $packed_lines <= 2 # probably can fit in 2 lines
+ && $item_count < 9 # doesn't have too many items
+ && $opening_is_in_block # not a sub-container
+ && $two_line_word_wrap_ok # ok to wrap this paren list
+ ##&& $opening_token eq '(' # is paren list
+ )
+ {
+
+ # Shortcut method 1: for -lp and just one comma:
+ # This is a no-brainer, just break at the comma.
+ if (
+ $is_lp_formatting # -lp
+ && $item_count == 2 # two items, one comma
+ && !$must_break_open
+ )
+ {
+ my $i_break = $rcomma_index->[0];
+ $self->set_forced_breakpoint($i_break);
+ ${$rdo_not_break_apart} = 1;
+ return;
+
+ }
+
+ # method 2 is for most small ragged lists which might look
+ # best if not displayed as a table.
+ if (
+ ( $number_of_fields == 2 && $item_count == 3 )
+ || (
+ $new_identifier_count > 0 # isn't all quotes
+ && $sparsity > 0.15
+ ) # would be fairly spaced gaps if aligned
+ )
+ {
+
+ my $break_count = $self->set_ragged_breakpoints( \@i_term_comma,
+ $ri_ragged_break_list );
+ ++$break_count if ($use_separate_first_term);
+
+ # NOTE: we should really use the true break count here,
+ # which can be greater if there are large terms and
+ # little space, but usually this will work well enough.
+ unless ($must_break_open) {
+
+ if ( $break_count <= 1 ) {
+ ${$rdo_not_break_apart} = 1;
+ }
+ elsif ( $is_lp_formatting && !$need_lp_break_open ) {
+ ${$rdo_not_break_apart} = 1;
+ }
+ }
+ return;
+ }
+
+ } ## end shortcut methods
+
+ # debug stuff
+ DEBUG_SPARSE && do {
+ print STDOUT
+"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
+
+ };
+
+ #---------------------------------------------------------------
+ # Compound List Rule 2:
+ # If this list is too long for one line, and it is an item of a
+ # larger list, then we must format it, regardless of sparsity
+ # (ian.t). One reason that we have to do this is to trigger
+ # Compound List Rule 1, above, which causes breaks at all commas of
+ # all outer lists. In this way, the structure will be properly
+ # displayed.
+ #---------------------------------------------------------------
+
+ # Decide if this list is too long for one line unless broken
+ my $total_columns = table_columns_available($i_opening_paren);
+ my $too_long = $packed_columns > $total_columns;
+
+ # For a paren list, include the length of the token just before the
+ # '(' because this is likely a sub call, and we would have to
+ # include the sub name on the same line as the list. This is still
+ # imprecise, but not too bad. (steve.t)
+ if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
+
+ $too_long = $self->excess_line_length( $i_opening_minus,
+ $i_effective_last_comma + 1 ) > 0;
+ }
+
+ # FIXME: For an item after a '=>', try to include the length of the
+ # thing before the '=>'. This is crude and should be improved by
+ # actually looking back token by token.
+ if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
+ my $i_opening_minus = $i_opening_paren - 4;
+ if ( $i_opening_minus >= 0 ) {
+ $too_long = $self->excess_line_length( $i_opening_minus,
+ $i_effective_last_comma + 1 ) > 0;
+ }
+ }
+
+ # Always break lists contained in '[' and '{' if too long for 1 line,
+ # and always break lists which are too long and part of a more complex
+ # structure.
+ my $must_break_open_container = $must_break_open
+ || ( $too_long
+ && ( $in_hierarchical_list || !$two_line_word_wrap_ok ) );
+
+#print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
+
+ #---------------------------------------------------------------
+ # The main decision:
+ # Now decide if we will align the data into aligned columns. Do not
+ # attempt to align columns if this is a tiny table or it would be
+ # too spaced. It seems that the more packed lines we have, the
+ # sparser the list that can be allowed and still look ok.
+ #---------------------------------------------------------------
+
+ if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
+ || ( $formatted_lines < 2 )
+ || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
+ )
+ {
+
+ #---------------------------------------------------------------
+ # too sparse: would look ugly if aligned in a table;
+ #---------------------------------------------------------------
+
+ # use old breakpoints if this is a 'big' list
+ if ( $packed_lines > 2 && $item_count > 10 ) {
+ write_logfile_entry("List sparse: using old breakpoints\n");
+ $self->copy_old_breakpoints( $i_first_comma, $i_last_comma );
+ }
+
+ # let the continuation logic handle it if 2 lines
+ else {
+
+ my $break_count = $self->set_ragged_breakpoints( \@i_term_comma,
+ $ri_ragged_break_list );
+ ++$break_count if ($use_separate_first_term);
+
+ unless ($must_break_open_container) {
+ if ( $break_count <= 1 ) {
+ ${$rdo_not_break_apart} = 1;
+ }
+ elsif ( $is_lp_formatting && !$need_lp_break_open ) {
+ ${$rdo_not_break_apart} = 1;
+ }
+ }
+ }
+ return;
+ }
+
+ #---------------------------------------------------------------
+ # go ahead and format as a table
+ #---------------------------------------------------------------
+ write_logfile_entry(
+ "List: auto formatting with $number_of_fields fields/row\n");
+
+ my $j_first_break =
+ $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
+
+ for (
+ my $j = $j_first_break ;
+ $j < $comma_count ;
+ $j += $number_of_fields
+ )
+ {
+ my $i = $rcomma_index->[$j];
+ $self->set_forced_breakpoint($i);
+ }
+ return;
+ }
+} ## end closure set_comma_breakpoints_do
+
+sub study_list_complexity {
+
+ # Look for complex tables which should be formatted with one term per line.
+ # Returns the following:
+ #
+ # \@i_ragged_break_list = list of good breakpoints to avoid lines
+ # which are hard to read
+ # $number_of_fields_best = suggested number of fields based on
+ # complexity; = 0 if any number may be used.
+ #
+ my ( $self, $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
+ my $item_count = @{$ri_term_begin};
+ my $complex_item_count = 0;
+ my $number_of_fields_best = $rOpts_maximum_fields_per_table;
+ my $i_max = @{$ritem_lengths} - 1;
+ ##my @item_complexity;
+
+ my $i_last_last_break = -3;
+ my $i_last_break = -2;
+ my @i_ragged_break_list;
+
+ my $definitely_complex = 30;
+ my $definitely_simple = 12;
+ my $quote_count = 0;
+
+ for my $i ( 0 .. $i_max ) {
+ my $ib = $ri_term_begin->[$i];
+ my $ie = $ri_term_end->[$i];
+
+ # define complexity: start with the actual term length
+ my $weighted_length = ( $ritem_lengths->[$i] - 2 );
+
+ ##TBD: join types here and check for variations
+ ##my $str=join "", @tokens_to_go[$ib..$ie];
+
+ my $is_quote = 0;
+ if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
+ $is_quote = 1;
+ $quote_count++;
+ }
+ elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
+ $quote_count++;
+ }
+
+ if ( $ib eq $ie ) {
+ if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
+ $complex_item_count++;
+ $weighted_length *= 2;
+ }
+ else {
+ }
+ }
+ else {
+ if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
+ $complex_item_count++;
+ $weighted_length *= 2;
+ }
+ if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
+ $weighted_length += 4;
+ }
+ }
+
+ # add weight for extra tokens.
+ $weighted_length += 2 * ( $ie - $ib );
+
+## my $BUB = join '', @tokens_to_go[$ib..$ie];
+## print "# COMPLEXITY:$weighted_length $BUB\n";
+
+##push @item_complexity, $weighted_length;
+
+ # now mark a ragged break after this item it if it is 'long and
+ # complex':
+ if ( $weighted_length >= $definitely_complex ) {
+
+ # if we broke after the previous term
+ # then break before it too
+ if ( $i_last_break == $i - 1
+ && $i > 1
+ && $i_last_last_break != $i - 2 )
+ {
+
+ ## FIXME: don't strand a small term
+ pop @i_ragged_break_list;
+ push @i_ragged_break_list, $i - 2;
+ push @i_ragged_break_list, $i - 1;
+ }
+
+ push @i_ragged_break_list, $i;
+ $i_last_last_break = $i_last_break;
+ $i_last_break = $i;
+ }
+
+ # don't break before a small last term -- it will
+ # not look good on a line by itself.
+ elsif ($i == $i_max
+ && $i_last_break == $i - 1
+ && $weighted_length <= $definitely_simple )
+ {
+ pop @i_ragged_break_list;
+ }
+ }
+
+ my $identifier_count = $i_max + 1 - $quote_count;
+
+ # Need more tuning here..
+ if ( $max_width > 12
+ && $complex_item_count > $item_count / 2
+ && $number_of_fields_best != 2 )
+ {
+ $number_of_fields_best = 1;
+ }
+
+ return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
+}
+
+sub get_maximum_fields_wanted {
+
+ # Not all tables look good with more than one field of items.
+ # This routine looks at a table and decides if it should be
+ # formatted with just one field or not.
+ # This coding is still under development.
+ my ($ritem_lengths) = @_;
+
+ my $number_of_fields_best = 0;
+
+ # For just a few items, we tentatively assume just 1 field.
+ my $item_count = @{$ritem_lengths};
+ if ( $item_count <= 5 ) {
+ $number_of_fields_best = 1;
+ }
+
+ # For larger tables, look at it both ways and see what looks best
+ else {
+
+ my $is_odd = 1;
+ my @max_length = ( 0, 0 );
+ my @last_length_2 = ( undef, undef );
+ my @first_length_2 = ( undef, undef );
+ my $last_length = undef;
+ my $total_variation_1 = 0;
+ my $total_variation_2 = 0;
+ my @total_variation_2 = ( 0, 0 );
+
+ foreach my $j ( 0 .. $item_count - 1 ) {
+
+ $is_odd = 1 - $is_odd;
+ my $length = $ritem_lengths->[$j];
+ if ( $length > $max_length[$is_odd] ) {
+ $max_length[$is_odd] = $length;
+ }
+
+ if ( defined($last_length) ) {
+ my $dl = abs( $length - $last_length );
+ $total_variation_1 += $dl;
+ }
+ $last_length = $length;
+
+ my $ll = $last_length_2[$is_odd];
+ if ( defined($ll) ) {
+ my $dl = abs( $length - $ll );
+ $total_variation_2[$is_odd] += $dl;
+ }
+ else {
+ $first_length_2[$is_odd] = $length;
+ }
+ $last_length_2[$is_odd] = $length;
+ }
+ $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
+
+ my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
+ unless ( $total_variation_2 < $factor * $total_variation_1 ) {
+ $number_of_fields_best = 1;
+ }
+ }
+ return ($number_of_fields_best);
+}
+
+sub table_columns_available {
+ my $i_first_comma = shift;
+ my $columns =
+ $maximum_line_length_at_level[ $levels_to_go[$i_first_comma] ] -
+ leading_spaces_to_go($i_first_comma);
+
+ # Patch: the vertical formatter does not line up lines whose lengths
+ # exactly equal the available line length because of allowances
+ # that must be made for side comments. Therefore, the number of
+ # available columns is reduced by 1 character.
+ $columns -= 1;
+ return $columns;
+}
+
+sub maximum_number_of_fields {
+
+ # how many fields will fit in the available space?
+ my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
+ my $max_pairs = int( $columns / $pair_width );
+ my $number_of_fields = $max_pairs * 2;
+ if ( $odd_or_even == 1
+ && $max_pairs * $pair_width + $max_width <= $columns )
+ {
+ $number_of_fields++;
+ }
+ return $number_of_fields;
+}
+
+sub compactify_table {
+
+ # given a table with a certain number of fields and a certain number
+ # of lines, see if reducing the number of fields will make it look
+ # better.
+ my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
+ if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
+ my $min_fields;
+
+ for (
+ $min_fields = $number_of_fields ;
+ $min_fields >= $odd_or_even
+ && $min_fields * $formatted_lines >= $item_count ;
+ $min_fields -= $odd_or_even
+ )
+ {
+ $number_of_fields = $min_fields;
+ }
+ }
+ return $number_of_fields;
+}
+
+sub set_ragged_breakpoints {
+
+ # Set breakpoints in a list that cannot be formatted nicely as a
+ # table.
+ my ( $self, $ri_term_comma, $ri_ragged_break_list ) = @_;
+
+ my $break_count = 0;
+ foreach ( @{$ri_ragged_break_list} ) {
+ my $j = $ri_term_comma->[$_];
+ if ($j) {
+ $self->set_forced_breakpoint($j);
+ $break_count++;
+ }
+ }
+ return $break_count;
+}
+
+sub copy_old_breakpoints {
+ my ( $self, $i_first_comma, $i_last_comma ) = @_;
+ for my $i ( $i_first_comma .. $i_last_comma ) {
+ if ( $old_breakpoint_to_go[$i] ) {
+ $self->set_forced_breakpoint($i);
+ }
+ }
+ return;
+}
+
+sub set_nobreaks {
+ my ( $self, $i, $j ) = @_;
+ if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
+
+ 0 && do {
+ my ( $a, $b, $c ) = caller();
+ my $forced_breakpoint_count = get_forced_breakpoint_count();
+ print STDOUT
+"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
+ };
+
+ @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
+ }
+
+ # shouldn't happen; non-critical error
+ else {
+ 0 && do {
+ my ( $a, $b, $c ) = caller();
+ print STDOUT
+ "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
+ };
+ }
+ return;
+}
+
+###############################################
+# CODE SECTION 12: Code for setting indentation
+###############################################
+
+sub token_sequence_length {
+
+ # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
+ # returns 0 if $ibeg > $iend (shouldn't happen)
+ my ( $ibeg, $iend ) = @_;
+ return 0 if ( !defined($iend) || $iend < 0 || $ibeg > $iend );
+ return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 );
+ return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
+}
+
+sub total_line_length {
+
+ # return length of a line of tokens ($ibeg .. $iend)
+ my ( $ibeg, $iend ) = @_;
+
+ # original coding:
+ #return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
+
+ # this is basically sub 'leading_spaces_to_go':
+ my $indentation = $leading_spaces_to_go[$ibeg];
+ if ( ref($indentation) ) { $indentation = $indentation->get_spaces() }
+
+ return $indentation + $summed_lengths_to_go[ $iend + 1 ] -
+ $summed_lengths_to_go[$ibeg];
+}
+
+sub excess_line_length {
+
+ # return number of characters by which a line of tokens ($ibeg..$iend)
+ # exceeds the allowable line length.
+
+ # NOTE: Profiling shows that this is a critical routine for efficiency.
+ # Therefore I have eliminated additional calls to subs from it.
+ my ( $self, $ibeg, $iend, $ignore_right_weld ) = @_;
+
+ # Original expression for line length
+ ##$length = leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
+
+ # This is basically sub 'leading_spaces_to_go':
+ my $indentation = $leading_spaces_to_go[$ibeg];
+ if ( ref($indentation) ) { $indentation = $indentation->get_spaces() }
+
+ my $length =
+ $indentation +
+ $summed_lengths_to_go[ $iend + 1 ] -
+ $summed_lengths_to_go[$ibeg];
+
+ # Include right weld lengths unless requested not to.
+ if ( $total_weld_count
+ && !$ignore_right_weld
+ && $type_sequence_to_go[$iend] )
+ {
+ my $wr = $self->[_rweld_len_right_at_K_]->{ $K_to_go[$iend] };
+ $length += $wr if defined($wr);
+ }
+
+ # return the excess
+ return $length - $maximum_line_length_at_level[ $levels_to_go[$ibeg] ];
+}
+
+sub get_spaces {
+
+ # return the number of leading spaces associated with an indentation
+ # variable $indentation is either a constant number of spaces or an object
+ # with a get_spaces method.
+ my $indentation = shift;
+ return ref($indentation) ? $indentation->get_spaces() : $indentation;
+}
+
+sub get_recoverable_spaces {
+
+ # return the number of spaces (+ means shift right, - means shift left)
+ # that we would like to shift a group of lines with the same indentation
+ # to get them to line up with their opening parens
+ my $indentation = shift;
+ return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
+}
+
+sub get_available_spaces_to_go {
+
+ my ( $self, $ii ) = @_;
+ my $item = $leading_spaces_to_go[$ii];
+
+ # return the number of available leading spaces associated with an
+ # indentation variable. $indentation is either a constant number of
+ # spaces or an object with a get_available_spaces method.
+ return ref($item) ? $item->get_available_spaces() : 0;
+}
+
+{ ## begin closure set_lp_indentation
+
+ use constant DEBUG_LP => 0;
+
+ # Stack of -lp index objects which survives between batches.
+ my $rLP;
+ my $max_lp_stack;
+
+ # The predicted position of the next opening container which may start
+ # an -lp indentation level. This survives between batches.
+ my $lp_position_predictor;
+
+ # A level at which the lp format becomes too highly stressed to continue
+ my $lp_cutoff_level;
+
+ BEGIN {
+
+ # Index names for the -lp stack variables.
+ # Do not combine with other BEGIN blocks (c101).
+
+ my $i = 0;
+ use constant {
+ _lp_ci_level_ => $i++,
+ _lp_level_ => $i++,
+ _lp_object_ => $i++,
+ _lp_container_seqno_ => $i++,
+ _lp_space_count_ => $i++,
+ };
+ }
+
+ sub initialize_lp_vars {
+
+ # initialize gnu variables for a new file;
+ # must be called once at the start of a new file.
+
+ $lp_position_predictor = 0;
+ $max_lp_stack = 0;
+ $lp_cutoff_level = min( $stress_level_alpha, $stress_level_beta + 2 );
+
+ # we can turn off -lp if all levels will be at or above the cutoff
+ if ( $lp_cutoff_level <= 1 ) {
+ $rOpts_line_up_parentheses = 0;
+ $rOpts_extended_line_up_parentheses = 0;
+ }
+
+ $rLP = [];
+
+ # initialize the leading whitespace stack to negative levels
+ # so that we can never run off the end of the stack
+ $rLP->[$max_lp_stack]->[_lp_ci_level_] = -1;
+ $rLP->[$max_lp_stack]->[_lp_level_] = -1;
+ $rLP->[$max_lp_stack]->[_lp_object_] = undef;
+ $rLP->[$max_lp_stack]->[_lp_container_seqno_] = SEQ_ROOT;
+ $rLP->[$max_lp_stack]->[_lp_space_count_] = 0;
+
+ return;
+ }
+
+ # hashes for efficient testing
+ my %hash_test1;
+ my %hash_test2;
+ my %hash_test3;
+
+ BEGIN {
+ my @q = qw< } ) ] >;
+ @hash_test1{@q} = (1) x scalar(@q);
+ @q = qw(: ? f);
+ push @q, ',';
+ @hash_test2{@q} = (1) x scalar(@q);
+ @q = qw( . || && );
+ @hash_test3{@q} = (1) x scalar(@q);
+ }
+
+ sub set_lp_indentation {
+
+ #------------------------------------------------------------------
+ # Define the leading whitespace for all tokens in the current batch
+ # when the -lp formatting is selected.
+ #------------------------------------------------------------------
+
+ my ($self) = @_;
+
+ return unless ($rOpts_line_up_parentheses);
+ return unless ( defined($max_index_to_go) && $max_index_to_go >= 0 );
+
+ # List of -lp indentation objects created in this batch
+ my $rlp_object_list = [];
+ my $max_lp_object_list = UNDEFINED_INDEX;
+
+ my %last_lp_equals;
+ my %lp_comma_count;
+ my %lp_arrow_count;
+ my $ii_begin_line = 0;
+
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
+ my $rbreak_container = $self->[_rbreak_container_];
+ my $rshort_nested = $self->[_rshort_nested_];
+ my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my $starting_in_quote = $self->[_this_batch_]->[_starting_in_quote_];
+ my $K_opening_container = $self->[_K_opening_container_]; ##TESTING
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $rlp_object_by_seqno = $self->[_rlp_object_by_seqno_];
+ my $radjusted_levels = $self->[_radjusted_levels_];
+ my $rbreak_before_container_by_seqno =
+ $self->[_rbreak_before_container_by_seqno_];
+ my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
+
+ my $nws = @{$radjusted_levels};
+ my $imin = 0;
+
+ # The 'starting_in_quote' flag means that the first token is the first
+ # token of a line and it is also the continuation of some kind of
+ # multi-line quote or pattern. It must have no added leading
+ # whitespace, so we can skip it.
+ if ($starting_in_quote) {
+ $imin += 1;
+ }
+
+ my $K_last_nonblank;
+ my $Kpnb = $K_to_go[0] - 1;
+ if ( $Kpnb > 0 && $rLL->[$Kpnb]->[_TYPE_] eq 'b' ) {
+ $Kpnb -= 1;
+ }
+ if ( $Kpnb >= 0 && $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) {
+ $K_last_nonblank = $Kpnb;
+ }
+
+ my $last_nonblank_token = '';
+ my $last_nonblank_type = '';
+ my $last_last_nonblank_type = '';
+
+ if ( defined($K_last_nonblank) ) {
+ $last_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
+ $last_nonblank_type = $rLL->[$K_last_nonblank]->[_TYPE_];
+ }
+
+ my ( $space_count, $current_level, $current_ci_level, $in_lp_mode );
+ my $stack_changed = 1;
+
+ #-----------------------------------
+ # Loop over all tokens in this batch
+ #-----------------------------------
+ foreach my $ii ( $imin .. $max_index_to_go ) {
+
+ my $KK = $K_to_go[$ii];
+ my $type = $types_to_go[$ii];
+ my $token = $tokens_to_go[$ii];
+ my $level = $levels_to_go[$ii];
+ my $ci_level = $ci_levels_to_go[$ii];
+ my $total_depth = $nesting_depth_to_go[$ii];
+
+ #--------------------------------------------------
+ # Adjust levels if necessary to recycle whitespace:
+ #--------------------------------------------------
+ if ( defined($radjusted_levels) && @{$radjusted_levels} == $Klimit )
+ {
+ $level = $radjusted_levels->[$KK];
+ if ( $level < 0 ) { $level = 0 } # note: this should not happen
+ }
+
+ # get the top state from the stack if it has changed
+ if ($stack_changed) {
+ my $rLP_top = $rLP->[$max_lp_stack];
+ my $lp_object = $rLP_top->[_lp_object_];
+ if ($lp_object) {
+ ( $space_count, $current_level, $current_ci_level ) =
+ @{ $lp_object->get_spaces_level_ci() };
+ }
+ else {
+ $current_ci_level = $rLP_top->[_lp_ci_level_];
+ $current_level = $rLP_top->[_lp_level_];
+ $space_count = $rLP_top->[_lp_space_count_];
+ }
+ $stack_changed = 0;
+ }
+
+ #------------------------------
+ # update the position predictor
+ #------------------------------
+ if ( $type eq '{' || $type eq '(' ) {
+
+ $lp_comma_count{ $total_depth + 1 } = 0;
+ $lp_arrow_count{ $total_depth + 1 } = 0;
+
+ # If we come to an opening token after an '=' token of some
+ # type, see if it would be helpful to 'break' after the '=' to
+ # save space
+ my $last_equals = $last_lp_equals{$total_depth};
+ if ( $last_equals && $last_equals > $ii_begin_line ) {
+
+ my $seqno = $type_sequence_to_go[$ii];
+
+ # find the position if we break at the '='
+ my $i_test = $last_equals;
+
+ # Fix for issue b1229, check for break before
+ if ( $want_break_before{ $types_to_go[$i_test] } ) {
+ if ( $i_test > 0 ) { $i_test-- }
+ }
+ elsif ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
+
+ # TESTING
+ ##my $too_close = ($i_test==$ii-1);
+
+ my $test_position = total_line_length( $i_test, $ii );
+ my $mll =
+ $maximum_line_length_at_level[ $levels_to_go[$i_test] ];
+
+ #------------------------------------------------------
+ # Break if structure will reach the maximum line length
+ #------------------------------------------------------
+
+ # Historically, -lp just used one-half line length here
+ my $len_increase = $rOpts_maximum_line_length / 2;
+
+ # For -xlp, we can also use the pre-computed lengths
+ my $min_len = $rcollapsed_length_by_seqno->{$seqno};
+ if ( $min_len && $min_len > $len_increase ) {
+ $len_increase = $min_len;
+ }
+
+ if (
+
+ # the equals is not just before an open paren (testing)
+ ##!$too_close &&
+
+ # if we might exceed the maximum line length
+ $lp_position_predictor + $len_increase > $mll
+
+ # if a -bbx flag WANTS a break before this opening token
+ || ( $seqno
+ && $rbreak_before_container_by_seqno->{$seqno} )
+
+ # or we are beyond the 1/4 point and there was an old
+ # break at an assignment (not '=>') [fix for b1035]
+ || (
+ $lp_position_predictor >
+ $mll - $rOpts_maximum_line_length * 3 / 4
+ && $types_to_go[$last_equals] ne '=>'
+ && (
+ $old_breakpoint_to_go[$last_equals]
+ || ( $last_equals > 0
+ && $old_breakpoint_to_go[ $last_equals - 1 ]
+ )
+ || ( $last_equals > 1
+ && $types_to_go[ $last_equals - 1 ] eq 'b'
+ && $old_breakpoint_to_go[ $last_equals - 2 ]
+ )
+ )
+ )
+ )
+ {
+
+ # then make the switch -- note that we do not set a
+ # real breakpoint here because we may not really need
+ # one; sub break_lists will do that if necessary.
+
+ my $Kc = $K_closing_container->{$seqno};
+ if (
+
+ # For -lp, only if the closing token is in this
+ # batch (c117). Otherwise it cannot be done by sub
+ # break_lists.
+ defined($Kc) && $Kc <= $K_to_go[$max_index_to_go]
+
+ # For -xlp, we only need one nonblank token after
+ # the opening token.
+ || $rOpts_extended_line_up_parentheses
+ )
+ {
+ $ii_begin_line = $i_test + 1;
+ $lp_position_predictor = $test_position;
+
+ #--------------------------------------------------
+ # Fix for an opening container terminating a batch:
+ #--------------------------------------------------
+ # To get alignment of a -lp container with its
+ # contents, we have to put a break after $i_test.
+ # For $ii<$max_index_to_go, this will be done by
+ # sub break_lists based on the indentation object.
+ # But for $ii=$max_index_to_go, the indentation
+ # object for this seqno will not be created until
+ # the next batch, so we have to set a break at
+ # $i_test right now in order to get one.
+ if ( $ii == $max_index_to_go
+ && !$block_type_to_go[$ii]
+ && $type eq '{'
+ && $seqno
+ && !$ris_excluded_lp_container->{$seqno} )
+ {
+ $self->set_forced_lp_break( $ii_begin_line,
+ $ii );
+ }
+ }
+ }
+ }
+ } ## end update position predictor
+
+ #------------------------
+ # Handle decreasing depth
+ #------------------------
+ # Note that one token may have both decreasing and then increasing
+ # depth. For example, (level, ci) can go from (1,1) to (2,0). So,
+ # in this example we would first go back to (1,0) then up to (2,0)
+ # in a single call.
+ if ( $level < $current_level || $ci_level < $current_ci_level ) {
+
+ # loop to find the first entry at or completely below this level
+ my ( $lev, $ci_lev );
+ while (1) {
+ if ($max_lp_stack) {
+
+ # save index of token which closes this level
+ if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
+ my $lp_object =
+ $rLP->[$max_lp_stack]->[_lp_object_];
+
+ $lp_object->set_closed($ii);
+
+ my $comma_count = 0;
+ my $arrow_count = 0;
+ if ( $type eq '}' || $type eq ')' ) {
+ $comma_count = $lp_comma_count{$total_depth};
+ $arrow_count = $lp_arrow_count{$total_depth};
+ $comma_count = 0 unless $comma_count;
+ $arrow_count = 0 unless $arrow_count;
+ }
+
+ $lp_object->set_comma_count($comma_count);
+ $lp_object->set_arrow_count($arrow_count);
+
+ # Undo any extra indentation if we saw no commas
+ my $available_spaces =
+ $lp_object->get_available_spaces();
+ my $K_start = $lp_object->get_K_begin_line();
+
+ if ( $available_spaces > 0
+ && $K_start >= $K_to_go[0]
+ && ( $comma_count <= 0 || $arrow_count > 0 ) )
+ {
- # or user wants to form long blocks with arrows
- || $rOpts_comma_arrow_breakpoints == 2
- )
+ my $i = $lp_object->get_lp_item_index();
+
+ # Safety check for a valid stack index. It
+ # should be ok because we just checked that the
+ # index K of the token associated with this
+ # indentation is in this batch.
+ if ( $i < 0 || $i > $max_lp_object_list ) {
+ if (DEVEL_MODE) {
+ my $lno = $rLL->[$KK]->[_LINE_INDEX_];
+ Fault(<<EOM);
+Program bug with -lp near line $lno. Stack index i=$i should be >=0 and <= max=$max_lp_object_list
+EOM
+ }
+ }
+ else {
+ if ( $arrow_count == 0 ) {
+ $rlp_object_list->[$i]
+ ->permanently_decrease_available_spaces
+ ($available_spaces);
+ }
+ else {
+ $rlp_object_list->[$i]
+ ->tentatively_decrease_available_spaces
+ ($available_spaces);
+ }
+ foreach
+ my $j ( $i + 1 .. $max_lp_object_list )
+ {
+ $rlp_object_list->[$j]
+ ->decrease_SPACES($available_spaces);
+ }
+ }
+ }
+ }
- # and we made some breakpoints between the opening and closing
- && ( $breakpoint_undo_stack[$current_depth] <
- $forced_breakpoint_undo_count )
+ # go down one level
+ --$max_lp_stack;
+
+ my $rLP_top = $rLP->[$max_lp_stack];
+ my $ci_lev = $rLP_top->[_lp_ci_level_];
+ my $lev = $rLP_top->[_lp_level_];
+ my $spaces = $rLP_top->[_lp_space_count_];
+ if ( $rLP_top->[_lp_object_] ) {
+ my $lp_obj = $rLP_top->[_lp_object_];
+ ( $spaces, $lev, $ci_lev ) =
+ @{ $lp_obj->get_spaces_level_ci() };
+ }
- # and this block is short enough to fit on one line
- # Note: use < because need 1 more space for possible comma
- && !$is_long_term
+ # stop when we reach a level at or below the current
+ # level
+ if ( $lev <= $level && $ci_lev <= $ci_level ) {
+ $space_count = $spaces;
+ $current_level = $lev;
+ $current_ci_level = $ci_lev;
+ last;
+ }
+ }
+
+ # reached bottom of stack .. should never happen because
+ # only negative levels can get here, and $level was forced
+ # to be positive above.
+ else {
+
+ # non-fatal, keep going except in DEVEL_MODE
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp
+EOM
+ }
+ last;
+ }
+ }
+ } ## end decreasing depth
+
+ #------------------------
+ # handle increasing depth
+ #------------------------
+ if ( $level > $current_level || $ci_level > $current_ci_level ) {
+
+ $stack_changed = 1;
+
+ # Compute the standard incremental whitespace. This will be
+ # the minimum incremental whitespace that will be used. This
+ # choice results in a smooth transition between the gnu-style
+ # and the standard style.
+ my $standard_increment =
+ ( $level - $current_level ) *
+ $rOpts_indent_columns +
+ ( $ci_level - $current_ci_level ) *
+ $rOpts_continuation_indentation;
+
+ # Now we have to define how much extra incremental space
+ # ("$available_space") we want. This extra space will be
+ # reduced as necessary when long lines are encountered or when
+ # it becomes clear that we do not have a good list.
+ my $available_spaces = 0;
+ my $align_seqno = 0;
+ my $excess = 0;
+
+ my $last_nonblank_seqno;
+ my $last_nonblank_block_type;
+ if ( defined($K_last_nonblank) ) {
+ $last_nonblank_seqno =
+ $rLL->[$K_last_nonblank]->[_TYPE_SEQUENCE_];
+ $last_nonblank_block_type =
+ $last_nonblank_seqno
+ ? $rblock_type_of_seqno->{$last_nonblank_seqno}
+ : undef;
+ }
+
+ $in_lp_mode = $rLP->[$max_lp_stack]->[_lp_object_];
+
+ #-----------------------------------------------
+ # Initialize indentation spaces on empty stack..
+ #-----------------------------------------------
+ if ( $max_lp_stack == 0 ) {
+ $space_count = $level * $rOpts_indent_columns;
+ }
+
+ #----------------------------------------
+ # Add the standard space increment if ...
+ #----------------------------------------
+ elsif (
+
+ # if this is a BLOCK, add the standard increment
+ $last_nonblank_block_type
+
+ # or if this is not a sequenced item
+ || !$last_nonblank_seqno
+
+ # or this continer is excluded by user rules
+ # or contains here-docs or multiline qw text
+ || defined($last_nonblank_seqno)
+ && $ris_excluded_lp_container->{$last_nonblank_seqno}
+
+ # or if last nonblank token was not structural indentation
+ || $last_nonblank_type ne '{'
+
+ # and do not start -lp under stress .. fixes b1244, b1255
+ || !$in_lp_mode && $level >= $lp_cutoff_level
)
{
- undo_forced_breakpoint_stack(
- $breakpoint_undo_stack[$current_depth] );
- } ## end if ( ( $rOpts_comma_arrow_breakpoints...))
- # now see if we have any comma breakpoints left
- my $has_comma_breakpoints =
- ( $breakpoint_stack[$current_depth] !=
- $forced_breakpoint_count );
+ # If we have entered lp mode, use the top lp object to get
+ # the current indentation spaces because it may have
+ # changed. Fixes b1285, b1286.
+ if ($in_lp_mode) {
+ $space_count = $in_lp_mode->get_spaces();
+ }
+ $space_count += $standard_increment;
+ }
- # update broken-sublist flag of the outer container
- $has_broken_sublist[$depth] =
- $has_broken_sublist[$depth]
- || $has_broken_sublist[$current_depth]
- || $is_long_term
- || $has_comma_breakpoints;
+ #---------------------------------------------------------------
+ # -lp mode: try to use space to the first non-blank level change
+ #---------------------------------------------------------------
+ else {
-# Having come to the closing ')', '}', or ']', now we have to decide if we
-# should 'open up' the structure by placing breaks at the opening and
-# closing containers. This is a tricky decision. Here are some of the
-# basic considerations:
-#
-# -If this is a BLOCK container, then any breakpoints will have already
-# been set (and according to user preferences), so we need do nothing here.
-#
-# -If we have a comma-separated list for which we can align the list items,
-# then we need to do so because otherwise the vertical aligner cannot
-# currently do the alignment.
-#
-# -If this container does itself contain a container which has been broken
-# open, then it should be broken open to properly show the structure.
-#
-# -If there is nothing to align, and no other reason to break apart,
-# then do not do it.
-#
-# We will not break open the parens of a long but 'simple' logical expression.
-# For example:
-#
-# This is an example of a simple logical expression and its formatting:
-#
-# if ( $bigwasteofspace1 && $bigwasteofspace2
-# || $bigwasteofspace3 && $bigwasteofspace4 )
-#
-# Most people would prefer this than the 'spacey' version:
-#
-# if (
-# $bigwasteofspace1 && $bigwasteofspace2
-# || $bigwasteofspace3 && $bigwasteofspace4
-# )
-#
-# To illustrate the rules for breaking logical expressions, consider:
-#
-# FULLY DENSE:
-# if ( $opt_excl
-# and ( exists $ids_excl_uc{$id_uc}
-# or grep $id_uc =~ /$_/, @ids_excl_uc ))
-#
-# This is on the verge of being difficult to read. The current default is to
-# open it up like this:
-#
-# DEFAULT:
-# if (
-# $opt_excl
-# and ( exists $ids_excl_uc{$id_uc}
-# or grep $id_uc =~ /$_/, @ids_excl_uc )
-# )
-#
-# This is a compromise which tries to avoid being too dense and to spacey.
-# A more spaced version would be:
-#
-# SPACEY:
-# if (
-# $opt_excl
-# and (
-# exists $ids_excl_uc{$id_uc}
-# or grep $id_uc =~ /$_/, @ids_excl_uc
-# )
-# )
-#
-# Some people might prefer the spacey version -- an option could be added. The
-# innermost expression contains a long block '( exists $ids_... ')'.
-#
-# Here is how the logic goes: We will force a break at the 'or' that the
-# innermost expression contains, but we will not break apart its opening and
-# closing containers because (1) it contains no multi-line sub-containers itself,
-# and (2) there is no alignment to be gained by breaking it open like this
-#
-# and (
-# exists $ids_excl_uc{$id_uc}
-# or grep $id_uc =~ /$_/, @ids_excl_uc
-# )
-#
-# (although this looks perfectly ok and might be good for long expressions). The
-# outer 'if' container, though, contains a broken sub-container, so it will be
-# broken open to avoid too much density. Also, since it contains no 'or's, there
-# will be a forced break at its 'and'.
+ # see how much space we have available
+ my $test_space_count = $lp_position_predictor;
+ my $excess = 0;
+ my $min_len =
+ $rcollapsed_length_by_seqno->{$last_nonblank_seqno};
+ my $next_opening_too_far;
+
+ if ( defined($min_len) ) {
+ $excess =
+ $test_space_count +
+ $min_len -
+ $maximum_line_length_at_level[$level];
+ if ( $excess > 0 ) {
+ $test_space_count -= $excess;
+
+ # will the next opening token be a long way out?
+ $next_opening_too_far =
+ $lp_position_predictor + $excess >
+ $maximum_line_length_at_level[$level];
+ }
+ }
- # set some flags telling something about this container..
- my $is_simple_logical_expression = 0;
- if ( $item_count_stack[$current_depth] == 0
- && $saw_opening_structure
- && $tokens_to_go[$i_opening] eq '('
- && $is_logical_container{ $container_type[$current_depth] }
+ my $rLP_top = $rLP->[$max_lp_stack];
+ my $min_gnu_indentation = $rLP_top->[_lp_space_count_];
+ if ( $rLP_top->[_lp_object_] ) {
+ $min_gnu_indentation =
+ $rLP_top->[_lp_object_]->get_spaces();
+ }
+ $available_spaces =
+ $test_space_count - $min_gnu_indentation;
+
+ # Do not startup -lp indentation mode if no space ...
+ # ... or if it puts the opening far to the right
+ if ( !$in_lp_mode
+ && ( $available_spaces <= 0 || $next_opening_too_far ) )
+ {
+ $space_count += $standard_increment;
+ $available_spaces = 0;
+ }
+
+ # Use -lp mode
+ else {
+ $space_count = $test_space_count;
+
+ $in_lp_mode = 1;
+ if ( $available_spaces >= $standard_increment ) {
+ $min_gnu_indentation += $standard_increment;
+ }
+ elsif ( $available_spaces > 1 ) {
+ $min_gnu_indentation += $available_spaces + 1;
+ }
+ elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
+ if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
+ $min_gnu_indentation += 2;
+ }
+ else {
+ $min_gnu_indentation += 1;
+ }
+ }
+ else {
+ $min_gnu_indentation += $standard_increment;
+ }
+ $available_spaces = $space_count - $min_gnu_indentation;
+
+ if ( $available_spaces < 0 ) {
+ $space_count = $min_gnu_indentation;
+ $available_spaces = 0;
+ }
+ $align_seqno = $last_nonblank_seqno;
+ }
+ }
+
+ #-------------------------------------------
+ # update the state, but not on a blank token
+ #-------------------------------------------
+ if ( $type ne 'b' ) {
+
+ if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
+ $rLP->[$max_lp_stack]->[_lp_object_]->set_have_child(1);
+ $in_lp_mode = 1;
+ }
+
+ #----------------------------------------
+ # Create indentation object if in lp-mode
+ #----------------------------------------
+ ++$max_lp_stack;
+ my $lp_object;
+ if ($in_lp_mode) {
+
+ # A negative level implies not to store the item in the
+ # item_list
+ my $lp_item_index = 0;
+ if ( $level >= 0 ) {
+ $lp_item_index = ++$max_lp_object_list;
+ }
+
+ my $K_begin_line = 0;
+ if ( $ii_begin_line >= 0
+ && $ii_begin_line <= $max_index_to_go )
+ {
+ $K_begin_line = $K_to_go[$ii_begin_line];
+ }
+
+ # Minor Fix: when creating indentation at a side
+ # comment we don't know what the space to the actual
+ # next code token will be. We will allow a space for
+ # sub correct_lp to move it in if necessary.
+ if ( $type eq '#'
+ && $max_index_to_go > 0
+ && $align_seqno )
+ {
+ $available_spaces += 1;
+ }
+
+ $lp_object = Perl::Tidy::IndentationItem->new(
+ spaces => $space_count,
+ level => $level,
+ ci_level => $ci_level,
+ available_spaces => $available_spaces,
+ lp_item_index => $lp_item_index,
+ align_seqno => $align_seqno,
+ stack_depth => $max_lp_stack,
+ K_begin_line => $K_begin_line,
+ );
+
+ DEBUG_LP && do {
+ my $tok_beg = $rLL->[$K_begin_line]->[_TOKEN_];
+ print STDERR <<EOM;
+DEBUG_LP: Created object at tok=$token type=$type for seqno $align_seqno level=$level ci=$ci_level spaces=$space_count avail=$available_spaces kbeg=$K_begin_line tokbeg=$tok_beg lp=$lp_position_predictor
+EOM
+ };
+
+ if ( $level >= 0 ) {
+ $rlp_object_list->[$max_lp_object_list] =
+ $lp_object;
+ }
+
+ if ( $last_nonblank_token =~ /^[\{\[\(]$/
+ && $last_nonblank_seqno )
+ {
+ $rlp_object_by_seqno->{$last_nonblank_seqno} =
+ $lp_object;
+ }
+ }
+
+ #------------------------------------
+ # Store this indentation on the stack
+ #------------------------------------
+ $rLP->[$max_lp_stack]->[_lp_ci_level_] = $ci_level;
+ $rLP->[$max_lp_stack]->[_lp_level_] = $level;
+ $rLP->[$max_lp_stack]->[_lp_object_] = $lp_object;
+ $rLP->[$max_lp_stack]->[_lp_container_seqno_] =
+ $last_nonblank_seqno;
+ $rLP->[$max_lp_stack]->[_lp_space_count_] = $space_count;
+
+ # If the opening paren is beyond the half-line length, then
+ # we will use the minimum (standard) indentation. This will
+ # help avoid problems associated with running out of space
+ # near the end of a line. As a result, in deeply nested
+ # lists, there will be some indentations which are limited
+ # to this minimum standard indentation. But the most deeply
+ # nested container will still probably be able to shift its
+ # parameters to the right for proper alignment, so in most
+ # cases this will not be noticeable.
+ if ( $available_spaces > 0 && $lp_object ) {
+ my $halfway =
+ $maximum_line_length_at_level[$level] -
+ $rOpts_maximum_line_length / 2;
+ $lp_object->tentatively_decrease_available_spaces(
+ $available_spaces)
+ if ( $space_count > $halfway );
+ }
+ }
+ } ## end increasing depth
+
+ #------------------
+ # Handle all tokens
+ #------------------
+ if ( $type ne 'b' ) {
+
+ # Count commas and look for non-list characters. Once we see a
+ # non-list character, we give up and don't look for any more
+ # commas.
+ if ( $type eq '=>' ) {
+ $lp_arrow_count{$total_depth}++;
+
+ # remember '=>' like '=' for estimating breaks (but see
+ # above note for b1035)
+ $last_lp_equals{$total_depth} = $ii;
+ }
+
+ elsif ( $type eq ',' ) {
+ $lp_comma_count{$total_depth}++;
+ }
+
+ elsif ( $is_assignment{$type} ) {
+ $last_lp_equals{$total_depth} = $ii;
+ }
+
+ # this token might start a new line if ..
+ if (
+
+ # this is the first nonblank token of the line
+ $ii == 1 && $types_to_go[0] eq 'b'
+
+ # or previous character was one of these:
+ # /^([\:\?\,f])$/
+ || $hash_test2{$last_nonblank_type}
+
+ # or previous character was opening and this is not closing
+ || ( $last_nonblank_type eq '{' && $type ne '}' )
+ || ( $last_nonblank_type eq '(' and $type ne ')' )
+
+ # or this token is one of these:
+ # /^([\.]|\|\||\&\&)$/
+ || $hash_test3{$type}
+
+ # or this is a closing structure
+ || ( $last_nonblank_type eq '}'
+ && $last_nonblank_token eq $last_nonblank_type )
+
+ # or previous token was keyword 'return'
+ || (
+ $last_nonblank_type eq 'k'
+ && ( $last_nonblank_token eq 'return'
+ && $type ne '{' )
+ )
+
+ # or starting a new line at certain keywords is fine
+ || ( $type eq 'k'
+ && $is_if_unless_and_or_last_next_redo_return{$token} )
+
+ # or this is after an assignment after a closing structure
+ || (
+ $is_assignment{$last_nonblank_type}
+ && (
+ # /^[\}\)\]]$/
+ $hash_test1{$last_last_nonblank_type}
+
+ # and it is significantly to the right
+ || $lp_position_predictor > (
+ $maximum_line_length_at_level[$level] -
+ $rOpts_maximum_line_length / 2
+ )
+ )
+ )
)
{
+ check_for_long_gnu_style_lines( $ii, $rlp_object_list );
+ $ii_begin_line = $ii;
+
+ # back up 1 token if we want to break before that type
+ # otherwise, we may strand tokens like '?' or ':' on a line
+ if ( $ii_begin_line > 0 ) {
+ if ( $last_nonblank_type eq 'k' ) {
+
+ if ( $want_break_before{$last_nonblank_token} ) {
+ $ii_begin_line--;
+ }
+ }
+ elsif ( $want_break_before{$last_nonblank_type} ) {
+ $ii_begin_line--;
+ }
+ }
+ } ## end if ( $ii == 1 && $types_to_go...)
+
+ $K_last_nonblank = $KK;
+
+ $last_last_nonblank_type = $last_nonblank_type;
+ $last_nonblank_type = $type;
+ $last_nonblank_token = $token;
+
+ } ## end if ( $type ne 'b' )
+
+ # remember the predicted position of this token on the output line
+ if ( $ii > $ii_begin_line ) {
+
+ ## NOTE: this is a critical loop - the following call has been
+ ## expanded for about 2x speedup:
+ ## $lp_position_predictor =
+ ## total_line_length( $ii_begin_line, $ii );
+
+ my $indentation = $leading_spaces_to_go[$ii_begin_line];
+ if ( ref($indentation) ) {
+ $indentation = $indentation->get_spaces();
+ }
+ $lp_position_predictor =
+ $indentation +
+ $summed_lengths_to_go[ $ii + 1 ] -
+ $summed_lengths_to_go[$ii_begin_line];
+ }
+ else {
+ $lp_position_predictor =
+ $space_count + $token_lengths_to_go[$ii];
+ }
+
+ # Store the indentation object for this token.
+ # This allows us to manipulate the leading whitespace
+ # (in case we have to reduce indentation to fit a line) without
+ # having to change any token values.
+
+ #---------------------------------------------------------------
+ # replace leading whitespace with indentation objects where used
+ #---------------------------------------------------------------
+ if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
+ my $lp_object = $rLP->[$max_lp_stack]->[_lp_object_];
+ $leading_spaces_to_go[$ii] = $lp_object;
+ if ( $max_lp_stack > 0
+ && $ci_level
+ && $rLP->[ $max_lp_stack - 1 ]->[_lp_object_] )
+ {
+ $reduced_spaces_to_go[$ii] =
+ $rLP->[ $max_lp_stack - 1 ]->[_lp_object_];
+ }
+ else {
+ $reduced_spaces_to_go[$ii] = $lp_object;
+ }
+ }
+ } ## end loop over all tokens in this batch
+
+ undo_incomplete_lp_indentation($rlp_object_list)
+ if ( !$rOpts_extended_line_up_parentheses );
+
+ return;
+ }
+
+ sub check_for_long_gnu_style_lines {
+
+ # look at the current estimated maximum line length, and
+ # remove some whitespace if it exceeds the desired maximum
+ my ( $mx_index_to_go, $rlp_object_list ) = @_;
+
+ my $max_lp_object_list = @{$rlp_object_list} - 1;
+
+ # nothing can be done if no stack items defined for this line
+ return if ( $max_lp_object_list < 0 );
+
+ # see if we have exceeded the maximum desired line length
+ # keep 2 extra free because they are needed in some cases
+ # (result of trial-and-error testing)
+ my $spaces_needed =
+ $lp_position_predictor -
+ $maximum_line_length_at_level[ $levels_to_go[$mx_index_to_go] ] + 2;
+
+ return if ( $spaces_needed <= 0 );
+
+ # We are over the limit, so try to remove a requested number of
+ # spaces from leading whitespace. We are only allowed to remove
+ # from whitespace items created on this batch, since others have
+ # already been used and cannot be undone.
+ my @candidates = ();
+ my $i;
+
+ # loop over all whitespace items created for the current batch
+ for ( $i = 0 ; $i <= $max_lp_object_list ; $i++ ) {
+ my $item = $rlp_object_list->[$i];
+
+ # item must still be open to be a candidate (otherwise it
+ # cannot influence the current token)
+ next if ( $item->get_closed() >= 0 );
+
+ my $available_spaces = $item->get_available_spaces();
+
+ if ( $available_spaces > 0 ) {
+ push( @candidates, [ $i, $available_spaces ] );
+ }
+ }
+
+ return unless (@candidates);
+
+ # sort by available whitespace so that we can remove whitespace
+ # from the maximum available first
+ @candidates = sort { $b->[1] <=> $a->[1] } @candidates;
+
+ # keep removing whitespace until we are done or have no more
+ foreach my $candidate (@candidates) {
+ my ( $i, $available_spaces ) = @{$candidate};
+ my $deleted_spaces =
+ ( $available_spaces > $spaces_needed )
+ ? $spaces_needed
+ : $available_spaces;
+
+ # remove the incremental space from this item
+ $rlp_object_list->[$i]->decrease_available_spaces($deleted_spaces);
- # This seems to be a simple logical expression with
- # no existing breakpoints. Set a flag to prevent
- # opening it up.
- if ( !$has_comma_breakpoints ) {
- $is_simple_logical_expression = 1;
- }
+ my $i_debug = $i;
- # This seems to be a simple logical expression with
- # breakpoints (broken sublists, for example). Break
- # at all 'or's and '||'s.
- else {
- set_logical_breakpoints($current_depth);
+ # update the leading whitespace of this item and all items
+ # that came after it
+ for ( ; $i <= $max_lp_object_list ; $i++ ) {
+
+ my $old_spaces = $rlp_object_list->[$i]->get_spaces();
+ if ( $old_spaces >= $deleted_spaces ) {
+ $rlp_object_list->[$i]->decrease_SPACES($deleted_spaces);
+ }
+
+ # shouldn't happen except for code bug:
+ else {
+ # non-fatal, keep going except in DEVEL_MODE
+ if (DEVEL_MODE) {
+ my $level = $rlp_object_list->[$i_debug]->get_level();
+ my $ci_level =
+ $rlp_object_list->[$i_debug]->get_ci_level();
+ my $old_level = $rlp_object_list->[$i]->get_level();
+ my $old_ci_level =
+ $rlp_object_list->[$i]->get_ci_level();
+ Fault(<<EOM);
+program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level
+EOM
}
- } ## end if ( $item_count_stack...)
+ }
+ }
+ $lp_position_predictor -= $deleted_spaces;
+ $spaces_needed -= $deleted_spaces;
+ last unless ( $spaces_needed > 0 );
+ }
+ return;
+ }
- if ( $is_long_term
- && @{ $rfor_semicolon_list[$current_depth] } )
- {
- set_for_semicolon_breakpoints($current_depth);
+ sub undo_incomplete_lp_indentation {
- # open up a long 'for' or 'foreach' container to allow
- # leading term alignment unless -lp is used.
- $has_comma_breakpoints = 1
- unless $rOpts_line_up_parentheses;
- } ## end if ( $is_long_term && ...)
+ #------------------------------------------------------------------
+ # Undo indentation for all incomplete -lp indentation levels of the
+ # current batch unless -xlp is set.
+ #------------------------------------------------------------------
- if (
+ # This routine is called once after each output stream batch is
+ # finished to undo indentation for all incomplete -lp indentation
+ # levels. If this routine is called then comments and blank lines will
+ # disrupt this indentation style. In older versions of perltidy this
+ # was always done because it could cause problems otherwise, but recent
+ # improvements allow fairly good results to be obtained by skipping
+ # this step with the -xlp flag.
+ my ($rlp_object_list) = @_;
- # breaks for code BLOCKS are handled at a higher level
- !$block_type
+ my $max_lp_object_list = @{$rlp_object_list} - 1;
- # we do not need to break at the top level of an 'if'
- # type expression
- && !$is_simple_logical_expression
+ # nothing to do if no stack items defined for this line
+ return if ( $max_lp_object_list < 0 );
- ## modification to keep ': (' containers vertically tight;
- ## but probably better to let user set -vt=1 to avoid
- ## inconsistency with other paren types
- ## && ($container_type[$current_depth] ne ':')
+ # loop over all whitespace items created for the current batch
+ foreach my $i ( 0 .. $max_lp_object_list ) {
+ my $item = $rlp_object_list->[$i];
- # otherwise, we require one of these reasons for breaking:
- && (
+ # only look for open items
+ next if ( $item->get_closed() >= 0 );
- # - this term has forced line breaks
- $has_comma_breakpoints
+ # Tentatively remove all of the available space
+ # (The vertical aligner will try to get it back later)
+ my $available_spaces = $item->get_available_spaces();
+ if ( $available_spaces > 0 ) {
- # - the opening container is separated from this batch
- # for some reason (comment, blank line, code block)
- # - this is a non-paren container spanning multiple lines
- || !$saw_opening_structure
+ # delete incremental space for this item
+ $rlp_object_list->[$i]
+ ->tentatively_decrease_available_spaces($available_spaces);
- # - this is a long block contained in another breakable
- # container
- || ( $is_long_term
- && $container_environment_to_go[$i_opening] ne
- 'BLOCK' )
- )
- )
- {
+ # Reduce the total indentation space of any nodes that follow
+ # Note that any such nodes must necessarily be dependents
+ # of this node.
+ foreach ( $i + 1 .. $max_lp_object_list ) {
+ $rlp_object_list->[$_]->decrease_SPACES($available_spaces);
+ }
+ }
+ }
+ return;
+ }
+} ## end closure set_lp_indentation
- # For -lp option, we must put a breakpoint before
- # the token which has been identified as starting
- # this indentation level. This is necessary for
- # proper alignment.
- if ( $rOpts_line_up_parentheses && $saw_opening_structure )
- {
- my $item = $leading_spaces_to_go[ $i_opening + 1 ];
- if ( $i_opening + 1 < $max_index_to_go
- && $types_to_go[ $i_opening + 1 ] eq 'b' )
- {
- $item = $leading_spaces_to_go[ $i_opening + 2 ];
- }
- if ( defined($item) ) {
- my $i_start_2 = $item->get_starting_index();
- if (
- defined($i_start_2)
+#----------------------------------------------------------------------
+# sub to set a requested break before an opening container in -lp mode.
+#----------------------------------------------------------------------
+sub set_forced_lp_break {
- # we are breaking after an opening brace, paren,
- # so don't break before it too
- && $i_start_2 ne $i_opening
- )
- {
+ my ( $self, $i_begin_line, $i_opening ) = @_;
- # Only break for breakpoints at the same
- # indentation level as the opening paren
- my $test1 = $nesting_depth_to_go[$i_opening];
- my $test2 = $nesting_depth_to_go[$i_start_2];
- if ( $test2 == $test1 ) {
- set_forced_breakpoint( $i_start_2 - 1 );
- }
- } ## end if ( defined($i_start_2...))
- } ## end if ( defined($item) )
- } ## end if ( $rOpts_line_up_parentheses...)
+ # Given:
+ # $i_begin_line = index of break in the _to_go arrays
+ # $i_opening = index of the opening container
- # break after opening structure.
- # note: break before closing structure will be automatic
- if ( $minimum_depth <= $current_depth ) {
+ # Set any requested break at a token before this opening container
+ # token. This is often an '=' or '=>' but can also be things like
+ # '.', ',', 'return'. It was defined by sub set_lp_indentation.
- set_forced_breakpoint($i_opening)
- unless ( $do_not_break_apart
- || is_unbreakable_container($current_depth) );
+ # Important:
+ # For intact containers, call this at the closing token.
+ # For broken containers, call this at the opening token.
+ # This will avoid needless breaks when it turns out that the
+ # container does not actually get broken. This isn't known until
+ # the closing container for intact blocks.
- # break at ',' of lower depth level before opening token
- if ( $last_comma_index[$depth] ) {
- set_forced_breakpoint( $last_comma_index[$depth] );
- }
+ return
+ if ( $i_begin_line < 0
+ || $i_begin_line > $max_index_to_go );
- # break at '.' of lower depth level before opening token
- if ( $last_dot_index[$depth] ) {
- set_forced_breakpoint( $last_dot_index[$depth] );
- }
+ # Handle request to put a break break immediately before this token.
+ # We may not want to do that since we are also breaking after it.
+ if ( $i_begin_line == $i_opening ) {
- # break before opening structure if preceded by another
- # closing structure and a comma. This is normally
- # done by the previous closing brace, but not
- # if it was a one-line block.
- if ( $i_opening > 2 ) {
- my $i_prev =
- ( $types_to_go[ $i_opening - 1 ] eq 'b' )
- ? $i_opening - 2
- : $i_opening - 1;
+ # The following rules should be reviewed. We may want to always
+ # allow the break. If we do not do the break, the indentation
+ # may be off.
- if ( $types_to_go[$i_prev] eq ','
- && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
- {
- set_forced_breakpoint($i_prev);
- }
+ # RULE: don't break before it unless it is welded to a qw.
+ # This works well, but we may want to relax this to allow
+ # breaks in additional cases.
+ return
+ if ( !$self->[_rK_weld_right_]->{ $K_to_go[$i_opening] } );
+ return unless ( $types_to_go[$max_index_to_go] eq 'q' );
+ }
- # also break before something like ':(' or '?('
- # if appropriate.
- elsif (
- $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
- {
- my $token_prev = $tokens_to_go[$i_prev];
- if ( $want_break_before{$token_prev} ) {
- set_forced_breakpoint($i_prev);
- }
- } ## end elsif ( $types_to_go[$i_prev...])
- } ## end if ( $i_opening > 2 )
- } ## end if ( $minimum_depth <=...)
+ # Only break for breakpoints at the same
+ # indentation level as the opening paren
+ my $test1 = $nesting_depth_to_go[$i_opening];
+ my $test2 = $nesting_depth_to_go[$i_begin_line];
+ return if ( $test2 != $test1 );
- # break after comma following closing structure
- if ( $next_type eq ',' ) {
- set_forced_breakpoint( $i + 1 );
- }
+ # Back up at a blank (fixes case b932)
+ my $ibr = $i_begin_line - 1;
+ if ( $ibr > 0
+ && $types_to_go[$ibr] eq 'b' )
+ {
+ $ibr--;
+ }
+ if ( $ibr >= 0 ) {
+ my $i_nonblank = $self->set_forced_breakpoint($ibr);
- # break before an '=' following closing structure
- if (
- $is_assignment{$next_nonblank_type}
- && ( $breakpoint_stack[$current_depth] !=
- $forced_breakpoint_count )
- )
- {
- set_forced_breakpoint($i);
- } ## end if ( $is_assignment{$next_nonblank_type...})
+ # Crude patch to prevent sub recombine_breakpoints from undoing
+ # this break, especially after an '='. It will leave old
+ # breakpoints alone. See c098/x045 for some examples.
+ if ( defined($i_nonblank) ) {
+ $old_breakpoint_to_go[$i_nonblank] = 1;
+ }
+ }
+ return;
+}
- # break at any comma before the opening structure Added
- # for -lp, but seems to be good in general. It isn't
- # obvious how far back to look; the '5' below seems to
- # work well and will catch the comma in something like
- # push @list, myfunc( $param, $param, ..
+sub reduce_lp_indentation {
- my $icomma = $last_comma_index[$depth];
- if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
- unless ( $forced_breakpoint_to_go[$icomma] ) {
- set_forced_breakpoint($icomma);
- }
- }
- } # end logic to open up a container
+ # reduce the leading whitespace at token $i if possible by $spaces_needed
+ # (a large value of $spaces_needed will remove all excess space)
+ # NOTE: to be called from break_lists only for a sequence of tokens
+ # contained between opening and closing parens/braces/brackets
- # Break open a logical container open if it was already open
- elsif ($is_simple_logical_expression
- && $has_old_logical_breakpoints[$current_depth] )
- {
- set_logical_breakpoints($current_depth);
- }
+ my ( $self, $i, $spaces_wanted ) = @_;
+ my $deleted_spaces = 0;
- # Handle long container which does not get opened up
- elsif ($is_long_term) {
+ my $item = $leading_spaces_to_go[$i];
+ my $available_spaces = $item->get_available_spaces();
- # must set fake breakpoint to alert outer containers that
- # they are complex
- set_fake_breakpoint();
- } ## end elsif ($is_long_term)
+ if (
+ $available_spaces > 0
+ && ( ( $spaces_wanted <= $available_spaces )
+ || !$item->get_have_child() )
+ )
+ {
- } ## end elsif ( $depth < $current_depth)
+ # we'll remove these spaces, but mark them as recoverable
+ $deleted_spaces =
+ $item->tentatively_decrease_available_spaces($spaces_wanted);
+ }
- #------------------------------------------------------------
- # Handle this token
- #------------------------------------------------------------
+ return $deleted_spaces;
+}
- $current_depth = $depth;
+###########################################################
+# CODE SECTION 13: Preparing batches for vertical alignment
+###########################################################
- # handle comma-arrow
- if ( $type eq '=>' ) {
- next if ( $last_nonblank_type eq '=>' );
- next if $rOpts_break_at_old_comma_breakpoints;
- next if $rOpts_comma_arrow_breakpoints == 3;
- $want_comma_break[$depth] = 1;
- $index_before_arrow[$depth] = $i_last_nonblank_token;
- next;
- } ## end if ( $type eq '=>' )
+sub check_convey_batch_input {
- elsif ( $type eq '.' ) {
- $last_dot_index[$depth] = $i;
- }
+ # Check for valid input to sub convey_batch_to_vertical_aligner. An
+ # error here would most likely be due to an error in the calling
+ # routine 'sub grind_batch_of_CODE'.
+ my ( $self, $ri_first, $ri_last ) = @_;
- # Turn off alignment if we are sure that this is not a list
- # environment. To be safe, we will do this if we see certain
- # non-list tokens, such as ';', and also the environment is
- # not a list. Note that '=' could be in any of the = operators
- # (lextest.t). We can't just use the reported environment
- # because it can be incorrect in some cases.
- elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
- && $container_environment_to_go[$i] ne 'LIST' )
+ if ( !defined($ri_first) || !defined($ri_last) ) {
+ Fault(<<EOM);
+Undefined line ranges ri_first and/r ri_last
+EOM
+ }
+
+ my $nmax = @{$ri_first} - 1;
+ my $nmax_check = @{$ri_last} - 1;
+ if ( $nmax < 0 || $nmax_check < 0 || $nmax != $nmax_check ) {
+ Fault(<<EOM);
+Line range index error: nmax=$nmax but nmax_check=$nmax_check
+These should be equal and >=0
+EOM
+ }
+ my ( $ibeg, $iend );
+ foreach my $n ( 0 .. $nmax ) {
+ my $ibeg_m = $ibeg;
+ my $iend_m = $iend;
+ $ibeg = $ri_first->[$n];
+ $iend = $ri_last->[$n];
+ if ( $ibeg < 0 || $iend < $ibeg || $iend > $max_index_to_go ) {
+ Fault(<<EOM);
+Bad line range at line index $n of $nmax: ibeg=$ibeg, iend=$iend
+These should have iend >= ibeg and be in the range (0..$max_index_to_go)
+EOM
+ }
+ next if ( $n == 0 );
+ if ( $ibeg <= $iend_m ) {
+ Fault(<<EOM);
+Line ranges overlap: iend=$iend_m at line $n-1 but ibeg=$ibeg for line $n
+EOM
+ }
+ }
+ return;
+}
+
+sub convey_batch_to_vertical_aligner {
+
+ my ($self) = @_;
+
+ # This routine receives a batch of code for which the final line breaks
+ # have been defined. Here we prepare the lines for passing to the vertical
+ # aligner. We do the following tasks:
+ # - mark certain vertical alignment tokens, such as '=', in each line
+ # - make minor indentation adjustments
+ # - do logical padding: insert extra blank spaces to help display certain
+ # logical constructions
+
+ my $this_batch = $self->[_this_batch_];
+ my $ri_first = $this_batch->[_ri_first_];
+ my $ri_last = $this_batch->[_ri_last_];
+
+ $self->check_convey_batch_input( $ri_first, $ri_last ) if (DEVEL_MODE);
+
+ my $n_last_line = @{$ri_first} - 1;
+
+ my $do_not_pad = $this_batch->[_do_not_pad_];
+ my $peak_batch_size = $this_batch->[_peak_batch_size_];
+ my $starting_in_quote = $this_batch->[_starting_in_quote_];
+ my $ending_in_quote = $this_batch->[_ending_in_quote_];
+ my $is_static_block_comment = $this_batch->[_is_static_block_comment_];
+ my $rix_seqno_controlling_ci = $this_batch->[_rix_seqno_controlling_ci_];
+ my $batch_CODE_type = $this_batch->[_batch_CODE_type_];
+
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+ my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
+
+ my $ibeg_next = $ri_first->[0];
+ my $iend_next = $ri_last->[0];
+
+ my $type_beg_next = $types_to_go[$ibeg_next];
+ my $type_end_next = $types_to_go[$iend_next];
+ my $token_beg_next = $tokens_to_go[$ibeg_next];
+
+ my $is_block_comment = $max_index_to_go == 0 && $types_to_go[0] eq '#';
+
+ my $rindentation_list = [0]; # ref to indentations for each line
+ my ( $cscw_block_comment, $closing_side_comment );
+ if ($rOpts_closing_side_comments) {
+ ( $closing_side_comment, $cscw_block_comment ) =
+ $self->add_closing_side_comment( $ri_first, $ri_last );
+ }
+
+ # flush before a long if statement to avoid unwanted alignment
+ if ( $n_last_line > 0
+ && $type_beg_next eq 'k'
+ && $token_beg_next =~ /^(if|unless)$/ )
+ {
+ $self->flush_vertical_aligner();
+ }
+
+ $self->undo_ci( $ri_first, $ri_last, $rix_seqno_controlling_ci )
+ if ( $n_last_line > 0 || $rOpts_extended_continuation_indentation );
+
+ $self->set_logical_padding( $ri_first, $ri_last, $peak_batch_size,
+ $starting_in_quote )
+ if ( $n_last_line > 0 && $rOpts_logical_padding );
+
+ if (DEVEL_MODE) { $self->check_batch_summed_lengths() }
+
+ # ----------------------------------------------------------
+ # define the vertical alignments for all lines of this batch
+ # ----------------------------------------------------------
+ my $rline_alignments =
+ $self->make_vertical_alignments( $ri_first, $ri_last );
+
+ # ----------------------------------------------
+ # loop to send each line to the vertical aligner
+ # ----------------------------------------------
+ my ( $type_beg, $token_beg );
+ my ($type_end);
+ my ( $ibeg, $iend );
+ for my $n ( 0 .. $n_last_line ) {
+
+ # ----------------------------------------------------------------
+ # This hash will hold the args for vertical alignment of this line
+ # We will populate it as we go.
+ # ----------------------------------------------------------------
+ my $rvao_args = {};
+
+ my $type_beg_last = $type_beg;
+ my $type_end_last = $type_end;
+
+ my $ibeg = $ibeg_next;
+ my $iend = $iend_next;
+ my $Kbeg = $K_to_go[$ibeg];
+ my $Kend = $K_to_go[$iend];
+
+ $type_beg = $type_beg_next;
+ $type_end = $type_end_next;
+ $token_beg = $token_beg_next;
+
+ # ---------------------------------------------------
+ # Define the check value 'Kend' to send for this line
+ # ---------------------------------------------------
+ # The 'Kend' value is an integer for checking that lines come out of
+ # the far end of the pipeline in the right order. It increases
+ # linearly along the token stream. But we only send ending K values of
+ # non-comments down the pipeline. This is equivalent to checking that
+ # the last CODE_type is blank or equal to 'VER'. See also sub
+ # resync_lines_and_tokens for related coding. Note that
+ # '$batch_CODE_type' is the code type of the line to which the ending
+ # token belongs.
+ my $Kend_code =
+ $batch_CODE_type && $batch_CODE_type ne 'VER' ? undef : $Kend;
+
+ # $ljump is a level jump needed by 'sub final_indentation_adjustment'
+ my $ljump = 0;
+
+ # Get some vars on line [n+1], if any:
+ if ( $n < $n_last_line ) {
+ $ibeg_next = $ri_first->[ $n + 1 ];
+ $iend_next = $ri_last->[ $n + 1 ];
+
+ $type_beg_next = $types_to_go[$ibeg_next];
+ $type_end_next = $types_to_go[$iend_next];
+ $token_beg_next = $tokens_to_go[$ibeg_next];
+
+ my $Kbeg_next = $K_to_go[$ibeg_next];
+ $ljump = $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
+ }
+ elsif ( !$is_block_comment && $Kend < $Klimit ) {
+
+ # Patch for git #51, a bare closing qw paren was not outdented
+ # if the flag '-nodelete-old-newlines is set
+ # Note that we are just looking ahead for the next nonblank
+ # character. We could scan past an arbitrary number of block
+ # comments or hanging side comments by calling K_next_code, but it
+ # could add significant run time with very little to be gained.
+ my $Kbeg_next = $Kend + 1;
+ if ( $Kbeg_next < $Klimit
+ && $rLL->[$Kbeg_next]->[_TYPE_] eq 'b' )
{
- $dont_align[$depth] = 1;
- $want_comma_break[$depth] = 0;
- $index_before_arrow[$depth] = -1;
- } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
+ $Kbeg_next += 1;
+ }
+ $ljump =
+ $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
+ }
+
+ # ---------------------------------------------
+ # get the vertical alignment info for this line
+ # ---------------------------------------------
+
+ # The lines are broken into fields which can be spaced by the vertical
+ # to achieve vertical alignment. These fields are the actual text
+ # which will be output, so from here on no more changes can be made to
+ # the text.
+ my $rline_alignment = $rline_alignments->[$n];
+ my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
+ @{$rline_alignment};
+
+ # Programming check: (shouldn't happen)
+ # The number of tokens which separate the fields must always be
+ # one less than the number of fields. If this is not true then
+ # an error has been introduced in sub make_alignment_patterns.
+ if (DEVEL_MODE) {
+ if ( @{$rfields} && ( @{$rtokens} != ( @{$rfields} - 1 ) ) ) {
+ my $nt = @{$rtokens};
+ my $nf = @{$rfields};
+ my $msg = <<EOM;
+Program bug in Perl::Tidy::Formatter, probably in sub 'make_alignment_patterns':
+The number of tokens = $nt should be one less than number of fields: $nf
+EOM
+ Fault($msg);
+ }
+ }
+
+ # --------------------------------------
+ # get the final indentation of this line
+ # --------------------------------------
+ my ( $indentation, $lev, $level_end, $terminal_type,
+ $terminal_block_type, $is_semicolon_terminated, $is_outdented_line )
+ = $self->final_indentation_adjustment( $ibeg, $iend, $rfields,
+ $rpatterns, $ri_first, $ri_last,
+ $rindentation_list, $ljump, $starting_in_quote,
+ $is_static_block_comment, );
+
+ # --------------------------------
+ # define flag 'outdent_long_lines'
+ # --------------------------------
+ if (
+ # we will allow outdenting of long lines..
+ # which are long quotes, if allowed
+ ( $type_beg eq 'Q' && $rOpts_outdent_long_quotes )
+
+ # which are long block comments, if allowed
+ || (
+ $type_beg eq '#'
+ && $rOpts_outdent_long_comments
+
+ # but not if this is a static block comment
+ && !$is_static_block_comment
+ )
+ )
+ {
+ $rvao_args->{outdent_long_lines} = 1;
- # now just handle any commas
- next unless ( $type eq ',' );
+ # convert -lp indentation objects to spaces to allow outdenting
+ if ( ref($indentation) ) {
+ $indentation = $indentation->get_spaces();
+ }
+ }
- $last_dot_index[$depth] = undef;
- $last_comma_index[$depth] = $i;
+ # --------------------------------------------------
+ # define flags 'break_alignment_before' and '_after'
+ # --------------------------------------------------
- # break here if this comma follows a '=>'
- # but not if there is a side comment after the comma
- if ( $want_comma_break[$depth] ) {
+ # These flags tell the vertical aligner to stop alignment before or
+ # after this line.
+ if ($is_outdented_line) {
+ $rvao_args->{break_alignment_before} = 1;
+ $rvao_args->{break_alignment_after} = 1;
+ }
+ elsif ($do_not_pad) {
+ $rvao_args->{break_alignment_before} = 1;
+ }
- if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
- if ($rOpts_comma_arrow_breakpoints) {
- $want_comma_break[$depth] = 0;
- next;
+ # flush at an 'if' which follows a line with (1) terminal semicolon
+ # or (2) terminal block_type which is not an 'if'. This prevents
+ # unwanted alignment between the lines.
+ elsif ( $type_beg eq 'k' && $token_beg eq 'if' ) {
+ my $type_m = 'b';
+ my $block_type_m;
+
+ if ( $Kbeg > 0 ) {
+ my $Km = $Kbeg - 1;
+ $type_m = $rLL->[$Km]->[_TYPE_];
+ if ( $type_m eq 'b' && $Km > 0 ) {
+ $Km -= 1;
+ $type_m = $rLL->[$Km]->[_TYPE_];
+ }
+ if ( $type_m eq '#' && $Km > 0 ) {
+ $Km -= 1;
+ $type_m = $rLL->[$Km]->[_TYPE_];
+ if ( $type_m eq 'b' && $Km > 0 ) {
+ $Km -= 1;
+ $type_m = $rLL->[$Km]->[_TYPE_];
}
}
- set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
+ my $seqno_m = $rLL->[$Km]->[_TYPE_SEQUENCE_];
+ if ($seqno_m) {
+ $block_type_m = $rblock_type_of_seqno->{$seqno_m};
+ }
+ }
- # break before the previous token if it looks safe
- # Example of something that we will not try to break before:
- # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
- # Also we don't want to break at a binary operator (like +):
- # $c->createOval(
- # $x + $R, $y +
- # $R => $x - $R,
- # $y - $R, -fill => 'black',
- # );
- my $ibreak = $index_before_arrow[$depth] - 1;
- if ( $ibreak > 0
- && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
- {
- if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
- if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
- if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
+ # break after anything that is not if-like
+ if (
+ $type_m eq ';'
+ || ( $type_m eq '}'
+ && $block_type_m
+ && $block_type_m ne 'if'
+ && $block_type_m ne 'unless'
+ && $block_type_m ne 'elsif'
+ && $block_type_m ne 'else' )
+ )
+ {
+ $rvao_args->{break_alignment_before} = 1;
+ }
+ }
- # don't break pointer calls, such as the following:
- # File::Spec->curdir => 1,
- # (This is tokenized as adjacent 'w' tokens)
- ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
+ # ----------------------------------
+ # define 'rvertical_tightness_flags'
+ # ----------------------------------
+ # These flags tell the vertical aligner if/when to combine consecutive
+ # lines, based on the user input parameters.
+ $rvao_args->{rvertical_tightness_flags} =
+ $self->set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
+ $ri_first, $ri_last, $ending_in_quote, $closing_side_comment )
+ if ( !$is_block_comment );
- # And don't break before a comma, as in the following:
- # ( LONGER_THAN,=> 1,
- # EIGHTY_CHARACTERS,=> 2,
- # CAUSES_FORMATTING,=> 3,
- # LIKE_THIS,=> 4,
- # );
- # This example is for -tso but should be general rule
- if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
- && $tokens_to_go[ $ibreak + 1 ] ne ',' )
- {
- set_forced_breakpoint($ibreak);
- }
- } ## end if ( $types_to_go[$ibreak...])
- } ## end if ( $ibreak > 0 && $tokens_to_go...)
+ # ----------------------------------
+ # define 'is_terminal_ternary' flag
+ # ----------------------------------
- $want_comma_break[$depth] = 0;
- $index_before_arrow[$depth] = -1;
+ # This flag is set at the final ':' of a ternary chain to request
+ # vertical alignment of the final term. Here is a slightly complex
+ # example:
+ #
+ # $self->{_text} = (
+ # !$section ? ''
+ # : $type eq 'item' ? "the $section entry"
+ # : "the section on $section"
+ # )
+ # . (
+ # $page
+ # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
+ # : ' elsewhere in this document'
+ # );
+ #
+ if ( $type_beg eq ':' || $n > 0 && $type_end_last eq ':' ) {
- # handle list which mixes '=>'s and ','s:
- # treat any list items so far as an interrupted list
- $interrupted_list[$depth] = 1;
- next;
- } ## end if ( $want_comma_break...)
+ my $is_terminal_ternary = 0;
+ my $last_leading_type = $n > 0 ? $type_beg_last : ':';
+ if ( $terminal_type ne ';'
+ && $n_last_line > $n
+ && $level_end == $lev )
+ {
+ my $Kbeg_next = $K_to_go[$ibeg_next];
+ $level_end = $rLL->[$Kbeg_next]->[_LEVEL_];
+ $terminal_type = $rLL->[$Kbeg_next]->[_TYPE_];
+ }
+ if (
+ $last_leading_type eq ':'
+ && ( ( $terminal_type eq ';' && $level_end <= $lev )
+ || ( $terminal_type ne ':' && $level_end < $lev ) )
+ )
+ {
- # break after all commas above starting depth
- if ( $depth < $starting_depth && !$dont_align[$depth] ) {
- set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
- next;
+ # the terminal term must not contain any ternary terms, as in
+ # my $ECHO = (
+ # $Is_MSWin32 ? ".\\echo$$"
+ # : $Is_MacOS ? ":echo$$"
+ # : ( $Is_NetWare ? "echo$$" : "./echo$$" )
+ # );
+ $is_terminal_ternary = 1;
+
+ my $KP = $rLL->[$Kbeg]->[_KNEXT_SEQ_ITEM_];
+ while ( defined($KP) && $KP <= $Kend ) {
+ my $type_KP = $rLL->[$KP]->[_TYPE_];
+ if ( $type_KP eq '?' || $type_KP eq ':' ) {
+ $is_terminal_ternary = 0;
+ last;
+ }
+ $KP = $rLL->[$KP]->[_KNEXT_SEQ_ITEM_];
+ }
}
+ $rvao_args->{is_terminal_ternary} = $is_terminal_ternary;
+ }
- # add this comma to the list..
- my $item_count = $item_count_stack[$depth];
- if ( $item_count == 0 ) {
+ # -------------------------------------------------
+ # add any new closing side comment to the last line
+ # -------------------------------------------------
+ if ( $closing_side_comment && $n == $n_last_line && @{$rfields} ) {
- # but do not form a list with no opening structure
- # for example:
+ $rfields->[-1] .= " $closing_side_comment";
- # open INFILE_COPY, ">$input_file_copy"
- # or die ("very long message");
+ # NOTE: Patch for csc. We can just use 1 for the length of the csc
+ # because its length should not be a limiting factor from here on.
+ $rfield_lengths->[-1] += 2;
+
+ # repack
+ $rline_alignment =
+ [ $rtokens, $rfields, $rpatterns, $rfield_lengths ];
+ }
+
+ # ------------------------
+ # define flag 'list_seqno'
+ # ------------------------
+
+ # This flag indicates if this line is contained in a multi-line list
+ if ( !$is_block_comment ) {
+ my $parent_seqno = $parent_seqno_to_go[$ibeg];
+ $rvao_args->{list_seqno} = $ris_list_by_seqno->{$parent_seqno};
+ }
+
+ # The alignment tokens have been marked with nesting_depths, so we need
+ # to pass nesting depths to the vertical aligner. They remain invariant
+ # under all formatting operations. Previously, level values were sent
+ # to the aligner. But they can be altered in welding and other
+ # opeartions, and this can lead to alignement errors.
+ my $nesting_depth_beg = $nesting_depth_to_go[$ibeg];
+ my $nesting_depth_end = $nesting_depth_to_go[$iend];
+
+ # A quirk in the definition of nesting depths is that the closing token
+ # has the same depth as internal tokens. The vertical aligner is
+ # programmed to expect them to have the lower depth, so we fix this.
+ if ( $is_closing_type{ $types_to_go[$ibeg] } ) { $nesting_depth_beg-- }
+ if ( $is_closing_type{ $types_to_go[$iend] } ) { $nesting_depth_end-- }
+
+ # Adjust nesting depths to keep -lp indentation for qw lists. This is
+ # required because qw lists contained in brackets do not get nesting
+ # depths, but the vertical aligner is watching nesting depth changes to
+ # decide if a -lp block is intact. Without this patch, qw lists
+ # enclosed in angle brackets will not get the correct -lp indentation.
+
+ # Looking for line with isolated qw ...
+ if ( $rOpts_line_up_parentheses
+ && $type_beg eq 'q'
+ && $ibeg == $iend )
+ {
- if ( ( $opening_structure_index_stack[$depth] < 0 )
- && $container_environment_to_go[$i] eq 'BLOCK' )
- {
- $dont_align[$depth] = 1;
- }
- } ## end if ( $item_count == 0 )
+ # ... which is part of a multiline qw
+ my $Km = $self->K_previous_nonblank($Kbeg);
+ my $Kp = $self->K_next_nonblank($Kbeg);
+ if ( defined($Km) && $rLL->[$Km]->[_TYPE_] eq 'q'
+ || defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' )
+ {
+ $nesting_depth_beg++;
+ $nesting_depth_end++;
+ }
+ }
- $comma_index[$depth][$item_count] = $i;
- ++$item_count_stack[$depth];
- if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
- $identifier_count_stack[$depth]++;
+ # ---------------------------------
+ # define flag 'forget_side_comment'
+ # ---------------------------------
+
+ # This flag tells the vertical aligner to reset the side comment
+ # location if we are entering a new block from level 0. This is
+ # intended to keep side comments from drifting too far to the right.
+ if ( $terminal_block_type
+ && $nesting_depth_end > $nesting_depth_beg )
+ {
+ my $level_adj = $lev;
+ my $radjusted_levels = $self->[_radjusted_levels_];
+ if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} )
+ {
+ $level_adj = $radjusted_levels->[$Kbeg];
+ if ( $level_adj < 0 ) { $level_adj = 0 }
}
- } ## end while ( ++$i <= $max_index_to_go)
+ if ( $level_adj == 0 ) {
+ $rvao_args->{forget_side_comment} = 1;
+ }
+ }
- #-------------------------------------------
- # end of loop over all tokens in this batch
- #-------------------------------------------
+ # -----------------------------------
+ # Store the remaining non-flag values
+ # -----------------------------------
+ $rvao_args->{Kend} = $Kend_code;
+ $rvao_args->{ci_level} = $ci_levels_to_go[$ibeg];
+ $rvao_args->{indentation} = $indentation;
+ $rvao_args->{level_end} = $nesting_depth_end;
+ $rvao_args->{level} = $nesting_depth_beg;
+ $rvao_args->{rline_alignment} = $rline_alignment;
+ $rvao_args->{maximum_line_length} =
+ $maximum_line_length_at_level[ $levels_to_go[$ibeg] ];
- # set breaks for any unfinished lists ..
- for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
+ # --------------------------------------
+ # send this line to the vertical aligner
+ # --------------------------------------
+ my $vao = $self->[_vertical_aligner_object_];
+ $vao->valign_input($rvao_args);
- $interrupted_list[$dd] = 1;
- $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
- set_comma_breakpoints($dd);
- set_logical_breakpoints($dd)
- if ( $has_old_logical_breakpoints[$dd] );
- set_for_semicolon_breakpoints($dd);
+ $do_not_pad = 0;
- # break open container...
- my $i_opening = $opening_structure_index_stack[$dd];
- set_forced_breakpoint($i_opening)
- unless (
- is_unbreakable_container($dd)
+ # Set flag indicating if this line ends in an opening
+ # token and is very short, so that a blank line is not
+ # needed if the subsequent line is a comment.
+ # Examples of what we are looking for:
+ # {
+ # && (
+ # BEGIN {
+ # default {
+ # sub {
+ $self->[_last_output_short_opening_token_]
- # Avoid a break which would place an isolated ' or "
- # on a line
- || ( $type eq 'Q'
- && $i_opening >= $max_index_to_go - 2
- && $token =~ /^['"]$/ )
- );
- } ## end for ( my $dd = $current_depth...)
+ # line ends in opening token
+ # /^[\{\(\[L]$/
+ = $is_opening_type{$type_end}
- # Return a flag indicating if the input file had some good breakpoints.
- # This flag will be used to force a break in a line shorter than the
- # allowed line length.
- if ( $has_old_logical_breakpoints[$current_depth] ) {
- $saw_good_breakpoint = 1;
- }
+ # and either
+ && (
+ # line has either single opening token
+ $Kend == $Kbeg
- # A complex line with one break at an = has a good breakpoint.
- # This is not complex ($total_depth_variation=0):
- # $res1
- # = 10;
- #
- # This is complex ($total_depth_variation=6):
- # $res2 =
- # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
- elsif ($i_old_assignment_break
- && $total_depth_variation > 4
- && $old_breakpoint_count == 1 )
- {
- $saw_good_breakpoint = 1;
- } ## end elsif ( $i_old_assignment_break...)
+ # or is a single token followed by opening token.
+ # Note that sub identifiers have blanks like 'sub doit'
+ # $token_beg !~ /\s+/
+ || ( $Kend - $Kbeg <= 2 && index( $token_beg, ' ' ) < 0 )
+ )
- return $saw_good_breakpoint;
- } ## end sub scan_list
-} # end scan_list
+ # and limit total to 10 character widths
+ && token_sequence_length( $ibeg, $iend ) <= 10;
-sub find_token_starting_list {
+ } ## end of loop to output each line
- # When testing to see if a block will fit on one line, some
- # previous token(s) may also need to be on the line; particularly
- # if this is a sub call. So we will look back at least one
- # token. NOTE: This isn't perfect, but not critical, because
- # if we mis-identify a block, it will be wrapped and therefore
- # fixed the next time it is formatted.
- my $i_opening_paren = shift;
- my $i_opening_minus = $i_opening_paren;
- my $im1 = $i_opening_paren - 1;
- my $im2 = $i_opening_paren - 2;
- my $im3 = $i_opening_paren - 3;
- my $typem1 = $types_to_go[$im1];
- my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b';
+ # remember indentation of lines containing opening containers for
+ # later use by sub final_indentation_adjustment
+ $self->save_opening_indentation( $ri_first, $ri_last, $rindentation_list )
+ if ( !$is_block_comment );
- if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
- $i_opening_minus = $i_opening_paren;
+ # output any new -cscw block comment
+ if ($cscw_block_comment) {
+ $self->flush_vertical_aligner();
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
}
- elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
- $i_opening_minus = $im1 if $im1 >= 0;
+ return;
+}
- # walk back to improve length estimate
- for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
- last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
- $i_opening_minus = $j;
+sub check_batch_summed_lengths {
+
+ my ( $self, $msg ) = @_;
+ $msg = "" unless defined($msg);
+ my $rLL = $self->[_rLL_];
+
+ # Verify that the summed lengths are correct. We want to be sure that
+ # errors have not been introduced by programming changes. Summed lengths
+ # are defined in sub $store_token. Operations like padding and unmasking
+ # semicolons can change token lengths, but those operations are expected to
+ # update the summed lengths when they make changes. So the summed lengths
+ # should always be correct.
+ foreach my $i ( 0 .. $max_index_to_go ) {
+ my $len_by_sum =
+ $summed_lengths_to_go[ $i + 1 ] - $summed_lengths_to_go[$i];
+ my $len_tok_i = $token_lengths_to_go[$i];
+ my $KK = $K_to_go[$i];
+ my $len_tok_K;
+ if ( defined($KK) ) { $len_tok_K = $rLL->[$KK]->[_TOKEN_LENGTH_] }
+ if ( $len_by_sum != $len_tok_i
+ || defined($len_tok_K) && $len_by_sum != $len_tok_K )
+ {
+ my $lno = defined($KK) ? $rLL->[$KK]->[_LINE_INDEX_] + 1 : "undef";
+ $KK = 'undef' unless defined($KK);
+ my $tok = $tokens_to_go[$i];
+ my $type = $types_to_go[$i];
+ Fault(<<EOM);
+Summed lengths are appear to be incorrect. $msg
+lengths disagree: token length by sum=$len_by_sum but token_length_to_go[$i] = $len_tok_i and rLL->[$KK]->[_TOKEN_LENGTH_]=$len_tok_K
+near line $lno starting with '$tokens_to_go[0]..' at token i=$i K=$KK token_type='$type' token='$tok'
+EOM
}
- if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
}
- elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
- elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
- $i_opening_minus = $im2;
- }
- return $i_opening_minus;
+ return;
}
-{ # begin set_comma_breakpoints_do
-
- my %is_keyword_with_special_leading_term;
+{ ## begin closure set_vertical_alignment_markers
+ my %is_vertical_alignment_type;
+ my %is_not_vertical_alignment_token;
+ my %is_vertical_alignment_keyword;
+ my %is_terminal_alignment_type;
+ my %is_low_level_alignment_token;
BEGIN {
- # These keywords have prototypes which allow a special leading item
- # followed by a list
- my @q =
- qw(formline grep kill map printf sprintf push chmod join pack unshift);
- @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
+ my @q;
+
+ # Replaced =~ and // in the list. // had been removed in RT 119588
+ @q = qw#
+ = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
+ { ? : => && || ~~ !~~ =~ !~ // <=> ->
+ #;
+ @is_vertical_alignment_type{@q} = (1) x scalar(@q);
+
+ # These 'tokens' are not aligned. We need this to remove [
+ # from the above list because it has type ='{'
+ @q = qw([);
+ @is_not_vertical_alignment_token{@q} = (1) x scalar(@q);
+
+ # these are the only types aligned at a line end
+ @q = qw(&& || =>);
+ @is_terminal_alignment_type{@q} = (1) x scalar(@q);
+
+ # these tokens only align at line level
+ @q = ( '{', '(' );
+ @is_low_level_alignment_token{@q} = (1) x scalar(@q);
+
+ # eq and ne were removed from this list to improve alignment chances
+ @q = qw(if unless and or err for foreach while until);
+ @is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
}
- sub set_comma_breakpoints_do {
+ sub set_vertical_alignment_markers {
+
+ # This routine takes the first step toward vertical alignment of the
+ # lines of output text. It looks for certain tokens which can serve as
+ # vertical alignment markers (such as an '=').
+ #
+ # Method: We look at each token $i in this output batch and set
+ # $ralignment_type_to_go->[$i] equal to those tokens at which we would
+ # accept vertical alignment.
- # Given a list with some commas, set breakpoints at some of the
- # commas, if necessary, to make it easy to read. This list is
- # an example:
- my (
- $depth, $i_opening_paren, $i_closing_paren,
- $item_count, $identifier_count, $rcomma_index,
- $next_nonblank_type, $list_type, $interrupted,
- $rdo_not_break_apart, $must_break_open,
- ) = @_;
+ my ( $self, $ri_first, $ri_last ) = @_;
- # nothing to do if no commas seen
- return if ( $item_count < 1 );
- my $i_first_comma = $rcomma_index->[0];
- my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
- my $i_last_comma = $i_true_last_comma;
- if ( $i_last_comma >= $max_index_to_go ) {
- $i_last_comma = $rcomma_index->[ --$item_count - 1 ];
- return if ( $item_count < 1 );
- }
+ my $ralignment_type_to_go;
+ my $ralignment_counts = [];
+ my $ralignment_hash_by_line = [];
- #---------------------------------------------------------------
- # find lengths of all items in the list to calculate page layout
- #---------------------------------------------------------------
- my $comma_count = $item_count;
- my @item_lengths;
- my @i_term_begin;
- my @i_term_end;
- my @i_term_comma;
- my $i_prev_plus;
- my @max_length = ( 0, 0 );
- my $first_term_length;
- my $i = $i_opening_paren;
- my $is_odd = 1;
+ # NOTE: closing side comments can insert up to 2 additional tokens
+ # beyond the original $max_index_to_go, so we need to check ri_last for
+ # the last index.
+ my $max_line = @{$ri_first} - 1;
+ my $max_i = $ri_last->[$max_line];
+ if ( $max_i < $max_index_to_go ) { $max_i = $max_index_to_go }
- foreach my $j ( 0 .. $comma_count - 1 ) {
- $is_odd = 1 - $is_odd;
- $i_prev_plus = $i + 1;
- $i = $rcomma_index->[$j];
+ # -----------------------------------------------------------------
+ # Shortcut:
+ # - no alignments if there is only 1 token.
+ # - and nothing to do if we aren't allowed to change whitespace.
+ # -----------------------------------------------------------------
+ if ( $max_i <= 0 || !$rOpts_add_whitespace ) {
+ return ( $ralignment_type_to_go, $ralignment_counts,
+ $ralignment_hash_by_line );
+ }
- my $i_term_end =
- ( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
- my $i_term_begin =
- ( $types_to_go[$i_prev_plus] eq 'b' )
- ? $i_prev_plus + 1
- : $i_prev_plus;
- push @i_term_begin, $i_term_begin;
- push @i_term_end, $i_term_end;
- push @i_term_comma, $i;
+ my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
+ my $ris_function_call_paren = $self->[_ris_function_call_paren_];
+ my $rLL = $self->[_rLL_];
- # note: currently adding 2 to all lengths (for comma and space)
- my $length =
- 2 + token_sequence_length( $i_term_begin, $i_term_end );
- push @item_lengths, $length;
+ # -------------------------------
+ # First handle any side comment.
+ # -------------------------------
+ my $i_terminal = $max_i;
+ if ( $types_to_go[$max_i] eq '#' ) {
- if ( $j == 0 ) {
- $first_term_length = $length;
+ # We know $max_i > 0 if we get here.
+ $i_terminal -= 1;
+ if ( $i_terminal > 0 && $types_to_go[$i_terminal] eq 'b' ) {
+ $i_terminal -= 1;
}
- else {
- if ( $length > $max_length[$is_odd] ) {
- $max_length[$is_odd] = $length;
- }
- }
- }
+ my $token = $tokens_to_go[$max_i];
+ my $KK = $K_to_go[$max_i];
- # now we have to make a distinction between the comma count and item
- # count, because the item count will be one greater than the comma
- # count if the last item is not terminated with a comma
- my $i_b =
- ( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
- ? $i_last_comma + 1
- : $i_last_comma;
- my $i_e =
- ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
- ? $i_closing_paren - 2
- : $i_closing_paren - 1;
- my $i_effective_last_comma = $i_last_comma;
+ # Do not align various special side comments
+ my $do_not_align = (
- my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
+ # it is any specially marked side comment
+ ( defined($KK) && $rspecial_side_comment_type->{$KK} )
- if ( $last_item_length > 0 ) {
+ # or it is a static side comment
+ || ( $rOpts->{'static-side-comments'}
+ && $token =~ /$static_side_comment_pattern/ )
- # add 2 to length because other lengths include a comma and a blank
- $last_item_length += 2;
- push @item_lengths, $last_item_length;
- push @i_term_begin, $i_b + 1;
- push @i_term_end, $i_e;
- push @i_term_comma, undef;
+ # or a closing side comment
+ || ( $types_to_go[$i_terminal] eq '}'
+ && $tokens_to_go[$i_terminal] eq '}'
+ && $token =~ /$closing_side_comment_prefix_pattern/ )
+ );
- my $i_odd = $item_count % 2;
+ # - For the specific combination -vc -nvsc, we put all side comments
+ # at fixed locations. Note that we will lose hanging side comment
+ # alignments. Otherwise, hsc's can move to strange locations.
+ # - For -nvc -nvsc we make all side comments vertical alignments
+ # because the vertical aligner will check for -nvsc and be able
+ # to reduce the final padding to the side comments for long lines.
+ # and keep hanging side comments aligned.
+ if ( !$do_not_align
+ && !$rOpts_valign_side_comments
+ && $rOpts_valign_code )
+ {
- if ( $last_item_length > $max_length[$i_odd] ) {
- $max_length[$i_odd] = $last_item_length;
+ $do_not_align = 1;
+ my $ipad = $max_i - 1;
+ if ( $types_to_go[$ipad] eq 'b' ) {
+ my $pad_spaces =
+ $rOpts->{'minimum-space-to-comment'} -
+ $token_lengths_to_go[$ipad];
+ $self->pad_token( $ipad, $pad_spaces );
+ }
}
- $item_count++;
- $i_effective_last_comma = $i_e + 1;
-
- if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
- $identifier_count++;
+ if ( !$do_not_align ) {
+ $ralignment_type_to_go->[$max_i] = '#';
+ $ralignment_hash_by_line->[$max_line]->{$max_i} = '#';
+ $ralignment_counts->[$max_line]++;
}
}
- #---------------------------------------------------------------
- # End of length calculations
- #---------------------------------------------------------------
+ # ----------------------------------------------
+ # Nothing more to do on this line if -nvc is set
+ # ----------------------------------------------
+ if ( !$rOpts_valign_code ) {
+ return ( $ralignment_type_to_go, $ralignment_counts,
+ $ralignment_hash_by_line );
+ }
- #---------------------------------------------------------------
- # Compound List Rule 1:
- # Break at (almost) every comma for a list containing a broken
- # sublist. This has higher priority than the Interrupted List
- # Rule.
- #---------------------------------------------------------------
- if ( $has_broken_sublist[$depth] ) {
+ # -------------------------------------
+ # Loop over each line of this batch ...
+ # -------------------------------------
+ my $last_vertical_alignment_BEFORE_index;
+ my $vert_last_nonblank_type;
+ my $vert_last_nonblank_token;
+ my $vert_last_nonblank_block_type;
- # Break at every comma except for a comma between two
- # simple, small terms. This prevents long vertical
- # columns of, say, just 0's.
- my $small_length = 10; # 2 + actual maximum length wanted
+ foreach my $line ( 0 .. $max_line ) {
- # We'll insert a break in long runs of small terms to
- # allow alignment in uniform tables.
- my $skipped_count = 0;
- my $columns = table_columns_available($i_first_comma);
- my $fields = int( $columns / $small_length );
- if ( $rOpts_maximum_fields_per_table
- && $fields > $rOpts_maximum_fields_per_table )
- {
- $fields = $rOpts_maximum_fields_per_table;
- }
- my $max_skipped_count = $fields - 1;
+ my $ibeg = $ri_first->[$line];
+ my $iend = $ri_last->[$line];
- my $is_simple_last_term = 0;
- my $is_simple_next_term = 0;
- foreach my $j ( 0 .. $item_count ) {
- $is_simple_last_term = $is_simple_next_term;
- $is_simple_next_term = 0;
- if ( $j < $item_count
- && $i_term_end[$j] == $i_term_begin[$j]
- && $item_lengths[$j] <= $small_length )
- {
- $is_simple_next_term = 1;
+ next if ( $iend <= $ibeg );
+
+ # back up before any side comment
+ if ( $iend > $i_terminal ) { $iend = $i_terminal }
+
+ my $level_beg = $levels_to_go[$ibeg];
+ my $token_beg = $tokens_to_go[$ibeg];
+ my $type_beg = $types_to_go[$ibeg];
+ my $type_beg_special_char =
+ ( $type_beg eq '.' || $type_beg eq ':' || $type_beg eq '?' );
+
+ $last_vertical_alignment_BEFORE_index = -1;
+ $vert_last_nonblank_type = $type_beg;
+ $vert_last_nonblank_token = $token_beg;
+
+ # ----------------------------------------------------------------
+ # Initialization code merged from 'sub delete_needless_alignments'
+ # ----------------------------------------------------------------
+ my $i_good_paren = -1;
+ my $i_elsif_close = $ibeg - 1;
+ my $i_elsif_open = $iend + 1;
+ my @imatch_list;
+ if ( $type_beg eq 'k' ) {
+
+ # Initialization for paren patch: mark a location of a paren we
+ # should keep, such as one following something like a leading
+ # 'if', 'elsif',
+ $i_good_paren = $ibeg + 1;
+ if ( $types_to_go[$i_good_paren] eq 'b' ) {
+ $i_good_paren++;
}
- next if $j == 0;
- if ( $is_simple_last_term
- && $is_simple_next_term
- && $skipped_count < $max_skipped_count )
+
+ # Initializtion for 'elsif' patch: remember the paren range of
+ # an elsif, and do not make alignments within them because this
+ # can cause loss of padding and overall brace alignment in the
+ # vertical aligner.
+ if ( $token_beg eq 'elsif'
+ && $i_good_paren < $iend
+ && $tokens_to_go[$i_good_paren] eq '(' )
{
- $skipped_count++;
+ $i_elsif_open = $i_good_paren;
+ $i_elsif_close = $mate_index_to_go[$i_good_paren];
}
- else {
- $skipped_count = 0;
- my $i = $i_term_comma[ $j - 1 ];
- last unless defined $i;
- set_forced_breakpoint($i);
+ } ## end if ( $type_beg eq 'k' )
+
+ # --------------------------------------------
+ # Loop over each token in this output line ...
+ # --------------------------------------------
+ foreach my $i ( $ibeg + 1 .. $iend ) {
+
+ next if ( $types_to_go[$i] eq 'b' );
+
+ my $type = $types_to_go[$i];
+ my $token = $tokens_to_go[$i];
+ my $alignment_type = '';
+
+ # ----------------------------------------------
+ # Check for 'paren patch' : Remove excess parens
+ # ----------------------------------------------
+
+ # Excess alignment of parens can prevent other good alignments.
+ # For example, note the parens in the first two rows of the
+ # following snippet. They would normally get marked for
+ # alignment and aligned as follows:
+
+ # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
+ # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
+ # my $img = new Gimp::Image( $w, $h, RGB );
+
+ # This causes unnecessary paren alignment and prevents the
+ # third equals from aligning. If we remove the unwanted
+ # alignments we get:
+
+ # my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
+ # my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
+ # my $img = new Gimp::Image( $w, $h, RGB );
+
+ # A rule for doing this which works well is to remove alignment
+ # of parens whose containers do not contain other aligning
+ # tokens, with the exception that we always keep alignment of
+ # the first opening paren on a line (for things like 'if' and
+ # 'elsif' statements).
+ if ( $token eq ')' && @imatch_list ) {
+
+ # undo the corresponding opening paren if:
+ # - it is at the top of the stack
+ # - and not the first overall opening paren
+ # - does not follow a leading keyword on this line
+ my $imate = $mate_index_to_go[$i];
+ if ( $imatch_list[-1] eq $imate
+ && ( $ibeg > 1 || @imatch_list > 1 )
+ && $imate > $i_good_paren )
+ {
+ if ( $ralignment_type_to_go->[$imate] ) {
+ $ralignment_type_to_go->[$imate] = '';
+ $ralignment_counts->[$line]--;
+ delete $ralignment_hash_by_line->[$line]->{$imate};
+ }
+ pop @imatch_list;
+ }
}
- }
- # always break at the last comma if this list is
- # interrupted; we wouldn't want to leave a terminal '{', for
- # example.
- if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
- return;
- }
+ # do not align tokens at lower level than start of line
+ # except for side comments
+ if ( $levels_to_go[$i] < $level_beg ) {
+ next;
+ }
-#my ( $a, $b, $c ) = caller();
-#print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
-#i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
-#print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
+ #--------------------------------------------------------
+ # First see if we want to align BEFORE this token
+ #--------------------------------------------------------
- #---------------------------------------------------------------
- # Interrupted List Rule:
- # A list is forced to use old breakpoints if it was interrupted
- # by side comments or blank lines, or requested by user.
- #---------------------------------------------------------------
- if ( $rOpts_break_at_old_comma_breakpoints
- || $interrupted
- || $i_opening_paren < 0 )
- {
- copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
- return;
- }
+ # The first possible token that we can align before
+ # is index 2 because: 1) it doesn't normally make sense to
+ # align before the first token and 2) the second
+ # token must be a blank if we are to align before
+ # the third
+ if ( $i < $ibeg + 2 ) { }
- #---------------------------------------------------------------
- # Looks like a list of items. We have to look at it and size it up.
- #---------------------------------------------------------------
+ # must follow a blank token
+ elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
- my $opening_token = $tokens_to_go[$i_opening_paren];
- my $opening_environment =
- $container_environment_to_go[$i_opening_paren];
+ # otherwise, do not align two in a row to create a
+ # blank field
+ elsif ( $last_vertical_alignment_BEFORE_index == $i - 2 ) { }
- #-------------------------------------------------------------------
- # Return if this will fit on one line
- #-------------------------------------------------------------------
+ # align before one of these keywords
+ # (within a line, since $i>1)
+ elsif ( $type eq 'k' ) {
- my $i_opening_minus = find_token_starting_list($i_opening_paren);
- return
- unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
+ # /^(if|unless|and|or|eq|ne)$/
+ if ( $is_vertical_alignment_keyword{$token} ) {
+ $alignment_type = $token;
+ }
+ }
- #-------------------------------------------------------------------
- # Now we know that this block spans multiple lines; we have to set
- # at least one breakpoint -- real or fake -- as a signal to break
- # open any outer containers.
- #-------------------------------------------------------------------
- set_fake_breakpoint();
+ # align before one of these types..
+ elsif ( $is_vertical_alignment_type{$type}
+ && !$is_not_vertical_alignment_token{$token} )
+ {
+ $alignment_type = $token;
- # be sure we do not extend beyond the current list length
- if ( $i_effective_last_comma >= $max_index_to_go ) {
- $i_effective_last_comma = $max_index_to_go - 1;
- }
+ # Do not align a terminal token. Although it might
+ # occasionally look ok to do this, this has been found to be
+ # a good general rule. The main problems are:
+ # (1) that the terminal token (such as an = or :) might get
+ # moved far to the right where it is hard to see because
+ # nothing follows it, and
+ # (2) doing so may prevent other good alignments.
+ # Current exceptions are && and || and =>
+ if ( $i == $iend ) {
+ $alignment_type = ""
+ unless ( $is_terminal_alignment_type{$type} );
+ }
- # Set a flag indicating if we need to break open to keep -lp
- # items aligned. This is necessary if any of the list terms
- # exceeds the available space after the '('.
- my $need_lp_break_open = $must_break_open;
- if ( $rOpts_line_up_parentheses && !$must_break_open ) {
- my $columns_if_unbroken =
- maximum_line_length($i_opening_minus) -
- total_line_length( $i_opening_minus, $i_opening_paren );
- $need_lp_break_open =
- ( $max_length[0] > $columns_if_unbroken )
- || ( $max_length[1] > $columns_if_unbroken )
- || ( $first_term_length > $columns_if_unbroken );
- }
+ # Do not align leading ': (' or '. ('. This would prevent
+ # alignment in something like the following:
+ # $extra_space .=
+ # ( $input_line_number < 10 ) ? " "
+ # : ( $input_line_number < 100 ) ? " "
+ # : "";
+ # or
+ # $code =
+ # ( $case_matters ? $accessor : " lc($accessor) " )
+ # . ( $yesno ? " eq " : " ne " )
- # Specify if the list must have an even number of fields or not.
- # It is generally safest to assume an even number, because the
- # list items might be a hash list. But if we can be sure that
- # it is not a hash, then we can allow an odd number for more
- # flexibility.
- my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count
+ # Also, do not align a ( following a leading ? so we can
+ # align something like this:
+ # $converter{$_}->{ushortok} =
+ # $PDL::IO::Pic::biggrays
+ # ? ( m/GIF/ ? 0 : 1 )
+ # : ( m/GIF|RAST|IFF/ ? 0 : 1 );
+ if ( $type_beg_special_char
+ && $i == $ibeg + 2
+ && $types_to_go[ $i - 1 ] eq 'b' )
+ {
+ $alignment_type = "";
+ }
- if ( $identifier_count >= $item_count - 1
- || $is_assignment{$next_nonblank_type}
- || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
- )
- {
- $odd_or_even = 1;
- }
+ # Certain tokens only align at the same level as the
+ # initial line level
+ if ( $is_low_level_alignment_token{$token}
+ && $levels_to_go[$i] != $level_beg )
+ {
+ $alignment_type = "";
+ }
- # do we have a long first term which should be
- # left on a line by itself?
- my $use_separate_first_term = (
- $odd_or_even == 1 # only if we can use 1 field/line
- && $item_count > 3 # need several items
- && $first_term_length >
- 2 * $max_length[0] - 2 # need long first term
- && $first_term_length >
- 2 * $max_length[1] - 2 # need long first term
- );
+ # For a paren after keyword, only align something like this:
+ # if ( $a ) { &a }
+ # elsif ( $b ) { &b }
+ if ( $token eq '(' ) {
- # or do we know from the type of list that the first term should
- # be placed alone?
- if ( !$use_separate_first_term ) {
- if ( $is_keyword_with_special_leading_term{$list_type} ) {
- $use_separate_first_term = 1;
+ if ( $vert_last_nonblank_type eq 'k' ) {
+ $alignment_type = ""
+ unless $vert_last_nonblank_token =~
+ /^(if|unless|elsif)$/;
+ }
- # should the container be broken open?
- if ( $item_count < 3 ) {
- if ( $i_first_comma - $i_opening_paren < 4 ) {
- ${$rdo_not_break_apart} = 1;
- }
- }
- elsif ($first_term_length < 20
- && $i_first_comma - $i_opening_paren < 4 )
- {
- my $columns = table_columns_available($i_first_comma);
- if ( $first_term_length < $columns ) {
- ${$rdo_not_break_apart} = 1;
+ # Do not align a spaced-function-paren if requested.
+ # Issue git #53, #73.
+ if ( !$rOpts_function_paren_vertical_alignment ) {
+ my $seqno = $type_sequence_to_go[$i];
+ if ( $ris_function_call_paren->{$seqno} ) {
+ $alignment_type = "";
+ }
+ }
}
+
+ # be sure the alignment tokens are unique
+ # This didn't work well: reason not determined
+ # if ($token ne $type) {$alignment_type .= $type}
}
- }
- }
- # if so,
- if ($use_separate_first_term) {
+ # NOTE: This is deactivated because it causes the previous
+ # if/elsif alignment to fail
+ #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
+ #{ $alignment_type = $type; }
- # ..set a break and update starting values
- $use_separate_first_term = 1;
- set_forced_breakpoint($i_first_comma);
- $i_opening_paren = $i_first_comma;
- $i_first_comma = $rcomma_index->[1];
- $item_count--;
- return if $comma_count == 1;
- shift @item_lengths;
- shift @i_term_begin;
- shift @i_term_end;
- shift @i_term_comma;
- }
+ if ($alignment_type) {
+ $last_vertical_alignment_BEFORE_index = $i;
+ }
- # if not, update the metrics to include the first term
- else {
- if ( $first_term_length > $max_length[0] ) {
- $max_length[0] = $first_term_length;
- }
- }
+ #--------------------------------------------------------
+ # Next see if we want to align AFTER the previous nonblank
+ #--------------------------------------------------------
- # Field width parameters
- my $pair_width = ( $max_length[0] + $max_length[1] );
- my $max_width =
- ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
+ # We want to line up ',' and interior ';' tokens, with the added
+ # space AFTER these tokens. (Note: interior ';' is included
+ # because it may occur in short blocks).
+ elsif (
- # Number of free columns across the page width for laying out tables
- my $columns = table_columns_available($i_first_comma);
+ # we haven't already set it
+ ##!$alignment_type
- # Estimated maximum number of fields which fit this space
- # This will be our first guess
- my $number_of_fields_max =
- maximum_number_of_fields( $columns, $odd_or_even, $max_width,
- $pair_width );
- my $number_of_fields = $number_of_fields_max;
+ # previous token IS one of these:
+ (
+ $vert_last_nonblank_type eq ','
+ || $vert_last_nonblank_type eq ';'
+ )
- # Find the best-looking number of fields
- # and make this our second guess if possible
- my ( $number_of_fields_best, $ri_ragged_break_list,
- $new_identifier_count )
- = study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
- $max_width );
+ # and its not the first token of the line
+ ## && $i > $ibeg
- if ( $number_of_fields_best != 0
- && $number_of_fields_best < $number_of_fields_max )
- {
- $number_of_fields = $number_of_fields_best;
- }
+ # and it follows a blank
+ && $types_to_go[ $i - 1 ] eq 'b'
- # ----------------------------------------------------------------------
- # If we are crowded and the -lp option is being used, try to
- # undo some indentation
- # ----------------------------------------------------------------------
- if (
- $rOpts_line_up_parentheses
- && (
- $number_of_fields == 0
- || ( $number_of_fields == 1
- && $number_of_fields != $number_of_fields_best )
- )
- )
- {
- my $available_spaces = get_available_spaces_to_go($i_first_comma);
- if ( $available_spaces > 0 ) {
+ # and it's NOT one of these
+ && !$is_closing_token{$type}
- my $spaces_wanted = $max_width - $columns; # for 1 field
+ # then go ahead and align
+ )
- if ( $number_of_fields_best == 0 ) {
- $number_of_fields_best =
- get_maximum_fields_wanted( \@item_lengths );
+ {
+ $alignment_type = $vert_last_nonblank_type;
}
- if ( $number_of_fields_best != 1 ) {
- my $spaces_wanted_2 =
- 1 + $pair_width - $columns; # for 2 fields
- if ( $available_spaces > $spaces_wanted_2 ) {
- $spaces_wanted = $spaces_wanted_2;
+ #-----------------------
+ # Set the alignment type
+ #-----------------------
+ if ($alignment_type) {
+
+ # but do not align the opening brace of an anonymous sub
+ if ( $token eq '{'
+ && $block_type_to_go[$i] =~ /$ASUB_PATTERN/ )
+ {
+
}
- }
- if ( $spaces_wanted > 0 ) {
- my $deleted_spaces =
- reduce_lp_indentation( $i_first_comma, $spaces_wanted );
+ # and do not make alignments within 'elsif' parens
+ elsif ( $i > $i_elsif_open && $i < $i_elsif_close ) {
- # redo the math
- if ( $deleted_spaces > 0 ) {
- $columns = table_columns_available($i_first_comma);
- $number_of_fields_max =
- maximum_number_of_fields( $columns, $odd_or_even,
- $max_width, $pair_width );
- $number_of_fields = $number_of_fields_max;
+ }
- if ( $number_of_fields_best == 1
- && $number_of_fields >= 1 )
- {
- $number_of_fields = $number_of_fields_best;
- }
+ # and ignore any tokens which have leading padded spaces
+ # example: perl527/lop.t
+ elsif ( substr( $alignment_type, 0, 1 ) eq ' ' ) {
+
+ }
+
+ else {
+ $ralignment_type_to_go->[$i] = $alignment_type;
+ $ralignment_hash_by_line->[$line]->{$i} =
+ $alignment_type;
+ $ralignment_counts->[$line]++;
+ push @imatch_list, $i;
}
}
+
+ $vert_last_nonblank_type = $type;
+ $vert_last_nonblank_token = $token;
}
}
- # try for one column if two won't work
- if ( $number_of_fields <= 0 ) {
- $number_of_fields = int( $columns / $max_width );
- }
+ return ( $ralignment_type_to_go, $ralignment_counts,
+ $ralignment_hash_by_line );
+ } ## end sub set_vertical_alignment_markers
+} ## end closure set_vertical_alignment_markers
- # The user can place an upper bound on the number of fields,
- # which can be useful for doing maintenance on tables
- if ( $rOpts_maximum_fields_per_table
- && $number_of_fields > $rOpts_maximum_fields_per_table )
- {
- $number_of_fields = $rOpts_maximum_fields_per_table;
+sub make_vertical_alignments {
+ my ( $self, $ri_first, $ri_last ) = @_;
+
+ #----------------------------
+ # Shortcut for a single token
+ #----------------------------
+ if ( $max_index_to_go == 0 ) {
+ if ( @{$ri_first} == 1 && $ri_last->[0] == 0 ) {
+ my $rtokens = [];
+ my $rfields = [ $tokens_to_go[0] ];
+ my $rpatterns = [ $types_to_go[0] ];
+ my $rfield_lengths =
+ [ $summed_lengths_to_go[1] - $summed_lengths_to_go[0] ];
+ return [ [ $rtokens, $rfields, $rpatterns, $rfield_lengths ] ];
+ }
+
+ # Strange line packing, not fatal but should not happen
+ elsif (DEVEL_MODE) {
+ my $max_line = @{$ri_first} - 1;
+ my $ibeg = $ri_first->[0];
+ my $iend = $ri_last->[0];
+ my $tok_b = $tokens_to_go[$ibeg];
+ my $tok_e = $tokens_to_go[$iend];
+ my $type_b = $types_to_go[$ibeg];
+ my $type_e = $types_to_go[$iend];
+ Fault(
+"Strange..max_index=0 but nlines=$max_line ibeg=$ibeg tok=$tok_b type=$type_b iend=$iend tok=$tok_e type=$type_e; please check\n"
+ );
}
+ }
- # How many columns (characters) and lines would this container take
- # if no additional whitespace were added?
- my $packed_columns = token_sequence_length( $i_opening_paren + 1,
- $i_effective_last_comma + 1 );
- if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
- my $packed_lines = 1 + int( $packed_columns / $columns );
+ #---------------------------------------------------------
+ # Step 1: Define the alignment tokens for the entire batch
+ #---------------------------------------------------------
+ my ( $ralignment_type_to_go, $ralignment_counts, $ralignment_hash_by_line )
+ = $self->set_vertical_alignment_markers( $ri_first, $ri_last );
- # are we an item contained in an outer list?
- my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
+ #----------------------------------------------
+ # Step 2: Break each line into alignment fields
+ #----------------------------------------------
+ my $rline_alignments = [];
+ my $max_line = @{$ri_first} - 1;
+ foreach my $line ( 0 .. $max_line ) {
- if ( $number_of_fields <= 0 ) {
+ my $ibeg = $ri_first->[$line];
+ my $iend = $ri_last->[$line];
-# #---------------------------------------------------------------
-# # We're in trouble. We can't find a single field width that works.
-# # There is no simple answer here; we may have a single long list
-# # item, or many.
-# #---------------------------------------------------------------
-#
-# In many cases, it may be best to not force a break if there is just one
-# comma, because the standard continuation break logic will do a better
-# job without it.
-#
-# In the common case that all but one of the terms can fit
-# on a single line, it may look better not to break open the
-# containing parens. Consider, for example
-#
-# $color =
-# join ( '/',
-# sort { $color_value{$::a} <=> $color_value{$::b}; }
-# keys %colors );
-#
-# which will look like this with the container broken:
-#
-# $color = join (
-# '/',
-# sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
-# );
-#
-# Here is an example of this rule for a long last term:
-#
-# log_message( 0, 256, 128,
-# "Number of routes in adj-RIB-in to be considered: $peercount" );
-#
-# And here is an example with a long first term:
-#
-# $s = sprintf(
-# "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
-# $r, $pu, $ps, $cu, $cs, $tt
-# )
-# if $style eq 'all';
+ my $rtok_fld_pat_len = $self->make_alignment_patterns(
+ $ibeg, $iend, $ralignment_type_to_go,
+ $ralignment_counts->[$line],
+ $ralignment_hash_by_line->[$line]
+ );
+ push @{$rline_alignments}, $rtok_fld_pat_len;
+ }
+ return $rline_alignments;
+} ## end sub make_vertical_alignments
+
+sub get_seqno {
+
+ # get opening and closing sequence numbers of a token for the vertical
+ # aligner. Assign qw quotes a value to allow qw opening and closing tokens
+ # to be treated somewhat like opening and closing tokens for stacking
+ # tokens by the vertical aligner.
+ my ( $self, $ii, $ending_in_quote ) = @_;
- my $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
- my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
- my $long_first_term =
- excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
+ my $rLL = $self->[_rLL_];
- # break at every comma ...
- if (
+ my $KK = $K_to_go[$ii];
+ my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
- # if requested by user or is best looking
- $number_of_fields_best == 1
+ if ( $rLL->[$KK]->[_TYPE_] eq 'q' ) {
+ my $SEQ_QW = -1;
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ if ( $ii > 0 ) {
+ $seqno = $SEQ_QW if ( $token =~ /^qw\s*[\(\{\[]/ );
+ }
+ else {
+ if ( !$ending_in_quote ) {
+ $seqno = $SEQ_QW if ( $token =~ /[\)\}\]]$/ );
+ }
+ }
+ }
+ return ($seqno);
+}
- # or if this is a sublist of a larger list
- || $in_hierarchical_list
+{
+ my %undo_extended_ci;
- # or if multiple commas and we don't have a long first or last
- # term
- || ( $comma_count > 1
- && !( $long_last_term || $long_first_term ) )
- )
- {
- foreach ( 0 .. $comma_count - 1 ) {
- set_forced_breakpoint( $rcomma_index->[$_] );
+ sub initialize_undo_ci {
+ %undo_extended_ci = ();
+ return;
+ }
+
+ sub undo_ci {
+
+ # Undo continuation indentation in certain sequences
+ my ( $self, $ri_first, $ri_last, $rix_seqno_controlling_ci ) = @_;
+ my ( $line_1, $line_2, $lev_last );
+ my $this_line_is_semicolon_terminated;
+ my $max_line = @{$ri_first} - 1;
+
+ my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
+
+ # Prepare a list of controlling indexes for each line if required.
+ # This is used for efficient processing below. Note: this is
+ # critical for speed. In the initial implementation I just looped
+ # through the @$rix_seqno_controlling_ci list below. Using NYT_prof, I
+ # found that this routine was causing a huge run time in large lists.
+ # On a very large list test case, this new coding dropped the run time
+ # of this routine from 30 seconds to 169 milliseconds.
+ my @i_controlling_ci;
+ if ( @{$rix_seqno_controlling_ci} ) {
+ my @tmp = reverse @{$rix_seqno_controlling_ci};
+ my $ix_next = pop @tmp;
+ foreach my $line ( 0 .. $max_line ) {
+ my $iend = $ri_last->[$line];
+ while ( defined($ix_next) && $ix_next <= $iend ) {
+ push @{ $i_controlling_ci[$line] }, $ix_next;
+ $ix_next = pop @tmp;
}
}
- elsif ($long_last_term) {
+ }
- set_forced_breakpoint($i_last_comma);
- ${$rdo_not_break_apart} = 1 unless $must_break_open;
- }
- elsif ($long_first_term) {
+ # Loop over all lines of the batch ...
- set_forced_breakpoint($i_first_comma);
- }
- else {
+ # Workaround originally created for problem c007, in which the
+ # combination -lp -xci could produce a "Program bug" message in unusual
+ # circumstances.
+ my $skip_SECTION_1;
+ if ( $rOpts_line_up_parentheses
+ && $rOpts_extended_continuation_indentation )
+ {
- # let breaks be defined by default bond strength logic
+ # Only set this flag if -lp is actually used here
+ foreach my $line ( 0 .. $max_line ) {
+ my $ibeg = $ri_first->[$line];
+ if ( ref( $leading_spaces_to_go[$ibeg] ) ) {
+ $skip_SECTION_1 = 1;
+ last;
+ }
}
- return;
}
- # --------------------------------------------------------
- # We have a tentative field count that seems to work.
- # How many lines will this require?
- # --------------------------------------------------------
- my $formatted_lines = $item_count / ($number_of_fields);
- if ( $formatted_lines != int $formatted_lines ) {
- $formatted_lines = 1 + int $formatted_lines;
- }
+ foreach my $line ( 0 .. $max_line ) {
- # So far we've been trying to fill out to the right margin. But
- # compact tables are easier to read, so let's see if we can use fewer
- # fields without increasing the number of lines.
- $number_of_fields =
- compactify_table( $item_count, $number_of_fields, $formatted_lines,
- $odd_or_even );
+ my $ibeg = $ri_first->[$line];
+ my $iend = $ri_last->[$line];
+ my $lev = $levels_to_go[$ibeg];
- # How many spaces across the page will we fill?
- my $columns_per_line =
- ( int $number_of_fields / 2 ) * $pair_width +
- ( $number_of_fields % 2 ) * $max_width;
+ #-----------------------------------
+ # SECTION 1: Undo needless common CI
+ #-----------------------------------
- my $formatted_columns;
+ # We are looking at leading tokens and looking for a sequence all
+ # at the same level and all at a higher level than enclosing lines.
- if ( $number_of_fields > 1 ) {
- $formatted_columns =
- ( $pair_width * ( int( $item_count / 2 ) ) +
- ( $item_count % 2 ) * $max_width );
- }
- else {
- $formatted_columns = $max_width * $item_count;
- }
- if ( $formatted_columns < $packed_columns ) {
- $formatted_columns = $packed_columns;
- }
+ # For example, we can undo continuation indentation in sort/map/grep
+ # chains
- my $unused_columns = $formatted_columns - $packed_columns;
+ # my $dat1 = pack( "n*",
+ # map { $_, $lookup->{$_} }
+ # sort { $a <=> $b }
+ # grep { $lookup->{$_} ne $default } keys %$lookup );
- # set some empirical parameters to help decide if we should try to
- # align; high sparsity does not look good, especially with few lines
- my $sparsity = ($unused_columns) / ($formatted_columns);
- my $max_allowed_sparsity =
- ( $item_count < 3 ) ? 0.1
- : ( $packed_lines == 1 ) ? 0.15
- : ( $packed_lines == 2 ) ? 0.4
- : 0.7;
+ # to become
- # Begin check for shortcut methods, which avoid treating a list
- # as a table for relatively small parenthesized lists. These
- # are usually easier to read if not formatted as tables.
- if (
- $packed_lines <= 2 # probably can fit in 2 lines
- && $item_count < 9 # doesn't have too many items
- && $opening_environment eq 'BLOCK' # not a sub-container
- && $opening_token eq '(' # is paren list
- )
- {
+ # my $dat1 = pack( "n*",
+ # map { $_, $lookup->{$_} }
+ # sort { $a <=> $b }
+ # grep { $lookup->{$_} ne $default } keys %$lookup );
- # Shortcut method 1: for -lp and just one comma:
- # This is a no-brainer, just break at the comma.
+ if ( $line > 0 && !$skip_SECTION_1 ) {
+
+ # if we have started a chain..
+ if ($line_1) {
+
+ # see if it continues..
+ if ( $lev == $lev_last ) {
+ if ( $types_to_go[$ibeg] eq 'k'
+ && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
+ {
+
+ # chain continues...
+ # check for chain ending at end of a statement
+ if ( $line == $max_line ) {
+
+ # see of this line ends a statement
+ $this_line_is_semicolon_terminated =
+ $types_to_go[$iend] eq ';'
+
+ # with possible side comment
+ || ( $types_to_go[$iend] eq '#'
+ && $iend - $ibeg >= 2
+ && $types_to_go[ $iend - 2 ] eq ';'
+ && $types_to_go[ $iend - 1 ] eq 'b' );
+ }
+ $line_2 = $line
+ if ($this_line_is_semicolon_terminated);
+ }
+ else {
+
+ # kill chain
+ $line_1 = undef;
+ }
+ }
+ elsif ( $lev < $lev_last ) {
+
+ # chain ends with previous line
+ $line_2 = $line - 1;
+ }
+ elsif ( $lev > $lev_last ) {
+
+ # kill chain
+ $line_1 = undef;
+ }
+
+ # undo the continuation indentation if a chain ends
+ if ( defined($line_2) && defined($line_1) ) {
+ my $continuation_line_count = $line_2 - $line_1 + 1;
+ @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ]
+ = (0) x ($continuation_line_count)
+ if ( $continuation_line_count >= 0 );
+ @leading_spaces_to_go[ @{$ri_first}
+ [ $line_1 .. $line_2 ] ] =
+ @reduced_spaces_to_go[ @{$ri_first}
+ [ $line_1 .. $line_2 ] ];
+ $line_1 = undef;
+ }
+ }
+
+ # not in a chain yet..
+ else {
+
+ # look for start of a new sort/map/grep chain
+ if ( $lev > $lev_last ) {
+ if ( $types_to_go[$ibeg] eq 'k'
+ && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
+ {
+ $line_1 = $line;
+ }
+ }
+ }
+ }
+
+ #-------------------------------------
+ # SECTION 2: Undo ci at cuddled blocks
+ #-------------------------------------
+
+ # Note that sub final_indentation_adjustment will be called later to
+ # actually do this, but for now we will tentatively mark cuddled
+ # lines with ci=0 so that the the -xci loop which follows will be
+ # correct at cuddles.
if (
- $rOpts_line_up_parentheses # -lp
- && $item_count == 2 # two items, one comma
- && !$must_break_open
+ $types_to_go[$ibeg] eq '}'
+ && ( $nesting_depth_to_go[$iend] + 1 ==
+ $nesting_depth_to_go[$ibeg] )
)
{
- my $i_break = $rcomma_index->[0];
- set_forced_breakpoint($i_break);
- ${$rdo_not_break_apart} = 1;
- return;
+ my $terminal_type = $types_to_go[$iend];
+ if ( $terminal_type eq '#' && $iend > $ibeg ) {
+ $terminal_type = $types_to_go[ $iend - 1 ];
+ if ( $terminal_type eq '#' && $iend - 1 > $ibeg ) {
+ $terminal_type = $types_to_go[ $iend - 2 ];
+ }
+ }
+ if ( $terminal_type eq '{' ) {
+ my $Kbeg = $K_to_go[$ibeg];
+ $ci_levels_to_go[$ibeg] = 0;
+ }
+ }
+
+ #--------------------------------------------------------
+ # SECTION 3: Undo ci set by sub extended_ci if not needed
+ #--------------------------------------------------------
+ # Undo the ci of the leading token if its controlling token
+ # went out on a previous line without ci
+ if ( $ci_levels_to_go[$ibeg] ) {
+ my $Kbeg = $K_to_go[$ibeg];
+ my $seqno = $rseqno_controlling_my_ci->{$Kbeg};
+ if ( $seqno && $undo_extended_ci{$seqno} ) {
+
+ # but do not undo ci set by the -lp flag
+ if ( !ref( $reduced_spaces_to_go[$ibeg] ) ) {
+ $ci_levels_to_go[$ibeg] = 0;
+ $leading_spaces_to_go[$ibeg] =
+ $reduced_spaces_to_go[$ibeg];
+ }
+ }
}
- # method 2 is for most small ragged lists which might look
- # best if not displayed as a table.
- if (
- ( $number_of_fields == 2 && $item_count == 3 )
- || (
- $new_identifier_count > 0 # isn't all quotes
- && $sparsity > 0.15
- ) # would be fairly spaced gaps if aligned
- )
+ # Flag any controlling opening tokens in lines without ci. This
+ # will be used later in the above if statement to undo the ci which
+ # they added. The array i_controlling_ci[$line] was prepared at
+ # the top of this routine.
+ if ( !$ci_levels_to_go[$ibeg]
+ && defined( $i_controlling_ci[$line] ) )
{
+ foreach my $i ( @{ $i_controlling_ci[$line] } ) {
+ my $seqno = $type_sequence_to_go[$i];
+ $undo_extended_ci{$seqno} = 1;
+ }
+ }
- my $break_count = set_ragged_breakpoints( \@i_term_comma,
- $ri_ragged_break_list );
- ++$break_count if ($use_separate_first_term);
+ $lev_last = $lev;
+ }
- # NOTE: we should really use the true break count here,
- # which can be greater if there are large terms and
- # little space, but usually this will work well enough.
- unless ($must_break_open) {
+ return;
+ }
+}
- if ( $break_count <= 1 ) {
- ${$rdo_not_break_apart} = 1;
- }
- elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
- {
- ${$rdo_not_break_apart} = 1;
- }
+{ ## begin closure set_logical_padding
+ my %is_math_op;
+
+ BEGIN {
+
+ my @q = qw( + - * / );
+ @is_math_op{@q} = (1) x scalar(@q);
+ }
+
+ sub set_logical_padding {
+
+ # Look at a batch of lines and see if extra padding can improve the
+ # alignment when there are certain leading operators. Here is an
+ # example, in which some extra space is introduced before
+ # '( $year' to make it line up with the subsequent lines:
+ #
+ # if ( ( $Year < 1601 )
+ # || ( $Year > 2899 )
+ # || ( $EndYear < 1601 )
+ # || ( $EndYear > 2899 ) )
+ # {
+ # &Error_OutOfRange;
+ # }
+ #
+ my ( $self, $ri_first, $ri_last, $peak_batch_size, $starting_in_quote )
+ = @_;
+ my $max_line = @{$ri_first} - 1;
+
+ my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces,
+ $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
+
+ # Patch to produce padding in the first line of short code blocks.
+ # This is part of an update to fix cases b562 .. b983.
+ # This is needed to compensate for a change which was made in 'sub
+ # starting_one_line_block' to prevent blinkers. Previously, that sub
+ # would not look at the total block size and rely on sub
+ # break_long_lines to break up long blocks. Consequently, the
+ # first line of those batches would end in the opening block brace of a
+ # sort/map/grep/eval block. When this was changed to immediately check
+ # for blocks which were too long, the opening block brace would go out
+ # in a single batch, and the block contents would go out as the next
+ # batch. This caused the logic in this routine which decides if the
+ # first line should be padded to be incorrect. To fix this, we set a
+ # flag if the previous batch ended in an opening sort/map/grep/eval
+ # block brace, and use it to adjust the logic to compensate.
+
+ # For example, the following would have previously been a single batch
+ # but now is two batches. We want to pad the line starting in '$dir':
+ # my (@indices) = # batch n-1 (prev batch n)
+ # sort { # batch n-1 (prev batch n)
+ # $dir eq 'left' # batch n
+ # ? $cells[$a] <=> $cells[$b] # batch n
+ # : $cells[$b] <=> $cells[$a]; # batch n
+ # } ( 0 .. $#cells ); # batch n
+
+ my $rLL = $self->[_rLL_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+
+ my $is_short_block;
+ if ( $K_to_go[0] > 0 ) {
+ my $Kp = $K_to_go[0] - 1;
+ if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq 'b' ) {
+ $Kp -= 1;
+ }
+ if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
+ $Kp -= 1;
+ if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq 'b' ) {
+ $Kp -= 1;
+ }
+ }
+ my $seqno = $rLL->[$Kp]->[_TYPE_SEQUENCE_];
+ if ($seqno) {
+ my $block_type = $rblock_type_of_seqno->{$seqno};
+ if ($block_type) {
+ $is_short_block = $is_sort_map_grep_eval{$block_type};
+ $is_short_block ||= $want_one_line_block{$block_type};
}
- return;
}
+ }
- } # end shortcut methods
+ # looking at each line of this batch..
+ foreach my $line ( 0 .. $max_line - 1 ) {
- # debug stuff
+ # see if the next line begins with a logical operator
+ $ibeg = $ri_first->[$line];
+ $iend = $ri_last->[$line];
+ $ibeg_next = $ri_first->[ $line + 1 ];
+ $tok_next = $tokens_to_go[$ibeg_next];
+ $type_next = $types_to_go[$ibeg_next];
- FORMATTER_DEBUG_FLAG_SPARSE && do {
- print STDOUT
-"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
+ $has_leading_op_next = ( $tok_next =~ /^\w/ )
+ ? $is_chain_operator{$tok_next} # + - * / : ? && ||
+ : $is_chain_operator{$type_next}; # and, or
+
+ next unless ($has_leading_op_next);
+
+ # next line must not be at lesser depth
+ next
+ if ( $nesting_depth_to_go[$ibeg] >
+ $nesting_depth_to_go[$ibeg_next] );
+
+ # identify the token in this line to be padded on the left
+ $ipad = undef;
+
+ # handle lines at same depth...
+ if ( $nesting_depth_to_go[$ibeg] ==
+ $nesting_depth_to_go[$ibeg_next] )
+ {
+
+ # if this is not first line of the batch ...
+ if ( $line > 0 ) {
+
+ # and we have leading operator..
+ next if $has_leading_op;
+
+ # Introduce padding if..
+ # 1. the previous line is at lesser depth, or
+ # 2. the previous line ends in an assignment
+ # 3. the previous line ends in a 'return'
+ # 4. the previous line ends in a comma
+ # Example 1: previous line at lesser depth
+ # if ( ( $Year < 1601 ) # <- we are here but
+ # || ( $Year > 2899 ) # list has not yet
+ # || ( $EndYear < 1601 ) # collapsed vertically
+ # || ( $EndYear > 2899 ) )
+ # {
+ #
+ # Example 2: previous line ending in assignment:
+ # $leapyear =
+ # $year % 4 ? 0 # <- We are here
+ # : $year % 100 ? 1
+ # : $year % 400 ? 0
+ # : 1;
+ #
+ # Example 3: previous line ending in comma:
+ # push @expr,
+ # /test/ ? undef
+ # : eval($_) ? 1
+ # : eval($_) ? 1
+ # : 0;
- };
+ # be sure levels agree (do not indent after an indented 'if')
+ next
+ if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
- #---------------------------------------------------------------
- # Compound List Rule 2:
- # If this list is too long for one line, and it is an item of a
- # larger list, then we must format it, regardless of sparsity
- # (ian.t). One reason that we have to do this is to trigger
- # Compound List Rule 1, above, which causes breaks at all commas of
- # all outer lists. In this way, the structure will be properly
- # displayed.
- #---------------------------------------------------------------
+ # allow padding on first line after a comma but only if:
+ # (1) this is line 2 and
+ # (2) there are at more than three lines and
+ # (3) lines 3 and 4 have the same leading operator
+ # These rules try to prevent padding within a long
+ # comma-separated list.
+ my $ok_comma;
+ if ( $types_to_go[$iendm] eq ','
+ && $line == 1
+ && $max_line > 2 )
+ {
+ my $ibeg_next_next = $ri_first->[ $line + 2 ];
+ my $tok_next_next = $tokens_to_go[$ibeg_next_next];
+ $ok_comma = $tok_next_next eq $tok_next;
+ }
- # Decide if this list is too long for one line unless broken
- my $total_columns = table_columns_available($i_opening_paren);
- my $too_long = $packed_columns > $total_columns;
+ next
+ unless (
+ $is_assignment{ $types_to_go[$iendm] }
+ || $ok_comma
+ || ( $nesting_depth_to_go[$ibegm] <
+ $nesting_depth_to_go[$ibeg] )
+ || ( $types_to_go[$iendm] eq 'k'
+ && $tokens_to_go[$iendm] eq 'return' )
+ );
- # For a paren list, include the length of the token just before the
- # '(' because this is likely a sub call, and we would have to
- # include the sub name on the same line as the list. This is still
- # imprecise, but not too bad. (steve.t)
- if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
+ # we will add padding before the first token
+ $ipad = $ibeg;
+ }
- $too_long = excess_line_length( $i_opening_minus,
- $i_effective_last_comma + 1 ) > 0;
- }
+ # for first line of the batch..
+ else {
- # FIXME: For an item after a '=>', try to include the length of the
- # thing before the '=>'. This is crude and should be improved by
- # actually looking back token by token.
- if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
- my $i_opening_minus = $i_opening_paren - 4;
- if ( $i_opening_minus >= 0 ) {
- $too_long = excess_line_length( $i_opening_minus,
- $i_effective_last_comma + 1 ) > 0;
- }
- }
+ # WARNING: Never indent if first line is starting in a
+ # continued quote, which would change the quote.
+ next if $starting_in_quote;
- # Always break lists contained in '[' and '{' if too long for 1 line,
- # and always break lists which are too long and part of a more complex
- # structure.
- my $must_break_open_container = $must_break_open
- || ( $too_long
- && ( $in_hierarchical_list || $opening_token ne '(' ) );
+ # if this is text after closing '}'
+ # then look for an interior token to pad
+ if ( $types_to_go[$ibeg] eq '}' ) {
-#print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
+ }
- #---------------------------------------------------------------
- # The main decision:
- # Now decide if we will align the data into aligned columns. Do not
- # attempt to align columns if this is a tiny table or it would be
- # too spaced. It seems that the more packed lines we have, the
- # sparser the list that can be allowed and still look ok.
- #---------------------------------------------------------------
+ # otherwise, we might pad if it looks really good
+ elsif ($is_short_block) {
+ $ipad = $ibeg;
+ }
+ else {
- if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
- || ( $formatted_lines < 2 )
- || ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
- )
- {
+ # we might pad token $ibeg, so be sure that it
+ # is at the same depth as the next line.
+ next
+ if ( $nesting_depth_to_go[$ibeg] !=
+ $nesting_depth_to_go[$ibeg_next] );
- #---------------------------------------------------------------
- # too sparse: would look ugly if aligned in a table;
- #---------------------------------------------------------------
+ # We can pad on line 1 of a statement if at least 3
+ # lines will be aligned. Otherwise, it
+ # can look very confusing.
- # use old breakpoints if this is a 'big' list
- # FIXME: goal is to improve set_ragged_breakpoints so that
- # this is not necessary.
- if ( $packed_lines > 2 && $item_count > 10 ) {
- write_logfile_entry("List sparse: using old breakpoints\n");
- copy_old_breakpoints( $i_first_comma, $i_last_comma );
- }
+ # We have to be careful not to pad if there are too few
+ # lines. The current rule is:
+ # (1) in general we require at least 3 consecutive lines
+ # with the same leading chain operator token,
+ # (2) but an exception is that we only require two lines
+ # with leading colons if there are no more lines. For example,
+ # the first $i in the following snippet would get padding
+ # by the second rule:
+ #
+ # $i == 1 ? ( "First", "Color" )
+ # : $i == 2 ? ( "Then", "Rarity" )
+ # : ( "Then", "Name" );
- # let the continuation logic handle it if 2 lines
- else {
+ if ( $max_line > 1 ) {
+ my $leading_token = $tokens_to_go[$ibeg_next];
+ my $tokens_differ;
- my $break_count = set_ragged_breakpoints( \@i_term_comma,
- $ri_ragged_break_list );
- ++$break_count if ($use_separate_first_term);
+ # never indent line 1 of a '.' series because
+ # previous line is most likely at same level.
+ # TODO: we should also look at the leading_spaces
+ # of the last output line and skip if it is same
+ # as this line.
+ next if ( $leading_token eq '.' );
- unless ($must_break_open_container) {
- if ( $break_count <= 1 ) {
- ${$rdo_not_break_apart} = 1;
- }
- elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
- {
- ${$rdo_not_break_apart} = 1;
+ my $count = 1;
+ foreach my $l ( 2 .. 3 ) {
+ last if ( $line + $l > $max_line );
+ my $ibeg_next_next = $ri_first->[ $line + $l ];
+ if ( $tokens_to_go[$ibeg_next_next] ne
+ $leading_token )
+ {
+ $tokens_differ = 1;
+ last;
+ }
+ $count++;
+ }
+ next if ($tokens_differ);
+ next if ( $count < 3 && $leading_token ne ':' );
+ $ipad = $ibeg;
+ }
+ else {
+ next;
+ }
}
}
}
- return;
- }
- #---------------------------------------------------------------
- # go ahead and format as a table
- #---------------------------------------------------------------
- write_logfile_entry(
- "List: auto formatting with $number_of_fields fields/row\n");
+ # find interior token to pad if necessary
+ if ( !defined($ipad) ) {
- my $j_first_break =
- $use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
+ for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
- for (
- my $j = $j_first_break ;
- $j < $comma_count ;
- $j += $number_of_fields
- )
- {
- my $i = $rcomma_index->[$j];
- set_forced_breakpoint($i);
- }
- return;
- }
-}
+ # find any unclosed container
+ next
+ unless ( $type_sequence_to_go[$i]
+ && $mate_index_to_go[$i] > $iend );
-sub study_list_complexity {
+ # find next nonblank token to pad
+ $ipad = $inext_to_go[$i];
+ last if ( $ipad > $iend );
+ }
+ last unless $ipad;
+ }
- # Look for complex tables which should be formatted with one term per line.
- # Returns the following:
- #
- # \@i_ragged_break_list = list of good breakpoints to avoid lines
- # which are hard to read
- # $number_of_fields_best = suggested number of fields based on
- # complexity; = 0 if any number may be used.
- #
- my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
- my $item_count = @{$ri_term_begin};
- my $complex_item_count = 0;
- my $number_of_fields_best = $rOpts_maximum_fields_per_table;
- my $i_max = @{$ritem_lengths} - 1;
- ##my @item_complexity;
+ # We cannot pad the first leading token of a file because
+ # it could cause a bug in which the starting indentation
+ # level is guessed incorrectly each time the code is run
+ # though perltidy, thus causing the code to march off to
+ # the right. For example, the following snippet would have
+ # this problem:
- my $i_last_last_break = -3;
- my $i_last_break = -2;
- my @i_ragged_break_list;
+## ov_method mycan( $package, '(""' ), $package
+## or ov_method mycan( $package, '(0+' ), $package
+## or ov_method mycan( $package, '(bool' ), $package
+## or ov_method mycan( $package, '(nomethod' ), $package;
- my $definitely_complex = 30;
- my $definitely_simple = 12;
- my $quote_count = 0;
+ # If this snippet is within a block this won't happen
+ # unless the user just processes the snippet alone within
+ # an editor. In that case either the user will see and
+ # fix the problem or it will be corrected next time the
+ # entire file is processed with perltidy.
+ next if ( $ipad == 0 && $peak_batch_size <= 1 );
- for my $i ( 0 .. $i_max ) {
- my $ib = $ri_term_begin->[$i];
- my $ie = $ri_term_end->[$i];
+## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
+## IT DID MORE HARM THAN GOOD
+## ceil(
+## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
+## / $upem
+## ),
+##? # do not put leading padding for just 2 lines of math
+##? if ( $ipad == $ibeg
+##? && $line > 0
+##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
+##? && $is_math_op{$type_next}
+##? && $line + 2 <= $max_line )
+##? {
+##? my $ibeg_next_next = $ri_first->[ $line + 2 ];
+##? my $type_next_next = $types_to_go[$ibeg_next_next];
+##? next if !$is_math_op{$type_next_next};
+##? }
- # define complexity: start with the actual term length
- my $weighted_length = ( $ritem_lengths->[$i] - 2 );
+ # next line must not be at greater depth
+ my $iend_next = $ri_last->[ $line + 1 ];
+ next
+ if ( $nesting_depth_to_go[ $iend_next + 1 ] >
+ $nesting_depth_to_go[$ipad] );
+
+ # lines must be somewhat similar to be padded..
+ my $inext_next = $inext_to_go[$ibeg_next];
+ my $type = $types_to_go[$ipad];
+ my $type_next = $types_to_go[ $ipad + 1 ];
- ##TBD: join types here and check for variations
- ##my $str=join "", @tokens_to_go[$ib..$ie];
+ # see if there are multiple continuation lines
+ my $logical_continuation_lines = 1;
+ if ( $line + 2 <= $max_line ) {
+ my $leading_token = $tokens_to_go[$ibeg_next];
+ my $ibeg_next_next = $ri_first->[ $line + 2 ];
+ if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
+ && $nesting_depth_to_go[$ibeg_next] eq
+ $nesting_depth_to_go[$ibeg_next_next] )
+ {
+ $logical_continuation_lines++;
+ }
+ }
- my $is_quote = 0;
- if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
- $is_quote = 1;
- $quote_count++;
- }
- elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
- $quote_count++;
- }
+ # see if leading types match
+ my $types_match = $types_to_go[$inext_next] eq $type;
+ my $matches_without_bang;
- if ( $ib eq $ie ) {
- if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
- $complex_item_count++;
- $weighted_length *= 2;
- }
- else {
- }
- }
- else {
- if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
- $complex_item_count++;
- $weighted_length *= 2;
- }
- if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
- $weighted_length += 4;
+ # if first line has leading ! then compare the following token
+ if ( !$types_match && $type eq '!' ) {
+ $types_match = $matches_without_bang =
+ $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
}
- }
-
- # add weight for extra tokens.
- $weighted_length += 2 * ( $ie - $ib );
+ if (
-## my $BUB = join '', @tokens_to_go[$ib..$ie];
-## print "# COMPLEXITY:$weighted_length $BUB\n";
+ # either we have multiple continuation lines to follow
+ # and we are not padding the first token
+ (
+ $logical_continuation_lines > 1
+ && ( $ipad > 0 || $is_short_block )
+ )
-##push @item_complexity, $weighted_length;
+ # or..
+ || (
- # now mark a ragged break after this item it if it is 'long and
- # complex':
- if ( $weighted_length >= $definitely_complex ) {
+ # types must match
+ $types_match
- # if we broke after the previous term
- # then break before it too
- if ( $i_last_break == $i - 1
- && $i > 1
- && $i_last_last_break != $i - 2 )
+ # and keywords must match if keyword
+ && !(
+ $type eq 'k'
+ && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
+ )
+ )
+ )
{
- ## FIXME: don't strand a small term
- pop @i_ragged_break_list;
- push @i_ragged_break_list, $i - 2;
- push @i_ragged_break_list, $i - 1;
- }
-
- push @i_ragged_break_list, $i;
- $i_last_last_break = $i_last_break;
- $i_last_break = $i;
- }
+ #----------------------begin special checks--------------
+ #
+ # SPECIAL CHECK 1:
+ # A check is needed before we can make the pad.
+ # If we are in a list with some long items, we want each
+ # item to stand out. So in the following example, the
+ # first line beginning with '$casefold->' would look good
+ # padded to align with the next line, but then it
+ # would be indented more than the last line, so we
+ # won't do it.
+ #
+ # ok(
+ # $casefold->{code} eq '0041'
+ # && $casefold->{status} eq 'C'
+ # && $casefold->{mapping} eq '0061',
+ # 'casefold 0x41'
+ # );
+ #
+ # Note:
+ # It would be faster, and almost as good, to use a comma
+ # count, and not pad if comma_count > 1 and the previous
+ # line did not end with a comma.
+ #
+ my $ok_to_pad = 1;
- # don't break before a small last term -- it will
- # not look good on a line by itself.
- elsif ($i == $i_max
- && $i_last_break == $i - 1
- && $weighted_length <= $definitely_simple )
- {
- pop @i_ragged_break_list;
- }
- }
+ my $ibg = $ri_first->[ $line + 1 ];
+ my $depth = $nesting_depth_to_go[ $ibg + 1 ];
- my $identifier_count = $i_max + 1 - $quote_count;
+ # just use simplified formula for leading spaces to avoid
+ # needless sub calls
+ my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
- # Need more tuning here..
- if ( $max_width > 12
- && $complex_item_count > $item_count / 2
- && $number_of_fields_best != 2 )
- {
- $number_of_fields_best = 1;
- }
+ # look at each line beyond the next ..
+ my $l = $line + 1;
+ foreach my $ltest ( $line + 2 .. $max_line ) {
+ $l = $ltest;
+ my $ibg = $ri_first->[$l];
- return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
-}
+ # quit looking at the end of this container
+ last
+ if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
+ || ( $nesting_depth_to_go[$ibg] < $depth );
-sub get_maximum_fields_wanted {
+ # cannot do the pad if a later line would be
+ # outdented more
+ if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
+ $ok_to_pad = 0;
+ last;
+ }
+ }
- # Not all tables look good with more than one field of items.
- # This routine looks at a table and decides if it should be
- # formatted with just one field or not.
- # This coding is still under development.
- my ($ritem_lengths) = @_;
+ # don't pad if we end in a broken list
+ if ( $l == $max_line ) {
+ my $i2 = $ri_last->[$l];
+ if ( $types_to_go[$i2] eq '#' ) {
+ my $i1 = $ri_first->[$l];
+ next if terminal_type_i( $i1, $i2 ) eq ',';
+ }
+ }
- my $number_of_fields_best = 0;
+ # SPECIAL CHECK 2:
+ # a minus may introduce a quoted variable, and we will
+ # add the pad only if this line begins with a bare word,
+ # such as for the word 'Button' here:
+ # [
+ # Button => "Print letter \"~$_\"",
+ # -command => [ sub { print "$_[0]\n" }, $_ ],
+ # -accelerator => "Meta+$_"
+ # ];
+ #
+ # On the other hand, if 'Button' is quoted, it looks best
+ # not to pad:
+ # [
+ # 'Button' => "Print letter \"~$_\"",
+ # -command => [ sub { print "$_[0]\n" }, $_ ],
+ # -accelerator => "Meta+$_"
+ # ];
+ if ( $types_to_go[$ibeg_next] eq 'm' ) {
+ $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
+ }
- # For just a few items, we tentatively assume just 1 field.
- my $item_count = @{$ritem_lengths};
- if ( $item_count <= 5 ) {
- $number_of_fields_best = 1;
- }
+ next unless $ok_to_pad;
- # For larger tables, look at it both ways and see what looks best
- else {
+ #----------------------end special check---------------
- my $is_odd = 1;
- my @max_length = ( 0, 0 );
- my @last_length_2 = ( undef, undef );
- my @first_length_2 = ( undef, undef );
- my $last_length = undef;
- my $total_variation_1 = 0;
- my $total_variation_2 = 0;
- my @total_variation_2 = ( 0, 0 );
+ my $length_1 = total_line_length( $ibeg, $ipad - 1 );
+ my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
+ $pad_spaces = $length_2 - $length_1;
- foreach my $j ( 0 .. $item_count - 1 ) {
+ # If the first line has a leading ! and the second does
+ # not, then remove one space to try to align the next
+ # leading characters, which are often the same. For example:
+ # if ( !$ts
+ # || $ts == $self->Holder
+ # || $self->Holder->Type eq "Arena" )
+ #
+ # This usually helps readability, but if there are subsequent
+ # ! operators things will still get messed up. For example:
+ #
+ # if ( !exists $Net::DNS::typesbyname{$qtype}
+ # && exists $Net::DNS::classesbyname{$qtype}
+ # && !exists $Net::DNS::classesbyname{$qclass}
+ # && exists $Net::DNS::typesbyname{$qclass} )
+ # We can't fix that.
+ if ($matches_without_bang) { $pad_spaces-- }
- $is_odd = 1 - $is_odd;
- my $length = $ritem_lengths->[$j];
- if ( $length > $max_length[$is_odd] ) {
- $max_length[$is_odd] = $length;
- }
+ # make sure this won't change if -lp is used
+ my $indentation_1 = $leading_spaces_to_go[$ibeg];
+ if ( ref($indentation_1)
+ && $indentation_1->get_recoverable_spaces() == 0 )
+ {
+ my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
+ if ( ref($indentation_2)
+ && $indentation_2->get_recoverable_spaces() != 0 )
+ {
+ $pad_spaces = 0;
+ }
+ }
- if ( defined($last_length) ) {
- my $dl = abs( $length - $last_length );
- $total_variation_1 += $dl;
- }
- $last_length = $length;
+ # we might be able to handle a pad of -1 by removing a blank
+ # token
+ if ( $pad_spaces < 0 ) {
- my $ll = $last_length_2[$is_odd];
- if ( defined($ll) ) {
- my $dl = abs( $length - $ll );
- $total_variation_2[$is_odd] += $dl;
- }
- else {
- $first_length_2[$is_odd] = $length;
- }
- $last_length_2[$is_odd] = $length;
- }
- $total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
+ # Deactivated for -kpit due to conflict. This block deletes
+ # a space in an attempt to improve alignment in some cases,
+ # but it may conflict with user spacing requests. For now
+ # it is just deactivated if the -kpit option is used.
+ if ( $pad_spaces == -1 ) {
+ if ( $ipad > $ibeg
+ && $types_to_go[ $ipad - 1 ] eq 'b'
+ && !%keyword_paren_inner_tightness )
+ {
+ $self->pad_token( $ipad - 1, $pad_spaces );
+ }
+ }
+ $pad_spaces = 0;
+ }
- my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
- unless ( $total_variation_2 < $factor * $total_variation_1 ) {
- $number_of_fields_best = 1;
+ # now apply any padding for alignment
+ if ( $ipad >= 0 && $pad_spaces ) {
+
+ my $length_t = total_line_length( $ibeg, $iend );
+ if ( $pad_spaces + $length_t <=
+ $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] )
+ {
+ $self->pad_token( $ipad, $pad_spaces );
+ }
+ }
+ }
}
+ continue {
+ $iendm = $iend;
+ $ibegm = $ibeg;
+ $has_leading_op = $has_leading_op_next;
+ } ## end of loop over lines
+ return;
}
- return ($number_of_fields_best);
-}
+} ## end closure set_logical_padding
-sub table_columns_available {
- my $i_first_comma = shift;
- my $columns =
- maximum_line_length($i_first_comma) -
- leading_spaces_to_go($i_first_comma);
+sub pad_token {
- # Patch: the vertical formatter does not line up lines whose lengths
- # exactly equal the available line length because of allowances
- # that must be made for side comments. Therefore, the number of
- # available columns is reduced by 1 character.
- $columns -= 1;
- return $columns;
-}
+ # insert $pad_spaces before token number $ipad
+ my ( $self, $ipad, $pad_spaces ) = @_;
+ my $rLL = $self->[_rLL_];
+ my $KK = $K_to_go[$ipad];
+ my $tok = $rLL->[$KK]->[_TOKEN_];
+ my $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_];
-sub maximum_number_of_fields {
+ if ( $pad_spaces > 0 ) {
+ $tok = ' ' x $pad_spaces . $tok;
+ $tok_len += $pad_spaces;
+ }
+ elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) {
+ $tok = "";
+ $tok_len = 0;
+ }
+ else {
- # how many fields will fit in the available space?
- my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
- my $max_pairs = int( $columns / $pair_width );
- my $number_of_fields = $max_pairs * 2;
- if ( $odd_or_even == 1
- && $max_pairs * $pair_width + $max_width <= $columns )
- {
- $number_of_fields++;
+ # shouldn't happen
+ return;
}
- return $number_of_fields;
-}
-sub compactify_table {
+ $tok = $rLL->[$KK]->[_TOKEN_] = $tok;
+ $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
- # given a table with a certain number of fields and a certain number
- # of lines, see if reducing the number of fields will make it look
- # better.
- my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
- if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
- my $min_fields;
+ $token_lengths_to_go[$ipad] += $pad_spaces;
+ $tokens_to_go[$ipad] = $tok;
- for (
- $min_fields = $number_of_fields ;
- $min_fields >= $odd_or_even
- && $min_fields * $formatted_lines >= $item_count ;
- $min_fields -= $odd_or_even
- )
- {
- $number_of_fields = $min_fields;
- }
+ foreach my $i ( $ipad .. $max_index_to_go ) {
+ $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
}
- return $number_of_fields;
+ return;
}
-sub set_ragged_breakpoints {
+{ ## begin closure make_alignment_patterns
- # Set breakpoints in a list that cannot be formatted nicely as a
- # table.
- my ( $ri_term_comma, $ri_ragged_break_list ) = @_;
+ my %keyword_map;
+ my %operator_map;
+ my %is_w_n_C;
+ my %is_my_local_our;
+ my %is_kwU;
+ my %is_use_like;
+ my %is_binary_type;
+ my %is_binary_keyword;
+ my %name_map;
- my $break_count = 0;
- foreach ( @{$ri_ragged_break_list} ) {
- my $j = $ri_term_comma->[$_];
- if ($j) {
- set_forced_breakpoint($j);
- $break_count++;
- }
- }
- return $break_count;
-}
+ BEGIN {
-sub copy_old_breakpoints {
- my ( $i_first_comma, $i_last_comma ) = @_;
- for my $i ( $i_first_comma .. $i_last_comma ) {
- if ( $old_breakpoint_to_go[$i] ) {
- set_forced_breakpoint($i);
- }
- }
- return;
-}
+ # Note: %block_type_map is now global to enable the -gal=s option
-sub set_nobreaks {
- my ( $i, $j ) = @_;
- if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
+ # map certain keywords to the same 'if' class to align
+ # long if/elsif sequences. [elsif.pl]
+ %keyword_map = (
+ 'unless' => 'if',
+ 'else' => 'if',
+ 'elsif' => 'if',
+ 'when' => 'given',
+ 'default' => 'given',
+ 'case' => 'switch',
- FORMATTER_DEBUG_FLAG_NOBREAK && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
-"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
- };
+ # treat an 'undef' similar to numbers and quotes
+ 'undef' => 'Q',
+ );
- @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
- }
+ # map certain operators to the same class for pattern matching
+ %operator_map = (
+ '!~' => '=~',
+ '+=' => '+=',
+ '-=' => '+=',
+ '*=' => '+=',
+ '/=' => '+=',
+ );
- # shouldn't happen; non-critical error
- else {
- FORMATTER_DEBUG_FLAG_NOBREAK && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
- "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
- };
- }
- return;
-}
+ %is_w_n_C = (
+ 'w' => 1,
+ 'n' => 1,
+ 'C' => 1,
+ );
-sub set_fake_breakpoint {
+ # leading keywords which to skip for efficiency when making parenless
+ # container names
+ my @q = qw( my local our return );
+ @{is_my_local_our}{@q} = (1) x scalar(@q);
- # Just bump up the breakpoint count as a signal that there are breaks.
- # This is useful if we have breaks but may want to postpone deciding where
- # to make them.
- $forced_breakpoint_count++;
- return;
-}
+ # leading keywords where we should just join one token to form
+ # parenless name
+ @q = qw( use );
+ @{is_use_like}{@q} = (1) x scalar(@q);
-sub set_forced_breakpoint {
- my $i = shift;
+ # leading token types which may be used to make a container name
+ @q = qw( k w U );
+ @{is_kwU}{@q} = (1) x scalar(@q);
- return unless defined $i && $i >= 0;
+ # token types which prevent using leading word as a container name
+ @q = qw(
+ x / : % . | ^ < = > || >= != *= => !~ == && |= .= -= =~ += <= %= ^= x= ~~ ** << /=
+ &= // >> ~. &. |. ^.
+ **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~
+ );
+ push @q, ',';
+ @{is_binary_type}{@q} = (1) x scalar(@q);
+
+ # token keywords which prevent using leading word as a container name
+ @_ = qw(and or err eq ne cmp);
+ @is_binary_keyword{@_} = (1) x scalar(@_);
+
+ # Some common function calls whose args can be aligned. These do not
+ # give good alignments if the lengths differ significantly.
+ %name_map = (
+ 'unlike' => 'like',
+ 'isnt' => 'is',
+ ##'is_deeply' => 'is', # poor; names lengths too different
+ );
- # no breaks between welded tokens
- return if ( weld_len_right_to_go($i) );
+ }
- # when called with certain tokens, use bond strengths to decide
- # if we break before or after it
- my $token = $tokens_to_go[$i];
+ sub make_alignment_patterns {
- if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
- if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
- }
+ # Here we do some important preliminary work for the
+ # vertical aligner. We create four arrays for one
+ # output line. These arrays contain strings that can
+ # be tested by the vertical aligner to see if
+ # consecutive lines can be aligned vertically.
+ #
+ # The four arrays are indexed on the vertical
+ # alignment fields and are:
+ # @tokens - a list of any vertical alignment tokens for this line.
+ # These are tokens, such as '=' '&&' '#' etc which
+ # we want to might align vertically. These are
+ # decorated with various information such as
+ # nesting depth to prevent unwanted vertical
+ # alignment matches.
+ # @fields - the actual text of the line between the vertical alignment
+ # tokens.
+ # @patterns - a modified list of token types, one for each alignment
+ # field. These should normally each match before alignment is
+ # allowed, even when the alignment tokens match.
+ # @field_lengths - the display width of each field
+
+ my ( $self, $ibeg, $iend, $ralignment_type_to_go, $alignment_count,
+ $ralignment_hash )
+ = @_;
+
+ # The var $ralignment_hash contains all of the alignments for this
+ # line. It is not yet used but is available for future coding in case
+ # there is a need to do a preliminary scan of the alignment tokens.
+ if (DEVEL_MODE) {
+ my $new_count = 0;
+ if ( defined($ralignment_hash) ) {
+ $new_count = keys %{$ralignment_hash};
+ }
+ my $old_count = $alignment_count;
+ $old_count = 0 unless ($old_count);
+ if ( $new_count != $old_count ) {
+ my $K = $K_to_go[$ibeg];
+ my $rLL = $self->[_rLL_];
+ my $lnl = $rLL->[$K]->[_LINE_INDEX_];
+ Fault(
+"alignment hash token count gives count=$new_count but old count is $old_count near line=$lnl\n"
+ );
+ }
+ }
- # breaks are forced before 'if' and 'unless'
- elsif ( $is_if_unless{$token} ) { $i-- }
+ # -------------------------------------
+ # Shortcut for lines without alignments
+ # -------------------------------------
+ if ( !$alignment_count ) {
+ my $rtokens = [];
+ my $rfield_lengths = [ $summed_lengths_to_go[ $iend + 1 ] -
+ $summed_lengths_to_go[$ibeg] ];
+ my $rpatterns;
+ my $rfields;
+ if ( $ibeg == $iend ) {
+ $rfields = [ $tokens_to_go[$ibeg] ];
+ $rpatterns = [ $types_to_go[$ibeg] ];
+ }
+ else {
+ $rfields = [ join( '', @tokens_to_go[ $ibeg .. $iend ] ) ];
+ $rpatterns = [ join( '', @types_to_go[ $ibeg .. $iend ] ) ];
+ }
+ return [ $rtokens, $rfields, $rpatterns, $rfield_lengths ];
+ }
+
+ my $i_start = $ibeg;
+ my $depth = 0;
+ my %container_name = ( 0 => "" );
+
+ my @tokens = ();
+ my @fields = ();
+ my @patterns = ();
+ my @field_lengths = ();
+
+ #-------------------------------------------------------------
+ # Make a container name for any uncontained commas, issue c089
+ #-------------------------------------------------------------
+ # This is a generalization of the fix for rt136416 which was a
+ # specialized patch just for 'use Module' statements.
+ # We restrict this to semicolon-terminated statements; that way
+ # we know that the top level commas are not in a list container.
+ if ( $ibeg == 0 && $iend == $max_index_to_go ) {
+ my $iterm = $max_index_to_go;
+ if ( $types_to_go[$iterm] eq '#' ) {
+ $iterm = $iprev_to_go[$iterm];
+ }
+
+ # Alignment lines ending like '=> sub {'; fixes issue c093
+ my $term_type_ok = $types_to_go[$iterm] eq ';';
+ $term_type_ok ||=
+ $tokens_to_go[$iterm] eq '{' && $block_type_to_go[$iterm];
+
+ if ( $iterm > $ibeg
+ && $term_type_ok
+ && !$is_my_local_our{ $tokens_to_go[$ibeg] }
+ && $levels_to_go[$ibeg] eq $levels_to_go[$iterm] )
+ {
- if ( $i >= 0 && $i <= $max_index_to_go ) {
- my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
+ # Make a container name by combining all leading barewords,
+ # keywords and functions.
+ my $name = "";
+ my $count = 0;
+ my $count_max;
+ my $iname_end;
+ my $ilast_blank;
+ for ( $ibeg .. $iterm ) {
+ my $type = $types_to_go[$_];
+
+ if ( $type eq 'b' ) {
+ $ilast_blank = $_;
+ next;
+ }
- FORMATTER_DEBUG_FLAG_FORCE && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
-"FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
- };
+ my $token = $tokens_to_go[$_];
+
+ # Give up if we find an opening paren, binary operator or
+ # comma within or after the proposed container name.
+ if ( $token eq '('
+ || $is_binary_type{$type}
+ || $type eq 'k' && $is_binary_keyword{$token} )
+ {
+ $name = "";
+ last;
+ }
- if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
- $forced_breakpoint_to_go[$i_nonblank] = 1;
+ # The container name is only built of certain types:
+ last if ( !$is_kwU{$type} );
- if ( $i_nonblank > $index_max_forced_break ) {
- $index_max_forced_break = $i_nonblank;
- }
- $forced_breakpoint_count++;
- $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
- $i_nonblank;
+ # Normally it is made of one word, but two words for 'use'
+ if ( $count == 0 ) {
+ if ( $type eq 'k'
+ && $is_use_like{ $tokens_to_go[$_] } )
+ {
+ $count_max = 2;
+ }
+ else {
+ $count_max = 1;
+ }
+ }
+ elsif ( defined($count_max) && $count >= $count_max ) {
+ last;
+ }
+
+ if ( defined( $name_map{$token} ) ) {
+ $token = $name_map{$token};
+ }
- # if we break at an opening container..break at the closing
- if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
- set_closing_breakpoint($i_nonblank);
+ $name .= ' ' . $token;
+ $iname_end = $_;
+ $count++;
+ }
+
+ # Require a space after the container name token(s)
+ if ( $name
+ && defined($ilast_blank)
+ && $ilast_blank > $iname_end )
+ {
+ $name = substr( $name, 1 );
+ $container_name{'0'} = $name;
+ }
}
}
- }
- return;
-}
-sub clear_breakpoint_undo_stack {
- $forced_breakpoint_undo_count = 0;
- return;
-}
+ # --------------------
+ # Loop over all tokens
+ # --------------------
+ my $j = 0; # field index
-sub undo_forced_breakpoint_stack {
+ $patterns[0] = "";
+ my %token_count;
+ for my $i ( $ibeg .. $iend ) {
- my $i_start = shift;
- if ( $i_start < 0 ) {
- $i_start = 0;
- my ( $a, $b, $c ) = caller();
- warning(
-"Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
- );
- }
+ # Keep track of containers balanced on this line only.
+ # These are used below to prevent unwanted cross-line alignments.
+ # Unbalanced containers already avoid aligning across
+ # container boundaries.
- while ( $forced_breakpoint_undo_count > $i_start ) {
- my $i =
- $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
- if ( $i >= 0 && $i <= $max_index_to_go ) {
- $forced_breakpoint_to_go[$i] = 0;
- $forced_breakpoint_count--;
+ my $type = $types_to_go[$i];
+ my $token = $tokens_to_go[$i];
+ my $depth_last = $depth;
+ if ( $type_sequence_to_go[$i] ) {
+ if ( $is_opening_token{$token} ) {
+
+ # if container is balanced on this line...
+ my $i_mate = $mate_index_to_go[$i];
+ if ( $i_mate > $i && $i_mate <= $iend ) {
+ $depth++;
+
+ # Append the previous token name to make the container name
+ # more unique. This name will also be given to any commas
+ # within this container, and it helps avoid undesirable
+ # alignments of different types of containers.
+
+ # Containers beginning with { and [ are given those names
+ # for uniqueness. That way commas in different containers
+ # will not match. Here is an example of what this prevents:
+ # a => [ 1, 2, 3 ],
+ # b => { b1 => 4, b2 => 5 },
+ # Here is another example of what we avoid by labeling the
+ # commas properly:
+
+ # is_d( [ $a, $a ], [ $b, $c ] );
+ # is_d( { foo => $a, bar => $a }, { foo => $b, bar => $c } );
+ # is_d( [ \$a, \$a ], [ \$b, \$c ] );
+
+ my $name = $token;
+ if ( $token eq '(' ) {
+ $name = $self->make_paren_name($i);
+ }
- FORMATTER_DEBUG_FLAG_UNDOBP && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
-"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
- };
- }
+ # name cannot be '.', so change to something else if so
+ if ( $name eq '.' ) { $name = 'dot' }
+
+ $container_name{$depth} = "+" . $name;
+
+ # Make the container name even more unique if necessary.
+ # If we are not vertically aligning this opening paren,
+ # append a character count to avoid bad alignment since
+ # it usually looks bad to align commas within containers
+ # for which the opening parens do not align. Here
+ # is an example very BAD alignment of commas (because
+ # the atan2 functions are not all aligned):
+ # $XY =
+ # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
+ # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
+ # $X * atan2( $X, 1 ) -
+ # $Y * atan2( $Y, 1 );
+ #
+ # On the other hand, it is usually okay to align commas
+ # if opening parens align, such as:
+ # glVertex3d( $cx + $s * $xs, $cy, $z );
+ # glVertex3d( $cx, $cy + $s * $ys, $z );
+ # glVertex3d( $cx - $s * $xs, $cy, $z );
+ # glVertex3d( $cx, $cy - $s * $ys, $z );
+ #
+ # To distinguish between these situations, we append
+ # the length of the line from the previous matching
+ # token, or beginning of line, to the function name.
+ # This will allow the vertical aligner to reject
+ # undesirable matches.
+
+ # if we are not aligning on this paren...
+ if ( !$ralignment_type_to_go->[$i] ) {
+
+ # Sum length from previous alignment
+ my $len = token_sequence_length( $i_start, $i - 1 );
+
+ # Minor patch: do not include the length of any '!'.
+ # Otherwise, commas in the following line will not
+ # match
+ # ok( 20, tapprox( ( pdl 2, 3 ), ( pdl 2, 3 ) ) );
+ # ok( 21, !tapprox( ( pdl 2, 3 ), ( pdl 2, 4 ) ) );
+ if ( grep { $_ eq '!' }
+ @types_to_go[ $i_start .. $i - 1 ] )
+ {
+ $len -= 1;
+ }
- # shouldn't happen, but not a critical error
- else {
- FORMATTER_DEBUG_FLAG_UNDOBP && do {
- my ( $a, $b, $c ) = caller();
- print STDOUT
-"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
- };
- }
- }
- return;
-}
+ if ( $i_start == $ibeg ) {
+
+ # For first token, use distance from start of
+ # line but subtract off the indentation due to
+ # level. Otherwise, results could vary with
+ # indentation.
+ $len +=
+ leading_spaces_to_go($ibeg) -
+ $levels_to_go[$i_start] *
+ $rOpts_indent_columns;
+ if ( $len < 0 ) { $len = 0 }
+ }
+
+ # tack this length onto the container name to try
+ # to make a unique token name
+ $container_name{$depth} .= "-" . $len;
+ } ## end if ( !$ralignment_type_to_go...)
+ } ## end if ( $i_mate > $i && $i_mate...)
+ } ## end if ( $is_opening_token...)
+
+ elsif ( $is_closing_type{$token} ) {
+ $depth-- if $depth > 0;
+ }
+ } ## end if ( $type_sequence_to_go...)
+
+ # if we find a new synchronization token, we are done with
+ # a field
+ if ( $i > $i_start && $ralignment_type_to_go->[$i] ) {
+
+ my $tok = my $raw_tok = $ralignment_type_to_go->[$i];
+
+ # map similar items
+ my $tok_map = $operator_map{$tok};
+ $tok = $tok_map if ($tok_map);
+
+ # make separators in different nesting depths unique
+ # by appending the nesting depth digit.
+ if ( $raw_tok ne '#' ) {
+ $tok .= "$nesting_depth_to_go[$i]";
+ }
+
+ # also decorate commas with any container name to avoid
+ # unwanted cross-line alignments.
+ if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
+
+ # If we are at an opening token which increased depth, we have
+ # to use the name from the previous depth.
+ my $depth_p =
+ ( $depth_last < $depth ? $depth_last : $depth );
+ if ( $container_name{$depth_p} ) {
+ $tok .= $container_name{$depth_p};
+ }
+ }
+
+ # Patch to avoid aligning leading and trailing if, unless.
+ # Mark trailing if, unless statements with container names.
+ # This makes them different from leading if, unless which
+ # are not so marked at present. If we ever need to name
+ # them too, we could use ci to distinguish them.
+ # Example problem to avoid:
+ # return ( 2, "DBERROR" )
+ # if ( $retval == 2 );
+ # if ( scalar @_ ) {
+ # my ( $a, $b, $c, $d, $e, $f ) = @_;
+ # }
+ if ( $raw_tok eq '(' ) {
+ if ( $ci_levels_to_go[$ibeg]
+ && $container_name{$depth} =~ /^\+(if|unless)/ )
+ {
+ $tok .= $container_name{$depth};
+ }
+ }
-sub sync_token_K {
- my ( $self, $i ) = @_;
+ # Decorate block braces with block types to avoid
+ # unwanted alignments such as the following:
+ # foreach ( @{$routput_array} ) { $fh->print($_) }
+ # eval { $fh->close() };
+ if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
+ my $block_type = $block_type_to_go[$i];
- # Keep tokens in the rLL array in sync with the _to_go array
- my $rLL = $self->{rLL};
- my $K = $K_to_go[$i];
- if ( defined($K) ) {
- $rLL->[$K]->[_TOKEN_] = $tokens_to_go[$i];
- }
- else {
- # shouldn't happen
- }
- return;
-}
+ # map certain related block types to allow
+ # else blocks to align
+ $block_type = $block_type_map{$block_type}
+ if ( defined( $block_type_map{$block_type} ) );
-{ # begin recombine_breakpoints
+ # remove sub names to allow one-line sub braces to align
+ # regardless of name
+ if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
- my %is_amp_amp;
- my %is_ternary;
- my %is_math_op;
- my %is_plus_minus;
- my %is_mult_div;
+ # allow all control-type blocks to align
+ if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
- BEGIN {
+ $tok .= $block_type;
+ }
- my @q;
- @q = qw( && || );
- @is_amp_amp{@q} = (1) x scalar(@q);
+ # Mark multiple copies of certain tokens with the copy number
+ # This will allow the aligner to decide if they are matched.
+ # For now, only do this for equals. For example, the two
+ # equals on the next line will be labeled '=0' and '=0.2'.
+ # Later, the '=0.2' will be ignored in alignment because it
+ # has no match.
- @q = qw( ? : );
- @is_ternary{@q} = (1) x scalar(@q);
+ # $| = $debug = 1 if $opt_d;
+ # $full_index = 1 if $opt_i;
- @q = qw( + - * / );
- @is_math_op{@q} = (1) x scalar(@q);
+ if ( $raw_tok eq '=' || $raw_tok eq '=>' ) {
+ $token_count{$tok}++;
+ if ( $token_count{$tok} > 1 ) {
+ $tok .= '.' . $token_count{$tok};
+ }
+ }
- @q = qw( + - );
- @is_plus_minus{@q} = (1) x scalar(@q);
+ # concatenate the text of the consecutive tokens to form
+ # the field
+ push( @fields,
+ join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
- @q = qw( * / );
- @is_mult_div{@q} = (1) x scalar(@q);
- }
+ push @field_lengths,
+ $summed_lengths_to_go[$i] - $summed_lengths_to_go[$i_start];
- sub DUMP_BREAKPOINTS {
+ # store the alignment token for this field
+ push( @tokens, $tok );
- # Debug routine to dump current breakpoints...not normally called
- # We are given indexes to the current lines:
- # $ri_beg = ref to array of BEGinning indexes of each line
- # $ri_end = ref to array of ENDing indexes of each line
- my ( $ri_beg, $ri_end, $msg ) = @_;
- print STDERR "----Dumping breakpoints from: $msg----\n";
- for my $n ( 0 .. @{$ri_end} - 1 ) {
- my $ibeg = $ri_beg->[$n];
- my $iend = $ri_end->[$n];
- my $text = "";
- foreach my $i ( $ibeg .. $iend ) {
- $text .= $tokens_to_go[$i];
- }
- print STDERR "$n ($ibeg:$iend) $text\n";
- }
- print STDERR "----\n";
- return;
- }
+ # get ready for the next batch
+ $i_start = $i;
+ $j++;
+ $patterns[$j] = "";
+ } ## end if ( new synchronization token
- sub delete_one_line_semicolons {
+ # continue accumulating tokens
- my ( $self, $ri_beg, $ri_end ) = @_;
- my $rLL = $self->{rLL};
- my $K_opening_container = $self->{K_opening_container};
+ # for keywords we have to use the actual text
+ if ( $type eq 'k' ) {
- # Walk down the lines of this batch and delete any semicolons
- # terminating one-line blocks;
- my $nmax = @{$ri_end} - 1;
+ my $tok_fix = $tokens_to_go[$i];
- foreach my $n ( 0 .. $nmax ) {
- my $i_beg = $ri_beg->[$n];
- my $i_e = $ri_end->[$n];
- my $K_beg = $K_to_go[$i_beg];
- my $K_e = $K_to_go[$i_e];
- my $K_end = $K_e;
- my $type_end = $rLL->[$K_end]->[_TYPE_];
- if ( $type_end eq '#' ) {
- $K_end = $self->K_previous_nonblank($K_end);
- if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; }
+ # but map certain keywords to a common string to allow
+ # alignment.
+ $tok_fix = $keyword_map{$tok_fix}
+ if ( defined( $keyword_map{$tok_fix} ) );
+ $patterns[$j] .= $tok_fix;
}
- # we are looking for a line ending in closing brace
- next
- unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' );
-
- # ...and preceded by a semicolon on the same line
- my $K_semicolon = $self->K_previous_nonblank($K_end);
- my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg );
- next if ( $i_semicolon <= $i_beg );
- next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' );
-
- # safety check - shouldn't happen
- if ( $types_to_go[$i_semicolon] ne ';' ) {
- Fault("unexpected type looking for semicolon, ignoring");
- next;
+ elsif ( $type eq 'b' ) {
+ $patterns[$j] .= $type;
}
- # ... with the corresponding opening brace on the same line
- my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_];
- my $K_opening = $K_opening_container->{$type_sequence};
- my $i_opening = $i_beg + ( $K_opening - $K_beg );
- next if ( $i_opening < $i_beg );
+ # Mark most things before arrows as a quote to
+ # get them to line up. Testfile: mixed.pl.
- # ... and only one semicolon between these braces
- my $semicolon_count = 0;
- foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) {
- if ( $rLL->[$K]->[_TYPE_] eq ';' ) {
- $semicolon_count++;
- last;
- }
- }
- next if ($semicolon_count);
+ # handle $type =~ /^[wnC]$/
+ elsif ( $is_w_n_C{$type} ) {
- # ...ok, then make the semicolon invisible
- $tokens_to_go[$i_semicolon] = "";
- }
- return;
- }
+ my $type_fix = $type;
- sub unmask_phantom_semicolons {
+ if ( $i < $iend - 1 ) {
+ my $next_type = $types_to_go[ $i + 1 ];
+ my $i_next_nonblank =
+ ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
- my ( $self, $ri_beg, $ri_end ) = @_;
+ if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
+ $type_fix = 'Q';
- # Walk down the lines of this batch and unmask any invisible line-ending
- # semicolons. They were placed by sub respace_tokens but we only now
- # know if we actually need them.
+ # Patch to ignore leading minus before words,
+ # by changing pattern 'mQ' into just 'Q',
+ # so that we can align things like this:
+ # Button => "Print letter \"~$_\"",
+ # -command => [ sub { print "$_[0]\n" }, $_ ],
+ if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
+ }
+ }
- my $nmax = @{$ri_end} - 1;
- foreach my $n ( 0 .. $nmax ) {
+ # Convert a bareword within braces into a quote for
+ # matching. This will allow alignment of expressions like
+ # this:
+ # local ( $SIG{'INT'} ) = IGNORE;
+ # local ( $SIG{ALRM} ) = 'POSTMAN';
+ if ( $type eq 'w'
+ && $i > $ibeg
+ && $i < $iend
+ && $types_to_go[ $i - 1 ] eq 'L'
+ && $types_to_go[ $i + 1 ] eq 'R' )
+ {
+ $type_fix = 'Q';
+ }
+
+ # patch to make numbers and quotes align
+ if ( $type eq 'n' ) { $type_fix = 'Q' }
- my $i = $ri_end->[$n];
- if ( $types_to_go[$i] eq ';' && $tokens_to_go[$i] eq '' ) {
+ $patterns[$j] .= $type_fix;
+ } ## end elsif ( $is_w_n_C{$type} )
- $tokens_to_go[$i] = $want_left_space{';'} == WS_NO ? ';' : ' ;';
- $self->sync_token_K($i);
+ # ignore any ! in patterns
+ elsif ( $type eq '!' ) { }
- my $line_number = 1 + $self->get_old_line_index( $K_to_go[$i] );
- note_added_semicolon($line_number);
+ # everything else
+ else {
+ $patterns[$j] .= $type;
}
- }
- return;
- }
-
- sub recombine_breakpoints {
- # sub set_continuation_breaks is very liberal in setting line breaks
- # for long lines, always setting breaks at good breakpoints, even
- # when that creates small lines. Sometimes small line fragments
- # are produced which would look better if they were combined.
- # That's the task of this routine.
- #
- # We are given indexes to the current lines:
- # $ri_beg = ref to array of BEGinning indexes of each line
- # $ri_end = ref to array of ENDing indexes of each line
- my ( $ri_beg, $ri_end ) = @_;
+ # remove any zero-level name at first fat comma
+ if ( $depth == 0 && $type eq '=>' ) {
+ $container_name{$depth} = "";
+ }
+ } ## end for my $i ( $ibeg .. $iend)
- # Make a list of all good joining tokens between the lines
- # n-1 and n.
- my @joint;
- my $nmax = @{$ri_end} - 1;
- for my $n ( 1 .. $nmax ) {
- my $ibeg_1 = $ri_beg->[ $n - 1 ];
- my $iend_1 = $ri_end->[ $n - 1 ];
- my $iend_2 = $ri_end->[$n];
- my $ibeg_2 = $ri_beg->[$n];
+ # done with this line .. join text of tokens to make the last field
+ push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
+ push @field_lengths,
+ $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$i_start];
- my ( $itok, $itokp, $itokm );
+ return [ \@tokens, \@fields, \@patterns, \@field_lengths ];
+ } ## end sub make_alignment_patterns
- foreach my $itest ( $iend_1, $ibeg_2 ) {
- my $type = $types_to_go[$itest];
- if ( $is_math_op{$type}
- || $is_amp_amp{$type}
- || $is_assignment{$type}
- || $type eq ':' )
- {
- $itok = $itest;
- }
- }
- $joint[$n] = [$itok];
- }
+} ## end closure make_alignment_patterns
- my $more_to_do = 1;
+sub make_paren_name {
+ my ( $self, $i ) = @_;
- # We keep looping over all of the lines of this batch
- # until there are no more possible recombinations
- my $nmax_last = @{$ri_end};
- my $reverse = 0;
- while ($more_to_do) {
- my $n_best = 0;
- my $bs_best;
- my $nmax = @{$ri_end} - 1;
+ # The token at index $i is a '('.
+ # Create an alignment name for it to avoid incorrect alignments.
- # Safety check for infinite loop
- unless ( $nmax < $nmax_last ) {
-
- # Shouldn't happen because splice below decreases nmax on each
- # pass.
- Fault("Program bug-infinite loop in recombine breakpoints\n");
- }
- $nmax_last = $nmax;
- $more_to_do = 0;
- my $skip_Section_3;
- my $leading_amp_count = 0;
- my $this_line_is_semicolon_terminated;
-
- # loop over all remaining lines in this batch
- for my $iter ( 1 .. $nmax ) {
-
- # alternating sweep direction gives symmetric results
- # for recombining lines which exceed the line length
- # such as eval {{{{.... }}}}
- my $n;
- if ($reverse) { $n = 1 + $nmax - $iter; }
- else { $n = $iter }
-
- #----------------------------------------------------------
- # If we join the current pair of lines,
- # line $n-1 will become the left part of the joined line
- # line $n will become the right part of the joined line
- #
- # Here are Indexes of the endpoint tokens of the two lines:
- #
- # -----line $n-1--- | -----line $n-----
- # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
- # ^
- # |
- # We want to decide if we should remove the line break
- # between the tokens at $iend_1 and $ibeg_2
- #
- # We will apply a number of ad-hoc tests to see if joining
- # here will look ok. The code will just issue a 'next'
- # command if the join doesn't look good. If we get through
- # the gauntlet of tests, the lines will be recombined.
- #----------------------------------------------------------
- #
- # beginning and ending tokens of the lines we are working on
- my $ibeg_1 = $ri_beg->[ $n - 1 ];
- my $iend_1 = $ri_end->[ $n - 1 ];
- my $iend_2 = $ri_end->[$n];
- my $ibeg_2 = $ri_beg->[$n];
- my $ibeg_nmax = $ri_beg->[$nmax];
-
- # combined line cannot be too long
- my $excess = excess_line_length( $ibeg_1, $iend_2, 1, 1 );
- next if ( $excess > 0 );
-
- my $type_iend_1 = $types_to_go[$iend_1];
- my $type_iend_2 = $types_to_go[$iend_2];
- my $type_ibeg_1 = $types_to_go[$ibeg_1];
- my $type_ibeg_2 = $types_to_go[$ibeg_2];
-
- # terminal token of line 2 if any side comment is ignored:
- my $iend_2t = $iend_2;
- my $type_iend_2t = $type_iend_2;
-
- # some beginning indexes of other lines, which may not exist
- my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1;
- my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
- my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
-
- my $bs_tweak = 0;
-
- #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
- # $nesting_depth_to_go[$ibeg_1] );
-
- FORMATTER_DEBUG_FLAG_RECOMBINE && do {
- print STDERR
-"RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
- };
+ # Start with the name of the previous nonblank token...
+ my $name = "";
+ my $im = $i - 1;
+ return "" if ( $im < 0 );
+ if ( $types_to_go[$im] eq 'b' ) { $im--; }
+ return "" if ( $im < 0 );
+ $name = $tokens_to_go[$im];
- # If line $n is the last line, we set some flags and
- # do any special checks for it
- if ( $n == $nmax ) {
+ # Prepend any sub name to an isolated -> to avoid unwanted alignments
+ # [test case is test8/penco.pl]
+ if ( $name eq '->' ) {
+ $im--;
+ if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
+ $name = $tokens_to_go[$im] . $name;
+ }
+ }
- # a terminal '{' should stay where it is
- # unless preceded by a fat comma
- next if ( $type_ibeg_2 eq '{' && $type_iend_1 ne '=>' );
+ # Finally, remove any leading arrows
+ if ( substr( $name, 0, 2 ) eq '->' ) {
+ $name = substr( $name, 2 );
+ }
+ return $name;
+}
- if ( $type_iend_2 eq '#'
- && $iend_2 - $ibeg_2 >= 2
- && $types_to_go[ $iend_2 - 1 ] eq 'b' )
- {
- $iend_2t = $iend_2 - 2;
- $type_iend_2t = $types_to_go[$iend_2t];
- }
+{ ## begin closure final_indentation_adjustment
- $this_line_is_semicolon_terminated = $type_iend_2t eq ';';
- }
+ my ( $last_indentation_written, $last_unadjusted_indentation,
+ $last_leading_token );
- #----------------------------------------------------------
- # Recombine Section 0:
- # Examine the special token joining this line pair, if any.
- # Put as many tests in this section to avoid duplicate code and
- # to make formatting independent of whether breaks are to the
- # left or right of an operator.
- #----------------------------------------------------------
+ sub initialize_final_indentation_adjustment {
+ $last_indentation_written = 0;
+ $last_unadjusted_indentation = 0;
+ $last_leading_token = "";
+ return;
+ }
- my ($itok) = @{ $joint[$n] };
- if ($itok) {
+ sub final_indentation_adjustment {
+
+ #--------------------------------------------------------------------
+ # This routine sets the final indentation of a line in the Formatter.
+ #--------------------------------------------------------------------
+
+ # It starts with the basic indentation which has been defined for the
+ # leading token, and then takes into account any options that the user
+ # has set regarding special indenting and outdenting.
+
+ # This routine has to resolve a number of complex interacting issues,
+ # including:
+ # 1. The various -cti=n type flags, which contain the desired change in
+ # indentation for lines ending in commas and semicolons, should be
+ # followed,
+ # 2. qw quotes require special processing and do not fit perfectly
+ # with normal containers,
+ # 3. formatting with -wn can complicate things, especially with qw
+ # quotes,
+ # 4. formatting with the -lp option is complicated, and does not
+ # work well with qw quotes and with -wn formatting.
+ # 5. a number of special situations, such as 'cuddled' formatting.
+ # 6. This routine is mainly concerned with outdenting closing tokens
+ # but note that there is some overlap with the functions of sub
+ # undo_ci, which was processed earlier, so care has to be taken to
+ # keep them coordinated.
- # FIXME: Patch - may not be necessary
- my $iend_1 =
- $type_iend_1 eq 'b'
- ? $iend_1 - 1
- : $iend_1;
+ my (
+ $self, $ibeg,
+ $iend, $rfields,
+ $rpatterns, $ri_first,
+ $ri_last, $rindentation_list,
+ $level_jump, $starting_in_quote,
+ $is_static_block_comment,
+ ) = @_;
- my $iend_2 =
- $type_iend_2 eq 'b'
- ? $iend_2 - 1
- : $iend_2;
- ## END PATCH
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
+ my $ris_bli_container = $self->[_ris_bli_container_];
+ my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
+ my $rwant_reduced_ci = $self->[_rwant_reduced_ci_];
+ my $rK_weld_left = $self->[_rK_weld_left_];
+
+ # Find the last code token of this line
+ my $i_terminal = $iend;
+ my $terminal_type = $types_to_go[$iend];
+ if ( $terminal_type eq '#' && $i_terminal > $ibeg ) {
+ $i_terminal -= 1;
+ $terminal_type = $types_to_go[$i_terminal];
+ if ( $terminal_type eq 'b' && $i_terminal > $ibeg ) {
+ $i_terminal -= 1;
+ $terminal_type = $types_to_go[$i_terminal];
+ }
+ }
+
+ my $terminal_block_type = $block_type_to_go[$i_terminal];
+ my $is_outdented_line = 0;
+
+ my $type_beg = $types_to_go[$ibeg];
+ my $token_beg = $tokens_to_go[$ibeg];
+ my $block_type_beg = $block_type_to_go[$ibeg];
+ my $level_beg = $levels_to_go[$ibeg];
+ my $leading_spaces_beg = $leading_spaces_to_go[$ibeg];
+ my $K_beg = $K_to_go[$ibeg];
+ my $seqno_beg = $type_sequence_to_go[$ibeg];
+ my $ibeg_weld_fix = $ibeg;
+ my $is_closing_type_beg = $is_closing_type{$type_beg};
+ my $is_bli_beg = $seqno_beg ? $ris_bli_container->{$seqno_beg} : 0;
+
+ # QW INDENTATION PATCH 3:
+ my $seqno_qw_closing;
+ if ( $type_beg eq 'q' && $ibeg == 0 ) {
+ my $KK = $K_to_go[$ibeg];
+ $seqno_qw_closing =
+ $self->[_rending_multiline_qw_seqno_by_K_]->{$KK};
+ }
- my $type = $types_to_go[$itok];
+ my $is_semicolon_terminated = $terminal_type eq ';'
+ && ( $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg]
+ || $seqno_qw_closing );
- if ( $type eq ':' ) {
+ # NOTE: A future improvement would be to make it semicolon terminated
+ # even if it does not have a semicolon but is followed by a closing
+ # block brace. This would undo ci even for something like the
+ # following, in which the final paren does not have a semicolon because
+ # it is a possible weld location:
- # do not join at a colon unless it disobeys the break request
- if ( $itok eq $iend_1 ) {
- next unless $want_break_before{$type};
- }
- else {
- $leading_amp_count++;
- next if $want_break_before{$type};
- }
- } ## end if ':'
+ # if ($BOLD_MATH) {
+ # (
+ # $labels, $comment,
+ # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
+ # )
+ # }
+ #
- # handle math operators + - * /
- elsif ( $is_math_op{$type} ) {
+ # MOJO: Set a flag if this lines begins with ')->'
+ my $leading_paren_arrow = (
+ $is_closing_type_beg
+ && $token_beg eq ')'
+ && (
+ ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' )
+ || ( $ibeg < $i_terminal - 1
+ && $types_to_go[ $ibeg + 1 ] eq 'b'
+ && $types_to_go[ $ibeg + 2 ] eq '->' )
+ )
+ );
- # Combine these lines if this line is a single
- # number, or if it is a short term with same
- # operator as the previous line. For example, in
- # the following code we will combine all of the
- # short terms $A, $B, $C, $D, $E, $F, together
- # instead of leaving them one per line:
- # my $time =
- # $A * $B * $C * $D * $E * $F *
- # ( 2. * $eps * $sigma * $area ) *
- # ( 1. / $tcold**3 - 1. / $thot**3 );
+ #---------------------------------------------------------
+ # Section 1: set a flag and a default indentation
+ #
+ # Most lines are indented according to the initial token.
+ # But it is common to outdent to the level just after the
+ # terminal token in certain cases...
+ # adjust_indentation flag:
+ # 0 - do not adjust
+ # 1 - outdent
+ # 2 - vertically align with opening token
+ # 3 - indent
+ #---------------------------------------------------------
+ my $adjust_indentation = 0;
+ my $default_adjust_indentation = $adjust_indentation;
- # This can be important in math-intensive code.
+ my (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ );
- my $good_combo;
+ # Honor any flag to reduce -ci set by the -bbxi=n option
+ if ( $seqno_beg && $rwant_reduced_ci->{$seqno_beg} ) {
- my $itokp = min( $inext_to_go[$itok], $iend_2 );
- my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
- my $itokm = max( $iprev_to_go[$itok], $ibeg_1 );
- my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
+ # if this is an opening, it must be alone on the line ...
+ if ( $is_closing_type{$type_beg} || $ibeg == $i_terminal ) {
+ $adjust_indentation = 1;
+ }
- # check for a number on the right
- if ( $types_to_go[$itokp] eq 'n' ) {
+ # ... or a single welded unit (fix for b1173)
+ elsif ($total_weld_count) {
+ my $Kterm = $K_to_go[$i_terminal];
+ my $Kterm_test = $rK_weld_left->{$Kterm};
+ if ( defined($Kterm_test) && $Kterm_test >= $K_beg ) {
+ $Kterm = $Kterm_test;
+ }
+ if ( $Kterm == $K_beg ) { $adjust_indentation = 1 }
+ }
+ }
- # ok if nothing else on right
- if ( $itokp == $iend_2 ) {
- $good_combo = 1;
- }
- else {
+ # Update the $is_bli flag as we go. It is initially 1.
+ # We note seeing a leading opening brace by setting it to 2.
+ # If we get to the closing brace without seeing the opening then we
+ # turn it off. This occurs if the opening brace did not get output
+ # at the start of a line, so we will then indent the closing brace
+ # in the default way.
+ if ( $is_bli_beg && $is_bli_beg == 1 ) {
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_opening = $K_opening_container->{$seqno_beg};
+ if ( $K_beg eq $K_opening ) {
+ $ris_bli_container->{$seqno_beg} = $is_bli_beg = 2;
+ }
+ else { $is_bli_beg = 0 }
+ }
- # look one more token to right..
- # okay if math operator or some termination
- $good_combo =
- ( ( $itokpp == $iend_2 )
- && $is_math_op{ $types_to_go[$itokpp] } )
- || $types_to_go[$itokpp] =~ /^[#,;]$/;
- }
- }
+ # QW PATCH for the combination -lp -wn
+ # For -lp formatting use $ibeg_weld_fix to get around the problem
+ # that with -lp type formatting the opening and closing tokens to not
+ # have sequence numbers.
+ if ( $seqno_qw_closing && $total_weld_count ) {
+ my $i_plus = $inext_to_go[$ibeg];
+ if ( $i_plus <= $max_index_to_go ) {
+ my $K_plus = $K_to_go[$i_plus];
+ if ( defined( $rK_weld_left->{$K_plus} ) ) {
+ $ibeg_weld_fix = $i_plus;
+ }
+ }
+ }
- # check for a number on the left
- if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
+ # if we are at a closing token of some type..
+ if ( $is_closing_type_beg || $seqno_qw_closing ) {
- # okay if nothing else to left
- if ( $itokm == $ibeg_1 ) {
- $good_combo = 1;
- }
+ # get the indentation of the line containing the corresponding
+ # opening token
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first,
+ $ri_last, $rindentation_list, $seqno_qw_closing );
- # otherwise look one more token to left
- else {
+ my $terminal_is_in_list = $self->is_in_list_by_i($i_terminal);
- # okay if math operator, comma, or assignment
- $good_combo = ( $itokmm == $ibeg_1 )
- && ( $is_math_op{ $types_to_go[$itokmm] }
- || $types_to_go[$itokmm] =~ /^[,]$/
- || $is_assignment{ $types_to_go[$itokmm] }
- );
- }
- }
+ # First set the default behavior:
+ if (
- # look for a single short token either side of the
- # operator
- if ( !$good_combo ) {
+ # default behavior is to outdent closing lines
+ # of the form: "); }; ]; )->xxx;"
+ $is_semicolon_terminated
- # Slight adjustment factor to make results
- # independent of break before or after operator in
- # long summed lists. (An operator and a space make
- # two spaces).
- my $two = ( $itok eq $iend_1 ) ? 2 : 0;
+ # and 'cuddled parens' of the form: ")->pack("
+ # Bug fix for RT #123749]: the types here were
+ # incorrectly '(' and ')'. Corrected to be '{' and '}'
+ || (
+ $terminal_type eq '{'
+ && $type_beg eq '}'
+ && ( $nesting_depth_to_go[$iend] + 1 ==
+ $nesting_depth_to_go[$ibeg] )
+ )
- $good_combo =
+ # remove continuation indentation for any line like
+ # } ... {
+ # or without ending '{' and unbalanced, such as
+ # such as '}->{$operator}'
+ || (
+ $type_beg eq '}'
- # numbers or id's on both sides of this joint
- $types_to_go[$itokp] =~ /^[in]$/
- && $types_to_go[$itokm] =~ /^[in]$/
+ && ( $types_to_go[$iend] eq '{'
+ || $levels_to_go[$iend] < $level_beg )
+ )
- # one of the two lines must be short:
- && (
- (
- # no more than 2 nonblank tokens right of
- # joint
- $itokpp == $iend_2
-
- # short
- && token_sequence_length( $itokp, $iend_2 )
- < $two +
- $rOpts_short_concatenation_item_length
- )
- || (
- # no more than 2 nonblank tokens left of
- # joint
- $itokmm == $ibeg_1
-
- # short
- && token_sequence_length( $ibeg_1, $itokm )
- < 2 - $two +
- $rOpts_short_concatenation_item_length
- )
+ # and when the next line is at a lower indentation level...
- )
+ # PATCH #1: and only if the style allows undoing continuation
+ # for all closing token types. We should really wait until
+ # the indentation of the next line is known and then make
+ # a decision, but that would require another pass.
- # keep pure terms; don't mix +- with */
- && !(
- $is_plus_minus{$type}
- && ( $is_mult_div{ $types_to_go[$itokmm] }
- || $is_mult_div{ $types_to_go[$itokpp] } )
- )
- && !(
- $is_mult_div{$type}
- && ( $is_plus_minus{ $types_to_go[$itokmm] }
- || $is_plus_minus{ $types_to_go[$itokpp] } )
- )
+ # PATCH #2: and not if this token is under -xci control
+ || ( $level_jump < 0
+ && !$some_closing_token_indentation
+ && !$rseqno_controlling_my_ci->{$K_beg} )
- ;
- }
+ # Patch for -wn=2, multiple welded closing tokens
+ || ( $i_terminal > $ibeg
+ && $is_closing_type{ $types_to_go[$iend] } )
- # it is also good to combine if we can reduce to 2 lines
- if ( !$good_combo ) {
+ # Alternate Patch for git #51, isolated closing qw token not
+ # outdented if no-delete-old-newlines is set. This works, but
+ # a more general patch elsewhere fixes the real problem: ljump.
+ # || ( $seqno_qw_closing && $ibeg == $i_terminal )
- # index on other line where same token would be in a
- # long chain.
- my $iother =
- ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
+ )
+ {
+ $adjust_indentation = 1;
+ }
- $good_combo =
- $n == 2
- && $n == $nmax
- && $types_to_go[$iother] ne $type;
- }
+ # outdent something like '),'
+ if (
+ $terminal_type eq ','
- next unless ($good_combo);
+ # Removed this constraint for -wn
+ # OLD: allow just one character before the comma
+ # && $i_terminal == $ibeg + 1
- } ## end math
+ # require LIST environment; otherwise, we may outdent too much -
+ # this can happen in calls without parentheses (overload.t);
+ && $terminal_is_in_list
+ )
+ {
+ $adjust_indentation = 1;
+ }
- elsif ( $is_amp_amp{$type} ) {
- ##TBD
- } ## end &&, ||
+ # undo continuation indentation of a terminal closing token if
+ # it is the last token before a level decrease. This will allow
+ # a closing token to line up with its opening counterpart, and
+ # avoids an indentation jump larger than 1 level.
+ if ( $i_terminal == $ibeg
+ && $is_closing_type_beg
+ && defined($K_beg)
+ && $K_beg < $Klimit )
+ {
+ my $K_plus = $K_beg + 1;
+ my $type_plus = $rLL->[$K_plus]->[_TYPE_];
- elsif ( $is_assignment{$type} ) {
- ##TBD
- } ## end assignment
+ if ( $type_plus eq 'b' && $K_plus < $Klimit ) {
+ $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
}
- #----------------------------------------------------------
- # Recombine Section 1:
- # Join welded nested containers immediately
- #----------------------------------------------------------
- if ( weld_len_right_to_go($iend_1)
- || weld_len_left_to_go($ibeg_2) )
- {
- $n_best = $n;
+ if ( $type_plus eq '#' && $K_plus < $Klimit ) {
+ $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
+ if ( $type_plus eq 'b' && $K_plus < $Klimit ) {
+ $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
+ }
- # Old coding alternated sweep direction: no longer needed
- # $reverse = 1 - $reverse;
- last;
+ # Note: we have skipped past just one comment (perhaps a
+ # side comment). There could be more, and we could easily
+ # skip past all the rest with the following code, or with a
+ # while loop. It would be rare to have to do this, and
+ # those block comments would still be indented, so it would
+ # to leave them indented. So it seems best to just stop at
+ # a maximum of one comment.
+ ##if ($type_plus eq '#') {
+ ## $K_plus = $self->K_next_code($K_plus);
+ ##}
}
- $reverse = 0;
- #----------------------------------------------------------
- # Recombine Section 2:
- # Examine token at $iend_1 (right end of first line of pair)
- #----------------------------------------------------------
+ if ( !$is_bli_beg && defined($K_plus) ) {
+ my $lev = $level_beg;
+ my $level_next = $rLL->[$K_plus]->[_LEVEL_];
- # an isolated '}' may join with a ';' terminated segment
- if ( $type_iend_1 eq '}' ) {
+ # and do not undo ci if it was set by the -xci option
+ $adjust_indentation = 1
+ if ( $level_next < $lev
+ && !$rseqno_controlling_my_ci->{$K_beg} );
+ }
- # Check for cases where combining a semicolon terminated
- # statement with a previous isolated closing paren will
- # allow the combined line to be outdented. This is
- # generally a good move. For example, we can join up
- # the last two lines here:
- # (
- # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
- # $size, $atime, $mtime, $ctime, $blksize, $blocks
- # )
- # = stat($file);
- #
- # to get:
- # (
- # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
- # $size, $atime, $mtime, $ctime, $blksize, $blocks
- # ) = stat($file);
- #
- # which makes the parens line up.
- #
- # Another example, from Joe Matarazzo, probably looks best
- # with the 'or' clause appended to the trailing paren:
- # $self->some_method(
- # PARAM1 => 'foo',
- # PARAM2 => 'bar'
- # ) or die "Some_method didn't work";
- #
- # But we do not want to do this for something like the -lp
- # option where the paren is not outdentable because the
- # trailing clause will be far to the right.
- #
- # The logic here is synchronized with the logic in sub
- # sub set_adjusted_indentation, which actually does
- # the outdenting.
- #
- $skip_Section_3 ||= $this_line_is_semicolon_terminated
-
- # only one token on last line
- && $ibeg_1 == $iend_1
-
- # must be structural paren
- && $tokens_to_go[$iend_1] eq ')'
-
- # style must allow outdenting,
- && !$closing_token_indentation{')'}
-
- # only leading '&&', '||', and ':' if no others seen
- # (but note: our count made below could be wrong
- # due to intervening comments)
- && ( $leading_amp_count == 0
- || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
-
- # but leading colons probably line up with a
- # previous colon or question (count could be wrong).
- && $type_ibeg_2 ne ':'
-
- # only one step in depth allowed. this line must not
- # begin with a ')' itself.
- && ( $nesting_depth_to_go[$iend_1] ==
- $nesting_depth_to_go[$iend_2] + 1 );
-
- # YVES patch 2 of 2:
- # Allow cuddled eval chains, like this:
- # eval {
- # #STUFF;
- # 1; # return true
- # } or do {
- # #handle error
- # };
- # This patch works together with a patch in
- # setting adjusted indentation (where the closing eval
- # brace is outdented if possible).
- # The problem is that an 'eval' block has continuation
- # indentation and it looks better to undo it in some
- # cases. If we do not use this patch we would get:
- # eval {
- # #STUFF;
- # 1; # return true
- # }
- # or do {
- # #handle error
- # };
- # The alternative, for uncuddled style, is to create
- # a patch in set_adjusted_indentation which undoes
- # the indentation of a leading line like 'or do {'.
- # This doesn't work well with -icb through
- if (
- $block_type_to_go[$iend_1] eq 'eval'
- && !$rOpts->{'line-up-parentheses'}
- && !$rOpts->{'indent-closing-brace'}
- && $tokens_to_go[$iend_2] eq '{'
- && (
- ( $type_ibeg_2 =~ /^(|\&\&|\|\|)$/ )
- || ( $type_ibeg_2 eq 'k'
- && $is_and_or{ $tokens_to_go[$ibeg_2] } )
- || $is_if_unless{ $tokens_to_go[$ibeg_2] }
- )
+ # Patch for RT #96101, in which closing brace of anonymous subs
+ # was not outdented. We should look ahead and see if there is
+ # a level decrease at the next token (i.e., a closing token),
+ # but right now we do not have that information. For now
+ # we see if we are in a list, and this works well.
+ # See test files 'sub*.t' for good test cases.
+ if ( $terminal_is_in_list
+ && !$rOpts_indent_closing_brace
+ && $block_type_beg
+ && $block_type_beg =~ /$ASUB_PATTERN/ )
+ {
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
)
+ = $self->get_opening_indentation( $ibeg, $ri_first,
+ $ri_last, $rindentation_list );
+ my $indentation = $leading_spaces_beg;
+ if ( defined($opening_indentation)
+ && get_spaces($indentation) >
+ get_spaces($opening_indentation) )
{
- $skip_Section_3 ||= 1;
+ $adjust_indentation = 1;
}
-
- next
- unless (
- $skip_Section_3
-
- # handle '.' and '?' specially below
- || ( $type_ibeg_2 =~ /^[\.\?]$/ )
- );
}
+ }
- elsif ( $type_iend_1 eq '{' ) {
-
- # YVES
- # honor breaks at opening brace
- # Added to prevent recombining something like this:
- # } || eval { package main;
- next if $forced_breakpoint_to_go[$iend_1];
+ # YVES patch 1 of 2:
+ # Undo ci of line with leading closing eval brace,
+ # but not beyond the indention of the line with
+ # the opening brace.
+ if (
+ $block_type_beg eq 'eval'
+ ##&& !$rOpts_line_up_parentheses
+ && !ref($leading_spaces_beg)
+ && !$rOpts_indent_closing_brace
+ )
+ {
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
+ $rindentation_list );
+ my $indentation = $leading_spaces_beg;
+ if ( defined($opening_indentation)
+ && get_spaces($indentation) >
+ get_spaces($opening_indentation) )
+ {
+ $adjust_indentation = 1;
}
+ }
- # do not recombine lines with ending &&, ||,
- elsif ( $is_amp_amp{$type_iend_1} ) {
- next unless $want_break_before{$type_iend_1};
- }
+ # patch for issue git #40: -bli setting has priority
+ $adjust_indentation = 0 if ($is_bli_beg);
- # Identify and recombine a broken ?/: chain
- elsif ( $type_iend_1 eq '?' ) {
+ $default_adjust_indentation = $adjust_indentation;
- # Do not recombine different levels
- next
- if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
+ # Now modify default behavior according to user request:
+ # handle option to indent non-blocks of the form ); }; ];
+ # But don't do special indentation to something like ')->pack('
+ if ( !$block_type_beg ) {
+
+ # Note that logical padding has already been applied, so we may
+ # need to remove some spaces to get a valid hash key.
+ my $tok = $token_beg;
+ my $cti = $closing_token_indentation{$tok};
+
+ # Fix the value of 'cti' for an isloated non-welded closing qw
+ # delimiter.
+ if ( $seqno_qw_closing && $ibeg_weld_fix == $ibeg ) {
+
+ # A quote delimiter which is not a container will not have
+ # a cti value defined. In this case use the style of a
+ # paren. For example
+ # my @fars = (
+ # qw<
+ # far
+ # farfar
+ # farfars-far
+ # >,
+ # );
+ if ( !defined($cti) && length($tok) == 1 ) {
+
+ # something other than ')', '}', ']' ; use flag for ')'
+ $cti = $closing_token_indentation{')'};
+
+ # But for now, do not outdent non-container qw
+ # delimiters because it would would change existing
+ # formatting.
+ if ( $tok ne '>' ) { $cti = 3 }
+ }
- # do not recombine unless next line ends in :
- next unless $type_iend_2 eq ':';
+ # A non-welded closing qw cannot currently use -cti=1
+ # because that option requires a sequence number to find
+ # the opening indentation, and qw quote delimiters are not
+ # sequenced items.
+ if ( defined($cti) && $cti == 1 ) { $cti = 0 }
}
- # for lines ending in a comma...
- elsif ( $type_iend_1 eq ',' ) {
+ if ( !defined($cti) ) {
- # Do not recombine at comma which is following the
- # input bias.
- # TODO: might be best to make a special flag
- next if ( $old_breakpoint_to_go[$iend_1] );
+ # $cti may not be defined for several reasons.
+ # -padding may have been applied so the character
+ # has a length > 1
+ # - we may have welded to a closing quote token.
+ # Here is an example (perltidy -wn):
+ # __PACKAGE__->load_components( qw(
+ # > Core
+ # >
+ # > ) );
+ $adjust_indentation = 0;
- # an isolated '},' may join with an identifier + ';'
- # this is useful for the class of a 'bless' statement (bless.t)
- if ( $type_ibeg_1 eq '}'
- && $type_ibeg_2 eq 'i' )
+ }
+ elsif ( $cti == 1 ) {
+ if ( $i_terminal <= $ibeg + 1
+ || $is_semicolon_terminated )
{
- next
- unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
- && ( $iend_2 == ( $ibeg_2 + 1 ) )
- && $this_line_is_semicolon_terminated );
-
- # override breakpoint
- $forced_breakpoint_to_go[$iend_1] = 0;
+ $adjust_indentation = 2;
}
-
- # but otherwise ..
else {
-
- # do not recombine after a comma unless this will leave
- # just 1 more line
- next unless ( $n + 1 >= $nmax );
-
- # do not recombine if there is a change in indentation depth
- next
- if (
- $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
-
- # do not recombine a "complex expression" after a
- # comma. "complex" means no parens.
- my $saw_paren;
- foreach my $ii ( $ibeg_2 .. $iend_2 ) {
- if ( $tokens_to_go[$ii] eq '(' ) {
- $saw_paren = 1;
- last;
- }
- }
- next if $saw_paren;
+ $adjust_indentation = 0;
}
}
-
- # opening paren..
- elsif ( $type_iend_1 eq '(' ) {
-
- # No longer doing this
+ elsif ( $cti == 2 ) {
+ if ($is_semicolon_terminated) {
+ $adjust_indentation = 3;
+ }
+ else {
+ $adjust_indentation = 0;
+ }
}
-
- elsif ( $type_iend_1 eq ')' ) {
-
- # No longer doing this
+ elsif ( $cti == 3 ) {
+ $adjust_indentation = 3;
}
+ }
- # keep a terminal for-semicolon
- elsif ( $type_iend_1 eq 'f' ) {
- next;
+ # handle option to indent blocks
+ else {
+ if (
+ $rOpts_indent_closing_brace
+ && (
+ $i_terminal == $ibeg # isolated terminal '}'
+ || $is_semicolon_terminated
+ )
+ ) # } xxxx ;
+ {
+ $adjust_indentation = 3;
}
+ }
+ }
- # if '=' at end of line ...
- elsif ( $is_assignment{$type_iend_1} ) {
-
- # keep break after = if it was in input stream
- # this helps prevent 'blinkers'
- next if $old_breakpoint_to_go[$iend_1]
-
- # don't strand an isolated '='
- && $iend_1 != $ibeg_1;
-
- my $is_short_quote =
- ( $type_ibeg_2 eq 'Q'
- && $ibeg_2 == $iend_2
- && token_sequence_length( $ibeg_2, $ibeg_2 ) <
- $rOpts_short_concatenation_item_length );
- my $is_ternary =
- ( $type_ibeg_1 eq '?'
- && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
-
- # always join an isolated '=', a short quote, or if this
- # will put ?/: at start of adjacent lines
- if ( $ibeg_1 != $iend_1
- && !$is_short_quote
- && !$is_ternary )
- {
- next
- unless (
- (
-
- # unless we can reduce this to two lines
- $nmax < $n + 2
-
- # or three lines, the last with a leading semicolon
- || ( $nmax == $n + 2
- && $types_to_go[$ibeg_nmax] eq ';' )
+ # if at ');', '};', '>;', and '];' of a terminal qw quote
+ elsif (
+ substr( $rpatterns->[0], 0, 2 ) eq 'qb'
+ && substr( $rfields->[0], -1, 1 ) eq ';'
+ ##&& $rpatterns->[0] =~ /^qb*;$/
+ && $rfields->[0] =~ /^([\)\}\]\>]);$/
+ )
+ {
+ if ( $closing_token_indentation{$1} == 0 ) {
+ $adjust_indentation = 1;
+ }
+ else {
+ $adjust_indentation = 3;
+ }
+ }
- # or the next line ends with a here doc
- || $type_iend_2 eq 'h'
+ # if line begins with a ':', align it with any
+ # previous line leading with corresponding ?
+ elsif ( $type_beg eq ':' ) {
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last,
+ $rindentation_list );
+ if ($is_leading) { $adjust_indentation = 2; }
+ }
- # or the next line ends in an open paren or brace
- # and the break hasn't been forced [dima.t]
- || ( !$forced_breakpoint_to_go[$iend_1]
- && $type_iend_2 eq '{' )
- )
+ #---------------------------------------------------------
+ # Section 2: set indentation according to flag set above
+ #
+ # Select the indentation object to define leading
+ # whitespace. If we are outdenting something like '} } );'
+ # then we want to use one level below the last token
+ # ($i_terminal) in order to get it to fully outdent through
+ # all levels.
+ #---------------------------------------------------------
+ my $indentation;
+ my $lev;
+ my $level_end = $levels_to_go[$iend];
- # do not recombine if the two lines might align well
- # this is a very approximate test for this
- && (
+ if ( $adjust_indentation == 0 ) {
+ $indentation = $leading_spaces_beg;
+ $lev = $level_beg;
+ }
+ elsif ( $adjust_indentation == 1 ) {
- # RT#127633 - the leading tokens are not operators
- ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] )
+ # Change the indentation to be that of a different token on the line
+ # Previously, the indentation of the terminal token was used:
+ # OLD CODING:
+ # $indentation = $reduced_spaces_to_go[$i_terminal];
+ # $lev = $levels_to_go[$i_terminal];
- # or they are different
- || ( $ibeg_3 >= 0
- && $type_ibeg_2 ne $types_to_go[$ibeg_3] )
- )
- );
+ # Generalization for MOJO:
+ # Use the lowest level indentation of the tokens on the line.
+ # For example, here we can use the indentation of the ending ';':
+ # } until ($selection > 0 and $selection < 10); # ok to use ';'
+ # But this will not outdent if we use the terminal indentation:
+ # )->then( sub { # use indentation of the ->, not the {
+ # Warning: reduced_spaces_to_go[] may be a reference, do not
+ # do numerical checks with it
- if (
+ my $i_ind = $ibeg;
+ $indentation = $reduced_spaces_to_go[$i_ind];
+ $lev = $levels_to_go[$i_ind];
+ while ( $i_ind < $i_terminal ) {
+ $i_ind++;
+ if ( $levels_to_go[$i_ind] < $lev ) {
+ $indentation = $reduced_spaces_to_go[$i_ind];
+ $lev = $levels_to_go[$i_ind];
+ }
+ }
+ }
- # Recombine if we can make two lines
- $nmax >= $n + 2
+ # handle indented closing token which aligns with opening token
+ elsif ( $adjust_indentation == 2 ) {
- # -lp users often prefer this:
- # my $title = function($env, $env, $sysarea,
- # "bubba Borrower Entry");
- # so we will recombine if -lp is used we have
- # ending comma
- && ( !$rOpts_line_up_parentheses
- || $type_iend_2 ne ',' )
- )
- {
+ # handle option to align closing token with opening token
+ $lev = $level_beg;
- # otherwise, scan the rhs line up to last token for
- # complexity. Note that we are not counting the last
- # token in case it is an opening paren.
- my $tv = 0;
- my $depth = $nesting_depth_to_go[$ibeg_2];
- foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
- if ( $nesting_depth_to_go[$i] != $depth ) {
- $tv++;
- last if ( $tv > 1 );
- }
- $depth = $nesting_depth_to_go[$i];
- }
+ # calculate spaces needed to align with opening token
+ my $space_count =
+ get_spaces($opening_indentation) + $opening_offset;
- # ok to recombine if no level changes before last token
- if ( $tv > 0 ) {
+ # Indent less than the previous line.
+ #
+ # Problem: For -lp we don't exactly know what it was if there
+ # were recoverable spaces sent to the aligner. A good solution
+ # would be to force a flush of the vertical alignment buffer, so
+ # that we would know. For now, this rule is used for -lp:
+ #
+ # When the last line did not start with a closing token we will
+ # be optimistic that the aligner will recover everything wanted.
+ #
+ # This rule will prevent us from breaking a hierarchy of closing
+ # tokens, and in a worst case will leave a closing paren too far
+ # indented, but this is better than frequently leaving it not
+ # indented enough.
+ my $last_spaces = get_spaces($last_indentation_written);
- # otherwise, do not recombine if more than two
- # level changes.
- next if ( $tv > 1 );
+ if ( ref($last_indentation_written)
+ && !$is_closing_token{$last_leading_token} )
+ {
+ $last_spaces +=
+ get_recoverable_spaces($last_indentation_written);
+ }
- # check total complexity of the two adjacent lines
- # that will occur if we do this join
- my $istop =
- ( $n < $nmax )
- ? $ri_end->[ $n + 1 ]
- : $iend_2;
- foreach my $i ( $iend_2 .. $istop ) {
- if ( $nesting_depth_to_go[$i] != $depth ) {
- $tv++;
- last if ( $tv > 2 );
- }
- $depth = $nesting_depth_to_go[$i];
- }
+ # reset the indentation to the new space count if it works
+ # only options are all or none: nothing in-between looks good
+ $lev = $level_beg;
- # do not recombine if total is more than 2 level changes
- next if ( $tv > 2 );
- }
- }
- }
+ my $diff = $last_spaces - $space_count;
+ if ( $diff > 0 ) {
+ $indentation = $space_count;
+ }
+ else {
- unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
- $forced_breakpoint_to_go[$iend_1] = 0;
- }
+ # We need to fix things ... but there is no good way to do it.
+ # The best solution is for the user to use a longer maximum
+ # line length. We could get a smooth variation if we just move
+ # the paren in using
+ # $space_count -= ( 1 - $diff );
+ # But unfortunately this can give a rather unbalanced look.
+
+ # For -xlp we currently allow a tolerance of one indentation
+ # level and then revert to a simpler default. This will jump
+ # suddenly but keeps a balanced look.
+ if ( $rOpts_extended_line_up_parentheses
+ && $diff >= -$rOpts_indent_columns
+ && $space_count > $leading_spaces_beg )
+ {
+ $indentation = $space_count;
}
- # for keywords..
- elsif ( $type_iend_1 eq 'k' ) {
-
- # make major control keywords stand out
- # (recombine.t)
- next
- if (
-
- #/^(last|next|redo|return)$/
- $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
-
- # but only if followed by multiple lines
- && $n < $nmax
- );
-
- if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
- next
- unless $want_break_before{ $tokens_to_go[$iend_1] };
- }
+ # Otherwise revert to defaults
+ elsif ( $default_adjust_indentation == 0 ) {
+ $indentation = $leading_spaces_beg;
}
-
- #----------------------------------------------------------
- # Recombine Section 3:
- # Examine token at $ibeg_2 (left end of second line of pair)
- #----------------------------------------------------------
-
- # join lines identified above as capable of
- # causing an outdented line with leading closing paren
- # Note that we are skipping the rest of this section
- # and the rest of the loop to do the join
- if ($skip_Section_3) {
- $forced_breakpoint_to_go[$iend_1] = 0;
- $n_best = $n;
- last;
+ elsif ( $default_adjust_indentation == 1 ) {
+ $indentation = $reduced_spaces_to_go[$i_terminal];
+ $lev = $levels_to_go[$i_terminal];
}
+ }
+ }
- # handle lines with leading &&, ||
- elsif ( $is_amp_amp{$type_ibeg_2} ) {
-
- $leading_amp_count++;
-
- # ok to recombine if it follows a ? or :
- # and is followed by an open paren..
- my $ok =
- ( $is_ternary{$type_ibeg_1}
- && $tokens_to_go[$iend_2] eq '(' )
+ # Full indentaion of closing tokens (-icb and -icp or -cti=2)
+ else {
- # or is followed by a ? or : at same depth
- #
- # We are looking for something like this. We can
- # recombine the && line with the line above to make the
- # structure more clear:
- # return
- # exists $G->{Attr}->{V}
- # && exists $G->{Attr}->{V}->{$u}
- # ? %{ $G->{Attr}->{V}->{$u} }
- # : ();
- #
- # We should probably leave something like this alone:
- # return
- # exists $G->{Attr}->{E}
- # && exists $G->{Attr}->{E}->{$u}
- # && exists $G->{Attr}->{E}->{$u}->{$v}
- # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
- # : ();
- # so that we either have all of the &&'s (or ||'s)
- # on one line, as in the first example, or break at
- # each one as in the second example. However, it
- # sometimes makes things worse to check for this because
- # it prevents multiple recombinations. So this is not done.
- || ( $ibeg_3 >= 0
- && $is_ternary{ $types_to_go[$ibeg_3] }
- && $nesting_depth_to_go[$ibeg_3] ==
- $nesting_depth_to_go[$ibeg_2] );
-
- next if !$ok && $want_break_before{$type_ibeg_2};
- $forced_breakpoint_to_go[$iend_1] = 0;
-
- # tweak the bond strength to give this joint priority
- # over ? and :
- $bs_tweak = 0.25;
- }
-
- # Identify and recombine a broken ?/: chain
- elsif ( $type_ibeg_2 eq '?' ) {
-
- # Do not recombine different levels
- my $lev = $levels_to_go[$ibeg_2];
- next if ( $lev ne $levels_to_go[$ibeg_1] );
-
- # Do not recombine a '?' if either next line or
- # previous line does not start with a ':'. The reasons
- # are that (1) no alignment of the ? will be possible
- # and (2) the expression is somewhat complex, so the
- # '?' is harder to see in the interior of the line.
- my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
- my $precedes_colon =
- $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
- next unless ( $follows_colon || $precedes_colon );
-
- # we will always combining a ? line following a : line
- if ( !$follows_colon ) {
-
- # ...otherwise recombine only if it looks like a chain.
- # we will just look at a few nearby lines to see if
- # this looks like a chain.
- my $local_count = 0;
- foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
- $local_count++
- if $ii >= 0
- && $types_to_go[$ii] eq ':'
- && $levels_to_go[$ii] == $lev;
- }
- next unless ( $local_count > 1 );
- }
- $forced_breakpoint_to_go[$iend_1] = 0;
- }
+ # handle -icb (indented closing code block braces)
+ # Updated method for indented block braces: indent one full level if
+ # there is no continuation indentation. This will occur for major
+ # structures such as sub, if, else, but not for things like map
+ # blocks.
+ #
+ # Note: only code blocks without continuation indentation are
+ # handled here (if, else, unless, ..). In the following snippet,
+ # the terminal brace of the sort block will have continuation
+ # indentation as shown so it will not be handled by the coding
+ # here. We would have to undo the continuation indentation to do
+ # this, but it probably looks ok as is. This is a possible future
+ # update for semicolon terminated lines.
+ #
+ # if ($sortby eq 'date' or $sortby eq 'size') {
+ # @files = sort {
+ # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
+ # or $a cmp $b
+ # } @files;
+ # }
+ #
+ if ( $block_type_beg
+ && $ci_levels_to_go[$i_terminal] == 0 )
+ {
+ my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
+ $indentation = $spaces + $rOpts_indent_columns;
- # do not recombine lines with leading '.'
- elsif ( $type_ibeg_2 eq '.' ) {
- my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
- next
- unless (
+ # NOTE: for -lp we could create a new indentation object, but
+ # there is probably no need to do it
+ }
- # ... unless there is just one and we can reduce
- # this to two lines if we do. For example, this
- #
- #
- # $bodyA .=
- # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
- #
- # looks better than this:
- # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
- # . '$args .= $pat;'
+ # handle -icp and any -icb block braces which fall through above
+ # test such as the 'sort' block mentioned above.
+ else {
- (
- $n == 2
- && $n == $nmax
- && $type_ibeg_1 ne $type_ibeg_2
- )
+ # There are currently two ways to handle -icp...
+ # One way is to use the indentation of the previous line:
+ # $indentation = $last_indentation_written;
- # ... or this would strand a short quote , like this
- # . "some long quote"
- # . "\n";
+ # The other way is to use the indentation that the previous line
+ # would have had if it hadn't been adjusted:
+ $indentation = $last_unadjusted_indentation;
- || ( $types_to_go[$i_next_nonblank] eq 'Q'
- && $i_next_nonblank >= $iend_2 - 1
- && $token_lengths_to_go[$i_next_nonblank] <
- $rOpts_short_concatenation_item_length )
- );
+ # Current method: use the minimum of the two. This avoids
+ # inconsistent indentation.
+ if ( get_spaces($last_indentation_written) <
+ get_spaces($indentation) )
+ {
+ $indentation = $last_indentation_written;
}
+ }
- # handle leading keyword..
- elsif ( $type_ibeg_2 eq 'k' ) {
-
- # handle leading "or"
- if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
- next
- unless (
- $this_line_is_semicolon_terminated
- && (
- $type_ibeg_1 eq '}'
- || (
+ # use previous indentation but use own level
+ # to cause list to be flushed properly
+ $lev = $level_beg;
+ }
- # following 'if' or 'unless' or 'or'
- $type_ibeg_1 eq 'k'
- && $is_if_unless{ $tokens_to_go[$ibeg_1] }
-
- # important: only combine a very simple or
- # statement because the step below may have
- # combined a trailing 'and' with this or,
- # and we do not want to then combine
- # everything together
- && ( $iend_2 - $ibeg_2 <= 7 )
- )
- )
- );
+ # remember indentation except for multi-line quotes, which get
+ # no indentation
+ unless ( $ibeg == 0 && $starting_in_quote ) {
+ $last_indentation_written = $indentation;
+ $last_unadjusted_indentation = $leading_spaces_beg;
+ $last_leading_token = $token_beg;
+
+ # Patch to make a line which is the end of a qw quote work with the
+ # -lp option. Make $token_beg look like a closing token as some
+ # type even if it is not. This veriable will become
+ # $last_leading_token at the end of this loop. Then, if the -lp
+ # style is selected, and the next line is also a
+ # closing token, it will not get more indentation than this line.
+ # We need to do this because qw quotes (at present) only get
+ # continuation indentation, not one level of indentation, so we
+ # need to turn off the -lp indentation.
+
+ # ... a picture is worth a thousand words:
+
+ # perltidy -wn -gnu (Without this patch):
+ # ok(defined(
+ # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
+ # 2981014)])
+ # ));
+
+ # perltidy -wn -gnu (With this patch):
+ # ok(defined(
+ # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
+ # 2981014)])
+ # ));
+ if ( $seqno_qw_closing
+ && ( length($token_beg) > 1 || $token_beg eq '>' ) )
+ {
+ $last_leading_token = ')';
+ }
+ }
- #X: RT #81854
- $forced_breakpoint_to_go[$iend_1] = 0
- unless $old_breakpoint_to_go[$iend_1];
- }
+ # be sure lines with leading closing tokens are not outdented more
+ # than the line which contained the corresponding opening token.
- # handle leading 'and'
- elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) {
+ #--------------------------------------------------------
+ # updated per bug report in alex_bug.pl: we must not
+ # mess with the indentation of closing logical braces so
+ # we must treat something like '} else {' as if it were
+ # an isolated brace
+ #--------------------------------------------------------
+ my $is_isolated_block_brace = $block_type_beg
+ && ( $i_terminal == $ibeg
+ || $is_if_elsif_else_unless_while_until_for_foreach{$block_type_beg}
+ );
- # Decide if we will combine a single terminal 'and'
- # after an 'if' or 'unless'.
+ # only do this for a ':; which is aligned with its leading '?'
+ my $is_unaligned_colon = $type_beg eq ':' && !$is_leading;
- # This looks best with the 'and' on the same
- # line as the 'if':
- #
- # $a = 1
- # if $seconds and $nu < 2;
- #
- # But this looks better as shown:
- #
- # $a = 1
- # if !$this->{Parents}{$_}
- # or $this->{Parents}{$_} eq $_;
- #
- next
- unless (
- $this_line_is_semicolon_terminated
- && (
+ if (
+ defined($opening_indentation)
+ && !$leading_paren_arrow # MOJO
+ && !$is_isolated_block_brace
+ && !$is_unaligned_colon
+ )
+ {
+ if ( get_spaces($opening_indentation) > get_spaces($indentation) ) {
+ $indentation = $opening_indentation;
+ }
+ }
- # following 'if' or 'unless' or 'or'
- $type_ibeg_1 eq 'k'
- && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
- || $tokens_to_go[$ibeg_1] eq 'or' )
- )
- );
- }
+ # remember the indentation of each line of this batch
+ push @{$rindentation_list}, $indentation;
- # handle leading "if" and "unless"
- elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
+ # outdent lines with certain leading tokens...
+ if (
- # FIXME: This is still experimental..may not be too useful
- next
- unless (
- $this_line_is_semicolon_terminated
+ # must be first word of this batch
+ $ibeg == 0
- # previous line begins with 'and' or 'or'
- && $type_ibeg_1 eq 'k'
- && $is_and_or{ $tokens_to_go[$ibeg_1] }
+ # and ...
+ && (
- );
- }
+ # certain leading keywords if requested
+ $rOpts_outdent_keywords
+ && $type_beg eq 'k'
+ && $outdent_keyword{$token_beg}
- # handle all other leading keywords
- else {
+ # or labels if requested
+ || $rOpts_outdent_labels && $type_beg eq 'J'
- # keywords look best at start of lines,
- # but combine things like "1 while"
- unless ( $is_assignment{$type_iend_1} ) {
- next
- if ( ( $type_iend_1 ne 'k' )
- && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
- }
- }
+ # or static block comments if requested
+ || $is_static_block_comment
+ && $rOpts_outdent_static_block_comments
+ )
+ )
+ {
+ my $space_count = leading_spaces_to_go($ibeg);
+ if ( $space_count > 0 ) {
+ $space_count -= $rOpts_continuation_indentation;
+ $is_outdented_line = 1;
+ if ( $space_count < 0 ) { $space_count = 0 }
+
+ # do not promote a spaced static block comment to non-spaced;
+ # this is not normally necessary but could be for some
+ # unusual user inputs (such as -ci = -i)
+ if ( $type_beg eq '#' && $space_count == 0 ) {
+ $space_count = 1;
}
- # similar treatment of && and || as above for 'and' and 'or':
- # NOTE: This block of code is currently bypassed because
- # of a previous block but is retained for possible future use.
- elsif ( $is_amp_amp{$type_ibeg_2} ) {
+ $indentation = $space_count;
+ }
+ }
- # maybe looking at something like:
- # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
+ return ( $indentation, $lev, $level_end, $terminal_type,
+ $terminal_block_type, $is_semicolon_terminated,
+ $is_outdented_line );
+ }
+} ## end closure final_indentation_adjustment
- next
- unless (
- $this_line_is_semicolon_terminated
+sub get_opening_indentation {
- # previous line begins with an 'if' or 'unless' keyword
- && $type_ibeg_1 eq 'k'
- && $is_if_unless{ $tokens_to_go[$ibeg_1] }
+ # get the indentation of the line which output the opening token
+ # corresponding to a given closing token in the current output batch.
+ #
+ # given:
+ # $i_closing - index in this line of a closing token ')' '}' or ']'
+ #
+ # $ri_first - reference to list of the first index $i for each output
+ # line in this batch
+ # $ri_last - reference to list of the last index $i for each output line
+ # in this batch
+ # $rindentation_list - reference to a list containing the indentation
+ # used for each line.
+ # $qw_seqno - optional sequence number to use if normal seqno not defined
+ # (TODO: would be more general to just look this up from index i)
+ #
+ # return:
+ # -the indentation of the line which contained the opening token
+ # which matches the token at index $i_opening
+ # -and its offset (number of columns) from the start of the line
+ #
+ my ( $self, $i_closing, $ri_first, $ri_last, $rindentation_list, $qw_seqno )
+ = @_;
- );
- }
+ # first, see if the opening token is in the current batch
+ my $i_opening = $mate_index_to_go[$i_closing];
+ my ( $indent, $offset, $is_leading, $exists );
+ $exists = 1;
+ if ( defined($i_opening) && $i_opening >= 0 ) {
- # handle line with leading = or similar
- elsif ( $is_assignment{$type_ibeg_2} ) {
- next unless ( $n == 1 || $n == $nmax );
- next if $old_breakpoint_to_go[$iend_1];
- next
- unless (
+ # it is..look up the indentation
+ ( $indent, $offset, $is_leading ) =
+ lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
+ $rindentation_list );
+ }
- # unless we can reduce this to two lines
- $nmax == 2
+ # if not, it should have been stored in the hash by a previous batch
+ else {
+ my $seqno = $type_sequence_to_go[$i_closing];
+ $seqno = $qw_seqno unless ($seqno);
+ ( $indent, $offset, $is_leading, $exists ) =
+ get_saved_opening_indentation($seqno);
+ }
+ return ( $indent, $offset, $is_leading, $exists );
+}
- # or three lines, the last with a leading semicolon
- || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
+sub set_vertical_tightness_flags {
- # or the next line ends with a here doc
- || $type_iend_2 eq 'h'
+ my ( $self, $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last,
+ $ending_in_quote, $closing_side_comment )
+ = @_;
- # or this is a short line ending in ;
- || ( $n == $nmax && $this_line_is_semicolon_terminated )
- );
- $forced_breakpoint_to_go[$iend_1] = 0;
- }
+ # Define vertical tightness controls for the nth line of a batch.
- #----------------------------------------------------------
- # Recombine Section 4:
- # Combine the lines if we arrive here and it is possible
- #----------------------------------------------------------
+ # These parameters are passed to the vertical aligner to indicated
+ # if we should combine this line with the next line to achieve the
+ # desired vertical tightness. This was previously an array but
+ # has been converted to a hash:
- # honor hard breakpoints
- next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
+ # old hash Meaning
+ # index key
+ #
+ # 0 _vt_type: 1=opening non-block 2=closing non-block
+ # 3=opening block brace 4=closing block brace
+ #
+ # 1a _vt_opening_flag: 1=no multiple steps, 2=multiple steps ok
+ # 1b _vt_closing_flag: spaces of padding to use if closing
+ # 2 _vt_seqno: sequence number of container
+ # 3 _vt_valid flag: do not append if this flag is false. Will be
+ # true if appropriate -vt flag is set. Otherwise, Will be
+ # made true only for 2 line container in parens with -lp
+ # 4 _vt_seqno_beg: sequence number of first token of line
+ # 5 _vt_seqno_end: sequence number of last token of line
+ # 6 _vt_min_lines: min number of lines for joining opening cache,
+ # 0=no constraint
+ # 7 _vt_max_lines: max number of lines for joining opening cache,
+ # 0=no constraint
+
+ # The vertical tightness mechanism can add whitespace, so whitespace can
+ # continually increase if we allowed it when the -fws flag is set.
+ # See case b499 for an example.
+
+ # Speedup: just return for a comment
+ if ( $max_index_to_go == 0 && $types_to_go[0] eq '#' ) {
+ return;
+ }
- my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
+ # Define these values...
+ my $vt_type = 0;
+ my $vt_opening_flag = 0;
+ my $vt_closing_flag = 0;
+ my $vt_seqno = 0;
+ my $vt_valid_flag = 0;
+ my $vt_seqno_beg = 0;
+ my $vt_seqno_end = 0;
+ my $vt_min_lines = 0;
+ my $vt_max_lines = 0;
+
+ goto RETURN
+ if ($rOpts_freeze_whitespace);
+
+ # Uses these global parameters:
+ # $rOpts_block_brace_tightness
+ # $rOpts_block_brace_vertical_tightness
+ # $rOpts_stack_closing_block_brace
+ # %opening_vertical_tightness
+ # %closing_vertical_tightness
+ # %opening_token_right
+ # %stack_closing_token
+ # %stack_opening_token
- # Require a few extra spaces before recombining lines if we are
- # at an old breakpoint unless this is a simple list or terminal
- # line. The goal is to avoid oscillating between two
- # quasi-stable end states. For example this snippet caused
- # problems:
-## my $this =
-## bless {
-## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
-## },
-## $type;
- next
- if ( $old_breakpoint_to_go[$iend_1]
- && !$this_line_is_semicolon_terminated
- && $n < $nmax
- && $excess + 4 > 0
- && $type_iend_2 ne ',' );
-
- # do not recombine if we would skip in indentation levels
- if ( $n < $nmax ) {
- my $if_next = $ri_beg->[ $n + 1 ];
- next
- if (
- $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
- && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
-
- # but an isolated 'if (' is undesirable
- && !(
- $n == 1
- && $iend_1 - $ibeg_1 <= 2
- && $type_ibeg_1 eq 'k'
- && $tokens_to_go[$ibeg_1] eq 'if'
- && $tokens_to_go[$iend_1] ne '('
- )
- );
- }
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 1:
+ # Handle Lines 1 .. n-1 but not the last line
+ # For non-BLOCK tokens, we will need to examine the next line
+ # too, so we won't consider the last line.
+ #--------------------------------------------------------------
+ if ( $n < $n_last_line ) {
- # honor no-break's
- next if ( $bs >= NO_BREAK - 1 );
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 1a:
+ # Look for Type 1, last token of this line is a non-block opening token
+ #--------------------------------------------------------------
+ my $ibeg_next = $ri_first->[ $n + 1 ];
+ my $token_end = $tokens_to_go[$iend];
+ my $iend_next = $ri_last->[ $n + 1 ];
- # remember the pair with the greatest bond strength
- if ( !$n_best ) {
- $n_best = $n;
- $bs_best = $bs;
- }
- else {
+ if (
+ $type_sequence_to_go[$iend]
+ && !$block_type_to_go[$iend]
+ && $is_opening_token{$token_end}
+ && (
+ $opening_vertical_tightness{$token_end} > 0
- if ( $bs > $bs_best ) {
- $n_best = $n;
- $bs_best = $bs;
- }
- }
- }
+ # allow 2-line method call to be closed up
+ || ( $rOpts_line_up_parentheses
+ && $token_end eq '('
+ && $self->[_rlp_object_by_seqno_]
+ ->{ $type_sequence_to_go[$iend] }
+ && $iend > $ibeg
+ && $types_to_go[ $iend - 1 ] ne 'b' )
+ )
+ )
+ {
+ # avoid multiple jumps in nesting depth in one line if
+ # requested
+ my $ovt = $opening_vertical_tightness{$token_end};
+ my $iend_next = $ri_last->[ $n + 1 ];
- # recombine the pair with the greatest bond strength
- if ($n_best) {
- splice @{$ri_beg}, $n_best, 1;
- splice @{$ri_end}, $n_best - 1, 1;
- splice @joint, $n_best, 1;
+ # Turn off the -vt flag if the next line ends in a weld.
+ # This avoids an instability with one-line welds (fixes b1183).
+ my $type_end_next = $types_to_go[$iend_next];
+ $ovt = 0
+ if ( $self->[_rK_weld_left_]->{ $K_to_go[$iend_next] }
+ && $is_closing_type{$type_end_next} );
- # keep going if we are still making progress
- $more_to_do++;
+ # Avoid conflict of -bom and -pt=1 or -pt=2, fixes b1270
+ # See similar patch above for $cvt.
+ my $seqno = $type_sequence_to_go[$iend];
+ if ( $ovt && $self->[_rwant_container_open_]->{$seqno} ) {
+ $ovt = 0;
}
- }
- return ( $ri_beg, $ri_end );
- }
-} # end recombine_breakpoints
-sub break_all_chain_tokens {
+ unless (
+ $ovt < 2
+ && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
+ $nesting_depth_to_go[$ibeg_next] )
+ )
+ {
- # scan the current breakpoints looking for breaks at certain "chain
- # operators" (. : && || + etc) which often occur repeatedly in a long
- # statement. If we see a break at any one, break at all similar tokens
- # within the same container.
- #
- my ( $self, $ri_left, $ri_right ) = @_;
+ # If -vt flag has not been set, mark this as invalid
+ # and aligner will validate it if it sees the closing paren
+ # within 2 lines.
+ my $valid_flag = $ovt;
- my %saw_chain_type;
- my %left_chain_type;
- my %right_chain_type;
- my %interior_chain_type;
- my $nmax = @{$ri_right} - 1;
+ $vt_type = 1;
+ $vt_opening_flag = $ovt;
+ $vt_seqno = $type_sequence_to_go[$iend];
+ $vt_valid_flag = $valid_flag;
+ }
+ }
- # scan the left and right end tokens of all lines
- my $count = 0;
- for my $n ( 0 .. $nmax ) {
- my $il = $ri_left->[$n];
- my $ir = $ri_right->[$n];
- my $typel = $types_to_go[$il];
- my $typer = $types_to_go[$ir];
- $typel = '+' if ( $typel eq '-' ); # treat + and - the same
- $typer = '+' if ( $typer eq '-' );
- $typel = '*' if ( $typel eq '/' ); # treat * and / the same
- $typer = '*' if ( $typer eq '/' );
- my $tokenl = $tokens_to_go[$il];
- my $tokenr = $tokens_to_go[$ir];
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 1b:
+ # Look for Type 2, first token of next line is a non-block closing
+ # token .. and be sure this line does not have a side comment
+ #--------------------------------------------------------------
+ my $token_next = $tokens_to_go[$ibeg_next];
+ if ( $type_sequence_to_go[$ibeg_next]
+ && !$block_type_to_go[$ibeg_next]
+ && $is_closing_token{$token_next}
+ && $types_to_go[$iend] ne '#' ) # for safety, shouldn't happen!
+ {
+ my $ovt = $opening_vertical_tightness{$token_next};
+ my $cvt = $closing_vertical_tightness{$token_next};
- if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
- next if ( $typel eq '?' );
- push @{ $left_chain_type{$typel} }, $il;
- $saw_chain_type{$typel} = 1;
- $count++;
- }
- if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
- next if ( $typer eq '?' );
- push @{ $right_chain_type{$typer} }, $ir;
- $saw_chain_type{$typer} = 1;
- $count++;
- }
- }
- return unless $count;
+ # Avoid conflict of -bom and -pvt=1 or -pvt=2, fixes b977, b1303
+ # See similar patch above for $ovt.
+ my $seqno = $type_sequence_to_go[$ibeg_next];
+ if ( $cvt && $self->[_rwant_container_open_]->{$seqno} ) {
+ $cvt = 0;
+ }
- # now look for any interior tokens of the same types
- $count = 0;
- for my $n ( 0 .. $nmax ) {
- my $il = $ri_left->[$n];
- my $ir = $ri_right->[$n];
- foreach my $i ( $il + 1 .. $ir - 1 ) {
- my $type = $types_to_go[$i];
- $type = '+' if ( $type eq '-' );
- $type = '*' if ( $type eq '/' );
- if ( $saw_chain_type{$type} ) {
- push @{ $interior_chain_type{$type} }, $i;
- $count++;
+ # Implement cvt=3: like cvt=0 for assigned structures, like cvt=1
+ # otherwise. Added for rt136417.
+ if ( $cvt == 3 ) {
+ my $seqno = $type_sequence_to_go[$ibeg_next];
+ $cvt = $self->[_ris_assigned_structure_]->{$seqno} ? 0 : 1;
}
- }
- }
- return unless $count;
- # now make a list of all new break points
- my @insert_list;
+ # The unusual combination -pvtc=2 -dws -naws can be unstable.
+ # This fixes b1282, b1283. This can be moved to set_options.
+ if ( $cvt == 2
+ && $rOpts_delete_old_whitespace
+ && !$rOpts_add_whitespace )
+ {
+ $cvt = 1;
+ }
- # loop over all chain types
- foreach my $type ( keys %saw_chain_type ) {
+ if (
- # quit if just ONE continuation line with leading . For example--
- # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
- # . $contents;
- last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
+ # Never append a trailing line like ')->pack(' because it
+ # will throw off later alignment. So this line must start at a
+ # deeper level than the next line (fix1 for welding, git #45).
+ (
+ $nesting_depth_to_go[$ibeg_next] >=
+ $nesting_depth_to_go[ $iend_next + 1 ] + 1
+ )
+ && (
+ $cvt == 2
+ || (
+ !$self->is_in_list_by_i($ibeg_next)
+ && (
+ $cvt == 1
- # loop over all interior chain tokens
- foreach my $itest ( @{ $interior_chain_type{$type} } ) {
+ # allow closing up 2-line method calls
+ || ( $rOpts_line_up_parentheses
+ && $token_next eq ')'
+ && $self->[_rlp_object_by_seqno_]
+ ->{ $type_sequence_to_go[$ibeg_next] } )
+ )
+ )
+ )
+ )
+ {
- # loop over all left end tokens of same type
- if ( $left_chain_type{$type} ) {
- next if $nobreak_to_go[ $itest - 1 ];
- foreach my $i ( @{ $left_chain_type{$type} } ) {
- next unless $self->in_same_container_i( $i, $itest );
- push @insert_list, $itest - 1;
+ # decide which trailing closing tokens to append..
+ my $ok = 0;
+ if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
+ else {
+ my $str = join( '',
+ @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
- # Break at matching ? if this : is at a different level.
- # For example, the ? before $THRf_DEAD in the following
- # should get a break if its : gets a break.
- #
- # my $flags =
- # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
- # : ( $_ & 4 ) ? $THRf_R_DETACHED
- # : $THRf_R_JOINABLE;
- if ( $type eq ':'
- && $levels_to_go[$i] != $levels_to_go[$itest] )
- {
- my $i_question = $mate_index_to_go[$itest];
- if ( $i_question > 0 ) {
- push @insert_list, $i_question - 1;
- }
- }
- last;
+ # append closing token if followed by comment or ';'
+ # or another closing token (fix2 for welding, git #45)
+ if ( $str =~ /^b?[\)\]\}R#;]/ ) { $ok = 1 }
}
- }
-
- # loop over all right end tokens of same type
- if ( $right_chain_type{$type} ) {
- next if $nobreak_to_go[$itest];
- foreach my $i ( @{ $right_chain_type{$type} } ) {
- next unless $self->in_same_container_i( $i, $itest );
- push @insert_list, $itest;
- # break at matching ? if this : is at a different level
- if ( $type eq ':'
- && $levels_to_go[$i] != $levels_to_go[$itest] )
+ if ($ok) {
+ my $valid_flag = $cvt;
+ my $min_lines = 0;
+ my $max_lines = 0;
+
+ # Fix for b1187 and b1188: Blinking can occur if we allow
+ # welded tokens to re-form into one-line blocks during
+ # vertical alignment when -lp used. So for this case we
+ # set the minimum number of lines to be 1 instead of 0.
+ # The maximum should be 1 if -vtc is not used. If -vtc is
+ # used, we turn the valid
+ # flag off and set the maximum to 0. This is equivalent to
+ # using a large number.
+ my $seqno_ibeg_next = $type_sequence_to_go[$ibeg_next];
+ if ( $rOpts_line_up_parentheses
+ && $total_weld_count
+ && $self->[_rlp_object_by_seqno_]->{$seqno_ibeg_next}
+ && $self->is_welded_at_seqno($seqno_ibeg_next) )
{
- my $i_question = $mate_index_to_go[$itest];
- if ( $i_question >= 0 ) {
- push @insert_list, $i_question;
- }
+ $min_lines = 1;
+ $max_lines = $cvt ? 0 : 1;
+ $valid_flag = 0;
}
- last;
+
+ $vt_type = 2;
+ $vt_closing_flag = $tightness{$token_next} == 2 ? 0 : 1;
+ $vt_seqno = $type_sequence_to_go[$ibeg_next];
+ $vt_valid_flag = $valid_flag;
+ $vt_min_lines = $min_lines;
+ $vt_max_lines = $max_lines;
}
}
}
- }
-
- # insert any new break points
- if (@insert_list) {
- insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
- }
- return;
-}
-sub break_equals {
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 1c:
+ # Implement the Opening Token Right flag (Type 2)..
+ # If requested, move an isolated trailing opening token to the end of
+ # the previous line which ended in a comma. We could do this
+ # in sub recombine_breakpoints but that would cause problems
+ # with -lp formatting. The problem is that indentation will
+ # quickly move far to the right in nested expressions. By
+ # doing it after indentation has been set, we avoid changes
+ # to the indentation. Actual movement of the token takes place
+ # in sub valign_output_step_B.
- # Look for assignment operators that could use a breakpoint.
- # For example, in the following snippet
- #
- # $HOME = $ENV{HOME}
- # || $ENV{LOGDIR}
- # || $pw[7]
- # || die "no home directory for user $<";
- #
- # we could break at the = to get this, which is a little nicer:
- # $HOME =
- # $ENV{HOME}
- # || $ENV{LOGDIR}
- # || $pw[7]
- # || die "no home directory for user $<";
- #
- # The logic here follows the logic in set_logical_padding, which
- # will add the padding in the second line to improve alignment.
- #
- my ( $ri_left, $ri_right ) = @_;
- my $nmax = @{$ri_right} - 1;
- return unless ( $nmax >= 2 );
+ # Note added 4 May 2021: the man page suggests that the -otr flags
+ # are mainly for opening tokens following commas. But this seems
+ # to have been generalized long ago to include other situations.
+ # I checked the coding back to 2012 and it is essentially the same
+ # as here, so it is best to leave this unchanged for now.
+ #--------------------------------------------------------------
+ if (
+ $opening_token_right{ $tokens_to_go[$ibeg_next] }
- # scan the left ends of first two lines
- my $tokbeg = "";
- my $depth_beg;
- for my $n ( 1 .. 2 ) {
- my $il = $ri_left->[$n];
- my $typel = $types_to_go[$il];
- my $tokenl = $tokens_to_go[$il];
+ # previous line is not opening
+ # (use -sot to combine with it)
+ && !$is_opening_token{$token_end}
- my $has_leading_op = ( $tokenl =~ /^\w/ )
- ? $is_chain_operator{$tokenl} # + - * / : ? && ||
- : $is_chain_operator{$typel}; # and, or
- return unless ($has_leading_op);
- if ( $n > 1 ) {
- return
- unless ( $tokenl eq $tokbeg
- && $nesting_depth_to_go[$il] eq $depth_beg );
- }
- $tokbeg = $tokenl;
- $depth_beg = $nesting_depth_to_go[$il];
- }
+ # previous line ended in one of these
+ # (add other cases if necessary; '=>' and '.' are not necessary
+ && !$block_type_to_go[$ibeg_next]
- # now look for any interior tokens of the same types
- my $il = $ri_left->[0];
- my $ir = $ri_right->[0];
+ # this is a line with just an opening token
+ && ( $iend_next == $ibeg_next
+ || $iend_next == $ibeg_next + 2
+ && $types_to_go[$iend_next] eq '#' )
- # now make a list of all new break points
- my @insert_list;
- for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
- my $type = $types_to_go[$i];
- if ( $is_assignment{$type}
- && $nesting_depth_to_go[$i] eq $depth_beg )
- {
- if ( $want_break_before{$type} ) {
- push @insert_list, $i - 1;
- }
- else {
- push @insert_list, $i;
- }
- }
- }
+ # Fix for case b1060 when both -baoo and -otr are set:
+ # to avoid blinking, honor the -baoo flag over the -otr flag.
+ && $token_end ne '||' && $token_end ne '&&'
- # Break after a 'return' followed by a chain of operators
- # return ( $^O !~ /win32|dos/i )
- # && ( $^O ne 'VMS' )
- # && ( $^O ne 'OS2' )
- # && ( $^O ne 'MacOS' );
- # To give:
- # return
- # ( $^O !~ /win32|dos/i )
- # && ( $^O ne 'VMS' )
- # && ( $^O ne 'OS2' )
- # && ( $^O ne 'MacOS' );
- my $i = 0;
- if ( $types_to_go[$i] eq 'k'
- && $tokens_to_go[$i] eq 'return'
- && $ir > $il
- && $nesting_depth_to_go[$i] eq $depth_beg )
- {
- push @insert_list, $i;
- }
+ # Keep break after '=' if -lp. Fixes b964 b1040 b1062 b1083 b1089.
+ && !(
+ $token_end eq '='
+ && $rOpts_line_up_parentheses
+ && $self->[_rlp_object_by_seqno_]
+ ->{ $type_sequence_to_go[$ibeg_next] }
+ )
- return unless (@insert_list);
+ # looks bad if we align vertically with the wrong container
+ && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
+ )
+ {
+ my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
- # One final check...
- # scan second and third lines and be sure there are no assignments
- # we want to avoid breaking at an = to make something like this:
- # unless ( $icon =
- # $html_icons{"$type-$state"}
- # or $icon = $html_icons{$type}
- # or $icon = $html_icons{$state} )
- for my $n ( 1 .. 2 ) {
- my $il = $ri_left->[$n];
- my $ir = $ri_right->[$n];
- foreach my $i ( $il + 1 .. $ir ) {
- my $type = $types_to_go[$i];
- return
- if ( $is_assignment{$type}
- && $nesting_depth_to_go[$i] eq $depth_beg );
+ $vt_type = 2;
+ $vt_closing_flag = $spaces;
+ $vt_seqno = $type_sequence_to_go[$ibeg_next];
+ $vt_valid_flag = 1;
}
- }
- # ok, insert any new break point
- if (@insert_list) {
- insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
- }
- return;
-}
-
-sub insert_final_breaks {
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 1d:
+ # Stacking of opening and closing tokens (Type 2)
+ #--------------------------------------------------------------
+ my $stackable;
+ my $token_beg_next = $tokens_to_go[$ibeg_next];
- my ( $self, $ri_left, $ri_right ) = @_;
+ # patch to make something like 'qw(' behave like an opening paren
+ # (aran.t)
+ if ( $types_to_go[$ibeg_next] eq 'q' ) {
+ if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
+ $token_beg_next = $1;
+ }
+ }
- my $nmax = @{$ri_right} - 1;
+ if ( $is_closing_token{$token_end}
+ && $is_closing_token{$token_beg_next} )
+ {
- # scan the left and right end tokens of all lines
- my $count = 0;
- my $i_first_colon = -1;
- for my $n ( 0 .. $nmax ) {
- my $il = $ri_left->[$n];
- my $ir = $ri_right->[$n];
- my $typel = $types_to_go[$il];
- my $typer = $types_to_go[$ir];
- return if ( $typel eq '?' );
- return if ( $typer eq '?' );
- if ( $typel eq ':' ) { $i_first_colon = $il; last; }
- elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
- }
+ # avoid instability of combo -bom and -sct; b1179
+ my $seq_next = $type_sequence_to_go[$ibeg_next];
+ $stackable = $stack_closing_token{$token_beg_next}
+ unless ( $block_type_to_go[$ibeg_next]
+ || $seq_next && $self->[_rwant_container_open_]->{$seq_next} );
+ }
+ elsif ($is_opening_token{$token_end}
+ && $is_opening_token{$token_beg_next} )
+ {
+ $stackable = $stack_opening_token{$token_beg_next}
+ unless ( $block_type_to_go[$ibeg_next] )
+ ; # shouldn't happen; just checking
+ }
- # For long ternary chains,
- # if the first : we see has its ? is in the interior
- # of a preceding line, then see if there are any good
- # breakpoints before the ?.
- if ( $i_first_colon > 0 ) {
- my $i_question = $mate_index_to_go[$i_first_colon];
- if ( $i_question > 0 ) {
- my @insert_list;
- for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
- my $token = $tokens_to_go[$ii];
- my $type = $types_to_go[$ii];
+ if ($stackable) {
- # For now, a good break is either a comma or,
- # in a long chain, a 'return'.
- # Patch for RT #126633: added the $nmax>1 check to avoid
- # breaking after a return for a simple ternary. For longer
- # chains the break after return allows vertical alignment, so
- # it is still done. So perltidy -wba='?' will not break
- # immediately after the return in the following statement:
- # sub x {
- # return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' :
- # 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb';
- # }
- if (
- (
- $type eq ','
- || $type eq 'k' && ( $nmax > 1 && $token eq 'return' )
- )
- && $self->in_same_container_i( $ii, $i_question )
- )
- {
- push @insert_list, $ii;
- last;
- }
+ my $is_semicolon_terminated;
+ if ( $n + 1 == $n_last_line ) {
+ my ( $terminal_type, $i_terminal ) =
+ terminal_type_i( $ibeg_next, $iend_next );
+ $is_semicolon_terminated = $terminal_type eq ';'
+ && $nesting_depth_to_go[$iend_next] <
+ $nesting_depth_to_go[$ibeg_next];
}
- # insert any new break points
- if (@insert_list) {
- insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
+ # this must be a line with just an opening token
+ # or end in a semicolon
+ if (
+ $is_semicolon_terminated
+ || ( $iend_next == $ibeg_next
+ || $iend_next == $ibeg_next + 2
+ && $types_to_go[$iend_next] eq '#' )
+ )
+ {
+ my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
+
+ $vt_type = 2;
+ $vt_closing_flag = $spaces;
+ $vt_seqno = $type_sequence_to_go[$ibeg_next];
+ $vt_valid_flag = 1;
+
}
}
}
- return;
-}
-sub in_same_container_i {
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 2:
+ # Handle type 3, opening block braces on last line of the batch
+ # Check for a last line with isolated opening BLOCK curly
+ #--------------------------------------------------------------
+ elsif ($rOpts_block_brace_vertical_tightness
+ && $ibeg eq $iend
+ && $types_to_go[$iend] eq '{'
+ && $block_type_to_go[$iend] =~
+ /$block_brace_vertical_tightness_pattern/ )
+ {
+ $vt_type = 3;
+ $vt_opening_flag = $rOpts_block_brace_vertical_tightness;
+ $vt_seqno = 0;
+ $vt_valid_flag = 1;
+ }
- # check to see if tokens at i1 and i2 are in the
- # same container, and not separated by a comma, ? or :
- # This is an interface between the _to_go arrays to the rLL array
- my ( $self, $i1, $i2 ) = @_;
- return $self->in_same_container_K( $K_to_go[$i1], $K_to_go[$i2] );
-}
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 3:
+ # Handle type 4, a closing block brace on the last line of the batch Check
+ # for a last line with isolated closing BLOCK curly
+ # Patch: added a check for any new closing side comment which the
+ # -csc option may generate. If it exists, there will be a side comment
+ # so we cannot combine with a brace on the next line. This issue
+ # occurs for the combination -scbb and -csc is used.
+ #--------------------------------------------------------------
+ elsif ($rOpts_stack_closing_block_brace
+ && $ibeg eq $iend
+ && $block_type_to_go[$iend]
+ && $types_to_go[$iend] eq '}'
+ && ( !$closing_side_comment || $n < $n_last_line ) )
+ {
+ my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
-{ # sub in_same_container_K
- my $ris_break_token;
- my $ris_comma_token;
+ $vt_type = 4;
+ $vt_closing_flag = $spaces;
+ $vt_seqno = $type_sequence_to_go[$iend];
+ $vt_valid_flag = 1;
- BEGIN {
+ }
- # all cases break on seeing commas at same level
- my @q = qw( => );
- push @q, ',';
- @{$ris_comma_token}{@q} = (1) x scalar(@q);
+ # get the sequence numbers of the ends of this line
+ $vt_seqno_beg = $type_sequence_to_go[$ibeg];
+ if ( !$vt_seqno_beg && $types_to_go[$ibeg] eq 'q' ) {
+ $vt_seqno_beg = $self->get_seqno( $ibeg, $ending_in_quote );
+ }
- # Non-ternary text also breaks on seeing any of qw(? : || or )
- # Example: we would not want to break at any of these .'s
- # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
- push @q, qw( or || ? : );
- @{$ris_break_token}{@q} = (1) x scalar(@q);
+ $vt_seqno_end = $type_sequence_to_go[$iend];
+ if ( !$vt_seqno_end && $types_to_go[$iend] eq 'q' ) {
+ $vt_seqno_end = $self->get_seqno( $iend, $ending_in_quote );
}
- sub in_same_container_K {
+ RETURN:
- # Check to see if tokens at K1 and K2 are in the same container,
- # and not separated by certain characters: => , ? : || or
- # This version uses the newer $rLL data structure
+ my $rvertical_tightness_flags = {
+ _vt_type => $vt_type,
+ _vt_opening_flag => $vt_opening_flag,
+ _vt_closing_flag => $vt_closing_flag,
+ _vt_seqno => $vt_seqno,
+ _vt_valid_flag => $vt_valid_flag,
+ _vt_seqno_beg => $vt_seqno_beg,
+ _vt_seqno_end => $vt_seqno_end,
+ _vt_min_lines => $vt_min_lines,
+ _vt_max_lines => $vt_max_lines,
+ };
- my ( $self, $K1, $K2 ) = @_;
- if ( $K2 < $K1 ) { ( $K1, $K2 ) = ( $K2, $K1 ) }
- my $rLL = $self->{rLL};
- my $depth_1 = $rLL->[$K1]->[_SLEVEL_];
- return if ( $depth_1 < 0 );
- return unless ( $rLL->[$K2]->[_SLEVEL_] == $depth_1 );
+ return ($rvertical_tightness_flags);
+}
- # Select character set to scan for
- my $type_1 = $rLL->[$K1]->[_TYPE_];
- my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token;
+##########################################################
+# CODE SECTION 14: Code for creating closing side comments
+##########################################################
+
+{ ## begin closure accumulate_csc_text
+
+# These routines are called once per batch when the --closing-side-comments flag
+# has been set.
+
+ my %block_leading_text;
+ my %block_opening_line_number;
+ my $csc_new_statement_ok;
+ my $csc_last_label;
+ my %csc_block_label;
+ my $accumulating_text_for_block;
+ my $leading_block_text;
+ my $rleading_block_if_elsif_text;
+ my $leading_block_text_level;
+ my $leading_block_text_length_exceeded;
+ my $leading_block_text_line_length;
+ my $leading_block_text_line_number;
+
+ sub initialize_csc_vars {
+ %block_leading_text = ();
+ %block_opening_line_number = ();
+ $csc_new_statement_ok = 1;
+ $csc_last_label = "";
+ %csc_block_label = ();
+ $rleading_block_if_elsif_text = [];
+ $accumulating_text_for_block = "";
+ reset_block_text_accumulator();
+ return;
+ }
- # Fast preliminary loop to verify that tokens are in the same container
- my $KK = $K1;
- while (1) {
- $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
- last if !defined($KK);
- last if ( $KK >= $K2 );
- my $depth_K = $rLL->[$KK]->[_SLEVEL_];
- return if ( $depth_K < $depth_1 );
- next if ( $depth_K > $depth_1 );
- if ( $type_1 ne ':' ) {
- my $tok_K = $rLL->[$KK]->[_TOKEN_];
- return if ( $tok_K eq '?' || $tok_K eq ':' );
+ sub reset_block_text_accumulator {
+
+ # save text after 'if' and 'elsif' to append after 'else'
+ if ($accumulating_text_for_block) {
+
+ if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
+ push @{$rleading_block_if_elsif_text}, $leading_block_text;
}
}
+ $accumulating_text_for_block = "";
+ $leading_block_text = "";
+ $leading_block_text_level = 0;
+ $leading_block_text_length_exceeded = 0;
+ $leading_block_text_line_number = 0;
+ $leading_block_text_line_length = 0;
+ return;
+ }
- # Slow loop checking for certain characters
+ sub set_block_text_accumulator {
+ my ( $self, $i ) = @_;
+ $accumulating_text_for_block = $tokens_to_go[$i];
+ if ( $accumulating_text_for_block !~ /^els/ ) {
+ $rleading_block_if_elsif_text = [];
+ }
+ $leading_block_text = "";
+ $leading_block_text_level = $levels_to_go[$i];
+ $leading_block_text_line_number = $self->get_output_line_number();
+ $leading_block_text_length_exceeded = 0;
+
+ # this will contain the column number of the last character
+ # of the closing side comment
+ $leading_block_text_line_length =
+ length($csc_last_label) +
+ length($accumulating_text_for_block) +
+ length( $rOpts->{'closing-side-comment-prefix'} ) +
+ $leading_block_text_level * $rOpts_indent_columns + 3;
+ return;
+ }
- ###########################################################
- # This is potentially a slow routine and not critical.
- # For safety just give up for large differences.
- # See test file 'infinite_loop.txt'
- ###########################################################
- return if ( $K2 - $K1 > 200 );
+ sub accumulate_block_text {
+ my ( $self, $i ) = @_;
+
+ # accumulate leading text for -csc, ignoring any side comments
+ if ( $accumulating_text_for_block
+ && !$leading_block_text_length_exceeded
+ && $types_to_go[$i] ne '#' )
+ {
+
+ my $added_length = $token_lengths_to_go[$i];
+ $added_length += 1 if $i == 0;
+ my $new_line_length =
+ $leading_block_text_line_length + $added_length;
+
+ # we can add this text if we don't exceed some limits..
+ if (
+
+ # we must not have already exceeded the text length limit
+ length($leading_block_text) <
+ $rOpts_closing_side_comment_maximum_text
+
+ # and either:
+ # the new total line length must be below the line length limit
+ # or the new length must be below the text length limit
+ # (ie, we may allow one token to exceed the text length limit)
+ && (
+ $new_line_length <
+ $maximum_line_length_at_level[$leading_block_text_level]
+
+ || length($leading_block_text) + $added_length <
+ $rOpts_closing_side_comment_maximum_text
+ )
+
+ # UNLESS: we are adding a closing paren before the brace we seek.
+ # This is an attempt to avoid situations where the ... to be
+ # added are longer than the omitted right paren, as in:
+
+ # foreach my $item (@a_rather_long_variable_name_here) {
+ # &whatever;
+ # } ## end foreach my $item (@a_rather_long_variable_name_here...
+
+ || (
+ $tokens_to_go[$i] eq ')'
+ && (
+ (
+ $i + 1 <= $max_index_to_go
+ && $block_type_to_go[ $i + 1 ] eq
+ $accumulating_text_for_block
+ )
+ || ( $i + 2 <= $max_index_to_go
+ && $block_type_to_go[ $i + 2 ] eq
+ $accumulating_text_for_block )
+ )
+ )
+ )
+ {
+
+ # add an extra space at each newline
+ if ( $i == 0 && $types_to_go[$i] ne 'b' ) {
+ $leading_block_text .= ' ';
+ }
- foreach my $K ( $K1 + 1 .. $K2 - 1 ) {
+ # add the token text
+ $leading_block_text .= $tokens_to_go[$i];
+ $leading_block_text_line_length = $new_line_length;
+ }
- my $depth_K = $rLL->[$K]->[_SLEVEL_];
- next if ( $depth_K > $depth_1 );
- return if ( $depth_K < $depth_1 ); # redundant, checked above
- my $tok = $rLL->[$K]->[_TOKEN_];
- return if ( $rbreak->{$tok} );
+ # show that text was truncated if necessary
+ elsif ( $types_to_go[$i] ne 'b' ) {
+ $leading_block_text_length_exceeded = 1;
+ $leading_block_text .= '...';
+ }
}
- return 1;
+ return;
}
-}
-sub set_continuation_breaks {
+ sub accumulate_csc_text {
- # Define an array of indexes for inserting newline characters to
- # keep the line lengths below the maximum desired length. There is
- # an implied break after the last token, so it need not be included.
+ my ($self) = @_;
- # Method:
- # This routine is part of series of routines which adjust line
- # lengths. It is only called if a statement is longer than the
- # maximum line length, or if a preliminary scanning located
- # desirable break points. Sub scan_list has already looked at
- # these tokens and set breakpoints (in array
- # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
- # after commas, after opening parens, and before closing parens).
- # This routine will honor these breakpoints and also add additional
- # breakpoints as necessary to keep the line length below the maximum
- # requested. It bases its decision on where the 'bond strength' is
- # lowest.
+ # called once per output buffer when -csc is used. Accumulates
+ # the text placed after certain closing block braces.
+ # Defines and returns the following for this buffer:
- # Output: returns references to the arrays:
- # @i_first
- # @i_last
- # which contain the indexes $i of the first and last tokens on each
- # line.
+ my $block_leading_text = ""; # the leading text of the last '}'
+ my $rblock_leading_if_elsif_text;
+ my $i_block_leading_text =
+ -1; # index of token owning block_leading_text
+ my $block_line_count = 100; # how many lines the block spans
+ my $terminal_type = 'b'; # type of last nonblank token
+ my $i_terminal = 0; # index of last nonblank token
+ my $terminal_block_type = "";
- # In addition, the array:
- # $forced_breakpoint_to_go[$i]
- # may be updated to be =1 for any index $i after which there must be
- # a break. This signals later routines not to undo the breakpoint.
+ # update most recent statement label
+ $csc_last_label = "" unless ($csc_last_label);
+ if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
+ my $block_label = $csc_last_label;
- my ( $self, $saw_good_break ) = @_;
- my @i_first = (); # the first index to output
- my @i_last = (); # the last index to output
- my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
- if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
+ # Loop over all tokens of this batch
+ for my $i ( 0 .. $max_index_to_go ) {
+ my $type = $types_to_go[$i];
+ my $block_type = $block_type_to_go[$i];
+ my $token = $tokens_to_go[$i];
- set_bond_strengths();
+ # remember last nonblank token type
+ if ( $type ne '#' && $type ne 'b' ) {
+ $terminal_type = $type;
+ $terminal_block_type = $block_type;
+ $i_terminal = $i;
+ }
- my $imin = 0;
- my $imax = $max_index_to_go;
- if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
- if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
- my $i_begin = $imin; # index for starting next iteration
+ my $type_sequence = $type_sequence_to_go[$i];
+ if ( $block_type && $type_sequence ) {
- my $leading_spaces = leading_spaces_to_go($imin);
- my $line_count = 0;
- my $last_break_strength = NO_BREAK;
- my $i_last_break = -1;
- my $max_bias = 0.001;
- my $tiny_bias = 0.0001;
- my $leading_alignment_token = "";
- my $leading_alignment_type = "";
+ if ( $token eq '}' ) {
- # see if any ?/:'s are in order
- my $colons_in_order = 1;
- my $last_tok = "";
- my @colon_list = grep { /^[\?\:]$/ } @types_to_go[ 0 .. $max_index_to_go ];
- my $colon_count = @colon_list;
- foreach (@colon_list) {
- if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
- $last_tok = $_;
- }
+ # restore any leading text saved when we entered this block
+ if ( defined( $block_leading_text{$type_sequence} ) ) {
+ ( $block_leading_text, $rblock_leading_if_elsif_text )
+ = @{ $block_leading_text{$type_sequence} };
+ $i_block_leading_text = $i;
+ delete $block_leading_text{$type_sequence};
+ $rleading_block_if_elsif_text =
+ $rblock_leading_if_elsif_text;
+ }
- # This is a sufficient but not necessary condition for colon chain
- my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
+ if ( defined( $csc_block_label{$type_sequence} ) ) {
+ $block_label = $csc_block_label{$type_sequence};
+ delete $csc_block_label{$type_sequence};
+ }
- #-------------------------------------------------------
- # BEGINNING of main loop to set continuation breakpoints
- # Keep iterating until we reach the end
- #-------------------------------------------------------
- while ( $i_begin <= $imax ) {
- my $lowest_strength = NO_BREAK;
- my $starting_sum = $summed_lengths_to_go[$i_begin];
- my $i_lowest = -1;
- my $i_test = -1;
- my $lowest_next_token = '';
- my $lowest_next_type = 'b';
- my $i_lowest_next_nonblank = -1;
+ # if we run into a '}' then we probably started accumulating
+ # at something like a trailing 'if' clause..no harm done.
+ if ( $accumulating_text_for_block
+ && $levels_to_go[$i] <= $leading_block_text_level )
+ {
+ my $lev = $levels_to_go[$i];
+ reset_block_text_accumulator();
+ }
- #-------------------------------------------------------
- # BEGINNING of inner loop to find the best next breakpoint
- #-------------------------------------------------------
- for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
- my $type = $types_to_go[$i_test];
- my $token = $tokens_to_go[$i_test];
- my $next_type = $types_to_go[ $i_test + 1 ];
- my $next_token = $tokens_to_go[ $i_test + 1 ];
- my $i_next_nonblank = $inext_to_go[$i_test];
- my $next_nonblank_type = $types_to_go[$i_next_nonblank];
- my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
- my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
- my $strength = $bond_strength_to_go[$i_test];
- my $maximum_line_length = maximum_line_length($i_begin);
+ if ( defined( $block_opening_line_number{$type_sequence} ) )
+ {
+ my $output_line_number =
+ $self->get_output_line_number();
+ $block_line_count =
+ $output_line_number -
+ $block_opening_line_number{$type_sequence} + 1;
+ delete $block_opening_line_number{$type_sequence};
+ }
+ else {
+
+ # Error: block opening line undefined for this line..
+ # This shouldn't be possible, but it is not a
+ # significant problem.
+ }
+ }
- # use old breaks as a tie-breaker. For example to
- # prevent blinkers with -pbp in this code:
+ elsif ( $token eq '{' ) {
-##@keywords{
-## qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/}
-## = ();
+ my $line_number = $self->get_output_line_number();
+ $block_opening_line_number{$type_sequence} = $line_number;
- # At the same time try to prevent a leading * in this code
- # with the default formatting:
- #
-## return
-## factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 )
-## * ( $x**( $a - 1 ) )
-## * ( ( 1 - $x )**( $b - 1 ) );
+ # set a label for this block, except for
+ # a bare block which already has the label
+ # A label can only be used on the next {
+ if ( $block_type =~ /:$/ ) { $csc_last_label = "" }
+ $csc_block_label{$type_sequence} = $csc_last_label;
+ $csc_last_label = "";
- # reduce strength a bit to break ties at an old breakpoint ...
- if (
- $old_breakpoint_to_go[$i_test]
+ if ( $accumulating_text_for_block
+ && $levels_to_go[$i] == $leading_block_text_level )
+ {
+
+ if ( $accumulating_text_for_block eq $block_type ) {
- # which is a 'good' breakpoint, meaning ...
- # we don't want to break before it
- && !$want_break_before{$type}
+ # save any leading text before we enter this block
+ $block_leading_text{$type_sequence} = [
+ $leading_block_text,
+ $rleading_block_if_elsif_text
+ ];
+ $block_opening_line_number{$type_sequence} =
+ $leading_block_text_line_number;
+ reset_block_text_accumulator();
+ }
+ else {
- # and either we want to break before the next token
- # or the next token is not short (i.e. not a '*', '/' etc.)
- && $i_next_nonblank <= $imax
- && ( $want_break_before{$next_nonblank_type}
- || $token_lengths_to_go[$i_next_nonblank] > 2
- || $next_nonblank_type =~ /^[\,\(\[\{L]$/ )
- )
- {
- $strength -= $tiny_bias;
+ # shouldn't happen, but not a serious error.
+ # We were accumulating -csc text for block type
+ # $accumulating_text_for_block and unexpectedly
+ # encountered a '{' for block type $block_type.
+ }
+ }
+ }
}
- # otherwise increase strength a bit if this token would be at the
- # maximum line length. This is necessary to avoid blinking
- # in the above example when the -iob flag is added.
+ if ( $type eq 'k'
+ && $csc_new_statement_ok
+ && $is_if_elsif_else_unless_while_until_for_foreach{$token}
+ && $token =~ /$closing_side_comment_list_pattern/ )
+ {
+ $self->set_block_text_accumulator($i);
+ }
else {
- my $len =
- $leading_spaces +
- $summed_lengths_to_go[ $i_test + 1 ] -
- $starting_sum;
- if ( $len >= $maximum_line_length ) {
- $strength += $tiny_bias;
+
+ # note: ignoring type 'q' because of tricks being played
+ # with 'q' for hanging side comments
+ if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
+ $csc_new_statement_ok =
+ ( $block_type || $type eq 'J' || $type eq ';' );
+ }
+ if ( $type eq ';'
+ && $accumulating_text_for_block
+ && $levels_to_go[$i] == $leading_block_text_level )
+ {
+ reset_block_text_accumulator();
+ }
+ else {
+ $self->accumulate_block_text($i);
}
}
+ }
- my $must_break = 0;
+ # Treat an 'else' block specially by adding preceding 'if' and
+ # 'elsif' text. Otherwise, the 'end else' is not helpful,
+ # especially for cuddled-else formatting.
+ if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
+ $block_leading_text =
+ $self->make_else_csc_text( $i_terminal, $terminal_block_type,
+ $block_leading_text, $rblock_leading_if_elsif_text );
+ }
- # Force an immediate break at certain operators
- # with lower level than the start of the line,
- # unless we've already seen a better break.
- #
- ##############################################
- # Note on an issue with a preceding ?
- ##############################################
- # We don't include a ? in the above list, but there may
- # be a break at a previous ? if the line is long.
- # Because of this we do not want to force a break if
- # there is a previous ? on this line. For now the best way
- # to do this is to not break if we have seen a lower strength
- # point, which is probably a ?.
- #
- # Example of unwanted breaks we are avoiding at a '.' following a ?
- # from pod2html using perltidy -gnu:
- # )
- # ? "\n<A NAME=\""
- # . $value
- # . "\">\n$text</A>\n"
- # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
- if (
- (
- $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
- || ( $next_nonblank_type eq 'k'
- && $next_nonblank_token =~ /^(and|or)$/ )
- )
- && ( $nesting_depth_to_go[$i_begin] >
- $nesting_depth_to_go[$i_next_nonblank] )
- && ( $strength <= $lowest_strength )
- )
- {
- set_forced_breakpoint($i_next_nonblank);
- }
+ # if this line ends in a label then remember it for the next pass
+ $csc_last_label = "";
+ if ( $terminal_type eq 'J' ) {
+ $csc_last_label = $tokens_to_go[$i_terminal];
+ }
- if (
+ return ( $terminal_type, $i_terminal, $i_block_leading_text,
+ $block_leading_text, $block_line_count, $block_label );
+ }
- # Try to put a break where requested by scan_list
- $forced_breakpoint_to_go[$i_test]
+ sub make_else_csc_text {
- # break between ) { in a continued line so that the '{' can
- # be outdented
- # See similar logic in scan_list which catches instances
- # where a line is just something like ') {'. We have to
- # be careful because the corresponding block keyword might
- # not be on the first line, such as 'for' here:
- #
- # eval {
- # for ("a") {
- # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
- # }
- # };
- #
- || (
- $line_count
- && ( $token eq ')' )
- && ( $next_nonblank_type eq '{' )
- && ($next_nonblank_block_type)
- && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
+ # create additional -csc text for an 'else' and optionally 'elsif',
+ # depending on the value of switch
+ #
+ # = 0 add 'if' text to trailing else
+ # = 1 same as 0 plus:
+ # add 'if' to 'elsif's if can fit in line length
+ # add last 'elsif' to trailing else if can fit in one line
+ # = 2 same as 1 but do not check if exceed line length
+ #
+ # $rif_elsif_text = a reference to a list of all previous closing
+ # side comments created for this if block
+ #
+ my ( $self, $i_terminal, $block_type, $block_leading_text,
+ $rif_elsif_text )
+ = @_;
+ my $csc_text = $block_leading_text;
- # RT #104427: Dont break before opening sub brace because
- # sub block breaks handled at higher level, unless
- # it looks like the preceding list is long and broken
- && !(
- $next_nonblank_block_type =~ /^sub\b/
- && ( $nesting_depth_to_go[$i_begin] ==
- $nesting_depth_to_go[$i_next_nonblank] )
- )
+ if ( $block_type eq 'elsif'
+ && $rOpts_closing_side_comment_else_flag == 0 )
+ {
+ return $csc_text;
+ }
- && !$rOpts->{'opening-brace-always-on-right'}
- )
+ my $count = @{$rif_elsif_text};
+ return $csc_text unless ($count);
- # There is an implied forced break at a terminal opening brace
- || ( ( $type eq '{' ) && ( $i_test == $imax ) )
- )
- {
+ my $if_text = '[ if' . $rif_elsif_text->[0];
- # Forced breakpoints must sometimes be overridden, for example
- # because of a side comment causing a NO_BREAK. It is easier
- # to catch this here than when they are set.
- if ( $strength < NO_BREAK - 1 ) {
- $strength = $lowest_strength - $tiny_bias;
- $must_break = 1;
- }
- }
+ # always show the leading 'if' text on 'else'
+ if ( $block_type eq 'else' ) {
+ $csc_text .= $if_text;
+ }
- # quit if a break here would put a good terminal token on
- # the next line and we already have a possible break
- if (
- !$must_break
- && ( $next_nonblank_type =~ /^[\;\,]$/ )
- && (
- (
- $leading_spaces +
- $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
- $starting_sum
- ) > $maximum_line_length
- )
- )
- {
- last if ( $i_lowest >= 0 );
- }
+ # see if that's all
+ if ( $rOpts_closing_side_comment_else_flag == 0 ) {
+ return $csc_text;
+ }
- # Avoid a break which would strand a single punctuation
- # token. For example, we do not want to strand a leading
- # '.' which is followed by a long quoted string.
- # But note that we do want to do this with -extrude (l=1)
- # so please test any changes to this code on -extrude.
- if (
- !$must_break
- && ( $i_test == $i_begin )
- && ( $i_test < $imax )
- && ( $token eq $type )
- && (
- (
- $leading_spaces +
- $summed_lengths_to_go[ $i_test + 1 ] -
- $starting_sum
- ) < $maximum_line_length
- )
- )
- {
- $i_test = min( $imax, $inext_to_go[$i_test] );
- redo;
- }
+ my $last_elsif_text = "";
+ if ( $count > 1 ) {
+ $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
+ if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
+ }
- if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
- {
+ # tentatively append one more item
+ my $saved_text = $csc_text;
+ if ( $block_type eq 'else' ) {
+ $csc_text .= $last_elsif_text;
+ }
+ else {
+ $csc_text .= ' ' . $if_text;
+ }
- # break at previous best break if it would have produced
- # a leading alignment of certain common tokens, and it
- # is different from the latest candidate break
- last
- if ($leading_alignment_type);
+ # all done if no length checks requested
+ if ( $rOpts_closing_side_comment_else_flag == 2 ) {
+ return $csc_text;
+ }
- # Force at least one breakpoint if old code had good
- # break It is only called if a breakpoint is required or
- # desired. This will probably need some adjustments
- # over time. A goal is to try to be sure that, if a new
- # side comment is introduced into formatted text, then
- # the same breakpoints will occur. scbreak.t
- last
- if (
- $i_test == $imax # we are at the end
- && !$forced_breakpoint_count #
- && $saw_good_break # old line had good break
- && $type =~ /^[#;\{]$/ # and this line ends in
- # ';' or side comment
- && $i_last_break < 0 # and we haven't made a break
- && $i_lowest >= 0 # and we saw a possible break
- && $i_lowest < $imax - 1 # (but not just before this ;)
- && $strength - $lowest_strength < 0.5 * WEAK # and it's good
- );
+ # undo it if line length exceeded
+ my $length =
+ length($csc_text) +
+ length($block_type) +
+ length( $rOpts->{'closing-side-comment-prefix'} ) +
+ $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
+ if (
+ $length > $maximum_line_length_at_level[$leading_block_text_level] )
+ {
+ $csc_text = $saved_text;
+ }
+ return $csc_text;
+ }
+} ## end closure accumulate_csc_text
- # Do not skip past an important break point in a short final
- # segment. For example, without this check we would miss the
- # break at the final / in the following code:
- #
- # $depth_stop =
- # ( $tau * $mass_pellet * $q_0 *
- # ( 1. - exp( -$t_stop / $tau ) ) -
- # 4. * $pi * $factor * $k_ice *
- # ( $t_melt - $t_ice ) *
- # $r_pellet *
- # $t_stop ) /
- # ( $rho_ice * $Qs * $pi * $r_pellet**2 );
- #
- if ( $line_count > 2
- && $i_lowest < $i_test
- && $i_test > $imax - 2
- && $nesting_depth_to_go[$i_begin] >
- $nesting_depth_to_go[$i_lowest]
- && $lowest_strength < $last_break_strength - .5 * WEAK )
- {
- # Make this break for math operators for now
- my $ir = $inext_to_go[$i_lowest];
- my $il = $iprev_to_go[$ir];
- last
- if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
- || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ );
- }
+{ ## begin closure balance_csc_text
- # Update the minimum bond strength location
- $lowest_strength = $strength;
- $i_lowest = $i_test;
- $lowest_next_token = $next_nonblank_token;
- $lowest_next_type = $next_nonblank_type;
- $i_lowest_next_nonblank = $i_next_nonblank;
- last if $must_break;
+ # Some additional routines for handling the --closing-side-comments option
+
+ my %matching_char;
+
+ BEGIN {
+ %matching_char = (
+ '{' => '}',
+ '(' => ')',
+ '[' => ']',
+ '}' => '{',
+ ')' => '(',
+ ']' => '[',
+ );
+ }
+
+ sub balance_csc_text {
+
+ # Append characters to balance a closing side comment so that editors
+ # such as vim can correctly jump through code.
+ # Simple Example:
+ # input = ## end foreach my $foo ( sort { $b ...
+ # output = ## end foreach my $foo ( sort { $b ...})
- # set flags to remember if a break here will produce a
- # leading alignment of certain common tokens
- if ( $line_count > 0
- && $i_test < $imax
- && ( $lowest_strength - $last_break_strength <= $max_bias )
- )
- {
- my $i_last_end = $iprev_to_go[$i_begin];
- my $tok_beg = $tokens_to_go[$i_begin];
- my $type_beg = $types_to_go[$i_begin];
- if (
+ # NOTE: This routine does not currently filter out structures within
+ # quoted text because the bounce algorithms in text editors do not
+ # necessarily do this either (a version of vim was checked and
+ # did not do this).
- # check for leading alignment of certain tokens
- (
- $tok_beg eq $next_nonblank_token
- && $is_chain_operator{$tok_beg}
- && ( $type_beg eq 'k'
- || $type_beg eq $tok_beg )
- && $nesting_depth_to_go[$i_begin] >=
- $nesting_depth_to_go[$i_next_nonblank]
- )
+ # Some complex examples which will cause trouble for some editors:
+ # while ( $mask_string =~ /\{[^{]*?\}/g ) {
+ # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
+ # if ( $1 eq '{' ) {
+ # test file test1/braces.pl has many such examples.
- || ( $tokens_to_go[$i_last_end] eq $token
- && $is_chain_operator{$token}
- && ( $type eq 'k' || $type eq $token )
- && $nesting_depth_to_go[$i_last_end] >=
- $nesting_depth_to_go[$i_test] )
- )
- {
- $leading_alignment_token = $next_nonblank_token;
- $leading_alignment_type = $next_nonblank_type;
- }
- }
- }
+ my ($csc) = @_;
- my $too_long = ( $i_test >= $imax );
- if ( !$too_long ) {
- my $next_length =
- $leading_spaces +
- $summed_lengths_to_go[ $i_test + 2 ] -
- $starting_sum;
- $too_long = $next_length > $maximum_line_length;
+ # loop to examine characters one-by-one, RIGHT to LEFT and
+ # build a balancing ending, LEFT to RIGHT.
+ for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
- # To prevent blinkers we will avoid leaving a token exactly at
- # the line length limit unless it is the last token or one of
- # several "good" types.
- #
- # The following code was a blinker with -pbp before this
- # modification:
-## $last_nonblank_token eq '('
-## && $is_indirect_object_taker{ $paren_type
-## [$paren_depth] }
- # The issue causing the problem is that if the
- # term [$paren_depth] gets broken across a line then
- # the whitespace routine doesn't see both opening and closing
- # brackets and will format like '[ $paren_depth ]'. This
- # leads to an oscillation in length depending if we break
- # before the closing bracket or not.
- if ( !$too_long
- && $i_test + 1 < $imax
- && $next_nonblank_type !~ /^[,\}\]\)R]$/ )
- {
- $too_long = $next_length >= $maximum_line_length;
- }
- }
+ my $char = substr( $csc, $pos, 1 );
- FORMATTER_DEBUG_FLAG_BREAK
- && do {
- my $ltok = $token;
- my $rtok = $next_nonblank_token ? $next_nonblank_token : "";
- my $i_testp2 = $i_test + 2;
- if ( $i_testp2 > $max_index_to_go + 1 ) {
- $i_testp2 = $max_index_to_go + 1;
- }
- if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
- if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
- print STDOUT
-"BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n";
- };
+ # ignore everything except structural characters
+ next unless ( $matching_char{$char} );
- # allow one extra terminal token after exceeding line length
- # if it would strand this token.
- if ( $rOpts_fuzzy_line_length
- && $too_long
- && $i_lowest == $i_test
- && $token_lengths_to_go[$i_test] > 1
- && $next_nonblank_type =~ /^[\;\,]$/ )
- {
- $too_long = 0;
- }
+ # pop most recently appended character
+ my $top = chop($csc);
- last
- if (
- ( $i_test == $imax ) # we're done if no more tokens,
- || (
- ( $i_lowest >= 0 ) # or no more space and we have a break
- && $too_long
- )
- );
+ # push it back plus the mate to the newest character
+ # unless they balance each other.
+ $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
}
- #-------------------------------------------------------
- # END of inner loop to find the best next breakpoint
- # Now decide exactly where to put the breakpoint
- #-------------------------------------------------------
+ # return the balanced string
+ return $csc;
+ }
+} ## end closure balance_csc_text
- # it's always ok to break at imax if no other break was found
- if ( $i_lowest < 0 ) { $i_lowest = $imax }
+sub add_closing_side_comment {
- # semi-final index calculation
- my $i_next_nonblank = $inext_to_go[$i_lowest];
- my $next_nonblank_type = $types_to_go[$i_next_nonblank];
- my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
+ my ( $self, $ri_first, $ri_last ) = @_;
+ my $rLL = $self->[_rLL_];
- #-------------------------------------------------------
- # ?/: rule 1 : if a break here will separate a '?' on this
- # line from its closing ':', then break at the '?' instead.
- #-------------------------------------------------------
- foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
- next unless ( $tokens_to_go[$i] eq '?' );
+ # add closing side comments after closing block braces if -csc used
+ my ( $closing_side_comment, $cscw_block_comment );
- # do not break if probable sequence of ?/: statements
- next if ($is_colon_chain);
+ #---------------------------------------------------------------
+ # Step 1: loop through all tokens of this line to accumulate
+ # the text needed to create the closing side comments. Also see
+ # how the line ends.
+ #---------------------------------------------------------------
- # do not break if statement is broken by side comment
- next
- if ( $tokens_to_go[$max_index_to_go] eq '#'
- && $self->terminal_type_i( 0, $max_index_to_go ) !~
- /^[\;\}]$/ );
+ my ( $terminal_type, $i_terminal, $i_block_leading_text,
+ $block_leading_text, $block_line_count, $block_label )
+ = $self->accumulate_csc_text();
- # no break needed if matching : is also on the line
- next
- if ( $mate_index_to_go[$i] >= 0
- && $mate_index_to_go[$i] <= $i_next_nonblank );
+ #---------------------------------------------------------------
+ # Step 2: make the closing side comment if this ends a block
+ #---------------------------------------------------------------
+ my $have_side_comment = $types_to_go[$max_index_to_go] eq '#';
- $i_lowest = $i;
- if ( $want_break_before{'?'} ) { $i_lowest-- }
- last;
- }
+ # if this line might end in a block closure..
+ if (
+ $terminal_type eq '}'
- #-------------------------------------------------------
- # END of inner loop to find the best next breakpoint:
- # Break the line after the token with index i=$i_lowest
- #-------------------------------------------------------
+ # Fix 1 for c091, this is only for blocks
+ && $block_type_to_go[$i_terminal]
- # final index calculation
- $i_next_nonblank = $inext_to_go[$i_lowest];
- $next_nonblank_type = $types_to_go[$i_next_nonblank];
- $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
+ # ..and either
+ && (
- FORMATTER_DEBUG_FLAG_BREAK
- && print STDOUT
- "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
+ # the block is long enough
+ ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
- #-------------------------------------------------------
- # ?/: rule 2 : if we break at a '?', then break at its ':'
- #
- # Note: this rule is also in sub scan_list to handle a break
- # at the start and end of a line (in case breaks are dictated
- # by side comments).
- #-------------------------------------------------------
- if ( $next_nonblank_type eq '?' ) {
- set_closing_breakpoint($i_next_nonblank);
- }
- elsif ( $types_to_go[$i_lowest] eq '?' ) {
- set_closing_breakpoint($i_lowest);
- }
+ # or there is an existing comment to check
+ || ( $have_side_comment
+ && $rOpts->{'closing-side-comment-warnings'} )
+ )
- #-------------------------------------------------------
- # ?/: rule 3 : if we break at a ':' then we save
- # its location for further work below. We may need to go
- # back and break at its '?'.
- #-------------------------------------------------------
- if ( $next_nonblank_type eq ':' ) {
- push @i_colon_breaks, $i_next_nonblank;
- }
- elsif ( $types_to_go[$i_lowest] eq ':' ) {
- push @i_colon_breaks, $i_lowest;
+ # .. and if this is one of the types of interest
+ && $block_type_to_go[$i_terminal] =~
+ /$closing_side_comment_list_pattern/
+
+ # .. but not an anonymous sub
+ # These are not normally of interest, and their closing braces are
+ # often followed by commas or semicolons anyway. This also avoids
+ # possible erratic output due to line numbering inconsistencies
+ # in the cases where their closing braces terminate a line.
+ && $block_type_to_go[$i_terminal] ne 'sub'
+
+ # ..and the corresponding opening brace must is not in this batch
+ # (because we do not need to tag one-line blocks, although this
+ # should also be caught with a positive -csci value)
+ && $mate_index_to_go[$i_terminal] < 0
+
+ # ..and either
+ && (
+
+ # this is the last token (line doesn't have a side comment)
+ !$have_side_comment
+
+ # or the old side comment is a closing side comment
+ || $tokens_to_go[$max_index_to_go] =~
+ /$closing_side_comment_prefix_pattern/
+ )
+ )
+ {
+
+ # then make the closing side comment text
+ if ($block_label) { $block_label .= " " }
+ my $token =
+"$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
+
+ # append any extra descriptive text collected above
+ if ( $i_block_leading_text == $i_terminal ) {
+ $token .= $block_leading_text;
}
- # here we should set breaks for all '?'/':' pairs which are
- # separated by this line
+ $token = balance_csc_text($token)
+ if $rOpts->{'closing-side-comments-balanced'};
- $line_count++;
+ $token =~ s/\s*$//; # trim any trailing whitespace
- # save this line segment, after trimming blanks at the ends
- push( @i_first,
- ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
- push( @i_last,
- ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
+ # handle case of existing closing side comment
+ if ($have_side_comment) {
+
+ # warn if requested and tokens differ significantly
+ if ( $rOpts->{'closing-side-comment-warnings'} ) {
+ my $old_csc = $tokens_to_go[$max_index_to_go];
+ my $new_csc = $token;
+ $new_csc =~ s/\s+//g; # trim all whitespace
+ $old_csc =~ s/\s+//g; # trim all whitespace
+ $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
+ $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
+ $new_csc =~ s/(\.\.\.)$//; # trim trailing '...'
+ my $new_trailing_dots = $1;
+ $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
- # set a forced breakpoint at a container opening, if necessary, to
- # signal a break at a closing container. Excepting '(' for now.
- if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
- && !$forced_breakpoint_to_go[$i_lowest] )
- {
- set_closing_breakpoint($i_lowest);
- }
+ # Patch to handle multiple closing side comments at
+ # else and elsif's. These have become too complicated
+ # to check, so if we see an indication of
+ # '[ if' or '[ # elsif', then assume they were made
+ # by perltidy.
+ if ( $block_type_to_go[$i_terminal] eq 'else' ) {
+ if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
+ }
+ elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
+ if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
+ }
- # get ready to go again
- $i_begin = $i_lowest + 1;
- $last_break_strength = $lowest_strength;
- $i_last_break = $i_lowest;
- $leading_alignment_token = "";
- $leading_alignment_type = "";
- $lowest_next_token = '';
- $lowest_next_type = 'b';
+ # if old comment is contained in new comment,
+ # only compare the common part.
+ if ( length($new_csc) > length($old_csc) ) {
+ $new_csc = substr( $new_csc, 0, length($old_csc) );
+ }
- if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
- $i_begin++;
- }
+ # if the new comment is shorter and has been limited,
+ # only compare the common part.
+ if ( length($new_csc) < length($old_csc)
+ && $new_trailing_dots )
+ {
+ $old_csc = substr( $old_csc, 0, length($new_csc) );
+ }
- # update indentation size
- if ( $i_begin <= $imax ) {
- $leading_spaces = leading_spaces_to_go($i_begin);
- }
- }
+ # any remaining difference?
+ if ( $new_csc ne $old_csc ) {
- #-------------------------------------------------------
- # END of main loop to set continuation breakpoints
- # Now go back and make any necessary corrections
- #-------------------------------------------------------
+ # just leave the old comment if we are below the threshold
+ # for creating side comments
+ if ( $block_line_count <
+ $rOpts->{'closing-side-comment-interval'} )
+ {
+ $token = undef;
+ }
- #-------------------------------------------------------
- # ?/: rule 4 -- if we broke at a ':', then break at
- # corresponding '?' unless this is a chain of ?: expressions
- #-------------------------------------------------------
- if (@i_colon_breaks) {
+ # otherwise we'll make a note of it
+ else {
- # using a simple method for deciding if we are in a ?/: chain --
- # this is a chain if it has multiple ?/: pairs all in order;
- # otherwise not.
- # Note that if line starts in a ':' we count that above as a break
- my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
+ warning(
+"perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
+ );
- unless ($is_chain) {
- my @insert_list = ();
- foreach (@i_colon_breaks) {
- my $i_question = $mate_index_to_go[$_];
- if ( $i_question >= 0 ) {
- if ( $want_break_before{'?'} ) {
- $i_question = $iprev_to_go[$i_question];
+ # save the old side comment in a new trailing block
+ # comment
+ my $timestamp = "";
+ if ( $rOpts->{'timestamp'} ) {
+ my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
+ $year += 1900;
+ $month += 1;
+ $timestamp = "$year-$month-$day";
+ }
+ $cscw_block_comment =
+"## perltidy -cscw $timestamp: $tokens_to_go[$max_index_to_go]";
+## "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
}
+ }
+ else {
- if ( $i_question >= 0 ) {
- push @insert_list, $i_question;
+ # No differences.. we can safely delete old comment if we
+ # are below the threshold
+ if ( $block_line_count <
+ $rOpts->{'closing-side-comment-interval'} )
+ {
+ # Since the line breaks have already been set, we have
+ # to remove the token from the _to_go array and also
+ # from the line range (this fixes issue c081).
+ # Note that we can only get here if -cscw has been set
+ # because otherwise the old comment is already deleted.
+ $token = undef;
+ my $ibeg = $ri_first->[-1];
+ my $iend = $ri_last->[-1];
+ if ( $iend > $ibeg
+ && $iend == $max_index_to_go
+ && $types_to_go[$max_index_to_go] eq '#' )
+ {
+ $iend--;
+ $max_index_to_go--;
+ if ( $iend > $ibeg
+ && $types_to_go[$max_index_to_go] eq 'b' )
+ {
+ $iend--;
+ $max_index_to_go--;
+ }
+ $ri_last->[-1] = $iend;
+ }
}
}
- insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
}
+
+ # switch to the new csc (unless we deleted it!)
+ if ($token) {
+
+ my $len_tok = length($token); # NOTE: length no longer important
+ my $added_len =
+ $len_tok - $token_lengths_to_go[$max_index_to_go];
+
+ $tokens_to_go[$max_index_to_go] = $token;
+ $token_lengths_to_go[$max_index_to_go] = $len_tok;
+ my $K = $K_to_go[$max_index_to_go];
+ $rLL->[$K]->[_TOKEN_] = $token;
+ $rLL->[$K]->[_TOKEN_LENGTH_] = $len_tok;
+ $summed_lengths_to_go[ $max_index_to_go + 1 ] += $added_len;
+ }
+ }
+
+ # handle case of NO existing closing side comment
+ else {
+
+ # To avoid inserting a new token in the token arrays, we
+ # will just return the new side comment so that it can be
+ # inserted just before it is needed in the call to the
+ # vertical aligner.
+ $closing_side_comment = $token;
}
}
- return ( \@i_first, \@i_last, $colon_count );
+ return ( $closing_side_comment, $cscw_block_comment );
}
-sub insert_additional_breaks {
+############################
+# CODE SECTION 15: Summarize
+############################
- # this routine will add line breaks at requested locations after
- # sub set_continuation_breaks has made preliminary breaks.
+sub wrapup {
- my ( $ri_break_list, $ri_first, $ri_last ) = @_;
- my $i_f;
- my $i_l;
- my $line_number = 0;
- foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
+ # This is the last routine called when a file is formatted.
+ # Flush buffer and write any informative messages
+ my $self = shift;
- $i_f = $ri_first->[$line_number];
- $i_l = $ri_last->[$line_number];
- while ( $i_break_left >= $i_l ) {
- $line_number++;
+ $self->flush();
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->decrement_output_line_number()
+ ; # fix up line number since it was incremented
+ we_are_at_the_last_line();
- # shouldn't happen unless caller passes bad indexes
- if ( $line_number >= @{$ri_last} ) {
- warning(
-"Non-fatal program bug: couldn't set break at $i_break_left\n"
- );
- report_definite_bug();
- return;
- }
- $i_f = $ri_first->[$line_number];
- $i_l = $ri_last->[$line_number];
+ my $max_depth = $self->[_maximum_BLOCK_level_];
+ my $at_line = $self->[_maximum_BLOCK_level_at_line_];
+ write_logfile_entry(
+"Maximum leading structural depth is $max_depth in input at line $at_line\n"
+ );
+
+ my $added_semicolon_count = $self->[_added_semicolon_count_];
+ my $first_added_semicolon_at = $self->[_first_added_semicolon_at_];
+ my $last_added_semicolon_at = $self->[_last_added_semicolon_at_];
+
+ if ( $added_semicolon_count > 0 ) {
+ my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
+ my $what =
+ ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
+ write_logfile_entry("$added_semicolon_count $what added:\n");
+ write_logfile_entry(
+ " $first at input line $first_added_semicolon_at\n");
+
+ if ( $added_semicolon_count > 1 ) {
+ write_logfile_entry(
+ " Last at input line $last_added_semicolon_at\n");
}
+ write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
+ write_logfile_entry("\n");
+ }
- # Do not leave a blank at the end of a line; back up if necessary
- if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
+ my $deleted_semicolon_count = $self->[_deleted_semicolon_count_];
+ my $first_deleted_semicolon_at = $self->[_first_deleted_semicolon_at_];
+ my $last_deleted_semicolon_at = $self->[_last_deleted_semicolon_at_];
+ if ( $deleted_semicolon_count > 0 ) {
+ my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
+ my $what =
+ ( $deleted_semicolon_count > 1 )
+ ? "semicolons were"
+ : "semicolon was";
+ write_logfile_entry(
+ "$deleted_semicolon_count unnecessary $what deleted:\n");
+ write_logfile_entry(
+ " $first at input line $first_deleted_semicolon_at\n");
- my $i_break_right = $inext_to_go[$i_break_left];
- if ( $i_break_left >= $i_f
- && $i_break_left < $i_l
- && $i_break_right > $i_f
- && $i_break_right <= $i_l )
- {
- splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
- splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
+ if ( $deleted_semicolon_count > 1 ) {
+ write_logfile_entry(
+ " Last at input line $last_deleted_semicolon_at\n");
}
+ write_logfile_entry(" (Use -ndsm to prevent semicolon deletion)\n");
+ write_logfile_entry("\n");
}
- return;
-}
-sub set_closing_breakpoint {
+ my $embedded_tab_count = $self->[_embedded_tab_count_];
+ my $first_embedded_tab_at = $self->[_first_embedded_tab_at_];
+ my $last_embedded_tab_at = $self->[_last_embedded_tab_at_];
+ if ( $embedded_tab_count > 0 ) {
+ my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
+ my $what =
+ ( $embedded_tab_count > 1 )
+ ? "quotes or patterns"
+ : "quote or pattern";
+ write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
+ write_logfile_entry(
+"This means the display of this script could vary with device or software\n"
+ );
+ write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
+
+ if ( $embedded_tab_count > 1 ) {
+ write_logfile_entry(
+ " Last at input line $last_embedded_tab_at\n");
+ }
+ write_logfile_entry("\n");
+ }
- # set a breakpoint at a matching closing token
- # at present, this is only used to break at a ':' which matches a '?'
- my $i_break = shift;
+ my $first_tabbing_disagreement = $self->[_first_tabbing_disagreement_];
+ my $last_tabbing_disagreement = $self->[_last_tabbing_disagreement_];
+ my $tabbing_disagreement_count = $self->[_tabbing_disagreement_count_];
+ my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
- if ( $mate_index_to_go[$i_break] >= 0 ) {
+ if ($first_tabbing_disagreement) {
+ write_logfile_entry(
+"First indentation disagreement seen at input line $first_tabbing_disagreement\n"
+ );
+ }
- # CAUTION: infinite recursion possible here:
- # set_closing_breakpoint calls set_forced_breakpoint, and
- # set_forced_breakpoint call set_closing_breakpoint
- # ( test files attrib.t, BasicLyx.pm.html).
- # Don't reduce the '2' in the statement below
- if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
+ my $first_btd = $self->[_first_brace_tabbing_disagreement_];
+ if ($first_btd) {
+ my $msg =
+"First closing brace indentation disagreement started at input line $first_btd\n";
+ write_logfile_entry($msg);
- # break before } ] and ), but sub set_forced_breakpoint will decide
- # to break before or after a ? and :
- my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
- set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
- }
+ # leave a hint in the .ERR file if there was a brace error
+ if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
}
- else {
- my $type_sequence = $type_sequence_to_go[$i_break];
- if ($type_sequence) {
- my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
- $postponed_breakpoint{$type_sequence} = 1;
- }
+
+ my $in_btd = $self->[_in_brace_tabbing_disagreement_];
+ if ($in_btd) {
+ my $msg =
+"Ending with brace indentation disagreement which started at input line $in_btd\n";
+ write_logfile_entry($msg);
+
+ # leave a hint in the .ERR file if there was a brace error
+ if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
}
- return;
-}
-sub compare_indentation_levels {
+ if ($in_tabbing_disagreement) {
+ my $msg =
+"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n";
+ write_logfile_entry($msg);
+ }
+ else {
- # check to see if output line tabbing agrees with input line
- # this can be very useful for debugging a script which has an extra
- # or missing brace
- my ( $guessed_indentation_level, $structural_indentation_level ) = @_;
- if ( $guessed_indentation_level ne $structural_indentation_level ) {
- $last_tabbing_disagreement = $input_line_number;
+ if ($last_tabbing_disagreement) {
- if ($in_tabbing_disagreement) {
+ write_logfile_entry(
+"Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
+ );
}
else {
- $tabbing_disagreement_count++;
-
- if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
- write_logfile_entry(
-"Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
- );
- }
- $in_tabbing_disagreement = $input_line_number;
- $first_tabbing_disagreement = $in_tabbing_disagreement
- unless ($first_tabbing_disagreement);
+ write_logfile_entry("No indentation disagreement seen\n");
}
}
- else {
- if ($in_tabbing_disagreement) {
+ if ($first_tabbing_disagreement) {
+ write_logfile_entry(
+"Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
+ );
+ }
+ write_logfile_entry("\n");
- if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
- write_logfile_entry(
-"End indentation disagreement from input line $in_tabbing_disagreement\n"
- );
+ my $vao = $self->[_vertical_aligner_object_];
+ $vao->report_anything_unusual();
+
+ $file_writer_object->report_line_length_errors();
+
+ $self->[_converged_] = $file_writer_object->get_convergence_check()
+ || $rOpts->{'indent-only'};
- if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
- write_logfile_entry(
- "No further tabbing disagreements will be noted\n");
- }
- }
- $in_tabbing_disagreement = 0;
- }
- }
return;
}
+
+} ## end package Perl::Tidy::Formatter
1;