X-Git-Url: https://git.donarmstrong.com/perltidy.git?a=blobdiff_plain;f=lib%2FPerl%2FTidy%2FFormatter.pm;h=357b03b1257e0568ba4c72212f2f566dfc5a7672;hb=8360fafa7774a02a63bd43854a82f22c335851d9;hp=e55bf05c9aaa10435a19a10acd7eec2dca94f2d3;hpb=c514d57dc8088e1f4d3f51857b1155c20085c296;p=perltidy.git diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm index e55bf05..357b03b 100644 --- a/lib/Perl/Tidy/Formatter.pm +++ b/lib/Perl/Tidy/Formatter.pm @@ -1,4 +1,4 @@ -##################################################################### +#################################################################### # # The Perl::Tidy::Formatter package adds indentation, whitespace, and # line breaks to the token stream @@ -51,8 +51,9 @@ use constant SPACE => q{ }; { #<<< A non-indenting brace to contain all lexical variables use Carp; -use English qw( -no_match_vars ); -our $VERSION = '20220613'; +use English qw( -no_match_vars ); +use List::Util qw( min max ); # min, max are in Perl 5.8 +our $VERSION = '20221112'; # The Tokenizer will be loaded with the Formatter ##use Perl::Tidy::Tokenizer; # for is_keyword() @@ -125,6 +126,31 @@ EOM return; } ## end sub Fault +sub Fault_Warn { + my ($msg) = @_; + + # This is the same as Fault except that it calls Warn instead of Die + # and returns. + my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0); + my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1); + my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2); + my $input_stream_name = get_input_stream_name(); + + Warn(< $i++, - _rlines_new_ => $i++, _rLL_ => $i++, _Klimit_ => $i++, _rdepth_of_opening_seqno_ => $i++, @@ -402,15 +437,16 @@ BEGIN { _K_opening_ternary_ => $i++, _K_closing_ternary_ => $i++, _K_first_seq_item_ => $i++, - _rK_phantom_semicolons_ => $i++, _rtype_count_by_seqno_ => $i++, _ris_function_call_paren_ => $i++, _rlec_count_by_seqno_ => $i++, _ris_broken_container_ => $i++, _ris_permanently_broken_ => $i++, + _rblank_and_comment_count_ => $i++, _rhas_list_ => $i++, _rhas_broken_list_ => $i++, _rhas_broken_list_with_lec_ => $i++, + _rfirst_comma_line_index_ => $i++, _rhas_code_block_ => $i++, _rhas_broken_code_block_ => $i++, _rhas_ternary_ => $i++, @@ -423,6 +459,7 @@ BEGIN { _rparent_of_seqno_ => $i++, _rchildren_of_seqno_ => $i++, _ris_list_by_seqno_ => $i++, + _ris_cuddled_closing_brace_ => $i++, _rbreak_container_ => $i++, _rshort_nested_ => $i++, _length_function_ => $i++, @@ -493,9 +530,13 @@ BEGIN { _ris_essential_old_breakpoint_ => $i++, _roverride_cab3_ => $i++, _ris_assigned_structure_ => $i++, + _ris_short_broken_eval_block_ => $i++, + _ris_bare_trailing_comma_by_seqno_ => $i++, - _rseqno_non_indenting_brace_by_ix_ => $i++, - _rreduce_vertical_tightness_by_seqno_ => $i++, + _rseqno_non_indenting_brace_by_ix_ => $i++, + _rmax_vertical_tightness_ => $i++, + + _no_vertical_tightness_flags_ => $i++, _LAST_SELF_INDEX_ => $i - 1, }; @@ -519,6 +560,7 @@ BEGIN { _rix_seqno_controlling_ci_ => $i++, _batch_CODE_type_ => $i++, _ri_starting_one_line_block_ => $i++, + _runmatched_opening_indexes_ => $i++, }; } @@ -568,6 +610,10 @@ BEGIN { ); @is_assignment{@q} = (1) x scalar(@q); + # a hash needed by break_lists for efficiency: + push @q, qw{ ; < > ~ f }; + @is_non_list_type{@q} = (1) x scalar(@q); + @q = qw(is if unless and or err last next redo return); @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q); @@ -744,7 +790,7 @@ sub new { initialize_undo_ci(); initialize_process_line_of_CODE(); initialize_grind_batch_of_CODE(); - initialize_final_indentation_adjustment(); + initialize_get_final_indentation(); initialize_postponed_breakpoint(); initialize_batch_variables(); initialize_write_line(); @@ -754,7 +800,7 @@ sub new { file_writer_object => $file_writer_object, logger_object => $logger_object, diagnostics_object => $diagnostics_object, - length_function => $length_function + length_function => $length_function, ); write_logfile_entry("\nStarting tokenization pass...\n"); @@ -777,8 +823,7 @@ sub new { my $self = []; # Basic data structures... - $self->[_rlines_] = []; # = ref to array of lines of the file - $self->[_rlines_new_] = []; # = ref to array of output lines + $self->[_rlines_] = []; # = ref to array of lines of the file # 'rLL' = reference to the continuous liner array of all tokens in a file. # 'LL' stands for 'Linked List'. Using a linked list was a disaster, but @@ -795,9 +840,6 @@ sub new { $self->[_K_closing_ternary_] = {}; $self->[_K_first_seq_item_] = undef; # K of first token with a sequence # - # Array of phantom semicolons, in case we ever need to undo them - $self->[_rK_phantom_semicolons_] = undef; - # 'rSS' is the 'Signed Sequence' list, a continuous list of all sequence # numbers with + or - indicating opening or closing. This list represents # the entire container tree and is invariant under reformatting. It can be @@ -820,9 +862,11 @@ sub new { $self->[_rlec_count_by_seqno_] = {}; $self->[_ris_broken_container_] = {}; $self->[_ris_permanently_broken_] = {}; + $self->[_rblank_and_comment_count_] = {}; $self->[_rhas_list_] = {}; $self->[_rhas_broken_list_] = {}; $self->[_rhas_broken_list_with_lec_] = {}; + $self->[_rfirst_comma_line_index_] = {}; $self->[_rhas_code_block_] = {}; $self->[_rhas_broken_code_block_] = {}; $self->[_rhas_ternary_] = {}; @@ -835,6 +879,7 @@ sub new { $self->[_rparent_of_seqno_] = {}; $self->[_rchildren_of_seqno_] = {}; $self->[_ris_list_by_seqno_] = {}; + $self->[_ris_cuddled_closing_brace_] = {}; $self->[_rbreak_container_] = {}; # prevent one-line blocks $self->[_rshort_nested_] = {}; # blocks not forced open @@ -909,9 +954,13 @@ sub new { $self->[_ris_essential_old_breakpoint_] = {}; $self->[_roverride_cab3_] = {}; $self->[_ris_assigned_structure_] = {}; + $self->[_ris_short_broken_eval_block_] = {}; + $self->[_ris_bare_trailing_comma_by_seqno_] = {}; + + $self->[_rseqno_non_indenting_brace_by_ix_] = {}; + $self->[_rmax_vertical_tightness_] = {}; - $self->[_rseqno_non_indenting_brace_by_ix_] = {}; - $self->[_rreduce_vertical_tightness_by_seqno_] = {}; + $self->[_no_vertical_tightness_flags_] = 0; # This flag will be updated later by a call to get_save_logfile() $self->[_save_logfile_] = defined($logger_object); @@ -1171,11 +1220,6 @@ sub get_convergence_check { return $self->[_converged_]; } -sub get_added_semicolon_count { - my $self = shift; - return $self->[_added_semicolon_count_]; -} - sub get_output_line_number { my ($self) = @_; my $vao = $self->[_vertical_aligner_object_]; @@ -1206,20 +1250,6 @@ sub consecutive_nonblank_lines { $vao->get_cached_line_count(); } -sub max { - my (@vals) = @_; - my $max = shift @vals; - for (@vals) { $max = $_ > $max ? $_ : $max } - return $max; -} - -sub min { - my (@vals) = @_; - my $min = shift @vals; - for (@vals) { $min = $_ < $min ? $_ : $min } - return $min; -} - sub split_words { # given a string containing words separated by whitespace, @@ -1471,6 +1501,7 @@ EOM my @toks = @_; foreach my $tok (@toks) { if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/: + if ( $tok eq ',' ) { $controlled_comma_style = 1 } my $lbs = $left_bond_strength{$tok}; my $rbs = $right_bond_strength{$tok}; if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) { @@ -1484,6 +1515,7 @@ EOM my $break_before = sub { my @toks = @_; foreach my $tok (@toks) { + if ( $tok eq ',' ) { $controlled_comma_style = 1 } my $lbs = $left_bond_strength{$tok}; my $rbs = $right_bond_strength{$tok}; if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) { @@ -1567,6 +1599,25 @@ EOM ##Warn("Increased -ci=n to n=2 for stability with -lp and -vmll\n"); } + #----------------------------------------------------------- + # The combination -lp -vmll -atc -dtc -wtc=b can be unstable + #----------------------------------------------------------- + # This fixes b1386 b1387 b1388 + if ( $rOpts->{'variable-maximum-line-length'} + && $rOpts->{'line-up-parentheses'} + && $rOpts->{'add-trailing-commas'} + && $rOpts->{'delete-trailing-commas'} + && $rOpts->{'want-trailing-commas'} + && $rOpts->{'want-trailing-commas'} =~ /b/ ) + { + $rOpts->{'delete-trailing-commas'} = 0; +## warning causes trouble with test cases and this combo is so rare that +## it is unlikely to not occur in practice. +## Warn( +##"The combination -vmll -lp -atc -dtc -wtc=b can be unstable; turning off -dtc\n" +## ); + } + %container_indentation_options = (); foreach my $pair ( [ 'break-before-hash-brace-and-indent', '{' ], @@ -1581,10 +1632,13 @@ EOM # (1) -lp is not compatible with opt=2, silently set to opt=0 # (2) opt=0 and 2 give same result if -i=-ci; but opt=0 is faster + # (3) set opt=0 if -i < -ci (can be unstable, case b1355) if ( $opt == 2 ) { - if ( $rOpts->{'line-up-parentheses'} - || $rOpts->{'indent-columns'} == - $rOpts->{'continuation-indentation'} ) + if ( + $rOpts->{'line-up-parentheses'} + || ( $rOpts->{'indent-columns'} <= + $rOpts->{'continuation-indentation'} ) + ) { $opt = 0; } @@ -1689,6 +1743,11 @@ EOM '(' => ')', '[' => ']', '?' => ':', + + '}' => '{', + ')' => '(', + ']' => '[', + ':' => '?', ); if ( $rOpts->{'ignore-old-breakpoints'} ) { @@ -1740,12 +1799,16 @@ EOM initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-after'}, 'kba', \%keep_break_after_type ); + $controlled_comma_style ||= $keep_break_before_type{','}; + $controlled_comma_style ||= $keep_break_after_type{','}; + #------------------------------------------------------------ # Make global vars for frequently used options for efficiency #------------------------------------------------------------ - $rOpts_add_newlines = $rOpts->{'add-newlines'}; - $rOpts_add_whitespace = $rOpts->{'add-whitespace'}; + $rOpts_add_newlines = $rOpts->{'add-newlines'}; + $rOpts_add_trailing_commas = $rOpts->{'add-trailing-commas'}; + $rOpts_add_whitespace = $rOpts->{'add-whitespace'}; $rOpts_blank_lines_after_opening_block = $rOpts->{'blank-lines-after-opening-block'}; $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'}; @@ -1777,9 +1840,12 @@ EOM $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'}; $rOpts_extended_continuation_indentation = $rOpts->{'extended-continuation-indentation'}; - $rOpts_delete_side_comments = $rOpts->{'delete-side-comments'}; - $rOpts_format_skipping = $rOpts->{'format-skipping'}; - $rOpts_freeze_whitespace = $rOpts->{'freeze-whitespace'}; + $rOpts_delete_side_comments = $rOpts->{'delete-side-comments'}; + $rOpts_delete_trailing_commas = $rOpts->{'delete-trailing-commas'}; + $rOpts_delete_weld_interfering_commas = + $rOpts->{'delete-weld-interfering-commas'}; + $rOpts_format_skipping = $rOpts->{'format-skipping'}; + $rOpts_freeze_whitespace = $rOpts->{'freeze-whitespace'}; $rOpts_function_paren_vertical_alignment = $rOpts->{'function-paren-vertical-alignment'}; $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'}; @@ -1810,6 +1876,7 @@ EOM $rOpts_recombine = $rOpts->{'recombine'}; $rOpts_short_concatenation_item_length = $rOpts->{'short-concatenation-item-length'}; + $rOpts_space_prototype_paren = $rOpts->{'space-prototype-paren'}; $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'}; $rOpts_static_block_comments = $rOpts->{'static-block-comments'}; $rOpts_sub_alias_list = $rOpts->{'sub-alias-list'}; @@ -1967,7 +2034,15 @@ EOM $stress_level_beta = $level; } + # This is a combined level which works well for turning off formatting + # features in most cases: + $high_stress_level = min( $stress_level_alpha, $stress_level_beta + 2 ); + + %trailing_comma_rules = (); + initialize_trailing_comma_rules(); + initialize_weld_nested_exclusion_rules(); + initialize_weld_fat_comma_rules(); %line_up_parentheses_control_hash = (); $line_up_parentheses_control_is_lxpl = 1; @@ -2181,6 +2256,27 @@ EOM return; } ## end sub initialize_weld_nested_exclusion_rules +sub initialize_weld_fat_comma_rules { + + # Initialize a hash controlling which opening token types can be + # welded around a fat comma + %weld_fat_comma_rules = (); + + # The -wfc flag turns on welding of '=>' after an opening paren + if ( $rOpts->{'weld-fat-comma'} ) { $weld_fat_comma_rules{'('} = 1 } + + # This could be generalized in the future by introducing a parameter + # -weld-fat-comma-after=str (-wfca=str), where str contains any of: + # * { [ ( + # to indicate which opening parens may weld to a subsequent '=>' + + # The flag -wfc would then be equivalent to -wfca='(' + + # This has not been done because it is not yet clear how useful + # this generalization would be. + return; +} ## end sub initialize_weld_fat_comma_rules + sub initialize_line_up_parentheses_control_hash { my ( $str, $opt_name ) = @_; return unless ($str); @@ -2379,6 +2475,136 @@ EOM } ## end sub initialize_keep_old_breakpoints +sub initialize_trailing_comma_rules { + + # Setup control hash for trailing commas + + # -wtc=s defines desired trailing comma policy: + # + # =" " stable + # [ both -atc and -dtc ignored ] + # =0 : none + # [requires -dtc; -atc ignored] + # =1 or * : all + # [requires -atc; -dtc ignored] + # =m : multiline lists require trailing comma + # if -atc set => will add missing multiline trailing commas + # if -dtc set => will delete trailing single line commas + # =b or 'bare' (multiline) lists require trailing comma + # if -atc set => will add missing bare trailing commas + # if -dtc set => will delete non-bare trailing commas + # =h or 'hash': single column stable bare lists require trailing comma + # if -atc set will add these + # if -dtc set will delete other trailing commas + + # This routine must be called after the alpha and beta stress levels + # have been defined. + + my $rvalid_flags = [qw(0 1 * m b h i)]; + + my $option = $rOpts->{'want-trailing-commas'}; + + if ($option) { + $option =~ s/^\s+//; + $option =~ s/\s+$//; + } + if ( defined($option) && length($option) ) { + my $error_message; + my %rule_hash; + my @q = @{$rvalid_flags}; + my %is_valid_flag; + @is_valid_flag{@q} = (1) x scalar(@q); + + # handle single character control, such as -wtc='b' + if ( length($option) == 1 ) { + foreach (qw< ) ] } >) { + $rule_hash{$_} = [ $option, EMPTY_STRING ]; + } + } + + # handle multi-character control(s), such as -wtc='[m' or -wtc='k(m' + else { + my @parts = split /\s+/, $option; + foreach my $part (@parts) { + if ( length($part) >= 2 && length($part) <= 3 ) { + my $val = substr( $part, -1, 1 ); + my $key_o = substr( $part, -2, 1 ); + if ( $is_opening_token{$key_o} ) { + my $paren_flag = EMPTY_STRING; + if ( length($part) == 3 ) { + $paren_flag = substr( $part, 0, 1 ); + } + my $key = $matching_token{$key_o}; + $rule_hash{$key} = [ $val, $paren_flag ]; + } + else { + $error_message .= "Unrecognized term: '$part'\n"; + } + } + else { + $error_message .= "Unrecognized term: '$part'\n"; + } + } + } + + # check for valid control characters + if ( !$error_message ) { + foreach my $key ( keys %rule_hash ) { + my $item = $rule_hash{$key}; + my ( $val, $paren_flag ) = @{$item}; + if ( $val && !$is_valid_flag{$val} ) { + my $valid_str = join( SPACE, @{$rvalid_flags} ); + $error_message .= + "Unexpected value '$val'; must be one of: $valid_str\n"; + last; + } + if ($paren_flag) { + if ( $paren_flag !~ /^[kKfFwW]$/ ) { + $error_message .= +"Unexpected paren flag '$paren_flag'; must be one of: k K f F w W\n"; + last; + } + if ( $key ne ')' ) { + $error_message .= +"paren flag '$paren_flag' is only allowed before a '('\n"; + last; + } + } + } + } + + if ($error_message) { + Warn(< 0; +# closure variables +my ( + + $rLL, + $jmax, + + $j_tight_closing_paren, + $last_token, + $token, + $type, + $ws, + +); + +# Hashes to set spaces around container tokens according to their +# sequence numbers. These are set as keywords are examined. +# They are controlled by the -kpit and -kpitl flags. +my %opening_container_inside_ws; +my %closing_container_inside_ws; + sub set_whitespace_flags { # This routine is called once per file to set whitespace flags for that @@ -2547,9 +2795,19 @@ sub set_whitespace_flags { my $self = shift; - my $rLL = $self->[_rLL_]; + # initialize closure variables + $rLL = $self->[_rLL_]; + $jmax = @{$rLL} - 1; + + $j_tight_closing_paren = -1; + $token = SPACE; + $type = 'b'; + $last_token = EMPTY_STRING; + + %opening_container_inside_ws = (); + %closing_container_inside_ws = (); + my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; - my $jmax = @{$rLL} - 1; my $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'}; my $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'}; @@ -2562,111 +2820,19 @@ sub set_whitespace_flags { my %is_for_foreach = ( 'for' => 1, 'foreach' => 1 ); - my ( $rtokh, $token, $type ); + my $rtokh; my $rtokh_last = $rLL->[0]; my $rtokh_last_last = $rtokh_last; - my $last_type = EMPTY_STRING; - my $last_token = EMPTY_STRING; - - my $j_tight_closing_paren = -1; + my $last_type = EMPTY_STRING; $rtokh = [ @{ $rLL->[0] } ]; - $token = SPACE; - $type = 'b'; $rtokh->[_TOKEN_] = $token; $rtokh->[_TYPE_] = $type; $rtokh->[_TYPE_SEQUENCE_] = EMPTY_STRING; $rtokh->[_LINE_INDEX_] = 0; - # This is some logic moved to a sub to avoid deep nesting of if stmts - my $ws_in_container = sub { - - my ($j) = @_; - my $ws = WS_YES; - if ( $j + 1 > $jmax ) { return (WS_NO) } - - # Patch to count '-foo' as single token so that - # each of $a{-foo} and $a{foo} and $a{'foo'} do - # not get spaces with default formatting. - my $j_here = $j; - ++$j_here - if ( $token eq '-' - && $last_token eq '{' - && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' ); - - # Patch to count a sign separated from a number as a single token, as - # in the following line. Otherwise, it takes two steps to converge: - # deg2rad(- 0.5) - if ( ( $type eq 'm' || $type eq 'p' ) - && $j < $jmax + 1 - && $rLL->[ $j + 1 ]->[_TYPE_] eq 'b' - && $rLL->[ $j + 2 ]->[_TYPE_] eq 'n' - && $rLL->[ $j + 2 ]->[_TOKEN_] =~ /^\d/ ) - { - $j_here = $j + 2; - } - - # $j_next is where a closing token should be if - # the container has a single token - if ( $j_here + 1 > $jmax ) { return (WS_NO) } - my $j_next = - ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' ) - ? $j_here + 2 - : $j_here + 1; - - if ( $j_next > $jmax ) { return WS_NO } - my $tok_next = $rLL->[$j_next]->[_TOKEN_]; - my $type_next = $rLL->[$j_next]->[_TYPE_]; - - # for tightness = 1, if there is just one token - # within the matching pair, we will keep it tight - if ( - $tok_next eq $matching_token{$last_token} - - # but watch out for this: [ [ ] (misc.t) - && $last_token ne $token - - # double diamond is usually spaced - && $token ne '<<>>' - - ) - { - - # remember where to put the space for the closing paren - $j_tight_closing_paren = $j_next; - return (WS_NO); - } - return (WS_YES); - }; - - # Local hashes to set spaces around container tokens according to their - # sequence numbers. These are set as keywords are examined. - # They are controlled by the -kpit and -kpitl flags. - my %opening_container_inside_ws; - my %closing_container_inside_ws; - my $set_container_ws_by_keyword = sub { - - return unless (%keyword_paren_inner_tightness); - - my ( $word, $sequence_number ) = @_; - - # We just saw a keyword (or other function name) followed by an opening - # paren. Now check to see if the following paren should have special - # treatment for its inside space. If so we set a hash value using the - # sequence number as key. - if ( $word && $sequence_number ) { - my $tightness = $keyword_paren_inner_tightness{$word}; - if ( defined($tightness) && $tightness != 1 ) { - my $ws_flag = $tightness == 0 ? WS_YES : WS_NO; - $opening_container_inside_ws{$sequence_number} = $ws_flag; - $closing_container_inside_ws{$sequence_number} = $ws_flag; - } - } - return; - }; - my ( $ws_1, $ws_2, $ws_3, $ws_4 ); # main loop over all tokens to define the whitespace flags @@ -2677,17 +2843,19 @@ sub set_whitespace_flags { next; } - $rtokh_last_last = $rtokh_last; - - $rtokh_last = $rtokh; $last_token = $token; $last_type = $type; + if ( $type ne '#' ) { + $rtokh_last_last = $rtokh_last; + $rtokh_last = $rtokh; + } + $rtokh = $rLL->[$j]; $token = $rtokh->[_TOKEN_]; $type = $rtokh->[_TYPE_]; - my $ws; + $ws = undef; #--------------------------------------------------------------- # Whitespace Rules Section 1: @@ -2754,7 +2922,7 @@ sub set_whitespace_flags { $ws = WS_NO; } else { - $ws = $ws_in_container->($j); + $ws = ws_in_container($j); } } @@ -2775,17 +2943,10 @@ sub set_whitespace_flags { #--------------------------------------------------------------- # The hash '%is_special_ws_type' significantly speeds up this routine, # but be sure to update it if a new check is added. - # Currently has types: qw(k w i C m - Q #) + # Currently has types: qw(k w C m - Q #) if ( $is_special_ws_type{$type} ) { - if ( $type eq 'i' ) { - - # never a space before -> - if ( substr( $token, 0, 2 ) eq '->' ) { - $ws = WS_NO; - } - } - elsif ( $type eq 'k' ) { + if ( $type eq 'k' ) { # Keywords 'for', 'foreach' are special cases for -kpit since # the opening paren does not always immediately follow the @@ -2809,7 +2970,7 @@ sub set_whitespace_flags { last if ( $rLL->[$jp]->[_LEVEL_] != $level ); # b1236 next unless ( $rLL->[$jp]->[_TOKEN_] eq '(' ); my $seqno_p = $rLL->[$jp]->[_TYPE_SEQUENCE_]; - $set_container_ws_by_keyword->( $token, $seqno_p ); + set_container_ws_by_keyword( $token, $seqno_p ); last; } } @@ -2818,11 +2979,6 @@ sub set_whitespace_flags { # retain any space between '-' and bare word elsif ( $type eq 'w' || $type eq 'C' ) { $ws = WS_OPTIONAL if $last_type eq '-'; - - # never a space before -> - if ( substr( $token, 0, 2 ) eq '->' ) { - $ws = WS_NO; - } } # retain any space between '-' and bare word; for example @@ -2925,7 +3081,7 @@ sub set_whitespace_flags { || $space_after_keyword{$last_token} ); # Set inside space flag if requested - $set_container_ws_by_keyword->( $last_token, $seqno ); + set_container_ws_by_keyword( $last_token, $seqno ); } # Space between function and '(' @@ -2943,39 +3099,40 @@ sub set_whitespace_flags { # NOTE: this would be the place to allow spaces between # repeated parens, like () () (), as in case c017, but I # decided that would not be a good idea. + + # Updated to allow detached '->' from tokenizer (issue c140) elsif ( - ##$last_type =~ /^[wCUG]$/ + + # /^[wCUG]$/ $is_wCUG{$last_type} + || ( - ##$last_type =~ /^[wi]$/ + + # /^[wi]$/ $is_wi{$last_type} && ( + + # with prefix '->' or '&' $last_token =~ /^([\&]|->)/ - # or -> or & split from bareword by newline (b1337) - || ( - $last_token =~ /^\w/ - && ( - $rtokh_last_last->[_TYPE_] eq '->' - || ( $rtokh_last_last->[_TYPE_] eq 't' - && $rtokh_last_last->[_TOKEN_] =~ - /^\&\s*$/ ) - ) - ) + # or preceding token '->' (see b1337; c140) + || $rtokh_last_last->[_TYPE_] eq '->' + + # or preceding sub call operator token '&' + || ( $rtokh_last_last->[_TYPE_] eq 't' + && $rtokh_last_last->[_TOKEN_] =~ /^\&\s*$/ ) ) ) ) { $ws = $rOpts_space_function_paren ? WS_YES : WS_NO; - $set_container_ws_by_keyword->( $last_token, $seqno ); + set_container_ws_by_keyword( $last_token, $seqno ); $ris_function_call_paren->{$seqno} = 1; } # space between something like $i and ( in 'snippets/space2.in' # for $i ( 0 .. 20 ) { - # FIXME: eventually, type 'i' could be split into multiple - # token types so this can be a hardwired rule. elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) { $ws = WS_YES; } @@ -2994,9 +3151,10 @@ sub set_whitespace_flags { $ws = WS_OPTIONAL; } - # keep space between 'sub' and '{' for anonymous sub definition + # keep space between 'sub' and '{' for anonymous sub definition, + # be sure type = 'k' (added for c140) if ( $type eq '{' ) { - if ( $last_token eq 'sub' ) { + if ( $last_token eq 'sub' && $last_type eq 'k' ) { $ws = WS_YES; } @@ -3087,19 +3245,20 @@ sub set_whitespace_flags { $rwhitespace_flags->[$j] = $ws; - if (DEBUG_WHITE) { - my $str = substr( $last_token, 0, 15 ); - $str .= SPACE x ( 16 - length($str) ); - if ( !defined($ws_1) ) { $ws_1 = "*" } - if ( !defined($ws_2) ) { $ws_2 = "*" } - if ( !defined($ws_3) ) { $ws_3 = "*" } - if ( !defined($ws_4) ) { $ws_4 = "*" } - print STDOUT + next if ( !DEBUG_WHITE ); + + my $str = substr( $last_token, 0, 15 ); + $str .= SPACE x ( 16 - length($str) ); + if ( !defined($ws_1) ) { $ws_1 = "*" } + if ( !defined($ws_2) ) { $ws_2 = "*" } + if ( !defined($ws_3) ) { $ws_3 = "*" } + if ( !defined($ws_4) ) { $ws_4 = "*" } + print STDOUT "NEW WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n"; - # reset for next pass - $ws_1 = $ws_2 = $ws_3 = $ws_4 = undef; - } + # reset for next pass + $ws_1 = $ws_2 = $ws_3 = $ws_4 = undef; + } ## end main loop if ( $rOpts->{'tight-secret-operators'} ) { @@ -3110,6 +3269,87 @@ sub set_whitespace_flags { } ## end sub set_whitespace_flags +sub set_container_ws_by_keyword { + + my ( $word, $sequence_number ) = @_; + return unless (%keyword_paren_inner_tightness); + + # We just saw a keyword (or other function name) followed by an opening + # paren. Now check to see if the following paren should have special + # treatment for its inside space. If so we set a hash value using the + # sequence number as key. + if ( $word && $sequence_number ) { + my $tightness = $keyword_paren_inner_tightness{$word}; + if ( defined($tightness) && $tightness != 1 ) { + my $ws_flag = $tightness == 0 ? WS_YES : WS_NO; + $opening_container_inside_ws{$sequence_number} = $ws_flag; + $closing_container_inside_ws{$sequence_number} = $ws_flag; + } + } + return; +} ## end sub set_container_ws_by_keyword + +sub ws_in_container { + + my ($j) = @_; + if ( $j + 1 > $jmax ) { return (WS_NO) } + + # Patch to count '-foo' as single token so that + # each of $a{-foo} and $a{foo} and $a{'foo'} do + # not get spaces with default formatting. + my $j_here = $j; + ++$j_here + if ( $token eq '-' + && $last_token eq '{' + && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' ); + + # Patch to count a sign separated from a number as a single token, as + # in the following line. Otherwise, it takes two steps to converge: + # deg2rad(- 0.5) + if ( ( $type eq 'm' || $type eq 'p' ) + && $j < $jmax + 1 + && $rLL->[ $j + 1 ]->[_TYPE_] eq 'b' + && $rLL->[ $j + 2 ]->[_TYPE_] eq 'n' + && $rLL->[ $j + 2 ]->[_TOKEN_] =~ /^\d/ ) + { + $j_here = $j + 2; + } + + # $j_next is where a closing token should be if + # the container has a single token + if ( $j_here + 1 > $jmax ) { return (WS_NO) } + my $j_next = + ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' ) + ? $j_here + 2 + : $j_here + 1; + + if ( $j_next > $jmax ) { return WS_NO } + my $tok_next = $rLL->[$j_next]->[_TOKEN_]; + my $type_next = $rLL->[$j_next]->[_TYPE_]; + + # for tightness = 1, if there is just one token + # within the matching pair, we will keep it tight + if ( + $tok_next eq $matching_token{$last_token} + + # but watch out for this: [ [ ] (misc.t) + && $last_token ne $token + + # double diamond is usually spaced + && $token ne '<<>>' + + ) + { + + # remember where to put the space for the closing paren + $j_tight_closing_paren = $j_next; + return (WS_NO); + } + return (WS_YES); +} ## end sub ws_in_container + +} ## end closure set_whitespace_flags + sub dump_want_left_space { my $fh = shift; local $LIST_SEPARATOR = "\n"; @@ -3878,6 +4118,9 @@ EOM # $a->$b($c); $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG; + # Added for c140 to make 'w ->' and 'i ->' behave the same + $binary_bond_strength{'w'}{'->'} = 1.45 * STRONG; + # Note that the following alternative strength would make the break at the # '->' rather than opening the '('. Both have advantages and disadvantages. # $binary_bond_strength{'i'}{'->'} = 0.5*STRONG + 0.5 * NOMINAL; # @@ -4010,6 +4253,11 @@ EOM my ($self) = @_; + #----------------------------------------------------------------- + # Define a 'bond strength' for each token pair in an output batch. + # See comments above for definition of bond strength. + #----------------------------------------------------------------- + my $rbond_strength_to_go = []; my $rLL = $self->[_rLL_]; @@ -4323,7 +4571,8 @@ EOM elsif ( $type eq 'w' ) { $bond_str = NO_BREAK if ( !$old_breakpoint_to_go[$i] - && substr( $next_nonblank_token, 0, 1 ) eq '/' ); + && substr( $next_nonblank_token, 0, 1 ) eq '/' + && $next_nonblank_type ne '//' ); } $bond_str_2 = $bond_str if (DEBUG_BOND); @@ -4533,8 +4782,8 @@ sub bad_pattern { # but it should be safe because the pattern has been constructed # by this program. my ($pattern) = @_; - eval "'##'=~/$pattern/"; - return $EVAL_ERROR; + my $ok = eval "'##'=~/$pattern/"; + return !defined($ok) || $EVAL_ERROR; } { ## begin closure prepare_cuddled_block_types @@ -5225,6 +5474,26 @@ EOM return; } ## end sub check_sequence_numbers + sub store_block_type { + my ( $self, $block_type, $seqno ) = @_; + + return if ( !$block_type ); + + $self->[_rblock_type_of_seqno_]->{$seqno} = $block_type; + + if ( substr( $block_type, 0, 3 ) eq 'sub' + || $rOpts_sub_alias_list ) + { + if ( $block_type =~ /$ASUB_PATTERN/ ) { + $self->[_ris_asub_block_]->{$seqno} = 1; + } + elsif ( $block_type =~ /$SUB_PATTERN/ ) { + $self->[_ris_sub_block_]->{$seqno} = 1; + } + } + return; + } + sub write_line { # This routine receives lines one-by-one from the tokenizer and stores @@ -5233,19 +5502,8 @@ EOM # to do the actual formatting. my ( $self, $line_of_tokens_old ) = @_; - my $rLL = $self->[_rLL_]; - my $Klimit = $self->[_Klimit_]; - my $rlines_new = $self->[_rlines_]; - - my $K_opening_container = $self->[_K_opening_container_]; - my $K_closing_container = $self->[_K_closing_container_]; - my $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_]; - my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; - my $rSS = $self->[_rSS_]; - my $Iss_opening = $self->[_Iss_opening_]; - my $Iss_closing = $self->[_Iss_closing_]; - my $Kfirst; + my $rLL = $self->[_rLL_]; my $line_of_tokens = {}; foreach ( qw( @@ -5265,193 +5523,55 @@ EOM $line_of_tokens->{$_} = $line_of_tokens_old->{$_}; } - # Data needed by Logger - $line_of_tokens->{_level_0} = 0; - $line_of_tokens->{_ci_level_0} = 0; - $line_of_tokens->{_nesting_blocks_0} = EMPTY_STRING; - $line_of_tokens->{_nesting_tokens_0} = EMPTY_STRING; - - # Needed to avoid trimming quotes - $line_of_tokens->{_ended_in_blank_token} = undef; - - my $line_type = $line_of_tokens_old->{_line_type}; - my $line_number = $line_of_tokens_old->{_line_number}; - my $CODE_type = EMPTY_STRING; + my $line_type = $line_of_tokens_old->{_line_type}; my $tee_output; + my $Klimit = $self->[_Klimit_]; + my $Kfirst; + # Handle line of non-code if ( $line_type ne 'CODE' ) { $tee_output ||= $rOpts_tee_pod && substr( $line_type, 0, 3 ) eq 'POD'; + + $line_of_tokens->{_level_0} = 0; + $line_of_tokens->{_ci_level_0} = 0; + $line_of_tokens->{_nesting_blocks_0} = EMPTY_STRING; + $line_of_tokens->{_nesting_tokens_0} = EMPTY_STRING; + $line_of_tokens->{_ended_in_blank_token} = undef; + } # Handle line of code else { - my $rtokens = $line_of_tokens_old->{_rtokens}; - my $rtoken_type = $line_of_tokens_old->{_rtoken_type}; - my $rblock_type = $line_of_tokens_old->{_rblock_type}; - my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence}; - my $rlevels = $line_of_tokens_old->{_rlevels}; - my $rci_levels = $line_of_tokens_old->{_rci_levels}; + my $rtokens = $line_of_tokens_old->{_rtokens}; + my $jmax = @{$rtokens} - 1; - my $jmax = @{$rtokens} - 1; if ( $jmax >= 0 ) { - $Kfirst = defined($Klimit) ? $Klimit + 1 : 0; - - DEVEL_MODE - && check_sequence_numbers( $rtokens, $rtoken_type, - $rtype_sequence, $line_number ); - - # Find the starting nesting depth ... - # It must be the value of variable 'level' of the first token - # because the nesting depth is used as a token tag in the - # vertical aligner and is compared to actual levels. - # So vertical alignment problems will occur with any other - # starting value. - if ( !defined($nesting_depth) ) { - $nesting_depth = $rlevels->[0]; - $nesting_depth = 0 if ( $nesting_depth < 0 ); - $rdepth_of_opening_seqno->[SEQ_ROOT] = $nesting_depth - 1; - } - - foreach my $j ( 0 .. $jmax ) { - - # Do not clip the 'level' variable yet. We will do this - # later, in sub 'store_token_to_go'. The reason is that in - # files with level errors, the logic in 'weld_cuddled_else' - # uses a stack logic that will give bad welds if we clip - # levels here. - ## if ( $rlevels->[$j] < 0 ) { $rlevels->[$j] = 0 } - - # Handle tokens with sequence numbers ... - my $seqno = $rtype_sequence->[$j]; - if ($seqno) { - my $token = $rtokens->[$j]; - my $sign = 1; - if ( $is_opening_token{$token} ) { - $K_opening_container->{$seqno} = @{$rLL}; - $rdepth_of_opening_seqno->[$seqno] = $nesting_depth; - $nesting_depth++; - - # Save a sequenced block type at its opening token. - # Note that unsequenced block types can occur in - # unbalanced code with errors but are ignored here. - if ( $rblock_type->[$j] ) { - my $block_type = $rblock_type->[$j]; - $rblock_type_of_seqno->{$seqno} = $block_type; - if ( substr( $block_type, 0, 3 ) eq 'sub' - || $rOpts_sub_alias_list ) - { - if ( $block_type =~ /$ASUB_PATTERN/ ) { - $self->[_ris_asub_block_]->{$seqno} = 1; - } - elsif ( $block_type =~ /$SUB_PATTERN/ ) { - $self->[_ris_sub_block_]->{$seqno} = 1; - } - } - } - } - elsif ( $is_closing_token{$token} ) { - - # The opening depth should always be defined, and - # it should equal $nesting_depth-1. To protect - # against unforseen error conditions, however, we - # will check this and fix things if necessary. For - # a test case see issue c055. - my $opening_depth = - $rdepth_of_opening_seqno->[$seqno]; - if ( !defined($opening_depth) ) { - $opening_depth = $nesting_depth - 1; - $opening_depth = 0 if ( $opening_depth < 0 ); - $rdepth_of_opening_seqno->[$seqno] = - $opening_depth; - - # This is not fatal but should not happen. The - # tokenizer generates sequence numbers - # incrementally upon encountering each new - # opening token, so every positive sequence - # number should correspond to an opening token. - if (DEVEL_MODE) { - Fault(<{$seqno} = @{$rLL}; - $nesting_depth = $opening_depth; - $sign = -1; - } - elsif ( $token eq '?' ) { - } - elsif ( $token eq ':' ) { - $sign = -1; - } - - # The only sequenced types output by the tokenizer are - # the opening & closing containers and the ternary - # types. So we would only get here if the tokenizer has - # been changed to mark some other tokens with sequence - # numbers, or if an error has been introduced in a - # hash such as %is_opening_container - else { - if (DEVEL_MODE) { - Fault(<[$j]', sequence=$seqno arrived from tokenizer. -Expecting only opening or closing container tokens or ternary tokens with sequence numbers. -EOM - } - } - if ( $sign > 0 ) { - $Iss_opening->[$seqno] = @{$rSS}; - - # For efficiency, we find the maximum level of - # opening tokens of any type. The actual maximum - # level will be that of their contents which is 1 - # greater. That will be fixed in sub - # 'finish_formatting'. - my $level = $rlevels->[$j]; - if ( $level > $self->[_maximum_level_] ) { - $self->[_maximum_level_] = $level; - $self->[_maximum_level_at_line_] = $line_number; - } - } - else { $Iss_closing->[$seqno] = @{$rSS} } - push @{$rSS}, $sign * $seqno; - - } - else { - $seqno = EMPTY_STRING unless ( defined($seqno) ); - } + $Kfirst = defined($Klimit) ? $Klimit + 1 : 0; - my @tokary; - @tokary[ - _TOKEN_, _TYPE_, _TYPE_SEQUENCE_, - _LEVEL_, _CI_LEVEL_, _LINE_INDEX_, - ] - = ( - $rtokens->[$j], $rtoken_type->[$j], - $seqno, $rlevels->[$j], - $rci_levels->[$j], $line_number - 1, - ); - push @{$rLL}, \@tokary; - } ## end foreach my $j ( 0 .. $jmax ) + #---------------------------- + # get the tokens on this line + #---------------------------- + $self->write_line_inner_loop( $line_of_tokens_old, + $line_of_tokens ); + # update Klimit for added tokens $Klimit = @{$rLL} - 1; - # Need to remember if we can trim the input line - $line_of_tokens->{_ended_in_blank_token} = - $rtoken_type->[$jmax] eq 'b'; + } ## end if ( $jmax >= 0 ) + else { - $line_of_tokens->{_level_0} = $rlevels->[0]; - $line_of_tokens->{_ci_level_0} = $rci_levels->[0]; - $line_of_tokens->{_nesting_blocks_0} = - $line_of_tokens_old->{_nesting_blocks_0}; - $line_of_tokens->{_nesting_tokens_0} = - $line_of_tokens_old->{_nesting_tokens_0}; + # blank line + $line_of_tokens->{_level_0} = 0; + $line_of_tokens->{_ci_level_0} = 0; + $line_of_tokens->{_nesting_blocks_0} = EMPTY_STRING; + $line_of_tokens->{_nesting_tokens_0} = EMPTY_STRING; + $line_of_tokens->{_ended_in_blank_token} = undef; - } ## end if ( $jmax >= 0 ) + } $tee_output ||= $rOpts_tee_block_comments @@ -5467,50 +5587,223 @@ EOM } ## end if ( $line_type eq 'CODE') # Finish storing line variables + $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ]; + $self->[_Klimit_] = $Klimit; + my $rlines = $self->[_rlines_]; + push @{$rlines}, $line_of_tokens; + if ($tee_output) { my $fh_tee = $self->[_fh_tee_]; my $line_text = $line_of_tokens_old->{_line_text}; $fh_tee->print($line_text) if ($fh_tee); } - $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ]; - $line_of_tokens->{_code_type} = $CODE_type; - $self->[_Klimit_] = $Klimit; - - push @{$rlines_new}, $line_of_tokens; return; } ## end sub write_line -} ## end closure write_line -############################################# -# CODE SECTION 5: Pre-process the entire file -############################################# + sub write_line_inner_loop { + my ( $self, $line_of_tokens_old, $line_of_tokens ) = @_; -sub finish_formatting { + #--------------------------------------------------------------------- + # Copy the tokens on one line received from the tokenizer to their new + # storage locations. + #--------------------------------------------------------------------- - my ( $self, $severe_error ) = @_; + # Input parameters: + # $line_of_tokens_old = line received from tokenizer + # $line_of_tokens = line of tokens being formed for formatter - # The file has been tokenized and is ready to be formatted. - # All of the relevant data is stored in $self, ready to go. + my $rtokens = $line_of_tokens_old->{_rtokens}; + my $jmax = @{$rtokens} - 1; + if ( $jmax < 0 ) { - # Check the maximum level. If it is extremely large we will give up and - # output the file verbatim. Note that the actual maximum level is 1 - # greater than the saved value, so we fix that here. - $self->[_maximum_level_] += 1; - my $maximum_level = $self->[_maximum_level_]; - my $maximum_table_index = $#maximum_line_length_at_level; - if ( !$severe_error && $maximum_level >= $maximum_table_index ) { - $severe_error ||= 1; - Warn(<{_line_number}; + my $rtoken_type = $line_of_tokens_old->{_rtoken_type}; + my $rblock_type = $line_of_tokens_old->{_rblock_type}; + my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence}; + my $rlevels = $line_of_tokens_old->{_rlevels}; + my $rci_levels = $line_of_tokens_old->{_rci_levels}; + + my $rLL = $self->[_rLL_]; + my $rSS = $self->[_rSS_]; + my $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_]; + + DEVEL_MODE + && check_sequence_numbers( $rtokens, $rtoken_type, + $rtype_sequence, $line_number ); + + # Find the starting nesting depth ... + # It must be the value of variable 'level' of the first token + # because the nesting depth is used as a token tag in the + # vertical aligner and is compared to actual levels. + # So vertical alignment problems will occur with any other + # starting value. + if ( !defined($nesting_depth) ) { + $nesting_depth = $rlevels->[0]; + $nesting_depth = 0 if ( $nesting_depth < 0 ); + $rdepth_of_opening_seqno->[SEQ_ROOT] = $nesting_depth - 1; + } + + foreach my $j ( 0 .. $jmax ) { + + # Do not clip the 'level' variable yet. We will do this + # later, in sub 'store_token_to_go'. The reason is that in + # files with level errors, the logic in 'weld_cuddled_else' + # uses a stack logic that will give bad welds if we clip + # levels here. + ## if ( $rlevels->[$j] < 0 ) { $rlevels->[$j] = 0 } + + # Handle tokens with sequence numbers ... + my $seqno = $rtype_sequence->[$j]; + if ($seqno) { + my $token = $rtokens->[$j]; + my $sign = 1; + if ( $is_opening_token{$token} ) { + $self->[_K_opening_container_]->{$seqno} = @{$rLL}; + $rdepth_of_opening_seqno->[$seqno] = $nesting_depth; + $nesting_depth++; + + # Save a sequenced block type at its opening token. + # Note that unsequenced block types can occur in + # unbalanced code with errors but are ignored here. + $self->store_block_type( $rblock_type->[$j], $seqno ) + if ( $rblock_type->[$j] ); + } + elsif ( $is_closing_token{$token} ) { + + # The opening depth should always be defined, and + # it should equal $nesting_depth-1. To protect + # against unforseen error conditions, however, we + # will check this and fix things if necessary. For + # a test case see issue c055. + my $opening_depth = $rdepth_of_opening_seqno->[$seqno]; + if ( !defined($opening_depth) ) { + $opening_depth = $nesting_depth - 1; + $opening_depth = 0 if ( $opening_depth < 0 ); + $rdepth_of_opening_seqno->[$seqno] = $opening_depth; + + # This is not fatal but should not happen. The + # tokenizer generates sequence numbers + # incrementally upon encountering each new + # opening token, so every positive sequence + # number should correspond to an opening token. + DEVEL_MODE && Fault(<[_K_closing_container_]->{$seqno} = @{$rLL}; + $nesting_depth = $opening_depth; + $sign = -1; + } + elsif ( $token eq '?' ) { + } + elsif ( $token eq ':' ) { + $sign = -1; + } + + # The only sequenced types output by the tokenizer are + # the opening & closing containers and the ternary + # types. So we would only get here if the tokenizer has + # been changed to mark some other tokens with sequence + # numbers, or if an error has been introduced in a + # hash such as %is_opening_container + else { + DEVEL_MODE && Fault(<[$j]', sequence=$seqno arrived from tokenizer. +Expecting only opening or closing container tokens or ternary tokens with sequence numbers. +EOM + } + + if ( $sign > 0 ) { + $self->[_Iss_opening_]->[$seqno] = @{$rSS}; + + # For efficiency, we find the maximum level of + # opening tokens of any type. The actual maximum + # level will be that of their contents which is 1 + # greater. That will be fixed in sub + # 'finish_formatting'. + my $level = $rlevels->[$j]; + if ( $level > $self->[_maximum_level_] ) { + $self->[_maximum_level_] = $level; + $self->[_maximum_level_at_line_] = $line_number; + } + } + else { $self->[_Iss_closing_]->[$seqno] = @{$rSS} } + push @{$rSS}, $sign * $seqno; + + } + else { + $seqno = EMPTY_STRING unless ( defined($seqno) ); + } + + my @tokary; + @tokary[ + _TOKEN_, _TYPE_, _TYPE_SEQUENCE_, + _LEVEL_, _CI_LEVEL_, _LINE_INDEX_, + ] + = ( + $rtokens->[$j], $rtoken_type->[$j], $seqno, $rlevels->[$j], + $rci_levels->[$j], $line_number - 1, + ); + push @{$rLL}, \@tokary; + } ## end foreach my $j ( 0 .. $jmax ) + + # Need to remember if we can trim the input line + $line_of_tokens->{_ended_in_blank_token} = $rtoken_type->[$jmax] eq 'b'; + + # Values needed by Logger + $line_of_tokens->{_level_0} = $rlevels->[0]; + $line_of_tokens->{_ci_level_0} = $rci_levels->[0]; + $line_of_tokens->{_nesting_blocks_0} = + $line_of_tokens_old->{_nesting_blocks_0}; + $line_of_tokens->{_nesting_tokens_0} = + $line_of_tokens_old->{_nesting_tokens_0}; + + return; + + } ## end sub write_line_inner_loop + +} ## end closure write_line + +############################################# +# CODE SECTION 5: Pre-process the entire file +############################################# + +sub finish_formatting { + + my ( $self, $severe_error ) = @_; + + # The file has been tokenized and is ready to be formatted. + # All of the relevant data is stored in $self, ready to go. + + # Some of the code in sub break_lists is not robust enough to process code + # with arbitrary brace errors. The simplest fix is to just return the file + # verbatim if there are brace errors. This fixes issue c160. + $severe_error ||= get_saw_brace_error(); + + # Check the maximum level. If it is extremely large we will give up and + # output the file verbatim. Note that the actual maximum level is 1 + # greater than the saved value, so we fix that here. + $self->[_maximum_level_] += 1; + my $maximum_level = $self->[_maximum_level_]; + my $maximum_table_index = $#maximum_line_length_at_level; + if ( !$severe_error && $maximum_level >= $maximum_table_index ) { + $severe_error ||= 1; + Warn(<{notidy} ) { $self->dump_verbatim(); - $self->wrapup(); + $self->wrapup($severe_error); return; } @@ -5521,44 +5814,56 @@ EOM $self->[_save_logfile_] = $logger_object->get_save_logfile(); } - my $rix_side_comments = $self->set_CODE_type(); + { + my $rix_side_comments = $self->set_CODE_type(); - $self->find_non_indenting_braces($rix_side_comments); + $self->find_non_indenting_braces($rix_side_comments); - # Handle any requested side comment deletions. It is easier to get - # this done here rather than farther down the pipeline because IO - # lines take a different route, and because lines with deleted HSC - # become BL lines. We have already handled any tee requests in sub - # getline, so it is safe to delete side comments now. - $self->delete_side_comments($rix_side_comments) - if ( $rOpts_delete_side_comments - || $rOpts_delete_closing_side_comments ); + # Handle any requested side comment deletions. It is easier to get + # this done here rather than farther down the pipeline because IO + # lines take a different route, and because lines with deleted HSC + # become BL lines. We have already handled any tee requests in sub + # getline, so it is safe to delete side comments now. + $self->delete_side_comments($rix_side_comments) + if ( $rOpts_delete_side_comments + || $rOpts_delete_closing_side_comments ); + } # Verify that the line hash does not have any unknown keys. $self->check_line_hashes() if (DEVEL_MODE); - # Make a pass through all tokens, adding or deleting any whitespace as - # required. Also make any other changes, such as adding semicolons. - # All token changes must be made here so that the token data structure - # remains fixed for the rest of this iteration. - $self->respace_tokens(); + { + # Make a pass through all tokens, adding or deleting any whitespace as + # required. Also make any other changes, such as adding semicolons. + # All token changes must be made here so that the token data structure + # remains fixed for the rest of this iteration. + my ( $error, $rqw_lines ) = $self->respace_tokens(); + if ($error) { + $self->dump_verbatim(); + $self->wrapup(); + return; + } + + $self->find_multiline_qw($rqw_lines); + } + + $self->examine_vertical_tightness_flags(); $self->set_excluded_lp_containers(); - $self->find_multiline_qw(); - $self->keep_old_line_breaks(); # Implement any welding needed for the -wn or -cb options $self->weld_containers(); - $self->collapsed_lengths() + # Collect info needed to implement the -xlp style + $self->xlp_collapsed_lengths() if ( $rOpts_line_up_parentheses && $rOpts_extended_line_up_parentheses ); # Locate small nested blocks which should not be broken $self->mark_short_nested_blocks(); - $self->adjust_indentation_levels(); + $self->special_indentation_adjustments(); # Verify that the main token array looks OK. If this ever causes a fault # then place similar checks before the sub calls above to localize the @@ -5619,25 +5924,26 @@ sub set_CODE_type { my $ix_line = -1; foreach my $line_of_tokens ( @{$rlines} ) { $ix_line++; - my $input_line_no = $line_of_tokens->{_line_number}; - my $line_type = $line_of_tokens->{_line_type}; + my $line_type = $line_of_tokens->{_line_type}; my $Last_line_had_side_comment = $has_side_comment; if ($has_side_comment) { push @ix_side_comments, $ix_line - 1; + $has_side_comment = 0; } - $has_side_comment = 0; - next unless ( $line_type eq 'CODE' ); + my $last_CODE_type = $CODE_type; + $CODE_type = EMPTY_STRING; + + if ( $line_type ne 'CODE' ) { + next; + } my $Klast_prev = $Klast; my $rK_range = $line_of_tokens->{_rK_range}; ( $Kfirst, $Klast ) = @{$rK_range}; - my $last_CODE_type = $CODE_type; - $CODE_type = EMPTY_STRING; - my $input_line = $line_of_tokens->{_line_text}; my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1; @@ -5663,11 +5969,12 @@ sub set_CODE_type { ) { $In_format_skipping_section = 0; + my $input_line_no = $line_of_tokens->{_line_number}; write_logfile_entry( "Line $input_line_no: Exiting format-skipping section\n"); } $CODE_type = 'FS'; - goto NEXT; + next; } # Check for a continued quote.. @@ -5676,12 +5983,12 @@ sub set_CODE_type { # A line which is entirely a quote or pattern must go out # verbatim. Note: the \n is contained in $input_line. if ( $jmax <= 0 ) { - if ( ( $input_line =~ "\t" ) ) { + if ( $self->[_save_logfile_] && $input_line =~ /\t/ ) { my $input_line_number = $line_of_tokens->{_line_number}; $self->note_embedded_tab($input_line_number); } $CODE_type = 'VB'; - goto NEXT; + next; } } @@ -5699,10 +6006,11 @@ sub set_CODE_type { ) { $In_format_skipping_section = 1; + my $input_line_no = $line_of_tokens->{_line_number}; write_logfile_entry( "Line $input_line_no: Entering format-skipping section\n"); $CODE_type = 'FS'; - goto NEXT; + next; } # ignore trailing blank tokens (they will get deleted later) @@ -5713,7 +6021,7 @@ sub set_CODE_type { # blank line.. if ( $jmax < 0 ) { $CODE_type = 'BL'; - goto NEXT; + next; } # Handle comments @@ -5765,7 +6073,7 @@ sub set_CODE_type { if ( $last_CODE_type eq 'HSC' ) { $has_side_comment = 1; $CODE_type = 'HSC'; - goto NEXT; + next; } # starting a new HSC chain? @@ -5799,14 +6107,14 @@ sub set_CODE_type { if ( !$follows_csc ) { $has_side_comment = 1; $CODE_type = 'HSC'; - goto NEXT; + next; } } } if ($is_static_block_comment) { $CODE_type = $no_leading_space ? 'SBCX' : 'SBC'; - goto NEXT; + next; } elsif ($Last_line_had_side_comment && !$rOpts_maximum_consecutive_blank_lines @@ -5817,11 +6125,11 @@ sub set_CODE_type { # cannot be inserted. There is related code in sub # 'process_line_of_CODE' $CODE_type = 'SBCX'; - goto NEXT; + next; } else { $CODE_type = 'BC'; - goto NEXT; + next; } } @@ -5829,12 +6137,12 @@ sub set_CODE_type { if ($rOpts_indent_only) { $CODE_type = 'IO'; - goto NEXT; + next; } if ( !$rOpts_add_newlines ) { $CODE_type = 'NIN'; - goto NEXT; + next; } # Patch needed for MakeMaker. Do not break a statement @@ -5868,10 +6176,10 @@ sub set_CODE_type { # This code type has lower priority than others $CODE_type = 'VER'; - goto NEXT; + next; } - - NEXT: + } + continue { $line_of_tokens->{_code_type} = $CODE_type; } @@ -5900,6 +6208,7 @@ sub find_non_indenting_braces { if ( $line_type ne 'CODE' ) { # shouldn't happen + DEVEL_MODE && Fault("unexpected line_type=$line_type\n"); next; } my $CODE_type = $line_of_tokens->{_code_type}; @@ -5908,6 +6217,7 @@ sub find_non_indenting_braces { unless ( defined($Kfirst) && $rLL->[$Klast]->[_TYPE_] eq '#' ) { # shouldn't happen + DEVEL_MODE && Fault("did not get a comment\n"); next; } next unless ( $Klast > $Kfirst ); # maybe HSC @@ -6096,13 +6406,146 @@ BEGIN { } +{ #<<< begin clousure respace_tokens + +my $rLL_new; # This will be the new array of tokens + +# These are variables in $self +my $rLL; +my $length_function; +my $is_encoded_data; + +my $K_closing_ternary; +my $K_opening_ternary; +my $rchildren_of_seqno; +my $rhas_broken_code_block; +my $rhas_broken_list; +my $rhas_broken_list_with_lec; +my $rhas_code_block; +my $rhas_list; +my $rhas_ternary; +my $ris_assigned_structure; +my $ris_broken_container; +my $ris_excluded_lp_container; +my $ris_list_by_seqno; +my $ris_permanently_broken; +my $rlec_count_by_seqno; +my $roverride_cab3; +my $rparent_of_seqno; +my $rtype_count_by_seqno; +my $rblock_type_of_seqno; + +my $K_opening_container; +my $K_closing_container; + +my %K_first_here_doc_by_seqno; + +my $last_nonblank_code_type; +my $last_nonblank_code_token; +my $last_nonblank_block_type; +my $last_last_nonblank_code_type; +my $last_last_nonblank_code_token; + +my %seqno_stack; +my %K_old_opening_by_seqno; +my $depth_next; +my $depth_next_max; + +my $cumulative_length; + +# Variables holding the current line info +my $Ktoken_vars; +my $Kfirst_old; +my $Klast_old; +my $Klast_old_code; +my $CODE_type; + +my $rwhitespace_flags; + +sub initialize_respace_tokens_closure { + + my ($self) = @_; + + $rLL_new = []; # This is the new array + + $rLL = $self->[_rLL_]; + $length_function = $self->[_length_function_]; + $is_encoded_data = $self->[_is_encoded_data_]; + + $K_closing_ternary = $self->[_K_closing_ternary_]; + $K_opening_ternary = $self->[_K_opening_ternary_]; + $rchildren_of_seqno = $self->[_rchildren_of_seqno_]; + $rhas_broken_code_block = $self->[_rhas_broken_code_block_]; + $rhas_broken_list = $self->[_rhas_broken_list_]; + $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_]; + $rhas_code_block = $self->[_rhas_code_block_]; + $rhas_list = $self->[_rhas_list_]; + $rhas_ternary = $self->[_rhas_ternary_]; + $ris_assigned_structure = $self->[_ris_assigned_structure_]; + $ris_broken_container = $self->[_ris_broken_container_]; + $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_]; + $ris_list_by_seqno = $self->[_ris_list_by_seqno_]; + $ris_permanently_broken = $self->[_ris_permanently_broken_]; + $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_]; + $roverride_cab3 = $self->[_roverride_cab3_]; + $rparent_of_seqno = $self->[_rparent_of_seqno_]; + $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_]; + $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; + + # Note that $K_opening_container and $K_closing_container have values + # defined in sub get_line() for the previous K indexes. They were needed + # in case option 'indent-only' was set, and we didn't get here. We no longer + # need those and will eliminate them now to avoid any possible mixing of + # old and new values. + $K_opening_container = $self->[_K_opening_container_] = {}; + $K_closing_container = $self->[_K_closing_container_] = {}; + + %K_first_here_doc_by_seqno = (); + + $last_nonblank_code_type = ';'; + $last_nonblank_code_token = ';'; + $last_nonblank_block_type = EMPTY_STRING; + $last_last_nonblank_code_type = ';'; + $last_last_nonblank_code_token = ';'; + + %seqno_stack = (); + %K_old_opening_by_seqno = (); # Note: old K index + $depth_next = 0; + $depth_next_max = 0; + + # we will be setting token lengths as we go + $cumulative_length = 0; + + $Ktoken_vars = undef; # the old K value of $rtoken_vars + $Kfirst_old = undef; # min K of old line + $Klast_old = undef; # max K of old line + $Klast_old_code = undef; # K of last token if side comment + $CODE_type = EMPTY_STRING; + + # Set the whitespace flags, which indicate the token spacing preference. + $rwhitespace_flags = $self->set_whitespace_flags(); + + return; + +} ## end sub initialize_respace_tokens_closure + sub respace_tokens { my $self = shift; - return if $rOpts->{'indent-only'}; + #-------------------------------------------------------------------------- # This routine is called once per file to do as much formatting as possible # before new line breaks are set. + #-------------------------------------------------------------------------- + + # Return parameters: + # Set $severe_error=true if processing must terminate immediately + my ( $severe_error, $rqw_lines ); + + # We change any spaces in --indent-only mode + if ( $rOpts->{'indent-only'} ) { + return ( $severe_error, $rqw_lines ); + } # This routine makes all necessary and possible changes to the tokenization # after the initial tokenization of the file. This is a tedious routine, @@ -6121,1160 +6564,704 @@ sub respace_tokens { # Method: The old tokens are copied one-by-one, with changes, from the old # linear storage array $rLL to a new array $rLL_new. - my $rLL = $self->[_rLL_]; - my $Klimit_old = $self->[_Klimit_]; - my $rlines = $self->[_rlines_]; - my $length_function = $self->[_length_function_]; - my $is_encoded_data = $self->[_is_encoded_data_]; + # (re-)initialize closure variables for this problem + $self->initialize_respace_tokens_closure(); - my $rLL_new = []; # This is the new array - my $rtoken_vars; - my $Ktoken_vars; # the old K value of $rtoken_vars - my ( $Kfirst_old, $Klast_old ); # Range of old line - my $Klast_old_code; # K of last token if side comment - my $Kmax = @{$rLL} - 1; - - my $CODE_type = EMPTY_STRING; + #-------------------------------- + # Main over all lines of the file + #-------------------------------- + my $rlines = $self->[_rlines_]; my $line_type = EMPTY_STRING; + my $last_K_out; - # Set the whitespace flags, which indicate the token spacing preference. - my $rwhitespace_flags = $self->set_whitespace_flags(); + foreach my $line_of_tokens ( @{$rlines} ) { - # we will be setting token lengths as we go - my $cumulative_length = 0; + my $input_line_number = $line_of_tokens->{_line_number}; + my $last_line_type = $line_type; + $line_type = $line_of_tokens->{_line_type}; + next unless ( $line_type eq 'CODE' ); + my $last_CODE_type = $CODE_type; + $CODE_type = $line_of_tokens->{_code_type}; - my %seqno_stack; - my %K_old_opening_by_seqno = (); # Note: old K index - my $depth_next = 0; - my $depth_next_max = 0; + if ( $CODE_type eq 'BL' ) { + my $seqno = $seqno_stack{ $depth_next - 1 }; + if ( defined($seqno) ) { + $self->[_rblank_and_comment_count_]->{$seqno} += 1; + $self->set_permanently_broken($seqno) + if (!$ris_permanently_broken->{$seqno} + && $rOpts_maximum_consecutive_blank_lines ); + } + } - # Note that $K_opening_container and $K_closing_container have values - # defined in sub get_line() for the previous K indexes. They were needed - # in case option 'indent-only' was set, and we didn't get here. We no longer - # need those and will eliminate them now to avoid any possible mixing of - # old and new values. - my $K_opening_container = $self->[_K_opening_container_] = {}; - my $K_closing_container = $self->[_K_closing_container_] = {}; - - my $K_closing_ternary = $self->[_K_closing_ternary_]; - my $K_opening_ternary = $self->[_K_opening_ternary_]; - my $rK_phantom_semicolons = $self->[_rK_phantom_semicolons_]; - my $rchildren_of_seqno = $self->[_rchildren_of_seqno_]; - my $rhas_broken_code_block = $self->[_rhas_broken_code_block_]; - my $rhas_broken_list = $self->[_rhas_broken_list_]; - my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_]; - my $rhas_code_block = $self->[_rhas_code_block_]; - my $rhas_list = $self->[_rhas_list_]; - my $rhas_ternary = $self->[_rhas_ternary_]; - my $ris_assigned_structure = $self->[_ris_assigned_structure_]; - my $ris_broken_container = $self->[_ris_broken_container_]; - my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_]; - my $ris_list_by_seqno = $self->[_ris_list_by_seqno_]; - my $ris_permanently_broken = $self->[_ris_permanently_broken_]; - my $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_]; - my $roverride_cab3 = $self->[_roverride_cab3_]; - my $rparent_of_seqno = $self->[_rparent_of_seqno_]; - my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_]; - my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; + my $rK_range = $line_of_tokens->{_rK_range}; + my ( $Kfirst, $Klast ) = @{$rK_range}; + next unless defined($Kfirst); + ( $Kfirst_old, $Klast_old ) = ( $Kfirst, $Klast ); + $Klast_old_code = $Klast_old; - my $last_nonblank_code_type = ';'; - my $last_nonblank_code_token = ';'; - my $last_nonblank_block_type = EMPTY_STRING; - my $last_last_nonblank_code_type = ';'; - my $last_last_nonblank_code_token = ';'; + # Be sure an old K value is defined for sub store_token + $Ktoken_vars = $Kfirst; - my %K_first_here_doc_by_seqno; + # Check for correct sequence of token indexes... + # An error here means that sub write_line() did not correctly + # package the tokenized lines as it received them. If we + # get a fault here it has not output a continuous sequence + # of K values. Or a line of CODE may have been mis-marked as + # something else. There is no good way to continue after such an + # error. + if ( defined($last_K_out) ) { + if ( $Kfirst != $last_K_out + 1 ) { + Fault_Warn( + "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst" + ); + $severe_error = 1; + return ( $severe_error, $rqw_lines ); + } + } + else { - my $set_permanently_broken = sub { - my ($seqno) = @_; - while ( defined($seqno) ) { - $ris_permanently_broken->{$seqno} = 1; - $seqno = $rparent_of_seqno->{$seqno}; + # The first token should always have been given index 0 by sub + # write_line() + if ( $Kfirst != 0 ) { + Fault("Program Bug: first K is $Kfirst but should be 0"); + } } - return; - }; - my $store_token = sub { - my ($item) = @_; + $last_K_out = $Klast; - # This will be the index of this item in the new array - my $KK_new = @{$rLL_new}; + # Handle special lines of code + if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) { - #------------------------------------------------------------------ - # NOTE: called once per token so coding efficiency is critical here - #------------------------------------------------------------------ + # CODE_types are as follows. + # 'BL' = Blank Line + # 'VB' = Verbatim - line goes out verbatim + # 'FS' = Format Skipping - line goes out verbatim, no blanks + # 'IO' = Indent Only - only indentation may be changed + # 'NIN' = No Internal Newlines - line does not get broken + # 'HSC'=Hanging Side Comment - fix this hanging side comment + # 'BC'=Block Comment - an ordinary full line comment + # 'SBC'=Static Block Comment - a block comment which does not get + # indented + # 'SBCX'=Static Block Comment Without Leading Space + # 'VER'=VERSION statement + # '' or (undefined) - no restructions - my $type = $item->[_TYPE_]; - my $is_blank = $type eq 'b'; - my $block_type = EMPTY_STRING; + # For a hanging side comment we insert an empty quote before + # the comment so that it becomes a normal side comment and + # will be aligned by the vertical aligner + if ( $CODE_type eq 'HSC' ) { - # Do not output consecutive blanks. This situation should have been - # prevented earlier, but it is worth checking because later routines - # make this assumption. - if ( $is_blank && $KK_new && $rLL_new->[-1]->[_TYPE_] eq 'b' ) { - return; - } + # Safety Check: This must be a line with one token (a comment) + my $rvars_Kfirst = $rLL->[$Kfirst]; + if ( $Kfirst == $Klast && $rvars_Kfirst->[_TYPE_] eq '#' ) { - # check for a sequenced item (i.e., container or ?/:) - my $type_sequence = $item->[_TYPE_SEQUENCE_]; - my $token = $item->[_TOKEN_]; - if ($type_sequence) { + # Note that even if the flag 'noadd-whitespace' is set, we + # will make an exception here and allow a blank to be + # inserted to push the comment to the right. We can think + # of this as an adjustment of indentation rather than + # whitespace between tokens. This will also prevent the + # hanging side comment from getting converted to a block + # comment if whitespace gets deleted, as for example with + # the -extrude and -mangle options. + my $rcopy = + copy_token_as_type( $rvars_Kfirst, 'q', EMPTY_STRING ); + $self->store_token($rcopy); + $rcopy = copy_token_as_type( $rvars_Kfirst, 'b', SPACE ); + $self->store_token($rcopy); + $self->store_token($rvars_Kfirst); + next; + } + else { - if ( $is_opening_token{$token} ) { + # This line was mis-marked by sub scan_comment. Catch in + # DEVEL_MODE, otherwise try to repair and keep going. + Fault( + "Program bug. A hanging side comment has been mismarked" + ) if (DEVEL_MODE); - $K_opening_container->{$type_sequence} = $KK_new; - $block_type = $rblock_type_of_seqno->{$type_sequence}; + $CODE_type = EMPTY_STRING; + $line_of_tokens->{_code_type} = $CODE_type; + } + } - # Fix for case b1100: Count a line ending in ', [' as having - # a line-ending comma. Otherwise, these commas can be hidden - # with something like --opening-square-bracket-right - if ( $last_nonblank_code_type eq ',' - && $Ktoken_vars == $Klast_old_code - && $Ktoken_vars > $Kfirst_old ) - { - $rlec_count_by_seqno->{$type_sequence}++; - } - - if ( $last_nonblank_code_type eq '=' - || $last_nonblank_code_type eq '=>' ) - { - $ris_assigned_structure->{$type_sequence} = - $last_nonblank_code_type; - } - - my $seqno_parent = $seqno_stack{ $depth_next - 1 }; - $seqno_parent = SEQ_ROOT unless defined($seqno_parent); - push @{ $rchildren_of_seqno->{$seqno_parent} }, $type_sequence; - $rparent_of_seqno->{$type_sequence} = $seqno_parent; - $seqno_stack{$depth_next} = $type_sequence; - $K_old_opening_by_seqno{$type_sequence} = $Ktoken_vars; - $depth_next++; - - if ( $depth_next > $depth_next_max ) { - $depth_next_max = $depth_next; - } - } - elsif ( $is_closing_token{$token} ) { - - $K_closing_container->{$type_sequence} = $KK_new; - $block_type = $rblock_type_of_seqno->{$type_sequence}; - - # Do not include terminal commas in counts - if ( $last_nonblank_code_type eq ',' - || $last_nonblank_code_type eq '=>' ) - { - my $seqno = $seqno_stack{ $depth_next - 1 }; - if ($seqno) { - $rtype_count_by_seqno->{$seqno} - ->{$last_nonblank_code_type}--; - - if ( $Ktoken_vars == $Kfirst_old - && $last_nonblank_code_type eq ',' - && $rlec_count_by_seqno->{$seqno} ) - { - $rlec_count_by_seqno->{$seqno}--; - } - } - } - - # Update the stack... - $depth_next--; + # Copy tokens unchanged + foreach my $KK ( $Kfirst .. $Klast ) { + $Ktoken_vars = $KK; + $self->store_token( $rLL->[$KK] ); } - else { - - # For ternary, note parent but do not include as child - my $seqno_parent = $seqno_stack{ $depth_next - 1 }; - $seqno_parent = SEQ_ROOT unless defined($seqno_parent); - $rparent_of_seqno->{$type_sequence} = $seqno_parent; + next; + } - # These are not yet used but could be useful - if ( $token eq '?' ) { - $K_opening_ternary->{$type_sequence} = $KK_new; - } - elsif ( $token eq ':' ) { - $K_closing_ternary->{$type_sequence} = $KK_new; - } - else { + # Handle normal line.. - # We really shouldn't arrive here, just being cautious: - # The only sequenced types output by the tokenizer are the - # opening & closing containers and the ternary types. Each - # of those was checked above. So we would only get here - # if the tokenizer has been changed to mark some other - # tokens with sequence numbers. - if (DEVEL_MODE) { - Fault( -"Unexpected token type with sequence number: type='$type', seqno='$type_sequence'" - ); - } - } + # Define index of last token before any side comment for comma counts + my $type_end = $rLL->[$Klast_old_code]->[_TYPE_]; + if ( ( $type_end eq '#' || $type_end eq 'b' ) + && $Klast_old_code > $Kfirst_old ) + { + $Klast_old_code--; + if ( $rLL->[$Klast_old_code]->[_TYPE_] eq 'b' + && $Klast_old_code > $Kfirst_old ) + { + $Klast_old_code--; } } - # Find the length of this token. Later it may be adjusted if phantom - # or ignoring side comment lengths. - my $token_length = - $is_encoded_data - ? $length_function->($token) - : length($token); - - # handle comments - my $is_comment = $type eq '#'; - if ($is_comment) { - - # trim comments if necessary - my $ord = ord( substr( $token, -1, 1 ) ); + # Insert any essential whitespace between lines + # if last line was normal CODE. + # Patch for rt #125012: use K_previous_code rather than '_nonblank' + # because comments may disappear. + if ( $last_line_type eq 'CODE' ) { + my $type_next = $rLL->[$Kfirst]->[_TYPE_]; + my $token_next = $rLL->[$Kfirst]->[_TOKEN_]; if ( - $ord > 0 - && ( $ord < ORD_PRINTABLE_MIN - || $ord > ORD_PRINTABLE_MAX ) - && $token =~ s/\s+$// + is_essential_whitespace( + $last_last_nonblank_code_token, + $last_last_nonblank_code_type, + $last_nonblank_code_token, + $last_nonblank_code_type, + $token_next, + $type_next, + ) ) { - $token_length = $length_function->($token); - $item->[_TOKEN_] = $token; - } - # Mark length of side comments as just 1 if sc lengths are ignored - if ( $rOpts_ignore_side_comment_lengths - && ( !$CODE_type || $CODE_type eq 'HSC' ) ) - { - $token_length = 1; - } - my $seqno = $seqno_stack{ $depth_next - 1 }; - if ( defined($seqno) - && !$ris_permanently_broken->{$seqno} ) - { - $set_permanently_broken->($seqno); + # Copy this first token as blank, but use previous line number + my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', SPACE ); + $rcopy->[_LINE_INDEX_] = + $rLL_new->[-1]->[_LINE_INDEX_]; + + # The level and ci_level of newly created spaces should be the + # same as the previous token. Otherwise blinking states can + # be created if the -lp mode is used. See similar coding in + # sub 'store_space_and_token'. Fixes cases b1109 b1110. + $rcopy->[_LEVEL_] = + $rLL_new->[-1]->[_LEVEL_]; + $rcopy->[_CI_LEVEL_] = + $rLL_new->[-1]->[_CI_LEVEL_]; + + $self->store_token($rcopy); } } - $item->[_TOKEN_LENGTH_] = $token_length; - - # and update the cumulative length - $cumulative_length += $token_length; + #----------------------------------------------- + # Inner loop to respace tokens on a line of code + #----------------------------------------------- - # Save the length sum to just AFTER this token - $item->[_CUMULATIVE_LENGTH_] = $cumulative_length; + # The inner loop is in a separate sub for clarity + $self->respace_tokens_inner_loop( $Kfirst, $Klast, $input_line_number ); - if ( !$is_blank && !$is_comment ) { + } # End line loop - # Remember the most recent two non-blank, non-comment tokens. - # NOTE: the phantom semicolon code may change the output stack - # without updating these values. Phantom semicolons are considered - # the same as blanks for now, but future needs might change that. - # See the related note in sub '$add_phantom_semicolon'. - $last_last_nonblank_code_type = $last_nonblank_code_type; - $last_last_nonblank_code_token = $last_nonblank_code_token; + # finalize data structures + $self->respace_post_loop_ops(); - $last_nonblank_code_type = $type; - $last_nonblank_code_token = $token; - $last_nonblank_block_type = $block_type; + # Reset memory to be the new array + $self->[_rLL_] = $rLL_new; + my $Klimit; + if ( @{$rLL_new} ) { $Klimit = @{$rLL_new} - 1 } + $self->[_Klimit_] = $Klimit; - # count selected types - if ( $is_counted_type{$type} ) { - my $seqno = $seqno_stack{ $depth_next - 1 }; - if ( defined($seqno) ) { - $rtype_count_by_seqno->{$seqno}->{$type}++; + # During development, verify that the new array still looks okay. + DEVEL_MODE && $self->check_token_array(); - # Count line-ending commas for -bbx - if ( $type eq ',' && $Ktoken_vars == $Klast_old_code ) { - $rlec_count_by_seqno->{$seqno}++; - } + # update the token limits of each line + ( $severe_error, $rqw_lines ) = $self->resync_lines_and_tokens(); - # Remember index of first here doc target - if ( $type eq 'h' && !$K_first_here_doc_by_seqno{$seqno} ) { - $K_first_here_doc_by_seqno{$seqno} = $KK_new; - } - } - } - } + return ( $severe_error, $rqw_lines ); +} ## end sub respace_tokens - # For reference, here is how to get the parent sequence number. - # This is not used because it is slower than finding it on the fly - # in sub parent_seqno_by_K: +sub respace_tokens_inner_loop { - # my $seqno_parent = - # $type_sequence && $is_opening_token{$token} - # ? $seqno_stack{ $depth_next - 2 } - # : $seqno_stack{ $depth_next - 1 }; - # my $KK = @{$rLL_new}; - # $rseqno_of_parent_by_K->{$KK} = $seqno_parent; + my ( $self, $Kfirst, $Klast, $input_line_number ) = @_; - # and finally, add this item to the new array - push @{$rLL_new}, $item; - return; - }; + #----------------------------------------------------------------- + # Loop to copy all tokens on one line, making any spacing changes, + # while also collecting information needed by later subs. + #----------------------------------------------------------------- + foreach my $KK ( $Kfirst .. $Klast ) { - my $store_token_and_space = sub { - my ( $item, $want_space ) = @_; + # TODO: consider eliminating this closure var by passing directly to + # store_token following pattern of store_tokens_to_go. + $Ktoken_vars = $KK; - # store a token with preceding space if requested and needed + my $rtoken_vars = $rLL->[$KK]; + my $type = $rtoken_vars->[_TYPE_]; - # First store the space - if ( $want_space - && @{$rLL_new} - && $rLL_new->[-1]->[_TYPE_] ne 'b' - && $rOpts_add_whitespace ) - { - my $rcopy = [ @{$item} ]; - $rcopy->[_TYPE_] = 'b'; - $rcopy->[_TOKEN_] = SPACE; - $rcopy->[_TYPE_SEQUENCE_] = EMPTY_STRING; + # Handle a blank space ... + if ( $type eq 'b' ) { - $rcopy->[_LINE_INDEX_] = - $rLL_new->[-1]->[_LINE_INDEX_]; + # Delete it if not wanted by whitespace rules + # or we are deleting all whitespace + # Note that whitespace flag is a flag indicating whether a + # white space BEFORE the token is needed + next if ( $KK >= $Klast ); # skip terminal blank + my $Knext = $KK + 1; - # Patch 23-Jan-2021 to fix -lp blinkers: - # The level and ci_level of newly created spaces should be the same - # as the previous token. Otherwise the coding for the -lp option - # can create a blinking state in some rare cases. - $rcopy->[_LEVEL_] = - $rLL_new->[-1]->[_LEVEL_]; - $rcopy->[_CI_LEVEL_] = - $rLL_new->[-1]->[_CI_LEVEL_]; + if ($rOpts_freeze_whitespace) { + $self->store_token($rtoken_vars); + next; + } - $store_token->($rcopy); - } + my $ws = $rwhitespace_flags->[$Knext]; + if ( $ws == -1 + || $rOpts_delete_old_whitespace ) + { - # then the token - $store_token->($item); - return; - }; + my $token_next = $rLL->[$Knext]->[_TOKEN_]; + my $type_next = $rLL->[$Knext]->[_TYPE_]; - my $add_phantom_semicolon = sub { + my $do_not_delete = is_essential_whitespace( + $last_last_nonblank_code_token, + $last_last_nonblank_code_type, + $last_nonblank_code_token, + $last_nonblank_code_type, + $token_next, + $type_next, + ); - my ($KK) = @_; + # Note that repeated blanks will get filtered out here + next unless ($do_not_delete); + } - my $Kp = $self->K_previous_nonblank( undef, $rLL_new ); - return unless ( defined($Kp) ); + # make it just one character + $rtoken_vars->[_TOKEN_] = SPACE; + $self->store_token($rtoken_vars); + next; + } - # we are only adding semicolons for certain block types - my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_]; - return unless ($type_sequence); - my $block_type = $rblock_type_of_seqno->{$type_sequence}; - return unless ($block_type); - return - unless ( $ok_to_add_semicolon_for_block_type{$block_type} - || $block_type =~ /^(sub|package)/ - || $block_type =~ /^\w+\:$/ ); + my $token = $rtoken_vars->[_TOKEN_]; - my $type_p = $rLL_new->[$Kp]->[_TYPE_]; - my $token_p = $rLL_new->[$Kp]->[_TOKEN_]; - my $type_sequence_p = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_]; + # Handle a sequenced token ... i.e. one of ( ) { } [ ] ? : + if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) { - # Do not add a semicolon if... - return - if ( + # One of ) ] } ... + if ( $is_closing_token{$token} ) { - # it would follow a comment (and be isolated) - $type_p eq '#' + my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; + my $block_type = $rblock_type_of_seqno->{$type_sequence}; - # it follows a code block ( because they are not always wanted - # there and may add clutter) - || $type_sequence_p && $rblock_type_of_seqno->{$type_sequence_p} + #--------------------------------------------- + # check for semicolon addition in a code block + #--------------------------------------------- + if ($block_type) { - # it would follow a label - || $type_p eq 'J' + # if not preceded by a ';' .. + if ( $last_nonblank_code_type ne ';' ) { - # it would be inside a 'format' statement (and cause syntax error) - || ( $type_p eq 'k' - && $token_p =~ /format/ ) + # tentatively insert a semicolon if appropriate + $self->add_phantom_semicolon($KK) + if $rOpts->{'add-semicolons'}; + } + } - ); + #---------------------------------------------------------- + # check for addition/deletion of a trailing comma in a list + #---------------------------------------------------------- + else { - # Do not add a semicolon if it would impede a weld with an immediately - # following closing token...like this - # { ( some code ) } - # ^--No semicolon can go here + # if this is a list .. + my $rtype_count = $rtype_count_by_seqno->{$type_sequence}; + if ( $rtype_count + && $rtype_count->{','} + && !$rtype_count->{';'} + && !$rtype_count->{'f'} ) + { - # look at the previous token... note use of the _NEW rLL array here, - # but sequence numbers are invariant. - my $seqno_inner = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_]; + # if NOT preceded by a comma.. + if ( $last_nonblank_code_type ne ',' ) { - # If it is also a CLOSING token we have to look closer... - if ( - $seqno_inner - && $is_closing_token{$token_p} + # insert a comma if requested + if ( $rOpts_add_trailing_commas + && %trailing_comma_rules ) + { + $self->add_trailing_comma( $KK, $Kfirst, + $trailing_comma_rules{$token} ); + } + } - # we only need to look if there is just one inner container.. - && defined( $rchildren_of_seqno->{$type_sequence} ) - && @{ $rchildren_of_seqno->{$type_sequence} } == 1 - ) - { + # if preceded by a comma .. + else { - # Go back and see if the corresponding two OPENING tokens are also - # together. Note that we are using the OLD K indexing here: - my $K_outer_opening = $K_old_opening_by_seqno{$type_sequence}; - if ( defined($K_outer_opening) ) { - my $K_nxt = $self->K_next_nonblank($K_outer_opening); - if ( defined($K_nxt) ) { - my $seqno_nxt = $rLL->[$K_nxt]->[_TYPE_SEQUENCE_]; + # delete a trailing comma if requested + my $deleted; + if ( $rOpts_delete_trailing_commas + && %trailing_comma_rules ) + { + $deleted = + $self->delete_trailing_comma( $KK, $Kfirst, + $trailing_comma_rules{$token} ); + } - # Is the next token after the outer opening the same as - # our inner closing (i.e. same sequence number)? - # If so, do not insert a semicolon here. - return if ( $seqno_nxt && $seqno_nxt == $seqno_inner ); + # delete a weld-interfering comma if requested + if ( !$deleted + && $rOpts_delete_weld_interfering_commas + && $is_closing_type{ + $last_last_nonblank_code_type} ) + { + $self->delete_weld_interfering_comma($KK); + } + } + } } } } - # We will insert an empty semicolon here as a placeholder. Later, if - # it becomes the last token on a line, we will bring it to life. The - # advantage of doing this is that (1) we just have to check line - # endings, and (2) the phantom semicolon has zero width and therefore - # won't cause needless breaks of one-line blocks. - my $Ktop = -1; - if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' - && $want_left_space{';'} == WS_NO ) - { + # Modify certain tokens here for whitespace + # The following is not yet done, but could be: + # sub (x x x) + # ( $type =~ /^[wit]$/ ) + elsif ( $is_wit{$type} ) { + + # change '$ var' to '$var' etc + # change '@ ' to '@' + # Examples: <> + my $ord = ord( substr( $token, 1, 1 ) ); + if ( - # convert the blank into a semicolon.. - # be careful: we are working on the new stack top - # on a token which has been stored. - my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', SPACE ); + # quick test for possible blank at second char + $ord > 0 && ( $ord < ORD_PRINTABLE_MIN + || $ord > ORD_PRINTABLE_MAX ) + ) + { + my ( $sigil, $word ) = split /\s+/, $token, 2; - # Convert the existing blank to: - # a phantom semicolon for one_line_block option = 0 or 1 - # a real semicolon for one_line_block option = 2 - my $tok = EMPTY_STRING; - my $len_tok = 0; - if ( $rOpts_one_line_block_semicolons == 2 ) { - $tok = ';'; - $len_tok = 1; + # $sigil =~ /^[\$\&\%\*\@]$/ ) + if ( $is_sigil{$sigil} ) { + $token = $sigil; + $token .= $word if ( defined($word) ); # fix c104 + $rtoken_vars->[_TOKEN_] = $token; + } } - $rLL_new->[$Ktop]->[_TOKEN_] = $tok; - $rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = $len_tok; - $rLL_new->[$Ktop]->[_TYPE_] = ';'; + # Trim certain spaces in identifiers + if ( $type eq 'i' ) { - # NOTE: we are changing the output stack without updating variables - # $last_nonblank_code_type, etc. Future needs might require that - # those variables be updated here. For now, it seems ok to skip - # this. + if ( + ( + substr( $token, 0, 3 ) eq 'sub' + || $rOpts_sub_alias_list + ) + && $token =~ /$SUB_PATTERN/ + ) + { - # Save list of new K indexes of phantom semicolons. - # This will be needed if we want to undo them for iterations in - # future coding. - push @{$rK_phantom_semicolons}, @{$rLL_new} - 1; + # -spp = 0 : no space before opening prototype paren + # -spp = 1 : stable (follow input spacing) + # -spp = 2 : always space before opening prototype paren + if ( !defined($rOpts_space_prototype_paren) + || $rOpts_space_prototype_paren == 1 ) + { + ## default: stable + } + elsif ( $rOpts_space_prototype_paren == 0 ) { + $token =~ s/\s+\(/\(/; + } + elsif ( $rOpts_space_prototype_paren == 2 ) { + $token =~ s/\(/ (/; + } - # Then store a new blank - $store_token->($rcopy); - } - else { + # one space max, and no tabs + $token =~ s/\s+/ /g; + $rtoken_vars->[_TOKEN_] = $token; + } - # Patch for issue c078: keep line indexes in order. If the top - # token is a space that we are keeping (due to '-wls=';') then - # we have to check that old line indexes stay in order. - # In very rare - # instances in which side comments have been deleted and converted - # into blanks, we may have filtered down multiple blanks into just - # one. In that case the top blank may have a higher line number - # than the previous nonblank token. Although the line indexes of - # blanks are not really significant, we need to keep them in order - # in order to pass error checks. - if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' ) { - my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_]; - my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_]; - if ( $new_top_ix < $old_top_ix ) { - $rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix; - } - } - - my $rcopy = - copy_token_as_type( $rLL_new->[$Kp], ';', EMPTY_STRING ); - $store_token->($rcopy); - push @{$rK_phantom_semicolons}, @{$rLL_new} - 1; - } - return; - }; + # clean up spaces in package identifiers, like + # "package Bob::Dog;" + elsif ( substr( $token, 0, 7 ) eq 'package' + && $token =~ /^package\s/ ) + { + $token =~ s/\s+/ /g; + $rtoken_vars->[_TOKEN_] = $token; + } - my $check_Q = sub { + # trim identifiers of trailing blanks which can occur + # under some unusual circumstances, such as if the + # identifier 'witch' has trailing blanks on input here: + # + # sub + # witch + # () # prototype may be on new line ... + # ... + my $ord_ch = ord( substr( $token, -1, 1 ) ); + if ( - # Check that a quote looks okay - # This sub works but needs to by sync'd with the log file output - # before it can be used. - my ( $KK, $Kfirst, $line_number ) = @_; - my $token = $rLL->[$KK]->[_TOKEN_]; - $self->note_embedded_tab($line_number) if ( $token =~ "\t" ); + # quick check for possible ending space + $ord_ch > 0 && ( $ord_ch < ORD_PRINTABLE_MIN + || $ord_ch > ORD_PRINTABLE_MAX ) + ) + { + $token =~ s/\s+$//g; + $rtoken_vars->[_TOKEN_] = $token; + } + } + } - # The remainder of this routine looks for something like - # '$var = s/xxx/yyy/;' - # in case it should have been '$var =~ s/xxx/yyy/;' + # handle semicolons + elsif ( $type eq ';' ) { - # Start by looking for a token beginning with one of: s y m / tr - return - unless ( $is_s_y_m_slash{ substr( $token, 0, 1 ) } - || substr( $token, 0, 2 ) eq 'tr' ); + # Remove unnecessary semicolons, but not after bare + # blocks, where it could be unsafe if the brace is + # mis-tokenized. + if ( + $rOpts->{'delete-semicolons'} + && ( + ( + $last_nonblank_block_type + && $last_nonblank_code_type eq '}' + && ( + $is_block_without_semicolon{ + $last_nonblank_block_type} + || $last_nonblank_block_type =~ /$SUB_PATTERN/ + || $last_nonblank_block_type =~ /^\w+:$/ + ) + ) + || $last_nonblank_code_type eq ';' + ) + ) + { - # ... and preceded by one of: = == != - my $Kp = $self->K_previous_nonblank( undef, $rLL_new ); - return unless ( defined($Kp) ); - my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_]; - return unless ( $is_unexpected_equals{$previous_nonblank_type} ); - my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_]; + # This looks like a deletable semicolon, but even if a + # semicolon can be deleted it is not necessarily best to do + # so. We apply these additional rules for deletion: + # - Always ok to delete a ';' at the end of a line + # - Never delete a ';' before a '#' because it would + # promote it to a block comment. + # - If a semicolon is not at the end of line, then only + # delete if it is followed by another semicolon or closing + # token. This includes the comment rule. It may take + # two passes to get to a final state, but it is a little + # safer. For example, keep the first semicolon here: + # eval { sub bubba { ok(0) }; ok(0) } || ok(1); + # It is not required but adds some clarity. + my $ok_to_delete = 1; + if ( $KK < $Klast ) { + my $Kn = $self->K_next_nonblank($KK); + if ( defined($Kn) && $Kn <= $Klast ) { + my $next_nonblank_token_type = $rLL->[$Kn]->[_TYPE_]; + $ok_to_delete = $next_nonblank_token_type eq ';' + || $next_nonblank_token_type eq '}'; + } + } - my $previous_nonblank_type_2 = 'b'; - my $previous_nonblank_token_2 = EMPTY_STRING; - my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new ); - if ( defined($Kpp) ) { - $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_]; - $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_]; - } + # do not delete only nonblank token in a file + else { + my $Kp = $self->K_previous_code( undef, $rLL_new ); + my $Kn = $self->K_next_nonblank($KK); + $ok_to_delete = defined($Kn) || defined($Kp); + } - my $next_nonblank_token = EMPTY_STRING; - my $Kn = $KK + 1; - if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 } - if ( $Kn <= $Kmax ) { - $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_]; + if ($ok_to_delete) { + $self->note_deleted_semicolon($input_line_number); + next; + } + else { + write_logfile_entry("Extra ';'\n"); + } + } } - my $token_0 = $rLL->[$Kfirst]->[_TOKEN_]; - my $type_0 = $rLL->[$Kfirst]->[_TYPE_]; + # Old patch to add space to something like "x10". + # Note: This is now done in the Tokenizer, but this code remains + # for reference. + elsif ( $type eq 'n' ) { + if ( substr( $token, 0, 1 ) eq 'x' && $token =~ /^x\d+/ ) { + $token =~ s/x/x /; + $rtoken_vars->[_TOKEN_] = $token; + if (DEVEL_MODE) { + Fault(<[_TOKEN_] = $token; + if ( $self->[_save_logfile_] && $token =~ /\t/ ) { + $self->note_embedded_tab($input_line_number); + } + if ( $rwhitespace_flags->[$KK] == WS_YES ) { + $self->store_space_and_token($rtoken_vars); + } + else { + $self->store_token($rtoken_vars); + } + next; + } ## end if ( $type eq 'q' ) - # followed by some kind of termination - # (but give complaint if we can not see far enough ahead) - && $next_nonblank_token =~ /^[; \)\}]$/ + # delete repeated commas if requested + elsif ( $type eq ',' ) { + if ( $last_nonblank_code_type eq ',' + && $rOpts->{'delete-repeated-commas'} ) + { + # Could note this deletion as a possible future update: + ## $self->note_deleted_comma($input_line_number); + next; + } - # scalar is not declared - ## =~ /^(my|our|local)$/ - && !( $type_0 eq 'k' && $is_my_our_local{$token_0} ) - ) - { - my $lno = 1 + $rLL_new->[$Kp]->[_LINE_INDEX_]; - my $guess = substr( $previous_nonblank_token, 0, 1 ) . '~'; - complain( -"Line $lno: Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n" - ); + # remember input line index of first comma if -wtc is used + if (%trailing_comma_rules) { + my $seqno = $seqno_stack{ $depth_next - 1 }; + if ( defined($seqno) + && !defined( $self->[_rfirst_comma_line_index_]->{$seqno} ) + ) + { + $self->[_rfirst_comma_line_index_]->{$seqno} = + $rtoken_vars->[_LINE_INDEX_]; + } + } } - return; - }; - - #------------------------------------------- - # Main loop to respace all lines of the file - #------------------------------------------- - my $last_K_out; - foreach my $line_of_tokens ( @{$rlines} ) { - - my $input_line_number = $line_of_tokens->{_line_number}; - my $last_line_type = $line_type; - $line_type = $line_of_tokens->{_line_type}; - next unless ( $line_type eq 'CODE' ); - my $last_CODE_type = $CODE_type; - $CODE_type = $line_of_tokens->{_code_type}; - my $rK_range = $line_of_tokens->{_rK_range}; - my ( $Kfirst, $Klast ) = @{$rK_range}; - next unless defined($Kfirst); - ( $Kfirst_old, $Klast_old ) = ( $Kfirst, $Klast ); - $Klast_old_code = $Klast_old; + # change 'LABEL :' to 'LABEL:' + elsif ( $type eq 'J' ) { + $token =~ s/\s+//g; + $rtoken_vars->[_TOKEN_] = $token; + } - # Be sure an old K value is defined for sub $store_token - $Ktoken_vars = $Kfirst; + # check a quote for problems + elsif ( $type eq 'Q' ) { + $self->check_Q( $KK, $Kfirst, $input_line_number ) + if ( $self->[_save_logfile_] ); + } - # Check for correct sequence of token indexes... - # An error here means that sub write_line() did not correctly - # package the tokenized lines as it received them. If we - # get a fault here it has not output a continuous sequence - # of K values. Or a line of CODE may have been mis-marked as - # something else. There is no good way to continue after such an - # error. - # FIXME: Calling Fault will produce zero output; it would be best to - # find a way to dump the input file. - if ( defined($last_K_out) ) { - if ( $Kfirst != $last_K_out + 1 ) { - Fault( - "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst" - ); - } + # Store this token with possible previous blank + if ( $rwhitespace_flags->[$KK] == WS_YES ) { + $self->store_space_and_token($rtoken_vars); } else { - - # The first token should always have been given index 0 by sub - # write_line() - if ( $Kfirst != 0 ) { - Fault("Program Bug: first K is $Kfirst but should be 0"); - } + $self->store_token($rtoken_vars); } - $last_K_out = $Klast; - # Handle special lines of code - if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) { - - # CODE_types are as follows. - # 'BL' = Blank Line - # 'VB' = Verbatim - line goes out verbatim - # 'FS' = Format Skipping - line goes out verbatim, no blanks - # 'IO' = Indent Only - only indentation may be changed - # 'NIN' = No Internal Newlines - line does not get broken - # 'HSC'=Hanging Side Comment - fix this hanging side comment - # 'BC'=Block Comment - an ordinary full line comment - # 'SBC'=Static Block Comment - a block comment which does not get - # indented - # 'SBCX'=Static Block Comment Without Leading Space - # 'VER'=VERSION statement - # '' or (undefined) - no restructions + } # End token loop + return; +} ## end sub respace_tokens_inner_loop - # For a hanging side comment we insert an empty quote before - # the comment so that it becomes a normal side comment and - # will be aligned by the vertical aligner - if ( $CODE_type eq 'HSC' ) { +sub respace_post_loop_ops { - # Safety Check: This must be a line with one token (a comment) - my $rvars_Kfirst = $rLL->[$Kfirst]; - if ( $Kfirst == $Klast && $rvars_Kfirst->[_TYPE_] eq '#' ) { + my ($self) = @_; - # Note that even if the flag 'noadd-whitespace' is set, we - # will make an exception here and allow a blank to be - # inserted to push the comment to the right. We can think - # of this as an adjustment of indentation rather than - # whitespace between tokens. This will also prevent the - # hanging side comment from getting converted to a block - # comment if whitespace gets deleted, as for example with - # the -extrude and -mangle options. - my $rcopy = - copy_token_as_type( $rvars_Kfirst, 'q', EMPTY_STRING ); - $store_token->($rcopy); - $rcopy = copy_token_as_type( $rvars_Kfirst, 'b', SPACE ); - $store_token->($rcopy); - $store_token->($rvars_Kfirst); - next; - } - else { + # Walk backwards through the tokens, making forward links to sequence items. + if ( @{$rLL_new} ) { + my $KNEXT; + foreach my $KK ( reverse( 0 .. @{$rLL_new} - 1 ) ) { + $rLL_new->[$KK]->[_KNEXT_SEQ_ITEM_] = $KNEXT; + if ( $rLL_new->[$KK]->[_TYPE_SEQUENCE_] ) { $KNEXT = $KK } + } + $self->[_K_first_seq_item_] = $KNEXT; + } - # This line was mis-marked by sub scan_comment. Catch in - # DEVEL_MODE, otherwise try to repair and keep going. - Fault( - "Program bug. A hanging side comment has been mismarked" - ) if (DEVEL_MODE); + # Find and remember lists by sequence number + my %is_C_style_for; + foreach my $seqno ( keys %{$K_opening_container} ) { + my $K_opening = $K_opening_container->{$seqno}; + next unless defined($K_opening); - $CODE_type = EMPTY_STRING; - $line_of_tokens->{_code_type} = $CODE_type; - } - } + # code errors may leave undefined closing tokens + my $K_closing = $K_closing_container->{$seqno}; + next unless defined($K_closing); - if ( $CODE_type eq 'BL' ) { - my $seqno = $seqno_stack{ $depth_next - 1 }; - if ( defined($seqno) - && !$ris_permanently_broken->{$seqno} - && $rOpts_maximum_consecutive_blank_lines ) - { - $set_permanently_broken->($seqno); - } - } + my $lx_open = $rLL_new->[$K_opening]->[_LINE_INDEX_]; + my $lx_close = $rLL_new->[$K_closing]->[_LINE_INDEX_]; + my $line_diff = $lx_close - $lx_open; + $ris_broken_container->{$seqno} = $line_diff; - # Copy tokens unchanged - foreach my $KK ( $Kfirst .. $Klast ) { - $Ktoken_vars = $KK; - $store_token->( $rLL->[$KK] ); + # See if this is a list + my $is_list; + my $rtype_count = $rtype_count_by_seqno->{$seqno}; + if ($rtype_count) { + my $comma_count = $rtype_count->{','}; + my $fat_comma_count = $rtype_count->{'=>'}; + my $semicolon_count = $rtype_count->{';'}; + if ( $rtype_count->{'f'} ) { + $semicolon_count += $rtype_count->{'f'}; + $is_C_style_for{$seqno} = 1; } - next; - } - # Handle normal line.. + # We will define a list to be a container with one or more commas + # and no semicolons. Note that we have included the semicolons + # in a 'for' container in the semicolon count to keep c-style for + # statements from being formatted as lists. + if ( ( $comma_count || $fat_comma_count ) && !$semicolon_count ) { + $is_list = 1; - # Define index of last token before any side comment for comma counts - my $type_end = $rLL->[$Klast_old_code]->[_TYPE_]; - if ( ( $type_end eq '#' || $type_end eq 'b' ) - && $Klast_old_code > $Kfirst_old ) - { - $Klast_old_code--; - if ( $rLL->[$Klast_old_code]->[_TYPE_] eq 'b' - && $Klast_old_code > $Kfirst_old ) - { - $Klast_old_code--; + # We need to do one more check for a parenthesized list: + # At an opening paren following certain tokens, such as 'if', + # we do not want to format the contents as a list. + if ( $rLL_new->[$K_opening]->[_TOKEN_] eq '(' ) { + my $Kp = $self->K_previous_code( $K_opening, $rLL_new ); + if ( defined($Kp) ) { + my $type_p = $rLL_new->[$Kp]->[_TYPE_]; + my $token_p = $rLL_new->[$Kp]->[_TOKEN_]; + $is_list = + $type_p eq 'k' + ? !$is_nonlist_keyword{$token_p} + : !$is_nonlist_type{$type_p}; + } + } } } - # Insert any essential whitespace between lines - # if last line was normal CODE. - # Patch for rt #125012: use K_previous_code rather than '_nonblank' - # because comments may disappear. - if ( $last_line_type eq 'CODE' ) { - my $type_next = $rLL->[$Kfirst]->[_TYPE_]; - my $token_next = $rLL->[$Kfirst]->[_TOKEN_]; - if ( - is_essential_whitespace( - $last_last_nonblank_code_token, - $last_last_nonblank_code_type, - $last_nonblank_code_token, - $last_nonblank_code_type, - $token_next, - $type_next, - ) - ) - { - - # Copy this first token as blank, but use previous line number - my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', SPACE ); - $rcopy->[_LINE_INDEX_] = - $rLL_new->[-1]->[_LINE_INDEX_]; + # Look for a block brace marked as uncertain. If the tokenizer thinks + # its guess is uncertain for the type of a brace following an unknown + # bareword then it adds a trailing space as a signal. We can fix the + # type here now that we have had a better look at the contents of the + # container. This fixes case b1085. To find the corresponding code in + # Tokenizer.pm search for 'b1085' with an editor. + my $block_type = $rblock_type_of_seqno->{$seqno}; + if ( $block_type && substr( $block_type, -1, 1 ) eq SPACE ) { - # The level and ci_level of newly created spaces should be the - # same as the previous token. Otherwise blinking states can - # be created if the -lp mode is used. See similar coding in - # sub 'store_token_and_space'. Fixes cases b1109 b1110. - $rcopy->[_LEVEL_] = - $rLL_new->[-1]->[_LEVEL_]; - $rcopy->[_CI_LEVEL_] = - $rLL_new->[-1]->[_CI_LEVEL_]; + # Always remove the trailing space + $block_type =~ s/\s+$//; - $store_token->($rcopy); + # Try to filter out parenless sub calls + my $Knn1 = $self->K_next_nonblank( $K_opening, $rLL_new ); + my $Knn2; + if ( defined($Knn1) ) { + $Knn2 = $self->K_next_nonblank( $Knn1, $rLL_new ); } - } - - #------------------------------------------------------- - # Loop to copy all tokens on this line, with any changes - #------------------------------------------------------- - my $type_sequence; - foreach my $KK ( $Kfirst .. $Klast ) { - $Ktoken_vars = $KK; - $rtoken_vars = $rLL->[$KK]; - my $token = $rtoken_vars->[_TOKEN_]; - my $type = $rtoken_vars->[_TYPE_]; - my $last_type_sequence = $type_sequence; - $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; - - # Handle a blank space ... - if ( $type eq 'b' ) { - - # Delete it if not wanted by whitespace rules - # or we are deleting all whitespace - # Note that whitespace flag is a flag indicating whether a - # white space BEFORE the token is needed - next if ( $KK >= $Klast ); # skip terminal blank - my $Knext = $KK + 1; - - if ($rOpts_freeze_whitespace) { - $store_token->($rtoken_vars); - next; - } - - my $ws = $rwhitespace_flags->[$Knext]; - if ( $ws == -1 - || $rOpts_delete_old_whitespace ) - { - - my $token_next = $rLL->[$Knext]->[_TOKEN_]; - my $type_next = $rLL->[$Knext]->[_TYPE_]; - - my $do_not_delete = is_essential_whitespace( - $last_last_nonblank_code_token, - $last_last_nonblank_code_type, - $last_nonblank_code_token, - $last_nonblank_code_type, - $token_next, - $type_next, - ); - - # Note that repeated blanks will get filtered out here - next unless ($do_not_delete); - } + my $type_nn1 = defined($Knn1) ? $rLL_new->[$Knn1]->[_TYPE_] : 'b'; + my $type_nn2 = defined($Knn2) ? $rLL_new->[$Knn2]->[_TYPE_] : 'b'; - # make it just one character - $rtoken_vars->[_TOKEN_] = SPACE; - $store_token->($rtoken_vars); - next; + # if ( $type_nn1 =~ /^[wU]$/ && $type_nn2 =~ /^[wiqQGCZ]$/ ) { + if ( $wU{$type_nn1} && $wiq{$type_nn2} ) { + $is_list = 0; } - # Handle a nonblank token... + # Convert to a hash brace if it looks like it holds a list + if ($is_list) { - if ($type_sequence) { + $block_type = EMPTY_STRING; - # Insert a tentative missing semicolon if the next token is - # a closing block brace - if ( - $type eq '}' - && $token eq '}' + $rLL_new->[$K_opening]->[_CI_LEVEL_] = 1; + $rLL_new->[$K_closing]->[_CI_LEVEL_] = 1; + } - # not preceded by a ';' - && $last_nonblank_code_type ne ';' + $rblock_type_of_seqno->{$seqno} = $block_type; + } - # and this is not a VERSION stmt (is all one line, we - # are not inserting semicolons on one-line blocks) - && $CODE_type ne 'VER' + # Handle a list container + if ( $is_list && !$block_type ) { + $ris_list_by_seqno->{$seqno} = $seqno; + my $seqno_parent = $rparent_of_seqno->{$seqno}; + my $depth = 0; + while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) { + $depth++; - # and we are allowed to add semicolons - && $rOpts->{'add-semicolons'} - ) + # for $rhas_list we need to save the minimum depth + if ( !$rhas_list->{$seqno_parent} + || $rhas_list->{$seqno_parent} > $depth ) { - $add_phantom_semicolon->($KK); + $rhas_list->{$seqno_parent} = $depth; } - } - - # Modify certain tokens here for whitespace - # The following is not yet done, but could be: - # sub (x x x) - # ( $type =~ /^[wit]$/ ) - elsif ( $is_wit{$type} ) { - # change '$ var' to '$var' etc - # change '@ ' to '@' - # Examples: <> - my $ord = ord( substr( $token, 1, 1 ) ); - if ( - - # quick test for possible blank at second char - $ord > 0 && ( $ord < ORD_PRINTABLE_MIN - || $ord > ORD_PRINTABLE_MAX ) - ) - { - my ( $sigil, $word ) = split /\s+/, $token, 2; - - # $sigil =~ /^[\$\&\%\*\@]$/ ) - if ( $is_sigil{$sigil} ) { - $token = $sigil; - $token .= $word if ( defined($word) ); # fix c104 - $rtoken_vars->[_TOKEN_] = $token; - } - } - - # Split identifiers with leading arrows, inserting blanks - # if necessary. It is easier and safer here than in the - # tokenizer. For example '->new' becomes two tokens, '->' - # and 'new' with a possible blank between. - # - # Note: there is a related patch in sub set_whitespace_flags - elsif (length($token) > 2 - && substr( $token, 0, 2 ) eq '->' - && $token =~ /^\-\>(.*)$/ - && $1 ) - { - - my $token_save = $1; - my $type_save = $type; - - # Change '-> new' to '->new' - $token_save =~ s/^\s+//g; - - # store a blank to left of arrow if necessary - my $Kprev = $self->K_previous_nonblank($KK); - if ( defined($Kprev) - && $rLL->[$Kprev]->[_TYPE_] ne 'b' - && $rOpts_add_whitespace - && $want_left_space{'->'} == WS_YES ) - { - my $rcopy = - copy_token_as_type( $rtoken_vars, 'b', SPACE ); - $store_token->($rcopy); - } - - # then store the arrow - my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' ); - $store_token->($rcopy); - - # store a blank after the arrow if requested - # added for issue git #33 - if ( $want_right_space{'->'} == WS_YES ) { - my $rcopy_b = - copy_token_as_type( $rtoken_vars, 'b', SPACE ); - $store_token->($rcopy_b); - } - - # then reset the current token to be the remainder, - # and reset the whitespace flag according to the arrow - $token = $rtoken_vars->[_TOKEN_] = $token_save; - $type = $rtoken_vars->[_TYPE_] = $type_save; - $store_token->($rtoken_vars); - next; - } - - # Trim certain spaces in identifiers - if ( $type eq 'i' ) { - - if ( - ( - substr( $token, 0, 3 ) eq 'sub' - || $rOpts_sub_alias_list - ) - && $token =~ /$SUB_PATTERN/ - ) - { - - # -spp = 0 : no space before opening prototype paren - # -spp = 1 : stable (follow input spacing) - # -spp = 2 : always space before opening prototype paren - my $spp = $rOpts->{'space-prototype-paren'}; - if ( defined($spp) ) { - if ( $spp == 0 ) { $token =~ s/\s+\(/\(/; } - elsif ( $spp == 2 ) { $token =~ s/\(/ (/; } - } - - # one space max, and no tabs - $token =~ s/\s+/ /g; - $rtoken_vars->[_TOKEN_] = $token; - } - - # clean up spaces in package identifiers, like - # "package Bob::Dog;" - elsif ( substr( $token, 0, 7 ) eq 'package' - && $token =~ /^package\s/ ) - { - $token =~ s/\s+/ /g; - $rtoken_vars->[_TOKEN_] = $token; - } - - # trim identifiers of trailing blanks which can occur - # under some unusual circumstances, such as if the - # identifier 'witch' has trailing blanks on input here: - # - # sub - # witch - # () # prototype may be on new line ... - # ... - my $ord_ch = ord( substr( $token, -1, 1 ) ); - if ( - - # quick check for possible ending space - $ord_ch > 0 && ( $ord_ch < ORD_PRINTABLE_MIN - || $ord_ch > ORD_PRINTABLE_MAX ) - ) - { - $token =~ s/\s+$//g; - $rtoken_vars->[_TOKEN_] = $token; - } - } - } - - # handle semicolons - elsif ( $type eq ';' ) { - - # Remove unnecessary semicolons, but not after bare - # blocks, where it could be unsafe if the brace is - # mis-tokenized. - if ( - $rOpts->{'delete-semicolons'} - && ( - ( - $last_nonblank_block_type - && $last_nonblank_code_type eq '}' - && ( - $is_block_without_semicolon{ - $last_nonblank_block_type} - || $last_nonblank_block_type =~ /$SUB_PATTERN/ - || $last_nonblank_block_type =~ /^\w+:$/ - ) - ) - || $last_nonblank_code_type eq ';' - ) - ) - { - - # This looks like a deletable semicolon, but even if a - # semicolon can be deleted it is not necessarily best to do - # so. We apply these additional rules for deletion: - # - Always ok to delete a ';' at the end of a line - # - Never delete a ';' before a '#' because it would - # promote it to a block comment. - # - If a semicolon is not at the end of line, then only - # delete if it is followed by another semicolon or closing - # token. This includes the comment rule. It may take - # two passes to get to a final state, but it is a little - # safer. For example, keep the first semicolon here: - # eval { sub bubba { ok(0) }; ok(0) } || ok(1); - # It is not required but adds some clarity. - my $ok_to_delete = 1; - if ( $KK < $Klast ) { - my $Kn = $self->K_next_nonblank($KK); - if ( defined($Kn) && $Kn <= $Klast ) { - my $next_nonblank_token_type = - $rLL->[$Kn]->[_TYPE_]; - $ok_to_delete = $next_nonblank_token_type eq ';' - || $next_nonblank_token_type eq '}'; - } - } - - # do not delete only nonblank token in a file - else { - my $Kp = $self->K_previous_code( undef, $rLL_new ); - my $Kn = $self->K_next_nonblank($KK); - $ok_to_delete = defined($Kn) || defined($Kp); - } - - if ($ok_to_delete) { - $self->note_deleted_semicolon($input_line_number); - next; - } - else { - write_logfile_entry("Extra ';'\n"); - } - } - } - - # Old patch to add space to something like "x10". - # Note: This is now done in the Tokenizer, but this code remains - # for reference. - elsif ( $type eq 'n' ) { - if ( substr( $token, 0, 1 ) eq 'x' && $token =~ /^x\d+/ ) { - $token =~ s/x/x /; - $rtoken_vars->[_TOKEN_] = $token; - if (DEVEL_MODE) { - Fault(<[_TOKEN_] = $token; - $self->note_embedded_tab($input_line_number) - if ( $token =~ "\t" ); - $store_token_and_space->( - $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES - ); - next; - } ## end if ( $type eq 'q' ) - - # change 'LABEL :' to 'LABEL:' - elsif ( $type eq 'J' ) { - $token =~ s/\s+//g; - $rtoken_vars->[_TOKEN_] = $token; - } - - # check a quote for problems - elsif ( $type eq 'Q' ) { - $check_Q->( $KK, $Kfirst, $input_line_number ); - } - - # Store this token with possible previous blank - if ( $rwhitespace_flags->[$KK] == WS_YES ) { - $store_token_and_space->( $rtoken_vars, 1 ); - } - else { - $store_token->($rtoken_vars); - } - - } # End token loop - } # End line loop - - # Walk backwards through the tokens, making forward links to sequence items. - if ( @{$rLL_new} ) { - my $KNEXT; - foreach my $KK ( reverse( 0 .. @{$rLL_new} - 1 ) ) { - $rLL_new->[$KK]->[_KNEXT_SEQ_ITEM_] = $KNEXT; - if ( $rLL_new->[$KK]->[_TYPE_SEQUENCE_] ) { $KNEXT = $KK } - } - $self->[_K_first_seq_item_] = $KNEXT; - } - - # Find and remember lists by sequence number - foreach my $seqno ( keys %{$K_opening_container} ) { - my $K_opening = $K_opening_container->{$seqno}; - next unless defined($K_opening); - - # code errors may leave undefined closing tokens - my $K_closing = $K_closing_container->{$seqno}; - next unless defined($K_closing); - - my $lx_open = $rLL_new->[$K_opening]->[_LINE_INDEX_]; - my $lx_close = $rLL_new->[$K_closing]->[_LINE_INDEX_]; - my $line_diff = $lx_close - $lx_open; - $ris_broken_container->{$seqno} = $line_diff; - - # See if this is a list - my $is_list; - my $rtype_count = $rtype_count_by_seqno->{$seqno}; - if ($rtype_count) { - my $comma_count = $rtype_count->{','}; - my $fat_comma_count = $rtype_count->{'=>'}; - my $semicolon_count = $rtype_count->{';'} || $rtype_count->{'f'}; - - # We will define a list to be a container with one or more commas - # and no semicolons. Note that we have included the semicolons - # in a 'for' container in the semicolon count to keep c-style for - # statements from being formatted as lists. - if ( ( $comma_count || $fat_comma_count ) && !$semicolon_count ) { - $is_list = 1; - - # We need to do one more check for a parenthesized list: - # At an opening paren following certain tokens, such as 'if', - # we do not want to format the contents as a list. - if ( $rLL_new->[$K_opening]->[_TOKEN_] eq '(' ) { - my $Kp = $self->K_previous_code( $K_opening, $rLL_new ); - if ( defined($Kp) ) { - my $type_p = $rLL_new->[$Kp]->[_TYPE_]; - if ( $type_p eq 'k' ) { - my $token_p = $rLL_new->[$Kp]->[_TOKEN_]; - $is_list = 0 if ( $is_nonlist_keyword{$token_p} ); - } - else { - $is_list = 0 if ( $is_nonlist_type{$type_p} ); - } - } - } - } - } - - # Look for a block brace marked as uncertain. If the tokenizer thinks - # its guess is uncertain for the type of a brace following an unknown - # bareword then it adds a trailing space as a signal. We can fix the - # type here now that we have had a better look at the contents of the - # container. This fixes case b1085. To find the corresponding code in - # Tokenizer.pm search for 'b1085' with an editor. - my $block_type = $rblock_type_of_seqno->{$seqno}; - if ( $block_type && substr( $block_type, -1, 1 ) eq SPACE ) { - - # Always remove the trailing space - $block_type =~ s/\s+$//; - - # Try to filter out parenless sub calls - my $Knn1 = $self->K_next_nonblank( $K_opening, $rLL_new ); - my $Knn2; - if ( defined($Knn1) ) { - $Knn2 = $self->K_next_nonblank( $Knn1, $rLL_new ); - } - my $type_nn1 = defined($Knn1) ? $rLL_new->[$Knn1]->[_TYPE_] : 'b'; - my $type_nn2 = defined($Knn2) ? $rLL_new->[$Knn2]->[_TYPE_] : 'b'; - - # if ( $type_nn1 =~ /^[wU]$/ && $type_nn2 =~ /^[wiqQGCZ]$/ ) { - if ( $wU{$type_nn1} && $wiq{$type_nn2} ) { - $is_list = 0; - } - - # Convert to a hash brace if it looks like it holds a list - if ($is_list) { - - $block_type = EMPTY_STRING; - - $rLL_new->[$K_opening]->[_CI_LEVEL_] = 1; - $rLL_new->[$K_closing]->[_CI_LEVEL_] = 1; - } - - $rblock_type_of_seqno->{$seqno} = $block_type; - } - - # Handle a list container - if ( $is_list && !$block_type ) { - $ris_list_by_seqno->{$seqno} = $seqno; - my $seqno_parent = $rparent_of_seqno->{$seqno}; - my $depth = 0; - while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) { - $depth++; - - # for $rhas_list we need to save the minimum depth - if ( !$rhas_list->{$seqno_parent} - || $rhas_list->{$seqno_parent} > $depth ) - { - $rhas_list->{$seqno_parent} = $depth; - } - - if ($line_diff) { - $rhas_broken_list->{$seqno_parent} = 1; + if ($line_diff) { + $rhas_broken_list->{$seqno_parent} = 1; # Patch1: We need to mark broken lists with non-terminal # line-ending commas for the -bbx=2 parameter. This insures @@ -7357,1162 +7344,2185 @@ EOM } } - # Reset memory to be the new array - $self->[_rLL_] = $rLL_new; - my $Klimit; - if ( @{$rLL_new} ) { $Klimit = @{$rLL_new} - 1 } - $self->[_Klimit_] = $Klimit; - - # During development, verify that the new array still looks okay. - DEVEL_MODE && $self->check_token_array(); + # Add -ci to C-style for loops (issue c154) + # This is much easier to do here than in the tokenizer. + foreach my $seqno ( keys %is_C_style_for ) { + my $K_opening = $K_opening_container->{$seqno}; + my $K_closing = $K_closing_container->{$seqno}; + my $type_last = 'f'; + for my $KK ( $K_opening + 1 .. $K_closing - 1 ) { + $rLL_new->[$KK]->[_CI_LEVEL_] = $type_last eq 'f' ? 0 : 1; + my $type = $rLL_new->[$KK]->[_TYPE_]; + if ( $type ne 'b' && $type ne '#' ) { $type_last = $type } + } + } - # reset the token limits of each line - $self->resync_lines_and_tokens(); + return; +} ## end sub respace_post_loop_ops +sub set_permanently_broken { + my ( $self, $seqno ) = @_; + while ( defined($seqno) ) { + $ris_permanently_broken->{$seqno} = 1; + $seqno = $rparent_of_seqno->{$seqno}; + } return; -} ## end sub respace_tokens +} ## end sub set_permanently_broken -sub copy_token_as_type { +sub store_token { - # This provides a quick way to create a new token by - # slightly modifying an existing token. - my ( $rold_token, $type, $token ) = @_; - if ( $type eq 'b' ) { - $token = SPACE unless defined($token); - } - elsif ( $type eq 'q' ) { - $token = EMPTY_STRING unless defined($token); - } - elsif ( $type eq '->' ) { - $token = '->' unless defined($token); - } - elsif ( $type eq ';' ) { - $token = ';' unless defined($token); - } - else { + my ( $self, $item ) = @_; - # Unexpected type ... this sub will work as long as both $token and - # $type are defined, but we should catch any unexpected types during - # development. - if (DEVEL_MODE) { - Fault(<' or ';' -EOM - } - else { - # shouldn't happen - } - } + #------------------------------------------ + # Store one token during respace operations + #------------------------------------------ - my @rnew_token = @{$rold_token}; - $rnew_token[_TYPE_] = $type; - $rnew_token[_TOKEN_] = $token; - $rnew_token[_TYPE_SEQUENCE_] = EMPTY_STRING; - return \@rnew_token; -} ## end sub copy_token_as_type + # Input parameter: + # $item = ref to a token -sub Debug_dump_tokens { + # NOTE: this sub is called once per token so coding efficiency is critical. - # a debug routine, not normally used - my ( $self, $msg ) = @_; - my $rLL = $self->[_rLL_]; - my $nvars = @{$rLL}; - print STDERR "$msg\n"; - print STDERR "ntokens=$nvars\n"; - print STDERR "K\t_TOKEN_\t_TYPE_\n"; - my $K = 0; + # The next multiple assignment statements are significantly faster than + # doing them one-by-one. + my ( - foreach my $item ( @{$rLL} ) { - print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n"; - $K++; - } - return; -} ## end sub Debug_dump_tokens + $type, + $token, + $type_sequence, -sub K_next_code { - my ( $self, $KK, $rLL ) = @_; + ) = @{$item}[ - # return the index K of the next nonblank, non-comment token - return unless ( defined($KK) && $KK >= 0 ); + _TYPE_, + _TOKEN_, + _TYPE_SEQUENCE_, - # use the standard array unless given otherwise - $rLL = $self->[_rLL_] unless ( defined($rLL) ); - my $Num = @{$rLL}; - my $Knnb = $KK + 1; - while ( $Knnb < $Num ) { - if ( !defined( $rLL->[$Knnb] ) ) { + ]; - # We seem to have encountered a gap in our array. - # This shouldn't happen because sub write_line() pushed - # items into the $rLL array. - Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE); + # Set the token length. Later it may be adjusted again if phantom or + # ignoring side comment lengths. + my $token_length = + $is_encoded_data ? $length_function->($token) : length($token); + + # handle blanks + if ( $type eq 'b' ) { + + # Do not output consecutive blanks. This situation should have been + # prevented earlier, but it is worth checking because later routines + # make this assumption. + if ( @{$rLL_new} && $rLL_new->[-1]->[_TYPE_] eq 'b' ) { return; } - if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' - && $rLL->[$Knnb]->[_TYPE_] ne '#' ) - { - return $Knnb; - } - $Knnb++; } - return; -} ## end sub K_next_code - -sub K_next_nonblank { - my ( $self, $KK, $rLL ) = @_; - - # return the index K of the next nonblank token, or - # return undef if none - return unless ( defined($KK) && $KK >= 0 ); - # The third arg allows this routine to be used on any array. This is - # useful in sub respace_tokens when we are copying tokens from an old $rLL - # to a new $rLL array. But usually the third arg will not be given and we - # will just use the $rLL array in $self. - $rLL = $self->[_rLL_] unless ( defined($rLL) ); - my $Num = @{$rLL}; - my $Knnb = $KK + 1; - return unless ( $Knnb < $Num ); - return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ); - return unless ( ++$Knnb < $Num ); - return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ); + # handle comments + elsif ( $type eq '#' ) { - # Backup loop. Very unlikely to get here; it means we have neighboring - # blanks in the token stream. - $Knnb++; - while ( $Knnb < $Num ) { + # trim comments if necessary + my $ord = ord( substr( $token, -1, 1 ) ); + if ( + $ord > 0 + && ( $ord < ORD_PRINTABLE_MIN + || $ord > ORD_PRINTABLE_MAX ) + && $token =~ s/\s+$// + ) + { + $token_length = $length_function->($token); + $item->[_TOKEN_] = $token; + } - # Safety check, this fault shouldn't happen: The $rLL array is the - # main array of tokens, so all entries should be used. It is - # initialized in sub write_line, and then re-initialized by sub - # $store_token() within sub respace_tokens. Tokens are pushed on - # so there shouldn't be any gaps. - if ( !defined( $rLL->[$Knnb] ) ) { - Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE); - return; + # Mark length of side comments as just 1 if sc lengths are ignored + if ( $rOpts_ignore_side_comment_lengths + && ( !$CODE_type || $CODE_type eq 'HSC' ) ) + { + $token_length = 1; + } + my $seqno = $seqno_stack{ $depth_next - 1 }; + if ( defined($seqno) ) { + $self->[_rblank_and_comment_count_]->{$seqno} += 1 + if ( $CODE_type eq 'BC' ); + $self->set_permanently_broken($seqno) + if !$ris_permanently_broken->{$seqno}; } - if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb } - $Knnb++; } - return; -} ## end sub K_next_nonblank -sub K_previous_code { + # handle non-blanks and non-comments + else { - # return the index K of the previous nonblank, non-comment token - # Call with $KK=undef to start search at the top of the array - my ( $self, $KK, $rLL ) = @_; + my $block_type; - # use the standard array unless given otherwise - $rLL = $self->[_rLL_] unless ( defined($rLL) ); - my $Num = @{$rLL}; - if ( !defined($KK) ) { $KK = $Num } - elsif ( $KK > $Num ) { + # check for a sequenced item (i.e., container or ?/:) + if ($type_sequence) { - # This fault can be caused by a programming error in which a bad $KK is - # given. The caller should make the first call with KK_new=undef to - # avoid this error. - Fault( -"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num" - ) if (DEVEL_MODE); - return; - } - my $Kpnb = $KK - 1; - while ( $Kpnb >= 0 ) { - if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' - && $rLL->[$Kpnb]->[_TYPE_] ne '#' ) - { - return $Kpnb; - } - $Kpnb--; - } - return; -} ## end sub K_previous_code + # This will be the index of this item in the new array + my $KK_new = @{$rLL_new}; -sub K_previous_nonblank { + if ( $is_opening_token{$token} ) { - # return index of previous nonblank token before item K; - # Call with $KK=undef to start search at the top of the array - my ( $self, $KK, $rLL ) = @_; + $K_opening_container->{$type_sequence} = $KK_new; + $block_type = $rblock_type_of_seqno->{$type_sequence}; - # use the standard array unless given otherwise - $rLL = $self->[_rLL_] unless ( defined($rLL) ); - my $Num = @{$rLL}; - if ( !defined($KK) ) { $KK = $Num } - elsif ( $KK > $Num ) { + # Fix for case b1100: Count a line ending in ', [' as having + # a line-ending comma. Otherwise, these commas can be hidden + # with something like --opening-square-bracket-right + if ( $last_nonblank_code_type eq ',' + && $Ktoken_vars == $Klast_old_code + && $Ktoken_vars > $Kfirst_old ) + { + $rlec_count_by_seqno->{$type_sequence}++; + } - # This fault can be caused by a programming error in which a bad $KK is - # given. The caller should make the first call with KK_new=undef to - # avoid this error. - Fault( -"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num" - ) if (DEVEL_MODE); - return; - } - my $Kpnb = $KK - 1; - return unless ( $Kpnb >= 0 ); - return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ); - return unless ( --$Kpnb >= 0 ); - return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ); + if ( $last_nonblank_code_type eq '=' + || $last_nonblank_code_type eq '=>' ) + { + $ris_assigned_structure->{$type_sequence} = + $last_nonblank_code_type; + } - # Backup loop. We should not get here unless some routine - # slipped repeated blanks into the token stream. - return unless ( --$Kpnb >= 0 ); - while ( $Kpnb >= 0 ) { - if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb } - $Kpnb--; - } - return; -} ## end sub K_previous_nonblank + my $seqno_parent = $seqno_stack{ $depth_next - 1 }; + $seqno_parent = SEQ_ROOT unless defined($seqno_parent); + push @{ $rchildren_of_seqno->{$seqno_parent} }, $type_sequence; + $rparent_of_seqno->{$type_sequence} = $seqno_parent; + $seqno_stack{$depth_next} = $type_sequence; + $K_old_opening_by_seqno{$type_sequence} = $Ktoken_vars; + $depth_next++; -sub parent_seqno_by_K { + if ( $depth_next > $depth_next_max ) { + $depth_next_max = $depth_next; + } + } + elsif ( $is_closing_token{$token} ) { - # Return the sequence number of the parent container of token K, if any. + $K_closing_container->{$type_sequence} = $KK_new; + $block_type = $rblock_type_of_seqno->{$type_sequence}; - my ( $self, $KK ) = @_; - my $rLL = $self->[_rLL_]; + # Do not include terminal commas in counts + if ( $last_nonblank_code_type eq ',' + || $last_nonblank_code_type eq '=>' ) + { + $rtype_count_by_seqno->{$type_sequence} + ->{$last_nonblank_code_type}--; - # The task is to jump forward to the next container token - # and use the sequence number of either it or its parent. + if ( $Ktoken_vars == $Kfirst_old + && $last_nonblank_code_type eq ',' + && $rlec_count_by_seqno->{$type_sequence} ) + { + $rlec_count_by_seqno->{$type_sequence}--; + } + } - # For example, consider the following with seqno=5 of the '[' and ']' - # being called with index K of the first token of each line: + # Update the stack... + $depth_next--; + } + else { - # # result - # push @tests, # - - # [ # - - # sub { 99 }, 'do {&{%s} for 1,2}', # 5 - # '(&{})(&{})', undef, # 5 - # [ 2, 2, 0 ], 0 # 5 - # ]; # - + # For ternary, note parent but do not include as child + my $seqno_parent = $seqno_stack{ $depth_next - 1 }; + $seqno_parent = SEQ_ROOT unless defined($seqno_parent); + $rparent_of_seqno->{$type_sequence} = $seqno_parent; - # NOTE: The ending parent will be SEQ_ROOT for a balanced file. For - # unbalanced files, last sequence number will either be undefined or it may - # be at a deeper level. In either case we will just return SEQ_ROOT to - # have a defined value and allow formatting to proceed. - my $parent_seqno = SEQ_ROOT; - my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_]; - if ($type_sequence) { - $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence}; - } - else { - my $Kt = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_]; - if ( defined($Kt) ) { - $type_sequence = $rLL->[$Kt]->[_TYPE_SEQUENCE_]; - my $type = $rLL->[$Kt]->[_TYPE_]; + # These are not yet used but could be useful + if ( $token eq '?' ) { + $K_opening_ternary->{$type_sequence} = $KK_new; + } + elsif ( $token eq ':' ) { + $K_closing_ternary->{$type_sequence} = $KK_new; + } + else { - # if next container token is closing, it is the parent seqno - if ( $is_closing_type{$type} ) { - $parent_seqno = $type_sequence; + # We really shouldn't arrive here, just being cautious: + # The only sequenced types output by the tokenizer are the + # opening & closing containers and the ternary types. Each + # of those was checked above. So we would only get here + # if the tokenizer has been changed to mark some other + # tokens with sequence numbers. + if (DEVEL_MODE) { + Fault( +"Unexpected token type with sequence number: type='$type', seqno='$type_sequence'" + ); + } + } } + } - # otherwise we want its parent container - else { - $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence}; + # Remember the most recent two non-blank, non-comment tokens. + # NOTE: the phantom semicolon code may change the output stack + # without updating these values. Phantom semicolons are considered + # the same as blanks for now, but future needs might change that. + # See the related note in sub 'add_phantom_semicolon'. + $last_last_nonblank_code_type = $last_nonblank_code_type; + $last_last_nonblank_code_token = $last_nonblank_code_token; + + $last_nonblank_code_type = $type; + $last_nonblank_code_token = $token; + $last_nonblank_block_type = $block_type; + + # count selected types + if ( $is_counted_type{$type} ) { + my $seqno = $seqno_stack{ $depth_next - 1 }; + if ( defined($seqno) ) { + $rtype_count_by_seqno->{$seqno}->{$type}++; + + # Count line-ending commas for -bbx + if ( $type eq ',' && $Ktoken_vars == $Klast_old_code ) { + $rlec_count_by_seqno->{$seqno}++; + } + + # Remember index of first here doc target + if ( $type eq 'h' && !$K_first_here_doc_by_seqno{$seqno} ) { + my $KK_new = @{$rLL_new}; + $K_first_here_doc_by_seqno{$seqno} = $KK_new; + } } } } - $parent_seqno = SEQ_ROOT unless ( defined($parent_seqno) ); - return $parent_seqno; -} ## end sub parent_seqno_by_K -sub is_in_block_by_i { - my ( $self, $i ) = @_; + # cumulative length is the length sum including this token + $cumulative_length += $token_length; - # returns true if - # token at i is contained in a BLOCK - # or is at root level - # or there is some kind of error (i.e. unbalanced file) - # returns false otherwise - return 1 if ( $i < 0 ); # shouldn't happen, bad call - my $seqno = $parent_seqno_to_go[$i]; - return 1 if ( !$seqno || $seqno eq SEQ_ROOT ); - return 1 if ( $self->[_rblock_type_of_seqno_]->{$seqno} ); - return; -} ## end sub is_in_block_by_i + $item->[_CUMULATIVE_LENGTH_] = $cumulative_length; + $item->[_TOKEN_LENGTH_] = $token_length; -sub is_in_list_by_i { - my ( $self, $i ) = @_; + # For reference, here is how to get the parent sequence number. + # This is not used because it is slower than finding it on the fly + # in sub parent_seqno_by_K: - # returns true if token at i is contained in a LIST - # returns false otherwise - my $seqno = $parent_seqno_to_go[$i]; - return unless ( $seqno && $seqno ne SEQ_ROOT ); - if ( $self->[_ris_list_by_seqno_]->{$seqno} ) { - return 1; - } + # my $seqno_parent = + # $type_sequence && $is_opening_token{$token} + # ? $seqno_stack{ $depth_next - 2 } + # : $seqno_stack{ $depth_next - 1 }; + # my $KK = @{$rLL_new}; + # $rseqno_of_parent_by_K->{$KK} = $seqno_parent; + + # and finally, add this item to the new array + push @{$rLL_new}, $item; return; -} ## end sub is_in_list_by_i +} ## end sub store_token -sub is_list_by_K { +sub store_space_and_token { + my ( $self, $item ) = @_; - # Return true if token K is in a list - my ( $self, $KK ) = @_; + # store a token with preceding space if requested and needed - my $parent_seqno = $self->parent_seqno_by_K($KK); - return unless defined($parent_seqno); - return $self->[_ris_list_by_seqno_]->{$parent_seqno}; -} + # First store the space + if ( @{$rLL_new} + && $rLL_new->[-1]->[_TYPE_] ne 'b' + && $rOpts_add_whitespace ) + { + my $rcopy = [ @{$item} ]; + $rcopy->[_TYPE_] = 'b'; + $rcopy->[_TOKEN_] = SPACE; + $rcopy->[_TYPE_SEQUENCE_] = EMPTY_STRING; -sub is_list_by_seqno { + $rcopy->[_LINE_INDEX_] = + $rLL_new->[-1]->[_LINE_INDEX_]; - # Return true if the immediate contents of a container appears to be a - # list. - my ( $self, $seqno ) = @_; - return unless defined($seqno); - return $self->[_ris_list_by_seqno_]->{$seqno}; -} + # Patch 23-Jan-2021 to fix -lp blinkers: + # The level and ci_level of newly created spaces should be the same + # as the previous token. Otherwise the coding for the -lp option + # can create a blinking state in some rare cases. + $rcopy->[_LEVEL_] = + $rLL_new->[-1]->[_LEVEL_]; + $rcopy->[_CI_LEVEL_] = + $rLL_new->[-1]->[_CI_LEVEL_]; -sub resync_lines_and_tokens { + $self->store_token($rcopy); + } - my $self = shift; - my $rLL = $self->[_rLL_]; - my $Klimit = $self->[_Klimit_]; - my $rlines = $self->[_rlines_]; - my @Krange_code_without_comments; - my @Klast_valign_code; + # then the token + $self->store_token($item); + return; +} ## end sub store_space_and_token - # Re-construct the arrays of tokens associated with the original input lines - # since they have probably changed due to inserting and deleting blanks - # and a few other tokens. +sub add_phantom_semicolon { - # This is the next token and its line index: - my $Knext = 0; - my $Kmax = defined($Klimit) ? $Klimit : -1; + my ( $self, $KK ) = @_; - # Verify that old line indexes are in still order. If this error occurs, - # check locations where sub 'respace_tokens' creates new tokens (like - # blank spaces). It must have set a bad old line index. - if ( DEVEL_MODE && defined($Klimit) ) { - my $iline = $rLL->[0]->[_LINE_INDEX_]; - foreach my $KK ( 1 .. $Klimit ) { - my $iline_last = $iline; - $iline = $rLL->[$KK]->[_LINE_INDEX_]; - if ( $iline < $iline_last ) { - my $KK_m = $KK - 1; - my $token_m = $rLL->[$KK_m]->[_TOKEN_]; - my $token = $rLL->[$KK]->[_TOKEN_]; - my $type_m = $rLL->[$KK_m]->[_TYPE_]; - my $type = $rLL->[$KK]->[_TYPE_]; - Fault(<K_previous_nonblank( undef, $rLL_new ); + return unless ( defined($Kp) ); - my $iline = -1; - foreach my $line_of_tokens ( @{$rlines} ) { - $iline++; - my $line_type = $line_of_tokens->{_line_type}; - if ( $line_type eq 'CODE' ) { + # we are only adding semicolons for certain block types + my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_]; + return unless ($type_sequence); + my $block_type = $rblock_type_of_seqno->{$type_sequence}; + return unless ($block_type); + return + unless ( $ok_to_add_semicolon_for_block_type{$block_type} + || $block_type =~ /^(sub|package)/ + || $block_type =~ /^\w+\:$/ ); - # Get the old number of tokens on this line - my $rK_range_old = $line_of_tokens->{_rK_range}; - my ( $Kfirst_old, $Klast_old ) = @{$rK_range_old}; - my $Kdiff_old = 0; - if ( defined($Kfirst_old) ) { - $Kdiff_old = $Klast_old - $Kfirst_old; - } + my $type_p = $rLL_new->[$Kp]->[_TYPE_]; + my $token_p = $rLL_new->[$Kp]->[_TOKEN_]; + my $type_sequence_p = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_]; - # Find the range of NEW K indexes for the line: - # $Kfirst = index of first token on line - # $Klast = index of last token on line - my ( $Kfirst, $Klast ); + # Do not add a semicolon if... + return + if ( - my $Knext_beg = $Knext; # this will be $Kfirst if we find tokens + # it would follow a comment (and be isolated) + $type_p eq '#' - # Optimization: Although the actual K indexes may be completely - # changed after respacing, the number of tokens on any given line - # will often be nearly unchanged. So we will see if we can start - # our search by guessing that the new line has the same number - # of tokens as the old line. - my $Knext_guess = $Knext + $Kdiff_old; - if ( $Knext_guess > $Knext - && $Knext_guess < $Kmax - && $rLL->[$Knext_guess]->[_LINE_INDEX_] <= $iline ) - { + # it follows a code block ( because they are not always wanted + # there and may add clutter) + || $type_sequence_p && $rblock_type_of_seqno->{$type_sequence_p} - # the guess is good, so we can start our search here - $Knext = $Knext_guess + 1; - } + # it would follow a label + || $type_p eq 'J' - while ($Knext <= $Kmax - && $rLL->[$Knext]->[_LINE_INDEX_] <= $iline ) - { - $Knext++; - } + # it would be inside a 'format' statement (and cause syntax error) + || ( $type_p eq 'k' + && $token_p =~ /format/ ) - if ( $Knext > $Knext_beg ) { + ); - $Klast = $Knext - 1; + # Do not add a semicolon if it would impede a weld with an immediately + # following closing token...like this + # { ( some code ) } + # ^--No semicolon can go here - # Delete any terminal blank token - if ( $rLL->[$Klast]->[_TYPE_] eq 'b' ) { $Klast -= 1 } + # look at the previous token... note use of the _NEW rLL array here, + # but sequence numbers are invariant. + my $seqno_inner = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_]; - if ( $Klast < $Knext_beg ) { - $Klast = undef; - } - else { + # If it is also a CLOSING token we have to look closer... + if ( + $seqno_inner + && $is_closing_token{$token_p} - $Kfirst = $Knext_beg; + # we only need to look if there is just one inner container.. + && defined( $rchildren_of_seqno->{$type_sequence} ) + && @{ $rchildren_of_seqno->{$type_sequence} } == 1 + ) + { - # Save ranges of non-comment code. This will be used by - # sub keep_old_line_breaks. - if ( $rLL->[$Kfirst]->[_TYPE_] ne '#' ) { - push @Krange_code_without_comments, [ $Kfirst, $Klast ]; - } + # Go back and see if the corresponding two OPENING tokens are also + # together. Note that we are using the OLD K indexing here: + my $K_outer_opening = $K_old_opening_by_seqno{$type_sequence}; + if ( defined($K_outer_opening) ) { + my $K_nxt = $self->K_next_nonblank($K_outer_opening); + if ( defined($K_nxt) ) { + my $seqno_nxt = $rLL->[$K_nxt]->[_TYPE_SEQUENCE_]; - # Only save ending K indexes of code types which are blank - # or 'VER'. These will be used for a convergence check. - # See related code in sub 'convey_batch_to_vertical_aligner' - my $CODE_type = $line_of_tokens->{_code_type}; - if ( !$CODE_type - || $CODE_type eq 'VER' ) - { - push @Klast_valign_code, $Klast; - } - } + # Is the next token after the outer opening the same as + # our inner closing (i.e. same sequence number)? + # If so, do not insert a semicolon here. + return if ( $seqno_nxt && $seqno_nxt == $seqno_inner ); } + } + } - # It is only safe to trim the actual line text if the input - # line had a terminal blank token. Otherwise, we may be - # in a quote. - if ( $line_of_tokens->{_ended_in_blank_token} ) { - $line_of_tokens->{_line_text} =~ s/\s+$//; - } - $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ]; + # We will insert an empty semicolon here as a placeholder. Later, if + # it becomes the last token on a line, we will bring it to life. The + # advantage of doing this is that (1) we just have to check line + # endings, and (2) the phantom semicolon has zero width and therefore + # won't cause needless breaks of one-line blocks. + my $Ktop = -1; + if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' + && $want_left_space{';'} == WS_NO ) + { - # Deleting semicolons can create new empty code lines - # which should be marked as blank - if ( !defined($Kfirst) ) { - my $CODE_type = $line_of_tokens->{_code_type}; - if ( !$CODE_type ) { - $line_of_tokens->{_code_type} = 'BL'; - } - } + # convert the blank into a semicolon.. + # be careful: we are working on the new stack top + # on a token which has been stored. + my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', SPACE ); + + # Convert the existing blank to: + # a phantom semicolon for one_line_block option = 0 or 1 + # a real semicolon for one_line_block option = 2 + my $tok = EMPTY_STRING; + my $len_tok = 0; + if ( $rOpts_one_line_block_semicolons == 2 ) { + $tok = ';'; + $len_tok = 1; } - } - # There shouldn't be any nodes beyond the last one. This routine is - # relinking lines and tokens after the tokens have been respaced. A fault - # here indicates some kind of bug has been introduced into the above loops. - # There is not good way to keep going; we better stop here. - # FIXME: This will produce zero output. it would be best to find a way to - # dump the input file. - if ( $Knext <= $Kmax ) { + $rLL_new->[$Ktop]->[_TOKEN_] = $tok; + $rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = $len_tok; + $rLL_new->[$Ktop]->[_TYPE_] = ';'; - Fault("unexpected tokens at end of file when reconstructing lines"); - } - $self->[_rKrange_code_without_comments_] = \@Krange_code_without_comments; + $self->[_rtype_count_by_seqno_]->{$type_sequence}->{';'}++; - # Setup the convergence test in the FileWriter based on line-ending indexes - my $file_writer_object = $self->[_file_writer_object_]; - $file_writer_object->setup_convergence_test( \@Klast_valign_code ); + # NOTE: we are changing the output stack without updating variables + # $last_nonblank_code_type, etc. Future needs might require that + # those variables be updated here. For now, it seems ok to skip + # this. - # Mark essential old breakpoints if combination -iob -lp is used. These - # two options do not work well together, but we can avoid turning -iob off - # by ignoring -iob at certain essential line breaks. - # Fixes cases b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058 - if ( $rOpts_ignore_old_breakpoints && $rOpts_line_up_parentheses ) { - my %is_assignment_or_fat_comma = %is_assignment; - $is_assignment_or_fat_comma{'=>'} = 1; - my $ris_essential_old_breakpoint = - $self->[_ris_essential_old_breakpoint_]; - my ( $Kfirst, $Klast ); - foreach my $line_of_tokens ( @{$rlines} ) { - my $line_type = $line_of_tokens->{_line_type}; - if ( $line_type ne 'CODE' ) { - ( $Kfirst, $Klast ) = ( undef, undef ); - next; - } - my ( $Kfirst_prev, $Klast_prev ) = ( $Kfirst, $Klast ); - ( $Kfirst, $Klast ) = @{ $line_of_tokens->{_rK_range} }; + # Then store a new blank + $self->store_token($rcopy); + } + else { - next unless defined($Klast_prev); - next unless defined($Kfirst); - my $type_last = $rLL->[$Klast_prev]->[_TOKEN_]; - my $type_first = $rLL->[$Kfirst]->[_TOKEN_]; - next - unless ( $is_assignment_or_fat_comma{$type_last} - || $is_assignment_or_fat_comma{$type_first} ); - $ris_essential_old_breakpoint->{$Klast_prev} = 1; + # Patch for issue c078: keep line indexes in order. If the top + # token is a space that we are keeping (due to '-wls=';') then + # we have to check that old line indexes stay in order. + # In very rare + # instances in which side comments have been deleted and converted + # into blanks, we may have filtered down multiple blanks into just + # one. In that case the top blank may have a higher line number + # than the previous nonblank token. Although the line indexes of + # blanks are not really significant, we need to keep them in order + # in order to pass error checks. + if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' ) { + my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_]; + my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_]; + if ( $new_top_ix < $old_top_ix ) { + $rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix; + } } + + my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', EMPTY_STRING ); + $self->store_token($rcopy); } return; -} ## end sub resync_lines_and_tokens - -sub keep_old_line_breaks { +} ## end sub add_phantom_semicolon - # Called once per file to find and mark any old line breaks which - # should be kept. We will be translating the input hashes into - # token indexes. +sub add_trailing_comma { - # A flag is set as follows: - # = 1 make a hard break (flush the current batch) - # best for something like leading commas (-kbb=',') - # = 2 make a soft break (keep building current batch) - # best for something like leading -> + # Implement the --add-trailing-commas flag to the line end before index $KK: - my ($self) = @_; + my ( $self, $KK, $Kfirst, $trailing_comma_rule ) = @_; - my $rLL = $self->[_rLL_]; - my $rKrange_code_without_comments = - $self->[_rKrange_code_without_comments_]; - my $rbreak_before_Kfirst = $self->[_rbreak_before_Kfirst_]; - my $rbreak_after_Klast = $self->[_rbreak_after_Klast_]; - my $rwant_container_open = $self->[_rwant_container_open_]; - my $K_opening_container = $self->[_K_opening_container_]; - my $ris_broken_container = $self->[_ris_broken_container_]; - my $ris_list_by_seqno = $self->[_ris_list_by_seqno_]; + # Input parameter: + # $KK = index of closing token in old ($rLL) token list + # which starts a new line and is not preceded by a comma + # $Kfirst = index of first token on the current line of input tokens + # $add_flags = user control flags - # This code moved here from sub break_lists to fix b1120 - if ( $rOpts->{'break-at-old-method-breakpoints'} ) { - foreach my $item ( @{$rKrange_code_without_comments} ) { - my ( $Kfirst, $Klast ) = @{$item}; - my $type = $rLL->[$Kfirst]->[_TYPE_]; - my $token = $rLL->[$Kfirst]->[_TOKEN_]; + # For example, we might want to add a comma here: - # leading '->' use a value of 2 which causes a soft - # break rather than a hard break - if ( $type eq '->' ) { - $rbreak_before_Kfirst->{$Kfirst} = 2; - } + # bless { + # _name => $name, + # _price => $price, + # _rebate => $rebate <------ location of possible bare comma + # }, $pkg; + # ^-------------------closing token at index $KK on new line - # leading ')->' use a special flag to insure that both - # opening and closing parens get opened - # Fix for b1120: only for parens, not braces - elsif ( $token eq ')' ) { - my $Kn = $self->K_next_nonblank($Kfirst); - next - unless ( defined($Kn) - && $Kn <= $Klast - && $rLL->[$Kn]->[_TYPE_] eq '->' ); - my $seqno = $rLL->[$Kfirst]->[_TYPE_SEQUENCE_]; - next unless ($seqno); + # Do not add a comma if it would follow a comment + my $Kp = $self->K_previous_nonblank( undef, $rLL_new ); + return unless ( defined($Kp) ); + my $type_p = $rLL_new->[$Kp]->[_TYPE_]; + return if ( $type_p eq '#' ); - # Note: in previous versions there was a fix here to avoid - # instability between conflicting -bom and -pvt or -pvtc flags. - # The fix skipped -bom for a small line difference. But this - # was troublesome, and instead the fix has been moved to - # sub set_vertical_tightness_flags where priority is given to - # the -bom flag over -pvt and -pvtc flags. Both opening and - # closing paren flags are involved because even though -bom only - # requests breaking before the closing paren, automated logic - # opens the opening paren when the closing paren opens. - # Relevant cases are b977, b1215, b1270, b1303 + # see if the user wants a trailing comma here + my $match = + $self->match_trailing_comma_rule( $KK, $Kfirst, $Kp, + $trailing_comma_rule, 1 ); - $rwant_container_open->{$seqno} = 1; - } - } + # if so, add a comma + if ($match) { + my $Knew = $self->store_new_token( ',', ',', $Kp ); } - return unless ( %keep_break_before_type || %keep_break_after_type ); + return; - my $check_for_break = sub { - my ( $KK, $rkeep_break_hash, $rbreak_hash ) = @_; - my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_]; +} ## end sub add_trailing_comma - # non-container tokens use the type as the key - if ( !$seqno ) { - my $type = $rLL->[$KK]->[_TYPE_]; - if ( $rkeep_break_hash->{$type} ) { - $rbreak_hash->{$KK} = 1; - } - } +sub delete_trailing_comma { - # container tokens use the token as the key - else { - my $token = $rLL->[$KK]->[_TOKEN_]; - my $flag = $rkeep_break_hash->{$token}; - if ($flag) { + my ( $self, $KK, $Kfirst, $trailing_comma_rule ) = @_; - my $match = $flag eq '1' || $flag eq '*'; + # Apply the --delete-trailing-commas flag to the comma before index $KK - # check for special matching codes - if ( !$match ) { - if ( $token eq '(' || $token eq ')' ) { - $match = $self->match_paren_flag( $KK, $flag ); - } - elsif ( $token eq '{' || $token eq '}' ) { + # Input parameter: + # $KK = index of a closing token in OLD ($rLL) token list + # which is preceded by a comma on the same line. + # $Kfirst = index of first token on the current line of input tokens + # $delete_option = user control flag - # These tentative codes 'b' and 'B' for brace types are - # placeholders for possible future brace types. They - # are not documented and may be changed. - my $block_type = - $self->[_rblock_type_of_seqno_]->{$seqno}; - if ( $flag eq 'b' ) { $match = $block_type } - elsif ( $flag eq 'B' ) { $match = !$block_type } - else { - # unknown code - no match - } - } - } - $rbreak_hash->{$KK} = 1 if ($match); - } - } - }; - - foreach my $item ( @{$rKrange_code_without_comments} ) { - my ( $Kfirst, $Klast ) = @{$item}; - $check_for_break->( - $Kfirst, \%keep_break_before_type, $rbreak_before_Kfirst - ); - $check_for_break->( - $Klast, \%keep_break_after_type, $rbreak_after_Klast - ); - } - return; -} ## end sub keep_old_line_breaks + # Returns true if the comma was deleted -sub weld_containers { + # For example, we might want to delete this comma: + # my @asset = ("FASMX", "FASGX", "FASIX",); + # | |^--------token at index $KK + # | ^------comma of interest + # ^-------------token at $Kfirst - # Called once per file to do any welding operations requested by --weld* - # flags. - my ($self) = @_; + # Verify that the previous token is a comma. Note that we are working in + # the new token list $rLL_new. + my $Kp = $self->K_previous_nonblank( undef, $rLL_new ); + return unless ( defined($Kp) ); + if ( $rLL_new->[$Kp]->[_TYPE_] ne ',' ) { - # This count is used to eliminate needless calls for weld checks elsewhere - $total_weld_count = 0; + # there must be a '#' between the ',' and closing token; give up. + return; + } - return if ( $rOpts->{'indent-only'} ); - return unless ($rOpts_add_newlines); + # Do not delete commas when formatting under stress to avoid instability. + # This fixes b1389, b1390, b1391, b1392. The $high_stress_level has + # been found to work well for trailing commas. + if ( $rLL_new->[$Kp]->[_LEVEL_] >= $high_stress_level ) { + return; + } - # Important: sub 'weld_cuddled_blocks' must be called before - # sub 'weld_nested_containers'. This is because the cuddled option needs to - # use the original _LEVEL_ values of containers, but the weld nested - # containers changes _LEVEL_ of welded containers. + # See if the user wants this trailing comma + my $match = + $self->match_trailing_comma_rule( $KK, $Kfirst, $Kp, + $trailing_comma_rule, 0 ); - # Here is a good test case to be sure that both cuddling and welding - # are working and not interfering with each other: <> + # Patch: the --noadd-whitespace flag can cause instability in complex + # structures. In this case do not delete the comma. Fixes b1409. + if ( !$match && !$rOpts_add_whitespace ) { + my $Kn = $self->K_next_nonblank($KK); + if ( defined($Kn) ) { + my $type_n = $rLL->[$Kn]->[_TYPE_]; + if ( $type_n ne ';' && $type_n ne '#' ) { return } + } + } - # perltidy -wn -ce + # If no match, delete it + if ( !$match ) { - # if ($BOLD_MATH) { ( - # $labels, $comment, - # join( '', '', &make_math( $mode, '', '', $_ ), '' ) - # ) } else { ( - # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ), - # $after - # ) } + return $self->unstore_last_nonblank_token(','); + } + return; - $self->weld_cuddled_blocks() if ( %{$rcuddled_block_types} ); +} ## end sub delete_trailing_comma - if ( $rOpts->{'weld-nested-containers'} ) { +sub delete_weld_interfering_comma { - $self->weld_nested_containers(); + my ( $self, $KK ) = @_; - $self->weld_nested_quotes(); - } + # Apply the flag '--delete-weld-interfering-commas' to the comma + # before index $KK - #------------------------------------------------------------- - # All welding is done. Finish setting up weld data structures. - #------------------------------------------------------------- + # Input parameter: + # $KK = index of a closing token in OLD ($rLL) token list + # which is preceded by a comma on the same line. - my $rLL = $self->[_rLL_]; - my $rK_weld_left = $self->[_rK_weld_left_]; - my $rK_weld_right = $self->[_rK_weld_right_]; - my $rweld_len_right_at_K = $self->[_rweld_len_right_at_K_]; + # Returns true if the comma was deleted - my @K_multi_weld; - my @keys = keys %{$rK_weld_right}; - $total_weld_count = @keys; + # For example, we might want to delete this comma: - # First pass to process binary welds. - # This loop is processed in unsorted order for efficiency. - foreach my $Kstart (@keys) { - my $Kend = $rK_weld_right->{$Kstart}; + # my $tmpl = { foo => {no_override => 1, default => 42}, }; + # || ^------$KK + # |^---$Kp + # $Kpp---^ + # + # Note that: + # index $KK is in the old $rLL array, but + # indexes $Kp and $Kpp are in the new $rLL_new array. - # An error here would be due to an incorrect initialization introduced - # in one of the above weld routines, like sub weld_nested. - if ( $Kend <= $Kstart ) { - Fault("Bad weld link: Kend=$Kend <= Kstart=$Kstart\n") - if (DEVEL_MODE); - next; - } + my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_]; + return unless ($type_sequence); - # Set weld values for all tokens this welded pair - foreach ( $Kstart + 1 .. $Kend ) { - $rK_weld_left->{$_} = $Kstart; - } - foreach my $Kx ( $Kstart .. $Kend - 1 ) { - $rK_weld_right->{$Kx} = $Kend; - $rweld_len_right_at_K->{$Kx} = - $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - - $rLL->[$Kx]->[_CUMULATIVE_LENGTH_]; - } + # Find the previous token and verify that it is a comma. + my $Kp = $self->K_previous_nonblank( undef, $rLL_new ); + return unless ( defined($Kp) ); + if ( $rLL_new->[$Kp]->[_TYPE_] ne ',' ) { - # Remember the leftmost index of welds which continue to the right - if ( defined( $rK_weld_right->{$Kend} ) - && !defined( $rK_weld_left->{$Kstart} ) ) - { - push @K_multi_weld, $Kstart; - } + # it is not a comma, so give up ( it is probably a '#' ) + return; } - # Second pass to process chains of welds (these are rare). - # This has to be processed in sorted order. - if (@K_multi_weld) { - my $Kend = -1; - foreach my $Kstart ( sort { $a <=> $b } @K_multi_weld ) { + # This must be the only comma in this list + my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence}; + return + unless ( defined($rtype_count) + && $rtype_count->{','} + && $rtype_count->{','} == 1 ); - # Skip any interior K which was originally missing a left link - next if ( $Kstart <= $Kend ); + # Back up to the previous closing token + my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new ); + return unless ( defined($Kpp) ); + my $seqno_pp = $rLL_new->[$Kpp]->[_TYPE_SEQUENCE_]; + my $type_pp = $rLL_new->[$Kpp]->[_TYPE_]; - # Find the end of this chain - $Kend = $rK_weld_right->{$Kstart}; - my $Knext = $rK_weld_right->{$Kend}; - while ( defined($Knext) ) { - $Kend = $Knext; - $Knext = $rK_weld_right->{$Kend}; - } + # The containers must be nesting (i.e., sequence numbers must differ by 1 ) + if ( $seqno_pp && $is_closing_type{$type_pp} ) { + if ( $seqno_pp == $type_sequence + 1 ) { - # Set weld values this chain - foreach ( $Kstart + 1 .. $Kend ) { - $rK_weld_left->{$_} = $Kstart; - } - foreach my $Kx ( $Kstart .. $Kend - 1 ) { - $rK_weld_right->{$Kx} = $Kend; - $rweld_len_right_at_K->{$Kx} = - $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - - $rLL->[$Kx]->[_CUMULATIVE_LENGTH_]; - } + # remove the ',' from the top of the new token list + return $self->unstore_last_nonblank_token(','); } } - return; -} ## end sub weld_containers -sub cumulative_length_before_K { - my ( $self, $KK ) = @_; - my $rLL = $self->[_rLL_]; - return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; -} +} ## end sub delete_trailing_comma -sub weld_cuddled_blocks { - my ($self) = @_; +sub unstore_last_nonblank_token { - # Called once per file to handle cuddled formatting + my ( $self, $type ) = @_; - my $rK_weld_left = $self->[_rK_weld_left_]; - my $rK_weld_right = $self->[_rK_weld_right_]; - my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; + # remove the most recent nonblank token from the new token list + # Input parameter: + # $type = type to be removed (for safety check) - # This routine implements the -cb flag by finding the appropriate - # closing and opening block braces and welding them together. - return unless ( %{$rcuddled_block_types} ); + # Returns true if success + # false if error - my $rLL = $self->[_rLL_]; - return unless ( defined($rLL) && @{$rLL} ); - my $rbreak_container = $self->[_rbreak_container_]; + # This was written and is used for removing commas, but might + # be useful for other tokens. If it is ever used for other tokens + # then the issue of what to do about the other variables, such + # as token counts and the '$last...' vars needs to be considered. - my $K_opening_container = $self->[_K_opening_container_]; - my $K_closing_container = $self->[_K_closing_container_]; + # Safety check, shouldn't happen + if ( @{$rLL_new} < 3 ) { + DEVEL_MODE && Fault("not enough tokens on stack to remove '$type'\n"); + return; + } - my $length_to_opening_seqno = sub { - my ($seqno) = @_; - my $KK = $K_opening_container->{$seqno}; - my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; - return $lentot; - }; - my $length_to_closing_seqno = sub { - my ($seqno) = @_; - my $KK = $K_closing_container->{$seqno}; - my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; - return $lentot; - }; + my ( $rcomma, $rblank ); - my $is_broken_block = sub { + # case 1: pop comma from top of stack + if ( $rLL_new->[-1]->[_TYPE_] eq $type ) { + $rcomma = pop @{$rLL_new}; + } - # a block is broken if the input line numbers of the braces differ - # we can only cuddle between broken blocks - my ($seqno) = @_; - my $K_opening = $K_opening_container->{$seqno}; - return unless ( defined($K_opening) ); - my $K_closing = $K_closing_container->{$seqno}; - return unless ( defined($K_closing) ); - return $rbreak_container->{$seqno} - || $rLL->[$K_closing]->[_LINE_INDEX_] != - $rLL->[$K_opening]->[_LINE_INDEX_]; - }; + # case 2: pop blank and then comma from top of stack + elsif ($rLL_new->[-1]->[_TYPE_] eq 'b' + && $rLL_new->[-2]->[_TYPE_] eq $type ) + { + $rblank = pop @{$rLL_new}; + $rcomma = pop @{$rLL_new}; + } - # A stack to remember open chains at all levels: This is a hash rather than - # an array for safety because negative levels can occur in files with - # errors. This allows us to keep processing with negative levels. - # $in_chain{$level} = [$chain_type, $type_sequence]; - my %in_chain; - my $CBO = $rOpts->{'cuddled-break-option'}; + # case 3: error, shouldn't happen unless bad call + else { + DEVEL_MODE && Fault("Could not find token type '$type' to remove\n"); + return; + } - # loop over structure items to find cuddled pairs - my $level = 0; - my $KNEXT = $self->[_K_first_seq_item_]; - while ( defined($KNEXT) ) { - my $KK = $KNEXT; - $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_]; - my $rtoken_vars = $rLL->[$KK]; - my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; - if ( !$type_sequence ) { - next if ( $KK == 0 ); # first token in file may not be container + # A note on updating vars set by sub store_token for this comma: If we + # reduce the comma count by 1 then we also have to change the variable + # $last_nonblank_code_type to be $last_last_nonblank_code_type because + # otherwise sub store_token is going to ALSO reduce the comma count. + # Alternatively, we can leave the count alone and the + # $last_nonblank_code_type alone. Then sub store_token will produce + # the correct result. This is simpler and is done here. - # A fault here implies that an error was made in the little loop at - # the bottom of sub 'respace_tokens' which set the values of - # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the - # loop control lines above. - Fault("sequence = $type_sequence not defined at K=$KK") - if (DEVEL_MODE); - next; + # Now add a blank space after the comma if appropriate. + # Some unusual spacing controls might need another iteration to + # reach a final state. + if ( $rLL_new->[-1]->[_TYPE_] ne 'b' ) { + if ( defined($rblank) ) { + $rblank->[_CUMULATIVE_LENGTH_] -= 1; # fix for deleted comma + push @{$rLL_new}, $rblank; } + } + return 1; +} - # NOTE: we must use the original levels here. They can get changed - # by sub 'weld_nested_containers', so this routine must be called - # before sub 'weld_nested_containers'. - my $last_level = $level; - $level = $rtoken_vars->[_LEVEL_]; +sub match_trailing_comma_rule { + + my ( $self, $KK, $Kfirst, $Kp, $trailing_comma_rule, $if_add ) = @_; + + # Decide if a trailing comma rule is matched. + + # Input parameter: + # $KK = index of closing token in old ($rLL) token list which follows + # the location of a possible trailing comma. See diagram below. + # $Kfirst = (old) index of first token on the current line of input tokens + # $Kp = index of previous nonblank token in new ($rLL_new) array + # $trailing_comma_rule = packed user control flags + # $if_add = true if adding comma, false if deleteing comma + + # Returns: + # false if no match + # true if match + + # For example, we might be checking for addition of a comma here: + + # bless { + # _name => $name, + # _price => $price, + # _rebate => $rebate <------ location of possible trailing comma + # }, $pkg; + # ^-------------------closing token at index $KK + + return unless ($trailing_comma_rule); + my ( $trailing_comma_style, $paren_flag ) = @{$trailing_comma_rule}; + + # List of $trailing_comma_style values: + # undef stable: do not change + # '0' : no list should have a trailing comma + # '1' or '*' : every list should have a trailing comma + # 'm' a multi-line list should have a trailing commas + # 'b' trailing commas should be 'bare' (comma followed by newline) + # 'h' lists of key=>value pairs with a bare trailing comma + # 'i' same as s=h but also include any list with no more than about one + # comma per line + # ' ' or -wtc not defined : leave trailing commas unchanged [DEFAULT]. + + # Note: an interesting generalization would be to let an upper case + # letter denote the negation of styles 'm', 'b', 'h', 'i'. This might + # be useful for undoing operations. It would be implemented as a wrapper + # around this routine. + + #----------------------------------------- + # No style defined : do not add or delete + #----------------------------------------- + if ( !defined($trailing_comma_style) ) { return !$if_add } + + #---------------------------------------- + # Set some flags describing this location + #---------------------------------------- + my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_]; + return unless ($type_sequence); + my $closing_token = $rLL->[$KK]->[_TOKEN_]; + my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence}; + return unless ( defined($rtype_count) && $rtype_count->{','} ); + my $is_permanently_broken = + $self->[_ris_permanently_broken_]->{$type_sequence}; + + # Note that _ris_broken_container_ also stores the line diff + # but it is not available at this early stage. + my $K_opening = $self->[_K_opening_container_]->{$type_sequence}; + return if ( !defined($K_opening) ); + + # multiline definition 1: opening and closing tokens on different lines + my $iline_o = $rLL_new->[$K_opening]->[_LINE_INDEX_]; + my $iline_c = $rLL->[$KK]->[_LINE_INDEX_]; + my $line_diff_containers = $iline_c - $iline_o; + my $has_multiline_containers = $line_diff_containers > 0; + + # multiline definition 2: first and last commas on different lines + my $iline_first = $self->[_rfirst_comma_line_index_]->{$type_sequence}; + my $iline_last = $rLL_new->[$Kp]->[_LINE_INDEX_]; + my $has_multiline_commas; + my $line_diff_commas = 0; + if ( !defined($iline_first) ) { + + # shouldn't happen if caller checked comma count + my $type_kp = $rLL_new->[$Kp]->[_TYPE_]; + Fault( +"at line $iline_last but line of first comma not defined, at Kp=$Kp, type=$type_kp\n" + ) if (DEVEL_MODE); + } + else { + $line_diff_commas = $iline_last - $iline_first; + $has_multiline_commas = $line_diff_commas > 0; + } - if ( $level < $last_level ) { $in_chain{$last_level} = undef } - elsif ( $level > $last_level ) { $in_chain{$level} = undef } + # To avoid instability in edge cases, when adding commas we uses the + # multiline_commas definition, but when deleting we use multiline + # containers. This fixes b1384, b1396, b1397, b1398, b1400. + my $is_multiline = + $if_add ? $has_multiline_commas : $has_multiline_containers; - # We are only looking at code blocks - my $token = $rtoken_vars->[_TOKEN_]; - my $type = $rtoken_vars->[_TYPE_]; - next unless ( $type eq $token ); + my $is_bare_multiline_comma = $is_multiline && $KK == $Kfirst; - if ( $token eq '{' ) { + my $match; - my $block_type = $rblock_type_of_seqno->{$type_sequence}; - if ( !$block_type ) { + #---------------------------- + # 0 : does not match any list + #---------------------------- + if ( $trailing_comma_style eq '0' ) { + $match = 0; + } - # patch for unrecognized block types which may not be labeled - my $Kp = $self->K_previous_nonblank($KK); - while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) { - $Kp = $self->K_previous_nonblank($Kp); - } - next unless $Kp; - $block_type = $rLL->[$Kp]->[_TOKEN_]; - } - if ( $in_chain{$level} ) { + #------------------------------ + # '*' or '1' : matches any list + #------------------------------ + elsif ( $trailing_comma_style eq '*' || $trailing_comma_style eq '1' ) { + $match = 1; + } - # we are in a chain and are at an opening block brace. - # See if we are welding this opening brace with the previous - # block brace. Get their identification numbers: - my $closing_seqno = $in_chain{$level}->[1]; - my $opening_seqno = $type_sequence; + #----------------------------- + # 'm' matches a Multiline list + #----------------------------- + elsif ( $trailing_comma_style eq 'm' ) { + $match = $is_multiline; + } - # The preceding block must be on multiple lines so that its - # closing brace will start a new line. - if ( !$is_broken_block->($closing_seqno) ) { - next unless ( $CBO == 2 ); - $rbreak_container->{$closing_seqno} = 1; - } + #---------------------------------- + # 'b' matches a Bare trailing comma + #---------------------------------- + elsif ( $trailing_comma_style eq 'b' ) { + $match = $is_bare_multiline_comma; + } - # we will let the trailing block be either broken or intact - ## && $is_broken_block->($opening_seqno); + #-------------------------------------------------------------------------- + # 'h' matches a bare hash list with about 1 comma and 1 fat comma per line. + # 'i' matches a bare stable list with about 1 comma per line. + #-------------------------------------------------------------------------- + elsif ( $trailing_comma_style eq 'h' || $trailing_comma_style eq 'i' ) { - # We can weld the closing brace to its following word .. - my $Ko = $K_closing_container->{$closing_seqno}; - my $Kon; - if ( defined($Ko) ) { - $Kon = $self->K_next_nonblank($Ko); - } + # We can treat these together because they are similar. + # The set of 'i' matches includes the set of 'h' matches. - # ..unless it is a comment - if ( defined($Kon) && $rLL->[$Kon]->[_TYPE_] ne '#' ) { + # the trailing comma must be bare for both 'h' and 'i' + return if ( !$is_bare_multiline_comma ); - # OK to weld these two tokens... - $rK_weld_right->{$Ko} = $Kon; - $rK_weld_left->{$Kon} = $Ko; + # there must be no more than one comma per line for both 'h' and 'i' + my $new_comma_count = $rtype_count->{','}; + $new_comma_count += 1 if ($if_add); + return if ( $new_comma_count > $line_diff_commas + 1 ); - # Set flag that we want to break the next container - # so that the cuddled line is balanced. - $rbreak_container->{$opening_seqno} = 1 - if ($CBO); - } - - } - else { + # a list of key=>value pairs with at least 2 fat commas is a match + # for both 'h' and 'i' + my $fat_comma_count = $rtype_count->{'=>'}; + if ( $fat_comma_count && $fat_comma_count >= 2 ) { - # We are not in a chain. Start a new chain if we see the - # starting block type. - if ( $rcuddled_block_types->{$block_type} ) { - $in_chain{$level} = [ $block_type, $type_sequence ]; - } - else { - $block_type = '*'; - $in_chain{$level} = [ $block_type, $type_sequence ]; - } - } + # comma count (including trailer) and fat comma count must differ by + # by no more than 1. This allows for some small variations. + my $comma_diff = $new_comma_count - $fat_comma_count; + $match = ( $comma_diff >= -1 && $comma_diff <= 1 ); } - elsif ( $token eq '}' ) { - if ( $in_chain{$level} ) { - - # We are in a chain at a closing brace. See if this chain - # continues.. - my $Knn = $self->K_next_code($KK); - next unless $Knn; - - my $chain_type = $in_chain{$level}->[0]; - my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_]; - if ( - $rcuddled_block_types->{$chain_type}->{$next_nonblank_token} - ) - { - # Note that we do not weld yet because we must wait until - # we we are sure that an opening brace for this follows. - $in_chain{$level}->[1] = $type_sequence; - } - else { $in_chain{$level} = undef } - } + # For 'i' only, a list that can be shown to be stable is a match + if ( $trailing_comma_style eq 'i' ) { + $match ||= ( + $is_permanently_broken + || ( $rOpts_break_at_old_comma_breakpoints + && !$rOpts_ignore_old_breakpoints ) + ); } } - return; -} ## end sub weld_cuddled_blocks -sub find_nested_pairs { - my $self = shift; + #------------------------------------------------------------------------- + # Unrecognized parameter. This should have been caught in the input check. + #------------------------------------------------------------------------- + else { - # This routine is called once per file to do preliminary work needed for - # the --weld-nested option. This information is also needed for adding - # semicolons. + DEVEL_MODE && Fault("Unrecognized parameter '$trailing_comma_style'\n"); - my $rLL = $self->[_rLL_]; - return unless ( defined($rLL) && @{$rLL} ); - my $Num = @{$rLL}; + # do not add or delete + return !$if_add; + } - my $K_opening_container = $self->[_K_opening_container_]; - my $K_closing_container = $self->[_K_closing_container_]; - my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; + # Now do any special paren check + if ( $match + && $paren_flag + && $paren_flag ne '1' + && $paren_flag ne '*' + && $closing_token eq ')' ) + { + $match &&= + $self->match_paren_control_flag( $type_sequence, $paren_flag, + $rLL_new ); + } - # We define an array of pairs of nested containers - my @nested_pairs; + # Fix for b1379, b1380, b1381, b1382, b1384 part 1. Mark trailing commas + # for use by -vtc logic to avoid instability when -dtc and -atc are both + # active. + if ($match) { + if ( $if_add && $rOpts_delete_trailing_commas + || !$if_add && $rOpts_add_trailing_commas ) + { + $self->[_ris_bare_trailing_comma_by_seqno_]->{$type_sequence} = 1; - # Names of calling routines can either be marked as 'i' or 'w', - # and they may invoke a sub call with an '->'. We will consider - # any consecutive string of such types as a single unit when making - # weld decisions. We also allow a leading ! - my $is_name_type = { - 'i' => 1, - 'w' => 1, - 'U' => 1, - '->' => 1, - '!' => 1, - }; + # The combination of -atc and -dtc and -cab=3 can be unstable + # (b1394). So we deactivate -cab=3 in this case. + if ( $rOpts_comma_arrow_breakpoints == 3 ) { + $self->[_roverride_cab3_]->{$type_sequence} = 1; + } + } + } + return $match; +} - # Loop over all closing container tokens - foreach my $inner_seqno ( keys %{$K_closing_container} ) { - my $K_inner_closing = $K_closing_container->{$inner_seqno}; +sub store_new_token { - # See if it is immediately followed by another, outer closing token - my $K_outer_closing = $K_inner_closing + 1; - $K_outer_closing += 1 - if ( $K_outer_closing < $Num - && $rLL->[$K_outer_closing]->[_TYPE_] eq 'b' ); + my ( $self, $type, $token, $Kp ) = @_; - next unless ( $K_outer_closing < $Num ); - my $outer_seqno = $rLL->[$K_outer_closing]->[_TYPE_SEQUENCE_]; - next unless ($outer_seqno); - my $token_outer_closing = $rLL->[$K_outer_closing]->[_TOKEN_]; - next unless ( $is_closing_token{$token_outer_closing} ); + # Create and insert a completely new token into the output stream - # Now we have to check the opening tokens. - my $K_outer_opening = $K_opening_container->{$outer_seqno}; - my $K_inner_opening = $K_opening_container->{$inner_seqno}; - next unless defined($K_outer_opening) && defined($K_inner_opening); + # Input parameters: + # $type = the token type + # $token = the token text + # $Kp = index of the previous token in the new list, $rLL_new - my $inner_blocktype = $rblock_type_of_seqno->{$inner_seqno}; - my $outer_blocktype = $rblock_type_of_seqno->{$outer_seqno}; + # Returns: + # $Knew = index in $rLL_new of the new token - # Verify that the inner opening token is the next container after the - # outer opening token. - my $K_io_check = $rLL->[$K_outer_opening]->[_KNEXT_SEQ_ITEM_]; - next unless defined($K_io_check); - if ( $K_io_check != $K_inner_opening ) { + # This operation is a little tricky because we are creating a new token and + # we have to take care to follow the requested whitespace rules. - # The inner opening container does not immediately follow the outer - # opening container, but we may still allow a weld if they are - # separated by a sub signature. For example, we may have something - # like this, where $K_io_check may be at the first 'x' instead of - # 'io'. So we need to hop over the signature and see if we arrive - # at 'io'. + my $Ktop = @{$rLL_new} - 1; + my $top_is_space = $Ktop >= 0 && $rLL_new->[$Ktop]->[_TYPE_] eq 'b'; + my $Knew; + if ( $top_is_space && $want_left_space{$type} == WS_NO ) { - # oo io - # | x x | - # $obj->then( sub ( $code ) { - # ... - # return $c->render(text => '', status => $code); - # } ); - # | | - # ic oc + #---------------------------------------------------- + # Method 1: Convert the top blank into the new token. + #---------------------------------------------------- - next if ( !$inner_blocktype || $inner_blocktype ne 'sub' ); - next if $rLL->[$K_io_check]->[_TOKEN_] ne '('; - my $seqno_signature = $rLL->[$K_io_check]->[_TYPE_SEQUENCE_]; - next unless defined($seqno_signature); - my $K_signature_closing = $K_closing_container->{$seqno_signature}; - next unless defined($K_signature_closing); - my $K_test = $rLL->[$K_signature_closing]->[_KNEXT_SEQ_ITEM_]; - next - unless ( defined($K_test) && $K_test == $K_inner_opening ); + # Be Careful: we are working on the top of the new stack, on a token + # which has been stored. - # OK, we have arrived at 'io' in the above diagram. We should put - # a limit on the length or complexity of the signature here. There - # is no perfect way to do this, one way is to put a limit on token - # count. For consistency with older versions, we should allow a - # signature with a single variable to weld, but not with - # multiple variables. A single variable as in 'sub ($code) {' can - # have a $Kdiff of 2 to 4, depending on spacing. + my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', SPACE ); - # But two variables like 'sub ($v1,$v2) {' can have a diff of 4 to - # 7, depending on spacing. So to keep formatting consistent with - # previous versions, we will also avoid welding if there is a comma - # in the signature. + $Knew = $Ktop; + $rLL_new->[$Knew]->[_TOKEN_] = $token; + $rLL_new->[$Knew]->[_TOKEN_LENGTH_] = length($token); + $rLL_new->[$Knew]->[_TYPE_] = $type; - my $Kdiff = $K_signature_closing - $K_io_check; - next if ( $Kdiff > 4 ); + # NOTE: we are changing the output stack without updating variables + # $last_nonblank_code_type, etc. Future needs might require that + # those variables be updated here. For now, we just update the + # type counts as necessary. - my $saw_comma; - foreach my $KK ( $K_io_check + 1 .. $K_signature_closing - 1 ) { - if ( $rLL->[$KK]->[_TYPE_] eq ',' ) { $saw_comma = 1; last } + if ( $is_counted_type{$type} ) { + my $seqno = $seqno_stack{ $depth_next - 1 }; + if ($seqno) { + $self->[_rtype_count_by_seqno_]->{$seqno}->{$type}++; } - next if ($saw_comma); } - # Yes .. this is a possible nesting pair. - # They can be separated by a small amount. - my $K_diff = $K_inner_opening - $K_outer_opening; - - # Count nonblank characters separating them. - if ( $K_diff < 0 ) { next } # Shouldn't happen - my $nonblank_count = 0; - my $type; - my $is_name; - - # Here is an example of a long identifier chain which counts as a - # single nonblank here (this spans about 10 K indexes): - # if ( !Boucherot::SetOfConnections->new->handler->execute( - # ^--K_o_o ^--K_i_o - # @array) ) - my $Kn_first = $K_outer_opening; - my $Kn_last_nonblank; - my $saw_comment; - foreach my $Kn ( $K_outer_opening + 1 .. $K_inner_opening ) { - next if ( $rLL->[$Kn]->[_TYPE_] eq 'b' ); - if ( !$nonblank_count ) { $Kn_first = $Kn } - if ( $Kn eq $K_inner_opening ) { $nonblank_count++; last; } - $Kn_last_nonblank = $Kn; + # Then store a new blank + $self->store_token($rcopy); + } + else { - # skip chain of identifier tokens - my $last_type = $type; - my $last_is_name = $is_name; - $type = $rLL->[$Kn]->[_TYPE_]; - if ( $type eq '#' ) { $saw_comment = 1; last } - $is_name = $is_name_type->{$type}; - next if ( $is_name && $last_is_name ); + #---------------------------------------- + # Method 2: Use the normal storage method + #---------------------------------------- - $nonblank_count++; - last if ( $nonblank_count > 2 ); + # Patch for issue c078: keep line indexes in order. If the top + # token is a space that we are keeping (due to '-wls=...) then + # we have to check that old line indexes stay in order. + # In very rare + # instances in which side comments have been deleted and converted + # into blanks, we may have filtered down multiple blanks into just + # one. In that case the top blank may have a higher line number + # than the previous nonblank token. Although the line indexes of + # blanks are not really significant, we need to keep them in order + # in order to pass error checks. + if ($top_is_space) { + my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_]; + my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_]; + if ( $new_top_ix < $old_top_ix ) { + $rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix; + } } - # Do not weld across a comment .. fix for c058. - next if ($saw_comment); - - # Patch for b1104: do not weld to a paren preceded by sort/map/grep - # because the special line break rules may cause a blinking state - if ( defined($Kn_last_nonblank) - && $rLL->[$K_inner_opening]->[_TOKEN_] eq '(' - && $rLL->[$Kn_last_nonblank]->[_TYPE_] eq 'k' ) - { - my $token = $rLL->[$Kn_last_nonblank]->[_TOKEN_]; + my $rcopy = copy_token_as_type( $rLL_new->[$Kp], $type, $token ); + $self->store_token($rcopy); + $Knew = @{$rLL_new} - 1; + } + return $Knew; +} ## end sub store_new_token - # Turn off welding at sort/map/grep ( - if ( $is_sort_map_grep{$token} ) { $nonblank_count = 10 } - } +sub check_Q { - if ( + # Check that a quote looks okay, and report possible problems + # to the logfile. - # adjacent opening containers, like: do {{ - $nonblank_count == 1 + my ( $self, $KK, $Kfirst, $line_number ) = @_; + my $token = $rLL->[$KK]->[_TOKEN_]; + if ( $token =~ /\t/ ) { + $self->note_embedded_tab($line_number); + } - # short item following opening paren, like: fun( yyy ( - || ( $nonblank_count == 2 - && $rLL->[$K_outer_opening]->[_TOKEN_] eq '(' ) + # The remainder of this routine looks for something like + # '$var = s/xxx/yyy/;' + # in case it should have been '$var =~ s/xxx/yyy/;' - # anonymous sub + prototype or sig: )->then( sub ($code) { - # ... but it seems best not to stack two structural blocks, like - # this - # sub make_anon_with_my_sub { sub { - # because it probably hides the structure a little too much. - || ( $inner_blocktype - && $inner_blocktype eq 'sub' - && $rLL->[$Kn_first]->[_TOKEN_] eq 'sub' - && !$outer_blocktype ) - ) - { - push @nested_pairs, - [ $inner_seqno, $outer_seqno, $K_inner_closing ]; - } - next; - } + # Start by looking for a token beginning with one of: s y m / tr + return + unless ( $is_s_y_m_slash{ substr( $token, 0, 1 ) } + || substr( $token, 0, 2 ) eq 'tr' ); - # The weld routine expects the pairs in order in the form - # [$seqno_inner, $seqno_outer] - # And they must be in the same order as the inner closing tokens - # (otherwise, welds of three or more adjacent tokens will not work). The K - # value of this inner closing token has temporarily been stored for - # sorting. - @nested_pairs = + # ... and preceded by one of: = == != + my $Kp = $self->K_previous_nonblank( undef, $rLL_new ); + return unless ( defined($Kp) ); + my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_]; + return unless ( $is_unexpected_equals{$previous_nonblank_type} ); + my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_]; - # Drop the K index after sorting (it would cause trouble downstream) - map { [ $_->[0], $_->[1] ] } + my $previous_nonblank_type_2 = 'b'; + my $previous_nonblank_token_2 = EMPTY_STRING; + my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new ); + if ( defined($Kpp) ) { + $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_]; + $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_]; + } - # Sort on the K values - sort { $a->[2] <=> $b->[2] } @nested_pairs; + my $next_nonblank_token = EMPTY_STRING; + my $Kn = $KK + 1; + my $Kmax = @{$rLL} - 1; + if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 } + if ( $Kn <= $Kmax ) { + $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_]; + } - return \@nested_pairs; -} ## end sub find_nested_pairs + my $token_0 = $rLL->[$Kfirst]->[_TOKEN_]; + my $type_0 = $rLL->[$Kfirst]->[_TYPE_]; -sub match_paren_flag { + if ( - # Decide if this paren is excluded by user request: - # undef matches no parens - # '*' matches all parens - # 'k' matches only if the previous nonblank token is a perl builtin - # keyword (such as 'if', 'while'), - # 'K' matches if 'k' does not, meaning if the previous token is not a - # keyword. - # 'f' matches if the previous token is a function other than a keyword. - # 'F' matches if 'f' does not. - # 'w' matches if either 'k' or 'f' match. - # 'W' matches if 'w' does not. - my ( $self, $KK, $flag ) = @_; + # preceded by simple scalar + $previous_nonblank_type_2 eq 'i' + && $previous_nonblank_token_2 =~ /^\$/ - return 0 unless ( defined($flag) ); - return 0 if $flag eq '0'; - return 1 if $flag eq '1'; - return 1 if $flag eq '*'; - return 0 unless ( defined($KK) ); + # followed by some kind of termination + # (but give complaint if we can not see far enough ahead) + && $next_nonblank_token =~ /^[; \)\}]$/ - my $rLL = $self->[_rLL_]; - my $rtoken_vars = $rLL->[$KK]; - my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_]; - return 0 unless ($seqno); - my $token = $rtoken_vars->[_TOKEN_]; - my $K_opening = $KK; - if ( !$is_opening_token{$token} ) { - $K_opening = $self->[_K_opening_container_]->{$seqno}; + # scalar is not declared + ## =~ /^(my|our|local)$/ + && !( $type_0 eq 'k' && $is_my_our_local{$token_0} ) + ) + { + my $lno = 1 + $rLL_new->[$Kp]->[_LINE_INDEX_]; + my $guess = substr( $previous_nonblank_token, 0, 1 ) . '~'; + complain( +"Line $lno: Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n" + ); } - return unless ( defined($K_opening) ); + return; +} ## end sub check_Q - my ( $is_f, $is_k, $is_w ); - my $Kp = $self->K_previous_nonblank($K_opening); - if ( defined($Kp) ) { - my $type_p = $rLL->[$Kp]->[_TYPE_]; +} ## end closure respace_tokens - # keyword? - $is_k = $type_p eq 'k'; +sub copy_token_as_type { - # function call? + # This provides a quick way to create a new token by + # slightly modifying an existing token. + my ( $rold_token, $type, $token ) = @_; + if ( !defined($token) ) { + if ( $type eq 'b' ) { + $token = SPACE; + } + elsif ( $type eq 'q' ) { + $token = EMPTY_STRING; + } + elsif ( $type eq '->' ) { + $token = '->'; + } + elsif ( $type eq ';' ) { + $token = ';'; + } + elsif ( $type eq ',' ) { + $token = ','; + } + else { + + # Unexpected type ... this sub will work as long as both $token and + # $type are defined, but we should catch any unexpected types during + # development. + if (DEVEL_MODE) { + Fault(<' or ';' +EOM + } + + # Shouldn't get here + $token = $type; + } + } + + my @rnew_token = @{$rold_token}; + $rnew_token[_TYPE_] = $type; + $rnew_token[_TOKEN_] = $token; + $rnew_token[_TYPE_SEQUENCE_] = EMPTY_STRING; + return \@rnew_token; +} ## end sub copy_token_as_type + +sub K_next_code { + my ( $self, $KK, $rLL ) = @_; + + # return the index K of the next nonblank, non-comment token + return unless ( defined($KK) && $KK >= 0 ); + + # use the standard array unless given otherwise + $rLL = $self->[_rLL_] unless ( defined($rLL) ); + my $Num = @{$rLL}; + my $Knnb = $KK + 1; + while ( $Knnb < $Num ) { + if ( !defined( $rLL->[$Knnb] ) ) { + + # We seem to have encountered a gap in our array. + # This shouldn't happen because sub write_line() pushed + # items into the $rLL array. + Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE); + return; + } + if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' + && $rLL->[$Knnb]->[_TYPE_] ne '#' ) + { + return $Knnb; + } + $Knnb++; + } + return; +} ## end sub K_next_code + +sub K_next_nonblank { + my ( $self, $KK, $rLL ) = @_; + + # return the index K of the next nonblank token, or + # return undef if none + return unless ( defined($KK) && $KK >= 0 ); + + # The third arg allows this routine to be used on any array. This is + # useful in sub respace_tokens when we are copying tokens from an old $rLL + # to a new $rLL array. But usually the third arg will not be given and we + # will just use the $rLL array in $self. + $rLL = $self->[_rLL_] unless ( defined($rLL) ); + my $Num = @{$rLL}; + my $Knnb = $KK + 1; + return unless ( $Knnb < $Num ); + return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ); + return unless ( ++$Knnb < $Num ); + return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ); + + # Backup loop. Very unlikely to get here; it means we have neighboring + # blanks in the token stream. + $Knnb++; + while ( $Knnb < $Num ) { + + # Safety check, this fault shouldn't happen: The $rLL array is the + # main array of tokens, so all entries should be used. It is + # initialized in sub write_line, and then re-initialized by sub + # store_token() within sub respace_tokens. Tokens are pushed on + # so there shouldn't be any gaps. + if ( !defined( $rLL->[$Knnb] ) ) { + Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE); + return; + } + if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb } + $Knnb++; + } + return; +} ## end sub K_next_nonblank + +sub K_previous_code { + + # return the index K of the previous nonblank, non-comment token + # Call with $KK=undef to start search at the top of the array + my ( $self, $KK, $rLL ) = @_; + + # use the standard array unless given otherwise + $rLL = $self->[_rLL_] unless ( defined($rLL) ); + my $Num = @{$rLL}; + if ( !defined($KK) ) { $KK = $Num } + elsif ( $KK > $Num ) { + + # This fault can be caused by a programming error in which a bad $KK is + # given. The caller should make the first call with KK_new=undef to + # avoid this error. + Fault( +"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num" + ) if (DEVEL_MODE); + return; + } + my $Kpnb = $KK - 1; + while ( $Kpnb >= 0 ) { + if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' + && $rLL->[$Kpnb]->[_TYPE_] ne '#' ) + { + return $Kpnb; + } + $Kpnb--; + } + return; +} ## end sub K_previous_code + +sub K_previous_nonblank { + + # return index of previous nonblank token before item K; + # Call with $KK=undef to start search at the top of the array + my ( $self, $KK, $rLL ) = @_; + + # use the standard array unless given otherwise + $rLL = $self->[_rLL_] unless ( defined($rLL) ); + my $Num = @{$rLL}; + if ( !defined($KK) ) { $KK = $Num } + elsif ( $KK > $Num ) { + + # This fault can be caused by a programming error in which a bad $KK is + # given. The caller should make the first call with KK_new=undef to + # avoid this error. + Fault( +"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num" + ) if (DEVEL_MODE); + return; + } + my $Kpnb = $KK - 1; + return unless ( $Kpnb >= 0 ); + return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ); + return unless ( --$Kpnb >= 0 ); + return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ); + + # Backup loop. We should not get here unless some routine + # slipped repeated blanks into the token stream. + return unless ( --$Kpnb >= 0 ); + while ( $Kpnb >= 0 ) { + if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb } + $Kpnb--; + } + return; +} ## end sub K_previous_nonblank + +sub parent_seqno_by_K { + + # Return the sequence number of the parent container of token K, if any. + + my ( $self, $KK ) = @_; + my $rLL = $self->[_rLL_]; + + # The task is to jump forward to the next container token + # and use the sequence number of either it or its parent. + + # For example, consider the following with seqno=5 of the '[' and ']' + # being called with index K of the first token of each line: + + # # result + # push @tests, # - + # [ # - + # sub { 99 }, 'do {&{%s} for 1,2}', # 5 + # '(&{})(&{})', undef, # 5 + # [ 2, 2, 0 ], 0 # 5 + # ]; # - + + # NOTE: The ending parent will be SEQ_ROOT for a balanced file. For + # unbalanced files, last sequence number will either be undefined or it may + # be at a deeper level. In either case we will just return SEQ_ROOT to + # have a defined value and allow formatting to proceed. + my $parent_seqno = SEQ_ROOT; + my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_]; + if ($type_sequence) { + $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence}; + } + else { + my $Kt = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_]; + if ( defined($Kt) ) { + $type_sequence = $rLL->[$Kt]->[_TYPE_SEQUENCE_]; + my $type = $rLL->[$Kt]->[_TYPE_]; + + # if next container token is closing, it is the parent seqno + if ( $is_closing_type{$type} ) { + $parent_seqno = $type_sequence; + } + + # otherwise we want its parent container + else { + $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence}; + } + } + } + $parent_seqno = SEQ_ROOT unless ( defined($parent_seqno) ); + return $parent_seqno; +} ## end sub parent_seqno_by_K + +sub is_in_block_by_i { + my ( $self, $i ) = @_; + + # returns true if + # token at i is contained in a BLOCK + # or is at root level + # or there is some kind of error (i.e. unbalanced file) + # returns false otherwise + + if ( $i < 0 ) { + DEVEL_MODE && Fault("Bad call, i='$i'\n"); + return 1; + } + + my $seqno = $parent_seqno_to_go[$i]; + return 1 if ( !$seqno || $seqno eq SEQ_ROOT ); + return 1 if ( $self->[_rblock_type_of_seqno_]->{$seqno} ); + return; +} ## end sub is_in_block_by_i + +sub is_in_list_by_i { + my ( $self, $i ) = @_; + + # returns true if token at i is contained in a LIST + # returns false otherwise + my $seqno = $parent_seqno_to_go[$i]; + return unless ( $seqno && $seqno ne SEQ_ROOT ); + if ( $self->[_ris_list_by_seqno_]->{$seqno} ) { + return 1; + } + return; +} ## end sub is_in_list_by_i + +sub is_list_by_K { + + # Return true if token K is in a list + my ( $self, $KK ) = @_; + + my $parent_seqno = $self->parent_seqno_by_K($KK); + return unless defined($parent_seqno); + return $self->[_ris_list_by_seqno_]->{$parent_seqno}; +} + +sub is_list_by_seqno { + + # Return true if the immediate contents of a container appears to be a + # list. + my ( $self, $seqno ) = @_; + return unless defined($seqno); + return $self->[_ris_list_by_seqno_]->{$seqno}; +} + +sub resync_lines_and_tokens { + + my $self = shift; + + # Re-construct the arrays of tokens associated with the original input + # lines since they have probably changed due to inserting and deleting + # blanks and a few other tokens. + + # Return paremeters: + # set severe_error = true if processing needs to terminate + my $severe_error; + my $rqw_lines = []; + + my $rLL = $self->[_rLL_]; + my $Klimit = $self->[_Klimit_]; + my $rlines = $self->[_rlines_]; + my @Krange_code_without_comments; + my @Klast_valign_code; + + # This is the next token and its line index: + my $Knext = 0; + my $Kmax = defined($Klimit) ? $Klimit : -1; + + # Verify that old line indexes are in still order. If this error occurs, + # check locations where sub 'respace_tokens' creates new tokens (like + # blank spaces). It must have set a bad old line index. + if ( DEVEL_MODE && defined($Klimit) ) { + my $iline = $rLL->[0]->[_LINE_INDEX_]; + foreach my $KK ( 1 .. $Klimit ) { + my $iline_last = $iline; + $iline = $rLL->[$KK]->[_LINE_INDEX_]; + if ( $iline < $iline_last ) { + my $KK_m = $KK - 1; + my $token_m = $rLL->[$KK_m]->[_TOKEN_]; + my $token = $rLL->[$KK]->[_TOKEN_]; + my $type_m = $rLL->[$KK_m]->[_TYPE_]; + my $type = $rLL->[$KK]->[_TYPE_]; + Fault(<{_line_type}; + if ( $line_type eq 'CODE' ) { + + # Get the old number of tokens on this line + my $rK_range_old = $line_of_tokens->{_rK_range}; + my ( $Kfirst_old, $Klast_old ) = @{$rK_range_old}; + my $Kdiff_old = 0; + if ( defined($Kfirst_old) ) { + $Kdiff_old = $Klast_old - $Kfirst_old; + } + + # Find the range of NEW K indexes for the line: + # $Kfirst = index of first token on line + # $Klast = index of last token on line + my ( $Kfirst, $Klast ); + + my $Knext_beg = $Knext; # this will be $Kfirst if we find tokens + + # Optimization: Although the actual K indexes may be completely + # changed after respacing, the number of tokens on any given line + # will often be nearly unchanged. So we will see if we can start + # our search by guessing that the new line has the same number + # of tokens as the old line. + my $Knext_guess = $Knext + $Kdiff_old; + if ( $Knext_guess > $Knext + && $Knext_guess < $Kmax + && $rLL->[$Knext_guess]->[_LINE_INDEX_] <= $iline ) + { + + # the guess is good, so we can start our search here + $Knext = $Knext_guess + 1; + } + + while ($Knext <= $Kmax + && $rLL->[$Knext]->[_LINE_INDEX_] <= $iline ) + { + $Knext++; + } + + if ( $Knext > $Knext_beg ) { + + $Klast = $Knext - 1; + + # Delete any terminal blank token + if ( $rLL->[$Klast]->[_TYPE_] eq 'b' ) { $Klast -= 1 } + + if ( $Klast < $Knext_beg ) { + $Klast = undef; + } + else { + + $Kfirst = $Knext_beg; + + # Save ranges of non-comment code. This will be used by + # sub keep_old_line_breaks. + if ( $rLL->[$Kfirst]->[_TYPE_] ne '#' ) { + push @Krange_code_without_comments, [ $Kfirst, $Klast ]; + } + + # Only save ending K indexes of code types which are blank + # or 'VER'. These will be used for a convergence check. + # See related code in sub 'convey_batch_to_vertical_aligner' + my $CODE_type = $line_of_tokens->{_code_type}; + if ( !$CODE_type + || $CODE_type eq 'VER' ) + { + push @Klast_valign_code, $Klast; + } + } + } + + # It is only safe to trim the actual line text if the input + # line had a terminal blank token. Otherwise, we may be + # in a quote. + if ( $line_of_tokens->{_ended_in_blank_token} ) { + $line_of_tokens->{_line_text} =~ s/\s+$//; + } + $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ]; + + # Deleting semicolons can create new empty code lines + # which should be marked as blank + if ( !defined($Kfirst) ) { + my $CODE_type = $line_of_tokens->{_code_type}; + if ( !$CODE_type ) { + $line_of_tokens->{_code_type} = 'BL'; + } + } + else { + + #--------------------------------------------------- + # save indexes of all lines with a 'q' at either end + # for later use by sub find_multiline_qw + #--------------------------------------------------- + if ( $rLL->[$Kfirst]->[_TYPE_] eq 'q' + || $rLL->[$Klast]->[_TYPE_] eq 'q' ) + { + push @{$rqw_lines}, $iline; + } + } + } + } + + # There shouldn't be any nodes beyond the last one. This routine is + # relinking lines and tokens after the tokens have been respaced. A fault + # here indicates some kind of bug has been introduced into the above loops. + # There is not good way to keep going; we better stop here. + if ( $Knext <= $Kmax ) { + Fault_Warn( + "unexpected tokens at end of file when reconstructing lines"); + $severe_error = 1; + return ( $severe_error, $rqw_lines ); + } + $self->[_rKrange_code_without_comments_] = \@Krange_code_without_comments; + + # Setup the convergence test in the FileWriter based on line-ending indexes + my $file_writer_object = $self->[_file_writer_object_]; + $file_writer_object->setup_convergence_test( \@Klast_valign_code ); + + # Mark essential old breakpoints if combination -iob -lp is used. These + # two options do not work well together, but we can avoid turning -iob off + # by ignoring -iob at certain essential line breaks. + # Fixes cases b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058 + if ( $rOpts_ignore_old_breakpoints && $rOpts_line_up_parentheses ) { + my %is_assignment_or_fat_comma = %is_assignment; + $is_assignment_or_fat_comma{'=>'} = 1; + my $ris_essential_old_breakpoint = + $self->[_ris_essential_old_breakpoint_]; + my ( $Kfirst, $Klast ); + foreach my $line_of_tokens ( @{$rlines} ) { + my $line_type = $line_of_tokens->{_line_type}; + if ( $line_type ne 'CODE' ) { + ( $Kfirst, $Klast ) = ( undef, undef ); + next; + } + my ( $Kfirst_prev, $Klast_prev ) = ( $Kfirst, $Klast ); + ( $Kfirst, $Klast ) = @{ $line_of_tokens->{_rK_range} }; + + next unless defined($Klast_prev); + next unless defined($Kfirst); + my $type_last = $rLL->[$Klast_prev]->[_TOKEN_]; + my $type_first = $rLL->[$Kfirst]->[_TOKEN_]; + next + unless ( $is_assignment_or_fat_comma{$type_last} + || $is_assignment_or_fat_comma{$type_first} ); + $ris_essential_old_breakpoint->{$Klast_prev} = 1; + } + } + return ( $severe_error, $rqw_lines ); +} ## end sub resync_lines_and_tokens + +sub keep_old_line_breaks { + + # Called once per file to find and mark any old line breaks which + # should be kept. We will be translating the input hashes into + # token indexes. + + # A flag is set as follows: + # = 1 make a hard break (flush the current batch) + # best for something like leading commas (-kbb=',') + # = 2 make a soft break (keep building current batch) + # best for something like leading -> + + my ($self) = @_; + + my $rLL = $self->[_rLL_]; + my $rKrange_code_without_comments = + $self->[_rKrange_code_without_comments_]; + my $rbreak_before_Kfirst = $self->[_rbreak_before_Kfirst_]; + my $rbreak_after_Klast = $self->[_rbreak_after_Klast_]; + my $rwant_container_open = $self->[_rwant_container_open_]; + my $K_opening_container = $self->[_K_opening_container_]; + my $ris_broken_container = $self->[_ris_broken_container_]; + my $ris_list_by_seqno = $self->[_ris_list_by_seqno_]; + + # This code moved here from sub break_lists to fix b1120 + if ( $rOpts->{'break-at-old-method-breakpoints'} ) { + foreach my $item ( @{$rKrange_code_without_comments} ) { + my ( $Kfirst, $Klast ) = @{$item}; + my $type = $rLL->[$Kfirst]->[_TYPE_]; + my $token = $rLL->[$Kfirst]->[_TOKEN_]; + + # leading '->' use a value of 2 which causes a soft + # break rather than a hard break + if ( $type eq '->' ) { + $rbreak_before_Kfirst->{$Kfirst} = 2; + } + + # leading ')->' use a special flag to insure that both + # opening and closing parens get opened + # Fix for b1120: only for parens, not braces + elsif ( $token eq ')' ) { + my $Kn = $self->K_next_nonblank($Kfirst); + next + unless ( defined($Kn) + && $Kn <= $Klast + && $rLL->[$Kn]->[_TYPE_] eq '->' ); + my $seqno = $rLL->[$Kfirst]->[_TYPE_SEQUENCE_]; + next unless ($seqno); + + # Note: in previous versions there was a fix here to avoid + # instability between conflicting -bom and -pvt or -pvtc flags. + # The fix skipped -bom for a small line difference. But this + # was troublesome, and instead the fix has been moved to + # sub set_vertical_tightness_flags where priority is given to + # the -bom flag over -pvt and -pvtc flags. Both opening and + # closing paren flags are involved because even though -bom only + # requests breaking before the closing paren, automated logic + # opens the opening paren when the closing paren opens. + # Relevant cases are b977, b1215, b1270, b1303 + + $rwant_container_open->{$seqno} = 1; + } + } + } + + return unless ( %keep_break_before_type || %keep_break_after_type ); + + my $check_for_break = sub { + my ( $KK, $rkeep_break_hash, $rbreak_hash ) = @_; + my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_]; + + # non-container tokens use the type as the key + if ( !$seqno ) { + my $type = $rLL->[$KK]->[_TYPE_]; + if ( $rkeep_break_hash->{$type} ) { + $rbreak_hash->{$KK} = 1; + } + } + + # container tokens use the token as the key + else { + my $token = $rLL->[$KK]->[_TOKEN_]; + my $flag = $rkeep_break_hash->{$token}; + if ($flag) { + + my $match = $flag eq '1' || $flag eq '*'; + + # check for special matching codes + if ( !$match ) { + if ( $token eq '(' || $token eq ')' ) { + $match = + $self->match_paren_control_flag( $seqno, $flag ); + } + elsif ( $token eq '{' || $token eq '}' ) { + + # These tentative codes 'b' and 'B' for brace types are + # placeholders for possible future brace types. They + # are not documented and may be changed. + my $block_type = + $self->[_rblock_type_of_seqno_]->{$seqno}; + if ( $flag eq 'b' ) { $match = $block_type } + elsif ( $flag eq 'B' ) { $match = !$block_type } + else { + # unknown code - no match + } + } + } + $rbreak_hash->{$KK} = 1 if ($match); + } + } + }; + + foreach my $item ( @{$rKrange_code_without_comments} ) { + my ( $Kfirst, $Klast ) = @{$item}; + $check_for_break->( + $Kfirst, \%keep_break_before_type, $rbreak_before_Kfirst + ); + $check_for_break->( + $Klast, \%keep_break_after_type, $rbreak_after_Klast + ); + } + return; +} ## end sub keep_old_line_breaks + +sub weld_containers { + + # Called once per file to do any welding operations requested by --weld* + # flags. + my ($self) = @_; + + # This count is used to eliminate needless calls for weld checks elsewhere + $total_weld_count = 0; + + return if ( $rOpts->{'indent-only'} ); + return unless ($rOpts_add_newlines); + + # Important: sub 'weld_cuddled_blocks' must be called before + # sub 'weld_nested_containers'. This is because the cuddled option needs to + # use the original _LEVEL_ values of containers, but the weld nested + # containers changes _LEVEL_ of welded containers. + + # Here is a good test case to be sure that both cuddling and welding + # are working and not interfering with each other: <> + + # perltidy -wn -ce + + # if ($BOLD_MATH) { ( + # $labels, $comment, + # join( '', '', &make_math( $mode, '', '', $_ ), '' ) + # ) } else { ( + # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ), + # $after + # ) } + + $self->weld_cuddled_blocks() if ( %{$rcuddled_block_types} ); + + if ( $rOpts->{'weld-nested-containers'} ) { + + $self->weld_nested_containers(); + + $self->weld_nested_quotes(); + } + + #------------------------------------------------------------- + # All welding is done. Finish setting up weld data structures. + #------------------------------------------------------------- + + my $rLL = $self->[_rLL_]; + my $rK_weld_left = $self->[_rK_weld_left_]; + my $rK_weld_right = $self->[_rK_weld_right_]; + my $rweld_len_right_at_K = $self->[_rweld_len_right_at_K_]; + + my @K_multi_weld; + my @keys = keys %{$rK_weld_right}; + $total_weld_count = @keys; + + # First pass to process binary welds. + # This loop is processed in unsorted order for efficiency. + foreach my $Kstart (@keys) { + my $Kend = $rK_weld_right->{$Kstart}; + + # An error here would be due to an incorrect initialization introduced + # in one of the above weld routines, like sub weld_nested. + if ( $Kend <= $Kstart ) { + Fault("Bad weld link: Kend=$Kend <= Kstart=$Kstart\n") + if (DEVEL_MODE); + next; + } + + # Set weld values for all tokens this welded pair + foreach ( $Kstart + 1 .. $Kend ) { + $rK_weld_left->{$_} = $Kstart; + } + foreach my $Kx ( $Kstart .. $Kend - 1 ) { + $rK_weld_right->{$Kx} = $Kend; + $rweld_len_right_at_K->{$Kx} = + $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - + $rLL->[$Kx]->[_CUMULATIVE_LENGTH_]; + } + + # Remember the leftmost index of welds which continue to the right + if ( defined( $rK_weld_right->{$Kend} ) + && !defined( $rK_weld_left->{$Kstart} ) ) + { + push @K_multi_weld, $Kstart; + } + } + + # Second pass to process chains of welds (these are rare). + # This has to be processed in sorted order. + if (@K_multi_weld) { + my $Kend = -1; + foreach my $Kstart ( sort { $a <=> $b } @K_multi_weld ) { + + # Skip any interior K which was originally missing a left link + next if ( $Kstart <= $Kend ); + + # Find the end of this chain + $Kend = $rK_weld_right->{$Kstart}; + my $Knext = $rK_weld_right->{$Kend}; + while ( defined($Knext) ) { + $Kend = $Knext; + $Knext = $rK_weld_right->{$Kend}; + } + + # Set weld values this chain + foreach ( $Kstart + 1 .. $Kend ) { + $rK_weld_left->{$_} = $Kstart; + } + foreach my $Kx ( $Kstart .. $Kend - 1 ) { + $rK_weld_right->{$Kx} = $Kend; + $rweld_len_right_at_K->{$Kx} = + $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - + $rLL->[$Kx]->[_CUMULATIVE_LENGTH_]; + } + } + } + + return; +} ## end sub weld_containers + +sub cumulative_length_before_K { + my ( $self, $KK ) = @_; + my $rLL = $self->[_rLL_]; + return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; +} + +sub weld_cuddled_blocks { + my ($self) = @_; + + # Called once per file to handle cuddled formatting + + my $rK_weld_left = $self->[_rK_weld_left_]; + my $rK_weld_right = $self->[_rK_weld_right_]; + my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; + + # This routine implements the -cb flag by finding the appropriate + # closing and opening block braces and welding them together. + return unless ( %{$rcuddled_block_types} ); + + my $rLL = $self->[_rLL_]; + return unless ( defined($rLL) && @{$rLL} ); + + my $rbreak_container = $self->[_rbreak_container_]; + my $ris_cuddled_closing_brace = $self->[_ris_cuddled_closing_brace_]; + my $K_opening_container = $self->[_K_opening_container_]; + my $K_closing_container = $self->[_K_closing_container_]; + + my $is_broken_block = sub { + + # a block is broken if the input line numbers of the braces differ + # we can only cuddle between broken blocks + my ($seqno) = @_; + my $K_opening = $K_opening_container->{$seqno}; + return unless ( defined($K_opening) ); + my $K_closing = $K_closing_container->{$seqno}; + return unless ( defined($K_closing) ); + return $rbreak_container->{$seqno} + || $rLL->[$K_closing]->[_LINE_INDEX_] != + $rLL->[$K_opening]->[_LINE_INDEX_]; + }; + + # A stack to remember open chains at all levels: This is a hash rather than + # an array for safety because negative levels can occur in files with + # errors. This allows us to keep processing with negative levels. + # $in_chain{$level} = [$chain_type, $type_sequence]; + my %in_chain; + my $CBO = $rOpts->{'cuddled-break-option'}; + + # loop over structure items to find cuddled pairs + my $level = 0; + my $KNEXT = $self->[_K_first_seq_item_]; + while ( defined($KNEXT) ) { + my $KK = $KNEXT; + $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_]; + my $rtoken_vars = $rLL->[$KK]; + my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; + if ( !$type_sequence ) { + next if ( $KK == 0 ); # first token in file may not be container + + # A fault here implies that an error was made in the little loop at + # the bottom of sub 'respace_tokens' which set the values of + # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the + # loop control lines above. + Fault("sequence = $type_sequence not defined at K=$KK") + if (DEVEL_MODE); + next; + } + + # NOTE: we must use the original levels here. They can get changed + # by sub 'weld_nested_containers', so this routine must be called + # before sub 'weld_nested_containers'. + my $last_level = $level; + $level = $rtoken_vars->[_LEVEL_]; + + if ( $level < $last_level ) { $in_chain{$last_level} = undef } + elsif ( $level > $last_level ) { $in_chain{$level} = undef } + + # We are only looking at code blocks + my $token = $rtoken_vars->[_TOKEN_]; + my $type = $rtoken_vars->[_TYPE_]; + next unless ( $type eq $token ); + + if ( $token eq '{' ) { + + my $block_type = $rblock_type_of_seqno->{$type_sequence}; + if ( !$block_type ) { + + # patch for unrecognized block types which may not be labeled + my $Kp = $self->K_previous_nonblank($KK); + while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) { + $Kp = $self->K_previous_nonblank($Kp); + } + next unless $Kp; + $block_type = $rLL->[$Kp]->[_TOKEN_]; + } + if ( $in_chain{$level} ) { + + # we are in a chain and are at an opening block brace. + # See if we are welding this opening brace with the previous + # block brace. Get their identification numbers: + my $closing_seqno = $in_chain{$level}->[1]; + my $opening_seqno = $type_sequence; + + # The preceding block must be on multiple lines so that its + # closing brace will start a new line. + if ( !$is_broken_block->($closing_seqno) ) { + next unless ( $CBO == 2 ); + $rbreak_container->{$closing_seqno} = 1; + } + + # We can weld the closing brace to its following word .. + my $Ko = $K_closing_container->{$closing_seqno}; + my $Kon; + if ( defined($Ko) ) { + $Kon = $self->K_next_nonblank($Ko); + } + + # ..unless it is a comment + if ( defined($Kon) && $rLL->[$Kon]->[_TYPE_] ne '#' ) { + + # OK to weld these two tokens... + $rK_weld_right->{$Ko} = $Kon; + $rK_weld_left->{$Kon} = $Ko; + + # Set flag that we want to break the next container + # so that the cuddled line is balanced. + $rbreak_container->{$opening_seqno} = 1 + if ($CBO); + + # Remember which braces are cuddled. + # The closing brace is used to set adjusted indentations. + # The opening brace is not yet used but might eventually + # be needed in setting adjusted indentation. + $ris_cuddled_closing_brace->{$closing_seqno} = 1; + + } + + } + else { + + # We are not in a chain. Start a new chain if we see the + # starting block type. + if ( $rcuddled_block_types->{$block_type} ) { + $in_chain{$level} = [ $block_type, $type_sequence ]; + } + else { + $block_type = '*'; + $in_chain{$level} = [ $block_type, $type_sequence ]; + } + } + } + elsif ( $token eq '}' ) { + if ( $in_chain{$level} ) { + + # We are in a chain at a closing brace. See if this chain + # continues.. + my $Knn = $self->K_next_code($KK); + next unless $Knn; + + my $chain_type = $in_chain{$level}->[0]; + my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_]; + if ( + $rcuddled_block_types->{$chain_type}->{$next_nonblank_token} + ) + { + + # Note that we do not weld yet because we must wait until + # we we are sure that an opening brace for this follows. + $in_chain{$level}->[1] = $type_sequence; + } + else { $in_chain{$level} = undef } + } + } + } + return; +} ## end sub weld_cuddled_blocks + +sub find_nested_pairs { + my $self = shift; + + # This routine is called once per file to do preliminary work needed for + # the --weld-nested option. This information is also needed for adding + # semicolons. + + my $rLL = $self->[_rLL_]; + return unless ( defined($rLL) && @{$rLL} ); + my $Num = @{$rLL}; + + my $K_opening_container = $self->[_K_opening_container_]; + my $K_closing_container = $self->[_K_closing_container_]; + my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; + + # We define an array of pairs of nested containers + my @nested_pairs; + + # Names of calling routines can either be marked as 'i' or 'w', + # and they may invoke a sub call with an '->'. We will consider + # any consecutive string of such types as a single unit when making + # weld decisions. We also allow a leading ! + my $is_name_type = { + 'i' => 1, + 'w' => 1, + 'U' => 1, + '->' => 1, + '!' => 1, + }; + + # Loop over all closing container tokens + foreach my $inner_seqno ( keys %{$K_closing_container} ) { + my $K_inner_closing = $K_closing_container->{$inner_seqno}; + + # See if it is immediately followed by another, outer closing token + my $K_outer_closing = $K_inner_closing + 1; + $K_outer_closing += 1 + if ( $K_outer_closing < $Num + && $rLL->[$K_outer_closing]->[_TYPE_] eq 'b' ); + + next unless ( $K_outer_closing < $Num ); + my $outer_seqno = $rLL->[$K_outer_closing]->[_TYPE_SEQUENCE_]; + next unless ($outer_seqno); + my $token_outer_closing = $rLL->[$K_outer_closing]->[_TOKEN_]; + next unless ( $is_closing_token{$token_outer_closing} ); + + # Simple filter: No commas or semicolons in the outer container + my $rtype_count = $self->[_rtype_count_by_seqno_]->{$outer_seqno}; + if ($rtype_count) { + next if ( $rtype_count->{','} || $rtype_count->{';'} ); + } + + # Now we have to check the opening tokens. + my $K_outer_opening = $K_opening_container->{$outer_seqno}; + my $K_inner_opening = $K_opening_container->{$inner_seqno}; + next unless defined($K_outer_opening) && defined($K_inner_opening); + + my $inner_blocktype = $rblock_type_of_seqno->{$inner_seqno}; + my $outer_blocktype = $rblock_type_of_seqno->{$outer_seqno}; + + # Verify that the inner opening token is the next container after the + # outer opening token. + my $K_io_check = $rLL->[$K_outer_opening]->[_KNEXT_SEQ_ITEM_]; + next unless defined($K_io_check); + if ( $K_io_check != $K_inner_opening ) { + + # The inner opening container does not immediately follow the outer + # opening container, but we may still allow a weld if they are + # separated by a sub signature. For example, we may have something + # like this, where $K_io_check may be at the first 'x' instead of + # 'io'. So we need to hop over the signature and see if we arrive + # at 'io'. + + # oo io + # | x x | + # $obj->then( sub ( $code ) { + # ... + # return $c->render(text => '', status => $code); + # } ); + # | | + # ic oc + + next if ( !$inner_blocktype || $inner_blocktype ne 'sub' ); + next if $rLL->[$K_io_check]->[_TOKEN_] ne '('; + my $seqno_signature = $rLL->[$K_io_check]->[_TYPE_SEQUENCE_]; + next unless defined($seqno_signature); + my $K_signature_closing = $K_closing_container->{$seqno_signature}; + next unless defined($K_signature_closing); + my $K_test = $rLL->[$K_signature_closing]->[_KNEXT_SEQ_ITEM_]; + next + unless ( defined($K_test) && $K_test == $K_inner_opening ); + + # OK, we have arrived at 'io' in the above diagram. We should put + # a limit on the length or complexity of the signature here. There + # is no perfect way to do this, one way is to put a limit on token + # count. For consistency with older versions, we should allow a + # signature with a single variable to weld, but not with + # multiple variables. A single variable as in 'sub ($code) {' can + # have a $Kdiff of 2 to 4, depending on spacing. + + # But two variables like 'sub ($v1,$v2) {' can have a diff of 4 to + # 7, depending on spacing. So to keep formatting consistent with + # previous versions, we will also avoid welding if there is a comma + # in the signature. + + my $Kdiff = $K_signature_closing - $K_io_check; + next if ( $Kdiff > 4 ); + + # backup comma count test; but we cannot get here with Kdiff<=4 + my $rtc = $self->[_rtype_count_by_seqno_]->{$seqno_signature}; + next if ( $rtc && $rtc->{','} ); + } + + # Yes .. this is a possible nesting pair. + # They can be separated by a small amount. + my $K_diff = $K_inner_opening - $K_outer_opening; + + # Count the number of nonblank characters separating them. + # Note: the $nonblank_count includes the inner opening container + # but not the outer opening container, so it will be >= 1. + if ( $K_diff < 0 ) { next } # Shouldn't happen + my $nonblank_count = 0; + my $type; + my $is_name; + + # Here is an example of a long identifier chain which counts as a + # single nonblank here (this spans about 10 K indexes): + # if ( !Boucherot::SetOfConnections->new->handler->execute( + # ^--K_o_o ^--K_i_o + # @array) ) + my $Kn_first = $K_outer_opening; + my $Kn_last_nonblank; + my $saw_comment; + + foreach my $Kn ( $K_outer_opening + 1 .. $K_inner_opening ) { + next if ( $rLL->[$Kn]->[_TYPE_] eq 'b' ); + if ( !$nonblank_count ) { $Kn_first = $Kn } + if ( $Kn eq $K_inner_opening ) { $nonblank_count++; last; } + $Kn_last_nonblank = $Kn; + + # skip chain of identifier tokens + my $last_type = $type; + my $last_is_name = $is_name; + $type = $rLL->[$Kn]->[_TYPE_]; + if ( $type eq '#' ) { $saw_comment = 1; last } + $is_name = $is_name_type->{$type}; + next if ( $is_name && $last_is_name ); + + # do not count a possible leading - of bareword hash key + next if ( $type eq 'm' && !$last_type ); + + $nonblank_count++; + last if ( $nonblank_count > 2 ); + } + + # Do not weld across a comment .. fix for c058. + next if ($saw_comment); + + # Patch for b1104: do not weld to a paren preceded by sort/map/grep + # because the special line break rules may cause a blinking state + if ( defined($Kn_last_nonblank) + && $rLL->[$K_inner_opening]->[_TOKEN_] eq '(' + && $rLL->[$Kn_last_nonblank]->[_TYPE_] eq 'k' ) + { + my $token = $rLL->[$Kn_last_nonblank]->[_TOKEN_]; + + # Turn off welding at sort/map/grep ( + if ( $is_sort_map_grep{$token} ) { $nonblank_count = 10 } + } + + my $token_oo = $rLL->[$K_outer_opening]->[_TOKEN_]; + + if ( + + # 1: adjacent opening containers, like: do {{ + $nonblank_count == 1 + + # 2. anonymous sub + prototype or sig: )->then( sub ($code) { + # ... but it seems best not to stack two structural blocks, like + # this + # sub make_anon_with_my_sub { sub { + # because it probably hides the structure a little too much. + || ( $inner_blocktype + && $inner_blocktype eq 'sub' + && $rLL->[$Kn_first]->[_TOKEN_] eq 'sub' + && !$outer_blocktype ) + + # 3. short item following opening paren, like: fun( yyy ( + || $nonblank_count == 2 && $token_oo eq '(' + + # 4. weld around fat commas, if requested (git #108), such as + # elf->call_method( method_name_foo => { + || ( $type eq '=>' + && $nonblank_count <= 3 + && %weld_fat_comma_rules + && $weld_fat_comma_rules{$token_oo} ) + ) + { + push @nested_pairs, + [ $inner_seqno, $outer_seqno, $K_inner_closing ]; + } + next; + } + + # The weld routine expects the pairs in order in the form + # [$seqno_inner, $seqno_outer] + # And they must be in the same order as the inner closing tokens + # (otherwise, welds of three or more adjacent tokens will not work). The K + # value of this inner closing token has temporarily been stored for + # sorting. + @nested_pairs = + + # Drop the K index after sorting (it would cause trouble downstream) + map { [ $_->[0], $_->[1] ] } + + # Sort on the K values + sort { $a->[2] <=> $b->[2] } @nested_pairs; + + return \@nested_pairs; +} ## end sub find_nested_pairs + +sub match_paren_control_flag { + + # Decide if this paren is excluded by user request: + # undef matches no parens + # '*' matches all parens + # 'k' matches only if the previous nonblank token is a perl builtin + # keyword (such as 'if', 'while'), + # 'K' matches if 'k' does not, meaning if the previous token is not a + # keyword. + # 'f' matches if the previous token is a function other than a keyword. + # 'F' matches if 'f' does not. + # 'w' matches if either 'k' or 'f' match. + # 'W' matches if 'w' does not. + my ( $self, $seqno, $flag, $rLL ) = @_; + + # Input parameters: + # $seqno = sequence number of the container (should be paren) + # $flag = the flag which defines what matches + # $rLL = an optional alternate token list needed for respace operations + $rLL = $self->[_rLL_] unless ( defined($rLL) ); + + return 0 unless ( defined($flag) ); + return 0 if $flag eq '0'; + return 1 if $flag eq '1'; + return 1 if $flag eq '*'; + return 0 unless ($seqno); + my $K_opening = $self->[_K_opening_container_]->{$seqno}; + return unless ( defined($K_opening) ); + + my ( $is_f, $is_k, $is_w ); + my $Kp = $self->K_previous_nonblank( $K_opening, $rLL ); + if ( defined($Kp) ) { + my $type_p = $rLL->[$Kp]->[_TYPE_]; + + # keyword? + $is_k = $type_p eq 'k'; + + # function call? $is_f = $self->[_ris_function_call_paren_]->{$seqno}; # either keyword or function call? @@ -8526,7 +9536,7 @@ sub match_paren_flag { elsif ( $flag eq 'w' ) { $match = $is_w } elsif ( $flag eq 'W' ) { $match = !$is_w } return $match; -} ## end sub match_paren_flag +} ## end sub match_paren_control_flag sub is_excluded_weld { @@ -8540,7 +9550,8 @@ sub is_excluded_weld { my $flag = $is_leading ? $rflags->[0] : $rflags->[1]; return 0 unless ( defined($flag) ); return 1 if $flag eq '*'; - return $self->match_paren_flag( $KK, $flag ); + my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_]; + return $self->match_paren_control_flag( $seqno, $flag ); } ## end sub is_excluded_weld # hashes to simplify welding logic @@ -8716,10 +9727,12 @@ sub setup_new_weld_measurements { # - Add ';' to fix case b1139 # - Convert from '$ok_to_weld' to '$new_weld_ok' to fix b1162. # - relaxed constraints for b1227 + # - added skip if type is 'q' for b1349 and b1350 b1351 b1352 b1353 if ( $starting_ci && $rOpts_line_up_parentheses && $rOpts_delete_old_whitespace && !$rOpts_add_whitespace + && $rLL->[$Kinner_opening]->[_TYPE_] ne 'q' && defined($Kprev) ) { my $type_first = $rLL->[$Kfirst]->[_TYPE_]; @@ -8808,6 +9821,8 @@ sub weld_nested_containers { my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_]; my $ris_asub_block = $self->[_ris_asub_block_]; + my $rmax_vertical_tightness = $self->[_rmax_vertical_tightness_]; + my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'}; # Find nested pairs of container tokens for any welding. @@ -8820,29 +9835,6 @@ sub weld_nested_containers { # pairs. But it isn't clear if this is possible because we don't know # which sequences might actually start a weld. - # Setup a hash to avoid instabilities with combination -lp -wn -pvt=2. - # We do this by reducing -vt=2 to -vt=1 where there could be a conflict - # with welding at the same tokens. - # See issues b1338, b1339, b1340, b1341, b1342, b1343. - if ($rOpts_line_up_parentheses) { - - # NOTE: just parens for now but this could be applied to all types if - # necessary. - if ( $opening_vertical_tightness{'('} == 2 ) { - my $rreduce_vertical_tightness_by_seqno = - $self->[_rreduce_vertical_tightness_by_seqno_]; - foreach my $item ( @{$rnested_pairs} ) { - my ( $inner_seqno, $outer_seqno ) = @{$item}; - if ( !$ris_excluded_lp_container->{$outer_seqno} ) { - - # Set a flag which means that if a token has -vt=2 - # then reduce it to -vt=1. - $rreduce_vertical_tightness_by_seqno->{$outer_seqno} = 1; - } - } - } - } - my $rOpts_break_at_old_method_breakpoints = $rOpts->{'break-at-old-method-breakpoints'}; @@ -8870,8 +9862,9 @@ sub weld_nested_containers { # We use the minimum of two criteria, either of which may be more # restrictive. The 'alpha' value is more restrictive in (b1206, b1252) and # the 'beta' value is more restrictive in other cases (b1243). - - my $weld_cutoff_level = min( $stress_level_alpha, $stress_level_beta + 3 ); + # Reduced beta term from beta+3 to beta+2 to fix b1401. Previously: + # my $weld_cutoff_level = min($stress_level_alpha, $stress_level_beta + 2); + # This is now '$high_stress_level'. # The vertical tightness flags can throw off line length calculations. # This patch was added to fix instability issue b1284. @@ -8880,22 +9873,6 @@ sub weld_nested_containers { # It may be necessary to include '[' and '{' here in the future. my $one_line_tol = $opening_vertical_tightness{'('} ? 1 : 0; - my $length_to_opening_seqno = sub { - my ($seqno) = @_; - my $KK = $K_opening_container->{$seqno}; - my $lentot = defined($KK) - && $KK > 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0; - return $lentot; - }; - - my $length_to_closing_seqno = sub { - my ($seqno) = @_; - my $KK = $K_closing_container->{$seqno}; - my $lentot = defined($KK) - && $KK > 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0; - return $lentot; - }; - # Abbreviations: # _oo=outer opening, i.e. first of { { # _io=inner opening, i.e. second of { { @@ -8940,9 +9917,7 @@ sub weld_nested_containers { # RULE: do not weld to a square bracket which does not contain commas if ( $inner_opening->[_TYPE_] eq '[' ) { my $rtype_count = $self->[_rtype_count_by_seqno_]->{$inner_seqno}; - next unless ($rtype_count); - my $comma_count = $rtype_count->{','}; - next unless ($comma_count); + next unless ( $rtype_count && $rtype_count->{','} ); # Do not weld if there is text before a '[' such as here: # curr_opt ( @beg [2,5] ) @@ -8962,7 +9937,7 @@ sub weld_nested_containers { # welds can still be made. This rule will seldom be a limiting factor # in actual working code. Fixes b1206, b1243. my $inner_level = $inner_opening->[_LEVEL_]; - if ( $inner_level >= $weld_cutoff_level ) { next } + if ( $inner_level >= $high_stress_level ) { next } # Set flag saying if this pair starts a new weld my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] ); @@ -8984,6 +9959,38 @@ sub weld_nested_containers { my $token_oo = $outer_opening->[_TOKEN_]; my $token_io = $inner_opening->[_TOKEN_]; + # DO-NOT-WELD RULE 7: Do not weld if this conflicts with -bom + # Added for case b973. Moved here from below to fix b1423. + if ( !$do_not_weld_rule + && $rOpts_break_at_old_method_breakpoints + && $iline_io > $iline_oo ) + { + + foreach my $iline ( $iline_oo + 1 .. $iline_io ) { + my $rK_range = $rlines->[$iline]->{_rK_range}; + next unless defined($rK_range); + my ( $Kfirst, $Klast ) = @{$rK_range}; + next unless defined($Kfirst); + if ( $rLL->[$Kfirst]->[_TYPE_] eq '->' ) { + $do_not_weld_rule = 7; + last; + } + } + } + next if ($do_not_weld_rule); + + # Turn off vertical tightness at possible one-line welds. Fixes b1402, + # b1419, b1421, b1424, b1425. This also fixes issues b1338, b1339, + # b1340, b1341, b1342, b1343, which previously used a separate fix. + # Issue c161 is the latest and simplest check, using + # $iline_ic==$iline_io as the test. + if ( %opening_vertical_tightness + && $iline_ic == $iline_io + && $opening_vertical_tightness{$token_oo} ) + { + $rmax_vertical_tightness->{$outer_seqno} = 0; + } + my $is_multiline_weld = $iline_oo == $iline_io && $iline_ic == $iline_oc @@ -9223,13 +10230,10 @@ EOM # instead of -asbl, and this fixed most cases. But it turns out that # the real problem was the -asbl flag, and switching to this was # necessary to fixe b1268. This also fixes b1269, b1277, b1278. - if ( - !$do_not_weld_rule - ##&& $is_one_line_weld + if ( !$do_not_weld_rule && $rOpts_line_up_parentheses && $rOpts_asbl - && $ris_asub_block->{$outer_seqno} - ) + && $ris_asub_block->{$outer_seqno} ) { $do_not_weld_rule = '2A'; } @@ -9335,25 +10339,6 @@ EOM # DO-NOT-WELD RULE 6: This has been merged into RULE 3 above. - # DO-NOT-WELD RULE 7: Do not weld if this conflicts with -bom - # (case b973) - if ( !$do_not_weld_rule - && $rOpts_break_at_old_method_breakpoints - && $iline_io > $iline_oo ) - { - - foreach my $iline ( $iline_oo + 1 .. $iline_io ) { - my $rK_range = $rlines->[$iline]->{_rK_range}; - next unless defined($rK_range); - my ( $Kfirst, $Klast ) = @{$rK_range}; - next unless defined($Kfirst); - if ( $rLL->[$Kfirst]->[_TYPE_] eq '->' ) { - $do_not_weld_rule = 7; - last; - } - } - } - if ($do_not_weld_rule) { # After neglecting a pair, we start measuring from start of point @@ -9513,7 +10498,7 @@ sub weld_nested_quotes { my $next_type = $rLL->[$Kn]->[_TYPE_]; next unless ( ( $next_type eq 'q' || $next_type eq 'Q' ) - && $next_token =~ /^q/ ); + && substr( $next_token, 0, 1 ) eq 'q' ); # The token before the closing container must also be a quote my $Kouter_closing = $K_closing_container->{$outer_seqno}; @@ -9847,7 +10832,7 @@ sub mark_short_nested_blocks { return; } ## end sub mark_short_nested_blocks -sub adjust_indentation_levels { +sub special_indentation_adjustments { my ($self) = @_; @@ -9894,7 +10879,7 @@ sub adjust_indentation_levels { $self->clip_adjusted_levels(); return; -} ## end sub adjust_indentation_levels +} ## end sub special_indentation_adjustments sub clip_adjusted_levels { @@ -9903,7 +10888,12 @@ sub clip_adjusted_levels { my ($self) = @_; my $radjusted_levels = $self->[_radjusted_levels_]; return unless defined($radjusted_levels) && @{$radjusted_levels}; - foreach ( @{$radjusted_levels} ) { $_ = 0 if ( $_ < 0 ) } + my $min = min( @{$radjusted_levels} ); # fast check for min + if ( $min < 0 ) { + + # slow loop, but rarely needed + foreach ( @{$radjusted_levels} ) { $_ = 0 if ( $_ < 0 ) } + } return; } ## end sub clip_adjusted_levels @@ -10117,6 +11107,7 @@ sub break_before_list_opening_containers { next unless ($break_option); # Do not use -bbx under stress for stability ... fixes b1300 + # TODO: review this; do we also need to look at stress_level_lalpha? my $level = $rLL->[$KK]->[_LEVEL_]; if ( $level >= $stress_level_beta ) { DEBUG_BBX @@ -10278,7 +11269,7 @@ sub break_before_list_opening_containers { next unless ($ci_flag); # -bbxi=1: This option removes ci and is handled in - # later sub final_indentation_adjustment + # later sub get_final_indentation if ( $ci_flag == 1 ) { $rwant_reduced_ci->{$seqno} = 1; next; @@ -10316,62 +11307,65 @@ sub break_before_list_opening_containers { && $rOpts_continuation_indentation > $rOpts_indent_columns ); # Always ok to change ci for permanently broken containers - if ( $ris_permanently_broken->{$seqno} ) { - goto OK; - } + if ( $ris_permanently_broken->{$seqno} ) { } # Always OK if this list contains a broken sub-container with # a non-terminal line-ending comma - if ($has_list_with_lec) { goto OK } + elsif ($has_list_with_lec) { } + + # Otherwise, we are considering a single container... + else { - # From here on we are considering a single container... + # A single container must have at least 1 line-ending comma: + next unless ( $rlec_count_by_seqno->{$seqno} ); - # A single container must have at least 1 line-ending comma: - next unless ( $rlec_count_by_seqno->{$seqno} ); + my $OK; - # Since it has a line-ending comma, it will stay broken if the -boc - # flag is set - if ($rOpts_break_at_old_comma_breakpoints) { goto OK } + # Since it has a line-ending comma, it will stay broken if the + # -boc flag is set + if ($rOpts_break_at_old_comma_breakpoints) { $OK = 1 } - # OK if the container contains multiple fat commas - # Better: multiple lines with fat commas - if ( !$rOpts_ignore_old_breakpoints ) { - my $rtype_count = $rtype_count_by_seqno->{$seqno}; - next unless ($rtype_count); - my $fat_comma_count = $rtype_count->{'=>'}; - DEBUG_BBX - && print STDOUT "BBX: fat comma count=$fat_comma_count\n"; - if ( $fat_comma_count && $fat_comma_count >= 2 ) { goto OK } - } - - # The last check we can make is to see if this container could fit on a - # single line. Use the least possible indentation estimate, ci=0, - # so we are not subtracting $ci * $rOpts_continuation_indentation from - # tabulated $maximum_text_length value. - my $maximum_text_length = $maximum_text_length_at_level[$level]; - my $K_closing = $K_closing_container->{$seqno}; - my $length = $self->cumulative_length_before_K($K_closing) - - $self->cumulative_length_before_K($KK); - my $excess_length = $length - $maximum_text_length; - DEBUG_BBX - && print STDOUT + # OK if the container contains multiple fat commas + # Better: multiple lines with fat commas + if ( !$OK && !$rOpts_ignore_old_breakpoints ) { + my $rtype_count = $rtype_count_by_seqno->{$seqno}; + next unless ($rtype_count); + my $fat_comma_count = $rtype_count->{'=>'}; + DEBUG_BBX + && print STDOUT "BBX: fat comma count=$fat_comma_count\n"; + if ( $fat_comma_count && $fat_comma_count >= 2 ) { $OK = 1 } + } + + # The last check we can make is to see if this container could + # fit on a single line. Use the least possible indentation + # estimate, ci=0, so we are not subtracting $ci * + # $rOpts_continuation_indentation from tabulated + # $maximum_text_length value. + if ( !$OK ) { + my $maximum_text_length = $maximum_text_length_at_level[$level]; + my $K_closing = $K_closing_container->{$seqno}; + my $length = $self->cumulative_length_before_K($K_closing) - + $self->cumulative_length_before_K($KK); + my $excess_length = $length - $maximum_text_length; + DEBUG_BBX + && print STDOUT "BBX: excess=$excess_length: maximum_text_length=$maximum_text_length, length=$length, ci=$ci\n"; - # OK if the net container definitely breaks on length - if ( $excess_length > $length_tol ) { - DEBUG_BBX - && print STDOUT "BBX: excess_length=$excess_length\n"; - goto OK; - } + # OK if the net container definitely breaks on length + if ( $excess_length > $length_tol ) { + $OK = 1; + DEBUG_BBX + && print STDOUT "BBX: excess_length=$excess_length\n"; + } - # Otherwise skip it - next; + # Otherwise skip it + else { next } + } + } - ################################################################# + #------------------------------------------------------------ # Part 3: Looks OK: apply -bbx=n and any related -bbxi=n flag - ################################################################# - - OK: + #------------------------------------------------------------ DEBUG_BBX && print STDOUT "BBX: OK to break\n"; @@ -10508,8 +11502,7 @@ sub extended_ci { my $KK = $KNEXT; $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_]; - my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_]; - my $K_opening = $K_opening_container->{$seqno}; + my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_]; # see if we have reached the end of the current controlling container if ( $seqno_top && $seqno == $seqno_top ) { @@ -10539,20 +11532,8 @@ sub extended_ci { next; } - # Skip if requested by -bbx to avoid blinkers - if ( $rno_xci_by_seqno->{$seqno} ) { - next; - } - - # Skip if this is a -bli container (this fixes case b1065) Note: case - # b1065 is also fixed by the update for b1055, so this update is not - # essential now. But there does not seem to be a good reason to add - # xci and bli together, so the update is retained. - if ( $ris_bli_container->{$seqno} ) { - next; - } - # We are looking for opening container tokens with ci + my $K_opening = $K_opening_container->{$seqno}; next unless ( defined($K_opening) && $KK == $K_opening ); # Make sure there is a corresponding closing container @@ -10560,6 +11541,15 @@ sub extended_ci { my $K_closing = $K_closing_container->{$seqno}; next unless defined($K_closing); + # Skip if requested by -bbx to avoid blinkers + next if ( $rno_xci_by_seqno->{$seqno} ); + + # Skip if this is a -bli container (this fixes case b1065) Note: case + # b1065 is also fixed by the update for b1055, so this update is not + # essential now. But there does not seem to be a good reason to add + # xci and bli together, so the update is retained. + next if ( $ris_bli_container->{$seqno} ); + # Require different input lines. This will filter out a large number # of small hash braces and array brackets. If we accidentally filter # out an important container, it will get fixed on the next pass. @@ -10586,6 +11576,7 @@ sub extended_ci { # Fix for b1197 b1198 b1199 b1200 b1201 b1202 # Do not apply -xci if we are running out of space + # TODO: review this; do we also need to look at stress_level_alpha? if ( $level >= $stress_level_beta ) { DEBUG_XCI && print @@ -10697,17 +11688,37 @@ sub bli_adjustment { sub find_multiline_qw { - my $self = shift; + my ( $self, $rqw_lines ) = @_; # Multiline qw quotes are not sequenced items like containers { [ ( # but behave in some respects in a similar way. So this routine finds them # and creates a separate sequence number system for later use. # This is straightforward because they always begin at the end of one line - # and and at the beginning of a later line. This is true no matter how we + # and end at the beginning of a later line. This is true no matter how we # finally make our line breaks, so we can find them before deciding on new # line breaks. + # Input parameter: + # if $rqw_lines is defined it is a ref to array of all line index numbers + # for which there is a type 'q' qw quote at either end of the line. This + # was defined by sub resync_lines_and_tokens for efficiency. + # + + my $rlines = $self->[_rlines_]; + + # if $rqw_lines is not defined (this will occur with -io option) then we + # will have to scan all lines. + if ( !defined($rqw_lines) ) { + $rqw_lines = [ 0 .. @{$rlines} - 1 ]; + } + + # if $rqw_lines is defined but empty, just return because there are no + # multiline qw's + else { + if ( !@{$rqw_lines} ) { return } + } + my $rstarting_multiline_qw_seqno_by_K = {}; my $rending_multiline_qw_seqno_by_K = {}; my $rKrange_multiline_qw_by_seqno = {}; @@ -10715,19 +11726,25 @@ sub find_multiline_qw { my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_]; - my $rlines = $self->[_rlines_]; - my $rLL = $self->[_rLL_]; + my $rLL = $self->[_rLL_]; my $qw_seqno; my $num_qw_seqno = 0; my $K_start_multiline_qw; - foreach my $line_of_tokens ( @{$rlines} ) { + # For reference, here is the old loop, before $rqw_lines became available: + ## foreach my $line_of_tokens ( @{$rlines} ) { + foreach my $iline ( @{$rqw_lines} ) { + my $line_of_tokens = $rlines->[$iline]; + # Note that these first checks are required in case we have to scan + # all lines, not just lines with type 'q' at the ends. my $line_type = $line_of_tokens->{_line_type}; next unless ( $line_type eq 'CODE' ); my $rK_range = $line_of_tokens->{_rK_range}; my ( $Kfirst, $Klast ) = @{$rK_range}; next unless ( defined($Kfirst) && defined($Klast) ); # skip blank line + + # Continuing a sequence of qw lines ... if ( defined($K_start_multiline_qw) ) { my $type = $rLL->[$Kfirst]->[_TYPE_]; @@ -10751,6 +11768,8 @@ EOM $qw_seqno = undef; } } + + # Starting a new a sequence of qw lines ? if ( !defined($K_start_multiline_qw) && $rLL->[$Klast]->[_TYPE_] eq 'q' ) { @@ -10878,7 +11897,7 @@ BEGIN { }; } -sub collapsed_lengths { +sub xlp_collapsed_lengths { my $self = shift; @@ -10934,9 +11953,17 @@ sub collapsed_lengths { push @stack, [ $max_prong_len, $handle_len_x, SEQ_ROOT, undef, undef, undef, undef ]; + #-------------------------------- + # Loop over all lines in the file + #-------------------------------- my $iline = -1; + my $skip_next_line; foreach my $line_of_tokens ( @{$rlines} ) { $iline++; + if ($skip_next_line) { + $skip_next_line = 0; + next; + } my $line_type = $line_of_tokens->{_line_type}; next if ( $line_type ne 'CODE' ); my $CODE_type = $line_of_tokens->{_code_type}; @@ -10992,41 +12019,41 @@ sub collapsed_lengths { else { # Fix for b1319, b1320 - goto NOT_MULTILINE_QW; + $K_start_multiline_qw = undef; } } } - $len = $rLL->[$KK]->[_CUMULATIVE_LENGTH_] - - $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; - - # We may have to add the spaces of one level or ci level ... it - # depends depends on the -xci flag, the -wn flag, and if the qw - # uses a container token as the quote delimiter. + if ( defined($K_start_multiline_qw) ) { + $len = $rLL->[$KK]->[_CUMULATIVE_LENGTH_] - + $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; - # First rule: add ci if there is a $ci_level - if ($ci_level) { - $len += $rOpts_continuation_indentation; - } + # We may have to add the spaces of one level or ci level ... it + # depends depends on the -xci flag, the -wn flag, and if the qw + # uses a container token as the quote delimiter. - # Second rule: otherwise, look for an extra indentation level - # from the start and add one indentation level if found. - elsif ( $level > $level_start_multiline_qw ) { - $len += $rOpts_indent_columns; - } + # First rule: add ci if there is a $ci_level + if ($ci_level) { + $len += $rOpts_continuation_indentation; + } - if ( $len > $max_prong_len ) { $max_prong_len = $len } + # Second rule: otherwise, look for an extra indentation level + # from the start and add one indentation level if found. + elsif ( $level > $level_start_multiline_qw ) { + $len += $rOpts_indent_columns; + } - $last_nonblank_type = 'q'; + if ( $len > $max_prong_len ) { $max_prong_len = $len } - $K_begin_loop = $K_first + 1; + $last_nonblank_type = 'q'; - # We can skip to the next line if more tokens - next if ( $K_begin_loop > $K_last ); + $K_begin_loop = $K_first + 1; + # We can skip to the next line if more tokens + next if ( $K_begin_loop > $K_last ); + } } - NOT_MULTILINE_QW: $K_start_multiline_qw = undef; # Find the terminal token, before any side comment @@ -11041,44 +12068,110 @@ sub collapsed_lengths { # Use length to terminal comma if interrupted list rule applies if ( @stack && $stack[-1]->[_interrupted_list_rule_] ) { my $K_c = $stack[-1]->[_K_c_]; - if ( - defined($K_c) - && $rLL->[$K_terminal]->[_TYPE_] eq ',' + if ( defined($K_c) ) { + + #-------------------------------------------------------------- + # BEGIN patch for issue b1408: If this line ends in an opening + # token, look for the closing token and comma at the end of the + # next line. If so, combine the two lines to get the correct + # sums. This problem seems to require -xlp -vtc=2 and blank + # lines to occur. + #-------------------------------------------------------------- + if ( $rLL->[$K_terminal]->[_TYPE_] eq '{' && !$has_comment ) { + my $seqno_end = $rLL->[$K_terminal]->[_TYPE_SEQUENCE_]; + my $Kc_test = $rLL->[$K_terminal]->[_KNEXT_SEQ_ITEM_]; + + # We are looking for a short broken remnant on the next + # line; something like the third line here (b1408): + # parent => + # Moose::Util::TypeConstraints::find_type_constraint( + # 'RefXX' ), + # or this + # + # Help::WorkSubmitter->_filter_chores_and_maybe_warn_user( + # $story_set_all_chores), + if ( defined($Kc_test) + && $seqno_end == $rLL->[$Kc_test]->[_TYPE_SEQUENCE_] + && $rLL->[$Kc_test]->[_LINE_INDEX_] == $iline + 1 ) + { + my $line_of_tokens_next = $rlines->[ $iline + 1 ]; + my $rtype_count = $rtype_count_by_seqno->{$seqno_end}; + my $comma_count = + defined($rtype_count) ? $rtype_count->{','} : 0; + my ( $K_first_next, $K_terminal_next ) = + @{ $line_of_tokens_next->{_rK_range} }; + + # NOTE: Do not try to do this if there is a side comment + # because then the instability does not seem to occur. + if ( + defined($K_terminal_next) - # Ignore if terminal comma, causes instability (b1297, b1330) - && ( - $K_c - $K_terminal > 2 - || ( $K_c - $K_terminal == 2 - && $rLL->[ $K_terminal + 1 ]->[_TYPE_] ne 'b' ) - ) - ) - { - my $Kend = $K_terminal; + # next line ends with a comma + && $rLL->[$K_terminal_next]->[_TYPE_] eq ',' + + # which follows the closing container token + && ( + $K_terminal_next - $Kc_test == 1 + || ( $K_terminal_next - $Kc_test == 2 + && $rLL->[ $K_terminal_next - 1 ]->[_TYPE_] + eq 'b' ) + ) - # This caused an instability in b1311 by making the result - # dependent on input. It is not really necessary because the - # comment length is added at the end of the loop. - ##if ( $has_comment - ## && !$rOpts_ignore_side_comment_lengths ) - ##{ - ## $Kend = $K_last; - ##} + # no commas in the container + && ( !defined($rtype_count) + || !$rtype_count->{','} ) - # changed from $len to my $leng to fix b1302 b1306 b1317 b1321 - my $leng = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - - $rLL->[ $K_first - 1 ]->[_CUMULATIVE_LENGTH_]; + # for now, restrict this to a container with just 1 + # or two tokens + && $K_terminal_next - $K_terminal <= 5 - # Fix for b1331: at a broken => item, include the length of - # the previous half of the item plus one for the missing space - if ( $last_nonblank_type eq '=>' ) { - $leng += $len + 1; + ) + { + + # combine the next line with the current line + $K_terminal = $K_terminal_next; + $skip_next_line = 1; + if (DEBUG_COLLAPSED_LENGTHS) { + print "Combining lines at line $iline\n"; + } + } + } } - if ( $leng > $max_prong_len ) { $max_prong_len = $leng } + #-------------------------- + # END patch for issue b1408 + #-------------------------- + + if ( + $rLL->[$K_terminal]->[_TYPE_] eq ',' + + # Ignore if terminal comma, causes instability (b1297, b1330) + && ( + $K_c - $K_terminal > 2 + || ( $K_c - $K_terminal == 2 + && $rLL->[ $K_terminal + 1 ]->[_TYPE_] ne 'b' ) + ) + ) + { + + # changed $len to my $leng to fix b1302 b1306 b1317 b1321 + my $leng = $rLL->[$K_terminal]->[_CUMULATIVE_LENGTH_] - + $rLL->[ $K_first - 1 ]->[_CUMULATIVE_LENGTH_]; + + # Fix for b1331: at a broken => item, include the length of + # the previous half of the item plus one for the missing + # space + if ( $last_nonblank_type eq '=>' ) { + $leng += $len + 1; + } + if ( $leng > $max_prong_len ) { $max_prong_len = $leng } + } } } + #---------------------------------- # Loop over tokens on this line ... + #---------------------------------- foreach my $KK ( $K_begin_loop .. $K_terminal ) { my $type = $rLL->[$KK]->[_TYPE_]; @@ -11157,18 +12250,12 @@ sub collapsed_lengths { } # Include length to a comma ending this line + # note: any side comments are handled at loop end (b1332) if ( $interrupted_list_rule && $rLL->[$K_terminal]->[_TYPE_] eq ',' ) { my $Kend = $K_terminal; - # fix for b1332: side comments handled at end of loop - ##if ( $Kend < $K_last - ## && !$rOpts_ignore_side_comment_lengths ) - ##{ - ## $Kend = $K_last; - ##} - # Measure from the next blank if any (fixes b1301) my $Kbeg = $KK; if ( $rLL->[ $Kbeg + 1 ]->[_TYPE_] eq 'b' @@ -11196,97 +12283,95 @@ sub collapsed_lengths { #-------------------- # Exiting a container #-------------------- - elsif ( $is_closing_token{$token} ) { - if (@stack) { - - # The current prong ends - get its handle - my $item = pop @stack; - my $handle_len = $item->[_handle_len_]; - my $seqno_o = $item->[_seqno_o_]; - my $iline_o = $item->[_iline_o_]; - my $K_o = $item->[_K_o_]; - my $K_c_expect = $item->[_K_c_]; - my $collapsed_len = $max_prong_len; - - if ( $seqno_o ne $seqno ) { - - # This can happen if input file has brace errors. - # Otherwise it shouldn't happen. Not fatal but -lp - # formatting could get messed up. - if ( DEVEL_MODE && !get_saw_brace_error() ) { - Fault(<[_handle_len_]; + my $seqno_o = $item->[_seqno_o_]; + my $iline_o = $item->[_iline_o_]; + my $K_o = $item->[_K_o_]; + my $K_c_expect = $item->[_K_c_]; + my $collapsed_len = $max_prong_len; + + if ( $seqno_o ne $seqno ) { + + # This can happen if input file has brace errors. + # Otherwise it shouldn't happen. Not fatal but -lp + # formatting could get messed up. + if ( DEVEL_MODE && !get_saw_brace_error() ) { + Fault(<{$seqno}; - if ($block_type) { - - my $K_c = $KK; - my $block_length = MIN_BLOCK_LEN; - my $is_one_line_block; - my $level = $rLL->[$K_o]->[_LEVEL_]; - if ( defined($K_o) && defined($K_c) ) { - - # note: fixed 3 May 2022 (removed 'my') - $block_length = - $rLL->[ $K_c - 1 ]->[_CUMULATIVE_LENGTH_] - - $rLL->[$K_o]->[_CUMULATIVE_LENGTH_]; - $is_one_line_block = $iline == $iline_o; - } + #------------------------------------------ + # Rules to avoid scrunching code blocks ... + #------------------------------------------ + # Some test cases: + # c098/x107 x108 x110 x112 x114 x115 x117 x118 x119 + my $block_type = $rblock_type_of_seqno->{$seqno}; + if ($block_type) { + + my $K_c = $KK; + my $block_length = MIN_BLOCK_LEN; + my $is_one_line_block; + my $level = $rLL->[$K_o]->[_LEVEL_]; + if ( defined($K_o) && defined($K_c) ) { + + # note: fixed 3 May 2022 (removed 'my') + $block_length = + $rLL->[ $K_c - 1 ]->[_CUMULATIVE_LENGTH_] - + $rLL->[$K_o]->[_CUMULATIVE_LENGTH_]; + $is_one_line_block = $iline == $iline_o; + } - # Code block rule 1: Use the total block length if - # it is less than the minimum. - if ( $block_length < MIN_BLOCK_LEN ) { - $collapsed_len = $block_length; - } + # Code block rule 1: Use the total block length if + # it is less than the minimum. + if ( $block_length < MIN_BLOCK_LEN ) { + $collapsed_len = $block_length; + } - # Code block rule 2: Use the full length of a - # one-line block to avoid breaking it, unless - # extremely long. We do not need to do a precise - # check here, because if it breaks then it will - # stay broken on later iterations. - elsif ( - $is_one_line_block - && $block_length < - $maximum_line_length_at_level[$level] - - # But skip this for sort/map/grep/eval blocks - # because they can reform (b1345) - && !$is_sort_map_grep_eval{$block_type} - ) - { - $collapsed_len = $block_length; - } + # Code block rule 2: Use the full length of a + # one-line block to avoid breaking it, unless + # extremely long. We do not need to do a precise + # check here, because if it breaks then it will + # stay broken on later iterations. + elsif ( + $is_one_line_block + && $block_length < + $maximum_line_length_at_level[$level] + + # But skip this for sort/map/grep/eval blocks + # because they can reform (b1345) + && !$is_sort_map_grep_eval{$block_type} + ) + { + $collapsed_len = $block_length; + } - # Code block rule 3: Otherwise the length should be - # at least MIN_BLOCK_LEN to avoid scrunching code - # blocks. - elsif ( $collapsed_len < MIN_BLOCK_LEN ) { - $collapsed_len = MIN_BLOCK_LEN; - } + # Code block rule 3: Otherwise the length should be + # at least MIN_BLOCK_LEN to avoid scrunching code + # blocks. + elsif ( $collapsed_len < MIN_BLOCK_LEN ) { + $collapsed_len = MIN_BLOCK_LEN; } + } - # Store the result. Some extra space, '2', allows for - # length of an opening token, inside space, comma, ... - # This constant has been tuned to give good overall - # results. - $collapsed_len += 2; - $rcollapsed_length_by_seqno->{$seqno} = $collapsed_len; - - # Restart scanning the lower level prong - if (@stack) { - $max_prong_len = $stack[-1]->[_max_prong_len_]; - $collapsed_len += $handle_len; - if ( $collapsed_len > $max_prong_len ) { - $max_prong_len = $collapsed_len; - } + # Store the result. Some extra space, '2', allows for + # length of an opening token, inside space, comma, ... + # This constant has been tuned to give good overall + # results. + $collapsed_len += 2; + $rcollapsed_length_by_seqno->{$seqno} = $collapsed_len; + + # Restart scanning the lower level prong + if (@stack) { + $max_prong_len = $stack[-1]->[_max_prong_len_]; + $collapsed_len += $handle_len; + if ( $collapsed_len > $max_prong_len ) { + $max_prong_len = $collapsed_len; } } } @@ -11335,7 +12420,7 @@ EOM } ## end loop over tokens on this line - # Now take care of any side comment + # Now take care of any side comment; if ($has_comment) { if ($rOpts_ignore_side_comment_lengths) { $len = 0; @@ -11366,7 +12451,7 @@ EOM } return; -} ## end sub collapsed_lengths +} ## end sub xlp_collapsed_lengths sub is_excluded_lp { @@ -11378,6 +12463,9 @@ sub is_excluded_lp { # what to exclude: $line_up_parentheses_control_is_lxpl = 1, or # what to include: $line_up_parentheses_control_is_lxpl = 0 + # Input parameter: + # $KK = index of the container opening token + my ( $self, $KK ) = @_; my $rLL = $self->[_rLL_]; my $rtoken_vars = $rLL->[$KK]; @@ -11425,6 +12513,7 @@ sub is_excluded_lp { elsif ( $flag1 eq 'F' ) { $match_flag1 = !$is_f } elsif ( $flag1 eq 'w' ) { $match_flag1 = $is_w } elsif ( $flag1 eq 'W' ) { $match_flag1 = !$is_w } + ## else { no match found } } # See if we can exclude this based on the flag1 test... @@ -11512,26 +12601,6 @@ sub process_all_lines { my $vertical_aligner_object = $self->[_vertical_aligner_object_]; my $save_logfile = $self->[_save_logfile_]; - # Note for RT#118553, leave only one newline at the end of a file. - # Example code to do this is in comments below: - # my $Opt_trim_ending_blank_lines = 0; - # if ($Opt_trim_ending_blank_lines) { - # while ( my $line_of_tokens = pop @{$rlines} ) { - # my $line_type = $line_of_tokens->{_line_type}; - # if ( $line_type eq 'CODE' ) { - # my $CODE_type = $line_of_tokens->{_code_type}; - # next if ( $CODE_type eq 'BL' ); - # } - # push @{$rlines}, $line_of_tokens; - # last; - # } - # } - - # But while this would be a trivial update, it would have very undesirable - # side effects when perltidy is run from within an editor on a small snippet. - # So this is best done with a separate filter, such - # as 'delete_ending_blank_lines.pl' in the examples folder. - # Flag to prevent blank lines when POD occurs in a format skipping sect. my $in_format_skipping_section; @@ -11542,16 +12611,16 @@ sub process_all_lines { my $i_last_POD_END = -10; my $i = -1; foreach my $line_of_tokens ( @{$rlines} ) { - $i++; # insert blank lines requested for keyword sequences - if ( $i > 0 - && defined( $rwant_blank_line_after->{ $i - 1 } ) - && $rwant_blank_line_after->{ $i - 1 } == 1 ) + if ( defined( $rwant_blank_line_after->{$i} ) + && $rwant_blank_line_after->{$i} == 1 ) { $self->want_blank_line(); } + $i++; + my $last_line_type = $line_type; $line_type = $line_of_tokens->{_line_type}; my $input_line = $line_of_tokens->{_line_text}; @@ -11982,31 +13051,29 @@ EOM # First check: skip if next line is not one deeper my $Knext_nonblank = $self->K_next_nonblank($K_last); - goto RETURN if ( !defined($Knext_nonblank) ); + return if ( !defined($Knext_nonblank) ); my $level_next = $rLL->[$Knext_nonblank]->[_LEVEL_]; - goto RETURN if ( $level_next != $level_beg + 1 ); + return if ( $level_next != $level_beg + 1 ); # Find the parent container of the first token on the next line my $parent_seqno = $self->parent_seqno_by_K($Knext_nonblank); - goto RETURN unless ( defined($parent_seqno) ); + return unless ( defined($parent_seqno) ); # Must not be a weld (can be unstable) - goto RETURN + return if ( $total_weld_count && $self->is_welded_at_seqno($parent_seqno) ); # Opening container must exist and be on this line my $Ko = $K_opening_container->{$parent_seqno}; - goto RETURN unless ( defined($Ko) && $Ko > $K_first && $Ko <= $K_last ); + return unless ( defined($Ko) && $Ko > $K_first && $Ko <= $K_last ); # Verify that the closing container exists and is on a later line my $Kc = $K_closing_container->{$parent_seqno}; - goto RETURN unless ( defined($Kc) && $Kc > $K_last ); + return unless ( defined($Kc) && $Kc > $K_last ); # That's it $K_closing = $Kc; - goto RETURN; - RETURN: return; }; @@ -12286,17 +13353,17 @@ EOM # Batch variables: these describe the current batch of code being formed # and sent down the pipeline. They are initialized in the next # sub. - my ( $rbrace_follower, $index_start_one_line_block, - $semicolons_before_block_self_destruct, - $starting_in_quote, $ending_in_quote, ); + my ( + $rbrace_follower, $index_start_one_line_block, + $starting_in_quote, $ending_in_quote, + ); # Called before the start of each new batch sub initialize_batch_variables { - $max_index_to_go = UNDEFINED_INDEX; - $summed_lengths_to_go[0] = 0; - $nesting_depth_to_go[0] = 0; - ##@summed_lengths_to_go = @nesting_depth_to_go = (0); + $max_index_to_go = UNDEFINED_INDEX; + $summed_lengths_to_go[0] = 0; + $nesting_depth_to_go[0] = 0; $ri_starting_one_line_block = []; # The initialization code for the remaining batch arrays is as follows @@ -12333,9 +13400,7 @@ EOM $rbrace_follower = undef; $ending_in_quote = 0; - # These get re-initialized by calls to sub destroy_one_line_block(): - $index_start_one_line_block = UNDEFINED_INDEX; - $semicolons_before_block_self_destruct = 0; + $index_start_one_line_block = undef; # initialize forced breakpoint vars associated with each output batch $forced_breakpoint_count = 0; @@ -12357,14 +13422,10 @@ EOM } ## end sub leading_spaces_to_go sub create_one_line_block { - ( $index_start_one_line_block, $semicolons_before_block_self_destruct ) - = @_; - return; - } - sub destroy_one_line_block { - $index_start_one_line_block = UNDEFINED_INDEX; - $semicolons_before_block_self_destruct = 0; + # set index starting next one-line block + # call with no args to delete the current one-line block + ($index_start_one_line_block) = @_; return; } @@ -12377,16 +13438,37 @@ EOM my ( $self, $Ktoken_vars, $rtoken_vars ) = @_; - # Add one token to the next batch. + #------------------------------------------------------- + # Token storage utility for sub process_line_of_CODE. + # Add one token to the next batch of '_to_go' variables. + #------------------------------------------------------- + + # Input parameters: # $Ktoken_vars = the index K in the global token array # $rtoken_vars = $rLL->[$Ktoken_vars] = the corresponding token values # unless they are temporarily being overridden - #------------------------------------------------------------------ # NOTE: called once per token so coding efficiency is critical here - #------------------------------------------------------------------ - my $type = $rtoken_vars->[_TYPE_]; + my ( + + $type, + $token, + $ci_level, + $level, + $seqno, + $length, + + ) = @{$rtoken_vars}[ + + _TYPE_, + _TOKEN_, + _CI_LEVEL_, + _LEVEL_, + _TYPE_SEQUENCE_, + _TOKEN_LENGTH_, + + ]; # Check for emergency flush... # The K indexes in the batch must always be a continuous sequence of @@ -12425,37 +13507,48 @@ EOM if ( $type eq 'b' ) { return } } + # Clip levels to zero if there are level errors in the file. + # We had to wait until now for reasons explained in sub 'write_line'. + if ( $level < 0 ) { $level = 0 } + + # Safety check that length is defined. Should not be needed now. + # Former patch for indent-only, in which the entire set of tokens is + # turned into type 'q'. Lengths may have not been defined because sub + # 'respace_tokens' is bypassed. We do not need lengths in this case, + # but we will use the character count to have a defined value. In the + # future, it would be nicer to have 'respace_tokens' convert the lines + # to quotes and get correct lengths. + if ( !defined($length) ) { $length = length($token) } + #---------------------------- # add this token to the batch #---------------------------- - $K_to_go[ ++$max_index_to_go ] = $Ktoken_vars; - $types_to_go[$max_index_to_go] = $type; - + $K_to_go[ ++$max_index_to_go ] = $Ktoken_vars; + $types_to_go[$max_index_to_go] = $type; $old_breakpoint_to_go[$max_index_to_go] = 0; $forced_breakpoint_to_go[$max_index_to_go] = 0; $mate_index_to_go[$max_index_to_go] = -1; + $tokens_to_go[$max_index_to_go] = $token; + $ci_levels_to_go[$max_index_to_go] = $ci_level; + $levels_to_go[$max_index_to_go] = $level; + $type_sequence_to_go[$max_index_to_go] = $seqno; + $nobreak_to_go[$max_index_to_go] = $no_internal_newlines; + $token_lengths_to_go[$max_index_to_go] = $length; - my $token = $tokens_to_go[$max_index_to_go] = $rtoken_vars->[_TOKEN_]; - - my $ci_level = $ci_levels_to_go[$max_index_to_go] = - $rtoken_vars->[_CI_LEVEL_]; - - # Clip levels to zero if there are level errors in the file. - # We had to wait until now for reasons explained in sub 'write_line'. - my $level = $rtoken_vars->[_LEVEL_]; - if ( $level < 0 ) { $level = 0 } - $levels_to_go[$max_index_to_go] = $level; - - my $seqno = $type_sequence_to_go[$max_index_to_go] = - $rtoken_vars->[_TYPE_SEQUENCE_]; - - my $in_continued_quote = - ( $Ktoken_vars == $K_first ) && $line_of_tokens->{_starting_in_quote}; + # We keep a running sum of token lengths from the start of this batch: + # summed_lengths_to_go[$i] = total length to just before token $i + # summed_lengths_to_go[$i+1] = total length to just after token $i + $summed_lengths_to_go[ $max_index_to_go + 1 ] = + $summed_lengths_to_go[$max_index_to_go] + $length; # Initializations for first token of new batch - if ( $max_index_to_go == 0 ) { + if ( !$max_index_to_go ) { - $starting_in_quote = $in_continued_quote; + # Reset flag '$starting_in_quote' for a new batch. It must be set + # to the value of '$in_continued_quote', but here for efficiency we + # set it to zero, which is its normal value. Then in coding below + # we will change it if we find we are actually in a continued quote. + $starting_in_quote = 0; # Update the next parent sequence number for each new batch. @@ -12536,33 +13629,15 @@ EOM } } - $nobreak_to_go[$max_index_to_go] = $no_internal_newlines; - - my $length = $rtoken_vars->[_TOKEN_LENGTH_]; - - # Safety check that length is defined. Should not be needed now. - # Former patch for indent-only, in which the entire set of tokens is - # turned into type 'q'. Lengths may have not been defined because sub - # 'respace_tokens' is bypassed. We do not need lengths in this case, - # but we will use the character count to have a defined value. In the - # future, it would be nicer to have 'respace_tokens' convert the lines - # to quotes and get correct lengths. - if ( !defined($length) ) { - $length = length($token); - } - - $token_lengths_to_go[$max_index_to_go] = $length; - - # We keep a running sum of token lengths from the start of this batch: - # summed_lengths_to_go[$i] = total length to just before token $i - # summed_lengths_to_go[$i+1] = total length to just after token $i - $summed_lengths_to_go[ $max_index_to_go + 1 ] = - $summed_lengths_to_go[$max_index_to_go] + $length; - # Define the indentation that this token will have in two cases: # Without CI = reduced_spaces_to_go # With CI = leading_spaces_to_go - if ($in_continued_quote) { + if ( ( $Ktoken_vars == $K_first ) + && $line_of_tokens->{_starting_in_quote} ) + { + # in a continued quote - correct value set above if first token + if ( $max_index_to_go == 0 ) { $starting_in_quote = 1 } + $leading_spaces_to_go[$max_index_to_go] = 0; $reduced_spaces_to_go[$max_index_to_go] = 0; } @@ -12586,10 +13661,11 @@ EOM sub flush_batch_of_CODE { - # Finish any batch packaging and call the process routine. + # Finish and process the current batch. # This must be the only call to grind_batch_of_CODE() my ($self) = @_; + # If a batch has been started ... if ( $max_index_to_go >= 0 ) { # Create an array to hold variables for this batch @@ -12620,6 +13696,9 @@ EOM $self->[_this_batch_] = $this_batch; + #------------------- + # process this batch + #------------------- $self->grind_batch_of_CODE(); # Done .. this batch is history @@ -12633,14 +13712,14 @@ EOM sub end_batch { - # end the current batch, EXCEPT for a few special cases + # End the current batch, EXCEPT for a few special cases my ($self) = @_; if ( $max_index_to_go < 0 ) { - # This is harmless but should be eliminated in development + # nothing to do .. this is harmless but wastes time. if (DEVEL_MODE) { - Fault("End batch called with nothing to do; please fix\n"); + Fault("sub end_batch called with nothing to do; please fix\n"); } return; } @@ -12655,7 +13734,7 @@ EOM # Exception 2: just set a tentative breakpoint if we might be in a # one-line block - if ( $index_start_one_line_block != UNDEFINED_INDEX ) { + if ( defined($index_start_one_line_block) ) { $self->set_forced_breakpoint($max_index_to_go); return; } @@ -12679,7 +13758,7 @@ EOM # end the current batch with 1 exception - destroy_one_line_block(); + $index_start_one_line_block = undef; # Exception: if we are flushing within the code stream only to insert # blank line(s), then we can keep the batch intact at a weld. This @@ -12706,25 +13785,16 @@ EOM # It outputs full-line comments and blank lines immediately. - # The tokens are copied one-by-one from the global token array $rLL to - # a set of '_to_go' arrays which collect batches of tokens for a - # further processing via calls to 'sub store_token_to_go', until a well - # defined 'structural' break point* or 'forced' breakpoint* is reached. - # Then, the batch of collected '_to_go' tokens is passed along to 'sub - # grind_batch_of_CODE' for further processing. - - # * 'structural' break points are basically line breaks corresponding - # to code blocks. An example is a chain of if-elsif-else statements, - # which should typically be broken at the opening and closing braces. - - # * 'forced' break points are breaks required by side comments or by - # special user controls. - - # So this routine is just making an initial set of required line - # breaks, basically regardless of the maximum requested line length. - # The subsequent stage of formatting make additional line breaks - # appropriate for lists and logical structures, and to keep line - # lengths below the requested maximum line length. + # For lines of code: + # - Tokens are copied one-by-one from the global token + # array $rLL to a set of '_to_go' arrays which collect batches of + # tokens. This is done with calls to 'store_token_to_go'. + # - A batch is closed and processed upon reaching a well defined + # structural break point (i.e. code block boundary) or forced + # breakpoint (i.e. side comment or special user controls). + # - Subsequent stages of formatting make additional line breaks + # appropriate for lists and logical structures, and as necessary to + # keep line lengths below the requested maximum line length. #----------------------------------- # begin initialize closure variables @@ -12809,7 +13879,7 @@ EOM return; } - destroy_one_line_block(); + $index_start_one_line_block = undef; $self->end_batch() if ( $max_index_to_go >= 0 ); # output a blank line before block comments @@ -12871,30 +13941,33 @@ EOM return; } - # Compare input/output indentation except for: - # - hanging side comments - # - continuation lines (have unknown amount of initial blank space) - # - and lines which are quotes (because they may have been outdented) - my $guessed_indentation_level = - $line_of_tokens->{_guessed_indentation_level}; - - unless ( $CODE_type eq 'HSC' - || $rtok_first->[_CI_LEVEL_] > 0 - || $guessed_indentation_level == 0 && $rtok_first->[_TYPE_] eq 'Q' ) - { - my $input_line_number = $line_of_tokens->{_line_number}; - $self->compare_indentation_levels( $K_first, - $guessed_indentation_level, $input_line_number ); + #-------------------------------------------- + # Compare input/output indentation in logfile + #-------------------------------------------- + if ( $self->[_save_logfile_] ) { + + # Compare input/output indentation except for: + # - hanging side comments + # - continuation lines (have unknown leading blank space) + # - and lines which are quotes (they may have been outdented) + my $guessed_indentation_level = + $line_of_tokens->{_guessed_indentation_level}; + + unless ( $CODE_type eq 'HSC' + || $rtok_first->[_CI_LEVEL_] > 0 + || $guessed_indentation_level == 0 + && $rtok_first->[_TYPE_] eq 'Q' ) + { + my $input_line_number = $line_of_tokens->{_line_number}; + $self->compare_indentation_levels( $K_first, + $guessed_indentation_level, $input_line_number ); + } } - #------------------------ - # Handle indentation-only - #------------------------ + #----------------------------------------- + # Handle a line marked as indentation-only + #----------------------------------------- - # NOTE: In previous versions we sent all qw lines out immediately here. - # No longer doing this: also write a line which is entirely a 'qw' list - # to allow stacking of opening and closing tokens. Note that interior - # qw lines will still go out at the end of this routine. if ( $CODE_type eq 'IO' ) { $self->flush(); my $line = $input_line; @@ -12927,55 +14000,144 @@ EOM # if we do not see another elseif or an else. if ($looking_for_else) { - ## /^(elsif|else)$/ - if ( !$is_elsif_else{ $rLL->[$K_first_true]->[_TOKEN_] } ) { - write_logfile_entry("(No else block)\n"); - } - $looking_for_else = 0; - } + ## /^(elsif|else)$/ + if ( !$is_elsif_else{ $rLL->[$K_first_true]->[_TOKEN_] } ) { + write_logfile_entry("(No else block)\n"); + } + $looking_for_else = 0; + } + + # This is a good place to kill incomplete one-line blocks + if ( $max_index_to_go >= 0 ) { + if ( + + # this check needed -mangle (for example rt125012) + ( + ( !$index_start_one_line_block ) + && ( $last_old_nonblank_type eq ';' ) + && ( $first_new_nonblank_token ne '}' ) + ) + + # Patch for RT #98902. Honor request to break at old commas. + || ( $rOpts_break_at_old_comma_breakpoints + && $last_old_nonblank_type eq ',' ) + ) + { + $forced_breakpoint_to_go[$max_index_to_go] = 1 + if ($rOpts_break_at_old_comma_breakpoints); + $index_start_one_line_block = undef; + $self->end_batch(); + } + + # Keep any requested breaks before this line. Note that we have to + # use the original K_first because it may have been reduced above + # to add a blank. The value of the flag is as follows: + # 1 => hard break, flush the batch + # 2 => soft break, set breakpoint and continue building the batch + if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} ) { + $index_start_one_line_block = undef; + if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} == 2 ) { + $self->set_forced_breakpoint($max_index_to_go); + } + else { + $self->end_batch() if ( $max_index_to_go >= 0 ); + } + } + } + + #-------------------------------------- + # loop to process the tokens one-by-one + #-------------------------------------- + $self->process_line_inner_loop($has_side_comment); + + # if there is anything left in the output buffer ... + if ( $max_index_to_go >= 0 ) { + + my $type = $rLL->[$K_last]->[_TYPE_]; + my $break_flag = $self->[_rbreak_after_Klast_]->{$K_last}; + + # we have to flush .. + if ( + + # if there is a side comment... + $type eq '#' + + # if this line ends in a quote + # NOTE: This is critically important for insuring that quoted + # lines do not get processed by things like -sot and -sct + || $in_quote + + # if this is a VERSION statement + || $CODE_type eq 'VER' + + # to keep a label at the end of a line + || ( $type eq 'J' && $rOpts_break_after_labels != 2 ) + + # if we have a hard break request + || $break_flag && $break_flag != 2 - # This is a good place to kill incomplete one-line blocks - if ( $max_index_to_go >= 0 ) { - if ( - ( - ( $semicolons_before_block_self_destruct == 0 ) - && ( $last_old_nonblank_type eq ';' ) - && ( $first_new_nonblank_token ne '}' ) - ) + # if we are instructed to keep all old line breaks + || !$rOpts->{'delete-old-newlines'} - # Patch for RT #98902. Honor request to break at old commas. - || ( $rOpts_break_at_old_comma_breakpoints - && $last_old_nonblank_type eq ',' ) + # if this is a line of the form 'use overload'. A break here in + # the input file is a good break because it will allow the + # operators which follow to be formatted well. Without this + # break the formatting with -ci=4 -xci is poor, for example. + + # use overload + # '+' => sub { + # print length $_[2], "\n"; + # my ( $x, $y ) = _order(@_); + # Number::Roman->new( int $x + $y ); + # }, + # '-' => sub { + # my ( $x, $y ) = _order(@_); + # Number::Roman->new( int $x - $y ); + # }; + || ( $max_index_to_go == 2 + && $types_to_go[0] eq 'k' + && $tokens_to_go[0] eq 'use' + && $tokens_to_go[$max_index_to_go] eq 'overload' ) ) { - $forced_breakpoint_to_go[$max_index_to_go] = 1 - if ($rOpts_break_at_old_comma_breakpoints); - destroy_one_line_block(); + $index_start_one_line_block = undef; $self->end_batch(); } - # Keep any requested breaks before this line. Note that we have to - # use the original K_first because it may have been reduced above - # to add a blank. The value of the flag is as follows: - # 1 => hard break, flush the batch - # 2 => soft break, set breakpoint and continue building the batch - if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} ) { - destroy_one_line_block(); - if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} == 2 ) { + else { + + # Check for a soft break request + if ( $break_flag && $break_flag == 2 ) { $self->set_forced_breakpoint($max_index_to_go); } - else { - $self->end_batch() if ( $max_index_to_go >= 0 ); + + # mark old line breakpoints in current output stream + if ( !$rOpts_ignore_old_breakpoints + || $self->[_ris_essential_old_breakpoint_]->{$K_last} ) + { + my $jobp = $max_index_to_go; + if ( $types_to_go[$max_index_to_go] eq 'b' + && $max_index_to_go > 0 ) + { + $jobp--; + } + $old_breakpoint_to_go[$jobp] = 1; } } } - #-------------------------------------- - # loop to process the tokens one-by-one - #-------------------------------------- + return; + } ## end sub process_line_of_CODE + + sub process_line_inner_loop { - # We do not want a leading blank if the previous batch just got output + my ( $self, $has_side_comment ) = @_; + #-------------------------------------------------------------------- + # Loop to move all tokens from one input line to a newly forming batch + #-------------------------------------------------------------------- + + # Do not start a new batch with a blank space if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) { $K_first++; } @@ -13037,28 +14199,25 @@ EOM } } - # if at last token ... - if ( $Ktoken_vars == $K_last ) { + #--------------------- + # handle side comments + #--------------------- + if ($has_side_comment) { - #--------------------- - # handle side comments - #--------------------- - if ($has_side_comment) { + # if at last token ... + if ( $Ktoken_vars == $K_last ) { $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); next; } - } - # if before last token ... do not allow breaks which would promote - # a side comment to a block comment - elsif ( - $has_side_comment - && ( $Ktoken_vars == $K_last - 1 + # if before last token ... do not allow breaks which would + # promote a side comment to a block comment + elsif ($Ktoken_vars == $K_last - 1 || $Ktoken_vars == $K_last - 2 && $rLL->[ $K_last - 1 ]->[_TYPE_] eq 'b' ) - ) - { - $no_internal_newlines = 2; + { + $no_internal_newlines = 2; + } } # Process non-blank and non-comment tokens ... @@ -13077,22 +14236,12 @@ EOM $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_]; } - my $break_before_semicolon = ( $Ktoken_vars == $K_first ) - && $rOpts_break_at_old_semicolon_breakpoints; - - # kill one-line blocks with too many semicolons - $semicolons_before_block_self_destruct--; - if ( - $break_before_semicolon - || ( $semicolons_before_block_self_destruct < 0 ) - || ( $semicolons_before_block_self_destruct == 0 - && $next_nonblank_token_type !~ /^[b\}]$/ ) - ) + if ( $rOpts_break_at_old_semicolon_breakpoints + && ( $Ktoken_vars == $K_first ) + && $max_index_to_go >= 0 + && !defined($index_start_one_line_block) ) { - destroy_one_line_block(); - $self->end_batch() - if ( $break_before_semicolon - && $max_index_to_go >= 0 ); + $self->end_batch(); } $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); @@ -13153,7 +14302,7 @@ EOM $want_break # and we were unable to start looking for a block, - && $index_start_one_line_block == UNDEFINED_INDEX + && !defined($index_start_one_line_block) # or if it will not be on same line as its keyword, so that # it will be outdented (eval.t, overload.t), and the user @@ -13198,7 +14347,7 @@ EOM } # If there is a pending one-line block .. - if ( $index_start_one_line_block != UNDEFINED_INDEX ) { + if ( defined($index_start_one_line_block) ) { # Fix for b1208: if a side comment follows this closing # brace then we must include its length in the length test @@ -13220,14 +14369,9 @@ EOM # token $self->excess_line_length( $index_start_one_line_block, $max_index_to_go ) + $added_length >= 0 - - # or if it has too many semicolons - || ( $semicolons_before_block_self_destruct == 0 - && defined($K_last_nonblank_code) - && $rLL->[$K_last_nonblank_code]->[_TYPE_] ne ';' ) ) { - destroy_one_line_block(); + $index_start_one_line_block = undef; } } @@ -13235,7 +14379,7 @@ EOM $self->end_batch() if ( $max_index_to_go >= 0 && !$nobreak_BEFORE_BLOCK - && $index_start_one_line_block == UNDEFINED_INDEX ); + && !defined($index_start_one_line_block) ); # store the closing curly brace $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); @@ -13245,14 +14389,14 @@ EOM # So now we have to check for special cases. # if this '}' successfully ends a one-line block.. - my $is_one_line_block = 0; - my $keep_going = 0; - if ( $index_start_one_line_block != UNDEFINED_INDEX ) { + my $one_line_block_type = EMPTY_STRING; + my $keep_going; + if ( defined($index_start_one_line_block) ) { # Remember the type of token just before the # opening brace. It would be more general to use # a stack, but this will work for one-line blocks. - $is_one_line_block = + $one_line_block_type = $types_to_go[$index_start_one_line_block]; # we have to actually make it by removing tentative @@ -13284,7 +14428,7 @@ EOM $index_start_one_line_block; # then re-initialize for the next one-line block - destroy_one_line_block(); + $index_start_one_line_block = undef; # then decide if we want to break after the '}' .. # We will keep going to allow certain brace followers as in: @@ -13298,7 +14442,7 @@ EOM # Follow users break point for # one line block types U & G, such as a 'try' block - || $is_one_line_block =~ /^[UG]$/ + || $one_line_block_type =~ /^[UG]$/ && $Ktoken_vars == $K_last ) @@ -13328,7 +14472,7 @@ EOM # added eval for borris.t elsif ($is_sort_map_grep_eval{$block_type} - || $is_one_line_block eq 'G' ) + || $one_line_block_type eq 'G' ) { $rbrace_follower = undef; $keep_going = 1; @@ -13336,7 +14480,7 @@ EOM # anonymous sub elsif ( $self->[_ris_asub_block_]->{$type_sequence} ) { - if ($is_one_line_block) { + if ($one_line_block_type) { $rbrace_follower = \%is_anon_sub_1_brace_follower; @@ -13364,14 +14508,12 @@ EOM my $Kc = $K_closing_container->{$p_seqno}; my $is_excluded = $self->[_ris_excluded_lp_container_]->{$p_seqno}; - if ( defined($Kc) - && $rLL->[$Kc]->[_TOKEN_] eq '}' - && !$is_excluded - && $Kc - $Ktoken_vars <= 2 ) - { - $rbrace_follower = undef; - $keep_going = 1; - } + $keep_going = + ( defined($Kc) + && $rLL->[$Kc]->[_TOKEN_] eq '}' + && !$is_excluded + && $Kc - $Ktoken_vars <= 2 ); + $rbrace_follower = undef if ($keep_going); } } else { @@ -13408,6 +14550,8 @@ EOM if ($keep_going) { # keep going + $rbrace_follower = undef; + } # if no more tokens, postpone decision until re-entering @@ -13422,11 +14566,27 @@ EOM } elsif ($rbrace_follower) { - unless ( $rbrace_follower->{$next_nonblank_token} ) { + if ( $rbrace_follower->{$next_nonblank_token} ) { + + # Fix for b1385: keep break after a comma following a + # 'do' block. This could also be used for other block + # types, but that would cause a significant change in + # existing formatting without much benefit. + if ( $next_nonblank_token eq ',' + && $Knnb eq $K_last + && $block_type eq 'do' + && $rOpts_add_newlines + && $self->is_trailing_comma($Knnb) ) + { + $self->[_rbreak_after_Klast_]->{$K_last} = 1; + } + } + else { $self->end_batch() unless ( $no_internal_newlines || $max_index_to_go < 0 ); } + $rbrace_follower = undef; } @@ -13445,7 +14605,6 @@ EOM # no newlines after seeing here-target $no_internal_newlines = 2; - ## destroy_one_line_block(); # deleted to fix case b529 $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); } @@ -13470,86 +14629,37 @@ EOM $K_last_nonblank_code = $Ktoken_vars; } ## end of loop over all tokens in this line + return; + } ## end sub process_line_inner_loop - # if there is anything left in the output buffer ... - if ( $max_index_to_go >= 0 ) { - - my $type = $rLL->[$K_last]->[_TYPE_]; - my $break_flag = $self->[_rbreak_after_Klast_]->{$K_last}; - - # we have to flush .. - if ( - - # if there is a side comment... - $type eq '#' - - # if this line ends in a quote - # NOTE: This is critically important for insuring that quoted - # lines do not get processed by things like -sot and -sct - || $in_quote - - # if this is a VERSION statement - || $CODE_type eq 'VER' - - # to keep a label at the end of a line - || ( $type eq 'J' && $rOpts_break_after_labels != 2 ) - - # if we have a hard break request - || $break_flag && $break_flag != 2 - - # if we are instructed to keep all old line breaks - || !$rOpts->{'delete-old-newlines'} - - # if this is a line of the form 'use overload'. A break here in - # the input file is a good break because it will allow the - # operators which follow to be formatted well. Without this - # break the formatting with -ci=4 -xci is poor, for example. - - # use overload - # '+' => sub { - # print length $_[2], "\n"; - # my ( $x, $y ) = _order(@_); - # Number::Roman->new( int $x + $y ); - # }, - # '-' => sub { - # my ( $x, $y ) = _order(@_); - # Number::Roman->new( int $x - $y ); - # }; - || ( $max_index_to_go == 2 - && $types_to_go[0] eq 'k' - && $tokens_to_go[0] eq 'use' - && $tokens_to_go[$max_index_to_go] eq 'overload' ) - ) - { - destroy_one_line_block(); - $self->end_batch(); - } - - else { +} ## end closure process_line_of_CODE - # Check for a soft break request - if ( $break_flag && $break_flag == 2 ) { - $self->set_forced_breakpoint($max_index_to_go); - } +sub is_trailing_comma { + my ( $self, $KK ) = @_; - # mark old line breakpoints in current output stream - if ( !$rOpts_ignore_old_breakpoints - || $self->[_ris_essential_old_breakpoint_]->{$K_last} ) - { - my $jobp = $max_index_to_go; - if ( $types_to_go[$max_index_to_go] eq 'b' - && $max_index_to_go > 0 ) - { - $jobp--; - } - $old_breakpoint_to_go[$jobp] = 1; - } - } - } + # Given: + # $KK - index of a comma in token list + # Return: + # true if the comma at index $KK is a trailing comma + # false if not + my $rLL = $self->[_rLL_]; + my $type_KK = $rLL->[$KK]->[_TYPE_]; + if ( $type_KK ne ',' ) { + DEVEL_MODE + && Fault("Bad call: expected type ',' but received '$type_KK'\n"); return; - } ## end sub process_line_of_CODE -} ## end closure process_line_of_CODE + } + my $Knnb = $self->K_next_nonblank($KK); + if ( defined($Knnb) ) { + my $type_sequence = $rLL->[$Knnb]->[_TYPE_SEQUENCE_]; + my $type_Knnb = $rLL->[$Knnb]->[_TYPE_]; + if ( $type_sequence && $is_closing_type{$type_Knnb} ) { + return 1; + } + } + return; +} ## end sub is_trailing_comma sub tight_paren_follows { @@ -13698,13 +14808,27 @@ BEGIN { sub starting_one_line_block { - # after seeing an opening curly brace, look for the closing brace and see + # After seeing an opening curly brace, look for the closing brace and see # if the entire block will fit on a line. This routine is not always right # so a check is made later (at the closing brace) to make sure we really # have a one-line block. We have to do this preliminary check, though, # because otherwise we would always break at a semicolon within a one-line # block if the block contains multiple statements. + # Given: + # $Kj = index of opening brace + # $K_last_nonblank = index of previous nonblank code token + # $K_last = index of last token of input line + + # Calls 'create_one_line_block' if one-line block might be formed. + + # Also returns a flag '$too_long': + # true = distance from opening keyword to OPENING brace exceeds + # the maximum line length. + # false (simple return) => not too long + # Note that this flag is for distance from the statement start to the + # OPENING brace, not the closing brace. + my ( $self, $Kj, $K_last_nonblank, $K_last ) = @_; my $rbreak_container = $self->[_rbreak_container_]; @@ -13714,11 +14838,7 @@ sub starting_one_line_block { my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; # kill any current block - we can only go 1 deep - destroy_one_line_block(); - - # return value: - # 1=distance from start of block to opening brace exceeds line length - # 0=otherwise + create_one_line_block(); my $i_start = 0; @@ -13730,13 +14850,13 @@ sub starting_one_line_block { if ( !defined($max_index_to_go) || $max_index_to_go < 0 ) { Fault("program bug: store_token_to_go called incorrectly\n") if (DEVEL_MODE); - return 0; + return; } # Return if block should be broken my $type_sequence_j = $rLL->[$Kj]->[_TYPE_SEQUENCE_]; if ( $rbreak_container->{$type_sequence_j} ) { - return 0; + return; } my $ris_bli_container = $self->[_ris_bli_container_]; @@ -13754,7 +14874,9 @@ sub starting_one_line_block { } } + #--------------------------------------------------------------------- # find the starting keyword for this block (such as 'if', 'else', ...) + #--------------------------------------------------------------------- if ( $max_index_to_go == 0 ##|| $block_type =~ /^[\{\}\;\:]$/ @@ -13785,22 +14907,22 @@ sub starting_one_line_block { # Find the opening paren my $K_start = $K_to_go[$i_start]; - return 0 unless defined($K_start); + return unless defined($K_start); my $seqno = $type_sequence_to_go[$i_start]; - return 0 unless ($seqno); + return unless ($seqno); my $K_opening = $K_opening_container->{$seqno}; - return 0 unless defined($K_opening); + return unless defined($K_opening); my $i_opening = $i_start + ( $K_opening - $K_start ); # give up if not on this line - return 0 unless ( $i_opening >= 0 ); - $i_start = $i_opening; ##$index_max_forced_break + 1; + return unless ( $i_opening >= 0 ); + $i_start = $i_opening; # go back one token before the opening paren if ( $i_start > 0 ) { $i_start-- } if ( $types_to_go[$i_start] eq 'b' && $i_start > 0 ) { $i_start--; } my $lev = $levels_to_go[$i_start]; - if ( $lev > $rLL->[$Kj]->[_LEVEL_] ) { return 0 } + if ( $lev > $rLL->[$Kj]->[_LEVEL_] ) { return } } } @@ -13827,7 +14949,7 @@ sub starting_one_line_block { $stripped_block_type = substr( $block_type, 0, -2 ); } unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) { - return 0; + return; } } @@ -13841,11 +14963,14 @@ sub starting_one_line_block { $i_start++; } unless ( $tokens_to_go[$i_start] eq $block_type ) { - return 0; + return; } } - else { + + #------------------------------------------- + # Couldn't find start - return too_long flag + #------------------------------------------- return 1; } @@ -13854,15 +14979,23 @@ sub starting_one_line_block { my $maximum_line_length = $maximum_line_length_at_level[ $levels_to_go[$i_start] ]; - # see if block starting location is too great to even start + # see if distance to the opening container is too great to even start if ( $pos > $maximum_line_length ) { + + #------------------------------ + # too long to the opening token + #------------------------------ return 1; } - # See if everything to the closing token will fit on one line + #----------------------------------------------------------------------- + # OK so far: the statement is not to long just to the OPENING token. Now + # see if everything to the closing token will fit on one line + #----------------------------------------------------------------------- + # This is part of an update to fix cases b562 .. b983 my $K_closing = $self->[_K_closing_container_]->{$type_sequence_j}; - return 0 unless ( defined($K_closing) ); + return unless ( defined($K_closing) ); my $container_length = $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] - $rLL->[$Kj]->[_CUMULATIVE_LENGTH_]; @@ -13877,7 +15010,7 @@ sub starting_one_line_block { # line is too long... there is no chance of forming a one line block # if the excess is more than 1 char - return 0 if ( $excess > 1 ); + return if ( $excess > 1 ); # ... and give up if it is not a one-line block on input. # note: for a one-line block on input, it may be possible to keep @@ -13885,9 +15018,12 @@ sub starting_one_line_block { my $K_start = $K_to_go[$i_start]; my $ldiff = $rLL->[$K_closing]->[_LINE_INDEX_] - $rLL->[$K_start]->[_LINE_INDEX_]; - return 0 if ($ldiff); + return if ($ldiff); } + #------------------------------------------------------------------ + # Loop to check contents and length of the potential one-line block + #------------------------------------------------------------------ foreach my $Ki ( $Kj + 1 .. $K_last ) { # old whitespace could be arbitrarily large, so don't use it @@ -13900,7 +15036,7 @@ sub starting_one_line_block { # Return false result if we exceed the maximum line length, if ( $pos > $maximum_line_length ) { - return 0; + return; } # keep going for non-containers @@ -13915,7 +15051,7 @@ sub starting_one_line_block { && $rblock_type_of_seqno->{$type_sequence_i} && !$nobreak ) { - return 0; + return; } # if we find our closing brace.. @@ -13997,14 +15133,16 @@ sub starting_one_line_block { } if ( $pos >= $maximum_line_length ) { - return 0; + return; } } } + #-------------------------- # ok, it's a one-line block - create_one_line_block( $i_start, 20 ); - return 0; + #-------------------------- + create_one_line_block($i_start); + return; } # just keep going for other characters @@ -14012,6 +15150,10 @@ sub starting_one_line_block { } } + #-------------------------------------------------- + # End Loop to examine tokens in potential one-block + #-------------------------------------------------- + # We haven't hit the closing brace, but there is still space. So the # question here is, should we keep going to look at more lines in hopes of # forming a new one-line block, or should we stop right now. The problem @@ -14027,9 +15169,33 @@ sub starting_one_line_block { # The blocks which we can keep going are in a hash, but we never want # to continue if we are at a '-bli' block. if ( $want_one_line_block{$block_type} && !$is_bli ) { - create_one_line_block( $i_start, 1 ); + my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence_j}; + my $semicolon_count = $rtype_count + && $rtype_count->{';'} ? $rtype_count->{';'} : 0; + + # Ignore a terminal semicolon in the count + if ( $semicolon_count <= 2 ) { + my $K_closing_container = $self->[_K_closing_container_]; + my $K_closing_j = $K_closing_container->{$type_sequence_j}; + my $Kp = $self->K_previous_nonblank($K_closing_j); + if ( defined($Kp) + && $rLL->[$Kp]->[_TYPE_] eq ';' ) + { + $semicolon_count -= 1; + } + } + if ( $semicolon_count <= 0 ) { + create_one_line_block($i_start); + } + elsif ( $semicolon_count == 1 && $block_type eq 'eval' ) { + + # Mark short broken eval blocks for possible later use in + # avoiding adding spaces before a 'package' line. This is not + # essential but helps keep newer and older formatting the same. + $self->[_ris_short_broken_eval_block_]->{$type_sequence_j} = 1; + } } - return 0; + return; } ## end sub starting_one_line_block sub unstore_token_to_go { @@ -14152,15 +15318,6 @@ sub compare_indentation_levels { @break_before_or_after_token{@q} = (1) x scalar(@q); } - # This is no longer called - global vars - moved into initialize_batch_vars - sub initialize_forced_breakpoint_vars { - $forced_breakpoint_count = 0; - $index_max_forced_break = UNDEFINED_INDEX; - $forced_breakpoint_undo_count = 0; - ##@forced_breakpoint_undo_stack = (); # not needed - return; - } - sub set_fake_breakpoint { # Just bump up the breakpoint count as a signal that there are breaks. @@ -14350,11 +15507,12 @@ EOM # shouldn't happen, but not a critical error else { - DEBUG_UNDOBP && do { + if (DEVEL_MODE) { my ( $a, $b, $c ) = caller(); - print STDOUT -"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go"; - }; + Fault(< #; + push @q, ','; + @quick_filter{@q} = (1) x scalar(@q); + } + sub grind_batch_of_CODE { my ($self) = @_; + #----------------------------------------------------------------- + # This sub directs the formatting of one complete batch of tokens. + # The tokens of the batch are in the '_to_go' arrays. + #----------------------------------------------------------------- + my $this_batch = $self->[_this_batch_]; - $batch_count++; + $this_batch->[_peak_batch_size_] = $peak_batch_size; + $this_batch->[_batch_count_] = ++$batch_count; $self->check_grind_input() if (DEVEL_MODE); @@ -14539,26 +15711,18 @@ EOM return if ( $max_index_to_go < 0 ); - $self->set_lp_indentation() - if ($rOpts_line_up_parentheses); + if ($rOpts_line_up_parentheses) { + $self->set_lp_indentation(); + } - #---------------------------- + #-------------------------------------------------- # Shortcut for block comments - #---------------------------- - if ( - $max_index_to_go == 0 - && $types_to_go[0] eq '#' - - # this shortcut does not work for -lp yet - && !$rOpts_line_up_parentheses - ) - { + # Note that this shortcut does not work for -lp yet + #-------------------------------------------------- + elsif ( !$max_index_to_go && $types_to_go[0] eq '#' ) { my $ibeg = 0; $this_batch->[_ri_first_] = [$ibeg]; $this_batch->[_ri_last_] = [$ibeg]; - $this_batch->[_peak_batch_size_] = $peak_batch_size; - $this_batch->[_do_not_pad_] = 0; - $this_batch->[_batch_count_] = $batch_count; $this_batch->[_rix_seqno_controlling_ci_] = []; $self->convey_batch_to_vertical_aligner(); @@ -14576,9 +15740,7 @@ EOM # Normal route #------------- - my $rLL = $self->[_rLL_]; - my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_]; - my $rwant_container_open = $self->[_rwant_container_open_]; + my $rLL = $self->[_rLL_]; #------------------------------------------------------- # Loop over the batch to initialize some batch variables @@ -14590,99 +15752,110 @@ EOM my %comma_arrow_count; my $comma_arrow_count_contained = 0; my @unmatched_closing_indexes_in_this_batch; + my @unmatched_opening_indexes_in_this_batch; - @unmatched_opening_indexes_in_this_batch = (); - + my @i_for_semicolon; foreach my $i ( 0 .. $max_index_to_go ) { - $iprev_to_go[$i] = $ilast_nonblank; - $inext_to_go[$i] = $i + 1; + $iprev_to_go[$i] = $ilast_nonblank; # correct value + $inext_to_go[$i] = $i + 1; # just a first guess - my $type = $types_to_go[$i]; - if ( $type ne 'b' ) { - if ( $ilast_nonblank >= 0 ) { - $inext_to_go[$ilast_nonblank] = $i; + next if ( $types_to_go[$i] eq 'b' ); - # just in case there are two blanks in a row (shouldn't - # happen) - if ( ++$ilast_nonblank < $i ) { - $inext_to_go[$ilast_nonblank] = $i; - } - } - $ilast_nonblank = $i; + if ( $ilast_nonblank >= 0 ) { + $inext_to_go[$ilast_nonblank] = $i; # correction + } + $ilast_nonblank = $i; - # This is a good spot to efficiently collect information needed - # for breaking lines... + # This is an optional shortcut to save a bit of time by skipping + # most tokens. Note: the filter may need to be updated if the + # next 'if' tests are ever changed to include more token types. + next if ( !$quick_filter{ $types_to_go[$i] } ); - # gather info needed by sub break_long_lines - if ( $type_sequence_to_go[$i] ) { - my $seqno = $type_sequence_to_go[$i]; - my $token = $tokens_to_go[$i]; + my $type = $types_to_go[$i]; - # remember indexes of any tokens controlling xci - # in this batch. This list is needed by sub undo_ci. - if ( $ris_seqno_controlling_ci->{$seqno} ) { - push @ix_seqno_controlling_ci, $i; - } + # gather info needed by sub break_long_lines + if ( $type_sequence_to_go[$i] ) { + my $seqno = $type_sequence_to_go[$i]; + my $token = $tokens_to_go[$i]; - if ( $is_opening_sequence_token{$token} ) { - if ( $rwant_container_open->{$seqno} ) { - $self->set_forced_breakpoint($i); - } - push @unmatched_opening_indexes_in_this_batch, $i; - if ( $type eq '?' ) { - push @colon_list, $type; - } + # remember indexes of any tokens controlling xci + # in this batch. This list is needed by sub undo_ci. + if ( $self->[_ris_seqno_controlling_ci_]->{$seqno} ) { + push @ix_seqno_controlling_ci, $i; + } + + if ( $is_opening_sequence_token{$token} ) { + if ( $self->[_rwant_container_open_]->{$seqno} ) { + $self->set_forced_breakpoint($i); } - elsif ( $is_closing_sequence_token{$token} ) { + push @unmatched_opening_indexes_in_this_batch, $i; + if ( $type eq '?' ) { + push @colon_list, $type; + } + } + elsif ( $is_closing_sequence_token{$token} ) { - if ( $i > 0 && $rwant_container_open->{$seqno} ) { - $self->set_forced_breakpoint( $i - 1 ); - } + if ( $i > 0 && $self->[_rwant_container_open_]->{$seqno} ) { + $self->set_forced_breakpoint( $i - 1 ); + } - my $i_mate = - pop @unmatched_opening_indexes_in_this_batch; - if ( defined($i_mate) && $i_mate >= 0 ) { - if ( $type_sequence_to_go[$i_mate] == - $type_sequence_to_go[$i] ) - { - $mate_index_to_go[$i] = $i_mate; - $mate_index_to_go[$i_mate] = $i; - if ( $comma_arrow_count{$seqno} ) { - $comma_arrow_count_contained += - $comma_arrow_count{$seqno}; - } - } - else { - push @unmatched_opening_indexes_in_this_batch, - $i_mate; - push @unmatched_closing_indexes_in_this_batch, - $i; - } + my $i_mate = pop @unmatched_opening_indexes_in_this_batch; + if ( defined($i_mate) && $i_mate >= 0 ) { + if ( $type_sequence_to_go[$i_mate] == + $type_sequence_to_go[$i] ) + { + $mate_index_to_go[$i] = $i_mate; + $mate_index_to_go[$i_mate] = $i; + my $cac = $comma_arrow_count{$seqno}; + $comma_arrow_count_contained += $cac if ($cac); } else { + push @unmatched_opening_indexes_in_this_batch, + $i_mate; push @unmatched_closing_indexes_in_this_batch, $i; } - if ( $type eq ':' ) { - push @colon_list, $type; - } - } ## end elsif ( $is_closing_sequence_token...) + } + else { + push @unmatched_closing_indexes_in_this_batch, $i; + } + if ( $type eq ':' ) { + push @colon_list, $type; + } + } ## end elsif ( $is_closing_sequence_token...) - } ## end if ($seqno) + } ## end if ($seqno) - elsif ( $type eq ',' ) { $comma_count_in_batch++; } - elsif ( $tokens_to_go[$i] eq '=>' ) { - if (@unmatched_opening_indexes_in_this_batch) { - my $j = $unmatched_opening_indexes_in_this_batch[-1]; - my $seqno = $type_sequence_to_go[$j]; - $comma_arrow_count{$seqno}++; - } + elsif ( $type eq ',' ) { $comma_count_in_batch++; } + elsif ( $type eq '=>' ) { + if (@unmatched_opening_indexes_in_this_batch) { + my $j = $unmatched_opening_indexes_in_this_batch[-1]; + my $seqno = $type_sequence_to_go[$j]; + $comma_arrow_count{$seqno}++; } - } ## end if ( $type ne 'b' ) + } + elsif ( $type eq 'f' ) { + push @i_for_semicolon, $i; + } + } ## end for ( my $i = 0 ; $i <=...) + # Break at a single interior C-style for semicolon in this batch (c154) + if ( @i_for_semicolon && @i_for_semicolon == 1 ) { + my $i = $i_for_semicolon[0]; + my $inext = $inext_to_go[$i]; + if ( $inext <= $max_index_to_go && $types_to_go[$inext] ne '#' ) { + $self->set_forced_breakpoint($i); + } + } + my $is_unbalanced_batch = @unmatched_opening_indexes_in_this_batch + @unmatched_closing_indexes_in_this_batch; + if (@unmatched_opening_indexes_in_this_batch) { + $this_batch->[_runmatched_opening_indexes_] = + \@unmatched_opening_indexes_in_this_batch; + } + #------------------------ # Set special breakpoints #------------------------ @@ -14691,7 +15864,7 @@ EOM # blocks on one line. This is very rare but can happen for # user-defined subs. For example we might be looking at this: # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR { - my $saw_good_break = 0; # flag to force breaks even if short line + my $saw_good_break; # flag to force breaks even if short line if ( # looking for opening or closing block brace @@ -14756,16 +15929,16 @@ EOM my $last_last_line_leading_level = $self->[_last_last_line_leading_level_]; - # add a blank line before certain key types but not after a comment + # add blank line(s) before certain key types but not after a comment if ( $last_line_leading_type ne '#' ) { - my $want_blank = 0; + my $blank_count = 0; my $leading_token = $tokens_to_go[$imin]; my $leading_type = $types_to_go[$imin]; # break before certain key blocks except one-liners if ( $leading_type eq 'k' ) { if ( $leading_token eq 'BEGIN' || $leading_token eq 'END' ) { - $want_blank = $rOpts->{'blank-lines-before-subs'} + $blank_count = $rOpts->{'blank-lines-before-subs'} if ( terminal_type_i( $imin, $imax ) ne '}' ); } @@ -14784,12 +15957,14 @@ EOM $lc = 0; } - $want_blank = - $rOpts->{'blanks-before-blocks'} - && $lc >= $rOpts->{'long-block-line-count'} - && $self->consecutive_nonblank_lines() >= - $rOpts->{'long-block-line-count'} - && terminal_type_i( $imin, $imax ) ne '}'; + if ( $rOpts->{'blanks-before-blocks'} + && $lc >= $rOpts->{'long-block-line-count'} + && $self->consecutive_nonblank_lines() >= + $rOpts->{'long-block-line-count'} + && terminal_type_i( $imin, $imax ) ne '}' ) + { + $blank_count = 1; + } } } @@ -14807,13 +15982,17 @@ EOM && $leading_token =~ /$SUB_PATTERN/ ) { - $want_blank = $rOpts->{'blank-lines-before-subs'} + $blank_count = $rOpts->{'blank-lines-before-subs'} if ( terminal_type_i( $imin, $imax ) !~ /^[\;\}\,]$/ ); } # break before all package declarations elsif ( substr( $leading_token, 0, 8 ) eq 'package ' ) { - $want_blank = $rOpts->{'blank-lines-before-packages'}; + + # ... except in a very short eval block + my $pseqno = $parent_seqno_to_go[$imin]; + $blank_count = $rOpts->{'blank-lines-before-packages'} + if ( !$self->[_ris_short_broken_eval_block_]->{$pseqno} ); } } @@ -14825,18 +16004,18 @@ EOM /$blank_lines_before_closing_block_pattern/ ) { my $nblanks = $rOpts->{'blank-lines-before-closing-block'}; - if ( $nblanks > $want_blank ) { - $want_blank = $nblanks; + if ( $nblanks > $blank_count ) { + $blank_count = $nblanks; } } } - if ($want_blank) { + if ($blank_count) { - # future: send blank line down normal path to VerticalAligner + # future: send blank line down normal path to VerticalAligner? $self->flush_vertical_aligner(); my $file_writer_object = $self->[_file_writer_object_]; - $file_writer_object->require_blank_code_lines($want_blank); + $file_writer_object->require_blank_code_lines($blank_count); } } @@ -14873,21 +16052,22 @@ EOM my $called_pad_array_to_go; # set all forced breakpoints for good list formatting - my $is_long_line = $max_index_to_go > 0 - && $self->excess_line_length( $imin, $max_index_to_go ) > 0; - - my $old_line_count_in_batch = 1; + my $is_long_line; + my $multiple_old_lines_in_batch; if ( $max_index_to_go > 0 ) { + $is_long_line = + $self->excess_line_length( $imin, $max_index_to_go ) > 0; + my $Kbeg = $K_to_go[0]; my $Kend = $K_to_go[$max_index_to_go]; - $old_line_count_in_batch += + $multiple_old_lines_in_batch = $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_]; } my $rbond_strength_bias = []; if ( $is_long_line - || $old_line_count_in_batch > 1 + || $multiple_old_lines_in_batch # must always call break_lists() with unbalanced batches because # it is maintaining some stacks @@ -14919,97 +16099,97 @@ EOM # first and last tokens of line fragments to output.. my ( $ri_first, $ri_last ); - #------------------------- - # write a single line if.. - #------------------------- - if ( + #----------------------------- + # a single token uses one line + #----------------------------- + if ( !$max_index_to_go ) { + $ri_first = [$imin]; + $ri_last = [$imax]; + } + + # for multiple tokens + else { - # we aren't allowed to add any newlines - !$rOpts_add_newlines + #------------------------- + # write a single line if.. + #------------------------- + if ( + ( - # or, - || ( + # this line is 'short' + !$is_long_line - # this line is 'short' - !$is_long_line + # and we didn't see a good breakpoint + && !$saw_good_break - # and we didn't see a good breakpoint - && !$saw_good_break + # and we don't already have an interior breakpoint + && !$forced_breakpoint_count + ) - # and we don't already have an interior breakpoint - && !$forced_breakpoint_count - ) - ) - { - @{$ri_first} = ($imin); - @{$ri_last} = ($imax); - } + # or, we aren't allowed to add any newlines + || !$rOpts_add_newlines - #----------------------------- - # otherwise use multiple lines - #----------------------------- - else { + ) + { + $ri_first = [$imin]; + $ri_last = [$imax]; + } - # add a couple of extra terminal blank tokens if we haven't - # already done so - $self->pad_array_to_go() unless ($called_pad_array_to_go); + #----------------------------- + # otherwise use multiple lines + #----------------------------- + else { - ( $ri_first, $ri_last, my $rbond_strength_to_go ) = - $self->break_long_lines( $saw_good_break, \@colon_list, - $rbond_strength_bias ); + # add a couple of extra terminal blank tokens if we haven't + # already done so + $self->pad_array_to_go() unless ($called_pad_array_to_go); - $self->break_all_chain_tokens( $ri_first, $ri_last ); + ( $ri_first, $ri_last, my $rbond_strength_to_go ) = + $self->break_long_lines( $saw_good_break, \@colon_list, + $rbond_strength_bias ); - $self->break_equals( $ri_first, $ri_last ); + $self->break_all_chain_tokens( $ri_first, $ri_last ); - # now we do a correction step to clean this up a bit - # (The only time we would not do this is for debugging) - $self->recombine_breakpoints( $ri_first, $ri_last, - $rbond_strength_to_go ) - if ( $rOpts_recombine && @{$ri_first} > 1 ); + $self->break_equals( $ri_first, $ri_last ) + if @{$ri_first} >= 3; - $self->insert_final_ternary_breaks( $ri_first, $ri_last ) - if (@colon_list); - } + # now we do a correction step to clean this up a bit + # (The only time we would not do this is for debugging) + $self->recombine_breakpoints( $ri_first, $ri_last, + $rbond_strength_to_go ) + if ( $rOpts_recombine && @{$ri_first} > 1 ); - $self->insert_breaks_before_list_opening_containers( $ri_first, - $ri_last ) - if ( %break_before_container_types && $max_index_to_go > 0 ); + $self->insert_final_ternary_breaks( $ri_first, $ri_last ) + if (@colon_list); + } - #------------------- - # -lp corrector step - #------------------- - my $do_not_pad = 0; - if ($rOpts_line_up_parentheses) { - $do_not_pad = $self->correct_lp_indentation( $ri_first, $ri_last ); - } + $self->insert_breaks_before_list_opening_containers( $ri_first, + $ri_last ) + if ( %break_before_container_types && $max_index_to_go > 0 ); - #-------------------------- - # unmask phantom semicolons - #-------------------------- - if ( !$tokens_to_go[$imax] && $types_to_go[$imax] eq ';' ) { - my $i = $imax; - my $tok = ';'; - my $tok_len = 1; - if ( $want_left_space{';'} != WS_NO ) { - $tok = ' ;'; - $tok_len = 2; + # Check for a phantom semicolon at the end of the batch + if ( !$token_lengths_to_go[$imax] && $types_to_go[$imax] eq ';' ) { + $self->unmask_phantom_token($imax); + } + + if ( $rOpts_one_line_block_semicolons == 0 ) { + $self->delete_one_line_semicolons( $ri_first, $ri_last ); } - $tokens_to_go[$i] = $tok; - $token_lengths_to_go[$i] = $tok_len; - my $KK = $K_to_go[$i]; - $rLL->[$KK]->[_TOKEN_] = $tok; - $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len; - my $line_number = 1 + $rLL->[$KK]->[_LINE_INDEX_]; - $self->note_added_semicolon($line_number); - foreach ( $imax .. $max_index_to_go ) { - $summed_lengths_to_go[ $_ + 1 ] += $tok_len; + # Remember the largest batch size processed. This is needed by the + # logical padding routine to avoid padding the first nonblank token + if ( $max_index_to_go > $peak_batch_size ) { + $peak_batch_size = $max_index_to_go; } } - if ( $rOpts_one_line_block_semicolons == 0 ) { - $self->delete_one_line_semicolons( $ri_first, $ri_last ); + #------------------- + # -lp corrector step + #------------------- + if ($rOpts_line_up_parentheses) { + my $do_not_pad = + $self->correct_lp_indentation( $ri_first, $ri_last ); + $this_batch->[_do_not_pad_] = $do_not_pad; } #-------------------- @@ -15017,9 +16197,6 @@ EOM #-------------------- $this_batch->[_ri_first_] = $ri_first; $this_batch->[_ri_last_] = $ri_last; - $this_batch->[_peak_batch_size_] = $peak_batch_size; - $this_batch->[_do_not_pad_] = $do_not_pad; - $this_batch->[_batch_count_] = $batch_count; $this_batch->[_rix_seqno_controlling_ci_] = \@ix_seqno_controlling_ci; $self->convey_batch_to_vertical_aligner(); @@ -15048,14 +16225,49 @@ EOM } } - # Remember the largest batch size processed. This is needed by the - # logical padding routine to avoid padding the first nonblank token - if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) { - $peak_batch_size = $max_index_to_go; + return; + } ## end sub grind_batch_of_CODE + + sub unmask_phantom_token { + my ( $self, $iend ) = @_; + + # Turn a phantom token into a real token. + + # Input parameter: + # $iend = the index in the output batch array of this token. + + # Phantom tokens are specially marked token types (such as ';') with + # no token text which only become real tokens if they occur at the end + # of an output line. At one time phantom ',' tokens were handled + # here, but now they are processed elsewhere. + + my $rLL = $self->[_rLL_]; + my $KK = $K_to_go[$iend]; + my $line_number = 1 + $rLL->[$KK]->[_LINE_INDEX_]; + + my $type = $types_to_go[$iend]; + return unless ( $type eq ';' ); + my $tok = $type; + my $tok_len = length($tok); + if ( $want_left_space{$type} != WS_NO ) { + $tok = SPACE . $tok; + $tok_len += 1; } + $tokens_to_go[$iend] = $tok; + $token_lengths_to_go[$iend] = $tok_len; + + $rLL->[$KK]->[_TOKEN_] = $tok; + $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len; + + $self->note_added_semicolon($line_number); + + # This changes the summed lengths of the rest of this batch + foreach ( $iend .. $max_index_to_go ) { + $summed_lengths_to_go[ $_ + 1 ] += $tok_len; + } return; - } ## end sub grind_batch_of_CODE + } sub save_opening_indentation { @@ -15063,7 +16275,12 @@ EOM # saves indentations of lines of all unmatched opening tokens. # These will be used by sub get_opening_indentation. - my ( $self, $ri_first, $ri_last, $rindentation_list ) = @_; + my ( $self, $ri_first, $ri_last, $rindentation_list, + $runmatched_opening_indexes ) + = @_; + + $runmatched_opening_indexes = [] + if ( !defined($runmatched_opening_indexes) ); # QW INDENTATION PATCH 1: # Also save indentation for multiline qw quotes @@ -15080,7 +16297,7 @@ EOM # we need to save indentations of any unmatched opening tokens # in this batch because we may need them in a subsequent batch. - foreach ( @unmatched_opening_indexes_in_this_batch, @i_qw ) { + foreach ( @{$runmatched_opening_indexes}, @i_qw ) { my $seqno = $type_sequence_to_go[$_]; @@ -15092,6 +16309,7 @@ EOM # shouldn't happen $seqno = 'UNKNOWN'; + DEVEL_MODE && Fault("unable to find sequence number\n"); } } @@ -15325,6 +16543,7 @@ sub break_all_chain_tokens { # now look for any interior tokens of the same types $count = 0; + my $has_interior_dot_or_plus; for my $n ( 0 .. $nmax ) { my $il = $ri_left->[$n]; my $ir = $ri_right->[$n]; @@ -15336,21 +16555,27 @@ sub break_all_chain_tokens { if ( $saw_chain_type{$key} ) { push @{ $interior_chain_type{$key} }, $i; $count++; + $has_interior_dot_or_plus ||= ( $key eq '.' || $key eq '+' ); } } } return unless $count; + my @keys = keys %saw_chain_type; + + # quit if just ONE continuation line with leading . For example-- + # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{' + # . $contents; + # Fixed for b1399. + if ( $has_interior_dot_or_plus && $nmax == 1 && @keys == 1 ) { + return; + } + # now make a list of all new break points my @insert_list; # loop over all chain types - foreach my $key ( keys %saw_chain_type ) { - - # quit if just ONE continuation line with leading . For example-- - # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{' - # . $contents; - last if ( $nmax == 1 && $key =~ /^[\.\+]$/ ); + foreach my $key (@keys) { # loop over all interior chain tokens foreach my $itest ( @{ $interior_chain_type{$key} } ) { @@ -15784,7 +17009,7 @@ sub break_equals { # That's the task of this routine. # do nothing under extreme stress - return if ( $stress_level_alpha < 1 && !DEVEL_MODE ); + return if ( $high_stress_level < 1 ); my $rK_weld_right = $self->[_rK_weld_right_]; my $rK_weld_left = $self->[_rK_weld_left_]; @@ -15792,12 +17017,15 @@ sub break_equals { my $nmax_start = @{$ri_end} - 1; return if ( $nmax_start <= 0 ); - # Make a list of all good joining tokens between the lines + #---------------------------------------------------------------- + # Break into small sub-sections to decrease the maximum n-squared + # operations and avoid excess run time. See comments below. + #---------------------------------------------------------------- + + # Also make a list of all good joining tokens between the lines # n-1 and n. my @joint; - # Break the total batch sub-sections with lengths short enough to - # recombine my $rsections = []; my $nbeg_sec = 0; my $nend_sec; @@ -15808,7 +17036,7 @@ sub break_equals { my $iend_2 = $ri_end->[$nn]; my $ibeg_2 = $ri_beg->[$nn]; - # Define the joint variable + # Define certain good joint tokens my ( $itok, $itokp, $itokm ); foreach my $itest ( $iend_1, $ibeg_2 ) { my $type = $types_to_go[$itest]; @@ -15846,6 +17074,7 @@ sub break_equals { $nbeg_sec = $nn; } } + if ( defined($nend_sec) ) { push @{$rsections}, [ $nbeg_sec, $nend_sec ]; my $num = $nend_sec - $nbeg_sec; @@ -15889,6 +17118,7 @@ sub break_equals { # Loop over all sub-sections. Note that we have to work backwards # from the end of the batch since the sections use original line # numbers, and the line numbers change as we go. + OUTER_LOOP: while ( my $section = pop @{$rsections} ) { my ( $nbeg, $nend ) = @{$section}; @@ -15908,7 +17138,7 @@ sub break_equals { # Safety check for excess total iterations $it_count++; if ( $it_count > $it_count_max ) { - goto RETURN; + last OUTER_LOOP; } my $n_best = 0; @@ -15931,8 +17161,13 @@ sub break_equals { } $nmax_last = $nmax; $more_to_do = 0; - my $skip_Section_3; - my $leading_amp_count = 0; + + # Count lines with leading &&, ||, :, at any level. + # This is used to avoid some recombinations which might + # be hard to read. + my $rleading_amp_count; + ${$rleading_amp_count} = 0; + my $this_line_is_semicolon_terminated; # loop over all remaining lines in this batch @@ -15961,8 +17196,8 @@ sub break_equals { # between the tokens at $iend_1 and $ibeg_2 # # We will apply a number of ad-hoc tests to see if joining - # here will look ok. The code will just issue a 'next' - # command if the join doesn't look good. If we get through + # here will look ok. The code will just move to the next + # pair if the join doesn't look good. If we get through # the gauntlet of tests, the lines will be recombined. #---------------------------------------------------------- # @@ -15973,989 +17208,1109 @@ sub break_equals { my $ibeg_2 = $ri_beg->[$n]; my $ibeg_nmax = $ri_beg->[$nmax]; - # combined line cannot be too long - my $excess = - $self->excess_line_length( $ibeg_1, $iend_2, 1 ); - next if ( $excess > 0 ); + # combined line cannot be too long + my $excess = + $self->excess_line_length( $ibeg_1, $iend_2, 1 ); + next if ( $excess > 0 ); + + my $type_iend_1 = $types_to_go[$iend_1]; + my $type_iend_2 = $types_to_go[$iend_2]; + my $type_ibeg_1 = $types_to_go[$ibeg_1]; + my $type_ibeg_2 = $types_to_go[$ibeg_2]; + + # terminal token of line 2 if any side comment is ignored: + my $iend_2t = $iend_2; + my $type_iend_2t = $type_iend_2; + + DEBUG_RECOMBINE > 1 && do { + print STDERR +"RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n"; + }; + + # If line $n is the last line, we set some flags and + # do any special checks for it + if ( $n == $nmax ) { + + # a terminal '{' should stay where it is + # unless preceded by a fat comma + next if ( $type_ibeg_2 eq '{' && $type_iend_1 ne '=>' ); + + if ( $type_iend_2 eq '#' + && $iend_2 - $ibeg_2 >= 2 + && $types_to_go[ $iend_2 - 1 ] eq 'b' ) + { + $iend_2t = $iend_2 - 2; + $type_iend_2t = $types_to_go[$iend_2t]; + } + + $this_line_is_semicolon_terminated = + $type_iend_2t eq ';'; + } + + #---------------------------------------------------------- + # Recombine Section 0: + # Examine the special token joining this line pair, if any. + # Put as many tests in this section to avoid duplicate code + # and to make formatting independent of whether breaks are + # to the left or right of an operator. + #---------------------------------------------------------- + + # Note that parens around ($itok) are essential here: + my ($itok) = @{ $joint[$n] }; + if ($itok) { + my $ok_0 = + recombine_section_0( $itok, $ri_beg, $ri_end, $n, + $rleading_amp_count ); + next if ( !$ok_0 ); + } + + #---------------------------------------------------------- + # Recombine Section 1: + # Join welded nested containers immediately + #---------------------------------------------------------- + + if ( + $total_weld_count + && ( $type_sequence_to_go[$iend_1] + && defined( $rK_weld_right->{ $K_to_go[$iend_1] } ) + || $type_sequence_to_go[$ibeg_2] + && defined( $rK_weld_left->{ $K_to_go[$ibeg_2] } ) ) + ) + { + $n_best = $n; + last; + } + + $reverse = 0; + + #---------------------------------------------------------- + # Recombine Section 2: + # Examine token at $iend_1 (right end of first line of pair) + #---------------------------------------------------------- + + my ( $ok_2, $skip_Section_3 ) = + recombine_section_2( $ri_beg, $ri_end, $n, + $this_line_is_semicolon_terminated, + $rleading_amp_count ); + next if ( !$ok_2 ); + + #---------------------------------------------------------- + # Recombine Section 3: + # Examine token at $ibeg_2 (left end of second line of pair) + #---------------------------------------------------------- + + # Join lines identified above as capable of + # causing an outdented line with leading closing paren. + # Note that we are skipping the rest of this section + # and the rest of the loop to do the join. + if ($skip_Section_3) { + $forced_breakpoint_to_go[$iend_1] = 0; + $n_best = $n; + last; + } + + my ( $ok_3, $bs_tweak ) = + recombine_section_3( $ri_beg, $ri_end, $n, + $this_line_is_semicolon_terminated, + $rleading_amp_count ); + next if ( !$ok_3 ); + + #---------------------------------------------------------- + # Recombine Section 4: + # Combine the lines if we arrive here and it is possible + #---------------------------------------------------------- + + # honor hard breakpoints + next if ( $forced_breakpoint_to_go[$iend_1] > 0 ); + + my $bs = $rbond_strength_to_go->[$iend_1] + $bs_tweak; + + # Require a few extra spaces before recombining lines if we + # are at an old breakpoint unless this is a simple list or + # terminal line. The goal is to avoid oscillating between + # two quasi-stable end states. For example this snippet + # caused problems: + +## my $this = +## bless { +## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]" +## }, +## $type; + next + if ( $old_breakpoint_to_go[$iend_1] + && !$this_line_is_semicolon_terminated + && $n < $nmax + && $excess + 4 > 0 + && $type_iend_2 ne ',' ); + + # do not recombine if we would skip in indentation levels + if ( $n < $nmax ) { + my $if_next = $ri_beg->[ $n + 1 ]; + next + if ( + $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2] + && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next] + + # but an isolated 'if (' is undesirable + && !( + $n == 1 + && $iend_1 - $ibeg_1 <= 2 + && $type_ibeg_1 eq 'k' + && $tokens_to_go[$ibeg_1] eq 'if' + && $tokens_to_go[$iend_1] ne '(' + ) + ); + } + + ## OLD: honor no-break's + ## next if ( $bs >= NO_BREAK - 1 ); # removed for b1257 + + # remember the pair with the greatest bond strength + if ( !$n_best ) { + $n_best = $n; + $bs_best = $bs; + } + else { + + if ( $bs > $bs_best ) { + $n_best = $n; + $bs_best = $bs; + } + } + } + + # recombine the pair with the greatest bond strength + if ($n_best) { + splice @{$ri_beg}, $n_best, 1; + splice @{$ri_end}, $n_best - 1, 1; + splice @joint, $n_best, 1; + + # keep going if we are still making progress + $more_to_do++; + } + } # end iteration loop + + } # end loop over sections + + if (DEBUG_RECOMBINE) { + my $nmax_last = @{$ri_end} - 1; + print STDERR +"exiting recombine with $nmax_last lines, starting lines=$nmax_start, iterations=$it_count, max_it=$it_count_max numsec=$num_sections\n"; + } + return; + } ## end sub recombine_breakpoints + + sub recombine_section_0 { + my ( $itok, $ri_beg, $ri_end, $n, $rleading_amp_count ) = @_; - my $type_iend_1 = $types_to_go[$iend_1]; - my $type_iend_2 = $types_to_go[$iend_2]; - my $type_ibeg_1 = $types_to_go[$ibeg_1]; - my $type_ibeg_2 = $types_to_go[$ibeg_2]; + # Recombine Section 0: + # Examine special candidate joining token $itok - # terminal token of line 2 if any side comment is ignored: - my $iend_2t = $iend_2; - my $type_iend_2t = $type_iend_2; + # Given: + # $itok = index of token at a possible join of lines $n-1 and $n - # some beginning indexes of other lines, which may not exist - my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1; - my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1; - my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1; + # Return: + # true => ok to combine + # false => do not combine lines - my $bs_tweak = 0; + # Here are Indexes of the endpoint tokens of the two lines: + # + # -----line $n-1--- | -----line $n----- + # $ibeg_1 $iend_1 | $ibeg_2 $iend_2 + # ^ ^ + # | | + # ------------$itok is one of these tokens - #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] - - # $nesting_depth_to_go[$ibeg_1] ); + # Put as many tests in this section to avoid duplicate code + # and to make formatting independent of whether breaks are + # to the left or right of an operator. - DEBUG_RECOMBINE > 1 && do { - print STDERR -"RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n"; - }; + my $nmax = @{$ri_end} - 1; + my $ibeg_1 = $ri_beg->[ $n - 1 ]; + my $iend_1 = $ri_end->[ $n - 1 ]; + my $ibeg_2 = $ri_beg->[$n]; + my $iend_2 = $ri_end->[$n]; - # If line $n is the last line, we set some flags and - # do any special checks for it - if ( $n == $nmax ) { + if ($itok) { - # a terminal '{' should stay where it is - # unless preceded by a fat comma - next if ( $type_ibeg_2 eq '{' && $type_iend_1 ne '=>' ); + my $type = $types_to_go[$itok]; - if ( $type_iend_2 eq '#' - && $iend_2 - $ibeg_2 >= 2 - && $types_to_go[ $iend_2 - 1 ] eq 'b' ) - { - $iend_2t = $iend_2 - 2; - $type_iend_2t = $types_to_go[$iend_2t]; - } + if ( $type eq ':' ) { - $this_line_is_semicolon_terminated = - $type_iend_2t eq ';'; - } + # do not join at a colon unless it disobeys the + # break request + if ( $itok eq $iend_1 ) { + return unless $want_break_before{$type}; + } + else { + ${$rleading_amp_count}++; + return if $want_break_before{$type}; + } + } ## end if ':' - #---------------------------------------------------------- - # Recombine Section 0: - # Examine the special token joining this line pair, if any. - # Put as many tests in this section to avoid duplicate code - # and to make formatting independent of whether breaks are - # to the left or right of an operator. - #---------------------------------------------------------- + # handle math operators + - * / + elsif ( $is_math_op{$type} ) { - my ($itok) = @{ $joint[$n] }; - if ($itok) { + # Combine these lines if this line is a single + # number, or if it is a short term with same + # operator as the previous line. For example, in + # the following code we will combine all of the + # short terms $A, $B, $C, $D, $E, $F, together + # instead of leaving them one per line: + # my $time = + # $A * $B * $C * $D * $E * $F * + # ( 2. * $eps * $sigma * $area ) * + # ( 1. / $tcold**3 - 1. / $thot**3 ); - my $type = $types_to_go[$itok]; + # This can be important in math-intensive code. - if ( $type eq ':' ) { + my $good_combo; - # do not join at a colon unless it disobeys the - # break request - if ( $itok eq $iend_1 ) { - next unless $want_break_before{$type}; - } - else { - $leading_amp_count++; - next if $want_break_before{$type}; - } - } ## end if ':' - - # handle math operators + - * / - elsif ( $is_math_op{$type} ) { - - # Combine these lines if this line is a single - # number, or if it is a short term with same - # operator as the previous line. For example, in - # the following code we will combine all of the - # short terms $A, $B, $C, $D, $E, $F, together - # instead of leaving them one per line: - # my $time = - # $A * $B * $C * $D * $E * $F * - # ( 2. * $eps * $sigma * $area ) * - # ( 1. / $tcold**3 - 1. / $thot**3 ); - - # This can be important in math-intensive code. - - my $good_combo; - - my $itokp = min( $inext_to_go[$itok], $iend_2 ); - my $itokpp = min( $inext_to_go[$itokp], $iend_2 ); - my $itokm = max( $iprev_to_go[$itok], $ibeg_1 ); - my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 ); - - # check for a number on the right - if ( $types_to_go[$itokp] eq 'n' ) { - - # ok if nothing else on right - if ( $itokp == $iend_2 ) { - $good_combo = 1; - } - else { - - # look one more token to right.. - # okay if math operator or some termination - $good_combo = - ( ( $itokpp == $iend_2 ) - && $is_math_op{ $types_to_go[$itokpp] - } ) - || $types_to_go[$itokpp] =~ /^[#,;]$/; - } - } + my $itokp = min( $inext_to_go[$itok], $iend_2 ); + my $itokpp = min( $inext_to_go[$itokp], $iend_2 ); + my $itokm = max( $iprev_to_go[$itok], $ibeg_1 ); + my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 ); - # check for a number on the left - if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) { + # check for a number on the right + if ( $types_to_go[$itokp] eq 'n' ) { - # okay if nothing else to left - if ( $itokm == $ibeg_1 ) { - $good_combo = 1; - } + # ok if nothing else on right + if ( $itokp == $iend_2 ) { + $good_combo = 1; + } + else { - # otherwise look one more token to left - else { + # look one more token to right.. + # okay if math operator or some termination + $good_combo = + ( ( $itokpp == $iend_2 ) + && $is_math_op{ $types_to_go[$itokpp] } ) + || $types_to_go[$itokpp] =~ /^[#,;]$/; + } + } - # okay if math operator, comma, or assignment - $good_combo = ( $itokmm == $ibeg_1 ) - && ( $is_math_op{ $types_to_go[$itokmm] } - || $types_to_go[$itokmm] =~ /^[,]$/ - || $is_assignment{ $types_to_go[$itokmm] - } ); - } - } + # check for a number on the left + if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) { - # look for a single short token either side of the - # operator - if ( !$good_combo ) { + # okay if nothing else to left + if ( $itokm == $ibeg_1 ) { + $good_combo = 1; + } - # Slight adjustment factor to make results - # independent of break before or after operator - # in long summed lists. (An operator and a - # space make two spaces). - my $two = ( $itok eq $iend_1 ) ? 2 : 0; + # otherwise look one more token to left + else { - $good_combo = + # okay if math operator, comma, or assignment + $good_combo = ( $itokmm == $ibeg_1 ) + && ( $is_math_op{ $types_to_go[$itokmm] } + || $types_to_go[$itokmm] =~ /^[,]$/ + || $is_assignment{ $types_to_go[$itokmm] } ); + } + } - # numbers or id's on both sides of this joint - $types_to_go[$itokp] =~ /^[in]$/ - && $types_to_go[$itokm] =~ /^[in]$/ + # look for a single short token either side of the + # operator + if ( !$good_combo ) { - # one of the two lines must be short: - && ( - ( - # no more than 2 nonblank tokens right - # of joint - $itokpp == $iend_2 - - # short - && token_sequence_length( - $itokp, $iend_2 - ) < $two + - $rOpts_short_concatenation_item_length - ) - || ( - # no more than 2 nonblank tokens left of - # joint - $itokmm == $ibeg_1 - - # short - && token_sequence_length( - $ibeg_1, $itokm - ) < 2 - $two + - $rOpts_short_concatenation_item_length - ) + # Slight adjustment factor to make results + # independent of break before or after operator + # in long summed lists. (An operator and a + # space make two spaces). + my $two = ( $itok eq $iend_1 ) ? 2 : 0; - ) + $good_combo = - # keep pure terms; don't mix +- with */ - && !( - $is_plus_minus{$type} - && ( $is_mult_div{ $types_to_go[$itokmm] } - || $is_mult_div{ $types_to_go[$itokpp] } - ) - ) - && !( - $is_mult_div{$type} - && ( $is_plus_minus{ $types_to_go[$itokmm] } - || $is_plus_minus{ $types_to_go[$itokpp] - } ) - ) + # numbers or id's on both sides of this joint + $types_to_go[$itokp] =~ /^[in]$/ + && $types_to_go[$itokm] =~ /^[in]$/ - ; - } + # one of the two lines must be short: + && ( + ( + # no more than 2 nonblank tokens right + # of joint + $itokpp == $iend_2 - # it is also good to combine if we can reduce to 2 - # lines - if ( !$good_combo ) { + # short + && token_sequence_length( $itokp, $iend_2 ) < + $two + $rOpts_short_concatenation_item_length + ) + || ( + # no more than 2 nonblank tokens left of + # joint + $itokmm == $ibeg_1 - # index on other line where same token would be - # in a long chain. - my $iother = - ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1; + # short + && token_sequence_length( $ibeg_1, $itokm ) < + 2 - $two + $rOpts_short_concatenation_item_length + ) - $good_combo = - $n == 2 - && $n == $nmax - && $types_to_go[$iother] ne $type; - } + ) - next unless ($good_combo); + # keep pure terms; don't mix +- with */ + && !( + $is_plus_minus{$type} + && ( $is_mult_div{ $types_to_go[$itokmm] } + || $is_mult_div{ $types_to_go[$itokpp] } ) + ) + && !( + $is_mult_div{$type} + && ( $is_plus_minus{ $types_to_go[$itokmm] } + || $is_plus_minus{ $types_to_go[$itokpp] } ) + ) - } ## end math + ; + } - elsif ( $is_amp_amp{$type} ) { - ##TBD - } ## end &&, || + # it is also good to combine if we can reduce to 2 + # lines + if ( !$good_combo ) { - elsif ( $is_assignment{$type} ) { - ##TBD - } ## end assignment - } + # index on other line where same token would be + # in a long chain. + my $iother = ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1; - #---------------------------------------------------------- - # Recombine Section 1: - # Join welded nested containers immediately - #---------------------------------------------------------- + $good_combo = + $n == 2 + && $n == $nmax + && $types_to_go[$iother] ne $type; + } - if ( - $total_weld_count - && ( $type_sequence_to_go[$iend_1] - && defined( $rK_weld_right->{ $K_to_go[$iend_1] } ) - || $type_sequence_to_go[$ibeg_2] - && defined( $rK_weld_left->{ $K_to_go[$ibeg_2] } ) ) - ) - { - $n_best = $n; - last; - } + return unless ($good_combo); - $reverse = 0; + } ## end math - #---------------------------------------------------------- - # Recombine Section 2: - # Examine token at $iend_1 (right end of first line of pair) - #---------------------------------------------------------- + elsif ( $is_amp_amp{$type} ) { + ##TBD + } ## end &&, || - # an isolated '}' may join with a ';' terminated segment - if ( $type_iend_1 eq '}' ) { - - # Check for cases where combining a semicolon terminated - # statement with a previous isolated closing paren will - # allow the combined line to be outdented. This is - # generally a good move. For example, we can join up - # the last two lines here: - # ( - # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, - # $size, $atime, $mtime, $ctime, $blksize, $blocks - # ) - # = stat($file); - # - # to get: - # ( - # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, - # $size, $atime, $mtime, $ctime, $blksize, $blocks - # ) = stat($file); - # - # which makes the parens line up. - # - # Another example, from Joe Matarazzo, probably looks best - # with the 'or' clause appended to the trailing paren: - # $self->some_method( - # PARAM1 => 'foo', - # PARAM2 => 'bar' - # ) or die "Some_method didn't work"; - # - # But we do not want to do this for something like the -lp - # option where the paren is not outdentable because the - # trailing clause will be far to the right. - # - # The logic here is synchronized with the logic in sub - # sub final_indentation_adjustment, which actually does - # the outdenting. - # - $skip_Section_3 ||= $this_line_is_semicolon_terminated - - # only one token on last line - && $ibeg_1 == $iend_1 - - # must be structural paren - && $tokens_to_go[$iend_1] eq ')' - - # style must allow outdenting, - && !$closing_token_indentation{')'} - - # only leading '&&', '||', and ':' if no others seen - # (but note: our count made below could be wrong - # due to intervening comments) - && ( $leading_amp_count == 0 - || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ ) - - # but leading colons probably line up with a - # previous colon or question (count could be wrong). - && $type_ibeg_2 ne ':' - - # only one step in depth allowed. this line must not - # begin with a ')' itself. - && ( $nesting_depth_to_go[$iend_1] == - $nesting_depth_to_go[$iend_2] + 1 ); - - # YVES patch 2 of 2: - # Allow cuddled eval chains, like this: - # eval { - # #STUFF; - # 1; # return true - # } or do { - # #handle error - # }; - # This patch works together with a patch in - # setting adjusted indentation (where the closing eval - # brace is outdented if possible). - # The problem is that an 'eval' block has continuation - # indentation and it looks better to undo it in some - # cases. If we do not use this patch we would get: - # eval { - # #STUFF; - # 1; # return true - # } - # or do { - # #handle error - # }; - # The alternative, for uncuddled style, is to create - # a patch in final_indentation_adjustment which undoes - # the indentation of a leading line like 'or do {'. - # This doesn't work well with -icb through - if ( - $block_type_to_go[$iend_1] eq 'eval' - && !ref( $leading_spaces_to_go[$iend_1] ) - && !$rOpts_indent_closing_brace - && $tokens_to_go[$iend_2] eq '{' - && ( - ( $type_ibeg_2 =~ /^(\&\&|\|\|)$/ ) - || ( $type_ibeg_2 eq 'k' - && $is_and_or{ $tokens_to_go[$ibeg_2] } ) - || $is_if_unless{ $tokens_to_go[$ibeg_2] } - ) - ) - { - $skip_Section_3 ||= 1; - } + elsif ( $is_assignment{$type} ) { + ##TBD + } ## end assignment + } - next - unless ( - $skip_Section_3 + # ok to combine lines + return 1; + } ## end sub recombine_section_0 - # handle '.' and '?' specially below - || ( $type_ibeg_2 =~ /^[\.\?]$/ ) + sub recombine_section_2 { - # fix for c054 (unusual -pbp case) - || $type_ibeg_2 eq '==' + my ( $ri_beg, $ri_end, $n, $this_line_is_semicolon_terminated, + $rleading_amp_count ) + = @_; - ); - } + # Recombine Section 2: + # Examine token at $iend_1 (right end of first line of pair) - elsif ( $type_iend_1 eq '{' ) { + # Here are Indexes of the endpoint tokens of the two lines: + # + # -----line $n-1--- | -----line $n----- + # $ibeg_1 $iend_1 | $ibeg_2 $iend_2 + # ^ + # | + # -----Section 2 looks at this token - # YVES - # honor breaks at opening brace - # Added to prevent recombining something like this: - # } || eval { package main; - next if $forced_breakpoint_to_go[$iend_1]; - } + # Returns: + # (nothing) => do not join lines + # 1, skip_Section_3 => ok to join lines + + # $skip_Section_3 is a flag for skipping the next section + my $skip_Section_3 = 0; + + my $nmax = @{$ri_end} - 1; + my $ibeg_1 = $ri_beg->[ $n - 1 ]; + my $iend_1 = $ri_end->[ $n - 1 ]; + my $iend_2 = $ri_end->[$n]; + my $ibeg_2 = $ri_beg->[$n]; + my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1; + my $ibeg_nmax = $ri_beg->[$nmax]; + + my $type_iend_1 = $types_to_go[$iend_1]; + my $type_iend_2 = $types_to_go[$iend_2]; + my $type_ibeg_1 = $types_to_go[$ibeg_1]; + my $type_ibeg_2 = $types_to_go[$ibeg_2]; + + # an isolated '}' may join with a ';' terminated segment + if ( $type_iend_1 eq '}' ) { + + # Check for cases where combining a semicolon terminated + # statement with a previous isolated closing paren will + # allow the combined line to be outdented. This is + # generally a good move. For example, we can join up + # the last two lines here: + # ( + # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, + # $size, $atime, $mtime, $ctime, $blksize, $blocks + # ) + # = stat($file); + # + # to get: + # ( + # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, + # $size, $atime, $mtime, $ctime, $blksize, $blocks + # ) = stat($file); + # + # which makes the parens line up. + # + # Another example, from Joe Matarazzo, probably looks best + # with the 'or' clause appended to the trailing paren: + # $self->some_method( + # PARAM1 => 'foo', + # PARAM2 => 'bar' + # ) or die "Some_method didn't work"; + # + # But we do not want to do this for something like the -lp + # option where the paren is not outdentable because the + # trailing clause will be far to the right. + # + # The logic here is synchronized with the logic in sub + # sub get_final_indentation, which actually does + # the outdenting. + # + $skip_Section_3 ||= $this_line_is_semicolon_terminated + + # only one token on last line + && $ibeg_1 == $iend_1 + + # must be structural paren + && $tokens_to_go[$iend_1] eq ')' + + # style must allow outdenting, + && !$closing_token_indentation{')'} + + # only leading '&&', '||', and ':' if no others seen + # (but note: our count made below could be wrong + # due to intervening comments). Note that this + # count includes these tokens at all levels. The idea is + # that seeing these at any level can make it hard to read + # formatting if we recombine. + && ( !${$rleading_amp_count} + || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ ) + + # but leading colons probably line up with a + # previous colon or question (count could be wrong). + && $type_ibeg_2 ne ':' + + # only one step in depth allowed. this line must not + # begin with a ')' itself. + && ( $nesting_depth_to_go[$iend_1] == + $nesting_depth_to_go[$iend_2] + 1 ); + + # YVES patch 2 of 2: + # Allow cuddled eval chains, like this: + # eval { + # #STUFF; + # 1; # return true + # } or do { + # #handle error + # }; + # This patch works together with a patch in + # setting adjusted indentation (where the closing eval + # brace is outdented if possible). + # The problem is that an 'eval' block has continuation + # indentation and it looks better to undo it in some + # cases. If we do not use this patch we would get: + # eval { + # #STUFF; + # 1; # return true + # } + # or do { + # #handle error + # }; + # The alternative, for uncuddled style, is to create + # a patch in get_final_indentation which undoes + # the indentation of a leading line like 'or do {'. + # This doesn't work well with -icb through + if ( + $block_type_to_go[$iend_1] eq 'eval' + && !ref( $leading_spaces_to_go[$iend_1] ) + && !$rOpts_indent_closing_brace + && $tokens_to_go[$iend_2] eq '{' + && ( + ( $type_ibeg_2 =~ /^(\&\&|\|\|)$/ ) + || ( $type_ibeg_2 eq 'k' + && $is_and_or{ $tokens_to_go[$ibeg_2] } ) + || $is_if_unless{ $tokens_to_go[$ibeg_2] } + ) + ) + { + $skip_Section_3 ||= 1; + } - # do not recombine lines with ending &&, ||, - elsif ( $is_amp_amp{$type_iend_1} ) { - next unless $want_break_before{$type_iend_1}; - } + return + unless ( + $skip_Section_3 - # Identify and recombine a broken ?/: chain - elsif ( $type_iend_1 eq '?' ) { + # handle '.' and '?' specially below + || ( $type_ibeg_2 =~ /^[\.\?]$/ ) - # Do not recombine different levels - next - if ( - $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] ); + # fix for c054 (unusual -pbp case) + || $type_ibeg_2 eq '==' - # do not recombine unless next line ends in : - next unless $type_iend_2 eq ':'; - } + ); + } - # for lines ending in a comma... - elsif ( $type_iend_1 eq ',' ) { + elsif ( $type_iend_1 eq '{' ) { - # Do not recombine at comma which is following the - # input bias. - # TODO: might be best to make a special flag - next if ( $old_breakpoint_to_go[$iend_1] ); + # YVES + # honor breaks at opening brace + # Added to prevent recombining something like this: + # } || eval { package main; + return if $forced_breakpoint_to_go[$iend_1]; + } - # An isolated '},' may join with an identifier + ';' - # This is useful for the class of a 'bless' statement - # (bless.t) - if ( $type_ibeg_1 eq '}' - && $type_ibeg_2 eq 'i' ) - { - next - unless ( ( $ibeg_1 == ( $iend_1 - 1 ) ) - && ( $iend_2 == ( $ibeg_2 + 1 ) ) - && $this_line_is_semicolon_terminated ); + # do not recombine lines with ending &&, ||, + elsif ( $is_amp_amp{$type_iend_1} ) { + return unless $want_break_before{$type_iend_1}; + } - # override breakpoint - $forced_breakpoint_to_go[$iend_1] = 0; - } + # Identify and recombine a broken ?/: chain + elsif ( $type_iend_1 eq '?' ) { - # but otherwise .. - else { + # Do not recombine different levels + return + if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] ); - # do not recombine after a comma unless this will - # leave just 1 more line - next unless ( $n + 1 >= $nmax ); + # do not recombine unless next line ends in : + return unless $type_iend_2 eq ':'; + } - # do not recombine if there is a change in - # indentation depth - next - if ( $levels_to_go[$iend_1] != - $levels_to_go[$iend_2] ); - - # do not recombine a "complex expression" after a - # comma. "complex" means no parens. - my $saw_paren; - foreach my $ii ( $ibeg_2 .. $iend_2 ) { - if ( $tokens_to_go[$ii] eq '(' ) { - $saw_paren = 1; - last; - } - } - next if $saw_paren; - } - } + # for lines ending in a comma... + elsif ( $type_iend_1 eq ',' ) { - # opening paren.. - elsif ( $type_iend_1 eq '(' ) { + # Do not recombine at comma which is following the + # input bias. + # NOTE: this could be controlled by a special flag, + # but it seems to work okay. + return if ( $old_breakpoint_to_go[$iend_1] ); - # No longer doing this - } + # An isolated '},' may join with an identifier + ';' + # This is useful for the class of a 'bless' statement + # (bless.t) + if ( $type_ibeg_1 eq '}' + && $type_ibeg_2 eq 'i' ) + { + return + unless ( ( $ibeg_1 == ( $iend_1 - 1 ) ) + && ( $iend_2 == ( $ibeg_2 + 1 ) ) + && $this_line_is_semicolon_terminated ); - elsif ( $type_iend_1 eq ')' ) { + # override breakpoint + $forced_breakpoint_to_go[$iend_1] = 0; + } - # No longer doing this - } + # but otherwise .. + else { - # keep a terminal for-semicolon - elsif ( $type_iend_1 eq 'f' ) { - next; + # do not recombine after a comma unless this will + # leave just 1 more line + return unless ( $n + 1 >= $nmax ); + + # do not recombine if there is a change in + # indentation depth + return + if ( $levels_to_go[$iend_1] != $levels_to_go[$iend_2] ); + + # do not recombine a "complex expression" after a + # comma. "complex" means no parens. + my $saw_paren; + foreach my $ii ( $ibeg_2 .. $iend_2 ) { + if ( $tokens_to_go[$ii] eq '(' ) { + $saw_paren = 1; + last; } + } + return if $saw_paren; + } + } - # if '=' at end of line ... - elsif ( $is_assignment{$type_iend_1} ) { + # opening paren.. + elsif ( $type_iend_1 eq '(' ) { - # keep break after = if it was in input stream - # this helps prevent 'blinkers' - next - if ( - $old_breakpoint_to_go[$iend_1] + # No longer doing this + } - # don't strand an isolated '=' - && $iend_1 != $ibeg_1 - ); + elsif ( $type_iend_1 eq ')' ) { - my $is_short_quote = - ( $type_ibeg_2 eq 'Q' - && $ibeg_2 == $iend_2 - && token_sequence_length( $ibeg_2, $ibeg_2 ) < - $rOpts_short_concatenation_item_length ); - my $is_ternary = ( - $type_ibeg_1 eq '?' && ( $ibeg_3 >= 0 - && $types_to_go[$ibeg_3] eq ':' ) - ); + # No longer doing this + } - # always join an isolated '=', a short quote, or if this - # will put ?/: at start of adjacent lines - if ( $ibeg_1 != $iend_1 - && !$is_short_quote - && !$is_ternary ) - { - next - unless ( - ( + # keep a terminal for-semicolon + elsif ( $type_iend_1 eq 'f' ) { + return; + } - # unless we can reduce this to two lines - $nmax < $n + 2 + # if '=' at end of line ... + elsif ( $is_assignment{$type_iend_1} ) { - # or three lines, the last with a leading - # semicolon - || ( $nmax == $n + 2 - && $types_to_go[$ibeg_nmax] eq ';' ) + # keep break after = if it was in input stream + # this helps prevent 'blinkers' + return + if ( + $old_breakpoint_to_go[$iend_1] - # or the next line ends with a here doc - || $type_iend_2 eq 'h' + # don't strand an isolated '=' + && $iend_1 != $ibeg_1 + ); - # or the next line ends in an open paren or - # brace and the break hasn't been forced - # [dima.t] - || ( !$forced_breakpoint_to_go[$iend_1] - && $type_iend_2 eq '{' ) - ) + my $is_short_quote = + ( $type_ibeg_2 eq 'Q' + && $ibeg_2 == $iend_2 + && token_sequence_length( $ibeg_2, $ibeg_2 ) < + $rOpts_short_concatenation_item_length ); + my $is_ternary = ( + $type_ibeg_1 eq '?' && ( $ibeg_3 >= 0 + && $types_to_go[$ibeg_3] eq ':' ) + ); - # do not recombine if the two lines might align - # well this is a very approximate test for this - && ( + # always join an isolated '=', a short quote, or if this + # will put ?/: at start of adjacent lines + if ( $ibeg_1 != $iend_1 + && !$is_short_quote + && !$is_ternary ) + { + return + unless ( + ( - # RT#127633 - the leading tokens are not - # operators - ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] ) + # unless we can reduce this to two lines + $nmax < $n + 2 - # or they are different - || ( $ibeg_3 >= 0 - && $type_ibeg_2 ne - $types_to_go[$ibeg_3] ) - ) - ); - - if ( - - # Recombine if we can make two lines - $nmax >= $n + 2 - - # -lp users often prefer this: - # my $title = function($env, $env, $sysarea, - # "bubba Borrower Entry"); - # so we will recombine if -lp is used we have - # ending comma - && !( - $ibeg_3 > 0 - && ref( $leading_spaces_to_go[$ibeg_3] ) - && $type_iend_2 eq ',' - ) - ) - { + # or three lines, the last with a leading + # semicolon + || ( $nmax == $n + 2 + && $types_to_go[$ibeg_nmax] eq ';' ) - # otherwise, scan the rhs line up to last token - # for complexity. Note that we are not - # counting the last token in case it is an - # opening paren. - my $tv = 0; - my $depth = $nesting_depth_to_go[$ibeg_2]; - foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) { - if ( $nesting_depth_to_go[$i] != $depth ) { - $tv++; - last if ( $tv > 1 ); - } - $depth = $nesting_depth_to_go[$i]; - } - - # ok to recombine if no level changes before - # last token - if ( $tv > 0 ) { - - # otherwise, do not recombine if more than - # two level changes. - next if ( $tv > 1 ); - - # check total complexity of the two - # adjacent lines that will occur if we do - # this join - my $istop = - ( $n < $nmax ) - ? $ri_end->[ $n + 1 ] - : $iend_2; - foreach my $i ( $iend_2 .. $istop ) { - if ( - $nesting_depth_to_go[$i] != $depth ) - { - $tv++; - last if ( $tv > 2 ); - } - $depth = $nesting_depth_to_go[$i]; - } - - # do not recombine if total is more than 2 - # level changes - next if ( $tv > 2 ); - } - } - } + # or the next line ends with a here doc + || $type_iend_2 eq 'h' - unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) { - $forced_breakpoint_to_go[$iend_1] = 0; - } - } + # or the next line ends in an open paren or + # brace and the break hasn't been forced + # [dima.t] + || ( !$forced_breakpoint_to_go[$iend_1] + && $type_iend_2 eq '{' ) + ) - # for keywords.. - elsif ( $type_iend_1 eq 'k' ) { + # do not recombine if the two lines might align + # well this is a very approximate test for this + && ( - # make major control keywords stand out - # (recombine.t) - next - if ( + # RT#127633 - the leading tokens are not + # operators + ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] ) - #/^(last|next|redo|return)$/ - $is_last_next_redo_return{ $tokens_to_go[$iend_1] } + # or they are different + || ( $ibeg_3 >= 0 + && $type_ibeg_2 ne $types_to_go[$ibeg_3] ) + ) + ); - # but only if followed by multiple lines - && $n < $nmax - ); + if ( - if ( $is_and_or{ $tokens_to_go[$iend_1] } ) { - next - unless $want_break_before{ $tokens_to_go[$iend_1] - }; - } - } + # Recombine if we can make two lines + $nmax >= $n + 2 - #---------------------------------------------------------- - # Recombine Section 3: - # Examine token at $ibeg_2 (left end of second line of pair) - #---------------------------------------------------------- + # -lp users often prefer this: + # my $title = function($env, $env, $sysarea, + # "bubba Borrower Entry"); + # so we will recombine if -lp is used we have + # ending comma + && !( + $ibeg_3 > 0 + && ref( $leading_spaces_to_go[$ibeg_3] ) + && $type_iend_2 eq ',' + ) + ) + { - # join lines identified above as capable of - # causing an outdented line with leading closing paren - # Note that we are skipping the rest of this section - # and the rest of the loop to do the join - if ($skip_Section_3) { - $forced_breakpoint_to_go[$iend_1] = 0; - $n_best = $n; - last; - } + # otherwise, scan the rhs line up to last token for + # complexity. Note that we are not counting the last token + # in case it is an opening paren. + my $ok = simple_rhs( $ri_end, $n, $nmax, $ibeg_2, $iend_2 ); + return if ( !$ok ); - # handle lines with leading &&, || - elsif ( $is_amp_amp{$type_ibeg_2} ) { + } + } - $leading_amp_count++; + unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) { + $forced_breakpoint_to_go[$iend_1] = 0; + } + } - # ok to recombine if it follows a ? or : - # and is followed by an open paren.. - my $ok = - ( $is_ternary{$type_ibeg_1} - && $tokens_to_go[$iend_2] eq '(' ) + # for keywords.. + elsif ( $type_iend_1 eq 'k' ) { - # or is followed by a ? or : at same depth - # - # We are looking for something like this. We can - # recombine the && line with the line above to make the - # structure more clear: - # return - # exists $G->{Attr}->{V} - # && exists $G->{Attr}->{V}->{$u} - # ? %{ $G->{Attr}->{V}->{$u} } - # : (); - # - # We should probably leave something like this alone: - # return - # exists $G->{Attr}->{E} - # && exists $G->{Attr}->{E}->{$u} - # && exists $G->{Attr}->{E}->{$u}->{$v} - # ? %{ $G->{Attr}->{E}->{$u}->{$v} } - # : (); - # so that we either have all of the &&'s (or ||'s) - # on one line, as in the first example, or break at - # each one as in the second example. However, it - # sometimes makes things worse to check for this because - # it prevents multiple recombinations. So this is not done. - || ( $ibeg_3 >= 0 - && $is_ternary{ $types_to_go[$ibeg_3] } - && $nesting_depth_to_go[$ibeg_3] == - $nesting_depth_to_go[$ibeg_2] ); - - # Combine a trailing && term with an || term: fix for - # c060 This is rare but can happen. - $ok ||= 1 - if ( $ibeg_3 < 0 - && $type_ibeg_2 eq '&&' - && $type_ibeg_1 eq '||' - && $nesting_depth_to_go[$ibeg_2] == - $nesting_depth_to_go[$ibeg_1] ); - - next if !$ok && $want_break_before{$type_ibeg_2}; - $forced_breakpoint_to_go[$iend_1] = 0; + # make major control keywords stand out + # (recombine.t) + return + if ( - # tweak the bond strength to give this joint priority - # over ? and : - $bs_tweak = 0.25; - } + #/^(last|next|redo|return)$/ + $is_last_next_redo_return{ $tokens_to_go[$iend_1] } - # Identify and recombine a broken ?/: chain - elsif ( $type_ibeg_2 eq '?' ) { - - # Do not recombine different levels - my $lev = $levels_to_go[$ibeg_2]; - next if ( $lev ne $levels_to_go[$ibeg_1] ); - - # Do not recombine a '?' if either next line or - # previous line does not start with a ':'. The reasons - # are that (1) no alignment of the ? will be possible - # and (2) the expression is somewhat complex, so the - # '?' is harder to see in the interior of the line. - my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':'; - my $precedes_colon = - $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':'; - next unless ( $follows_colon || $precedes_colon ); - - # we will always combining a ? line following a : line - if ( !$follows_colon ) { - - # ...otherwise recombine only if it looks like a - # chain. we will just look at a few nearby lines - # to see if this looks like a chain. - my $local_count = 0; - foreach - my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) - { - $local_count++ - if $ii >= 0 - && $types_to_go[$ii] eq ':' - && $levels_to_go[$ii] == $lev; - } - next unless ( $local_count > 1 ); - } - $forced_breakpoint_to_go[$iend_1] = 0; - } + # but only if followed by multiple lines + && $n < $nmax + ); - # do not recombine lines with leading '.' - elsif ( $type_ibeg_2 eq '.' ) { - my $i_next_nonblank = - min( $inext_to_go[$ibeg_2], $iend_2 ); - next - unless ( - - # ... unless there is just one and we can reduce - # this to two lines if we do. For example, this - # - # - # $bodyA .= - # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;' - # - # looks better than this: - # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;' - # . '$args .= $pat;' - - ( - $n == 2 - && $n == $nmax - && $type_ibeg_1 ne $type_ibeg_2 - ) + if ( $is_and_or{ $tokens_to_go[$iend_1] } ) { + return + unless $want_break_before{ $tokens_to_go[$iend_1] }; + } + } + return ( 1, $skip_Section_3 ); + } ## end sub recombine_section_2 - # ... or this would strand a short quote , like this - # . "some long quote" - # . "\n"; + sub simple_rhs { - || ( $types_to_go[$i_next_nonblank] eq 'Q' - && $i_next_nonblank >= $iend_2 - 1 - && $token_lengths_to_go[$i_next_nonblank] < - $rOpts_short_concatenation_item_length ) - ); - } + my ( $ri_end, $n, $nmax, $ibeg_2, $iend_2 ) = @_; - # handle leading keyword.. - elsif ( $type_ibeg_2 eq 'k' ) { + # Scan line ibeg_2 to $iend_2 up to last token for complexity. + # We are not counting the last token in case it is an opening paren. + # Return: + # true if rhs is simple, ok to recombine + # false otherwise - # handle leading "or" - if ( $tokens_to_go[$ibeg_2] eq 'or' ) { - next - unless ( - $this_line_is_semicolon_terminated - && ( - $type_ibeg_1 eq '}' - || ( - - # following 'if' or 'unless' or 'or' - $type_ibeg_1 eq 'k' - && $is_if_unless{ $tokens_to_go[$ibeg_1] - } - - # important: only combine a very simple - # or statement because the step below - # may have combined a trailing 'and' - # with this or, and we do not want to - # then combine everything together - && ( $iend_2 - $ibeg_2 <= 7 ) - ) - ) - ); + my $tv = 0; + my $depth = $nesting_depth_to_go[$ibeg_2]; + foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) { + if ( $nesting_depth_to_go[$i] != $depth ) { + $tv++; + last if ( $tv > 1 ); + } + $depth = $nesting_depth_to_go[$i]; + } - #X: RT #81854 - $forced_breakpoint_to_go[$iend_1] = 0 - unless ( $old_breakpoint_to_go[$iend_1] ); - } + # ok to recombine if no level changes before + # last token + if ( $tv > 0 ) { - # handle leading 'and' and 'xor' - elsif ($tokens_to_go[$ibeg_2] eq 'and' - || $tokens_to_go[$ibeg_2] eq 'xor' ) - { + # otherwise, do not recombine if more than + # two level changes. + return if ( $tv > 1 ); - # Decide if we will combine a single terminal 'and' - # after an 'if' or 'unless'. - - # This looks best with the 'and' on the same - # line as the 'if': - # - # $a = 1 - # if $seconds and $nu < 2; - # - # But this looks better as shown: - # - # $a = 1 - # if !$this->{Parents}{$_} - # or $this->{Parents}{$_} eq $_; - # - next - unless ( - $this_line_is_semicolon_terminated - && ( - - # following 'if' or 'unless' or 'or' - $type_ibeg_1 eq 'k' - && ( $is_if_unless{ $tokens_to_go[$ibeg_1] } - || $tokens_to_go[$ibeg_1] eq 'or' ) - ) - ); - } + # check total complexity of the two + # adjacent lines that will occur if we do + # this join + my $istop = + ( $n < $nmax ) + ? $ri_end->[ $n + 1 ] + : $iend_2; + foreach my $i ( $iend_2 .. $istop ) { + if ( $nesting_depth_to_go[$i] != $depth ) { + $tv++; + last if ( $tv > 2 ); + } + $depth = $nesting_depth_to_go[$i]; + } + + # do not recombine if total is more than 2 + # level changes + return if ( $tv > 2 ); + } + return 1; + } + + sub recombine_section_3 { + + my ( $ri_beg, $ri_end, $n, $this_line_is_semicolon_terminated, + $rleading_amp_count ) + = @_; + + # Recombine Section 3: + # Examine token at $ibeg_2 (right end of first line of pair) + + # Here are Indexes of the endpoint tokens of the two lines: + # + # -----line $n-1--- | -----line $n----- + # $ibeg_1 $iend_1 | $ibeg_2 $iend_2 + # ^ + # | + # -----Section 3 looks at this token - # handle leading "if" and "unless" - elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) { + # Returns: + # (nothing) => do not join lines + # 1, bs_tweak => ok to join lines + + # $bstweak is a small tolerance to add to bond strengths + my $bs_tweak = 0; + + my $nmax = @{$ri_end} - 1; + my $ibeg_1 = $ri_beg->[ $n - 1 ]; + my $iend_1 = $ri_end->[ $n - 1 ]; + my $iend_2 = $ri_end->[$n]; + my $ibeg_2 = $ri_beg->[$n]; + + my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1; + my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1; + my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1; + my $ibeg_nmax = $ri_beg->[$nmax]; + + my $type_iend_1 = $types_to_go[$iend_1]; + my $type_iend_2 = $types_to_go[$iend_2]; + my $type_ibeg_1 = $types_to_go[$ibeg_1]; + my $type_ibeg_2 = $types_to_go[$ibeg_2]; + + # handle lines with leading &&, || + if ( $is_amp_amp{$type_ibeg_2} ) { + + ${$rleading_amp_count}++; + + # ok to recombine if it follows a ? or : + # and is followed by an open paren.. + my $ok = + ( $is_ternary{$type_ibeg_1} && $tokens_to_go[$iend_2] eq '(' ) + + # or is followed by a ? or : at same depth + # + # We are looking for something like this. We can + # recombine the && line with the line above to make the + # structure more clear: + # return + # exists $G->{Attr}->{V} + # && exists $G->{Attr}->{V}->{$u} + # ? %{ $G->{Attr}->{V}->{$u} } + # : (); + # + # We should probably leave something like this alone: + # return + # exists $G->{Attr}->{E} + # && exists $G->{Attr}->{E}->{$u} + # && exists $G->{Attr}->{E}->{$u}->{$v} + # ? %{ $G->{Attr}->{E}->{$u}->{$v} } + # : (); + # so that we either have all of the &&'s (or ||'s) + # on one line, as in the first example, or break at + # each one as in the second example. However, it + # sometimes makes things worse to check for this because + # it prevents multiple recombinations. So this is not done. + || ( $ibeg_3 >= 0 + && $is_ternary{ $types_to_go[$ibeg_3] } + && $nesting_depth_to_go[$ibeg_3] == + $nesting_depth_to_go[$ibeg_2] ); + + # Combine a trailing && term with an || term: fix for + # c060 This is rare but can happen. + $ok ||= 1 + if ( $ibeg_3 < 0 + && $type_ibeg_2 eq '&&' + && $type_ibeg_1 eq '||' + && $nesting_depth_to_go[$ibeg_2] == + $nesting_depth_to_go[$ibeg_1] ); + + return if !$ok && $want_break_before{$type_ibeg_2}; + $forced_breakpoint_to_go[$iend_1] = 0; + + # tweak the bond strength to give this joint priority + # over ? and : + $bs_tweak = 0.25; + } + + # Identify and recombine a broken ?/: chain + elsif ( $type_ibeg_2 eq '?' ) { + + # Do not recombine different levels + my $lev = $levels_to_go[$ibeg_2]; + return if ( $lev ne $levels_to_go[$ibeg_1] ); + + # Do not recombine a '?' if either next line or + # previous line does not start with a ':'. The reasons + # are that (1) no alignment of the ? will be possible + # and (2) the expression is somewhat complex, so the + # '?' is harder to see in the interior of the line. + my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':'; + my $precedes_colon = $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':'; + return unless ( $follows_colon || $precedes_colon ); + + # we will always combining a ? line following a : line + if ( !$follows_colon ) { + + # ...otherwise recombine only if it looks like a + # chain. we will just look at a few nearby lines + # to see if this looks like a chain. + my $local_count = 0; + foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) { + $local_count++ + if $ii >= 0 + && $types_to_go[$ii] eq ':' + && $levels_to_go[$ii] == $lev; + } + return unless ( $local_count > 1 ); + } + $forced_breakpoint_to_go[$iend_1] = 0; + } + + # do not recombine lines with leading '.' + elsif ( $type_ibeg_2 eq '.' ) { + my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 ); + return + unless ( - # Combine something like: - # next - # if ( $lang !~ /${l}$/i ); - # into: - # next if ( $lang !~ /${l}$/i ); - next - unless ( - $this_line_is_semicolon_terminated + # ... unless there is just one and we can reduce + # this to two lines if we do. For example, this + # + # + # $bodyA .= + # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;' + # + # looks better than this: + # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;' + # . '$args .= $pat;' - # previous line begins with 'and' or 'or' - && $type_ibeg_1 eq 'k' - && $is_and_or{ $tokens_to_go[$ibeg_1] } + ( $n == 2 && $n == $nmax && $type_ibeg_1 ne $type_ibeg_2 ) - ); - } + # ... or this would strand a short quote , like this + # . "some long quote" + # . "\n"; - # handle all other leading keywords - else { + || ( $types_to_go[$i_next_nonblank] eq 'Q' + && $i_next_nonblank >= $iend_2 - 1 + && $token_lengths_to_go[$i_next_nonblank] < + $rOpts_short_concatenation_item_length ) + ); + } - # keywords look best at start of lines, - # but combine things like "1 while" - unless ( $is_assignment{$type_iend_1} ) { - next - if ( ( $type_iend_1 ne 'k' ) - && ( $tokens_to_go[$ibeg_2] ne 'while' ) ); - } - } - } + # handle leading keyword.. + elsif ( $type_ibeg_2 eq 'k' ) { - # similar treatment of && and || as above for 'and' and - # 'or': NOTE: This block of code is currently bypassed - # because of a previous block but is retained for possible - # future use. - elsif ( $is_amp_amp{$type_ibeg_2} ) { + # handle leading "or" + if ( $tokens_to_go[$ibeg_2] eq 'or' ) { + return + unless ( + $this_line_is_semicolon_terminated + && ( + $type_ibeg_1 eq '}' + || ( - # maybe looking at something like: - # unless $TEXTONLY || $item =~ m%|p>|a|img)%i; + # following 'if' or 'unless' or 'or' + $type_ibeg_1 eq 'k' + && $is_if_unless{ $tokens_to_go[$ibeg_1] } - next - unless ( - $this_line_is_semicolon_terminated + # important: only combine a very simple + # or statement because the step below + # may have combined a trailing 'and' + # with this or, and we do not want to + # then combine everything together + && ( $iend_2 - $ibeg_2 <= 7 ) + ) + ) + ); - # previous line begins with an 'if' or 'unless' - # keyword - && $type_ibeg_1 eq 'k' - && $is_if_unless{ $tokens_to_go[$ibeg_1] } + #X: RT #81854 + $forced_breakpoint_to_go[$iend_1] = 0 + unless ( $old_breakpoint_to_go[$iend_1] ); + } - ); - } + # handle leading 'and' and 'xor' + elsif ($tokens_to_go[$ibeg_2] eq 'and' + || $tokens_to_go[$ibeg_2] eq 'xor' ) + { - # handle line with leading = or similar - elsif ( $is_assignment{$type_ibeg_2} ) { - next unless ( $n == 1 || $n == $nmax ); - next if ( $old_breakpoint_to_go[$iend_1] ); - next - unless ( + # Decide if we will combine a single terminal 'and' + # after an 'if' or 'unless'. - # unless we can reduce this to two lines - $nmax == 2 + # This looks best with the 'and' on the same + # line as the 'if': + # + # $a = 1 + # if $seconds and $nu < 2; + # + # But this looks better as shown: + # + # $a = 1 + # if !$this->{Parents}{$_} + # or $this->{Parents}{$_} eq $_; + # + return + unless ( + $this_line_is_semicolon_terminated + && ( - # or three lines, the last with a leading semicolon - || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' ) + # following 'if' or 'unless' or 'or' + $type_ibeg_1 eq 'k' + && ( $is_if_unless{ $tokens_to_go[$ibeg_1] } + || $tokens_to_go[$ibeg_1] eq 'or' ) + ) + ); + } - # or the next line ends with a here doc - || $type_iend_2 eq 'h' + # handle leading "if" and "unless" + elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) { - # or this is a short line ending in ; - || ( $n == $nmax - && $this_line_is_semicolon_terminated ) - ); - $forced_breakpoint_to_go[$iend_1] = 0; - } + # Combine something like: + # next + # if ( $lang !~ /${l}$/i ); + # into: + # next if ( $lang !~ /${l}$/i ); + return + unless ( + $this_line_is_semicolon_terminated - #---------------------------------------------------------- - # Recombine Section 4: - # Combine the lines if we arrive here and it is possible - #---------------------------------------------------------- + # previous line begins with 'and' or 'or' + && $type_ibeg_1 eq 'k' + && $is_and_or{ $tokens_to_go[$ibeg_1] } - # honor hard breakpoints - next if ( $forced_breakpoint_to_go[$iend_1] > 0 ); + ); + } - my $bs = $rbond_strength_to_go->[$iend_1] + $bs_tweak; + # handle all other leading keywords + else { - # Require a few extra spaces before recombining lines if we are - # at an old breakpoint unless this is a simple list or terminal - # line. The goal is to avoid oscillating between two - # quasi-stable end states. For example this snippet caused - # problems: -## my $this = -## bless { -## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]" -## }, -## $type; - next - if ( $old_breakpoint_to_go[$iend_1] - && !$this_line_is_semicolon_terminated - && $n < $nmax - && $excess + 4 > 0 - && $type_iend_2 ne ',' ); + # keywords look best at start of lines, + # but combine things like "1 while" + unless ( $is_assignment{$type_iend_1} ) { + return + if ( ( $type_iend_1 ne 'k' ) + && ( $tokens_to_go[$ibeg_2] ne 'while' ) ); + } + } + } - # do not recombine if we would skip in indentation levels - if ( $n < $nmax ) { - my $if_next = $ri_beg->[ $n + 1 ]; - next - if ( - $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2] - && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next] + # similar treatment of && and || as above for 'and' and + # 'or': NOTE: This block of code is currently bypassed + # because of a previous block but is retained for possible + # future use. + elsif ( $is_amp_amp{$type_ibeg_2} ) { - # but an isolated 'if (' is undesirable - && !( - $n == 1 - && $iend_1 - $ibeg_1 <= 2 - && $type_ibeg_1 eq 'k' - && $tokens_to_go[$ibeg_1] eq 'if' - && $tokens_to_go[$iend_1] ne '(' - ) - ); - } + # maybe looking at something like: + # unless $TEXTONLY || $item =~ m%|p>|a|img)%i; - # honor no-break's - ## next if ( $bs >= NO_BREAK - 1 ); # removed for b1257 + return + unless ( + $this_line_is_semicolon_terminated - # remember the pair with the greatest bond strength - if ( !$n_best ) { - $n_best = $n; - $bs_best = $bs; - } - else { + # previous line begins with an 'if' or 'unless' + # keyword + && $type_ibeg_1 eq 'k' + && $is_if_unless{ $tokens_to_go[$ibeg_1] } - if ( $bs > $bs_best ) { - $n_best = $n; - $bs_best = $bs; - } - } - } + ); + } - # recombine the pair with the greatest bond strength - if ($n_best) { - splice @{$ri_beg}, $n_best, 1; - splice @{$ri_end}, $n_best - 1, 1; - splice @joint, $n_best, 1; + # handle line with leading = or similar + elsif ( $is_assignment{$type_ibeg_2} ) { + return unless ( $n == 1 || $n == $nmax ); + return if ( $old_breakpoint_to_go[$iend_1] ); + return + unless ( - # keep going if we are still making progress - $more_to_do++; - } - } # end iteration loop + # unless we can reduce this to two lines + $nmax == 2 - } # end loop over sections + # or three lines, the last with a leading semicolon + || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' ) - RETURN: + # or the next line ends with a here doc + || $type_iend_2 eq 'h' - if (DEBUG_RECOMBINE) { - my $nmax_last = @{$ri_end} - 1; - print STDERR -"exiting recombine with $nmax_last lines, starting lines=$nmax_start, iterations=$it_count, max_it=$it_count_max numsec=$num_sections\n"; + # or this is a short line ending in ; + || ( $n == $nmax + && $this_line_is_semicolon_terminated ) + ); + $forced_breakpoint_to_go[$iend_1] = 0; } - return; - } ## end sub recombine_breakpoints + return ( 1, $bs_tweak ); + } ## end sub recombine_section_3 + } ## end closure recombine_breakpoints sub insert_final_ternary_breaks { @@ -17287,8 +18642,12 @@ sub correct_lp_indentation { get_saved_opening_indentation($align_seqno); if ( defined($indent) ) { - # FIXME: should use '1' here if no space after opening - # and '2' if want space; hardwired at 1 like -gnu-style + # NOTE: we could use '1' here if no space after + # opening and '2' if want space; it is hardwired at 1 + # like -gnu-style. But it is probably best to leave + # this alone because changing it would change + # formatting of much existing code without any + # significant benefit. $actual_pos = get_spaces($indent) + $offset + 1; } } @@ -17532,6 +18891,8 @@ sub undo_lp_ci { # CODE SECTION 10: Code to break long statments ############################################### +use constant DEBUG_BREAK_LINES => 0; + sub break_long_lines { #----------------------------------------------------------- @@ -17539,22 +18900,13 @@ sub break_long_lines { # maximum line length. #----------------------------------------------------------- - # Define an array of indexes for inserting newline characters to - # keep the line lengths below the maximum desired length. There is - # an implied break after the last token, so it need not be included. + my ( $self, $saw_good_break, $rcolon_list, $rbond_strength_bias ) = @_; - # Method: - # This routine is part of series of routines which adjust line - # lengths. It is only called if a statement is longer than the - # maximum line length, or if a preliminary scanning located - # desirable break points. Sub break_lists has already looked at - # these tokens and set breakpoints (in array - # $forced_breakpoint_to_go[$i]) where it wants breaks (for example - # after commas, after opening parens, and before closing parens). - # This routine will honor these breakpoints and also add additional - # breakpoints as necessary to keep the line length below the maximum - # requested. It bases its decision on where the 'bond strength' is - # lowest. + # Input parameters: + # $saw_good_break - a flag set by break_lists + # $rcolon_list - ref to a list of all the ? and : tokens in the batch, + # in order. + # $rbond_strength_bias - small bond strength bias values set by break_lists # Output: returns references to the arrays: # @i_first @@ -17562,673 +18914,731 @@ sub break_long_lines { # which contain the indexes $i of the first and last tokens on each # line. - # In addition, the array: - # $forced_breakpoint_to_go[$i] - # may be updated to be =1 for any index $i after which there must be - # a break. This signals later routines not to undo the breakpoint. + # In addition, the array: + # $forced_breakpoint_to_go[$i] + # may be updated to be =1 for any index $i after which there must be + # a break. This signals later routines not to undo the breakpoint. + + # Method: + # This routine is called if a statement is longer than the maximum line + # length, or if a preliminary scanning located desirable break points. + # Sub break_lists has already looked at these tokens and set breakpoints + # (in array $forced_breakpoint_to_go[$i]) where it wants breaks (for + # example after commas, after opening parens, and before closing parens). + # This routine will honor these breakpoints and also add additional + # breakpoints as necessary to keep the line length below the maximum + # requested. It bases its decision on where the 'bond strength' is + # lowest. + + my @i_first = (); # the first index to output + my @i_last = (); # the last index to output + my @i_colon_breaks = (); # needed to decide if we have to break at ?'s + if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 } + + # Get the 'bond strengths' between tokens + my $rbond_strength_to_go = $self->set_bond_strengths(); + + # Add any comma bias set by break_lists + if ( @{$rbond_strength_bias} ) { + foreach my $item ( @{$rbond_strength_bias} ) { + my ( $ii, $bias ) = @{$item}; + if ( $ii >= 0 && $ii <= $max_index_to_go ) { + $rbond_strength_to_go->[$ii] += $bias; + } + elsif (DEVEL_MODE) { + my $KK = $K_to_go[0]; + my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_]; + Fault( +"Bad bond strength bias near line $lno: i=$ii must be between 0 and $max_index_to_go\n" + ); + } + } + } + + my $imin = 0; + my $imax = $max_index_to_go; + if ( $types_to_go[$imin] eq 'b' ) { $imin++ } + if ( $types_to_go[$imax] eq 'b' ) { $imax-- } + + my $i_begin = $imin; + my $last_break_strength = NO_BREAK; + my $i_last_break = -1; + my $line_count = 0; + + # see if any ?/:'s are in order + my $colons_in_order = 1; + my $last_tok = EMPTY_STRING; + foreach ( @{$rcolon_list} ) { + if ( $_ eq $last_tok ) { $colons_in_order = 0; last } + $last_tok = $_; + } + + # This is a sufficient but not necessary condition for colon chain + my $is_colon_chain = ( $colons_in_order && @{$rcolon_list} > 2 ); + + #------------------------------------------ + # BEGINNING of main loop to set breakpoints + # Keep iterating until we reach the end + #------------------------------------------ + while ( $i_begin <= $imax ) { + + #------------------------------------------------------------------ + # Find the best next breakpoint based on token-token bond strengths + #------------------------------------------------------------------ + my ( $i_lowest, $lowest_strength, $leading_alignment_type, $Msg ) = + $self->break_lines_inner_loop( + + $i_begin, + $i_last_break, + $imax, + $last_break_strength, + $line_count, + $rbond_strength_to_go, + $saw_good_break, + + ); + + # Now make any adjustments required by ternary breakpoint rules + if ( @{$rcolon_list} ) { + + my $i_next_nonblank = $inext_to_go[$i_lowest]; + + #------------------------------------------------------- + # ?/: rule 1 : if a break here will separate a '?' on this + # line from its closing ':', then break at the '?' instead. + # But do not break a sequential chain of ?/: statements + #------------------------------------------------------- + if ( !$is_colon_chain ) { + foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) { + next unless ( $tokens_to_go[$i] eq '?' ); + + # do not break if statement is broken by side comment + next + if ( $tokens_to_go[$max_index_to_go] eq '#' + && terminal_type_i( 0, $max_index_to_go ) !~ + /^[\;\}]$/ ); + + # no break needed if matching : is also on the line + next + if ( $mate_index_to_go[$i] >= 0 + && $mate_index_to_go[$i] <= $i_next_nonblank ); + + $i_lowest = $i; + if ( $want_break_before{'?'} ) { $i_lowest-- } + $i_next_nonblank = $inext_to_go[$i_lowest]; + last; + } + } + + my $next_nonblank_type = $types_to_go[$i_next_nonblank]; + + #------------------------------------------------------------- + # ?/: rule 2 : if we break at a '?', then break at its ':' + # + # Note: this rule is also in sub break_lists to handle a break + # at the start and end of a line (in case breaks are dictated + # by side comments). + #------------------------------------------------------------- + if ( $next_nonblank_type eq '?' ) { + $self->set_closing_breakpoint($i_next_nonblank); + } + elsif ( $types_to_go[$i_lowest] eq '?' ) { + $self->set_closing_breakpoint($i_lowest); + } + + #-------------------------------------------------------- + # ?/: rule 3 : if we break at a ':' then we save + # its location for further work below. We may need to go + # back and break at its '?'. + #-------------------------------------------------------- + if ( $next_nonblank_type eq ':' ) { + push @i_colon_breaks, $i_next_nonblank; + } + elsif ( $types_to_go[$i_lowest] eq ':' ) { + push @i_colon_breaks, $i_lowest; + } + + # here we should set breaks for all '?'/':' pairs which are + # separated by this line + } + + # guard against infinite loop (should never happen) + if ( $i_lowest <= $i_last_break ) { + DEVEL_MODE + && Fault("i_lowest=$i_lowest <= i_last_break=$i_last_break\n"); + $i_lowest = $imax; + } + + DEBUG_BREAK_LINES + && print STDOUT +"BREAK: best is i = $i_lowest strength = $lowest_strength;\nReason>> $Msg\n"; + + $line_count++; + + # save this line segment, after trimming blanks at the ends + push( @i_first, + ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin ); + push( @i_last, + ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest ); + + # set a forced breakpoint at a container opening, if necessary, to + # signal a break at a closing container. Excepting '(' for now. + if ( + ( + $tokens_to_go[$i_lowest] eq '{' + || $tokens_to_go[$i_lowest] eq '[' + ) + && !$forced_breakpoint_to_go[$i_lowest] + ) + { + $self->set_closing_breakpoint($i_lowest); + } + + # get ready to find the next breakpoint + $last_break_strength = $lowest_strength; + $i_last_break = $i_lowest; + $i_begin = $i_lowest + 1; + + # skip past a blank + if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) { + $i_begin++; + } + } + + #------------------------------------------------- + # END of main loop to set continuation breakpoints + #------------------------------------------------- + + #----------------------------------------------------------- + # ?/: rule 4 -- if we broke at a ':', then break at + # corresponding '?' unless this is a chain of ?: expressions + #----------------------------------------------------------- + if (@i_colon_breaks) { + my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 ); + if ( !$is_chain ) { + $self->do_colon_breaks( \@i_colon_breaks, \@i_first, \@i_last ); + } + } + + return ( \@i_first, \@i_last, $rbond_strength_to_go ); +} ## end sub break_long_lines - my ( $self, $saw_good_break, $rcolon_list, $rbond_strength_bias ) = @_; +# small bond strength numbers to help break ties +use constant TINY_BIAS => 0.0001; +use constant MAX_BIAS => 0.001; - # @{$rcolon_list} is a list of all the ? and : tokens in the batch, in - # order. +sub break_lines_inner_loop { - use constant DEBUG_BREAK_LINES => 0; + #----------------------------------------------------------------- + # Find the best next breakpoint in index range ($i_begin .. $imax) + # which, if possible, does not exceed the maximum line length. + #----------------------------------------------------------------- - my @i_first = (); # the first index to output - my @i_last = (); # the last index to output - my @i_colon_breaks = (); # needed to decide if we have to break at ?'s - if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 } + my ( + $self, # - my $rbond_strength_to_go = $self->set_bond_strengths(); + $i_begin, + $i_last_break, + $imax, + $last_break_strength, + $line_count, + $rbond_strength_to_go, + $saw_good_break, - # Add any comma bias set by break_lists - if ( @{$rbond_strength_bias} ) { - foreach my $item ( @{$rbond_strength_bias} ) { - my ( $ii, $bias ) = @{$item}; - if ( $ii >= 0 && $ii <= $max_index_to_go ) { - $rbond_strength_to_go->[$ii] += $bias; - } - elsif (DEVEL_MODE) { - my $KK = $K_to_go[0]; - my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_]; - Fault( -"Bad bond strength bias near line $lno: i=$ii must be between 0 and $max_index_to_go\n" - ); - } + ) = @_; + + # Given: + # $i_begin = first index of range + # $i_last_break = index of previous break + # $imax = last index of range + # $last_break_strength = bond strength of last break + # $line_count = number of output lines so far + # $rbond_strength_to_go = ref to array of bond strengths + # $saw_good_break = true if old line had a good breakpoint + + # Returns: + # $i_lowest = index of best breakpoint + # $lowest_strength = 'bond strength' at best breakpoint + # $leading_alignment_type = special token type after break + # $Msg = string of debug info + + my $Msg = EMPTY_STRING; + my $strength = NO_BREAK; + my $i_test = $i_begin - 1; + my $i_lowest = -1; + my $starting_sum = $summed_lengths_to_go[$i_begin]; + my $lowest_strength = NO_BREAK; + my $leading_alignment_type = EMPTY_STRING; + my $leading_spaces = leading_spaces_to_go($i_begin); + my $maximum_line_length = + $maximum_line_length_at_level[ $levels_to_go[$i_begin] ]; + DEBUG_BREAK_LINES + && do { + $Msg .= "updating leading spaces to be $leading_spaces at i=$i_begin\n"; + }; + + # Do not separate an isolated bare word from an opening paren. + # Alternate Fix #2 for issue b1299. This waits as long as possible + # to make the decision. + if ( $types_to_go[$i_begin] eq 'i' + && substr( $tokens_to_go[$i_begin], 0, 1 ) =~ /\w/ ) + { + my $i_next_nonblank = $inext_to_go[$i_begin]; + if ( $tokens_to_go[$i_next_nonblank] eq '(' ) { + $rbond_strength_to_go->[$i_begin] = NO_BREAK; } } - my $imin = 0; - my $imax = $max_index_to_go; - if ( $types_to_go[$imin] eq 'b' ) { $imin++ } - if ( $types_to_go[$imax] eq 'b' ) { $imax-- } - my $i_begin = $imin; # index for starting next iteration + #------------------------------------------------- + # Begin loop over the indexes in the _to_go arrays + #------------------------------------------------- + while ( ++$i_test <= $imax ) { + my $type = $types_to_go[$i_test]; + my $token = $tokens_to_go[$i_test]; + my $next_type = $types_to_go[ $i_test + 1 ]; + my $next_token = $tokens_to_go[ $i_test + 1 ]; + my $i_next_nonblank = $inext_to_go[$i_test]; + my $next_nonblank_type = $types_to_go[$i_next_nonblank]; + my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; + my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; - my $leading_spaces = leading_spaces_to_go($imin); - my $line_count = 0; - my $last_break_strength = NO_BREAK; - my $i_last_break = -1; - my $max_bias = 0.001; - my $tiny_bias = 0.0001; - my $leading_alignment_token = EMPTY_STRING; - my $leading_alignment_type = EMPTY_STRING; + #--------------------------------------------------------------- + # Section A: Get token-token strength and handle any adjustments + #--------------------------------------------------------------- - # see if any ?/:'s are in order - my $colons_in_order = 1; - my $last_tok = EMPTY_STRING; - foreach ( @{$rcolon_list} ) { - if ( $_ eq $last_tok ) { $colons_in_order = 0; last } - $last_tok = $_; - } + # adjustments to the previous bond strength may have been made, and + # we must keep the bond strength of a token and its following blank + # the same; + my $last_strength = $strength; + $strength = $rbond_strength_to_go->[$i_test]; + if ( $type eq 'b' ) { $strength = $last_strength } - # This is a sufficient but not necessary condition for colon chain - my $is_colon_chain = ( $colons_in_order && @{$rcolon_list} > 2 ); + # reduce strength a bit to break ties at an old comma breakpoint ... + if ( - my $Msg = EMPTY_STRING; + $old_breakpoint_to_go[$i_test] - #------------------------------------------------------- - # BEGINNING of main loop to set continuation breakpoints - # Keep iterating until we reach the end - #------------------------------------------------------- - while ( $i_begin <= $imax ) { - my $lowest_strength = NO_BREAK; - my $starting_sum = $summed_lengths_to_go[$i_begin]; - my $i_lowest = -1; - my $i_test = -1; - my $lowest_next_token = EMPTY_STRING; - my $lowest_next_type = 'b'; - my $i_lowest_next_nonblank = -1; - my $maximum_line_length = - $maximum_line_length_at_level[ $levels_to_go[$i_begin] ]; - - # Do not separate an isolated bare word from an opening paren. - # Alternate Fix #2 for issue b1299. This waits as long as possible - # to make the decision. - if ( $types_to_go[$i_begin] eq 'i' - && substr( $tokens_to_go[$i_begin], 0, 1 ) =~ /\w/ ) + # Patch: limited to just commas to avoid blinking states + && $type eq ',' + + # which is a 'good' breakpoint, meaning ... + # we don't want to break before it + && !$want_break_before{$type} + + # and either we want to break before the next token + # or the next token is not short (i.e. not a '*', '/' etc.) + && $i_next_nonblank <= $imax + && ( $want_break_before{$next_nonblank_type} + || $token_lengths_to_go[$i_next_nonblank] > 2 + || $next_nonblank_type eq ',' + || $is_opening_type{$next_nonblank_type} ) + ) { - my $i_next_nonblank = $inext_to_go[$i_begin]; - if ( $tokens_to_go[$i_next_nonblank] eq '(' ) { - $rbond_strength_to_go->[$i_begin] = NO_BREAK; - } + $strength -= TINY_BIAS; + DEBUG_BREAK_LINES && do { $Msg .= " :-bias at i=$i_test" }; } - #------------------------------------------------------- - # BEGINNING of inner loop to find the best next breakpoint - #------------------------------------------------------- - my $strength = NO_BREAK; - $i_test = $i_begin - 1; - while ( ++$i_test <= $imax ) { - my $type = $types_to_go[$i_test]; - my $token = $tokens_to_go[$i_test]; - my $next_type = $types_to_go[ $i_test + 1 ]; - my $next_token = $tokens_to_go[ $i_test + 1 ]; - my $i_next_nonblank = $inext_to_go[$i_test]; - my $next_nonblank_type = $types_to_go[$i_next_nonblank]; - my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; - my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; - - # adjustments to the previous bond strength may have been made, and - # we must keep the bond strength of a token and its following blank - # the same; - my $last_strength = $strength; - $strength = $rbond_strength_to_go->[$i_test]; - if ( $type eq 'b' ) { $strength = $last_strength } - - # reduce strength a bit to break ties at an old comma breakpoint ... - if ( + # otherwise increase strength a bit if this token would be at the + # maximum line length. This is necessary to avoid blinking + # in the above example when the -iob flag is added. + else { + my $len = + $leading_spaces + + $summed_lengths_to_go[ $i_test + 1 ] - + $starting_sum; + if ( $len >= $maximum_line_length ) { + $strength += TINY_BIAS; + DEBUG_BREAK_LINES && do { $Msg .= " :+bias at i=$i_test" }; + } + } - $old_breakpoint_to_go[$i_test] + #------------------------------------- + # Section B: Handle forced breakpoints + #------------------------------------- + my $must_break; - # Patch: limited to just commas to avoid blinking states - && $type eq ',' + # Force an immediate break at certain operators + # with lower level than the start of the line, + # unless we've already seen a better break. + # + # Note on an issue with a preceding '?' : - # which is a 'good' breakpoint, meaning ... - # we don't want to break before it - && !$want_break_before{$type} + # There may be a break at a previous ? if the line is long. Because + # of this we do not want to force a break if there is a previous ? on + # this line. For now the best way to do this is to not break if we + # have seen a lower strength point, which is probably a ?. + # + # Example of unwanted breaks we are avoiding at a '.' following a ? + # from pod2html using perltidy -gnu: + # ) + # ? "\n<A NAME=\"" + # . $value + # . "\">\n$text</A>\n" + # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n"; + if ( + ( $strength <= $lowest_strength ) + && ( $nesting_depth_to_go[$i_begin] > + $nesting_depth_to_go[$i_next_nonblank] ) + && ( + $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/ + || ( + $next_nonblank_type eq 'k' - # and either we want to break before the next token - # or the next token is not short (i.e. not a '*', '/' etc.) - && $i_next_nonblank <= $imax - && ( $want_break_before{$next_nonblank_type} - || $token_lengths_to_go[$i_next_nonblank] > 2 - || $next_nonblank_type eq ',' - || $is_opening_type{$next_nonblank_type} ) - ) - { - $strength -= $tiny_bias; - DEBUG_BREAK_LINES && do { $Msg .= " :-bias at i=$i_test" }; - } + ## /^(and|or)$/ # note: includes 'xor' now + && $is_and_or{$next_nonblank_token} + ) + ) + ) + { + $self->set_forced_breakpoint($i_next_nonblank); + DEBUG_BREAK_LINES + && do { $Msg .= " :Forced break at i=$i_next_nonblank" }; + } - # otherwise increase strength a bit if this token would be at the - # maximum line length. This is necessary to avoid blinking - # in the above example when the -iob flag is added. - else { - my $len = - $leading_spaces + - $summed_lengths_to_go[ $i_test + 1 ] - - $starting_sum; - if ( $len >= $maximum_line_length ) { - $strength += $tiny_bias; - DEBUG_BREAK_LINES && do { $Msg .= " :+bias at i=$i_test" }; - } - } + if ( - my $must_break = 0; + # Try to put a break where requested by break_lists + $forced_breakpoint_to_go[$i_test] - # Force an immediate break at certain operators - # with lower level than the start of the line, - # unless we've already seen a better break. + # break between ) { in a continued line so that the '{' can + # be outdented + # See similar logic in break_lists which catches instances + # where a line is just something like ') {'. We have to + # be careful because the corresponding block keyword might + # not be on the first line, such as 'for' here: # - #------------------------------------ - # Note on an issue with a preceding ? - #------------------------------------ - # We don't include a ? in the above list, but there may - # be a break at a previous ? if the line is long. - # Because of this we do not want to force a break if - # there is a previous ? on this line. For now the best way - # to do this is to not break if we have seen a lower strength - # point, which is probably a ?. + # eval { + # for ("a") { + # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ } + # } + # }; # - # Example of unwanted breaks we are avoiding at a '.' following a ? - # from pod2html using perltidy -gnu: - # ) - # ? "\n<A NAME=\"" - # . $value - # . "\">\n$text</A>\n" - # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n"; - if ( - ( $strength <= $lowest_strength ) - && ( $nesting_depth_to_go[$i_begin] > - $nesting_depth_to_go[$i_next_nonblank] ) - && ( - $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/ - || ( - $next_nonblank_type eq 'k' + || ( + $line_count + && ( $token eq ')' ) + && ( $next_nonblank_type eq '{' ) + && ($next_nonblank_block_type) + && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] ) + + # RT #104427: Dont break before opening sub brace because + # sub block breaks handled at higher level, unless + # it looks like the preceding list is long and broken + && !( - ## /^(and|or)$/ # note: includes 'xor' now - && $is_and_or{$next_nonblank_token} + ( + $next_nonblank_block_type =~ /$SUB_PATTERN/ + || $next_nonblank_block_type =~ /$ASUB_PATTERN/ ) + && ( $nesting_depth_to_go[$i_begin] == + $nesting_depth_to_go[$i_next_nonblank] ) ) - ) - { - $self->set_forced_breakpoint($i_next_nonblank); + + && !$rOpts_opening_brace_always_on_right + ) + + # There is an implied forced break at a terminal opening brace + || ( ( $type eq '{' ) && ( $i_test == $imax ) ) + ) + { + + # Forced breakpoints must sometimes be overridden, for example + # because of a side comment causing a NO_BREAK. It is easier + # to catch this here than when they are set. + if ( $strength < NO_BREAK - 1 ) { + $strength = $lowest_strength - TINY_BIAS; + $must_break = 1; DEBUG_BREAK_LINES - && do { $Msg .= " :Forced break at i=$i_next_nonblank" }; + && do { $Msg .= " :set must_break at i=$i_next_nonblank" }; } + } - if ( - - # Try to put a break where requested by break_lists - $forced_breakpoint_to_go[$i_test] + # quit if a break here would put a good terminal token on + # the next line and we already have a possible break + if ( + !$must_break + && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' ) + && ( + ( + $leading_spaces + + $summed_lengths_to_go[ $i_next_nonblank + 1 ] - + $starting_sum + ) > $maximum_line_length + ) + ) + { + if ( $i_lowest >= 0 ) { + DEBUG_BREAK_LINES && do { + $Msg .= " :quit at good terminal='$next_nonblank_type'"; + }; + last; + } + } - # break between ) { in a continued line so that the '{' can - # be outdented - # See similar logic in break_lists which catches instances - # where a line is just something like ') {'. We have to - # be careful because the corresponding block keyword might - # not be on the first line, such as 'for' here: - # - # eval { - # for ("a") { - # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ } - # } - # }; - # - || ( - $line_count - && ( $token eq ')' ) - && ( $next_nonblank_type eq '{' ) - && ($next_nonblank_block_type) - && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] ) - - # RT #104427: Dont break before opening sub brace because - # sub block breaks handled at higher level, unless - # it looks like the preceding list is long and broken - && !( + # Avoid a break which would strand a single punctuation + # token. For example, we do not want to strand a leading + # '.' which is followed by a long quoted string. + # But note that we do want to do this with -extrude (l=1) + # so please test any changes to this code on -extrude. + if ( + !$must_break + && ( $i_test == $i_begin ) + && ( $i_test < $imax ) + && ( $token eq $type ) + && ( + ( + $leading_spaces + + $summed_lengths_to_go[ $i_test + 1 ] - + $starting_sum + ) < $maximum_line_length + ) + ) + { + $i_test = min( $imax, $inext_to_go[$i_test] ); + DEBUG_BREAK_LINES && do { + $Msg .= " :redo at i=$i_test"; + }; + redo; + } - ( - $next_nonblank_block_type =~ /$SUB_PATTERN/ - || $next_nonblank_block_type =~ /$ASUB_PATTERN/ - ) - && ( $nesting_depth_to_go[$i_begin] == - $nesting_depth_to_go[$i_next_nonblank] ) - ) + #------------------------------------------------------------ + # Section C: Look for the lowest bond strength between tokens + #------------------------------------------------------------ + if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) ) { - && !$rOpts_opening_brace_always_on_right - ) + # break at previous best break if it would have produced + # a leading alignment of certain common tokens, and it + # is different from the latest candidate break + if ($leading_alignment_type) { + DEBUG_BREAK_LINES && do { + $Msg .= + " :last at leading_alignment='$leading_alignment_type'"; + }; + last; + } - # There is an implied forced break at a terminal opening brace - || ( ( $type eq '{' ) && ( $i_test == $imax ) ) + # Force at least one breakpoint if old code had good + # break It is only called if a breakpoint is required or + # desired. This will probably need some adjustments + # over time. A goal is to try to be sure that, if a new + # side comment is introduced into formatted text, then + # the same breakpoints will occur. scbreak.t + if ( + $i_test == $imax # we are at the end + && !$forced_breakpoint_count + && $saw_good_break # old line had good break + && $type =~ /^[#;\{]$/ # and this line ends in + # ';' or side comment + && $i_last_break < 0 # and we haven't made a break + && $i_lowest >= 0 # and we saw a possible break + && $i_lowest < $imax - 1 # (but not just before this ;) + && $strength - $lowest_strength < 0.5 * WEAK # and it's good ) { - # Forced breakpoints must sometimes be overridden, for example - # because of a side comment causing a NO_BREAK. It is easier - # to catch this here than when they are set. - if ( $strength < NO_BREAK - 1 ) { - $strength = $lowest_strength - $tiny_bias; - $must_break = 1; - DEBUG_BREAK_LINES - && do { $Msg .= " :set must_break at i=$i_next_nonblank" }; - } + DEBUG_BREAK_LINES && do { + $Msg .= " :last at good old break\n"; + }; + last; } - # quit if a break here would put a good terminal token on - # the next line and we already have a possible break + # Do not skip past an important break point in a short final + # segment. For example, without this check we would miss the + # break at the final / in the following code: + # + # $depth_stop = + # ( $tau * $mass_pellet * $q_0 * + # ( 1. - exp( -$t_stop / $tau ) ) - + # 4. * $pi * $factor * $k_ice * + # ( $t_melt - $t_ice ) * + # $r_pellet * + # $t_stop ) / + # ( $rho_ice * $Qs * $pi * $r_pellet**2 ); + # if ( - !$must_break - && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' ) - && ( - ( - $leading_spaces + - $summed_lengths_to_go[ $i_next_nonblank + 1 ] - - $starting_sum - ) > $maximum_line_length - ) + $line_count > 2 + && $i_lowest >= 0 # and we saw a possible break + && $i_lowest < $i_test + && $i_test > $imax - 2 + && $nesting_depth_to_go[$i_begin] > + $nesting_depth_to_go[$i_lowest] + && $lowest_strength < $last_break_strength - .5 * WEAK ) { - if ( $i_lowest >= 0 ) { + # Make this break for math operators for now + my $ir = $inext_to_go[$i_lowest]; + my $il = $iprev_to_go[$ir]; + if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/ + || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ ) + { DEBUG_BREAK_LINES && do { - $Msg .= " :quit at good terminal='$next_nonblank_type'"; + $Msg .= " :last-noskip_short"; }; last; } } - # Avoid a break which would strand a single punctuation - # token. For example, we do not want to strand a leading - # '.' which is followed by a long quoted string. - # But note that we do want to do this with -extrude (l=1) - # so please test any changes to this code on -extrude. - if ( - !$must_break - && ( $i_test == $i_begin ) - && ( $i_test < $imax ) - && ( $token eq $type ) - && ( - ( - $leading_spaces + - $summed_lengths_to_go[ $i_test + 1 ] - - $starting_sum - ) < $maximum_line_length - ) - ) - { - $i_test = min( $imax, $inext_to_go[$i_test] ); + # Update the minimum bond strength location + $lowest_strength = $strength; + $i_lowest = $i_test; + if ($must_break) { DEBUG_BREAK_LINES && do { - $Msg .= " :redo at i=$i_test"; + $Msg .= " :last-must_break"; }; - redo; + last; } - if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) ) + # set flags to remember if a break here will produce a + # leading alignment of certain common tokens + if ( $line_count > 0 + && $i_test < $imax + && ( $lowest_strength - $last_break_strength <= MAX_BIAS ) ) { - - # break at previous best break if it would have produced - # a leading alignment of certain common tokens, and it - # is different from the latest candidate break - if ($leading_alignment_type) { - DEBUG_BREAK_LINES && do { - $Msg .= -" :last at leading_alignment='$leading_alignment_type'"; - }; - last; - } - - # Force at least one breakpoint if old code had good - # break It is only called if a breakpoint is required or - # desired. This will probably need some adjustments - # over time. A goal is to try to be sure that, if a new - # side comment is introduced into formatted text, then - # the same breakpoints will occur. scbreak.t - if ( - $i_test == $imax # we are at the end - && !$forced_breakpoint_count - && $saw_good_break # old line had good break - && $type =~ /^[#;\{]$/ # and this line ends in - # ';' or side comment - && $i_last_break < 0 # and we haven't made a break - && $i_lowest >= 0 # and we saw a possible break - && $i_lowest < $imax - 1 # (but not just before this ;) - && $strength - $lowest_strength < 0.5 * WEAK # and it's good - ) - { - - DEBUG_BREAK_LINES && do { - $Msg .= " :last at good old break\n"; - }; - last; - } - - # Do not skip past an important break point in a short final - # segment. For example, without this check we would miss the - # break at the final / in the following code: - # - # $depth_stop = - # ( $tau * $mass_pellet * $q_0 * - # ( 1. - exp( -$t_stop / $tau ) ) - - # 4. * $pi * $factor * $k_ice * - # ( $t_melt - $t_ice ) * - # $r_pellet * - # $t_stop ) / - # ( $rho_ice * $Qs * $pi * $r_pellet**2 ); - # + my $i_last_end = $iprev_to_go[$i_begin]; + my $tok_beg = $tokens_to_go[$i_begin]; + my $type_beg = $types_to_go[$i_begin]; if ( - $line_count > 2 - && $i_lowest >= 0 # and we saw a possible break - && $i_lowest < $i_test - && $i_test > $imax - 2 - && $nesting_depth_to_go[$i_begin] > - $nesting_depth_to_go[$i_lowest] - && $lowest_strength < $last_break_strength - .5 * WEAK - ) - { - # Make this break for math operators for now - my $ir = $inext_to_go[$i_lowest]; - my $il = $iprev_to_go[$ir]; - if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/ - || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ ) - { - DEBUG_BREAK_LINES && do { - $Msg .= " :last-noskip_short"; - }; - last; - } - } - # Update the minimum bond strength location - $lowest_strength = $strength; - $i_lowest = $i_test; - $lowest_next_token = $next_nonblank_token; - $lowest_next_type = $next_nonblank_type; - $i_lowest_next_nonblank = $i_next_nonblank; - if ($must_break) { - DEBUG_BREAK_LINES && do { - $Msg .= " :last-must_break"; - }; - last; - } + # check for leading alignment of certain tokens + ( + $tok_beg eq $next_nonblank_token + && $is_chain_operator{$tok_beg} + && ( $type_beg eq 'k' + || $type_beg eq $tok_beg ) + && $nesting_depth_to_go[$i_begin] >= + $nesting_depth_to_go[$i_next_nonblank] + ) - # set flags to remember if a break here will produce a - # leading alignment of certain common tokens - if ( $line_count > 0 - && $i_test < $imax - && ( $lowest_strength - $last_break_strength <= $max_bias ) + || ( $tokens_to_go[$i_last_end] eq $token + && $is_chain_operator{$token} + && ( $type eq 'k' || $type eq $token ) + && $nesting_depth_to_go[$i_last_end] >= + $nesting_depth_to_go[$i_test] ) ) { - my $i_last_end = $iprev_to_go[$i_begin]; - my $tok_beg = $tokens_to_go[$i_begin]; - my $type_beg = $types_to_go[$i_begin]; - if ( - - # check for leading alignment of certain tokens - ( - $tok_beg eq $next_nonblank_token - && $is_chain_operator{$tok_beg} - && ( $type_beg eq 'k' - || $type_beg eq $tok_beg ) - && $nesting_depth_to_go[$i_begin] >= - $nesting_depth_to_go[$i_next_nonblank] - ) - - || ( $tokens_to_go[$i_last_end] eq $token - && $is_chain_operator{$token} - && ( $type eq 'k' || $type eq $token ) - && $nesting_depth_to_go[$i_last_end] >= - $nesting_depth_to_go[$i_test] ) - ) - { - $leading_alignment_token = $next_nonblank_token; - $leading_alignment_type = $next_nonblank_type; - } + $leading_alignment_type = $next_nonblank_type; } } + } - my $too_long = ( $i_test >= $imax ); - if ( !$too_long ) { - my $next_length = - $leading_spaces + - $summed_lengths_to_go[ $i_test + 2 ] - - $starting_sum; - $too_long = $next_length > $maximum_line_length; + #----------------------------------------------------------- + # Section D: See if the maximum line length will be exceeded + #----------------------------------------------------------- + my $too_long = ( $i_test >= $imax ); + if ( !$too_long ) { + my $next_length = + $leading_spaces + + $summed_lengths_to_go[ $i_test + 2 ] - + $starting_sum; + $too_long = $next_length > $maximum_line_length; - # To prevent blinkers we will avoid leaving a token exactly at - # the line length limit unless it is the last token or one of - # several "good" types. - # - # The following code was a blinker with -pbp before this - # modification: + # To prevent blinkers we will avoid leaving a token exactly at + # the line length limit unless it is the last token or one of + # several "good" types. + # + # The following code was a blinker with -pbp before this + # modification: ## $last_nonblank_token eq '(' ## && $is_indirect_object_taker{ $paren_type ## [$paren_depth] } - # The issue causing the problem is that if the - # term [$paren_depth] gets broken across a line then - # the whitespace routine doesn't see both opening and closing - # brackets and will format like '[ $paren_depth ]'. This - # leads to an oscillation in length depending if we break - # before the closing bracket or not. - if ( !$too_long - && $i_test + 1 < $imax - && $next_nonblank_type ne ',' - && !$is_closing_type{$next_nonblank_type} ) - { - $too_long = $next_length >= $maximum_line_length; - DEBUG_BREAK_LINES && do { - $Msg .= " :too_long=$too_long" if ($too_long); - } - } - } - - DEBUG_BREAK_LINES && do { - my $ltok = $token; - my $rtok = - $next_nonblank_token ? $next_nonblank_token : EMPTY_STRING; - my $i_testp2 = $i_test + 2; - if ( $i_testp2 > $max_index_to_go + 1 ) { - $i_testp2 = $max_index_to_go + 1; - } - if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) } - if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) } - print STDOUT -"BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n"; - }; - - # allow one extra terminal token after exceeding line length - # if it would strand this token. - if ( $rOpts_fuzzy_line_length - && $too_long - && $i_lowest == $i_test - && $token_lengths_to_go[$i_test] > 1 - && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' ) - ) - { - $too_long = 0; - DEBUG_BREAK_LINES && do { - $Msg .= " :do_not_strand next='$next_nonblank_type'"; - }; - } - - # we are done if... - if ( - - # ... no more space and we have a break - $too_long && $i_lowest >= 0 - - # ... or no more tokens - || $i_test == $imax - ) + # The issue causing the problem is that if the + # term [$paren_depth] gets broken across a line then + # the whitespace routine doesn't see both opening and closing + # brackets and will format like '[ $paren_depth ]'. This + # leads to an oscillation in length depending if we break + # before the closing bracket or not. + if ( !$too_long + && $i_test + 1 < $imax + && $next_nonblank_type ne ',' + && !$is_closing_type{$next_nonblank_type} ) { + $too_long = $next_length >= $maximum_line_length; DEBUG_BREAK_LINES && do { - $Msg .= -" :Done-too_long=$too_long or i_lowest=$i_lowest or $i_test==imax"; - }; - last; + $Msg .= " :too_long=$too_long" if ($too_long); + } } } - #------------------------------------------------------- - # END of inner loop to find the best next breakpoint - # Now decide exactly where to put the breakpoint - #------------------------------------------------------- - - # it's always ok to break at imax if no other break was found - if ( $i_lowest < 0 ) { $i_lowest = $imax } - - # semi-final index calculation - my $i_next_nonblank = $inext_to_go[$i_lowest]; - my $next_nonblank_type = $types_to_go[$i_next_nonblank]; - my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; - - #------------------------------------------------------- - # ?/: rule 1 : if a break here will separate a '?' on this - # line from its closing ':', then break at the '?' instead. - #------------------------------------------------------- - foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) { - next unless ( $tokens_to_go[$i] eq '?' ); - - # do not break if probable sequence of ?/: statements - next if ($is_colon_chain); - - # do not break if statement is broken by side comment - next - if ( $tokens_to_go[$max_index_to_go] eq '#' - && terminal_type_i( 0, $max_index_to_go ) !~ /^[\;\}]$/ ); - - # no break needed if matching : is also on the line - next - if ( $mate_index_to_go[$i] >= 0 - && $mate_index_to_go[$i] <= $i_next_nonblank ); - - $i_lowest = $i; - if ( $want_break_before{'?'} ) { $i_lowest-- } - last; - } - - #------------------------------------------------------- - # END of inner loop to find the best next breakpoint: - # Break the line after the token with index i=$i_lowest - #------------------------------------------------------- - - # final index calculation - $i_next_nonblank = $inext_to_go[$i_lowest]; - $next_nonblank_type = $types_to_go[$i_next_nonblank]; - $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; - - DEBUG_BREAK_LINES - && print STDOUT -"BREAK: best is i = $i_lowest strength = $lowest_strength;\nReason>> $Msg\n"; - $Msg = EMPTY_STRING; - - #------------------------------------------------------- - # ?/: rule 2 : if we break at a '?', then break at its ':' - # - # Note: this rule is also in sub break_lists to handle a break - # at the start and end of a line (in case breaks are dictated - # by side comments). - #------------------------------------------------------- - if ( $next_nonblank_type eq '?' ) { - $self->set_closing_breakpoint($i_next_nonblank); - } - elsif ( $types_to_go[$i_lowest] eq '?' ) { - $self->set_closing_breakpoint($i_lowest); - } - - #------------------------------------------------------- - # ?/: rule 3 : if we break at a ':' then we save - # its location for further work below. We may need to go - # back and break at its '?'. - #------------------------------------------------------- - if ( $next_nonblank_type eq ':' ) { - push @i_colon_breaks, $i_next_nonblank; - } - elsif ( $types_to_go[$i_lowest] eq ':' ) { - push @i_colon_breaks, $i_lowest; - } - - # here we should set breaks for all '?'/':' pairs which are - # separated by this line - - $line_count++; - - # save this line segment, after trimming blanks at the ends - push( @i_first, - ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin ); - push( @i_last, - ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest ); + DEBUG_BREAK_LINES && do { + my $ltok = $token; + my $rtok = + $next_nonblank_token ? $next_nonblank_token : EMPTY_STRING; + my $i_testp2 = $i_test + 2; + if ( $i_testp2 > $max_index_to_go + 1 ) { + $i_testp2 = $max_index_to_go + 1; + } + if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) } + if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) } + print STDOUT +"BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n"; + }; - # set a forced breakpoint at a container opening, if necessary, to - # signal a break at a closing container. Excepting '(' for now. - if ( - ( - $tokens_to_go[$i_lowest] eq '{' - || $tokens_to_go[$i_lowest] eq '[' - ) - && !$forced_breakpoint_to_go[$i_lowest] - ) + # allow one extra terminal token after exceeding line length + # if it would strand this token. + if ( $rOpts_fuzzy_line_length + && $too_long + && $i_lowest == $i_test + && $token_lengths_to_go[$i_test] > 1 + && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' ) ) { - $self->set_closing_breakpoint($i_lowest); + $too_long = 0; + DEBUG_BREAK_LINES && do { + $Msg .= " :do_not_strand next='$next_nonblank_type'"; + }; } - # get ready to go again - $i_begin = $i_lowest + 1; - $last_break_strength = $lowest_strength; - $i_last_break = $i_lowest; - $leading_alignment_token = EMPTY_STRING; - $leading_alignment_type = EMPTY_STRING; - $lowest_next_token = EMPTY_STRING; - $lowest_next_type = 'b'; + # Stop if line will be too long and we have a solution + if ( - if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) { - $i_begin++; - } + # ... no more space and we have a break + $too_long && $i_lowest >= 0 - # update indentation size - if ( $i_begin <= $imax ) { - $leading_spaces = leading_spaces_to_go($i_begin); - DEBUG_BREAK_LINES - && print STDOUT - "updating leading spaces to be $leading_spaces at i=$i_begin\n"; + # ... or no more tokens + || $i_test == $imax + ) + { + DEBUG_BREAK_LINES && do { + $Msg .= +" :Done-too_long=$too_long or i_lowest=$i_lowest or $i_test==imax"; + }; + last; } } - #------------------------------------------------------- - # END of main loop to set continuation breakpoints - # Now go back and make any necessary corrections - #------------------------------------------------------- + #----------------------------------------------- + # End loop over the indexes in the _to_go arrays + #----------------------------------------------- - #------------------------------------------------------- - # ?/: rule 4 -- if we broke at a ':', then break at - # corresponding '?' unless this is a chain of ?: expressions - #------------------------------------------------------- - if (@i_colon_breaks) { + # Be sure we return an index in the range ($ibegin .. $imax). + # We will break at imax if no other break was found. + if ( $i_lowest < 0 ) { $i_lowest = $imax } - # using a simple method for deciding if we are in a ?/: chain -- - # this is a chain if it has multiple ?/: pairs all in order; - # otherwise not. - # Note that if line starts in a ':' we count that above as a break - my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 ); + return ( $i_lowest, $lowest_strength, $leading_alignment_type, $Msg ); +} ## end sub break_lines_inner_loop - unless ($is_chain) { - my @insert_list = (); - foreach (@i_colon_breaks) { - my $i_question = $mate_index_to_go[$_]; - if ( $i_question >= 0 ) { - if ( $want_break_before{'?'} ) { - $i_question = $iprev_to_go[$i_question]; - } +sub do_colon_breaks { + my ( $self, $ri_colon_breaks, $ri_first, $ri_last ) = @_; - if ( $i_question >= 0 ) { - push @insert_list, $i_question; - } - } - $self->insert_additional_breaks( \@insert_list, \@i_first, - \@i_last ); + # using a simple method for deciding if we are in a ?/: chain -- + # this is a chain if it has multiple ?/: pairs all in order; + # otherwise not. + # Note that if line starts in a ':' we count that above as a break + + my @insert_list = (); + foreach ( @{$ri_colon_breaks} ) { + my $i_question = $mate_index_to_go[$_]; + if ( $i_question >= 0 ) { + if ( $want_break_before{'?'} ) { + $i_question = $iprev_to_go[$i_question]; + } + + if ( $i_question >= 0 ) { + push @insert_list, $i_question; } } + $self->insert_additional_breaks( \@insert_list, $ri_first, $ri_last ); } - return ( \@i_first, \@i_last, $rbond_strength_to_go ); -} ## end sub break_long_lines + return; +} ########################################### # CODE SECTION 11: Code to break long lists @@ -18242,29 +19652,54 @@ sub break_long_lines { use constant DEBUG_BREAK_LISTS => 0; my ( - $block_type, $current_depth, - $depth, $i, - $i_last_nonblank_token, $last_nonblank_token, - $last_nonblank_type, $last_nonblank_block_type, - $last_old_breakpoint_count, $minimum_depth, - $next_nonblank_block_type, $next_nonblank_token, - $next_nonblank_type, $old_breakpoint_count, - $starting_breakpoint_count, $starting_depth, - $token, $type, + + $block_type, + $current_depth, + $depth, + $i, + $i_last_colon, + $i_line_end, + $i_line_start, + $i_last_nonblank_token, + $last_nonblank_block_type, + $last_nonblank_token, + $last_nonblank_type, + $last_old_breakpoint_count, + $minimum_depth, + $next_nonblank_block_type, + $next_nonblank_token, + $next_nonblank_type, + $old_breakpoint_count, + $starting_breakpoint_count, + $starting_depth, + $token, + $type, $type_sequence, + ); my ( - @breakpoint_stack, @breakpoint_undo_stack, - @comma_index, @container_type, - @identifier_count_stack, @index_before_arrow, - @interrupted_list, @item_count_stack, - @last_comma_index, @last_dot_index, - @last_nonblank_type, @old_breakpoint_count_stack, - @opening_structure_index_stack, @rfor_semicolon_list, - @has_old_logical_breakpoints, @rand_or_list, - @i_equals, @override_cab3, + + @breakpoint_stack, + @breakpoint_undo_stack, + @comma_index, + @container_type, + @identifier_count_stack, + @index_before_arrow, + @interrupted_list, + @item_count_stack, + @last_comma_index, + @last_dot_index, + @last_nonblank_type, + @old_breakpoint_count_stack, + @opening_structure_index_stack, + @rfor_semicolon_list, + @has_old_logical_breakpoints, + @rand_or_list, + @i_equals, + @override_cab3, @type_sequence_stack, + ); # these arrays must retain values between calls @@ -18272,7 +19707,6 @@ sub break_long_lines { my $length_tol; my $lp_tol_boost; - my $list_stress_level; sub initialize_break_lists { @dont_align = (); @@ -18339,7 +19773,8 @@ sub break_long_lines { # Define a level where list formatting becomes highly stressed and # needs to be simplified. Introduced for case b1262. - $list_stress_level = min( $stress_level_alpha, $stress_level_beta + 2 ); + # $list_stress_level = min($stress_level_alpha, $stress_level_beta + 2); + # This is now '$high_stress_level'. return; } ## end sub initialize_break_lists @@ -18400,16 +19835,17 @@ sub break_long_lines { my $bp_count = 0; my $do_not_break_apart = 0; - # Do not break a list unless there are some non-line-ending commas. - # This avoids getting different results with only non-essential commas, - # and fixes b1192. - my $seqno = $type_sequence_stack[$dd]; - my $real_comma_count = - $seqno ? $self->[_rtype_count_by_seqno_]->{$seqno}->{','} : 1; - # anything to do? if ( $item_count_stack[$dd] ) { + # Do not break a list unless there are some non-line-ending commas. + # This avoids getting different results with only non-essential + # commas, and fixes b1192. + my $seqno = $type_sequence_stack[$dd]; + + my $real_comma_count = + $seqno ? $self->[_rtype_count_by_seqno_]->{$seqno}->{','} : 1; + # handle commas not in containers... if ( $dont_align[$dd] ) { $self->do_uncontained_comma_breaks( $dd, $rbond_strength_bias ); @@ -18424,7 +19860,7 @@ sub break_long_lines { # look like a function call) my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/; - $self->set_comma_breakpoints_do( + $self->set_comma_breakpoints_final( { depth => $dd, i_opening_paren => $opening_structure_index_stack[$dd], @@ -18609,6 +20045,7 @@ EOM %quick_filter = %is_assignment; @q = qw# => . ; < > ~ #; push @q, ','; + push @q, 'f'; # added for ';' for issue c154 @quick_filter{@q} = (1) x scalar(@q); } @@ -18663,13 +20100,15 @@ EOM my ( $self, $is_long_line, $rbond_strength_bias ) = @_; - #---------------------------------------------------------------------- - # This routine is called once per batch, if the batch is a list, to set - # line breaks so that hierarchical structure can be displayed and so - # that list items can be vertically aligned. The output of this + #-------------------------------------------------------------------- + # This routine is called once per batch, if the batch is a list, to + # set line breaks so that hierarchical structure can be displayed and + # so that list items can be vertically aligned. The output of this # routine is stored in the array @forced_breakpoint_to_go, which is - # used by sub 'break_long_lines' to set final breakpoints. - #---------------------------------------------------------------------- + # used by sub 'break_long_lines' to set final breakpoints. This is + # probably the most complex routine in perltidy, so I have + # broken it into pieces and over-commented it. + #-------------------------------------------------------------------- my $rLL = $self->[_rLL_]; my $ris_list_by_seqno = $self->[_ris_list_by_seqno_]; @@ -18682,6 +20121,9 @@ EOM $block_type = SPACE; $current_depth = $starting_depth; $i = -1; + $i_last_colon = -1; + $i_line_end = -1; + $i_line_start = -1; $last_nonblank_token = ';'; $last_nonblank_type = ';'; $last_nonblank_block_type = SPACE; @@ -18699,14 +20141,12 @@ EOM my $comma_follows_last_closing_token; $self->check_for_new_minimum_depth( $current_depth, - $parent_seqno_to_go[0] ); + $parent_seqno_to_go[0] ) + if ( $current_depth < $minimum_depth ); my $want_previous_breakpoint = -1; my $saw_good_breakpoint; - my $i_line_end = -1; - my $i_line_start = -1; - my $i_last_colon = -1; #---------------------------------------- # Main loop over all tokens in this batch @@ -18717,18 +20157,21 @@ EOM $last_nonblank_type = $type; $last_nonblank_token = $token; $last_nonblank_block_type = $block_type; - } ## end if ( $type ne 'b' ) + } $type = $types_to_go[$i]; $block_type = $block_type_to_go[$i]; $token = $tokens_to_go[$i]; $type_sequence = $type_sequence_to_go[$i]; - my $next_type = $types_to_go[ $i + 1 ]; - my $next_token = $tokens_to_go[ $i + 1 ]; - my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); + + my $i_next_nonblank = $inext_to_go[$i]; $next_nonblank_type = $types_to_go[$i_next_nonblank]; $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; + #------------------------------------------- + # Loop Section A: Look for special breakpoints... + #------------------------------------------- + # set break if flag was set if ( $want_previous_breakpoint >= 0 ) { $self->set_forced_breakpoint($want_previous_breakpoint); @@ -18737,63 +20180,26 @@ EOM $last_old_breakpoint_count = $old_breakpoint_count; - # Fixed for case b1097 to not consider old breaks at highly - # stressed locations, such as types 'L' and 'R'. It might be - # useful to generalize this concept in the future by looking at - # actual bond strengths. - if ( $old_breakpoint_to_go[$i] - && $type ne 'L' - && $next_nonblank_type ne 'R' ) - { - $i_line_end = $i; - $i_line_start = $i_next_nonblank; - - $old_breakpoint_count++; - - # Break before certain keywords if user broke there and - # this is a 'safe' break point. The idea is to retain - # any preferred breaks for sequential list operations, - # like a schwartzian transform. - if ($rOpts_break_at_old_keyword_breakpoints) { - if ( - $next_nonblank_type eq 'k' - && $is_keyword_returning_list{$next_nonblank_token} - && ( $type =~ /^[=\)\]\}Riw]$/ - || $type eq 'k' - && $is_keyword_returning_list{$token} ) - ) - { - - # we actually have to set this break next time through - # the loop because if we are at a closing token (such - # as '}') which forms a one-line block, this break might - # get undone. - - # And do not do this at an equals if the user wants - # breaks before an equals (blinker cases b434 b903) - unless ( $type eq '=' && $want_break_before{$type} ) { - $want_previous_breakpoint = $i; - } - } ## end if ( $next_nonblank_type...) - } ## end if ($rOpts_break_at_old_keyword_breakpoints) + # Check for a good old breakpoint .. + if ( + $old_breakpoint_to_go[$i] - # Break before attributes if user broke there - if ($rOpts_break_at_old_attribute_breakpoints) { - if ( $next_nonblank_type eq 'A' ) { - $want_previous_breakpoint = $i; - } - } + # Note: ignore old breaks at types 'L' and 'R' to fix case + # b1097. These breaks only occur under high stress. + && $type ne 'L' + && $next_nonblank_type ne 'R' - # remember an = break as possible good break point - if ( $is_assignment{$type} ) { - $i_old_assignment_break = $i; - } - elsif ( $is_assignment{$next_nonblank_type} ) { - $i_old_assignment_break = $i_next_nonblank; - } - } ## end if ( $old_breakpoint_to_go...) + # ... and ignore other high stress level breaks, fixes b1395 + && $levels_to_go[$i] < $high_stress_level + ) + { + ( $want_previous_breakpoint, $i_old_assignment_break ) = + $self->check_old_breakpoints( $i_next_nonblank, + $want_previous_breakpoint, $i_old_assignment_break ); + } next if ( $type eq 'b' ); + $depth = $nesting_depth_to_go[ $i + 1 ]; $total_depth_variation += abs( $depth - $depth_last ); @@ -18843,942 +20249,1067 @@ EOM ) { $self->set_forced_breakpoint( $i - 1 ); - } ## end if ( $type eq 'k' && $i...) + } + + # remember locations of '||' and '&&' for possible breaks if we + # decide this is a long logical expression. + if ( $type eq '||' ) { + push @{ $rand_or_list[$depth][2] }, $i; + ++$has_old_logical_breakpoints[$depth] + if ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_logical_breakpoints ); + } + elsif ( $type eq '&&' ) { + push @{ $rand_or_list[$depth][3] }, $i; + ++$has_old_logical_breakpoints[$depth] + if ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_logical_breakpoints ); + } + elsif ( $type eq 'f' ) { + push @{ $rfor_semicolon_list[$depth] }, $i; + } + elsif ( $type eq 'k' ) { + if ( $token eq 'and' ) { + push @{ $rand_or_list[$depth][1] }, $i; + ++$has_old_logical_breakpoints[$depth] + if ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_logical_breakpoints ); + } + + # break immediately at 'or's which are probably not in a logical + # block -- but we will break in logical breaks below so that + # they do not add to the forced_breakpoint_count + elsif ( $token eq 'or' ) { + push @{ $rand_or_list[$depth][0] }, $i; + ++$has_old_logical_breakpoints[$depth] + if ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_logical_breakpoints ); + if ( $is_logical_container{ $container_type[$depth] } ) { + } + else { + if ($is_long_line) { $self->set_forced_breakpoint($i) } + elsif ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_logical_breakpoints ) + { + $saw_good_breakpoint = 1; + } + } + } + elsif ( $token eq 'if' || $token eq 'unless' ) { + push @{ $rand_or_list[$depth][4] }, $i; + if ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_logical_breakpoints ) + { + $self->set_forced_breakpoint($i); + } + } + } + elsif ( $is_assignment{$type} ) { + $i_equals[$depth] = $i; + } + + #----------------------------------------- + # Loop Section B: Handle a sequenced token + #----------------------------------------- + if ($type_sequence) { + $self->break_lists_type_sequence; + } + + #------------------------------------------ + # Loop Section C: Handle Increasing Depth.. + #------------------------------------------ + + # hardened against bad input syntax: depth jump must be 1 and type + # must be opening..fixes c102 + if ( $depth == $current_depth + 1 && $is_opening_type{$type} ) { + $self->break_lists_increasing_depth(); + } + + #------------------------------------------ + # Loop Section D: Handle Decreasing Depth.. + #------------------------------------------ + + # hardened against bad input syntax: depth jump must be 1 and type + # must be closing .. fixes c102 + elsif ( $depth == $current_depth - 1 && $is_closing_type{$type} ) { + + $self->break_lists_decreasing_depth(); + + $comma_follows_last_closing_token = + $next_nonblank_type eq ',' || $next_nonblank_type eq '=>'; + + } + + #---------------------------------- + # Loop Section E: Handle this token + #---------------------------------- + + $current_depth = $depth; + + # most token types can skip the rest of this loop + next unless ( $quick_filter{$type} ); + + # handle comma-arrow + if ( $type eq '=>' ) { + next if ( $last_nonblank_type eq '=>' ); + next if $rOpts_break_at_old_comma_breakpoints; + next + if ( $rOpts_comma_arrow_breakpoints == 3 + && !$override_cab3[$depth] ); + $want_comma_break[$depth] = 1; + $index_before_arrow[$depth] = $i_last_nonblank_token; + next; + } + + elsif ( $type eq '.' ) { + $last_dot_index[$depth] = $i; + } + + # Turn off comma alignment if we are sure that this is not a list + # environment. To be safe, we will do this if we see certain + # non-list tokens, such as ';', '=', and also the environment is + # not a list. + ## $type =~ /^[\;\<\>\~f]$/ || $is_assignment{$type} + elsif ( $is_non_list_type{$type} + && !$self->is_in_list_by_i($i) ) + { + $dont_align[$depth] = 1; + $want_comma_break[$depth] = 0; + $index_before_arrow[$depth] = -1; + + # no special comma breaks in C-style 'for' terms (c154) + if ( $type eq 'f' ) { $last_comma_index[$depth] = undef } + } + + # now just handle any commas + next if ( $type ne ',' ); + $self->study_comma($comma_follows_last_closing_token); + + } ## end while ( ++$i <= $max_index_to_go) + + #------------------------------------------- + # END of loop over all tokens in this batch + # Now set breaks for any unfinished lists .. + #------------------------------------------- + + foreach my $dd ( reverse( $minimum_depth .. $current_depth ) ) { + + $interrupted_list[$dd] = 1; + $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth ); + $self->set_comma_breakpoints( $dd, $rbond_strength_bias ) + if ( $item_count_stack[$dd] ); + $self->set_logical_breakpoints($dd) + if ( $has_old_logical_breakpoints[$dd] ); + $self->set_for_semicolon_breakpoints($dd); + + # break open container... + my $i_opening = $opening_structure_index_stack[$dd]; + if ( defined($i_opening) && $i_opening >= 0 ) { + $self->set_forced_breakpoint($i_opening) + unless ( + is_unbreakable_container($dd) + + # Avoid a break which would place an isolated ' or " + # on a line + || ( $type eq 'Q' + && $i_opening >= $max_index_to_go - 2 + && ( $token eq "'" || $token eq '"' ) ) + ); + } + } ## end for ( my $dd = $current_depth...) + + #---------------------------------------- + # Return the flag '$saw_good_breakpoint'. + #---------------------------------------- + # This indicates if the input file had some good breakpoints. This + # flag will be used to force a break in a line shorter than the + # allowed line length. + if ( $has_old_logical_breakpoints[$current_depth] ) { + $saw_good_breakpoint = 1; + } + + # A complex line with one break at an = has a good breakpoint. + # This is not complex ($total_depth_variation=0): + # $res1 + # = 10; + # + # This is complex ($total_depth_variation=6): + # $res2 = + # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert')); + + # The check ($i_old_.. < $max_index_to_go) was added to fix b1333 + elsif ($i_old_assignment_break + && $total_depth_variation > 4 + && $old_breakpoint_count == 1 + && $i_old_assignment_break < $max_index_to_go ) + { + $saw_good_breakpoint = 1; + } + + return $saw_good_breakpoint; + } ## end sub break_lists - # remember locations of '||' and '&&' for possible breaks if we - # decide this is a long logical expression. - if ( $type eq '||' ) { - push @{ $rand_or_list[$depth][2] }, $i; - ++$has_old_logical_breakpoints[$depth] - if ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_logical_breakpoints ); - } ## end elsif ( $type eq '||' ) - elsif ( $type eq '&&' ) { - push @{ $rand_or_list[$depth][3] }, $i; - ++$has_old_logical_breakpoints[$depth] - if ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_logical_breakpoints ); - } ## end elsif ( $type eq '&&' ) - elsif ( $type eq 'f' ) { - push @{ $rfor_semicolon_list[$depth] }, $i; + sub study_comma { + + # study and store info for a list comma + + my ( $self, $comma_follows_last_closing_token ) = @_; + + $last_dot_index[$depth] = undef; + $last_comma_index[$depth] = $i; + + # break here if this comma follows a '=>' + # but not if there is a side comment after the comma + if ( $want_comma_break[$depth] ) { + + if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) { + if ($rOpts_comma_arrow_breakpoints) { + $want_comma_break[$depth] = 0; + return; + } } - elsif ( $type eq 'k' ) { - if ( $token eq 'and' ) { - push @{ $rand_or_list[$depth][1] }, $i; - ++$has_old_logical_breakpoints[$depth] - if ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_logical_breakpoints ); - } ## end if ( $token eq 'and' ) - # break immediately at 'or's which are probably not in a logical - # block -- but we will break in logical breaks below so that - # they do not add to the forced_breakpoint_count - elsif ( $token eq 'or' ) { - push @{ $rand_or_list[$depth][0] }, $i; - ++$has_old_logical_breakpoints[$depth] - if ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_logical_breakpoints ); - if ( $is_logical_container{ $container_type[$depth] } ) { - } - else { - if ($is_long_line) { $self->set_forced_breakpoint($i) } - elsif ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_logical_breakpoints ) - { - $saw_good_breakpoint = 1; - } - } ## end else [ if ( $is_logical_container...)] - } ## end elsif ( $token eq 'or' ) - elsif ( $token eq 'if' || $token eq 'unless' ) { - push @{ $rand_or_list[$depth][4] }, $i; - if ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_logical_breakpoints ) + $self->set_forced_breakpoint($i) + unless ( $next_nonblank_type eq '#' ); + + # break before the previous token if it looks safe + # Example of something that we will not try to break before: + # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt}, + # Also we don't want to break at a binary operator (like +): + # $c->createOval( + # $x + $R, $y + + # $R => $x - $R, + # $y - $R, -fill => 'black', + # ); + my $ibreak = $index_before_arrow[$depth] - 1; + if ( $ibreak > 0 + && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ ) + { + if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- } + if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- } + if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) { + + # don't break before a comma, as in the following: + # ( LONGER_THAN,=> 1, + # EIGHTY_CHARACTERS,=> 2, + # CAUSES_FORMATTING,=> 3, + # LIKE_THIS,=> 4, + # ); + # This example is for -tso but should be general rule + if ( $tokens_to_go[ $ibreak + 1 ] ne '->' + && $tokens_to_go[ $ibreak + 1 ] ne ',' ) { - $self->set_forced_breakpoint($i); + $self->set_forced_breakpoint($ibreak); } - } ## end elsif ( $token eq 'if' ||...) - } ## end elsif ( $type eq 'k' ) - elsif ( $is_assignment{$type} ) { - $i_equals[$depth] = $i; + } } - if ($type_sequence) { + $want_comma_break[$depth] = 0; + $index_before_arrow[$depth] = -1; - # handle any postponed closing breakpoints - if ( $is_closing_sequence_token{$token} ) { - if ( $type eq ':' ) { - $i_last_colon = $i; + # handle list which mixes '=>'s and ','s: + # treat any list items so far as an interrupted list + $interrupted_list[$depth] = 1; + return; + } - # retain break at a ':' line break - if ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_ternary_breakpoints - && $levels_to_go[$i] < $list_stress_level ) - { + # Break after all commas above starting depth... + # But only if the last closing token was followed by a comma, + # to avoid breaking a list operator (issue c119) + if ( $depth < $starting_depth + && $comma_follows_last_closing_token + && !$dont_align[$depth] ) + { + $self->set_forced_breakpoint($i) + unless ( $next_nonblank_type eq '#' ); + return; + } - $self->set_forced_breakpoint($i); + # add this comma to the list.. + my $item_count = $item_count_stack[$depth]; + if ( $item_count == 0 ) { - # Break at a previous '=', but only if it is before - # the mating '?'. Mate_index test fixes b1287. - my $ieq = $i_equals[$depth]; - if ( $ieq > 0 && $ieq < $mate_index_to_go[$i] ) { - $self->set_forced_breakpoint( - $i_equals[$depth] ); - $i_equals[$depth] = -1; - } - } ## end if ( ( $i == $i_line_start...)) - } ## end if ( $type eq ':' ) - if ( has_postponed_breakpoint($type_sequence) ) { - my $inc = ( $type eq ':' ) ? 0 : 1; - if ( $i >= $inc ) { - $self->set_forced_breakpoint( $i - $inc ); - } - } - } ## end if ( $is_closing_sequence_token{$token} ) + # but do not form a list with no opening structure + # for example: - # set breaks at ?/: if they will get separated (and are - # not a ?/: chain), or if the '?' is at the end of the - # line - elsif ( $token eq '?' ) { - my $i_colon = $mate_index_to_go[$i]; - if ( - $i_colon <= 0 # the ':' is not in this batch - || $i == 0 # this '?' is the first token of the line - || $i == - $max_index_to_go # or this '?' is the last token - ) - { + # open INFILE_COPY, ">$input_file_copy" + # or die ("very long message"); + if ( ( $opening_structure_index_stack[$depth] < 0 ) + && $self->is_in_block_by_i($i) ) + { + $dont_align[$depth] = 1; + } + } - # don't break if # this has a side comment, and - # don't break at a '?' if preceded by ':' on - # this line of previous ?/: pair on this line. - # This is an attempt to preserve a chain of ?/: - # expressions (elsif2.t). - if ( - ( - $i_last_colon < 0 - || $parent_seqno_to_go[$i_last_colon] != - $parent_seqno_to_go[$i] - ) - && $tokens_to_go[$max_index_to_go] ne '#' - ) - { - $self->set_forced_breakpoint($i); - } - $self->set_closing_breakpoint($i); - } ## end if ( $i_colon <= 0 ||...) - } ## end elsif ( $token eq '?' ) - - elsif ( $is_opening_token{$token} ) { - - # do requested -lp breaks at the OPENING token for BROKEN - # blocks. NOTE: this can be done for both -lp and -xlp, - # but only -xlp can really take advantage of this. So this - # is currently restricted to -xlp to avoid excess changes to - # existing -lp formatting. - if ( $rOpts_extended_line_up_parentheses - && $mate_index_to_go[$i] < 0 ) - { - my $lp_object = - $self->[_rlp_object_by_seqno_]->{$type_sequence}; - if ($lp_object) { - my $K_begin_line = $lp_object->get_K_begin_line(); - my $i_begin_line = $K_begin_line - $K_to_go[0]; - $self->set_forced_lp_break( $i_begin_line, $i ); - } - } - } + $comma_index[$depth][$item_count] = $i; + ++$item_count_stack[$depth]; + if ( $last_nonblank_type =~ /^[iR\]]$/ ) { + $identifier_count_stack[$depth]++; + } + return; + } ## end sub study_comma - } ## end if ($type_sequence) + sub check_old_breakpoints { -#print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n"; + # Check for a good old breakpoint - #------------------------------------------------------------ - # Handle Increasing Depth.. - # - # prepare for a new list when depth increases - # token $i is a '(','{', or '[' - #------------------------------------------------------------ - # hardened against bad input syntax: depth jump must be 1 and type - # must be opening..fixes c102 - if ( $depth == $current_depth + 1 && $is_opening_type{$type} ) { + my ( $self, $i_next_nonblank, $want_previous_breakpoint, + $i_old_assignment_break ) + = @_; - #---------------------------------------------------------- - # BEGIN initialize depth arrays - # ... use the same order as sub check_for_new_minimum_depth - #---------------------------------------------------------- - $type_sequence_stack[$depth] = $type_sequence; - $override_cab3[$depth] = - $rOpts_comma_arrow_breakpoints == 3 - && $type_sequence - && $self->[_roverride_cab3_]->{$type_sequence}; - - $breakpoint_stack[$depth] = $forced_breakpoint_count; - $container_type[$depth] = - - # k => && || ? : . - $is_container_label_type{$last_nonblank_type} - ? $last_nonblank_token - : EMPTY_STRING; - $identifier_count_stack[$depth] = 0; - $index_before_arrow[$depth] = -1; - $interrupted_list[$depth] = 0; - $item_count_stack[$depth] = 0; - $last_nonblank_type[$depth] = $last_nonblank_type; - $opening_structure_index_stack[$depth] = $i; - - $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count; - $comma_index[$depth] = undef; - $last_comma_index[$depth] = undef; - $last_dot_index[$depth] = undef; - $old_breakpoint_count_stack[$depth] = $old_breakpoint_count; - $has_old_logical_breakpoints[$depth] = 0; - $rand_or_list[$depth] = []; - $rfor_semicolon_list[$depth] = []; - $i_equals[$depth] = -1; - - # if line ends here then signal closing token to break - if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' ) - { - $self->set_closing_breakpoint($i); - } + $i_line_end = $i; + $i_line_start = $i_next_nonblank; + + $old_breakpoint_count++; + + # Break before certain keywords if user broke there and + # this is a 'safe' break point. The idea is to retain + # any preferred breaks for sequential list operations, + # like a schwartzian transform. + if ($rOpts_break_at_old_keyword_breakpoints) { + if ( + $next_nonblank_type eq 'k' + && $is_keyword_returning_list{$next_nonblank_token} + && ( $type =~ /^[=\)\]\}Riw]$/ + || $type eq 'k' && $is_keyword_returning_list{$token} ) + ) + { - # Not all lists of values should be vertically aligned.. - $dont_align[$depth] = + # we actually have to set this break next time through + # the loop because if we are at a closing token (such + # as '}') which forms a one-line block, this break might + # get undone. - # code BLOCKS are handled at a higher level - ( $block_type ne EMPTY_STRING ) + # But do not do this at an '=' if: + # - the user wants breaks before an equals (b434 b903) + # - or -naws is set (can be unstable, see b1354) + my $skip = $type eq '=' + && ( $want_break_before{$type} + || !$rOpts_add_whitespace ); - # certain paren lists - || ( $type eq '(' ) && ( + $want_previous_breakpoint = $i + unless ($skip); - # it does not usually look good to align a list of - # identifiers in a parameter list, as in: - # my($var1, $var2, ...) - # (This test should probably be refined, for now I'm just - # testing for any keyword) - ( $last_nonblank_type eq 'k' ) + } + } - # a trailing '(' usually indicates a non-list - || ( $next_nonblank_type eq '(' ) - ); - $has_broken_sublist[$depth] = 0; - $want_comma_break[$depth] = 0; + # Break before attributes if user broke there + if ($rOpts_break_at_old_attribute_breakpoints) { + if ( $next_nonblank_type eq 'A' ) { + $want_previous_breakpoint = $i; + } + } - #------------------------------------- - # END initialize depth arrays - #------------------------------------- + # remember an = break as possible good break point + if ( $is_assignment{$type} ) { + $i_old_assignment_break = $i; + } + elsif ( $is_assignment{$next_nonblank_type} ) { + $i_old_assignment_break = $i_next_nonblank; + } + return ( $want_previous_breakpoint, $i_old_assignment_break ); + } ## end sub check_old_breakpoints - # patch to outdent opening brace of long if/for/.. - # statements (like this one). See similar coding in - # set_continuation breaks. We have also catch it here for - # short line fragments which otherwise will not go through - # break_long_lines. - if ( - $block_type + sub break_lists_type_sequence { - # if we have the ')' but not its '(' in this batch.. - && ( $last_nonblank_token eq ')' ) - && $mate_index_to_go[$i_last_nonblank_token] < 0 + my ($self) = @_; - # and user wants brace to left - && !$rOpts_opening_brace_always_on_right + # handle any postponed closing breakpoints + if ( $is_closing_sequence_token{$token} ) { + if ( $type eq ':' ) { + $i_last_colon = $i; - && ( $type eq '{' ) # should be true - && ( $token eq '{' ) # should be true - ) + # retain break at a ':' line break + if ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_ternary_breakpoints + && $levels_to_go[$i] < $high_stress_level ) { - $self->set_forced_breakpoint( $i - 1 ); - } ## end if ( $block_type && ( ...)) - } ## end if ( $depth > $current_depth) - #------------------------------------------------------------ - # Handle Decreasing Depth.. - # - # finish off any old list when depth decreases - # token $i is a ')','}', or ']' - #------------------------------------------------------------ - # hardened against bad input syntax: depth jump must be 1 and type - # must be closing .. fixes c102 - elsif ( $depth == $current_depth - 1 && $is_closing_type{$type} ) { + $self->set_forced_breakpoint($i); - $self->check_for_new_minimum_depth( $depth, - $parent_seqno_to_go[$i] ); + # Break at a previous '=', but only if it is before + # the mating '?'. Mate_index test fixes b1287. + my $ieq = $i_equals[$depth]; + if ( $ieq > 0 && $ieq < $mate_index_to_go[$i] ) { + $self->set_forced_breakpoint( $i_equals[$depth] ); + $i_equals[$depth] = -1; + } + } + } + if ( has_postponed_breakpoint($type_sequence) ) { + my $inc = ( $type eq ':' ) ? 0 : 1; + if ( $i >= $inc ) { + $self->set_forced_breakpoint( $i - $inc ); + } + } + } - $comma_follows_last_closing_token = - $next_nonblank_type eq ',' || $next_nonblank_type eq '=>'; + # set breaks at ?/: if they will get separated (and are + # not a ?/: chain), or if the '?' is at the end of the + # line + elsif ( $token eq '?' ) { + my $i_colon = $mate_index_to_go[$i]; + if ( + $i_colon <= 0 # the ':' is not in this batch + || $i == 0 # this '?' is the first token of the line + || $i == $max_index_to_go # or this '?' is the last token + ) + { - # force all outer logical containers to break after we see on - # old breakpoint - $has_old_logical_breakpoints[$depth] ||= - $has_old_logical_breakpoints[$current_depth]; - - # Patch to break between ') {' if the paren list is broken. - # There is similar logic in break_long_lines for - # non-broken lists. - if ( $token eq ')' - && $next_nonblank_block_type - && $interrupted_list[$current_depth] - && $next_nonblank_type eq '{' - && !$rOpts_opening_brace_always_on_right ) + # don't break if # this has a side comment, and + # don't break at a '?' if preceded by ':' on + # this line of previous ?/: pair on this line. + # This is an attempt to preserve a chain of ?/: + # expressions (elsif2.t). + if ( + ( + $i_last_colon < 0 + || $parent_seqno_to_go[$i_last_colon] != + $parent_seqno_to_go[$i] + ) + && $tokens_to_go[$max_index_to_go] ne '#' + ) { $self->set_forced_breakpoint($i); - } ## end if ( $token eq ')' && ... + } + $self->set_closing_breakpoint($i); + } + } -#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n"; + elsif ( $is_opening_token{$token} ) { - # set breaks at commas if necessary - my ( $bp_count, $do_not_break_apart ) = - $self->set_comma_breakpoints( $current_depth, - $rbond_strength_bias ); + # do requested -lp breaks at the OPENING token for BROKEN + # blocks. NOTE: this can be done for both -lp and -xlp, + # but only -xlp can really take advantage of this. So this + # is currently restricted to -xlp to avoid excess changes to + # existing -lp formatting. + if ( $rOpts_extended_line_up_parentheses + && $mate_index_to_go[$i] < 0 ) + { + my $lp_object = + $self->[_rlp_object_by_seqno_]->{$type_sequence}; + if ($lp_object) { + my $K_begin_line = $lp_object->get_K_begin_line(); + my $i_begin_line = $K_begin_line - $K_to_go[0]; + $self->set_forced_lp_break( $i_begin_line, $i ); + } + } + } + return; + } ## end sub break_lists_type_sequence - my $i_opening = $opening_structure_index_stack[$current_depth]; - my $saw_opening_structure = ( $i_opening >= 0 ); - my $lp_object; - if ( $rOpts_line_up_parentheses && $saw_opening_structure ) { - $lp_object = $self->[_rlp_object_by_seqno_] - ->{ $type_sequence_to_go[$i_opening] }; - } - - # this term is long if we had to break at interior commas.. - my $is_long_term = $bp_count > 0; - - # If this is a short container with one or more comma arrows, - # then we will mark it as a long term to open it if requested. - # $rOpts_comma_arrow_breakpoints = - # 0 - open only if comma precedes closing brace - # 1 - stable: except for one line blocks - # 2 - try to form 1 line blocks - # 3 - ignore => - # 4 - always open up if vt=0 - # 5 - stable: even for one line blocks if vt=0 - - # PATCH: Modify the -cab flag if we are not processing a list: - # We only want the -cab flag to apply to list containers, so - # for non-lists we use the default and stable -cab=5 value. - # Fixes case b939a. - my $cab_flag = $rOpts_comma_arrow_breakpoints; - if ( $type_sequence && !$ris_list_by_seqno->{$type_sequence} ) { - $cab_flag = 5; - } - - # Ignore old breakpoints when under stress. - # Fixes b1203 b1204 as well as b1197-b1200. - # But not if -lp: fixes b1264, b1265. NOTE: rechecked with - # b1264 to see if this check is still required at all, and - # these still require a check, but at higher level beta+3 - # instead of beta: b1193 b780 - if ( $saw_opening_structure - && !$lp_object - && $levels_to_go[$i_opening] >= $list_stress_level ) - { - $cab_flag = 2; + sub break_lists_increasing_depth { - # Do not break hash braces under stress (fixes b1238) - $do_not_break_apart ||= $types_to_go[$i_opening] eq 'L'; + my ($self) = @_; - # This option fixes b1235, b1237, b1240 with old and new - # -lp, but formatting is nicer with next option. - ## $is_long_term ||= - ## $levels_to_go[$i_opening] > $stress_level_beta + 1; + #-------------------------------------------- + # prepare for a new list when depth increases + # token $i is a '(','{', or '[' + #-------------------------------------------- - # This option fixes b1240 but not b1235, b1237 with new -lp, - # but this gives better formatting than the previous option. - $do_not_break_apart ||= - $levels_to_go[$i_opening] > $stress_level_beta; - } + #---------------------------------------------------------- + # BEGIN initialize depth arrays + # ... use the same order as sub check_for_new_minimum_depth + #---------------------------------------------------------- + $type_sequence_stack[$depth] = $type_sequence; + $override_cab3[$depth] = + $rOpts_comma_arrow_breakpoints == 3 + && $type_sequence + && $self->[_roverride_cab3_]->{$type_sequence}; + + $breakpoint_stack[$depth] = $forced_breakpoint_count; + $container_type[$depth] = + + # k => && || ? : . + $is_container_label_type{$last_nonblank_type} + ? $last_nonblank_token + : EMPTY_STRING; + $identifier_count_stack[$depth] = 0; + $index_before_arrow[$depth] = -1; + $interrupted_list[$depth] = 0; + $item_count_stack[$depth] = 0; + $last_nonblank_type[$depth] = $last_nonblank_type; + $opening_structure_index_stack[$depth] = $i; + + $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count; + $comma_index[$depth] = undef; + $last_comma_index[$depth] = undef; + $last_dot_index[$depth] = undef; + $old_breakpoint_count_stack[$depth] = $old_breakpoint_count; + $has_old_logical_breakpoints[$depth] = 0; + $rand_or_list[$depth] = []; + $rfor_semicolon_list[$depth] = []; + $i_equals[$depth] = -1; + + # if line ends here then signal closing token to break + if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' ) { + $self->set_closing_breakpoint($i); + } + + # Not all lists of values should be vertically aligned.. + $dont_align[$depth] = + + # code BLOCKS are handled at a higher level + ( $block_type ne EMPTY_STRING ) + + # certain paren lists + || ( $type eq '(' ) && ( + + # it does not usually look good to align a list of + # identifiers in a parameter list, as in: + # my($var1, $var2, ...) + # (This test should probably be refined, for now I'm just + # testing for any keyword) + ( $last_nonblank_type eq 'k' ) + + # a trailing '(' usually indicates a non-list + || ( $next_nonblank_type eq '(' ) + ); + $has_broken_sublist[$depth] = 0; + $want_comma_break[$depth] = 0; - if ( !$is_long_term - && $saw_opening_structure - && $is_opening_token{ $tokens_to_go[$i_opening] } - && $index_before_arrow[ $depth + 1 ] > 0 - && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] } - ) - { - $is_long_term = - $cab_flag == 4 - || $cab_flag == 0 && $last_nonblank_token eq ',' - || $cab_flag == 5 && $old_breakpoint_to_go[$i_opening]; - } ## end if ( !$is_long_term &&...) + #---------------------------- + # END initialize depth arrays + #---------------------------- - # mark term as long if the length between opening and closing - # parens exceeds allowed line length - if ( !$is_long_term && $saw_opening_structure ) { + # patch to outdent opening brace of long if/for/.. + # statements (like this one). See similar coding in + # set_continuation breaks. We have also catch it here for + # short line fragments which otherwise will not go through + # break_long_lines. + if ( + $block_type - my $i_opening_minus = - $self->find_token_starting_list($i_opening); + # if we have the ')' but not its '(' in this batch.. + && ( $last_nonblank_token eq ')' ) + && $mate_index_to_go[$i_last_nonblank_token] < 0 - my $excess = - $self->excess_line_length( $i_opening_minus, $i ); - - # Use standard spaces for indentation of lists in -lp mode - # if it gives a longer line length. This helps to avoid an - # instability due to forming and breaking one-line blocks. - # This fixes case b1314. - my $indentation = $leading_spaces_to_go[$i_opening_minus]; - if ( ref($indentation) - && $ris_broken_container->{$type_sequence} ) - { - my $lp_spaces = $indentation->get_spaces(); - my $std_spaces = $indentation->get_standard_spaces(); - my $diff = $std_spaces - $lp_spaces; - if ( $diff > 0 ) { $excess += $diff } - } + # and user wants brace to left + && !$rOpts_opening_brace_always_on_right - my $tol = $length_tol; + && ( $type eq '{' ) # should be true + && ( $token eq '{' ) # should be true + ) + { + $self->set_forced_breakpoint( $i - 1 ); + } - # boost tol for an -lp container - if ( - $lp_tol_boost - && $lp_object - && ( $rOpts_extended_continuation_indentation - || !$ris_list_by_seqno->{$type_sequence} ) - ) - { - $tol += $lp_tol_boost; - } + return; + } ## end sub break_lists_increasing_depth + + sub break_lists_decreasing_depth { + + my ( $self, $rbond_strength_bias ) = @_; + + # We have arrived at a closing container token in sub break_lists: + # the token at index $i is one of these: ')','}', ']' + # A number of important breakpoints for this container can now be set + # based on the information that we have collected. This includes: + # - breaks at commas to format tables + # - breaks at certain logical operators and other good breakpoints + # - breaks at opening and closing containers if needed by selected + # formatting styles + # These breaks are made by calling sub 'set_forced_breakpoint' + + $self->check_for_new_minimum_depth( $depth, $parent_seqno_to_go[$i] ) + if ( $depth < $minimum_depth ); + + # force all outer logical containers to break after we see on + # old breakpoint + $has_old_logical_breakpoints[$depth] ||= + $has_old_logical_breakpoints[$current_depth]; + + # Patch to break between ') {' if the paren list is broken. + # There is similar logic in break_long_lines for + # non-broken lists. + if ( $token eq ')' + && $next_nonblank_block_type + && $interrupted_list[$current_depth] + && $next_nonblank_type eq '{' + && !$rOpts_opening_brace_always_on_right ) + { + $self->set_forced_breakpoint($i); + } - # Patch to avoid blinking with -bbxi=2 and -cab=2 - # in which variations in -ci cause unstable formatting - # in edge cases. We just always add one ci level so that - # the formatting is independent of the -BBX results. - # Fixes cases b1137 b1149 b1150 b1155 b1158 b1159 b1160 - # b1161 b1166 b1167 b1168 - if ( !$ci_levels_to_go[$i_opening] - && $rbreak_before_container_by_seqno->{$type_sequence} ) - { - $tol += $rOpts->{'continuation-indentation'}; - } +#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n"; - $is_long_term = $excess + $tol > 0; + #----------------------------------------------------------------- + # Set breaks at commas to display a table of values if appropriate + #----------------------------------------------------------------- + my ( $bp_count, $do_not_break_apart ) = ( 0, 0 ); + ( $bp_count, $do_not_break_apart ) = + $self->set_comma_breakpoints( $current_depth, $rbond_strength_bias ) + if ( $item_count_stack[$current_depth] ); + + #----------------------------------------------------------- + # Now set flags needed to decide if we should break open the + # container ... This is a long rambling section which has + # grown over time to handle all situations. + #----------------------------------------------------------- + my $i_opening = $opening_structure_index_stack[$current_depth]; + my $saw_opening_structure = ( $i_opening >= 0 ); + my $lp_object; + if ( $rOpts_line_up_parentheses && $saw_opening_structure ) { + $lp_object = $self->[_rlp_object_by_seqno_] + ->{ $type_sequence_to_go[$i_opening] }; + } + + # this term is long if we had to break at interior commas.. + my $is_long_term = $bp_count > 0; + + # If this is a short container with one or more comma arrows, + # then we will mark it as a long term to open it if requested. + # $rOpts_comma_arrow_breakpoints = + # 0 - open only if comma precedes closing brace + # 1 - stable: except for one line blocks + # 2 - try to form 1 line blocks + # 3 - ignore => + # 4 - always open up if vt=0 + # 5 - stable: even for one line blocks if vt=0 + + # PATCH: Modify the -cab flag if we are not processing a list: + # We only want the -cab flag to apply to list containers, so + # for non-lists we use the default and stable -cab=5 value. + # Fixes case b939a. + my $cab_flag = $rOpts_comma_arrow_breakpoints; + if ( $type_sequence && !$self->[_ris_list_by_seqno_]->{$type_sequence} ) + { + $cab_flag = 5; + } + + # Ignore old breakpoints when under stress. + # Fixes b1203 b1204 as well as b1197-b1200. + # But not if -lp: fixes b1264, b1265. NOTE: rechecked with + # b1264 to see if this check is still required at all, and + # these still require a check, but at higher level beta+3 + # instead of beta: b1193 b780 + if ( $saw_opening_structure + && !$lp_object + && $levels_to_go[$i_opening] >= $high_stress_level ) + { + $cab_flag = 2; - } ## end if ( !$is_long_term &&...) + # Do not break hash braces under stress (fixes b1238) + $do_not_break_apart ||= $types_to_go[$i_opening] eq 'L'; - # We've set breaks after all comma-arrows. Now we have to - # undo them if this can be a one-line block - # (the only breakpoints set will be due to comma-arrows) + # This option fixes b1235, b1237, b1240 with old and new + # -lp, but formatting is nicer with next option. + ## $is_long_term ||= + ## $levels_to_go[$i_opening] > $stress_level_beta + 1; - if ( + # This option fixes b1240 but not b1235, b1237 with new -lp, + # but this gives better formatting than the previous option. + # TODO: see if stress_level_alha should also be considered + $do_not_break_apart ||= + $levels_to_go[$i_opening] > $stress_level_beta; + } - # user doesn't require breaking after all comma-arrows - ( $cab_flag != 0 ) && ( $cab_flag != 4 ) + if ( !$is_long_term + && $saw_opening_structure + && $is_opening_token{ $tokens_to_go[$i_opening] } + && $index_before_arrow[ $depth + 1 ] > 0 + && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] } ) + { + $is_long_term = + $cab_flag == 4 + || $cab_flag == 0 && $last_nonblank_token eq ',' + || $cab_flag == 5 && $old_breakpoint_to_go[$i_opening]; + } - # and if the opening structure is in this batch - && $saw_opening_structure + # mark term as long if the length between opening and closing + # parens exceeds allowed line length + if ( !$is_long_term && $saw_opening_structure ) { - # and either on the same old line - && ( - $old_breakpoint_count_stack[$current_depth] == - $last_old_breakpoint_count + my $i_opening_minus = $self->find_token_starting_list($i_opening); - # or user wants to form long blocks with arrows - || $cab_flag == 2 + my $excess = $self->excess_line_length( $i_opening_minus, $i ); - # if -cab=3 is overridden then use -cab=2 behavior - || $cab_flag == 3 && $override_cab3[$current_depth] - ) + # Use standard spaces for indentation of lists in -lp mode + # if it gives a longer line length. This helps to avoid an + # instability due to forming and breaking one-line blocks. + # This fixes case b1314. + my $indentation = $leading_spaces_to_go[$i_opening_minus]; + if ( ref($indentation) + && $self->[_ris_broken_container_]->{$type_sequence} ) + { + my $lp_spaces = $indentation->get_spaces(); + my $std_spaces = $indentation->get_standard_spaces(); + my $diff = $std_spaces - $lp_spaces; + if ( $diff > 0 ) { $excess += $diff } + } - # and we made breakpoints between the opening and closing - && ( $breakpoint_undo_stack[$current_depth] < - $forced_breakpoint_undo_count ) + my $tol = $length_tol; - # and this block is short enough to fit on one line - # Note: use < because need 1 more space for possible comma - && !$is_long_term + # boost tol for an -lp container + if ( + $lp_tol_boost + && $lp_object + && ( $rOpts_extended_continuation_indentation + || !$self->[_ris_list_by_seqno_]->{$type_sequence} ) + ) + { + $tol += $lp_tol_boost; + } - ) - { - $self->undo_forced_breakpoint_stack( - $breakpoint_undo_stack[$current_depth] ); - } ## end if ( ( $rOpts_comma_arrow_breakpoints...)) - - # now see if we have any comma breakpoints left - my $has_comma_breakpoints = - ( $breakpoint_stack[$current_depth] != - $forced_breakpoint_count ); - - # update broken-sublist flag of the outer container - $has_broken_sublist[$depth] = - $has_broken_sublist[$depth] - || $has_broken_sublist[$current_depth] - || $is_long_term - || $has_comma_breakpoints; - -# Having come to the closing ')', '}', or ']', now we have to decide if we -# should 'open up' the structure by placing breaks at the opening and -# closing containers. This is a tricky decision. Here are some of the -# basic considerations: -# -# -If this is a BLOCK container, then any breakpoints will have already -# been set (and according to user preferences), so we need do nothing here. -# -# -If we have a comma-separated list for which we can align the list items, -# then we need to do so because otherwise the vertical aligner cannot -# currently do the alignment. -# -# -If this container does itself contain a container which has been broken -# open, then it should be broken open to properly show the structure. -# -# -If there is nothing to align, and no other reason to break apart, -# then do not do it. -# -# We will not break open the parens of a long but 'simple' logical expression. -# For example: -# -# This is an example of a simple logical expression and its formatting: -# -# if ( $bigwasteofspace1 && $bigwasteofspace2 -# || $bigwasteofspace3 && $bigwasteofspace4 ) -# -# Most people would prefer this than the 'spacey' version: -# -# if ( -# $bigwasteofspace1 && $bigwasteofspace2 -# || $bigwasteofspace3 && $bigwasteofspace4 -# ) -# -# To illustrate the rules for breaking logical expressions, consider: -# -# FULLY DENSE: -# if ( $opt_excl -# and ( exists $ids_excl_uc{$id_uc} -# or grep $id_uc =~ /$_/, @ids_excl_uc )) -# -# This is on the verge of being difficult to read. The current default is to -# open it up like this: -# -# DEFAULT: -# if ( -# $opt_excl -# and ( exists $ids_excl_uc{$id_uc} -# or grep $id_uc =~ /$_/, @ids_excl_uc ) -# ) -# -# This is a compromise which tries to avoid being too dense and to spacey. -# A more spaced version would be: -# -# SPACEY: -# if ( -# $opt_excl -# and ( -# exists $ids_excl_uc{$id_uc} -# or grep $id_uc =~ /$_/, @ids_excl_uc -# ) -# ) -# -# Some people might prefer the spacey version -- an option could be added. The -# innermost expression contains a long block '( exists $ids_... ')'. -# -# Here is how the logic goes: We will force a break at the 'or' that the -# innermost expression contains, but we will not break apart its opening and -# closing containers because (1) it contains no multi-line sub-containers itself, -# and (2) there is no alignment to be gained by breaking it open like this -# -# and ( -# exists $ids_excl_uc{$id_uc} -# or grep $id_uc =~ /$_/, @ids_excl_uc -# ) -# -# (although this looks perfectly ok and might be good for long expressions). The -# outer 'if' container, though, contains a broken sub-container, so it will be -# broken open to avoid too much density. Also, since it contains no 'or's, there -# will be a forced break at its 'and'. - - # Open-up if parens if requested. We do this by pretending we - # did not see the opening structure, since in that case parens - # always get opened up. - if ( $saw_opening_structure - && $rOpts_break_open_compact_parens ) - { + # Patch to avoid blinking with -bbxi=2 and -cab=2 + # in which variations in -ci cause unstable formatting + # in edge cases. We just always add one ci level so that + # the formatting is independent of the -BBX results. + # Fixes cases b1137 b1149 b1150 b1155 b1158 b1159 b1160 + # b1161 b1166 b1167 b1168 + if ( !$ci_levels_to_go[$i_opening] + && $self->[_rbreak_before_container_by_seqno_]->{$type_sequence} + ) + { + $tol += $rOpts_continuation_indentation; + } - # This parameter is a one-character flag, as follows: - # '0' matches no parens -> break open NOT OK - # '1' matches all parens -> break open OK - # Other values are same as used by the weld-exclusion-list - my $flag = $rOpts_break_open_compact_parens; - if ( $flag eq '*' - || $flag eq '1' ) - { - $saw_opening_structure = 0; - } - else { - my $KK = $K_to_go[$i_opening]; - $saw_opening_structure = - !$self->match_paren_flag( $KK, $flag ); - } - } + $is_long_term = $excess + $tol > 0; - # set some flags telling something about this container.. - my $is_simple_logical_expression = 0; - if ( $item_count_stack[$current_depth] == 0 - && $saw_opening_structure - && $tokens_to_go[$i_opening] eq '(' - && $is_logical_container{ $container_type[$current_depth] } - ) - { + } - # This seems to be a simple logical expression with - # no existing breakpoints. Set a flag to prevent - # opening it up. - if ( !$has_comma_breakpoints ) { - $is_simple_logical_expression = 1; - } + # We've set breaks after all comma-arrows. Now we have to + # undo them if this can be a one-line block + # (the only breakpoints set will be due to comma-arrows) - # This seems to be a simple logical expression with - # breakpoints (broken sublists, for example). Break - # at all 'or's and '||'s. - else { - $self->set_logical_breakpoints($current_depth); - } - } ## end if ( $item_count_stack...) + if ( - if ( $is_long_term - && @{ $rfor_semicolon_list[$current_depth] } ) - { - $self->set_for_semicolon_breakpoints($current_depth); + # user doesn't require breaking after all comma-arrows + ( $cab_flag != 0 ) && ( $cab_flag != 4 ) - # open up a long 'for' or 'foreach' container to allow - # leading term alignment unless -lp is used. - $has_comma_breakpoints = 1 unless ($lp_object); - } ## end if ( $is_long_term && ...) + # and if the opening structure is in this batch + && $saw_opening_structure - if ( + # and either on the same old line + && ( + $old_breakpoint_count_stack[$current_depth] == + $last_old_breakpoint_count - # breaks for code BLOCKS are handled at a higher level - !$block_type + # or user wants to form long blocks with arrows + || $cab_flag == 2 - # we do not need to break at the top level of an 'if' - # type expression - && !$is_simple_logical_expression + # if -cab=3 is overridden then use -cab=2 behavior + || $cab_flag == 3 && $override_cab3[$current_depth] + ) - ## modification to keep ': (' containers vertically tight; - ## but probably better to let user set -vt=1 to avoid - ## inconsistency with other paren types - ## && ($container_type[$current_depth] ne ':') + # and we made breakpoints between the opening and closing + && ( $breakpoint_undo_stack[$current_depth] < + $forced_breakpoint_undo_count ) - # otherwise, we require one of these reasons for breaking: - && ( + # and this block is short enough to fit on one line + # Note: use < because need 1 more space for possible comma + && !$is_long_term - # - this term has forced line breaks - $has_comma_breakpoints + ) + { + $self->undo_forced_breakpoint_stack( + $breakpoint_undo_stack[$current_depth] ); + } - # - the opening container is separated from this batch - # for some reason (comment, blank line, code block) - # - this is a non-paren container spanning multiple lines - || !$saw_opening_structure + # now see if we have any comma breakpoints left + my $has_comma_breakpoints = + ( $breakpoint_stack[$current_depth] != $forced_breakpoint_count ); - # - this is a long block contained in another breakable - # container - || $is_long_term && !$self->is_in_block_by_i($i_opening) - ) - ) - { + # update broken-sublist flag of the outer container + $has_broken_sublist[$depth] = + $has_broken_sublist[$depth] + || $has_broken_sublist[$current_depth] + || $is_long_term + || $has_comma_breakpoints; - # do special -lp breaks at the CLOSING token for INTACT - # blocks (because we might not do them if the block does - # not break open) - if ($lp_object) { - my $K_begin_line = $lp_object->get_K_begin_line(); - my $i_begin_line = $K_begin_line - $K_to_go[0]; - $self->set_forced_lp_break( $i_begin_line, $i_opening ); - } + # Having come to the closing ')', '}', or ']', now we have to decide + # if we should 'open up' the structure by placing breaks at the + # opening and closing containers. This is a tricky decision. Here + # are some of the basic considerations: + # + # -If this is a BLOCK container, then any breakpoints will have + # already been set (and according to user preferences), so we need do + # nothing here. + # + # -If we have a comma-separated list for which we can align the list + # items, then we need to do so because otherwise the vertical aligner + # cannot currently do the alignment. + # + # -If this container does itself contain a container which has been + # broken open, then it should be broken open to properly show the + # structure. + # + # -If there is nothing to align, and no other reason to break apart, + # then do not do it. + # + # We will not break open the parens of a long but 'simple' logical + # expression. For example: + # + # This is an example of a simple logical expression and its formatting: + # + # if ( $bigwasteofspace1 && $bigwasteofspace2 + # || $bigwasteofspace3 && $bigwasteofspace4 ) + # + # Most people would prefer this than the 'spacey' version: + # + # if ( + # $bigwasteofspace1 && $bigwasteofspace2 + # || $bigwasteofspace3 && $bigwasteofspace4 + # ) + # + # To illustrate the rules for breaking logical expressions, consider: + # + # FULLY DENSE: + # if ( $opt_excl + # and ( exists $ids_excl_uc{$id_uc} + # or grep $id_uc =~ /$_/, @ids_excl_uc )) + # + # This is on the verge of being difficult to read. The current + # default is to open it up like this: + # + # DEFAULT: + # if ( + # $opt_excl + # and ( exists $ids_excl_uc{$id_uc} + # or grep $id_uc =~ /$_/, @ids_excl_uc ) + # ) + # + # This is a compromise which tries to avoid being too dense and to + # spacey. A more spaced version would be: + # + # SPACEY: + # if ( + # $opt_excl + # and ( + # exists $ids_excl_uc{$id_uc} + # or grep $id_uc =~ /$_/, @ids_excl_uc + # ) + # ) + # + # Some people might prefer the spacey version -- an option could be + # added. The innermost expression contains a long block '( exists + # $ids_... ')'. + # + # Here is how the logic goes: We will force a break at the 'or' that + # the innermost expression contains, but we will not break apart its + # opening and closing containers because (1) it contains no + # multi-line sub-containers itself, and (2) there is no alignment to + # be gained by breaking it open like this + # + # and ( + # exists $ids_excl_uc{$id_uc} + # or grep $id_uc =~ /$_/, @ids_excl_uc + # ) + # + # (although this looks perfectly ok and might be good for long + # expressions). The outer 'if' container, though, contains a broken + # sub-container, so it will be broken open to avoid too much density. + # Also, since it contains no 'or's, there will be a forced break at + # its 'and'. + + # Handle the experimental flag --break-open-compact-parens + # NOTE: This flag is not currently used and may eventually be removed. + # If this flag is set, we will implement it by + # pretending we did not see the opening structure, since in that case + # parens always get opened up. + if ( $saw_opening_structure + && $rOpts_break_open_compact_parens ) + { - # break after opening structure. - # note: break before closing structure will be automatic - if ( $minimum_depth <= $current_depth ) { + # This parameter is a one-character flag, as follows: + # '0' matches no parens -> break open NOT OK + # '1' matches all parens -> break open OK + # Other values are same as used by the weld-exclusion-list + my $flag = $rOpts_break_open_compact_parens; + if ( $flag eq '*' + || $flag eq '1' ) + { + $saw_opening_structure = 0; + } + else { - if ( $i_opening >= 0 ) { - $self->set_forced_breakpoint($i_opening) - unless ( $do_not_break_apart - || is_unbreakable_container($current_depth) ); - } + # NOTE: $seqno will be equal to closure var $type_sequence here + my $seqno = $type_sequence_to_go[$i_opening]; + $saw_opening_structure = + !$self->match_paren_control_flag( $seqno, $flag ); + } + } - # break at ',' of lower depth level before opening token - if ( $last_comma_index[$depth] ) { - $self->set_forced_breakpoint( - $last_comma_index[$depth] ); - } + # Set some more flags telling something about this container.. + my $is_simple_logical_expression; + if ( $item_count_stack[$current_depth] == 0 + && $saw_opening_structure + && $tokens_to_go[$i_opening] eq '(' + && $is_logical_container{ $container_type[$current_depth] } ) + { - # break at '.' of lower depth level before opening token - if ( $last_dot_index[$depth] ) { - $self->set_forced_breakpoint( - $last_dot_index[$depth] ); - } + # This seems to be a simple logical expression with + # no existing breakpoints. Set a flag to prevent + # opening it up. + if ( !$has_comma_breakpoints ) { + $is_simple_logical_expression = 1; + } - # break before opening structure if preceded by another - # closing structure and a comma. This is normally - # done by the previous closing brace, but not - # if it was a one-line block. - if ( $i_opening > 2 ) { - my $i_prev = - ( $types_to_go[ $i_opening - 1 ] eq 'b' ) - ? $i_opening - 2 - : $i_opening - 1; - - if ( - $types_to_go[$i_prev] eq ',' - && ( $types_to_go[ $i_prev - 1 ] eq ')' - || $types_to_go[ $i_prev - 1 ] eq '}' ) - ) - { - $self->set_forced_breakpoint($i_prev); - } + #--------------------------------------------------- + # This seems to be a simple logical expression with + # breakpoints (broken sublists, for example). Break + # at all 'or's and '||'s. + #--------------------------------------------------- + else { + $self->set_logical_breakpoints($current_depth); + } + } - # also break before something like ':(' or '?(' - # if appropriate. - elsif ( - $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ ) - { - my $token_prev = $tokens_to_go[$i_prev]; - if ( $want_break_before{$token_prev} ) { - $self->set_forced_breakpoint($i_prev); - } - } ## end elsif ( $types_to_go[$i_prev...]) - } ## end if ( $i_opening > 2 ) - } ## end if ( $minimum_depth <=...) - - # break after comma following closing structure - if ( $next_type eq ',' ) { - $self->set_forced_breakpoint( $i + 1 ); - } + # break long terms at any C-style for semicolons (c154) + if ( $is_long_term + && @{ $rfor_semicolon_list[$current_depth] } ) + { + $self->set_for_semicolon_breakpoints($current_depth); - # break before an '=' following closing structure - if ( - $is_assignment{$next_nonblank_type} - && ( $breakpoint_stack[$current_depth] != - $forced_breakpoint_count ) - ) - { - $self->set_forced_breakpoint($i); - } ## end if ( $is_assignment{$next_nonblank_type...}) - - # break at any comma before the opening structure Added - # for -lp, but seems to be good in general. It isn't - # obvious how far back to look; the '5' below seems to - # work well and will catch the comma in something like - # push @list, myfunc( $param, $param, .. - - my $icomma = $last_comma_index[$depth]; - if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) { - unless ( $forced_breakpoint_to_go[$icomma] ) { - $self->set_forced_breakpoint($icomma); - } - } - } ## end logic to open up a container + # and open up a long 'for' or 'foreach' container to allow + # leading term alignment unless -lp is used. + $has_comma_breakpoints = 1 unless ($lp_object); + } - # Break open a logical container open if it was already open - elsif ($is_simple_logical_expression - && $has_old_logical_breakpoints[$current_depth] ) - { - $self->set_logical_breakpoints($current_depth); - } + #---------------------------------------------------------------- + # FINALLY: Break open container according to the flags which have + # been set. + #---------------------------------------------------------------- + if ( - # Handle long container which does not get opened up - elsif ($is_long_term) { + # breaks for code BLOCKS are handled at a higher level + !$block_type - # must set fake breakpoint to alert outer containers that - # they are complex - set_fake_breakpoint(); - } ## end elsif ($is_long_term) + # we do not need to break at the top level of an 'if' + # type expression + && !$is_simple_logical_expression - } ## end elsif ( $depth < $current_depth) + ## modification to keep ': (' containers vertically tight; + ## but probably better to let user set -vt=1 to avoid + ## inconsistency with other paren types + ## && ($container_type[$current_depth] ne ':') - #------------------------------------------------------------ - # Handle this token - #------------------------------------------------------------ + # otherwise, we require one of these reasons for breaking: + && ( - $current_depth = $depth; + # - this term has forced line breaks + $has_comma_breakpoints - # most token types can skip the rest of this loop - next unless ( $quick_filter{$type} ); + # - the opening container is separated from this batch + # for some reason (comment, blank line, code block) + # - this is a non-paren container spanning multiple lines + || !$saw_opening_structure - # handle comma-arrow - if ( $type eq '=>' ) { - next if ( $last_nonblank_type eq '=>' ); - next if $rOpts_break_at_old_comma_breakpoints; - next - if ( $rOpts_comma_arrow_breakpoints == 3 - && !$override_cab3[$depth] ); - $want_comma_break[$depth] = 1; - $index_before_arrow[$depth] = $i_last_nonblank_token; - next; - } ## end if ( $type eq '=>' ) + # - this is a long block contained in another breakable + # container + || $is_long_term && !$self->is_in_block_by_i($i_opening) + ) + ) + { - elsif ( $type eq '.' ) { - $last_dot_index[$depth] = $i; + # do special -lp breaks at the CLOSING token for INTACT + # blocks (because we might not do them if the block does + # not break open) + if ($lp_object) { + my $K_begin_line = $lp_object->get_K_begin_line(); + my $i_begin_line = $K_begin_line - $K_to_go[0]; + $self->set_forced_lp_break( $i_begin_line, $i_opening ); } - # Turn off alignment if we are sure that this is not a list - # environment. To be safe, we will do this if we see certain - # non-list tokens, such as ';', and also the environment is - # not a list. Note that '=' could be in any of the = operators - # (lextest.t). We can't just use the reported environment - # because it can be incorrect in some cases. - elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} ) - && !$self->is_in_list_by_i($i) ) - { - $dont_align[$depth] = 1; - $want_comma_break[$depth] = 0; - $index_before_arrow[$depth] = -1; - } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...)) - - # now just handle any commas - next unless ( $type eq ',' ); - - $last_dot_index[$depth] = undef; - $last_comma_index[$depth] = $i; - - # break here if this comma follows a '=>' - # but not if there is a side comment after the comma - if ( $want_comma_break[$depth] ) { + # break after opening structure. + # note: break before closing structure will be automatic + if ( $minimum_depth <= $current_depth ) { - if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) { - if ($rOpts_comma_arrow_breakpoints) { - $want_comma_break[$depth] = 0; - next; - } + if ( $i_opening >= 0 ) { + $self->set_forced_breakpoint($i_opening) + unless ( $do_not_break_apart + || is_unbreakable_container($current_depth) ); } - $self->set_forced_breakpoint($i) - unless ( $next_nonblank_type eq '#' ); - - # break before the previous token if it looks safe - # Example of something that we will not try to break before: - # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt}, - # Also we don't want to break at a binary operator (like +): - # $c->createOval( - # $x + $R, $y + - # $R => $x - $R, - # $y - $R, -fill => 'black', - # ); - my $ibreak = $index_before_arrow[$depth] - 1; - if ( $ibreak > 0 - && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ ) - { - if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- } - if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- } - if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) { - - # don't break pointer calls, such as the following: - # File::Spec->curdir => 1, - # (This is tokenized as adjacent 'w' tokens) - ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) { - - # And don't break before a comma, as in the following: - # ( LONGER_THAN,=> 1, - # EIGHTY_CHARACTERS,=> 2, - # CAUSES_FORMATTING,=> 3, - # LIKE_THIS,=> 4, - # ); - # This example is for -tso but should be general rule - if ( $tokens_to_go[ $ibreak + 1 ] ne '->' - && $tokens_to_go[ $ibreak + 1 ] ne ',' ) - { - $self->set_forced_breakpoint($ibreak); - } - } ## end if ( $types_to_go[$ibreak...]) - } ## end if ( $ibreak > 0 && $tokens_to_go...) - - $want_comma_break[$depth] = 0; - $index_before_arrow[$depth] = -1; + # break at ',' of lower depth level before opening token + if ( $last_comma_index[$depth] ) { + $self->set_forced_breakpoint( $last_comma_index[$depth] ); + } - # handle list which mixes '=>'s and ','s: - # treat any list items so far as an interrupted list - $interrupted_list[$depth] = 1; - next; - } ## end if ( $want_comma_break...) - - # Break after all commas above starting depth... - # But only if the last closing token was followed by a comma, - # to avoid breaking a list operator (issue c119) - if ( $depth < $starting_depth - && $comma_follows_last_closing_token - && !$dont_align[$depth] ) - { - $self->set_forced_breakpoint($i) - unless ( $next_nonblank_type eq '#' ); - next; - } + # break at '.' of lower depth level before opening token + if ( $last_dot_index[$depth] ) { + $self->set_forced_breakpoint( $last_dot_index[$depth] ); + } - # add this comma to the list.. - my $item_count = $item_count_stack[$depth]; - if ( $item_count == 0 ) { + # break before opening structure if preceded by another + # closing structure and a comma. This is normally + # done by the previous closing brace, but not + # if it was a one-line block. + if ( $i_opening > 2 ) { + my $i_prev = + ( $types_to_go[ $i_opening - 1 ] eq 'b' ) + ? $i_opening - 2 + : $i_opening - 1; - # but do not form a list with no opening structure - # for example: + my $type_prev = $types_to_go[$i_prev]; + my $token_prev = $tokens_to_go[$i_prev]; + if ( + $type_prev eq ',' + && ( $types_to_go[ $i_prev - 1 ] eq ')' + || $types_to_go[ $i_prev - 1 ] eq '}' ) + ) + { + $self->set_forced_breakpoint($i_prev); + } - # open INFILE_COPY, ">$input_file_copy" - # or die ("very long message"); - if ( ( $opening_structure_index_stack[$depth] < 0 ) - && $self->is_in_block_by_i($i) ) - { - $dont_align[$depth] = 1; + # also break before something like ':(' or '?(' + # if appropriate. + elsif ($type_prev =~ /^([k\:\?]|&&|\|\|)$/ + && $want_break_before{$token_prev} ) + { + $self->set_forced_breakpoint($i_prev); + } } - } ## end if ( $item_count == 0 ) - - $comma_index[$depth][$item_count] = $i; - ++$item_count_stack[$depth]; - if ( $last_nonblank_type =~ /^[iR\]]$/ ) { - $identifier_count_stack[$depth]++; } - } ## end while ( ++$i <= $max_index_to_go) - - #------------------------------------------- - # end of loop over all tokens in this batch - #------------------------------------------- - # set breaks for any unfinished lists .. - foreach my $dd ( reverse( $minimum_depth .. $current_depth ) ) { + # break after comma following closing structure + if ( $types_to_go[ $i + 1 ] eq ',' ) { + $self->set_forced_breakpoint( $i + 1 ); + } - $interrupted_list[$dd] = 1; - $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth ); - $self->set_comma_breakpoints( $dd, $rbond_strength_bias ); - $self->set_logical_breakpoints($dd) - if ( $has_old_logical_breakpoints[$dd] ); - $self->set_for_semicolon_breakpoints($dd); + # break before an '=' following closing structure + if ( + $is_assignment{$next_nonblank_type} + && ( $breakpoint_stack[$current_depth] != + $forced_breakpoint_count ) + ) + { + $self->set_forced_breakpoint($i); + } - # break open container... - my $i_opening = $opening_structure_index_stack[$dd]; - if ( defined($i_opening) && $i_opening >= 0 ) { - $self->set_forced_breakpoint($i_opening) - unless ( - is_unbreakable_container($dd) + # break at any comma before the opening structure Added + # for -lp, but seems to be good in general. It isn't + # obvious how far back to look; the '5' below seems to + # work well and will catch the comma in something like + # push @list, myfunc( $param, $param, .. - # Avoid a break which would place an isolated ' or " - # on a line - || ( $type eq 'Q' - && $i_opening >= $max_index_to_go - 2 - && ( $token eq "'" || $token eq '"' ) ) - ); + my $icomma = $last_comma_index[$depth]; + if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) { + unless ( $forced_breakpoint_to_go[$icomma] ) { + $self->set_forced_breakpoint($icomma); + } } - } ## end for ( my $dd = $current_depth...) + } - # Return a flag indicating if the input file had some good breakpoints. - # This flag will be used to force a break in a line shorter than the - # allowed line length. - if ( $has_old_logical_breakpoints[$current_depth] ) { - $saw_good_breakpoint = 1; + #----------------------------------------------------------- + # Break open a logical container open if it was already open + #----------------------------------------------------------- + elsif ($is_simple_logical_expression + && $has_old_logical_breakpoints[$current_depth] ) + { + $self->set_logical_breakpoints($current_depth); } - # A complex line with one break at an = has a good breakpoint. - # This is not complex ($total_depth_variation=0): - # $res1 - # = 10; - # - # This is complex ($total_depth_variation=6): - # $res2 = - # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert')); + # Handle long container which does not get opened up + elsif ($is_long_term) { - # The check ($i_old_.. < $max_index_to_go) was added to fix b1333 - elsif ($i_old_assignment_break - && $total_depth_variation > 4 - && $old_breakpoint_count == 1 - && $i_old_assignment_break < $max_index_to_go ) - { - $saw_good_breakpoint = 1; - } ## end elsif ( $i_old_assignment_break...) + # must set fake breakpoint to alert outer containers that + # they are complex + set_fake_breakpoint(); + } - return $saw_good_breakpoint; - } ## end sub break_lists + return; + } ## end sub break_lists_decreasing_depth } ## end closure break_lists my %is_kwiZ; @@ -19809,7 +21340,9 @@ sub find_token_starting_list { # This will be the return index my $i_opening_minus = $i_opening_paren; - goto RETURN if ( $i_opening_minus <= 0 ); + if ( $i_opening_minus <= 0 ) { + return $i_opening_minus; + } my $im1 = $i_opening_paren - 1; my ( $iprev_nb, $type_prev_nb ) = ( $im1, $types_to_go[$im1] ); @@ -19850,8 +21383,6 @@ sub find_token_starting_list { if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ } } - RETURN: - DEBUG_FIND_START && print < im=$i_opening_minus tok=$tokens_to_go[$i_opening_minus] EOM @@ -19859,7 +21390,7 @@ EOM return $i_opening_minus; } ## end sub find_token_starting_list -{ ## begin closure set_comma_breakpoints_do +{ ## begin closure set_comma_breakpoints_final my %is_keyword_with_special_leading_term; @@ -19874,10 +21405,178 @@ EOM use constant DEBUG_SPARSE => 0; - sub set_comma_breakpoints_do { + sub comma_broken_sublist_rule { + + my ( + + $self, # + + $item_count, + $interrupted, + $i_first_comma, + $i_true_last_comma, + $ri_term_end, + $ri_term_begin, + $ri_term_comma, + $ritem_lengths, + + ) = @_; + + # Break at every comma except for a comma between two + # simple, small terms. This prevents long vertical + # columns of, say, just 0's. + my $small_length = 10; # 2 + actual maximum length wanted + + # We'll insert a break in long runs of small terms to + # allow alignment in uniform tables. + my $skipped_count = 0; + my $columns = table_columns_available($i_first_comma); + my $fields = int( $columns / $small_length ); + if ( $rOpts_maximum_fields_per_table + && $fields > $rOpts_maximum_fields_per_table ) + { + $fields = $rOpts_maximum_fields_per_table; + } + my $max_skipped_count = $fields - 1; + + my $is_simple_last_term = 0; + my $is_simple_next_term = 0; + foreach my $j ( 0 .. $item_count ) { + $is_simple_last_term = $is_simple_next_term; + $is_simple_next_term = 0; + if ( $j < $item_count + && $ri_term_end->[$j] == $ri_term_begin->[$j] + && $ritem_lengths->[$j] <= $small_length ) + { + $is_simple_next_term = 1; + } + next if $j == 0; + if ( $is_simple_last_term + && $is_simple_next_term + && $skipped_count < $max_skipped_count ) + { + $skipped_count++; + } + else { + $skipped_count = 0; + my $i_tc = $ri_term_comma->[ $j - 1 ]; + last unless defined $i_tc; + $self->set_forced_breakpoint($i_tc); + } + } + + # always break at the last comma if this list is + # interrupted; we wouldn't want to leave a terminal '{', for + # example. + if ($interrupted) { + $self->set_forced_breakpoint($i_true_last_comma); + } + return; + } + + sub set_emergency_comma_breakpoints { + + my ( + + $self, # + + $number_of_fields_best, + $rinput_hash, + $comma_count, + $i_first_comma, + + ) = @_; + + # The number of fields worked out to be negative, so we + # have to make an emergency fix. + + my $rcomma_index = $rinput_hash->{rcomma_index}; + my $next_nonblank_type = $rinput_hash->{next_nonblank_type}; + my $rdo_not_break_apart = $rinput_hash->{rdo_not_break_apart}; + my $must_break_open = $rinput_hash->{must_break_open}; + + # are we an item contained in an outer list? + my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/; + + # In many cases, it may be best to not force a break if there is just + # one comma, because the standard continuation break logic will do a + # better job without it. + + # In the common case that all but one of the terms can fit + # on a single line, it may look better not to break open the + # containing parens. Consider, for example + + # $color = + # join ( '/', + # sort { $color_value{$::a} <=> $color_value{$::b}; } + # keys %colors ); + + # which will look like this with the container broken: + + # $color = join ( + # '/', + # sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors + # ); + + # Here is an example of this rule for a long last term: + + # log_message( 0, 256, 128, + # "Number of routes in adj-RIB-in to be considered: $peercount" ); + + # And here is an example with a long first term: + + # $s = sprintf( + # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)", + # $r, $pu, $ps, $cu, $cs, $tt + # ) + # if $style eq 'all'; + + my $i_last_comma = $rcomma_index->[ $comma_count - 1 ]; + + my $long_last_term = $self->excess_line_length( 0, $i_last_comma ) <= 0; + my $long_first_term = + $self->excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= + 0; + + # break at every comma ... + if ( + + # if requested by user or is best looking + $number_of_fields_best == 1 - # Given a list with some commas, set breakpoints at some of the - # commas, if necessary, to make it easy to read. + # or if this is a sublist of a larger list + || $in_hierarchical_list + + # or if multiple commas and we don't have a long first or last + # term + || ( $comma_count > 1 + && !( $long_last_term || $long_first_term ) ) + ) + { + foreach ( 0 .. $comma_count - 1 ) { + $self->set_forced_breakpoint( $rcomma_index->[$_] ); + } + } + elsif ($long_last_term) { + + $self->set_forced_breakpoint($i_last_comma); + ${$rdo_not_break_apart} = 1 unless $must_break_open; + } + elsif ($long_first_term) { + + $self->set_forced_breakpoint($i_first_comma); + } + else { + + # let breaks be defined by default bond strength logic + } + return; + } + + sub set_comma_breakpoints_final { + + # Given a list of comma-separated items, set breakpoints at some of + # the commas, if necessary, to make it easy to read. my ( $self, $rinput_hash ) = @_; @@ -19906,16 +21605,20 @@ EOM } my $is_lp_formatting = ref( $leading_spaces_to_go[$i_first_comma] ); - #--------------------------------------------------------------- - # find lengths of all items in the list to calculate page layout - #--------------------------------------------------------------- + #----------------------------------------------------------- + # Section A: Find lengths of all items in the list needed to + # calculate page layout + #----------------------------------------------------------- my $comma_count = $item_count; - my @item_lengths; - my @i_term_begin; - my @i_term_end; - my @i_term_comma; + + my $ritem_lengths = []; + my $ri_term_begin = []; + my $ri_term_end = []; + my $ri_term_comma = []; + + my $rmax_length = [ 0, 0 ]; + my $i_prev_plus; - my @max_length = ( 0, 0 ); my $first_term_length; my $i = $i_opening_paren; my $is_odd = 1; @@ -19931,22 +21634,22 @@ EOM ( $types_to_go[$i_prev_plus] eq 'b' ) ? $i_prev_plus + 1 : $i_prev_plus; - push @i_term_begin, $i_term_begin; - push @i_term_end, $i_term_end; - push @i_term_comma, $i; + push @{$ri_term_begin}, $i_term_begin; + push @{$ri_term_end}, $i_term_end; + push @{$ri_term_comma}, $i; # note: currently adding 2 to all lengths (for comma and space) my $length = 2 + token_sequence_length( $i_term_begin, $i_term_end ); - push @item_lengths, $length; + push @{$ritem_lengths}, $length; if ( $j == 0 ) { $first_term_length = $length; } else { - if ( $length > $max_length[$is_odd] ) { - $max_length[$is_odd] = $length; + if ( $length > $rmax_length->[$is_odd] ) { + $rmax_length->[$is_odd] = $length; } } } @@ -19970,15 +21673,15 @@ EOM # add 2 to length because other lengths include a comma and a blank $last_item_length += 2; - push @item_lengths, $last_item_length; - push @i_term_begin, $i_b + 1; - push @i_term_end, $i_e; - push @i_term_comma, undef; + push @{$ritem_lengths}, $last_item_length; + push @{$ri_term_begin}, $i_b + 1; + push @{$ri_term_end}, $i_e; + push @{$ri_term_comma}, undef; my $i_odd = $item_count % 2; - if ( $last_item_length > $max_length[$i_odd] ) { - $max_length[$i_odd] = $last_item_length; + if ( $last_item_length > $rmax_length->[$i_odd] ) { + $rmax_length->[$i_odd] = $last_item_length; } $item_count++; @@ -19989,67 +21692,32 @@ EOM } } - #--------------------------------------------------------------- # End of length calculations - #--------------------------------------------------------------- - #--------------------------------------------------------------- - # Compound List Rule 1: + #----------------------------------------- + # Section B: Handle some special cases ... + #----------------------------------------- + + #------------------------------------------------------------- + # Special Case B1: Compound List Rule 1: # Break at (almost) every comma for a list containing a broken # sublist. This has higher priority than the Interrupted List # Rule. - #--------------------------------------------------------------- + #------------------------------------------------------------- if ($has_broken_sublist) { - # Break at every comma except for a comma between two - # simple, small terms. This prevents long vertical - # columns of, say, just 0's. - my $small_length = 10; # 2 + actual maximum length wanted - - # We'll insert a break in long runs of small terms to - # allow alignment in uniform tables. - my $skipped_count = 0; - my $columns = table_columns_available($i_first_comma); - my $fields = int( $columns / $small_length ); - if ( $rOpts_maximum_fields_per_table - && $fields > $rOpts_maximum_fields_per_table ) - { - $fields = $rOpts_maximum_fields_per_table; - } - my $max_skipped_count = $fields - 1; - - my $is_simple_last_term = 0; - my $is_simple_next_term = 0; - foreach my $j ( 0 .. $item_count ) { - $is_simple_last_term = $is_simple_next_term; - $is_simple_next_term = 0; - if ( $j < $item_count - && $i_term_end[$j] == $i_term_begin[$j] - && $item_lengths[$j] <= $small_length ) - { - $is_simple_next_term = 1; - } - next if $j == 0; - if ( $is_simple_last_term - && $is_simple_next_term - && $skipped_count < $max_skipped_count ) - { - $skipped_count++; - } - else { - $skipped_count = 0; - my $i_tc = $i_term_comma[ $j - 1 ]; - last unless defined $i_tc; - $self->set_forced_breakpoint($i_tc); - } - } + $self->comma_broken_sublist_rule( - # always break at the last comma if this list is - # interrupted; we wouldn't want to leave a terminal '{', for - # example. - if ($interrupted) { - $self->set_forced_breakpoint($i_true_last_comma); - } + $item_count, + $interrupted, + $i_first_comma, + $i_true_last_comma, + $ri_term_end, + $ri_term_begin, + $ri_term_comma, + $ritem_lengths, + + ); return; } @@ -20058,11 +21726,11 @@ EOM #i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n"; #print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n"; - #--------------------------------------------------------------- - # Interrupted List Rule: + #-------------------------------------------------------------- + # Special Case B2: Interrupted List Rule: # A list is forced to use old breakpoints if it was interrupted # by side comments or blank lines, or requested by user. - #--------------------------------------------------------------- + #-------------------------------------------------------------- if ( $rOpts_break_at_old_comma_breakpoints || $interrupted || $i_opening_paren < 0 ) @@ -20071,19 +21739,16 @@ EOM return; } - #--------------------------------------------------------------- - # Looks like a list of items. We have to look at it and size it up. - #--------------------------------------------------------------- - my $opening_token = $tokens_to_go[$i_opening_paren]; my $opening_is_in_block = $self->is_in_block_by_i($i_opening_paren); - #------------------------------------------------------------------- - # Return if this will fit on one line - #------------------------------------------------------------------- + #----------------------------------------------------------------- + # Special Case B3: If it fits on one line, return and let the line + # break logic decide if and where to break. + #----------------------------------------------------------------- - # The -bbxi=2 parameters can add an extra hidden level of indentation; - # this needs a tolerance to avoid instability. Fixes b1259, 1260. + # The -bbxi=2 parameters can add an extra hidden level of indentation + # so they need a tolerance to avoid instability. Fixes b1259, 1260. my $tol = 0; if ( $break_before_container_types{$opening_token} && $container_indentation_options{$opening_token} @@ -20098,15 +21763,22 @@ EOM } my $i_opening_minus = $self->find_token_starting_list($i_opening_paren); - return - unless $self->excess_line_length( $i_opening_minus, $i_closing_paren ) - + $tol > 0; + my $excess = + $self->excess_line_length( $i_opening_minus, $i_closing_paren ); + return if ( $excess + $tol <= 0 ); + + #--------------------------------------- + # Section C: Handle a multiline list ... + #--------------------------------------- + + #--------------------------------------------------------------- + # Section C1: Determine '$number_of_fields' = the best number of + # fields to use if this is to be formatted as a table. + #--------------------------------------------------------------- - #------------------------------------------------------------------- # Now we know that this block spans multiple lines; we have to set # at least one breakpoint -- real or fake -- as a signal to break # open any outer containers. - #------------------------------------------------------------------- set_fake_breakpoint(); # be sure we do not extend beyond the current list length @@ -20123,8 +21795,8 @@ EOM $maximum_line_length_at_level[ $levels_to_go[$i_opening_minus] ] - total_line_length( $i_opening_minus, $i_opening_paren ); $need_lp_break_open = - ( $max_length[0] > $columns_if_unbroken ) - || ( $max_length[1] > $columns_if_unbroken ) + ( $rmax_length->[0] > $columns_if_unbroken ) + || ( $rmax_length->[1] > $columns_if_unbroken ) || ( $first_term_length > $columns_if_unbroken ); } @@ -20133,8 +21805,8 @@ EOM # list items might be a hash list. But if we can be sure that # it is not a hash, then we can allow an odd number for more # flexibility. - my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count - + # 1 = odd field count ok, 2 = want even count + my $odd_or_even = 2; if ( $identifier_count >= $item_count - 1 || $is_assignment{$next_nonblank_type} || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ ) @@ -20146,12 +21818,12 @@ EOM # do we have a long first term which should be # left on a line by itself? my $use_separate_first_term = ( - $odd_or_even == 1 # only if we can use 1 field/line - && $item_count > 3 # need several items + $odd_or_even == 1 # only if we can use 1 field/line + && $item_count > 3 # need several items && $first_term_length > - 2 * $max_length[0] - 2 # need long first term + 2 * $rmax_length->[0] - 2 # need long first term && $first_term_length > - 2 * $max_length[1] - 2 # need long first term + 2 * $rmax_length->[1] - 2 # need long first term ); # or do we know from the type of list that the first term should @@ -20187,23 +21859,25 @@ EOM $i_first_comma = $rcomma_index->[1]; $item_count--; return if $comma_count == 1; - shift @item_lengths; - shift @i_term_begin; - shift @i_term_end; - shift @i_term_comma; + shift @{$ritem_lengths}; + shift @{$ri_term_begin}; + shift @{$ri_term_end}; + shift @{$ri_term_comma}; } # if not, update the metrics to include the first term else { - if ( $first_term_length > $max_length[0] ) { - $max_length[0] = $first_term_length; + if ( $first_term_length > $rmax_length->[0] ) { + $rmax_length->[0] = $first_term_length; } } # Field width parameters - my $pair_width = ( $max_length[0] + $max_length[1] ); + my $pair_width = ( $rmax_length->[0] + $rmax_length->[1] ); my $max_width = - ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1]; + ( $rmax_length->[0] > $rmax_length->[1] ) + ? $rmax_length->[0] + : $rmax_length->[1]; # Number of free columns across the page width for laying out tables my $columns = table_columns_available($i_first_comma); @@ -20215,10 +21889,9 @@ EOM # paren, but in some cases we might not. if ( $rOpts_variable_maximum_line_length && $tokens_to_go[$i_opening_paren] eq '(' - && @i_term_begin ) - ##&& !$old_breakpoint_to_go[$i_opening_paren] ) ## in b1210 patch + && @{$ri_term_begin} ) { - my $ib = $i_term_begin[0]; + my $ib = $ri_term_begin->[0]; my $type = $types_to_go[$ib]; # So far, the only known instance of this problem is when @@ -20239,19 +21912,19 @@ EOM } } - # Estimated maximum number of fields which fit this space - # This will be our first guess + # Estimated maximum number of fields which fit this space. + # This will be our first guess: my $number_of_fields_max = maximum_number_of_fields( $columns, $odd_or_even, $max_width, $pair_width ); my $number_of_fields = $number_of_fields_max; - # Find the best-looking number of fields - # and make this our second guess if possible + # Find the best-looking number of fields. + # This will be our second guess, if possible. my ( $number_of_fields_best, $ri_ragged_break_list, $new_identifier_count ) - = $self->study_list_complexity( \@i_term_begin, \@i_term_end, - \@item_lengths, $max_width ); + = $self->study_list_complexity( $ri_term_begin, $ri_term_end, + $ritem_lengths, $max_width ); if ( $number_of_fields_best != 0 && $number_of_fields_best < $number_of_fields_max ) @@ -20259,10 +21932,8 @@ EOM $number_of_fields = $number_of_fields_best; } - # ---------------------------------------------------------------------- - # If we are crowded and the -lp option is being used, try to - # undo some indentation - # ---------------------------------------------------------------------- + # If we are crowded and the -lp option is being used, try + # to undo some indentation if ( $is_lp_formatting && ( @@ -20272,46 +21943,19 @@ EOM ) ) { - my $available_spaces = - $self->get_available_spaces_to_go($i_first_comma); - if ( $available_spaces > 0 ) { - - my $spaces_wanted = $max_width - $columns; # for 1 field - - if ( $number_of_fields_best == 0 ) { - $number_of_fields_best = - get_maximum_fields_wanted( \@item_lengths ); - } - - if ( $number_of_fields_best != 1 ) { - my $spaces_wanted_2 = - 1 + $pair_width - $columns; # for 2 fields - if ( $available_spaces > $spaces_wanted_2 ) { - $spaces_wanted = $spaces_wanted_2; - } - } + ( $number_of_fields, $number_of_fields_best, $columns ) = + $self->lp_table_fix( + + $columns, + $i_first_comma, + $max_width, + $number_of_fields, + $number_of_fields_best, + $odd_or_even, + $pair_width, + $ritem_lengths, - if ( $spaces_wanted > 0 ) { - my $deleted_spaces = - $self->reduce_lp_indentation( $i_first_comma, - $spaces_wanted ); - - # redo the math - if ( $deleted_spaces > 0 ) { - $columns = table_columns_available($i_first_comma); - $number_of_fields_max = - maximum_number_of_fields( $columns, $odd_or_even, - $max_width, $pair_width ); - $number_of_fields = $number_of_fields_max; - - if ( $number_of_fields_best == 1 - && $number_of_fields >= 1 ) - { - $number_of_fields = $number_of_fields_best; - } - } - } - } + ); } # try for one column if two won't work @@ -20337,94 +21981,30 @@ EOM # are we an item contained in an outer list? my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/; + #----------------------------------------------------------------- + # Section C2: Stop here if we did not compute a positive number of + # fields. In this case we just have to bail out. + #----------------------------------------------------------------- if ( $number_of_fields <= 0 ) { -# #--------------------------------------------------------------- -# # We're in trouble. We can't find a single field width that works. -# # There is no simple answer here; we may have a single long list -# # item, or many. -# #--------------------------------------------------------------- -# -# In many cases, it may be best to not force a break if there is just one -# comma, because the standard continuation break logic will do a better -# job without it. -# -# In the common case that all but one of the terms can fit -# on a single line, it may look better not to break open the -# containing parens. Consider, for example -# -# $color = -# join ( '/', -# sort { $color_value{$::a} <=> $color_value{$::b}; } -# keys %colors ); -# -# which will look like this with the container broken: -# -# $color = join ( -# '/', -# sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors -# ); -# -# Here is an example of this rule for a long last term: -# -# log_message( 0, 256, 128, -# "Number of routes in adj-RIB-in to be considered: $peercount" ); -# -# And here is an example with a long first term: -# -# $s = sprintf( -# "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)", -# $r, $pu, $ps, $cu, $cs, $tt -# ) -# if $style eq 'all'; - - $i_last_comma = $rcomma_index->[ $comma_count - 1 ]; - - my $long_last_term = - $self->excess_line_length( 0, $i_last_comma ) <= 0; - my $long_first_term = - $self->excess_line_length( $i_first_comma + 1, $max_index_to_go ) - <= 0; - - # break at every comma ... - if ( - - # if requested by user or is best looking - $number_of_fields_best == 1 - - # or if this is a sublist of a larger list - || $in_hierarchical_list - - # or if multiple commas and we don't have a long first or last - # term - || ( $comma_count > 1 - && !( $long_last_term || $long_first_term ) ) - ) - { - foreach ( 0 .. $comma_count - 1 ) { - $self->set_forced_breakpoint( $rcomma_index->[$_] ); - } - } - elsif ($long_last_term) { - - $self->set_forced_breakpoint($i_last_comma); - ${$rdo_not_break_apart} = 1 unless $must_break_open; - } - elsif ($long_first_term) { + $self->set_emergency_comma_breakpoints( - $self->set_forced_breakpoint($i_first_comma); - } - else { + $number_of_fields_best, + $rinput_hash, + $comma_count, + $i_first_comma, - # let breaks be defined by default bond strength logic - } + ); return; } - # -------------------------------------------------------- - # We have a tentative field count that seems to work. + #------------------------------------------------------------------ + # Section C3: We have a tentative field count that seems to work. + # Now we must look more closely to determine if a table layout will + # actually look okay. + #------------------------------------------------------------------ + # How many lines will this require? - # -------------------------------------------------------- my $formatted_lines = $item_count / ($number_of_fields); if ( $formatted_lines != int $formatted_lines ) { $formatted_lines = 1 + int $formatted_lines; @@ -20490,26 +22070,27 @@ EOM $two_line_word_wrap_ok = 1; } else { - my $KK = $K_to_go[$i_opening_paren]; + my $seqno = $type_sequence_to_go[$i_opening_paren]; $two_line_word_wrap_ok = - !$self->match_paren_flag( $KK, $flag ); + !$self->match_paren_control_flag( $seqno, $flag ); } } } - # Begin check for shortcut methods, which avoid treating a list - # as a table for relatively small parenthesized lists. These + #------------------------------------------------------------------- + # Section C4: Check for shortcut methods, which avoid treating + # a list as a table for relatively small parenthesized lists. These # are usually easier to read if not formatted as tables. + #------------------------------------------------------------------- if ( $packed_lines <= 2 # probably can fit in 2 lines && $item_count < 9 # doesn't have too many items && $opening_is_in_block # not a sub-container && $two_line_word_wrap_ok # ok to wrap this paren list - ##&& $opening_token eq '(' # is paren list ) { - # Shortcut method 1: for -lp and just one comma: + # Section C4A: Shortcut method 1: for -lp and just one comma: # This is a no-brainer, just break at the comma. if ( $is_lp_formatting # -lp @@ -20524,8 +22105,8 @@ EOM } - # method 2 is for most small ragged lists which might look - # best if not displayed as a table. + # Section C4B: Shortcut method 2 is for most small ragged lists + # which might look best if not displayed as a table. if ( ( $number_of_fields == 2 && $item_count == 3 ) || ( @@ -20535,7 +22116,7 @@ EOM ) { - my $break_count = $self->set_ragged_breakpoints( \@i_term_comma, + my $break_count = $self->set_ragged_breakpoints( $ri_term_comma, $ri_ragged_break_list ); ++$break_count if ($use_separate_first_term); @@ -20563,15 +22144,15 @@ EOM }; - #--------------------------------------------------------------- - # Compound List Rule 2: + #------------------------------------------------------------------ + # Section C5: Compound List Rule 2: # If this list is too long for one line, and it is an item of a # larger list, then we must format it, regardless of sparsity # (ian.t). One reason that we have to do this is to trigger # Compound List Rule 1, above, which causes breaks at all commas of # all outer lists. In this way, the structure will be properly # displayed. - #--------------------------------------------------------------- + #------------------------------------------------------------------ # Decide if this list is too long for one line unless broken my $total_columns = table_columns_available($i_opening_paren); @@ -20587,7 +22168,7 @@ EOM $i_effective_last_comma + 1 ) > 0; } - # FIXME: For an item after a '=>', try to include the length of the + # TODO: For an item after a '=>', try to include the length of the # thing before the '=>'. This is crude and should be improved by # actually looking back token by token. if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) { @@ -20607,23 +22188,21 @@ EOM #print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n"; - #--------------------------------------------------------------- - # The main decision: - # Now decide if we will align the data into aligned columns. Do not - # attempt to align columns if this is a tiny table or it would be - # too spaced. It seems that the more packed lines we have, the - # sparser the list that can be allowed and still look ok. - #--------------------------------------------------------------- + #-------------------------------------------------------------------- + # Section C6: A table will work here. But do not attempt to align + # columns if this is a tiny table or it would be too spaced. It + # seems that the more packed lines we have, the sparser the list that + # can be allowed and still look ok. + #-------------------------------------------------------------------- if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines ) || ( $formatted_lines < 2 ) || ( $unused_columns > $max_allowed_sparsity * $formatted_columns ) ) { - - #--------------------------------------------------------------- - # too sparse: would look ugly if aligned in a table; - #--------------------------------------------------------------- + #---------------------------------------------------------------- + # Section C6A: too sparse: would not look good aligned in a table + #---------------------------------------------------------------- # use old breakpoints if this is a 'big' list if ( $packed_lines > 2 && $item_count > 10 ) { @@ -20634,7 +22213,7 @@ EOM # let the continuation logic handle it if 2 lines else { - my $break_count = $self->set_ragged_breakpoints( \@i_term_comma, + my $break_count = $self->set_ragged_breakpoints( $ri_term_comma, $ri_ragged_break_list ); ++$break_count if ($use_separate_first_term); @@ -20650,9 +22229,82 @@ EOM return; } - #--------------------------------------------------------------- - # go ahead and format as a table - #--------------------------------------------------------------- + #-------------------------------------------- + # Section C6B: Go ahead and format as a table + #-------------------------------------------- + $self->write_formatted_table( $number_of_fields, $comma_count, + $rcomma_index, $use_separate_first_term ); + + return; + } ## end sub set_comma_breakpoints_final + + sub lp_table_fix { + + # try to undo some -lp indentation to improve table formatting + + my ( + + $self, # + + $columns, + $i_first_comma, + $max_width, + $number_of_fields, + $number_of_fields_best, + $odd_or_even, + $pair_width, + $ritem_lengths, + + ) = @_; + + my $available_spaces = + $self->get_available_spaces_to_go($i_first_comma); + if ( $available_spaces > 0 ) { + + my $spaces_wanted = $max_width - $columns; # for 1 field + + if ( $number_of_fields_best == 0 ) { + $number_of_fields_best = + get_maximum_fields_wanted($ritem_lengths); + } + + if ( $number_of_fields_best != 1 ) { + my $spaces_wanted_2 = 1 + $pair_width - $columns; # for 2 fields + if ( $available_spaces > $spaces_wanted_2 ) { + $spaces_wanted = $spaces_wanted_2; + } + } + + if ( $spaces_wanted > 0 ) { + my $deleted_spaces = + $self->reduce_lp_indentation( $i_first_comma, + $spaces_wanted ); + + # redo the math + if ( $deleted_spaces > 0 ) { + $columns = table_columns_available($i_first_comma); + $number_of_fields = + maximum_number_of_fields( $columns, $odd_or_even, + $max_width, $pair_width ); + + if ( $number_of_fields_best == 1 + && $number_of_fields >= 1 ) + { + $number_of_fields = $number_of_fields_best; + } + } + } + } + return ( $number_of_fields, $number_of_fields_best, $columns ); + } ## end sub lp_table_fix + + sub write_formatted_table { + + # Write a table of comma separated items with fixed number of fields + my ( $self, $number_of_fields, $comma_count, $rcomma_index, + $use_separate_first_term ) + = @_; + write_logfile_entry( "List: auto formatting with $number_of_fields fields/row\n"); @@ -20666,8 +22318,8 @@ EOM $j += $number_of_fields; } return; - } ## end sub set_comma_breakpoints_do -} ## end closure set_comma_breakpoints_do + } +} ## end closure set_comma_breakpoints_final sub study_list_complexity { @@ -20750,7 +22402,7 @@ sub study_list_complexity { && $i_last_last_break != $i - 2 ) { - ## FIXME: don't strand a small term + ## TODO: don't strand a small term pop @i_ragged_break_list; push @i_ragged_break_list, $i - 2; push @i_ragged_break_list, $i - 1; @@ -20915,7 +22567,24 @@ sub copy_old_breakpoints { my ( $self, $i_first_comma, $i_last_comma ) = @_; for my $i ( $i_first_comma .. $i_last_comma ) { if ( $old_breakpoint_to_go[$i] ) { - $self->set_forced_breakpoint($i); + + # If the comma style is under certain controls, and if this is a + # comma breakpoint with the comma is at the beginning of the next + # line, then we must pass that index instead. This will allow sub + # set_forced_breakpoints to check and follow the user settings. This + # produces a uniform style and can prevent instability (b1422). + # + # The flag '$controlled_comma_style' will be set if the user + # entered any of -wbb=',' -wba=',' -kbb=',' -kba=','. It is not + # needed or set for the -boc flag. + my $ibreak = $i; + if ( $types_to_go[$ibreak] ne ',' && $controlled_comma_style ) { + my $index = $inext_to_go[$ibreak]; + if ( $index > $ibreak && $types_to_go[$index] eq ',' ) { + $ibreak = $index; + } + } + $self->set_forced_breakpoint($ibreak); } } return; @@ -20936,11 +22605,12 @@ sub set_nobreaks { # shouldn't happen; non-critical error else { - 0 && do { + if (DEVEL_MODE) { my ( $a, $b, $c ) = caller(); - print STDOUT - "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"; - }; + Fault(<= 0 ); # List of -lp indentation objects created in this batch - my $rlp_object_list = []; - my $max_lp_object_list = UNDEFINED_INDEX; - - my %last_lp_equals; - my %lp_comma_count; - my %lp_arrow_count; - my $ii_begin_line = 0; - - my $rLL = $self->[_rLL_]; - my $Klimit = $self->[_Klimit_]; - my $rbreak_container = $self->[_rbreak_container_]; - my $rshort_nested = $self->[_rshort_nested_]; - my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_]; - my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; - my $starting_in_quote = $self->[_this_batch_]->[_starting_in_quote_]; - my $K_closing_container = $self->[_K_closing_container_]; - my $rlp_object_by_seqno = $self->[_rlp_object_by_seqno_]; - my $radjusted_levels = $self->[_radjusted_levels_]; - my $rbreak_before_container_by_seqno = - $self->[_rbreak_before_container_by_seqno_]; - my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_]; + $rlp_object_list = []; + $max_lp_object_list = -1; + + %lp_comma_count = (); + %lp_arrow_count = (); + $space_count = undef; + $current_level = undef; + $current_ci_level = undef; + $ii_begin_line = 0; + $in_lp_mode = 0; + $stack_changed = 1; + $K_last_nonblank = undef; + $last_nonblank_token = EMPTY_STRING; + $last_nonblank_type = EMPTY_STRING; + $last_last_nonblank_type = EMPTY_STRING; + + my %last_lp_equals = (); + + my $rLL = $self->[_rLL_]; + my $Klimit = $self->[_Klimit_]; + my $starting_in_quote = $self->[_this_batch_]->[_starting_in_quote_]; + my $radjusted_levels = $self->[_radjusted_levels_]; my $nws = @{$radjusted_levels}; my $imin = 0; @@ -21159,7 +22842,6 @@ sub get_available_spaces_to_go { $imin += 1; } - my $K_last_nonblank; my $Kpnb = $K_to_go[0] - 1; if ( $Kpnb > 0 && $rLL->[$Kpnb]->[_TYPE_] eq 'b' ) { $Kpnb -= 1; @@ -21168,38 +22850,35 @@ sub get_available_spaces_to_go { $K_last_nonblank = $Kpnb; } - my $last_nonblank_token = EMPTY_STRING; - my $last_nonblank_type = EMPTY_STRING; - my $last_last_nonblank_type = EMPTY_STRING; - if ( defined($K_last_nonblank) ) { $last_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_]; $last_nonblank_type = $rLL->[$K_last_nonblank]->[_TYPE_]; } - my ( $space_count, $current_level, $current_ci_level, $in_lp_mode ); - my $stack_changed = 1; - #----------------------------------- # Loop over all tokens in this batch #----------------------------------- foreach my $ii ( $imin .. $max_index_to_go ) { - my $KK = $K_to_go[$ii]; - my $type = $types_to_go[$ii]; - my $token = $tokens_to_go[$ii]; - my $level = $levels_to_go[$ii]; - my $ci_level = $ci_levels_to_go[$ii]; - my $total_depth = $nesting_depth_to_go[$ii]; - my $standard_spaces = $leading_spaces_to_go[$ii]; + my $type = $types_to_go[$ii]; + my $token = $tokens_to_go[$ii]; + my $level = $levels_to_go[$ii]; + my $ci_level = $ci_levels_to_go[$ii]; + my $total_depth = $nesting_depth_to_go[$ii]; #-------------------------------------------------- # Adjust levels if necessary to recycle whitespace: #-------------------------------------------------- if ( defined($radjusted_levels) && @{$radjusted_levels} == $Klimit ) { + my $KK = $K_to_go[$ii]; $level = $radjusted_levels->[$KK]; - if ( $level < 0 ) { $level = 0 } # note: this should not happen + if ( $level < 0 ) { + + # should not happen + DEVEL_MODE && Fault("unexpected level=$level\n"); + $level = 0; + } } # get the top state from the stack if it has changed @@ -21213,523 +22892,44 @@ sub get_available_spaces_to_go { else { $current_ci_level = $rLP_top->[_lp_ci_level_]; $current_level = $rLP_top->[_lp_level_]; - $space_count = $rLP_top->[_lp_space_count_]; - } - $stack_changed = 0; - } - - #------------------------------ - # update the position predictor - #------------------------------ - if ( $type eq '{' || $type eq '(' ) { - - $lp_comma_count{ $total_depth + 1 } = 0; - $lp_arrow_count{ $total_depth + 1 } = 0; - - # If we come to an opening token after an '=' token of some - # type, see if it would be helpful to 'break' after the '=' to - # save space - my $last_equals = $last_lp_equals{$total_depth}; - - # Skip an empty set of parens, such as after channel(): - # my $exchange = $self->_channel()->exchange( - # This fixes issues b1318 b1322 b1323 b1328 - # TODO: maybe also skip parens with just one token? - my $is_empty_container; - if ( $last_equals && $ii < $max_index_to_go ) { - my $seqno = $type_sequence_to_go[$ii]; - my $inext_nb = $ii + 1; - $inext_nb++ - if ( $types_to_go[$inext_nb] eq 'b' ); - my $seqno_nb = $type_sequence_to_go[$inext_nb]; - $is_empty_container = - $seqno && $seqno_nb && $seqno_nb == $seqno; - } - - if ( $last_equals - && $last_equals > $ii_begin_line - && !$is_empty_container ) - { - - my $seqno = $type_sequence_to_go[$ii]; - - # find the position if we break at the '=' - my $i_test = $last_equals; - - # Fix for issue b1229, check for break before - if ( $want_break_before{ $types_to_go[$i_test] } ) { - if ( $i_test > 0 ) { $i_test-- } - } - elsif ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ } - - my $test_position = total_line_length( $i_test, $ii ); - my $mll = - $maximum_line_length_at_level[ $levels_to_go[$i_test] ]; - - #------------------------------------------------------ - # Break if structure will reach the maximum line length - #------------------------------------------------------ - - # Historically, -lp just used one-half line length here - my $len_increase = $rOpts_maximum_line_length / 2; - - # For -xlp, we can also use the pre-computed lengths - my $min_len = $rcollapsed_length_by_seqno->{$seqno}; - if ( $min_len && $min_len > $len_increase ) { - $len_increase = $min_len; - } - - if ( - - # if we might exceed the maximum line length - $lp_position_predictor + $len_increase > $mll - - # if a -bbx flag WANTS a break before this opening token - || ( $seqno - && $rbreak_before_container_by_seqno->{$seqno} ) - - # or we are beyond the 1/4 point and there was an old - # break at an assignment (not '=>') [fix for b1035] - || ( - $lp_position_predictor > - $mll - $rOpts_maximum_line_length * 3 / 4 - && $types_to_go[$last_equals] ne '=>' - && ( - $old_breakpoint_to_go[$last_equals] - || ( $last_equals > 0 - && $old_breakpoint_to_go[ $last_equals - 1 ] - ) - || ( $last_equals > 1 - && $types_to_go[ $last_equals - 1 ] eq 'b' - && $old_breakpoint_to_go[ $last_equals - 2 ] - ) - ) - ) - ) - { - - # then make the switch -- note that we do not set a - # real breakpoint here because we may not really need - # one; sub break_lists will do that if necessary. - - my $Kc = $K_closing_container->{$seqno}; - if ( - - # For -lp, only if the closing token is in this - # batch (c117). Otherwise it cannot be done by sub - # break_lists. - defined($Kc) && $Kc <= $K_to_go[$max_index_to_go] - - # For -xlp, we only need one nonblank token after - # the opening token. - || $rOpts_extended_line_up_parentheses - ) - { - $ii_begin_line = $i_test + 1; - $lp_position_predictor = $test_position; - - #-------------------------------------------------- - # Fix for an opening container terminating a batch: - #-------------------------------------------------- - # To get alignment of a -lp container with its - # contents, we have to put a break after $i_test. - # For $ii<$max_index_to_go, this will be done by - # sub break_lists based on the indentation object. - # But for $ii=$max_index_to_go, the indentation - # object for this seqno will not be created until - # the next batch, so we have to set a break at - # $i_test right now in order to get one. - if ( $ii == $max_index_to_go - && !$block_type_to_go[$ii] - && $type eq '{' - && $seqno - && !$ris_excluded_lp_container->{$seqno} ) - { - $self->set_forced_lp_break( $ii_begin_line, - $ii ); - } - } - } - } - } ## end update position predictor - - #------------------------ - # Handle decreasing depth - #------------------------ - # Note that one token may have both decreasing and then increasing - # depth. For example, (level, ci) can go from (1,1) to (2,0). So, - # in this example we would first go back to (1,0) then up to (2,0) - # in a single call. - if ( $level < $current_level || $ci_level < $current_ci_level ) { - - # loop to find the first entry at or completely below this level - while (1) { - if ($max_lp_stack) { - - # save index of token which closes this level - if ( $rLP->[$max_lp_stack]->[_lp_object_] ) { - my $lp_object = - $rLP->[$max_lp_stack]->[_lp_object_]; - - $lp_object->set_closed($ii); - - my $comma_count = 0; - my $arrow_count = 0; - if ( $type eq '}' || $type eq ')' ) { - $comma_count = $lp_comma_count{$total_depth}; - $arrow_count = $lp_arrow_count{$total_depth}; - $comma_count = 0 unless $comma_count; - $arrow_count = 0 unless $arrow_count; - } - - $lp_object->set_comma_count($comma_count); - $lp_object->set_arrow_count($arrow_count); - - # Undo any extra indentation if we saw no commas - my $available_spaces = - $lp_object->get_available_spaces(); - my $K_start = $lp_object->get_K_begin_line(); - - if ( $available_spaces > 0 - && $K_start >= $K_to_go[0] - && ( $comma_count <= 0 || $arrow_count > 0 ) ) - { - - my $i = $lp_object->get_lp_item_index(); - - # Safety check for a valid stack index. It - # should be ok because we just checked that the - # index K of the token associated with this - # indentation is in this batch. - if ( $i < 0 || $i > $max_lp_object_list ) { - if (DEVEL_MODE) { - my $lno = $rLL->[$KK]->[_LINE_INDEX_]; - Fault(<=0 and <= max=$max_lp_object_list -EOM - } - } - else { - if ( $arrow_count == 0 ) { - $rlp_object_list->[$i] - ->permanently_decrease_available_spaces - ($available_spaces); - } - else { - $rlp_object_list->[$i] - ->tentatively_decrease_available_spaces - ($available_spaces); - } - foreach - my $j ( $i + 1 .. $max_lp_object_list ) - { - $rlp_object_list->[$j] - ->decrease_SPACES($available_spaces); - } - } - } - } - - # go down one level - --$max_lp_stack; - - my $rLP_top = $rLP->[$max_lp_stack]; - my $ci_lev = $rLP_top->[_lp_ci_level_]; - my $lev = $rLP_top->[_lp_level_]; - my $spaces = $rLP_top->[_lp_space_count_]; - if ( $rLP_top->[_lp_object_] ) { - my $lp_obj = $rLP_top->[_lp_object_]; - ( $spaces, $lev, $ci_lev ) = - @{ $lp_obj->get_spaces_level_ci() }; - } - - # stop when we reach a level at or below the current - # level - if ( $lev <= $level && $ci_lev <= $ci_level ) { - $space_count = $spaces; - $current_level = $lev; - $current_ci_level = $ci_lev; - last; - } - } - - # reached bottom of stack .. should never happen because - # only negative levels can get here, and $level was forced - # to be positive above. - else { - - # non-fatal, keep going except in DEVEL_MODE - if (DEVEL_MODE) { -##program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp - Fault(< $current_level || $ci_level > $current_ci_level ) { - - $stack_changed = 1; - - # Compute the standard incremental whitespace. This will be - # the minimum incremental whitespace that will be used. This - # choice results in a smooth transition between the gnu-style - # and the standard style. - my $standard_increment = - ( $level - $current_level ) * - $rOpts_indent_columns + - ( $ci_level - $current_ci_level ) * - $rOpts_continuation_indentation; - - # Now we have to define how much extra incremental space - # ("$available_space") we want. This extra space will be - # reduced as necessary when long lines are encountered or when - # it becomes clear that we do not have a good list. - my $available_spaces = 0; - my $align_seqno = 0; - - my $last_nonblank_seqno; - my $last_nonblank_block_type; - if ( defined($K_last_nonblank) ) { - $last_nonblank_seqno = - $rLL->[$K_last_nonblank]->[_TYPE_SEQUENCE_]; - $last_nonblank_block_type = - $last_nonblank_seqno - ? $rblock_type_of_seqno->{$last_nonblank_seqno} - : undef; - } - - $in_lp_mode = $rLP->[$max_lp_stack]->[_lp_object_]; - - #----------------------------------------------- - # Initialize indentation spaces on empty stack.. - #----------------------------------------------- - if ( $max_lp_stack == 0 ) { - $space_count = $level * $rOpts_indent_columns; - } - - #---------------------------------------- - # Add the standard space increment if ... - #---------------------------------------- - elsif ( - - # if this is a BLOCK, add the standard increment - $last_nonblank_block_type - - # or if this is not a sequenced item - || !$last_nonblank_seqno - - # or this container is excluded by user rules - # or contains here-docs or multiline qw text - || defined($last_nonblank_seqno) - && $ris_excluded_lp_container->{$last_nonblank_seqno} - - # or if last nonblank token was not structural indentation - || $last_nonblank_type ne '{' - - # and do not start -lp under stress .. fixes b1244, b1255 - || !$in_lp_mode && $level >= $lp_cutoff_level - - ) - { - - # If we have entered lp mode, use the top lp object to get - # the current indentation spaces because it may have - # changed. Fixes b1285, b1286. - if ($in_lp_mode) { - $space_count = $in_lp_mode->get_spaces(); - } - $space_count += $standard_increment; - } - - #--------------------------------------------------------------- - # -lp mode: try to use space to the first non-blank level change - #--------------------------------------------------------------- - else { - - # see how much space we have available - my $test_space_count = $lp_position_predictor; - my $excess = 0; - my $min_len = - $rcollapsed_length_by_seqno->{$last_nonblank_seqno}; - my $next_opening_too_far; - - if ( defined($min_len) ) { - $excess = - $test_space_count + - $min_len - - $maximum_line_length_at_level[$level]; - if ( $excess > 0 ) { - $test_space_count -= $excess; - - # will the next opening token be a long way out? - $next_opening_too_far = - $lp_position_predictor + $excess > - $maximum_line_length_at_level[$level]; - } - } - - my $rLP_top = $rLP->[$max_lp_stack]; - my $min_gnu_indentation = $rLP_top->[_lp_space_count_]; - if ( $rLP_top->[_lp_object_] ) { - $min_gnu_indentation = - $rLP_top->[_lp_object_]->get_spaces(); - } - $available_spaces = - $test_space_count - $min_gnu_indentation; - - # Do not startup -lp indentation mode if no space ... - # ... or if it puts the opening far to the right - if ( !$in_lp_mode - && ( $available_spaces <= 0 || $next_opening_too_far ) ) - { - $space_count += $standard_increment; - $available_spaces = 0; - } - - # Use -lp mode - else { - $space_count = $test_space_count; - - $in_lp_mode = 1; - if ( $available_spaces >= $standard_increment ) { - $min_gnu_indentation += $standard_increment; - } - elsif ( $available_spaces > 1 ) { - $min_gnu_indentation += $available_spaces + 1; - } - ##elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) { - elsif ( $is_opening_token{$last_nonblank_token} ) { - if ( ( $tightness{$last_nonblank_token} < 2 ) ) { - $min_gnu_indentation += 2; - } - else { - $min_gnu_indentation += 1; - } - } - else { - $min_gnu_indentation += $standard_increment; - } - $available_spaces = $space_count - $min_gnu_indentation; - - if ( $available_spaces < 0 ) { - $space_count = $min_gnu_indentation; - $available_spaces = 0; - } - $align_seqno = $last_nonblank_seqno; - } - } - - #------------------------------------------- - # update the state, but not on a blank token - #------------------------------------------- - if ( $type ne 'b' ) { - - if ( $rLP->[$max_lp_stack]->[_lp_object_] ) { - $rLP->[$max_lp_stack]->[_lp_object_]->set_have_child(1); - $in_lp_mode = 1; - } - - #---------------------------------------- - # Create indentation object if in lp-mode - #---------------------------------------- - ++$max_lp_stack; - my $lp_object; - if ($in_lp_mode) { - - # A negative level implies not to store the item in the - # item_list - my $lp_item_index = 0; - if ( $level >= 0 ) { - $lp_item_index = ++$max_lp_object_list; - } - - my $K_begin_line = 0; - if ( $ii_begin_line >= 0 - && $ii_begin_line <= $max_index_to_go ) - { - $K_begin_line = $K_to_go[$ii_begin_line]; - } - - # Minor Fix: when creating indentation at a side - # comment we don't know what the space to the actual - # next code token will be. We will allow a space for - # sub correct_lp to move it in if necessary. - if ( $type eq '#' - && $max_index_to_go > 0 - && $align_seqno ) - { - $available_spaces += 1; - } - - $lp_object = Perl::Tidy::IndentationItem->new( - spaces => $space_count, - level => $level, - ci_level => $ci_level, - available_spaces => $available_spaces, - lp_item_index => $lp_item_index, - align_seqno => $align_seqno, - stack_depth => $max_lp_stack, - K_begin_line => $K_begin_line, - standard_spaces => $standard_spaces, - ); + $space_count = $rLP_top->[_lp_space_count_]; + } + $stack_changed = 0; + } - DEBUG_LP && do { - my $tok_beg = $rLL->[$K_begin_line]->[_TOKEN_]; - print STDERR <= 0 ) { - $rlp_object_list->[$max_lp_object_list] = - $lp_object; - } + # If we come to an opening token after an '=' token of some + # type, see if it would be helpful to 'break' after the '=' to + # save space + my $ii_last_equals = $last_lp_equals{$total_depth}; + if ($ii_last_equals) { + $self->lp_equals_break_check( $ii, $ii_last_equals ); + } + } - ##if ( $last_nonblank_token =~ /^[\{\[\(]$/ - if ( $is_opening_token{$last_nonblank_token} - && $last_nonblank_seqno ) - { - $rlp_object_by_seqno->{$last_nonblank_seqno} = - $lp_object; - } - } + #------------------------ + # Handle decreasing depth + #------------------------ + # Note that one token may have both decreasing and then increasing + # depth. For example, (level, ci) can go from (1,1) to (2,0). So, + # in this example we would first go back to (1,0) then up to (2,0) + # in a single call. + if ( $level < $current_level || $ci_level < $current_ci_level ) { + $self->lp_decreasing_depth($ii); + } - #------------------------------------ - # Store this indentation on the stack - #------------------------------------ - $rLP->[$max_lp_stack]->[_lp_ci_level_] = $ci_level; - $rLP->[$max_lp_stack]->[_lp_level_] = $level; - $rLP->[$max_lp_stack]->[_lp_object_] = $lp_object; - $rLP->[$max_lp_stack]->[_lp_container_seqno_] = - $last_nonblank_seqno; - $rLP->[$max_lp_stack]->[_lp_space_count_] = $space_count; - - # If the opening paren is beyond the half-line length, then - # we will use the minimum (standard) indentation. This will - # help avoid problems associated with running out of space - # near the end of a line. As a result, in deeply nested - # lists, there will be some indentations which are limited - # to this minimum standard indentation. But the most deeply - # nested container will still probably be able to shift its - # parameters to the right for proper alignment, so in most - # cases this will not be noticeable. - if ( $available_spaces > 0 && $lp_object ) { - my $halfway = - $maximum_line_length_at_level[$level] - - $rOpts_maximum_line_length / 2; - $lp_object->tentatively_decrease_available_spaces( - $available_spaces) - if ( $space_count > $halfway ); - } - } - } ## end increasing depth + #------------------------ + # handle increasing depth + #------------------------ + if ( $level > $current_level || $ci_level > $current_ci_level ) { + $self->lp_increasing_depth($ii); + } #------------------ # Handle all tokens @@ -21757,73 +22957,75 @@ EOM # this token might start a new line if .. if ( + $ii > $ii_begin_line - # this is the first nonblank token of the line - $ii == 1 && $types_to_go[0] eq 'b' + && ( - # or previous character was one of these: - # /^([\:\?\,f])$/ - || $hash_test2{$last_nonblank_type} + # this is the first nonblank token of the line + $ii == 1 && $types_to_go[0] eq 'b' - # or previous character was opening and this is not closing - || ( $last_nonblank_type eq '{' && $type ne '}' ) - || ( $last_nonblank_type eq '(' and $type ne ')' ) + # or previous character was one of these: + # /^([\:\?\,f])$/ + || $hash_test2{$last_nonblank_type} - # or this token is one of these: - # /^([\.]|\|\||\&\&)$/ - || $hash_test3{$type} + # or previous character was opening and this is not + # closing + || ( $last_nonblank_type eq '{' && $type ne '}' ) + || ( $last_nonblank_type eq '(' and $type ne ')' ) - # or this is a closing structure - || ( $last_nonblank_type eq '}' - && $last_nonblank_token eq $last_nonblank_type ) + # or this token is one of these: + # /^([\.]|\|\||\&\&)$/ + || $hash_test3{$type} - # or previous token was keyword 'return' - || ( - $last_nonblank_type eq 'k' - && ( $last_nonblank_token eq 'return' - && $type ne '{' ) - ) + # or this is a closing structure + || ( $last_nonblank_type eq '}' + && $last_nonblank_token eq $last_nonblank_type ) + + # or previous token was keyword 'return' + || ( + $last_nonblank_type eq 'k' + && ( $last_nonblank_token eq 'return' + && $type ne '{' ) + ) - # or starting a new line at certain keywords is fine - || ( $type eq 'k' - && $is_if_unless_and_or_last_next_redo_return{$token} ) + # or starting a new line at certain keywords is fine + || ( $type eq 'k' + && $is_if_unless_and_or_last_next_redo_return{ + $token} ) - # or this is after an assignment after a closing structure - || ( - $is_assignment{$last_nonblank_type} - && ( - # /^[\}\)\]]$/ - $hash_test1{$last_last_nonblank_type} + # or this is after an assignment after a closing + # structure + || ( + $is_assignment{$last_nonblank_type} + && ( + # /^[\}\)\]]$/ + $hash_test1{$last_last_nonblank_type} - # and it is significantly to the right - || $lp_position_predictor > ( - $maximum_line_length_at_level[$level] - - $rOpts_maximum_line_length / 2 + # and it is significantly to the right + || $lp_position_predictor > ( + $maximum_line_length_at_level[$level] - + $rOpts_maximum_line_length / 2 + ) ) ) ) ) { - check_for_long_gnu_style_lines( $ii, $rlp_object_list ); + check_for_long_gnu_style_lines($ii); $ii_begin_line = $ii; # back up 1 token if we want to break before that type # otherwise, we may strand tokens like '?' or ':' on a line if ( $ii_begin_line > 0 ) { - if ( $last_nonblank_type eq 'k' ) { - - if ( $want_break_before{$last_nonblank_token} ) { - $ii_begin_line--; - } - } - elsif ( $want_break_before{$last_nonblank_type} ) { - $ii_begin_line--; - } + my $wbb = + $last_nonblank_type eq 'k' + ? $want_break_before{$last_nonblank_token} + : $want_break_before{$last_nonblank_type}; + $ii_begin_line-- if ($wbb); } - } ## end if ( $ii == 1 && $types_to_go...) - - $K_last_nonblank = $KK; + } + $K_last_nonblank = $K_to_go[$ii]; $last_last_nonblank_type = $last_nonblank_type; $last_nonblank_type = $type; $last_nonblank_token = $token; @@ -21876,19 +23078,561 @@ EOM } } ## end loop over all tokens in this batch - undo_incomplete_lp_indentation($rlp_object_list) + undo_incomplete_lp_indentation() if ( !$rOpts_extended_line_up_parentheses ); return; } ## end sub set_lp_indentation + sub lp_equals_break_check { + + my ( $self, $ii, $ii_last_equals ) = @_; + + # If we come to an opening token after an '=' token of some + # type, see if it would be helpful to 'break' after the '=' to + # save space. + + # Given: + # $ii = index of an opening token in the output batch + # $ii_begin_line = index of token starting next output line + # Update: + # $lp_position_predictor - updated position predictor + # $ii_begin_line = updated starting token index + + # Skip an empty set of parens, such as after channel(): + # my $exchange = $self->_channel()->exchange( + # This fixes issues b1318 b1322 b1323 b1328 + my $is_empty_container; + if ( $ii_last_equals && $ii < $max_index_to_go ) { + my $seqno = $type_sequence_to_go[$ii]; + my $inext_nb = $ii + 1; + $inext_nb++ + if ( $types_to_go[$inext_nb] eq 'b' ); + my $seqno_nb = $type_sequence_to_go[$inext_nb]; + $is_empty_container = $seqno && $seqno_nb && $seqno_nb == $seqno; + } + + if ( $ii_last_equals + && $ii_last_equals > $ii_begin_line + && !$is_empty_container ) + { + + my $seqno = $type_sequence_to_go[$ii]; + + # find the position if we break at the '=' + my $i_test = $ii_last_equals; + + # Fix for issue b1229, check if want break before this token + # Fix for issue b1356, if i_test is a blank, the leading spaces may + # be incorrect (if it was an interline blank). + # Fix for issue b1357 .. b1370, i_test must be prev nonblank + # ( the ci value for blanks can vary ) + # See also case b223 + # Fix for issue b1371-b1374 : all of these and the above are fixed + # by simply backing up one index and setting the leading spaces of + # a blank equal to that of the equals. + if ( $want_break_before{ $types_to_go[$i_test] } ) { + $i_test -= 1; + $leading_spaces_to_go[$i_test] = + $leading_spaces_to_go[$ii_last_equals] + if ( $types_to_go[$i_test] eq 'b' ); + } + elsif ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ } + + my $test_position = total_line_length( $i_test, $ii ); + my $mll = $maximum_line_length_at_level[ $levels_to_go[$i_test] ]; + + #------------------------------------------------------ + # Break if structure will reach the maximum line length + #------------------------------------------------------ + + # Historically, -lp just used one-half line length here + my $len_increase = $rOpts_maximum_line_length / 2; + + # For -xlp, we can also use the pre-computed lengths + my $min_len = $self->[_rcollapsed_length_by_seqno_]->{$seqno}; + if ( $min_len && $min_len > $len_increase ) { + $len_increase = $min_len; + } + + if ( + + # if we might exceed the maximum line length + $lp_position_predictor + $len_increase > $mll + + # if a -bbx flag WANTS a break before this opening token + || ( $seqno + && $self->[_rbreak_before_container_by_seqno_]->{$seqno} ) + + # or we are beyond the 1/4 point and there was an old + # break at an assignment (not '=>') [fix for b1035] + || ( + $lp_position_predictor > + $mll - $rOpts_maximum_line_length * 3 / 4 + && $types_to_go[$ii_last_equals] ne '=>' + && ( + $old_breakpoint_to_go[$ii_last_equals] + || ( $ii_last_equals > 0 + && $old_breakpoint_to_go[ $ii_last_equals - 1 ] ) + || ( $ii_last_equals > 1 + && $types_to_go[ $ii_last_equals - 1 ] eq 'b' + && $old_breakpoint_to_go[ $ii_last_equals - 2 ] ) + ) + ) + ) + { + + # then make the switch -- note that we do not set a + # real breakpoint here because we may not really need + # one; sub break_lists will do that if necessary. + + my $Kc = $self->[_K_closing_container_]->{$seqno}; + if ( + + # For -lp, only if the closing token is in this + # batch (c117). Otherwise it cannot be done by sub + # break_lists. + defined($Kc) && $Kc <= $K_to_go[$max_index_to_go] + + # For -xlp, we only need one nonblank token after + # the opening token. + || $rOpts_extended_line_up_parentheses + ) + { + $ii_begin_line = $i_test + 1; + $lp_position_predictor = $test_position; + + #-------------------------------------------------- + # Fix for an opening container terminating a batch: + #-------------------------------------------------- + # To get alignment of a -lp container with its + # contents, we have to put a break after $i_test. + # For $ii<$max_index_to_go, this will be done by + # sub break_lists based on the indentation object. + # But for $ii=$max_index_to_go, the indentation + # object for this seqno will not be created until + # the next batch, so we have to set a break at + # $i_test right now in order to get one. + if ( $ii == $max_index_to_go + && !$block_type_to_go[$ii] + && $types_to_go[$ii] eq '{' + && $seqno + && !$self->[_ris_excluded_lp_container_]->{$seqno} ) + { + $self->set_forced_lp_break( $ii_begin_line, $ii ); + } + } + } + } + return; + } ## end sub lp_equals_break_check + + sub lp_decreasing_depth { + my ( $self, $ii ) = @_; + + my $rLL = $self->[_rLL_]; + + my $level = $levels_to_go[$ii]; + my $ci_level = $ci_levels_to_go[$ii]; + + # loop to find the first entry at or completely below this level + while (1) { + + # Be sure we have not hit the stack bottom - should never + # happen because only negative levels can get here, and + # $level was forced to be positive above. + if ( !$max_lp_stack ) { + + # non-fatal, just keep going except in DEVEL_MODE + if (DEVEL_MODE) { + Fault(<[$max_lp_stack]->[_lp_object_] ) { + my $lp_object = $rLP->[$max_lp_stack]->[_lp_object_]; + + $lp_object->set_closed($ii); + + my $comma_count = 0; + my $arrow_count = 0; + my $type = $types_to_go[$ii]; + if ( $type eq '}' || $type eq ')' ) { + my $total_depth = $nesting_depth_to_go[$ii]; + $comma_count = $lp_comma_count{$total_depth}; + $arrow_count = $lp_arrow_count{$total_depth}; + $comma_count = 0 unless $comma_count; + $arrow_count = 0 unless $arrow_count; + } + + $lp_object->set_comma_count($comma_count); + $lp_object->set_arrow_count($arrow_count); + + # Undo any extra indentation if we saw no commas + my $available_spaces = $lp_object->get_available_spaces(); + my $K_start = $lp_object->get_K_begin_line(); + + if ( $available_spaces > 0 + && $K_start >= $K_to_go[0] + && ( $comma_count <= 0 || $arrow_count > 0 ) ) + { + + my $i = $lp_object->get_lp_item_index(); + + # Safety check for a valid stack index. It + # should be ok because we just checked that the + # index K of the token associated with this + # indentation is in this batch. + if ( $i < 0 || $i > $max_lp_object_list ) { + my $KK = $K_to_go[$ii]; + my $lno = $rLL->[$KK]->[_LINE_INDEX_]; + DEVEL_MODE && Fault(<=0 and <= max=$max_lp_object_list +EOM + last; + } + + if ( $arrow_count == 0 ) { + $rlp_object_list->[$i] + ->permanently_decrease_available_spaces( + $available_spaces); + } + else { + $rlp_object_list->[$i] + ->tentatively_decrease_available_spaces( + $available_spaces); + } + foreach my $j ( $i + 1 .. $max_lp_object_list ) { + $rlp_object_list->[$j] + ->decrease_SPACES($available_spaces); + } + } + } + + # go down one level + --$max_lp_stack; + + my $rLP_top = $rLP->[$max_lp_stack]; + my $ci_lev = $rLP_top->[_lp_ci_level_]; + my $lev = $rLP_top->[_lp_level_]; + my $spaces = $rLP_top->[_lp_space_count_]; + if ( $rLP_top->[_lp_object_] ) { + my $lp_obj = $rLP_top->[_lp_object_]; + ( $spaces, $lev, $ci_lev ) = + @{ $lp_obj->get_spaces_level_ci() }; + } + + # stop when we reach a level at or below the current + # level + if ( $lev <= $level && $ci_lev <= $ci_level ) { + $space_count = $spaces; + $current_level = $lev; + $current_ci_level = $ci_lev; + last; + } + } + return; + } ## end sub lp_decreasing_depth + + sub lp_increasing_depth { + my ( $self, $ii ) = @_; + + my $rLL = $self->[_rLL_]; + + my $type = $types_to_go[$ii]; + my $level = $levels_to_go[$ii]; + my $ci_level = $ci_levels_to_go[$ii]; + + $stack_changed = 1; + + # Compute the standard incremental whitespace. This will be + # the minimum incremental whitespace that will be used. This + # choice results in a smooth transition between the gnu-style + # and the standard style. + my $standard_increment = + ( $level - $current_level ) * $rOpts_indent_columns + + ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation; + + # Now we have to define how much extra incremental space + # ("$available_space") we want. This extra space will be + # reduced as necessary when long lines are encountered or when + # it becomes clear that we do not have a good list. + my $available_spaces = 0; + my $align_seqno = 0; + my $K_extra_space; + + my $last_nonblank_seqno; + my $last_nonblank_block_type; + if ( defined($K_last_nonblank) ) { + $last_nonblank_seqno = $rLL->[$K_last_nonblank]->[_TYPE_SEQUENCE_]; + $last_nonblank_block_type = + $last_nonblank_seqno + ? $self->[_rblock_type_of_seqno_]->{$last_nonblank_seqno} + : undef; + } + + $in_lp_mode = $rLP->[$max_lp_stack]->[_lp_object_]; + + #----------------------------------------------- + # Initialize indentation spaces on empty stack.. + #----------------------------------------------- + if ( $max_lp_stack == 0 ) { + $space_count = $level * $rOpts_indent_columns; + } + + #---------------------------------------- + # Add the standard space increment if ... + #---------------------------------------- + elsif ( + + # if this is a BLOCK, add the standard increment + $last_nonblank_block_type + + # or if this is not a sequenced item + || !$last_nonblank_seqno + + # or this container is excluded by user rules + # or contains here-docs or multiline qw text + || defined($last_nonblank_seqno) + && $self->[_ris_excluded_lp_container_]->{$last_nonblank_seqno} + + # or if last nonblank token was not structural indentation + || $last_nonblank_type ne '{' + + # and do not start -lp under stress .. fixes b1244, b1255 + || !$in_lp_mode && $level >= $high_stress_level + + ) + { + + # If we have entered lp mode, use the top lp object to get + # the current indentation spaces because it may have + # changed. Fixes b1285, b1286. + if ($in_lp_mode) { + $space_count = $in_lp_mode->get_spaces(); + } + $space_count += $standard_increment; + } + + #--------------------------------------------------------------- + # -lp mode: try to use space to the first non-blank level change + #--------------------------------------------------------------- + else { + + # see how much space we have available + my $test_space_count = $lp_position_predictor; + my $excess = 0; + my $min_len = + $self->[_rcollapsed_length_by_seqno_]->{$last_nonblank_seqno}; + my $next_opening_too_far; + + if ( defined($min_len) ) { + $excess = + $test_space_count + + $min_len - + $maximum_line_length_at_level[$level]; + if ( $excess > 0 ) { + $test_space_count -= $excess; + + # will the next opening token be a long way out? + $next_opening_too_far = + $lp_position_predictor + $excess > + $maximum_line_length_at_level[$level]; + } + } + + my $rLP_top = $rLP->[$max_lp_stack]; + my $min_gnu_indentation = $rLP_top->[_lp_space_count_]; + if ( $rLP_top->[_lp_object_] ) { + $min_gnu_indentation = $rLP_top->[_lp_object_]->get_spaces(); + } + $available_spaces = $test_space_count - $min_gnu_indentation; + + # Do not startup -lp indentation mode if no space ... + # ... or if it puts the opening far to the right + if ( !$in_lp_mode + && ( $available_spaces <= 0 || $next_opening_too_far ) ) + { + $space_count += $standard_increment; + $available_spaces = 0; + } + + # Use -lp mode + else { + $space_count = $test_space_count; + + $in_lp_mode = 1; + if ( $available_spaces >= $standard_increment ) { + $min_gnu_indentation += $standard_increment; + } + elsif ( $available_spaces > 1 ) { + $min_gnu_indentation += $available_spaces + 1; + + # The "+1" space can cause mis-alignment if there is no + # blank space between the opening paren and the next + # nonblank token (i.e., -pt=2) and the container does not + # get broken open. So we will mark this token for later + # space removal by sub 'xlp_tweak' if this container + # remains intact (issue git #106). + if ( + $type ne 'b' + + # Skip if the maximum line length is exceeded here + && $excess <= 0 + + # This is only for level changes, not ci level changes. + # But note: this test is here out of caution but I have + # not found a case where it is actually necessary. + && $is_opening_token{$last_nonblank_token} + + # Be sure we are at consecutive nonblanks. This test + # should be true, but it guards against future coding + # changes to level values assigned to blank spaces. + && $ii > 0 + && $types_to_go[ $ii - 1 ] ne 'b' + + ) + { + $K_extra_space = $K_to_go[$ii]; + } + } + elsif ( $is_opening_token{$last_nonblank_token} ) { + if ( ( $tightness{$last_nonblank_token} < 2 ) ) { + $min_gnu_indentation += 2; + } + else { + $min_gnu_indentation += 1; + } + } + else { + $min_gnu_indentation += $standard_increment; + } + $available_spaces = $space_count - $min_gnu_indentation; + + if ( $available_spaces < 0 ) { + $space_count = $min_gnu_indentation; + $available_spaces = 0; + } + $align_seqno = $last_nonblank_seqno; + } + } + + #------------------------------------------- + # update the state, but not on a blank token + #------------------------------------------- + if ( $type ne 'b' ) { + + if ( $rLP->[$max_lp_stack]->[_lp_object_] ) { + $rLP->[$max_lp_stack]->[_lp_object_]->set_have_child(1); + $in_lp_mode = 1; + } + + #---------------------------------------- + # Create indentation object if in lp-mode + #---------------------------------------- + ++$max_lp_stack; + my $lp_object; + if ($in_lp_mode) { + + # A negative level implies not to store the item in the + # item_list + my $lp_item_index = 0; + if ( $level >= 0 ) { + $lp_item_index = ++$max_lp_object_list; + } + + my $K_begin_line = 0; + if ( $ii_begin_line >= 0 + && $ii_begin_line <= $max_index_to_go ) + { + $K_begin_line = $K_to_go[$ii_begin_line]; + } + + # Minor Fix: when creating indentation at a side + # comment we don't know what the space to the actual + # next code token will be. We will allow a space for + # sub correct_lp to move it in if necessary. + if ( $type eq '#' + && $max_index_to_go > 0 + && $align_seqno ) + { + $available_spaces += 1; + } + + my $standard_spaces = $leading_spaces_to_go[$ii]; + $lp_object = Perl::Tidy::IndentationItem->new( + spaces => $space_count, + level => $level, + ci_level => $ci_level, + available_spaces => $available_spaces, + lp_item_index => $lp_item_index, + align_seqno => $align_seqno, + stack_depth => $max_lp_stack, + K_begin_line => $K_begin_line, + standard_spaces => $standard_spaces, + K_extra_space => $K_extra_space, + ); + + DEBUG_LP && do { + my $tok_beg = $rLL->[$K_begin_line]->[_TOKEN_]; + my $token = $tokens_to_go[$ii]; + print STDERR <= 0 ) { + $rlp_object_list->[$max_lp_object_list] = $lp_object; + } + + if ( $is_opening_token{$last_nonblank_token} + && $last_nonblank_seqno ) + { + $self->[_rlp_object_by_seqno_]->{$last_nonblank_seqno} = + $lp_object; + } + } + + #------------------------------------ + # Store this indentation on the stack + #------------------------------------ + $rLP->[$max_lp_stack]->[_lp_ci_level_] = $ci_level; + $rLP->[$max_lp_stack]->[_lp_level_] = $level; + $rLP->[$max_lp_stack]->[_lp_object_] = $lp_object; + $rLP->[$max_lp_stack]->[_lp_container_seqno_] = + $last_nonblank_seqno; + $rLP->[$max_lp_stack]->[_lp_space_count_] = $space_count; + + # If the opening paren is beyond the half-line length, then + # we will use the minimum (standard) indentation. This will + # help avoid problems associated with running out of space + # near the end of a line. As a result, in deeply nested + # lists, there will be some indentations which are limited + # to this minimum standard indentation. But the most deeply + # nested container will still probably be able to shift its + # parameters to the right for proper alignment, so in most + # cases this will not be noticeable. + if ( $available_spaces > 0 && $lp_object ) { + my $halfway = + $maximum_line_length_at_level[$level] - + $rOpts_maximum_line_length / 2; + $lp_object->tentatively_decrease_available_spaces( + $available_spaces) + if ( $space_count > $halfway ); + } + } + return; + } ## end sub lp_increasing_depth + sub check_for_long_gnu_style_lines { # look at the current estimated maximum line length, and # remove some whitespace if it exceeds the desired maximum - my ( $mx_index_to_go, $rlp_object_list ) = @_; - - my $max_lp_object_list = @{$rlp_object_list} - 1; + my ($mx_index_to_go) = @_; # nothing can be done if no stack items defined for this line return if ( $max_lp_object_list < 0 ); @@ -21990,9 +23734,6 @@ EOM # was always done because it could cause problems otherwise, but recent # improvements allow fairly good results to be obtained by skipping # this step with the -xlp flag. - my ($rlp_object_list) = @_; - - my $max_lp_object_list = @{$rlp_object_list} - 1; # nothing to do if no stack items defined for this line return if ( $max_lp_object_list < 0 ); @@ -22176,31 +23917,28 @@ sub convey_batch_to_vertical_aligner { # have been defined. Here we prepare the lines for passing to the vertical # aligner. We do the following tasks: # - mark certain vertical alignment tokens, such as '=', in each line - # - make minor indentation adjustments + # - make final indentation adjustments # - do logical padding: insert extra blank spaces to help display certain # logical constructions + # - send the line to the vertical aligner + + my $rLL = $self->[_rLL_]; + my $Klimit = $self->[_Klimit_]; + my $ris_list_by_seqno = $self->[_ris_list_by_seqno_]; + my $this_batch = $self->[_this_batch_]; - my $this_batch = $self->[_this_batch_]; - my $ri_first = $this_batch->[_ri_first_]; - my $ri_last = $this_batch->[_ri_last_]; + my $do_not_pad = $this_batch->[_do_not_pad_]; + my $starting_in_quote = $this_batch->[_starting_in_quote_]; + my $ending_in_quote = $this_batch->[_ending_in_quote_]; + my $is_static_block_comment = $this_batch->[_is_static_block_comment_]; + my $batch_CODE_type = $this_batch->[_batch_CODE_type_]; + my $ri_first = $this_batch->[_ri_first_]; + my $ri_last = $this_batch->[_ri_last_]; $self->check_convey_batch_input( $ri_first, $ri_last ) if (DEVEL_MODE); my $n_last_line = @{$ri_first} - 1; - my $do_not_pad = $this_batch->[_do_not_pad_]; - my $peak_batch_size = $this_batch->[_peak_batch_size_]; - my $starting_in_quote = $this_batch->[_starting_in_quote_]; - my $ending_in_quote = $this_batch->[_ending_in_quote_]; - my $is_static_block_comment = $this_batch->[_is_static_block_comment_]; - my $rix_seqno_controlling_ci = $this_batch->[_rix_seqno_controlling_ci_]; - my $batch_CODE_type = $this_batch->[_batch_CODE_type_]; - - my $rLL = $self->[_rLL_]; - my $Klimit = $self->[_Klimit_]; - my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; - my $ris_list_by_seqno = $self->[_ris_list_by_seqno_]; - my $ibeg_next = $ri_first->[0]; my $iend_next = $ri_last->[0]; @@ -22208,29 +23946,37 @@ sub convey_batch_to_vertical_aligner { my $type_end_next = $types_to_go[$iend_next]; my $token_beg_next = $tokens_to_go[$ibeg_next]; - my $is_block_comment = $max_index_to_go == 0 && $types_to_go[0] eq '#'; - my $rindentation_list = [0]; # ref to indentations for each line - my ( $cscw_block_comment, $closing_side_comment ); + my ( $cscw_block_comment, $closing_side_comment, $is_block_comment ); + + if ( !$max_index_to_go && $type_beg_next eq '#' ) { + $is_block_comment = 1; + } + if ($rOpts_closing_side_comments) { ( $closing_side_comment, $cscw_block_comment ) = $self->add_closing_side_comment( $ri_first, $ri_last ); } - # flush before a long if statement to avoid unwanted alignment - if ( $n_last_line > 0 - && $type_beg_next eq 'k' - && $is_if_unless{$token_beg_next} ) - { - $self->flush_vertical_aligner(); + if ( $n_last_line > 0 || $rOpts_extended_continuation_indentation ) { + $self->undo_ci( $ri_first, $ri_last, + $this_batch->[_rix_seqno_controlling_ci_] ); } - $self->undo_ci( $ri_first, $ri_last, $rix_seqno_controlling_ci ) - if ( $n_last_line > 0 || $rOpts_extended_continuation_indentation ); + # for multi-line batches ... + if ( $n_last_line > 0 ) { + + # flush before a long if statement to avoid unwanted alignment + $self->flush_vertical_aligner() + if ( $type_beg_next eq 'k' + && $is_if_unless{$token_beg_next} ); + + $self->set_logical_padding( $ri_first, $ri_last, $starting_in_quote ) + if ($rOpts_logical_padding); - $self->set_logical_padding( $ri_first, $ri_last, $peak_batch_size, - $starting_in_quote ) - if ( $n_last_line > 0 && $rOpts_logical_padding ); + $self->xlp_tweak( $ri_first, $ri_last ) + if ($rOpts_extended_line_up_parentheses); + } if (DEVEL_MODE) { $self->check_batch_summed_lengths() } @@ -22243,7 +23989,7 @@ sub convey_batch_to_vertical_aligner { # ---------------------------------------------- # loop to send each line to the vertical aligner # ---------------------------------------------- - my ( $type_beg, $type_end, $token_beg ); + my ( $type_beg, $type_end, $token_beg, $ljump ); for my $n ( 0 .. $n_last_line ) { @@ -22279,10 +24025,8 @@ sub convey_batch_to_vertical_aligner { my $Kend_code = $batch_CODE_type && $batch_CODE_type ne 'VER' ? undef : $Kend; - # $ljump is a level jump needed by 'sub final_indentation_adjustment' - my $ljump = 0; - - # Get some vars on line [n+1], if any: + # Get some vars on line [n+1], if any, + # and define $ljump = level jump needed by 'sub get_final_indentation' if ( $n < $n_last_line ) { $ibeg_next = $ri_first->[ $n + 1 ]; $iend_next = $ri_last->[ $n + 1 ]; @@ -22311,6 +24055,9 @@ sub convey_batch_to_vertical_aligner { $ljump = $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_]; } + else { + $ljump = 0; + } # --------------------------------------------- # get the vertical alignment info for this line @@ -22343,12 +24090,28 @@ EOM # -------------------------------------- # get the final indentation of this line # -------------------------------------- - my ( $indentation, $lev, $level_end, $terminal_type, - $terminal_block_type, $is_semicolon_terminated, $is_outdented_line ) - = $self->final_indentation_adjustment( $ibeg, $iend, $rfields, - $rpatterns, $ri_first, $ri_last, - $rindentation_list, $ljump, $starting_in_quote, - $is_static_block_comment, ); + my ( + + $indentation, + $lev, + $level_end, + $i_terminal, + $is_outdented_line, + + ) = $self->get_final_indentation( + + $ibeg, + $iend, + $rfields, + $rpatterns, + $ri_first, + $ri_last, + $rindentation_list, + $ljump, + $starting_in_quote, + $is_static_block_comment, + + ); # -------------------------------- # define flag 'outdent_long_lines' @@ -22415,7 +24178,7 @@ EOM my $seqno_m = $rLL->[$Km]->[_TYPE_SEQUENCE_]; if ($seqno_m) { - $block_type_m = $rblock_type_of_seqno->{$seqno_m}; + $block_type_m = $self->[_rblock_type_of_seqno_]->{$seqno_m}; } } @@ -22442,7 +24205,8 @@ EOM $rvao_args->{rvertical_tightness_flags} = $self->set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last, $ending_in_quote, $closing_side_comment ) - if ( !$is_block_comment ); + unless ( $is_block_comment + || $self->[_no_vertical_tightness_flags_] ); # ---------------------------------- # define 'is_terminal_ternary' flag @@ -22467,6 +24231,7 @@ EOM my $is_terminal_ternary = 0; my $last_leading_type = $n > 0 ? $type_beg_last : ':'; + my $terminal_type = $types_to_go[$i_terminal]; if ( $terminal_type ne ';' && $n_last_line > $n && $level_end == $lev ) @@ -22573,7 +24338,7 @@ EOM # This flag tells the vertical aligner to reset the side comment # location if we are entering a new block from level 0. This is # intended to keep side comments from drifting too far to the right. - if ( $terminal_block_type + if ( $block_type_to_go[$i_terminal] && $nesting_depth_end > $nesting_depth_beg ) { my $level_adj = $lev; @@ -22608,41 +24373,43 @@ EOM $do_not_pad = 0; - # Set flag indicating if this line ends in an opening - # token and is very short, so that a blank line is not - # needed if the subsequent line is a comment. - # Examples of what we are looking for: - # { - # && ( - # BEGIN { - # default { - # sub { - $self->[_last_output_short_opening_token_] - - # line ends in opening token - # /^[\{\(\[L]$/ - = $is_opening_type{$type_end} - - # and either - && ( - # line has either single opening token - $Kend == $Kbeg - - # or is a single token followed by opening token. - # Note that sub identifiers have blanks like 'sub doit' - # $token_beg !~ /\s+/ - || ( $Kend - $Kbeg <= 2 && index( $token_beg, SPACE ) < 0 ) - ) + } ## end of loop to output each line - # and limit total to 10 character widths - && token_sequence_length( $ibeg, $iend ) <= 10; + # Set flag indicating if the last line ends in an opening + # token and is very short, so that a blank line is not + # needed if the subsequent line is a comment. + # Examples of what we are looking for: + # { + # && ( + # BEGIN { + # default { + # sub { + $self->[_last_output_short_opening_token_] + + # line ends in opening token + # /^[\{\(\[L]$/ + = $is_opening_type{$type_end} + + # and either + && ( + # line has either single opening token + $iend_next == $ibeg_next + + # or is a single token followed by opening token. + # Note that sub identifiers have blanks like 'sub doit' + # $token_beg !~ /\s+/ + || ( $iend_next - $ibeg_next <= 2 && index( $token_beg, SPACE ) < 0 ) + ) - } ## end of loop to output each line + # and limit total to 10 character widths + && token_sequence_length( $ibeg_next, $iend_next ) <= 10; # remember indentation of lines containing opening containers for - # later use by sub final_indentation_adjustment - $self->save_opening_indentation( $ri_first, $ri_last, $rindentation_list ) - if ( !$is_block_comment ); + # later use by sub get_final_indentation + $self->save_opening_indentation( $ri_first, $ri_last, + $rindentation_list, $this_batch->[_runmatched_opening_indexes_] ) + if ( $this_batch->[_runmatched_opening_indexes_] + || $types_to_go[$max_index_to_go] eq 'q' ); # output any new -cscw block comment if ($cscw_block_comment) { @@ -22661,7 +24428,7 @@ sub check_batch_summed_lengths { # Verify that the summed lengths are correct. We want to be sure that # errors have not been introduced by programming changes. Summed lengths - # are defined in sub $store_token. Operations like padding and unmasking + # are defined in sub store_token. Operations like padding and unmasking # semicolons can change token lengths, but those operations are expected to # update the summed lengths when they make changes. So the summed lengths # should always be correct. @@ -22727,16 +24494,21 @@ EOM sub set_vertical_alignment_markers { - # This routine takes the first step toward vertical alignment of the - # lines of output text. It looks for certain tokens which can serve as - # vertical alignment markers (such as an '='). - # + my ( $self, $ri_first, $ri_last ) = @_; + + #---------------------------------------------------------------------- + # This routine looks at output lines for certain tokens which can serve + # as vertical alignment markers (such as an '='). + #---------------------------------------------------------------------- + + # Input parameters: + # $ri_first = ref to list of starting line indexes in _to_go arrays + # $ri_last = ref to list of ending line indexes in _to_go arrays + # Method: We look at each token $i in this output batch and set # $ralignment_type_to_go->[$i] equal to those tokens at which we would # accept vertical alignment. - my ( $self, $ri_first, $ri_last ) = @_; - my $ralignment_type_to_go; my $ralignment_counts = []; my $ralignment_hash_by_line = []; @@ -23036,25 +24808,25 @@ EOM $alignment_type = EMPTY_STRING; } - # For a paren after keyword, only align something like this: - # if ( $a ) { &a } - # elsif ( $b ) { &b } if ( $token eq '(' ) { - if ( $vert_last_nonblank_type eq 'k' ) { - $alignment_type = EMPTY_STRING - unless - $is_if_unless_elsif{$vert_last_nonblank_token}; - ##unless $vert_last_nonblank_token =~ /^(if|unless|elsif)$/; + # For a paren after keyword, only align if-like parens, + # such as: + # if ( $a ) { &a } + # elsif ( $b ) { &b } + # ^-------------------aligned parens + if ( $vert_last_nonblank_type eq 'k' + && !$is_if_unless_elsif{$vert_last_nonblank_token} ) + { + $alignment_type = EMPTY_STRING; } # Do not align a spaced-function-paren if requested. # Issue git #53, #73. if ( !$rOpts_function_paren_vertical_alignment ) { my $seqno = $type_sequence_to_go[$i]; - if ( $ris_function_call_paren->{$seqno} ) { - $alignment_type = EMPTY_STRING; - } + $alignment_type = EMPTY_STRING + if ( $ris_function_call_paren->{$seqno} ); } # make () align with qw in a 'use' statement (git #93) @@ -23063,6 +24835,11 @@ EOM && $mate_index_to_go[$i] == $i + 1 ) { $alignment_type = 'q'; + + ## Note on discussion git #101. We could make this + ## a separate type '()' to separate it from qw's: + ## $alignment_type = + ## $rOpts_valign_empty_parens_with_qw ? 'q' : '()'; } } @@ -23089,18 +24866,12 @@ EOM # because it may occur in short blocks). elsif ( - # we haven't already set it - ##!$alignment_type - # previous token IS one of these: ( $vert_last_nonblank_type eq ',' || $vert_last_nonblank_type eq ';' ) - # and its not the first token of the line - ## && $i > $ibeg - # and it follows a blank && $types_to_go[ $i - 1 ] eq 'b' @@ -23190,8 +24961,16 @@ sub make_vertical_alignments { #--------------------------------------------------------- # Step 1: Define the alignment tokens for the entire batch #--------------------------------------------------------- - my ( $ralignment_type_to_go, $ralignment_counts, $ralignment_hash_by_line ) - = $self->set_vertical_alignment_markers( $ri_first, $ri_last ); + my ( $ralignment_type_to_go, $ralignment_counts, $ralignment_hash_by_line ); + + # We only need to make this call if vertical alignment of code is + # requested or if a line might have a side comment. + if ( $rOpts_valign_code + || $types_to_go[$max_index_to_go] eq '#' ) + { + ( $ralignment_type_to_go, $ralignment_counts, $ralignment_hash_by_line ) + = $self->set_vertical_alignment_markers( $ri_first, $ri_last ); + } #---------------------------------------------- # Step 2: Break each line into alignment fields @@ -23254,7 +25033,6 @@ sub get_seqno { # Undo continuation indentation in certain sequences my ( $self, $ri_first, $ri_last, $rix_seqno_controlling_ci ) = @_; my ( $line_1, $line_2, $lev_last ); - my $this_line_is_semicolon_terminated; my $max_line = @{$ri_first} - 1; my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_]; @@ -23340,20 +25118,21 @@ sub get_seqno { # chain continues... # check for chain ending at end of a statement - if ( $line == $max_line ) { + my $is_semicolon_terminated = ( + $line == $max_line + && ( + $types_to_go[$iend] eq ';' - # see of this line ends a statement - $this_line_is_semicolon_terminated = - $types_to_go[$iend] eq ';' + # with possible side comment + || ( $types_to_go[$iend] eq '#' + && $iend - $ibeg >= 2 + && $types_to_go[ $iend - 2 ] eq ';' + && $types_to_go[ $iend - 1 ] eq 'b' ) + ) + ); - # with possible side comment - || ( $types_to_go[$iend] eq '#' - && $iend - $ibeg >= 2 - && $types_to_go[ $iend - 2 ] eq ';' - && $types_to_go[ $iend - 1 ] eq 'b' ); - } $line_2 = $line - if ($this_line_is_semicolon_terminated); + if ($is_semicolon_terminated); } else { @@ -23404,7 +25183,7 @@ sub get_seqno { # SECTION 2: Undo ci at cuddled blocks #------------------------------------- - # Note that sub final_indentation_adjustment will be called later to + # Note that sub get_final_indentation will be called later to # actually do this, but for now we will tentatively mark cuddled # lines with ci=0 so that the the -xci loop which follows will be # correct at cuddles. @@ -23421,7 +25200,14 @@ sub get_seqno { $terminal_type = $types_to_go[ $iend - 2 ]; } } - if ( $terminal_type eq '{' ) { + + # Patch for rt144979, part 2. Coordinated with part 1. + # Skip cuddled braces. + my $seqno_beg = $type_sequence_to_go[$ibeg]; + my $is_cuddled_closing_brace = $seqno_beg + && $self->[_ris_cuddled_closing_brace_]->{$seqno_beg}; + + if ( $terminal_type eq '{' && !$is_cuddled_closing_brace ) { my $Kbeg = $K_to_go[$ibeg]; $ci_levels_to_go[$ibeg] = 0; } @@ -23491,8 +25277,7 @@ sub get_seqno { # &Error_OutOfRange; # } # - my ( $self, $ri_first, $ri_last, $peak_batch_size, $starting_in_quote ) - = @_; + my ( $self, $ri_first, $ri_last, $starting_in_quote ) = @_; my $max_line = @{$ri_first} - 1; my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces, @@ -23684,36 +25469,32 @@ sub get_seqno { # : $i == 2 ? ( "Then", "Rarity" ) # : ( "Then", "Name" ); - if ( $max_line > 1 ) { - my $leading_token = $tokens_to_go[$ibeg_next]; - my $tokens_differ; - - # never indent line 1 of a '.' series because - # previous line is most likely at same level. - # TODO: we should also look at the leading_spaces - # of the last output line and skip if it is same - # as this line. - next if ( $leading_token eq '.' ); - - my $count = 1; - foreach my $l ( 2 .. 3 ) { - last if ( $line + $l > $max_line ); - my $ibeg_next_next = $ri_first->[ $line + $l ]; - if ( $tokens_to_go[$ibeg_next_next] ne - $leading_token ) - { - $tokens_differ = 1; - last; - } - $count++; - } - next if ($tokens_differ); - next if ( $count < 3 && $leading_token ne ':' ); - $ipad = $ibeg; - } - else { - next; + next if ( $max_line <= 1 ); + + my $leading_token = $tokens_to_go[$ibeg_next]; + my $tokens_differ; + + # never indent line 1 of a '.' series because + # previous line is most likely at same level. + # TODO: we should also look at the leading_spaces + # of the last output line and skip if it is same + # as this line. + next if ( $leading_token eq '.' ); + + my $count = 1; + foreach my $l ( 2 .. 3 ) { + last if ( $line + $l > $max_line ); + $count++; + my $ibeg_next_next = $ri_first->[ $line + $l ]; + next + if ( $tokens_to_go[$ibeg_next_next] eq + $leading_token ); + $tokens_differ = 1; + last; } + next if ($tokens_differ); + next if ( $count < 3 && $leading_token ne ':' ); + $ipad = $ibeg; } } } @@ -23752,26 +25533,10 @@ sub get_seqno { # an editor. In that case either the user will see and # fix the problem or it will be corrected next time the # entire file is processed with perltidy. + my $this_batch = $self->[_this_batch_]; + my $peak_batch_size = $this_batch->[_peak_batch_size_]; next if ( $ipad == 0 && $peak_batch_size <= 1 ); -## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT -## IT DID MORE HARM THAN GOOD -## ceil( -## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000 -## / $upem -## ), -## # do not put leading padding for just 2 lines of math -## if ( $ipad == $ibeg -## && $line > 0 -## && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ] -## && $is_math_op{$type_next} -## && $line + 2 <= $max_line ) -## { -## my $ibeg_next_next = $ri_first->[ $line + 2 ]; -## my $type_next_next = $types_to_go[$ibeg_next_next]; -## next if !$is_math_op{$type_next_next}; -## } - # next line must not be at greater depth my $iend_next = $ri_last->[ $line + 1 ]; next @@ -24002,6 +25767,9 @@ sub pad_token { $tok = SPACE x $pad_spaces . $tok; $tok_len += $pad_spaces; } + elsif ( $pad_spaces == 0 ) { + return; + } elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq SPACE ) { $tok = EMPTY_STRING; $tok_len = 0; @@ -24009,6 +25777,8 @@ sub pad_token { else { # shouldn't happen + DEVEL_MODE + && Fault("unexpected request for pad spaces = $pad_spaces\n"); return; } @@ -24024,6 +25794,64 @@ sub pad_token { return; } ## end sub pad_token +sub xlp_tweak { + + # Remove one indentation space from unbroken containers marked with + # 'K_extra_space'. These are mostly two-line lists with short names + # formatted with -xlp -pt=2. + # + # Before this fix (extra space in line 2): + # is($module->VERSION, $expected, + # "$main_module->VERSION matches $module->VERSION ($expected)"); + # + # After this fix: + # is($module->VERSION, $expected, + # "$main_module->VERSION matches $module->VERSION ($expected)"); + # + # Notes: + # - This fixes issue git #106 + # - This must be called after 'set_logical_padding'. + # - This is currently only applied to -xlp. It would also work for -lp + # but that style is essentially frozen. + + my ( $self, $ri_first, $ri_last ) = @_; + + # Must be 2 or more lines + return unless ( @{$ri_first} > 1 ); + + # Pull indentation object from start of second line + my $ibeg_1 = $ri_first->[1]; + my $lp_object = $leading_spaces_to_go[$ibeg_1]; + return if ( !ref($lp_object) ); + + # This only applies to an indentation object with a marked token + my $K_extra_space = $lp_object->get_K_extra_space(); + return unless ($K_extra_space); + + # Look for the marked token within the first line of this batch + my $ibeg_0 = $ri_first->[0]; + my $iend_0 = $ri_last->[0]; + my $ii = $ibeg_0 + $K_extra_space - $K_to_go[$ibeg_0]; + return if ( $ii <= $ibeg_0 || $ii > $iend_0 ); + + # Skip padded tokens, they have already been aligned + my $tok = $tokens_to_go[$ii]; + return if ( substr( $tok, 0, 1 ) eq SPACE ); + + # Skip 'if'-like statements, this does not improve them + return + if ( $types_to_go[$ibeg_0] eq 'k' + && $is_if_unless_elsif{ $tokens_to_go[$ibeg_0] } ); + + # Looks okay, reduce indentation by 1 space if possible + my $spaces = $lp_object->get_spaces(); + if ( $spaces > 0 ) { + $lp_object->decrease_SPACES(1); + } + + return; +} + { ## begin closure make_alignment_patterns my %keyword_map; @@ -24093,8 +25921,8 @@ sub pad_token { @{is_binary_type}{@q} = (1) x scalar(@q); # token keywords which prevent using leading word as a container name - @_ = qw(and or err eq ne cmp); - @is_binary_keyword{@_} = (1) x scalar(@_); + @q = qw(and or err eq ne cmp); + @is_binary_keyword{@q} = (1) x scalar(@q); # Some common function calls whose args can be aligned. These do not # give good alignments if the lengths differ significantly. @@ -24108,11 +25936,26 @@ sub pad_token { sub make_alignment_patterns { - # Here we do some important preliminary work for the - # vertical aligner. We create four arrays for one - # output line. These arrays contain strings that can - # be tested by the vertical aligner to see if - # consecutive lines can be aligned vertically. + my ( $self, $ibeg, $iend, $ralignment_type_to_go, $alignment_count, + $ralignment_hash ) + = @_; + + #------------------------------------------------------------------ + # This sub creates arrays of vertical alignment info for one output + # line. + #------------------------------------------------------------------ + + # Input parameters: + # $ibeg, $iend - index range of this line in the _to_go arrays + # $ralignment_type_to_go - alignment type of tokens, like '=', if any + # $alignment_count - number of alignment tokens in the line + # $ralignment_hash - this contains all of the alignments for this + # line. It is not yet used but is available for future coding in + # case there is a need to do a preliminary scan of alignment tokens. + + # The arrays which are created contain strings that can be tested by + # the vertical aligner to see if consecutive lines can be aligned + # vertically. # # The four arrays are indexed on the vertical # alignment fields and are: @@ -24129,13 +25972,6 @@ sub pad_token { # allowed, even when the alignment tokens match. # @field_lengths - the display width of each field - my ( $self, $ibeg, $iend, $ralignment_type_to_go, $alignment_count, - $ralignment_hash ) - = @_; - - # The var $ralignment_hash contains all of the alignments for this - # line. It is not yet used but is available for future coding in case - # there is a need to do a preliminary scan of the alignment tokens. if (DEVEL_MODE) { my $new_count = 0; if ( defined($ralignment_hash) ) { @@ -24177,6 +26013,8 @@ sub pad_token { my $i_start = $ibeg; my $depth = 0; + my $i_depth_prev = $i_start; + my $depth_prev = $depth; my %container_name = ( 0 => EMPTY_STRING ); my @tokens = (); @@ -24207,95 +26045,36 @@ sub pad_token { && !$is_my_local_our{ $tokens_to_go[$ibeg] } && $levels_to_go[$ibeg] eq $levels_to_go[$iterm] ) { - - # Make a container name by combining all leading barewords, - # keywords and functions. - my $name = EMPTY_STRING; - my $count = 0; - my $count_max; - my $iname_end; - my $ilast_blank; - for ( $ibeg .. $iterm ) { - my $type = $types_to_go[$_]; - - if ( $type eq 'b' ) { - $ilast_blank = $_; - next; - } - - my $token = $tokens_to_go[$_]; - - # Give up if we find an opening paren, binary operator or - # comma within or after the proposed container name. - if ( $token eq '(' - || $is_binary_type{$type} - || $type eq 'k' && $is_binary_keyword{$token} ) - { - $name = EMPTY_STRING; - last; - } - - # The container name is only built of certain types: - last if ( !$is_kwU{$type} ); - - # Normally it is made of one word, but two words for 'use' - if ( $count == 0 ) { - if ( $type eq 'k' - && $is_use_like{ $tokens_to_go[$_] } ) - { - $count_max = 2; - } - else { - $count_max = 1; - } - } - elsif ( defined($count_max) && $count >= $count_max ) { - last; - } - - if ( defined( $name_map{$token} ) ) { - $token = $name_map{$token}; - } - - $name .= SPACE . $token; - $iname_end = $_; - $count++; - } - - # Require a space after the container name token(s) - if ( $name - && defined($ilast_blank) - && $ilast_blank > $iname_end ) - { - $name = substr( $name, 1 ); - $container_name{'0'} = $name; - } + $container_name{'0'} = + make_uncontained_comma_name( $iterm, $ibeg, $iend ); } } - # -------------------- - # Loop over all tokens - # -------------------- + #-------------------------------- + # Begin main loop over all tokens + #-------------------------------- my $j = 0; # field index $patterns[0] = EMPTY_STRING; my %token_count; for my $i ( $ibeg .. $iend ) { - # Keep track of containers balanced on this line only. + #------------------------------------------------------------- + # Part 1: keep track of containers balanced on this line only. + #------------------------------------------------------------- # These are used below to prevent unwanted cross-line alignments. # Unbalanced containers already avoid aligning across # container boundaries. - - my $type = $types_to_go[$i]; - my $token = $tokens_to_go[$i]; - my $depth_last = $depth; + my $type = $types_to_go[$i]; if ( $type_sequence_to_go[$i] ) { + my $token = $tokens_to_go[$i]; if ( $is_opening_token{$token} ) { # if container is balanced on this line... my $i_mate = $mate_index_to_go[$i]; if ( $i_mate > $i && $i_mate <= $iend ) { + $i_depth_prev = $i; + $depth_prev = $depth; $depth++; # Append the previous token name to make the container name @@ -24315,10 +26094,8 @@ sub pad_token { # is_d( { foo => $a, bar => $a }, { foo => $b, bar => $c } ); # is_d( [ \$a, \$a ], [ \$b, \$c ] ); - my $name = $token; - if ( $token eq '(' ) { - $name = $self->make_paren_name($i); - } + my $name = + $token eq '(' ? $self->make_paren_name($i) : $token; # name cannot be '.', so change to something else if so if ( $name eq '.' ) { $name = 'dot' } @@ -24354,32 +26131,7 @@ sub pad_token { # if we are not aligning on this paren... if ( !$ralignment_type_to_go->[$i] ) { - # Sum length from previous alignment - my $len = token_sequence_length( $i_start, $i - 1 ); - - # Minor patch: do not include the length of any '!'. - # Otherwise, commas in the following line will not - # match - # ok( 20, tapprox( ( pdl 2, 3 ), ( pdl 2, 3 ) ) ); - # ok( 21, !tapprox( ( pdl 2, 3 ), ( pdl 2, 4 ) ) ); - if ( grep { $_ eq '!' } - @types_to_go[ $i_start .. $i - 1 ] ) - { - $len -= 1; - } - - if ( $i_start == $ibeg ) { - - # For first token, use distance from start of - # line but subtract off the indentation due to - # level. Otherwise, results could vary with - # indentation. - $len += - leading_spaces_to_go($ibeg) - - $levels_to_go[$i_start] * - $rOpts_indent_columns; - if ( $len < 0 ) { $len = 0 } - } + my $len = length_tag( $i, $ibeg, $i_start ); # tack this length onto the container name to try # to make a unique token name @@ -24389,12 +26141,16 @@ sub pad_token { } ## end if ( $is_opening_token...) elsif ( $is_closing_type{$token} ) { + $i_depth_prev = $i; + $depth_prev = $depth; $depth-- if $depth > 0; } } ## end if ( $type_sequence_to_go...) - # if we find a new synchronization token, we are done with - # a field + #------------------------------------------------------------ + # Part 2: if we find a new synchronization token, we are done + # with a field + #------------------------------------------------------------ if ( $i > $i_start && $ralignment_type_to_go->[$i] ) { my $tok = my $raw_tok = $ralignment_type_to_go->[$i]; @@ -24415,6 +26171,7 @@ sub pad_token { # If we are at an opening token which increased depth, we have # to use the name from the previous depth. + my $depth_last = $i == $i_depth_prev ? $depth_prev : $depth; my $depth_p = ( $depth_last < $depth ? $depth_last : $depth ); if ( $container_name{$depth_p} ) { @@ -24497,7 +26254,9 @@ sub pad_token { $patterns[$j] = EMPTY_STRING; } ## end if ( new synchronization token - # continue accumulating tokens + #----------------------------------------------- + # Part 3: continue accumulating the next pattern + #----------------------------------------------- # for keywords we have to use the actual text if ( $type eq 'k' ) { @@ -24568,15 +26327,18 @@ sub pad_token { # everything else else { $patterns[$j] .= $type; - } - # remove any zero-level name at first fat comma - if ( $depth == 0 && $type eq '=>' ) { - $container_name{$depth} = EMPTY_STRING; + # remove any zero-level name at first fat comma + if ( $depth == 0 && $type eq '=>' ) { + $container_name{$depth} = EMPTY_STRING; + } } + } ## end for my $i ( $ibeg .. $iend) - # done with this line .. join text of tokens to make the last field + #--------------------------------------------------------------- + # End of main loop .. join text of tokens to make the last field + #--------------------------------------------------------------- push( @fields, join( EMPTY_STRING, @tokens_to_go[ $i_start .. $iend ] ) ); push @field_lengths, @@ -24585,6 +26347,108 @@ sub pad_token { return [ \@tokens, \@fields, \@patterns, \@field_lengths ]; } ## end sub make_alignment_patterns + sub make_uncontained_comma_name { + my ( $iterm, $ibeg, $iend ) = @_; + + # Make a container name by combining all leading barewords, + # keywords and functions. + my $name = EMPTY_STRING; + my $count = 0; + my $count_max; + my $iname_end; + my $ilast_blank; + for ( $ibeg .. $iterm ) { + my $type = $types_to_go[$_]; + + if ( $type eq 'b' ) { + $ilast_blank = $_; + next; + } + + my $token = $tokens_to_go[$_]; + + # Give up if we find an opening paren, binary operator or + # comma within or after the proposed container name. + if ( $token eq '(' + || $is_binary_type{$type} + || $type eq 'k' && $is_binary_keyword{$token} ) + { + $name = EMPTY_STRING; + last; + } + + # The container name is only built of certain types: + last if ( !$is_kwU{$type} ); + + # Normally it is made of one word, but two words for 'use' + if ( $count == 0 ) { + if ( $type eq 'k' + && $is_use_like{ $tokens_to_go[$_] } ) + { + $count_max = 2; + } + else { + $count_max = 1; + } + } + elsif ( defined($count_max) && $count >= $count_max ) { + last; + } + + if ( defined( $name_map{$token} ) ) { + $token = $name_map{$token}; + } + + $name .= SPACE . $token; + $iname_end = $_; + $count++; + } + + # Require a space after the container name token(s) + if ( $name + && defined($ilast_blank) + && $ilast_blank > $iname_end ) + { + $name = substr( $name, 1 ); + } + return $name; + } ## end sub make_uncontained_comma_name + + sub length_tag { + + my ( $i, $ibeg, $i_start ) = @_; + + # Generate a line length to be used as a tag for rejecting bad + # alignments. The tag is the length of the line from the previous + # matching token, or beginning of line, to the function name. This + # will allow the vertical aligner to reject undesirable matches. + + # The basic method: sum length from previous alignment + my $len = token_sequence_length( $i_start, $i - 1 ); + + # Minor patch: do not include the length of any '!'. + # Otherwise, commas in the following line will not + # match + # ok( 20, tapprox( ( pdl 2, 3 ), ( pdl 2, 3 ) ) ); + # ok( 21, !tapprox( ( pdl 2, 3 ), ( pdl 2, 4 ) ) ); + if ( grep { $_ eq '!' } @types_to_go[ $i_start .. $i - 1 ] ) { + $len -= 1; + } + + if ( $i_start == $ibeg ) { + + # For first token, use distance from start of + # line but subtract off the indentation due to + # level. Otherwise, results could vary with + # indentation. + $len += + leading_spaces_to_go($ibeg) - + $levels_to_go[$i_start] * $rOpts_indent_columns; + } + if ( $len < 0 ) { $len = 0 } + return $len; + } ## end sub length_tag + } ## end closure make_alignment_patterns sub make_paren_name { @@ -24617,23 +26481,40 @@ sub make_paren_name { return $name; } ## end sub make_paren_name -{ ## begin closure final_indentation_adjustment +{ ## begin closure get_final_indentation my ( $last_indentation_written, $last_unadjusted_indentation, $last_leading_token ); - sub initialize_final_indentation_adjustment { + sub initialize_get_final_indentation { $last_indentation_written = 0; $last_unadjusted_indentation = 0; $last_leading_token = EMPTY_STRING; return; } - sub final_indentation_adjustment { + sub get_final_indentation { - #-------------------------------------------------------------------- - # This routine sets the final indentation of a line in the Formatter. - #-------------------------------------------------------------------- + my ( + $self, # + + $ibeg, + $iend, + $rfields, + $rpatterns, + $ri_first, + $ri_last, + $rindentation_list, + $level_jump, + $starting_in_quote, + $is_static_block_comment, + + ) = @_; + + #-------------------------------------------------------------- + # This routine makes any necessary adjustments to get the final + # indentation of a line in the Formatter. + #-------------------------------------------------------------- # It starts with the basic indentation which has been defined for the # leading token, and then takes into account any options that the user @@ -24656,22 +26537,6 @@ sub make_paren_name { # undo_ci, which was processed earlier, so care has to be taken to # keep them coordinated. - my ( - $self, $ibeg, - $iend, $rfields, - $rpatterns, $ri_first, - $ri_last, $rindentation_list, - $level_jump, $starting_in_quote, - $is_static_block_comment, - ) = @_; - - my $rLL = $self->[_rLL_]; - my $Klimit = $self->[_Klimit_]; - my $ris_bli_container = $self->[_ris_bli_container_]; - my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_]; - my $rwant_reduced_ci = $self->[_rwant_reduced_ci_]; - my $rK_weld_left = $self->[_rK_weld_left_]; - # Find the last code token of this line my $i_terminal = $iend; my $terminal_type = $types_to_go[$iend]; @@ -24684,19 +26549,15 @@ sub make_paren_name { } } - my $terminal_block_type = $block_type_to_go[$i_terminal]; - my $is_outdented_line = 0; + my $is_outdented_line; my $type_beg = $types_to_go[$ibeg]; my $token_beg = $tokens_to_go[$ibeg]; - my $block_type_beg = $block_type_to_go[$ibeg]; my $level_beg = $levels_to_go[$ibeg]; + my $block_type_beg = $block_type_to_go[$ibeg]; my $leading_spaces_beg = $leading_spaces_to_go[$ibeg]; - my $K_beg = $K_to_go[$ibeg]; my $seqno_beg = $type_sequence_to_go[$ibeg]; - my $ibeg_weld_fix = $ibeg; my $is_closing_type_beg = $is_closing_type{$type_beg}; - my $is_bli_beg = $seqno_beg ? $ris_bli_container->{$seqno_beg} : 0; # QW INDENTATION PATCH 3: my $seqno_qw_closing; @@ -24724,7 +26585,7 @@ sub make_paren_name { # } # - # MOJO: Set a flag if this lines begins with ')->' + # MOJO patch: Set a flag if this lines begins with ')->' my $leading_paren_arrow = ( $is_closing_type_beg && $token_beg eq ')' @@ -24748,661 +26609,787 @@ sub make_paren_name { # 2 - vertically align with opening token # 3 - indent #--------------------------------------------------------- + my $adjust_indentation = 0; - my $default_adjust_indentation = $adjust_indentation; + my $default_adjust_indentation = 0; + # Parameters needed for option 2, aligning with opening token: my ( $opening_indentation, $opening_offset, $is_leading, $opening_exists ); - # Honor any flag to reduce -ci set by the -bbxi=n option - if ( $seqno_beg && $rwant_reduced_ci->{$seqno_beg} ) { + #------------------------------------- + # Section 1A: + # if line starts with a sequenced item + #------------------------------------- + if ( $seqno_beg || $seqno_qw_closing ) { + + # This can be tedious so we let a sub do it + ( + $adjust_indentation, + $default_adjust_indentation, + $opening_indentation, + $opening_offset, + $is_leading, + $opening_exists, + + ) = $self->get_closing_token_indentation( + + $ibeg, + $iend, + $ri_first, + $ri_last, + $rindentation_list, + $level_jump, + $i_terminal, + $is_semicolon_terminated, + $seqno_qw_closing, + + ); + } + + #-------------------------------------------------------- + # Section 1B: + # if at ');', '};', '>;', and '];' of a terminal qw quote + #-------------------------------------------------------- + elsif ( + substr( $rpatterns->[0], 0, 2 ) eq 'qb' + && substr( $rfields->[0], -1, 1 ) eq ';' + ## $rpatterns->[0] =~ /^qb*;$/ + && $rfields->[0] =~ /^([\)\}\]\>]);$/ + ) + { + if ( $closing_token_indentation{$1} == 0 ) { + $adjust_indentation = 1; + } + else { + $adjust_indentation = 3; + } + } + + #--------------------------------------------------------- + # Section 2: set indentation according to flag set above + # + # Select the indentation object to define leading + # whitespace. If we are outdenting something like '} } );' + # then we want to use one level below the last token + # ($i_terminal) in order to get it to fully outdent through + # all levels. + #--------------------------------------------------------- + my $indentation; + my $lev; + my $level_end = $levels_to_go[$iend]; + + #------------------------------------ + # Section 2A: adjust_indentation == 0 + # No change in indentation + #------------------------------------ + if ( $adjust_indentation == 0 ) { + $indentation = $leading_spaces_beg; + $lev = $level_beg; + } + + #------------------------------------------------------------------- + # Secton 2B: adjust_indentation == 1 + # Change the indentation to be that of a different token on the line + #------------------------------------------------------------------- + elsif ( $adjust_indentation == 1 ) { + + # Previously, the indentation of the terminal token was used: + # OLD CODING: + # $indentation = $reduced_spaces_to_go[$i_terminal]; + # $lev = $levels_to_go[$i_terminal]; + + # Generalization for MOJO patch: + # Use the lowest level indentation of the tokens on the line. + # For example, here we can use the indentation of the ending ';': + # } until ($selection > 0 and $selection < 10); # ok to use ';' + # But this will not outdent if we use the terminal indentation: + # )->then( sub { # use indentation of the ->, not the { + # Warning: reduced_spaces_to_go[] may be a reference, do not + # do numerical checks with it + + my $i_ind = $ibeg; + $indentation = $reduced_spaces_to_go[$i_ind]; + $lev = $levels_to_go[$i_ind]; + while ( $i_ind < $i_terminal ) { + $i_ind++; + if ( $levels_to_go[$i_ind] < $lev ) { + $indentation = $reduced_spaces_to_go[$i_ind]; + $lev = $levels_to_go[$i_ind]; + } + } + } + + #-------------------------------------------------------------- + # Secton 2C: adjust_indentation == 2 + # Handle indented closing token which aligns with opening token + #-------------------------------------------------------------- + elsif ( $adjust_indentation == 2 ) { + + # handle option to align closing token with opening token + $lev = $level_beg; + + # calculate spaces needed to align with opening token + my $space_count = + get_spaces($opening_indentation) + $opening_offset; + + # Indent less than the previous line. + # + # Problem: For -lp we don't exactly know what it was if there + # were recoverable spaces sent to the aligner. A good solution + # would be to force a flush of the vertical alignment buffer, so + # that we would know. For now, this rule is used for -lp: + # + # When the last line did not start with a closing token we will + # be optimistic that the aligner will recover everything wanted. + # + # This rule will prevent us from breaking a hierarchy of closing + # tokens, and in a worst case will leave a closing paren too far + # indented, but this is better than frequently leaving it not + # indented enough. + my $last_spaces = get_spaces($last_indentation_written); - # if this is an opening, it must be alone on the line ... - if ( $is_closing_type{$type_beg} || $ibeg == $i_terminal ) { - $adjust_indentation = 1; + if ( ref($last_indentation_written) + && !$is_closing_token{$last_leading_token} ) + { + $last_spaces += + get_recoverable_spaces($last_indentation_written); } - # ... or a single welded unit (fix for b1173) - elsif ($total_weld_count) { - my $Kterm = $K_to_go[$i_terminal]; - my $Kterm_test = $rK_weld_left->{$Kterm}; - if ( defined($Kterm_test) && $Kterm_test >= $K_beg ) { - $Kterm = $Kterm_test; - } - if ( $Kterm == $K_beg ) { $adjust_indentation = 1 } - } - } + # reset the indentation to the new space count if it works + # only options are all or none: nothing in-between looks good + $lev = $level_beg; - # Update the $is_bli flag as we go. It is initially 1. - # We note seeing a leading opening brace by setting it to 2. - # If we get to the closing brace without seeing the opening then we - # turn it off. This occurs if the opening brace did not get output - # at the start of a line, so we will then indent the closing brace - # in the default way. - if ( $is_bli_beg && $is_bli_beg == 1 ) { - my $K_opening_container = $self->[_K_opening_container_]; - my $K_opening = $K_opening_container->{$seqno_beg}; - if ( $K_beg eq $K_opening ) { - $ris_bli_container->{$seqno_beg} = $is_bli_beg = 2; + my $diff = $last_spaces - $space_count; + if ( $diff > 0 ) { + $indentation = $space_count; } - else { $is_bli_beg = 0 } - } + else { - # QW PATCH for the combination -lp -wn - # For -lp formatting use $ibeg_weld_fix to get around the problem - # that with -lp type formatting the opening and closing tokens to not - # have sequence numbers. - if ( $seqno_qw_closing && $total_weld_count ) { - my $i_plus = $inext_to_go[$ibeg]; - if ( $i_plus <= $max_index_to_go ) { - my $K_plus = $K_to_go[$i_plus]; - if ( defined( $rK_weld_left->{$K_plus} ) ) { - $ibeg_weld_fix = $i_plus; + # We need to fix things ... but there is no good way to do it. + # The best solution is for the user to use a longer maximum + # line length. We could get a smooth variation if we just move + # the paren in using + # $space_count -= ( 1 - $diff ); + # But unfortunately this can give a rather unbalanced look. + + # For -xlp we currently allow a tolerance of one indentation + # level and then revert to a simpler default. This will jump + # suddenly but keeps a balanced look. + if ( $rOpts_extended_line_up_parentheses + && $diff >= -$rOpts_indent_columns + && $space_count > $leading_spaces_beg ) + { + $indentation = $space_count; + } + + # Otherwise revert to defaults + elsif ( $default_adjust_indentation == 0 ) { + $indentation = $leading_spaces_beg; + } + elsif ( $default_adjust_indentation == 1 ) { + $indentation = $reduced_spaces_to_go[$i_terminal]; + $lev = $levels_to_go[$i_terminal]; } } } - # if we are at a closing token of some type.. - if ( $is_closing_type_beg || $seqno_qw_closing ) { - - # get the indentation of the line containing the corresponding - # opening token - ( - $opening_indentation, $opening_offset, - $is_leading, $opening_exists - ) - = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first, - $ri_last, $rindentation_list, $seqno_qw_closing ); + #------------------------------------------------------------- + # Secton 2D: adjust_indentation == 3 + # Full indentation of closing tokens (-icb and -icp or -cti=2) + #------------------------------------------------------------- + else { - my $terminal_is_in_list = $self->is_in_list_by_i($i_terminal); + # handle -icb (indented closing code block braces) + # Updated method for indented block braces: indent one full level if + # there is no continuation indentation. This will occur for major + # structures such as sub, if, else, but not for things like map + # blocks. + # + # Note: only code blocks without continuation indentation are + # handled here (if, else, unless, ..). In the following snippet, + # the terminal brace of the sort block will have continuation + # indentation as shown so it will not be handled by the coding + # here. We would have to undo the continuation indentation to do + # this, but it probably looks ok as is. This is a possible future + # update for semicolon terminated lines. + # + # if ($sortby eq 'date' or $sortby eq 'size') { + # @files = sort { + # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby} + # or $a cmp $b + # } @files; + # } + # + if ( $block_type_beg + && $ci_levels_to_go[$i_terminal] == 0 ) + { + my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] ); + $indentation = $spaces + $rOpts_indent_columns; - # First set the default behavior: - if ( + # NOTE: for -lp we could create a new indentation object, but + # there is probably no need to do it + } - # default behavior is to outdent closing lines - # of the form: "); }; ]; )->xxx;" - $is_semicolon_terminated + # handle -icp and any -icb block braces which fall through above + # test such as the 'sort' block mentioned above. + else { - # and 'cuddled parens' of the form: ")->pack(" - # Bug fix for RT #123749]: the types here were - # incorrectly '(' and ')'. Corrected to be '{' and '}' - || ( - $terminal_type eq '{' - && $type_beg eq '}' - && ( $nesting_depth_to_go[$iend] + 1 == - $nesting_depth_to_go[$ibeg] ) - ) + # There are currently two ways to handle -icp... + # One way is to use the indentation of the previous line: + # $indentation = $last_indentation_written; - # remove continuation indentation for any line like - # } ... { - # or without ending '{' and unbalanced, such as - # such as '}->{$operator}' - || ( - $type_beg eq '}' + # The other way is to use the indentation that the previous line + # would have had if it hadn't been adjusted: + $indentation = $last_unadjusted_indentation; - && ( $types_to_go[$iend] eq '{' - || $levels_to_go[$iend] < $level_beg ) - ) + # Current method: use the minimum of the two. This avoids + # inconsistent indentation. + if ( get_spaces($last_indentation_written) < + get_spaces($indentation) ) + { + $indentation = $last_indentation_written; + } + } - # and when the next line is at a lower indentation level... + # use previous indentation but use own level + # to cause list to be flushed properly + $lev = $level_beg; + } - # PATCH #1: and only if the style allows undoing continuation - # for all closing token types. We should really wait until - # the indentation of the next line is known and then make - # a decision, but that would require another pass. + #------------------------------------------------------------- + # Remember indentation except for multi-line quotes, which get + # no indentation + #------------------------------------------------------------- + if ( !( $ibeg == 0 && $starting_in_quote ) ) { + $last_indentation_written = $indentation; + $last_unadjusted_indentation = $leading_spaces_beg; + $last_leading_token = $token_beg; - # PATCH #2: and not if this token is under -xci control - || ( $level_jump < 0 - && !$some_closing_token_indentation - && !$rseqno_controlling_my_ci->{$K_beg} ) + # Patch to make a line which is the end of a qw quote work with the + # -lp option. Make $token_beg look like a closing token as some + # type even if it is not. This variable will become + # $last_leading_token at the end of this loop. Then, if the -lp + # style is selected, and the next line is also a + # closing token, it will not get more indentation than this line. + # We need to do this because qw quotes (at present) only get + # continuation indentation, not one level of indentation, so we + # need to turn off the -lp indentation. - # Patch for -wn=2, multiple welded closing tokens - || ( $i_terminal > $ibeg - && $is_closing_type{ $types_to_go[$iend] } ) + # ... a picture is worth a thousand words: - # Alternate Patch for git #51, isolated closing qw token not - # outdented if no-delete-old-newlines is set. This works, but - # a more general patch elsewhere fixes the real problem: ljump. - # || ( $seqno_qw_closing && $ibeg == $i_terminal ) + # perltidy -wn -gnu (Without this patch): + # ok(defined( + # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112 + # 2981014)]) + # )); - ) + # perltidy -wn -gnu (With this patch): + # ok(defined( + # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112 + # 2981014)]) + # )); + if ( $seqno_qw_closing + && ( length($token_beg) > 1 || $token_beg eq '>' ) ) { - $adjust_indentation = 1; + $last_leading_token = ')'; } + } - # outdent something like '),' - if ( - $terminal_type eq ',' + #--------------------------------------------------------------------- + # Rule: lines with leading closing tokens should not be outdented more + # than the line which contained the corresponding opening token. + #--------------------------------------------------------------------- - # Removed this constraint for -wn - # OLD: allow just one character before the comma - # && $i_terminal == $ibeg + 1 + # Updated per bug report in alex_bug.pl: we must not + # mess with the indentation of closing logical braces, so + # we must treat something like '} else {' as if it were + # an isolated brace + my $is_isolated_block_brace = $block_type_beg + && ( $i_terminal == $ibeg + || $is_if_elsif_else_unless_while_until_for_foreach{$block_type_beg} + ); - # require LIST environment; otherwise, we may outdent too much - - # this can happen in calls without parentheses (overload.t); - && $terminal_is_in_list - ) - { - $adjust_indentation = 1; - } + # only do this for a ':; which is aligned with its leading '?' + my $is_unaligned_colon = $type_beg eq ':' && !$is_leading; - # undo continuation indentation of a terminal closing token if - # it is the last token before a level decrease. This will allow - # a closing token to line up with its opening counterpart, and - # avoids an indentation jump larger than 1 level. - if ( $i_terminal == $ibeg - && $is_closing_type_beg - && defined($K_beg) - && $K_beg < $Klimit ) - { - my $K_plus = $K_beg + 1; - my $type_plus = $rLL->[$K_plus]->[_TYPE_]; + if ( + defined($opening_indentation) + && !$leading_paren_arrow # MOJO patch + && !$is_isolated_block_brace + && !$is_unaligned_colon + ) + { + if ( get_spaces($opening_indentation) > get_spaces($indentation) ) { + $indentation = $opening_indentation; + } + } - if ( $type_plus eq 'b' && $K_plus < $Klimit ) { - $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_]; - } + #---------------------------------------------------- + # remember the indentation of each line of this batch + #---------------------------------------------------- + push @{$rindentation_list}, $indentation; - if ( $type_plus eq '#' && $K_plus < $Klimit ) { - $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_]; - if ( $type_plus eq 'b' && $K_plus < $Klimit ) { - $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_]; - } + #--------------------------------------------- + # outdent lines with certain leading tokens... + #--------------------------------------------- + if ( - # Note: we have skipped past just one comment (perhaps a - # side comment). There could be more, and we could easily - # skip past all the rest with the following code, or with a - # while loop. It would be rare to have to do this, and - # those block comments would still be indented, so it would - # to leave them indented. So it seems best to just stop at - # a maximum of one comment. - ##if ($type_plus eq '#') { - ## $K_plus = $self->K_next_code($K_plus); - ##} - } + # must be first word of this batch + $ibeg == 0 - if ( !$is_bli_beg && defined($K_plus) ) { - my $lev = $level_beg; - my $level_next = $rLL->[$K_plus]->[_LEVEL_]; + # and ... + && ( - # and do not undo ci if it was set by the -xci option - $adjust_indentation = 1 - if ( $level_next < $lev - && !$rseqno_controlling_my_ci->{$K_beg} ); - } + # certain leading keywords if requested + $rOpts_outdent_keywords + && $type_beg eq 'k' + && $outdent_keyword{$token_beg} - # Patch for RT #96101, in which closing brace of anonymous subs - # was not outdented. We should look ahead and see if there is - # a level decrease at the next token (i.e., a closing token), - # but right now we do not have that information. For now - # we see if we are in a list, and this works well. - # See test files 'sub*.t' for good test cases. - if ( $terminal_is_in_list - && !$rOpts_indent_closing_brace - && $block_type_beg - && $block_type_beg =~ /$ASUB_PATTERN/ ) - { - ( - $opening_indentation, $opening_offset, - $is_leading, $opening_exists - ) - = $self->get_opening_indentation( $ibeg, $ri_first, - $ri_last, $rindentation_list ); - my $indentation = $leading_spaces_beg; - if ( defined($opening_indentation) - && get_spaces($indentation) > - get_spaces($opening_indentation) ) - { - $adjust_indentation = 1; - } - } - } + # or labels if requested + || $rOpts_outdent_labels && $type_beg eq 'J' - # YVES patch 1 of 2: - # Undo ci of line with leading closing eval brace, - # but not beyond the indentation of the line with - # the opening brace. - if ( - $block_type_beg eq 'eval' - ##&& !$rOpts_line_up_parentheses - && !ref($leading_spaces_beg) - && !$rOpts_indent_closing_brace - ) - { - ( - $opening_indentation, $opening_offset, - $is_leading, $opening_exists - ) - = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last, - $rindentation_list ); - my $indentation = $leading_spaces_beg; - if ( defined($opening_indentation) - && get_spaces($indentation) > - get_spaces($opening_indentation) ) - { - $adjust_indentation = 1; + # or static block comments if requested + || $is_static_block_comment + && $rOpts_outdent_static_block_comments + ) + ) + { + my $space_count = leading_spaces_to_go($ibeg); + if ( $space_count > 0 ) { + $space_count -= $rOpts_continuation_indentation; + $is_outdented_line = 1; + if ( $space_count < 0 ) { $space_count = 0 } + + # do not promote a spaced static block comment to non-spaced; + # this is not normally necessary but could be for some + # unusual user inputs (such as -ci = -i) + if ( $type_beg eq '#' && $space_count == 0 ) { + $space_count = 1; } + + $indentation = $space_count; } + } - # patch for issue git #40: -bli setting has priority - $adjust_indentation = 0 if ($is_bli_beg); + return ( - $default_adjust_indentation = $adjust_indentation; + $indentation, + $lev, + $level_end, + $i_terminal, + $is_outdented_line, - # Now modify default behavior according to user request: - # handle option to indent non-blocks of the form ); }; ]; - # But don't do special indentation to something like ')->pack(' - if ( !$block_type_beg ) { + ); + } ## end sub get_final_indentation - # Note that logical padding has already been applied, so we may - # need to remove some spaces to get a valid hash key. - my $tok = $token_beg; - my $cti = $closing_token_indentation{$tok}; + sub get_closing_token_indentation { - # Fix the value of 'cti' for an isolated non-welded closing qw - # delimiter. - if ( $seqno_qw_closing && $ibeg_weld_fix == $ibeg ) { + # Determine indentation adjustment for a line with a leading closing + # token - i.e. one of these: ) ] } : - # A quote delimiter which is not a container will not have - # a cti value defined. In this case use the style of a - # paren. For example - # my @fars = ( - # qw< - # far - # farfar - # farfars-far - # >, - # ); - if ( !defined($cti) && length($tok) == 1 ) { + my ( + $self, # + + $ibeg, + $iend, + $ri_first, + $ri_last, + $rindentation_list, + $level_jump, + $i_terminal, + $is_semicolon_terminated, + $seqno_qw_closing, - # something other than ')', '}', ']' ; use flag for ')' - $cti = $closing_token_indentation{')'}; + ) = @_; - # But for now, do not outdent non-container qw - # delimiters because it would would change existing - # formatting. - if ( $tok ne '>' ) { $cti = 3 } - } + my $adjust_indentation = 0; + my $default_adjust_indentation = $adjust_indentation; + my $terminal_type = $types_to_go[$i_terminal]; - # A non-welded closing qw cannot currently use -cti=1 - # because that option requires a sequence number to find - # the opening indentation, and qw quote delimiters are not - # sequenced items. - if ( defined($cti) && $cti == 1 ) { $cti = 0 } - } + my $type_beg = $types_to_go[$ibeg]; + my $token_beg = $tokens_to_go[$ibeg]; + my $level_beg = $levels_to_go[$ibeg]; + my $block_type_beg = $block_type_to_go[$ibeg]; + my $leading_spaces_beg = $leading_spaces_to_go[$ibeg]; + my $seqno_beg = $type_sequence_to_go[$ibeg]; + my $is_closing_type_beg = $is_closing_type{$type_beg}; - if ( !defined($cti) ) { + my ( + $opening_indentation, $opening_offset, + $is_leading, $opening_exists + ); - # $cti may not be defined for several reasons. - # -padding may have been applied so the character - # has a length > 1 - # - we may have welded to a closing quote token. - # Here is an example (perltidy -wn): - # __PACKAGE__->load_components( qw( - # > Core - # > - # > ) ); - $adjust_indentation = 0; + # Honor any flag to reduce -ci set by the -bbxi=n option + if ( $seqno_beg && $self->[_rwant_reduced_ci_]->{$seqno_beg} ) { - } - elsif ( $cti == 1 ) { - if ( $i_terminal <= $ibeg + 1 - || $is_semicolon_terminated ) - { - $adjust_indentation = 2; - } - else { - $adjust_indentation = 0; - } - } - elsif ( $cti == 2 ) { - if ($is_semicolon_terminated) { - $adjust_indentation = 3; - } - else { - $adjust_indentation = 0; - } - } - elsif ( $cti == 3 ) { - $adjust_indentation = 3; - } + # if this is an opening, it must be alone on the line ... + if ( $is_closing_type{$type_beg} || $ibeg == $i_terminal ) { + $adjust_indentation = 1; } - # handle option to indent blocks - else { - if ( - $rOpts_indent_closing_brace - && ( - $i_terminal == $ibeg # isolated terminal '}' - || $is_semicolon_terminated - ) - ) # } xxxx ; - { - $adjust_indentation = 3; + # ... or a single welded unit (fix for b1173) + elsif ($total_weld_count) { + my $K_beg = $K_to_go[$ibeg]; + my $Kterm = $K_to_go[$i_terminal]; + my $Kterm_test = $self->[_rK_weld_left_]->{$Kterm}; + if ( defined($Kterm_test) && $Kterm_test >= $K_beg ) { + $Kterm = $Kterm_test; } + if ( $Kterm == $K_beg ) { $adjust_indentation = 1 } } } - # if at ');', '};', '>;', and '];' of a terminal qw quote - elsif ( - substr( $rpatterns->[0], 0, 2 ) eq 'qb' - && substr( $rfields->[0], -1, 1 ) eq ';' - ##&& $rpatterns->[0] =~ /^qb*;$/ - && $rfields->[0] =~ /^([\)\}\]\>]);$/ - ) - { - if ( $closing_token_indentation{$1} == 0 ) { - $adjust_indentation = 1; + my $ris_bli_container = $self->[_ris_bli_container_]; + my $is_bli_beg = $seqno_beg ? $ris_bli_container->{$seqno_beg} : 0; + + # Update the $is_bli flag as we go. It is initially 1. + # We note seeing a leading opening brace by setting it to 2. + # If we get to the closing brace without seeing the opening then we + # turn it off. This occurs if the opening brace did not get output + # at the start of a line, so we will then indent the closing brace + # in the default way. + if ( $is_bli_beg && $is_bli_beg == 1 ) { + my $K_opening_container = $self->[_K_opening_container_]; + my $K_opening = $K_opening_container->{$seqno_beg}; + my $K_beg = $K_to_go[$ibeg]; + if ( $K_beg eq $K_opening ) { + $ris_bli_container->{$seqno_beg} = $is_bli_beg = 2; } - else { - $adjust_indentation = 3; + else { $is_bli_beg = 0 } + } + + # QW PATCH for the combination -lp -wn + # For -lp formatting use $ibeg_weld_fix to get around the problem + # that with -lp type formatting the opening and closing tokens to not + # have sequence numbers. + my $ibeg_weld_fix = $ibeg; + if ( $seqno_qw_closing && $total_weld_count ) { + my $i_plus = $inext_to_go[$ibeg]; + if ( $i_plus <= $max_index_to_go ) { + my $K_plus = $K_to_go[$i_plus]; + if ( defined( $self->[_rK_weld_left_]->{$K_plus} ) ) { + $ibeg_weld_fix = $i_plus; + } } } - # if line begins with a ':', align it with any - # previous line leading with corresponding ? - elsif ( $type_beg eq ':' ) { + # if we are at a closing token of some type.. + if ( $is_closing_type_beg || $seqno_qw_closing ) { + + my $K_beg = $K_to_go[$ibeg]; + + # get the indentation of the line containing the corresponding + # opening token ( $opening_indentation, $opening_offset, $is_leading, $opening_exists ) - = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last, - $rindentation_list ); - if ($is_leading) { $adjust_indentation = 2; } - } - - #--------------------------------------------------------- - # Section 2: set indentation according to flag set above - # - # Select the indentation object to define leading - # whitespace. If we are outdenting something like '} } );' - # then we want to use one level below the last token - # ($i_terminal) in order to get it to fully outdent through - # all levels. - #--------------------------------------------------------- - my $indentation; - my $lev; - my $level_end = $levels_to_go[$iend]; - - if ( $adjust_indentation == 0 ) { - $indentation = $leading_spaces_beg; - $lev = $level_beg; - } - elsif ( $adjust_indentation == 1 ) { + = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first, + $ri_last, $rindentation_list, $seqno_qw_closing ); - # Change the indentation to be that of a different token on the line - # Previously, the indentation of the terminal token was used: - # OLD CODING: - # $indentation = $reduced_spaces_to_go[$i_terminal]; - # $lev = $levels_to_go[$i_terminal]; + # Patch for rt144979, part 1. Coordinated with part 2. + # Do not undo ci for a cuddled closing brace control; it + # needs to be treated exactly the same ci as an isolated + # closing brace. + my $is_cuddled_closing_brace = $seqno_beg + && $self->[_ris_cuddled_closing_brace_]->{$seqno_beg}; - # Generalization for MOJO: - # Use the lowest level indentation of the tokens on the line. - # For example, here we can use the indentation of the ending ';': - # } until ($selection > 0 and $selection < 10); # ok to use ';' - # But this will not outdent if we use the terminal indentation: - # )->then( sub { # use indentation of the ->, not the { - # Warning: reduced_spaces_to_go[] may be a reference, do not - # do numerical checks with it + # First set the default behavior: + if ( - my $i_ind = $ibeg; - $indentation = $reduced_spaces_to_go[$i_ind]; - $lev = $levels_to_go[$i_ind]; - while ( $i_ind < $i_terminal ) { - $i_ind++; - if ( $levels_to_go[$i_ind] < $lev ) { - $indentation = $reduced_spaces_to_go[$i_ind]; - $lev = $levels_to_go[$i_ind]; - } - } - } + # default behavior is to outdent closing lines + # of the form: "); }; ]; )->xxx;" + $is_semicolon_terminated - # handle indented closing token which aligns with opening token - elsif ( $adjust_indentation == 2 ) { + # and 'cuddled parens' of the form: ")->pack(". Bug fix for RT + # #123749]: the TYPES here were incorrectly ')' and '('. The + # corrected TYPES are '}' and '{'. But skip a cuddled block. + || ( + $terminal_type eq '{' + && $type_beg eq '}' + && ( $nesting_depth_to_go[$iend] + 1 == + $nesting_depth_to_go[$ibeg] ) + && !$is_cuddled_closing_brace + ) - # handle option to align closing token with opening token - $lev = $level_beg; + # remove continuation indentation for any line like + # } ... { + # or without ending '{' and unbalanced, such as + # such as '}->{$operator}' + || ( + $type_beg eq '}' - # calculate spaces needed to align with opening token - my $space_count = - get_spaces($opening_indentation) + $opening_offset; + && ( $types_to_go[$iend] eq '{' + || $levels_to_go[$iend] < $level_beg ) - # Indent less than the previous line. - # - # Problem: For -lp we don't exactly know what it was if there - # were recoverable spaces sent to the aligner. A good solution - # would be to force a flush of the vertical alignment buffer, so - # that we would know. For now, this rule is used for -lp: - # - # When the last line did not start with a closing token we will - # be optimistic that the aligner will recover everything wanted. - # - # This rule will prevent us from breaking a hierarchy of closing - # tokens, and in a worst case will leave a closing paren too far - # indented, but this is better than frequently leaving it not - # indented enough. - my $last_spaces = get_spaces($last_indentation_written); + # but not if a cuddled block + && !$is_cuddled_closing_brace + ) - if ( ref($last_indentation_written) - && !$is_closing_token{$last_leading_token} ) - { - $last_spaces += - get_recoverable_spaces($last_indentation_written); - } + # and when the next line is at a lower indentation level... - # reset the indentation to the new space count if it works - # only options are all or none: nothing in-between looks good - $lev = $level_beg; + # PATCH #1: and only if the style allows undoing continuation + # for all closing token types. We should really wait until + # the indentation of the next line is known and then make + # a decision, but that would require another pass. - my $diff = $last_spaces - $space_count; - if ( $diff > 0 ) { - $indentation = $space_count; - } - else { + # PATCH #2: and not if this token is under -xci control + || ( $level_jump < 0 + && !$some_closing_token_indentation + && !$self->[_rseqno_controlling_my_ci_]->{$K_beg} ) - # We need to fix things ... but there is no good way to do it. - # The best solution is for the user to use a longer maximum - # line length. We could get a smooth variation if we just move - # the paren in using - # $space_count -= ( 1 - $diff ); - # But unfortunately this can give a rather unbalanced look. + # Patch for -wn=2, multiple welded closing tokens + || ( $i_terminal > $ibeg + && $is_closing_type{ $types_to_go[$iend] } ) - # For -xlp we currently allow a tolerance of one indentation - # level and then revert to a simpler default. This will jump - # suddenly but keeps a balanced look. - if ( $rOpts_extended_line_up_parentheses - && $diff >= -$rOpts_indent_columns - && $space_count > $leading_spaces_beg ) - { - $indentation = $space_count; - } + # Alternate Patch for git #51, isolated closing qw token not + # outdented if no-delete-old-newlines is set. This works, but + # a more general patch elsewhere fixes the real problem: ljump. + # || ( $seqno_qw_closing && $ibeg == $i_terminal ) - # Otherwise revert to defaults - elsif ( $default_adjust_indentation == 0 ) { - $indentation = $leading_spaces_beg; - } - elsif ( $default_adjust_indentation == 1 ) { - $indentation = $reduced_spaces_to_go[$i_terminal]; - $lev = $levels_to_go[$i_terminal]; - } + ) + { + $adjust_indentation = 1; } - } - # Full indentation of closing tokens (-icb and -icp or -cti=2) - else { + # outdent something like '),' + if ( + $terminal_type eq ',' - # handle -icb (indented closing code block braces) - # Updated method for indented block braces: indent one full level if - # there is no continuation indentation. This will occur for major - # structures such as sub, if, else, but not for things like map - # blocks. - # - # Note: only code blocks without continuation indentation are - # handled here (if, else, unless, ..). In the following snippet, - # the terminal brace of the sort block will have continuation - # indentation as shown so it will not be handled by the coding - # here. We would have to undo the continuation indentation to do - # this, but it probably looks ok as is. This is a possible future - # update for semicolon terminated lines. - # - # if ($sortby eq 'date' or $sortby eq 'size') { - # @files = sort { - # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby} - # or $a cmp $b - # } @files; - # } - # - if ( $block_type_beg - && $ci_levels_to_go[$i_terminal] == 0 ) - { - my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] ); - $indentation = $spaces + $rOpts_indent_columns; + # Removed this constraint for -wn + # OLD: allow just one character before the comma + # && $i_terminal == $ibeg + 1 - # NOTE: for -lp we could create a new indentation object, but - # there is probably no need to do it + # require LIST environment; otherwise, we may outdent too much - + # this can happen in calls without parentheses (overload.t); + && $self->is_in_list_by_i($i_terminal) + ) + { + $adjust_indentation = 1; } - # handle -icp and any -icb block braces which fall through above - # test such as the 'sort' block mentioned above. - else { - - # There are currently two ways to handle -icp... - # One way is to use the indentation of the previous line: - # $indentation = $last_indentation_written; - - # The other way is to use the indentation that the previous line - # would have had if it hadn't been adjusted: - $indentation = $last_unadjusted_indentation; + # undo continuation indentation of a terminal closing token if + # it is the last token before a level decrease. This will allow + # a closing token to line up with its opening counterpart, and + # avoids an indentation jump larger than 1 level. + my $rLL = $self->[_rLL_]; + my $Klimit = $self->[_Klimit_]; + if ( $i_terminal == $ibeg + && $is_closing_type_beg + && defined($K_beg) + && $K_beg < $Klimit ) + { + my $K_plus = $K_beg + 1; + my $type_plus = $rLL->[$K_plus]->[_TYPE_]; - # Current method: use the minimum of the two. This avoids - # inconsistent indentation. - if ( get_spaces($last_indentation_written) < - get_spaces($indentation) ) - { - $indentation = $last_indentation_written; + if ( $type_plus eq 'b' && $K_plus < $Klimit ) { + $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_]; } - } - - # use previous indentation but use own level - # to cause list to be flushed properly - $lev = $level_beg; - } - # remember indentation except for multi-line quotes, which get - # no indentation - unless ( $ibeg == 0 && $starting_in_quote ) { - $last_indentation_written = $indentation; - $last_unadjusted_indentation = $leading_spaces_beg; - $last_leading_token = $token_beg; + if ( $type_plus eq '#' && $K_plus < $Klimit ) { + $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_]; + if ( $type_plus eq 'b' && $K_plus < $Klimit ) { + $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_]; + } - # Patch to make a line which is the end of a qw quote work with the - # -lp option. Make $token_beg look like a closing token as some - # type even if it is not. This variable will become - # $last_leading_token at the end of this loop. Then, if the -lp - # style is selected, and the next line is also a - # closing token, it will not get more indentation than this line. - # We need to do this because qw quotes (at present) only get - # continuation indentation, not one level of indentation, so we - # need to turn off the -lp indentation. + # Note: we have skipped past just one comment (perhaps a + # side comment). There could be more, and we could easily + # skip past all the rest with the following code, or with a + # while loop. It would be rare to have to do this, and + # those block comments would still be indented, so it would + # to leave them indented. So it seems best to just stop at + # a maximum of one comment. + ##if ($type_plus eq '#') { + ## $K_plus = $self->K_next_code($K_plus); + ##} + } - # ... a picture is worth a thousand words: + if ( !$is_bli_beg && defined($K_plus) ) { + my $lev = $level_beg; + my $level_next = $rLL->[$K_plus]->[_LEVEL_]; - # perltidy -wn -gnu (Without this patch): - # ok(defined( - # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112 - # 2981014)]) - # )); + # and do not undo ci if it was set by the -xci option + $adjust_indentation = 1 + if ( $level_next < $lev + && !$self->[_rseqno_controlling_my_ci_]->{$K_beg} ); + } - # perltidy -wn -gnu (With this patch): - # ok(defined( - # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112 - # 2981014)]) - # )); - if ( $seqno_qw_closing - && ( length($token_beg) > 1 || $token_beg eq '>' ) ) + # Patch for RT #96101, in which closing brace of anonymous subs + # was not outdented. We should look ahead and see if there is + # a level decrease at the next token (i.e., a closing token), + # but right now we do not have that information. For now + # we see if we are in a list, and this works well. + # See test files 'sub*.t' for good test cases. + if ( !$rOpts_indent_closing_brace + && $block_type_beg + && $self->[_ris_asub_block_]->{$seqno_beg} + && $self->is_in_list_by_i($i_terminal) ) + { + ( + $opening_indentation, $opening_offset, + $is_leading, $opening_exists + ) + = $self->get_opening_indentation( $ibeg, $ri_first, + $ri_last, $rindentation_list ); + my $indentation = $leading_spaces_beg; + if ( defined($opening_indentation) + && get_spaces($indentation) > + get_spaces($opening_indentation) ) + { + $adjust_indentation = 1; + } + } + } + + # YVES patch 1 of 2: + # Undo ci of line with leading closing eval brace, + # but not beyond the indentation of the line with + # the opening brace. + if ( $block_type_beg eq 'eval' + && !ref($leading_spaces_beg) + && !$rOpts_indent_closing_brace ) { - $last_leading_token = ')'; + ( + $opening_indentation, $opening_offset, + $is_leading, $opening_exists + ) + = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last, + $rindentation_list ); + my $indentation = $leading_spaces_beg; + if ( defined($opening_indentation) + && get_spaces($indentation) > + get_spaces($opening_indentation) ) + { + $adjust_indentation = 1; + } } - } - # be sure lines with leading closing tokens are not outdented more - # than the line which contained the corresponding opening token. + # patch for issue git #40: -bli setting has priority + $adjust_indentation = 0 if ($is_bli_beg); - #-------------------------------------------------------- - # updated per bug report in alex_bug.pl: we must not - # mess with the indentation of closing logical braces so - # we must treat something like '} else {' as if it were - # an isolated brace - #-------------------------------------------------------- - my $is_isolated_block_brace = $block_type_beg - && ( $i_terminal == $ibeg - || $is_if_elsif_else_unless_while_until_for_foreach{$block_type_beg} - ); + $default_adjust_indentation = $adjust_indentation; - # only do this for a ':; which is aligned with its leading '?' - my $is_unaligned_colon = $type_beg eq ':' && !$is_leading; + # Now modify default behavior according to user request: + # handle option to indent non-blocks of the form ); }; ]; + # But don't do special indentation to something like ')->pack(' + if ( !$block_type_beg ) { - if ( - defined($opening_indentation) - && !$leading_paren_arrow # MOJO - && !$is_isolated_block_brace - && !$is_unaligned_colon - ) - { - if ( get_spaces($opening_indentation) > get_spaces($indentation) ) { - $indentation = $opening_indentation; - } - } + # Note that logical padding has already been applied, so we may + # need to remove some spaces to get a valid hash key. + my $tok = $token_beg; + my $cti = $closing_token_indentation{$tok}; - # remember the indentation of each line of this batch - push @{$rindentation_list}, $indentation; + # Fix the value of 'cti' for an isolated non-welded closing qw + # delimiter. + if ( $seqno_qw_closing && $ibeg_weld_fix == $ibeg ) { - # outdent lines with certain leading tokens... - if ( + # A quote delimiter which is not a container will not have + # a cti value defined. In this case use the style of a + # paren. For example + # my @fars = ( + # qw< + # far + # farfar + # farfars-far + # >, + # ); + if ( !defined($cti) && length($tok) == 1 ) { - # must be first word of this batch - $ibeg == 0 + # something other than ')', '}', ']' ; use flag for ')' + $cti = $closing_token_indentation{')'}; - # and ... - && ( + # But for now, do not outdent non-container qw + # delimiters because it would would change existing + # formatting. + if ( $tok ne '>' ) { $cti = 3 } + } - # certain leading keywords if requested - $rOpts_outdent_keywords - && $type_beg eq 'k' - && $outdent_keyword{$token_beg} + # A non-welded closing qw cannot currently use -cti=1 + # because that option requires a sequence number to find + # the opening indentation, and qw quote delimiters are not + # sequenced items. + if ( defined($cti) && $cti == 1 ) { $cti = 0 } + } - # or labels if requested - || $rOpts_outdent_labels && $type_beg eq 'J' + if ( !defined($cti) ) { - # or static block comments if requested - || $is_static_block_comment - && $rOpts_outdent_static_block_comments - ) - ) - { - my $space_count = leading_spaces_to_go($ibeg); - if ( $space_count > 0 ) { - $space_count -= $rOpts_continuation_indentation; - $is_outdented_line = 1; - if ( $space_count < 0 ) { $space_count = 0 } + # $cti may not be defined for several reasons. + # -padding may have been applied so the character + # has a length > 1 + # - we may have welded to a closing quote token. + # Here is an example (perltidy -wn): + # __PACKAGE__->load_components( qw( + # > Core + # > + # > ) ); + $adjust_indentation = 0; - # do not promote a spaced static block comment to non-spaced; - # this is not normally necessary but could be for some - # unusual user inputs (such as -ci = -i) - if ( $type_beg eq '#' && $space_count == 0 ) { - $space_count = 1; } + elsif ( $cti == 1 ) { + if ( $i_terminal <= $ibeg + 1 + || $is_semicolon_terminated ) + { + $adjust_indentation = 2; + } + else { + $adjust_indentation = 0; + } + } + elsif ( $cti == 2 ) { + if ($is_semicolon_terminated) { + $adjust_indentation = 3; + } + else { + $adjust_indentation = 0; + } + } + elsif ( $cti == 3 ) { + $adjust_indentation = 3; + } + } - $indentation = $space_count; + # handle option to indent blocks + else { + if ( + $rOpts_indent_closing_brace + && ( + $i_terminal == $ibeg # isolated terminal '}' + || $is_semicolon_terminated + ) + ) # } xxxx ; + { + $adjust_indentation = 3; + } } + } ## end if ( $is_closing_type_beg || $seqno_qw_closing ) + + # if line begins with a ':', align it with any + # previous line leading with corresponding ? + elsif ( $type_beg eq ':' ) { + ( + $opening_indentation, $opening_offset, + $is_leading, $opening_exists + ) + = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last, + $rindentation_list ); + if ($is_leading) { $adjust_indentation = 2; } } - return ( $indentation, $lev, $level_end, $terminal_type, - $terminal_block_type, $is_semicolon_terminated, - $is_outdented_line ); - } ## end sub final_indentation_adjustment -} ## end closure final_indentation_adjustment + return ( + + $adjust_indentation, + $default_adjust_indentation, + $opening_indentation, + $opening_offset, + $is_leading, + $opening_exists, + + ); + } +} ## end closure get_final_indentation sub get_opening_indentation { @@ -25419,7 +27406,7 @@ sub get_opening_indentation { # $rindentation_list - reference to a list containing the indentation # used for each line. # $qw_seqno - optional sequence number to use if normal seqno not defined - # (TODO: would be more general to just look this up from index i) + # (NOTE: would be more general to just look this up from index i) # # return: # -the indentation of the line which contained the opening token @@ -25451,6 +27438,61 @@ sub get_opening_indentation { return ( $indent, $offset, $is_leading, $exists ); } ## end sub get_opening_indentation +sub examine_vertical_tightness_flags { + my ($self) = @_; + + # For efficiency, we will set a flag to skip all calls to sub + # 'set_vertical_tightness_flags' if vertical tightness is not possible with + # the user input parameters. If vertical tightness is possible, we will + # simply leave the flag undefined and return. + + # Vertical tightness is never possible with --freeze-whitespace + if ($rOpts_freeze_whitespace) { + $self->[_no_vertical_tightness_flags_] = 1; + return; + } + + # This sub is coordinated with sub set_vertical_tightness_flags. + # The Section numbers in the following comments are the sections + # in sub set_vertical_tightness_flags: + + # Examine controls for Section 1a: + return if ($rOpts_line_up_parentheses); + + foreach my $key ( keys %opening_vertical_tightness ) { + return if ( $opening_vertical_tightness{$key} ); + } + + # Examine controls for Section 1b: + foreach my $key ( keys %closing_vertical_tightness ) { + return if ( $closing_vertical_tightness{$key} ); + } + + # Examine controls for Section 1c: + foreach my $key ( keys %opening_token_right ) { + return if ( $opening_token_right{$key} ); + } + + # Examine controls for Section 1d: + foreach my $key ( keys %stack_opening_token ) { + return if ( $stack_opening_token{$key} ); + } + foreach my $key ( keys %stack_closing_token ) { + return if ( $stack_closing_token{$key} ); + } + + # Examine controls for Section 2: + return if ($rOpts_block_brace_vertical_tightness); + + # Examine controls for Section 3: + return if ($rOpts_stack_closing_block_brace); + + # None of the controls used for vertical tightness are set, so + # we can skip all calls to sub set_vertical_tightness_flags + $self->[_no_vertical_tightness_flags_] = 1; + return; +} + sub set_vertical_tightness_flags { my ( $self, $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last, @@ -25458,6 +27500,8 @@ sub set_vertical_tightness_flags { = @_; # Define vertical tightness controls for the nth line of a batch. + # Note: do not call this sub for a block comment or if + # $rOpts_freeze_whitespace is set. # These parameters are passed to the vertical aligner to indicated # if we should combine this line with the next line to achieve the @@ -25487,11 +27531,6 @@ sub set_vertical_tightness_flags { # continually increase if we allowed it when the -fws flag is set. # See case b499 for an example. - # Speedup: just return for a comment - if ( $max_index_to_go == 0 && $types_to_go[0] eq '#' ) { - return; - } - # Define these values... my $vt_type = 0; my $vt_opening_flag = 0; @@ -25503,13 +27542,11 @@ sub set_vertical_tightness_flags { my $vt_min_lines = 0; my $vt_max_lines = 0; - goto RETURN - if ($rOpts_freeze_whitespace); - # Uses these global parameters: # $rOpts_block_brace_tightness # $rOpts_block_brace_vertical_tightness # $rOpts_stack_closing_block_brace + # $rOpts_line_up_parentheses # %opening_vertical_tightness # %closing_vertical_tightness # %opening_token_right @@ -25560,17 +27597,19 @@ sub set_vertical_tightness_flags { if ( $self->[_rK_weld_left_]->{ $K_to_go[$iend_next] } && $is_closing_type{$type_end_next} ); - # Avoid conflict of -bom and -pt=1 or -pt=2, fixes b1270 - # See similar patch above for $cvt. + # The flag '_rwant_container_open_' avoids conflict of -bom and -pt=1 + # or -pt=2; fixes b1270. See similar patch above for $cvt. my $seqno = $type_sequence_to_go[$iend]; - if ( $ovt && $self->[_rwant_container_open_]->{$seqno} ) { + if ( $ovt + && $self->[_rwant_container_open_]->{$seqno} ) + { $ovt = 0; } - if ( $ovt == 2 - && $self->[_rreduce_vertical_tightness_by_seqno_]->{$seqno} ) - { - $ovt = 1; + # The flag '_rmax_vertical_tightness_' avoids welding conflicts. + if ( defined( $self->[_rmax_vertical_tightness_]->{$seqno} ) ) { + $ovt = + min( $ovt, $self->[_rmax_vertical_tightness_]->{$seqno} ); } unless ( @@ -25603,7 +27642,6 @@ sub set_vertical_tightness_flags { && $is_closing_token{$token_next} && $types_to_go[$iend] ne '#' ) # for safety, shouldn't happen! { - my $ovt = $opening_vertical_tightness{$token_next}; my $cvt = $closing_vertical_tightness{$token_next}; # Avoid conflict of -bom and -pvt=1 or -pvt=2, fixes b977, b1303 @@ -25628,6 +27666,17 @@ sub set_vertical_tightness_flags { $cvt = 1; } + # Fix for b1379, b1380, b1381, b1382, b1384 part 2, + # instablility with adding and deleting trailing commas: + # Reducing -cvt=2 to =1 fixes stability for -wtc=b in b1379,1380. + # Reducing -cvt>0 to =0 fixes stability for -wtc=b in b1381,1382. + # Reducing -cvt>0 to =0 fixes stability for -wtc=m in b1384 + if ( $cvt + && $self->[_ris_bare_trailing_comma_by_seqno_]->{$seqno} ) + { + $cvt = 0; + } + if ( # Never append a trailing line like ')->pack(' because it @@ -25740,8 +27789,9 @@ sub set_vertical_tightness_flags { && $token_end ne '||' && $token_end ne '&&' # Keep break after '=' if -lp. Fixes b964 b1040 b1062 b1083 b1089. + # Generalized from '=' to $is_assignment to fix b1375. && !( - $token_end eq '=' + $is_assignment{ $types_to_go[$iend] } && $rOpts_line_up_parentheses && $self->[_rlp_object_by_seqno_] ->{ $type_sequence_to_go[$ibeg_next] } @@ -25875,8 +27925,6 @@ sub set_vertical_tightness_flags { $vt_seqno_end = $self->get_seqno( $iend, $ending_in_quote ); } - RETURN: - my $rvertical_tightness_flags = { _vt_type => $vt_type, _vt_opening_flag => $vt_opening_flag, @@ -26503,35 +28551,33 @@ sub add_closing_side_comment { ## "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]"; } } - else { - # No differences.. we can safely delete old comment if we - # are below the threshold - if ( $block_line_count < - $rOpts->{'closing-side-comment-interval'} ) + # No differences.. we can safely delete old comment if we + # are below the threshold + elsif ( $block_line_count < + $rOpts->{'closing-side-comment-interval'} ) + { + # Since the line breaks have already been set, we have + # to remove the token from the _to_go array and also + # from the line range (this fixes issue c081). + # Note that we can only get here if -cscw has been set + # because otherwise the old comment is already deleted. + $token = undef; + my $ibeg = $ri_first->[-1]; + my $iend = $ri_last->[-1]; + if ( $iend > $ibeg + && $iend == $max_index_to_go + && $types_to_go[$max_index_to_go] eq '#' ) { - # Since the line breaks have already been set, we have - # to remove the token from the _to_go array and also - # from the line range (this fixes issue c081). - # Note that we can only get here if -cscw has been set - # because otherwise the old comment is already deleted. - $token = undef; - my $ibeg = $ri_first->[-1]; - my $iend = $ri_last->[-1]; + $iend--; + $max_index_to_go--; if ( $iend > $ibeg - && $iend == $max_index_to_go - && $types_to_go[$max_index_to_go] eq '#' ) + && $types_to_go[$max_index_to_go] eq 'b' ) { $iend--; $max_index_to_go--; - if ( $iend > $ibeg - && $types_to_go[$max_index_to_go] eq 'b' ) - { - $iend--; - $max_index_to_go--; - } - $ri_last->[-1] = $iend; } + $ri_last->[-1] = $iend; } } } @@ -26573,7 +28619,7 @@ sub wrapup { # This is the last routine called when a file is formatted. # Flush buffer and write any informative messages - my $self = shift; + my ( $self, $severe_error ) = @_; $self->flush(); my $file_writer_object = $self->[_file_writer_object_]; @@ -26712,7 +28758,10 @@ sub wrapup { $file_writer_object->report_line_length_errors(); - $self->[_converged_] = $file_writer_object->get_convergence_check() + # Define the formatter self-check for convergence. + $self->[_converged_] = + $severe_error + || $file_writer_object->get_convergence_check() || $rOpts->{'indent-only'}; return;