X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=lib%2FPerl%2FTidy%2FFormatter.pm;h=b762fa327000aacccfc58096a8b1af96af17d9c9;hb=effbe8e558790d5f5e4eb49a10b2ed020b0eaaee;hp=e55bf05c9aaa10435a19a10acd7eec2dca94f2d3;hpb=c514d57dc8088e1f4d3f51857b1155c20085c296;p=perltidy.git diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm index e55bf05..b762fa3 100644 --- a/lib/Perl/Tidy/Formatter.pm +++ b/lib/Perl/Tidy/Formatter.pm @@ -1,4 +1,4 @@ -##################################################################### +#################################################################### # # The Perl::Tidy::Formatter package adds indentation, whitespace, and # line breaks to the token stream @@ -51,8 +51,9 @@ use constant SPACE => q{ }; { #<<< A non-indenting brace to contain all lexical variables use Carp; -use English qw( -no_match_vars ); -our $VERSION = '20220613'; +use English qw( -no_match_vars ); +use List::Util qw( min max ); # min, max are in Perl 5.8 +our $VERSION = '20230309'; # The Tokenizer will be loaded with the Formatter ##use Perl::Tidy::Tokenizer; # for is_keyword() @@ -106,6 +107,8 @@ sub Fault { my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0); my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1); my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2); + my $pkg = __PACKAGE__; + my $input_stream_name = get_input_stream_name(); Die(< $i++, }; -} +} ## end BEGIN BEGIN { @@ -387,7 +455,6 @@ BEGIN { my $i = 0; use constant { _rlines_ => $i++, - _rlines_new_ => $i++, _rLL_ => $i++, _Klimit_ => $i++, _rdepth_of_opening_seqno_ => $i++, @@ -402,15 +469,16 @@ BEGIN { _K_opening_ternary_ => $i++, _K_closing_ternary_ => $i++, _K_first_seq_item_ => $i++, - _rK_phantom_semicolons_ => $i++, _rtype_count_by_seqno_ => $i++, _ris_function_call_paren_ => $i++, _rlec_count_by_seqno_ => $i++, _ris_broken_container_ => $i++, _ris_permanently_broken_ => $i++, + _rblank_and_comment_count_ => $i++, _rhas_list_ => $i++, _rhas_broken_list_ => $i++, _rhas_broken_list_with_lec_ => $i++, + _rfirst_comma_line_index_ => $i++, _rhas_code_block_ => $i++, _rhas_broken_code_block_ => $i++, _rhas_ternary_ => $i++, @@ -423,6 +491,7 @@ BEGIN { _rparent_of_seqno_ => $i++, _rchildren_of_seqno_ => $i++, _ris_list_by_seqno_ => $i++, + _ris_cuddled_closing_brace_ => $i++, _rbreak_container_ => $i++, _rshort_nested_ => $i++, _length_function_ => $i++, @@ -435,11 +504,11 @@ BEGIN { _radjusted_levels_ => $i++, _this_batch_ => $i++, + _ris_special_identifier_token_ => $i++, _last_output_short_opening_token_ => $i++, - _last_line_leading_type_ => $i++, - _last_line_leading_level_ => $i++, - _last_last_line_leading_level_ => $i++, + _last_line_leading_type_ => $i++, + _last_line_leading_level_ => $i++, _added_semicolon_count_ => $i++, _first_added_semicolon_at_ => $i++, @@ -480,7 +549,6 @@ BEGIN { _rKrange_code_without_comments_ => $i++, _rbreak_before_Kfirst_ => $i++, _rbreak_after_Klast_ => $i++, - _rwant_container_open_ => $i++, _converged_ => $i++, _rstarting_multiline_qw_seqno_by_K_ => $i++, @@ -490,16 +558,19 @@ BEGIN { _rcollapsed_length_by_seqno_ => $i++, _rbreak_before_container_by_seqno_ => $i++, - _ris_essential_old_breakpoint_ => $i++, _roverride_cab3_ => $i++, _ris_assigned_structure_ => $i++, + _ris_short_broken_eval_block_ => $i++, + _ris_bare_trailing_comma_by_seqno_ => $i++, - _rseqno_non_indenting_brace_by_ix_ => $i++, - _rreduce_vertical_tightness_by_seqno_ => $i++, + _rseqno_non_indenting_brace_by_ix_ => $i++, + _rmax_vertical_tightness_ => $i++, + + _no_vertical_tightness_flags_ => $i++, _LAST_SELF_INDEX_ => $i - 1, }; -} +} ## end BEGIN BEGIN { @@ -519,8 +590,9 @@ BEGIN { _rix_seqno_controlling_ci_ => $i++, _batch_CODE_type_ => $i++, _ri_starting_one_line_block_ => $i++, + _runmatched_opening_indexes_ => $i++, }; -} +} ## end BEGIN BEGIN { @@ -568,6 +640,10 @@ BEGIN { ); @is_assignment{@q} = (1) x scalar(@q); + # a hash needed by break_lists for efficiency: + push @q, qw{ ; < > ~ f }; + @is_non_list_type{@q} = (1) x scalar(@q); + @q = qw(is if unless and or err last next redo return); @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q); @@ -678,6 +754,18 @@ BEGIN { @q = qw< } ) ] : >; @is_closing_sequence_token{@q} = (1) x scalar(@q); + %matching_token = ( + '{' => '}', + '(' => ')', + '[' => ']', + '?' => ':', + + '}' => '{', + ')' => '(', + ']' => '[', + ':' => '?', + ); + # a hash needed by sub break_lists for labeling containers @q = qw( k => && || ? : . ); @is_container_label_type{@q} = (1) x scalar(@q); @@ -698,7 +786,44 @@ BEGIN { push @q, ','; @is_counted_type{@q} = (1) x scalar(@q); -} + # Tokens where --keep-old-break-xxx flags make soft breaks instead + # of hard breaks. See b1433 and b1436. + # NOTE: $type is used as the hash key for now; if other container tokens + # are added it might be necessary to use a token/type mixture. + @q = qw# -> ? : && || + - / * #; + @is_soft_keep_break_type{@q} = (1) x scalar(@q); + + # these functions allow an identifier in the indirect object slot + @q = qw( print printf sort exec system say); + @is_indirect_object_taker{@q} = (1) x scalar(@q); + + # Define here tokens which may follow the closing brace of a do statement + # on the same line, as in: + # } while ( $something); + my @dof = qw(until while unless if ; : ); + push @dof, ','; + @is_do_follower{@dof} = (1) x scalar(@dof); + + # what can follow a multi-line anonymous sub definition closing curly: + my @asf = qw# ; : => or and && || ~~ !~~ ) #; + push @asf, ','; + @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf); + + # what can follow a one-line anonymous sub closing curly: + # one-line anonymous subs also have ']' here... + # see tk3.t and PP.pm + my @asf1 = qw# ; : => or and && || ) ] ~~ !~~ #; + push @asf1, ','; + @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1); + + # What can follow a closing curly of a block + # which is not an if/elsif/else/do/sort/map/grep/eval/sub + # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl' + my @obf = qw# ; : => or and && || ) #; + push @obf, ','; + @is_other_brace_follower{@obf} = (1) x scalar(@obf); + +} ## end BEGIN { ## begin closure to count instances @@ -744,7 +869,7 @@ sub new { initialize_undo_ci(); initialize_process_line_of_CODE(); initialize_grind_batch_of_CODE(); - initialize_final_indentation_adjustment(); + initialize_get_final_indentation(); initialize_postponed_breakpoint(); initialize_batch_variables(); initialize_write_line(); @@ -754,7 +879,7 @@ sub new { file_writer_object => $file_writer_object, logger_object => $logger_object, diagnostics_object => $diagnostics_object, - length_function => $length_function + length_function => $length_function, ); write_logfile_entry("\nStarting tokenization pass...\n"); @@ -777,8 +902,7 @@ sub new { my $self = []; # Basic data structures... - $self->[_rlines_] = []; # = ref to array of lines of the file - $self->[_rlines_new_] = []; # = ref to array of output lines + $self->[_rlines_] = []; # = ref to array of lines of the file # 'rLL' = reference to the continuous liner array of all tokens in a file. # 'LL' stands for 'Linked List'. Using a linked list was a disaster, but @@ -795,9 +919,6 @@ sub new { $self->[_K_closing_ternary_] = {}; $self->[_K_first_seq_item_] = undef; # K of first token with a sequence # - # Array of phantom semicolons, in case we ever need to undo them - $self->[_rK_phantom_semicolons_] = undef; - # 'rSS' is the 'Signed Sequence' list, a continuous list of all sequence # numbers with + or - indicating opening or closing. This list represents # the entire container tree and is invariant under reformatting. It can be @@ -820,9 +941,11 @@ sub new { $self->[_rlec_count_by_seqno_] = {}; $self->[_ris_broken_container_] = {}; $self->[_ris_permanently_broken_] = {}; + $self->[_rblank_and_comment_count_] = {}; $self->[_rhas_list_] = {}; $self->[_rhas_broken_list_] = {}; $self->[_rhas_broken_list_with_lec_] = {}; + $self->[_rfirst_comma_line_index_] = {}; $self->[_rhas_code_block_] = {}; $self->[_rhas_broken_code_block_] = {}; $self->[_rhas_ternary_] = {}; @@ -835,6 +958,7 @@ sub new { $self->[_rparent_of_seqno_] = {}; $self->[_rchildren_of_seqno_] = {}; $self->[_ris_list_by_seqno_] = {}; + $self->[_ris_cuddled_closing_brace_] = {}; $self->[_rbreak_container_] = {}; # prevent one-line blocks $self->[_rshort_nested_] = {}; # blocks not forced open @@ -852,7 +976,7 @@ sub new { $self->[_this_batch_] = []; # Memory of processed text... - $self->[_last_last_line_leading_level_] = 0; + $self->[_ris_special_identifier_token_] = {}; $self->[_last_line_leading_level_] = 0; $self->[_last_line_leading_type_] = '#'; $self->[_last_output_short_opening_token_] = 0; @@ -895,7 +1019,6 @@ sub new { $self->[_rKrange_code_without_comments_] = []; $self->[_rbreak_before_Kfirst_] = {}; $self->[_rbreak_after_Klast_] = {}; - $self->[_rwant_container_open_] = {}; $self->[_converged_] = 0; # qw stuff @@ -906,12 +1029,15 @@ sub new { $self->[_rcollapsed_length_by_seqno_] = {}; $self->[_rbreak_before_container_by_seqno_] = {}; - $self->[_ris_essential_old_breakpoint_] = {}; $self->[_roverride_cab3_] = {}; $self->[_ris_assigned_structure_] = {}; + $self->[_ris_short_broken_eval_block_] = {}; + $self->[_ris_bare_trailing_comma_by_seqno_] = {}; - $self->[_rseqno_non_indenting_brace_by_ix_] = {}; - $self->[_rreduce_vertical_tightness_by_seqno_] = {}; + $self->[_rseqno_non_indenting_brace_by_ix_] = {}; + $self->[_rmax_vertical_tightness_] = {}; + + $self->[_no_vertical_tightness_flags_] = 0; # This flag will be updated later by a call to get_save_logfile() $self->[_save_logfile_] = defined($logger_object); @@ -1073,7 +1199,7 @@ sub check_token_array { ); @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys); - } + } ## end BEGIN sub check_line_hashes { my $self = shift; @@ -1107,7 +1233,7 @@ sub check_token_array { $input_stream_name = $logger_object->get_input_stream_name(); } return $input_stream_name; - } + } ## end sub get_input_stream_name # interface to Perl::Tidy::Logger routines sub warning { @@ -1122,7 +1248,7 @@ sub check_token_array { $logger_object->complain($msg); } return; - } + } ## end sub complain sub write_logfile_entry { my @msg = @_; @@ -1130,21 +1256,21 @@ sub check_token_array { $logger_object->write_logfile_entry(@msg); } return; - } + } ## end sub write_logfile_entry sub get_saw_brace_error { if ($logger_object) { return $logger_object->get_saw_brace_error(); } return; - } + } ## end sub get_saw_brace_error sub we_are_at_the_last_line { if ($logger_object) { $logger_object->we_are_at_the_last_line(); } return; - } + } ## end sub we_are_at_the_last_line } ## end closure for logger routines @@ -1163,7 +1289,7 @@ sub check_token_array { $diagnostics_object->write_diagnostics($msg); } return; - } + } ## end sub write_diagnostics } ## end closure for diagnostics routines sub get_convergence_check { @@ -1171,11 +1297,6 @@ sub get_convergence_check { return $self->[_converged_]; } -sub get_added_semicolon_count { - my $self = shift; - return $self->[_added_semicolon_count_]; -} - sub get_output_line_number { my ($self) = @_; my $vao = $self->[_vertical_aligner_object_]; @@ -1188,7 +1309,7 @@ sub want_blank_line { my $file_writer_object = $self->[_file_writer_object_]; $file_writer_object->want_blank_line(); return; -} +} ## end sub want_blank_line sub write_unindented_line { my ( $self, $line ) = @_; @@ -1196,7 +1317,7 @@ sub write_unindented_line { my $file_writer_object = $self->[_file_writer_object_]; $file_writer_object->write_line($line); return; -} +} ## end sub write_unindented_line sub consecutive_nonblank_lines { my ($self) = @_; @@ -1204,21 +1325,7 @@ sub consecutive_nonblank_lines { my $vao = $self->[_vertical_aligner_object_]; return $file_writer_object->get_consecutive_nonblank_lines() + $vao->get_cached_line_count(); -} - -sub max { - my (@vals) = @_; - my $max = shift @vals; - for (@vals) { $max = $_ > $max ? $_ : $max } - return $max; -} - -sub min { - my (@vals) = @_; - my $min = shift @vals; - for (@vals) { $min = $_ < $min ? $_ : $min } - return $min; -} +} ## end sub consecutive_nonblank_lines sub split_words { @@ -1241,16 +1348,18 @@ sub check_options { # and to configure the control hashes to them. $rOpts = shift; + $controlled_comma_style = 0; + initialize_whitespace_hashes(); initialize_bond_strength_hashes(); # This function must be called early to get hashes with grep initialized - initialize_grep_and_friends( $rOpts->{'grep-alias-list'} ); + initialize_grep_and_friends(); # Make needed regex patterns for matching text. # NOTE: sub_matching_patterns must be made first because later patterns use # them; see RT #133130. - make_sub_matching_pattern(); + make_sub_matching_pattern(); # must be first pattern made make_static_block_comment_pattern(); make_static_side_comment_pattern(); make_closing_side_comment_prefix(); @@ -1290,18 +1399,17 @@ sub check_options { } make_bli_pattern(); + make_bl_pattern(); + make_block_brace_vertical_tightness_pattern(); + make_blank_line_pattern(); - make_keyword_group_list_pattern(); - # Make initial list of desired one line block types - # They will be modified by 'prepare_cuddled_block_types' - # NOTE: this line must come after is_sort_map_grep_eval is - # initialized in sub 'initialize_grep_and_friends' - %want_one_line_block = %is_sort_map_grep_eval; + make_keyword_group_list_pattern(); prepare_cuddled_block_types(); + if ( $rOpts->{'dump-cuddled-block-list'} ) { dump_cuddled_block_list(*STDOUT); Exit(0); @@ -1447,88 +1555,9 @@ EOM Exit(0); } - # default keywords for which space is introduced before an opening paren - # (at present, including them messes up vertical alignment) - my @sak = qw(my local our and or xor err eq ne if else elsif until - unless while for foreach return switch case given when catch); - %space_after_keyword = map { $_ => 1 } @sak; - - # first remove any or all of these if desired - if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) { - - # -nsak='*' selects all the above keywords - if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) } - @space_after_keyword{@q} = (0) x scalar(@q); - } - - # then allow user to add to these defaults - if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) { - @space_after_keyword{@q} = (1) x scalar(@q); - } - - # implement user break preferences - my $break_after = sub { - my @toks = @_; - foreach my $tok (@toks) { - if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/: - my $lbs = $left_bond_strength{$tok}; - my $rbs = $right_bond_strength{$tok}; - if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) { - ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = - ( $lbs, $rbs ); - } - } - return; - }; - - my $break_before = sub { - my @toks = @_; - foreach my $tok (@toks) { - my $lbs = $left_bond_strength{$tok}; - my $rbs = $right_bond_strength{$tok}; - if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) { - ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = - ( $lbs, $rbs ); - } - } - return; - }; - - $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} ); - $break_before->(@all_operators) - if ( $rOpts->{'break-before-all-operators'} ); - - $break_after->( split_words( $rOpts->{'want-break-after'} ) ); - $break_before->( split_words( $rOpts->{'want-break-before'} ) ); - - # make note if breaks are before certain key types - %want_break_before = (); - foreach my $tok ( @all_operators, ',' ) { - $want_break_before{$tok} = - $left_bond_strength{$tok} < $right_bond_strength{$tok}; - } - - # Coordinate ?/: breaks, which must be similar - # The small strength 0.01 which is added is 1% of the strength of one - # indentation level and seems to work okay. - if ( !$want_break_before{':'} ) { - $want_break_before{'?'} = $want_break_before{':'}; - $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01; - $left_bond_strength{'?'} = NO_BREAK; - } + initialize_space_after_keyword(); - # Only make a hash entry for the next parameters if values are defined. - # That allows a quick check to be made later. - %break_before_container_types = (); - for ( $rOpts->{'break-before-hash-brace'} ) { - $break_before_container_types{'{'} = $_ if $_ && $_ > 0; - } - for ( $rOpts->{'break-before-square-bracket'} ) { - $break_before_container_types{'['} = $_ if $_ && $_ > 0; - } - for ( $rOpts->{'break-before-paren'} ) { - $break_before_container_types{'('} = $_ if $_ && $_ > 0; - } + initialize_token_break_preferences(); #-------------------------------------------------------------- # The combination -lp -iob -vmll -bbx=2 can be unstable (b1266) @@ -1567,6 +1596,25 @@ EOM ##Warn("Increased -ci=n to n=2 for stability with -lp and -vmll\n"); } + #----------------------------------------------------------- + # The combination -lp -vmll -atc -dtc can be unstable + #----------------------------------------------------------- + # This fixes b1386 b1387 b1388 which had -wtc='b' + # Updated to to include any -wtc to fix b1426 + if ( $rOpts->{'variable-maximum-line-length'} + && $rOpts->{'line-up-parentheses'} + && $rOpts->{'add-trailing-commas'} + && $rOpts->{'delete-trailing-commas'} + && $rOpts->{'want-trailing-commas'} ) + { + $rOpts->{'delete-trailing-commas'} = 0; +## Issuing a warning message causes trouble with test cases, and this combo is +## so rare that it is unlikely to not occur in practice. So skip warning. +## Warn( +##"The combination -vmll -lp -atc -dtc can be unstable; turning off -dtc\n" +## ); + } + %container_indentation_options = (); foreach my $pair ( [ 'break-before-hash-brace-and-indent', '{' ], @@ -1581,10 +1629,13 @@ EOM # (1) -lp is not compatible with opt=2, silently set to opt=0 # (2) opt=0 and 2 give same result if -i=-ci; but opt=0 is faster + # (3) set opt=0 if -i < -ci (can be unstable, case b1355) if ( $opt == 2 ) { - if ( $rOpts->{'line-up-parentheses'} - || $rOpts->{'indent-columns'} == - $rOpts->{'continuation-indentation'} ) + if ( + $rOpts->{'line-up-parentheses'} + || ( $rOpts->{'indent-columns'} <= + $rOpts->{'continuation-indentation'} ) + ) { $opt = 0; } @@ -1593,88 +1644,19 @@ EOM } } - # Define here tokens which may follow the closing brace of a do statement - # on the same line, as in: - # } while ( $something); - my @dof = qw(until while unless if ; : ); - push @dof, ','; - @is_do_follower{@dof} = (1) x scalar(@dof); - - # what can follow a multi-line anonymous sub definition closing curly: - my @asf = qw# ; : => or and && || ~~ !~~ ) #; - push @asf, ','; - @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf); + $right_bond_strength{'{'} = WEAK; + $left_bond_strength{'{'} = VERY_STRONG; - # what can follow a one-line anonymous sub closing curly: - # one-line anonymous subs also have ']' here... - # see tk3.t and PP.pm - my @asf1 = qw# ; : => or and && || ) ] ~~ !~~ #; - push @asf1, ','; - @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1); - - # What can follow a closing curly of a block - # which is not an if/elsif/else/do/sort/map/grep/eval/sub - # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl' - my @obf = qw# ; : => or and && || ) #; - push @obf, ','; - @is_other_brace_follower{@obf} = (1) x scalar(@obf); - - $right_bond_strength{'{'} = WEAK; - $left_bond_strength{'{'} = VERY_STRONG; - - # make -l=0 equal to -l=infinite - if ( !$rOpts->{'maximum-line-length'} ) { - $rOpts->{'maximum-line-length'} = 1_000_000; - } + # make -l=0 equal to -l=infinite + if ( !$rOpts->{'maximum-line-length'} ) { + $rOpts->{'maximum-line-length'} = 1_000_000; + } # make -lbl=0 equal to -lbl=infinite if ( !$rOpts->{'long-block-line-count'} ) { $rOpts->{'long-block-line-count'} = 1_000_000; } - my $ole = $rOpts->{'output-line-ending'}; - if ($ole) { - my %endings = ( - dos => "\015\012", - win => "\015\012", - mac => "\015", - unix => "\012", - ); - - # Patch for RT #99514, a memoization issue. - # Normally, the user enters one of 'dos', 'win', etc, and we change the - # value in the options parameter to be the corresponding line ending - # character. But, if we are using memoization, on later passes through - # here the option parameter will already have the desired ending - # character rather than the keyword 'dos', 'win', etc. So - # we must check to see if conversion has already been done and, if so, - # bypass the conversion step. - my %endings_inverted = ( - "\015\012" => 'dos', - "\015\012" => 'win', - "\015" => 'mac', - "\012" => 'unix', - ); - - if ( defined( $endings_inverted{$ole} ) ) { - - # we already have valid line ending, nothing more to do - } - else { - $ole = lc $ole; - unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) { - my $str = join SPACE, keys %endings; - Die(<{'preserve-line-endings'} ) { - Warn("Ignoring -ple; conflicts with -ole\n"); - $rOpts->{'preserve-line-endings'} = undef; - } - } - } - # hashes used to simplify setting whitespace %tightness = ( '{' => $rOpts->{'brace-tightness'}, @@ -1684,12 +1666,6 @@ EOM '[' => $rOpts->{'square-bracket-tightness'}, ']' => $rOpts->{'square-bracket-tightness'}, ); - %matching_token = ( - '{' => '}', - '(' => ')', - '[' => ']', - '?' => ':', - ); if ( $rOpts->{'ignore-old-breakpoints'} ) { @@ -1740,235 +1716,44 @@ EOM initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-after'}, 'kba', \%keep_break_after_type ); - #------------------------------------------------------------ - # Make global vars for frequently used options for efficiency - #------------------------------------------------------------ - - $rOpts_add_newlines = $rOpts->{'add-newlines'}; - $rOpts_add_whitespace = $rOpts->{'add-whitespace'}; - $rOpts_blank_lines_after_opening_block = - $rOpts->{'blank-lines-after-opening-block'}; - $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'}; - $rOpts_block_brace_vertical_tightness = - $rOpts->{'block-brace-vertical-tightness'}; - $rOpts_break_after_labels = $rOpts->{'break-after-labels'}; - $rOpts_break_at_old_attribute_breakpoints = - $rOpts->{'break-at-old-attribute-breakpoints'}; - $rOpts_break_at_old_comma_breakpoints = - $rOpts->{'break-at-old-comma-breakpoints'}; - $rOpts_break_at_old_keyword_breakpoints = - $rOpts->{'break-at-old-keyword-breakpoints'}; - $rOpts_break_at_old_logical_breakpoints = - $rOpts->{'break-at-old-logical-breakpoints'}; - $rOpts_break_at_old_semicolon_breakpoints = - $rOpts->{'break-at-old-semicolon-breakpoints'}; - $rOpts_break_at_old_ternary_breakpoints = - $rOpts->{'break-at-old-ternary-breakpoints'}; - $rOpts_break_open_compact_parens = $rOpts->{'break-open-compact-parens'}; - $rOpts_closing_side_comments = $rOpts->{'closing-side-comments'}; - $rOpts_closing_side_comment_else_flag = - $rOpts->{'closing-side-comment-else-flag'}; - $rOpts_closing_side_comment_maximum_text = - $rOpts->{'closing-side-comment-maximum-text'}; - $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'}; - $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'}; - $rOpts_delete_closing_side_comments = - $rOpts->{'delete-closing-side-comments'}; - $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'}; - $rOpts_extended_continuation_indentation = - $rOpts->{'extended-continuation-indentation'}; - $rOpts_delete_side_comments = $rOpts->{'delete-side-comments'}; - $rOpts_format_skipping = $rOpts->{'format-skipping'}; - $rOpts_freeze_whitespace = $rOpts->{'freeze-whitespace'}; - $rOpts_function_paren_vertical_alignment = - $rOpts->{'function-paren-vertical-alignment'}; - $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'}; - $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'}; - $rOpts_ignore_side_comment_lengths = - $rOpts->{'ignore-side-comment-lengths'}; - $rOpts_indent_closing_brace = $rOpts->{'indent-closing-brace'}; - $rOpts_indent_columns = $rOpts->{'indent-columns'}; - $rOpts_indent_only = $rOpts->{'indent-only'}; - $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'}; - $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'}; - $rOpts_extended_line_up_parentheses = - $rOpts->{'extended-line-up-parentheses'}; - $rOpts_logical_padding = $rOpts->{'logical-padding'}; - $rOpts_maximum_consecutive_blank_lines = - $rOpts->{'maximum-consecutive-blank-lines'}; - $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'}; - $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; - $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'}; - $rOpts_opening_brace_always_on_right = - $rOpts->{'opening-brace-always-on-right'}; - $rOpts_outdent_keywords = $rOpts->{'outdent-keywords'}; - $rOpts_outdent_labels = $rOpts->{'outdent-labels'}; - $rOpts_outdent_long_comments = $rOpts->{'outdent-long-comments'}; - $rOpts_outdent_long_quotes = $rOpts->{'outdent-long-quotes'}; - $rOpts_outdent_static_block_comments = - $rOpts->{'outdent-static-block-comments'}; - $rOpts_recombine = $rOpts->{'recombine'}; - $rOpts_short_concatenation_item_length = - $rOpts->{'short-concatenation-item-length'}; - $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'}; - $rOpts_static_block_comments = $rOpts->{'static-block-comments'}; - $rOpts_sub_alias_list = $rOpts->{'sub-alias-list'}; - $rOpts_tee_block_comments = $rOpts->{'tee-block-comments'}; - $rOpts_tee_pod = $rOpts->{'tee-pod'}; - $rOpts_tee_side_comments = $rOpts->{'tee-side-comments'}; - $rOpts_valign = $rOpts->{'valign'}; - $rOpts_valign_code = $rOpts->{'valign-code'}; - $rOpts_valign_side_comments = $rOpts->{'valign-side-comments'}; - $rOpts_variable_maximum_line_length = - $rOpts->{'variable-maximum-line-length'}; - - # Note that both opening and closing tokens can access the opening - # and closing flags of their container types. - %opening_vertical_tightness = ( - '(' => $rOpts->{'paren-vertical-tightness'}, - '{' => $rOpts->{'brace-vertical-tightness'}, - '[' => $rOpts->{'square-bracket-vertical-tightness'}, - ')' => $rOpts->{'paren-vertical-tightness'}, - '}' => $rOpts->{'brace-vertical-tightness'}, - ']' => $rOpts->{'square-bracket-vertical-tightness'}, - ); - - %closing_vertical_tightness = ( - '(' => $rOpts->{'paren-vertical-tightness-closing'}, - '{' => $rOpts->{'brace-vertical-tightness-closing'}, - '[' => $rOpts->{'square-bracket-vertical-tightness-closing'}, - ')' => $rOpts->{'paren-vertical-tightness-closing'}, - '}' => $rOpts->{'brace-vertical-tightness-closing'}, - ']' => $rOpts->{'square-bracket-vertical-tightness-closing'}, - ); - - # assume flag for '>' same as ')' for closing qw quotes - %closing_token_indentation = ( - ')' => $rOpts->{'closing-paren-indentation'}, - '}' => $rOpts->{'closing-brace-indentation'}, - ']' => $rOpts->{'closing-square-bracket-indentation'}, - '>' => $rOpts->{'closing-paren-indentation'}, - ); - - # flag indicating if any closing tokens are indented - $some_closing_token_indentation = - $rOpts->{'closing-paren-indentation'} - || $rOpts->{'closing-brace-indentation'} - || $rOpts->{'closing-square-bracket-indentation'} - || $rOpts->{'indent-closing-brace'}; - - %opening_token_right = ( - '(' => $rOpts->{'opening-paren-right'}, - '{' => $rOpts->{'opening-hash-brace-right'}, - '[' => $rOpts->{'opening-square-bracket-right'}, - ); - - %stack_opening_token = ( - '(' => $rOpts->{'stack-opening-paren'}, - '{' => $rOpts->{'stack-opening-hash-brace'}, - '[' => $rOpts->{'stack-opening-square-bracket'}, - ); - - %stack_closing_token = ( - ')' => $rOpts->{'stack-closing-paren'}, - '}' => $rOpts->{'stack-closing-hash-brace'}, - ']' => $rOpts->{'stack-closing-square-bracket'}, - ); - - # Create a table of maximum line length vs level for later efficient use. - # We will make the tables very long to be sure it will not be exceeded. - # But we have to choose a fixed length. A check will be made at the start - # of sub 'finish_formatting' to be sure it is not exceeded. Note, some of - # my standard test problems have indentation levels of about 150, so this - # should be fairly large. If the choice of a maximum level ever becomes - # an issue then these table values could be returned in a sub with a simple - # memoization scheme. - - # Also create a table of the maximum spaces available for text due to the - # level only. If a line has continuation indentation, then that space must - # be subtracted from the table value. This table is used for preliminary - # estimates in welding, extended_ci, BBX, and marking short blocks. - use constant LEVEL_TABLE_MAX => 1000; - - # The basic scheme: - foreach my $level ( 0 .. LEVEL_TABLE_MAX ) { - my $indent = $level * $rOpts_indent_columns; - $maximum_line_length_at_level[$level] = $rOpts_maximum_line_length; - $maximum_text_length_at_level[$level] = - $rOpts_maximum_line_length - $indent; - } - - # Correct the maximum_text_length table if the -wc=n flag is used - $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'}; - if ($rOpts_whitespace_cycle) { - if ( $rOpts_whitespace_cycle > 0 ) { - foreach my $level ( 0 .. LEVEL_TABLE_MAX ) { - my $level_mod = $level % $rOpts_whitespace_cycle; - my $indent = $level_mod * $rOpts_indent_columns; - $maximum_text_length_at_level[$level] = - $rOpts_maximum_line_length - $indent; - } - } - else { - $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'} = 0; + # Modify %keep_break_before and %keep_break_after to avoid conflicts + # with %want_break_before; fixes b1436. + # This became necessary after breaks for some tokens were converted + # from hard to soft (see b1433). + # We could do this for all tokens, but to minimize changes to existing + # code we currently only do this for the soft break tokens. + foreach my $key ( keys %keep_break_before_type ) { + if ( defined( $want_break_before{$key} ) + && !$want_break_before{$key} + && $is_soft_keep_break_type{$key} ) + { + $keep_break_after_type{$key} = $keep_break_before_type{$key}; + delete $keep_break_before_type{$key}; } } - - # Correct the tables if the -vmll flag is used. These values override the - # previous values. - if ($rOpts_variable_maximum_line_length) { - foreach my $level ( 0 .. LEVEL_TABLE_MAX ) { - $maximum_text_length_at_level[$level] = $rOpts_maximum_line_length; - $maximum_line_length_at_level[$level] = - $rOpts_maximum_line_length + $level * $rOpts_indent_columns; + foreach my $key ( keys %keep_break_after_type ) { + if ( defined( $want_break_before{$key} ) + && $want_break_before{$key} + && $is_soft_keep_break_type{$key} ) + { + $keep_break_before_type{$key} = $keep_break_after_type{$key}; + delete $keep_break_after_type{$key}; } } - # Define two measures of indentation level, alpha and beta, at which some - # formatting features come under stress and need to start shutting down. - # Some combination of the two will be used to shut down different - # formatting features. - # Put a reasonable upper limit on stress level (say 100) in case the - # whitespace-cycle variable is used. - my $stress_level_limit = min( 100, LEVEL_TABLE_MAX ); + $controlled_comma_style ||= $keep_break_before_type{','}; + $controlled_comma_style ||= $keep_break_after_type{','}; - # Find stress_level_alpha, targeted at very short maximum line lengths. - $stress_level_alpha = $stress_level_limit + 1; - foreach my $level_test ( 0 .. $stress_level_limit ) { - my $max_len = $maximum_text_length_at_level[ $level_test + 1 ]; - my $excess_inside_space = - $max_len - - $rOpts_continuation_indentation - - $rOpts_indent_columns - 8; - if ( $excess_inside_space <= 0 ) { - $stress_level_alpha = $level_test; - last; - } - } + initialize_global_option_vars(); - # Find stress level beta, a stress level targeted at formatting - # at deep levels near the maximum line length. We start increasing - # from zero and stop at the first level which shows no more space. + initialize_line_length_vars(); # after 'initialize_global_option_vars' - # 'const' is a fixed number of spaces for a typical variable. - # Cases b1197-b1204 work ok with const=12 but not with const=8 - my $const = 16; - my $denom = max( 1, $rOpts_indent_columns ); - $stress_level_beta = 0; - foreach my $level ( 0 .. $stress_level_limit ) { - my $remaining_cycles = max( - 0, - ( - $maximum_text_length_at_level[$level] - - $rOpts_continuation_indentation - $const - ) / $denom - ); - last if ( $remaining_cycles <= 3 ); # 2 does not work - $stress_level_beta = $level; - } + initialize_trailing_comma_rules(); # after 'initialize_line_length_vars' initialize_weld_nested_exclusion_rules(); + initialize_weld_fat_comma_rules(); + %line_up_parentheses_control_hash = (); $line_up_parentheses_control_is_lxpl = 1; my $lpxl = $rOpts->{'line-up-parentheses-exclusion-list'}; @@ -1995,26 +1780,54 @@ EOM use constant ALIGN_GREP_ALIASES => 0; sub initialize_grep_and_friends { - my ($str) = @_; # Initialize or re-initialize hashes with 'grep' and grep aliases. This # must be done after each set of options because new grep aliases may be # used. - # re-initialize the hash ... this is critical! + # re-initialize the hashes ... this is critical! %is_sort_map_grep = (); my @q = qw(sort map grep); @is_sort_map_grep{@q} = (1) x scalar(@q); + my $olbxl = $rOpts->{'one-line-block-exclusion-list'}; + my %is_olb_exclusion_word; + if ( defined($olbxl) ) { + my @list = split_words($olbxl); + if (@list) { + @is_olb_exclusion_word{@list} = (1) x scalar(@list); + } + } + + # Make the list of block types which may be re-formed into one line. + # They will be modified with the grep-alias-list below and + # by sub 'prepare_cuddled_block_types'. + # Note that it is essential to always re-initialize the hash here: + %want_one_line_block = (); + if ( !$is_olb_exclusion_word{'*'} ) { + foreach (qw(sort map grep eval)) { + if ( !$is_olb_exclusion_word{$_} ) { $want_one_line_block{$_} = 1 } + } + } + # Note that any 'grep-alias-list' string has been preprocessed to be a # trimmed, space-separated list. + my $str = $rOpts->{'grep-alias-list'}; my @grep_aliases = split /\s+/, $str; - @{is_sort_map_grep}{@grep_aliases} = (1) x scalar(@grep_aliases); - ##@q = qw(sort map grep eval); - %is_sort_map_grep_eval = %is_sort_map_grep; - $is_sort_map_grep_eval{'eval'} = 1; + if (@grep_aliases) { + + @{is_sort_map_grep}{@grep_aliases} = (1) x scalar(@grep_aliases); + + if ( $want_one_line_block{'grep'} ) { + @{want_one_line_block}{@grep_aliases} = (1) x scalar(@grep_aliases); + } + } + + ##@q = qw(sort map grep eval); + %is_sort_map_grep_eval = %is_sort_map_grep; + $is_sort_map_grep_eval{'eval'} = 1; ##@q = qw(sort map grep eval do); %is_sort_map_grep_eval_do = %is_sort_map_grep_eval; @@ -2181,6 +1994,27 @@ EOM return; } ## end sub initialize_weld_nested_exclusion_rules +sub initialize_weld_fat_comma_rules { + + # Initialize a hash controlling which opening token types can be + # welded around a fat comma + %weld_fat_comma_rules = (); + + # The -wfc flag turns on welding of '=>' after an opening paren + if ( $rOpts->{'weld-fat-comma'} ) { $weld_fat_comma_rules{'('} = 1 } + + # This could be generalized in the future by introducing a parameter + # -weld-fat-comma-after=str (-wfca=str), where str contains any of: + # * { [ ( + # to indicate which opening parens may weld to a subsequent '=>' + + # The flag -wfc would then be equivalent to -wfca='(' + + # This has not been done because it is not yet clear how useful + # this generalization would be. + return; +} ## end sub initialize_weld_fat_comma_rules + sub initialize_line_up_parentheses_control_hash { my ( $str, $opt_name ) = @_; return unless ($str); @@ -2289,6 +2123,100 @@ EOM return; } ## end sub initialize_line_up_parentheses_control_hash +sub initialize_space_after_keyword { + + # default keywords for which space is introduced before an opening paren + # (at present, including them messes up vertical alignment) + my @sak = qw(my local our and or xor err eq ne if else elsif until + unless while for foreach return switch case given when catch); + %space_after_keyword = map { $_ => 1 } @sak; + + # first remove any or all of these if desired + if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) { + + # -nsak='*' selects all the above keywords + if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) } + @space_after_keyword{@q} = (0) x scalar(@q); + } + + # then allow user to add to these defaults + if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) { + @space_after_keyword{@q} = (1) x scalar(@q); + } + + return; +} ## end sub initialize_space_after_keyword + +sub initialize_token_break_preferences { + + # implement user break preferences + my $break_after = sub { + my @toks = @_; + foreach my $tok (@toks) { + if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/: + if ( $tok eq ',' ) { $controlled_comma_style = 1 } + my $lbs = $left_bond_strength{$tok}; + my $rbs = $right_bond_strength{$tok}; + if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) { + ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = + ( $lbs, $rbs ); + } + } + return; + }; + + my $break_before = sub { + my @toks = @_; + foreach my $tok (@toks) { + if ( $tok eq ',' ) { $controlled_comma_style = 1 } + my $lbs = $left_bond_strength{$tok}; + my $rbs = $right_bond_strength{$tok}; + if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) { + ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = + ( $lbs, $rbs ); + } + } + return; + }; + + $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} ); + $break_before->(@all_operators) + if ( $rOpts->{'break-before-all-operators'} ); + + $break_after->( split_words( $rOpts->{'want-break-after'} ) ); + $break_before->( split_words( $rOpts->{'want-break-before'} ) ); + + # make note if breaks are before certain key types + %want_break_before = (); + foreach my $tok ( @all_operators, ',' ) { + $want_break_before{$tok} = + $left_bond_strength{$tok} < $right_bond_strength{$tok}; + } + + # Coordinate ?/: breaks, which must be similar + # The small strength 0.01 which is added is 1% of the strength of one + # indentation level and seems to work okay. + if ( !$want_break_before{':'} ) { + $want_break_before{'?'} = $want_break_before{':'}; + $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01; + $left_bond_strength{'?'} = NO_BREAK; + } + + # Only make a hash entry for the next parameters if values are defined. + # That allows a quick check to be made later. + %break_before_container_types = (); + for ( $rOpts->{'break-before-hash-brace'} ) { + $break_before_container_types{'{'} = $_ if $_ && $_ > 0; + } + for ( $rOpts->{'break-before-square-bracket'} ) { + $break_before_container_types{'['} = $_ if $_ && $_ > 0; + } + for ( $rOpts->{'break-before-paren'} ) { + $break_before_container_types{'('} = $_ if $_ && $_ > 0; + } + return; +} ## end sub initialize_token_break_preferences + use constant DEBUG_KB => 0; sub initialize_keep_old_breakpoints { @@ -2307,11 +2235,12 @@ EOM # Ignore kbb='(' and '[' and '{': can cause unstable math formatting # (issues b1346, b1347, b1348) and likewise ignore kba=')' and ']' and '}' + # Also always ignore ? and : (b1440 and b1433-b1439) if ( $short_name eq 'kbb' ) { - @list = grep { !m/[\(\[\{]/ } @list; + @list = grep { !m/[\(\[\{\?\:]/ } @list; } elsif ( $short_name eq 'kba' ) { - @list = grep { !m/[\)\]\}]/ } @list; + @list = grep { !m/[\)\]\}\?\:]/ } @list; } # pull out any any leading container code, like f( or *{ @@ -2379,57 +2308,433 @@ EOM } ## end sub initialize_keep_old_breakpoints -sub initialize_whitespace_hashes { +sub initialize_global_option_vars { - # This is called once before formatting begins to initialize these global - # hashes, which control the use of whitespace around tokens: - # - # %binary_ws_rules - # %want_left_space - # %want_right_space - # %space_after_keyword - # - # Many token types are identical to the tokens themselves. - # See the tokenizer for a complete list. Here are some special types: - # k = perl keyword - # f = semicolon in for statement - # m = unary minus - # p = unary plus - # Note that :: is excluded since it should be contained in an identifier - # Note that '->' is excluded because it never gets space - # parentheses and brackets are excluded since they are handled specially - # curly braces are included but may be overridden by logic, such as - # newline logic. + #------------------------------------------------------------ + # Make global vars for frequently used options for efficiency + #------------------------------------------------------------ - # NEW_TOKENS: create a whitespace rule here. This can be as - # simple as adding your new letter to @spaces_both_sides, for - # example. + $rOpts_add_newlines = $rOpts->{'add-newlines'}; + $rOpts_add_trailing_commas = $rOpts->{'add-trailing-commas'}; + $rOpts_add_whitespace = $rOpts->{'add-whitespace'}; + $rOpts_blank_lines_after_opening_block = + $rOpts->{'blank-lines-after-opening-block'}; + $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'}; + $rOpts_block_brace_vertical_tightness = + $rOpts->{'block-brace-vertical-tightness'}; + $rOpts_brace_follower_vertical_tightness = + $rOpts->{'brace-follower-vertical-tightness'}; + $rOpts_break_after_labels = $rOpts->{'break-after-labels'}; + $rOpts_break_at_old_attribute_breakpoints = + $rOpts->{'break-at-old-attribute-breakpoints'}; + $rOpts_break_at_old_comma_breakpoints = + $rOpts->{'break-at-old-comma-breakpoints'}; + $rOpts_break_at_old_keyword_breakpoints = + $rOpts->{'break-at-old-keyword-breakpoints'}; + $rOpts_break_at_old_logical_breakpoints = + $rOpts->{'break-at-old-logical-breakpoints'}; + $rOpts_break_at_old_semicolon_breakpoints = + $rOpts->{'break-at-old-semicolon-breakpoints'}; + $rOpts_break_at_old_ternary_breakpoints = + $rOpts->{'break-at-old-ternary-breakpoints'}; + $rOpts_break_open_compact_parens = $rOpts->{'break-open-compact-parens'}; + $rOpts_closing_side_comments = $rOpts->{'closing-side-comments'}; + $rOpts_closing_side_comment_else_flag = + $rOpts->{'closing-side-comment-else-flag'}; + $rOpts_closing_side_comment_maximum_text = + $rOpts->{'closing-side-comment-maximum-text'}; + $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'}; + $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'}; + $rOpts_cuddled_paren_brace = $rOpts->{'cuddled-paren-brace'}; + $rOpts_delete_closing_side_comments = + $rOpts->{'delete-closing-side-comments'}; + $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'}; + $rOpts_extended_continuation_indentation = + $rOpts->{'extended-continuation-indentation'}; + $rOpts_delete_side_comments = $rOpts->{'delete-side-comments'}; + $rOpts_delete_trailing_commas = $rOpts->{'delete-trailing-commas'}; + $rOpts_delete_weld_interfering_commas = + $rOpts->{'delete-weld-interfering-commas'}; + $rOpts_format_skipping = $rOpts->{'format-skipping'}; + $rOpts_freeze_whitespace = $rOpts->{'freeze-whitespace'}; + $rOpts_function_paren_vertical_alignment = + $rOpts->{'function-paren-vertical-alignment'}; + $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'}; + $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'}; + $rOpts_ignore_side_comment_lengths = + $rOpts->{'ignore-side-comment-lengths'}; + $rOpts_indent_closing_brace = $rOpts->{'indent-closing-brace'}; + $rOpts_indent_columns = $rOpts->{'indent-columns'}; + $rOpts_indent_only = $rOpts->{'indent-only'}; + $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'}; + $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'}; + $rOpts_extended_line_up_parentheses = + $rOpts->{'extended-line-up-parentheses'}; + $rOpts_logical_padding = $rOpts->{'logical-padding'}; + $rOpts_maximum_consecutive_blank_lines = + $rOpts->{'maximum-consecutive-blank-lines'}; + $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'}; + $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; + $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'}; + $rOpts_opening_brace_always_on_right = + $rOpts->{'opening-brace-always-on-right'}; + $rOpts_outdent_keywords = $rOpts->{'outdent-keywords'}; + $rOpts_outdent_labels = $rOpts->{'outdent-labels'}; + $rOpts_outdent_long_comments = $rOpts->{'outdent-long-comments'}; + $rOpts_outdent_long_quotes = $rOpts->{'outdent-long-quotes'}; + $rOpts_outdent_static_block_comments = + $rOpts->{'outdent-static-block-comments'}; + $rOpts_recombine = $rOpts->{'recombine'}; + $rOpts_short_concatenation_item_length = + $rOpts->{'short-concatenation-item-length'}; + $rOpts_space_prototype_paren = $rOpts->{'space-prototype-paren'}; + $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'}; + $rOpts_static_block_comments = $rOpts->{'static-block-comments'}; + $rOpts_tee_block_comments = $rOpts->{'tee-block-comments'}; + $rOpts_tee_pod = $rOpts->{'tee-pod'}; + $rOpts_tee_side_comments = $rOpts->{'tee-side-comments'}; + $rOpts_valign_code = $rOpts->{'valign-code'}; + $rOpts_valign_side_comments = $rOpts->{'valign-side-comments'}; + $rOpts_variable_maximum_line_length = + $rOpts->{'variable-maximum-line-length'}; - my @opening_type = qw< L { ( [ >; - @is_opening_type{@opening_type} = (1) x scalar(@opening_type); + # Note that both opening and closing tokens can access the opening + # and closing flags of their container types. + %opening_vertical_tightness = ( + '(' => $rOpts->{'paren-vertical-tightness'}, + '{' => $rOpts->{'brace-vertical-tightness'}, + '[' => $rOpts->{'square-bracket-vertical-tightness'}, + ')' => $rOpts->{'paren-vertical-tightness'}, + '}' => $rOpts->{'brace-vertical-tightness'}, + ']' => $rOpts->{'square-bracket-vertical-tightness'}, + ); - my @closing_type = qw< R } ) ] >; - @is_closing_type{@closing_type} = (1) x scalar(@closing_type); + %closing_vertical_tightness = ( + '(' => $rOpts->{'paren-vertical-tightness-closing'}, + '{' => $rOpts->{'brace-vertical-tightness-closing'}, + '[' => $rOpts->{'square-bracket-vertical-tightness-closing'}, + ')' => $rOpts->{'paren-vertical-tightness-closing'}, + '}' => $rOpts->{'brace-vertical-tightness-closing'}, + ']' => $rOpts->{'square-bracket-vertical-tightness-closing'}, + ); - my @spaces_both_sides = qw# - + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -= - .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~ - &&= ||= //= <=> A k f w F n C Y U G v - #; + # assume flag for '>' same as ')' for closing qw quotes + %closing_token_indentation = ( + ')' => $rOpts->{'closing-paren-indentation'}, + '}' => $rOpts->{'closing-brace-indentation'}, + ']' => $rOpts->{'closing-square-bracket-indentation'}, + '>' => $rOpts->{'closing-paren-indentation'}, + ); - my @spaces_left_side = qw< - t ! ~ m p { \ h pp mm Z j - >; - push( @spaces_left_side, '#' ); # avoids warning message + # flag indicating if any closing tokens are indented + $some_closing_token_indentation = + $rOpts->{'closing-paren-indentation'} + || $rOpts->{'closing-brace-indentation'} + || $rOpts->{'closing-square-bracket-indentation'} + || $rOpts->{'indent-closing-brace'}; - my @spaces_right_side = qw< - ; } ) ] R J ++ -- **= - >; - push( @spaces_right_side, ',' ); # avoids warning message + %opening_token_right = ( + '(' => $rOpts->{'opening-paren-right'}, + '{' => $rOpts->{'opening-hash-brace-right'}, + '[' => $rOpts->{'opening-square-bracket-right'}, + ); - %want_left_space = (); - %want_right_space = (); - %binary_ws_rules = (); + %stack_opening_token = ( + '(' => $rOpts->{'stack-opening-paren'}, + '{' => $rOpts->{'stack-opening-hash-brace'}, + '[' => $rOpts->{'stack-opening-square-bracket'}, + ); + + %stack_closing_token = ( + ')' => $rOpts->{'stack-closing-paren'}, + '}' => $rOpts->{'stack-closing-hash-brace'}, + ']' => $rOpts->{'stack-closing-square-bracket'}, + ); + return; +} ## end sub initialize_global_option_vars + +sub initialize_line_length_vars { + + # Create a table of maximum line length vs level for later efficient use. + # We will make the tables very long to be sure it will not be exceeded. + # But we have to choose a fixed length. A check will be made at the start + # of sub 'finish_formatting' to be sure it is not exceeded. Note, some of + # my standard test problems have indentation levels of about 150, so this + # should be fairly large. If the choice of a maximum level ever becomes + # an issue then these table values could be returned in a sub with a simple + # memoization scheme. + + # Also create a table of the maximum spaces available for text due to the + # level only. If a line has continuation indentation, then that space must + # be subtracted from the table value. This table is used for preliminary + # estimates in welding, extended_ci, BBX, and marking short blocks. + use constant LEVEL_TABLE_MAX => 1000; + + # The basic scheme: + foreach my $level ( 0 .. LEVEL_TABLE_MAX ) { + my $indent = $level * $rOpts_indent_columns; + $maximum_line_length_at_level[$level] = $rOpts_maximum_line_length; + $maximum_text_length_at_level[$level] = + $rOpts_maximum_line_length - $indent; + } + + # Correct the maximum_text_length table if the -wc=n flag is used + $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'}; + if ($rOpts_whitespace_cycle) { + if ( $rOpts_whitespace_cycle > 0 ) { + foreach my $level ( 0 .. LEVEL_TABLE_MAX ) { + my $level_mod = $level % $rOpts_whitespace_cycle; + my $indent = $level_mod * $rOpts_indent_columns; + $maximum_text_length_at_level[$level] = + $rOpts_maximum_line_length - $indent; + } + } + else { + $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'} = 0; + } + } + + # Correct the tables if the -vmll flag is used. These values override the + # previous values. + if ($rOpts_variable_maximum_line_length) { + foreach my $level ( 0 .. LEVEL_TABLE_MAX ) { + $maximum_text_length_at_level[$level] = $rOpts_maximum_line_length; + $maximum_line_length_at_level[$level] = + $rOpts_maximum_line_length + $level * $rOpts_indent_columns; + } + } + + # Define two measures of indentation level, alpha and beta, at which some + # formatting features come under stress and need to start shutting down. + # Some combination of the two will be used to shut down different + # formatting features. + # Put a reasonable upper limit on stress level (say 100) in case the + # whitespace-cycle variable is used. + my $stress_level_limit = min( 100, LEVEL_TABLE_MAX ); + + # Find stress_level_alpha, targeted at very short maximum line lengths. + $stress_level_alpha = $stress_level_limit + 1; + foreach my $level_test ( 0 .. $stress_level_limit ) { + my $max_len = $maximum_text_length_at_level[ $level_test + 1 ]; + my $excess_inside_space = + $max_len - + $rOpts_continuation_indentation - + $rOpts_indent_columns - 8; + if ( $excess_inside_space <= 0 ) { + $stress_level_alpha = $level_test; + last; + } + } + + # Find stress level beta, a stress level targeted at formatting + # at deep levels near the maximum line length. We start increasing + # from zero and stop at the first level which shows no more space. + + # 'const' is a fixed number of spaces for a typical variable. + # Cases b1197-b1204 work ok with const=12 but not with const=8 + my $const = 16; + my $denom = max( 1, $rOpts_indent_columns ); + $stress_level_beta = 0; + foreach my $level ( 0 .. $stress_level_limit ) { + my $remaining_cycles = max( + 0, + ( + $maximum_text_length_at_level[$level] - + $rOpts_continuation_indentation - $const + ) / $denom + ); + last if ( $remaining_cycles <= 3 ); # 2 does not work + $stress_level_beta = $level; + } + + # This is a combined level which works well for turning off formatting + # features in most cases: + $high_stress_level = min( $stress_level_alpha, $stress_level_beta + 2 ); + + return; +} ## end sub initialize_line_length_vars + +sub initialize_trailing_comma_rules { + + # Setup control hash for trailing commas + + # -wtc=s defines desired trailing comma policy: + # + # =" " stable + # [ both -atc and -dtc ignored ] + # =0 : none + # [requires -dtc; -atc ignored] + # =1 or * : all + # [requires -atc; -dtc ignored] + # =m : multiline lists require trailing comma + # if -atc set => will add missing multiline trailing commas + # if -dtc set => will delete trailing single line commas + # =b or 'bare' (multiline) lists require trailing comma + # if -atc set => will add missing bare trailing commas + # if -dtc set => will delete non-bare trailing commas + # =h or 'hash': single column stable bare lists require trailing comma + # if -atc set will add these + # if -dtc set will delete other trailing commas + + #------------------------------------------------------------------- + # This routine must be called after the alpha and beta stress levels + # have been defined in sub 'initialize_line_length_vars'. + #------------------------------------------------------------------- + + %trailing_comma_rules = (); + + my $rvalid_flags = [qw(0 1 * m b h i)]; + + my $option = $rOpts->{'want-trailing-commas'}; + + if ($option) { + $option =~ s/^\s+//; + $option =~ s/\s+$//; + } + + # We need to use length() here because '0' is a possible option + if ( defined($option) && length($option) ) { + my $error_message; + my %rule_hash; + my @q = @{$rvalid_flags}; + my %is_valid_flag; + @is_valid_flag{@q} = (1) x scalar(@q); + + # handle single character control, such as -wtc='b' + if ( length($option) == 1 ) { + foreach (qw< ) ] } >) { + $rule_hash{$_} = [ $option, EMPTY_STRING ]; + } + } + + # handle multi-character control(s), such as -wtc='[m' or -wtc='k(m' + else { + my @parts = split /\s+/, $option; + foreach my $part (@parts) { + if ( length($part) >= 2 && length($part) <= 3 ) { + my $val = substr( $part, -1, 1 ); + my $key_o = substr( $part, -2, 1 ); + if ( $is_opening_token{$key_o} ) { + my $paren_flag = EMPTY_STRING; + if ( length($part) == 3 ) { + $paren_flag = substr( $part, 0, 1 ); + } + my $key = $matching_token{$key_o}; + $rule_hash{$key} = [ $val, $paren_flag ]; + } + else { + $error_message .= "Unrecognized term: '$part'\n"; + } + } + else { + $error_message .= "Unrecognized term: '$part'\n"; + } + } + } + + # check for valid control characters + if ( !$error_message ) { + foreach my $key ( keys %rule_hash ) { + my $item = $rule_hash{$key}; + my ( $val, $paren_flag ) = @{$item}; + if ( $val && !$is_valid_flag{$val} ) { + my $valid_str = join( SPACE, @{$rvalid_flags} ); + $error_message .= + "Unexpected value '$val'; must be one of: $valid_str\n"; + last; + } + if ($paren_flag) { + if ( $paren_flag !~ /^[kKfFwW]$/ ) { + $error_message .= +"Unexpected paren flag '$paren_flag'; must be one of: k K f F w W\n"; + last; + } + if ( $key ne ')' ) { + $error_message .= +"paren flag '$paren_flag' is only allowed before a '('\n"; + last; + } + } + } + } + + if ($error_message) { + Warn(<' is excluded because it never gets space + # parentheses and brackets are excluded since they are handled specially + # curly braces are included but may be overridden by logic, such as + # newline logic. + + # NEW_TOKENS: create a whitespace rule here. This can be as + # simple as adding your new letter to @spaces_both_sides, for + # example. + + my @spaces_both_sides = qw# + + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -= + .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~ + &&= ||= //= <=> A k f w F n C Y U G v + #; + + my @spaces_left_side = qw< + t ! ~ m p { \ h pp mm Z j + >; + push( @spaces_left_side, '#' ); # avoids warning message + + my @spaces_right_side = qw< + ; } ) ] R J ++ -- **= + >; + push( @spaces_right_side, ',' ); # avoids warning message + + %want_left_space = (); + %want_right_space = (); + %binary_ws_rules = (); # Note that we setting defaults here. Later in processing # the values of %want_left_space and %want_right_space @@ -2509,6 +2814,8 @@ sub initialize_whitespace_hashes { } ## end sub initialize_whitespace_hashes +{ #<<< begin closure set_whitespace_flags + my %is_special_ws_type; my %is_wCUG; my %is_wi; @@ -2517,7 +2824,7 @@ BEGIN { # The following hash is used to skip over needless if tests. # Be sure to update it when adding new checks in its block. - my @q = qw(k w i C m - Q); + my @q = qw(k w C m - Q); push @q, '#'; @is_special_ws_type{@q} = (1) x scalar(@q); @@ -2527,10 +2834,16 @@ BEGIN { @q = qw( w i ); @is_wi{@q} = (1) x scalar(@q); -} +} ## end BEGIN use constant DEBUG_WHITE => 0; +# Hashes to set spaces around container tokens according to their +# sequence numbers. These are set as keywords are examined. +# They are controlled by the -kpit and -kpitl flags. +my %opening_container_inside_ws; +my %closing_container_inside_ws; + sub set_whitespace_flags { # This routine is called once per file to set whitespace flags for that @@ -2547,9 +2860,14 @@ sub set_whitespace_flags { my $self = shift; - my $rLL = $self->[_rLL_]; + my $j_tight_closing_paren = -1; + my $rLL = $self->[_rLL_]; + my $jmax = @{$rLL} - 1; + + %opening_container_inside_ws = (); + %closing_container_inside_ws = (); + my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; - my $jmax = @{$rLL} - 1; my $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'}; my $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'}; @@ -2562,130 +2880,34 @@ sub set_whitespace_flags { my %is_for_foreach = ( 'for' => 1, 'foreach' => 1 ); - my ( $rtokh, $token, $type ); - my $rtokh_last = $rLL->[0]; - my $rtokh_last_last = $rtokh_last; - - my $last_type = EMPTY_STRING; - my $last_token = EMPTY_STRING; - - my $j_tight_closing_paren = -1; - - $rtokh = [ @{ $rLL->[0] } ]; - $token = SPACE; - $type = 'b'; - - $rtokh->[_TOKEN_] = $token; - $rtokh->[_TYPE_] = $type; - $rtokh->[_TYPE_SEQUENCE_] = EMPTY_STRING; - $rtokh->[_LINE_INDEX_] = 0; - - # This is some logic moved to a sub to avoid deep nesting of if stmts - my $ws_in_container = sub { - - my ($j) = @_; - my $ws = WS_YES; - if ( $j + 1 > $jmax ) { return (WS_NO) } - - # Patch to count '-foo' as single token so that - # each of $a{-foo} and $a{foo} and $a{'foo'} do - # not get spaces with default formatting. - my $j_here = $j; - ++$j_here - if ( $token eq '-' - && $last_token eq '{' - && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' ); - - # Patch to count a sign separated from a number as a single token, as - # in the following line. Otherwise, it takes two steps to converge: - # deg2rad(- 0.5) - if ( ( $type eq 'm' || $type eq 'p' ) - && $j < $jmax + 1 - && $rLL->[ $j + 1 ]->[_TYPE_] eq 'b' - && $rLL->[ $j + 2 ]->[_TYPE_] eq 'n' - && $rLL->[ $j + 2 ]->[_TOKEN_] =~ /^\d/ ) - { - $j_here = $j + 2; - } - - # $j_next is where a closing token should be if - # the container has a single token - if ( $j_here + 1 > $jmax ) { return (WS_NO) } - my $j_next = - ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' ) - ? $j_here + 2 - : $j_here + 1; - - if ( $j_next > $jmax ) { return WS_NO } - my $tok_next = $rLL->[$j_next]->[_TOKEN_]; - my $type_next = $rLL->[$j_next]->[_TYPE_]; - - # for tightness = 1, if there is just one token - # within the matching pair, we will keep it tight - if ( - $tok_next eq $matching_token{$last_token} - - # but watch out for this: [ [ ] (misc.t) - && $last_token ne $token - - # double diamond is usually spaced - && $token ne '<<>>' - - ) - { - - # remember where to put the space for the closing paren - $j_tight_closing_paren = $j_next; - return (WS_NO); - } - return (WS_YES); - }; - - # Local hashes to set spaces around container tokens according to their - # sequence numbers. These are set as keywords are examined. - # They are controlled by the -kpit and -kpitl flags. - my %opening_container_inside_ws; - my %closing_container_inside_ws; - my $set_container_ws_by_keyword = sub { + my $last_token = SPACE; + my $last_type = 'b'; - return unless (%keyword_paren_inner_tightness); + my $rtokh_last = [ @{ $rLL->[0] } ]; + $rtokh_last->[_TOKEN_] = $last_token; + $rtokh_last->[_TYPE_] = $last_type; + $rtokh_last->[_TYPE_SEQUENCE_] = EMPTY_STRING; + $rtokh_last->[_LINE_INDEX_] = 0; - my ( $word, $sequence_number ) = @_; - - # We just saw a keyword (or other function name) followed by an opening - # paren. Now check to see if the following paren should have special - # treatment for its inside space. If so we set a hash value using the - # sequence number as key. - if ( $word && $sequence_number ) { - my $tightness = $keyword_paren_inner_tightness{$word}; - if ( defined($tightness) && $tightness != 1 ) { - my $ws_flag = $tightness == 0 ? WS_YES : WS_NO; - $opening_container_inside_ws{$sequence_number} = $ws_flag; - $closing_container_inside_ws{$sequence_number} = $ws_flag; - } - } - return; - }; + my $rtokh_last_last = $rtokh_last; my ( $ws_1, $ws_2, $ws_3, $ws_4 ); # main loop over all tokens to define the whitespace flags - foreach my $j ( 0 .. $jmax ) { + my $last_type_is_opening; + my ( $token, $type ); + my $j = -1; + foreach my $rtokh ( @{$rLL} ) { - if ( $rLL->[$j]->[_TYPE_] eq 'b' ) { + $j++; + + $type = $rtokh->[_TYPE_]; + if ( $type eq 'b' ) { $rwhitespace_flags->[$j] = WS_OPTIONAL; next; } - $rtokh_last_last = $rtokh_last; - - $rtokh_last = $rtokh; - $last_token = $token; - $last_type = $type; - - $rtokh = $rLL->[$j]; $token = $rtokh->[_TOKEN_]; - $type = $rtokh->[_TYPE_]; my $ws; @@ -2695,7 +2917,9 @@ sub set_whitespace_flags { #--------------------------------------------------------------- # /^[L\{\(\[]$/ - if ( $is_opening_type{$last_type} ) { + if ($last_type_is_opening) { + + $last_type_is_opening = 0; my $seqno = $rtokh->[_TYPE_SEQUENCE_]; my $block_type = $rblock_type_of_seqno->{$seqno}; @@ -2754,7 +2978,28 @@ sub set_whitespace_flags { $ws = WS_NO; } else { - $ws = $ws_in_container->($j); + + # find the index of the closing token + my $j_closing = + $self->[_K_closing_container_]->{$last_seqno}; + + # If the closing token is less than five characters ahead + # we must take a closer look + if ( defined($j_closing) + && $j_closing - $j < 5 + && $rLL->[$j_closing]->[_TYPE_SEQUENCE_] eq + $last_seqno ) + { + $ws = + ws_in_container( $j, $j_closing, $rLL, $type, $token, + $last_token ); + if ( $ws == WS_NO ) { + $j_tight_closing_paren = $j_closing; + } + } + else { + $ws = WS_YES; + } } } @@ -2775,17 +3020,10 @@ sub set_whitespace_flags { #--------------------------------------------------------------- # The hash '%is_special_ws_type' significantly speeds up this routine, # but be sure to update it if a new check is added. - # Currently has types: qw(k w i C m - Q #) + # Currently has types: qw(k w C m - Q #) if ( $is_special_ws_type{$type} ) { - if ( $type eq 'i' ) { - - # never a space before -> - if ( substr( $token, 0, 2 ) eq '->' ) { - $ws = WS_NO; - } - } - elsif ( $type eq 'k' ) { + if ( $type eq 'k' ) { # Keywords 'for', 'foreach' are special cases for -kpit since # the opening paren does not always immediately follow the @@ -2809,20 +3047,29 @@ sub set_whitespace_flags { last if ( $rLL->[$jp]->[_LEVEL_] != $level ); # b1236 next unless ( $rLL->[$jp]->[_TOKEN_] eq '(' ); my $seqno_p = $rLL->[$jp]->[_TYPE_SEQUENCE_]; - $set_container_ws_by_keyword->( $token, $seqno_p ); + set_container_ws_by_keyword( $token, $seqno_p ); last; } } } - # retain any space between '-' and bare word - elsif ( $type eq 'w' || $type eq 'C' ) { - $ws = WS_OPTIONAL if $last_type eq '-'; + # handle a comment + elsif ( $type eq '#' ) { - # never a space before -> - if ( substr( $token, 0, 2 ) eq '->' ) { - $ws = WS_NO; - } + # newline before block comment ($j==0), and + # space before side comment ($j>0), so .. + $ws = WS_YES; + + #--------------------------------- + # Nothing more to do for a comment + #--------------------------------- + $rwhitespace_flags->[$j] = $ws; + next; + } + + # retain any space between '-' and bare word + elsif ( $type eq 'w' || $type eq 'C' ) { + $ws = WS_OPTIONAL if $last_type eq '-'; } # retain any space between '-' and bare word; for example @@ -2832,9 +3079,6 @@ sub set_whitespace_flags { $ws = WS_OPTIONAL if ( $last_type eq 'w' ); } - # always space before side comment - elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 } - # space_backslash_quote; RT #123774 <> # allow a space between a backslash and single or double quote # to avoid fooling html formatters @@ -2898,6 +3142,8 @@ sub set_whitespace_flags { # /^[L\{\(\[]$/ elsif ( $is_opening_type{$type} ) { + $last_type_is_opening = 1; + if ( $token eq '(' ) { my $seqno = $rtokh->[_TYPE_SEQUENCE_]; @@ -2925,7 +3171,7 @@ sub set_whitespace_flags { || $space_after_keyword{$last_token} ); # Set inside space flag if requested - $set_container_ws_by_keyword->( $last_token, $seqno ); + set_container_ws_by_keyword( $last_token, $seqno ); } # Space between function and '(' @@ -2943,39 +3189,44 @@ sub set_whitespace_flags { # NOTE: this would be the place to allow spaces between # repeated parens, like () () (), as in case c017, but I # decided that would not be a good idea. + + # Updated to allow detached '->' from tokenizer (issue c140) elsif ( - ##$last_type =~ /^[wCUG]$/ + + # /^[wCUG]$/ $is_wCUG{$last_type} + || ( - ##$last_type =~ /^[wi]$/ + + # /^[wi]$/ $is_wi{$last_type} && ( + + # with prefix '->' or '&' $last_token =~ /^([\&]|->)/ - # or -> or & split from bareword by newline (b1337) - || ( - $last_token =~ /^\w/ - && ( - $rtokh_last_last->[_TYPE_] eq '->' - || ( $rtokh_last_last->[_TYPE_] eq 't' - && $rtokh_last_last->[_TOKEN_] =~ - /^\&\s*$/ ) - ) - ) + # or preceding token '->' (see b1337; c140) + || $rtokh_last_last->[_TYPE_] eq '->' + + # or preceding sub call operator token '&' + || ( $rtokh_last_last->[_TYPE_] eq 't' + && $rtokh_last_last->[_TOKEN_] =~ /^\&\s*$/ ) ) ) ) { - $ws = $rOpts_space_function_paren ? WS_YES : WS_NO; - $set_container_ws_by_keyword->( $last_token, $seqno ); + $ws = + $rOpts_space_function_paren + ? $self->ws_space_function_paren( $j, $rtokh_last_last ) + : WS_NO; + + set_container_ws_by_keyword( $last_token, $seqno ); $ris_function_call_paren->{$seqno} = 1; } # space between something like $i and ( in 'snippets/space2.in' # for $i ( 0 .. 20 ) { - # FIXME: eventually, type 'i' could be split into multiple - # token types so this can be a hardwired rule. elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) { $ws = WS_YES; } @@ -2994,9 +3245,10 @@ sub set_whitespace_flags { $ws = WS_OPTIONAL; } - # keep space between 'sub' and '{' for anonymous sub definition + # keep space between 'sub' and '{' for anonymous sub definition, + # be sure type = 'k' (added for c140) if ( $type eq '{' ) { - if ( $last_token eq 'sub' ) { + if ( $last_token eq 'sub' && $last_type eq 'k' ) { $ws = WS_YES; } @@ -3011,12 +3263,14 @@ sub set_whitespace_flags { } } ## end if ( $is_opening_type{$type} ) { - # always preserver whatever space was used after a possible + # always preserve whatever space was used after a possible # filehandle (except _) or here doc operator if ( - $type ne '#' - && ( ( $last_type eq 'Z' && $last_token ne '_' ) - || $last_type eq 'h' ) + ( + ( $last_type eq 'Z' && $last_token ne '_' ) + || $last_type eq 'h' + ) + && $type ne '#' # no longer required due to early exit for '#' above ) { $ws = WS_OPTIONAL; @@ -3031,8 +3285,10 @@ sub set_whitespace_flags { # Whitespace Rules Section 4: # Use the binary rule table. #--------------------------------------------------------------- - $ws = $binary_ws_rules{$last_type}{$type}; - $ws_4 = $ws if DEBUG_WHITE; + if ( defined( $binary_ws_rules{$last_type}{$type} ) ) { + $ws = $binary_ws_rules{$last_type}{$type}; + $ws_4 = $ws if DEBUG_WHITE; + } #--------------------------------------------------------------- # Whitespace Rules Section 5: @@ -3057,7 +3313,7 @@ sub set_whitespace_flags { # # -1 vs 1 --> -1 # 1 vs -1 --> -1 - if ( !defined($ws) ) { + else { my $wl = $want_left_space{$type}; my $wr = $want_right_space{$last_type}; if ( !defined($wl) ) { @@ -3079,27 +3335,34 @@ sub set_whitespace_flags { # my $msg = new Fax::Send # -recipients => $to, # -data => $data; - if ( $ws == 0 + if ( !$ws && $rtokh->[_LINE_INDEX_] != $rtokh_last->[_LINE_INDEX_] ) { - $ws = 1; + $ws = WS_YES; } $rwhitespace_flags->[$j] = $ws; - if (DEBUG_WHITE) { - my $str = substr( $last_token, 0, 15 ); - $str .= SPACE x ( 16 - length($str) ); - if ( !defined($ws_1) ) { $ws_1 = "*" } - if ( !defined($ws_2) ) { $ws_2 = "*" } - if ( !defined($ws_3) ) { $ws_3 = "*" } - if ( !defined($ws_4) ) { $ws_4 = "*" } - print STDOUT + # remember non-blank, non-comment tokens + $last_token = $token; + $last_type = $type; + $rtokh_last_last = $rtokh_last; + $rtokh_last = $rtokh; + + next if ( !DEBUG_WHITE ); + + my $str = substr( $last_token, 0, 15 ); + $str .= SPACE x ( 16 - length($str) ); + if ( !defined($ws_1) ) { $ws_1 = "*" } + if ( !defined($ws_2) ) { $ws_2 = "*" } + if ( !defined($ws_3) ) { $ws_3 = "*" } + if ( !defined($ws_4) ) { $ws_4 = "*" } + print STDOUT "NEW WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n"; - # reset for next pass - $ws_1 = $ws_2 = $ws_3 = $ws_4 = undef; - } + # reset for next pass + $ws_1 = $ws_2 = $ws_3 = $ws_4 = undef; + } ## end main loop if ( $rOpts->{'tight-secret-operators'} ) { @@ -3110,6 +3373,137 @@ sub set_whitespace_flags { } ## end sub set_whitespace_flags +sub set_container_ws_by_keyword { + + my ( $word, $sequence_number ) = @_; + return unless (%keyword_paren_inner_tightness); + + # We just saw a keyword (or other function name) followed by an opening + # paren. Now check to see if the following paren should have special + # treatment for its inside space. If so we set a hash value using the + # sequence number as key. + if ( $word && $sequence_number ) { + my $tightness = $keyword_paren_inner_tightness{$word}; + if ( defined($tightness) && $tightness != 1 ) { + my $ws_flag = $tightness == 0 ? WS_YES : WS_NO; + $opening_container_inside_ws{$sequence_number} = $ws_flag; + $closing_container_inside_ws{$sequence_number} = $ws_flag; + } + } + return; +} ## end sub set_container_ws_by_keyword + +sub ws_in_container { + + my ( $j, $j_closing, $rLL, $type, $token, $last_token ) = @_; + + # Given: + # $j = index of token following an opening container token + # $type, $token = the type and token at index $j + # $j_closing = closing token of the container + # $last_token = the opening token of the container + # Return: + # WS_NO if there is just one token in the container (with exceptions) + # WS_YES otherwise + + #------------------------------------ + # Look forward for the closing token; + #------------------------------------ + if ( $j + 1 > $j_closing ) { return WS_NO } + + # Patch to count '-foo' as single token so that + # each of $a{-foo} and $a{foo} and $a{'foo'} do + # not get spaces with default formatting. + my $j_here = $j; + ++$j_here + if ( $token eq '-' + && $last_token eq '{' + && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' ); + + # Patch to count a sign separated from a number as a single token, as + # in the following line. Otherwise, it takes two steps to converge: + # deg2rad(- 0.5) + if ( ( $type eq 'm' || $type eq 'p' ) + && $j < $j_closing + 1 + && $rLL->[ $j + 1 ]->[_TYPE_] eq 'b' + && $rLL->[ $j + 2 ]->[_TYPE_] eq 'n' + && $rLL->[ $j + 2 ]->[_TOKEN_] =~ /^\d/ ) + { + $j_here = $j + 2; + } + + # $j_next is where a closing token should be if the container has + # just a "single" token + if ( $j_here + 1 > $j_closing ) { return WS_NO } + my $j_next = + ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' ) + ? $j_here + 2 + : $j_here + 1; + + #----------------------------------------------------------------- + # Now decide: if we get to the closing token we will keep it tight + #----------------------------------------------------------------- + if ( + $j_next == $j_closing + + # OLD PROBLEM: but watch out for this: [ [ ] (misc.t) + # No longer necessary because of the previous check on sequence numbers + ##&& $last_token ne $token + + # double diamond is usually spaced + && $token ne '<<>>' + + ) + { + return WS_NO; + } + + return WS_YES; + +} ## end sub ws_in_container + +sub ws_space_function_paren { + + my ( $self, $j, $rtokh_last_last ) = @_; + + # Called if --space-function-paren is set to see if it might cause + # a problem. The manual warns the user about potential problems with + # this flag. Here we just try to catch one common problem. + + # Given: + # $j = index of '(' after function name + # Return: + # WS_NO if no space + # WS_YES otherwise + + # This was added to fix for issue c166. Ignore -sfp at a possible indirect + # object location. For example, do not convert this: + # print header() ... + # to this: + # print header () ... + # because in this latter form, header may be taken to be a file handle + # instead of a function call. + + # Start with the normal value for -sfp: + my $ws = WS_YES; + + # now check to be sure we don't cause a problem: + my $type_ll = $rtokh_last_last->[_TYPE_]; + my $tok_ll = $rtokh_last_last->[_TOKEN_]; + + # NOTE: this is just a minimal check. For example, we might also check + # for something like this: + # print ( header ( .. + if ( $type_ll eq 'k' && $is_indirect_object_taker{$tok_ll} ) { + $ws = WS_NO; + } + + return $ws; + +} ## end sub ws_space_function_paren + +} ## end closure set_whitespace_flags + sub dump_want_left_space { my $fh = shift; local $LIST_SEPARATOR = "\n"; @@ -3219,7 +3613,7 @@ EOM qw{ ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ }; @{is_special_variable_char}{@q} = (1) x scalar(@q); - } + } ## end BEGIN sub is_essential_whitespace { @@ -3481,7 +3875,7 @@ EOM my $tok = $value->[0]; push @{ $is_leading_secret_token{$tok} }, $value; } - } + } ## end BEGIN sub new_secret_operator_whitespace { @@ -3528,7 +3922,7 @@ EOM } ## End Loop over all operators } ## End loop over all tokens return; - } # End sub + } ## end sub new_secret_operator_whitespace } ## end closure new_secret_operator_whitespace { ## begin closure set_bond_strengths @@ -3878,6 +4272,9 @@ EOM # $a->$b($c); $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG; + # Added for c140 to make 'w ->' and 'i ->' behave the same + $binary_bond_strength{'w'}{'->'} = 1.45 * STRONG; + # Note that the following alternative strength would make the break at the # '->' rather than opening the '('. Both have advantages and disadvantages. # $binary_bond_strength{'i'}{'->'} = 0.5*STRONG + 0.5 * NOMINAL; # @@ -4010,6 +4407,11 @@ EOM my ($self) = @_; + #----------------------------------------------------------------- + # Define a 'bond strength' for each token pair in an output batch. + # See comments above for definition of bond strength. + #----------------------------------------------------------------- + my $rbond_strength_to_go = []; my $rLL = $self->[_rLL_]; @@ -4323,7 +4725,8 @@ EOM elsif ( $type eq 'w' ) { $bond_str = NO_BREAK if ( !$old_breakpoint_to_go[$i] - && substr( $next_nonblank_token, 0, 1 ) eq '/' ); + && substr( $next_nonblank_token, 0, 1 ) eq '/' + && $next_nonblank_type ne '//' ); } $bond_str_2 = $bond_str if (DEBUG_BOND); @@ -4477,7 +4880,8 @@ EOM # be absolutely sure that we do not allow a break. So for # these the nobreak flag exceeds 1 as a signal. Otherwise we # can run into trouble when small tolerances are added. - $strength += 1 if ( $nobreak_to_go[$i] > 1 ); + $strength += 1 + if ( $nobreak_to_go[$i] && $nobreak_to_go[$i] > 1 ); } #--------------------------------------------------------------- @@ -4533,9 +4937,9 @@ sub bad_pattern { # but it should be safe because the pattern has been constructed # by this program. my ($pattern) = @_; - eval "'##'=~/$pattern/"; - return $EVAL_ERROR; -} + my $ok = eval "'##'=~/$pattern/"; + return !defined($ok) || $EVAL_ERROR; +} ## end sub bad_pattern { ## begin closure prepare_cuddled_block_types @@ -5225,6 +5629,22 @@ EOM return; } ## end sub check_sequence_numbers + sub store_block_type { + my ( $self, $block_type, $seqno ) = @_; + + return if ( !$block_type ); + + $self->[_rblock_type_of_seqno_]->{$seqno} = $block_type; + + if ( $block_type =~ /$ASUB_PATTERN/ ) { + $self->[_ris_asub_block_]->{$seqno} = 1; + } + elsif ( $block_type =~ /$SUB_PATTERN/ ) { + $self->[_ris_sub_block_]->{$seqno} = 1; + } + return; + } ## end sub store_block_type + sub write_line { # This routine receives lines one-by-one from the tokenizer and stores @@ -5233,19 +5653,8 @@ EOM # to do the actual formatting. my ( $self, $line_of_tokens_old ) = @_; - my $rLL = $self->[_rLL_]; - my $Klimit = $self->[_Klimit_]; - my $rlines_new = $self->[_rlines_]; - my $K_opening_container = $self->[_K_opening_container_]; - my $K_closing_container = $self->[_K_closing_container_]; - my $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_]; - my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; - my $rSS = $self->[_rSS_]; - my $Iss_opening = $self->[_Iss_opening_]; - my $Iss_closing = $self->[_Iss_closing_]; - - my $Kfirst; + my $rLL = $self->[_rLL_]; my $line_of_tokens = {}; foreach ( qw( @@ -5265,193 +5674,55 @@ EOM $line_of_tokens->{$_} = $line_of_tokens_old->{$_}; } - # Data needed by Logger - $line_of_tokens->{_level_0} = 0; - $line_of_tokens->{_ci_level_0} = 0; - $line_of_tokens->{_nesting_blocks_0} = EMPTY_STRING; - $line_of_tokens->{_nesting_tokens_0} = EMPTY_STRING; - - # Needed to avoid trimming quotes - $line_of_tokens->{_ended_in_blank_token} = undef; - - my $line_type = $line_of_tokens_old->{_line_type}; - my $line_number = $line_of_tokens_old->{_line_number}; - my $CODE_type = EMPTY_STRING; + my $line_type = $line_of_tokens_old->{_line_type}; my $tee_output; + my $Klimit = $self->[_Klimit_]; + my $Kfirst; + # Handle line of non-code if ( $line_type ne 'CODE' ) { $tee_output ||= $rOpts_tee_pod && substr( $line_type, 0, 3 ) eq 'POD'; + + $line_of_tokens->{_level_0} = 0; + $line_of_tokens->{_ci_level_0} = 0; + $line_of_tokens->{_nesting_blocks_0} = EMPTY_STRING; + $line_of_tokens->{_nesting_tokens_0} = EMPTY_STRING; + $line_of_tokens->{_ended_in_blank_token} = undef; + } # Handle line of code else { - my $rtokens = $line_of_tokens_old->{_rtokens}; - my $rtoken_type = $line_of_tokens_old->{_rtoken_type}; - my $rblock_type = $line_of_tokens_old->{_rblock_type}; - my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence}; - my $rlevels = $line_of_tokens_old->{_rlevels}; - my $rci_levels = $line_of_tokens_old->{_rci_levels}; + my $rtokens = $line_of_tokens_old->{_rtokens}; + my $jmax = @{$rtokens} - 1; - my $jmax = @{$rtokens} - 1; if ( $jmax >= 0 ) { - $Kfirst = defined($Klimit) ? $Klimit + 1 : 0; - DEVEL_MODE - && check_sequence_numbers( $rtokens, $rtoken_type, - $rtype_sequence, $line_number ); - - # Find the starting nesting depth ... - # It must be the value of variable 'level' of the first token - # because the nesting depth is used as a token tag in the - # vertical aligner and is compared to actual levels. - # So vertical alignment problems will occur with any other - # starting value. - if ( !defined($nesting_depth) ) { - $nesting_depth = $rlevels->[0]; - $nesting_depth = 0 if ( $nesting_depth < 0 ); - $rdepth_of_opening_seqno->[SEQ_ROOT] = $nesting_depth - 1; - } - - foreach my $j ( 0 .. $jmax ) { - - # Do not clip the 'level' variable yet. We will do this - # later, in sub 'store_token_to_go'. The reason is that in - # files with level errors, the logic in 'weld_cuddled_else' - # uses a stack logic that will give bad welds if we clip - # levels here. - ## if ( $rlevels->[$j] < 0 ) { $rlevels->[$j] = 0 } - - # Handle tokens with sequence numbers ... - my $seqno = $rtype_sequence->[$j]; - if ($seqno) { - my $token = $rtokens->[$j]; - my $sign = 1; - if ( $is_opening_token{$token} ) { - $K_opening_container->{$seqno} = @{$rLL}; - $rdepth_of_opening_seqno->[$seqno] = $nesting_depth; - $nesting_depth++; - - # Save a sequenced block type at its opening token. - # Note that unsequenced block types can occur in - # unbalanced code with errors but are ignored here. - if ( $rblock_type->[$j] ) { - my $block_type = $rblock_type->[$j]; - $rblock_type_of_seqno->{$seqno} = $block_type; - if ( substr( $block_type, 0, 3 ) eq 'sub' - || $rOpts_sub_alias_list ) - { - if ( $block_type =~ /$ASUB_PATTERN/ ) { - $self->[_ris_asub_block_]->{$seqno} = 1; - } - elsif ( $block_type =~ /$SUB_PATTERN/ ) { - $self->[_ris_sub_block_]->{$seqno} = 1; - } - } - } - } - elsif ( $is_closing_token{$token} ) { - - # The opening depth should always be defined, and - # it should equal $nesting_depth-1. To protect - # against unforseen error conditions, however, we - # will check this and fix things if necessary. For - # a test case see issue c055. - my $opening_depth = - $rdepth_of_opening_seqno->[$seqno]; - if ( !defined($opening_depth) ) { - $opening_depth = $nesting_depth - 1; - $opening_depth = 0 if ( $opening_depth < 0 ); - $rdepth_of_opening_seqno->[$seqno] = - $opening_depth; - - # This is not fatal but should not happen. The - # tokenizer generates sequence numbers - # incrementally upon encountering each new - # opening token, so every positive sequence - # number should correspond to an opening token. - if (DEVEL_MODE) { - Fault(<{$seqno} = @{$rLL}; - $nesting_depth = $opening_depth; - $sign = -1; - } - elsif ( $token eq '?' ) { - } - elsif ( $token eq ':' ) { - $sign = -1; - } - - # The only sequenced types output by the tokenizer are - # the opening & closing containers and the ternary - # types. So we would only get here if the tokenizer has - # been changed to mark some other tokens with sequence - # numbers, or if an error has been introduced in a - # hash such as %is_opening_container - else { - if (DEVEL_MODE) { - Fault(<[$j]', sequence=$seqno arrived from tokenizer. -Expecting only opening or closing container tokens or ternary tokens with sequence numbers. -EOM - } - } - - if ( $sign > 0 ) { - $Iss_opening->[$seqno] = @{$rSS}; - - # For efficiency, we find the maximum level of - # opening tokens of any type. The actual maximum - # level will be that of their contents which is 1 - # greater. That will be fixed in sub - # 'finish_formatting'. - my $level = $rlevels->[$j]; - if ( $level > $self->[_maximum_level_] ) { - $self->[_maximum_level_] = $level; - $self->[_maximum_level_at_line_] = $line_number; - } - } - else { $Iss_closing->[$seqno] = @{$rSS} } - push @{$rSS}, $sign * $seqno; - - } - else { - $seqno = EMPTY_STRING unless ( defined($seqno) ); - } + $Kfirst = defined($Klimit) ? $Klimit + 1 : 0; - my @tokary; - @tokary[ - _TOKEN_, _TYPE_, _TYPE_SEQUENCE_, - _LEVEL_, _CI_LEVEL_, _LINE_INDEX_, - ] - = ( - $rtokens->[$j], $rtoken_type->[$j], - $seqno, $rlevels->[$j], - $rci_levels->[$j], $line_number - 1, - ); - push @{$rLL}, \@tokary; - } ## end foreach my $j ( 0 .. $jmax ) + #---------------------------- + # get the tokens on this line + #---------------------------- + $self->write_line_inner_loop( $line_of_tokens_old, + $line_of_tokens ); + # update Klimit for added tokens $Klimit = @{$rLL} - 1; - # Need to remember if we can trim the input line - $line_of_tokens->{_ended_in_blank_token} = - $rtoken_type->[$jmax] eq 'b'; + } ## end if ( $jmax >= 0 ) + else { - $line_of_tokens->{_level_0} = $rlevels->[0]; - $line_of_tokens->{_ci_level_0} = $rci_levels->[0]; - $line_of_tokens->{_nesting_blocks_0} = - $line_of_tokens_old->{_nesting_blocks_0}; - $line_of_tokens->{_nesting_tokens_0} = - $line_of_tokens_old->{_nesting_tokens_0}; + # blank line + $line_of_tokens->{_level_0} = 0; + $line_of_tokens->{_ci_level_0} = 0; + $line_of_tokens->{_nesting_blocks_0} = EMPTY_STRING; + $line_of_tokens->{_nesting_tokens_0} = EMPTY_STRING; + $line_of_tokens->{_ended_in_blank_token} = undef; - } ## end if ( $jmax >= 0 ) + } $tee_output ||= $rOpts_tee_block_comments @@ -5467,19 +5738,201 @@ EOM } ## end if ( $line_type eq 'CODE') # Finish storing line variables + $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ]; + $self->[_Klimit_] = $Klimit; + my $rlines = $self->[_rlines_]; + push @{$rlines}, $line_of_tokens; + if ($tee_output) { my $fh_tee = $self->[_fh_tee_]; my $line_text = $line_of_tokens_old->{_line_text}; $fh_tee->print($line_text) if ($fh_tee); } - $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ]; - $line_of_tokens->{_code_type} = $CODE_type; - $self->[_Klimit_] = $Klimit; - - push @{$rlines_new}, $line_of_tokens; return; } ## end sub write_line + + sub write_line_inner_loop { + my ( $self, $line_of_tokens_old, $line_of_tokens ) = @_; + + #--------------------------------------------------------------------- + # Copy the tokens on one line received from the tokenizer to their new + # storage locations. + #--------------------------------------------------------------------- + + # Input parameters: + # $line_of_tokens_old = line received from tokenizer + # $line_of_tokens = line of tokens being formed for formatter + + my $rtokens = $line_of_tokens_old->{_rtokens}; + my $jmax = @{$rtokens} - 1; + if ( $jmax < 0 ) { + + # safety check; shouldn't happen + DEVEL_MODE && Fault("unexpected jmax=$jmax\n"); + return; + } + + my $line_index = $line_of_tokens_old->{_line_number} - 1; + my $rtoken_type = $line_of_tokens_old->{_rtoken_type}; + my $rblock_type = $line_of_tokens_old->{_rblock_type}; + my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence}; + my $rlevels = $line_of_tokens_old->{_rlevels}; + my $rci_levels = $line_of_tokens_old->{_rci_levels}; + + my $rLL = $self->[_rLL_]; + my $rSS = $self->[_rSS_]; + my $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_]; + + DEVEL_MODE + && check_sequence_numbers( $rtokens, $rtoken_type, + $rtype_sequence, $line_index + 1 ); + + # Find the starting nesting depth ... + # It must be the value of variable 'level' of the first token + # because the nesting depth is used as a token tag in the + # vertical aligner and is compared to actual levels. + # So vertical alignment problems will occur with any other + # starting value. + if ( !defined($nesting_depth) ) { + $nesting_depth = $rlevels->[0]; + $nesting_depth = 0 if ( $nesting_depth < 0 ); + $rdepth_of_opening_seqno->[SEQ_ROOT] = $nesting_depth - 1; + } + + my $j = -1; + + # NOTE: coding efficiency is critical in this loop over all tokens + foreach my $token ( @{$rtokens} ) { + + # Do not clip the 'level' variable yet. We will do this + # later, in sub 'store_token_to_go'. The reason is that in + # files with level errors, the logic in 'weld_cuddled_else' + # uses a stack logic that will give bad welds if we clip + # levels here. + ## $j++; + ## if ( $rlevels->[$j] < 0 ) { $rlevels->[$j] = 0 } + + my $seqno = EMPTY_STRING; + + # Handle tokens with sequence numbers ... + # note the ++ increment hidden here for efficiency + if ( $rtype_sequence->[ ++$j ] ) { + $seqno = $rtype_sequence->[$j]; + my $sign = 1; + if ( $is_opening_token{$token} ) { + $self->[_K_opening_container_]->{$seqno} = @{$rLL}; + $rdepth_of_opening_seqno->[$seqno] = $nesting_depth; + $nesting_depth++; + + # Save a sequenced block type at its opening token. + # Note that unsequenced block types can occur in + # unbalanced code with errors but are ignored here. + $self->store_block_type( $rblock_type->[$j], $seqno ) + if ( $rblock_type->[$j] ); + } + elsif ( $is_closing_token{$token} ) { + + # The opening depth should always be defined, and + # it should equal $nesting_depth-1. To protect + # against unforseen error conditions, however, we + # will check this and fix things if necessary. For + # a test case see issue c055. + my $opening_depth = $rdepth_of_opening_seqno->[$seqno]; + if ( !defined($opening_depth) ) { + $opening_depth = $nesting_depth - 1; + $opening_depth = 0 if ( $opening_depth < 0 ); + $rdepth_of_opening_seqno->[$seqno] = $opening_depth; + + # This is not fatal but should not happen. The + # tokenizer generates sequence numbers + # incrementally upon encountering each new + # opening token, so every positive sequence + # number should correspond to an opening token. + DEVEL_MODE && Fault(<[_K_closing_container_]->{$seqno} = @{$rLL}; + $nesting_depth = $opening_depth; + $sign = -1; + } + elsif ( $token eq '?' ) { + } + elsif ( $token eq ':' ) { + $sign = -1; + } + + # The only sequenced types output by the tokenizer are + # the opening & closing containers and the ternary + # types. So we would only get here if the tokenizer has + # been changed to mark some other tokens with sequence + # numbers, or if an error has been introduced in a + # hash such as %is_opening_container + else { + DEVEL_MODE && Fault(<[$j]', sequence=$seqno arrived from tokenizer. +Expecting only opening or closing container tokens or ternary tokens with sequence numbers. +EOM + } + + if ( $sign > 0 ) { + $self->[_Iss_opening_]->[$seqno] = @{$rSS}; + + # For efficiency, we find the maximum level of + # opening tokens of any type. The actual maximum + # level will be that of their contents which is 1 + # greater. That will be fixed in sub + # 'finish_formatting'. + my $level = $rlevels->[$j]; + if ( $level > $self->[_maximum_level_] ) { + $self->[_maximum_level_] = $level; + $self->[_maximum_level_at_line_] = $line_index + 1; + } + } + else { $self->[_Iss_closing_]->[$seqno] = @{$rSS} } + push @{$rSS}, $sign * $seqno; + + } + + my @tokary; + @tokary[ + + _TOKEN_, + _TYPE_, + _TYPE_SEQUENCE_, + _LEVEL_, + _CI_LEVEL_, + _LINE_INDEX_, + + ] = ( + + $token, + $rtoken_type->[$j], + $seqno, + $rlevels->[$j], + $rci_levels->[$j], + $line_index, + + ); + push @{$rLL}, \@tokary; + } ## end token loop + + # Need to remember if we can trim the input line + $line_of_tokens->{_ended_in_blank_token} = $rtoken_type->[$jmax] eq 'b'; + + # Values needed by Logger + $line_of_tokens->{_level_0} = $rlevels->[0]; + $line_of_tokens->{_ci_level_0} = $rci_levels->[0]; + $line_of_tokens->{_nesting_blocks_0} = + $line_of_tokens_old->{_nesting_blocks_0}; + $line_of_tokens->{_nesting_tokens_0} = + $line_of_tokens_old->{_nesting_tokens_0}; + + return; + + } ## end sub write_line_inner_loop + } ## end closure write_line ############################################# @@ -5493,6 +5946,15 @@ sub finish_formatting { # The file has been tokenized and is ready to be formatted. # All of the relevant data is stored in $self, ready to go. + # Returns: + # true if input file was copied verbatim due to errors + # false otherwise + + # Some of the code in sub break_lists is not robust enough to process code + # with arbitrary brace errors. The simplest fix is to just return the file + # verbatim if there are brace errors. This fixes issue c160. + $severe_error ||= get_saw_brace_error(); + # Check the maximum level. If it is extremely large we will give up and # output the file verbatim. Note that the actual maximum level is 1 # greater than the saved value, so we fix that here. @@ -5507,58 +5969,80 @@ Something may be wrong; formatting will be skipped. EOM } + # Dump any requested block summary data + if ( $rOpts->{'dump-block-summary'} ) { + if ($severe_error) { Exit(1) } + $self->dump_block_summary(); + Exit(0); + } + # output file verbatim if severe error or no formatting requested if ( $severe_error || $rOpts->{notidy} ) { $self->dump_verbatim(); - $self->wrapup(); - return; + $self->wrapup($severe_error); + return 1; } # Update the 'save_logfile' flag based to include any tokenization errors. # We can save time by skipping logfile calls if it is not going to be saved. my $logger_object = $self->[_logger_object_]; if ($logger_object) { - $self->[_save_logfile_] = $logger_object->get_save_logfile(); + my $save_logfile = $logger_object->get_save_logfile(); + $self->[_save_logfile_] = $save_logfile; + my $file_writer_object = $self->[_file_writer_object_]; + $file_writer_object->set_save_logfile($save_logfile); } - my $rix_side_comments = $self->set_CODE_type(); + { + my $rix_side_comments = $self->set_CODE_type(); - $self->find_non_indenting_braces($rix_side_comments); + $self->find_non_indenting_braces($rix_side_comments); - # Handle any requested side comment deletions. It is easier to get - # this done here rather than farther down the pipeline because IO - # lines take a different route, and because lines with deleted HSC - # become BL lines. We have already handled any tee requests in sub - # getline, so it is safe to delete side comments now. - $self->delete_side_comments($rix_side_comments) - if ( $rOpts_delete_side_comments - || $rOpts_delete_closing_side_comments ); + # Handle any requested side comment deletions. It is easier to get + # this done here rather than farther down the pipeline because IO + # lines take a different route, and because lines with deleted HSC + # become BL lines. We have already handled any tee requests in sub + # getline, so it is safe to delete side comments now. + $self->delete_side_comments($rix_side_comments) + if ( $rOpts_delete_side_comments + || $rOpts_delete_closing_side_comments ); + } # Verify that the line hash does not have any unknown keys. $self->check_line_hashes() if (DEVEL_MODE); - # Make a pass through all tokens, adding or deleting any whitespace as - # required. Also make any other changes, such as adding semicolons. - # All token changes must be made here so that the token data structure - # remains fixed for the rest of this iteration. - $self->respace_tokens(); + { + # Make a pass through all tokens, adding or deleting any whitespace as + # required. Also make any other changes, such as adding semicolons. + # All token changes must be made here so that the token data structure + # remains fixed for the rest of this iteration. + my ( $error, $rqw_lines ) = $self->respace_tokens(); + if ($error) { + $self->dump_verbatim(); + $self->wrapup(); + return 1; + } + + $self->find_multiline_qw($rqw_lines); + } + + $self->examine_vertical_tightness_flags(); $self->set_excluded_lp_containers(); - $self->find_multiline_qw(); - $self->keep_old_line_breaks(); # Implement any welding needed for the -wn or -cb options $self->weld_containers(); - $self->collapsed_lengths() + # Collect info needed to implement the -xlp style + $self->xlp_collapsed_lengths() if ( $rOpts_line_up_parentheses && $rOpts_extended_line_up_parentheses ); # Locate small nested blocks which should not be broken $self->mark_short_nested_blocks(); - $self->adjust_indentation_levels(); + $self->special_indentation_adjustments(); # Verify that the main token array looks OK. If this ever causes a fault # then place similar checks before the sub calls above to localize the @@ -5576,1144 +6060,1364 @@ EOM return; } ## end sub finish_formatting -sub set_CODE_type { - my ($self) = @_; - - # Examine each line of code and set a flag '$CODE_type' to describe it. - # Also return a list of lines with side comments. +my %is_loop_type; - my $rLL = $self->[_rLL_]; - my $Klimit = $self->[_Klimit_]; - my $rlines = $self->[_rlines_]; - my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; +BEGIN { + my @q = qw( for foreach while do until ); + @{is_loop_type}{@q} = (1) x scalar(@q); +} - my $rOpts_format_skipping_begin = $rOpts->{'format-skipping-begin'}; - my $rOpts_format_skipping_end = $rOpts->{'format-skipping-end'}; - my $rOpts_static_block_comment_prefix = - $rOpts->{'static-block-comment-prefix'}; +sub find_level_info { - # Remember indexes of lines with side comments - my @ix_side_comments; + # Find level ranges and total variations of all code blocks in this file. - my $In_format_skipping_section = 0; - my $Saw_VERSION_in_this_file = 0; - my $has_side_comment = 0; - my ( $Kfirst, $Klast ); - my $CODE_type; + # Returns: + # ref to hash with block info, with seqno as key (see below) - # Loop to set CODE_type + my ($self) = @_; - # Possible CODE_types - # 'VB' = Verbatim - line goes out verbatim (a quote) - # 'FS' = Format Skipping - line goes out verbatim - # 'BL' = Blank Line - # 'HSC' = Hanging Side Comment - fix this hanging side comment - # 'SBCX'= Static Block Comment Without Leading Space - # 'SBC' = Static Block Comment - # 'BC' = Block Comment - an ordinary full line comment - # 'IO' = Indent Only - line goes out unchanged except for indentation - # 'NIN' = No Internal Newlines - line does not get broken - # 'VER' = VERSION statement - # '' = ordinary line of code with no restrictions + # The array _rSS_ has the complete container tree for this file. + my $rSS = $self->[_rSS_]; - my $ix_line = -1; - foreach my $line_of_tokens ( @{$rlines} ) { - $ix_line++; - my $input_line_no = $line_of_tokens->{_line_number}; - my $line_type = $line_of_tokens->{_line_type}; + # We will be ignoring everything except code block containers + my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; - my $Last_line_had_side_comment = $has_side_comment; - if ($has_side_comment) { - push @ix_side_comments, $ix_line - 1; + my @stack; + my %level_info; + + # TREE_LOOP: + foreach my $sseq ( @{$rSS} ) { + my $stack_depth = @stack; + my $seq_next = $sseq > 0 ? $sseq : -$sseq; + + next if ( !$rblock_type_of_seqno->{$seq_next} ); + if ( $sseq > 0 ) { + + # STACK_LOOP: + my $item; + foreach my $seq (@stack) { + $item = $level_info{$seq}; + if ( $item->{maximum_depth} < $stack_depth ) { + $item->{maximum_depth} = $stack_depth; + } + $item->{block_count}++; + } ## end STACK LOOP + + push @stack, $seq_next; + my $block_type = $rblock_type_of_seqno->{$seq_next}; + + # If this block is a loop nested within a loop, then we + # will mark it as an 'inner_loop'. This is a useful + # complexity measure. + my $is_inner_loop = 0; + if ( $is_loop_type{$block_type} && defined($item) ) { + $is_inner_loop = $is_loop_type{ $item->{block_type} }; + } + + $level_info{$seq_next} = { + starting_depth => $stack_depth, + maximum_depth => $stack_depth, + block_count => 1, + block_type => $block_type, + is_inner_loop => $is_inner_loop, + }; } - $has_side_comment = 0; + else { + my $seq_test = pop @stack; - next unless ( $line_type eq 'CODE' ); + # error check + if ( $seq_test != $seq_next ) { - my $Klast_prev = $Klast; + # Shouldn't happen - the $rSS array must have an error + DEVEL_MODE && Fault("stack error finding total depths\n"); - my $rK_range = $line_of_tokens->{_rK_range}; - ( $Kfirst, $Klast ) = @{$rK_range}; + %level_info = (); + last; + } + } + } ## end TREE_LOOP + return \%level_info; +} ## end sub find_level_info - my $last_CODE_type = $CODE_type; - $CODE_type = EMPTY_STRING; +sub find_loop_label { - my $input_line = $line_of_tokens->{_line_text}; - my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1; + my ( $self, $seqno ) = @_; - my $is_block_comment = 0; - if ( $jmax >= 0 && $rLL->[$Klast]->[_TYPE_] eq '#' ) { - if ( $jmax == 0 ) { $is_block_comment = 1; } - else { $has_side_comment = 1 } - } + # Given: + # $seqno = sequence number of a block of code for a loop + # Return: + # $label = the loop label text, if any, or an empty string - # Write line verbatim if we are in a formatting skip section - if ($In_format_skipping_section) { + my $rLL = $self->[_rLL_]; + my $rlines = $self->[_rlines_]; + my $K_opening_container = $self->[_K_opening_container_]; - # Note: extra space appended to comment simplifies pattern matching - if ( - $is_block_comment + my $label = EMPTY_STRING; + my $K_opening = $K_opening_container->{$seqno}; - # optional fast pre-check - && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#>>>' - || $rOpts_format_skipping_end ) + # backup to the line with the opening paren, if any, in case the + # keyword is on a different line + my $Kp = $self->K_previous_code($K_opening); + return $label unless ( defined($Kp) ); + if ( $rLL->[$Kp]->[_TOKEN_] eq ')' ) { + $seqno = $rLL->[$Kp]->[_TYPE_SEQUENCE_]; + $K_opening = $K_opening_container->{$seqno}; + } - && ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~ - /$format_skipping_pattern_end/ - ) - { - $In_format_skipping_section = 0; - write_logfile_entry( - "Line $input_line_no: Exiting format-skipping section\n"); - } - $CODE_type = 'FS'; - goto NEXT; - } + return $label unless ( defined($K_opening) ); + my $lx_open = $rLL->[$K_opening]->[_LINE_INDEX_]; - # Check for a continued quote.. - if ( $line_of_tokens->{_starting_in_quote} ) { + # look for a lable within a few lines; allow a couple of blank lines + foreach my $lx ( reverse( $lx_open - 3 .. $lx_open ) ) { + last if ( $lx < 0 ); + my $line_of_tokens = $rlines->[$lx]; + my $line_type = $line_of_tokens->{_line_type}; - # A line which is entirely a quote or pattern must go out - # verbatim. Note: the \n is contained in $input_line. - if ( $jmax <= 0 ) { - if ( ( $input_line =~ "\t" ) ) { - my $input_line_number = $line_of_tokens->{_line_number}; - $self->note_embedded_tab($input_line_number); - } - $CODE_type = 'VB'; - goto NEXT; - } - } + # stop search on a non-code line + last if ( $line_type ne 'CODE' ); - # See if we are entering a formatting skip section - if ( - $is_block_comment + my $rK_range = $line_of_tokens->{_rK_range}; + my ( $Kfirst, $Klast ) = @{$rK_range}; - # optional fast pre-check - && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#<<<' - || $rOpts_format_skipping_begin ) + # skip a blank line + next if ( !defined($Kfirst) ); - && $rOpts_format_skipping - && ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~ - /$format_skipping_pattern_begin/ - ) - { - $In_format_skipping_section = 1; - write_logfile_entry( - "Line $input_line_no: Entering format-skipping section\n"); - $CODE_type = 'FS'; - goto NEXT; + # check for a lable + if ( $rLL->[$Kfirst]->[_TYPE_] eq 'J' ) { + $label = $rLL->[$Kfirst]->[_TOKEN_]; + last; } - # ignore trailing blank tokens (they will get deleted later) - if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) { - $jmax--; - } + # quit the search if we are above the starting line + last if ( $lx < $lx_open ); + } - # blank line.. - if ( $jmax < 0 ) { - $CODE_type = 'BL'; - goto NEXT; - } + return $label; +} ## end sub find_loop_label - # Handle comments - if ($is_block_comment) { +{ ## closure find_mccabe_count + my %is_mccabe_logic_keyword; + my %is_mccabe_logic_operator; - # see if this is a static block comment (starts with ## by default) - my $is_static_block_comment = 0; - my $no_leading_space = substr( $input_line, 0, 1 ) eq '#'; - if ( + BEGIN { + my @q = (qw( && || ||= &&= ? <<= >>= )); + @is_mccabe_logic_operator{@q} = (1) x scalar(@q); - # optional fast pre-check - ( - substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 2 ) eq '##' - || $rOpts_static_block_comment_prefix - ) + @q = (qw( and or xor if else elsif unless until while for foreach )); + @is_mccabe_logic_keyword{@q} = (1) x scalar(@q); + } ## end BEGIN - && $rOpts_static_block_comments - && $input_line =~ /$static_block_comment_pattern/ - ) - { - $is_static_block_comment = 1; - } + sub find_mccabe_count { + my ($self) = @_; - # Check for comments which are line directives - # Treat exactly as static block comments without leading space - # reference: perlsyn, near end, section Plain Old Comments (Not!) - # example: '# line 42 "new_filename.plx"' - if ( - $no_leading_space - && $input_line =~ /^\# \s* - line \s+ (\d+) \s* - (?:\s("?)([^"]+)\2)? \s* - $/x - ) - { - $is_static_block_comment = 1; + # Find the cumulative mccabe count to each token + # Return '$rmccabe_count_sum' = ref to array with cumulative + # mccabe count to each token $K + + # NOTE: This sub currently follows the definitions in Perl::Critic + + my $rmccabe_count_sum; + my $rLL = $self->[_rLL_]; + my $count = 0; + my $Klimit = $self->[_Klimit_]; + foreach my $KK ( 0 .. $Klimit ) { + $rmccabe_count_sum->{$KK} = $count; + my $type = $rLL->[$KK]->[_TYPE_]; + if ( $type eq 'k' ) { + my $token = $rLL->[$KK]->[_TOKEN_]; + if ( $is_mccabe_logic_keyword{$token} ) { $count++ } + } + elsif ( $is_mccabe_logic_operator{$type} ) { + $count++; } + } + $rmccabe_count_sum->{ $Klimit + 1 } = $count; + return $rmccabe_count_sum; + } ## end sub find_mccabe_count +} ## end closure find_mccabe_count - # look for hanging side comment ... - if ( - $Last_line_had_side_comment # last line had side comment - && !$no_leading_space # there is some leading space - && ! - $is_static_block_comment # do not make static comment hanging - ) - { +sub find_code_line_count { + my ($self) = @_; - # continuing an existing HSC chain? - if ( $last_CODE_type eq 'HSC' ) { - $has_side_comment = 1; - $CODE_type = 'HSC'; - goto NEXT; - } + # Find the cumulative number of lines of code, excluding blanks, + # comments and pod. + # Return '$rcode_line_count' = ref to array with cumulative + # code line count for each input line number. - # starting a new HSC chain? - elsif ( + my $rcode_line_count; + my $rLL = $self->[_rLL_]; + my $rlines = $self->[_rlines_]; + my $ix_line = -1; + my $code_line_count = 0; - $rOpts->{'hanging-side-comments'} # user is allowing - # hanging side comments - # like this + # loop over all lines + foreach my $line_of_tokens ( @{$rlines} ) { + $ix_line++; - && ( defined($Klast_prev) && $Klast_prev > 1 ) + # what type of line? + my $line_type = $line_of_tokens->{_line_type}; - # and the previous side comment was not static (issue c070) - && !( - $rOpts->{'static-side-comments'} - && $rLL->[$Klast_prev]->[_TOKEN_] =~ - /$static_side_comment_pattern/ - ) + # if 'CODE' it must be non-blank and non-comment + if ( $line_type eq 'CODE' ) { + my $rK_range = $line_of_tokens->{_rK_range}; + my ( $Kfirst, $Klast ) = @{$rK_range}; - ) - { + if ( defined($Kfirst) ) { - # and it is not a closing side comment (issue c070). - my $K_penult = $Klast_prev - 1; - $K_penult -= 1 if ( $rLL->[$K_penult]->[_TYPE_] eq 'b' ); - my $follows_csc = - ( $rLL->[$K_penult]->[_TOKEN_] eq '}' - && $rLL->[$K_penult]->[_TYPE_] eq '}' - && $rLL->[$Klast_prev]->[_TOKEN_] =~ - /$closing_side_comment_prefix_pattern/ ); + # it is non-blank + my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1; + if ( $jmax > 0 || $rLL->[$Klast]->[_TYPE_] ne '#' ) { - if ( !$follows_csc ) { - $has_side_comment = 1; - $CODE_type = 'HSC'; - goto NEXT; - } + # ok, it is a non-comment + $code_line_count++; } } - - if ($is_static_block_comment) { - $CODE_type = $no_leading_space ? 'SBCX' : 'SBC'; - goto NEXT; - } - elsif ($Last_line_had_side_comment - && !$rOpts_maximum_consecutive_blank_lines - && $rLL->[$Kfirst]->[_LEVEL_] > 0 ) - { - # Emergency fix to keep a block comment from becoming a hanging - # side comment. This fix is for the case that blank lines - # cannot be inserted. There is related code in sub - # 'process_line_of_CODE' - $CODE_type = 'SBCX'; - goto NEXT; - } - else { - $CODE_type = 'BC'; - goto NEXT; - } } - # End of comments. Handle a line of normal code: + # Count all other special line types except pod; + # For a list of line types see sub 'process_all_lines' + elsif ( $line_type !~ /^POD/ ) { $code_line_count++ } - if ($rOpts_indent_only) { - $CODE_type = 'IO'; - goto NEXT; - } + # Store the cumulative count using the input line index + $rcode_line_count->[$ix_line] = $code_line_count; + } + return $rcode_line_count; +} ## end sub find_code_line_count - if ( !$rOpts_add_newlines ) { - $CODE_type = 'NIN'; - goto NEXT; - } +sub find_selected_packages { - # Patch needed for MakeMaker. Do not break a statement - # in which $VERSION may be calculated. See MakeMaker.pm; - # this is based on the coding in it. - # The first line of a file that matches this will be eval'd: - # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ - # Examples: - # *VERSION = \'1.01'; - # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/; - # We will pass such a line straight through without breaking - # it unless -npvl is used. + my ( $self, $rdump_block_types ) = @_; - # Patch for problem reported in RT #81866, where files - # had been flattened into a single line and couldn't be - # tidied without -npvl. There are two parts to this patch: - # First, it is not done for a really long line (80 tokens for now). - # Second, we will only allow up to one semicolon - # before the VERSION. We need to allow at least one semicolon - # for statements like this: - # require Exporter; our $VERSION = $Exporter::VERSION; - # where both statements must be on a single line for MakeMaker + # returns a list of all package statements in a file if requested - if ( !$Saw_VERSION_in_this_file - && $jmax < 80 - && $input_line =~ - /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ ) - { - $Saw_VERSION_in_this_file = 1; - write_logfile_entry("passing VERSION line; -npvl deactivates\n"); + unless ( $rdump_block_types->{'*'} + || $rdump_block_types->{'package'} + || $rdump_block_types->{'class'} ) + { + return; + } - # This code type has lower priority than others - $CODE_type = 'VER'; - goto NEXT; + my $rLL = $self->[_rLL_]; + my $Klimit = $self->[_Klimit_]; + my $rlines = $self->[_rlines_]; + + my $K_closing_container = $self->[_K_closing_container_]; + my @package_list; + my @package_sweep; + foreach my $KK ( 0 .. $Klimit ) { + my $item = $rLL->[$KK]; + my $type = $item->[_TYPE_]; + if ( $type ne 'i' ) { + next; } + my $token = $item->[_TOKEN_]; + if ( substr( $token, 0, 7 ) eq 'package' && $token =~ /^package\s/ + || substr( $token, 0, 5 ) eq 'class' && $token =~ /^class\s/ ) + { - NEXT: - $line_of_tokens->{_code_type} = $CODE_type; - } + $token =~ s/\s+/ /g; + my ( $keyword, $name ) = split /\s+/, $token, 2; + + my $lx_start = $item->[_LINE_INDEX_]; + my $level = $item->[_LEVEL_]; + my $parent_seqno = $self->parent_seqno_by_K($KK); + + # Skip a class BLOCK because it will be handled as a block + if ( $keyword eq 'class' ) { + my $line_of_tokens = $rlines->[$lx_start]; + my $rK_range = $line_of_tokens->{_rK_range}; + my ( $K_first, $K_last ) = @{$rK_range}; + if ( $rLL->[$K_last]->[_TYPE_] eq '#' ) { + $K_last = $self->K_previous_code($K_last); + } + if ( defined($K_last) ) { + my $seqno_class = $rLL->[$K_last]->[_TYPE_SEQUENCE_]; + my $block_type_next = + $self->[_rblock_type_of_seqno_]->{$seqno_class}; + + # these block types are currently marked 'package' + # but may be 'class' in the future, so allow both. + if ( defined($block_type_next) + && $block_type_next =~ /^(class|package)\b/ ) + { + next; + } + } + } - if ($has_side_comment) { - push @ix_side_comments, $ix_line; + my $K_closing = $Klimit; + if ( $parent_seqno != SEQ_ROOT ) { + my $Kc = $K_closing_container->{$parent_seqno}; + if ( defined($Kc) ) { + $K_closing = $Kc; + } + } + + # This package ends any previous package at this level + if ( defined( my $ix = $package_sweep[$level] ) ) { + my $rpk = $package_list[$ix]; + my $Kc = $rpk->{K_closing}; + if ( $Kc > $KK ) { + $rpk->{K_closing} = $KK - 1; + } + } + $package_sweep[$level] = @package_list; + + # max_change and block_count are not currently reported 'package' + push @package_list, + { + line_start => $lx_start + 1, + K_opening => $KK, + K_closing => $Klimit, + name => $name, + type => $keyword, + level => $level, + max_change => 0, + block_count => 0, + }; + } } - return \@ix_side_comments; -} ## end sub set_CODE_type + return \@package_list; +} ## end sub find_selected_packages -sub find_non_indenting_braces { +sub find_selected_blocks { - my ( $self, $rix_side_comments ) = @_; - return unless ( $rOpts->{'non-indenting-braces'} ); - my $rLL = $self->[_rLL_]; - my $Klimit = $self->[_Klimit_]; - return unless ( defined($rLL) && @{$rLL} ); + my ( $self, $rdump_block_types ) = @_; + + # Find blocks needed for --dump-block-summary + # Returns: + # $rslected_blocks = ref to a list of information on the selected blocks + + my $rLL = $self->[_rLL_]; my $rlines = $self->[_rlines_]; my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; - my $rseqno_non_indenting_brace_by_ix = - $self->[_rseqno_non_indenting_brace_by_ix_]; + my $K_opening_container = $self->[_K_opening_container_]; + my $K_closing_container = $self->[_K_closing_container_]; + my $ris_asub_block = $self->[_ris_asub_block_]; + my $ris_sub_block = $self->[_ris_sub_block_]; - foreach my $ix ( @{$rix_side_comments} ) { - my $line_of_tokens = $rlines->[$ix]; - my $line_type = $line_of_tokens->{_line_type}; - if ( $line_type ne 'CODE' ) { - - # shouldn't happen - next; - } - my $CODE_type = $line_of_tokens->{_code_type}; - my $rK_range = $line_of_tokens->{_rK_range}; - my ( $Kfirst, $Klast ) = @{$rK_range}; - unless ( defined($Kfirst) && $rLL->[$Klast]->[_TYPE_] eq '#' ) { - - # shouldn't happen - next; - } - next unless ( $Klast > $Kfirst ); # maybe HSC - my $token_sc = $rLL->[$Klast]->[_TOKEN_]; - my $K_m = $Klast - 1; - my $type_m = $rLL->[$K_m]->[_TYPE_]; - if ( $type_m eq 'b' && $K_m > $Kfirst ) { - $K_m--; - $type_m = $rLL->[$K_m]->[_TYPE_]; - } - my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_]; - if ($seqno_m) { - my $block_type_m = $rblock_type_of_seqno->{$seqno_m}; + my $dump_all_types = $rdump_block_types->{'*'}; - # The pattern ends in \s but we have removed the newline, so - # we added it back for the match. That way we require an exact - # match to the special string and also allow additional text. - $token_sc .= "\n"; - if ( $block_type_m - && $is_opening_type{$type_m} - && $token_sc =~ /$non_indenting_brace_pattern/ ) - { - $rseqno_non_indenting_brace_by_ix->{$ix} = $seqno_m; - } - } - } - return; -} ## end sub find_non_indenting_braces + # Get level variation info for code blocks + my $rlevel_info = $self->find_level_info(); -sub delete_side_comments { - my ( $self, $rix_side_comments ) = @_; + my @selected_blocks; - # Given a list of indexes of lines with side comments, handle any - # requested side comment deletions. + #--------------------------------------------------- + # BEGIN loop over all blocks to find selected blocks + #--------------------------------------------------- + foreach my $seqno ( keys %{$rblock_type_of_seqno} ) { - my $rLL = $self->[_rLL_]; - my $rlines = $self->[_rlines_]; - my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; - my $rseqno_non_indenting_brace_by_ix = - $self->[_rseqno_non_indenting_brace_by_ix_]; + my $type; + my $name = EMPTY_STRING; + my $block_type = $rblock_type_of_seqno->{$seqno}; + my $K_opening = $K_opening_container->{$seqno}; + my $K_closing = $K_closing_container->{$seqno}; + my $level = $rLL->[$K_opening]->[_LEVEL_]; - foreach my $ix ( @{$rix_side_comments} ) { - my $line_of_tokens = $rlines->[$ix]; - my $line_type = $line_of_tokens->{_line_type}; + my $lx_open = $rLL->[$K_opening]->[_LINE_INDEX_]; + my $line_of_tokens = $rlines->[$lx_open]; + my $rK_range = $line_of_tokens->{_rK_range}; + my ( $Kfirst, $Klast ) = @{$rK_range}; + if ( !defined($Kfirst) || !defined($Klast) || $Kfirst > $K_opening ) { + my $line_type = $line_of_tokens->{_line_type}; - # This fault shouldn't happen because we only saved CODE lines with - # side comments in the TASK 1 loop above. - if ( $line_type ne 'CODE' ) { - if (DEVEL_MODE) { - my $lno = $ix + 1; - Fault(<{_code_type}; + DEVEL_MODE && Fault(<{_code_type}; - my $rK_range = $line_of_tokens->{_rK_range}; - my ( $Kfirst, $Klast ) = @{$rK_range}; + my ( $max_change, $block_count, $inner_loop_plus ) = + ( 0, 0, EMPTY_STRING ); + my $item = $rlevel_info->{$seqno}; + if ( defined($item) ) { + my $starting_depth = $item->{starting_depth}; + my $maximum_depth = $item->{maximum_depth}; + $block_count = $item->{block_count}; + $max_change = $maximum_depth - $starting_depth + 1; - unless ( defined($Kfirst) && $rLL->[$Klast]->[_TYPE_] eq '#' ) { - if (DEVEL_MODE) { - my $lno = $ix + 1; - Fault(<{is_inner_loop} ? '+' : EMPTY_STRING; } - my $delete_side_comment = - $rOpts_delete_side_comments - && ( $Klast > $Kfirst || $CODE_type eq 'HSC' ) - && (!$CODE_type - || $CODE_type eq 'HSC' - || $CODE_type eq 'IO' - || $CODE_type eq 'NIN' ); - - # Do not delete special control side comments - if ( $rseqno_non_indenting_brace_by_ix->{$ix} ) { - $delete_side_comment = 0; + # Skip closures unless type 'closure' is explicitely requested + if ( ( $block_type eq '}' || $block_type eq ';' ) + && $rdump_block_types->{'closure'} ) + { + $type = 'closure'; } - if ( - $rOpts_delete_closing_side_comments - && !$delete_side_comment - && $Klast > $Kfirst - && ( !$CODE_type - || $CODE_type eq 'HSC' - || $CODE_type eq 'IO' - || $CODE_type eq 'NIN' ) + # Both 'sub' and 'asub' select an anonymous sub. + # This allows anonymous subs to be explicitely selected + elsif ( + $ris_asub_block->{$seqno} + && ( $dump_all_types + || $rdump_block_types->{'sub'} + || $rdump_block_types->{'asub'} ) ) { - my $token = $rLL->[$Klast]->[_TOKEN_]; - my $K_m = $Klast - 1; - my $type_m = $rLL->[$K_m]->[_TYPE_]; - if ( $type_m eq 'b' && $K_m > $Kfirst ) { $K_m-- } - my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_]; - if ($seqno_m) { - my $block_type_m = $rblock_type_of_seqno->{$seqno_m}; - if ( $block_type_m - && $token =~ /$closing_side_comment_prefix_pattern/ - && $block_type_m =~ /$closing_side_comment_list_pattern/ ) + $type = 'asub'; + + # Look back to try to find some kind of name, such as + # my $var = sub { - var is type 'i' + # var => sub { - var is type 'w' + # -var => sub { - var is type 'w' + # 'var' => sub { - var is type 'Q' + my ( $saw_equals, $saw_fat_comma, $blank_count ); + foreach my $KK ( reverse( $Kfirst .. $K_opening - 1 ) ) { + my $token_type = $rLL->[$KK]->[_TYPE_]; + if ( $token_type eq 'b' ) { $blank_count++; next } + if ( $token_type eq '=>' ) { $saw_fat_comma++; next } + if ( $token_type eq '=' ) { $saw_equals++; next } + if ( $token_type eq 'i' && $saw_equals + || ( $token_type eq 'w' || $token_type eq 'Q' ) + && $saw_fat_comma ) { - $delete_side_comment = 1; + $name = $rLL->[$KK]->[_TOKEN_]; + last; } } - } ## end if ( $rOpts_delete_closing_side_comments...) - - if ($delete_side_comment) { - - # We are actually just changing the side comment to a blank. - # This may produce multiple blanks in a row, but sub respace_tokens - # will check for this and fix it. - $rLL->[$Klast]->[_TYPE_] = 'b'; - $rLL->[$Klast]->[_TOKEN_] = SPACE; + } + elsif ( $ris_sub_block->{$seqno} + && ( $dump_all_types || $rdump_block_types->{'sub'} ) ) + { + $type = 'sub'; - # The -io option outputs the line text, so we have to update - # the line text so that the comment does not reappear. - if ( $CODE_type eq 'IO' ) { - my $line = EMPTY_STRING; - foreach my $KK ( $Kfirst .. $Klast - 1 ) { - $line .= $rLL->[$KK]->[_TOKEN_]; - } - $line =~ s/\s+$//; - $line_of_tokens->{_line_text} = $line . "\n"; + # what we want: + # $block_type $name + # 'sub setidentifier($)' => 'setidentifier' + # 'method setidentifier($)' => 'setidentifier' + my @parts = split /\s+/, $block_type; + $name = $parts[1]; + $name =~ s/\(.*$//; + } + elsif ( + $block_type =~ /^(package|class)\b/ + && ( $dump_all_types + || $rdump_block_types->{'package'} + || $rdump_block_types->{'class'} ) + ) + { + $type = 'class'; + my @parts = split /\s+/, $block_type; + $name = $parts[1]; + $name =~ s/\(.*$//; + } + elsif ( + $is_loop_type{$block_type} + && ( $dump_all_types + || $rdump_block_types->{$block_type} + || $rdump_block_types->{ $block_type . $inner_loop_plus } + || $rdump_block_types->{$inner_loop_plus} ) + ) + { + $type = $block_type . $inner_loop_plus; + } + elsif ( $dump_all_types || $rdump_block_types->{$block_type} ) { + if ( $is_loop_type{$block_type} ) { + $name = $self->find_loop_label($seqno); } - - # If we delete a hanging side comment the line becomes blank. - if ( $CODE_type eq 'HSC' ) { $line_of_tokens->{_code_type} = 'BL' } + $type = $block_type; + } + else { + next; } - } - return; -} ## end sub delete_side_comments - -sub dump_verbatim { - my $self = shift; - my $rlines = $self->[_rlines_]; - foreach my $line ( @{$rlines} ) { - my $input_line = $line->{_line_text}; - $self->write_unindented_line($input_line); - } - return; -} -my %wU; -my %wiq; -my %is_wit; -my %is_sigil; -my %is_nonlist_keyword; -my %is_nonlist_type; -my %is_s_y_m_slash; -my %is_unexpected_equals; + push @selected_blocks, + { + K_opening => $K_opening, + K_closing => $K_closing, + line_start => $lx_open + 1, + name => $name, + type => $type, + level => $level, + max_change => $max_change, + block_count => $block_count, + }; + } ## END loop to get info for selected blocks + return \@selected_blocks; +} ## end sub find_selected_blocks + +sub dump_block_summary { + my ($self) = @_; -BEGIN { + # Dump information about selected code blocks to STDOUT + # This sub is called when + # --dump-block-summary (-dbs) is set. - # added 'U' to fix cases b1125 b1126 b1127 - my @q = qw(w U); - @{wU}{@q} = (1) x scalar(@q); + # The following controls are available: + # --dump-block-types=s (-dbt=s), where s is a list of block types + # (if else elsif for foreach while do ... sub) ; default is 'sub' + # --dump-block-minimum-lines=n (-dbml=n), where n is the minimum + # number of lines for a block to be included; default is 20. - @q = qw(w i q Q G C Z); - @{wiq}{@q} = (1) x scalar(@q); + my $rOpts_dump_block_types = $rOpts->{'dump-block-types'}; + if ( !defined($rOpts_dump_block_types) ) { $rOpts_dump_block_types = 'sub' } + $rOpts_dump_block_types =~ s/^\s+//; + $rOpts_dump_block_types =~ s/\s+$//; + my @list = split /\s+/, $rOpts_dump_block_types; + my %dump_block_types; + @{dump_block_types}{@list} = (1) x scalar(@list); - @q = qw(w i t); - @{is_wit}{@q} = (1) x scalar(@q); + # Get block info + my $rselected_blocks = $self->find_selected_blocks( \%dump_block_types ); - @q = qw($ & % * @); - @{is_sigil}{@q} = (1) x scalar(@q); + # Get package info + my $rpackage_list = $self->find_selected_packages( \%dump_block_types ); - # Parens following these keywords will not be marked as lists. Note that - # 'for' is not included and is handled separately, by including 'f' in the - # hash %is_counted_type, since it may or may not be a c-style for loop. - @q = qw( if elsif unless and or ); - @is_nonlist_keyword{@q} = (1) x scalar(@q); + return if ( !@{$rselected_blocks} && !@{$rpackage_list} ); - # Parens following these types will not be marked as lists - @q = qw( && || ); - @is_nonlist_type{@q} = (1) x scalar(@q); + my $input_stream_name = get_input_stream_name(); - @q = qw( s y m / ); - @is_s_y_m_slash{@q} = (1) x scalar(@q); + # Get code line count + my $rcode_line_count = $self->find_code_line_count(); - @q = qw( = == != ); - @is_unexpected_equals{@q} = (1) x scalar(@q); + # Get mccabe count + my $rmccabe_count_sum = $self->find_mccabe_count(); -} + my $rOpts_dump_block_minimum_lines = $rOpts->{'dump-block-minimum-lines'}; + if ( !defined($rOpts_dump_block_minimum_lines) ) { + $rOpts_dump_block_minimum_lines = 20; + } -sub respace_tokens { + my $rLL = $self->[_rLL_]; - my $self = shift; - return if $rOpts->{'indent-only'}; + # merge blocks and packages, add various counts, filter and print to STDOUT + my $routput_lines = []; + foreach my $item ( @{$rselected_blocks}, @{$rpackage_list} ) { - # This routine is called once per file to do as much formatting as possible - # before new line breaks are set. + my $K_opening = $item->{K_opening}; + my $K_closing = $item->{K_closing}; - # This routine makes all necessary and possible changes to the tokenization - # after the initial tokenization of the file. This is a tedious routine, - # but basically it consists of inserting and deleting whitespace between - # nonblank tokens according to the selected parameters. In a few cases - # non-space characters are added, deleted or modified. + # define total number of lines + my $lx_open = $rLL->[$K_opening]->[_LINE_INDEX_]; + my $lx_close = $rLL->[$K_closing]->[_LINE_INDEX_]; + my $line_count = $lx_close - $lx_open + 1; - # The goal of this routine is to create a new token array which only needs - # the definition of new line breaks and padding to complete formatting. In - # a few cases we have to cheat a little to achieve this goal. In - # particular, we may not know if a semicolon will be needed, because it - # depends on how the line breaks go. To handle this, we include the - # semicolon as a 'phantom' which can be displayed as normal or as an empty - # string. + # define total number of lines of code excluding blanks, comments, pod + my $code_lines_open = $rcode_line_count->[$lx_open]; + my $code_lines_close = $rcode_line_count->[$lx_close]; + my $code_lines = 0; + if ( defined($code_lines_open) && defined($code_lines_close) ) { + $code_lines = $code_lines_close - $code_lines_open + 1; + } - # Method: The old tokens are copied one-by-one, with changes, from the old - # linear storage array $rLL to a new array $rLL_new. + # filter out blocks below the selected code line limit + if ( $code_lines < $rOpts_dump_block_minimum_lines ) { + next; + } - my $rLL = $self->[_rLL_]; - my $Klimit_old = $self->[_Klimit_]; - my $rlines = $self->[_rlines_]; - my $length_function = $self->[_length_function_]; - my $is_encoded_data = $self->[_is_encoded_data_]; + # add mccabe_count for this block + my $mccabe_closing = $rmccabe_count_sum->{ $K_closing + 1 }; + my $mccabe_opening = $rmccabe_count_sum->{$K_opening}; + my $mccabe_count = 1; # add 1 to match Perl::Critic + if ( defined($mccabe_opening) && defined($mccabe_closing) ) { + $mccabe_count += $mccabe_closing - $mccabe_opening; + } - my $rLL_new = []; # This is the new array - my $rtoken_vars; - my $Ktoken_vars; # the old K value of $rtoken_vars - my ( $Kfirst_old, $Klast_old ); # Range of old line - my $Klast_old_code; # K of last token if side comment - my $Kmax = @{$rLL} - 1; + # Store the final set of print variables + push @{$routput_lines}, [ - my $CODE_type = EMPTY_STRING; - my $line_type = EMPTY_STRING; + $input_stream_name, + $item->{line_start}, + $line_count, + $code_lines, + $item->{type}, + $item->{name}, + $item->{level}, + $item->{max_change}, + $item->{block_count}, + $mccabe_count, - # Set the whitespace flags, which indicate the token spacing preference. - my $rwhitespace_flags = $self->set_whitespace_flags(); + ]; + } - # we will be setting token lengths as we go - my $cumulative_length = 0; + return unless @{$routput_lines}; - my %seqno_stack; - my %K_old_opening_by_seqno = (); # Note: old K index - my $depth_next = 0; - my $depth_next_max = 0; + # Sort blocks and packages on starting line number + my @sorted_lines = sort { $a->[1] <=> $b->[1] } @{$routput_lines}; - # Note that $K_opening_container and $K_closing_container have values - # defined in sub get_line() for the previous K indexes. They were needed - # in case option 'indent-only' was set, and we didn't get here. We no longer - # need those and will eliminate them now to avoid any possible mixing of - # old and new values. - my $K_opening_container = $self->[_K_opening_container_] = {}; - my $K_closing_container = $self->[_K_closing_container_] = {}; - - my $K_closing_ternary = $self->[_K_closing_ternary_]; - my $K_opening_ternary = $self->[_K_opening_ternary_]; - my $rK_phantom_semicolons = $self->[_rK_phantom_semicolons_]; - my $rchildren_of_seqno = $self->[_rchildren_of_seqno_]; - my $rhas_broken_code_block = $self->[_rhas_broken_code_block_]; - my $rhas_broken_list = $self->[_rhas_broken_list_]; - my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_]; - my $rhas_code_block = $self->[_rhas_code_block_]; - my $rhas_list = $self->[_rhas_list_]; - my $rhas_ternary = $self->[_rhas_ternary_]; - my $ris_assigned_structure = $self->[_ris_assigned_structure_]; - my $ris_broken_container = $self->[_ris_broken_container_]; - my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_]; - my $ris_list_by_seqno = $self->[_ris_list_by_seqno_]; - my $ris_permanently_broken = $self->[_ris_permanently_broken_]; - my $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_]; - my $roverride_cab3 = $self->[_roverride_cab3_]; - my $rparent_of_seqno = $self->[_rparent_of_seqno_]; - my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_]; - my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; + print STDOUT +"file,line,line_count,code_lines,type,name,level,max_change,block_count,mccabe_count\n"; - my $last_nonblank_code_type = ';'; - my $last_nonblank_code_token = ';'; - my $last_nonblank_block_type = EMPTY_STRING; - my $last_last_nonblank_code_type = ';'; - my $last_last_nonblank_code_token = ';'; + foreach my $rline_vars (@sorted_lines) { + my $line = join( ",", @{$rline_vars} ) . "\n"; + print STDOUT $line; + } + return; +} ## end sub dump_block_summary - my %K_first_here_doc_by_seqno; +sub set_CODE_type { + my ($self) = @_; - my $set_permanently_broken = sub { - my ($seqno) = @_; - while ( defined($seqno) ) { - $ris_permanently_broken->{$seqno} = 1; - $seqno = $rparent_of_seqno->{$seqno}; - } - return; - }; - my $store_token = sub { - my ($item) = @_; + # Examine each line of code and set a flag '$CODE_type' to describe it. + # Also return a list of lines with side comments. - # This will be the index of this item in the new array - my $KK_new = @{$rLL_new}; + my $rLL = $self->[_rLL_]; + my $rlines = $self->[_rlines_]; - #------------------------------------------------------------------ - # NOTE: called once per token so coding efficiency is critical here - #------------------------------------------------------------------ + my $rOpts_format_skipping_begin = $rOpts->{'format-skipping-begin'}; + my $rOpts_format_skipping_end = $rOpts->{'format-skipping-end'}; + my $rOpts_static_block_comment_prefix = + $rOpts->{'static-block-comment-prefix'}; - my $type = $item->[_TYPE_]; - my $is_blank = $type eq 'b'; - my $block_type = EMPTY_STRING; + # Remember indexes of lines with side comments + my @ix_side_comments; - # Do not output consecutive blanks. This situation should have been - # prevented earlier, but it is worth checking because later routines - # make this assumption. - if ( $is_blank && $KK_new && $rLL_new->[-1]->[_TYPE_] eq 'b' ) { - return; - } - - # check for a sequenced item (i.e., container or ?/:) - my $type_sequence = $item->[_TYPE_SEQUENCE_]; - my $token = $item->[_TOKEN_]; - if ($type_sequence) { - - if ( $is_opening_token{$token} ) { - - $K_opening_container->{$type_sequence} = $KK_new; - $block_type = $rblock_type_of_seqno->{$type_sequence}; + my $In_format_skipping_section = 0; + my $Saw_VERSION_in_this_file = 0; + my $has_side_comment = 0; + my ( $Kfirst, $Klast ); + my $CODE_type; - # Fix for case b1100: Count a line ending in ', [' as having - # a line-ending comma. Otherwise, these commas can be hidden - # with something like --opening-square-bracket-right - if ( $last_nonblank_code_type eq ',' - && $Ktoken_vars == $Klast_old_code - && $Ktoken_vars > $Kfirst_old ) - { - $rlec_count_by_seqno->{$type_sequence}++; - } + # Loop to set CODE_type - if ( $last_nonblank_code_type eq '=' - || $last_nonblank_code_type eq '=>' ) - { - $ris_assigned_structure->{$type_sequence} = - $last_nonblank_code_type; - } + # Possible CODE_types + # 'VB' = Verbatim - line goes out verbatim (a quote) + # 'FS' = Format Skipping - line goes out verbatim + # 'BL' = Blank Line + # 'HSC' = Hanging Side Comment - fix this hanging side comment + # 'SBCX'= Static Block Comment Without Leading Space + # 'SBC' = Static Block Comment + # 'BC' = Block Comment - an ordinary full line comment + # 'IO' = Indent Only - line goes out unchanged except for indentation + # 'NIN' = No Internal Newlines - line does not get broken + # 'VER' = VERSION statement + # '' = ordinary line of code with no restrictions - my $seqno_parent = $seqno_stack{ $depth_next - 1 }; - $seqno_parent = SEQ_ROOT unless defined($seqno_parent); - push @{ $rchildren_of_seqno->{$seqno_parent} }, $type_sequence; - $rparent_of_seqno->{$type_sequence} = $seqno_parent; - $seqno_stack{$depth_next} = $type_sequence; - $K_old_opening_by_seqno{$type_sequence} = $Ktoken_vars; - $depth_next++; + my $ix_line = -1; + foreach my $line_of_tokens ( @{$rlines} ) { + $ix_line++; + my $line_type = $line_of_tokens->{_line_type}; - if ( $depth_next > $depth_next_max ) { - $depth_next_max = $depth_next; - } - } - elsif ( $is_closing_token{$token} ) { + my $Last_line_had_side_comment = $has_side_comment; + if ($has_side_comment) { + push @ix_side_comments, $ix_line - 1; + $has_side_comment = 0; + } - $K_closing_container->{$type_sequence} = $KK_new; - $block_type = $rblock_type_of_seqno->{$type_sequence}; + my $last_CODE_type = $CODE_type; + $CODE_type = EMPTY_STRING; - # Do not include terminal commas in counts - if ( $last_nonblank_code_type eq ',' - || $last_nonblank_code_type eq '=>' ) - { - my $seqno = $seqno_stack{ $depth_next - 1 }; - if ($seqno) { - $rtype_count_by_seqno->{$seqno} - ->{$last_nonblank_code_type}--; - - if ( $Ktoken_vars == $Kfirst_old - && $last_nonblank_code_type eq ',' - && $rlec_count_by_seqno->{$seqno} ) - { - $rlec_count_by_seqno->{$seqno}--; - } - } - } + if ( $line_type ne 'CODE' ) { + next; + } - # Update the stack... - $depth_next--; - } - else { + my $Klast_prev = $Klast; - # For ternary, note parent but do not include as child - my $seqno_parent = $seqno_stack{ $depth_next - 1 }; - $seqno_parent = SEQ_ROOT unless defined($seqno_parent); - $rparent_of_seqno->{$type_sequence} = $seqno_parent; + my $rK_range = $line_of_tokens->{_rK_range}; + ( $Kfirst, $Klast ) = @{$rK_range}; - # These are not yet used but could be useful - if ( $token eq '?' ) { - $K_opening_ternary->{$type_sequence} = $KK_new; - } - elsif ( $token eq ':' ) { - $K_closing_ternary->{$type_sequence} = $KK_new; - } - else { + my $input_line = $line_of_tokens->{_line_text}; + my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1; - # We really shouldn't arrive here, just being cautious: - # The only sequenced types output by the tokenizer are the - # opening & closing containers and the ternary types. Each - # of those was checked above. So we would only get here - # if the tokenizer has been changed to mark some other - # tokens with sequence numbers. - if (DEVEL_MODE) { - Fault( -"Unexpected token type with sequence number: type='$type', seqno='$type_sequence'" - ); - } - } - } + my $is_block_comment = 0; + if ( $jmax >= 0 && $rLL->[$Klast]->[_TYPE_] eq '#' ) { + if ( $jmax == 0 ) { $is_block_comment = 1; } + else { $has_side_comment = 1 } } - # Find the length of this token. Later it may be adjusted if phantom - # or ignoring side comment lengths. - my $token_length = - $is_encoded_data - ? $length_function->($token) - : length($token); - - # handle comments - my $is_comment = $type eq '#'; - if ($is_comment) { + # Write line verbatim if we are in a formatting skip section + if ($In_format_skipping_section) { - # trim comments if necessary - my $ord = ord( substr( $token, -1, 1 ) ); + # Note: extra space appended to comment simplifies pattern matching if ( - $ord > 0 - && ( $ord < ORD_PRINTABLE_MIN - || $ord > ORD_PRINTABLE_MAX ) - && $token =~ s/\s+$// - ) - { - $token_length = $length_function->($token); - $item->[_TOKEN_] = $token; - } + $is_block_comment - # Mark length of side comments as just 1 if sc lengths are ignored - if ( $rOpts_ignore_side_comment_lengths - && ( !$CODE_type || $CODE_type eq 'HSC' ) ) - { - $token_length = 1; - } - my $seqno = $seqno_stack{ $depth_next - 1 }; - if ( defined($seqno) - && !$ris_permanently_broken->{$seqno} ) + # optional fast pre-check + && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#>>>' + || $rOpts_format_skipping_end ) + + && ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~ + /$format_skipping_pattern_end/ + ) { - $set_permanently_broken->($seqno); + $In_format_skipping_section = 0; + my $input_line_no = $line_of_tokens->{_line_number}; + write_logfile_entry( + "Line $input_line_no: Exiting format-skipping section\n"); } + $CODE_type = 'FS'; + next; } - $item->[_TOKEN_LENGTH_] = $token_length; - - # and update the cumulative length - $cumulative_length += $token_length; - - # Save the length sum to just AFTER this token - $item->[_CUMULATIVE_LENGTH_] = $cumulative_length; - - if ( !$is_blank && !$is_comment ) { - - # Remember the most recent two non-blank, non-comment tokens. - # NOTE: the phantom semicolon code may change the output stack - # without updating these values. Phantom semicolons are considered - # the same as blanks for now, but future needs might change that. - # See the related note in sub '$add_phantom_semicolon'. - $last_last_nonblank_code_type = $last_nonblank_code_type; - $last_last_nonblank_code_token = $last_nonblank_code_token; - - $last_nonblank_code_type = $type; - $last_nonblank_code_token = $token; - $last_nonblank_block_type = $block_type; - - # count selected types - if ( $is_counted_type{$type} ) { - my $seqno = $seqno_stack{ $depth_next - 1 }; - if ( defined($seqno) ) { - $rtype_count_by_seqno->{$seqno}->{$type}++; - - # Count line-ending commas for -bbx - if ( $type eq ',' && $Ktoken_vars == $Klast_old_code ) { - $rlec_count_by_seqno->{$seqno}++; - } + # Check for a continued quote.. + if ( $line_of_tokens->{_starting_in_quote} ) { - # Remember index of first here doc target - if ( $type eq 'h' && !$K_first_here_doc_by_seqno{$seqno} ) { - $K_first_here_doc_by_seqno{$seqno} = $KK_new; - } + # A line which is entirely a quote or pattern must go out + # verbatim. Note: the \n is contained in $input_line. + if ( $jmax <= 0 ) { + if ( $self->[_save_logfile_] && $input_line =~ /\t/ ) { + my $input_line_number = $line_of_tokens->{_line_number}; + $self->note_embedded_tab($input_line_number); } + $CODE_type = 'VB'; + next; } } - # For reference, here is how to get the parent sequence number. - # This is not used because it is slower than finding it on the fly - # in sub parent_seqno_by_K: - - # my $seqno_parent = - # $type_sequence && $is_opening_token{$token} - # ? $seqno_stack{ $depth_next - 2 } - # : $seqno_stack{ $depth_next - 1 }; - # my $KK = @{$rLL_new}; - # $rseqno_of_parent_by_K->{$KK} = $seqno_parent; - - # and finally, add this item to the new array - push @{$rLL_new}, $item; - return; - }; - - my $store_token_and_space = sub { - my ( $item, $want_space ) = @_; + # See if we are entering a formatting skip section + if ( + $is_block_comment - # store a token with preceding space if requested and needed + # optional fast pre-check + && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#<<<' + || $rOpts_format_skipping_begin ) - # First store the space - if ( $want_space - && @{$rLL_new} - && $rLL_new->[-1]->[_TYPE_] ne 'b' - && $rOpts_add_whitespace ) + && $rOpts_format_skipping + && ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~ + /$format_skipping_pattern_begin/ + ) { - my $rcopy = [ @{$item} ]; - $rcopy->[_TYPE_] = 'b'; - $rcopy->[_TOKEN_] = SPACE; - $rcopy->[_TYPE_SEQUENCE_] = EMPTY_STRING; - - $rcopy->[_LINE_INDEX_] = - $rLL_new->[-1]->[_LINE_INDEX_]; - - # Patch 23-Jan-2021 to fix -lp blinkers: - # The level and ci_level of newly created spaces should be the same - # as the previous token. Otherwise the coding for the -lp option - # can create a blinking state in some rare cases. - $rcopy->[_LEVEL_] = - $rLL_new->[-1]->[_LEVEL_]; - $rcopy->[_CI_LEVEL_] = - $rLL_new->[-1]->[_CI_LEVEL_]; + $In_format_skipping_section = 1; + my $input_line_no = $line_of_tokens->{_line_number}; + write_logfile_entry( + "Line $input_line_no: Entering format-skipping section\n"); + $CODE_type = 'FS'; + next; + } - $store_token->($rcopy); + # ignore trailing blank tokens (they will get deleted later) + if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) { + $jmax--; } - # then the token - $store_token->($item); - return; - }; + # blank line.. + if ( $jmax < 0 ) { + $CODE_type = 'BL'; + next; + } - my $add_phantom_semicolon = sub { + # Handle comments + if ($is_block_comment) { - my ($KK) = @_; + # see if this is a static block comment (starts with ## by default) + my $is_static_block_comment = 0; + my $no_leading_space = substr( $input_line, 0, 1 ) eq '#'; + if ( - my $Kp = $self->K_previous_nonblank( undef, $rLL_new ); - return unless ( defined($Kp) ); + # optional fast pre-check + ( + substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 2 ) eq '##' + || $rOpts_static_block_comment_prefix + ) - # we are only adding semicolons for certain block types - my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_]; - return unless ($type_sequence); - my $block_type = $rblock_type_of_seqno->{$type_sequence}; - return unless ($block_type); - return - unless ( $ok_to_add_semicolon_for_block_type{$block_type} - || $block_type =~ /^(sub|package)/ - || $block_type =~ /^\w+\:$/ ); + && $rOpts_static_block_comments + && $input_line =~ /$static_block_comment_pattern/ + ) + { + $is_static_block_comment = 1; + } - my $type_p = $rLL_new->[$Kp]->[_TYPE_]; - my $token_p = $rLL_new->[$Kp]->[_TOKEN_]; - my $type_sequence_p = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_]; + # Check for comments which are line directives + # Treat exactly as static block comments without leading space + # reference: perlsyn, near end, section Plain Old Comments (Not!) + # example: '# line 42 "new_filename.plx"' + if ( + $no_leading_space + && $input_line =~ /^\# \s* + line \s+ (\d+) \s* + (?:\s("?)([^"]+)\2)? \s* + $/x + ) + { + $is_static_block_comment = 1; + } - # Do not add a semicolon if... - return - if ( + # look for hanging side comment ... + if ( + $Last_line_had_side_comment # last line had side comment + && !$no_leading_space # there is some leading space + && ! + $is_static_block_comment # do not make static comment hanging + ) + { - # it would follow a comment (and be isolated) - $type_p eq '#' + # continuing an existing HSC chain? + if ( $last_CODE_type eq 'HSC' ) { + $has_side_comment = 1; + $CODE_type = 'HSC'; + next; + } - # it follows a code block ( because they are not always wanted - # there and may add clutter) - || $type_sequence_p && $rblock_type_of_seqno->{$type_sequence_p} + # starting a new HSC chain? + elsif ( - # it would follow a label - || $type_p eq 'J' + $rOpts->{'hanging-side-comments'} # user is allowing + # hanging side comments + # like this - # it would be inside a 'format' statement (and cause syntax error) - || ( $type_p eq 'k' - && $token_p =~ /format/ ) + && ( defined($Klast_prev) && $Klast_prev > 1 ) - ); + # and the previous side comment was not static (issue c070) + && !( + $rOpts->{'static-side-comments'} + && $rLL->[$Klast_prev]->[_TOKEN_] =~ + /$static_side_comment_pattern/ + ) - # Do not add a semicolon if it would impede a weld with an immediately - # following closing token...like this - # { ( some code ) } - # ^--No semicolon can go here - - # look at the previous token... note use of the _NEW rLL array here, - # but sequence numbers are invariant. - my $seqno_inner = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_]; - - # If it is also a CLOSING token we have to look closer... - if ( - $seqno_inner - && $is_closing_token{$token_p} - - # we only need to look if there is just one inner container.. - && defined( $rchildren_of_seqno->{$type_sequence} ) - && @{ $rchildren_of_seqno->{$type_sequence} } == 1 - ) - { + ) + { - # Go back and see if the corresponding two OPENING tokens are also - # together. Note that we are using the OLD K indexing here: - my $K_outer_opening = $K_old_opening_by_seqno{$type_sequence}; - if ( defined($K_outer_opening) ) { - my $K_nxt = $self->K_next_nonblank($K_outer_opening); - if ( defined($K_nxt) ) { - my $seqno_nxt = $rLL->[$K_nxt]->[_TYPE_SEQUENCE_]; + # and it is not a closing side comment (issue c070). + my $K_penult = $Klast_prev - 1; + $K_penult -= 1 if ( $rLL->[$K_penult]->[_TYPE_] eq 'b' ); + my $follows_csc = + ( $rLL->[$K_penult]->[_TOKEN_] eq '}' + && $rLL->[$K_penult]->[_TYPE_] eq '}' + && $rLL->[$Klast_prev]->[_TOKEN_] =~ + /$closing_side_comment_prefix_pattern/ ); - # Is the next token after the outer opening the same as - # our inner closing (i.e. same sequence number)? - # If so, do not insert a semicolon here. - return if ( $seqno_nxt && $seqno_nxt == $seqno_inner ); + if ( !$follows_csc ) { + $has_side_comment = 1; + $CODE_type = 'HSC'; + next; + } } } + + if ($is_static_block_comment) { + $CODE_type = $no_leading_space ? 'SBCX' : 'SBC'; + next; + } + elsif ($Last_line_had_side_comment + && !$rOpts_maximum_consecutive_blank_lines + && $rLL->[$Kfirst]->[_LEVEL_] > 0 ) + { + # Emergency fix to keep a block comment from becoming a hanging + # side comment. This fix is for the case that blank lines + # cannot be inserted. There is related code in sub + # 'process_line_of_CODE' + $CODE_type = 'SBCX'; + next; + } + else { + $CODE_type = 'BC'; + next; + } } - # We will insert an empty semicolon here as a placeholder. Later, if - # it becomes the last token on a line, we will bring it to life. The - # advantage of doing this is that (1) we just have to check line - # endings, and (2) the phantom semicolon has zero width and therefore - # won't cause needless breaks of one-line blocks. - my $Ktop = -1; - if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' - && $want_left_space{';'} == WS_NO ) - { + # End of comments. Handle a line of normal code: - # convert the blank into a semicolon.. - # be careful: we are working on the new stack top - # on a token which has been stored. - my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', SPACE ); + if ($rOpts_indent_only) { + $CODE_type = 'IO'; + next; + } - # Convert the existing blank to: - # a phantom semicolon for one_line_block option = 0 or 1 - # a real semicolon for one_line_block option = 2 - my $tok = EMPTY_STRING; - my $len_tok = 0; - if ( $rOpts_one_line_block_semicolons == 2 ) { - $tok = ';'; - $len_tok = 1; - } + if ( !$rOpts_add_newlines ) { + $CODE_type = 'NIN'; + next; + } - $rLL_new->[$Ktop]->[_TOKEN_] = $tok; - $rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = $len_tok; - $rLL_new->[$Ktop]->[_TYPE_] = ';'; + # Patch needed for MakeMaker. Do not break a statement + # in which $VERSION may be calculated. See MakeMaker.pm; + # this is based on the coding in it. + # The first line of a file that matches this will be eval'd: + # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ + # Examples: + # *VERSION = \'1.01'; + # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/; + # We will pass such a line straight through without breaking + # it unless -npvl is used. - # NOTE: we are changing the output stack without updating variables - # $last_nonblank_code_type, etc. Future needs might require that - # those variables be updated here. For now, it seems ok to skip - # this. + # Patch for problem reported in RT #81866, where files + # had been flattened into a single line and couldn't be + # tidied without -npvl. There are two parts to this patch: + # First, it is not done for a really long line (80 tokens for now). + # Second, we will only allow up to one semicolon + # before the VERSION. We need to allow at least one semicolon + # for statements like this: + # require Exporter; our $VERSION = $Exporter::VERSION; + # where both statements must be on a single line for MakeMaker - # Save list of new K indexes of phantom semicolons. - # This will be needed if we want to undo them for iterations in - # future coding. - push @{$rK_phantom_semicolons}, @{$rLL_new} - 1; + if ( !$Saw_VERSION_in_this_file + && $jmax < 80 + && $input_line =~ + /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ ) + { + $Saw_VERSION_in_this_file = 1; + write_logfile_entry("passing VERSION line; -npvl deactivates\n"); - # Then store a new blank - $store_token->($rcopy); + # This code type has lower priority than others + $CODE_type = 'VER'; + next; } - else { + } + continue { + $line_of_tokens->{_code_type} = $CODE_type; + } - # Patch for issue c078: keep line indexes in order. If the top - # token is a space that we are keeping (due to '-wls=';') then - # we have to check that old line indexes stay in order. - # In very rare - # instances in which side comments have been deleted and converted - # into blanks, we may have filtered down multiple blanks into just - # one. In that case the top blank may have a higher line number - # than the previous nonblank token. Although the line indexes of - # blanks are not really significant, we need to keep them in order - # in order to pass error checks. - if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' ) { - my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_]; - my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_]; - if ( $new_top_ix < $old_top_ix ) { - $rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix; - } - } - - my $rcopy = - copy_token_as_type( $rLL_new->[$Kp], ';', EMPTY_STRING ); - $store_token->($rcopy); - push @{$rK_phantom_semicolons}, @{$rLL_new} - 1; - } - return; - }; + if ($has_side_comment) { + push @ix_side_comments, $ix_line; + } - my $check_Q = sub { + return \@ix_side_comments; +} ## end sub set_CODE_type - # Check that a quote looks okay - # This sub works but needs to by sync'd with the log file output - # before it can be used. - my ( $KK, $Kfirst, $line_number ) = @_; - my $token = $rLL->[$KK]->[_TOKEN_]; - $self->note_embedded_tab($line_number) if ( $token =~ "\t" ); +sub find_non_indenting_braces { - # The remainder of this routine looks for something like - # '$var = s/xxx/yyy/;' - # in case it should have been '$var =~ s/xxx/yyy/;' + my ( $self, $rix_side_comments ) = @_; + return unless ( $rOpts->{'non-indenting-braces'} ); + my $rLL = $self->[_rLL_]; + return unless ( defined($rLL) && @{$rLL} ); + my $rlines = $self->[_rlines_]; + my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; + my $rseqno_non_indenting_brace_by_ix = + $self->[_rseqno_non_indenting_brace_by_ix_]; - # Start by looking for a token beginning with one of: s y m / tr - return - unless ( $is_s_y_m_slash{ substr( $token, 0, 1 ) } - || substr( $token, 0, 2 ) eq 'tr' ); + foreach my $ix ( @{$rix_side_comments} ) { + my $line_of_tokens = $rlines->[$ix]; + my $line_type = $line_of_tokens->{_line_type}; + if ( $line_type ne 'CODE' ) { - # ... and preceded by one of: = == != - my $Kp = $self->K_previous_nonblank( undef, $rLL_new ); - return unless ( defined($Kp) ); - my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_]; - return unless ( $is_unexpected_equals{$previous_nonblank_type} ); - my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_]; + # shouldn't happen + DEVEL_MODE && Fault("unexpected line_type=$line_type\n"); + next; + } + my $rK_range = $line_of_tokens->{_rK_range}; + my ( $Kfirst, $Klast ) = @{$rK_range}; + unless ( defined($Kfirst) && $rLL->[$Klast]->[_TYPE_] eq '#' ) { - my $previous_nonblank_type_2 = 'b'; - my $previous_nonblank_token_2 = EMPTY_STRING; - my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new ); - if ( defined($Kpp) ) { - $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_]; - $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_]; + # shouldn't happen + DEVEL_MODE && Fault("did not get a comment\n"); + next; + } + next unless ( $Klast > $Kfirst ); # maybe HSC + my $token_sc = $rLL->[$Klast]->[_TOKEN_]; + my $K_m = $Klast - 1; + my $type_m = $rLL->[$K_m]->[_TYPE_]; + if ( $type_m eq 'b' && $K_m > $Kfirst ) { + $K_m--; + $type_m = $rLL->[$K_m]->[_TYPE_]; } + my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_]; + if ($seqno_m) { + my $block_type_m = $rblock_type_of_seqno->{$seqno_m}; - my $next_nonblank_token = EMPTY_STRING; - my $Kn = $KK + 1; - if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 } - if ( $Kn <= $Kmax ) { - $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_]; + # The pattern ends in \s but we have removed the newline, so + # we added it back for the match. That way we require an exact + # match to the special string and also allow additional text. + $token_sc .= "\n"; + if ( $block_type_m + && $is_opening_type{$type_m} + && $token_sc =~ /$non_indenting_brace_pattern/ ) + { + $rseqno_non_indenting_brace_by_ix->{$ix} = $seqno_m; + } } + } + return; +} ## end sub find_non_indenting_braces - my $token_0 = $rLL->[$Kfirst]->[_TOKEN_]; - my $type_0 = $rLL->[$Kfirst]->[_TYPE_]; +sub delete_side_comments { + my ( $self, $rix_side_comments ) = @_; - if ( - ##$token =~ /^(s|tr|y|m|\/)/ - ##&& $previous_nonblank_token =~ /^(=|==|!=)$/ - 1 + # Given a list of indexes of lines with side comments, handle any + # requested side comment deletions. - # preceded by simple scalar - && $previous_nonblank_type_2 eq 'i' - && $previous_nonblank_token_2 =~ /^\$/ + my $rLL = $self->[_rLL_]; + my $rlines = $self->[_rlines_]; + my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; + my $rseqno_non_indenting_brace_by_ix = + $self->[_rseqno_non_indenting_brace_by_ix_]; - # followed by some kind of termination - # (but give complaint if we can not see far enough ahead) - && $next_nonblank_token =~ /^[; \)\}]$/ + foreach my $ix ( @{$rix_side_comments} ) { + my $line_of_tokens = $rlines->[$ix]; + my $line_type = $line_of_tokens->{_line_type}; - # scalar is not declared - ## =~ /^(my|our|local)$/ - && !( $type_0 eq 'k' && $is_my_our_local{$token_0} ) - ) - { - my $lno = 1 + $rLL_new->[$Kp]->[_LINE_INDEX_]; - my $guess = substr( $previous_nonblank_token, 0, 1 ) . '~'; - complain( -"Line $lno: Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n" - ); + # This fault shouldn't happen because we only saved CODE lines with + # side comments in the TASK 1 loop above. + if ( $line_type ne 'CODE' ) { + if (DEVEL_MODE) { + my $lno = $ix + 1; + Fault(<{_line_number}; - my $last_line_type = $line_type; - $line_type = $line_of_tokens->{_line_type}; - next unless ( $line_type eq 'CODE' ); - my $last_CODE_type = $CODE_type; - $CODE_type = $line_of_tokens->{_code_type}; - my $rK_range = $line_of_tokens->{_rK_range}; + my $CODE_type = $line_of_tokens->{_code_type}; + my $rK_range = $line_of_tokens->{_rK_range}; my ( $Kfirst, $Klast ) = @{$rK_range}; - next unless defined($Kfirst); - ( $Kfirst_old, $Klast_old ) = ( $Kfirst, $Klast ); - $Klast_old_code = $Klast_old; - - # Be sure an old K value is defined for sub $store_token - $Ktoken_vars = $Kfirst; - # Check for correct sequence of token indexes... - # An error here means that sub write_line() did not correctly - # package the tokenized lines as it received them. If we - # get a fault here it has not output a continuous sequence - # of K values. Or a line of CODE may have been mis-marked as - # something else. There is no good way to continue after such an - # error. - # FIXME: Calling Fault will produce zero output; it would be best to - # find a way to dump the input file. - if ( defined($last_K_out) ) { - if ( $Kfirst != $last_K_out + 1 ) { - Fault( - "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst" - ); + unless ( defined($Kfirst) && $rLL->[$Klast]->[_TYPE_] eq '#' ) { + if (DEVEL_MODE) { + my $lno = $ix + 1; + Fault(< $Kfirst || $CODE_type eq 'HSC' ) + && (!$CODE_type + || $CODE_type eq 'HSC' + || $CODE_type eq 'IO' + || $CODE_type eq 'NIN' ); + + # Do not delete special control side comments + if ( $rseqno_non_indenting_brace_by_ix->{$ix} ) { + $delete_side_comment = 0; } - $last_K_out = $Klast; - # Handle special lines of code - if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) { + if ( + $rOpts_delete_closing_side_comments + && !$delete_side_comment + && $Klast > $Kfirst + && ( !$CODE_type + || $CODE_type eq 'HSC' + || $CODE_type eq 'IO' + || $CODE_type eq 'NIN' ) + ) + { + my $token = $rLL->[$Klast]->[_TOKEN_]; + my $K_m = $Klast - 1; + my $type_m = $rLL->[$K_m]->[_TYPE_]; + if ( $type_m eq 'b' && $K_m > $Kfirst ) { $K_m-- } + my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_]; + if ($seqno_m) { + my $block_type_m = $rblock_type_of_seqno->{$seqno_m}; + if ( $block_type_m + && $token =~ /$closing_side_comment_prefix_pattern/ + && $block_type_m =~ /$closing_side_comment_list_pattern/ ) + { + $delete_side_comment = 1; + } + } + } ## end if ( $rOpts_delete_closing_side_comments...) - # CODE_types are as follows. - # 'BL' = Blank Line - # 'VB' = Verbatim - line goes out verbatim - # 'FS' = Format Skipping - line goes out verbatim, no blanks - # 'IO' = Indent Only - only indentation may be changed + if ($delete_side_comment) { + + # We are actually just changing the side comment to a blank. + # This may produce multiple blanks in a row, but sub respace_tokens + # will check for this and fix it. + $rLL->[$Klast]->[_TYPE_] = 'b'; + $rLL->[$Klast]->[_TOKEN_] = SPACE; + + # The -io option outputs the line text, so we have to update + # the line text so that the comment does not reappear. + if ( $CODE_type eq 'IO' ) { + my $line = EMPTY_STRING; + foreach my $KK ( $Kfirst .. $Klast - 1 ) { + $line .= $rLL->[$KK]->[_TOKEN_]; + } + $line =~ s/\s+$//; + $line_of_tokens->{_line_text} = $line . "\n"; + } + + # If we delete a hanging side comment the line becomes blank. + if ( $CODE_type eq 'HSC' ) { $line_of_tokens->{_code_type} = 'BL' } + } + } + return; +} ## end sub delete_side_comments + +sub dump_verbatim { + my $self = shift; + my $rlines = $self->[_rlines_]; + foreach my $line ( @{$rlines} ) { + my $input_line = $line->{_line_text}; + $self->write_unindented_line($input_line); + } + return; +} ## end sub dump_verbatim + +my %wU; +my %wiq; +my %is_wit; +my %is_sigil; +my %is_nonlist_keyword; +my %is_nonlist_type; +my %is_s_y_m_slash; +my %is_unexpected_equals; + +BEGIN { + + # added 'U' to fix cases b1125 b1126 b1127 + my @q = qw(w U); + @{wU}{@q} = (1) x scalar(@q); + + @q = qw(w i q Q G C Z); + @{wiq}{@q} = (1) x scalar(@q); + + @q = qw(w i t); + @{is_wit}{@q} = (1) x scalar(@q); + + @q = qw($ & % * @); + @{is_sigil}{@q} = (1) x scalar(@q); + + # Parens following these keywords will not be marked as lists. Note that + # 'for' is not included and is handled separately, by including 'f' in the + # hash %is_counted_type, since it may or may not be a c-style for loop. + @q = qw( if elsif unless and or ); + @is_nonlist_keyword{@q} = (1) x scalar(@q); + + # Parens following these types will not be marked as lists + @q = qw( && || ); + @is_nonlist_type{@q} = (1) x scalar(@q); + + @q = qw( s y m / ); + @is_s_y_m_slash{@q} = (1) x scalar(@q); + + @q = qw( = == != ); + @is_unexpected_equals{@q} = (1) x scalar(@q); + +} ## end BEGIN + +{ #<<< begin clousure respace_tokens + +my $rLL_new; # This will be the new array of tokens + +# These are variables in $self +my $rLL; +my $length_function; +my $is_encoded_data; + +my $K_closing_ternary; +my $K_opening_ternary; +my $rchildren_of_seqno; +my $rhas_broken_code_block; +my $rhas_broken_list; +my $rhas_broken_list_with_lec; +my $rhas_code_block; +my $rhas_list; +my $rhas_ternary; +my $ris_assigned_structure; +my $ris_broken_container; +my $ris_excluded_lp_container; +my $ris_list_by_seqno; +my $ris_permanently_broken; +my $rlec_count_by_seqno; +my $roverride_cab3; +my $rparent_of_seqno; +my $rtype_count_by_seqno; +my $rblock_type_of_seqno; + +my $K_opening_container; +my $K_closing_container; + +my %K_first_here_doc_by_seqno; + +my $last_nonblank_code_type; +my $last_nonblank_code_token; +my $last_nonblank_block_type; +my $last_last_nonblank_code_type; +my $last_last_nonblank_code_token; + +my %seqno_stack; +my %K_old_opening_by_seqno; +my $depth_next; +my $depth_next_max; + +my $cumulative_length; + +# Variables holding the current line info +my $Ktoken_vars; +my $Kfirst_old; +my $Klast_old; +my $Klast_old_code; +my $CODE_type; + +my $rwhitespace_flags; + +sub initialize_respace_tokens_closure { + + my ($self) = @_; + + $rLL_new = []; # This is the new array + + $rLL = $self->[_rLL_]; + $length_function = $self->[_length_function_]; + $is_encoded_data = $self->[_is_encoded_data_]; + + $K_closing_ternary = $self->[_K_closing_ternary_]; + $K_opening_ternary = $self->[_K_opening_ternary_]; + $rchildren_of_seqno = $self->[_rchildren_of_seqno_]; + $rhas_broken_code_block = $self->[_rhas_broken_code_block_]; + $rhas_broken_list = $self->[_rhas_broken_list_]; + $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_]; + $rhas_code_block = $self->[_rhas_code_block_]; + $rhas_list = $self->[_rhas_list_]; + $rhas_ternary = $self->[_rhas_ternary_]; + $ris_assigned_structure = $self->[_ris_assigned_structure_]; + $ris_broken_container = $self->[_ris_broken_container_]; + $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_]; + $ris_list_by_seqno = $self->[_ris_list_by_seqno_]; + $ris_permanently_broken = $self->[_ris_permanently_broken_]; + $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_]; + $roverride_cab3 = $self->[_roverride_cab3_]; + $rparent_of_seqno = $self->[_rparent_of_seqno_]; + $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_]; + $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; + + %K_first_here_doc_by_seqno = (); + + $last_nonblank_code_type = ';'; + $last_nonblank_code_token = ';'; + $last_nonblank_block_type = EMPTY_STRING; + $last_last_nonblank_code_type = ';'; + $last_last_nonblank_code_token = ';'; + + %seqno_stack = (); + %K_old_opening_by_seqno = (); # Note: old K index + $depth_next = 0; + $depth_next_max = 0; + + # we will be setting token lengths as we go + $cumulative_length = 0; + + $Ktoken_vars = undef; # the old K value of $rtoken_vars + $Kfirst_old = undef; # min K of old line + $Klast_old = undef; # max K of old line + $Klast_old_code = undef; # K of last token if side comment + $CODE_type = EMPTY_STRING; + + # Set the whitespace flags, which indicate the token spacing preference. + $rwhitespace_flags = $self->set_whitespace_flags(); + + # Note that $K_opening_container and $K_closing_container have values + # defined in sub get_line() for the previous K indexes. They were needed + # in case option 'indent-only' was set, and we didn't get here. We no + # longer need those and will eliminate them now to avoid any possible + # mixing of old and new values. This must be done AFTER the call to + # set_whitespace_flags, which needs these. + $K_opening_container = $self->[_K_opening_container_] = {}; + $K_closing_container = $self->[_K_closing_container_] = {}; + + return; + +} ## end sub initialize_respace_tokens_closure + +sub respace_tokens { + + my $self = shift; + + #-------------------------------------------------------------------------- + # This routine is called once per file to do as much formatting as possible + # before new line breaks are set. + #-------------------------------------------------------------------------- + + # Return parameters: + # Set $severe_error=true if processing must terminate immediately + my ( $severe_error, $rqw_lines ); + + # We change any spaces in --indent-only mode + if ( $rOpts->{'indent-only'} ) { + + # We need to define lengths for -indent-only to avoid undefs, even + # though these values are not actually needed for option --indent-only. + + $rLL = $self->[_rLL_]; + $length_function = $self->[_length_function_]; + $cumulative_length = 0; + + foreach my $item ( @{$rLL} ) { + my $token = $item->[_TOKEN_]; + my $token_length = $length_function->($token); + $cumulative_length += $token_length; + $item->[_TOKEN_LENGTH_] = $token_length; + $item->[_CUMULATIVE_LENGTH_] = $cumulative_length; + } + + return ( $severe_error, $rqw_lines ); + } + + # This routine makes all necessary and possible changes to the tokenization + # after the initial tokenization of the file. This is a tedious routine, + # but basically it consists of inserting and deleting whitespace between + # nonblank tokens according to the selected parameters. In a few cases + # non-space characters are added, deleted or modified. + + # The goal of this routine is to create a new token array which only needs + # the definition of new line breaks and padding to complete formatting. In + # a few cases we have to cheat a little to achieve this goal. In + # particular, we may not know if a semicolon will be needed, because it + # depends on how the line breaks go. To handle this, we include the + # semicolon as a 'phantom' which can be displayed as normal or as an empty + # string. + + # Method: The old tokens are copied one-by-one, with changes, from the old + # linear storage array $rLL to a new array $rLL_new. + + # (re-)initialize closure variables for this problem + $self->initialize_respace_tokens_closure(); + + #-------------------------------- + # Main over all lines of the file + #-------------------------------- + my $rlines = $self->[_rlines_]; + my $line_type = EMPTY_STRING; + my $last_K_out; + + foreach my $line_of_tokens ( @{$rlines} ) { + + my $input_line_number = $line_of_tokens->{_line_number}; + my $last_line_type = $line_type; + $line_type = $line_of_tokens->{_line_type}; + next unless ( $line_type eq 'CODE' ); + $CODE_type = $line_of_tokens->{_code_type}; + + if ( $CODE_type eq 'BL' ) { + my $seqno = $seqno_stack{ $depth_next - 1 }; + if ( defined($seqno) ) { + $self->[_rblank_and_comment_count_]->{$seqno} += 1; + $self->set_permanently_broken($seqno) + if (!$ris_permanently_broken->{$seqno} + && $rOpts_maximum_consecutive_blank_lines ); + } + } + + my $rK_range = $line_of_tokens->{_rK_range}; + my ( $Kfirst, $Klast ) = @{$rK_range}; + next unless defined($Kfirst); + ( $Kfirst_old, $Klast_old ) = ( $Kfirst, $Klast ); + $Klast_old_code = $Klast_old; + + # Be sure an old K value is defined for sub store_token + $Ktoken_vars = $Kfirst; + + # Check for correct sequence of token indexes... + # An error here means that sub write_line() did not correctly + # package the tokenized lines as it received them. If we + # get a fault here it has not output a continuous sequence + # of K values. Or a line of CODE may have been mis-marked as + # something else. There is no good way to continue after such an + # error. + if ( defined($last_K_out) ) { + if ( $Kfirst != $last_K_out + 1 ) { + Fault_Warn( + "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst" + ); + $severe_error = 1; + return ( $severe_error, $rqw_lines ); + } + } + else { + + # The first token should always have been given index 0 by sub + # write_line() + if ( $Kfirst != 0 ) { + Fault("Program Bug: first K is $Kfirst but should be 0"); + } + } + $last_K_out = $Klast; + + # Handle special lines of code + if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) { + + # CODE_types are as follows. + # 'BL' = Blank Line + # 'VB' = Verbatim - line goes out verbatim + # 'FS' = Format Skipping - line goes out verbatim, no blanks + # 'IO' = Indent Only - only indentation may be changed # 'NIN' = No Internal Newlines - line does not get broken # 'HSC'=Hanging Side Comment - fix this hanging side comment # 'BC'=Block Comment - an ordinary full line comment @@ -6742,10 +7446,10 @@ sub respace_tokens { # the -extrude and -mangle options. my $rcopy = copy_token_as_type( $rvars_Kfirst, 'q', EMPTY_STRING ); - $store_token->($rcopy); + $self->store_token($rcopy); $rcopy = copy_token_as_type( $rvars_Kfirst, 'b', SPACE ); - $store_token->($rcopy); - $store_token->($rvars_Kfirst); + $self->store_token($rcopy); + $self->store_token($rvars_Kfirst); next; } else { @@ -6761,20 +7465,10 @@ sub respace_tokens { } } - if ( $CODE_type eq 'BL' ) { - my $seqno = $seqno_stack{ $depth_next - 1 }; - if ( defined($seqno) - && !$ris_permanently_broken->{$seqno} - && $rOpts_maximum_consecutive_blank_lines ) - { - $set_permanently_broken->($seqno); - } - } - # Copy tokens unchanged foreach my $KK ( $Kfirst .. $Klast ) { $Ktoken_vars = $KK; - $store_token->( $rLL->[$KK] ); + $self->store_token( $rLL->[$KK] ); } next; } @@ -6798,6 +7492,7 @@ sub respace_tokens { # if last line was normal CODE. # Patch for rt #125012: use K_previous_code rather than '_nonblank' # because comments may disappear. + # Note that we must do this even if --noadd-whitespace is set if ( $last_line_type eq 'CODE' ) { my $type_next = $rLL->[$Kfirst]->[_TYPE_]; my $token_next = $rLL->[$Kfirst]->[_TOKEN_]; @@ -6812,355 +7507,417 @@ sub respace_tokens { ) ) { - - # Copy this first token as blank, but use previous line number - my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', SPACE ); - $rcopy->[_LINE_INDEX_] = - $rLL_new->[-1]->[_LINE_INDEX_]; - - # The level and ci_level of newly created spaces should be the - # same as the previous token. Otherwise blinking states can - # be created if the -lp mode is used. See similar coding in - # sub 'store_token_and_space'. Fixes cases b1109 b1110. - $rcopy->[_LEVEL_] = - $rLL_new->[-1]->[_LEVEL_]; - $rcopy->[_CI_LEVEL_] = - $rLL_new->[-1]->[_CI_LEVEL_]; - - $store_token->($rcopy); + $self->store_space(); } } - #------------------------------------------------------- - # Loop to copy all tokens on this line, with any changes - #------------------------------------------------------- - my $type_sequence; - foreach my $KK ( $Kfirst .. $Klast ) { - $Ktoken_vars = $KK; - $rtoken_vars = $rLL->[$KK]; - my $token = $rtoken_vars->[_TOKEN_]; - my $type = $rtoken_vars->[_TYPE_]; - my $last_type_sequence = $type_sequence; - $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; - - # Handle a blank space ... - if ( $type eq 'b' ) { + #----------------------------------------------- + # Inner loop to respace tokens on a line of code + #----------------------------------------------- - # Delete it if not wanted by whitespace rules - # or we are deleting all whitespace - # Note that whitespace flag is a flag indicating whether a - # white space BEFORE the token is needed - next if ( $KK >= $Klast ); # skip terminal blank - my $Knext = $KK + 1; + # The inner loop is in a separate sub for clarity + $self->respace_tokens_inner_loop( $Kfirst, $Klast, $input_line_number ); - if ($rOpts_freeze_whitespace) { - $store_token->($rtoken_vars); - next; - } + } # End line loop - my $ws = $rwhitespace_flags->[$Knext]; - if ( $ws == -1 - || $rOpts_delete_old_whitespace ) - { + # finalize data structures + $self->respace_post_loop_ops(); - my $token_next = $rLL->[$Knext]->[_TOKEN_]; - my $type_next = $rLL->[$Knext]->[_TYPE_]; + # Reset memory to be the new array + $self->[_rLL_] = $rLL_new; + my $Klimit; + if ( @{$rLL_new} ) { $Klimit = @{$rLL_new} - 1 } + $self->[_Klimit_] = $Klimit; - my $do_not_delete = is_essential_whitespace( - $last_last_nonblank_code_token, - $last_last_nonblank_code_type, - $last_nonblank_code_token, - $last_nonblank_code_type, - $token_next, - $type_next, - ); + # During development, verify that the new array still looks okay. + DEVEL_MODE && $self->check_token_array(); - # Note that repeated blanks will get filtered out here - next unless ($do_not_delete); - } + # update the token limits of each line + ( $severe_error, $rqw_lines ) = $self->resync_lines_and_tokens(); - # make it just one character - $rtoken_vars->[_TOKEN_] = SPACE; - $store_token->($rtoken_vars); - next; - } + return ( $severe_error, $rqw_lines ); +} ## end sub respace_tokens - # Handle a nonblank token... +sub respace_tokens_inner_loop { - if ($type_sequence) { + my ( $self, $Kfirst, $Klast, $input_line_number ) = @_; - # Insert a tentative missing semicolon if the next token is - # a closing block brace - if ( - $type eq '}' - && $token eq '}' + #----------------------------------------------------------------- + # Loop to copy all tokens on one line, making any spacing changes, + # while also collecting information needed by later subs. + #----------------------------------------------------------------- + foreach my $KK ( $Kfirst .. $Klast ) { - # not preceded by a ';' - && $last_nonblank_code_type ne ';' + # TODO: consider eliminating this closure var by passing directly to + # store_token following pattern of store_tokens_to_go. + $Ktoken_vars = $KK; - # and this is not a VERSION stmt (is all one line, we - # are not inserting semicolons on one-line blocks) - && $CODE_type ne 'VER' + my $rtoken_vars = $rLL->[$KK]; + my $type = $rtoken_vars->[_TYPE_]; - # and we are allowed to add semicolons - && $rOpts->{'add-semicolons'} - ) - { - $add_phantom_semicolon->($KK); - } + # Handle a blank space ... + if ( $type eq 'b' ) { + + # Delete it if not wanted by whitespace rules + # or we are deleting all whitespace + # Note that whitespace flag is a flag indicating whether a + # white space BEFORE the token is needed + next if ( $KK >= $Klast ); # skip terminal blank + my $Knext = $KK + 1; + + if ($rOpts_freeze_whitespace) { + $self->store_token($rtoken_vars); + next; } - # Modify certain tokens here for whitespace - # The following is not yet done, but could be: - # sub (x x x) - # ( $type =~ /^[wit]$/ ) - elsif ( $is_wit{$type} ) { + my $ws = $rwhitespace_flags->[$Knext]; + if ( $ws == -1 + || $rOpts_delete_old_whitespace ) + { - # change '$ var' to '$var' etc - # change '@ ' to '@' - # Examples: <> - my $ord = ord( substr( $token, 1, 1 ) ); - if ( + my $token_next = $rLL->[$Knext]->[_TOKEN_]; + my $type_next = $rLL->[$Knext]->[_TYPE_]; - # quick test for possible blank at second char - $ord > 0 && ( $ord < ORD_PRINTABLE_MIN - || $ord > ORD_PRINTABLE_MAX ) - ) - { - my ( $sigil, $word ) = split /\s+/, $token, 2; + my $do_not_delete = is_essential_whitespace( + $last_last_nonblank_code_token, + $last_last_nonblank_code_type, + $last_nonblank_code_token, + $last_nonblank_code_type, + $token_next, + $type_next, + ); - # $sigil =~ /^[\$\&\%\*\@]$/ ) - if ( $is_sigil{$sigil} ) { - $token = $sigil; - $token .= $word if ( defined($word) ); # fix c104 - $rtoken_vars->[_TOKEN_] = $token; - } - } + # Note that repeated blanks will get filtered out here + next unless ($do_not_delete); + } - # Split identifiers with leading arrows, inserting blanks - # if necessary. It is easier and safer here than in the - # tokenizer. For example '->new' becomes two tokens, '->' - # and 'new' with a possible blank between. - # - # Note: there is a related patch in sub set_whitespace_flags - elsif (length($token) > 2 - && substr( $token, 0, 2 ) eq '->' - && $token =~ /^\-\>(.*)$/ - && $1 ) - { + # make it just one character + $rtoken_vars->[_TOKEN_] = SPACE; + $self->store_token($rtoken_vars); + next; + } - my $token_save = $1; - my $type_save = $type; + my $token = $rtoken_vars->[_TOKEN_]; - # Change '-> new' to '->new' - $token_save =~ s/^\s+//g; + # Handle a sequenced token ... i.e. one of ( ) { } [ ] ? : + if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) { - # store a blank to left of arrow if necessary - my $Kprev = $self->K_previous_nonblank($KK); - if ( defined($Kprev) - && $rLL->[$Kprev]->[_TYPE_] ne 'b' - && $rOpts_add_whitespace - && $want_left_space{'->'} == WS_YES ) - { - my $rcopy = - copy_token_as_type( $rtoken_vars, 'b', SPACE ); - $store_token->($rcopy); - } + # One of ) ] } ... + if ( $is_closing_token{$token} ) { - # then store the arrow - my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' ); - $store_token->($rcopy); + my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; + my $block_type = $rblock_type_of_seqno->{$type_sequence}; - # store a blank after the arrow if requested - # added for issue git #33 - if ( $want_right_space{'->'} == WS_YES ) { - my $rcopy_b = - copy_token_as_type( $rtoken_vars, 'b', SPACE ); - $store_token->($rcopy_b); - } + #--------------------------------------------- + # check for semicolon addition in a code block + #--------------------------------------------- + if ($block_type) { - # then reset the current token to be the remainder, - # and reset the whitespace flag according to the arrow - $token = $rtoken_vars->[_TOKEN_] = $token_save; - $type = $rtoken_vars->[_TYPE_] = $type_save; - $store_token->($rtoken_vars); - next; + # if not preceded by a ';' .. + if ( $last_nonblank_code_type ne ';' ) { + + # tentatively insert a semicolon if appropriate + $self->add_phantom_semicolon($KK) + if $rOpts->{'add-semicolons'}; + } } - # Trim certain spaces in identifiers - if ( $type eq 'i' ) { + #---------------------------------------------------------- + # check for addition/deletion of a trailing comma in a list + #---------------------------------------------------------- + else { - if ( - ( - substr( $token, 0, 3 ) eq 'sub' - || $rOpts_sub_alias_list - ) - && $token =~ /$SUB_PATTERN/ - ) + # if this is a list .. + my $rtype_count = $rtype_count_by_seqno->{$type_sequence}; + if ( $rtype_count + && $rtype_count->{','} + && !$rtype_count->{';'} + && !$rtype_count->{'f'} ) { - # -spp = 0 : no space before opening prototype paren - # -spp = 1 : stable (follow input spacing) - # -spp = 2 : always space before opening prototype paren - my $spp = $rOpts->{'space-prototype-paren'}; - if ( defined($spp) ) { - if ( $spp == 0 ) { $token =~ s/\s+\(/\(/; } - elsif ( $spp == 2 ) { $token =~ s/\(/ (/; } + # if NOT preceded by a comma.. + if ( $last_nonblank_code_type ne ',' ) { + + # insert a comma if requested + if ( $rOpts_add_trailing_commas + && %trailing_comma_rules ) + { + $self->add_trailing_comma( $KK, $Kfirst, + $trailing_comma_rules{$token} ); + } } - # one space max, and no tabs - $token =~ s/\s+/ /g; - $rtoken_vars->[_TOKEN_] = $token; - } + # if preceded by a comma .. + else { - # clean up spaces in package identifiers, like - # "package Bob::Dog;" - elsif ( substr( $token, 0, 7 ) eq 'package' - && $token =~ /^package\s/ ) - { - $token =~ s/\s+/ /g; - $rtoken_vars->[_TOKEN_] = $token; + # delete a trailing comma if requested + my $deleted; + if ( $rOpts_delete_trailing_commas + && %trailing_comma_rules ) + { + $deleted = + $self->delete_trailing_comma( $KK, $Kfirst, + $trailing_comma_rules{$token} ); + } + + # delete a weld-interfering comma if requested + if ( !$deleted + && $rOpts_delete_weld_interfering_commas + && $is_closing_type{ + $last_last_nonblank_code_type} ) + { + $self->delete_weld_interfering_comma($KK); + } + } } + } + } + } - # trim identifiers of trailing blanks which can occur - # under some unusual circumstances, such as if the - # identifier 'witch' has trailing blanks on input here: - # - # sub - # witch - # () # prototype may be on new line ... - # ... - my $ord_ch = ord( substr( $token, -1, 1 ) ); - if ( + # Modify certain tokens here for whitespace + # The following is not yet done, but could be: + # sub (x x x) + # ( $type =~ /^[wit]$/ ) + elsif ( $is_wit{$type} ) { - # quick check for possible ending space - $ord_ch > 0 && ( $ord_ch < ORD_PRINTABLE_MIN - || $ord_ch > ORD_PRINTABLE_MAX ) - ) + # change '$ var' to '$var' etc + # change '@ ' to '@' + # Examples: <> + my $ord = ord( substr( $token, 1, 1 ) ); + if ( + + # quick test for possible blank at second char + $ord > 0 && ( $ord < ORD_PRINTABLE_MIN + || $ord > ORD_PRINTABLE_MAX ) + ) + { + my ( $sigil, $word ) = split /\s+/, $token, 2; + + # $sigil =~ /^[\$\&\%\*\@]$/ ) + if ( $is_sigil{$sigil} ) { + $token = $sigil; + $token .= $word if ( defined($word) ); # fix c104 + $rtoken_vars->[_TOKEN_] = $token; + } + } + + # Trim certain spaces in identifiers + if ( $type eq 'i' ) { + + if ( $token =~ /$SUB_PATTERN/ ) { + + # -spp = 0 : no space before opening prototype paren + # -spp = 1 : stable (follow input spacing) + # -spp = 2 : always space before opening prototype paren + if ( !defined($rOpts_space_prototype_paren) + || $rOpts_space_prototype_paren == 1 ) { - $token =~ s/\s+$//g; - $rtoken_vars->[_TOKEN_] = $token; + ## default: stable + } + elsif ( $rOpts_space_prototype_paren == 0 ) { + $token =~ s/\s+\(/\(/; } + elsif ( $rOpts_space_prototype_paren == 2 ) { + $token =~ s/\(/ (/; + } + + # one space max, and no tabs + $token =~ s/\s+/ /g; + $rtoken_vars->[_TOKEN_] = $token; + + $self->[_ris_special_identifier_token_]->{$token} = 'sub'; + } - } - # handle semicolons - elsif ( $type eq ';' ) { + # clean up spaces in package identifiers, like + # "package Bob::Dog;" + elsif ( substr( $token, 0, 7 ) eq 'package' + && $token =~ /^package\s/ ) + { + $token =~ s/\s+/ /g; + $rtoken_vars->[_TOKEN_] = $token; + + $self->[_ris_special_identifier_token_]->{$token} = + 'package'; + + } - # Remove unnecessary semicolons, but not after bare - # blocks, where it could be unsafe if the brace is - # mis-tokenized. + # trim identifiers of trailing blanks which can occur + # under some unusual circumstances, such as if the + # identifier 'witch' has trailing blanks on input here: + # + # sub + # witch + # () # prototype may be on new line ... + # ... + my $ord_ch = ord( substr( $token, -1, 1 ) ); if ( - $rOpts->{'delete-semicolons'} - && ( - ( - $last_nonblank_block_type - && $last_nonblank_code_type eq '}' - && ( - $is_block_without_semicolon{ - $last_nonblank_block_type} - || $last_nonblank_block_type =~ /$SUB_PATTERN/ - || $last_nonblank_block_type =~ /^\w+:$/ - ) - ) - || $last_nonblank_code_type eq ';' - ) + + # quick check for possible ending space + $ord_ch > 0 && ( $ord_ch < ORD_PRINTABLE_MIN + || $ord_ch > ORD_PRINTABLE_MAX ) ) { + $token =~ s/\s+$//g; + $rtoken_vars->[_TOKEN_] = $token; + } + } + } - # This looks like a deletable semicolon, but even if a - # semicolon can be deleted it is not necessarily best to do - # so. We apply these additional rules for deletion: - # - Always ok to delete a ';' at the end of a line - # - Never delete a ';' before a '#' because it would - # promote it to a block comment. - # - If a semicolon is not at the end of line, then only - # delete if it is followed by another semicolon or closing - # token. This includes the comment rule. It may take - # two passes to get to a final state, but it is a little - # safer. For example, keep the first semicolon here: - # eval { sub bubba { ok(0) }; ok(0) } || ok(1); - # It is not required but adds some clarity. - my $ok_to_delete = 1; - if ( $KK < $Klast ) { - my $Kn = $self->K_next_nonblank($KK); - if ( defined($Kn) && $Kn <= $Klast ) { - my $next_nonblank_token_type = - $rLL->[$Kn]->[_TYPE_]; - $ok_to_delete = $next_nonblank_token_type eq ';' - || $next_nonblank_token_type eq '}'; - } - } + # handle semicolons + elsif ( $type eq ';' ) { - # do not delete only nonblank token in a file - else { - my $Kp = $self->K_previous_code( undef, $rLL_new ); - my $Kn = $self->K_next_nonblank($KK); - $ok_to_delete = defined($Kn) || defined($Kp); - } + # Remove unnecessary semicolons, but not after bare + # blocks, where it could be unsafe if the brace is + # mis-tokenized. + if ( + $rOpts->{'delete-semicolons'} + && ( + ( + $last_nonblank_block_type + && $last_nonblank_code_type eq '}' + && ( + $is_block_without_semicolon{ + $last_nonblank_block_type} + || $last_nonblank_block_type =~ /$SUB_PATTERN/ + || $last_nonblank_block_type =~ /^\w+:$/ + ) + ) + || $last_nonblank_code_type eq ';' + ) + ) + { - if ($ok_to_delete) { - $self->note_deleted_semicolon($input_line_number); - next; - } - else { - write_logfile_entry("Extra ';'\n"); + # This looks like a deletable semicolon, but even if a + # semicolon can be deleted it is not necessarily best to do + # so. We apply these additional rules for deletion: + # - Always ok to delete a ';' at the end of a line + # - Never delete a ';' before a '#' because it would + # promote it to a block comment. + # - If a semicolon is not at the end of line, then only + # delete if it is followed by another semicolon or closing + # token. This includes the comment rule. It may take + # two passes to get to a final state, but it is a little + # safer. For example, keep the first semicolon here: + # eval { sub bubba { ok(0) }; ok(0) } || ok(1); + # It is not required but adds some clarity. + my $ok_to_delete = 1; + if ( $KK < $Klast ) { + my $Kn = $self->K_next_nonblank($KK); + if ( defined($Kn) && $Kn <= $Klast ) { + my $next_nonblank_token_type = $rLL->[$Kn]->[_TYPE_]; + $ok_to_delete = $next_nonblank_token_type eq ';' + || $next_nonblank_token_type eq '}'; } } + + # do not delete only nonblank token in a file + else { + my $Kp = $self->K_previous_code( undef, $rLL_new ); + my $Kn = $self->K_next_nonblank($KK); + $ok_to_delete = defined($Kn) || defined($Kp); + } + + if ($ok_to_delete) { + $self->note_deleted_semicolon($input_line_number); + next; + } + else { + write_logfile_entry("Extra ';'\n"); + } } + } - # Old patch to add space to something like "x10". - # Note: This is now done in the Tokenizer, but this code remains - # for reference. - elsif ( $type eq 'n' ) { - if ( substr( $token, 0, 1 ) eq 'x' && $token =~ /^x\d+/ ) { - $token =~ s/x/x /; - $rtoken_vars->[_TOKEN_] = $token; - if (DEVEL_MODE) { - Fault(<[_TOKEN_] = $token; + if (DEVEL_MODE) { + Fault(<[_TOKEN_] = $token; - $self->note_embedded_tab($input_line_number) - if ( $token =~ "\t" ); - $store_token_and_space->( - $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES - ); - next; - } ## end if ( $type eq 'q' ) + # check for a qw quote + elsif ( $type eq 'q' ) { - # change 'LABEL :' to 'LABEL:' - elsif ( $type eq 'J' ) { - $token =~ s/\s+//g; - $rtoken_vars->[_TOKEN_] = $token; + # trim blanks from right of qw quotes + # (To avoid trimming qw quotes use -ntqw; the tokenizer handles + # this) + $token =~ s/\s*$//; + $rtoken_vars->[_TOKEN_] = $token; + if ( $self->[_save_logfile_] && $token =~ /\t/ ) { + $self->note_embedded_tab($input_line_number); } - - # check a quote for problems - elsif ( $type eq 'Q' ) { - $check_Q->( $KK, $Kfirst, $input_line_number ); + if ( $rwhitespace_flags->[$KK] == WS_YES + && @{$rLL_new} + && $rLL_new->[-1]->[_TYPE_] ne 'b' + && $rOpts_add_whitespace ) + { + $self->store_space(); } + $self->store_token($rtoken_vars); + next; + } ## end if ( $type eq 'q' ) - # Store this token with possible previous blank - if ( $rwhitespace_flags->[$KK] == WS_YES ) { - $store_token_and_space->( $rtoken_vars, 1 ); + # delete repeated commas if requested + elsif ( $type eq ',' ) { + if ( $last_nonblank_code_type eq ',' + && $rOpts->{'delete-repeated-commas'} ) + { + # Could note this deletion as a possible future update: + ## $self->note_deleted_comma($input_line_number); + next; } - else { - $store_token->($rtoken_vars); + + # remember input line index of first comma if -wtc is used + if (%trailing_comma_rules) { + my $seqno = $seqno_stack{ $depth_next - 1 }; + if ( defined($seqno) + && !defined( $self->[_rfirst_comma_line_index_]->{$seqno} ) + ) + { + $self->[_rfirst_comma_line_index_]->{$seqno} = + $rtoken_vars->[_LINE_INDEX_]; + } } + } - } # End token loop - } # End line loop + # change 'LABEL :' to 'LABEL:' + elsif ( $type eq 'J' ) { + $token =~ s/\s+//g; + $rtoken_vars->[_TOKEN_] = $token; + } + + # check a quote for problems + elsif ( $type eq 'Q' ) { + $self->check_Q( $KK, $Kfirst, $input_line_number ) + if ( $self->[_save_logfile_] ); + } + + # Store this token with possible previous blank + if ( $rwhitespace_flags->[$KK] == WS_YES + && @{$rLL_new} + && $rLL_new->[-1]->[_TYPE_] ne 'b' + && $rOpts_add_whitespace ) + { + $self->store_space(); + } + $self->store_token($rtoken_vars); + + } # End token loop + + return; +} ## end sub respace_tokens_inner_loop + +sub respace_post_loop_ops { + + my ($self) = @_; # Walk backwards through the tokens, making forward links to sequence items. if ( @{$rLL_new} ) { @@ -7173,6 +7930,7 @@ EOM } # Find and remember lists by sequence number + my %is_C_style_for; foreach my $seqno ( keys %{$K_opening_container} ) { my $K_opening = $K_opening_container->{$seqno}; next unless defined($K_opening); @@ -7192,7 +7950,11 @@ EOM if ($rtype_count) { my $comma_count = $rtype_count->{','}; my $fat_comma_count = $rtype_count->{'=>'}; - my $semicolon_count = $rtype_count->{';'} || $rtype_count->{'f'}; + my $semicolon_count = $rtype_count->{';'}; + if ( $rtype_count->{'f'} ) { + $semicolon_count += $rtype_count->{'f'}; + $is_C_style_for{$seqno} = 1; + } # We will define a list to be a container with one or more commas # and no semicolons. Note that we have included the semicolons @@ -7207,14 +7969,12 @@ EOM if ( $rLL_new->[$K_opening]->[_TOKEN_] eq '(' ) { my $Kp = $self->K_previous_code( $K_opening, $rLL_new ); if ( defined($Kp) ) { - my $type_p = $rLL_new->[$Kp]->[_TYPE_]; - if ( $type_p eq 'k' ) { - my $token_p = $rLL_new->[$Kp]->[_TOKEN_]; - $is_list = 0 if ( $is_nonlist_keyword{$token_p} ); - } - else { - $is_list = 0 if ( $is_nonlist_type{$type_p} ); - } + my $type_p = $rLL_new->[$Kp]->[_TYPE_]; + my $token_p = $rLL_new->[$Kp]->[_TOKEN_]; + $is_list = + $type_p eq 'k' + ? !$is_nonlist_keyword{$token_p} + : !$is_nonlist_type{$type_p}; } } } @@ -7341,12558 +8101,15375 @@ EOM next unless ( $rtype_count && $rtype_count->{'=>'} ); # override -cab=3 if this contains a sub-list - if ( $rhas_list->{$seqno} ) { - $roverride_cab3->{$seqno} = 1; - } + if ( !defined( $roverride_cab3->{$seqno} ) ) { + if ( $rhas_list->{$seqno} ) { + $roverride_cab3->{$seqno} = 2; + } - # or if this is a sub-list of its parent container - else { - my $seqno_parent = $rparent_of_seqno->{$seqno}; - if ( defined($seqno_parent) - && $ris_list_by_seqno->{$seqno_parent} ) - { - $roverride_cab3->{$seqno} = 1; + # or if this is a sub-list of its parent container + else { + my $seqno_parent = $rparent_of_seqno->{$seqno}; + if ( defined($seqno_parent) + && $ris_list_by_seqno->{$seqno_parent} ) + { + $roverride_cab3->{$seqno} = 2; + } } } } } - # Reset memory to be the new array - $self->[_rLL_] = $rLL_new; - my $Klimit; - if ( @{$rLL_new} ) { $Klimit = @{$rLL_new} - 1 } - $self->[_Klimit_] = $Klimit; - - # During development, verify that the new array still looks okay. - DEVEL_MODE && $self->check_token_array(); + # Add -ci to C-style for loops (issue c154) + # This is much easier to do here than in the tokenizer. + foreach my $seqno ( keys %is_C_style_for ) { + my $K_opening = $K_opening_container->{$seqno}; + my $K_closing = $K_closing_container->{$seqno}; + my $type_last = 'f'; + for my $KK ( $K_opening + 1 .. $K_closing - 1 ) { + $rLL_new->[$KK]->[_CI_LEVEL_] = $type_last eq 'f' ? 0 : 1; + my $type = $rLL_new->[$KK]->[_TYPE_]; + if ( $type ne 'b' && $type ne '#' ) { $type_last = $type } + } + } - # reset the token limits of each line - $self->resync_lines_and_tokens(); + return; +} ## end sub respace_post_loop_ops +sub set_permanently_broken { + my ( $self, $seqno ) = @_; + while ( defined($seqno) ) { + $ris_permanently_broken->{$seqno} = 1; + $seqno = $rparent_of_seqno->{$seqno}; + } return; -} ## end sub respace_tokens +} ## end sub set_permanently_broken -sub copy_token_as_type { +sub store_token { - # This provides a quick way to create a new token by - # slightly modifying an existing token. - my ( $rold_token, $type, $token ) = @_; - if ( $type eq 'b' ) { - $token = SPACE unless defined($token); - } - elsif ( $type eq 'q' ) { - $token = EMPTY_STRING unless defined($token); - } - elsif ( $type eq '->' ) { - $token = '->' unless defined($token); - } - elsif ( $type eq ';' ) { - $token = ';' unless defined($token); - } - else { + my ( $self, $item ) = @_; - # Unexpected type ... this sub will work as long as both $token and - # $type are defined, but we should catch any unexpected types during - # development. - if (DEVEL_MODE) { - Fault(<' or ';' -EOM - } - else { - # shouldn't happen - } - } + #------------------------------------------ + # Store one token during respace operations + #------------------------------------------ - my @rnew_token = @{$rold_token}; - $rnew_token[_TYPE_] = $type; - $rnew_token[_TOKEN_] = $token; - $rnew_token[_TYPE_SEQUENCE_] = EMPTY_STRING; - return \@rnew_token; -} ## end sub copy_token_as_type + # Input parameter: + # $item = ref to a token -sub Debug_dump_tokens { + # NOTE: this sub is called once per token so coding efficiency is critical. - # a debug routine, not normally used - my ( $self, $msg ) = @_; - my $rLL = $self->[_rLL_]; - my $nvars = @{$rLL}; - print STDERR "$msg\n"; - print STDERR "ntokens=$nvars\n"; - print STDERR "K\t_TOKEN_\t_TYPE_\n"; - my $K = 0; + # The next multiple assignment statements are significantly faster than + # doing them one-by-one. + my ( - foreach my $item ( @{$rLL} ) { - print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n"; - $K++; - } - return; -} ## end sub Debug_dump_tokens + $type, + $token, + $type_sequence, -sub K_next_code { - my ( $self, $KK, $rLL ) = @_; + ) = @{$item}[ - # return the index K of the next nonblank, non-comment token - return unless ( defined($KK) && $KK >= 0 ); + _TYPE_, + _TOKEN_, + _TYPE_SEQUENCE_, - # use the standard array unless given otherwise - $rLL = $self->[_rLL_] unless ( defined($rLL) ); - my $Num = @{$rLL}; - my $Knnb = $KK + 1; - while ( $Knnb < $Num ) { - if ( !defined( $rLL->[$Knnb] ) ) { + ]; - # We seem to have encountered a gap in our array. - # This shouldn't happen because sub write_line() pushed - # items into the $rLL array. - Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE); + # Set the token length. Later it may be adjusted again if phantom or + # ignoring side comment lengths. + my $token_length = + $is_encoded_data ? $length_function->($token) : length($token); + + # handle blanks + if ( $type eq 'b' ) { + + # Do not output consecutive blanks. This situation should have been + # prevented earlier, but it is worth checking because later routines + # make this assumption. + if ( @{$rLL_new} && $rLL_new->[-1]->[_TYPE_] eq 'b' ) { return; } - if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' - && $rLL->[$Knnb]->[_TYPE_] ne '#' ) - { - return $Knnb; - } - $Knnb++; } - return; -} ## end sub K_next_code - -sub K_next_nonblank { - my ( $self, $KK, $rLL ) = @_; - - # return the index K of the next nonblank token, or - # return undef if none - return unless ( defined($KK) && $KK >= 0 ); - # The third arg allows this routine to be used on any array. This is - # useful in sub respace_tokens when we are copying tokens from an old $rLL - # to a new $rLL array. But usually the third arg will not be given and we - # will just use the $rLL array in $self. - $rLL = $self->[_rLL_] unless ( defined($rLL) ); - my $Num = @{$rLL}; - my $Knnb = $KK + 1; - return unless ( $Knnb < $Num ); - return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ); - return unless ( ++$Knnb < $Num ); - return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ); + # handle comments + elsif ( $type eq '#' ) { - # Backup loop. Very unlikely to get here; it means we have neighboring - # blanks in the token stream. - $Knnb++; - while ( $Knnb < $Num ) { + # trim comments if necessary + my $ord = ord( substr( $token, -1, 1 ) ); + if ( + $ord > 0 + && ( $ord < ORD_PRINTABLE_MIN + || $ord > ORD_PRINTABLE_MAX ) + && $token =~ s/\s+$// + ) + { + $token_length = $length_function->($token); + $item->[_TOKEN_] = $token; + } - # Safety check, this fault shouldn't happen: The $rLL array is the - # main array of tokens, so all entries should be used. It is - # initialized in sub write_line, and then re-initialized by sub - # $store_token() within sub respace_tokens. Tokens are pushed on - # so there shouldn't be any gaps. - if ( !defined( $rLL->[$Knnb] ) ) { - Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE); - return; + # Mark length of side comments as just 1 if sc lengths are ignored + if ( $rOpts_ignore_side_comment_lengths + && ( !$CODE_type || $CODE_type eq 'HSC' ) ) + { + $token_length = 1; + } + my $seqno = $seqno_stack{ $depth_next - 1 }; + if ( defined($seqno) ) { + $self->[_rblank_and_comment_count_]->{$seqno} += 1 + if ( $CODE_type eq 'BC' ); + $self->set_permanently_broken($seqno) + if !$ris_permanently_broken->{$seqno}; } - if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb } - $Knnb++; } - return; -} ## end sub K_next_nonblank -sub K_previous_code { + # handle non-blanks and non-comments + else { - # return the index K of the previous nonblank, non-comment token - # Call with $KK=undef to start search at the top of the array - my ( $self, $KK, $rLL ) = @_; + my $block_type; - # use the standard array unless given otherwise - $rLL = $self->[_rLL_] unless ( defined($rLL) ); - my $Num = @{$rLL}; - if ( !defined($KK) ) { $KK = $Num } - elsif ( $KK > $Num ) { + # check for a sequenced item (i.e., container or ?/:) + if ($type_sequence) { - # This fault can be caused by a programming error in which a bad $KK is - # given. The caller should make the first call with KK_new=undef to - # avoid this error. - Fault( -"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num" - ) if (DEVEL_MODE); - return; - } - my $Kpnb = $KK - 1; - while ( $Kpnb >= 0 ) { - if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' - && $rLL->[$Kpnb]->[_TYPE_] ne '#' ) - { - return $Kpnb; - } - $Kpnb--; - } - return; -} ## end sub K_previous_code + # This will be the index of this item in the new array + my $KK_new = @{$rLL_new}; -sub K_previous_nonblank { + if ( $is_opening_token{$token} ) { - # return index of previous nonblank token before item K; - # Call with $KK=undef to start search at the top of the array - my ( $self, $KK, $rLL ) = @_; + $K_opening_container->{$type_sequence} = $KK_new; + $block_type = $rblock_type_of_seqno->{$type_sequence}; - # use the standard array unless given otherwise - $rLL = $self->[_rLL_] unless ( defined($rLL) ); - my $Num = @{$rLL}; - if ( !defined($KK) ) { $KK = $Num } - elsif ( $KK > $Num ) { + # Fix for case b1100: Count a line ending in ', [' as having + # a line-ending comma. Otherwise, these commas can be hidden + # with something like --opening-square-bracket-right + if ( $last_nonblank_code_type eq ',' + && $Ktoken_vars == $Klast_old_code + && $Ktoken_vars > $Kfirst_old ) + { + $rlec_count_by_seqno->{$type_sequence}++; + } - # This fault can be caused by a programming error in which a bad $KK is - # given. The caller should make the first call with KK_new=undef to - # avoid this error. - Fault( -"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num" - ) if (DEVEL_MODE); - return; - } - my $Kpnb = $KK - 1; - return unless ( $Kpnb >= 0 ); - return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ); - return unless ( --$Kpnb >= 0 ); - return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ); + if ( $last_nonblank_code_type eq '=' + || $last_nonblank_code_type eq '=>' ) + { + $ris_assigned_structure->{$type_sequence} = + $last_nonblank_code_type; + } - # Backup loop. We should not get here unless some routine - # slipped repeated blanks into the token stream. - return unless ( --$Kpnb >= 0 ); - while ( $Kpnb >= 0 ) { - if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb } - $Kpnb--; - } - return; -} ## end sub K_previous_nonblank + my $seqno_parent = $seqno_stack{ $depth_next - 1 }; + $seqno_parent = SEQ_ROOT unless defined($seqno_parent); + push @{ $rchildren_of_seqno->{$seqno_parent} }, $type_sequence; + $rparent_of_seqno->{$type_sequence} = $seqno_parent; + $seqno_stack{$depth_next} = $type_sequence; + $K_old_opening_by_seqno{$type_sequence} = $Ktoken_vars; + $depth_next++; -sub parent_seqno_by_K { + if ( $depth_next > $depth_next_max ) { + $depth_next_max = $depth_next; + } + } + elsif ( $is_closing_token{$token} ) { - # Return the sequence number of the parent container of token K, if any. + $K_closing_container->{$type_sequence} = $KK_new; + $block_type = $rblock_type_of_seqno->{$type_sequence}; - my ( $self, $KK ) = @_; - my $rLL = $self->[_rLL_]; + # Do not include terminal commas in counts + if ( $last_nonblank_code_type eq ',' + || $last_nonblank_code_type eq '=>' ) + { + $rtype_count_by_seqno->{$type_sequence} + ->{$last_nonblank_code_type}--; - # The task is to jump forward to the next container token - # and use the sequence number of either it or its parent. + if ( $Ktoken_vars == $Kfirst_old + && $last_nonblank_code_type eq ',' + && $rlec_count_by_seqno->{$type_sequence} ) + { + $rlec_count_by_seqno->{$type_sequence}--; + } + } - # For example, consider the following with seqno=5 of the '[' and ']' - # being called with index K of the first token of each line: + # Update the stack... + $depth_next--; + } + else { - # # result - # push @tests, # - - # [ # - - # sub { 99 }, 'do {&{%s} for 1,2}', # 5 - # '(&{})(&{})', undef, # 5 - # [ 2, 2, 0 ], 0 # 5 - # ]; # - + # For ternary, note parent but do not include as child + my $seqno_parent = $seqno_stack{ $depth_next - 1 }; + $seqno_parent = SEQ_ROOT unless defined($seqno_parent); + $rparent_of_seqno->{$type_sequence} = $seqno_parent; - # NOTE: The ending parent will be SEQ_ROOT for a balanced file. For - # unbalanced files, last sequence number will either be undefined or it may - # be at a deeper level. In either case we will just return SEQ_ROOT to - # have a defined value and allow formatting to proceed. - my $parent_seqno = SEQ_ROOT; - my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_]; - if ($type_sequence) { - $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence}; - } - else { - my $Kt = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_]; - if ( defined($Kt) ) { - $type_sequence = $rLL->[$Kt]->[_TYPE_SEQUENCE_]; - my $type = $rLL->[$Kt]->[_TYPE_]; + # These are not yet used but could be useful + if ( $token eq '?' ) { + $K_opening_ternary->{$type_sequence} = $KK_new; + } + elsif ( $token eq ':' ) { + $K_closing_ternary->{$type_sequence} = $KK_new; + } + else { - # if next container token is closing, it is the parent seqno - if ( $is_closing_type{$type} ) { - $parent_seqno = $type_sequence; + # We really shouldn't arrive here, just being cautious: + # The only sequenced types output by the tokenizer are the + # opening & closing containers and the ternary types. Each + # of those was checked above. So we would only get here + # if the tokenizer has been changed to mark some other + # tokens with sequence numbers. + if (DEVEL_MODE) { + Fault( +"Unexpected token type with sequence number: type='$type', seqno='$type_sequence'" + ); + } + } } + } - # otherwise we want its parent container - else { - $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence}; + # Remember the most recent two non-blank, non-comment tokens. + # NOTE: the phantom semicolon code may change the output stack + # without updating these values. Phantom semicolons are considered + # the same as blanks for now, but future needs might change that. + # See the related note in sub 'add_phantom_semicolon'. + $last_last_nonblank_code_type = $last_nonblank_code_type; + $last_last_nonblank_code_token = $last_nonblank_code_token; + + $last_nonblank_code_type = $type; + $last_nonblank_code_token = $token; + $last_nonblank_block_type = $block_type; + + # count selected types + if ( $is_counted_type{$type} ) { + my $seqno = $seqno_stack{ $depth_next - 1 }; + if ( defined($seqno) ) { + $rtype_count_by_seqno->{$seqno}->{$type}++; + + # Count line-ending commas for -bbx + if ( $type eq ',' && $Ktoken_vars == $Klast_old_code ) { + $rlec_count_by_seqno->{$seqno}++; + } + + # Remember index of first here doc target + if ( $type eq 'h' && !$K_first_here_doc_by_seqno{$seqno} ) { + my $KK_new = @{$rLL_new}; + $K_first_here_doc_by_seqno{$seqno} = $KK_new; + } } } } - $parent_seqno = SEQ_ROOT unless ( defined($parent_seqno) ); - return $parent_seqno; -} ## end sub parent_seqno_by_K -sub is_in_block_by_i { - my ( $self, $i ) = @_; + # cumulative length is the length sum including this token + $cumulative_length += $token_length; - # returns true if - # token at i is contained in a BLOCK - # or is at root level - # or there is some kind of error (i.e. unbalanced file) - # returns false otherwise - return 1 if ( $i < 0 ); # shouldn't happen, bad call - my $seqno = $parent_seqno_to_go[$i]; - return 1 if ( !$seqno || $seqno eq SEQ_ROOT ); - return 1 if ( $self->[_rblock_type_of_seqno_]->{$seqno} ); + $item->[_CUMULATIVE_LENGTH_] = $cumulative_length; + $item->[_TOKEN_LENGTH_] = $token_length; + + # For reference, here is how to get the parent sequence number. + # This is not used because it is slower than finding it on the fly + # in sub parent_seqno_by_K: + + # my $seqno_parent = + # $type_sequence && $is_opening_token{$token} + # ? $seqno_stack{ $depth_next - 2 } + # : $seqno_stack{ $depth_next - 1 }; + # my $KK = @{$rLL_new}; + # $rseqno_of_parent_by_K->{$KK} = $seqno_parent; + + # and finally, add this item to the new array + push @{$rLL_new}, $item; return; -} ## end sub is_in_block_by_i +} ## end sub store_token -sub is_in_list_by_i { - my ( $self, $i ) = @_; +sub store_space { + my ($self) = @_; - # returns true if token at i is contained in a LIST - # returns false otherwise - my $seqno = $parent_seqno_to_go[$i]; - return unless ( $seqno && $seqno ne SEQ_ROOT ); - if ( $self->[_ris_list_by_seqno_]->{$seqno} ) { - return 1; + # Store a blank space in the new array + # - but never start the array with a space + # - and never store two consecutive spaces + if ( @{$rLL_new} + && $rLL_new->[-1]->[_TYPE_] ne 'b' ) + { + my $ritem = []; + $ritem->[_TYPE_] = 'b'; + $ritem->[_TOKEN_] = SPACE; + $ritem->[_TYPE_SEQUENCE_] = EMPTY_STRING; + + $ritem->[_LINE_INDEX_] = + $rLL_new->[-1]->[_LINE_INDEX_]; + + # The level and ci_level of newly created spaces should be the same + # as the previous token. Otherwise the coding for the -lp option + # can create a blinking state in some rare cases (see b1109, b1110). + $ritem->[_LEVEL_] = + $rLL_new->[-1]->[_LEVEL_]; + $ritem->[_CI_LEVEL_] = + $rLL_new->[-1]->[_CI_LEVEL_]; + + $self->store_token($ritem); } + return; -} ## end sub is_in_list_by_i +} ## end sub store_space -sub is_list_by_K { +sub add_phantom_semicolon { - # Return true if token K is in a list my ( $self, $KK ) = @_; - my $parent_seqno = $self->parent_seqno_by_K($KK); - return unless defined($parent_seqno); - return $self->[_ris_list_by_seqno_]->{$parent_seqno}; -} + # The token at old index $KK is a closing block brace, and not preceded + # by a semicolon. Before we push it onto the new token list, we may + # want to add a phantom semicolon which can be activated if the the + # block is broken on output. -sub is_list_by_seqno { + # We are only adding semicolons for certain block types + my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_]; + return unless ($type_sequence); + my $block_type = $rblock_type_of_seqno->{$type_sequence}; + return unless ($block_type); + return + unless ( $ok_to_add_semicolon_for_block_type{$block_type} + || $block_type =~ /^(sub|package)/ + || $block_type =~ /^\w+\:$/ ); - # Return true if the immediate contents of a container appears to be a - # list. - my ( $self, $seqno ) = @_; - return unless defined($seqno); - return $self->[_ris_list_by_seqno_]->{$seqno}; -} + # Find the most recent token in the new token list + my $Kp = $self->K_previous_nonblank( undef, $rLL_new ); + return unless ( defined($Kp) ); # shouldn't happen except for bad input -sub resync_lines_and_tokens { + my $type_p = $rLL_new->[$Kp]->[_TYPE_]; + my $token_p = $rLL_new->[$Kp]->[_TOKEN_]; + my $type_sequence_p = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_]; - my $self = shift; - my $rLL = $self->[_rLL_]; - my $Klimit = $self->[_Klimit_]; - my $rlines = $self->[_rlines_]; - my @Krange_code_without_comments; - my @Klast_valign_code; + # Do not add a semicolon if... + return + if ( - # Re-construct the arrays of tokens associated with the original input lines - # since they have probably changed due to inserting and deleting blanks - # and a few other tokens. + # it would follow a comment (and be isolated) + $type_p eq '#' - # This is the next token and its line index: - my $Knext = 0; - my $Kmax = defined($Klimit) ? $Klimit : -1; + # it follows a code block ( because they are not always wanted + # there and may add clutter) + || $type_sequence_p && $rblock_type_of_seqno->{$type_sequence_p} - # Verify that old line indexes are in still order. If this error occurs, - # check locations where sub 'respace_tokens' creates new tokens (like - # blank spaces). It must have set a bad old line index. - if ( DEVEL_MODE && defined($Klimit) ) { - my $iline = $rLL->[0]->[_LINE_INDEX_]; - foreach my $KK ( 1 .. $Klimit ) { - my $iline_last = $iline; - $iline = $rLL->[$KK]->[_LINE_INDEX_]; - if ( $iline < $iline_last ) { - my $KK_m = $KK - 1; - my $token_m = $rLL->[$KK_m]->[_TOKEN_]; - my $token = $rLL->[$KK]->[_TOKEN_]; - my $type_m = $rLL->[$KK_m]->[_TYPE_]; - my $type = $rLL->[$KK]->[_TYPE_]; - Fault(<{_line_type}; - if ( $line_type eq 'CODE' ) { + # it would be inside a 'format' statement (and cause syntax error) + || ( $type_p eq 'k' + && $token_p =~ /format/ ) - # Get the old number of tokens on this line - my $rK_range_old = $line_of_tokens->{_rK_range}; - my ( $Kfirst_old, $Klast_old ) = @{$rK_range_old}; - my $Kdiff_old = 0; - if ( defined($Kfirst_old) ) { - $Kdiff_old = $Klast_old - $Kfirst_old; - } + ); - # Find the range of NEW K indexes for the line: - # $Kfirst = index of first token on line - # $Klast = index of last token on line - my ( $Kfirst, $Klast ); + # Do not add a semicolon if it would impede a weld with an immediately + # following closing token...like this + # { ( some code ) } + # ^--No semicolon can go here - my $Knext_beg = $Knext; # this will be $Kfirst if we find tokens + # look at the previous token... note use of the _NEW rLL array here, + # but sequence numbers are invariant. + my $seqno_inner = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_]; - # Optimization: Although the actual K indexes may be completely - # changed after respacing, the number of tokens on any given line - # will often be nearly unchanged. So we will see if we can start - # our search by guessing that the new line has the same number - # of tokens as the old line. - my $Knext_guess = $Knext + $Kdiff_old; - if ( $Knext_guess > $Knext - && $Knext_guess < $Kmax - && $rLL->[$Knext_guess]->[_LINE_INDEX_] <= $iline ) - { + # If it is also a CLOSING token we have to look closer... + if ( + $seqno_inner + && $is_closing_token{$token_p} - # the guess is good, so we can start our search here - $Knext = $Knext_guess + 1; - } + # we only need to look if there is just one inner container.. + && defined( $rchildren_of_seqno->{$type_sequence} ) + && @{ $rchildren_of_seqno->{$type_sequence} } == 1 + ) + { - while ($Knext <= $Kmax - && $rLL->[$Knext]->[_LINE_INDEX_] <= $iline ) - { - $Knext++; - } + # Go back and see if the corresponding two OPENING tokens are also + # together. Note that we are using the OLD K indexing here: + my $K_outer_opening = $K_old_opening_by_seqno{$type_sequence}; + if ( defined($K_outer_opening) ) { + my $K_nxt = $self->K_next_nonblank($K_outer_opening); + if ( defined($K_nxt) ) { + my $seqno_nxt = $rLL->[$K_nxt]->[_TYPE_SEQUENCE_]; - if ( $Knext > $Knext_beg ) { + # Is the next token after the outer opening the same as + # our inner closing (i.e. same sequence number)? + # If so, do not insert a semicolon here. + return if ( $seqno_nxt && $seqno_nxt == $seqno_inner ); + } + } + } - $Klast = $Knext - 1; + # We will insert an empty semicolon here as a placeholder. Later, if + # it becomes the last token on a line, we will bring it to life. The + # advantage of doing this is that (1) we just have to check line + # endings, and (2) the phantom semicolon has zero width and therefore + # won't cause needless breaks of one-line blocks. + my $Ktop = -1; + if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' + && $want_left_space{';'} == WS_NO ) + { - # Delete any terminal blank token - if ( $rLL->[$Klast]->[_TYPE_] eq 'b' ) { $Klast -= 1 } + # convert the blank into a semicolon.. + # be careful: we are working on the new stack top + # on a token which has been stored. + my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', SPACE ); - if ( $Klast < $Knext_beg ) { - $Klast = undef; - } - else { + # Convert the existing blank to: + # a phantom semicolon for one_line_block option = 0 or 1 + # a real semicolon for one_line_block option = 2 + my $tok = EMPTY_STRING; + my $len_tok = 0; + if ( $rOpts_one_line_block_semicolons == 2 ) { + $tok = ';'; + $len_tok = 1; + } - $Kfirst = $Knext_beg; + $rLL_new->[$Ktop]->[_TOKEN_] = $tok; + $rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = $len_tok; + $rLL_new->[$Ktop]->[_TYPE_] = ';'; - # Save ranges of non-comment code. This will be used by - # sub keep_old_line_breaks. - if ( $rLL->[$Kfirst]->[_TYPE_] ne '#' ) { - push @Krange_code_without_comments, [ $Kfirst, $Klast ]; - } + $self->[_rtype_count_by_seqno_]->{$type_sequence}->{';'}++; - # Only save ending K indexes of code types which are blank - # or 'VER'. These will be used for a convergence check. - # See related code in sub 'convey_batch_to_vertical_aligner' - my $CODE_type = $line_of_tokens->{_code_type}; - if ( !$CODE_type - || $CODE_type eq 'VER' ) - { - push @Klast_valign_code, $Klast; - } - } - } + # NOTE: we are changing the output stack without updating variables + # $last_nonblank_code_type, etc. Future needs might require that + # those variables be updated here. For now, it seems ok to skip + # this. - # It is only safe to trim the actual line text if the input - # line had a terminal blank token. Otherwise, we may be - # in a quote. - if ( $line_of_tokens->{_ended_in_blank_token} ) { - $line_of_tokens->{_line_text} =~ s/\s+$//; - } - $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ]; + # Then store a new blank + $self->store_token($rcopy); + } + else { - # Deleting semicolons can create new empty code lines - # which should be marked as blank - if ( !defined($Kfirst) ) { - my $CODE_type = $line_of_tokens->{_code_type}; - if ( !$CODE_type ) { - $line_of_tokens->{_code_type} = 'BL'; - } + # Patch for issue c078: keep line indexes in order. If the top + # token is a space that we are keeping (due to '-wls=';') then + # we have to check that old line indexes stay in order. + # In very rare + # instances in which side comments have been deleted and converted + # into blanks, we may have filtered down multiple blanks into just + # one. In that case the top blank may have a higher line number + # than the previous nonblank token. Although the line indexes of + # blanks are not really significant, we need to keep them in order + # in order to pass error checks. + if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' ) { + my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_]; + my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_]; + if ( $new_top_ix < $old_top_ix ) { + $rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix; } } + + my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', EMPTY_STRING ); + $self->store_token($rcopy); } + return; +} ## end sub add_phantom_semicolon - # There shouldn't be any nodes beyond the last one. This routine is - # relinking lines and tokens after the tokens have been respaced. A fault - # here indicates some kind of bug has been introduced into the above loops. - # There is not good way to keep going; we better stop here. - # FIXME: This will produce zero output. it would be best to find a way to - # dump the input file. - if ( $Knext <= $Kmax ) { +sub add_trailing_comma { - Fault("unexpected tokens at end of file when reconstructing lines"); - } - $self->[_rKrange_code_without_comments_] = \@Krange_code_without_comments; + # Implement the --add-trailing-commas flag to the line end before index $KK: - # Setup the convergence test in the FileWriter based on line-ending indexes - my $file_writer_object = $self->[_file_writer_object_]; - $file_writer_object->setup_convergence_test( \@Klast_valign_code ); + my ( $self, $KK, $Kfirst, $trailing_comma_rule ) = @_; - # Mark essential old breakpoints if combination -iob -lp is used. These - # two options do not work well together, but we can avoid turning -iob off - # by ignoring -iob at certain essential line breaks. - # Fixes cases b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058 - if ( $rOpts_ignore_old_breakpoints && $rOpts_line_up_parentheses ) { - my %is_assignment_or_fat_comma = %is_assignment; - $is_assignment_or_fat_comma{'=>'} = 1; - my $ris_essential_old_breakpoint = - $self->[_ris_essential_old_breakpoint_]; - my ( $Kfirst, $Klast ); - foreach my $line_of_tokens ( @{$rlines} ) { - my $line_type = $line_of_tokens->{_line_type}; - if ( $line_type ne 'CODE' ) { - ( $Kfirst, $Klast ) = ( undef, undef ); - next; - } - my ( $Kfirst_prev, $Klast_prev ) = ( $Kfirst, $Klast ); - ( $Kfirst, $Klast ) = @{ $line_of_tokens->{_rK_range} }; + # Input parameter: + # $KK = index of closing token in old ($rLL) token list + # which starts a new line and is not preceded by a comma + # $Kfirst = index of first token on the current line of input tokens + # $add_flags = user control flags - next unless defined($Klast_prev); - next unless defined($Kfirst); - my $type_last = $rLL->[$Klast_prev]->[_TOKEN_]; - my $type_first = $rLL->[$Kfirst]->[_TOKEN_]; - next - unless ( $is_assignment_or_fat_comma{$type_last} - || $is_assignment_or_fat_comma{$type_first} ); - $ris_essential_old_breakpoint->{$Klast_prev} = 1; - } - } - return; -} ## end sub resync_lines_and_tokens + # For example, we might want to add a comma here: -sub keep_old_line_breaks { + # bless { + # _name => $name, + # _price => $price, + # _rebate => $rebate <------ location of possible bare comma + # }, $pkg; + # ^-------------------closing token at index $KK on new line - # Called once per file to find and mark any old line breaks which - # should be kept. We will be translating the input hashes into - # token indexes. + # Do not add a comma if it would follow a comment + my $Kp = $self->K_previous_nonblank( undef, $rLL_new ); + return unless ( defined($Kp) ); + my $type_p = $rLL_new->[$Kp]->[_TYPE_]; + return if ( $type_p eq '#' ); - # A flag is set as follows: - # = 1 make a hard break (flush the current batch) - # best for something like leading commas (-kbb=',') - # = 2 make a soft break (keep building current batch) - # best for something like leading -> + # see if the user wants a trailing comma here + my $match = + $self->match_trailing_comma_rule( $KK, $Kfirst, $Kp, + $trailing_comma_rule, 1 ); - my ($self) = @_; + # if so, add a comma + if ($match) { + my $Knew = $self->store_new_token( ',', ',', $Kp ); + } - my $rLL = $self->[_rLL_]; - my $rKrange_code_without_comments = - $self->[_rKrange_code_without_comments_]; - my $rbreak_before_Kfirst = $self->[_rbreak_before_Kfirst_]; - my $rbreak_after_Klast = $self->[_rbreak_after_Klast_]; - my $rwant_container_open = $self->[_rwant_container_open_]; - my $K_opening_container = $self->[_K_opening_container_]; - my $ris_broken_container = $self->[_ris_broken_container_]; - my $ris_list_by_seqno = $self->[_ris_list_by_seqno_]; + return; - # This code moved here from sub break_lists to fix b1120 - if ( $rOpts->{'break-at-old-method-breakpoints'} ) { - foreach my $item ( @{$rKrange_code_without_comments} ) { - my ( $Kfirst, $Klast ) = @{$item}; - my $type = $rLL->[$Kfirst]->[_TYPE_]; - my $token = $rLL->[$Kfirst]->[_TOKEN_]; +} ## end sub add_trailing_comma - # leading '->' use a value of 2 which causes a soft - # break rather than a hard break - if ( $type eq '->' ) { - $rbreak_before_Kfirst->{$Kfirst} = 2; - } +sub delete_trailing_comma { - # leading ')->' use a special flag to insure that both - # opening and closing parens get opened - # Fix for b1120: only for parens, not braces - elsif ( $token eq ')' ) { - my $Kn = $self->K_next_nonblank($Kfirst); - next - unless ( defined($Kn) - && $Kn <= $Klast - && $rLL->[$Kn]->[_TYPE_] eq '->' ); - my $seqno = $rLL->[$Kfirst]->[_TYPE_SEQUENCE_]; - next unless ($seqno); + my ( $self, $KK, $Kfirst, $trailing_comma_rule ) = @_; - # Note: in previous versions there was a fix here to avoid - # instability between conflicting -bom and -pvt or -pvtc flags. - # The fix skipped -bom for a small line difference. But this - # was troublesome, and instead the fix has been moved to - # sub set_vertical_tightness_flags where priority is given to - # the -bom flag over -pvt and -pvtc flags. Both opening and - # closing paren flags are involved because even though -bom only - # requests breaking before the closing paren, automated logic - # opens the opening paren when the closing paren opens. - # Relevant cases are b977, b1215, b1270, b1303 + # Apply the --delete-trailing-commas flag to the comma before index $KK - $rwant_container_open->{$seqno} = 1; - } - } - } + # Input parameter: + # $KK = index of a closing token in OLD ($rLL) token list + # which is preceded by a comma on the same line. + # $Kfirst = index of first token on the current line of input tokens + # $delete_option = user control flag - return unless ( %keep_break_before_type || %keep_break_after_type ); + # Returns true if the comma was deleted - my $check_for_break = sub { - my ( $KK, $rkeep_break_hash, $rbreak_hash ) = @_; - my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_]; + # For example, we might want to delete this comma: + # my @asset = ("FASMX", "FASGX", "FASIX",); + # | |^--------token at index $KK + # | ^------comma of interest + # ^-------------token at $Kfirst - # non-container tokens use the type as the key - if ( !$seqno ) { - my $type = $rLL->[$KK]->[_TYPE_]; - if ( $rkeep_break_hash->{$type} ) { - $rbreak_hash->{$KK} = 1; - } - } + # Verify that the previous token is a comma. Note that we are working in + # the new token list $rLL_new. + my $Kp = $self->K_previous_nonblank( undef, $rLL_new ); + return unless ( defined($Kp) ); + if ( $rLL_new->[$Kp]->[_TYPE_] ne ',' ) { - # container tokens use the token as the key - else { - my $token = $rLL->[$KK]->[_TOKEN_]; - my $flag = $rkeep_break_hash->{$token}; - if ($flag) { + # there must be a '#' between the ',' and closing token; give up. + return; + } + + # Do not delete commas when formatting under stress to avoid instability. + # This fixes b1389, b1390, b1391, b1392. The $high_stress_level has + # been found to work well for trailing commas. + if ( $rLL_new->[$Kp]->[_LEVEL_] >= $high_stress_level ) { + return; + } - my $match = $flag eq '1' || $flag eq '*'; + # See if the user wants this trailing comma + my $match = + $self->match_trailing_comma_rule( $KK, $Kfirst, $Kp, + $trailing_comma_rule, 0 ); - # check for special matching codes - if ( !$match ) { - if ( $token eq '(' || $token eq ')' ) { - $match = $self->match_paren_flag( $KK, $flag ); - } - elsif ( $token eq '{' || $token eq '}' ) { - - # These tentative codes 'b' and 'B' for brace types are - # placeholders for possible future brace types. They - # are not documented and may be changed. - my $block_type = - $self->[_rblock_type_of_seqno_]->{$seqno}; - if ( $flag eq 'b' ) { $match = $block_type } - elsif ( $flag eq 'B' ) { $match = !$block_type } - else { - # unknown code - no match - } - } - } - $rbreak_hash->{$KK} = 1 if ($match); - } + # Patch: the --noadd-whitespace flag can cause instability in complex + # structures. In this case do not delete the comma. Fixes b1409. + if ( !$match && !$rOpts_add_whitespace ) { + my $Kn = $self->K_next_nonblank($KK); + if ( defined($Kn) ) { + my $type_n = $rLL->[$Kn]->[_TYPE_]; + if ( $type_n ne ';' && $type_n ne '#' ) { return } } - }; + } - foreach my $item ( @{$rKrange_code_without_comments} ) { - my ( $Kfirst, $Klast ) = @{$item}; - $check_for_break->( - $Kfirst, \%keep_break_before_type, $rbreak_before_Kfirst - ); - $check_for_break->( - $Klast, \%keep_break_after_type, $rbreak_after_Klast - ); + # If no match, delete it + if ( !$match ) { + + return $self->unstore_last_nonblank_token(','); } return; -} ## end sub keep_old_line_breaks -sub weld_containers { +} ## end sub delete_trailing_comma - # Called once per file to do any welding operations requested by --weld* - # flags. - my ($self) = @_; +sub delete_weld_interfering_comma { - # This count is used to eliminate needless calls for weld checks elsewhere - $total_weld_count = 0; + my ( $self, $KK ) = @_; - return if ( $rOpts->{'indent-only'} ); - return unless ($rOpts_add_newlines); + # Apply the flag '--delete-weld-interfering-commas' to the comma + # before index $KK - # Important: sub 'weld_cuddled_blocks' must be called before - # sub 'weld_nested_containers'. This is because the cuddled option needs to - # use the original _LEVEL_ values of containers, but the weld nested - # containers changes _LEVEL_ of welded containers. + # Input parameter: + # $KK = index of a closing token in OLD ($rLL) token list + # which is preceded by a comma on the same line. - # Here is a good test case to be sure that both cuddling and welding - # are working and not interfering with each other: <> + # Returns true if the comma was deleted - # perltidy -wn -ce + # For example, we might want to delete this comma: - # if ($BOLD_MATH) { ( - # $labels, $comment, - # join( '', '', &make_math( $mode, '', '', $_ ), '' ) - # ) } else { ( - # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ), - # $after - # ) } + # my $tmpl = { foo => {no_override => 1, default => 42}, }; + # || ^------$KK + # |^---$Kp + # $Kpp---^ + # + # Note that: + # index $KK is in the old $rLL array, but + # indexes $Kp and $Kpp are in the new $rLL_new array. - $self->weld_cuddled_blocks() if ( %{$rcuddled_block_types} ); + my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_]; + return unless ($type_sequence); - if ( $rOpts->{'weld-nested-containers'} ) { + # Find the previous token and verify that it is a comma. + my $Kp = $self->K_previous_nonblank( undef, $rLL_new ); + return unless ( defined($Kp) ); + if ( $rLL_new->[$Kp]->[_TYPE_] ne ',' ) { - $self->weld_nested_containers(); + # it is not a comma, so give up ( it is probably a '#' ) + return; + } - $self->weld_nested_quotes(); + # This must be the only comma in this list + my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence}; + return + unless ( defined($rtype_count) + && $rtype_count->{','} + && $rtype_count->{','} == 1 ); + + # Back up to the previous closing token + my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new ); + return unless ( defined($Kpp) ); + my $seqno_pp = $rLL_new->[$Kpp]->[_TYPE_SEQUENCE_]; + my $type_pp = $rLL_new->[$Kpp]->[_TYPE_]; + + # The containers must be nesting (i.e., sequence numbers must differ by 1 ) + if ( $seqno_pp && $is_closing_type{$type_pp} ) { + if ( $seqno_pp == $type_sequence + 1 ) { + + # remove the ',' from the top of the new token list + return $self->unstore_last_nonblank_token(','); + } } + return; - #------------------------------------------------------------- - # All welding is done. Finish setting up weld data structures. - #------------------------------------------------------------- +} ## end sub delete_weld_interfering_comma - my $rLL = $self->[_rLL_]; - my $rK_weld_left = $self->[_rK_weld_left_]; - my $rK_weld_right = $self->[_rK_weld_right_]; - my $rweld_len_right_at_K = $self->[_rweld_len_right_at_K_]; +sub unstore_last_nonblank_token { - my @K_multi_weld; - my @keys = keys %{$rK_weld_right}; - $total_weld_count = @keys; + my ( $self, $type ) = @_; - # First pass to process binary welds. - # This loop is processed in unsorted order for efficiency. - foreach my $Kstart (@keys) { - my $Kend = $rK_weld_right->{$Kstart}; + # remove the most recent nonblank token from the new token list + # Input parameter: + # $type = type to be removed (for safety check) - # An error here would be due to an incorrect initialization introduced - # in one of the above weld routines, like sub weld_nested. - if ( $Kend <= $Kstart ) { - Fault("Bad weld link: Kend=$Kend <= Kstart=$Kstart\n") - if (DEVEL_MODE); - next; - } + # Returns true if success + # false if error - # Set weld values for all tokens this welded pair - foreach ( $Kstart + 1 .. $Kend ) { - $rK_weld_left->{$_} = $Kstart; - } - foreach my $Kx ( $Kstart .. $Kend - 1 ) { - $rK_weld_right->{$Kx} = $Kend; - $rweld_len_right_at_K->{$Kx} = - $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - - $rLL->[$Kx]->[_CUMULATIVE_LENGTH_]; - } + # This was written and is used for removing commas, but might + # be useful for other tokens. If it is ever used for other tokens + # then the issue of what to do about the other variables, such + # as token counts and the '$last...' vars needs to be considered. - # Remember the leftmost index of welds which continue to the right - if ( defined( $rK_weld_right->{$Kend} ) - && !defined( $rK_weld_left->{$Kstart} ) ) - { - push @K_multi_weld, $Kstart; - } + # Safety check, shouldn't happen + if ( @{$rLL_new} < 3 ) { + DEVEL_MODE && Fault("not enough tokens on stack to remove '$type'\n"); + return; } - # Second pass to process chains of welds (these are rare). - # This has to be processed in sorted order. - if (@K_multi_weld) { - my $Kend = -1; - foreach my $Kstart ( sort { $a <=> $b } @K_multi_weld ) { + my ( $rcomma, $rblank ); - # Skip any interior K which was originally missing a left link - next if ( $Kstart <= $Kend ); + # case 1: pop comma from top of stack + if ( $rLL_new->[-1]->[_TYPE_] eq $type ) { + $rcomma = pop @{$rLL_new}; + } - # Find the end of this chain - $Kend = $rK_weld_right->{$Kstart}; - my $Knext = $rK_weld_right->{$Kend}; - while ( defined($Knext) ) { - $Kend = $Knext; - $Knext = $rK_weld_right->{$Kend}; - } + # case 2: pop blank and then comma from top of stack + elsif ($rLL_new->[-1]->[_TYPE_] eq 'b' + && $rLL_new->[-2]->[_TYPE_] eq $type ) + { + $rblank = pop @{$rLL_new}; + $rcomma = pop @{$rLL_new}; + } - # Set weld values this chain - foreach ( $Kstart + 1 .. $Kend ) { - $rK_weld_left->{$_} = $Kstart; - } - foreach my $Kx ( $Kstart .. $Kend - 1 ) { - $rK_weld_right->{$Kx} = $Kend; - $rweld_len_right_at_K->{$Kx} = - $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - - $rLL->[$Kx]->[_CUMULATIVE_LENGTH_]; - } + # case 3: error, shouldn't happen unless bad call + else { + DEVEL_MODE && Fault("Could not find token type '$type' to remove\n"); + return; + } + + # A note on updating vars set by sub store_token for this comma: If we + # reduce the comma count by 1 then we also have to change the variable + # $last_nonblank_code_type to be $last_last_nonblank_code_type because + # otherwise sub store_token is going to ALSO reduce the comma count. + # Alternatively, we can leave the count alone and the + # $last_nonblank_code_type alone. Then sub store_token will produce + # the correct result. This is simpler and is done here. + + # Now add a blank space after the comma if appropriate. + # Some unusual spacing controls might need another iteration to + # reach a final state. + if ( $rLL_new->[-1]->[_TYPE_] ne 'b' ) { + if ( defined($rblank) ) { + $rblank->[_CUMULATIVE_LENGTH_] -= 1; # fix for deleted comma + push @{$rLL_new}, $rblank; } } + return 1; +} ## end sub unstore_last_nonblank_token + +sub match_trailing_comma_rule { + + my ( $self, $KK, $Kfirst, $Kp, $trailing_comma_rule, $if_add ) = @_; + + # Decide if a trailing comma rule is matched. + + # Input parameter: + # $KK = index of closing token in old ($rLL) token list which follows + # the location of a possible trailing comma. See diagram below. + # $Kfirst = (old) index of first token on the current line of input tokens + # $Kp = index of previous nonblank token in new ($rLL_new) array + # $trailing_comma_rule = packed user control flags + # $if_add = true if adding comma, false if deleteing comma + + # Returns: + # false if no match + # true if match + + # For example, we might be checking for addition of a comma here: + + # bless { + # _name => $name, + # _price => $price, + # _rebate => $rebate <------ location of possible trailing comma + # }, $pkg; + # ^-------------------closing token at index $KK + + return unless ($trailing_comma_rule); + my ( $trailing_comma_style, $paren_flag ) = @{$trailing_comma_rule}; + + # List of $trailing_comma_style values: + # undef stable: do not change + # '0' : no list should have a trailing comma + # '1' or '*' : every list should have a trailing comma + # 'm' a multi-line list should have a trailing commas + # 'b' trailing commas should be 'bare' (comma followed by newline) + # 'h' lists of key=>value pairs with a bare trailing comma + # 'i' same as s=h but also include any list with no more than about one + # comma per line + # ' ' or -wtc not defined : leave trailing commas unchanged [DEFAULT]. + + # Note: an interesting generalization would be to let an upper case + # letter denote the negation of styles 'm', 'b', 'h', 'i'. This might + # be useful for undoing operations. It would be implemented as a wrapper + # around this routine. + + #----------------------------------------- + # No style defined : do not add or delete + #----------------------------------------- + if ( !defined($trailing_comma_style) ) { return !$if_add } + + #---------------------------------------- + # Set some flags describing this location + #---------------------------------------- + my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_]; + return unless ($type_sequence); + my $closing_token = $rLL->[$KK]->[_TOKEN_]; + my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence}; + return unless ( defined($rtype_count) && $rtype_count->{','} ); + my $is_permanently_broken = + $self->[_ris_permanently_broken_]->{$type_sequence}; + + # Note that _ris_broken_container_ also stores the line diff + # but it is not available at this early stage. + my $K_opening = $self->[_K_opening_container_]->{$type_sequence}; + return if ( !defined($K_opening) ); + + # multiline definition 1: opening and closing tokens on different lines + my $iline_o = $rLL_new->[$K_opening]->[_LINE_INDEX_]; + my $iline_c = $rLL->[$KK]->[_LINE_INDEX_]; + my $line_diff_containers = $iline_c - $iline_o; + my $has_multiline_containers = $line_diff_containers > 0; + + # multiline definition 2: first and last commas on different lines + my $iline_first = $self->[_rfirst_comma_line_index_]->{$type_sequence}; + my $iline_last = $rLL_new->[$Kp]->[_LINE_INDEX_]; + my $has_multiline_commas; + my $line_diff_commas = 0; + if ( !defined($iline_first) ) { + + # shouldn't happen if caller checked comma count + my $type_kp = $rLL_new->[$Kp]->[_TYPE_]; + Fault( +"at line $iline_last but line of first comma not defined, at Kp=$Kp, type=$type_kp\n" + ) if (DEVEL_MODE); + } + else { + $line_diff_commas = $iline_last - $iline_first; + $has_multiline_commas = $line_diff_commas > 0; + } - return; -} ## end sub weld_containers + # To avoid instability in edge cases, when adding commas we uses the + # multiline_commas definition, but when deleting we use multiline + # containers. This fixes b1384, b1396, b1397, b1398, b1400. + my $is_multiline = + $if_add ? $has_multiline_commas : $has_multiline_containers; -sub cumulative_length_before_K { - my ( $self, $KK ) = @_; - my $rLL = $self->[_rLL_]; - return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; -} + my $is_bare_multiline_comma = $is_multiline && $KK == $Kfirst; -sub weld_cuddled_blocks { - my ($self) = @_; + my $match; - # Called once per file to handle cuddled formatting + #---------------------------- + # 0 : does not match any list + #---------------------------- + if ( $trailing_comma_style eq '0' ) { + $match = 0; + } - my $rK_weld_left = $self->[_rK_weld_left_]; - my $rK_weld_right = $self->[_rK_weld_right_]; - my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; + #------------------------------ + # '*' or '1' : matches any list + #------------------------------ + elsif ( $trailing_comma_style eq '*' || $trailing_comma_style eq '1' ) { + $match = 1; + } - # This routine implements the -cb flag by finding the appropriate - # closing and opening block braces and welding them together. - return unless ( %{$rcuddled_block_types} ); + #----------------------------- + # 'm' matches a Multiline list + #----------------------------- + elsif ( $trailing_comma_style eq 'm' ) { + $match = $is_multiline; + } - my $rLL = $self->[_rLL_]; - return unless ( defined($rLL) && @{$rLL} ); - my $rbreak_container = $self->[_rbreak_container_]; + #---------------------------------- + # 'b' matches a Bare trailing comma + #---------------------------------- + elsif ( $trailing_comma_style eq 'b' ) { + $match = $is_bare_multiline_comma; + } + + #-------------------------------------------------------------------------- + # 'h' matches a bare hash list with about 1 comma and 1 fat comma per line. + # 'i' matches a bare stable list with about 1 comma per line. + #-------------------------------------------------------------------------- + elsif ( $trailing_comma_style eq 'h' || $trailing_comma_style eq 'i' ) { + + # We can treat these together because they are similar. + # The set of 'i' matches includes the set of 'h' matches. + + # the trailing comma must be bare for both 'h' and 'i' + return if ( !$is_bare_multiline_comma ); + + # There must be no more than one comma per line for both 'h' and 'i' + # The new_comma_count here will include the trailing comma. + my $new_comma_count = $rtype_count->{','}; + $new_comma_count += 1 if ($if_add); + my $excess_commas = $new_comma_count - $line_diff_commas - 1; + if ( $excess_commas > 0 ) { + + # Exception for a special edge case for option 'i': if the trailing + # comma is followed by a blank line or comment, then it cannot be + # covered. Then we can safely accept a small list to avoid + # instability (issue b1443). + if ( $trailing_comma_style eq 'i' + && $iline_c - $rLL_new->[$Kp]->[_LINE_INDEX_] > 1 + && $new_comma_count <= 2 ) + { + $match = 1; + } + else { + return; + } + } - my $K_opening_container = $self->[_K_opening_container_]; - my $K_closing_container = $self->[_K_closing_container_]; + # a list of key=>value pairs with at least 2 fat commas is a match + # for both 'h' and 'i' + my $fat_comma_count = $rtype_count->{'=>'}; + if ( !$match && $fat_comma_count && $fat_comma_count >= 2 ) { - my $length_to_opening_seqno = sub { - my ($seqno) = @_; - my $KK = $K_opening_container->{$seqno}; - my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; - return $lentot; - }; - my $length_to_closing_seqno = sub { - my ($seqno) = @_; - my $KK = $K_closing_container->{$seqno}; - my $lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; - return $lentot; - }; + # comma count (including trailer) and fat comma count must differ by + # by no more than 1. This allows for some small variations. + my $comma_diff = $new_comma_count - $fat_comma_count; + $match = ( $comma_diff >= -1 && $comma_diff <= 1 ); + } - my $is_broken_block = sub { + # For 'i' only, a list that can be shown to be stable is a match + if ( !$match && $trailing_comma_style eq 'i' ) { + $match = ( + $is_permanently_broken + || ( $rOpts_break_at_old_comma_breakpoints + && !$rOpts_ignore_old_breakpoints ) + ); + } + } - # a block is broken if the input line numbers of the braces differ - # we can only cuddle between broken blocks - my ($seqno) = @_; - my $K_opening = $K_opening_container->{$seqno}; - return unless ( defined($K_opening) ); - my $K_closing = $K_closing_container->{$seqno}; - return unless ( defined($K_closing) ); - return $rbreak_container->{$seqno} - || $rLL->[$K_closing]->[_LINE_INDEX_] != - $rLL->[$K_opening]->[_LINE_INDEX_]; - }; + #------------------------------------------------------------------------- + # Unrecognized parameter. This should have been caught in the input check. + #------------------------------------------------------------------------- + else { - # A stack to remember open chains at all levels: This is a hash rather than - # an array for safety because negative levels can occur in files with - # errors. This allows us to keep processing with negative levels. - # $in_chain{$level} = [$chain_type, $type_sequence]; - my %in_chain; - my $CBO = $rOpts->{'cuddled-break-option'}; + DEVEL_MODE && Fault("Unrecognized parameter '$trailing_comma_style'\n"); - # loop over structure items to find cuddled pairs - my $level = 0; - my $KNEXT = $self->[_K_first_seq_item_]; - while ( defined($KNEXT) ) { - my $KK = $KNEXT; - $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_]; - my $rtoken_vars = $rLL->[$KK]; - my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; - if ( !$type_sequence ) { - next if ( $KK == 0 ); # first token in file may not be container + # do not add or delete + return !$if_add; + } - # A fault here implies that an error was made in the little loop at - # the bottom of sub 'respace_tokens' which set the values of - # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the - # loop control lines above. - Fault("sequence = $type_sequence not defined at K=$KK") - if (DEVEL_MODE); - next; - } + # Now do any special paren check + if ( $match + && $paren_flag + && $paren_flag ne '1' + && $paren_flag ne '*' + && $closing_token eq ')' ) + { + $match &&= + $self->match_paren_control_flag( $type_sequence, $paren_flag, + $rLL_new ); + } - # NOTE: we must use the original levels here. They can get changed - # by sub 'weld_nested_containers', so this routine must be called - # before sub 'weld_nested_containers'. - my $last_level = $level; - $level = $rtoken_vars->[_LEVEL_]; + # Fix for b1379, b1380, b1381, b1382, b1384 part 1. Mark trailing commas + # for use by -vtc logic to avoid instability when -dtc and -atc are both + # active. + if ($match) { + if ( $if_add && $rOpts_delete_trailing_commas + || !$if_add && $rOpts_add_trailing_commas ) + { + $self->[_ris_bare_trailing_comma_by_seqno_]->{$type_sequence} = 1; - if ( $level < $last_level ) { $in_chain{$last_level} = undef } - elsif ( $level > $last_level ) { $in_chain{$level} = undef } + # The combination of -atc and -dtc and -cab=3 can be unstable + # (b1394). So we deactivate -cab=3 in this case. + # A value of '0' or '4' is required for stability of case b1451. + if ( $rOpts_comma_arrow_breakpoints == 3 ) { + $self->[_roverride_cab3_]->{$type_sequence} = 0; + } + } + } + return $match; +} ## end sub match_trailing_comma_rule - # We are only looking at code blocks - my $token = $rtoken_vars->[_TOKEN_]; - my $type = $rtoken_vars->[_TYPE_]; - next unless ( $type eq $token ); +sub store_new_token { - if ( $token eq '{' ) { + my ( $self, $type, $token, $Kp ) = @_; - my $block_type = $rblock_type_of_seqno->{$type_sequence}; - if ( !$block_type ) { + # Create and insert a completely new token into the output stream - # patch for unrecognized block types which may not be labeled - my $Kp = $self->K_previous_nonblank($KK); - while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) { - $Kp = $self->K_previous_nonblank($Kp); - } - next unless $Kp; - $block_type = $rLL->[$Kp]->[_TOKEN_]; - } - if ( $in_chain{$level} ) { + # Input parameters: + # $type = the token type + # $token = the token text + # $Kp = index of the previous token in the new list, $rLL_new - # we are in a chain and are at an opening block brace. - # See if we are welding this opening brace with the previous - # block brace. Get their identification numbers: - my $closing_seqno = $in_chain{$level}->[1]; - my $opening_seqno = $type_sequence; + # Returns: + # $Knew = index in $rLL_new of the new token - # The preceding block must be on multiple lines so that its - # closing brace will start a new line. - if ( !$is_broken_block->($closing_seqno) ) { - next unless ( $CBO == 2 ); - $rbreak_container->{$closing_seqno} = 1; - } + # This operation is a little tricky because we are creating a new token and + # we have to take care to follow the requested whitespace rules. - # we will let the trailing block be either broken or intact - ## && $is_broken_block->($opening_seqno); + my $Ktop = @{$rLL_new} - 1; + my $top_is_space = $Ktop >= 0 && $rLL_new->[$Ktop]->[_TYPE_] eq 'b'; + my $Knew; + if ( $top_is_space && $want_left_space{$type} == WS_NO ) { - # We can weld the closing brace to its following word .. - my $Ko = $K_closing_container->{$closing_seqno}; - my $Kon; - if ( defined($Ko) ) { - $Kon = $self->K_next_nonblank($Ko); - } + #---------------------------------------------------- + # Method 1: Convert the top blank into the new token. + #---------------------------------------------------- - # ..unless it is a comment - if ( defined($Kon) && $rLL->[$Kon]->[_TYPE_] ne '#' ) { + # Be Careful: we are working on the top of the new stack, on a token + # which has been stored. - # OK to weld these two tokens... - $rK_weld_right->{$Ko} = $Kon; - $rK_weld_left->{$Kon} = $Ko; + my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', SPACE ); - # Set flag that we want to break the next container - # so that the cuddled line is balanced. - $rbreak_container->{$opening_seqno} = 1 - if ($CBO); - } + $Knew = $Ktop; + $rLL_new->[$Knew]->[_TOKEN_] = $token; + $rLL_new->[$Knew]->[_TOKEN_LENGTH_] = length($token); + $rLL_new->[$Knew]->[_TYPE_] = $type; - } - else { + # NOTE: we are changing the output stack without updating variables + # $last_nonblank_code_type, etc. Future needs might require that + # those variables be updated here. For now, we just update the + # type counts as necessary. - # We are not in a chain. Start a new chain if we see the - # starting block type. - if ( $rcuddled_block_types->{$block_type} ) { - $in_chain{$level} = [ $block_type, $type_sequence ]; - } - else { - $block_type = '*'; - $in_chain{$level} = [ $block_type, $type_sequence ]; - } + if ( $is_counted_type{$type} ) { + my $seqno = $seqno_stack{ $depth_next - 1 }; + if ($seqno) { + $self->[_rtype_count_by_seqno_]->{$seqno}->{$type}++; } } - elsif ( $token eq '}' ) { - if ( $in_chain{$level} ) { - # We are in a chain at a closing brace. See if this chain - # continues.. - my $Knn = $self->K_next_code($KK); - next unless $Knn; + # Then store a new blank + $self->store_token($rcopy); + } + else { - my $chain_type = $in_chain{$level}->[0]; - my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_]; - if ( - $rcuddled_block_types->{$chain_type}->{$next_nonblank_token} - ) - { + #---------------------------------------- + # Method 2: Use the normal storage method + #---------------------------------------- - # Note that we do not weld yet because we must wait until - # we we are sure that an opening brace for this follows. - $in_chain{$level}->[1] = $type_sequence; - } - else { $in_chain{$level} = undef } + # Patch for issue c078: keep line indexes in order. If the top + # token is a space that we are keeping (due to '-wls=...) then + # we have to check that old line indexes stay in order. + # In very rare + # instances in which side comments have been deleted and converted + # into blanks, we may have filtered down multiple blanks into just + # one. In that case the top blank may have a higher line number + # than the previous nonblank token. Although the line indexes of + # blanks are not really significant, we need to keep them in order + # in order to pass error checks. + if ($top_is_space) { + my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_]; + my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_]; + if ( $new_top_ix < $old_top_ix ) { + $rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix; } } - } - return; -} ## end sub weld_cuddled_blocks -sub find_nested_pairs { - my $self = shift; + my $rcopy = copy_token_as_type( $rLL_new->[$Kp], $type, $token ); + $self->store_token($rcopy); + $Knew = @{$rLL_new} - 1; + } + return $Knew; +} ## end sub store_new_token - # This routine is called once per file to do preliminary work needed for - # the --weld-nested option. This information is also needed for adding - # semicolons. +sub check_Q { - my $rLL = $self->[_rLL_]; - return unless ( defined($rLL) && @{$rLL} ); - my $Num = @{$rLL}; + # Check that a quote looks okay, and report possible problems + # to the logfile. - my $K_opening_container = $self->[_K_opening_container_]; - my $K_closing_container = $self->[_K_closing_container_]; - my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; + my ( $self, $KK, $Kfirst, $line_number ) = @_; + my $token = $rLL->[$KK]->[_TOKEN_]; + if ( $token =~ /\t/ ) { + $self->note_embedded_tab($line_number); + } - # We define an array of pairs of nested containers - my @nested_pairs; + # The remainder of this routine looks for something like + # '$var = s/xxx/yyy/;' + # in case it should have been '$var =~ s/xxx/yyy/;' - # Names of calling routines can either be marked as 'i' or 'w', - # and they may invoke a sub call with an '->'. We will consider - # any consecutive string of such types as a single unit when making - # weld decisions. We also allow a leading ! - my $is_name_type = { - 'i' => 1, - 'w' => 1, - 'U' => 1, - '->' => 1, - '!' => 1, - }; + # Start by looking for a token beginning with one of: s y m / tr + return + unless ( $is_s_y_m_slash{ substr( $token, 0, 1 ) } + || substr( $token, 0, 2 ) eq 'tr' ); - # Loop over all closing container tokens - foreach my $inner_seqno ( keys %{$K_closing_container} ) { - my $K_inner_closing = $K_closing_container->{$inner_seqno}; + # ... and preceded by one of: = == != + my $Kp = $self->K_previous_nonblank( undef, $rLL_new ); + return unless ( defined($Kp) ); + my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_]; + return unless ( $is_unexpected_equals{$previous_nonblank_type} ); + my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_]; - # See if it is immediately followed by another, outer closing token - my $K_outer_closing = $K_inner_closing + 1; - $K_outer_closing += 1 - if ( $K_outer_closing < $Num - && $rLL->[$K_outer_closing]->[_TYPE_] eq 'b' ); + my $previous_nonblank_type_2 = 'b'; + my $previous_nonblank_token_2 = EMPTY_STRING; + my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new ); + if ( defined($Kpp) ) { + $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_]; + $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_]; + } - next unless ( $K_outer_closing < $Num ); - my $outer_seqno = $rLL->[$K_outer_closing]->[_TYPE_SEQUENCE_]; - next unless ($outer_seqno); - my $token_outer_closing = $rLL->[$K_outer_closing]->[_TOKEN_]; - next unless ( $is_closing_token{$token_outer_closing} ); + my $next_nonblank_token = EMPTY_STRING; + my $Kn = $KK + 1; + my $Kmax = @{$rLL} - 1; + if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 } + if ( $Kn <= $Kmax ) { + $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_]; + } - # Now we have to check the opening tokens. - my $K_outer_opening = $K_opening_container->{$outer_seqno}; - my $K_inner_opening = $K_opening_container->{$inner_seqno}; - next unless defined($K_outer_opening) && defined($K_inner_opening); + my $token_0 = $rLL->[$Kfirst]->[_TOKEN_]; + my $type_0 = $rLL->[$Kfirst]->[_TYPE_]; - my $inner_blocktype = $rblock_type_of_seqno->{$inner_seqno}; - my $outer_blocktype = $rblock_type_of_seqno->{$outer_seqno}; + if ( - # Verify that the inner opening token is the next container after the - # outer opening token. - my $K_io_check = $rLL->[$K_outer_opening]->[_KNEXT_SEQ_ITEM_]; - next unless defined($K_io_check); - if ( $K_io_check != $K_inner_opening ) { + # preceded by simple scalar + $previous_nonblank_type_2 eq 'i' + && $previous_nonblank_token_2 =~ /^\$/ - # The inner opening container does not immediately follow the outer - # opening container, but we may still allow a weld if they are - # separated by a sub signature. For example, we may have something - # like this, where $K_io_check may be at the first 'x' instead of - # 'io'. So we need to hop over the signature and see if we arrive - # at 'io'. + # followed by some kind of termination + # (but give complaint if we can not see far enough ahead) + && $next_nonblank_token =~ /^[; \)\}]$/ - # oo io - # | x x | - # $obj->then( sub ( $code ) { - # ... - # return $c->render(text => '', status => $code); - # } ); - # | | - # ic oc - - next if ( !$inner_blocktype || $inner_blocktype ne 'sub' ); - next if $rLL->[$K_io_check]->[_TOKEN_] ne '('; - my $seqno_signature = $rLL->[$K_io_check]->[_TYPE_SEQUENCE_]; - next unless defined($seqno_signature); - my $K_signature_closing = $K_closing_container->{$seqno_signature}; - next unless defined($K_signature_closing); - my $K_test = $rLL->[$K_signature_closing]->[_KNEXT_SEQ_ITEM_]; - next - unless ( defined($K_test) && $K_test == $K_inner_opening ); + # scalar is not declared + ## =~ /^(my|our|local)$/ + && !( $type_0 eq 'k' && $is_my_our_local{$token_0} ) + ) + { + my $lno = 1 + $rLL_new->[$Kp]->[_LINE_INDEX_]; + my $guess = substr( $previous_nonblank_token, 0, 1 ) . '~'; + complain( +"Line $lno: Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n" + ); + } + return; +} ## end sub check_Q - # OK, we have arrived at 'io' in the above diagram. We should put - # a limit on the length or complexity of the signature here. There - # is no perfect way to do this, one way is to put a limit on token - # count. For consistency with older versions, we should allow a - # signature with a single variable to weld, but not with - # multiple variables. A single variable as in 'sub ($code) {' can - # have a $Kdiff of 2 to 4, depending on spacing. +} ## end closure respace_tokens - # But two variables like 'sub ($v1,$v2) {' can have a diff of 4 to - # 7, depending on spacing. So to keep formatting consistent with - # previous versions, we will also avoid welding if there is a comma - # in the signature. +sub copy_token_as_type { - my $Kdiff = $K_signature_closing - $K_io_check; - next if ( $Kdiff > 4 ); + # This provides a quick way to create a new token by + # slightly modifying an existing token. + my ( $rold_token, $type, $token ) = @_; + if ( !defined($token) ) { + if ( $type eq 'b' ) { + $token = SPACE; + } + elsif ( $type eq 'q' ) { + $token = EMPTY_STRING; + } + elsif ( $type eq '->' ) { + $token = '->'; + } + elsif ( $type eq ';' ) { + $token = ';'; + } + elsif ( $type eq ',' ) { + $token = ','; + } + else { - my $saw_comma; - foreach my $KK ( $K_io_check + 1 .. $K_signature_closing - 1 ) { - if ( $rLL->[$KK]->[_TYPE_] eq ',' ) { $saw_comma = 1; last } + # Unexpected type ... this sub will work as long as both $token and + # $type are defined, but we should catch any unexpected types during + # development. + if (DEVEL_MODE) { + Fault(<' or ';' +EOM } - next if ($saw_comma); + + # Shouldn't get here + $token = $type; } + } - # Yes .. this is a possible nesting pair. - # They can be separated by a small amount. - my $K_diff = $K_inner_opening - $K_outer_opening; + my @rnew_token = @{$rold_token}; + $rnew_token[_TYPE_] = $type; + $rnew_token[_TOKEN_] = $token; + $rnew_token[_TYPE_SEQUENCE_] = EMPTY_STRING; + return \@rnew_token; +} ## end sub copy_token_as_type - # Count nonblank characters separating them. - if ( $K_diff < 0 ) { next } # Shouldn't happen - my $nonblank_count = 0; - my $type; - my $is_name; +sub K_next_code { + my ( $self, $KK, $rLL ) = @_; - # Here is an example of a long identifier chain which counts as a - # single nonblank here (this spans about 10 K indexes): - # if ( !Boucherot::SetOfConnections->new->handler->execute( - # ^--K_o_o ^--K_i_o - # @array) ) - my $Kn_first = $K_outer_opening; - my $Kn_last_nonblank; - my $saw_comment; - foreach my $Kn ( $K_outer_opening + 1 .. $K_inner_opening ) { - next if ( $rLL->[$Kn]->[_TYPE_] eq 'b' ); - if ( !$nonblank_count ) { $Kn_first = $Kn } - if ( $Kn eq $K_inner_opening ) { $nonblank_count++; last; } - $Kn_last_nonblank = $Kn; + # return the index K of the next nonblank, non-comment token + return unless ( defined($KK) && $KK >= 0 ); - # skip chain of identifier tokens - my $last_type = $type; - my $last_is_name = $is_name; - $type = $rLL->[$Kn]->[_TYPE_]; - if ( $type eq '#' ) { $saw_comment = 1; last } - $is_name = $is_name_type->{$type}; - next if ( $is_name && $last_is_name ); + # use the standard array unless given otherwise + $rLL = $self->[_rLL_] unless ( defined($rLL) ); + my $Num = @{$rLL}; + my $Knnb = $KK + 1; + while ( $Knnb < $Num ) { + if ( !defined( $rLL->[$Knnb] ) ) { - $nonblank_count++; - last if ( $nonblank_count > 2 ); + # We seem to have encountered a gap in our array. + # This shouldn't happen because sub write_line() pushed + # items into the $rLL array. + Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE); + return; + } + if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' + && $rLL->[$Knnb]->[_TYPE_] ne '#' ) + { + return $Knnb; } + $Knnb++; + } + return; +} ## end sub K_next_code - # Do not weld across a comment .. fix for c058. - next if ($saw_comment); +sub K_next_nonblank { + my ( $self, $KK, $rLL ) = @_; - # Patch for b1104: do not weld to a paren preceded by sort/map/grep - # because the special line break rules may cause a blinking state - if ( defined($Kn_last_nonblank) - && $rLL->[$K_inner_opening]->[_TOKEN_] eq '(' - && $rLL->[$Kn_last_nonblank]->[_TYPE_] eq 'k' ) - { - my $token = $rLL->[$Kn_last_nonblank]->[_TOKEN_]; + # return the index K of the next nonblank token, or + # return undef if none + return unless ( defined($KK) && $KK >= 0 ); - # Turn off welding at sort/map/grep ( - if ( $is_sort_map_grep{$token} ) { $nonblank_count = 10 } + # The third arg allows this routine to be used on any array. This is + # useful in sub respace_tokens when we are copying tokens from an old $rLL + # to a new $rLL array. But usually the third arg will not be given and we + # will just use the $rLL array in $self. + $rLL = $self->[_rLL_] unless ( defined($rLL) ); + my $Num = @{$rLL}; + my $Knnb = $KK + 1; + return unless ( $Knnb < $Num ); + return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ); + return unless ( ++$Knnb < $Num ); + return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ); + + # Backup loop. Very unlikely to get here; it means we have neighboring + # blanks in the token stream. + $Knnb++; + while ( $Knnb < $Num ) { + + # Safety check, this fault shouldn't happen: The $rLL array is the + # main array of tokens, so all entries should be used. It is + # initialized in sub write_line, and then re-initialized by sub + # store_token() within sub respace_tokens. Tokens are pushed on + # so there shouldn't be any gaps. + if ( !defined( $rLL->[$Knnb] ) ) { + Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE); + return; } + if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb } + $Knnb++; + } + return; +} ## end sub K_next_nonblank - if ( +sub K_previous_code { - # adjacent opening containers, like: do {{ - $nonblank_count == 1 + # return the index K of the previous nonblank, non-comment token + # Call with $KK=undef to start search at the top of the array + my ( $self, $KK, $rLL ) = @_; - # short item following opening paren, like: fun( yyy ( - || ( $nonblank_count == 2 - && $rLL->[$K_outer_opening]->[_TOKEN_] eq '(' ) + # use the standard array unless given otherwise + $rLL = $self->[_rLL_] unless ( defined($rLL) ); + my $Num = @{$rLL}; + if ( !defined($KK) ) { $KK = $Num } + elsif ( $KK > $Num ) { - # anonymous sub + prototype or sig: )->then( sub ($code) { - # ... but it seems best not to stack two structural blocks, like - # this - # sub make_anon_with_my_sub { sub { - # because it probably hides the structure a little too much. - || ( $inner_blocktype - && $inner_blocktype eq 'sub' - && $rLL->[$Kn_first]->[_TOKEN_] eq 'sub' - && !$outer_blocktype ) - ) + # This fault can be caused by a programming error in which a bad $KK is + # given. The caller should make the first call with KK_new=undef to + # avoid this error. + Fault( +"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num" + ) if (DEVEL_MODE); + return; + } + my $Kpnb = $KK - 1; + while ( $Kpnb >= 0 ) { + if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' + && $rLL->[$Kpnb]->[_TYPE_] ne '#' ) { - push @nested_pairs, - [ $inner_seqno, $outer_seqno, $K_inner_closing ]; + return $Kpnb; } - next; + $Kpnb--; } + return; +} ## end sub K_previous_code - # The weld routine expects the pairs in order in the form - # [$seqno_inner, $seqno_outer] - # And they must be in the same order as the inner closing tokens - # (otherwise, welds of three or more adjacent tokens will not work). The K - # value of this inner closing token has temporarily been stored for - # sorting. - @nested_pairs = +sub K_previous_nonblank { - # Drop the K index after sorting (it would cause trouble downstream) - map { [ $_->[0], $_->[1] ] } + # return index of previous nonblank token before item K; + # Call with $KK=undef to start search at the top of the array + my ( $self, $KK, $rLL ) = @_; - # Sort on the K values - sort { $a->[2] <=> $b->[2] } @nested_pairs; + # use the standard array unless given otherwise + $rLL = $self->[_rLL_] unless ( defined($rLL) ); + my $Num = @{$rLL}; + if ( !defined($KK) ) { $KK = $Num } + elsif ( $KK > $Num ) { - return \@nested_pairs; -} ## end sub find_nested_pairs + # This fault can be caused by a programming error in which a bad $KK is + # given. The caller should make the first call with KK_new=undef to + # avoid this error. + Fault( +"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num" + ) if (DEVEL_MODE); + return; + } + my $Kpnb = $KK - 1; + return unless ( $Kpnb >= 0 ); + return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ); + return unless ( --$Kpnb >= 0 ); + return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ); -sub match_paren_flag { + # Backup loop. We should not get here unless some routine + # slipped repeated blanks into the token stream. + return unless ( --$Kpnb >= 0 ); + while ( $Kpnb >= 0 ) { + if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb } + $Kpnb--; + } + return; +} ## end sub K_previous_nonblank - # Decide if this paren is excluded by user request: - # undef matches no parens - # '*' matches all parens - # 'k' matches only if the previous nonblank token is a perl builtin - # keyword (such as 'if', 'while'), - # 'K' matches if 'k' does not, meaning if the previous token is not a - # keyword. - # 'f' matches if the previous token is a function other than a keyword. - # 'F' matches if 'f' does not. - # 'w' matches if either 'k' or 'f' match. - # 'W' matches if 'w' does not. - my ( $self, $KK, $flag ) = @_; +sub parent_seqno_by_K { - return 0 unless ( defined($flag) ); - return 0 if $flag eq '0'; - return 1 if $flag eq '1'; - return 1 if $flag eq '*'; - return 0 unless ( defined($KK) ); + # Return the sequence number of the parent container of token K, if any. - my $rLL = $self->[_rLL_]; - my $rtoken_vars = $rLL->[$KK]; - my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_]; - return 0 unless ($seqno); - my $token = $rtoken_vars->[_TOKEN_]; - my $K_opening = $KK; - if ( !$is_opening_token{$token} ) { - $K_opening = $self->[_K_opening_container_]->{$seqno}; - } - return unless ( defined($K_opening) ); + my ( $self, $KK ) = @_; + my $rLL = $self->[_rLL_]; - my ( $is_f, $is_k, $is_w ); - my $Kp = $self->K_previous_nonblank($K_opening); - if ( defined($Kp) ) { - my $type_p = $rLL->[$Kp]->[_TYPE_]; + # The task is to jump forward to the next container token + # and use the sequence number of either it or its parent. - # keyword? - $is_k = $type_p eq 'k'; + # For example, consider the following with seqno=5 of the '[' and ']' + # being called with index K of the first token of each line: - # function call? - $is_f = $self->[_ris_function_call_paren_]->{$seqno}; + # # result + # push @tests, # - + # [ # - + # sub { 99 }, 'do {&{%s} for 1,2}', # 5 + # '(&{})(&{})', undef, # 5 + # [ 2, 2, 0 ], 0 # 5 + # ]; # - - # either keyword or function call? - $is_w = $is_k || $is_f; + # NOTE: The ending parent will be SEQ_ROOT for a balanced file. For + # unbalanced files, last sequence number will either be undefined or it may + # be at a deeper level. In either case we will just return SEQ_ROOT to + # have a defined value and allow formatting to proceed. + my $parent_seqno = SEQ_ROOT; + my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_]; + if ($type_sequence) { + $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence}; } - my $match; - if ( $flag eq 'k' ) { $match = $is_k } - elsif ( $flag eq 'K' ) { $match = !$is_k } - elsif ( $flag eq 'f' ) { $match = $is_f } - elsif ( $flag eq 'F' ) { $match = !$is_f } - elsif ( $flag eq 'w' ) { $match = $is_w } - elsif ( $flag eq 'W' ) { $match = !$is_w } - return $match; -} ## end sub match_paren_flag + else { + my $Kt = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_]; + if ( defined($Kt) ) { + $type_sequence = $rLL->[$Kt]->[_TYPE_SEQUENCE_]; + my $type = $rLL->[$Kt]->[_TYPE_]; -sub is_excluded_weld { + # if next container token is closing, it is the parent seqno + if ( $is_closing_type{$type} ) { + $parent_seqno = $type_sequence; + } - # decide if this weld is excluded by user request - my ( $self, $KK, $is_leading ) = @_; - my $rLL = $self->[_rLL_]; - my $rtoken_vars = $rLL->[$KK]; - my $token = $rtoken_vars->[_TOKEN_]; - my $rflags = $weld_nested_exclusion_rules{$token}; - return 0 unless ( defined($rflags) ); - my $flag = $is_leading ? $rflags->[0] : $rflags->[1]; - return 0 unless ( defined($flag) ); - return 1 if $flag eq '*'; - return $self->match_paren_flag( $KK, $flag ); -} ## end sub is_excluded_weld + # otherwise we want its parent container + else { + $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence}; + } + } + } + $parent_seqno = SEQ_ROOT unless ( defined($parent_seqno) ); + return $parent_seqno; +} ## end sub parent_seqno_by_K -# hashes to simplify welding logic -my %type_ok_after_bareword; -my %has_tight_paren; +sub is_in_block_by_i { + my ( $self, $i ) = @_; -BEGIN { + # returns true if + # token at i is contained in a BLOCK + # or is at root level + # or there is some kind of error (i.e. unbalanced file) + # returns false otherwise - # types needed for welding RULE 6 - my @q = qw# => -> { ( [ #; - @type_ok_after_bareword{@q} = (1) x scalar(@q); + if ( $i < 0 ) { + DEVEL_MODE && Fault("Bad call, i='$i'\n"); + return 1; + } - # these types do not 'like' to be separated from a following paren - @q = qw(w i q Q G C Z U); - @{has_tight_paren}{@q} = (1) x scalar(@q); -} + my $seqno = $parent_seqno_to_go[$i]; + return 1 if ( !$seqno || $seqno eq SEQ_ROOT ); + return 1 if ( $self->[_rblock_type_of_seqno_]->{$seqno} ); + return; +} ## end sub is_in_block_by_i -use constant DEBUG_WELD => 0; +sub is_in_list_by_i { + my ( $self, $i ) = @_; -sub setup_new_weld_measurements { + # returns true if token at i is contained in a LIST + # returns false otherwise + my $seqno = $parent_seqno_to_go[$i]; + return unless ( $seqno && $seqno ne SEQ_ROOT ); + if ( $self->[_ris_list_by_seqno_]->{$seqno} ) { + return 1; + } + return; +} ## end sub is_in_list_by_i - # Define quantities to check for excess line lengths when welded. - # Called by sub 'weld_nested_containers' and sub 'weld_nested_quotes' +sub is_list_by_K { - my ( $self, $Kouter_opening, $Kinner_opening ) = @_; + # Return true if token K is in a list + my ( $self, $KK ) = @_; - # Given indexes of outer and inner opening containers to be welded: - # $Kouter_opening, $Kinner_opening + my $parent_seqno = $self->parent_seqno_by_K($KK); + return unless defined($parent_seqno); + return $self->[_ris_list_by_seqno_]->{$parent_seqno}; +} ## end sub is_list_by_K - # Returns these variables: - # $new_weld_ok = true (new weld ok) or false (do not start new weld) - # $starting_indent = starting indentation - # $starting_lentot = starting cumulative length - # $msg = diagnostic message for debugging +sub is_list_by_seqno { - my $rLL = $self->[_rLL_]; - my $rlines = $self->[_rlines_]; + # Return true if the immediate contents of a container appears to be a + # list. + my ( $self, $seqno ) = @_; + return unless defined($seqno); + return $self->[_ris_list_by_seqno_]->{$seqno}; +} ## end sub is_list_by_seqno - my $starting_level; - my $starting_ci; - my $starting_lentot; - my $maximum_text_length; - my $msg = EMPTY_STRING; +sub resync_lines_and_tokens { - my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_]; - my $rK_range = $rlines->[$iline_oo]->{_rK_range}; - my ( $Kfirst, $Klast ) = @{$rK_range}; + my $self = shift; - #------------------------------------------------------------------------- - # We now define a reference index, '$Kref', from which to start measuring - # This choice turns out to be critical for keeping welds stable during - # iterations, so we go through a number of STEPS... - #------------------------------------------------------------------------- + # Re-construct the arrays of tokens associated with the original input + # lines since they have probably changed due to inserting and deleting + # blanks and a few other tokens. - # STEP 1: Our starting guess is to use measure from the first token of the - # current line. This is usually a good guess. - my $Kref = $Kfirst; + # Return paremeters: + # set severe_error = true if processing needs to terminate + my $severe_error; + my $rqw_lines = []; - # STEP 2: See if we should go back a little farther - my $Kprev = $self->K_previous_nonblank($Kfirst); - if ( defined($Kprev) ) { + my $rLL = $self->[_rLL_]; + my $Klimit = $self->[_Klimit_]; + my $rlines = $self->[_rlines_]; + my @Krange_code_without_comments; + my @Klast_valign_code; - # Avoid measuring from between an opening paren and a previous token - # which should stay close to it ... fixes b1185 - my $token_oo = $rLL->[$Kouter_opening]->[_TOKEN_]; - my $type_prev = $rLL->[$Kprev]->[_TYPE_]; - if ( $Kouter_opening == $Kfirst - && $token_oo eq '(' - && $has_tight_paren{$type_prev} ) - { - $Kref = $Kprev; + # This is the next token and its line index: + my $Knext = 0; + my $Kmax = defined($Klimit) ? $Klimit : -1; + + # Verify that old line indexes are in still order. If this error occurs, + # check locations where sub 'respace_tokens' creates new tokens (like + # blank spaces). It must have set a bad old line index. + if ( DEVEL_MODE && defined($Klimit) ) { + my $iline = $rLL->[0]->[_LINE_INDEX_]; + foreach my $KK ( 1 .. $Klimit ) { + my $iline_last = $iline; + $iline = $rLL->[$KK]->[_LINE_INDEX_]; + if ( $iline < $iline_last ) { + my $KK_m = $KK - 1; + my $token_m = $rLL->[$KK_m]->[_TOKEN_]; + my $token = $rLL->[$KK]->[_TOKEN_]; + my $type_m = $rLL->[$KK_m]->[_TYPE_]; + my $type = $rLL->[$KK]->[_TYPE_]; + Fault(<' if -lp - # is used (this fixes b520) - # ...or if a break is wanted before there - elsif ($rOpts_line_up_parentheses - || $want_break_before{$type_prev} ) - { + my $iline = -1; + foreach my $line_of_tokens ( @{$rlines} ) { + $iline++; + my $line_type = $line_of_tokens->{_line_type}; + if ( $line_type eq 'CODE' ) { - # If there are other sequence items between the start of this line - # and the opening token in question, then do not include tokens on - # the previous line in length calculations. This check added to - # fix case b1174 which had a '?' on the line - my $no_previous_seq_item = $Kref == $Kouter_opening - || $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_] == $Kouter_opening; + # Get the old number of tokens on this line + my $rK_range_old = $line_of_tokens->{_rK_range}; + my ( $Kfirst_old, $Klast_old ) = @{$rK_range_old}; + my $Kdiff_old = 0; + if ( defined($Kfirst_old) ) { + $Kdiff_old = $Klast_old - $Kfirst_old; + } - if ( $no_previous_seq_item - && substr( $type_prev, 0, 1 ) eq '=' ) + # Find the range of NEW K indexes for the line: + # $Kfirst = index of first token on line + # $Klast = index of last token on line + my ( $Kfirst, $Klast ); + + my $Knext_beg = $Knext; # this will be $Kfirst if we find tokens + + # Optimization: Although the actual K indexes may be completely + # changed after respacing, the number of tokens on any given line + # will often be nearly unchanged. So we will see if we can start + # our search by guessing that the new line has the same number + # of tokens as the old line. + my $Knext_guess = $Knext + $Kdiff_old; + if ( $Knext_guess > $Knext + && $Knext_guess < $Kmax + && $rLL->[$Knext_guess]->[_LINE_INDEX_] <= $iline ) { - $Kref = $Kprev; - # Fix for b1144 and b1112: backup to the first nonblank - # character before the =>, or to the start of its line. - if ( $type_prev eq '=>' ) { - my $iline_prev = $rLL->[$Kprev]->[_LINE_INDEX_]; - my $rK_range_prev = $rlines->[$iline_prev]->{_rK_range}; - my ( $Kfirst_prev, $Klast_prev ) = @{$rK_range_prev}; - foreach my $KK ( reverse( $Kfirst_prev .. $Kref - 1 ) ) { - next if ( $rLL->[$KK]->[_TYPE_] eq 'b' ); - $Kref = $KK; - last; - } - } + # the guess is good, so we can start our search here + $Knext = $Knext_guess + 1; } - } - } - # STEP 3: Now look ahead for a ternary and, if found, use it. - # This fixes case b1182. - # Also look for a ')' at the same level and, if found, use it. - # This fixes case b1224. - if ( $Kref < $Kouter_opening ) { - my $Knext = $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_]; - my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_]; - while ( $Knext < $Kouter_opening ) { - if ( $rLL->[$Knext]->[_LEVEL_] == $level_oo ) { - if ( $is_ternary{ $rLL->[$Knext]->[_TYPE_] } - || $rLL->[$Knext]->[_TOKEN_] eq ')' ) - { - $Kref = $Knext; - last; - } + while ($Knext <= $Kmax + && $rLL->[$Knext]->[_LINE_INDEX_] <= $iline ) + { + $Knext++; } - $Knext = $rLL->[$Knext]->[_KNEXT_SEQ_ITEM_]; - } - } - # Define the starting measurements we will need - $starting_lentot = - $Kref <= 0 ? 0 : $rLL->[ $Kref - 1 ]->[_CUMULATIVE_LENGTH_]; - $starting_level = $rLL->[$Kref]->[_LEVEL_]; - $starting_ci = $rLL->[$Kref]->[_CI_LEVEL_]; + if ( $Knext > $Knext_beg ) { - $maximum_text_length = $maximum_text_length_at_level[$starting_level] - - $starting_ci * $rOpts_continuation_indentation; + $Klast = $Knext - 1; - # STEP 4: Switch to using the outer opening token as the reference - # point if a line break before it would make a longer line. - # Fixes case b1055 and is also an alternate fix for b1065. - my $starting_level_oo = $rLL->[$Kouter_opening]->[_LEVEL_]; - if ( $Kref < $Kouter_opening ) { - my $starting_ci_oo = $rLL->[$Kouter_opening]->[_CI_LEVEL_]; - my $lentot_oo = $rLL->[ $Kouter_opening - 1 ]->[_CUMULATIVE_LENGTH_]; - my $maximum_text_length_oo = - $maximum_text_length_at_level[$starting_level_oo] - - $starting_ci_oo * $rOpts_continuation_indentation; + # Delete any terminal blank token + if ( $rLL->[$Klast]->[_TYPE_] eq 'b' ) { $Klast -= 1 } - # The excess length to any cumulative length K = lenK is either - # $excess = $lenk - ($lentot + $maximum_text_length), or - # $excess = $lenk - ($lentot_oo + $maximum_text_length_oo), - # so the worst case (maximum excess) corresponds to the configuration - # with minimum value of the sum: $lentot + $maximum_text_length - if ( $lentot_oo + $maximum_text_length_oo < - $starting_lentot + $maximum_text_length ) - { - $Kref = $Kouter_opening; - $starting_level = $starting_level_oo; - $starting_ci = $starting_ci_oo; - $starting_lentot = $lentot_oo; - $maximum_text_length = $maximum_text_length_oo; - } - } + if ( $Klast < $Knext_beg ) { + $Klast = undef; + } + else { - my $new_weld_ok = 1; + $Kfirst = $Knext_beg; - # STEP 5, fix b1020: Avoid problem areas with the -wn -lp combination. The - # combination -wn -lp -dws -naws does not work well and can cause blinkers. - # It will probably only occur in stress testing. For this situation we - # will only start a new weld if we start at a 'good' location. - # - Added 'if' to fix case b1032. - # - Require blank before certain previous characters to fix b1111. - # - Add ';' to fix case b1139 - # - Convert from '$ok_to_weld' to '$new_weld_ok' to fix b1162. - # - relaxed constraints for b1227 - if ( $starting_ci - && $rOpts_line_up_parentheses - && $rOpts_delete_old_whitespace - && !$rOpts_add_whitespace - && defined($Kprev) ) - { - my $type_first = $rLL->[$Kfirst]->[_TYPE_]; - my $token_first = $rLL->[$Kfirst]->[_TOKEN_]; - my $type_prev = $rLL->[$Kprev]->[_TYPE_]; - my $type_pp = 'b'; - if ( $Kprev >= 0 ) { $type_pp = $rLL->[ $Kprev - 1 ]->[_TYPE_] } - unless ( - $type_prev =~ /^[\,\.\;]/ - || $type_prev =~ /^[=\{\[\(\L]/ - && ( $type_pp eq 'b' || $type_pp eq '}' || $type_first eq 'k' ) - || $type_first =~ /^[=\,\.\;\{\[\(\L]/ - || $type_first eq '||' - || ( - $type_first eq 'k' - && ( $token_first eq 'if' - || $token_first eq 'or' ) - ) - ) - { - $msg = -"Skipping weld: poor break with -lp and ci at type_first='$type_first' type_prev='$type_prev' type_pp=$type_pp\n"; - $new_weld_ok = 0; - } - } - return ( $new_weld_ok, $maximum_text_length, $starting_lentot, $msg ); -} ## end sub setup_new_weld_measurements + # Save ranges of non-comment code. This will be used by + # sub keep_old_line_breaks. + if ( $rLL->[$Kfirst]->[_TYPE_] ne '#' ) { + push @Krange_code_without_comments, [ $Kfirst, $Klast ]; + } -sub excess_line_length_for_Krange { - my ( $self, $Kfirst, $Klast ) = @_; + # Only save ending K indexes of code types which are blank + # or 'VER'. These will be used for a convergence check. + # See related code in sub 'convey_batch_to_vertical_aligner' + my $CODE_type = $line_of_tokens->{_code_type}; + if ( !$CODE_type + || $CODE_type eq 'VER' ) + { + push @Klast_valign_code, $Klast; + } + } + } - # returns $excess_length = - # by how many characters a line composed of tokens $Kfirst .. $Klast will - # exceed the allowed line length + # It is only safe to trim the actual line text if the input + # line had a terminal blank token. Otherwise, we may be + # in a quote. + if ( $line_of_tokens->{_ended_in_blank_token} ) { + $line_of_tokens->{_line_text} =~ s/\s+$//; + } + $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ]; - my $rLL = $self->[_rLL_]; - my $length_before_Kfirst = - $Kfirst <= 0 - ? 0 - : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_]; + # Deleting semicolons can create new empty code lines + # which should be marked as blank + if ( !defined($Kfirst) ) { + my $CODE_type = $line_of_tokens->{_code_type}; + if ( !$CODE_type ) { + $line_of_tokens->{_code_type} = 'BL'; + } + } + else { - # backup before a side comment if necessary - my $Kend = $Klast; - if ( $rOpts_ignore_side_comment_lengths - && $rLL->[$Klast]->[_TYPE_] eq '#' ) - { - my $Kprev = $self->K_previous_nonblank($Klast); - if ( defined($Kprev) && $Kprev >= $Kfirst ) { $Kend = $Kprev } + #--------------------------------------------------- + # save indexes of all lines with a 'q' at either end + # for later use by sub find_multiline_qw + #--------------------------------------------------- + if ( $rLL->[$Kfirst]->[_TYPE_] eq 'q' + || $rLL->[$Klast]->[_TYPE_] eq 'q' ) + { + push @{$rqw_lines}, $iline; + } + } + } } - # get the length of the text - my $length = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - $length_before_Kfirst; - - # get the size of the text window - my $level = $rLL->[$Kfirst]->[_LEVEL_]; - my $ci_level = $rLL->[$Kfirst]->[_CI_LEVEL_]; - my $max_text_length = $maximum_text_length_at_level[$level] - - $ci_level * $rOpts_continuation_indentation; - - my $excess_length = $length - $max_text_length; + # There shouldn't be any nodes beyond the last one. This routine is + # relinking lines and tokens after the tokens have been respaced. A fault + # here indicates some kind of bug has been introduced into the above loops. + # There is not good way to keep going; we better stop here. + if ( $Knext <= $Kmax ) { + Fault_Warn( + "unexpected tokens at end of file when reconstructing lines"); + $severe_error = 1; + return ( $severe_error, $rqw_lines ); + } + $self->[_rKrange_code_without_comments_] = \@Krange_code_without_comments; - DEBUG_WELD - && print -"Kfirst=$Kfirst, Klast=$Klast, Kend=$Kend, level=$level, ci=$ci_level, max_text_length=$max_text_length, length=$length\n"; - return ($excess_length); -} ## end sub excess_line_length_for_Krange + # Setup the convergence test in the FileWriter based on line-ending indexes + my $file_writer_object = $self->[_file_writer_object_]; + $file_writer_object->setup_convergence_test( \@Klast_valign_code ); -sub weld_nested_containers { - my ($self) = @_; + return ( $severe_error, $rqw_lines ); - # Called once per file for option '--weld-nested-containers' +} ## end sub resync_lines_and_tokens - my $rK_weld_left = $self->[_rK_weld_left_]; - my $rK_weld_right = $self->[_rK_weld_right_]; +sub check_for_old_break { + my ( $self, $KK, $rkeep_break_hash, $rbreak_hash ) = @_; - # This routine implements the -wn flag by "welding together" - # the nested closing and opening tokens which were previously - # identified by sub 'find_nested_pairs'. "welding" simply - # involves setting certain hash values which will be checked - # later during formatting. + # This sub is called to help implement flags: + # --keep-old-breakpoints-before and --keep-old-breakpoints-after + # Given: + # $KK = index of a token, + # $rkeep_break_hash = user control for --keep-old-... + # $rbreak_hash = hash of tokens where breaks are requested + # Set $rbreak_hash as follows if a user break is requested: + # = 1 make a hard break (flush the current batch) + # best for something like leading commas (-kbb=',') + # = 2 make a soft break (keep building current batch) + # best for something like leading -> - my $rLL = $self->[_rLL_]; - my $rlines = $self->[_rlines_]; - my $K_opening_container = $self->[_K_opening_container_]; - my $K_closing_container = $self->[_K_closing_container_]; - my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; - my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_]; - my $ris_asub_block = $self->[_ris_asub_block_]; - my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'}; + my $rLL = $self->[_rLL_]; - # Find nested pairs of container tokens for any welding. - my $rnested_pairs = $self->find_nested_pairs(); + my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_]; - # Return unless there are nested pairs to weld - return unless defined($rnested_pairs) && @{$rnested_pairs}; + # non-container tokens use the type as the key + if ( !$seqno ) { + my $type = $rLL->[$KK]->[_TYPE_]; + if ( $rkeep_break_hash->{$type} ) { + $rbreak_hash->{$KK} = $is_soft_keep_break_type{$type} ? 2 : 1; + } + } - # NOTE: It would be nice to apply RULE 5 right here by deleting unwanted - # pairs. But it isn't clear if this is possible because we don't know - # which sequences might actually start a weld. + # container tokens use the token as the key + else { + my $token = $rLL->[$KK]->[_TOKEN_]; + my $flag = $rkeep_break_hash->{$token}; + if ($flag) { - # Setup a hash to avoid instabilities with combination -lp -wn -pvt=2. - # We do this by reducing -vt=2 to -vt=1 where there could be a conflict - # with welding at the same tokens. - # See issues b1338, b1339, b1340, b1341, b1342, b1343. - if ($rOpts_line_up_parentheses) { + my $match = $flag eq '1' || $flag eq '*'; - # NOTE: just parens for now but this could be applied to all types if - # necessary. - if ( $opening_vertical_tightness{'('} == 2 ) { - my $rreduce_vertical_tightness_by_seqno = - $self->[_rreduce_vertical_tightness_by_seqno_]; - foreach my $item ( @{$rnested_pairs} ) { - my ( $inner_seqno, $outer_seqno ) = @{$item}; - if ( !$ris_excluded_lp_container->{$outer_seqno} ) { + # check for special matching codes + if ( !$match ) { + if ( $token eq '(' || $token eq ')' ) { + $match = $self->match_paren_control_flag( $seqno, $flag ); + } + elsif ( $token eq '{' || $token eq '}' ) { - # Set a flag which means that if a token has -vt=2 - # then reduce it to -vt=1. - $rreduce_vertical_tightness_by_seqno->{$outer_seqno} = 1; + # These tentative codes 'b' and 'B' for brace types are + # placeholders for possible future brace types. They + # are not documented and may be changed. + my $block_type = $self->[_rblock_type_of_seqno_]->{$seqno}; + if ( $flag eq 'b' ) { $match = $block_type } + elsif ( $flag eq 'B' ) { $match = !$block_type } + else { + # unknown code - no match + } } } + if ($match) { + my $type = $rLL->[$KK]->[_TYPE_]; + $rbreak_hash->{$KK} = $is_soft_keep_break_type{$type} ? 2 : 1; + } } } + return; +} ## end sub check_for_old_break - my $rOpts_break_at_old_method_breakpoints = - $rOpts->{'break-at-old-method-breakpoints'}; +sub keep_old_line_breaks { - # This array will hold the sequence numbers of the tokens to be welded. - my @welds; + # Called once per file to find and mark any old line breaks which + # should be kept. We will be translating the input hashes into + # token indexes. - # Variables needed for estimating line lengths - my $maximum_text_length; # maximum spaces available for text - my $starting_lentot; # cumulative text to start of current line + # A flag is set as follows: + # = 1 make a hard break (flush the current batch) + # best for something like leading commas (-kbb=',') + # = 2 make a soft break (keep building current batch) + # best for something like leading -> - my $iline_outer_opening = -1; - my $weld_count_this_start = 0; + my ($self) = @_; - # OLD: $single_line_tol added to fix cases b1180 b1181 - # = $rOpts_continuation_indentation > $rOpts_indent_columns ? 1 : 0; - # NEW: $single_line_tol=0; fixes b1212 and b1180-1181 work now - my $single_line_tol = 0; + my $rLL = $self->[_rLL_]; + my $rKrange_code_without_comments = + $self->[_rKrange_code_without_comments_]; + my $rbreak_before_Kfirst = $self->[_rbreak_before_Kfirst_]; + my $rbreak_after_Klast = $self->[_rbreak_after_Klast_]; + my $rbreak_container = $self->[_rbreak_container_]; - my $multiline_tol = $single_line_tol + 1 + - max( $rOpts_indent_columns, $rOpts_continuation_indentation ); + #---------------------------------------- + # Apply --break-at-old-method-breakpoints + #---------------------------------------- - # Define a welding cutoff level: do not start a weld if the inside - # container level equals or exceeds this level. + # This code moved here from sub break_lists to fix b1120 + if ( $rOpts->{'break-at-old-method-breakpoints'} ) { + foreach my $item ( @{$rKrange_code_without_comments} ) { + my ( $Kfirst, $Klast ) = @{$item}; + my $type = $rLL->[$Kfirst]->[_TYPE_]; + my $token = $rLL->[$Kfirst]->[_TOKEN_]; - # We use the minimum of two criteria, either of which may be more - # restrictive. The 'alpha' value is more restrictive in (b1206, b1252) and - # the 'beta' value is more restrictive in other cases (b1243). + # leading '->' use a value of 2 which causes a soft + # break rather than a hard break + if ( $type eq '->' ) { + $rbreak_before_Kfirst->{$Kfirst} = 2; + } - my $weld_cutoff_level = min( $stress_level_alpha, $stress_level_beta + 3 ); + # leading ')->' use a special flag to insure that both + # opening and closing parens get opened + # Fix for b1120: only for parens, not braces + elsif ( $token eq ')' ) { + my $Kn = $self->K_next_nonblank($Kfirst); + next + unless ( defined($Kn) + && $Kn <= $Klast + && $rLL->[$Kn]->[_TYPE_] eq '->' ); + my $seqno = $rLL->[$Kfirst]->[_TYPE_SEQUENCE_]; + next unless ($seqno); - # The vertical tightness flags can throw off line length calculations. - # This patch was added to fix instability issue b1284. - # It works to always use a tol of 1 for 1 line block length tests, but - # this restricted value keeps test case wn6.wn working as before. - # It may be necessary to include '[' and '{' here in the future. - my $one_line_tol = $opening_vertical_tightness{'('} ? 1 : 0; + # Note: in previous versions there was a fix here to avoid + # instability between conflicting -bom and -pvt or -pvtc flags. + # The fix skipped -bom for a small line difference. But this + # was troublesome, and instead the fix has been moved to + # sub set_vertical_tightness_flags where priority is given to + # the -bom flag over -pvt and -pvtc flags. Both opening and + # closing paren flags are involved because even though -bom only + # requests breaking before the closing paren, automated logic + # opens the opening paren when the closing paren opens. + # Relevant cases are b977, b1215, b1270, b1303 - my $length_to_opening_seqno = sub { - my ($seqno) = @_; - my $KK = $K_opening_container->{$seqno}; - my $lentot = defined($KK) - && $KK > 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0; - return $lentot; - }; + $rbreak_container->{$seqno} = 1; + } + } + } - my $length_to_closing_seqno = sub { - my ($seqno) = @_; - my $KK = $K_closing_container->{$seqno}; - my $lentot = defined($KK) - && $KK > 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0; - return $lentot; - }; + #--------------------------------------------------------------------- + # Apply --keep-old-breakpoints-before and --keep-old-breakpoints-after + #--------------------------------------------------------------------- - # Abbreviations: - # _oo=outer opening, i.e. first of { { - # _io=inner opening, i.e. second of { { - # _oc=outer closing, i.e. second of } { - # _ic=inner closing, i.e. first of } } + return unless ( %keep_break_before_type || %keep_break_after_type ); - my $previous_pair; + foreach my $item ( @{$rKrange_code_without_comments} ) { + my ( $Kfirst, $Klast ) = @{$item}; + $self->check_for_old_break( $Kfirst, \%keep_break_before_type, + $rbreak_before_Kfirst ); + $self->check_for_old_break( $Klast, \%keep_break_after_type, + $rbreak_after_Klast ); + } + return; +} ## end sub keep_old_line_breaks - # Main loop over nested pairs... - # We are working from outermost to innermost pairs so that - # level changes will be complete when we arrive at the inner pairs. - while ( my $item = pop( @{$rnested_pairs} ) ) { - my ( $inner_seqno, $outer_seqno ) = @{$item}; +sub weld_containers { - my $Kouter_opening = $K_opening_container->{$outer_seqno}; - my $Kinner_opening = $K_opening_container->{$inner_seqno}; - my $Kouter_closing = $K_closing_container->{$outer_seqno}; - my $Kinner_closing = $K_closing_container->{$inner_seqno}; + # Called once per file to do any welding operations requested by --weld* + # flags. + my ($self) = @_; - # RULE: do not weld if inner container has <= 3 tokens unless the next - # token is a heredoc (so we know there will be multiple lines) - if ( $Kinner_closing - $Kinner_opening <= 4 ) { - my $Knext_nonblank = $self->K_next_nonblank($Kinner_opening); - next unless defined($Knext_nonblank); - my $type = $rLL->[$Knext_nonblank]->[_TYPE_]; - next unless ( $type eq 'h' ); - } + # This count is used to eliminate needless calls for weld checks elsewhere + $total_weld_count = 0; - my $outer_opening = $rLL->[$Kouter_opening]; - my $inner_opening = $rLL->[$Kinner_opening]; - my $outer_closing = $rLL->[$Kouter_closing]; - my $inner_closing = $rLL->[$Kinner_closing]; + return if ( $rOpts->{'indent-only'} ); + return unless ($rOpts_add_newlines); - # RULE: do not weld to a hash brace. The reason is that it has a very - # strong bond strength to the next token, so a line break after it - # may not work. Previously we allowed welding to something like @{ - # but that caused blinking states (cases b751, b779). - if ( $inner_opening->[_TYPE_] eq 'L' ) { - next; - } + # Important: sub 'weld_cuddled_blocks' must be called before + # sub 'weld_nested_containers'. This is because the cuddled option needs to + # use the original _LEVEL_ values of containers, but the weld nested + # containers changes _LEVEL_ of welded containers. - # RULE: do not weld to a square bracket which does not contain commas - if ( $inner_opening->[_TYPE_] eq '[' ) { - my $rtype_count = $self->[_rtype_count_by_seqno_]->{$inner_seqno}; - next unless ($rtype_count); - my $comma_count = $rtype_count->{','}; - next unless ($comma_count); + # Here is a good test case to be sure that both cuddling and welding + # are working and not interfering with each other: <> - # Do not weld if there is text before a '[' such as here: - # curr_opt ( @beg [2,5] ) - # It will not break into the desired sandwich structure. - # This fixes case b109, 110. - my $Kdiff = $Kinner_opening - $Kouter_opening; - next if ( $Kdiff > 2 ); - next - if ( $Kdiff == 2 - && $rLL->[ $Kouter_opening + 1 ]->[_TYPE_] ne 'b' ); + # perltidy -wn -ce - } + # if ($BOLD_MATH) { ( + # $labels, $comment, + # join( '', '', &make_math( $mode, '', '', $_ ), '' ) + # ) } else { ( + # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ), + # $after + # ) } - # RULE: Avoid welding under stress. The idea is that we need to have a - # little space* within a welded container to avoid instability. Note - # that after each weld the level values are reduced, so long multiple - # welds can still be made. This rule will seldom be a limiting factor - # in actual working code. Fixes b1206, b1243. - my $inner_level = $inner_opening->[_LEVEL_]; - if ( $inner_level >= $weld_cutoff_level ) { next } + $self->weld_cuddled_blocks() if ( %{$rcuddled_block_types} ); - # Set flag saying if this pair starts a new weld - my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] ); + if ( $rOpts->{'weld-nested-containers'} ) { - # Set flag saying if this pair is adjacent to the previous nesting pair - # (even if previous pair was rejected as a weld) - my $touch_previous_pair = - defined($previous_pair) && $outer_seqno == $previous_pair->[0]; - $previous_pair = $item; + $self->weld_nested_containers(); - my $do_not_weld_rule = 0; - my $Msg = EMPTY_STRING; - my $is_one_line_weld; + $self->weld_nested_quotes(); + } - my $iline_oo = $outer_opening->[_LINE_INDEX_]; - my $iline_io = $inner_opening->[_LINE_INDEX_]; - my $iline_ic = $inner_closing->[_LINE_INDEX_]; - my $iline_oc = $outer_closing->[_LINE_INDEX_]; - my $token_oo = $outer_opening->[_TOKEN_]; - my $token_io = $inner_opening->[_TOKEN_]; + #------------------------------------------------------------- + # All welding is done. Finish setting up weld data structures. + #------------------------------------------------------------- - my $is_multiline_weld = - $iline_oo == $iline_io - && $iline_ic == $iline_oc - && $iline_io != $iline_ic; + my $rLL = $self->[_rLL_]; + my $rK_weld_left = $self->[_rK_weld_left_]; + my $rK_weld_right = $self->[_rK_weld_right_]; + my $rweld_len_right_at_K = $self->[_rweld_len_right_at_K_]; - if (DEBUG_WELD) { - my $len_oo = $rLL->[$Kouter_opening]->[_CUMULATIVE_LENGTH_]; - my $len_io = $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_]; - $Msg .= <{$Kstart}; + + # An error here would be due to an incorrect initialization introduced + # in one of the above weld routines, like sub weld_nested. + if ( $Kend <= $Kstart ) { + Fault("Bad weld link: Kend=$Kend <= Kstart=$Kstart\n") + if (DEVEL_MODE); next; } - # If this pair is not adjacent to the previous pair (skipped or not), - # then measure lengths from the start of line of oo. - if ( - !$touch_previous_pair + # Set weld values for all tokens this welded pair + foreach ( $Kstart + 1 .. $Kend ) { + $rK_weld_left->{$_} = $Kstart; + } + foreach my $Kx ( $Kstart .. $Kend - 1 ) { + $rK_weld_right->{$Kx} = $Kend; + $rweld_len_right_at_K->{$Kx} = + $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - + $rLL->[$Kx]->[_CUMULATIVE_LENGTH_]; + } - # Also do this if restarting at a new line; fixes case b965, s001 - || ( !$weld_count_this_start && $iline_oo > $iline_outer_opening ) - ) + # Remember the leftmost index of welds which continue to the right + if ( defined( $rK_weld_right->{$Kend} ) + && !defined( $rK_weld_left->{$Kstart} ) ) { + push @K_multi_weld, $Kstart; + } + } - # Remember the line we are using as a reference - $iline_outer_opening = $iline_oo; - $weld_count_this_start = 0; + # Second pass to process chains of welds (these are rare). + # This has to be processed in sorted order. + if (@K_multi_weld) { + my $Kend = -1; + foreach my $Kstart ( sort { $a <=> $b } @K_multi_weld ) { - ( my $new_weld_ok, $maximum_text_length, $starting_lentot, my $msg ) - = $self->setup_new_weld_measurements( $Kouter_opening, - $Kinner_opening ); + # Skip any interior K which was originally missing a left link + next if ( $Kstart <= $Kend ); - if ( - !$new_weld_ok - && ( $iline_oo != $iline_io - || $iline_ic != $iline_oc ) - ) - { - if (DEBUG_WELD) { print $msg} - next; + # Find the end of this chain + $Kend = $rK_weld_right->{$Kstart}; + my $Knext = $rK_weld_right->{$Kend}; + while ( defined($Knext) ) { + $Kend = $Knext; + $Knext = $rK_weld_right->{$Kend}; } - my $rK_range = $rlines->[$iline_oo]->{_rK_range}; - my ( $Kfirst, $Klast ) = @{$rK_range}; + # Set weld values this chain + foreach ( $Kstart + 1 .. $Kend ) { + $rK_weld_left->{$_} = $Kstart; + } + foreach my $Kx ( $Kstart .. $Kend - 1 ) { + $rK_weld_right->{$Kx} = $Kend; + $rweld_len_right_at_K->{$Kx} = + $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - + $rLL->[$Kx]->[_CUMULATIVE_LENGTH_]; + } + } + } - # An existing one-line weld is a line in which - # (1) the containers are all on one line, and - # (2) the line does not exceed the allowable length - if ( $iline_oo == $iline_oc ) { + return; +} ## end sub weld_containers - # All the tokens are on one line, now check their length. - # Start with the full line index range. We will reduce this - # in the coding below in some cases. - my $Kstart = $Kfirst; - my $Kstop = $Klast; +sub cumulative_length_before_K { + my ( $self, $KK ) = @_; + my $rLL = $self->[_rLL_]; + return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; +} - # Note that the following minimal choice for measuring will - # work and will not cause any instabilities because it is - # invariant: +sub weld_cuddled_blocks { + my ($self) = @_; - ## my $Kstart = $Kouter_opening; - ## my $Kstop = $Kouter_closing; + # Called once per file to handle cuddled formatting - # But that can lead to some undesirable welds. So a little - # more complicated method has been developed. + my $rK_weld_left = $self->[_rK_weld_left_]; + my $rK_weld_right = $self->[_rK_weld_right_]; + my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; - # We are trying to avoid creating bad two-line welds when we are - # working on long, previously un-welded input text, such as + # This routine implements the -cb flag by finding the appropriate + # closing and opening block braces and welding them together. + return unless ( %{$rcuddled_block_types} ); - # INPUT (example of a long input line weld candidate): - ## $mutation->transpos( $self->RNA->position($mutation->label, $atg_label)); + my $rLL = $self->[_rLL_]; + return unless ( defined($rLL) && @{$rLL} ); - # GOOD two-line break: (not welded; result marked too long): - ## $mutation->transpos( - ## $self->RNA->position($mutation->label, $atg_label)); + my $rbreak_container = $self->[_rbreak_container_]; + my $ris_broken_container = $self->[_ris_broken_container_]; + my $ris_cuddled_closing_brace = $self->[_ris_cuddled_closing_brace_]; + my $K_closing_container = $self->[_K_closing_container_]; - # BAD two-line break: (welded; result if we weld): - ## $mutation->transpos($self->RNA->position( - ## $mutation->label, $atg_label)); + # A stack to remember open chains at all levels: This is a hash rather than + # an array for safety because negative levels can occur in files with + # errors. This allows us to keep processing with negative levels. + # $in_chain{$level} = [$chain_type, $type_sequence]; + my %in_chain; + my $CBO = $rOpts->{'cuddled-break-option'}; - # We can only get an approximate estimate of the final length, - # since the line breaks may change, and for -lp mode because - # even the indentation is not yet known. + # loop over structure items to find cuddled pairs + my $level = 0; + my $KNEXT = $self->[_K_first_seq_item_]; + while ( defined($KNEXT) ) { + my $KK = $KNEXT; + $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_]; + my $rtoken_vars = $rLL->[$KK]; + my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; + if ( !$type_sequence ) { + next if ( $KK == 0 ); # first token in file may not be container - my $level_first = $rLL->[$Kfirst]->[_LEVEL_]; - my $level_last = $rLL->[$Klast]->[_LEVEL_]; - my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_]; - my $level_oc = $rLL->[$Kouter_closing]->[_LEVEL_]; + # A fault here implies that an error was made in the little loop at + # the bottom of sub 'respace_tokens' which set the values of + # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the + # loop control lines above. + Fault("sequence = $type_sequence not defined at K=$KK") + if (DEVEL_MODE); + next; + } - # - measure to the end of the original line if balanced - # - measure to the closing container if unbalanced (fixes b1230) - #if ( $level_first != $level_last ) { $Kstop = $Kouter_closing } - if ( $level_oc > $level_last ) { $Kstop = $Kouter_closing } + # NOTE: we must use the original levels here. They can get changed + # by sub 'weld_nested_containers', so this routine must be called + # before sub 'weld_nested_containers'. + my $last_level = $level; + $level = $rtoken_vars->[_LEVEL_]; - # - measure from the start of the original line if balanced - # - measure from the most previous token with same level - # if unbalanced (b1232) - if ( $Kouter_opening > $Kfirst && $level_oo > $level_first ) { - $Kstart = $Kouter_opening; + if ( $level < $last_level ) { $in_chain{$last_level} = undef } + elsif ( $level > $last_level ) { $in_chain{$level} = undef } - foreach - my $KK ( reverse( $Kfirst + 1 .. $Kouter_opening - 1 ) ) - { - next if ( $rLL->[$KK]->[_TYPE_] eq 'b' ); - last if ( $rLL->[$KK]->[_LEVEL_] < $level_oo ); - $Kstart = $KK; - } - } + # We are only looking at code blocks + my $token = $rtoken_vars->[_TOKEN_]; + my $type = $rtoken_vars->[_TYPE_]; + next unless ( $type eq $token ); - my $excess = - $self->excess_line_length_for_Krange( $Kstart, $Kstop ); + if ( $token eq '{' ) { - # Coding simplified here for case b1219. - # Increased tol from 0 to 1 when pvt>0 to fix b1284. - $is_one_line_weld = $excess <= $one_line_tol; + my $block_type = $rblock_type_of_seqno->{$type_sequence}; + if ( !$block_type ) { + + # patch for unrecognized block types which may not be labeled + my $Kp = $self->K_previous_nonblank($KK); + while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) { + $Kp = $self->K_previous_nonblank($Kp); + } + next unless $Kp; + $block_type = $rLL->[$Kp]->[_TOKEN_]; } + if ( $in_chain{$level} ) { - # DO-NOT-WELD RULE 1: - # Do not weld something that looks like the start of a two-line - # function call, like this: <> - # $trans->add_transformation( - # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) ); - # We will look for a semicolon after the closing paren. + # we are in a chain and are at an opening block brace. + # See if we are welding this opening brace with the previous + # block brace. Get their identification numbers: + my $closing_seqno = $in_chain{$level}->[1]; + my $opening_seqno = $type_sequence; - # We want to weld something complex, like this though - # my $compass = uc( opposite_direction( line_to_canvas_direction( - # @{ $coords[0] }, @{ $coords[1] } ) ) ); - # Otherwise we will get a 'blinker'. For example, the following - # would become a blinker without this rule: - # $Self->_Add( $SortOrderDisplay{ $Field - # ->GenerateFieldForSelectSQL() } ); - # But it is okay to weld a two-line statement if it looks like - # it was already welded, meaning that the two opening containers are - # on a different line that the two closing containers. This is - # necessary to prevent blinking of something like this with - # perltidy -wn -pbp (starting indentation two levels deep): + # The preceding block must be on multiple lines so that its + # closing brace will start a new line. + if ( !$ris_broken_container->{$closing_seqno} + && !$rbreak_container->{$closing_seqno} ) + { + next unless ( $CBO == 2 ); + $rbreak_container->{$closing_seqno} = 1; + } - # $top_label->set_text( gettext( - # "Unable to create personal directory - check permissions.") ); - if ( $iline_oc == $iline_oo + 1 - && $iline_io == $iline_ic - && $token_oo eq '(' ) - { + # We can weld the closing brace to its following word .. + my $Ko = $K_closing_container->{$closing_seqno}; + my $Kon; + if ( defined($Ko) ) { + $Kon = $self->K_next_nonblank($Ko); + } - # Look for following semicolon... - my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing); - my $next_nonblank_type = - defined($Knext_nonblank) - ? $rLL->[$Knext_nonblank]->[_TYPE_] - : 'b'; - if ( $next_nonblank_type eq ';' ) { + # ..unless it is a comment + if ( defined($Kon) && $rLL->[$Kon]->[_TYPE_] ne '#' ) { + + # OK to weld these two tokens... + $rK_weld_right->{$Ko} = $Kon; + $rK_weld_left->{$Kon} = $Ko; + + # Set flag that we want to break the next container + # so that the cuddled line is balanced. + $rbreak_container->{$opening_seqno} = 1 + if ($CBO); + + # Remember which braces are cuddled. + # The closing brace is used to set adjusted indentations. + # The opening brace is not yet used but might eventually + # be needed in setting adjusted indentation. + $ris_cuddled_closing_brace->{$closing_seqno} = 1; - # Then do not weld if no other containers between inner - # opening and closing. - my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_]; - if ( $Knext_seq_item == $Kinner_closing ) { - $do_not_weld_rule = 1; - } } - } - } ## end starting new weld sequence - else { + } + else { - # set the 1-line flag if continuing a weld sequence; fixes b1239 - $is_one_line_weld = ( $iline_oo == $iline_oc ); + # We are not in a chain. Start a new chain if we see the + # starting block type. + if ( $rcuddled_block_types->{$block_type} ) { + $in_chain{$level} = [ $block_type, $type_sequence ]; + } + else { + $block_type = '*'; + $in_chain{$level} = [ $block_type, $type_sequence ]; + } + } } + elsif ( $token eq '}' ) { + if ( $in_chain{$level} ) { - # DO-NOT-WELD RULE 2: - # Do not weld an opening paren to an inner one line brace block - # We will just use old line numbers for this test and require - # iterations if necessary for convergence + # We are in a chain at a closing brace. See if this chain + # continues.. + my $Knn = $self->K_next_code($KK); + next unless $Knn; - # For example, otherwise we could cause the opening paren - # in the following example to separate from the caller name - # as here: + my $chain_type = $in_chain{$level}->[0]; + my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_]; + if ( + $rcuddled_block_types->{$chain_type}->{$next_nonblank_token} + ) + { - # $_[0]->code_handler - # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } ); + # Note that we do not weld yet because we must wait until + # we we are sure that an opening brace for this follows. + $in_chain{$level}->[1] = $type_sequence; + } + else { $in_chain{$level} = undef } + } + } + } + return; +} ## end sub weld_cuddled_blocks - # Here is another example where we do not want to weld: - # $wrapped->add_around_modifier( - # sub { push @tracelog => 'around 1'; $_[0]->(); } ); +sub find_nested_pairs { + my $self = shift; - # If the one line sub block gets broken due to length or by the - # user, then we can weld. The result will then be: - # $wrapped->add_around_modifier( sub { - # push @tracelog => 'around 1'; - # $_[0]->(); - # } ); + # This routine is called once per file to do preliminary work needed for + # the --weld-nested option. This information is also needed for adding + # semicolons. - # Updated to fix cases b1082 b1102 b1106 b1115: - # Also, do not weld to an intact inner block if the outer opening token - # is on a different line. For example, this prevents oscillation - # between these two states in case b1106: + my $rLL = $self->[_rLL_]; + return unless ( defined($rLL) && @{$rLL} ); + my $Num = @{$rLL}; - # return map{ - # ($_,[$self->$_(@_[1..$#_])]) - # }@every; + my $K_opening_container = $self->[_K_opening_container_]; + my $K_closing_container = $self->[_K_closing_container_]; + my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; - # return map { ( - # $_, [ $self->$_( @_[ 1 .. $#_ ] ) ] - # ) } @every; + # We define an array of pairs of nested containers + my @nested_pairs; - # The effect of this change on typical code is very minimal. Sometimes - # it may take a second iteration to converge, but this gives protection - # against blinking. - if ( !$do_not_weld_rule - && !$is_one_line_weld - && $iline_ic == $iline_io ) - { - $do_not_weld_rule = 2 - if ( $token_oo eq '(' || $iline_oo != $iline_io ); - } + # Names of calling routines can either be marked as 'i' or 'w', + # and they may invoke a sub call with an '->'. We will consider + # any consecutive string of such types as a single unit when making + # weld decisions. We also allow a leading ! + my $is_name_type = { + 'i' => 1, + 'w' => 1, + 'U' => 1, + '->' => 1, + '!' => 1, + }; - # DO-NOT-WELD RULE 2A: - # Do not weld an opening asub brace in -lp mode if -asbl is set. This - # helps avoid instabilities in one-line block formation, and fixes - # b1241. Previously, the '$is_one_line_weld' flag was tested here - # instead of -asbl, and this fixed most cases. But it turns out that - # the real problem was the -asbl flag, and switching to this was - # necessary to fixe b1268. This also fixes b1269, b1277, b1278. - if ( - !$do_not_weld_rule - ##&& $is_one_line_weld - && $rOpts_line_up_parentheses - && $rOpts_asbl - && $ris_asub_block->{$outer_seqno} - ) - { - $do_not_weld_rule = '2A'; + # Loop over all closing container tokens + foreach my $inner_seqno ( keys %{$K_closing_container} ) { + my $K_inner_closing = $K_closing_container->{$inner_seqno}; + + # See if it is immediately followed by another, outer closing token + my $K_outer_closing = $K_inner_closing + 1; + $K_outer_closing += 1 + if ( $K_outer_closing < $Num + && $rLL->[$K_outer_closing]->[_TYPE_] eq 'b' ); + + next unless ( $K_outer_closing < $Num ); + my $outer_seqno = $rLL->[$K_outer_closing]->[_TYPE_SEQUENCE_]; + next unless ($outer_seqno); + my $token_outer_closing = $rLL->[$K_outer_closing]->[_TOKEN_]; + next unless ( $is_closing_token{$token_outer_closing} ); + + # Simple filter: No commas or semicolons in the outer container + my $rtype_count = $self->[_rtype_count_by_seqno_]->{$outer_seqno}; + if ($rtype_count) { + next if ( $rtype_count->{','} || $rtype_count->{';'} ); } - # DO-NOT-WELD RULE 3: - # Do not weld if this makes our line too long. - # Use a tolerance which depends on if the old tokens were welded - # (fixes cases b746 b748 b749 b750 b752 b753 b754 b755 b756 b758 b759) - if ( !$do_not_weld_rule ) { + # Now we have to check the opening tokens. + my $K_outer_opening = $K_opening_container->{$outer_seqno}; + my $K_inner_opening = $K_opening_container->{$inner_seqno}; + next unless defined($K_outer_opening) && defined($K_inner_opening); - # Measure to a little beyond the inner opening token if it is - # followed by a bare word, which may have unusual line break rules. + my $inner_blocktype = $rblock_type_of_seqno->{$inner_seqno}; + my $outer_blocktype = $rblock_type_of_seqno->{$outer_seqno}; - # NOTE: Originally this was OLD RULE 6: do not weld to a container - # which is followed on the same line by an unknown bareword token. - # This can cause blinkers (cases b626, b611). But OK to weld one - # line welds to fix cases b1057 b1064. For generality, OLD RULE 6 - # has been merged into RULE 3 here to also fix cases b1078 b1091. + # Verify that the inner opening token is the next container after the + # outer opening token. + my $K_io_check = $rLL->[$K_outer_opening]->[_KNEXT_SEQ_ITEM_]; + next unless defined($K_io_check); + if ( $K_io_check != $K_inner_opening ) { - my $K_for_length = $Kinner_opening; - my $Knext_io = $self->K_next_nonblank($Kinner_opening); - next unless ( defined($Knext_io) ); # shouldn't happen - my $type_io_next = $rLL->[$Knext_io]->[_TYPE_]; + # The inner opening container does not immediately follow the outer + # opening container, but we may still allow a weld if they are + # separated by a sub signature. For example, we may have something + # like this, where $K_io_check may be at the first 'x' instead of + # 'io'. So we need to hop over the signature and see if we arrive + # at 'io'. - # Note: may need to eventually also include other types here, - # such as 'Z' and 'Y': if ($type_io_next =~ /^[ZYw]$/) { - if ( $type_io_next eq 'w' ) { - my $Knext_io2 = $self->K_next_nonblank($Knext_io); - next unless ( defined($Knext_io2) ); - my $type_io_next2 = $rLL->[$Knext_io2]->[_TYPE_]; - if ( !$type_ok_after_bareword{$type_io_next2} ) { - $K_for_length = $Knext_io2; - } - } + # oo io + # | x x | + # $obj->then( sub ( $code ) { + # ... + # return $c->render(text => '', status => $code); + # } ); + # | | + # ic oc - # Use a tolerance for welds over multiple lines to avoid blinkers. - # We can use zero tolerance if it looks like we are working on an - # existing weld. - my $tol = - $is_one_line_weld || $is_multiline_weld - ? $single_line_tol - : $multiline_tol; + next if ( !$inner_blocktype || $inner_blocktype ne 'sub' ); + next if $rLL->[$K_io_check]->[_TOKEN_] ne '('; + my $seqno_signature = $rLL->[$K_io_check]->[_TYPE_SEQUENCE_]; + next unless defined($seqno_signature); + my $K_signature_closing = $K_closing_container->{$seqno_signature}; + next unless defined($K_signature_closing); + my $K_test = $rLL->[$K_signature_closing]->[_KNEXT_SEQ_ITEM_]; + next + unless ( defined($K_test) && $K_test == $K_inner_opening ); - # By how many characters does this exceed the text window? - my $excess = - $self->cumulative_length_before_K($K_for_length) - - $starting_lentot + 1 + $tol - - $maximum_text_length; + # OK, we have arrived at 'io' in the above diagram. We should put + # a limit on the length or complexity of the signature here. There + # is no perfect way to do this, one way is to put a limit on token + # count. For consistency with older versions, we should allow a + # signature with a single variable to weld, but not with + # multiple variables. A single variable as in 'sub ($code) {' can + # have a $Kdiff of 2 to 4, depending on spacing. - # Old patch: Use '>=0' instead of '> 0' here to fix cases b995 b998 - # b1000 b1001 b1007 b1008 b1009 b1010 b1011 b1012 b1016 b1017 b1018 - # Revised patch: New tolerance definition allows going back to '> 0' - # here. This fixes case b1124. See also cases b1087 and b1087a. - if ( $excess > 0 ) { $do_not_weld_rule = 3 } + # But two variables like 'sub ($v1,$v2) {' can have a diff of 4 to + # 7, depending on spacing. So to keep formatting consistent with + # previous versions, we will also avoid welding if there is a comma + # in the signature. - if (DEBUG_WELD) { - $Msg .= -"RULE 3 test: excess length to K=$Kinner_opening is $excess > 0 with tol= $tol ?) \n"; - } + my $Kdiff = $K_signature_closing - $K_io_check; + next if ( $Kdiff > 4 ); + + # backup comma count test; but we cannot get here with Kdiff<=4 + my $rtc = $self->[_rtype_count_by_seqno_]->{$seqno_signature}; + next if ( $rtc && $rtc->{','} ); } - # DO-NOT-WELD RULE 4; implemented for git#10: - # Do not weld an opening -ce brace if the next container is on a single - # line, different from the opening brace. (This is very rare). For - # example, given the following with -ce, we will avoid joining the { - # and [ + # Yes .. this is a possible nesting pair. + # They can be separated by a small amount. + my $K_diff = $K_inner_opening - $K_outer_opening; - # } else { - # [ $_, length($_) ] - # } + # Count the number of nonblank characters separating them. + # Note: the $nonblank_count includes the inner opening container + # but not the outer opening container, so it will be >= 1. + if ( $K_diff < 0 ) { next } # Shouldn't happen + my $nonblank_count = 0; + my $type; + my $is_name; - # because this would produce a terminal one-line block: + # Here is an example of a long identifier chain which counts as a + # single nonblank here (this spans about 10 K indexes): + # if ( !Boucherot::SetOfConnections->new->handler->execute( + # ^--K_o_o ^--K_i_o + # @array) ) + my $Kn_first = $K_outer_opening; + my $Kn_last_nonblank; + my $saw_comment; - # } else { [ $_, length($_) ] } + foreach my $Kn ( $K_outer_opening + 1 .. $K_inner_opening ) { + next if ( $rLL->[$Kn]->[_TYPE_] eq 'b' ); + if ( !$nonblank_count ) { $Kn_first = $Kn } + if ( $Kn eq $K_inner_opening ) { $nonblank_count++; last; } + $Kn_last_nonblank = $Kn; - # which may not be what is desired. But given this input: + # skip chain of identifier tokens + my $last_type = $type; + my $last_is_name = $is_name; + $type = $rLL->[$Kn]->[_TYPE_]; + if ( $type eq '#' ) { $saw_comment = 1; last } + $is_name = $is_name_type->{$type}; + next if ( $is_name && $last_is_name ); - # } else { [ $_, length($_) ] } + # do not count a possible leading - of bareword hash key + next if ( $type eq 'm' && !$last_type ); - # then we will do the weld and retain the one-line block - if ( !$do_not_weld_rule && $rOpts->{'cuddled-else'} ) { - my $block_type = $rblock_type_of_seqno->{$outer_seqno}; - if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) { - my $io_line = $inner_opening->[_LINE_INDEX_]; - my $ic_line = $inner_closing->[_LINE_INDEX_]; - my $oo_line = $outer_opening->[_LINE_INDEX_]; - if ( $oo_line < $io_line && $ic_line == $io_line ) { - $do_not_weld_rule = 4; - } - } + $nonblank_count++; + last if ( $nonblank_count > 2 ); } - # DO-NOT-WELD RULE 5: do not include welds excluded by user - if ( - !$do_not_weld_rule - && %weld_nested_exclusion_rules - && ( $self->is_excluded_weld( $Kouter_opening, $starting_new_weld ) - || $self->is_excluded_weld( $Kinner_opening, 0 ) ) - ) + # Do not weld across a comment .. fix for c058. + next if ($saw_comment); + + # Patch for b1104: do not weld to a paren preceded by sort/map/grep + # because the special line break rules may cause a blinking state + if ( defined($Kn_last_nonblank) + && $rLL->[$K_inner_opening]->[_TOKEN_] eq '(' + && $rLL->[$Kn_last_nonblank]->[_TYPE_] eq 'k' ) { - $do_not_weld_rule = 5; + my $token = $rLL->[$Kn_last_nonblank]->[_TOKEN_]; + + # Turn off welding at sort/map/grep ( + if ( $is_sort_map_grep{$token} ) { $nonblank_count = 10 } } - # DO-NOT-WELD RULE 6: This has been merged into RULE 3 above. + my $token_oo = $rLL->[$K_outer_opening]->[_TOKEN_]; - # DO-NOT-WELD RULE 7: Do not weld if this conflicts with -bom - # (case b973) - if ( !$do_not_weld_rule - && $rOpts_break_at_old_method_breakpoints - && $iline_io > $iline_oo ) - { + if ( - foreach my $iline ( $iline_oo + 1 .. $iline_io ) { - my $rK_range = $rlines->[$iline]->{_rK_range}; - next unless defined($rK_range); - my ( $Kfirst, $Klast ) = @{$rK_range}; - next unless defined($Kfirst); - if ( $rLL->[$Kfirst]->[_TYPE_] eq '->' ) { - $do_not_weld_rule = 7; - last; - } - } - } + # 1: adjacent opening containers, like: do {{ + $nonblank_count == 1 - if ($do_not_weld_rule) { + # 2. anonymous sub + prototype or sig: )->then( sub ($code) { + # ... but it seems best not to stack two structural blocks, like + # this + # sub make_anon_with_my_sub { sub { + # because it probably hides the structure a little too much. + || ( $inner_blocktype + && $inner_blocktype eq 'sub' + && $rLL->[$Kn_first]->[_TOKEN_] eq 'sub' + && !$outer_blocktype ) - # After neglecting a pair, we start measuring from start of point - # io ... but not if previous type does not like to be separated - # from its container (fixes case b1184) - my $Kprev = $self->K_previous_nonblank($Kinner_opening); - my $type_prev = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'w'; - if ( !$has_tight_paren{$type_prev} ) { - my $starting_level = $inner_opening->[_LEVEL_]; - my $starting_ci_level = $inner_opening->[_CI_LEVEL_]; - $starting_lentot = - $self->cumulative_length_before_K($Kinner_opening); - $maximum_text_length = - $maximum_text_length_at_level[$starting_level] - - $starting_ci_level * $rOpts_continuation_indentation; - } - - if (DEBUG_WELD) { - $Msg .= "Not welding due to RULE $do_not_weld_rule\n"; - print $Msg; - } + # 3. short item following opening paren, like: fun( yyy ( + || $nonblank_count == 2 && $token_oo eq '(' - # Normally, a broken pair should not decrease indentation of - # intermediate tokens: - ## if ( $last_pair_broken ) { next } - # However, for long strings of welded tokens, such as '{{{{{{...' - # we will allow broken pairs to also remove indentation. - # This will keep very long strings of opening and closing - # braces from marching off to the right. We will do this if the - # number of tokens in a weld before the broken weld is 4 or more. - # This rule will mainly be needed for test scripts, since typical - # welds have fewer than about 4 welded tokens. - if ( !@welds || @{ $welds[-1] } < 4 ) { next } + # 4. weld around fat commas, if requested (git #108), such as + # elf->call_method( method_name_foo => { + || ( $type eq '=>' + && $nonblank_count <= 3 + && %weld_fat_comma_rules + && $weld_fat_comma_rules{$token_oo} ) + ) + { + push @nested_pairs, + [ $inner_seqno, $outer_seqno, $K_inner_closing ]; } + next; + } - # otherwise start new weld ... - elsif ($starting_new_weld) { - $weld_count_this_start++; - if (DEBUG_WELD) { - $Msg .= "Starting new weld\n"; - print $Msg; - } - push @welds, $item; + # The weld routine expects the pairs in order in the form + # [$seqno_inner, $seqno_outer] + # And they must be in the same order as the inner closing tokens + # (otherwise, welds of three or more adjacent tokens will not work). The K + # value of this inner closing token has temporarily been stored for + # sorting. + @nested_pairs = - $rK_weld_right->{$Kouter_opening} = $Kinner_opening; - $rK_weld_left->{$Kinner_opening} = $Kouter_opening; + # Drop the K index after sorting (it would cause trouble downstream) + map { [ $_->[0], $_->[1] ] } - $rK_weld_right->{$Kinner_closing} = $Kouter_closing; - $rK_weld_left->{$Kouter_closing} = $Kinner_closing; - } + # Sort on the K values + sort { $a->[2] <=> $b->[2] } @nested_pairs; - # ... or extend current weld - else { - $weld_count_this_start++; - if (DEBUG_WELD) { - $Msg .= "Extending current weld\n"; - print $Msg; - } - unshift @{ $welds[-1] }, $inner_seqno; - $rK_weld_right->{$Kouter_opening} = $Kinner_opening; - $rK_weld_left->{$Kinner_opening} = $Kouter_opening; + return \@nested_pairs; +} ## end sub find_nested_pairs - $rK_weld_right->{$Kinner_closing} = $Kouter_closing; - $rK_weld_left->{$Kouter_closing} = $Kinner_closing; - } +sub match_paren_control_flag { - # After welding, reduce the indentation level if all intermediate tokens - my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_]; - if ( $dlevel != 0 ) { - my $Kstart = $Kinner_opening; - my $Kstop = $Kinner_closing; - foreach my $KK ( $Kstart .. $Kstop ) { - $rLL->[$KK]->[_LEVEL_] += $dlevel; - } + # Decide if this paren is excluded by user request: + # undef matches no parens + # '*' matches all parens + # 'k' matches only if the previous nonblank token is a perl builtin + # keyword (such as 'if', 'while'), + # 'K' matches if 'k' does not, meaning if the previous token is not a + # keyword. + # 'f' matches if the previous token is a function other than a keyword. + # 'F' matches if 'f' does not. + # 'w' matches if either 'k' or 'f' match. + # 'W' matches if 'w' does not. + my ( $self, $seqno, $flag, $rLL ) = @_; - # Copy opening ci level to help break at = for -lp mode (case b1124) - $rLL->[$Kinner_opening]->[_CI_LEVEL_] = - $rLL->[$Kouter_opening]->[_CI_LEVEL_]; + # Input parameters: + # $seqno = sequence number of the container (should be paren) + # $flag = the flag which defines what matches + # $rLL = an optional alternate token list needed for respace operations + $rLL = $self->[_rLL_] unless ( defined($rLL) ); - # But do not copy the closing ci level ... it can give poor results - ## $rLL->[$Kinner_closing]->[_CI_LEVEL_] = - ## $rLL->[$Kouter_closing]->[_CI_LEVEL_]; - } - } + return 0 unless ( defined($flag) ); + return 0 if $flag eq '0'; + return 1 if $flag eq '1'; + return 1 if $flag eq '*'; + return 0 unless ($seqno); + my $K_opening = $self->[_K_opening_container_]->{$seqno}; + return unless ( defined($K_opening) ); - return; -} ## end sub weld_nested_containers + my ( $is_f, $is_k, $is_w ); + my $Kp = $self->K_previous_nonblank( $K_opening, $rLL ); + if ( defined($Kp) ) { + my $type_p = $rLL->[$Kp]->[_TYPE_]; -sub weld_nested_quotes { + # keyword? + $is_k = $type_p eq 'k'; - # Called once per file for option '--weld-nested-containers'. This - # does welding on qw quotes. + # function call? + $is_f = $self->[_ris_function_call_paren_]->{$seqno}; - my $self = shift; + # either keyword or function call? + $is_w = $is_k || $is_f; + } + my $match; + if ( $flag eq 'k' ) { $match = $is_k } + elsif ( $flag eq 'K' ) { $match = !$is_k } + elsif ( $flag eq 'f' ) { $match = $is_f } + elsif ( $flag eq 'F' ) { $match = !$is_f } + elsif ( $flag eq 'w' ) { $match = $is_w } + elsif ( $flag eq 'W' ) { $match = !$is_w } + return $match; +} ## end sub match_paren_control_flag - # See if quotes are excluded from welding - my $rflags = $weld_nested_exclusion_rules{'q'}; - return if ( defined($rflags) && defined( $rflags->[1] ) ); +sub is_excluded_weld { - my $rK_weld_left = $self->[_rK_weld_left_]; - my $rK_weld_right = $self->[_rK_weld_right_]; + # decide if this weld is excluded by user request + my ( $self, $KK, $is_leading ) = @_; + my $rLL = $self->[_rLL_]; + my $rtoken_vars = $rLL->[$KK]; + my $token = $rtoken_vars->[_TOKEN_]; + my $rflags = $weld_nested_exclusion_rules{$token}; + return 0 unless ( defined($rflags) ); + my $flag = $is_leading ? $rflags->[0] : $rflags->[1]; + return 0 unless ( defined($flag) ); + return 1 if $flag eq '*'; + my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_]; + return $self->match_paren_control_flag( $seqno, $flag ); +} ## end sub is_excluded_weld - my $rLL = $self->[_rLL_]; - return unless ( defined($rLL) && @{$rLL} ); - my $Num = @{$rLL}; +# hashes to simplify welding logic +my %type_ok_after_bareword; +my %has_tight_paren; - my $K_opening_container = $self->[_K_opening_container_]; - my $K_closing_container = $self->[_K_closing_container_]; - my $rlines = $self->[_rlines_]; +BEGIN { - my $starting_lentot; - my $maximum_text_length; + # types needed for welding RULE 6 + my @q = qw# => -> { ( [ #; + @type_ok_after_bareword{@q} = (1) x scalar(@q); - my $is_single_quote = sub { - my ( $Kbeg, $Kend, $quote_type ) = @_; - foreach my $K ( $Kbeg .. $Kend ) { - my $test_type = $rLL->[$K]->[_TYPE_]; - next if ( $test_type eq 'b' ); - return if ( $test_type ne $quote_type ); - } - return 1; - }; + # these types do not 'like' to be separated from a following paren + @q = qw(w i q Q G C Z U); + @{has_tight_paren}{@q} = (1) x scalar(@q); +} ## end BEGIN - # Length tolerance - same as previously used for sub weld_nested - my $multiline_tol = - 1 + max( $rOpts_indent_columns, $rOpts_continuation_indentation ); +use constant DEBUG_WELD => 0; - # look for single qw quotes nested in containers - my $KNEXT = $self->[_K_first_seq_item_]; - while ( defined($KNEXT) ) { - my $KK = $KNEXT; - $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_]; - my $rtoken_vars = $rLL->[$KK]; - my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_]; - if ( !$outer_seqno ) { - next if ( $KK == 0 ); # first token in file may not be container +sub setup_new_weld_measurements { - # A fault here implies that an error was made in the little loop at - # the bottom of sub 'respace_tokens' which set the values of - # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the - # loop control lines above. - Fault("sequence = $outer_seqno not defined at K=$KK") - if (DEVEL_MODE); - next; - } + # Define quantities to check for excess line lengths when welded. + # Called by sub 'weld_nested_containers' and sub 'weld_nested_quotes' - my $token = $rtoken_vars->[_TOKEN_]; - if ( $is_opening_token{$token} ) { + my ( $self, $Kouter_opening, $Kinner_opening ) = @_; - # see if the next token is a quote of some type - my $Kn = $KK + 1; - $Kn += 1 - if ( $Kn < $Num && $rLL->[$Kn]->[_TYPE_] eq 'b' ); - next unless ( $Kn < $Num ); + # Given indexes of outer and inner opening containers to be welded: + # $Kouter_opening, $Kinner_opening - my $next_token = $rLL->[$Kn]->[_TOKEN_]; - my $next_type = $rLL->[$Kn]->[_TYPE_]; - next - unless ( ( $next_type eq 'q' || $next_type eq 'Q' ) - && $next_token =~ /^q/ ); + # Returns these variables: + # $new_weld_ok = true (new weld ok) or false (do not start new weld) + # $starting_indent = starting indentation + # $starting_lentot = starting cumulative length + # $msg = diagnostic message for debugging - # The token before the closing container must also be a quote - my $Kouter_closing = $K_closing_container->{$outer_seqno}; - my $Kinner_closing = $self->K_previous_nonblank($Kouter_closing); - next unless $rLL->[$Kinner_closing]->[_TYPE_] eq $next_type; + my $rLL = $self->[_rLL_]; + my $rlines = $self->[_rlines_]; - # This is an inner opening container - my $Kinner_opening = $Kn; + my $starting_level; + my $starting_ci; + my $starting_lentot; + my $maximum_text_length; + my $msg = EMPTY_STRING; - # Do not weld to single-line quotes. Nothing is gained, and it may - # look bad. - next if ( $Kinner_closing == $Kinner_opening ); + my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_]; + my $rK_range = $rlines->[$iline_oo]->{_rK_range}; + my ( $Kfirst, $Klast ) = @{$rK_range}; - # Only weld to quotes delimited with container tokens. This is - # because welding to arbitrary quote delimiters can produce code - # which is less readable than without welding. - my $closing_delimiter = - substr( $rLL->[$Kinner_closing]->[_TOKEN_], -1, 1 ); - next - unless ( $is_closing_token{$closing_delimiter} - || $closing_delimiter eq '>' ); + #------------------------------------------------------------------------- + # We now define a reference index, '$Kref', from which to start measuring + # This choice turns out to be critical for keeping welds stable during + # iterations, so we go through a number of STEPS... + #------------------------------------------------------------------------- - # Now make sure that there is just a single quote in the container - next - unless ( - $is_single_quote->( + # STEP 1: Our starting guess is to use measure from the first token of the + # current line. This is usually a good guess. + my $Kref = $Kfirst; + + # STEP 2: See if we should go back a little farther + my $Kprev = $self->K_previous_nonblank($Kfirst); + if ( defined($Kprev) ) { + + # Avoid measuring from between an opening paren and a previous token + # which should stay close to it ... fixes b1185 + my $token_oo = $rLL->[$Kouter_opening]->[_TOKEN_]; + my $type_prev = $rLL->[$Kprev]->[_TYPE_]; + if ( $Kouter_opening == $Kfirst + && $token_oo eq '(' + && $has_tight_paren{$type_prev} ) + { + $Kref = $Kprev; + } + + # Back up and count length from a token like '=' or '=>' if -lp + # is used (this fixes b520) + # ...or if a break is wanted before there + elsif ($rOpts_line_up_parentheses + || $want_break_before{$type_prev} ) + { + + # If there are other sequence items between the start of this line + # and the opening token in question, then do not include tokens on + # the previous line in length calculations. This check added to + # fix case b1174 which had a '?' on the line + my $no_previous_seq_item = $Kref == $Kouter_opening + || $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_] == $Kouter_opening; + + if ( $no_previous_seq_item + && substr( $type_prev, 0, 1 ) eq '=' ) + { + $Kref = $Kprev; + + # Fix for b1144 and b1112: backup to the first nonblank + # character before the =>, or to the start of its line. + if ( $type_prev eq '=>' ) { + my $iline_prev = $rLL->[$Kprev]->[_LINE_INDEX_]; + my $rK_range_prev = $rlines->[$iline_prev]->{_rK_range}; + my ( $Kfirst_prev, $Klast_prev ) = @{$rK_range_prev}; + foreach my $KK ( reverse( $Kfirst_prev .. $Kref - 1 ) ) { + next if ( $rLL->[$KK]->[_TYPE_] eq 'b' ); + $Kref = $KK; + last; + } + } + } + } + } + + # STEP 3: Now look ahead for a ternary and, if found, use it. + # This fixes case b1182. + # Also look for a ')' at the same level and, if found, use it. + # This fixes case b1224. + if ( $Kref < $Kouter_opening ) { + my $Knext = $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_]; + my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_]; + while ( $Knext < $Kouter_opening ) { + if ( $rLL->[$Knext]->[_LEVEL_] == $level_oo ) { + if ( $is_ternary{ $rLL->[$Knext]->[_TYPE_] } + || $rLL->[$Knext]->[_TOKEN_] eq ')' ) + { + $Kref = $Knext; + last; + } + } + $Knext = $rLL->[$Knext]->[_KNEXT_SEQ_ITEM_]; + } + } + + # Define the starting measurements we will need + $starting_lentot = + $Kref <= 0 ? 0 : $rLL->[ $Kref - 1 ]->[_CUMULATIVE_LENGTH_]; + $starting_level = $rLL->[$Kref]->[_LEVEL_]; + $starting_ci = $rLL->[$Kref]->[_CI_LEVEL_]; + + $maximum_text_length = $maximum_text_length_at_level[$starting_level] - + $starting_ci * $rOpts_continuation_indentation; + + # STEP 4: Switch to using the outer opening token as the reference + # point if a line break before it would make a longer line. + # Fixes case b1055 and is also an alternate fix for b1065. + my $starting_level_oo = $rLL->[$Kouter_opening]->[_LEVEL_]; + if ( $Kref < $Kouter_opening ) { + my $starting_ci_oo = $rLL->[$Kouter_opening]->[_CI_LEVEL_]; + my $lentot_oo = $rLL->[ $Kouter_opening - 1 ]->[_CUMULATIVE_LENGTH_]; + my $maximum_text_length_oo = + $maximum_text_length_at_level[$starting_level_oo] - + $starting_ci_oo * $rOpts_continuation_indentation; + + # The excess length to any cumulative length K = lenK is either + # $excess = $lenk - ($lentot + $maximum_text_length), or + # $excess = $lenk - ($lentot_oo + $maximum_text_length_oo), + # so the worst case (maximum excess) corresponds to the configuration + # with minimum value of the sum: $lentot + $maximum_text_length + if ( $lentot_oo + $maximum_text_length_oo < + $starting_lentot + $maximum_text_length ) + { + $Kref = $Kouter_opening; + $starting_level = $starting_level_oo; + $starting_ci = $starting_ci_oo; + $starting_lentot = $lentot_oo; + $maximum_text_length = $maximum_text_length_oo; + } + } + + my $new_weld_ok = 1; + + # STEP 5, fix b1020: Avoid problem areas with the -wn -lp combination. The + # combination -wn -lp -dws -naws does not work well and can cause blinkers. + # It will probably only occur in stress testing. For this situation we + # will only start a new weld if we start at a 'good' location. + # - Added 'if' to fix case b1032. + # - Require blank before certain previous characters to fix b1111. + # - Add ';' to fix case b1139 + # - Convert from '$ok_to_weld' to '$new_weld_ok' to fix b1162. + # - relaxed constraints for b1227 + # - added skip if type is 'q' for b1349 and b1350 b1351 b1352 b1353 + # - added skip if type is 'Q' for b1447 + if ( $starting_ci + && $rOpts_line_up_parentheses + && $rOpts_delete_old_whitespace + && !$rOpts_add_whitespace + && $rLL->[$Kinner_opening]->[_TYPE_] ne 'q' + && $rLL->[$Kinner_opening]->[_TYPE_] ne 'Q' + && defined($Kprev) ) + { + my $type_first = $rLL->[$Kfirst]->[_TYPE_]; + my $token_first = $rLL->[$Kfirst]->[_TOKEN_]; + my $type_prev = $rLL->[$Kprev]->[_TYPE_]; + my $type_pp = 'b'; + if ( $Kprev >= 0 ) { $type_pp = $rLL->[ $Kprev - 1 ]->[_TYPE_] } + unless ( + $type_prev =~ /^[\,\.\;]/ + || $type_prev =~ /^[=\{\[\(\L]/ + && ( $type_pp eq 'b' || $type_pp eq '}' || $type_first eq 'k' ) + || $type_first =~ /^[=\,\.\;\{\[\(\L]/ + || $type_first eq '||' + || ( + $type_first eq 'k' + && ( $token_first eq 'if' + || $token_first eq 'or' ) + ) + ) + { + $msg = +"Skipping weld: poor break with -lp and ci at type_first='$type_first' type_prev='$type_prev' type_pp=$type_pp\n"; + $new_weld_ok = 0; + } + } + return ( $new_weld_ok, $maximum_text_length, $starting_lentot, $msg ); +} ## end sub setup_new_weld_measurements + +sub excess_line_length_for_Krange { + my ( $self, $Kfirst, $Klast ) = @_; + + # returns $excess_length = + # by how many characters a line composed of tokens $Kfirst .. $Klast will + # exceed the allowed line length + + my $rLL = $self->[_rLL_]; + my $length_before_Kfirst = + $Kfirst <= 0 + ? 0 + : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_]; + + # backup before a side comment if necessary + my $Kend = $Klast; + if ( $rOpts_ignore_side_comment_lengths + && $rLL->[$Klast]->[_TYPE_] eq '#' ) + { + my $Kprev = $self->K_previous_nonblank($Klast); + if ( defined($Kprev) && $Kprev >= $Kfirst ) { $Kend = $Kprev } + } + + # get the length of the text + my $length = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - $length_before_Kfirst; + + # get the size of the text window + my $level = $rLL->[$Kfirst]->[_LEVEL_]; + my $ci_level = $rLL->[$Kfirst]->[_CI_LEVEL_]; + my $max_text_length = $maximum_text_length_at_level[$level] - + $ci_level * $rOpts_continuation_indentation; + + my $excess_length = $length - $max_text_length; + + DEBUG_WELD + && print +"Kfirst=$Kfirst, Klast=$Klast, Kend=$Kend, level=$level, ci=$ci_level, max_text_length=$max_text_length, length=$length\n"; + return ($excess_length); +} ## end sub excess_line_length_for_Krange + +sub weld_nested_containers { + my ($self) = @_; + + # Called once per file for option '--weld-nested-containers' + + my $rK_weld_left = $self->[_rK_weld_left_]; + my $rK_weld_right = $self->[_rK_weld_right_]; + + # This routine implements the -wn flag by "welding together" + # the nested closing and opening tokens which were previously + # identified by sub 'find_nested_pairs'. "welding" simply + # involves setting certain hash values which will be checked + # later during formatting. + + my $rLL = $self->[_rLL_]; + my $rlines = $self->[_rlines_]; + my $K_opening_container = $self->[_K_opening_container_]; + my $K_closing_container = $self->[_K_closing_container_]; + my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; + my $ris_asub_block = $self->[_ris_asub_block_]; + my $rmax_vertical_tightness = $self->[_rmax_vertical_tightness_]; + + my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'}; + + # Find nested pairs of container tokens for any welding. + my $rnested_pairs = $self->find_nested_pairs(); + + # Return unless there are nested pairs to weld + return unless defined($rnested_pairs) && @{$rnested_pairs}; + + # NOTE: It would be nice to apply RULE 5 right here by deleting unwanted + # pairs. But it isn't clear if this is possible because we don't know + # which sequences might actually start a weld. + + my $rOpts_break_at_old_method_breakpoints = + $rOpts->{'break-at-old-method-breakpoints'}; + + # This array will hold the sequence numbers of the tokens to be welded. + my @welds; + + # Variables needed for estimating line lengths + my $maximum_text_length; # maximum spaces available for text + my $starting_lentot; # cumulative text to start of current line + + my $iline_outer_opening = -1; + my $weld_count_this_start = 0; + + # OLD: $single_line_tol added to fix cases b1180 b1181 + # = $rOpts_continuation_indentation > $rOpts_indent_columns ? 1 : 0; + # NEW: $single_line_tol=0; fixes b1212 and b1180-1181 work now + my $single_line_tol = 0; + + my $multiline_tol = $single_line_tol + 1 + + max( $rOpts_indent_columns, $rOpts_continuation_indentation ); + + # Define a welding cutoff level: do not start a weld if the inside + # container level equals or exceeds this level. + + # We use the minimum of two criteria, either of which may be more + # restrictive. The 'alpha' value is more restrictive in (b1206, b1252) and + # the 'beta' value is more restrictive in other cases (b1243). + # Reduced beta term from beta+3 to beta+2 to fix b1401. Previously: + # my $weld_cutoff_level = min($stress_level_alpha, $stress_level_beta + 2); + # This is now '$high_stress_level'. + + # The vertical tightness flags can throw off line length calculations. + # This patch was added to fix instability issue b1284. + # It works to always use a tol of 1 for 1 line block length tests, but + # this restricted value keeps test case wn6.wn working as before. + # It may be necessary to include '[' and '{' here in the future. + my $one_line_tol = $opening_vertical_tightness{'('} ? 1 : 0; + + # Abbreviations: + # _oo=outer opening, i.e. first of { { + # _io=inner opening, i.e. second of { { + # _oc=outer closing, i.e. second of } { + # _ic=inner closing, i.e. first of } } + + my $previous_pair; + + # Main loop over nested pairs... + # We are working from outermost to innermost pairs so that + # level changes will be complete when we arrive at the inner pairs. + while ( my $item = pop( @{$rnested_pairs} ) ) { + my ( $inner_seqno, $outer_seqno ) = @{$item}; + + my $Kouter_opening = $K_opening_container->{$outer_seqno}; + my $Kinner_opening = $K_opening_container->{$inner_seqno}; + my $Kouter_closing = $K_closing_container->{$outer_seqno}; + my $Kinner_closing = $K_closing_container->{$inner_seqno}; + + # RULE: do not weld if inner container has <= 3 tokens unless the next + # token is a heredoc (so we know there will be multiple lines) + if ( $Kinner_closing - $Kinner_opening <= 4 ) { + my $Knext_nonblank = $self->K_next_nonblank($Kinner_opening); + next unless defined($Knext_nonblank); + my $type = $rLL->[$Knext_nonblank]->[_TYPE_]; + next unless ( $type eq 'h' ); + } + + my $outer_opening = $rLL->[$Kouter_opening]; + my $inner_opening = $rLL->[$Kinner_opening]; + my $outer_closing = $rLL->[$Kouter_closing]; + my $inner_closing = $rLL->[$Kinner_closing]; + + # RULE: do not weld to a hash brace. The reason is that it has a very + # strong bond strength to the next token, so a line break after it + # may not work. Previously we allowed welding to something like @{ + # but that caused blinking states (cases b751, b779). + if ( $inner_opening->[_TYPE_] eq 'L' ) { + next; + } + + # RULE: do not weld to a square bracket which does not contain commas + if ( $inner_opening->[_TYPE_] eq '[' ) { + my $rtype_count = $self->[_rtype_count_by_seqno_]->{$inner_seqno}; + next unless ( $rtype_count && $rtype_count->{','} ); + + # Do not weld if there is text before a '[' such as here: + # curr_opt ( @beg [2,5] ) + # It will not break into the desired sandwich structure. + # This fixes case b109, 110. + my $Kdiff = $Kinner_opening - $Kouter_opening; + next if ( $Kdiff > 2 ); + next + if ( $Kdiff == 2 + && $rLL->[ $Kouter_opening + 1 ]->[_TYPE_] ne 'b' ); + + } + + # RULE: Avoid welding under stress. The idea is that we need to have a + # little space* within a welded container to avoid instability. Note + # that after each weld the level values are reduced, so long multiple + # welds can still be made. This rule will seldom be a limiting factor + # in actual working code. Fixes b1206, b1243. + my $inner_level = $inner_opening->[_LEVEL_]; + if ( $inner_level >= $high_stress_level ) { next } + + # Set flag saying if this pair starts a new weld + my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] ); + + # Set flag saying if this pair is adjacent to the previous nesting pair + # (even if previous pair was rejected as a weld) + my $touch_previous_pair = + defined($previous_pair) && $outer_seqno == $previous_pair->[0]; + $previous_pair = $item; + + my $do_not_weld_rule = 0; + my $Msg = EMPTY_STRING; + my $is_one_line_weld; + + my $iline_oo = $outer_opening->[_LINE_INDEX_]; + my $iline_io = $inner_opening->[_LINE_INDEX_]; + my $iline_ic = $inner_closing->[_LINE_INDEX_]; + my $iline_oc = $outer_closing->[_LINE_INDEX_]; + my $token_oo = $outer_opening->[_TOKEN_]; + my $token_io = $inner_opening->[_TOKEN_]; + + # DO-NOT-WELD RULE 7: Do not weld if this conflicts with -bom + # Added for case b973. Moved here from below to fix b1423. + if ( !$do_not_weld_rule + && $rOpts_break_at_old_method_breakpoints + && $iline_io > $iline_oo ) + { + + foreach my $iline ( $iline_oo + 1 .. $iline_io ) { + my $rK_range = $rlines->[$iline]->{_rK_range}; + next unless defined($rK_range); + my ( $Kfirst, $Klast ) = @{$rK_range}; + next unless defined($Kfirst); + if ( $rLL->[$Kfirst]->[_TYPE_] eq '->' ) { + $do_not_weld_rule = 7; + last; + } + } + } + next if ($do_not_weld_rule); + + # Turn off vertical tightness at possible one-line welds. Fixes b1402, + # b1419, b1421, b1424, b1425. This also fixes issues b1338, b1339, + # b1340, b1341, b1342, b1343, which previously used a separate fix. + # Issue c161 is the latest and simplest check, using + # $iline_ic==$iline_io as the test. + if ( %opening_vertical_tightness + && $iline_ic == $iline_io + && $opening_vertical_tightness{$token_oo} ) + { + $rmax_vertical_tightness->{$outer_seqno} = 0; + } + + my $is_multiline_weld = + $iline_oo == $iline_io + && $iline_ic == $iline_oc + && $iline_io != $iline_ic; + + if (DEBUG_WELD) { + my $len_oo = $rLL->[$Kouter_opening]->[_CUMULATIVE_LENGTH_]; + my $len_io = $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_]; + $Msg .= < $iline_outer_opening ) + ) + { + + # Remember the line we are using as a reference + $iline_outer_opening = $iline_oo; + $weld_count_this_start = 0; + + ( my $new_weld_ok, $maximum_text_length, $starting_lentot, my $msg ) + = $self->setup_new_weld_measurements( $Kouter_opening, + $Kinner_opening ); + + if ( + !$new_weld_ok + && ( $iline_oo != $iline_io + || $iline_ic != $iline_oc ) + ) + { + if (DEBUG_WELD) { print $msg} + next; + } + + my $rK_range = $rlines->[$iline_oo]->{_rK_range}; + my ( $Kfirst, $Klast ) = @{$rK_range}; + + # An existing one-line weld is a line in which + # (1) the containers are all on one line, and + # (2) the line does not exceed the allowable length + if ( $iline_oo == $iline_oc ) { + + # All the tokens are on one line, now check their length. + # Start with the full line index range. We will reduce this + # in the coding below in some cases. + my $Kstart = $Kfirst; + my $Kstop = $Klast; + + # Note that the following minimal choice for measuring will + # work and will not cause any instabilities because it is + # invariant: + + ## my $Kstart = $Kouter_opening; + ## my $Kstop = $Kouter_closing; + + # But that can lead to some undesirable welds. So a little + # more complicated method has been developed. + + # We are trying to avoid creating bad two-line welds when we are + # working on long, previously un-welded input text, such as + + # INPUT (example of a long input line weld candidate): + ## $mutation->transpos( $self->RNA->position($mutation->label, $atg_label)); + + # GOOD two-line break: (not welded; result marked too long): + ## $mutation->transpos( + ## $self->RNA->position($mutation->label, $atg_label)); + + # BAD two-line break: (welded; result if we weld): + ## $mutation->transpos($self->RNA->position( + ## $mutation->label, $atg_label)); + + # We can only get an approximate estimate of the final length, + # since the line breaks may change, and for -lp mode because + # even the indentation is not yet known. + + my $level_first = $rLL->[$Kfirst]->[_LEVEL_]; + my $level_last = $rLL->[$Klast]->[_LEVEL_]; + my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_]; + my $level_oc = $rLL->[$Kouter_closing]->[_LEVEL_]; + + # - measure to the end of the original line if balanced + # - measure to the closing container if unbalanced (fixes b1230) + #if ( $level_first != $level_last ) { $Kstop = $Kouter_closing } + if ( $level_oc > $level_last ) { $Kstop = $Kouter_closing } + + # - measure from the start of the original line if balanced + # - measure from the most previous token with same level + # if unbalanced (b1232) + if ( $Kouter_opening > $Kfirst && $level_oo > $level_first ) { + $Kstart = $Kouter_opening; + + foreach + my $KK ( reverse( $Kfirst + 1 .. $Kouter_opening - 1 ) ) + { + next if ( $rLL->[$KK]->[_TYPE_] eq 'b' ); + last if ( $rLL->[$KK]->[_LEVEL_] < $level_oo ); + $Kstart = $KK; + } + } + + my $excess = + $self->excess_line_length_for_Krange( $Kstart, $Kstop ); + + # Coding simplified here for case b1219. + # Increased tol from 0 to 1 when pvt>0 to fix b1284. + $is_one_line_weld = $excess <= $one_line_tol; + } + + # DO-NOT-WELD RULE 1: + # Do not weld something that looks like the start of a two-line + # function call, like this: <> + # $trans->add_transformation( + # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) ); + # We will look for a semicolon after the closing paren. + + # We want to weld something complex, like this though + # my $compass = uc( opposite_direction( line_to_canvas_direction( + # @{ $coords[0] }, @{ $coords[1] } ) ) ); + # Otherwise we will get a 'blinker'. For example, the following + # would become a blinker without this rule: + # $Self->_Add( $SortOrderDisplay{ $Field + # ->GenerateFieldForSelectSQL() } ); + # But it is okay to weld a two-line statement if it looks like + # it was already welded, meaning that the two opening containers are + # on a different line that the two closing containers. This is + # necessary to prevent blinking of something like this with + # perltidy -wn -pbp (starting indentation two levels deep): + + # $top_label->set_text( gettext( + # "Unable to create personal directory - check permissions.") ); + if ( $iline_oc == $iline_oo + 1 + && $iline_io == $iline_ic + && $token_oo eq '(' ) + { + + # Look for following semicolon... + my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing); + my $next_nonblank_type = + defined($Knext_nonblank) + ? $rLL->[$Knext_nonblank]->[_TYPE_] + : 'b'; + if ( $next_nonblank_type eq ';' ) { + + # Then do not weld if no other containers between inner + # opening and closing. + my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_]; + if ( $Knext_seq_item == $Kinner_closing ) { + $do_not_weld_rule = 1; + } + } + } + } ## end starting new weld sequence + + else { + + # set the 1-line flag if continuing a weld sequence; fixes b1239 + $is_one_line_weld = ( $iline_oo == $iline_oc ); + } + + # DO-NOT-WELD RULE 2: + # Do not weld an opening paren to an inner one line brace block + # We will just use old line numbers for this test and require + # iterations if necessary for convergence + + # For example, otherwise we could cause the opening paren + # in the following example to separate from the caller name + # as here: + + # $_[0]->code_handler + # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } ); + + # Here is another example where we do not want to weld: + # $wrapped->add_around_modifier( + # sub { push @tracelog => 'around 1'; $_[0]->(); } ); + + # If the one line sub block gets broken due to length or by the + # user, then we can weld. The result will then be: + # $wrapped->add_around_modifier( sub { + # push @tracelog => 'around 1'; + # $_[0]->(); + # } ); + + # Updated to fix cases b1082 b1102 b1106 b1115: + # Also, do not weld to an intact inner block if the outer opening token + # is on a different line. For example, this prevents oscillation + # between these two states in case b1106: + + # return map{ + # ($_,[$self->$_(@_[1..$#_])]) + # }@every; + + # return map { ( + # $_, [ $self->$_( @_[ 1 .. $#_ ] ) ] + # ) } @every; + + # The effect of this change on typical code is very minimal. Sometimes + # it may take a second iteration to converge, but this gives protection + # against blinking. + if ( !$do_not_weld_rule + && !$is_one_line_weld + && $iline_ic == $iline_io ) + { + $do_not_weld_rule = 2 + if ( $token_oo eq '(' || $iline_oo != $iline_io ); + } + + # DO-NOT-WELD RULE 2A: + # Do not weld an opening asub brace in -lp mode if -asbl is set. This + # helps avoid instabilities in one-line block formation, and fixes + # b1241. Previously, the '$is_one_line_weld' flag was tested here + # instead of -asbl, and this fixed most cases. But it turns out that + # the real problem was the -asbl flag, and switching to this was + # necessary to fixe b1268. This also fixes b1269, b1277, b1278. + if ( !$do_not_weld_rule + && $rOpts_line_up_parentheses + && $rOpts_asbl + && $ris_asub_block->{$outer_seqno} ) + { + $do_not_weld_rule = '2A'; + } + + # DO-NOT-WELD RULE 3: + # Do not weld if this makes our line too long. + # Use a tolerance which depends on if the old tokens were welded + # (fixes cases b746 b748 b749 b750 b752 b753 b754 b755 b756 b758 b759) + if ( !$do_not_weld_rule ) { + + # Measure to a little beyond the inner opening token if it is + # followed by a bare word, which may have unusual line break rules. + + # NOTE: Originally this was OLD RULE 6: do not weld to a container + # which is followed on the same line by an unknown bareword token. + # This can cause blinkers (cases b626, b611). But OK to weld one + # line welds to fix cases b1057 b1064. For generality, OLD RULE 6 + # has been merged into RULE 3 here to also fix cases b1078 b1091. + + my $K_for_length = $Kinner_opening; + my $Knext_io = $self->K_next_nonblank($Kinner_opening); + next unless ( defined($Knext_io) ); # shouldn't happen + my $type_io_next = $rLL->[$Knext_io]->[_TYPE_]; + + # Note: may need to eventually also include other types here, + # such as 'Z' and 'Y': if ($type_io_next =~ /^[ZYw]$/) { + if ( $type_io_next eq 'w' ) { + my $Knext_io2 = $self->K_next_nonblank($Knext_io); + next unless ( defined($Knext_io2) ); + my $type_io_next2 = $rLL->[$Knext_io2]->[_TYPE_]; + if ( !$type_ok_after_bareword{$type_io_next2} ) { + $K_for_length = $Knext_io2; + } + } + + # Use a tolerance for welds over multiple lines to avoid blinkers. + # We can use zero tolerance if it looks like we are working on an + # existing weld. + my $tol = + $is_one_line_weld || $is_multiline_weld + ? $single_line_tol + : $multiline_tol; + + # By how many characters does this exceed the text window? + my $excess = + $self->cumulative_length_before_K($K_for_length) - + $starting_lentot + 1 + $tol - + $maximum_text_length; + + # Old patch: Use '>=0' instead of '> 0' here to fix cases b995 b998 + # b1000 b1001 b1007 b1008 b1009 b1010 b1011 b1012 b1016 b1017 b1018 + # Revised patch: New tolerance definition allows going back to '> 0' + # here. This fixes case b1124. See also cases b1087 and b1087a. + if ( $excess > 0 ) { $do_not_weld_rule = 3 } + + if (DEBUG_WELD) { + $Msg .= +"RULE 3 test: excess length to K=$Kinner_opening is $excess > 0 with tol= $tol ?) \n"; + } + } + + # DO-NOT-WELD RULE 4; implemented for git#10: + # Do not weld an opening -ce brace if the next container is on a single + # line, different from the opening brace. (This is very rare). For + # example, given the following with -ce, we will avoid joining the { + # and [ + + # } else { + # [ $_, length($_) ] + # } + + # because this would produce a terminal one-line block: + + # } else { [ $_, length($_) ] } + + # which may not be what is desired. But given this input: + + # } else { [ $_, length($_) ] } + + # then we will do the weld and retain the one-line block + if ( !$do_not_weld_rule && $rOpts->{'cuddled-else'} ) { + my $block_type = $rblock_type_of_seqno->{$outer_seqno}; + if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) { + my $io_line = $inner_opening->[_LINE_INDEX_]; + my $ic_line = $inner_closing->[_LINE_INDEX_]; + my $oo_line = $outer_opening->[_LINE_INDEX_]; + if ( $oo_line < $io_line && $ic_line == $io_line ) { + $do_not_weld_rule = 4; + } + } + } + + # DO-NOT-WELD RULE 5: do not include welds excluded by user + if ( + !$do_not_weld_rule + && %weld_nested_exclusion_rules + && ( $self->is_excluded_weld( $Kouter_opening, $starting_new_weld ) + || $self->is_excluded_weld( $Kinner_opening, 0 ) ) + ) + { + $do_not_weld_rule = 5; + } + + # DO-NOT-WELD RULE 6: This has been merged into RULE 3 above. + + if ($do_not_weld_rule) { + + # After neglecting a pair, we start measuring from start of point + # io ... but not if previous type does not like to be separated + # from its container (fixes case b1184) + my $Kprev = $self->K_previous_nonblank($Kinner_opening); + my $type_prev = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'w'; + if ( !$has_tight_paren{$type_prev} ) { + my $starting_level = $inner_opening->[_LEVEL_]; + my $starting_ci_level = $inner_opening->[_CI_LEVEL_]; + $starting_lentot = + $self->cumulative_length_before_K($Kinner_opening); + $maximum_text_length = + $maximum_text_length_at_level[$starting_level] - + $starting_ci_level * $rOpts_continuation_indentation; + } + + if (DEBUG_WELD) { + $Msg .= "Not welding due to RULE $do_not_weld_rule\n"; + print $Msg; + } + + # Normally, a broken pair should not decrease indentation of + # intermediate tokens: + ## if ( $last_pair_broken ) { next } + # However, for long strings of welded tokens, such as '{{{{{{...' + # we will allow broken pairs to also remove indentation. + # This will keep very long strings of opening and closing + # braces from marching off to the right. We will do this if the + # number of tokens in a weld before the broken weld is 4 or more. + # This rule will mainly be needed for test scripts, since typical + # welds have fewer than about 4 welded tokens. + if ( !@welds || @{ $welds[-1] } < 4 ) { next } + } + + # otherwise start new weld ... + elsif ($starting_new_weld) { + $weld_count_this_start++; + if (DEBUG_WELD) { + $Msg .= "Starting new weld\n"; + print $Msg; + } + push @welds, $item; + + $rK_weld_right->{$Kouter_opening} = $Kinner_opening; + $rK_weld_left->{$Kinner_opening} = $Kouter_opening; + + $rK_weld_right->{$Kinner_closing} = $Kouter_closing; + $rK_weld_left->{$Kouter_closing} = $Kinner_closing; + } + + # ... or extend current weld + else { + $weld_count_this_start++; + if (DEBUG_WELD) { + $Msg .= "Extending current weld\n"; + print $Msg; + } + unshift @{ $welds[-1] }, $inner_seqno; + $rK_weld_right->{$Kouter_opening} = $Kinner_opening; + $rK_weld_left->{$Kinner_opening} = $Kouter_opening; + + $rK_weld_right->{$Kinner_closing} = $Kouter_closing; + $rK_weld_left->{$Kouter_closing} = $Kinner_closing; + + # Keep a broken container broken at multiple welds. This might + # also be useful for simple welds, but for now it is restricted + # to multiple welds to minimize changes to existing coding. This + # fixes b1429, b1430. Updated for issue c198: but allow a + # line differences of 1 (simple shear) so that a simple shear + # can remain or become a single line. + if ( $iline_ic - $iline_io > 1 ) { + + # Only set this break if it is the last possible weld in this + # chain. This will keep some extreme test cases unchanged. + my $is_chain_end = !@{$rnested_pairs} + || $rnested_pairs->[-1]->[1] != $inner_seqno; + if ($is_chain_end) { + $self->[_rbreak_container_]->{$inner_seqno} = 1; + } + } + } + + # After welding, reduce the indentation level if all intermediate tokens + my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_]; + if ( $dlevel != 0 ) { + my $Kstart = $Kinner_opening; + my $Kstop = $Kinner_closing; + foreach my $KK ( $Kstart .. $Kstop ) { + $rLL->[$KK]->[_LEVEL_] += $dlevel; + } + + # Copy opening ci level to help break at = for -lp mode (case b1124) + $rLL->[$Kinner_opening]->[_CI_LEVEL_] = + $rLL->[$Kouter_opening]->[_CI_LEVEL_]; + + # But do not copy the closing ci level ... it can give poor results + ## $rLL->[$Kinner_closing]->[_CI_LEVEL_] = + ## $rLL->[$Kouter_closing]->[_CI_LEVEL_]; + } + } + + return; +} ## end sub weld_nested_containers + +sub weld_nested_quotes { + + # Called once per file for option '--weld-nested-containers'. This + # does welding on qw quotes. + + my $self = shift; + + # See if quotes are excluded from welding + my $rflags = $weld_nested_exclusion_rules{'q'}; + return if ( defined($rflags) && defined( $rflags->[1] ) ); + + my $rK_weld_left = $self->[_rK_weld_left_]; + my $rK_weld_right = $self->[_rK_weld_right_]; + + my $rLL = $self->[_rLL_]; + return unless ( defined($rLL) && @{$rLL} ); + my $Num = @{$rLL}; + + my $K_opening_container = $self->[_K_opening_container_]; + my $K_closing_container = $self->[_K_closing_container_]; + my $rlines = $self->[_rlines_]; + + my $starting_lentot; + my $maximum_text_length; + + my $is_single_quote = sub { + my ( $Kbeg, $Kend, $quote_type ) = @_; + foreach my $K ( $Kbeg .. $Kend ) { + my $test_type = $rLL->[$K]->[_TYPE_]; + next if ( $test_type eq 'b' ); + return if ( $test_type ne $quote_type ); + } + return 1; + }; + + # Length tolerance - same as previously used for sub weld_nested + my $multiline_tol = + 1 + max( $rOpts_indent_columns, $rOpts_continuation_indentation ); + + # look for single qw quotes nested in containers + my $KNEXT = $self->[_K_first_seq_item_]; + while ( defined($KNEXT) ) { + my $KK = $KNEXT; + $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_]; + my $rtoken_vars = $rLL->[$KK]; + my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_]; + if ( !$outer_seqno ) { + next if ( $KK == 0 ); # first token in file may not be container + + # A fault here implies that an error was made in the little loop at + # the bottom of sub 'respace_tokens' which set the values of + # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the + # loop control lines above. + Fault("sequence = $outer_seqno not defined at K=$KK") + if (DEVEL_MODE); + next; + } + + my $token = $rtoken_vars->[_TOKEN_]; + if ( $is_opening_token{$token} ) { + + # see if the next token is a quote of some type + my $Kn = $KK + 1; + $Kn += 1 + if ( $Kn < $Num && $rLL->[$Kn]->[_TYPE_] eq 'b' ); + next unless ( $Kn < $Num ); + + my $next_token = $rLL->[$Kn]->[_TOKEN_]; + my $next_type = $rLL->[$Kn]->[_TYPE_]; + next + unless ( ( $next_type eq 'q' || $next_type eq 'Q' ) + && substr( $next_token, 0, 1 ) eq 'q' ); + + # The token before the closing container must also be a quote + my $Kouter_closing = $K_closing_container->{$outer_seqno}; + my $Kinner_closing = $self->K_previous_nonblank($Kouter_closing); + next unless $rLL->[$Kinner_closing]->[_TYPE_] eq $next_type; + + # This is an inner opening container + my $Kinner_opening = $Kn; + + # Do not weld to single-line quotes. Nothing is gained, and it may + # look bad. + next if ( $Kinner_closing == $Kinner_opening ); + + # Only weld to quotes delimited with container tokens. This is + # because welding to arbitrary quote delimiters can produce code + # which is less readable than without welding. + my $closing_delimiter = + substr( $rLL->[$Kinner_closing]->[_TOKEN_], -1, 1 ); + next + unless ( $is_closing_token{$closing_delimiter} + || $closing_delimiter eq '>' ); + + # Now make sure that there is just a single quote in the container + next + unless ( + $is_single_quote->( $Kinner_opening + 1, $Kinner_closing - 1, $next_type ) ); - # OK: This is a candidate for welding - my $Msg = EMPTY_STRING; - my $do_not_weld; + # OK: This is a candidate for welding + my $Msg = EMPTY_STRING; + my $do_not_weld; + + my $Kouter_opening = $K_opening_container->{$outer_seqno}; + my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_]; + my $iline_io = $rLL->[$Kinner_opening]->[_LINE_INDEX_]; + my $iline_oc = $rLL->[$Kouter_closing]->[_LINE_INDEX_]; + my $iline_ic = $rLL->[$Kinner_closing]->[_LINE_INDEX_]; + my $is_old_weld = + ( $iline_oo == $iline_io && $iline_ic == $iline_oc ); + + # Fix for case b1189. If quote is marked as type 'Q' then only weld + # if the two closing tokens are on the same input line. Otherwise, + # the closing line will be output earlier in the pipeline than + # other CODE lines and welding will not actually occur. This will + # leave a half-welded structure with potential formatting + # instability. This might be fixed by adding a check for a weld on + # a closing Q token and sending it down the normal channel, but it + # would complicate the code and is potentially risky. + next + if (!$is_old_weld + && $next_type eq 'Q' + && $iline_ic != $iline_oc ); + + # If welded, the line must not exceed allowed line length + ( my $ok_to_weld, $maximum_text_length, $starting_lentot, my $msg ) + = $self->setup_new_weld_measurements( $Kouter_opening, + $Kinner_opening ); + if ( !$ok_to_weld ) { + if (DEBUG_WELD) { print $msg} + next; + } + + my $length = + $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_] - $starting_lentot; + my $excess = $length + $multiline_tol - $maximum_text_length; + + my $excess_max = ( $is_old_weld ? $multiline_tol : 0 ); + if ( $excess >= $excess_max ) { + $do_not_weld = 1; + } + + if (DEBUG_WELD) { + if ( !$is_old_weld ) { $is_old_weld = EMPTY_STRING } + $Msg .= +"excess=$excess>=$excess_max, multiline_tol=$multiline_tol, is_old_weld='$is_old_weld'\n"; + } + + # Check weld exclusion rules for outer container + if ( !$do_not_weld ) { + my $is_leading = !defined( $rK_weld_left->{$Kouter_opening} ); + if ( $self->is_excluded_weld( $KK, $is_leading ) ) { + if (DEBUG_WELD) { + $Msg .= +"No qw weld due to weld exclusion rules for outer container\n"; + } + $do_not_weld = 1; + } + } + + # Check the length of the last line (fixes case b1039) + if ( !$do_not_weld ) { + my $rK_range_ic = $rlines->[$iline_ic]->{_rK_range}; + my ( $Kfirst_ic, $Klast_ic ) = @{$rK_range_ic}; + my $excess_ic = + $self->excess_line_length_for_Krange( $Kfirst_ic, + $Kouter_closing ); + + # Allow extra space for additional welded closing container(s) + # and a space and comma or semicolon. + # NOTE: weld len has not been computed yet. Use 2 spaces + # for now, correct for a single weld. This estimate could + # be made more accurate if necessary. + my $weld_len = + defined( $rK_weld_right->{$Kouter_closing} ) ? 2 : 0; + if ( $excess_ic + $weld_len + 2 > 0 ) { + if (DEBUG_WELD) { + $Msg .= +"No qw weld due to excess ending line length=$excess_ic + $weld_len + 2 > 0\n"; + } + $do_not_weld = 1; + } + } + + if ($do_not_weld) { + if (DEBUG_WELD) { + $Msg .= "Not Welding QW\n"; + print $Msg; + } + next; + } + + # OK to weld + if (DEBUG_WELD) { + $Msg .= "Welding QW\n"; + print $Msg; + } + + $rK_weld_right->{$Kouter_opening} = $Kinner_opening; + $rK_weld_left->{$Kinner_opening} = $Kouter_opening; + + $rK_weld_right->{$Kinner_closing} = $Kouter_closing; + $rK_weld_left->{$Kouter_closing} = $Kinner_closing; + + # Undo one indentation level if an extra level was added to this + # multiline quote + my $qw_seqno = + $self->[_rstarting_multiline_qw_seqno_by_K_]->{$Kinner_opening}; + if ( $qw_seqno + && $self->[_rmultiline_qw_has_extra_level_]->{$qw_seqno} ) + { + foreach my $K ( $Kinner_opening + 1 .. $Kinner_closing - 1 ) { + $rLL->[$K]->[_LEVEL_] -= 1; + } + $rLL->[$Kinner_opening]->[_CI_LEVEL_] = 0; + $rLL->[$Kinner_closing]->[_CI_LEVEL_] = 0; + } + + # undo CI for other welded quotes + else { + + foreach my $K ( $Kinner_opening .. $Kinner_closing ) { + $rLL->[$K]->[_CI_LEVEL_] = 0; + } + } + + # Change the level of a closing qw token to be that of the outer + # containing token. This will allow -lp indentation to function + # correctly in the vertical aligner. + # Patch to fix c002: but not if it contains text + if ( length( $rLL->[$Kinner_closing]->[_TOKEN_] ) == 1 ) { + $rLL->[$Kinner_closing]->[_LEVEL_] = + $rLL->[$Kouter_closing]->[_LEVEL_]; + } + } + } + return; +} ## end sub weld_nested_quotes + +sub is_welded_at_seqno { + + my ( $self, $seqno ) = @_; + + # given a sequence number: + # return true if it is welded either left or right + # return false otherwise + return unless ( $total_weld_count && defined($seqno) ); + my $KK_o = $self->[_K_opening_container_]->{$seqno}; + return unless defined($KK_o); + return defined( $self->[_rK_weld_left_]->{$KK_o} ) + || defined( $self->[_rK_weld_right_]->{$KK_o} ); +} ## end sub is_welded_at_seqno + +sub mark_short_nested_blocks { + + # This routine looks at the entire file and marks any short nested blocks + # which should not be broken. The results are stored in the hash + # $rshort_nested->{$type_sequence} + # which will be true if the container should remain intact. + # + # For example, consider the following line: + + # sub cxt_two { sort { $a <=> $b } test_if_list() } + + # The 'sort' block is short and nested within an outer sub block. + # Normally, the existence of the 'sort' block will force the sub block to + # break open, but this is not always desirable. Here we will set a flag for + # the sort block to prevent this. To give the user control, we will + # follow the input file formatting. If either of the blocks is broken in + # the input file then we will allow it to remain broken. Otherwise we will + # set a flag to keep it together in later formatting steps. + + # The flag which is set here will be checked in two places: + # 'sub process_line_of_CODE' and 'sub starting_one_line_block' + + my $self = shift; + return if $rOpts->{'indent-only'}; + + my $rLL = $self->[_rLL_]; + return unless ( defined($rLL) && @{$rLL} ); + + return unless ( $rOpts->{'one-line-block-nesting'} ); + + my $K_opening_container = $self->[_K_opening_container_]; + my $K_closing_container = $self->[_K_closing_container_]; + my $rbreak_container = $self->[_rbreak_container_]; + my $ris_broken_container = $self->[_ris_broken_container_]; + my $rshort_nested = $self->[_rshort_nested_]; + my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; + + # Variables needed for estimating line lengths + my $maximum_text_length; + my $starting_lentot; + my $length_tol = 1; + + my $excess_length_to_K = sub { + my ($K) = @_; + + # Estimate the length from the line start to a given token + my $length = $self->cumulative_length_before_K($K) - $starting_lentot; + my $excess_length = $length + $length_tol - $maximum_text_length; + return ($excess_length); + }; + + # loop over all containers + my @open_block_stack; + my $iline = -1; + my $KNEXT = $self->[_K_first_seq_item_]; + while ( defined($KNEXT) ) { + my $KK = $KNEXT; + $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_]; + my $rtoken_vars = $rLL->[$KK]; + my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; + if ( !$type_sequence ) { + next if ( $KK == 0 ); # first token in file may not be container + + # A fault here implies that an error was made in the little loop at + # the bottom of sub 'respace_tokens' which set the values of + # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the + # loop control lines above. + Fault("sequence = $type_sequence not defined at K=$KK") + if (DEVEL_MODE); + next; + } + + # Patch: do not mark short blocks with welds. + # In some cases blinkers can form (case b690). + if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence) ) { + next; + } + + # We are just looking at code blocks + my $token = $rtoken_vars->[_TOKEN_]; + my $type = $rtoken_vars->[_TYPE_]; + next unless ( $type eq $token ); + next unless ( $rblock_type_of_seqno->{$type_sequence} ); + + # Keep a stack of all acceptable block braces seen. + # Only consider blocks entirely on one line so dump the stack when line + # changes. + my $iline_last = $iline; + $iline = $rLL->[$KK]->[_LINE_INDEX_]; + if ( $iline != $iline_last ) { @open_block_stack = () } + + if ( $token eq '}' ) { + if (@open_block_stack) { pop @open_block_stack } + } + next unless ( $token eq '{' ); + + # block must be balanced (bad scripts may be unbalanced) + my $K_opening = $K_opening_container->{$type_sequence}; + my $K_closing = $K_closing_container->{$type_sequence}; + next unless ( defined($K_opening) && defined($K_closing) ); + + # require that this block be entirely on one line + next + if ( $ris_broken_container->{$type_sequence} + || $rbreak_container->{$type_sequence} ); + + # See if this block fits on one line of allowed length (which may + # be different from the input script) + $starting_lentot = + $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; + my $level = $rLL->[$KK]->[_LEVEL_]; + my $ci_level = $rLL->[$KK]->[_CI_LEVEL_]; + $maximum_text_length = + $maximum_text_length_at_level[$level] - + $ci_level * $rOpts_continuation_indentation; + + # Dump the stack if block is too long and skip this block + if ( $excess_length_to_K->($K_closing) > 0 ) { + @open_block_stack = (); + next; + } + + # OK, Block passes tests, remember it + push @open_block_stack, $type_sequence; + + # We are only marking nested code blocks, + # so check for a previous block on the stack + next unless ( @open_block_stack > 1 ); + + # Looks OK, mark this as a short nested block + $rshort_nested->{$type_sequence} = 1; + + } + return; +} ## end sub mark_short_nested_blocks + +sub special_indentation_adjustments { + + my ($self) = @_; + + # Called once per file to do special indentation adjustments. + # These routines adjust levels either by changing _CI_LEVEL_ directly or + # by setting modified levels in the array $self->[_radjusted_levels_]. + + # Initialize the adjusted levels. These will be the levels actually used + # for computing indentation. + + # NOTE: This routine is called after the weld routines, which may have + # already adjusted _LEVEL_, so we are making adjustments on top of those + # levels. It would be much nicer to have the weld routines also use this + # adjustment, but that gets complicated when we combine -gnu -wn and have + # some welded quotes. + my $Klimit = $self->[_Klimit_]; + my $rLL = $self->[_rLL_]; + my $radjusted_levels = $self->[_radjusted_levels_]; + + return unless ( defined($Klimit) ); + + foreach my $KK ( 0 .. $Klimit ) { + $radjusted_levels->[$KK] = $rLL->[$KK]->[_LEVEL_]; + } + + # First set adjusted levels for any non-indenting braces. + $self->do_non_indenting_braces(); + + # Adjust breaks and indentation list containers + $self->break_before_list_opening_containers(); + + # Set adjusted levels for the whitespace cycle option. + $self->whitespace_cycle_adjustment(); + + $self->braces_left_setup(); + + # Adjust continuation indentation if -bli is set + $self->bli_adjustment(); + + $self->extended_ci() + if ($rOpts_extended_continuation_indentation); + + # Now clip any adjusted levels to be non-negative + $self->clip_adjusted_levels(); + + return; +} ## end sub special_indentation_adjustments + +sub clip_adjusted_levels { + + # Replace any negative adjusted levels with zero. + # Negative levels can occur in files with brace errors. + my ($self) = @_; + my $radjusted_levels = $self->[_radjusted_levels_]; + return unless defined($radjusted_levels) && @{$radjusted_levels}; + my $min = min( @{$radjusted_levels} ); # fast check for min + if ( $min < 0 ) { + + # slow loop, but rarely needed + foreach ( @{$radjusted_levels} ) { $_ = 0 if ( $_ < 0 ) } + } + return; +} ## end sub clip_adjusted_levels + +sub do_non_indenting_braces { + + # Called once per file to handle the --non-indenting-braces parameter. + # Remove indentation within marked braces if requested + my ($self) = @_; + + # Any non-indenting braces have been found by sub find_non_indenting_braces + # and are defined by the following hash: + my $rseqno_non_indenting_brace_by_ix = + $self->[_rseqno_non_indenting_brace_by_ix_]; + return unless ( %{$rseqno_non_indenting_brace_by_ix} ); + + my $rlines = $self->[_rlines_]; + my $K_opening_container = $self->[_K_opening_container_]; + my $K_closing_container = $self->[_K_closing_container_]; + my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_]; + my $radjusted_levels = $self->[_radjusted_levels_]; + + # First locate all of the marked blocks + my @K_stack; + foreach my $ix ( keys %{$rseqno_non_indenting_brace_by_ix} ) { + my $seqno = $rseqno_non_indenting_brace_by_ix->{$ix}; + my $KK = $K_opening_container->{$seqno}; + my $line_of_tokens = $rlines->[$ix]; + my $rK_range = $line_of_tokens->{_rK_range}; + my ( $Kfirst, $Klast ) = @{$rK_range}; + $rspecial_side_comment_type->{$Klast} = 'NIB'; + push @K_stack, [ $KK, 1 ]; + my $Kc = $K_closing_container->{$seqno}; + push @K_stack, [ $Kc, -1 ] if ( defined($Kc) ); + } + return unless (@K_stack); + @K_stack = sort { $a->[0] <=> $b->[0] } @K_stack; + + # Then loop to remove indentation within marked blocks + my $KK_last = 0; + my $ndeep = 0; + foreach my $item (@K_stack) { + my ( $KK, $inc ) = @{$item}; + if ( $ndeep > 0 ) { + + foreach ( $KK_last + 1 .. $KK ) { + $radjusted_levels->[$_] -= $ndeep; + } + + # We just subtracted the old $ndeep value, which only applies to a + # '{'. The new $ndeep applies to a '}', so we undo the error. + if ( $inc < 0 ) { $radjusted_levels->[$KK] += 1 } + } + + $ndeep += $inc; + $KK_last = $KK; + } + return; +} ## end sub do_non_indenting_braces + +sub whitespace_cycle_adjustment { + + my $self = shift; + + # Called once per file to implement the --whitespace-cycle option + my $rLL = $self->[_rLL_]; + return unless ( defined($rLL) && @{$rLL} ); + my $radjusted_levels = $self->[_radjusted_levels_]; + my $maximum_level = $self->[_maximum_level_]; + + if ( $rOpts_whitespace_cycle + && $rOpts_whitespace_cycle > 0 + && $rOpts_whitespace_cycle < $maximum_level ) + { + + my $Kmax = @{$rLL} - 1; + + my $whitespace_last_level = -1; + my @whitespace_level_stack = (); + my $last_nonblank_type = 'b'; + my $last_nonblank_token = EMPTY_STRING; + foreach my $KK ( 0 .. $Kmax ) { + my $level_abs = $radjusted_levels->[$KK]; + my $level = $level_abs; + if ( $level_abs < $whitespace_last_level ) { + pop(@whitespace_level_stack); + } + if ( !@whitespace_level_stack ) { + push @whitespace_level_stack, $level_abs; + } + elsif ( $level_abs > $whitespace_last_level ) { + $level = $whitespace_level_stack[-1] + + ( $level_abs - $whitespace_last_level ); + + if ( + # 1 Try to break at a block brace + ( + $level > $rOpts_whitespace_cycle + && $last_nonblank_type eq '{' + && $last_nonblank_token eq '{' + ) + + # 2 Then either a brace or bracket + || ( $level > $rOpts_whitespace_cycle + 1 + && $last_nonblank_token =~ /^[\{\[]$/ ) + + # 3 Then a paren too + || $level > $rOpts_whitespace_cycle + 2 + ) + { + $level = 1; + } + push @whitespace_level_stack, $level; + } + $level = $whitespace_level_stack[-1]; + $radjusted_levels->[$KK] = $level; + + $whitespace_last_level = $level_abs; + my $type = $rLL->[$KK]->[_TYPE_]; + my $token = $rLL->[$KK]->[_TOKEN_]; + if ( $type ne 'b' ) { + $last_nonblank_type = $type; + $last_nonblank_token = $token; + } + } + } + return; +} ## end sub whitespace_cycle_adjustment + +use constant DEBUG_BBX => 0; + +sub break_before_list_opening_containers { + + my ($self) = @_; + + # This routine is called once per batch to implement parameters + # --break-before-hash-brace=n and similar -bbx=n flags + # and their associated indentation flags: + # --break-before-hash-brace-and-indent and similar -bbxi=n + + # Nothing to do if none of the -bbx=n parameters has been set + return unless %break_before_container_types; + + my $rLL = $self->[_rLL_]; + return unless ( defined($rLL) && @{$rLL} ); + + # Loop over all opening container tokens + my $K_opening_container = $self->[_K_opening_container_]; + my $K_closing_container = $self->[_K_closing_container_]; + my $ris_broken_container = $self->[_ris_broken_container_]; + my $ris_permanently_broken = $self->[_ris_permanently_broken_]; + my $rhas_list = $self->[_rhas_list_]; + my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_]; + my $radjusted_levels = $self->[_radjusted_levels_]; + my $rparent_of_seqno = $self->[_rparent_of_seqno_]; + my $rlines = $self->[_rlines_]; + my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_]; + my $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_]; + my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_]; + my $rK_weld_right = $self->[_rK_weld_right_]; + my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; + + my $length_tol = + max( 1, $rOpts_continuation_indentation, $rOpts_indent_columns ); + if ($rOpts_ignore_old_breakpoints) { + + # Patch suggested by b1231; the old tol was excessive. + ## $length_tol += $rOpts_maximum_line_length; + $length_tol *= 2; + } + + my $rbreak_before_container_by_seqno = {}; + my $rwant_reduced_ci = {}; + foreach my $seqno ( keys %{$K_opening_container} ) { + + #---------------------------------------------------------------- + # Part 1: Examine any -bbx=n flags + #---------------------------------------------------------------- + + next if ( $rblock_type_of_seqno->{$seqno} ); + my $KK = $K_opening_container->{$seqno}; + + # This must be a list or contain a list. + # Note1: switched from 'has_broken_list' to 'has_list' to fix b1024. + # Note2: 'has_list' holds the depth to the sub-list. We will require + # a depth of just 1 + my $is_list = $self->is_list_by_seqno($seqno); + my $has_list = $rhas_list->{$seqno}; + + # Fix for b1173: if welded opening container, use flag of innermost + # seqno. Otherwise, the restriction $has_list==1 prevents triple and + # higher welds from following the -BBX parameters. + if ($total_weld_count) { + my $KK_test = $rK_weld_right->{$KK}; + if ( defined($KK_test) ) { + my $seqno_inner = $rLL->[$KK_test]->[_TYPE_SEQUENCE_]; + $is_list ||= $self->is_list_by_seqno($seqno_inner); + $has_list = $rhas_list->{$seqno_inner}; + } + } + + next unless ( $is_list || $has_list && $has_list == 1 ); + + my $has_list_with_lec = $rhas_broken_list_with_lec->{$seqno}; + + # Only for types of container tokens with a non-default break option + my $token = $rLL->[$KK]->[_TOKEN_]; + my $break_option = $break_before_container_types{$token}; + next unless ($break_option); + + # Do not use -bbx under stress for stability ... fixes b1300 + # TODO: review this; do we also need to look at stress_level_lalpha? + my $level = $rLL->[$KK]->[_LEVEL_]; + if ( $level >= $stress_level_beta ) { + DEBUG_BBX + && print +"BBX: Switching off at $seqno: level=$level exceeds beta stress level=$stress_level_beta\n"; + next; + } + + # Require previous nonblank to be '=' or '=>' + my $Kprev = $KK - 1; + next if ( $Kprev < 0 ); + my $prev_type = $rLL->[$Kprev]->[_TYPE_]; + if ( $prev_type eq 'b' ) { + $Kprev--; + next if ( $Kprev < 0 ); + $prev_type = $rLL->[$Kprev]->[_TYPE_]; + } + next unless ( $is_equal_or_fat_comma{$prev_type} ); + + my $ci = $rLL->[$KK]->[_CI_LEVEL_]; + + #-------------------------------------------- + # New coding for option 2 (break if complex). + #-------------------------------------------- + # This new coding uses clues which are invariant under formatting to + # decide if a list is complex. For now it is only applied when -lp + # and -vmll are used, but eventually it may become the standard method. + # Fixes b1274, b1275, and others, including b1099. + if ( $break_option == 2 ) { + + if ( $rOpts_line_up_parentheses + || $rOpts_variable_maximum_line_length ) + { + + # Start with the basic definition of a complex list... + my $is_complex = $is_list && $has_list; + + # and it is also complex if the parent is a list + if ( !$is_complex ) { + my $parent = $rparent_of_seqno->{$seqno}; + if ( $self->is_list_by_seqno($parent) ) { + $is_complex = 1; + } + } + + # finally, we will call it complex if there are inner opening + # and closing container tokens, not parens, within the outer + # container tokens. + if ( !$is_complex ) { + my $Kp = $self->K_next_nonblank($KK); + my $token_p = defined($Kp) ? $rLL->[$Kp]->[_TOKEN_] : 'b'; + if ( $is_opening_token{$token_p} && $token_p ne '(' ) { + + my $Kc = $K_closing_container->{$seqno}; + my $Km = $self->K_previous_nonblank($Kc); + my $token_m = + defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b'; + + # ignore any optional ending comma + if ( $token_m eq ',' ) { + $Km = $self->K_previous_nonblank($Km); + $token_m = + defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b'; + } + + $is_complex ||= + $is_closing_token{$token_m} && $token_m ne ')'; + } + } + + # Convert to option 3 (always break) if complex + next unless ($is_complex); + $break_option = 3; + } + } + + # Fix for b1231: the has_list_with_lec does not cover all cases. + # A broken container containing a list and with line-ending commas + # will stay broken, so can be treated as if it had a list with lec. + $has_list_with_lec ||= + $has_list + && $ris_broken_container->{$seqno} + && $rlec_count_by_seqno->{$seqno}; + + DEBUG_BBX + && print STDOUT +"BBX: Looking at seqno=$seqno, token = $token with option=$break_option\n"; + + # -bbx=1 = stable, try to follow input + if ( $break_option == 1 ) { + + my $iline = $rLL->[$KK]->[_LINE_INDEX_]; + my $rK_range = $rlines->[$iline]->{_rK_range}; + my ( $Kfirst, $Klast ) = @{$rK_range}; + next unless ( $KK == $Kfirst ); + } + + # -bbx=2 => apply this style only for a 'complex' list + elsif ( $break_option == 2 ) { + + # break if this list contains a broken list with line-ending comma + my $ok_to_break; + my $Msg = EMPTY_STRING; + if ($has_list_with_lec) { + $ok_to_break = 1; + DEBUG_BBX && do { $Msg = "has list with lec;" }; + } + + if ( !$ok_to_break ) { + + # Turn off -xci if -bbx=2 and this container has a sublist but + # not a broken sublist. This avoids creating blinkers. The + # problem is that -xci can cause one-line lists to break open, + # and thereby creating formatting instability. + # This fixes cases b1033 b1036 b1037 b1038 b1042 b1043 b1044 + # b1045 b1046 b1047 b1051 b1052 b1061. + if ($has_list) { $rno_xci_by_seqno->{$seqno} = 1 } + + my $parent = $rparent_of_seqno->{$seqno}; + if ( $self->is_list_by_seqno($parent) ) { + DEBUG_BBX && do { $Msg = "parent is list" }; + $ok_to_break = 1; + } + } + + if ( !$ok_to_break ) { + DEBUG_BBX + && print STDOUT "Not breaking at seqno=$seqno: $Msg\n"; + next; + } + + DEBUG_BBX + && print STDOUT "OK to break at seqno=$seqno: $Msg\n"; + + # Patch: turn off -xci if -bbx=2 and -lp + # This fixes cases b1090 b1095 b1101 b1116 b1118 b1121 b1122 + $rno_xci_by_seqno->{$seqno} = 1 if ($rOpts_line_up_parentheses); + } + + # -bbx=3 = always break + elsif ( $break_option == 3 ) { + + # ok to break + } + + # Shouldn't happen! Bad flag, but make behavior same as 3 + else { + # ok to break + } + + # Set a flag for actual implementation later in + # sub insert_breaks_before_list_opening_containers + $rbreak_before_container_by_seqno->{$seqno} = 1; + DEBUG_BBX + && print STDOUT "BBX: ok to break at seqno=$seqno\n"; + + # -bbxi=0: Nothing more to do if the ci value remains unchanged + my $ci_flag = $container_indentation_options{$token}; + next unless ($ci_flag); + + # -bbxi=1: This option removes ci and is handled in + # later sub get_final_indentation + if ( $ci_flag == 1 ) { + $rwant_reduced_ci->{$seqno} = 1; + next; + } + + # -bbxi=2: This option changes the level ... + # This option can conflict with -xci in some cases. We can turn off + # -xci for this container to avoid blinking. For now, only do this if + # -vmll is set. ( fixes b1335, b1336 ) + if ($rOpts_variable_maximum_line_length) { + $rno_xci_by_seqno->{$seqno} = 1; + } + + #---------------------------------------------------------------- + # Part 2: Perform tests before committing to changing ci and level + #---------------------------------------------------------------- + + # Before changing the ci level of the opening container, we need + # to be sure that the container will be broken in the later stages of + # formatting. We have to do this because we are working early in the + # formatting pipeline. A problem can occur if we change the ci or + # level of the opening token but do not actually break the container + # open as expected. In most cases it wouldn't make any difference if + # we changed ci or not, but there are some edge cases where this + # can cause blinking states, so we need to try to only change ci if + # the container will really be broken. + + # Only consider containers already broken + next if ( !$ris_broken_container->{$seqno} ); + + # Patch to fix issue b1305: the combination of -naws and ci>i appears + # to cause an instability. It should almost never occur in practice. + next + if (!$rOpts_add_whitespace + && $rOpts_continuation_indentation > $rOpts_indent_columns ); + + # Always ok to change ci for permanently broken containers + if ( $ris_permanently_broken->{$seqno} ) { } + + # Always OK if this list contains a broken sub-container with + # a non-terminal line-ending comma + elsif ($has_list_with_lec) { } + + # Otherwise, we are considering a single container... + else { + + # A single container must have at least 1 line-ending comma: + next unless ( $rlec_count_by_seqno->{$seqno} ); + + my $OK; + + # Since it has a line-ending comma, it will stay broken if the + # -boc flag is set + if ($rOpts_break_at_old_comma_breakpoints) { $OK = 1 } + + # OK if the container contains multiple fat commas + # Better: multiple lines with fat commas + if ( !$OK && !$rOpts_ignore_old_breakpoints ) { + my $rtype_count = $rtype_count_by_seqno->{$seqno}; + next unless ($rtype_count); + my $fat_comma_count = $rtype_count->{'=>'}; + DEBUG_BBX + && print STDOUT "BBX: fat comma count=$fat_comma_count\n"; + if ( $fat_comma_count && $fat_comma_count >= 2 ) { $OK = 1 } + } + + # The last check we can make is to see if this container could + # fit on a single line. Use the least possible indentation + # estimate, ci=0, so we are not subtracting $ci * + # $rOpts_continuation_indentation from tabulated + # $maximum_text_length value. + if ( !$OK ) { + my $maximum_text_length = $maximum_text_length_at_level[$level]; + my $K_closing = $K_closing_container->{$seqno}; + my $length = $self->cumulative_length_before_K($K_closing) - + $self->cumulative_length_before_K($KK); + my $excess_length = $length - $maximum_text_length; + DEBUG_BBX + && print STDOUT +"BBX: excess=$excess_length: maximum_text_length=$maximum_text_length, length=$length, ci=$ci\n"; + + # OK if the net container definitely breaks on length + if ( $excess_length > $length_tol ) { + $OK = 1; + DEBUG_BBX + && print STDOUT "BBX: excess_length=$excess_length\n"; + } + + # Otherwise skip it + else { next } + } + } + + #------------------------------------------------------------ + # Part 3: Looks OK: apply -bbx=n and any related -bbxi=n flag + #------------------------------------------------------------ + + DEBUG_BBX && print STDOUT "BBX: OK to break\n"; + + # -bbhbi=n + # -bbsbi=n + # -bbpi=n + + # where: + + # n=0 default indentation (usually one ci) + # n=1 outdent one ci + # n=2 indent one level (minus one ci) + # n=3 indent one extra ci [This may be dropped] + + # NOTE: We are adjusting indentation of the opening container. The + # closing container will normally follow the indentation of the opening + # container automatically, so this is not currently done. + next unless ($ci); + + # option 1: outdent + if ( $ci_flag == 1 ) { + $ci -= 1; + } + + # option 2: indent one level + elsif ( $ci_flag == 2 ) { + $ci -= 1; + $radjusted_levels->[$KK] += 1; + } + + # unknown option + else { + # Shouldn't happen - leave ci unchanged + } + + $rLL->[$KK]->[_CI_LEVEL_] = $ci if ( $ci >= 0 ); + } + + $self->[_rbreak_before_container_by_seqno_] = + $rbreak_before_container_by_seqno; + $self->[_rwant_reduced_ci_] = $rwant_reduced_ci; + return; +} ## end sub break_before_list_opening_containers + +use constant DEBUG_XCI => 0; + +sub extended_ci { + + # This routine implements the -xci (--extended-continuation-indentation) + # flag. We add CI to interior tokens of a container which itself has CI but + # only if a token does not already have CI. + + # To do this, we will locate opening tokens which themselves have + # continuation indentation (CI). We track them with their sequence + # numbers. These sequence numbers are called 'controlling sequence + # numbers'. They apply continuation indentation to the tokens that they + # contain. These inner tokens remember their controlling sequence numbers. + # Later, when these inner tokens are output, they have to see if the output + # lines with their controlling tokens were output with CI or not. If not, + # then they must remove their CI too. + + # The controlling CI concept works hierarchically. But CI itself is not + # hierarchical; it is either on or off. There are some rare instances where + # it would be best to have hierarchical CI too, but not enough to be worth + # the programming effort. + + # The operations to remove unwanted CI are done in sub 'undo_ci'. + + my ($self) = @_; + + my $rLL = $self->[_rLL_]; + return unless ( defined($rLL) && @{$rLL} ); + + my $ris_list_by_seqno = $self->[_ris_list_by_seqno_]; + my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_]; + my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_]; + my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_]; + my $ris_bli_container = $self->[_ris_bli_container_]; + my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; + + my %available_space; + + # Loop over all opening container tokens + my $K_opening_container = $self->[_K_opening_container_]; + my $K_closing_container = $self->[_K_closing_container_]; + my @seqno_stack; + my $seqno_top; + my $KLAST; + my $KNEXT = $self->[_K_first_seq_item_]; + + # The following variable can be used to allow a little extra space to + # avoid blinkers. A value $len_tol = 20 fixed the following + # fixes cases: b1025 b1026 b1027 b1028 b1029 b1030 but NOT b1031. + # It turned out that the real problem was mis-parsing a list brace as + # a code block in a 'use' statement when the line length was extremely + # small. A value of 0 works now, but a slightly larger value can + # be used to minimize the chance of a blinker. + my $len_tol = 0; + + while ( defined($KNEXT) ) { + + # Fix all tokens up to the next sequence item if we are changing CI + if ($seqno_top) { + + my $is_list = $ris_list_by_seqno->{$seqno_top}; + my $space = $available_space{$seqno_top}; + my $count = 0; + foreach my $Kt ( $KLAST + 1 .. $KNEXT - 1 ) { + + next if ( $rLL->[$Kt]->[_CI_LEVEL_] ); + + # But do not include tokens which might exceed the line length + # and are not in a list. + # ... This fixes case b1031 + if ( $is_list + || $rLL->[$Kt]->[_TOKEN_LENGTH_] < $space + || $rLL->[$Kt]->[_TYPE_] eq '#' ) + { + $rLL->[$Kt]->[_CI_LEVEL_] = 1; + $rseqno_controlling_my_ci->{$Kt} = $seqno_top; + $count++; + } + } + $ris_seqno_controlling_ci->{$seqno_top} += $count; + } - my $Kouter_opening = $K_opening_container->{$outer_seqno}; - my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_]; - my $iline_io = $rLL->[$Kinner_opening]->[_LINE_INDEX_]; - my $iline_oc = $rLL->[$Kouter_closing]->[_LINE_INDEX_]; - my $iline_ic = $rLL->[$Kinner_closing]->[_LINE_INDEX_]; - my $is_old_weld = - ( $iline_oo == $iline_io && $iline_ic == $iline_oc ); + $KLAST = $KNEXT; + my $KK = $KNEXT; + $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_]; - # Fix for case b1189. If quote is marked as type 'Q' then only weld - # if the two closing tokens are on the same input line. Otherwise, - # the closing line will be output earlier in the pipeline than - # other CODE lines and welding will not actually occur. This will - # leave a half-welded structure with potential formatting - # instability. This might be fixed by adding a check for a weld on - # a closing Q token and sending it down the normal channel, but it - # would complicate the code and is potentially risky. - next - if (!$is_old_weld - && $next_type eq 'Q' - && $iline_ic != $iline_oc ); + my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_]; + + # see if we have reached the end of the current controlling container + if ( $seqno_top && $seqno == $seqno_top ) { + $seqno_top = pop @seqno_stack; + } + + # Patch to fix some block types... + # Certain block types arrive from the tokenizer without CI but should + # have it for this option. These include anonymous subs and + # do sort map grep eval + my $block_type = $rblock_type_of_seqno->{$seqno}; + if ( $block_type && $is_block_with_ci{$block_type} ) { + $rLL->[$KK]->[_CI_LEVEL_] = 1; + if ($seqno_top) { + $rseqno_controlling_my_ci->{$KK} = $seqno_top; + $ris_seqno_controlling_ci->{$seqno_top}++; + } + } + + # If this does not have ci, update ci if necessary and continue looking + elsif ( !$rLL->[$KK]->[_CI_LEVEL_] ) { + if ($seqno_top) { + $rLL->[$KK]->[_CI_LEVEL_] = 1; + $rseqno_controlling_my_ci->{$KK} = $seqno_top; + $ris_seqno_controlling_ci->{$seqno_top}++; + } + next; + } + + # We are looking for opening container tokens with ci + my $K_opening = $K_opening_container->{$seqno}; + next unless ( defined($K_opening) && $KK == $K_opening ); + + # Make sure there is a corresponding closing container + # (could be missing if the script has a brace error) + my $K_closing = $K_closing_container->{$seqno}; + next unless defined($K_closing); + + # Skip if requested by -bbx to avoid blinkers + next if ( $rno_xci_by_seqno->{$seqno} ); + + # Skip if this is a -bli container (this fixes case b1065) Note: case + # b1065 is also fixed by the update for b1055, so this update is not + # essential now. But there does not seem to be a good reason to add + # xci and bli together, so the update is retained. + next if ( $ris_bli_container->{$seqno} ); + + # Require different input lines. This will filter out a large number + # of small hash braces and array brackets. If we accidentally filter + # out an important container, it will get fixed on the next pass. + if ( + $rLL->[$K_opening]->[_LINE_INDEX_] == + $rLL->[$K_closing]->[_LINE_INDEX_] + && ( $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] - + $rLL->[$K_opening]->[_CUMULATIVE_LENGTH_] > + $rOpts_maximum_line_length ) + ) + { + DEBUG_XCI + && print "XCI: Skipping seqno=$seqno, require different lines\n"; + next; + } + + # Do not apply -xci if adding extra ci will put the container contents + # beyond the line length limit (fixes cases b899 b935) + my $level = $rLL->[$K_opening]->[_LEVEL_]; + my $ci_level = $rLL->[$K_opening]->[_CI_LEVEL_]; + my $maximum_text_length = + $maximum_text_length_at_level[$level] - + $ci_level * $rOpts_continuation_indentation; + + # Fix for b1197 b1198 b1199 b1200 b1201 b1202 + # Do not apply -xci if we are running out of space + # TODO: review this; do we also need to look at stress_level_alpha? + if ( $level >= $stress_level_beta ) { + DEBUG_XCI + && print +"XCI: Skipping seqno=$seqno, level=$level exceeds stress level=$stress_level_beta\n"; + next; + } + + # remember how much space is available for patch b1031 above + my $space = + $maximum_text_length - $len_tol - $rOpts_continuation_indentation; + + if ( $space < 0 ) { + DEBUG_XCI && print "XCI: Skipping seqno=$seqno, space=$space\n"; + next; + } + DEBUG_XCI && print "XCI: OK seqno=$seqno, space=$space\n"; + + $available_space{$seqno} = $space; + + # This becomes the next controlling container + push @seqno_stack, $seqno_top if ($seqno_top); + $seqno_top = $seqno; + } + return; +} ## end sub extended_ci + +sub braces_left_setup { + + # Called once per file to mark all -bl, -sbl, and -asbl containers + my $self = shift; + + my $rOpts_bl = $rOpts->{'opening-brace-on-new-line'}; + my $rOpts_sbl = $rOpts->{'opening-sub-brace-on-new-line'}; + my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'}; + return unless ( $rOpts_bl || $rOpts_sbl || $rOpts_asbl ); + + my $rLL = $self->[_rLL_]; + return unless ( defined($rLL) && @{$rLL} ); + + # We will turn on this hash for braces controlled by these flags: + my $rbrace_left = $self->[_rbrace_left_]; + + my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; + my $ris_asub_block = $self->[_ris_asub_block_]; + my $ris_sub_block = $self->[_ris_sub_block_]; + foreach my $seqno ( keys %{$rblock_type_of_seqno} ) { + + my $block_type = $rblock_type_of_seqno->{$seqno}; + + # use -asbl flag for an anonymous sub block + if ( $ris_asub_block->{$seqno} ) { + if ($rOpts_asbl) { + $rbrace_left->{$seqno} = 1; + } + } + + # use -sbl flag for a named sub + elsif ( $ris_sub_block->{$seqno} ) { + if ($rOpts_sbl) { + $rbrace_left->{$seqno} = 1; + } + } + + # use -bl flag if not a sub block of any type + else { + if ( $rOpts_bl + && $block_type =~ /$bl_pattern/ + && $block_type !~ /$bl_exclusion_pattern/ ) + { + $rbrace_left->{$seqno} = 1; + } + } + } + return; +} ## end sub braces_left_setup + +sub bli_adjustment { + + # Called once per file to implement the --brace-left-and-indent option. + # If -bli is set, adds one continuation indentation for certain braces + my $self = shift; + return unless ( $rOpts->{'brace-left-and-indent'} ); + my $rLL = $self->[_rLL_]; + return unless ( defined($rLL) && @{$rLL} ); + + my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; + my $ris_bli_container = $self->[_ris_bli_container_]; + my $rbrace_left = $self->[_rbrace_left_]; + my $K_opening_container = $self->[_K_opening_container_]; + my $K_closing_container = $self->[_K_closing_container_]; + + foreach my $seqno ( keys %{$rblock_type_of_seqno} ) { + my $block_type = $rblock_type_of_seqno->{$seqno}; + if ( $block_type + && $block_type =~ /$bli_pattern/ + && $block_type !~ /$bli_exclusion_pattern/ ) + { + $ris_bli_container->{$seqno} = 1; + $rbrace_left->{$seqno} = 1; + my $Ko = $K_opening_container->{$seqno}; + my $Kc = $K_closing_container->{$seqno}; + if ( defined($Ko) && defined($Kc) ) { + $rLL->[$Kc]->[_CI_LEVEL_] = ++$rLL->[$Ko]->[_CI_LEVEL_]; + } + } + } + return; +} ## end sub bli_adjustment + +sub find_multiline_qw { + + my ( $self, $rqw_lines ) = @_; + + # Multiline qw quotes are not sequenced items like containers { [ ( + # but behave in some respects in a similar way. So this routine finds them + # and creates a separate sequence number system for later use. + + # This is straightforward because they always begin at the end of one line + # and end at the beginning of a later line. This is true no matter how we + # finally make our line breaks, so we can find them before deciding on new + # line breaks. + + # Input parameter: + # if $rqw_lines is defined it is a ref to array of all line index numbers + # for which there is a type 'q' qw quote at either end of the line. This + # was defined by sub resync_lines_and_tokens for efficiency. + # + + my $rlines = $self->[_rlines_]; + + # if $rqw_lines is not defined (this will occur with -io option) then we + # will have to scan all lines. + if ( !defined($rqw_lines) ) { + $rqw_lines = [ 0 .. @{$rlines} - 1 ]; + } + + # if $rqw_lines is defined but empty, just return because there are no + # multiline qw's + else { + if ( !@{$rqw_lines} ) { return } + } + + my $rstarting_multiline_qw_seqno_by_K = {}; + my $rending_multiline_qw_seqno_by_K = {}; + my $rKrange_multiline_qw_by_seqno = {}; + my $rmultiline_qw_has_extra_level = {}; + + my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_]; + + my $rLL = $self->[_rLL_]; + my $qw_seqno; + my $num_qw_seqno = 0; + my $K_start_multiline_qw; + + # For reference, here is the old loop, before $rqw_lines became available: + ## foreach my $line_of_tokens ( @{$rlines} ) { + foreach my $iline ( @{$rqw_lines} ) { + my $line_of_tokens = $rlines->[$iline]; + + # Note that these first checks are required in case we have to scan + # all lines, not just lines with type 'q' at the ends. + my $line_type = $line_of_tokens->{_line_type}; + next unless ( $line_type eq 'CODE' ); + my $rK_range = $line_of_tokens->{_rK_range}; + my ( $Kfirst, $Klast ) = @{$rK_range}; + next unless ( defined($Kfirst) && defined($Klast) ); # skip blank line + + # Continuing a sequence of qw lines ... + if ( defined($K_start_multiline_qw) ) { + my $type = $rLL->[$Kfirst]->[_TYPE_]; - # If welded, the line must not exceed allowed line length - ( my $ok_to_weld, $maximum_text_length, $starting_lentot, my $msg ) - = $self->setup_new_weld_measurements( $Kouter_opening, - $Kinner_opening ); - if ( !$ok_to_weld ) { - if (DEBUG_WELD) { print $msg} + # shouldn't happen + if ( $type ne 'q' ) { + DEVEL_MODE && print STDERR <[$Kinner_opening]->[_CUMULATIVE_LENGTH_] - $starting_lentot; - my $excess = $length + $multiline_tol - $maximum_text_length; - - my $excess_max = ( $is_old_weld ? $multiline_tol : 0 ); - if ( $excess >= $excess_max ) { - $do_not_weld = 1; + my $Kprev = $self->K_previous_nonblank($Kfirst); + my $Knext = $self->K_next_nonblank($Kfirst); + my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b'; + my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b'; + if ( $type_m eq 'q' && $type_p ne 'q' ) { + $rending_multiline_qw_seqno_by_K->{$Kfirst} = $qw_seqno; + $rKrange_multiline_qw_by_seqno->{$qw_seqno} = + [ $K_start_multiline_qw, $Kfirst ]; + $K_start_multiline_qw = undef; + $qw_seqno = undef; } + } - if (DEBUG_WELD) { - if ( !$is_old_weld ) { $is_old_weld = EMPTY_STRING } - $Msg .= -"excess=$excess>=$excess_max, multiline_tol=$multiline_tol, is_old_weld='$is_old_weld'\n"; + # Starting a new a sequence of qw lines ? + if ( !defined($K_start_multiline_qw) + && $rLL->[$Klast]->[_TYPE_] eq 'q' ) + { + my $Kprev = $self->K_previous_nonblank($Klast); + my $Knext = $self->K_next_nonblank($Klast); + my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b'; + my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b'; + if ( $type_m ne 'q' && $type_p eq 'q' ) { + $num_qw_seqno++; + $qw_seqno = 'q' . $num_qw_seqno; + $K_start_multiline_qw = $Klast; + $rstarting_multiline_qw_seqno_by_K->{$Klast} = $qw_seqno; } + } + } - # Check weld exclusion rules for outer container - if ( !$do_not_weld ) { - my $is_leading = !defined( $rK_weld_left->{$Kouter_opening} ); - if ( $self->is_excluded_weld( $KK, $is_leading ) ) { - if (DEBUG_WELD) { - $Msg .= -"No qw weld due to weld exclusion rules for outer container\n"; - } - $do_not_weld = 1; - } - } + # Give multiline qw lists extra indentation instead of CI. This option + # works well but is currently only activated when the -xci flag is set. + # The reason is to avoid unexpected changes in formatting. + if ($rOpts_extended_continuation_indentation) { + while ( my ( $qw_seqno_x, $rKrange ) = + each %{$rKrange_multiline_qw_by_seqno} ) + { + my ( $Kbeg, $Kend ) = @{$rKrange}; - # Check the length of the last line (fixes case b1039) - if ( !$do_not_weld ) { - my $rK_range_ic = $rlines->[$iline_ic]->{_rK_range}; - my ( $Kfirst_ic, $Klast_ic ) = @{$rK_range_ic}; - my $excess_ic = - $self->excess_line_length_for_Krange( $Kfirst_ic, - $Kouter_closing ); + # require isolated closing token + my $token_end = $rLL->[$Kend]->[_TOKEN_]; + next + unless ( length($token_end) == 1 + && ( $is_closing_token{$token_end} || $token_end eq '>' ) ); - # Allow extra space for additional welded closing container(s) - # and a space and comma or semicolon. - # NOTE: weld len has not been computed yet. Use 2 spaces - # for now, correct for a single weld. This estimate could - # be made more accurate if necessary. - my $weld_len = - defined( $rK_weld_right->{$Kouter_closing} ) ? 2 : 0; - if ( $excess_ic + $weld_len + 2 > 0 ) { - if (DEBUG_WELD) { - $Msg .= -"No qw weld due to excess ending line length=$excess_ic + $weld_len + 2 > 0\n"; - } - $do_not_weld = 1; - } - } + # require isolated opening token + my $token_beg = $rLL->[$Kbeg]->[_TOKEN_]; - if ($do_not_weld) { - if (DEBUG_WELD) { - $Msg .= "Not Welding QW\n"; - print $Msg; - } - next; + # allow space(s) after the qw + if ( length($token_beg) > 3 && substr( $token_beg, 2, 1 ) =~ m/\s/ ) + { + $token_beg =~ s/\s+//; } - # OK to weld - if (DEBUG_WELD) { - $Msg .= "Welding QW\n"; - print $Msg; + next unless ( length($token_beg) == 3 ); + + foreach my $KK ( $Kbeg + 1 .. $Kend - 1 ) { + $rLL->[$KK]->[_LEVEL_]++; + $rLL->[$KK]->[_CI_LEVEL_] = 0; } - $rK_weld_right->{$Kouter_opening} = $Kinner_opening; - $rK_weld_left->{$Kinner_opening} = $Kouter_opening; + # set flag for -wn option, which will remove the level + $rmultiline_qw_has_extra_level->{$qw_seqno_x} = 1; + } + } - $rK_weld_right->{$Kinner_closing} = $Kouter_closing; - $rK_weld_left->{$Kouter_closing} = $Kinner_closing; + # For the -lp option we need to mark all parent containers of + # multiline quotes + if ( $rOpts_line_up_parentheses && !$rOpts_extended_line_up_parentheses ) { - # Undo one indentation level if an extra level was added to this - # multiline quote - my $qw_seqno = - $self->[_rstarting_multiline_qw_seqno_by_K_]->{$Kinner_opening}; - if ( $qw_seqno - && $self->[_rmultiline_qw_has_extra_level_]->{$qw_seqno} ) - { - foreach my $K ( $Kinner_opening + 1 .. $Kinner_closing - 1 ) { - $rLL->[$K]->[_LEVEL_] -= 1; - } - $rLL->[$Kinner_opening]->[_CI_LEVEL_] = 0; - $rLL->[$Kinner_closing]->[_CI_LEVEL_] = 0; - } + while ( my ( $qw_seqno_x, $rKrange ) = + each %{$rKrange_multiline_qw_by_seqno} ) + { + my ( $Kbeg, $Kend ) = @{$rKrange}; + my $parent_seqno = $self->parent_seqno_by_K($Kend); + next unless ($parent_seqno); - # undo CI for other welded quotes - else { + # If the parent container exactly surrounds this qw, then -lp + # formatting seems to work so we will not mark it. + my $is_tightly_contained; + my $Kn = $self->K_next_nonblank($Kend); + my $seqno_n = defined($Kn) ? $rLL->[$Kn]->[_TYPE_SEQUENCE_] : undef; + if ( defined($seqno_n) && $seqno_n eq $parent_seqno ) { - foreach my $K ( $Kinner_opening .. $Kinner_closing ) { - $rLL->[$K]->[_CI_LEVEL_] = 0; + my $Kp = $self->K_previous_nonblank($Kbeg); + my $seqno_p = + defined($Kp) ? $rLL->[$Kp]->[_TYPE_SEQUENCE_] : undef; + if ( defined($seqno_p) && $seqno_p eq $parent_seqno ) { + $is_tightly_contained = 1; } } - # Change the level of a closing qw token to be that of the outer - # containing token. This will allow -lp indentation to function - # correctly in the vertical aligner. - # Patch to fix c002: but not if it contains text - if ( length( $rLL->[$Kinner_closing]->[_TOKEN_] ) == 1 ) { - $rLL->[$Kinner_closing]->[_LEVEL_] = - $rLL->[$Kouter_closing]->[_LEVEL_]; + $ris_excluded_lp_container->{$parent_seqno} = 1 + unless ($is_tightly_contained); + + # continue up the tree marking parent containers + while (1) { + $parent_seqno = $self->[_rparent_of_seqno_]->{$parent_seqno}; + last + unless ( defined($parent_seqno) + && $parent_seqno ne SEQ_ROOT ); + $ris_excluded_lp_container->{$parent_seqno} = 1; } } } + + $self->[_rstarting_multiline_qw_seqno_by_K_] = + $rstarting_multiline_qw_seqno_by_K; + $self->[_rending_multiline_qw_seqno_by_K_] = + $rending_multiline_qw_seqno_by_K; + $self->[_rKrange_multiline_qw_by_seqno_] = $rKrange_multiline_qw_by_seqno; + $self->[_rmultiline_qw_has_extra_level_] = $rmultiline_qw_has_extra_level; + return; -} ## end sub weld_nested_quotes +} ## end sub find_multiline_qw -sub is_welded_at_seqno { +use constant DEBUG_COLLAPSED_LENGTHS => 0; - my ( $self, $seqno ) = @_; +# Minimum space reserved for contents of a code block. A value of 40 has given +# reasonable results. With a large line length, say -l=120, this will not +# normally be noticeable but it will prevent making a mess in some edge cases. +use constant MIN_BLOCK_LEN => 40; - # given a sequence number: - # return true if it is welded either left or right - # return false otherwise - return unless ( $total_weld_count && defined($seqno) ); - my $KK_o = $self->[_K_opening_container_]->{$seqno}; - return unless defined($KK_o); - return defined( $self->[_rK_weld_left_]->{$KK_o} ) - || defined( $self->[_rK_weld_right_]->{$KK_o} ); -} ## end sub is_welded_at_seqno +my %is_handle_type; -sub mark_short_nested_blocks { +BEGIN { + my @q = qw( w C U G i k => ); + @is_handle_type{@q} = (1) x scalar(@q); - # This routine looks at the entire file and marks any short nested blocks - # which should not be broken. The results are stored in the hash - # $rshort_nested->{$type_sequence} - # which will be true if the container should remain intact. - # - # For example, consider the following line: + my $i = 0; + use constant { + _max_prong_len_ => $i++, + _handle_len_ => $i++, + _seqno_o_ => $i++, + _iline_o_ => $i++, + _K_o_ => $i++, + _K_c_ => $i++, + _interrupted_list_rule_ => $i++, + }; +} ## end BEGIN - # sub cxt_two { sort { $a <=> $b } test_if_list() } +sub is_fragile_block_type { + my ( $self, $block_type, $seqno ) = @_; - # The 'sort' block is short and nested within an outer sub block. - # Normally, the existence of the 'sort' block will force the sub block to - # break open, but this is not always desirable. Here we will set a flag for - # the sort block to prevent this. To give the user control, we will - # follow the input file formatting. If either of the blocks is broken in - # the input file then we will allow it to remain broken. Otherwise we will - # set a flag to keep it together in later formatting steps. + # Given: + # $block_type = the block type of a token, and + # $seqno = its sequence number - # The flag which is set here will be checked in two places: - # 'sub process_line_of_CODE' and 'sub starting_one_line_block' + # Return: + # true if this block type stays broken after being broken, + # false otherwise - my $self = shift; - return if $rOpts->{'indent-only'}; + # This sub has been added to isolate a tricky decision needed + # to fix issue b1428. - my $rLL = $self->[_rLL_]; - return unless ( defined($rLL) && @{$rLL} ); + # The coding here needs to agree with: + # - sub process_line where variable '$rbrace_follower' is set + # - sub process_line_inner_loop where variable '$is_opening_BLOCK' is set, - return unless ( $rOpts->{'one-line-block-nesting'} ); + if ( $is_sort_map_grep_eval{$block_type} + || $block_type eq 't' + || $self->[_rshort_nested_]->{$seqno} ) + { + return 0; + } - my $K_opening_container = $self->[_K_opening_container_]; - my $K_closing_container = $self->[_K_closing_container_]; - my $rbreak_container = $self->[_rbreak_container_]; - my $rshort_nested = $self->[_rshort_nested_]; - my $rlines = $self->[_rlines_]; - my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; + return 1; - # Variables needed for estimating line lengths - my $maximum_text_length; - my $starting_lentot; - my $length_tol = 1; +} ## end sub is_fragile_block_type - my $excess_length_to_K = sub { - my ($K) = @_; +{ ## closure xlp_collapsed_lengths - # Estimate the length from the line start to a given token - my $length = $self->cumulative_length_before_K($K) - $starting_lentot; - my $excess_length = $length + $length_tol - $maximum_text_length; - return ($excess_length); - }; + my $max_prong_len; + my $len; + my $last_nonblank_type; + my @stack; - my $is_broken_block = sub { + sub xlp_collapsed_lengths_initialize { - # a block is broken if the input line numbers of the braces differ - my ($seqno) = @_; - my $K_opening = $K_opening_container->{$seqno}; - return unless ( defined($K_opening) ); - my $K_closing = $K_closing_container->{$seqno}; - return unless ( defined($K_closing) ); - return $rbreak_container->{$seqno} - || $rLL->[$K_closing]->[_LINE_INDEX_] != - $rLL->[$K_opening]->[_LINE_INDEX_]; - }; + $max_prong_len = 0; + $len = 0; + $last_nonblank_type = 'b'; + @stack = (); - # loop over all containers - my @open_block_stack; - my $iline = -1; - my $KNEXT = $self->[_K_first_seq_item_]; - while ( defined($KNEXT) ) { - my $KK = $KNEXT; - $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_]; - my $rtoken_vars = $rLL->[$KK]; - my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; - if ( !$type_sequence ) { - next if ( $KK == 0 ); # first token in file may not be container + push @stack, [ + 0, # $max_prong_len, + 0, # $handle_len, + SEQ_ROOT, # $seqno, + undef, # $iline, + undef, # $KK, + undef, # $K_c, + undef, # $interrupted_list_rule + ]; - # A fault here implies that an error was made in the little loop at - # the bottom of sub 'respace_tokens' which set the values of - # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the - # loop control lines above. - Fault("sequence = $type_sequence not defined at K=$KK") - if (DEVEL_MODE); - next; - } + return; + } ## end sub xlp_collapsed_lengths_initialize - # Patch: do not mark short blocks with welds. - # In some cases blinkers can form (case b690). - if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence) ) { - next; - } + sub cumulative_length_to_comma { + my ( $self, $KK, $K_comma, $K_closing ) = @_; - # We are just looking at code blocks - my $token = $rtoken_vars->[_TOKEN_]; - my $type = $rtoken_vars->[_TYPE_]; - next unless ( $type eq $token ); - next unless ( $rblock_type_of_seqno->{$type_sequence} ); + # Given: + # $KK = index of starting token, or blank before start + # $K_comma = index of line-ending comma + # $K_closing = index of the container closing token - # Keep a stack of all acceptable block braces seen. - # Only consider blocks entirely on one line so dump the stack when line - # changes. - my $iline_last = $iline; - $iline = $rLL->[$KK]->[_LINE_INDEX_]; - if ( $iline != $iline_last ) { @open_block_stack = () } + # Return: + # $length = cumulative length of the term - if ( $token eq '}' ) { - if (@open_block_stack) { pop @open_block_stack } - } - next unless ( $token eq '{' ); + my $rLL = $self->[_rLL_]; + if ( $rLL->[$KK]->[_TYPE_] eq 'b' ) { $KK++ } + my $length = 0; + if ( + $KK < $K_comma + && $rLL->[$K_comma]->[_TYPE_] eq ',' # should be true - # block must be balanced (bad scripts may be unbalanced) - my $K_opening = $K_opening_container->{$type_sequence}; - my $K_closing = $K_closing_container->{$type_sequence}; - next unless ( defined($K_opening) && defined($K_closing) ); + # Ignore if terminal comma, causes instability (b1297, + # b1330) + && ( + $K_closing - $K_comma > 2 + || ( $K_closing - $K_comma == 2 + && $rLL->[ $K_comma + 1 ]->[_TYPE_] ne 'b' ) + ) - # require that this block be entirely on one line - next if ( $is_broken_block->($type_sequence) ); + # The comma should be in this container + && ( $rLL->[$K_comma]->[_LEVEL_] - 1 == + $rLL->[$K_closing]->[_LEVEL_] ) + ) + { - # See if this block fits on one line of allowed length (which may - # be different from the input script) - $starting_lentot = - $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; - my $level = $rLL->[$KK]->[_LEVEL_]; - my $ci_level = $rLL->[$KK]->[_CI_LEVEL_]; - $maximum_text_length = - $maximum_text_length_at_level[$level] - - $ci_level * $rOpts_continuation_indentation; + # An additional check: if line ends in ), and the ) has vtc then + # skip this estimate. Otherwise, vtc can give oscillating results. + # Fixes b1448. For example, this could be unstable: - # Dump the stack if block is too long and skip this block - if ( $excess_length_to_K->($K_closing) > 0 ) { - @open_block_stack = (); - next; + # ( $os ne 'win' ? ( -selectcolor => "red" ) : () ), + # | |^--K_comma + # | ^-- K_prev + # ^--- KK + + # An alternative, possibly better strategy would be to try to turn + # off -vtc locally, but it turns out to be difficult to locate the + # appropriate closing token when it is not on the same line as its + # opening token. + + my $K_prev = $self->K_previous_nonblank($K_comma); + if ( defined($K_prev) + && $K_prev >= $KK + && $rLL->[$K_prev]->[_TYPE_SEQUENCE_] ) + { + my $token = $rLL->[$K_prev]->[_TOKEN_]; + my $type = $rLL->[$K_prev]->[_TYPE_]; + if ( $closing_vertical_tightness{$token} && $type ne 'R' ) { + ## type 'R' does not normally get broken, so ignore + ## skip length calculation + return 0; + } + } + my $starting_len = + $KK >= 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0; + $length = $rLL->[$K_comma]->[_CUMULATIVE_LENGTH_] - $starting_len; } + return $length; + } ## end sub cumulative_length_to_comma - # OK, Block passes tests, remember it - push @open_block_stack, $type_sequence; + sub xlp_collapsed_lengths { - # We are only marking nested code blocks, - # so check for a previous block on the stack - next unless ( @open_block_stack > 1 ); + my $self = shift; - # Looks OK, mark this as a short nested block - $rshort_nested->{$type_sequence} = 1; + #---------------------------------------------------------------- + # Define the collapsed lengths of containers for -xlp indentation + #---------------------------------------------------------------- - } - return; -} ## end sub mark_short_nested_blocks + # We need an estimate of the minimum required line length starting at + # any opening container for the -xlp style. This is needed to avoid + # using too much indentation space for lower level containers and + # thereby running out of space for outer container tokens due to the + # maximum line length limit. -sub adjust_indentation_levels { + # The basic idea is that at each node in the tree we imagine that we + # have a fork with a handle and collapsible prongs: + # + # |------------ + # |-------- + # ------------|------- + # handle |------------ + # |-------- + # prongs + # + # Each prong has a minimum collapsed length. The collapsed length at a + # node is the maximum of these minimum lengths, plus the handle length. + # Each of the prongs may itself be a tree node. - my ($self) = @_; + # This is just a rough calculation to get an approximate starting point + # for indentation. Later routines will be more precise. It is + # important that these estimates be independent of the line breaks of + # the input stream in order to avoid instabilities. - # Called once per file to do special indentation adjustments. - # These routines adjust levels either by changing _CI_LEVEL_ directly or - # by setting modified levels in the array $self->[_radjusted_levels_]. + my $rLL = $self->[_rLL_]; + my $rlines = $self->[_rlines_]; + my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_]; + my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_]; - # Initialize the adjusted levels. These will be the levels actually used - # for computing indentation. + my $K_start_multiline_qw; + my $level_start_multiline_qw = 0; - # NOTE: This routine is called after the weld routines, which may have - # already adjusted _LEVEL_, so we are making adjustments on top of those - # levels. It would be much nicer to have the weld routines also use this - # adjustment, but that gets complicated when we combine -gnu -wn and have - # some welded quotes. - my $Klimit = $self->[_Klimit_]; - my $rLL = $self->[_rLL_]; - my $radjusted_levels = $self->[_radjusted_levels_]; + xlp_collapsed_lengths_initialize(); - return unless ( defined($Klimit) ); + #-------------------------------- + # Loop over all lines in the file + #-------------------------------- + my $iline = -1; + my $skip_next_line; + foreach my $line_of_tokens ( @{$rlines} ) { + $iline++; + if ($skip_next_line) { + $skip_next_line = 0; + next; + } + my $line_type = $line_of_tokens->{_line_type}; + next if ( $line_type ne 'CODE' ); + my $CODE_type = $line_of_tokens->{_code_type}; - foreach my $KK ( 0 .. $Klimit ) { - $radjusted_levels->[$KK] = $rLL->[$KK]->[_LEVEL_]; - } + # Always skip blank lines + next if ( $CODE_type eq 'BL' ); + + # Note on other line types: + # 'FS' (Format Skipping) lines may contain opening/closing tokens so + # we have to process them to keep the stack correctly sequenced + # 'VB' (Verbatim) lines could be skipped, but testing shows that + # results look better if we include their lengths. + + # Also note that we could exclude -xlp formatting of containers with + # 'FS' and 'VB' lines, but in testing that was not really beneficial + + # So we process tokens in 'FS' and 'VB' lines like all the rest... + + my $rK_range = $line_of_tokens->{_rK_range}; + my ( $K_first, $K_last ) = @{$rK_range}; + next unless ( defined($K_first) && defined($K_last) ); + + my $has_comment = $rLL->[$K_last]->[_TYPE_] eq '#'; + + # Always ignore block comments + next if ( $has_comment && $K_first == $K_last ); + + # Handle an intermediate line of a multiline qw quote. These may + # require including some -ci or -i spaces. See cases c098/x063. + # Updated to check all lines (not just $K_first==$K_last) to fix + # b1316 + my $K_begin_loop = $K_first; + if ( $rLL->[$K_first]->[_TYPE_] eq 'q' ) { + + my $KK = $K_first; + my $level = $rLL->[$KK]->[_LEVEL_]; + my $ci_level = $rLL->[$KK]->[_CI_LEVEL_]; + + # remember the level of the start + if ( !defined($K_start_multiline_qw) ) { + $K_start_multiline_qw = $K_first; + $level_start_multiline_qw = $level; + my $seqno_qw = + $self->[_rstarting_multiline_qw_seqno_by_K_] + ->{$K_start_multiline_qw}; + if ( !$seqno_qw ) { + my $Kp = $self->K_previous_nonblank($K_first); + if ( defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' ) { + + $K_start_multiline_qw = $Kp; + $level_start_multiline_qw = + $rLL->[$K_start_multiline_qw]->[_LEVEL_]; + } + else { - # First set adjusted levels for any non-indenting braces. - $self->do_non_indenting_braces(); + # Fix for b1319, b1320 + $K_start_multiline_qw = undef; + } + } + } - # Adjust breaks and indentation list containers - $self->break_before_list_opening_containers(); + if ( defined($K_start_multiline_qw) ) { + $len = $rLL->[$KK]->[_CUMULATIVE_LENGTH_] - + $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; - # Set adjusted levels for the whitespace cycle option. - $self->whitespace_cycle_adjustment(); + # We may have to add the spaces of one level or ci level + # ... it depends depends on the -xci flag, the -wn flag, + # and if the qw uses a container token as the quote + # delimiter. - $self->braces_left_setup(); + # First rule: add ci if there is a $ci_level + if ($ci_level) { + $len += $rOpts_continuation_indentation; + } - # Adjust continuation indentation if -bli is set - $self->bli_adjustment(); + # Second rule: otherwise, look for an extra indentation + # level from the start and add one indentation level if + # found. + elsif ( $level > $level_start_multiline_qw ) { + $len += $rOpts_indent_columns; + } - $self->extended_ci() - if ($rOpts_extended_continuation_indentation); + if ( $len > $max_prong_len ) { $max_prong_len = $len } - # Now clip any adjusted levels to be non-negative - $self->clip_adjusted_levels(); + $last_nonblank_type = 'q'; - return; -} ## end sub adjust_indentation_levels + $K_begin_loop = $K_first + 1; -sub clip_adjusted_levels { + # We can skip to the next line if more tokens + next if ( $K_begin_loop > $K_last ); + } + } - # Replace any negative adjusted levels with zero. - # Negative levels can occur in files with brace errors. - my ($self) = @_; - my $radjusted_levels = $self->[_radjusted_levels_]; - return unless defined($radjusted_levels) && @{$radjusted_levels}; - foreach ( @{$radjusted_levels} ) { $_ = 0 if ( $_ < 0 ) } - return; -} ## end sub clip_adjusted_levels + $K_start_multiline_qw = undef; -sub do_non_indenting_braces { + # Find the terminal token, before any side comment + my $K_terminal = $K_last; + if ($has_comment) { + $K_terminal -= 1; + $K_terminal -= 1 + if ( $rLL->[$K_terminal]->[_TYPE_] eq 'b' + && $K_terminal > $K_first ); + } - # Called once per file to handle the --non-indenting-braces parameter. - # Remove indentation within marked braces if requested - my ($self) = @_; + # Use length to terminal comma if interrupted list rule applies + if ( @stack && $stack[-1]->[_interrupted_list_rule_] ) { + my $K_c = $stack[-1]->[_K_c_]; + if ( defined($K_c) ) { - # Any non-indenting braces have been found by sub find_non_indenting_braces - # and are defined by the following hash: - my $rseqno_non_indenting_brace_by_ix = - $self->[_rseqno_non_indenting_brace_by_ix_]; - return unless ( %{$rseqno_non_indenting_brace_by_ix} ); + #---------------------------------------------------------- + # BEGIN patch for issue b1408: If this line ends in an + # opening token, look for the closing token and comma at + # the end of the next line. If so, combine the two lines to + # get the correct sums. This problem seems to require -xlp + # -vtc=2 and blank lines to occur. Use %is_opening_type to + # fix b1431. + #---------------------------------------------------------- + if ( $is_opening_type{ $rLL->[$K_terminal]->[_TYPE_] } + && !$has_comment ) + { + my $seqno_end = $rLL->[$K_terminal]->[_TYPE_SEQUENCE_]; + my $Kc_test = $rLL->[$K_terminal]->[_KNEXT_SEQ_ITEM_]; - my $rLL = $self->[_rLL_]; - my $rlines = $self->[_rlines_]; - my $K_opening_container = $self->[_K_opening_container_]; - my $K_closing_container = $self->[_K_closing_container_]; - my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_]; - my $radjusted_levels = $self->[_radjusted_levels_]; + # We are looking for a short broken remnant on the next + # line; something like the third line here (b1408): - # First locate all of the marked blocks - my @K_stack; - foreach my $ix ( keys %{$rseqno_non_indenting_brace_by_ix} ) { - my $seqno = $rseqno_non_indenting_brace_by_ix->{$ix}; - my $KK = $K_opening_container->{$seqno}; - my $line_of_tokens = $rlines->[$ix]; - my $rK_range = $line_of_tokens->{_rK_range}; - my ( $Kfirst, $Klast ) = @{$rK_range}; - $rspecial_side_comment_type->{$Klast} = 'NIB'; - push @K_stack, [ $KK, 1 ]; - my $Kc = $K_closing_container->{$seqno}; - push @K_stack, [ $Kc, -1 ] if ( defined($Kc) ); - } - return unless (@K_stack); - @K_stack = sort { $a->[0] <=> $b->[0] } @K_stack; + # parent => + # Moose::Util::TypeConstraints::find_type_constraint( + # 'RefXX' ), + # or this + # + # Help::WorkSubmitter->_filter_chores_and_maybe_warn_user( + # $story_set_all_chores), + # or this (b1431): + # $issue->{ + # 'borrowernumber'}, # borrowernumber + if ( defined($Kc_test) + && $seqno_end == $rLL->[$Kc_test]->[_TYPE_SEQUENCE_] + && $rLL->[$Kc_test]->[_LINE_INDEX_] == $iline + 1 ) + { + my $line_of_tokens_next = $rlines->[ $iline + 1 ]; + my $rtype_count = + $rtype_count_by_seqno->{$seqno_end}; + my ( $K_first_next, $K_terminal_next ) = + @{ $line_of_tokens_next->{_rK_range} }; + + # backup at a side comment + if ( defined($K_terminal_next) + && $rLL->[$K_terminal_next]->[_TYPE_] eq '#' ) + { + my $Kprev = + $self->K_previous_nonblank($K_terminal_next); + if ( defined($Kprev) + && $Kprev >= $K_first_next ) + { + $K_terminal_next = $Kprev; + } + } - # Then loop to remove indentation within marked blocks - my $KK_last = 0; - my $ndeep = 0; - foreach my $item (@K_stack) { - my ( $KK, $inc ) = @{$item}; - if ( $ndeep > 0 ) { + if ( + defined($K_terminal_next) - foreach ( $KK_last + 1 .. $KK ) { - $radjusted_levels->[$_] -= $ndeep; - } + # next line ends with a comma + && $rLL->[$K_terminal_next]->[_TYPE_] eq ',' - # We just subtracted the old $ndeep value, which only applies to a - # '{'. The new $ndeep applies to a '}', so we undo the error. - if ( $inc < 0 ) { $radjusted_levels->[$KK] += 1 } - } + # which follows the closing container token + && ( + $K_terminal_next - $Kc_test == 1 + || ( $K_terminal_next - $Kc_test == 2 + && $rLL->[ $K_terminal_next - 1 ] + ->[_TYPE_] eq 'b' ) + ) - $ndeep += $inc; - $KK_last = $KK; - } - return; -} ## end sub do_non_indenting_braces + # no commas in the container + && ( !defined($rtype_count) + || !$rtype_count->{','} ) -sub whitespace_cycle_adjustment { + # for now, restrict this to a container with + # just 1 or two tokens + && $K_terminal_next - $K_terminal <= 5 - my $self = shift; + ) + { - # Called once per file to implement the --whitespace-cycle option - my $rLL = $self->[_rLL_]; - return unless ( defined($rLL) && @{$rLL} ); - my $radjusted_levels = $self->[_radjusted_levels_]; - my $maximum_level = $self->[_maximum_level_]; + # combine the next line with the current line + $K_terminal = $K_terminal_next; + $skip_next_line = 1; + if (DEBUG_COLLAPSED_LENGTHS) { + print "Combining lines at line $iline\n"; + } + } + } + } - if ( $rOpts_whitespace_cycle - && $rOpts_whitespace_cycle > 0 - && $rOpts_whitespace_cycle < $maximum_level ) - { + #-------------------------- + # END patch for issue b1408 + #-------------------------- + if ( $rLL->[$K_terminal]->[_TYPE_] eq ',' ) { - my $Kmax = @{$rLL} - 1; + my $length = + $self->cumulative_length_to_comma( $K_first, + $K_terminal, $K_c ); - my $whitespace_last_level = -1; - my @whitespace_level_stack = (); - my $last_nonblank_type = 'b'; - my $last_nonblank_token = EMPTY_STRING; - foreach my $KK ( 0 .. $Kmax ) { - my $level_abs = $radjusted_levels->[$KK]; - my $level = $level_abs; - if ( $level_abs < $whitespace_last_level ) { - pop(@whitespace_level_stack); - } - if ( !@whitespace_level_stack ) { - push @whitespace_level_stack, $level_abs; + # Fix for b1331: at a broken => item, include the + # length of the previous half of the item plus one for + # the missing space + if ( $last_nonblank_type eq '=>' ) { + $length += $len + 1; + } + if ( $length > $max_prong_len ) { + $max_prong_len = $length; + } + } + } } - elsif ( $level_abs > $whitespace_last_level ) { - $level = $whitespace_level_stack[-1] + - ( $level_abs - $whitespace_last_level ); - - if ( - # 1 Try to break at a block brace - ( - $level > $rOpts_whitespace_cycle - && $last_nonblank_type eq '{' - && $last_nonblank_token eq '{' - ) - # 2 Then either a brace or bracket - || ( $level > $rOpts_whitespace_cycle + 1 - && $last_nonblank_token =~ /^[\{\[]$/ ) + #---------------------------------- + # Loop over all tokens on this line + #---------------------------------- + $self->xlp_collapse_lengths_inner_loop( $iline, $K_begin_loop, + $K_terminal, $K_last ); - # 3 Then a paren too - || $level > $rOpts_whitespace_cycle + 2 - ) - { - $level = 1; + # Now take care of any side comment; + if ($has_comment) { + if ($rOpts_ignore_side_comment_lengths) { + $len = 0; + } + else { + + # For a side comment when -iscl is not set, measure length from + # the start of the previous nonblank token + my $len0 = + $K_terminal > 0 + ? $rLL->[ $K_terminal - 1 ]->[_CUMULATIVE_LENGTH_] + : 0; + $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] - $len0; + if ( $len > $max_prong_len ) { $max_prong_len = $len } } - push @whitespace_level_stack, $level; } - $level = $whitespace_level_stack[-1]; - $radjusted_levels->[$KK] = $level; - $whitespace_last_level = $level_abs; - my $type = $rLL->[$KK]->[_TYPE_]; - my $token = $rLL->[$KK]->[_TOKEN_]; - if ( $type ne 'b' ) { - $last_nonblank_type = $type; - $last_nonblank_token = $token; + } ## end loop over lines + + if (DEBUG_COLLAPSED_LENGTHS) { + print "\nCollapsed lengths--\n"; + foreach + my $key ( sort { $a <=> $b } keys %{$rcollapsed_length_by_seqno} ) + { + my $clen = $rcollapsed_length_by_seqno->{$key}; + print "$key -> $clen\n"; } } - } - return; -} ## end sub whitespace_cycle_adjustment -use constant DEBUG_BBX => 0; + return; + } ## end sub xlp_collapsed_lengths -sub break_before_list_opening_containers { + sub xlp_collapse_lengths_inner_loop { - my ($self) = @_; + my ( $self, $iline, $K_begin_loop, $K_terminal, $K_last ) = @_; - # This routine is called once per batch to implement parameters - # --break-before-hash-brace=n and similar -bbx=n flags - # and their associated indentation flags: - # --break-before-hash-brace-and-indent and similar -bbxi=n + my $rLL = $self->[_rLL_]; + my $K_closing_container = $self->[_K_closing_container_]; - # Nothing to do if none of the -bbx=n parameters has been set - return unless %break_before_container_types; + my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; + my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_]; + my $ris_permanently_broken = $self->[_ris_permanently_broken_]; + my $ris_list_by_seqno = $self->[_ris_list_by_seqno_]; + my $rhas_broken_list = $self->[_rhas_broken_list_]; + my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_]; - my $rLL = $self->[_rLL_]; - return unless ( defined($rLL) && @{$rLL} ); + #---------------------------------- + # Loop over tokens on this line ... + #---------------------------------- + foreach my $KK ( $K_begin_loop .. $K_terminal ) { - # Loop over all opening container tokens - my $K_opening_container = $self->[_K_opening_container_]; - my $K_closing_container = $self->[_K_closing_container_]; - my $ris_broken_container = $self->[_ris_broken_container_]; - my $ris_permanently_broken = $self->[_ris_permanently_broken_]; - my $rhas_list = $self->[_rhas_list_]; - my $rhas_broken_list = $self->[_rhas_broken_list_]; - my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_]; - my $radjusted_levels = $self->[_radjusted_levels_]; - my $rparent_of_seqno = $self->[_rparent_of_seqno_]; - my $rlines = $self->[_rlines_]; - my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_]; - my $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_]; - my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_]; - my $rK_weld_right = $self->[_rK_weld_right_]; - my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; + my $type = $rLL->[$KK]->[_TYPE_]; + next if ( $type eq 'b' ); - my $length_tol = - max( 1, $rOpts_continuation_indentation, $rOpts_indent_columns ); - if ($rOpts_ignore_old_breakpoints) { + #------------------------ + # Handle sequenced tokens + #------------------------ + my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_]; + if ($seqno) { - # Patch suggested by b1231; the old tol was excessive. - ## $length_tol += $rOpts_maximum_line_length; - $length_tol *= 2; - } + my $token = $rLL->[$KK]->[_TOKEN_]; - my $rbreak_before_container_by_seqno = {}; - my $rwant_reduced_ci = {}; - foreach my $seqno ( keys %{$K_opening_container} ) { + #---------------------------- + # Entering a new container... + #---------------------------- + if ( $is_opening_token{$token} + && defined( $K_closing_container->{$seqno} ) ) + { - #---------------------------------------------------------------- - # Part 1: Examine any -bbx=n flags - #---------------------------------------------------------------- + # save current prong length + $stack[-1]->[_max_prong_len_] = $max_prong_len; + $max_prong_len = 0; - next if ( $rblock_type_of_seqno->{$seqno} ); - my $KK = $K_opening_container->{$seqno}; + # Start new prong one level deeper + my $handle_len = 0; + if ( $rblock_type_of_seqno->{$seqno} ) { - # This must be a list or contain a list. - # Note1: switched from 'has_broken_list' to 'has_list' to fix b1024. - # Note2: 'has_list' holds the depth to the sub-list. We will require - # a depth of just 1 - my $is_list = $self->is_list_by_seqno($seqno); - my $has_list = $rhas_list->{$seqno}; + # code blocks do not use -lp indentation, but behave as + # if they had a handle of one indentation length + $handle_len = $rOpts_indent_columns; - # Fix for b1173: if welded opening container, use flag of innermost - # seqno. Otherwise, the restriction $has_list==1 prevents triple and - # higher welds from following the -BBX parameters. - if ($total_weld_count) { - my $KK_test = $rK_weld_right->{$KK}; - if ( defined($KK_test) ) { - my $seqno_inner = $rLL->[$KK_test]->[_TYPE_SEQUENCE_]; - $is_list ||= $self->is_list_by_seqno($seqno_inner); - $has_list = $rhas_list->{$seqno_inner}; - } - } + } + elsif ( $is_handle_type{$last_nonblank_type} ) { + $handle_len = $len; + $handle_len += 1 + if ( $KK > 0 && $rLL->[ $KK - 1 ]->[_TYPE_] eq 'b' ); + } - next unless ( $is_list || $has_list && $has_list == 1 ); + # Set a flag if the 'Interrupted List Rule' will be applied + # (see sub copy_old_breakpoints). + # - Added check on has_broken_list to fix issue b1298 - my $has_broken_list = $rhas_broken_list->{$seqno}; - my $has_list_with_lec = $rhas_broken_list_with_lec->{$seqno}; + my $interrupted_list_rule = + $ris_permanently_broken->{$seqno} + && $ris_list_by_seqno->{$seqno} + && !$rhas_broken_list->{$seqno} + && !$rOpts_ignore_old_breakpoints; - # Only for types of container tokens with a non-default break option - my $token = $rLL->[$KK]->[_TOKEN_]; - my $break_option = $break_before_container_types{$token}; - next unless ($break_option); + # NOTES: Since we are looking at old line numbers we have + # to be very careful not to introduce an instability. - # Do not use -bbx under stress for stability ... fixes b1300 - my $level = $rLL->[$KK]->[_LEVEL_]; - if ( $level >= $stress_level_beta ) { - DEBUG_BBX - && print -"BBX: Switching off at $seqno: level=$level exceeds beta stress level=$stress_level_beta\n"; - next; - } + # This following causes instability (b1288-b1296): + # $interrupted_list_rule ||= + # $rOpts_break_at_old_comma_breakpoints; - # Require previous nonblank to be '=' or '=>' - my $Kprev = $KK - 1; - next if ( $Kprev < 0 ); - my $prev_type = $rLL->[$Kprev]->[_TYPE_]; - if ( $prev_type eq 'b' ) { - $Kprev--; - next if ( $Kprev < 0 ); - $prev_type = $rLL->[$Kprev]->[_TYPE_]; - } - next unless ( $is_equal_or_fat_comma{$prev_type} ); + # - We could turn off the interrupted list rule if there is + # a broken sublist, to follow 'Compound List Rule 1'. + # - We could use the _rhas_broken_list_ flag for this. + # - But it seems safer not to do this, to avoid + # instability, since the broken sublist could be + # temporary. It seems better to let the formatting + # stabilize by itself after one or two iterations. + # - So, not doing this for now - my $ci = $rLL->[$KK]->[_CI_LEVEL_]; + # Turn off the interrupted list rule if -vmll is set and a + # list has '=>' characters. This avoids instabilities due + # to dependence on old line breaks; issue b1325. + if ( $interrupted_list_rule + && $rOpts_variable_maximum_line_length ) + { + my $rtype_count = $rtype_count_by_seqno->{$seqno}; + if ( $rtype_count && $rtype_count->{'=>'} ) { + $interrupted_list_rule = 0; + } + } - #-------------------------------------------- - # New coding for option 2 (break if complex). - #-------------------------------------------- - # This new coding uses clues which are invariant under formatting to - # decide if a list is complex. For now it is only applied when -lp - # and -vmll are used, but eventually it may become the standard method. - # Fixes b1274, b1275, and others, including b1099. - if ( $break_option == 2 ) { + my $K_c = $K_closing_container->{$seqno}; - if ( $rOpts_line_up_parentheses - || $rOpts_variable_maximum_line_length ) - { + # Add length of any terminal list item if interrupted + # so that the result is the same as if the term is + # in the next line (b1446). - # Start with the basic definition of a complex list... - my $is_complex = $is_list && $has_list; + if ( + $interrupted_list_rule + && $KK < $K_terminal - # and it is also complex if the parent is a list - if ( !$is_complex ) { - my $parent = $rparent_of_seqno->{$seqno}; - if ( $self->is_list_by_seqno($parent) ) { - $is_complex = 1; + # The line should end in a comma + # NOTE: this currently assumes break after comma. + # As long as the other call to cumulative_length.. + # makes the same assumption we should remain stable. + && $rLL->[$K_terminal]->[_TYPE_] eq ',' + + ) + { + $max_prong_len = + $self->cumulative_length_to_comma( $KK + 1, + $K_terminal, $K_c ); } + + push @stack, [ + + $max_prong_len, + $handle_len, + $seqno, + $iline, + $KK, + $K_c, + $interrupted_list_rule + ]; + } - # finally, we will call it complex if there are inner opening - # and closing container tokens, not parens, within the outer - # container tokens. - if ( !$is_complex ) { - my $Kp = $self->K_next_nonblank($KK); - my $token_p = defined($Kp) ? $rLL->[$Kp]->[_TOKEN_] : 'b'; - if ( $is_opening_token{$token_p} && $token_p ne '(' ) { + #-------------------- + # Exiting a container + #-------------------- + elsif ( $is_closing_token{$token} && @stack ) { + + # The current prong ends - get its handle + my $item = pop @stack; + my $handle_len = $item->[_handle_len_]; + my $seqno_o = $item->[_seqno_o_]; + my $iline_o = $item->[_iline_o_]; + my $K_o = $item->[_K_o_]; + my $K_c_expect = $item->[_K_c_]; + my $collapsed_len = $max_prong_len; + + if ( $seqno_o ne $seqno ) { + + # This can happen if input file has brace errors. + # Otherwise it shouldn't happen. Not fatal but -lp + # formatting could get messed up. + if ( DEVEL_MODE && !get_saw_brace_error() ) { + Fault(<{$seqno}; - my $Km = $self->K_previous_nonblank($Kc); - my $token_m = - defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b'; + #------------------------------------------ + # Rules to avoid scrunching code blocks ... + #------------------------------------------ + # Some test cases: + # c098/x107 x108 x110 x112 x114 x115 x117 x118 x119 + my $block_type = $rblock_type_of_seqno->{$seqno}; + if ($block_type) { + + my $K_c = $KK; + my $block_length = MIN_BLOCK_LEN; + my $is_one_line_block; + my $level = $rLL->[$K_o]->[_LEVEL_]; + if ( defined($K_o) && defined($K_c) ) { + + # note: fixed 3 May 2022 (removed 'my') + $block_length = + $rLL->[ $K_c - 1 ]->[_CUMULATIVE_LENGTH_] - + $rLL->[$K_o]->[_CUMULATIVE_LENGTH_]; + $is_one_line_block = $iline == $iline_o; + } - # ignore any optional ending comma - if ( $token_m eq ',' ) { - $Km = $self->K_previous_nonblank($Km); - $token_m = - defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b'; + # Code block rule 1: Use the total block length if + # it is less than the minimum. + if ( $block_length < MIN_BLOCK_LEN ) { + $collapsed_len = $block_length; + } + + # Code block rule 2: Use the full length of a + # one-line block to avoid breaking it, unless + # extremely long. We do not need to do a precise + # check here, because if it breaks then it will + # stay broken on later iterations. + elsif ( + $is_one_line_block + && $block_length < + $maximum_line_length_at_level[$level] + + # But skip this for blocks types which can reform, + # like sort/map/grep/eval blocks, to avoid + # instability (b1345, b1428) + && $self->is_fragile_block_type( $block_type, + $seqno ) + ) + { + $collapsed_len = $block_length; } - $is_complex ||= - $is_closing_token{$token_m} && $token_m ne ')'; - } - } + # Code block rule 3: Otherwise the length should be + # at least MIN_BLOCK_LEN to avoid scrunching code + # blocks. + elsif ( $collapsed_len < MIN_BLOCK_LEN ) { + $collapsed_len = MIN_BLOCK_LEN; + } + } + + # Store the result. Some extra space, '2', allows for + # length of an opening token, inside space, comma, ... + # This constant has been tuned to give good overall + # results. + $collapsed_len += 2; + $rcollapsed_length_by_seqno->{$seqno} = $collapsed_len; + + # Restart scanning the lower level prong + if (@stack) { + $max_prong_len = $stack[-1]->[_max_prong_len_]; + $collapsed_len += $handle_len; + if ( $collapsed_len > $max_prong_len ) { + $max_prong_len = $collapsed_len; + } + } + } + + # it is a ternary - no special processing for these yet + else { + + } + + $len = 0; + $last_nonblank_type = $type; + next; + } + + #---------------------------- + # Handle non-container tokens + #---------------------------- + my $token_length = $rLL->[$KK]->[_TOKEN_LENGTH_]; + + # Count lengths of things like 'xx => yy' as a single item + if ( $type eq '=>' ) { + $len += $token_length + 1; + if ( $len > $max_prong_len ) { $max_prong_len = $len } + } + elsif ( $last_nonblank_type eq '=>' ) { + $len += $token_length; + if ( $len > $max_prong_len ) { $max_prong_len = $len } - # Convert to option 3 (always break) if complex - next unless ($is_complex); - $break_option = 3; + # but only include one => per item + $len = $token_length; } - } - # Fix for b1231: the has_list_with_lec does not cover all cases. - # A broken container containing a list and with line-ending commas - # will stay broken, so can be treated as if it had a list with lec. - $has_list_with_lec ||= - $has_list - && $ris_broken_container->{$seqno} - && $rlec_count_by_seqno->{$seqno}; + # include everything to end of line after a here target + elsif ( $type eq 'h' ) { + $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] - + $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; + if ( $len > $max_prong_len ) { $max_prong_len = $len } + } - DEBUG_BBX - && print STDOUT -"BBX: Looking at seqno=$seqno, token = $token with option=$break_option\n"; + # for everything else just use the token length + else { + $len = $token_length; + if ( $len > $max_prong_len ) { $max_prong_len = $len } + } + $last_nonblank_type = $type; - # -bbx=1 = stable, try to follow input - if ( $break_option == 1 ) { + } ## end loop over tokens on this line - my $iline = $rLL->[$KK]->[_LINE_INDEX_]; - my $rK_range = $rlines->[$iline]->{_rK_range}; - my ( $Kfirst, $Klast ) = @{$rK_range}; - next unless ( $KK == $Kfirst ); - } + return; - # -bbx=2 => apply this style only for a 'complex' list - elsif ( $break_option == 2 ) { + } ## end sub xlp_collapse_lengths_inner_loop - # break if this list contains a broken list with line-ending comma - my $ok_to_break; - my $Msg = EMPTY_STRING; - if ($has_list_with_lec) { - $ok_to_break = 1; - DEBUG_BBX && do { $Msg = "has list with lec;" }; - } +} ## end closure xlp_collapsed_lengths - if ( !$ok_to_break ) { +sub is_excluded_lp { - # Turn off -xci if -bbx=2 and this container has a sublist but - # not a broken sublist. This avoids creating blinkers. The - # problem is that -xci can cause one-line lists to break open, - # and thereby creating formatting instability. - # This fixes cases b1033 b1036 b1037 b1038 b1042 b1043 b1044 - # b1045 b1046 b1047 b1051 b1052 b1061. - if ($has_list) { $rno_xci_by_seqno->{$seqno} = 1 } + # Decide if this container is excluded by user request: + # returns true if this token is excluded (i.e., may not use -lp) + # returns false otherwise - my $parent = $rparent_of_seqno->{$seqno}; - if ( $self->is_list_by_seqno($parent) ) { - DEBUG_BBX && do { $Msg = "parent is list" }; - $ok_to_break = 1; - } - } + # The control hash can either describe: + # what to exclude: $line_up_parentheses_control_is_lxpl = 1, or + # what to include: $line_up_parentheses_control_is_lxpl = 0 - if ( !$ok_to_break ) { - DEBUG_BBX - && print STDOUT "Not breaking at seqno=$seqno: $Msg\n"; - next; - } + # Input parameter: + # $KK = index of the container opening token - DEBUG_BBX - && print STDOUT "OK to break at seqno=$seqno: $Msg\n"; + my ( $self, $KK ) = @_; + my $rLL = $self->[_rLL_]; + my $rtoken_vars = $rLL->[$KK]; + my $token = $rtoken_vars->[_TOKEN_]; + my $rflags = $line_up_parentheses_control_hash{$token}; - # Patch: turn off -xci if -bbx=2 and -lp - # This fixes cases b1090 b1095 b1101 b1116 b1118 b1121 b1122 - $rno_xci_by_seqno->{$seqno} = 1 if ($rOpts_line_up_parentheses); - } + #----------------------------------------------- + # TEST #1: check match to listed container types + #----------------------------------------------- + if ( !defined($rflags) ) { - # -bbx=3 = always break - elsif ( $break_option == 3 ) { + # There is no entry for this container, so we are done + return !$line_up_parentheses_control_is_lxpl; + } - # ok to break - } + my ( $flag1, $flag2 ) = @{$rflags}; - # Shouldn't happen! Bad flag, but make behavior same as 3 - else { - # ok to break - } + #----------------------------------------------------------- + # TEST #2: check match to flag1, the preceding nonblank word + #----------------------------------------------------------- + my $match_flag1 = !defined($flag1) || $flag1 eq '*'; + if ( !$match_flag1 ) { - # Set a flag for actual implementation later in - # sub insert_breaks_before_list_opening_containers - $rbreak_before_container_by_seqno->{$seqno} = 1; - DEBUG_BBX - && print STDOUT "BBX: ok to break at seqno=$seqno\n"; + # Find the previous token + my ( $is_f, $is_k, $is_w ); + my $Kp = $self->K_previous_nonblank($KK); + if ( defined($Kp) ) { + my $type_p = $rLL->[$Kp]->[_TYPE_]; + my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_]; - # -bbxi=0: Nothing more to do if the ci value remains unchanged - my $ci_flag = $container_indentation_options{$token}; - next unless ($ci_flag); + # keyword? + $is_k = $type_p eq 'k'; - # -bbxi=1: This option removes ci and is handled in - # later sub final_indentation_adjustment - if ( $ci_flag == 1 ) { - $rwant_reduced_ci->{$seqno} = 1; - next; - } + # function call? + $is_f = $self->[_ris_function_call_paren_]->{$seqno}; - # -bbxi=2: This option changes the level ... - # This option can conflict with -xci in some cases. We can turn off - # -xci for this container to avoid blinking. For now, only do this if - # -vmll is set. ( fixes b1335, b1336 ) - if ($rOpts_variable_maximum_line_length) { - $rno_xci_by_seqno->{$seqno} = 1; + # either keyword or function call? + $is_w = $is_k || $is_f; } - #---------------------------------------------------------------- - # Part 2: Perform tests before committing to changing ci and level - #---------------------------------------------------------------- + # Check for match based on flag1 and the previous token: + if ( $flag1 eq 'k' ) { $match_flag1 = $is_k } + elsif ( $flag1 eq 'K' ) { $match_flag1 = !$is_k } + elsif ( $flag1 eq 'f' ) { $match_flag1 = $is_f } + elsif ( $flag1 eq 'F' ) { $match_flag1 = !$is_f } + elsif ( $flag1 eq 'w' ) { $match_flag1 = $is_w } + elsif ( $flag1 eq 'W' ) { $match_flag1 = !$is_w } + ## else { no match found } + } - # Before changing the ci level of the opening container, we need - # to be sure that the container will be broken in the later stages of - # formatting. We have to do this because we are working early in the - # formatting pipeline. A problem can occur if we change the ci or - # level of the opening token but do not actually break the container - # open as expected. In most cases it wouldn't make any difference if - # we changed ci or not, but there are some edge cases where this - # can cause blinking states, so we need to try to only change ci if - # the container will really be broken. + # See if we can exclude this based on the flag1 test... + if ($line_up_parentheses_control_is_lxpl) { + return 1 if ($match_flag1); + } + else { + return 1 if ( !$match_flag1 ); + } - # Only consider containers already broken - next if ( !$ris_broken_container->{$seqno} ); + #------------------------------------------------------------- + # TEST #3: exclusion based on flag2 and the container contents + #------------------------------------------------------------- - # Patch to fix issue b1305: the combination of -naws and ci>i appears - # to cause an instability. It should almost never occur in practice. - next - if (!$rOpts_add_whitespace - && $rOpts_continuation_indentation > $rOpts_indent_columns ); + # Note that this is an exclusion test for both -lpxl or -lpil input methods + # The options are: + # 0 or blank: ignore container contents + # 1 exclude non-lists or lists with sublists + # 2 same as 1 but also exclude lists with code blocks - # Always ok to change ci for permanently broken containers - if ( $ris_permanently_broken->{$seqno} ) { - goto OK; + my $match_flag2; + if ($flag2) { + + my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_]; + + my $is_list = $self->[_ris_list_by_seqno_]->{$seqno}; + my $has_list = $self->[_rhas_list_]->{$seqno}; + my $has_code_block = $self->[_rhas_code_block_]->{$seqno}; + my $has_ternary = $self->[_rhas_ternary_]->{$seqno}; + + if ( !$is_list + || $has_list + || $flag2 eq '2' && ( $has_code_block || $has_ternary ) ) + { + $match_flag2 = 1; } + } + return $match_flag2; +} ## end sub is_excluded_lp - # Always OK if this list contains a broken sub-container with - # a non-terminal line-ending comma - if ($has_list_with_lec) { goto OK } +sub set_excluded_lp_containers { - # From here on we are considering a single container... + my ($self) = @_; + return unless ($rOpts_line_up_parentheses); + my $rLL = $self->[_rLL_]; + return unless ( defined($rLL) && @{$rLL} ); - # A single container must have at least 1 line-ending comma: - next unless ( $rlec_count_by_seqno->{$seqno} ); + my $K_opening_container = $self->[_K_opening_container_]; + my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_]; + my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; - # Since it has a line-ending comma, it will stay broken if the -boc - # flag is set - if ($rOpts_break_at_old_comma_breakpoints) { goto OK } + foreach my $seqno ( keys %{$K_opening_container} ) { - # OK if the container contains multiple fat commas - # Better: multiple lines with fat commas - if ( !$rOpts_ignore_old_breakpoints ) { - my $rtype_count = $rtype_count_by_seqno->{$seqno}; - next unless ($rtype_count); - my $fat_comma_count = $rtype_count->{'=>'}; - DEBUG_BBX - && print STDOUT "BBX: fat comma count=$fat_comma_count\n"; - if ( $fat_comma_count && $fat_comma_count >= 2 ) { goto OK } - } - - # The last check we can make is to see if this container could fit on a - # single line. Use the least possible indentation estimate, ci=0, - # so we are not subtracting $ci * $rOpts_continuation_indentation from - # tabulated $maximum_text_length value. - my $maximum_text_length = $maximum_text_length_at_level[$level]; - my $K_closing = $K_closing_container->{$seqno}; - my $length = $self->cumulative_length_before_K($K_closing) - - $self->cumulative_length_before_K($KK); - my $excess_length = $length - $maximum_text_length; - DEBUG_BBX - && print STDOUT -"BBX: excess=$excess_length: maximum_text_length=$maximum_text_length, length=$length, ci=$ci\n"; + # code blocks are always excluded by the -lp coding so we can skip them + next if ( $rblock_type_of_seqno->{$seqno} ); - # OK if the net container definitely breaks on length - if ( $excess_length > $length_tol ) { - DEBUG_BBX - && print STDOUT "BBX: excess_length=$excess_length\n"; - goto OK; - } + my $KK = $K_opening_container->{$seqno}; + next unless defined($KK); - # Otherwise skip it - next; + # see if a user exclusion rule turns off -lp for this container + if ( $self->is_excluded_lp($KK) ) { + $ris_excluded_lp_container->{$seqno} = 1; + } + } + return; +} ## end sub set_excluded_lp_containers - ################################################################# - # Part 3: Looks OK: apply -bbx=n and any related -bbxi=n flag - ################################################################# +###################################### +# CODE SECTION 6: Process line-by-line +###################################### - OK: +sub process_all_lines { - DEBUG_BBX && print STDOUT "BBX: OK to break\n"; + #---------------------------------------------------------- + # Main loop to format all lines of a file according to type + #---------------------------------------------------------- - # -bbhbi=n - # -bbsbi=n - # -bbpi=n + my $self = shift; + my $rlines = $self->[_rlines_]; + my $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'}; + my $file_writer_object = $self->[_file_writer_object_]; + my $logger_object = $self->[_logger_object_]; + my $vertical_aligner_object = $self->[_vertical_aligner_object_]; + my $save_logfile = $self->[_save_logfile_]; - # where: + # Flag to prevent blank lines when POD occurs in a format skipping sect. + my $in_format_skipping_section; - # n=0 default indentation (usually one ci) - # n=1 outdent one ci - # n=2 indent one level (minus one ci) - # n=3 indent one extra ci [This may be dropped] + # set locations for blanks around long runs of keywords + my $rwant_blank_line_after = $self->keyword_group_scan(); - # NOTE: We are adjusting indentation of the opening container. The - # closing container will normally follow the indentation of the opening - # container automatically, so this is not currently done. - next unless ($ci); + my $line_type = EMPTY_STRING; + my $i_last_POD_END = -10; + my $i = -1; + foreach my $line_of_tokens ( @{$rlines} ) { - # option 1: outdent - if ( $ci_flag == 1 ) { - $ci -= 1; + # insert blank lines requested for keyword sequences + if ( defined( $rwant_blank_line_after->{$i} ) + && $rwant_blank_line_after->{$i} == 1 ) + { + $self->want_blank_line(); } - # option 2: indent one level - elsif ( $ci_flag == 2 ) { - $ci -= 1; - $radjusted_levels->[$KK] += 1; - } + $i++; - # unknown option - else { - # Shouldn't happen - leave ci unchanged - } + my $last_line_type = $line_type; + $line_type = $line_of_tokens->{_line_type}; + my $input_line = $line_of_tokens->{_line_text}; - $rLL->[$KK]->[_CI_LEVEL_] = $ci if ( $ci >= 0 ); - } + # _line_type codes are: + # SYSTEM - system-specific code before hash-bang line + # CODE - line of perl code (including comments) + # POD_START - line starting pod, such as '=head' + # POD - pod documentation text + # POD_END - last line of pod section, '=cut' + # HERE - text of here-document + # HERE_END - last line of here-doc (target word) + # FORMAT - format section + # FORMAT_END - last line of format section, '.' + # SKIP - code skipping section + # SKIP_END - last line of code skipping section, '#>>V' + # DATA_START - __DATA__ line + # DATA - unidentified text following __DATA__ + # END_START - __END__ line + # END - unidentified text following __END__ + # ERROR - we are in big trouble, probably not a perl script - $self->[_rbreak_before_container_by_seqno_] = - $rbreak_before_container_by_seqno; - $self->[_rwant_reduced_ci_] = $rwant_reduced_ci; - return; -} ## end sub break_before_list_opening_containers + # put a blank line after an =cut which comes before __END__ and __DATA__ + # (required by podchecker) + if ( $last_line_type eq 'POD_END' && !$self->[_saw_END_or_DATA_] ) { + $i_last_POD_END = $i; + $file_writer_object->reset_consecutive_blank_lines(); + if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) { + $self->want_blank_line(); + } + } -use constant DEBUG_XCI => 0; + # handle line of code.. + if ( $line_type eq 'CODE' ) { -sub extended_ci { + my $CODE_type = $line_of_tokens->{_code_type}; + $in_format_skipping_section = $CODE_type eq 'FS'; - # This routine implements the -xci (--extended-continuation-indentation) - # flag. We add CI to interior tokens of a container which itself has CI but - # only if a token does not already have CI. + # Handle blank lines + if ( $CODE_type eq 'BL' ) { - # To do this, we will locate opening tokens which themselves have - # continuation indentation (CI). We track them with their sequence - # numbers. These sequence numbers are called 'controlling sequence - # numbers'. They apply continuation indentation to the tokens that they - # contain. These inner tokens remember their controlling sequence numbers. - # Later, when these inner tokens are output, they have to see if the output - # lines with their controlling tokens were output with CI or not. If not, - # then they must remove their CI too. + # Keep this blank? Start with the flag -kbl=n, where + # n=0 ignore all old blank lines + # n=1 stable: keep old blanks, but limited by -mbl=n + # n=2 keep all old blank lines, regardless of -mbl=n + # If n=0 we delete all old blank lines and let blank line + # rules generate any needed blank lines. + my $kgb_keep = $rOpts_keep_old_blank_lines; - # The controlling CI concept works hierarchically. But CI itself is not - # hierarchical; it is either on or off. There are some rare instances where - # it would be best to have hierarchical CI too, but not enough to be worth - # the programming effort. + # Then delete lines requested by the keyword-group logic if + # allowed + if ( $kgb_keep == 1 + && defined( $rwant_blank_line_after->{$i} ) + && $rwant_blank_line_after->{$i} == 2 ) + { + $kgb_keep = 0; + } - # The operations to remove unwanted CI are done in sub 'undo_ci'. + # But always keep a blank line following an =cut + if ( $i - $i_last_POD_END < 3 && !$kgb_keep ) { + $kgb_keep = 1; + } - my ($self) = @_; + if ($kgb_keep) { + $self->flush($CODE_type); + $file_writer_object->write_blank_code_line( + $rOpts_keep_old_blank_lines == 2 ); + $self->[_last_line_leading_type_] = 'b'; + } + next; + } + else { - my $rLL = $self->[_rLL_]; - return unless ( defined($rLL) && @{$rLL} ); + # Let logger see all non-blank lines of code. This is a slow + # operation so we avoid it if it is not going to be saved. + if ( $save_logfile && $logger_object ) { + $logger_object->black_box( $line_of_tokens, + $vertical_aligner_object->get_output_line_number ); + } + } - my $ris_list_by_seqno = $self->[_ris_list_by_seqno_]; - my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_]; - my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_]; - my $rlines = $self->[_rlines_]; - my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_]; - my $ris_bli_container = $self->[_ris_bli_container_]; - my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; + # Handle Format Skipping (FS) and Verbatim (VB) Lines + if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) { + $self->write_unindented_line("$input_line"); + $file_writer_object->reset_consecutive_blank_lines(); + next; + } - my %available_space; + # Handle all other lines of code + $self->process_line_of_CODE($line_of_tokens); + } - # Loop over all opening container tokens - my $K_opening_container = $self->[_K_opening_container_]; - my $K_closing_container = $self->[_K_closing_container_]; - my $ris_broken_container = $self->[_ris_broken_container_]; - my @seqno_stack; - my $seqno_top; - my $KLAST; - my $KNEXT = $self->[_K_first_seq_item_]; + # handle line of non-code.. + else { - # The following variable can be used to allow a little extra space to - # avoid blinkers. A value $len_tol = 20 fixed the following - # fixes cases: b1025 b1026 b1027 b1028 b1029 b1030 but NOT b1031. - # It turned out that the real problem was mis-parsing a list brace as - # a code block in a 'use' statement when the line length was extremely - # small. A value of 0 works now, but a slightly larger value can - # be used to minimize the chance of a blinker. - my $len_tol = 0; + # set special flags + my $skip_line = 0; + if ( substr( $line_type, 0, 3 ) eq 'POD' ) { - while ( defined($KNEXT) ) { + # Pod docs should have a preceding blank line. But stay + # out of __END__ and __DATA__ sections, because + # the user may be using this section for any purpose whatsoever + if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; } + if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// } + if ( !$skip_line + && !$in_format_skipping_section + && $line_type eq 'POD_START' + && !$self->[_saw_END_or_DATA_] ) + { + $self->want_blank_line(); + } + } - # Fix all tokens up to the next sequence item if we are changing CI - if ($seqno_top) { + # leave the blank counters in a predictable state + # after __END__ or __DATA__ + elsif ( $line_type eq 'END_START' || $line_type eq 'DATA_START' ) { + $file_writer_object->reset_consecutive_blank_lines(); + $self->[_saw_END_or_DATA_] = 1; + } - my $is_list = $ris_list_by_seqno->{$seqno_top}; - my $space = $available_space{$seqno_top}; - my $length = $rLL->[$KLAST]->[_CUMULATIVE_LENGTH_]; - my $count = 0; - foreach my $Kt ( $KLAST + 1 .. $KNEXT - 1 ) { + # Patch to avoid losing blank lines after a code-skipping block; + # fixes case c047. + elsif ( $line_type eq 'SKIP_END' ) { + $file_writer_object->reset_consecutive_blank_lines(); + } - # But do not include tokens which might exceed the line length - # and are not in a list. - # ... This fixes case b1031 - my $length_before = $length; - $length = $rLL->[$Kt]->[_CUMULATIVE_LENGTH_]; - if ( - !$rLL->[$Kt]->[_CI_LEVEL_] - && ( $is_list - || $length - $length_before < $space - || $rLL->[$Kt]->[_TYPE_] eq '#' ) - ) - { - $rLL->[$Kt]->[_CI_LEVEL_] = 1; - $rseqno_controlling_my_ci->{$Kt} = $seqno_top; - $count++; - } + # write unindented non-code line + if ( !$skip_line ) { + $self->write_unindented_line($input_line); } - $ris_seqno_controlling_ci->{$seqno_top} += $count; } + } + return; - $KLAST = $KNEXT; - my $KK = $KNEXT; - $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_]; +} ## end sub process_all_lines - my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_]; - my $K_opening = $K_opening_container->{$seqno}; +{ ## closure keyword_group_scan - # see if we have reached the end of the current controlling container - if ( $seqno_top && $seqno == $seqno_top ) { - $seqno_top = pop @seqno_stack; - } + # this is the return var + my $rhash_of_desires; - # Patch to fix some block types... - # Certain block types arrive from the tokenizer without CI but should - # have it for this option. These include anonymous subs and - # do sort map grep eval - my $block_type = $rblock_type_of_seqno->{$seqno}; - if ( $block_type && $is_block_with_ci{$block_type} ) { - $rLL->[$KK]->[_CI_LEVEL_] = 1; - if ($seqno_top) { - $rseqno_controlling_my_ci->{$KK} = $seqno_top; - $ris_seqno_controlling_ci->{$seqno_top}++; - } - } + # user option variables for -kgb + my ( - # If this does not have ci, update ci if necessary and continue looking - if ( !$rLL->[$KK]->[_CI_LEVEL_] ) { - if ($seqno_top) { - $rLL->[$KK]->[_CI_LEVEL_] = 1; - $rseqno_controlling_my_ci->{$KK} = $seqno_top; - $ris_seqno_controlling_ci->{$seqno_top}++; - } - next; - } + $rOpts_kgb_after, + $rOpts_kgb_before, + $rOpts_kgb_delete, + $rOpts_kgb_inside, + $rOpts_kgb_size_max, + $rOpts_kgb_size_min, + + ); + + # group variables, initialized by kgb_initialize_group_vars + my ( $ibeg, $iend, $count, $level_beg, $K_closing ); + my ( @iblanks, @group, @subgroup ); + + # line variables, updated by sub keyword_group_scan + my ( $line_type, $CODE_type, $K_first, $K_last ); + my $number_of_groups_seen; + + #------------------------ + # -kgb helper subroutines + #------------------------ + + sub kgb_initialize_options { + + # check and initialize user options for -kgb + # return error flag: + # true for some input error, do not continue + # false if ok + + # Local copies of the various control parameters + $rOpts_kgb_after = $rOpts->{'keyword-group-blanks-after'}; # '-kgba' + $rOpts_kgb_before = $rOpts->{'keyword-group-blanks-before'}; # '-kgbb' + $rOpts_kgb_delete = $rOpts->{'keyword-group-blanks-delete'}; # '-kgbd' + $rOpts_kgb_inside = $rOpts->{'keyword-group-blanks-inside'}; # '-kgbi' + + # A range of sizes can be input with decimal notation like 'min.max' + # with any number of dots between the two numbers. Examples: + # string => min max matches + # 1.1 1 1 exactly 1 + # 1.3 1 3 1,2, or 3 + # 1..3 1 3 1,2, or 3 + # 5 5 - 5 or more + # 6. 6 - 6 or more + # .2 - 2 up to 2 + # 1.0 1 0 nothing + my $rOpts_kgb_size = $rOpts->{'keyword-group-blanks-size'}; # '-kgbs' + ( $rOpts_kgb_size_min, $rOpts_kgb_size_max ) = split /\.+/, + $rOpts_kgb_size; + if ( $rOpts_kgb_size_min && $rOpts_kgb_size_min !~ /^\d+$/ + || $rOpts_kgb_size_max && $rOpts_kgb_size_max !~ /^\d+$/ ) + { + Warn(<{$seqno} ) { - next; + # Turn this option off so that this message does not keep repeating + # during iterations and other files. + $rOpts->{'keyword-group-blanks-size'} = EMPTY_STRING; + return $rhash_of_desires; } + $rOpts_kgb_size_min = 1 unless ($rOpts_kgb_size_min); - # Skip if this is a -bli container (this fixes case b1065) Note: case - # b1065 is also fixed by the update for b1055, so this update is not - # essential now. But there does not seem to be a good reason to add - # xci and bli together, so the update is retained. - if ( $ris_bli_container->{$seqno} ) { - next; + if ( $rOpts_kgb_size_max && $rOpts_kgb_size_max < $rOpts_kgb_size_min ) + { + return $rhash_of_desires; } - # We are looking for opening container tokens with ci - next unless ( defined($K_opening) && $KK == $K_opening ); + # check codes for $rOpts_kgb_before and + # $rOpts_kgb_after: + # 0 = never (delete if exist) + # 1 = stable (keep unchanged) + # 2 = always (insert if missing) + return $rhash_of_desires + unless $rOpts_kgb_size_min > 0 + && ( $rOpts_kgb_before != 1 + || $rOpts_kgb_after != 1 + || $rOpts_kgb_inside + || $rOpts_kgb_delete ); - # Make sure there is a corresponding closing container - # (could be missing if the script has a brace error) - my $K_closing = $K_closing_container->{$seqno}; - next unless defined($K_closing); + return; + } ## end sub kgb_initialize_options + + sub kgb_initialize_group_vars { + + # Definitions: + # $ibeg = first line index of this entire group + # $iend = last line index of this entire group + # $count = total number of keywords seen in this entire group + # $level_beg = indentation level of this group + # @group = [ $i, $token, $count ] =list of all keywords & blanks + # @subgroup = $j, index of group where token changes + # @iblanks = line indexes of blank lines in input stream in this group + # where i=starting line index + # token (the keyword) + # count = number of this token in this subgroup + # j = index in group where token changes + $ibeg = -1; + $iend = undef; + $level_beg = -1; + $K_closing = undef; + $count = 0; + @group = (); + @subgroup = (); + @iblanks = (); + return; + } ## end sub kgb_initialize_group_vars - # Require different input lines. This will filter out a large number - # of small hash braces and array brackets. If we accidentally filter - # out an important container, it will get fixed on the next pass. - if ( - $rLL->[$K_opening]->[_LINE_INDEX_] == - $rLL->[$K_closing]->[_LINE_INDEX_] - && ( $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] - - $rLL->[$K_opening]->[_CUMULATIVE_LENGTH_] > - $rOpts_maximum_line_length ) - ) - { - DEBUG_XCI - && print "XCI: Skipping seqno=$seqno, require different lines\n"; - next; - } + sub kgb_initialize_line_vars { + $CODE_type = EMPTY_STRING; + $K_first = undef; + $K_last = undef; + $line_type = EMPTY_STRING; + return; + } ## end sub kgb_initialize_line_vars - # Do not apply -xci if adding extra ci will put the container contents - # beyond the line length limit (fixes cases b899 b935) - my $level = $rLL->[$K_opening]->[_LEVEL_]; - my $ci_level = $rLL->[$K_opening]->[_CI_LEVEL_]; - my $maximum_text_length = - $maximum_text_length_at_level[$level] - - $ci_level * $rOpts_continuation_indentation; + sub kgb_initialize { - # Fix for b1197 b1198 b1199 b1200 b1201 b1202 - # Do not apply -xci if we are running out of space - if ( $level >= $stress_level_beta ) { - DEBUG_XCI - && print -"XCI: Skipping seqno=$seqno, level=$level exceeds stress level=$stress_level_beta\n"; - next; - } + # initialize all closure variables for -kgb + # return: + # true to cause immediate exit (something is wrong) + # false to continue ... all is okay - # remember how much space is available for patch b1031 above - my $space = - $maximum_text_length - $len_tol - $rOpts_continuation_indentation; + # This is the return variable: + $rhash_of_desires = {}; - if ( $space < 0 ) { - DEBUG_XCI && print "XCI: Skipping seqno=$seqno, space=$space\n"; - next; - } - DEBUG_XCI && print "XCI: OK seqno=$seqno, space=$space\n"; + # initialize and check user options; + my $quit = kgb_initialize_options(); + if ($quit) { return $quit } - $available_space{$seqno} = $space; + # initialize variables for the current group and subgroups: + kgb_initialize_group_vars(); - # This becomes the next controlling container - push @seqno_stack, $seqno_top if ($seqno_top); - $seqno_top = $seqno; - } - return; -} ## end sub extended_ci + # initialize variables for the most recently seen line: + kgb_initialize_line_vars(); -sub braces_left_setup { + $number_of_groups_seen = 0; - # Called once per file to mark all -bl, -sbl, and -asbl containers - my $self = shift; + # all okay + return; + } ## end sub kgb_initialize - my $rOpts_bl = $rOpts->{'opening-brace-on-new-line'}; - my $rOpts_sbl = $rOpts->{'opening-sub-brace-on-new-line'}; - my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'}; - return unless ( $rOpts_bl || $rOpts_sbl || $rOpts_asbl ); + sub kgb_insert_blank_after { + my ($i) = @_; + $rhash_of_desires->{$i} = 1; + my $ip = $i + 1; + if ( defined( $rhash_of_desires->{$ip} ) + && $rhash_of_desires->{$ip} == 2 ) + { + $rhash_of_desires->{$ip} = 0; + } + return; + } ## end sub kgb_insert_blank_after - my $rLL = $self->[_rLL_]; - return unless ( defined($rLL) && @{$rLL} ); + sub kgb_split_into_sub_groups { - # We will turn on this hash for braces controlled by these flags: - my $rbrace_left = $self->[_rbrace_left_]; + # place blanks around long sub-groups of keywords + # ...if requested + return unless ($rOpts_kgb_inside); - my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; - my $ris_asub_block = $self->[_ris_asub_block_]; - my $ris_sub_block = $self->[_ris_sub_block_]; - foreach my $seqno ( keys %{$rblock_type_of_seqno} ) { + # loop over sub-groups, index k + push @subgroup, scalar @group; + my $kbeg = 1; + my $kend = @subgroup - 1; + foreach my $k ( $kbeg .. $kend ) { - my $block_type = $rblock_type_of_seqno->{$seqno}; + # index j runs through all keywords found + my $j_b = $subgroup[ $k - 1 ]; + my $j_e = $subgroup[$k] - 1; - # use -asbl flag for an anonymous sub block - if ( $ris_asub_block->{$seqno} ) { - if ($rOpts_asbl) { - $rbrace_left->{$seqno} = 1; - } - } + # index i is the actual line number of a keyword + my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] }; + my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] }; + my $num = $count_e - $count_b + 1; - # use -sbl flag for a named sub - elsif ( $ris_sub_block->{$seqno} ) { - if ($rOpts_sbl) { - $rbrace_left->{$seqno} = 1; - } - } + # This subgroup runs from line $ib to line $ie-1, but may contain + # blank lines + if ( $num >= $rOpts_kgb_size_min ) { - # use -bl flag if not a sub block of any type - else { - if ( $rOpts_bl - && $block_type =~ /$bl_pattern/ - && $block_type !~ /$bl_exclusion_pattern/ ) - { - $rbrace_left->{$seqno} = 1; + # if there are blank lines, we require that at least $num lines + # be non-blank up to the boundary with the next subgroup. + my $nog_b = my $nog_e = 1; + if ( @iblanks && !$rOpts_kgb_delete ) { + my $j_bb = $j_b + $num - 1; + my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] }; + $nog_b = $count_bb - $count_b + 1 == $num; + + my $j_ee = $j_e - ( $num - 1 ); + my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] }; + $nog_e = $count_e - $count_ee + 1 == $num; + } + if ( $nog_b && $k > $kbeg ) { + kgb_insert_blank_after( $i_b - 1 ); + } + if ( $nog_e && $k < $kend ) { + my ( $i_ep, $tok_ep, $count_ep ) = + @{ $group[ $j_e + 1 ] }; + kgb_insert_blank_after( $i_ep - 1 ); + } } } - } - return; -} ## end sub braces_left_setup + return; + } ## end sub kgb_split_into_sub_groups -sub bli_adjustment { + sub kgb_delete_if_blank { + my ( $self, $i ) = @_; - # Called once per file to implement the --brace-left-and-indent option. - # If -bli is set, adds one continuation indentation for certain braces - my $self = shift; - return unless ( $rOpts->{'brace-left-and-indent'} ); - my $rLL = $self->[_rLL_]; - return unless ( defined($rLL) && @{$rLL} ); + # delete line $i if it is blank + my $rlines = $self->[_rlines_]; + return unless ( $i >= 0 && $i < @{$rlines} ); + return if ( $rlines->[$i]->{_line_type} ne 'CODE' ); + my $code_type = $rlines->[$i]->{_code_type}; + if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; } + return; + } ## end sub kgb_delete_if_blank - my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; - my $ris_bli_container = $self->[_ris_bli_container_]; - my $rbrace_left = $self->[_rbrace_left_]; - my $K_opening_container = $self->[_K_opening_container_]; - my $K_closing_container = $self->[_K_closing_container_]; + sub kgb_delete_inner_blank_lines { - foreach my $seqno ( keys %{$rblock_type_of_seqno} ) { - my $block_type = $rblock_type_of_seqno->{$seqno}; - if ( $block_type - && $block_type =~ /$bli_pattern/ - && $block_type !~ /$bli_exclusion_pattern/ ) - { - $ris_bli_container->{$seqno} = 1; - $rbrace_left->{$seqno} = 1; - my $Ko = $K_opening_container->{$seqno}; - my $Kc = $K_closing_container->{$seqno}; - if ( defined($Ko) && defined($Kc) ) { - $rLL->[$Kc]->[_CI_LEVEL_] = ++$rLL->[$Ko]->[_CI_LEVEL_]; - } + # always remove unwanted trailing blank lines from our list + return unless (@iblanks); + while ( my $ibl = pop(@iblanks) ) { + if ( $ibl < $iend ) { push @iblanks, $ibl; last } + $iend = $ibl; } - } - return; -} ## end sub bli_adjustment -sub find_multiline_qw { + # now mark mark interior blank lines for deletion if requested + return unless ($rOpts_kgb_delete); - my $self = shift; + while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 } - # Multiline qw quotes are not sequenced items like containers { [ ( - # but behave in some respects in a similar way. So this routine finds them - # and creates a separate sequence number system for later use. + return; + } ## end sub kgb_delete_inner_blank_lines - # This is straightforward because they always begin at the end of one line - # and and at the beginning of a later line. This is true no matter how we - # finally make our line breaks, so we can find them before deciding on new - # line breaks. + sub kgb_end_group { - my $rstarting_multiline_qw_seqno_by_K = {}; - my $rending_multiline_qw_seqno_by_K = {}; - my $rKrange_multiline_qw_by_seqno = {}; - my $rmultiline_qw_has_extra_level = {}; + # end a group of keywords + my ( $self, $bad_ending ) = @_; + if ( defined($ibeg) && $ibeg >= 0 ) { - my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_]; + # then handle sufficiently large groups + if ( $count >= $rOpts_kgb_size_min ) { - my $rlines = $self->[_rlines_]; - my $rLL = $self->[_rLL_]; - my $qw_seqno; - my $num_qw_seqno = 0; - my $K_start_multiline_qw; + $number_of_groups_seen++; - foreach my $line_of_tokens ( @{$rlines} ) { + # do any blank deletions regardless of the count + kgb_delete_inner_blank_lines(); - my $line_type = $line_of_tokens->{_line_type}; - next unless ( $line_type eq 'CODE' ); - my $rK_range = $line_of_tokens->{_rK_range}; - my ( $Kfirst, $Klast ) = @{$rK_range}; - next unless ( defined($Kfirst) && defined($Klast) ); # skip blank line - if ( defined($K_start_multiline_qw) ) { - my $type = $rLL->[$Kfirst]->[_TYPE_]; + my $rlines = $self->[_rlines_]; + if ( $ibeg > 0 ) { + my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type}; + + # patch for hash bang line which is not currently marked as + # a comment; mark it as a comment + if ( $ibeg == 1 && !$code_type ) { + my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text}; + $code_type = 'BC' + if ( $line_text && $line_text =~ /^#/ ); + } + + # Do not insert a blank after a comment + # (this could be subject to a flag in the future) + if ( $code_type !~ /(BC|SBC|SBCX)/ ) { + if ( $rOpts_kgb_before == INSERT ) { + kgb_insert_blank_after( $ibeg - 1 ); + + } + elsif ( $rOpts_kgb_before == DELETE ) { + $self->kgb_delete_if_blank( $ibeg - 1 ); + } + } + } + + # We will only put blanks before code lines. We could loosen + # this rule a little, but we have to be very careful because + # for example we certainly don't want to drop a blank line + # after a line like this: + # my $var = <[_rLL_]; + my $level = $rLL->[$K_first]->[_LEVEL_]; + my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_]; - # shouldn't happen - if ( $type ne 'q' ) { - DEVEL_MODE && print STDERR <K_previous_nonblank($Kfirst); - my $Knext = $self->K_next_nonblank($Kfirst); - my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b'; - my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b'; - if ( $type_m eq 'q' && $type_p ne 'q' ) { - $rending_multiline_qw_seqno_by_K->{$Kfirst} = $qw_seqno; - $rKrange_multiline_qw_by_seqno->{$qw_seqno} = - [ $K_start_multiline_qw, $Kfirst ]; - $K_start_multiline_qw = undef; - $qw_seqno = undef; - } - } - if ( !defined($K_start_multiline_qw) - && $rLL->[$Klast]->[_TYPE_] eq 'q' ) - { - my $Kprev = $self->K_previous_nonblank($Klast); - my $Knext = $self->K_next_nonblank($Klast); - my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b'; - my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b'; - if ( $type_m ne 'q' && $type_p eq 'q' ) { - $num_qw_seqno++; - $qw_seqno = 'q' . $num_qw_seqno; - $K_start_multiline_qw = $Klast; - $rstarting_multiline_qw_seqno_by_K->{$Klast} = $qw_seqno; + if ( $level == $level_beg + && $ci_level == 0 + && !$bad_ending + && $iend < @{$rlines} + && $CODE_type ne 'HSC' ) + { + if ( $rOpts_kgb_after == INSERT ) { + kgb_insert_blank_after($iend); + } + elsif ( $rOpts_kgb_after == DELETE ) { + $self->kgb_delete_if_blank( $iend + 1 ); + } + } + } } + kgb_split_into_sub_groups(); } - } - # Give multiline qw lists extra indentation instead of CI. This option - # works well but is currently only activated when the -xci flag is set. - # The reason is to avoid unexpected changes in formatting. - if ($rOpts_extended_continuation_indentation) { - while ( my ( $qw_seqno_x, $rKrange ) = - each %{$rKrange_multiline_qw_by_seqno} ) - { - my ( $Kbeg, $Kend ) = @{$rKrange}; + # reset for another group + kgb_initialize_group_vars(); - # require isolated closing token - my $token_end = $rLL->[$Kend]->[_TOKEN_]; - next - unless ( length($token_end) == 1 - && ( $is_closing_token{$token_end} || $token_end eq '>' ) ); + return; + } ## end sub kgb_end_group - # require isolated opening token - my $token_beg = $rLL->[$Kbeg]->[_TOKEN_]; + sub kgb_find_container_end { - # allow space(s) after the qw - if ( length($token_beg) > 3 && substr( $token_beg, 2, 1 ) =~ m/\s/ ) - { - $token_beg =~ s/\s+//; - } + # If the keyword line is continued onto subsequent lines, find the + # closing token '$K_closing' so that we can easily skip past the + # contents of the container. - next unless ( length($token_beg) == 3 ); + # We only set this value if we find a simple list, meaning + # -contents only one level deep + # -not welded - foreach my $KK ( $Kbeg + 1 .. $Kend - 1 ) { - $rLL->[$KK]->[_LEVEL_]++; - $rLL->[$KK]->[_CI_LEVEL_] = 0; - } + my ($self) = @_; - # set flag for -wn option, which will remove the level - $rmultiline_qw_has_extra_level->{$qw_seqno_x} = 1; - } - } + # First check: skip if next line is not one deeper + my $Knext_nonblank = $self->K_next_nonblank($K_last); + return if ( !defined($Knext_nonblank) ); + my $rLL = $self->[_rLL_]; + my $level_next = $rLL->[$Knext_nonblank]->[_LEVEL_]; + return if ( $level_next != $level_beg + 1 ); - # For the -lp option we need to mark all parent containers of - # multiline quotes - if ( $rOpts_line_up_parentheses && !$rOpts_extended_line_up_parentheses ) { + # Find the parent container of the first token on the next line + my $parent_seqno = $self->parent_seqno_by_K($Knext_nonblank); + return unless ( defined($parent_seqno) ); - while ( my ( $qw_seqno_x, $rKrange ) = - each %{$rKrange_multiline_qw_by_seqno} ) - { - my ( $Kbeg, $Kend ) = @{$rKrange}; - my $parent_seqno = $self->parent_seqno_by_K($Kend); - next unless ($parent_seqno); + # Must not be a weld (can be unstable) + return + if ( $total_weld_count + && $self->is_welded_at_seqno($parent_seqno) ); - # If the parent container exactly surrounds this qw, then -lp - # formatting seems to work so we will not mark it. - my $is_tightly_contained; - my $Kn = $self->K_next_nonblank($Kend); - my $seqno_n = defined($Kn) ? $rLL->[$Kn]->[_TYPE_SEQUENCE_] : undef; - if ( defined($seqno_n) && $seqno_n eq $parent_seqno ) { + # Opening container must exist and be on this line + my $Ko = $self->[_K_opening_container_]->{$parent_seqno}; + return unless ( defined($Ko) && $Ko > $K_first && $Ko <= $K_last ); - my $Kp = $self->K_previous_nonblank($Kbeg); - my $seqno_p = - defined($Kp) ? $rLL->[$Kp]->[_TYPE_SEQUENCE_] : undef; - if ( defined($seqno_p) && $seqno_p eq $parent_seqno ) { - $is_tightly_contained = 1; - } - } + # Verify that the closing container exists and is on a later line + my $Kc = $self->[_K_closing_container_]->{$parent_seqno}; + return unless ( defined($Kc) && $Kc > $K_last ); - $ris_excluded_lp_container->{$parent_seqno} = 1 - unless ($is_tightly_contained); + # That's it + $K_closing = $Kc; - # continue up the tree marking parent containers - while (1) { - $parent_seqno = $self->[_rparent_of_seqno_]->{$parent_seqno}; - last - unless ( defined($parent_seqno) - && $parent_seqno ne SEQ_ROOT ); - $ris_excluded_lp_container->{$parent_seqno} = 1; - } - } - } + return; + } ## end sub kgb_find_container_end - $self->[_rstarting_multiline_qw_seqno_by_K_] = - $rstarting_multiline_qw_seqno_by_K; - $self->[_rending_multiline_qw_seqno_by_K_] = - $rending_multiline_qw_seqno_by_K; - $self->[_rKrange_multiline_qw_by_seqno_] = $rKrange_multiline_qw_by_seqno; - $self->[_rmultiline_qw_has_extra_level_] = $rmultiline_qw_has_extra_level; + sub kgb_add_to_group { + my ( $self, $i, $token, $level ) = @_; - return; -} ## end sub find_multiline_qw + # End the previous group if we have reached the maximum + # group size + if ( $rOpts_kgb_size_max && @group >= $rOpts_kgb_size_max ) { + $self->kgb_end_group(); + } -use constant DEBUG_COLLAPSED_LENGTHS => 0; + if ( @group == 0 ) { + $ibeg = $i; + $level_beg = $level; + $count = 0; + } -# Minimum space reserved for contents of a code block. A value of 40 has given -# reasonable results. With a large line length, say -l=120, this will not -# normally be noticeable but it will prevent making a mess in some edge cases. -use constant MIN_BLOCK_LEN => 40; + $count++; + $iend = $i; -my %is_handle_type; + # New sub-group? + if ( !@group || $token ne $group[-1]->[1] ) { + push @subgroup, scalar(@group); + } + push @group, [ $i, $token, $count ]; -BEGIN { - my @q = qw( w C U G i k => ); - @is_handle_type{@q} = (1) x scalar(@q); + # remember if this line ends in an open container + $self->kgb_find_container_end(); - my $i = 0; - use constant { - _max_prong_len_ => $i++, - _handle_len_ => $i++, - _seqno_o_ => $i++, - _iline_o_ => $i++, - _K_o_ => $i++, - _K_c_ => $i++, - _interrupted_list_rule_ => $i++, - }; -} + return; + } ## end sub kgb_add_to_group -sub collapsed_lengths { + #--------------------- + # -kgb main subroutine + #--------------------- - my $self = shift; + sub keyword_group_scan { + my $self = shift; - #---------------------------------------------------------------- - # Define the collapsed lengths of containers for -xlp indentation - #---------------------------------------------------------------- + # Called once per file to process --keyword-group-blanks-* parameters. - # We need an estimate of the minimum required line length starting at any - # opening container for the -xlp style. This is needed to avoid using too - # much indentation space for lower level containers and thereby running - # out of space for outer container tokens due to the maximum line length - # limit. + # Task: + # Manipulate blank lines around keyword groups (kgb* flags) + # Scan all lines looking for runs of consecutive lines beginning with + # selected keywords. Example keywords are 'my', 'our', 'local', ... but + # they may be anything. We will set flags requesting that blanks be + # inserted around and within them according to input parameters. Note + # that we are scanning the lines as they came in in the input stream, so + # they are not necessarily well formatted. - # The basic idea is that at each node in the tree we imagine that we have a - # fork with a handle and collapsible prongs: - # - # |------------ - # |-------- - # ------------|------- - # handle |------------ - # |-------- - # prongs - # - # Each prong has a minimum collapsed length. The collapsed length at a node - # is the maximum of these minimum lengths, plus the handle length. Each of - # the prongs may itself be a tree node. + # Returns: + # The output of this sub is a return hash ref whose keys are the indexes + # of lines after which we desire a blank line. For line index $i: + # $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i + # $rhash_of_desires->{$i} = 2 means we want blank line $i removed + + # Nothing to do if no blanks can be output. This test added to fix + # case b760. + if ( !$rOpts_maximum_consecutive_blank_lines ) { + return $rhash_of_desires; + } - # This is just a rough calculation to get an approximate starting point for - # indentation. Later routines will be more precise. It is important that - # these estimates be independent of the line breaks of the input stream in - # order to avoid instabilities. + #--------------- + # initialization + #--------------- + my $quit = kgb_initialize(); + if ($quit) { return $rhash_of_desires } - my $rLL = $self->[_rLL_]; - my $Klimit = $self->[_Klimit_]; - my $rlines = $self->[_rlines_]; - my $K_opening_container = $self->[_K_opening_container_]; - my $K_closing_container = $self->[_K_closing_container_]; - my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; - my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_]; - my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_]; - my $ris_permanently_broken = $self->[_ris_permanently_broken_]; - my $ris_list_by_seqno = $self->[_ris_list_by_seqno_]; - my $rhas_broken_list = $self->[_rhas_broken_list_]; - my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_]; + my $rLL = $self->[_rLL_]; + my $rlines = $self->[_rlines_]; - my $K_start_multiline_qw; - my $level_start_multiline_qw = 0; - my $max_prong_len = 0; - my $handle_len_x = 0; - my @stack; - my $len = 0; - my $last_nonblank_type = 'b'; - push @stack, - [ $max_prong_len, $handle_len_x, SEQ_ROOT, undef, undef, undef, undef ]; + $self->kgb_end_group(); + my $i = -1; + my $Opt_repeat_count = + $rOpts->{'keyword-group-blanks-repeat-count'}; # '-kgbr' - my $iline = -1; - foreach my $line_of_tokens ( @{$rlines} ) { - $iline++; - my $line_type = $line_of_tokens->{_line_type}; - next if ( $line_type ne 'CODE' ); - my $CODE_type = $line_of_tokens->{_code_type}; + #---------------------------------- + # loop over all lines of the source + #---------------------------------- + foreach my $line_of_tokens ( @{$rlines} ) { - # Always skip blank lines - next if ( $CODE_type eq 'BL' ); + $i++; + last + if ( $Opt_repeat_count > 0 + && $number_of_groups_seen >= $Opt_repeat_count ); - # Note on other line types: - # 'FS' (Format Skipping) lines may contain opening/closing tokens so - # we have to process them to keep the stack correctly sequenced. - # 'VB' (Verbatim) lines could be skipped, but testing shows that - # results look better if we include their lengths. + kgb_initialize_line_vars(); - # Also note that we could exclude -xlp formatting of containers with - # 'FS' and 'VB' lines, but in testing that was not really beneficial. + $line_type = $line_of_tokens->{_line_type}; - # So we process tokens in 'FS' and 'VB' lines like all the rest... + # always end a group at non-CODE + if ( $line_type ne 'CODE' ) { $self->kgb_end_group(); next } - my $rK_range = $line_of_tokens->{_rK_range}; - my ( $K_first, $K_last ) = @{$rK_range}; - next unless ( defined($K_first) && defined($K_last) ); - - my $has_comment = $rLL->[$K_last]->[_TYPE_] eq '#'; - - # Always ignore block comments - next if ( $has_comment && $K_first == $K_last ); - - # Handle an intermediate line of a multiline qw quote. These may - # require including some -ci or -i spaces. See cases c098/x063. - # Updated to check all lines (not just $K_first==$K_last) to fix b1316 - my $K_begin_loop = $K_first; - if ( $rLL->[$K_first]->[_TYPE_] eq 'q' ) { - - my $KK = $K_first; - my $level = $rLL->[$KK]->[_LEVEL_]; - my $ci_level = $rLL->[$KK]->[_CI_LEVEL_]; - - # remember the level of the start - if ( !defined($K_start_multiline_qw) ) { - $K_start_multiline_qw = $K_first; - $level_start_multiline_qw = $level; - my $seqno_qw = - $self->[_rstarting_multiline_qw_seqno_by_K_] - ->{$K_start_multiline_qw}; - if ( !$seqno_qw ) { - my $Kp = $self->K_previous_nonblank($K_first); - if ( defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' ) { - - $K_start_multiline_qw = $Kp; - $level_start_multiline_qw = - $rLL->[$K_start_multiline_qw]->[_LEVEL_]; - } - else { + $CODE_type = $line_of_tokens->{_code_type}; - # Fix for b1319, b1320 - goto NOT_MULTILINE_QW; - } - } + # end any group at a format skipping line + if ( $CODE_type && $CODE_type eq 'FS' ) { + $self->kgb_end_group(); + next; } - $len = $rLL->[$KK]->[_CUMULATIVE_LENGTH_] - - $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; + # continue in a verbatim (VB) type; it may be quoted text + if ( $CODE_type eq 'VB' ) { + if ( $ibeg >= 0 ) { $iend = $i; } + next; + } - # We may have to add the spaces of one level or ci level ... it - # depends depends on the -xci flag, the -wn flag, and if the qw - # uses a container token as the quote delimiter. + # and continue in blank (BL) types + if ( $CODE_type eq 'BL' ) { + if ( $ibeg >= 0 ) { + $iend = $i; + push @{iblanks}, $i; - # First rule: add ci if there is a $ci_level - if ($ci_level) { - $len += $rOpts_continuation_indentation; + # propagate current subgroup token + my $tok = $group[-1]->[1]; + push @group, [ $i, $tok, $count ]; + } + next; } - # Second rule: otherwise, look for an extra indentation level - # from the start and add one indentation level if found. - elsif ( $level > $level_start_multiline_qw ) { - $len += $rOpts_indent_columns; - } + # examine the first token of this line + my $rK_range = $line_of_tokens->{_rK_range}; + ( $K_first, $K_last ) = @{$rK_range}; + if ( !defined($K_first) ) { + + # Somewhat unexpected blank line.. + # $rK_range is normally defined for line type CODE, but this can + # happen for example if the input line was a single semicolon + # which is being deleted. In that case there was code in the + # input file but it is not being retained. So we can silently + # return. + return $rhash_of_desires; + } + + my $level = $rLL->[$K_first]->[_LEVEL_]; + my $type = $rLL->[$K_first]->[_TYPE_]; + my $token = $rLL->[$K_first]->[_TOKEN_]; + my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_]; + + # End a group 'badly' at an unexpected level. This will prevent + # blank lines being incorrectly placed after the end of the group. + # We are looking for any deviation from two acceptable patterns: + # PATTERN 1: a simple list; secondary lines are at level+1 + # PATTERN 2: a long statement; all secondary lines same level + # This was added as a fix for case b1177, in which a complex + # structure got incorrectly inserted blank lines. + if ( $ibeg >= 0 ) { - if ( $len > $max_prong_len ) { $max_prong_len = $len } + # Check for deviation from PATTERN 1, simple list: + if ( defined($K_closing) && $K_first < $K_closing ) { + $self->kgb_end_group(1) if ( $level != $level_beg + 1 ); + } - $last_nonblank_type = 'q'; + # Check for deviation from PATTERN 2, single statement: + elsif ( $level != $level_beg ) { $self->kgb_end_group(1) } + } - $K_begin_loop = $K_first + 1; + # Do not look for keywords in lists ( keyword 'my' can occur in + # lists, see case b760); fixed for c048. + if ( $self->is_list_by_K($K_first) ) { + if ( $ibeg >= 0 ) { $iend = $i } + next; + } - # We can skip to the next line if more tokens - next if ( $K_begin_loop > $K_last ); + # see if this is a code type we seek (i.e. comment) + if ( $CODE_type + && $keyword_group_list_comment_pattern + && $CODE_type =~ /$keyword_group_list_comment_pattern/ ) + { - } + my $tok = $CODE_type; - NOT_MULTILINE_QW: - $K_start_multiline_qw = undef; + # Continuing a group + if ( $ibeg >= 0 && $level == $level_beg ) { + $self->kgb_add_to_group( $i, $tok, $level ); + } - # Find the terminal token, before any side comment - my $K_terminal = $K_last; - if ($has_comment) { - $K_terminal -= 1; - $K_terminal -= 1 - if ( $rLL->[$K_terminal]->[_TYPE_] eq 'b' - && $K_terminal > $K_first ); - } + # Start new group + else { - # Use length to terminal comma if interrupted list rule applies - if ( @stack && $stack[-1]->[_interrupted_list_rule_] ) { - my $K_c = $stack[-1]->[_K_c_]; - if ( - defined($K_c) - && $rLL->[$K_terminal]->[_TYPE_] eq ',' + # first end old group if any; we might be starting new + # keywords at different level + if ( $ibeg >= 0 ) { $self->kgb_end_group(); } + $self->kgb_add_to_group( $i, $tok, $level ); + } + next; + } - # Ignore if terminal comma, causes instability (b1297, b1330) - && ( - $K_c - $K_terminal > 2 - || ( $K_c - $K_terminal == 2 - && $rLL->[ $K_terminal + 1 ]->[_TYPE_] ne 'b' ) - ) - ) + # See if it is a keyword we seek, but never start a group in a + # continuation line; the code may be badly formatted. + if ( $ci_level == 0 + && $type eq 'k' + && $token =~ /$keyword_group_list_pattern/ ) { - my $Kend = $K_terminal; - # This caused an instability in b1311 by making the result - # dependent on input. It is not really necessary because the - # comment length is added at the end of the loop. - ##if ( $has_comment - ## && !$rOpts_ignore_side_comment_lengths ) - ##{ - ## $Kend = $K_last; - ##} + # Continuing a keyword group + if ( $ibeg >= 0 && $level == $level_beg ) { + $self->kgb_add_to_group( $i, $token, $level ); + } - # changed from $len to my $leng to fix b1302 b1306 b1317 b1321 - my $leng = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - - $rLL->[ $K_first - 1 ]->[_CUMULATIVE_LENGTH_]; + # Start new keyword group + else { - # Fix for b1331: at a broken => item, include the length of - # the previous half of the item plus one for the missing space - if ( $last_nonblank_type eq '=>' ) { - $leng += $len + 1; + # first end old group if any; we might be starting new + # keywords at different level + if ( $ibeg >= 0 ) { $self->kgb_end_group(); } + $self->kgb_add_to_group( $i, $token, $level ); } - - if ( $leng > $max_prong_len ) { $max_prong_len = $leng } + next; } - } - # Loop over tokens on this line ... - foreach my $KK ( $K_begin_loop .. $K_terminal ) { + # This is not one of our keywords, but we are in a keyword group + # so see if we should continue or quit + elsif ( $ibeg >= 0 ) { - my $type = $rLL->[$KK]->[_TYPE_]; - next if ( $type eq 'b' ); + # - bail out on a large level change; we may have walked into a + # data structure or anonymous sub code. + if ( $level > $level_beg + 1 || $level < $level_beg ) { + $self->kgb_end_group(1); + next; + } - #------------------------ - # Handle sequenced tokens - #------------------------ - my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_]; - if ($seqno) { + # - keep going on a continuation line of the same level, since + # it is probably a continuation of our previous keyword, + # - and keep going past hanging side comments because we never + # want to interrupt them. + if ( ( ( $level == $level_beg ) && $ci_level > 0 ) + || $CODE_type eq 'HSC' ) + { + $iend = $i; + next; + } - my $token = $rLL->[$KK]->[_TOKEN_]; + # - continue if if we are within in a container which started + # with the line of the previous keyword. + if ( defined($K_closing) && $K_first <= $K_closing ) { + + # continue if entire line is within container + if ( $K_last <= $K_closing ) { $iend = $i; next } + + # continue at ); or }; or ]; + my $KK = $K_closing + 1; + if ( $rLL->[$KK]->[_TYPE_] eq ';' ) { + if ( $KK < $K_last ) { + if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK } + if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' ) + { + $self->kgb_end_group(1); + next; + } + } + $iend = $i; + next; + } - #---------------------------- - # Entering a new container... - #---------------------------- - if ( $is_opening_token{$token} - && defined( $K_closing_container->{$seqno} ) ) - { + $self->kgb_end_group(1); + next; + } - # save current prong length - $stack[-1]->[_max_prong_len_] = $max_prong_len; - $max_prong_len = 0; + # - end the group if none of the above + $self->kgb_end_group(); + next; + } - # Start new prong one level deeper - my $handle_len = 0; - if ( $rblock_type_of_seqno->{$seqno} ) { + # not in a keyword group; continue + else { next } + } ## end of loop over all lines - # code blocks do not use -lp indentation, but behave as - # if they had a handle of one indentation length - $handle_len = $rOpts_indent_columns; + $self->kgb_end_group(); + return $rhash_of_desires; - } - elsif ( $is_handle_type{$last_nonblank_type} ) { - $handle_len = $len; - $handle_len += 1 - if ( $KK > 0 && $rLL->[ $KK - 1 ]->[_TYPE_] eq 'b' ); - } + } ## end sub keyword_group_scan +} ## end closure keyword_group_scan - # Set a flag if the 'Interrupted List Rule' will be applied - # (see sub copy_old_breakpoints). - # - Added check on has_broken_list to fix issue b1298 +####################################### +# CODE SECTION 7: Process lines of code +####################################### - my $interrupted_list_rule = - $ris_permanently_broken->{$seqno} - && $ris_list_by_seqno->{$seqno} - && !$rhas_broken_list->{$seqno} - && !$rOpts_ignore_old_breakpoints; +{ ## begin closure process_line_of_CODE - # NOTES: Since we are looking at old line numbers we have - # to be very careful not to introduce an instability. + # The routines in this closure receive lines of code and combine them into + # 'batches' and send them along. A 'batch' is the unit of code which can be + # processed further as a unit. It has the property that it is the largest + # amount of code into which which perltidy is free to place one or more + # line breaks within it without violating any constraints. - # This following causes instability (b1288-b1296): - # $interrupted_list_rule ||= - # $rOpts_break_at_old_comma_breakpoints; + # When a new batch is formed it is sent to sub 'grind_batch_of_code'. - # - We could turn off the interrupted list rule if there is - # a broken sublist, to follow 'Compound List Rule 1'. - # - We could use the _rhas_broken_list_ flag for this. - # - But it seems safer not to do this, to avoid - # instability, since the broken sublist could be - # temporary. It seems better to let the formatting - # stabilize by itself after one or two iterations. - # - So, not doing this for now + # flags needed by the store routine + my $line_of_tokens; + my $no_internal_newlines; + my $CODE_type; - # Turn off the interrupted list rule if -vmll is set and a - # list has '=>' characters. This avoids instabilities due - # to dependence on old line breaks; issue b1325. - if ( $interrupted_list_rule - && $rOpts_variable_maximum_line_length ) - { - my $rtype_count = $rtype_count_by_seqno->{$seqno}; - if ( $rtype_count && $rtype_count->{'=>'} ) { - $interrupted_list_rule = 0; - } - } + # range of K of tokens for the current line + my ( $K_first, $K_last ); - # Include length to a comma ending this line - if ( $interrupted_list_rule - && $rLL->[$K_terminal]->[_TYPE_] eq ',' ) - { - my $Kend = $K_terminal; - - # fix for b1332: side comments handled at end of loop - ##if ( $Kend < $K_last - ## && !$rOpts_ignore_side_comment_lengths ) - ##{ - ## $Kend = $K_last; - ##} - - # Measure from the next blank if any (fixes b1301) - my $Kbeg = $KK; - if ( $rLL->[ $Kbeg + 1 ]->[_TYPE_] eq 'b' - && $Kbeg < $Kend ) - { - $Kbeg++; - } + my ( $rLL, $radjusted_levels, $rparent_of_seqno, $rdepth_of_opening_seqno, + $rblock_type_of_seqno, $ri_starting_one_line_block ); - my $leng = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - - $rLL->[$Kbeg]->[_CUMULATIVE_LENGTH_]; - if ( $leng > $max_prong_len ) { $max_prong_len = $leng } - } + # past stored nonblank tokens and flags + my ( + $K_last_nonblank_code, $looking_for_else, + $is_static_block_comment, $last_CODE_type, + $last_line_had_side_comment, $next_parent_seqno, + $next_slevel, + ); - my $K_c = $K_closing_container->{$seqno}; + # Called once at the start of a new file + sub initialize_process_line_of_CODE { + $K_last_nonblank_code = undef; + $looking_for_else = 0; + $is_static_block_comment = 0; + $last_line_had_side_comment = 0; + $next_parent_seqno = SEQ_ROOT; + $next_slevel = undef; + return; + } ## end sub initialize_process_line_of_CODE - push @stack, - [ - $max_prong_len, $handle_len, - $seqno, $iline, - $KK, $K_c, - $interrupted_list_rule - ]; - } + # Batch variables: these describe the current batch of code being formed + # and sent down the pipeline. They are initialized in the next + # sub. + my ( + $rbrace_follower, $index_start_one_line_block, + $starting_in_quote, $ending_in_quote, + ); - #-------------------- - # Exiting a container - #-------------------- - elsif ( $is_closing_token{$token} ) { - if (@stack) { + # Called before the start of each new batch + sub initialize_batch_variables { - # The current prong ends - get its handle - my $item = pop @stack; - my $handle_len = $item->[_handle_len_]; - my $seqno_o = $item->[_seqno_o_]; - my $iline_o = $item->[_iline_o_]; - my $K_o = $item->[_K_o_]; - my $K_c_expect = $item->[_K_c_]; - my $collapsed_len = $max_prong_len; - - if ( $seqno_o ne $seqno ) { - - # This can happen if input file has brace errors. - # Otherwise it shouldn't happen. Not fatal but -lp - # formatting could get messed up. - if ( DEVEL_MODE && !get_saw_brace_error() ) { - Fault(<{$seqno}; - if ($block_type) { - - my $K_c = $KK; - my $block_length = MIN_BLOCK_LEN; - my $is_one_line_block; - my $level = $rLL->[$K_o]->[_LEVEL_]; - if ( defined($K_o) && defined($K_c) ) { - - # note: fixed 3 May 2022 (removed 'my') - $block_length = - $rLL->[ $K_c - 1 ]->[_CUMULATIVE_LENGTH_] - - $rLL->[$K_o]->[_CUMULATIVE_LENGTH_]; - $is_one_line_block = $iline == $iline_o; - } + $max_index_to_go = UNDEFINED_INDEX; + $summed_lengths_to_go[0] = 0; + $nesting_depth_to_go[0] = 0; + $ri_starting_one_line_block = []; - # Code block rule 1: Use the total block length if - # it is less than the minimum. - if ( $block_length < MIN_BLOCK_LEN ) { - $collapsed_len = $block_length; - } + # Redefine some sparse arrays. + # It is more efficient to redefine these sparse arrays and rely on + # undef's instead of initializing to 0's. Testing showed that using + # @array=() is more efficient than $#array=-1 - # Code block rule 2: Use the full length of a - # one-line block to avoid breaking it, unless - # extremely long. We do not need to do a precise - # check here, because if it breaks then it will - # stay broken on later iterations. - elsif ( - $is_one_line_block - && $block_length < - $maximum_line_length_at_level[$level] - - # But skip this for sort/map/grep/eval blocks - # because they can reform (b1345) - && !$is_sort_map_grep_eval{$block_type} - ) - { - $collapsed_len = $block_length; - } + @old_breakpoint_to_go = (); + @forced_breakpoint_to_go = (); + @block_type_to_go = (); + @mate_index_to_go = (); + @type_sequence_to_go = (); - # Code block rule 3: Otherwise the length should be - # at least MIN_BLOCK_LEN to avoid scrunching code - # blocks. - elsif ( $collapsed_len < MIN_BLOCK_LEN ) { - $collapsed_len = MIN_BLOCK_LEN; - } - } + # NOTE: @nobreak_to_go is sparse and could be treated this way, but + # testing showed that there would be very little efficiency gain + # because an 'if' test must be added in store_token_to_go. - # Store the result. Some extra space, '2', allows for - # length of an opening token, inside space, comma, ... - # This constant has been tuned to give good overall - # results. - $collapsed_len += 2; - $rcollapsed_length_by_seqno->{$seqno} = $collapsed_len; - - # Restart scanning the lower level prong - if (@stack) { - $max_prong_len = $stack[-1]->[_max_prong_len_]; - $collapsed_len += $handle_len; - if ( $collapsed_len > $max_prong_len ) { - $max_prong_len = $collapsed_len; - } - } - } - } + # The initialization code for the remaining batch arrays is as follows + # and can be activated for testing. But profiling shows that it is + # time-consuming to re-initialize the batch arrays and is not necessary + # because the maximum valid token, $max_index_to_go, is carefully + # controlled. This means however that it is not possible to do any + # type of filter or map operation directly on these arrays. And it is + # not possible to use negative indexes. As a precaution against program + # changes which might do this, sub pad_array_to_go adds some undefs at + # the end of the current batch of data. - # it is a ternary - no special processing for these yet - else { + ## 0 && do { #<<< + ## @nobreak_to_go = (); + ## @token_lengths_to_go = (); + ## @levels_to_go = (); + ## @ci_levels_to_go = (); + ## @tokens_to_go = (); + ## @K_to_go = (); + ## @types_to_go = (); + ## @leading_spaces_to_go = (); + ## @reduced_spaces_to_go = (); + ## @inext_to_go = (); + ## @parent_seqno_to_go = (); + ## }; - } + $rbrace_follower = undef; + $ending_in_quote = 0; - $len = 0; - $last_nonblank_type = $type; - next; - } + $index_start_one_line_block = undef; - #---------------------------- - # Handle non-container tokens - #---------------------------- - my $token_length = $rLL->[$KK]->[_TOKEN_LENGTH_]; + # initialize forced breakpoint vars associated with each output batch + $forced_breakpoint_count = 0; + $index_max_forced_break = UNDEFINED_INDEX; + $forced_breakpoint_undo_count = 0; - # Count lengths of things like 'xx => yy' as a single item - if ( $type eq '=>' ) { - $len += $token_length + 1; - if ( $len > $max_prong_len ) { $max_prong_len = $len } - } - elsif ( $last_nonblank_type eq '=>' ) { - $len += $token_length; - if ( $len > $max_prong_len ) { $max_prong_len = $len } + return; + } ## end sub initialize_batch_variables - # but only include one => per item - $len = $token_length; - } + sub leading_spaces_to_go { - # include everything to end of line after a here target - elsif ( $type eq 'h' ) { - $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] - - $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; - if ( $len > $max_prong_len ) { $max_prong_len = $len } - } + # return the number of indentation spaces for a token in the output + # stream - # for everything else just use the token length - else { - $len = $token_length; - if ( $len > $max_prong_len ) { $max_prong_len = $len } - } - $last_nonblank_type = $type; + my ($ii) = @_; + return 0 if ( $ii < 0 ); + my $indentation = $leading_spaces_to_go[$ii]; + return ref($indentation) ? $indentation->get_spaces() : $indentation; + } ## end sub leading_spaces_to_go - } ## end loop over tokens on this line + sub create_one_line_block { - # Now take care of any side comment - if ($has_comment) { - if ($rOpts_ignore_side_comment_lengths) { - $len = 0; - } - else { + # set index starting next one-line block + # call with no args to delete the current one-line block + ($index_start_one_line_block) = @_; + return; + } ## end sub create_one_line_block - # For a side comment when -iscl is not set, measure length from - # the start of the previous nonblank token - my $len0 = - $K_terminal > 0 - ? $rLL->[ $K_terminal - 1 ]->[_CUMULATIVE_LENGTH_] - : 0; - $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] - $len0; - if ( $len > $max_prong_len ) { $max_prong_len = $len } - } - } + # Routine to place the current token into the output stream. + # Called once per output token. - } ## end loop over lines + use constant DEBUG_STORE => 0; - if (DEBUG_COLLAPSED_LENGTHS) { - print "\nCollapsed lengths--\n"; - foreach - my $key ( sort { $a <=> $b } keys %{$rcollapsed_length_by_seqno} ) - { - my $clen = $rcollapsed_length_by_seqno->{$key}; - print "$key -> $clen\n"; - } - } + sub store_token_to_go { - return; -} ## end sub collapsed_lengths + my ( $self, $Ktoken_vars, $rtoken_vars ) = @_; -sub is_excluded_lp { + #------------------------------------------------------- + # Token storage utility for sub process_line_of_CODE. + # Add one token to the next batch of '_to_go' variables. + #------------------------------------------------------- - # Decide if this container is excluded by user request: - # returns true if this token is excluded (i.e., may not use -lp) - # returns false otherwise + # Input parameters: + # $Ktoken_vars = the index K in the global token array + # $rtoken_vars = $rLL->[$Ktoken_vars] = the corresponding token values + # unless they are temporarily being overridden - # The control hash can either describe: - # what to exclude: $line_up_parentheses_control_is_lxpl = 1, or - # what to include: $line_up_parentheses_control_is_lxpl = 0 + #------------------------------------------------------------------ + # NOTE: called once per token so coding efficiency is critical here. + # All changes need to be benchmarked with Devel::NYTProf. + #------------------------------------------------------------------ - my ( $self, $KK ) = @_; - my $rLL = $self->[_rLL_]; - my $rtoken_vars = $rLL->[$KK]; - my $token = $rtoken_vars->[_TOKEN_]; - my $rflags = $line_up_parentheses_control_hash{$token}; + my ( - #----------------------------------------------- - # TEST #1: check match to listed container types - #----------------------------------------------- - if ( !defined($rflags) ) { + $type, + $token, + $ci_level, + $level, + $seqno, + $length, - # There is no entry for this container, so we are done - return !$line_up_parentheses_control_is_lxpl; - } + ) = @{$rtoken_vars}[ - my ( $flag1, $flag2 ) = @{$rflags}; + _TYPE_, + _TOKEN_, + _CI_LEVEL_, + _LEVEL_, + _TYPE_SEQUENCE_, + _TOKEN_LENGTH_, - #----------------------------------------------------------- - # TEST #2: check match to flag1, the preceding nonblank word - #----------------------------------------------------------- - my $match_flag1 = !defined($flag1) || $flag1 eq '*'; - if ( !$match_flag1 ) { + ]; - # Find the previous token - my ( $is_f, $is_k, $is_w ); - my $Kp = $self->K_previous_nonblank($KK); - if ( defined($Kp) ) { - my $type_p = $rLL->[$Kp]->[_TYPE_]; - my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_]; + # Check for emergency flush... + # The K indexes in the batch must always be a continuous sequence of + # the global token array. The batch process programming assumes this. + # If storing this token would cause this relation to fail we must dump + # the current batch before storing the new token. It is extremely rare + # for this to happen. One known example is the following two-line + # snippet when run with parameters + # --noadd-newlines --space-terminal-semicolon: + # if ( $_ =~ /PENCIL/ ) { $pencil_flag= 1 } ; ; + # $yy=1; + if ( $max_index_to_go >= 0 ) { + if ( $Ktoken_vars != $K_to_go[$max_index_to_go] + 1 ) { + $self->flush_batch_of_CODE(); + } - # keyword? - $is_k = $type_p eq 'k'; + # Do not output consecutive blank tokens ... this should not + # happen, but it is worth checking. Later code can then make the + # simplifying assumption that blank tokens are not consecutive. + elsif ( $type eq 'b' && $types_to_go[$max_index_to_go] eq 'b' ) { - # function call? - $is_f = $self->[_ris_function_call_paren_]->{$seqno}; + if (DEVEL_MODE) { - # either keyword or function call? - $is_w = $is_k || $is_f; + # if this happens, it is may be that consecutive blanks + # were inserted into the token stream in 'respace_tokens' + my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1; + Fault("consecutive blanks near line $lno; please fix"); + } + return; + } } - # Check for match based on flag1 and the previous token: - if ( $flag1 eq 'k' ) { $match_flag1 = $is_k } - elsif ( $flag1 eq 'K' ) { $match_flag1 = !$is_k } - elsif ( $flag1 eq 'f' ) { $match_flag1 = $is_f } - elsif ( $flag1 eq 'F' ) { $match_flag1 = !$is_f } - elsif ( $flag1 eq 'w' ) { $match_flag1 = $is_w } - elsif ( $flag1 eq 'W' ) { $match_flag1 = !$is_w } - } - - # See if we can exclude this based on the flag1 test... - if ($line_up_parentheses_control_is_lxpl) { - return 1 if ($match_flag1); - } - else { - return 1 if ( !$match_flag1 ); - } - - #------------------------------------------------------------- - # TEST #3: exclusion based on flag2 and the container contents - #------------------------------------------------------------- + # Do not start a batch with a blank token. + # Fixes cases b149 b888 b984 b985 b986 b987 + else { + if ( $type eq 'b' ) { return } + } - # Note that this is an exclusion test for both -lpxl or -lpil input methods - # The options are: - # 0 or blank: ignore container contents - # 1 exclude non-lists or lists with sublists - # 2 same as 1 but also exclude lists with code blocks + # Update counter and do initializations if first token of new batch + if ( !++$max_index_to_go ) { - my $match_flag2; - if ($flag2) { + # Reset flag '$starting_in_quote' for a new batch. It must be set + # to the value of '$in_continued_quote', but here for efficiency we + # set it to zero, which is its normal value. Then in coding below + # we will change it if we find we are actually in a continued quote. + $starting_in_quote = 0; - my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_]; + # Update the next parent sequence number for each new batch. - my $is_list = $self->[_ris_list_by_seqno_]->{$seqno}; - my $has_list = $self->[_rhas_list_]->{$seqno}; - my $has_code_block = $self->[_rhas_code_block_]->{$seqno}; - my $has_ternary = $self->[_rhas_ternary_]->{$seqno}; + #---------------------------------------- + # Begin coding from sub parent_seqno_by_K + #---------------------------------------- - if ( !$is_list - || $has_list - || $flag2 eq '2' && ( $has_code_block || $has_ternary ) ) - { - $match_flag2 = 1; - } - } - return $match_flag2; -} ## end sub is_excluded_lp + # The following is equivalent to this call but much faster: + # $next_parent_seqno = $self->parent_seqno_by_K($Ktoken_vars); -sub set_excluded_lp_containers { + $next_parent_seqno = SEQ_ROOT; + if ($seqno) { + $next_parent_seqno = $rparent_of_seqno->{$seqno}; + } + else { + my $Kt = $rLL->[$Ktoken_vars]->[_KNEXT_SEQ_ITEM_]; + if ( defined($Kt) ) { + my $type_sequence_t = $rLL->[$Kt]->[_TYPE_SEQUENCE_]; + my $type_t = $rLL->[$Kt]->[_TYPE_]; - my ($self) = @_; - return unless ($rOpts_line_up_parentheses); - my $rLL = $self->[_rLL_]; - return unless ( defined($rLL) && @{$rLL} ); + # if next container token is closing, it is the parent seqno + if ( $is_closing_type{$type_t} ) { + $next_parent_seqno = $type_sequence_t; + } - my $K_opening_container = $self->[_K_opening_container_]; - my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_]; - my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; + # otherwise we want its parent container + else { + $next_parent_seqno = + $rparent_of_seqno->{$type_sequence_t}; + } + } + } + $next_parent_seqno = SEQ_ROOT + unless ( defined($next_parent_seqno) ); - foreach my $seqno ( keys %{$K_opening_container} ) { + #-------------------------------------- + # End coding from sub parent_seqno_by_K + #-------------------------------------- - # code blocks are always excluded by the -lp coding so we can skip them - next if ( $rblock_type_of_seqno->{$seqno} ); + $next_slevel = $rdepth_of_opening_seqno->[$next_parent_seqno] + 1; + } - my $KK = $K_opening_container->{$seqno}; - next unless defined($KK); + # Clip levels to zero if there are level errors in the file. + # We had to wait until now for reasons explained in sub 'write_line'. + if ( $level < 0 ) { $level = 0 } - # see if a user exclusion rule turns off -lp for this container - if ( $self->is_excluded_lp($KK) ) { - $ris_excluded_lp_container->{$seqno} = 1; + # Safety check that length is defined. This is slow and should not be + # needed now, so just do it in DEVEL_MODE to check programming changes. + # Formerly needed for --indent-only, in which the entire set of tokens + # is normally turned into type 'q'. Lengths are now defined in sub + # 'respace_tokens' so this check is no longer needed. + if ( DEVEL_MODE && !defined($length) ) { + my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1; + $length = length($token); + Fault(<[_rlines_]; - my $sink_object = $self->[_sink_object_]; - my $fh_tee = $self->[_fh_tee_]; - my $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'}; - my $file_writer_object = $self->[_file_writer_object_]; - my $logger_object = $self->[_logger_object_]; - my $vertical_aligner_object = $self->[_vertical_aligner_object_]; - my $save_logfile = $self->[_save_logfile_]; + # NOTE2: Eventually '$type_sequence_to_go' can be also handled as a + # sparse array with undef's, but this will require extensive testing + # because of its heavy use. - # Note for RT#118553, leave only one newline at the end of a file. - # Example code to do this is in comments below: - # my $Opt_trim_ending_blank_lines = 0; - # if ($Opt_trim_ending_blank_lines) { - # while ( my $line_of_tokens = pop @{$rlines} ) { - # my $line_type = $line_of_tokens->{_line_type}; - # if ( $line_type eq 'CODE' ) { - # my $CODE_type = $line_of_tokens->{_code_type}; - # next if ( $CODE_type eq 'BL' ); - # } - # push @{$rlines}, $line_of_tokens; - # last; - # } - # } - - # But while this would be a trivial update, it would have very undesirable - # side effects when perltidy is run from within an editor on a small snippet. - # So this is best done with a separate filter, such - # as 'delete_ending_blank_lines.pl' in the examples folder. + # We keep a running sum of token lengths from the start of this batch: + # summed_lengths_to_go[$i] = total length to just before token $i + # summed_lengths_to_go[$i+1] = total length to just after token $i + $summed_lengths_to_go[ $max_index_to_go + 1 ] = + $summed_lengths_to_go[$max_index_to_go] + $length; - # Flag to prevent blank lines when POD occurs in a format skipping sect. - my $in_format_skipping_section; + # Initialize some sequence-dependent variables to their normal values + $parent_seqno_to_go[$max_index_to_go] = $next_parent_seqno; + $nesting_depth_to_go[$max_index_to_go] = $next_slevel; - # set locations for blanks around long runs of keywords - my $rwant_blank_line_after = $self->keyword_group_scan(); + # Then fix them at container tokens: + if ($seqno) { - my $line_type = EMPTY_STRING; - my $i_last_POD_END = -10; - my $i = -1; - foreach my $line_of_tokens ( @{$rlines} ) { - $i++; + $type_sequence_to_go[$max_index_to_go] = $seqno; - # insert blank lines requested for keyword sequences - if ( $i > 0 - && defined( $rwant_blank_line_after->{ $i - 1 } ) - && $rwant_blank_line_after->{ $i - 1 } == 1 ) - { - $self->want_blank_line(); - } + $block_type_to_go[$max_index_to_go] = + $rblock_type_of_seqno->{$seqno}; - my $last_line_type = $line_type; - $line_type = $line_of_tokens->{_line_type}; - my $input_line = $line_of_tokens->{_line_text}; + if ( $is_opening_token{$token} ) { - # _line_type codes are: - # SYSTEM - system-specific code before hash-bang line - # CODE - line of perl code (including comments) - # POD_START - line starting pod, such as '=head' - # POD - pod documentation text - # POD_END - last line of pod section, '=cut' - # HERE - text of here-document - # HERE_END - last line of here-doc (target word) - # FORMAT - format section - # FORMAT_END - last line of format section, '.' - # SKIP - code skipping section - # SKIP_END - last line of code skipping section, '#>>V' - # DATA_START - __DATA__ line - # DATA - unidentified text following __DATA__ - # END_START - __END__ line - # END - unidentified text following __END__ - # ERROR - we are in big trouble, probably not a perl script + my $slevel = $rdepth_of_opening_seqno->[$seqno]; + $nesting_depth_to_go[$max_index_to_go] = $slevel; + $next_slevel = $slevel + 1; + + $next_parent_seqno = $seqno; - # put a blank line after an =cut which comes before __END__ and __DATA__ - # (required by podchecker) - if ( $last_line_type eq 'POD_END' && !$self->[_saw_END_or_DATA_] ) { - $i_last_POD_END = $i; - $file_writer_object->reset_consecutive_blank_lines(); - if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) { - $self->want_blank_line(); } - } + elsif ( $is_closing_token{$token} ) { - # handle line of code.. - if ( $line_type eq 'CODE' ) { + $next_slevel = $rdepth_of_opening_seqno->[$seqno]; + my $slevel = $next_slevel + 1; + $nesting_depth_to_go[$max_index_to_go] = $slevel; - my $CODE_type = $line_of_tokens->{_code_type}; - $in_format_skipping_section = $CODE_type eq 'FS'; + my $parent_seqno = $rparent_of_seqno->{$seqno}; + $parent_seqno = SEQ_ROOT unless defined($parent_seqno); + $parent_seqno_to_go[$max_index_to_go] = $parent_seqno; + $next_parent_seqno = $parent_seqno; - # Handle blank lines - if ( $CODE_type eq 'BL' ) { + } + else { + # ternary token: nothing to do + } + } - # Keep this blank? Start with the flag -kbl=n, where - # n=0 ignore all old blank lines - # n=1 stable: keep old blanks, but limited by -mbl=n - # n=2 keep all old blank lines, regardless of -mbl=n - # If n=0 we delete all old blank lines and let blank line - # rules generate any needed blank lines. - my $kgb_keep = $rOpts_keep_old_blank_lines; + # Define the indentation that this token will have in two cases: + # Without CI = reduced_spaces_to_go + # With CI = leading_spaces_to_go + if ( ( $Ktoken_vars == $K_first ) + && $line_of_tokens->{_starting_in_quote} ) + { + # in a continued quote - correct value set above if first token + if ( $max_index_to_go == 0 ) { $starting_in_quote = 1 } - # Then delete lines requested by the keyword-group logic if - # allowed - if ( $kgb_keep == 1 - && defined( $rwant_blank_line_after->{$i} ) - && $rwant_blank_line_after->{$i} == 2 ) - { - $kgb_keep = 0; - } + $leading_spaces_to_go[$max_index_to_go] = 0; + $reduced_spaces_to_go[$max_index_to_go] = 0; + } + else { + $leading_spaces_to_go[$max_index_to_go] = + $reduced_spaces_to_go[$max_index_to_go] = + $rOpts_indent_columns * $radjusted_levels->[$Ktoken_vars]; - # But always keep a blank line following an =cut - if ( $i - $i_last_POD_END < 3 && !$kgb_keep ) { - $kgb_keep = 1; - } + $leading_spaces_to_go[$max_index_to_go] += + $rOpts_continuation_indentation * $ci_level + if ($ci_level); + } - if ($kgb_keep) { - $self->flush($CODE_type); - $file_writer_object->write_blank_code_line( - $rOpts_keep_old_blank_lines == 2 ); - $self->[_last_line_leading_type_] = 'b'; - } - next; - } - else { + DEBUG_STORE && do { + my ( $a, $b, $c ) = caller(); + print STDOUT +"STORE: from $a $c: storing token $token type $type lev=$level at $max_index_to_go\n"; + }; + return; + } ## end sub store_token_to_go - # Let logger see all non-blank lines of code. This is a slow - # operation so we avoid it if it is not going to be saved. - if ( $save_logfile && $logger_object ) { - $logger_object->black_box( $line_of_tokens, - $vertical_aligner_object->get_output_line_number ); - } - } + sub flush_batch_of_CODE { - # Handle Format Skipping (FS) and Verbatim (VB) Lines - if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) { - $self->write_unindented_line("$input_line"); - $file_writer_object->reset_consecutive_blank_lines(); - next; - } + # Finish and process the current batch. + # This must be the only call to grind_batch_of_CODE() + my ($self) = @_; - # Handle all other lines of code - $self->process_line_of_CODE($line_of_tokens); - } + # If a batch has been started ... + if ( $max_index_to_go >= 0 ) { - # handle line of non-code.. - else { + # Create an array to hold variables for this batch + my $this_batch = []; - # set special flags - my $skip_line = 0; - if ( substr( $line_type, 0, 3 ) eq 'POD' ) { + $this_batch->[_starting_in_quote_] = 1 if ($starting_in_quote); + $this_batch->[_ending_in_quote_] = 1 if ($ending_in_quote); - # Pod docs should have a preceding blank line. But stay - # out of __END__ and __DATA__ sections, because - # the user may be using this section for any purpose whatsoever - if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; } - if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// } - if ( !$skip_line - && !$in_format_skipping_section - && $line_type eq 'POD_START' - && !$self->[_saw_END_or_DATA_] ) - { - $self->want_blank_line(); - } + if ( $CODE_type || $last_CODE_type ) { + $this_batch->[_batch_CODE_type_] = + $K_to_go[$max_index_to_go] >= $K_first + ? $CODE_type + : $last_CODE_type; } - # leave the blank counters in a predictable state - # after __END__ or __DATA__ - elsif ( $line_type eq 'END_START' || $line_type eq 'DATA_START' ) { - $file_writer_object->reset_consecutive_blank_lines(); - $self->[_saw_END_or_DATA_] = 1; - } + $last_line_had_side_comment = + ( $max_index_to_go > 0 && $types_to_go[$max_index_to_go] eq '#' ); - # Patch to avoid losing blank lines after a code-skipping block; - # fixes case c047. - elsif ( $line_type eq 'SKIP_END' ) { - $file_writer_object->reset_consecutive_blank_lines(); + # The flag $is_static_block_comment applies to the line which just + # arrived. So it only applies if we are outputting that line. + if ( $is_static_block_comment && !$last_line_had_side_comment ) { + $this_batch->[_is_static_block_comment_] = + $K_to_go[0] == $K_first; } - # write unindented non-code line - if ( !$skip_line ) { - $self->write_unindented_line($input_line); - } - } - } - return; + $this_batch->[_ri_starting_one_line_block_] = + $ri_starting_one_line_block; -} ## end sub process_all_lines + $self->[_this_batch_] = $this_batch; -sub keyword_group_scan { - my $self = shift; + #------------------- + # process this batch + #------------------- + $self->grind_batch_of_CODE(); - #------------------------------------------------------------------------- - # Called once per file to process any --keyword-group-blanks-* parameters. - #------------------------------------------------------------------------- + # Done .. this batch is history + $self->[_this_batch_] = undef; - # Manipulate blank lines around keyword groups (kgb* flags) - # Scan all lines looking for runs of consecutive lines beginning with - # selected keywords. Example keywords are 'my', 'our', 'local', ... but - # they may be anything. We will set flags requesting that blanks be - # inserted around and within them according to input parameters. Note - # that we are scanning the lines as they came in in the input stream, so - # they are not necessarily well formatted. - - # The output of this sub is a return hash ref whose keys are the indexes of - # lines after which we desire a blank line. For line index i: - # $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i - # $rhash_of_desires->{$i} = 2 means we want blank line $i removed - my $rhash_of_desires = {}; - - # Nothing to do if no blanks can be output. This test added to fix - # case b760. - if ( !$rOpts_maximum_consecutive_blank_lines ) { - return $rhash_of_desires; - } + initialize_batch_variables(); + } - my $Opt_blanks_before = $rOpts->{'keyword-group-blanks-before'}; # '-kgbb' - my $Opt_blanks_after = $rOpts->{'keyword-group-blanks-after'}; # '-kgba' - my $Opt_blanks_inside = $rOpts->{'keyword-group-blanks-inside'}; # '-kgbi' - my $Opt_blanks_delete = $rOpts->{'keyword-group-blanks-delete'}; # '-kgbd' - my $Opt_size = $rOpts->{'keyword-group-blanks-size'}; # '-kgbs' - - # A range of sizes can be input with decimal notation like 'min.max' with - # any number of dots between the two numbers. Examples: - # string => min max matches - # 1.1 1 1 exactly 1 - # 1.3 1 3 1,2, or 3 - # 1..3 1 3 1,2, or 3 - # 5 5 - 5 or more - # 6. 6 - 6 or more - # .2 - 2 up to 2 - # 1.0 1 0 nothing - my ( $Opt_size_min, $Opt_size_max ) = split /\.+/, $Opt_size; - if ( $Opt_size_min && $Opt_size_min !~ /^\d+$/ - || $Opt_size_max && $Opt_size_max !~ /^\d+$/ ) - { - Warn(<{'keyword-group-blanks-size'} = EMPTY_STRING; - return $rhash_of_desires; - } - $Opt_size_min = 1 unless ($Opt_size_min); + sub end_batch { - if ( $Opt_size_max && $Opt_size_max < $Opt_size_min ) { - return $rhash_of_desires; - } + # End the current batch, EXCEPT for a few special cases + my ($self) = @_; - # codes for $Opt_blanks_before and $Opt_blanks_after: - # 0 = never (delete if exist) - # 1 = stable (keep unchanged) - # 2 = always (insert if missing) + if ( $max_index_to_go < 0 ) { - return $rhash_of_desires - unless $Opt_size_min > 0 - && ( $Opt_blanks_before != 1 - || $Opt_blanks_after != 1 - || $Opt_blanks_inside - || $Opt_blanks_delete ); + # nothing to do .. this is harmless but wastes time. + if (DEVEL_MODE) { + Fault("sub end_batch called with nothing to do; please fix\n"); + } + return; + } - my $Opt_pattern = $keyword_group_list_pattern; - my $Opt_comment_pattern = $keyword_group_list_comment_pattern; - my $Opt_repeat_count = - $rOpts->{'keyword-group-blanks-repeat-count'}; # '-kgbr' + # Exceptions when a line does not end with a comment... (fixes c058) + if ( $types_to_go[$max_index_to_go] ne '#' ) { - my $rlines = $self->[_rlines_]; - my $rLL = $self->[_rLL_]; - my $K_closing_container = $self->[_K_closing_container_]; - my $K_opening_container = $self->[_K_opening_container_]; - my $rK_weld_right = $self->[_rK_weld_right_]; - - # variables for the current group and subgroups: - my ( $ibeg, $iend, $count, $level_beg, $K_closing, @iblanks, @group, - @subgroup ); - - # Definitions: - # ($ibeg, $iend) = starting and ending line indexes of this entire group - # $count = total number of keywords seen in this entire group - # $level_beg = indentation level of this group - # @group = [ $i, $token, $count ] =list of all keywords & blanks - # @subgroup = $j, index of group where token changes - # @iblanks = line indexes of blank lines in input stream in this group - # where i=starting line index - # token (the keyword) - # count = number of this token in this subgroup - # j = index in group where token changes - # - # These vars will contain values for the most recently seen line: - my ( $line_type, $CODE_type, $K_first, $K_last ); + # Exception 1: Do not end line in a weld + return + if ( $total_weld_count + && $self->[_rK_weld_right_]->{ $K_to_go[$max_index_to_go] } ); - my $number_of_groups_seen = 0; + # Exception 2: just set a tentative breakpoint if we might be in a + # one-line block + if ( defined($index_start_one_line_block) ) { + $self->set_forced_breakpoint($max_index_to_go); + return; + } + } - #------------------- - # helper subroutines - #------------------- + $self->flush_batch_of_CODE(); + return; + } ## end sub end_batch - my $insert_blank_after = sub { - my ($i) = @_; - $rhash_of_desires->{$i} = 1; - my $ip = $i + 1; - if ( defined( $rhash_of_desires->{$ip} ) - && $rhash_of_desires->{$ip} == 2 ) - { - $rhash_of_desires->{$ip} = 0; - } + sub flush_vertical_aligner { + my ($self) = @_; + my $vao = $self->[_vertical_aligner_object_]; + $vao->flush(); return; - }; + } ## end sub flush_vertical_aligner - my $split_into_sub_groups = sub { + # flush is called to output any tokens in the pipeline, so that + # an alternate source of lines can be written in the correct order + sub flush { + my ( $self, $CODE_type_flush ) = @_; - # place blanks around long sub-groups of keywords - # ...if requested - return unless ($Opt_blanks_inside); + # end the current batch with 1 exception - # loop over sub-groups, index k - push @subgroup, scalar @group; - my $kbeg = 1; - my $kend = @subgroup - 1; - foreach my $k ( $kbeg .. $kend ) { + $index_start_one_line_block = undef; - # index j runs through all keywords found - my $j_b = $subgroup[ $k - 1 ]; - my $j_e = $subgroup[$k] - 1; + # Exception: if we are flushing within the code stream only to insert + # blank line(s), then we can keep the batch intact at a weld. This + # improves formatting of -ce. See test 'ce1.ce' + if ( $CODE_type_flush && $CODE_type_flush eq 'BL' ) { + $self->end_batch() if ( $max_index_to_go >= 0 ); + } - # index i is the actual line number of a keyword - my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] }; - my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] }; - my $num = $count_e - $count_b + 1; + # otherwise, we have to shut things down completely. + else { $self->flush_batch_of_CODE() } - # This subgroup runs from line $ib to line $ie-1, but may contain - # blank lines - if ( $num >= $Opt_size_min ) { + $self->flush_vertical_aligner(); + return; + } ## end sub flush - # if there are blank lines, we require that at least $num lines - # be non-blank up to the boundary with the next subgroup. - my $nog_b = my $nog_e = 1; - if ( @iblanks && !$Opt_blanks_delete ) { - my $j_bb = $j_b + $num - 1; - my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] }; - $nog_b = $count_bb - $count_b + 1 == $num; + my %is_assignment_or_fat_comma; - my $j_ee = $j_e - ( $num - 1 ); - my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] }; - $nog_e = $count_e - $count_ee + 1 == $num; - } - if ( $nog_b && $k > $kbeg ) { - $insert_blank_after->( $i_b - 1 ); - } - if ( $nog_e && $k < $kend ) { - my ( $i_ep, $tok_ep, $count_ep ) = @{ $group[ $j_e + 1 ] }; - $insert_blank_after->( $i_ep - 1 ); - } - } - } - return; - }; + BEGIN { + %is_assignment_or_fat_comma = %is_assignment; + $is_assignment_or_fat_comma{'=>'} = 1; + } - my $delete_if_blank = sub { - my ($i) = @_; + sub process_line_of_CODE { - # delete line $i if it is blank - return unless ( $i >= 0 && $i < @{$rlines} ); - return if ( $rlines->[$i]->{_line_type} ne 'CODE' ); - my $code_type = $rlines->[$i]->{_code_type}; - if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; } - return; - }; + my ( $self, $my_line_of_tokens ) = @_; - my $delete_inner_blank_lines = sub { + #---------------------------------------------------------------- + # This routine is called once per INPUT line to format all of the + # tokens on that line. + #---------------------------------------------------------------- - # always remove unwanted trailing blank lines from our list - return unless (@iblanks); - while ( my $ibl = pop(@iblanks) ) { - if ( $ibl < $iend ) { push @iblanks, $ibl; last } - $iend = $ibl; - } + # It outputs full-line comments and blank lines immediately. - # now mark mark interior blank lines for deletion if requested - return unless ($Opt_blanks_delete); + # For lines of code: + # - Tokens are copied one-by-one from the global token + # array $rLL to a set of '_to_go' arrays which collect batches of + # tokens. This is done with calls to 'store_token_to_go'. + # - A batch is closed and processed upon reaching a well defined + # structural break point (i.e. code block boundary) or forced + # breakpoint (i.e. side comment or special user controls). + # - Subsequent stages of formatting make additional line breaks + # appropriate for lists and logical structures, and as necessary to + # keep line lengths below the requested maximum line length. - while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 } + #----------------------------------- + # begin initialize closure variables + #----------------------------------- + $line_of_tokens = $my_line_of_tokens; + my $rK_range = $line_of_tokens->{_rK_range}; + if ( !defined( $rK_range->[0] ) ) { - return; - }; + # Empty line: This can happen if tokens are deleted, for example + # with the -mangle parameter + return; + } - my $end_group = sub { + ( $K_first, $K_last ) = @{$rK_range}; + $last_CODE_type = $CODE_type; + $CODE_type = $line_of_tokens->{_code_type}; - # end a group of keywords - my ($bad_ending) = @_; - if ( defined($ibeg) && $ibeg >= 0 ) { + $rLL = $self->[_rLL_]; + $radjusted_levels = $self->[_radjusted_levels_]; + $rparent_of_seqno = $self->[_rparent_of_seqno_]; + $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_]; + $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; - # then handle sufficiently large groups - if ( $count >= $Opt_size_min ) { + #--------------------------------- + # end initialize closure variables + #--------------------------------- + + # This flag will become nobreak_to_go and should be set to 2 to prevent + # a line break AFTER the current token. + $no_internal_newlines = 0; + if ( !$rOpts_add_newlines || $CODE_type eq 'NIN' ) { + $no_internal_newlines = 2; + } - $number_of_groups_seen++; + my $input_line = $line_of_tokens->{_line_text}; - # do any blank deletions regardless of the count - $delete_inner_blank_lines->(); + my ( $is_block_comment, $has_side_comment ); + if ( $rLL->[$K_last]->[_TYPE_] eq '#' ) { + if ( $K_last == $K_first ) { $is_block_comment = 1 } + else { $has_side_comment = 1 } + } - if ( $ibeg > 0 ) { - my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type}; + my $is_static_block_comment_without_leading_space = + $CODE_type eq 'SBCX'; + $is_static_block_comment = + $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space; - # patch for hash bang line which is not currently marked as - # a comment; mark it as a comment - if ( $ibeg == 1 && !$code_type ) { - my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text}; - $code_type = 'BC' - if ( $line_text && $line_text =~ /^#/ ); - } + # check for a $VERSION statement + if ( $CODE_type eq 'VER' ) { + $self->[_saw_VERSION_in_this_file_] = 1; + $no_internal_newlines = 2; + } - # Do not insert a blank after a comment - # (this could be subject to a flag in the future) - if ( $code_type !~ /(BC|SBC|SBCX)/ ) { - if ( $Opt_blanks_before == INSERT ) { - $insert_blank_after->( $ibeg - 1 ); + # Add interline blank if any + my $last_old_nonblank_type = "b"; + my $first_new_nonblank_token = EMPTY_STRING; + my $K_first_true = $K_first; + if ( $max_index_to_go >= 0 ) { + $last_old_nonblank_type = $types_to_go[$max_index_to_go]; + $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_]; + if ( !$is_block_comment + && $types_to_go[$max_index_to_go] ne 'b' + && $K_first > 0 + && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' ) + { + $K_first -= 1; + } + } - } - elsif ( $Opt_blanks_before == DELETE ) { - $delete_if_blank->( $ibeg - 1 ); - } - } - } + my $rtok_first = $rLL->[$K_first]; - # We will only put blanks before code lines. We could loosen - # this rule a little, but we have to be very careful because - # for example we certainly don't want to drop a blank line - # after a line like this: - # my $var = <{_ending_in_quote}; + $ending_in_quote = $in_quote; - # - Do not put a blank before a line of different level - # - Do not put a blank line if we ended the search badly - # - Do not put a blank at the end of the file - # - Do not put a blank line before a hanging side comment - my $level = $rLL->[$K_first]->[_LEVEL_]; - my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_]; + #------------------------------------ + # Handle a block (full-line) comment. + #------------------------------------ + if ($is_block_comment) { - if ( $level == $level_beg - && $ci_level == 0 - && !$bad_ending - && $iend < @{$rlines} - && $CODE_type ne 'HSC' ) - { - if ( $Opt_blanks_after == INSERT ) { - $insert_blank_after->($iend); - } - elsif ( $Opt_blanks_after == DELETE ) { - $delete_if_blank->( $iend + 1 ); - } - } - } + if ( $rOpts->{'delete-block-comments'} ) { + $self->flush(); + return; } - $split_into_sub_groups->(); - } - # reset for another group - $ibeg = -1; - $iend = undef; - $level_beg = -1; - $K_closing = undef; - @group = (); - @subgroup = (); - @iblanks = (); + $index_start_one_line_block = undef; + $self->end_batch() if ( $max_index_to_go >= 0 ); - return; - }; + # output a blank line before block comments + if ( + # unless we follow a blank or comment line + $self->[_last_line_leading_type_] ne '#' + && $self->[_last_line_leading_type_] ne 'b' - my $find_container_end = sub { + # only if allowed + && $rOpts->{'blanks-before-comments'} - # If the keyword line is continued onto subsequent lines, find the - # closing token '$K_closing' so that we can easily skip past the - # contents of the container. + # if this is NOT an empty comment, unless it follows a side + # comment and could become a hanging side comment. + && ( + $rtok_first->[_TOKEN_] ne '#' + || ( $last_line_had_side_comment + && $rLL->[$K_first]->[_LEVEL_] > 0 ) + ) - # We only set this value if we find a simple list, meaning - # -contents only one level deep - # -not welded + # not after a short line ending in an opening token + # because we already have space above this comment. + # Note that the first comment in this if block, after + # the 'if (', does not get a blank line because of this. + && !$self->[_last_output_short_opening_token_] - # First check: skip if next line is not one deeper - my $Knext_nonblank = $self->K_next_nonblank($K_last); - goto RETURN if ( !defined($Knext_nonblank) ); - my $level_next = $rLL->[$Knext_nonblank]->[_LEVEL_]; - goto RETURN if ( $level_next != $level_beg + 1 ); + # never before static block comments + && !$is_static_block_comment + ) + { + $self->flush(); # switching to new output stream + my $file_writer_object = $self->[_file_writer_object_]; + $file_writer_object->write_blank_code_line(); + $self->[_last_line_leading_type_] = 'b'; + } - # Find the parent container of the first token on the next line - my $parent_seqno = $self->parent_seqno_by_K($Knext_nonblank); - goto RETURN unless ( defined($parent_seqno) ); + if ( + $rOpts->{'indent-block-comments'} + && ( !$rOpts->{'indent-spaced-block-comments'} + || $input_line =~ /^\s+/ ) + && !$is_static_block_comment_without_leading_space + ) + { + my $Ktoken_vars = $K_first; + my $rtoken_vars = $rLL->[$Ktoken_vars]; + $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); + $self->end_batch(); + } + else { - # Must not be a weld (can be unstable) - goto RETURN - if ( $total_weld_count && $self->is_welded_at_seqno($parent_seqno) ); + # switching to new output stream + $self->flush(); - # Opening container must exist and be on this line - my $Ko = $K_opening_container->{$parent_seqno}; - goto RETURN unless ( defined($Ko) && $Ko > $K_first && $Ko <= $K_last ); + # Note that last arg in call here is 'undef' for comments + my $file_writer_object = $self->[_file_writer_object_]; + $file_writer_object->write_code_line( + $rtok_first->[_TOKEN_] . "\n", undef ); + $self->[_last_line_leading_type_] = '#'; + } + return; + } - # Verify that the closing container exists and is on a later line - my $Kc = $K_closing_container->{$parent_seqno}; - goto RETURN unless ( defined($Kc) && $Kc > $K_last ); + #-------------------------------------------- + # Compare input/output indentation in logfile + #-------------------------------------------- + if ( $self->[_save_logfile_] ) { + + # Compare input/output indentation except for: + # - hanging side comments + # - continuation lines (have unknown leading blank space) + # - and lines which are quotes (they may have been outdented) + my $guessed_indentation_level = + $line_of_tokens->{_guessed_indentation_level}; + + unless ( $CODE_type eq 'HSC' + || $rtok_first->[_CI_LEVEL_] > 0 + || $guessed_indentation_level == 0 + && $rtok_first->[_TYPE_] eq 'Q' ) + { + my $input_line_number = $line_of_tokens->{_line_number}; + $self->compare_indentation_levels( $K_first, + $guessed_indentation_level, $input_line_number ); + } + } - # That's it - $K_closing = $Kc; - goto RETURN; + #----------------------------------------- + # Handle a line marked as indentation-only + #----------------------------------------- - RETURN: - return; - }; + if ( $CODE_type eq 'IO' ) { + $self->flush(); + my $line = $input_line; - my $add_to_group = sub { - my ( $i, $token, $level ) = @_; + # Fix for rt #125506 Unexpected string formating + # in which leading space of a terminal quote was removed + $line =~ s/\s+$//; + $line =~ s/^\s+// unless ( $line_of_tokens->{_starting_in_quote} ); - # End the previous group if we have reached the maximum - # group size - if ( $Opt_size_max && @group >= $Opt_size_max ) { - $end_group->(); - } + my $Ktoken_vars = $K_first; - if ( @group == 0 ) { - $ibeg = $i; - $level_beg = $level; - $count = 0; - } + # We work with a copy of the token variables and change the + # first token to be the entire line as a quote variable + my $rtoken_vars = $rLL->[$Ktoken_vars]; + $rtoken_vars = copy_token_as_type( $rtoken_vars, 'q', $line ); - $count++; - $iend = $i; + # Patch: length is not really important here but must be defined + $rtoken_vars->[_TOKEN_LENGTH_] = length($line); - # New sub-group? - if ( !@group || $token ne $group[-1]->[1] ) { - push @subgroup, scalar(@group); + $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); + $self->end_batch(); + return; } - push @group, [ $i, $token, $count ]; - # remember if this line ends in an open container - $find_container_end->(); + #--------------------------- + # Handle all other lines ... + #--------------------------- - return; - }; + # If we just saw the end of an elsif block, write nag message + # if we do not see another elseif or an else. + if ($looking_for_else) { - #---------------------------------- - # loop over all lines of the source - #---------------------------------- - $end_group->(); - my $i = -1; - foreach my $line_of_tokens ( @{$rlines} ) { + ## /^(elsif|else)$/ + if ( !$is_elsif_else{ $rLL->[$K_first_true]->[_TOKEN_] } ) { + write_logfile_entry("(No else block)\n"); + } + $looking_for_else = 0; + } - $i++; - last - if ( $Opt_repeat_count > 0 - && $number_of_groups_seen >= $Opt_repeat_count ); + # This is a good place to kill incomplete one-line blocks + if ( $max_index_to_go >= 0 ) { - $CODE_type = EMPTY_STRING; - $K_first = undef; - $K_last = undef; - $line_type = $line_of_tokens->{_line_type}; + # For -iob and -lp, mark essential old breakpoints. + # Fixes b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058 + # See related code below. + if ( $rOpts_ignore_old_breakpoints && $rOpts_line_up_parentheses ) { + my $type_first = $rLL->[$K_first_true]->[_TYPE_]; + if ( $is_assignment_or_fat_comma{$type_first} ) { + $old_breakpoint_to_go[$max_index_to_go] = 1; + } + } - # always end a group at non-CODE - if ( $line_type ne 'CODE' ) { $end_group->(); next } + if ( - $CODE_type = $line_of_tokens->{_code_type}; + # this check needed -mangle (for example rt125012) + ( + ( !$index_start_one_line_block ) + && ( $last_old_nonblank_type eq ';' ) + && ( $first_new_nonblank_token ne '}' ) + ) - # end any group at a format skipping line - if ( $CODE_type && $CODE_type eq 'FS' ) { - $end_group->(); - next; - } + # Patch for RT #98902. Honor request to break at old commas. + || ( $rOpts_break_at_old_comma_breakpoints + && $last_old_nonblank_type eq ',' ) + ) + { + $forced_breakpoint_to_go[$max_index_to_go] = 1 + if ($rOpts_break_at_old_comma_breakpoints); + $index_start_one_line_block = undef; + $self->end_batch(); + } - # continue in a verbatim (VB) type; it may be quoted text - if ( $CODE_type eq 'VB' ) { - if ( $ibeg >= 0 ) { $iend = $i; } - next; + # Keep any requested breaks before this line. Note that we have to + # use the original K_first because it may have been reduced above + # to add a blank. The value of the flag is as follows: + # 1 => hard break, flush the batch + # 2 => soft break, set breakpoint and continue building the batch + # added check on max_index_to_go for c177 + if ( $max_index_to_go >= 0 + && $self->[_rbreak_before_Kfirst_]->{$K_first_true} ) + { + $index_start_one_line_block = undef; + if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} == 2 ) { + $self->set_forced_breakpoint($max_index_to_go); + } + else { + $self->end_batch(); + } + } } - # and continue in blank (BL) types - if ( $CODE_type eq 'BL' ) { - if ( $ibeg >= 0 ) { - $iend = $i; - push @{iblanks}, $i; + #-------------------------------------- + # loop to process the tokens one-by-one + #-------------------------------------- + $self->process_line_inner_loop($has_side_comment); - # propagate current subgroup token - my $tok = $group[-1]->[1]; - push @group, [ $i, $tok, $count ]; - } - next; - } + # if there is anything left in the output buffer ... + if ( $max_index_to_go >= 0 ) { - # examine the first token of this line - my $rK_range = $line_of_tokens->{_rK_range}; - ( $K_first, $K_last ) = @{$rK_range}; - if ( !defined($K_first) ) { + my $type = $rLL->[$K_last]->[_TYPE_]; + my $break_flag = $self->[_rbreak_after_Klast_]->{$K_last}; - # Somewhat unexpected blank line.. - # $rK_range is normally defined for line type CODE, but this can - # happen for example if the input line was a single semicolon which - # is being deleted. In that case there was code in the input - # file but it is not being retained. So we can silently return. - return $rhash_of_desires; - } + # we have to flush .. + if ( - my $level = $rLL->[$K_first]->[_LEVEL_]; - my $type = $rLL->[$K_first]->[_TYPE_]; - my $token = $rLL->[$K_first]->[_TOKEN_]; - my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_]; + # if there is a side comment... + $type eq '#' - # End a group 'badly' at an unexpected level. This will prevent - # blank lines being incorrectly placed after the end of the group. - # We are looking for any deviation from two acceptable patterns: - # PATTERN 1: a simple list; secondary lines are at level+1 - # PATTERN 2: a long statement; all secondary lines same level - # This was added as a fix for case b1177, in which a complex structure - # got incorrectly inserted blank lines. - if ( $ibeg >= 0 ) { + # if this line ends in a quote + # NOTE: This is critically important for insuring that quoted + # lines do not get processed by things like -sot and -sct + || $in_quote - # Check for deviation from PATTERN 1, simple list: - if ( defined($K_closing) && $K_first < $K_closing ) { - $end_group->(1) if ( $level != $level_beg + 1 ); - } + # if this is a VERSION statement + || $CODE_type eq 'VER' - # Check for deviation from PATTERN 2, single statement: - elsif ( $level != $level_beg ) { $end_group->(1) } - } + # to keep a label at the end of a line + || ( $type eq 'J' && $rOpts_break_after_labels != 2 ) - # Do not look for keywords in lists ( keyword 'my' can occur in lists, - # see case b760); fixed for c048. - if ( $self->is_list_by_K($K_first) ) { - if ( $ibeg >= 0 ) { $iend = $i } - next; - } + # if we have a hard break request + || $break_flag && $break_flag != 2 - # see if this is a code type we seek (i.e. comment) - if ( $CODE_type - && $Opt_comment_pattern - && $CODE_type =~ /$Opt_comment_pattern/ ) - { + # if we are instructed to keep all old line breaks + || !$rOpts->{'delete-old-newlines'} - my $tok = $CODE_type; + # if this is a line of the form 'use overload'. A break here in + # the input file is a good break because it will allow the + # operators which follow to be formatted well. Without this + # break the formatting with -ci=4 -xci is poor, for example. - # Continuing a group - if ( $ibeg >= 0 && $level == $level_beg ) { - $add_to_group->( $i, $tok, $level ); + # use overload + # '+' => sub { + # print length $_[2], "\n"; + # my ( $x, $y ) = _order(@_); + # Number::Roman->new( int $x + $y ); + # }, + # '-' => sub { + # my ( $x, $y ) = _order(@_); + # Number::Roman->new( int $x - $y ); + # }; + || ( $max_index_to_go == 2 + && $types_to_go[0] eq 'k' + && $tokens_to_go[0] eq 'use' + && $tokens_to_go[$max_index_to_go] eq 'overload' ) + ) + { + $index_start_one_line_block = undef; + $self->end_batch(); } - # Start new group else { - # first end old group if any; we might be starting new - # keywords at different level - if ( $ibeg >= 0 ) { $end_group->(); } - $add_to_group->( $i, $tok, $level ); + # Check for a soft break request + if ( $break_flag && $break_flag == 2 ) { + $self->set_forced_breakpoint($max_index_to_go); + } + + # mark old line breakpoints in current output stream + if ( + !$rOpts_ignore_old_breakpoints + + # Mark essential old breakpoints if combination -iob -lp is + # used. These two options do not work well together, but + # we can avoid turning -iob off by ignoring -iob at certain + # essential line breaks. See also related code above. + # Fixes b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058 + || ( $rOpts_line_up_parentheses + && $is_assignment_or_fat_comma{$type} ) + ) + { + $old_breakpoint_to_go[$max_index_to_go] = 1; + } } - next; } - # See if it is a keyword we seek, but never start a group in a - # continuation line; the code may be badly formatted. - if ( $ci_level == 0 - && $type eq 'k' - && $token =~ /$Opt_pattern/ ) - { + return; + } ## end sub process_line_of_CODE - # Continuing a keyword group - if ( $ibeg >= 0 && $level == $level_beg ) { - $add_to_group->( $i, $token, $level ); - } + sub process_line_inner_loop { - # Start new keyword group - else { + my ( $self, $has_side_comment ) = @_; - # first end old group if any; we might be starting new - # keywords at different level - if ( $ibeg >= 0 ) { $end_group->(); } - $add_to_group->( $i, $token, $level ); - } - next; + #-------------------------------------------------------------------- + # Loop to move all tokens from one input line to a newly forming batch + #-------------------------------------------------------------------- + + # Do not start a new batch with a blank space + if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) { + $K_first++; } - # This is not one of our keywords, but we are in a keyword group - # so see if we should continue or quit - elsif ( $ibeg >= 0 ) { + foreach my $Ktoken_vars ( $K_first .. $K_last ) { + + my $rtoken_vars = $rLL->[$Ktoken_vars]; - # - bail out on a large level change; we may have walked into a - # data structure or anonymous sub code. - if ( $level > $level_beg + 1 || $level < $level_beg ) { - $end_group->(1); + #-------------- + # handle blanks + #-------------- + if ( $rtoken_vars->[_TYPE_] eq 'b' ) { + $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); next; } - # - keep going on a continuation line of the same level, since - # it is probably a continuation of our previous keyword, - # - and keep going past hanging side comments because we never - # want to interrupt them. - if ( ( ( $level == $level_beg ) && $ci_level > 0 ) - || $CODE_type eq 'HSC' ) - { - $iend = $i; - next; + #------------------ + # handle non-blanks + #------------------ + my $type = $rtoken_vars->[_TYPE_]; + + # If we are continuing after seeing a right curly brace, flush + # buffer unless we see what we are looking for, as in + # } else ... + if ($rbrace_follower) { + my $token = $rtoken_vars->[_TOKEN_]; + unless ( $rbrace_follower->{$token} ) { + $self->end_batch() if ( $max_index_to_go >= 0 ); + } + $rbrace_follower = undef; } - # - continue if if we are within in a container which started with - # the line of the previous keyword. - if ( defined($K_closing) && $K_first <= $K_closing ) { + my ( + $block_type, $type_sequence, + $is_opening_BLOCK, $is_closing_BLOCK, + $nobreak_BEFORE_BLOCK + ); + + if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) { - # continue if entire line is within container - if ( $K_last <= $K_closing ) { $iend = $i; next } + my $token = $rtoken_vars->[_TOKEN_]; + $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; + $block_type = $rblock_type_of_seqno->{$type_sequence}; - # continue at ); or }; or ]; - my $KK = $K_closing + 1; - if ( $rLL->[$KK]->[_TYPE_] eq ';' ) { - if ( $KK < $K_last ) { - if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK } - if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' ) { - $end_group->(1); - next; - } + if ( $block_type + && $token eq $type + && $block_type ne 't' + && !$self->[_rshort_nested_]->{$type_sequence} ) + { + + if ( $type eq '{' ) { + $is_opening_BLOCK = 1; + $nobreak_BEFORE_BLOCK = $no_internal_newlines; } - $iend = $i; + elsif ( $type eq '}' ) { + $is_closing_BLOCK = 1; + $nobreak_BEFORE_BLOCK = $no_internal_newlines; + } + } + } + + #--------------------- + # handle side comments + #--------------------- + if ($has_side_comment) { + + # if at last token ... + if ( $Ktoken_vars == $K_last ) { + $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); next; } - $end_group->(1); - next; + # if before last token ... do not allow breaks which would + # promote a side comment to a block comment + elsif ($Ktoken_vars == $K_last - 1 + || $Ktoken_vars == $K_last - 2 + && $rLL->[ $K_last - 1 ]->[_TYPE_] eq 'b' ) + { + $no_internal_newlines = 2; + } } - # - end the group if none of the above - $end_group->(); - next; - } + # Process non-blank and non-comment tokens ... - # not in a keyword group; continue - else { next } - } + #----------------- + # handle semicolon + #----------------- + if ( $type eq ';' ) { - # end of loop over all lines - $end_group->(); - return $rhash_of_desires; + my $next_nonblank_token_type = 'b'; + my $next_nonblank_token = EMPTY_STRING; + if ( $Ktoken_vars < $K_last ) { + my $Knnb = $Ktoken_vars + 1; + $Knnb++ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b' ); + $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_]; + $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_]; + } -} ## end sub keyword_group_scan + if ( $rOpts_break_at_old_semicolon_breakpoints + && ( $Ktoken_vars == $K_first ) + && $max_index_to_go >= 0 + && !defined($index_start_one_line_block) ) + { + $self->end_batch(); + } -####################################### -# CODE SECTION 7: Process lines of code -####################################### + $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); -{ ## begin closure process_line_of_CODE + $self->end_batch() + unless ( + $no_internal_newlines + || ( $rOpts_keep_interior_semicolons + && $Ktoken_vars < $K_last ) + || ( $next_nonblank_token eq '}' ) + ); + } - # The routines in this closure receive lines of code and combine them into - # 'batches' and send them along. A 'batch' is the unit of code which can be - # processed further as a unit. It has the property that it is the largest - # amount of code into which which perltidy is free to place one or more - # line breaks within it without violating any constraints. + #----------- + # handle '{' + #----------- + elsif ($is_opening_BLOCK) { - # When a new batch is formed it is sent to sub 'grind_batch_of_code'. + # Tentatively output this token. This is required before + # calling starting_one_line_block. We may have to unstore + # it, though, if we have to break before it. + $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); - # flags needed by the store routine - my $line_of_tokens; - my $no_internal_newlines; - my $CODE_type; + # Look ahead to see if we might form a one-line block.. + my $too_long = + $self->starting_one_line_block( $Ktoken_vars, + $K_last_nonblank_code, $K_last ); + $self->clear_breakpoint_undo_stack(); - # range of K of tokens for the current line - my ( $K_first, $K_last ); + # to simplify the logic below, set a flag to indicate if + # this opening brace is far from the keyword which introduces it + my $keyword_on_same_line = 1; + if ( + $max_index_to_go >= 0 + && defined($K_last_nonblank_code) + && $rLL->[$K_last_nonblank_code]->[_TYPE_] eq ')' + && ( ( $rtoken_vars->[_LEVEL_] < $levels_to_go[0] ) + || $too_long ) + ) + { + $keyword_on_same_line = 0; + } - my ( $rLL, $radjusted_levels, $rparent_of_seqno, $rdepth_of_opening_seqno, - $rblock_type_of_seqno, $ri_starting_one_line_block ); + # Break before '{' if requested with -bl or -bli flag + my $want_break = $self->[_rbrace_left_]->{$type_sequence}; - # past stored nonblank tokens and flags - my ( - $K_last_nonblank_code, $looking_for_else, - $is_static_block_comment, $last_CODE_type, - $last_line_had_side_comment, $next_parent_seqno, - $next_slevel, - ); + # But do not break if this token is welded to the left + if ( $total_weld_count + && defined( $self->[_rK_weld_left_]->{$Ktoken_vars} ) ) + { + $want_break = 0; + } - # Called once at the start of a new file - sub initialize_process_line_of_CODE { - $K_last_nonblank_code = undef; - $looking_for_else = 0; - $is_static_block_comment = 0; - $last_line_had_side_comment = 0; - $next_parent_seqno = SEQ_ROOT; - $next_slevel = undef; - return; - } + # Break BEFORE an opening '{' ... + if ( - # Batch variables: these describe the current batch of code being formed - # and sent down the pipeline. They are initialized in the next - # sub. - my ( $rbrace_follower, $index_start_one_line_block, - $semicolons_before_block_self_destruct, - $starting_in_quote, $ending_in_quote, ); + # if requested + $want_break + + # and we were unable to start looking for a block, + && !defined($index_start_one_line_block) + + # or if it will not be on same line as its keyword, so that + # it will be outdented (eval.t, overload.t), and the user + # has not insisted on keeping it on the right + || ( !$keyword_on_same_line + && !$rOpts_opening_brace_always_on_right ) + ) + { - # Called before the start of each new batch - sub initialize_batch_variables { + # but only if allowed + unless ($nobreak_BEFORE_BLOCK) { - $max_index_to_go = UNDEFINED_INDEX; - $summed_lengths_to_go[0] = 0; - $nesting_depth_to_go[0] = 0; - ##@summed_lengths_to_go = @nesting_depth_to_go = (0); - $ri_starting_one_line_block = []; + # since we already stored this token, we must unstore it + $self->unstore_token_to_go(); - # The initialization code for the remaining batch arrays is as follows - # and can be activated for testing. But profiling shows that it is - # time-consuming to re-initialize the batch arrays and is not necessary - # because the maximum valid token, $max_index_to_go, is carefully - # controlled. This means however that it is not possible to do any - # type of filter or map operation directly on these arrays. And it is - # not possible to use negative indexes. As a precaution against program - # changes which might do this, sub pad_array_to_go adds some undefs at - # the end of the current batch of data. + # then output the line + $self->end_batch() if ( $max_index_to_go >= 0 ); - # So 'long story short': this is a waste of time - 0 && do { #<<< - @block_type_to_go = (); - @type_sequence_to_go = (); - @forced_breakpoint_to_go = (); - @token_lengths_to_go = (); - @levels_to_go = (); - @mate_index_to_go = (); - @ci_levels_to_go = (); - @nobreak_to_go = (); - @old_breakpoint_to_go = (); - @tokens_to_go = (); - @K_to_go = (); - @types_to_go = (); - @leading_spaces_to_go = (); - @reduced_spaces_to_go = (); - @inext_to_go = (); - @iprev_to_go = (); - @parent_seqno_to_go = (); - }; + # and now store this token at the start of a new line + $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); + } + } - $rbrace_follower = undef; - $ending_in_quote = 0; + # now output this line + $self->end_batch() + if ( $max_index_to_go >= 0 && !$no_internal_newlines ); + } - # These get re-initialized by calls to sub destroy_one_line_block(): - $index_start_one_line_block = UNDEFINED_INDEX; - $semicolons_before_block_self_destruct = 0; + #----------- + # handle '}' + #----------- + elsif ($is_closing_BLOCK) { - # initialize forced breakpoint vars associated with each output batch - $forced_breakpoint_count = 0; - $index_max_forced_break = UNDEFINED_INDEX; - $forced_breakpoint_undo_count = 0; + my $next_nonblank_token_type = 'b'; + my $next_nonblank_token = EMPTY_STRING; + my $Knnb; + if ( $Ktoken_vars < $K_last ) { + $Knnb = $Ktoken_vars + 1; + $Knnb++ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b' ); + $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_]; + $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_]; + } - return; - } ## end sub initialize_batch_variables + # If there is a pending one-line block .. + if ( defined($index_start_one_line_block) ) { - sub leading_spaces_to_go { + # Fix for b1208: if a side comment follows this closing + # brace then we must include its length in the length test + # ... unless the -issl flag is set (fixes b1307-1309). + # Assume a minimum of 1 blank space to the comment. + my $added_length = 0; + if ( $has_side_comment + && !$rOpts_ignore_side_comment_lengths + && $next_nonblank_token_type eq '#' ) + { + $added_length = 1 + $rLL->[$K_last]->[_TOKEN_LENGTH_]; + } - # return the number of indentation spaces for a token in the output - # stream + # we have to terminate it if.. + if ( - my ($ii) = @_; - return 0 if ( $ii < 0 ); - my $indentation = $leading_spaces_to_go[$ii]; - return ref($indentation) ? $indentation->get_spaces() : $indentation; - } ## end sub leading_spaces_to_go + # it is too long (final length may be different from + # initial estimate). note: must allow 1 space for this + # token + $self->excess_line_length( $index_start_one_line_block, + $max_index_to_go ) + $added_length >= 0 + ) + { + $index_start_one_line_block = undef; + } + } - sub create_one_line_block { - ( $index_start_one_line_block, $semicolons_before_block_self_destruct ) - = @_; - return; - } + # put a break before this closing curly brace if appropriate + $self->end_batch() + if ( $max_index_to_go >= 0 + && !$nobreak_BEFORE_BLOCK + && !defined($index_start_one_line_block) ); - sub destroy_one_line_block { - $index_start_one_line_block = UNDEFINED_INDEX; - $semicolons_before_block_self_destruct = 0; - return; - } + # store the closing curly brace + $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); - # Routine to place the current token into the output stream. - # Called once per output token. + # ok, we just stored a closing curly brace. Often, but + # not always, we want to end the line immediately. + # So now we have to check for special cases. - use constant DEBUG_STORE => 0; + # if this '}' successfully ends a one-line block.. + my $one_line_block_type = EMPTY_STRING; + my $keep_going; + if ( defined($index_start_one_line_block) ) { - sub store_token_to_go { + # Remember the type of token just before the + # opening brace. It would be more general to use + # a stack, but this will work for one-line blocks. + $one_line_block_type = + $types_to_go[$index_start_one_line_block]; - my ( $self, $Ktoken_vars, $rtoken_vars ) = @_; + # we have to actually make it by removing tentative + # breaks that were set within it + $self->undo_forced_breakpoint_stack(0); - # Add one token to the next batch. - # $Ktoken_vars = the index K in the global token array - # $rtoken_vars = $rLL->[$Ktoken_vars] = the corresponding token values - # unless they are temporarily being overridden + # For -lp, extend the nobreak to include a trailing + # terminal ','. This is because the -lp indentation was + # not known when making one-line blocks, so we may be able + # to move the line back to fit. Otherwise we may create a + # needlessly stranded comma on the next line. + my $iend_nobreak = $max_index_to_go - 1; + if ( $rOpts_line_up_parentheses + && $next_nonblank_token_type eq ',' + && $Knnb eq $K_last ) + { + my $p_seqno = $parent_seqno_to_go[$max_index_to_go]; + my $is_excluded = + $self->[_ris_excluded_lp_container_]->{$p_seqno}; + $iend_nobreak = $max_index_to_go if ( !$is_excluded ); + } - #------------------------------------------------------------------ - # NOTE: called once per token so coding efficiency is critical here - #------------------------------------------------------------------ + $self->set_nobreaks( $index_start_one_line_block, + $iend_nobreak ); - my $type = $rtoken_vars->[_TYPE_]; + # save starting block indexes so that sub correct_lp can + # check and adjust -lp indentation (c098) + push @{$ri_starting_one_line_block}, + $index_start_one_line_block; - # Check for emergency flush... - # The K indexes in the batch must always be a continuous sequence of - # the global token array. The batch process programming assumes this. - # If storing this token would cause this relation to fail we must dump - # the current batch before storing the new token. It is extremely rare - # for this to happen. One known example is the following two-line - # snippet when run with parameters - # --noadd-newlines --space-terminal-semicolon: - # if ( $_ =~ /PENCIL/ ) { $pencil_flag= 1 } ; ; - # $yy=1; - if ( $max_index_to_go >= 0 ) { - if ( $Ktoken_vars != $K_to_go[$max_index_to_go] + 1 ) { - $self->flush_batch_of_CODE(); - } + # then re-initialize for the next one-line block + $index_start_one_line_block = undef; - # Do not output consecutive blank tokens ... this should not - # happen, but it is worth checking. Later code can then make the - # simplifying assumption that blank tokens are not consecutive. - elsif ( $type eq 'b' && $types_to_go[$max_index_to_go] eq 'b' ) { + # then decide if we want to break after the '}' .. + # We will keep going to allow certain brace followers as in: + # do { $ifclosed = 1; last } unless $losing; + # + # But make a line break if the curly ends a + # significant block: + if ( + ( + $is_block_without_semicolon{$block_type} - if (DEVEL_MODE) { + # Follow users break point for + # one line block types U & G, such as a 'try' block + || $one_line_block_type =~ /^[UG]$/ + && $Ktoken_vars == $K_last + ) - # if this happens, it is may be that consecutive blanks - # were inserted into the token stream in 'respace_tokens' - my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1; - Fault("consecutive blanks near line $lno; please fix"); + # if needless semicolon follows we handle it later + && $next_nonblank_token ne ';' + ) + { + $self->end_batch() + unless ($no_internal_newlines); + } } - return; - } - } - - # Do not start a batch with a blank token. - # Fixes cases b149 b888 b984 b985 b986 b987 - else { - if ( $type eq 'b' ) { return } - } - #---------------------------- - # add this token to the batch - #---------------------------- - $K_to_go[ ++$max_index_to_go ] = $Ktoken_vars; - $types_to_go[$max_index_to_go] = $type; - - $old_breakpoint_to_go[$max_index_to_go] = 0; - $forced_breakpoint_to_go[$max_index_to_go] = 0; - $mate_index_to_go[$max_index_to_go] = -1; + # set string indicating what we need to look for brace follower + # tokens + if ( $is_if_unless_elsif_else{$block_type} ) { + $rbrace_follower = undef; + } + elsif ( $block_type eq 'do' ) { + $rbrace_follower = \%is_do_follower; + if ( + $self->tight_paren_follows( $K_to_go[0], $Ktoken_vars ) + ) + { + $rbrace_follower = { ')' => 1 }; + } + } - my $token = $tokens_to_go[$max_index_to_go] = $rtoken_vars->[_TOKEN_]; + # added eval for borris.t + elsif ($is_sort_map_grep_eval{$block_type} + || $one_line_block_type eq 'G' ) + { + $rbrace_follower = undef; + $keep_going = 1; + } - my $ci_level = $ci_levels_to_go[$max_index_to_go] = - $rtoken_vars->[_CI_LEVEL_]; + # anonymous sub + elsif ( $self->[_ris_asub_block_]->{$type_sequence} ) { + if ($one_line_block_type) { - # Clip levels to zero if there are level errors in the file. - # We had to wait until now for reasons explained in sub 'write_line'. - my $level = $rtoken_vars->[_LEVEL_]; - if ( $level < 0 ) { $level = 0 } - $levels_to_go[$max_index_to_go] = $level; + $rbrace_follower = \%is_anon_sub_1_brace_follower; - my $seqno = $type_sequence_to_go[$max_index_to_go] = - $rtoken_vars->[_TYPE_SEQUENCE_]; + # Exceptions to help keep -lp intact, see git #74 ... + # Exception 1: followed by '}' on this line + if ( $Ktoken_vars < $K_last + && $next_nonblank_token eq '}' ) + { + $rbrace_follower = undef; + $keep_going = 1; + } - my $in_continued_quote = - ( $Ktoken_vars == $K_first ) && $line_of_tokens->{_starting_in_quote}; + # Exception 2: followed by '}' on next line if -lp set. + # The -lp requirement allows the formatting to follow + # old breaks when -lp is not used, minimizing changes. + # Fixes issue c087. + elsif ($Ktoken_vars == $K_last + && $rOpts_line_up_parentheses ) + { + my $K_closing_container = + $self->[_K_closing_container_]; + my $p_seqno = $parent_seqno_to_go[$max_index_to_go]; + my $Kc = $K_closing_container->{$p_seqno}; + my $is_excluded = + $self->[_ris_excluded_lp_container_]->{$p_seqno}; + $keep_going = + ( defined($Kc) + && $rLL->[$Kc]->[_TOKEN_] eq '}' + && !$is_excluded + && $Kc - $Ktoken_vars <= 2 ); + $rbrace_follower = undef if ($keep_going); + } + } + else { + $rbrace_follower = \%is_anon_sub_brace_follower; + } + } - # Initializations for first token of new batch - if ( $max_index_to_go == 0 ) { + # None of the above: specify what can follow a closing + # brace of a block which is not an + # if/elsif/else/do/sort/map/grep/eval + # Testfiles: + # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t + else { + $rbrace_follower = \%is_other_brace_follower; + } - $starting_in_quote = $in_continued_quote; + # See if an elsif block is followed by another elsif or else; + # complain if not. + if ( $block_type eq 'elsif' ) { - # Update the next parent sequence number for each new batch. + if ( $next_nonblank_token_type eq 'b' ) { # end of line? + $looking_for_else = 1; # ok, check on next line + } + else { + ## /^(elsif|else)$/ + if ( !$is_elsif_else{$next_nonblank_token} ) { + write_logfile_entry("No else block :(\n"); + } + } + } - #---------------------------------------- - # Begin coding from sub parent_seqno_by_K - #---------------------------------------- + # keep going after certain block types (map,sort,grep,eval) + # added eval for borris.t + if ($keep_going) { - # The following is equivalent to this call but much faster: - # $next_parent_seqno = $self->parent_seqno_by_K($Ktoken_vars); + # keep going + $rbrace_follower = undef; - $next_parent_seqno = SEQ_ROOT; - if ($seqno) { - $next_parent_seqno = $rparent_of_seqno->{$seqno}; - } - else { - my $Kt = $rLL->[$Ktoken_vars]->[_KNEXT_SEQ_ITEM_]; - if ( defined($Kt) ) { - my $type_sequence_t = $rLL->[$Kt]->[_TYPE_SEQUENCE_]; - my $type_t = $rLL->[$Kt]->[_TYPE_]; + } - # if next container token is closing, it is the parent seqno - if ( $is_closing_type{$type_t} ) { - $next_parent_seqno = $type_sequence_t; + # if no more tokens, postpone decision until re-entering + elsif ( ( $next_nonblank_token_type eq 'b' ) + && $rOpts_add_newlines ) + { + unless ($rbrace_follower) { + $self->end_batch() + unless ( $no_internal_newlines + || $max_index_to_go < 0 ); } + } + elsif ($rbrace_follower) { - # otherwise we want its parent container + if ( $rbrace_follower->{$next_nonblank_token} ) { + + # Fix for b1385: keep break after a comma following a + # 'do' block. This could also be used for other block + # types, but that would cause a significant change in + # existing formatting without much benefit. + if ( $next_nonblank_token eq ',' + && $Knnb eq $K_last + && $block_type eq 'do' + && $rOpts_add_newlines + && $self->is_trailing_comma($Knnb) ) + { + $self->[_rbreak_after_Klast_]->{$K_last} = 1; + } + } else { - $next_parent_seqno = - $rparent_of_seqno->{$type_sequence_t}; + $self->end_batch() + unless ( $no_internal_newlines + || $max_index_to_go < 0 ); } - } - } - $next_parent_seqno = SEQ_ROOT - unless ( defined($next_parent_seqno) ); - - #-------------------------------------- - # End coding from sub parent_seqno_by_K - #-------------------------------------- - $next_slevel = $rdepth_of_opening_seqno->[$next_parent_seqno] + 1; - } + $rbrace_follower = undef; + } - # Initialize some sequence-dependent variables to their normal values - $parent_seqno_to_go[$max_index_to_go] = $next_parent_seqno; - $nesting_depth_to_go[$max_index_to_go] = $next_slevel; - $block_type_to_go[$max_index_to_go] = EMPTY_STRING; + else { + $self->end_batch() + unless ( $no_internal_newlines + || $max_index_to_go < 0 ); + } - # Then fix them at container tokens: - if ($seqno) { + } ## end treatment of closing block token - $block_type_to_go[$max_index_to_go] = - $rblock_type_of_seqno->{$seqno} - if ( $rblock_type_of_seqno->{$seqno} ); + #------------------------------ + # handle here_doc target string + #------------------------------ + elsif ( $type eq 'h' ) { - if ( $is_opening_token{$token} ) { + # no newlines after seeing here-target + $no_internal_newlines = 2; + $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); + } - my $slevel = $rdepth_of_opening_seqno->[$seqno]; - $nesting_depth_to_go[$max_index_to_go] = $slevel; - $next_slevel = $slevel + 1; + #----------------------------- + # handle all other token types + #----------------------------- + else { - $next_parent_seqno = $seqno; + $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); + # break after a label if requested + if ( $rOpts_break_after_labels + && $type eq 'J' + && $rOpts_break_after_labels == 1 ) + { + $self->end_batch() + unless ($no_internal_newlines); + } } - elsif ( $is_closing_token{$token} ) { - $next_slevel = $rdepth_of_opening_seqno->[$seqno]; - my $slevel = $next_slevel + 1; - $nesting_depth_to_go[$max_index_to_go] = $slevel; + # remember previous nonblank, non-comment OUTPUT token + $K_last_nonblank_code = $Ktoken_vars; - my $parent_seqno = $rparent_of_seqno->{$seqno}; - $parent_seqno = SEQ_ROOT unless defined($parent_seqno); - $parent_seqno_to_go[$max_index_to_go] = $parent_seqno; - $next_parent_seqno = $parent_seqno; + } ## end of loop over all tokens in this line + return; + } ## end sub process_line_inner_loop - } - else { - # ternary token: nothing to do - } - } +} ## end closure process_line_of_CODE - $nobreak_to_go[$max_index_to_go] = $no_internal_newlines; +sub is_trailing_comma { + my ( $self, $KK ) = @_; - my $length = $rtoken_vars->[_TOKEN_LENGTH_]; + # Given: + # $KK - index of a comma in token list + # Return: + # true if the comma at index $KK is a trailing comma + # false if not - # Safety check that length is defined. Should not be needed now. - # Former patch for indent-only, in which the entire set of tokens is - # turned into type 'q'. Lengths may have not been defined because sub - # 'respace_tokens' is bypassed. We do not need lengths in this case, - # but we will use the character count to have a defined value. In the - # future, it would be nicer to have 'respace_tokens' convert the lines - # to quotes and get correct lengths. - if ( !defined($length) ) { - $length = length($token); + my $rLL = $self->[_rLL_]; + my $type_KK = $rLL->[$KK]->[_TYPE_]; + if ( $type_KK ne ',' ) { + DEVEL_MODE + && Fault("Bad call: expected type ',' but received '$type_KK'\n"); + return; + } + my $Knnb = $self->K_next_nonblank($KK); + if ( defined($Knnb) ) { + my $type_sequence = $rLL->[$Knnb]->[_TYPE_SEQUENCE_]; + my $type_Knnb = $rLL->[$Knnb]->[_TYPE_]; + if ( $type_sequence && $is_closing_type{$type_Knnb} ) { + return 1; } + } + return; +} ## end sub is_trailing_comma - $token_lengths_to_go[$max_index_to_go] = $length; +sub tight_paren_follows { - # We keep a running sum of token lengths from the start of this batch: - # summed_lengths_to_go[$i] = total length to just before token $i - # summed_lengths_to_go[$i+1] = total length to just after token $i - $summed_lengths_to_go[ $max_index_to_go + 1 ] = - $summed_lengths_to_go[$max_index_to_go] + $length; + my ( $self, $K_to_go_0, $K_ic ) = @_; - # Define the indentation that this token will have in two cases: - # Without CI = reduced_spaces_to_go - # With CI = leading_spaces_to_go - if ($in_continued_quote) { - $leading_spaces_to_go[$max_index_to_go] = 0; - $reduced_spaces_to_go[$max_index_to_go] = 0; - } - else { - $leading_spaces_to_go[$max_index_to_go] = - $reduced_spaces_to_go[$max_index_to_go] = - $rOpts_indent_columns * $radjusted_levels->[$Ktoken_vars]; + # Input parameters: + # $K_to_go_0 = first token index K of this output batch (=K_to_go[0]) + # $K_ic = index of the closing do brace (=K_to_go[$max_index_to_go]) + # Return parameter: + # false if we want a break after the closing do brace + # true if we do not want a break after the closing do brace - $leading_spaces_to_go[$max_index_to_go] += - $rOpts_continuation_indentation * $ci_level - if ($ci_level); - } + # We are at the closing brace of a 'do' block. See if this brace is + # followed by a closing paren, and if so, set a flag which indicates + # that we do not want a line break between the '}' and ')'. - DEBUG_STORE && do { - my ( $a, $b, $c ) = caller(); - print STDOUT -"STORE: from $a $c: storing token $token type $type lev=$level at $max_index_to_go\n"; - }; - return; - } ## end sub store_token_to_go + # xxxxx ( ...... do { ... } ) { + # ^-------looking at this brace, K_ic - sub flush_batch_of_CODE { + # Subscript notation: + # _i = inner container (braces in this case) + # _o = outer container (parens in this case) + # _io = inner opening = '{' + # _ic = inner closing = '}' + # _oo = outer opening = '(' + # _oc = outer closing = ')' - # Finish any batch packaging and call the process routine. - # This must be the only call to grind_batch_of_CODE() - my ($self) = @_; + # |--K_oo |--K_oc = outer container + # xxxxx ( ...... do { ...... } ) { + # |--K_io |--K_ic = inner container - if ( $max_index_to_go >= 0 ) { + # In general, the safe thing to do is return a 'false' value + # if the statement appears to be complex. This will have + # the downstream side-effect of opening up outer containers + # to help make complex code readable. But for simpler + # do blocks it can be preferable to keep the code compact + # by returning a 'true' value. - # Create an array to hold variables for this batch - my $this_batch = []; + return unless defined($K_ic); + my $rLL = $self->[_rLL_]; - $this_batch->[_starting_in_quote_] = 1 if ($starting_in_quote); - $this_batch->[_ending_in_quote_] = 1 if ($ending_in_quote); + # we should only be called at a closing block + my $seqno_i = $rLL->[$K_ic]->[_TYPE_SEQUENCE_]; + return unless ($seqno_i); # shouldn't happen; - if ( $CODE_type || $last_CODE_type ) { - $this_batch->[_batch_CODE_type_] = - $K_to_go[$max_index_to_go] >= $K_first - ? $CODE_type - : $last_CODE_type; - } + # This only applies if the next nonblank is a ')' + my $K_oc = $self->K_next_nonblank($K_ic); + return unless defined($K_oc); + my $token_next = $rLL->[$K_oc]->[_TOKEN_]; + return unless ( $token_next eq ')' ); - $last_line_had_side_comment = - ( $max_index_to_go > 0 && $types_to_go[$max_index_to_go] eq '#' ); + my $seqno_o = $rLL->[$K_oc]->[_TYPE_SEQUENCE_]; + my $K_io = $self->[_K_opening_container_]->{$seqno_i}; + my $K_oo = $self->[_K_opening_container_]->{$seqno_o}; + return unless ( defined($K_io) && defined($K_oo) ); - # The flag $is_static_block_comment applies to the line which just - # arrived. So it only applies if we are outputting that line. - if ( $is_static_block_comment && !$last_line_had_side_comment ) { - $this_batch->[_is_static_block_comment_] = - $K_to_go[0] == $K_first; + # RULE 1: Do not break before a closing signature paren + # (regardless of complexity). This is a fix for issue git#22. + # Looking for something like: + # sub xxx ( ... do { ... } ) { + # ^----- next block_type + my $K_test = $self->K_next_nonblank($K_oc); + if ( defined($K_test) && $rLL->[$K_test]->[_TYPE_] eq '{' ) { + my $seqno_test = $rLL->[$K_test]->[_TYPE_SEQUENCE_]; + if ($seqno_test) { + if ( $self->[_ris_asub_block_]->{$seqno_test} + || $self->[_ris_sub_block_]->{$seqno_test} ) + { + return 1; } + } + } - $this_batch->[_ri_starting_one_line_block_] = - $ri_starting_one_line_block; + # RULE 2: Break if the contents within braces appears to be 'complex'. We + # base this decision on the number of tokens between braces. - $self->[_this_batch_] = $this_batch; + # xxxxx ( ... do { ... } ) { + # ^^^^^^ + + # Although very simple, it has the advantages of (1) being insensitive to + # changes in lengths of identifier names, (2) easy to understand, implement + # and test. A test case for this is 't/snippets/long_line.in'. + + # Example: $K_ic - $K_oo = 9 [Pass Rule 2] + # if ( do { $2 !~ /&/ } ) { ... } + + # Example: $K_ic - $K_oo = 10 [Pass Rule 2] + # for ( split /\s*={70,}\s*/, do { local $/; }) { ... } - $self->grind_batch_of_CODE(); + # Example: $K_ic - $K_oo = 20 [Fail Rule 2] + # test_zero_args( "do-returned list slice", do { ( 10, 11 )[ 2, 3 ]; }); - # Done .. this batch is history - $self->[_this_batch_] = undef; + return if ( $K_ic - $K_io > 16 ); - initialize_batch_variables(); - } + # RULE 3: break if the code between the opening '(' and the '{' is 'complex' + # As with the previous rule, we decide based on the token count - return; - } ## end sub flush_batch_of_CODE + # xxxxx ( ... do { ... } ) { + # ^^^^^^^^ - sub end_batch { + # Example: $K_ic - $K_oo = 9 [Pass Rule 2] + # $K_io - $K_oo = 4 [Pass Rule 3] + # if ( do { $2 !~ /&/ } ) { ... } - # end the current batch, EXCEPT for a few special cases - my ($self) = @_; + # Example: $K_ic - $K_oo = 10 [Pass rule 2] + # $K_io - $K_oo = 9 [Pass rule 3] + # for ( split /\s*={70,}\s*/, do { local $/; }) { ... } - if ( $max_index_to_go < 0 ) { + return if ( $K_io - $K_oo > 9 ); - # This is harmless but should be eliminated in development - if (DEVEL_MODE) { - Fault("End batch called with nothing to do; please fix\n"); - } - return; - } + # RULE 4: Break if we have already broken this batch of output tokens + return if ( $K_oo < $K_to_go_0 ); - # Exceptions when a line does not end with a comment... (fixes c058) - if ( $types_to_go[$max_index_to_go] ne '#' ) { + # RULE 5: Break if input is not on one line + # For example, we will set the flag for the following expression + # written in one line: - # Exception 1: Do not end line in a weld - return - if ( $total_weld_count - && $self->[_rK_weld_right_]->{ $K_to_go[$max_index_to_go] } ); + # This has: $K_ic - $K_oo = 10 [Pass rule 2] + # $K_io - $K_oo = 8 [Pass rule 3] + # $self->debug( 'Error: ' . do { local $/; <$err> } ); - # Exception 2: just set a tentative breakpoint if we might be in a - # one-line block - if ( $index_start_one_line_block != UNDEFINED_INDEX ) { - $self->set_forced_breakpoint($max_index_to_go); - return; - } - } + # but we break after the brace if it is on multiple lines on input, since + # the user may prefer it on multiple lines: - $self->flush_batch_of_CODE(); - return; - } ## end sub end_batch + # [Fail rule 5] + # $self->debug( + # 'Error: ' . do { local $/; <$err> } + # ); - sub flush_vertical_aligner { - my ($self) = @_; - my $vao = $self->[_vertical_aligner_object_]; - $vao->flush(); - return; + if ( !$rOpts_ignore_old_breakpoints ) { + my $iline_oo = $rLL->[$K_oo]->[_LINE_INDEX_]; + my $iline_oc = $rLL->[$K_oc]->[_LINE_INDEX_]; + return if ( $iline_oo != $iline_oc ); } - # flush is called to output any tokens in the pipeline, so that - # an alternate source of lines can be written in the correct order - sub flush { - my ( $self, $CODE_type_flush ) = @_; + # OK to keep the paren tight + return 1; +} ## end sub tight_paren_follows - # end the current batch with 1 exception +my %is_brace_semicolon_colon; - destroy_one_line_block(); +BEGIN { + my @q = qw( { } ; : ); + @is_brace_semicolon_colon{@q} = (1) x scalar(@q); +} - # Exception: if we are flushing within the code stream only to insert - # blank line(s), then we can keep the batch intact at a weld. This - # improves formatting of -ce. See test 'ce1.ce' - if ( $CODE_type_flush && $CODE_type_flush eq 'BL' ) { - $self->end_batch() if ( $max_index_to_go >= 0 ); - } +sub starting_one_line_block { - # otherwise, we have to shut things down completely. - else { $self->flush_batch_of_CODE() } + # After seeing an opening curly brace, look for the closing brace and see + # if the entire block will fit on a line. This routine is not always right + # so a check is made later (at the closing brace) to make sure we really + # have a one-line block. We have to do this preliminary check, though, + # because otherwise we would always break at a semicolon within a one-line + # block if the block contains multiple statements. - $self->flush_vertical_aligner(); - return; - } ## end sub flush + # Given: + # $Kj = index of opening brace + # $K_last_nonblank = index of previous nonblank code token + # $K_last = index of last token of input line - sub process_line_of_CODE { + # Calls 'create_one_line_block' if one-line block might be formed. - my ( $self, $my_line_of_tokens ) = @_; + # Also returns a flag '$too_long': + # true = distance from opening keyword to OPENING brace exceeds + # the maximum line length. + # false (simple return) => not too long + # Note that this flag is for distance from the statement start to the + # OPENING brace, not the closing brace. - #---------------------------------------------------------------- - # This routine is called once per INPUT line to format all of the - # tokens on that line. - #---------------------------------------------------------------- + my ( $self, $Kj, $K_last_nonblank, $K_last ) = @_; - # It outputs full-line comments and blank lines immediately. + my $rbreak_container = $self->[_rbreak_container_]; + my $rshort_nested = $self->[_rshort_nested_]; + my $rLL = $self->[_rLL_]; + my $K_opening_container = $self->[_K_opening_container_]; + my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; - # The tokens are copied one-by-one from the global token array $rLL to - # a set of '_to_go' arrays which collect batches of tokens for a - # further processing via calls to 'sub store_token_to_go', until a well - # defined 'structural' break point* or 'forced' breakpoint* is reached. - # Then, the batch of collected '_to_go' tokens is passed along to 'sub - # grind_batch_of_CODE' for further processing. + # kill any current block - we can only go 1 deep + create_one_line_block(); - # * 'structural' break points are basically line breaks corresponding - # to code blocks. An example is a chain of if-elsif-else statements, - # which should typically be broken at the opening and closing braces. + my $i_start = 0; - # * 'forced' break points are breaks required by side comments or by - # special user controls. + # This routine should not have been called if there are no tokens in the + # 'to_go' arrays of previously stored tokens. A previous call to + # 'store_token_to_go' should have stored an opening brace. An error here + # indicates that a programming change may have caused a flush operation to + # clean out the previously stored tokens. + if ( !defined($max_index_to_go) || $max_index_to_go < 0 ) { + Fault("program bug: store_token_to_go called incorrectly\n") + if (DEVEL_MODE); + return; + } - # So this routine is just making an initial set of required line - # breaks, basically regardless of the maximum requested line length. - # The subsequent stage of formatting make additional line breaks - # appropriate for lists and logical structures, and to keep line - # lengths below the requested maximum line length. + # Return if block should be broken + my $type_sequence_j = $rLL->[$Kj]->[_TYPE_SEQUENCE_]; + if ( $rbreak_container->{$type_sequence_j} ) { + return; + } - #----------------------------------- - # begin initialize closure variables - #----------------------------------- - $line_of_tokens = $my_line_of_tokens; - my $rK_range = $line_of_tokens->{_rK_range}; - if ( !defined( $rK_range->[0] ) ) { + my $ris_bli_container = $self->[_ris_bli_container_]; + my $is_bli = $ris_bli_container->{$type_sequence_j}; - # Empty line: This can happen if tokens are deleted, for example - # with the -mangle parameter - return; + my $block_type = $rblock_type_of_seqno->{$type_sequence_j}; + $block_type = EMPTY_STRING unless ( defined($block_type) ); + + my $previous_nonblank_token = EMPTY_STRING; + my $i_last_nonblank = -1; + if ( defined($K_last_nonblank) ) { + $i_last_nonblank = $K_last_nonblank - $K_to_go[0]; + if ( $i_last_nonblank >= 0 ) { + $previous_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_]; } + } - ( $K_first, $K_last ) = @{$rK_range}; - $last_CODE_type = $CODE_type; - $CODE_type = $line_of_tokens->{_code_type}; + #--------------------------------------------------------------------- + # find the starting keyword for this block (such as 'if', 'else', ...) + #--------------------------------------------------------------------- + if ( + $max_index_to_go == 0 + ##|| $block_type =~ /^[\{\}\;\:]$/ + || $is_brace_semicolon_colon{$block_type} + || substr( $block_type, 0, 7 ) eq 'package' + ) + { + $i_start = $max_index_to_go; + } - $rLL = $self->[_rLL_]; - $radjusted_levels = $self->[_radjusted_levels_]; - $rparent_of_seqno = $self->[_rparent_of_seqno_]; - $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_]; - $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; + # the previous nonblank token should start these block types + elsif ( + $i_last_nonblank >= 0 + && ( $previous_nonblank_token eq $block_type + || $self->[_ris_asub_block_]->{$type_sequence_j} + || $self->[_ris_sub_block_]->{$type_sequence_j} + || substr( $block_type, -2, 2 ) eq '()' ) + ) + { + $i_start = $i_last_nonblank; - #--------------------------------- - # end initialize closure variables - #--------------------------------- + # For signatures and extended syntax ... + # If this brace follows a parenthesized list, we should look back to + # find the keyword before the opening paren because otherwise we might + # form a one line block which stays intact, and cause the parenthesized + # expression to break open. That looks bad. + if ( $tokens_to_go[$i_start] eq ')' ) { - # This flag will become nobreak_to_go and should be set to 2 to prevent - # a line break AFTER the current token. - $no_internal_newlines = 0; - if ( !$rOpts_add_newlines || $CODE_type eq 'NIN' ) { - $no_internal_newlines = 2; - } + # Find the opening paren + my $K_start = $K_to_go[$i_start]; + return unless defined($K_start); + my $seqno = $type_sequence_to_go[$i_start]; + return unless ($seqno); + my $K_opening = $K_opening_container->{$seqno}; + return unless defined($K_opening); + my $i_opening = $i_start + ( $K_opening - $K_start ); - my $input_line = $line_of_tokens->{_line_text}; + # give up if not on this line + return unless ( $i_opening >= 0 ); + $i_start = $i_opening; - my ( $is_block_comment, $has_side_comment ); - if ( $rLL->[$K_last]->[_TYPE_] eq '#' ) { - if ( $K_last == $K_first ) { $is_block_comment = 1 } - else { $has_side_comment = 1 } + # go back one token before the opening paren + if ( $i_start > 0 ) { $i_start-- } + if ( $types_to_go[$i_start] eq 'b' && $i_start > 0 ) { $i_start--; } + my $lev = $levels_to_go[$i_start]; + if ( $lev > $rLL->[$Kj]->[_LEVEL_] ) { return } } + } - my $is_static_block_comment_without_leading_space = - $CODE_type eq 'SBCX'; - $is_static_block_comment = - $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space; + elsif ( $previous_nonblank_token eq ')' ) { - # check for a $VERSION statement - if ( $CODE_type eq 'VER' ) { - $self->[_saw_VERSION_in_this_file_] = 1; - $no_internal_newlines = 2; + # For something like "if (xxx) {", the keyword "if" will be + # just after the most recent break. This will be 0 unless + # we have just killed a one-line block and are starting another. + # (doif.t) + # Note: cannot use inext_index_to_go[] here because that array + # is still being constructed. + $i_start = $index_max_forced_break + 1; + if ( $types_to_go[$i_start] eq 'b' ) { + $i_start++; } - # Add interline blank if any - my $last_old_nonblank_type = "b"; - my $first_new_nonblank_token = EMPTY_STRING; - my $K_first_true = $K_first; - if ( $max_index_to_go >= 0 ) { - $last_old_nonblank_type = $types_to_go[$max_index_to_go]; - $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_]; - if ( !$is_block_comment - && $types_to_go[$max_index_to_go] ne 'b' - && $K_first > 0 - && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' ) - { - $K_first -= 1; - } + # Patch to avoid breaking short blocks defined with extended_syntax: + # Strip off any trailing () which was added in the parser to mark + # the opening keyword. For example, in the following + # create( TypeFoo $e) {$bubba} + # the blocktype would be marked as create() + my $stripped_block_type = $block_type; + if ( substr( $block_type, -2, 2 ) eq '()' ) { + $stripped_block_type = substr( $block_type, 0, -2 ); } + unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) { + return; + } + } - my $rtok_first = $rLL->[$K_first]; - - my $in_quote = $line_of_tokens->{_ending_in_quote}; - $ending_in_quote = $in_quote; - - #------------------------------------ - # Handle a block (full-line) comment. - #------------------------------------ - if ($is_block_comment) { + # patch for SWITCH/CASE to retain one-line case/when blocks + elsif ( $block_type eq 'case' || $block_type eq 'when' ) { - if ( $rOpts->{'delete-block-comments'} ) { - $self->flush(); - return; - } + # Note: cannot use inext_index_to_go[] here because that array + # is still being constructed. + $i_start = $index_max_forced_break + 1; + if ( $types_to_go[$i_start] eq 'b' ) { + $i_start++; + } + unless ( $tokens_to_go[$i_start] eq $block_type ) { + return; + } + } + else { - destroy_one_line_block(); - $self->end_batch() if ( $max_index_to_go >= 0 ); + #------------------------------------------- + # Couldn't find start - return too_long flag + #------------------------------------------- + return 1; + } - # output a blank line before block comments - if ( - # unless we follow a blank or comment line - $self->[_last_line_leading_type_] ne '#' - && $self->[_last_line_leading_type_] ne 'b' + my $pos = total_line_length( $i_start, $max_index_to_go ) - 1; - # only if allowed - && $rOpts->{'blanks-before-comments'} + my $maximum_line_length = + $maximum_line_length_at_level[ $levels_to_go[$i_start] ]; - # if this is NOT an empty comment, unless it follows a side - # comment and could become a hanging side comment. - && ( - $rtok_first->[_TOKEN_] ne '#' - || ( $last_line_had_side_comment - && $rLL->[$K_first]->[_LEVEL_] > 0 ) - ) + # see if distance to the opening container is too great to even start + if ( $pos > $maximum_line_length ) { - # not after a short line ending in an opening token - # because we already have space above this comment. - # Note that the first comment in this if block, after - # the 'if (', does not get a blank line because of this. - && !$self->[_last_output_short_opening_token_] + #------------------------------ + # too long to the opening token + #------------------------------ + return 1; + } - # never before static block comments - && !$is_static_block_comment - ) - { - $self->flush(); # switching to new output stream - my $file_writer_object = $self->[_file_writer_object_]; - $file_writer_object->write_blank_code_line(); - $self->[_last_line_leading_type_] = 'b'; - } + #----------------------------------------------------------------------- + # OK so far: the statement is not to long just to the OPENING token. Now + # see if everything to the closing token will fit on one line + #----------------------------------------------------------------------- - if ( - $rOpts->{'indent-block-comments'} - && ( !$rOpts->{'indent-spaced-block-comments'} - || $input_line =~ /^\s+/ ) - && !$is_static_block_comment_without_leading_space - ) - { - my $Ktoken_vars = $K_first; - my $rtoken_vars = $rLL->[$Ktoken_vars]; - $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); - $self->end_batch(); - } - else { + # This is part of an update to fix cases b562 .. b983 + my $K_closing = $self->[_K_closing_container_]->{$type_sequence_j}; + return unless ( defined($K_closing) ); + my $container_length = $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] - + $rLL->[$Kj]->[_CUMULATIVE_LENGTH_]; - # switching to new output stream - $self->flush(); + my $excess = $pos + 1 + $container_length - $maximum_line_length; - # Note that last arg in call here is 'undef' for comments - my $file_writer_object = $self->[_file_writer_object_]; - $file_writer_object->write_code_line( - $rtok_first->[_TOKEN_] . "\n", undef ); - $self->[_last_line_leading_type_] = '#'; - } - return; - } + # Add a small tolerance for welded tokens (case b901) + if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence_j) ) { + $excess += 2; + } - # Compare input/output indentation except for: - # - hanging side comments - # - continuation lines (have unknown amount of initial blank space) - # - and lines which are quotes (because they may have been outdented) - my $guessed_indentation_level = - $line_of_tokens->{_guessed_indentation_level}; + if ( $excess > 0 ) { - unless ( $CODE_type eq 'HSC' - || $rtok_first->[_CI_LEVEL_] > 0 - || $guessed_indentation_level == 0 && $rtok_first->[_TYPE_] eq 'Q' ) - { - my $input_line_number = $line_of_tokens->{_line_number}; - $self->compare_indentation_levels( $K_first, - $guessed_indentation_level, $input_line_number ); - } + # line is too long... there is no chance of forming a one line block + # if the excess is more than 1 char + return if ( $excess > 1 ); - #------------------------ - # Handle indentation-only - #------------------------ + # ... and give up if it is not a one-line block on input. + # note: for a one-line block on input, it may be possible to keep + # it as a one-line block (by removing a needless semicolon ). + my $K_start = $K_to_go[$i_start]; + my $ldiff = + $rLL->[$K_closing]->[_LINE_INDEX_] - $rLL->[$K_start]->[_LINE_INDEX_]; + return if ($ldiff); + } - # NOTE: In previous versions we sent all qw lines out immediately here. - # No longer doing this: also write a line which is entirely a 'qw' list - # to allow stacking of opening and closing tokens. Note that interior - # qw lines will still go out at the end of this routine. - if ( $CODE_type eq 'IO' ) { - $self->flush(); - my $line = $input_line; + #------------------------------------------------------------------ + # Loop to check contents and length of the potential one-line block + #------------------------------------------------------------------ + foreach my $Ki ( $Kj + 1 .. $K_last ) { - # Fix for rt #125506 Unexpected string formating - # in which leading space of a terminal quote was removed - $line =~ s/\s+$//; - $line =~ s/^\s+// unless ( $line_of_tokens->{_starting_in_quote} ); + # old whitespace could be arbitrarily large, so don't use it + if ( $rLL->[$Ki]->[_TYPE_] eq 'b' ) { $pos += 1 } + else { $pos += $rLL->[$Ki]->[_TOKEN_LENGTH_] } - my $Ktoken_vars = $K_first; + # ignore some small blocks + my $type_sequence_i = $rLL->[$Ki]->[_TYPE_SEQUENCE_]; + my $nobreak = $rshort_nested->{$type_sequence_i}; - # We work with a copy of the token variables and change the - # first token to be the entire line as a quote variable - my $rtoken_vars = $rLL->[$Ktoken_vars]; - $rtoken_vars = copy_token_as_type( $rtoken_vars, 'q', $line ); + # Return false result if we exceed the maximum line length, + if ( $pos > $maximum_line_length ) { + return; + } - # Patch: length is not really important here - $rtoken_vars->[_TOKEN_LENGTH_] = length($line); + # keep going for non-containers + elsif ( !$type_sequence_i ) { - $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); - $self->end_batch(); - return; } - #--------------------------- - # Handle all other lines ... - #--------------------------- + # return if we encounter another opening brace before finding the + # closing brace. + elsif ($rLL->[$Ki]->[_TOKEN_] eq '{' + && $rLL->[$Ki]->[_TYPE_] eq '{' + && $rblock_type_of_seqno->{$type_sequence_i} + && !$nobreak ) + { + return; + } - # If we just saw the end of an elsif block, write nag message - # if we do not see another elseif or an else. - if ($looking_for_else) { + # if we find our closing brace.. + elsif ($rLL->[$Ki]->[_TOKEN_] eq '}' + && $rLL->[$Ki]->[_TYPE_] eq '}' + && $rblock_type_of_seqno->{$type_sequence_i} + && !$nobreak ) + { - ## /^(elsif|else)$/ - if ( !$is_elsif_else{ $rLL->[$K_first_true]->[_TOKEN_] } ) { - write_logfile_entry("(No else block)\n"); + # be sure any trailing comment also fits on the line + my $Ki_nonblank = $Ki; + if ( $Ki_nonblank < $K_last ) { + $Ki_nonblank++; + if ( $rLL->[$Ki_nonblank]->[_TYPE_] eq 'b' + && $Ki_nonblank < $K_last ) + { + $Ki_nonblank++; + } } - $looking_for_else = 0; - } - # This is a good place to kill incomplete one-line blocks - if ( $max_index_to_go >= 0 ) { - if ( - ( - ( $semicolons_before_block_self_destruct == 0 ) - && ( $last_old_nonblank_type eq ';' ) - && ( $first_new_nonblank_token ne '}' ) - ) + # Patch for one-line sort/map/grep/eval blocks with side comments: + # We will ignore the side comment length for sort/map/grep/eval + # because this can lead to statements which change every time + # perltidy is run. Here is an example from Denis Moskowitz which + # oscillates between these two states without this patch: - # Patch for RT #98902. Honor request to break at old commas. - || ( $rOpts_break_at_old_comma_breakpoints - && $last_old_nonblank_type eq ',' ) - ) +## -------- +## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf +## @baz; +## +## grep { +## $_->foo ne 'bar' +## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf +## @baz; +## -------- + + # When the first line is input it gets broken apart by the main + # line break logic in sub process_line_of_CODE. + # When the second line is input it gets recombined by + # process_line_of_CODE and passed to the output routines. The + # output routines (break_long_lines) do not break it apart + # because the bond strengths are set to the highest possible value + # for grep/map/eval/sort blocks, so the first version gets output. + # It would be possible to fix this by changing bond strengths, + # but they are high to prevent errors in older versions of perl. + # See c100 for eval test. + if ( $Ki < $K_last + && $rLL->[$K_last]->[_TYPE_] eq '#' + && $rLL->[$K_last]->[_LEVEL_] == $rLL->[$Ki]->[_LEVEL_] + && !$rOpts_ignore_side_comment_lengths + && !$is_sort_map_grep_eval{$block_type} + && $K_last - $Ki_nonblank <= 2 ) { - $forced_breakpoint_to_go[$max_index_to_go] = 1 - if ($rOpts_break_at_old_comma_breakpoints); - destroy_one_line_block(); - $self->end_batch(); - } + # Only include the side comment for if/else/elsif/unless if it + # immediately follows (because the current '$rbrace_follower' + # logic for these will give an immediate brake after these + # closing braces). So for example a line like this + # if (...) { ... } ; # very long comment...... + # will already break like this: + # if (...) { ... } + # ; # very long comment...... + # so we do not need to include the length of the comment, which + # would break the block. Project 'bioperl' has coding like this. + ## !~ /^(if|else|elsif|unless)$/ + if ( !$is_if_unless_elsif_else{$block_type} + || $K_last == $Ki_nonblank ) + { + $Ki_nonblank = $K_last; + $pos += $rLL->[$Ki_nonblank]->[_TOKEN_LENGTH_]; - # Keep any requested breaks before this line. Note that we have to - # use the original K_first because it may have been reduced above - # to add a blank. The value of the flag is as follows: - # 1 => hard break, flush the batch - # 2 => soft break, set breakpoint and continue building the batch - if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} ) { - destroy_one_line_block(); - if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} == 2 ) { - $self->set_forced_breakpoint($max_index_to_go); - } - else { - $self->end_batch() if ( $max_index_to_go >= 0 ); - } - } - } + if ( $Ki_nonblank > $Ki + 1 ) { - #-------------------------------------- - # loop to process the tokens one-by-one - #-------------------------------------- + # source whitespace could be anything, assume + # at least one space before the hash on output + if ( $rLL->[ $Ki + 1 ]->[_TYPE_] eq 'b' ) { + $pos += 1; + } + else { $pos += $rLL->[ $Ki + 1 ]->[_TOKEN_LENGTH_] } + } - # We do not want a leading blank if the previous batch just got output + if ( $pos >= $maximum_line_length ) { + return; + } + } + } - if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) { - $K_first++; + #-------------------------- + # ok, it's a one-line block + #-------------------------- + create_one_line_block($i_start); + return; } - foreach my $Ktoken_vars ( $K_first .. $K_last ) { + # just keep going for other characters + else { + } + } - my $rtoken_vars = $rLL->[$Ktoken_vars]; + #-------------------------------------------------- + # End Loop to examine tokens in potential one-block + #-------------------------------------------------- - #-------------- - # handle blanks - #-------------- - if ( $rtoken_vars->[_TYPE_] eq 'b' ) { - $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); - next; - } + # We haven't hit the closing brace, but there is still space. So the + # question here is, should we keep going to look at more lines in hopes of + # forming a new one-line block, or should we stop right now. The problem + # with continuing is that we will not be able to honor breaks before the + # opening brace if we continue. - #------------------ - # handle non-blanks - #------------------ - my $type = $rtoken_vars->[_TYPE_]; + # Typically we will want to keep trying to make one-line blocks for things + # like sort/map/grep/eval. But it is not always a good idea to make as + # many one-line blocks as possible, so other types are not done. The user + # can always use -mangle. - # If we are continuing after seeing a right curly brace, flush - # buffer unless we see what we are looking for, as in - # } else ... - if ($rbrace_follower) { - my $token = $rtoken_vars->[_TOKEN_]; - unless ( $rbrace_follower->{$token} ) { - $self->end_batch() if ( $max_index_to_go >= 0 ); - } - $rbrace_follower = undef; + # If we want to keep going, we will create a new one-line block. + # The blocks which we can keep going are in a hash, but we never want + # to continue if we are at a '-bli' block. + if ( $want_one_line_block{$block_type} && !$is_bli ) { + my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence_j}; + my $semicolon_count = $rtype_count + && $rtype_count->{';'} ? $rtype_count->{';'} : 0; + + # Ignore a terminal semicolon in the count + if ( $semicolon_count <= 2 ) { + my $K_closing_container = $self->[_K_closing_container_]; + my $K_closing_j = $K_closing_container->{$type_sequence_j}; + my $Kp = $self->K_previous_nonblank($K_closing_j); + if ( defined($Kp) + && $rLL->[$Kp]->[_TYPE_] eq ';' ) + { + $semicolon_count -= 1; } + } + if ( $semicolon_count <= 0 ) { + create_one_line_block($i_start); + } + elsif ( $semicolon_count == 1 && $block_type eq 'eval' ) { - my ( - $block_type, $type_sequence, - $is_opening_BLOCK, $is_closing_BLOCK, - $nobreak_BEFORE_BLOCK - ); - - if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) { + # Mark short broken eval blocks for possible later use in + # avoiding adding spaces before a 'package' line. This is not + # essential but helps keep newer and older formatting the same. + $self->[_ris_short_broken_eval_block_]->{$type_sequence_j} = 1; + } + } + return; +} ## end sub starting_one_line_block - my $token = $rtoken_vars->[_TOKEN_]; - $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; - $block_type = $rblock_type_of_seqno->{$type_sequence}; +sub unstore_token_to_go { - if ( $block_type - && $token eq $type - && $block_type ne 't' - && !$self->[_rshort_nested_]->{$type_sequence} ) - { + # remove most recent token from output stream + my $self = shift; + if ( $max_index_to_go > 0 ) { + $max_index_to_go--; + } + else { + $max_index_to_go = UNDEFINED_INDEX; + } + return; +} ## end sub unstore_token_to_go - if ( $type eq '{' ) { - $is_opening_BLOCK = 1; - $nobreak_BEFORE_BLOCK = $no_internal_newlines; - } - elsif ( $type eq '}' ) { - $is_closing_BLOCK = 1; - $nobreak_BEFORE_BLOCK = $no_internal_newlines; - } - } - } +sub compare_indentation_levels { - # if at last token ... - if ( $Ktoken_vars == $K_last ) { + # Check to see if output line tabbing agrees with input line + # this can be very useful for debugging a script which has an extra + # or missing brace. - #--------------------- - # handle side comments - #--------------------- - if ($has_side_comment) { - $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); - next; - } - } + my ( $self, $K_first, $guessed_indentation_level, $line_number ) = @_; + return unless ( defined($K_first) ); - # if before last token ... do not allow breaks which would promote - # a side comment to a block comment - elsif ( - $has_side_comment - && ( $Ktoken_vars == $K_last - 1 - || $Ktoken_vars == $K_last - 2 - && $rLL->[ $K_last - 1 ]->[_TYPE_] eq 'b' ) - ) - { - $no_internal_newlines = 2; - } + my $rLL = $self->[_rLL_]; - # Process non-blank and non-comment tokens ... + # ignore a line with a leading blank token - issue c195 + my $type = $rLL->[$K_first]->[_TYPE_]; + return if ( $type eq 'b' ); - #----------------- - # handle semicolon - #----------------- - if ( $type eq ';' ) { + my $structural_indentation_level = $self->[_radjusted_levels_]->[$K_first]; - my $next_nonblank_token_type = 'b'; - my $next_nonblank_token = EMPTY_STRING; - if ( $Ktoken_vars < $K_last ) { - my $Knnb = $Ktoken_vars + 1; - $Knnb++ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b' ); - $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_]; - $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_]; - } + # record max structural depth for log file + if ( $structural_indentation_level > $self->[_maximum_BLOCK_level_] ) { + $self->[_maximum_BLOCK_level_] = $structural_indentation_level; + $self->[_maximum_BLOCK_level_at_line_] = $line_number; + } - my $break_before_semicolon = ( $Ktoken_vars == $K_first ) - && $rOpts_break_at_old_semicolon_breakpoints; + my $type_sequence = $rLL->[$K_first]->[_TYPE_SEQUENCE_]; + my $is_closing_block = + $type_sequence + && $self->[_rblock_type_of_seqno_]->{$type_sequence} + && $type eq '}'; - # kill one-line blocks with too many semicolons - $semicolons_before_block_self_destruct--; - if ( - $break_before_semicolon - || ( $semicolons_before_block_self_destruct < 0 ) - || ( $semicolons_before_block_self_destruct == 0 - && $next_nonblank_token_type !~ /^[b\}]$/ ) - ) - { - destroy_one_line_block(); - $self->end_batch() - if ( $break_before_semicolon - && $max_index_to_go >= 0 ); - } + if ( $guessed_indentation_level ne $structural_indentation_level ) { + $self->[_last_tabbing_disagreement_] = $line_number; - $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); + if ($is_closing_block) { - $self->end_batch() - unless ( - $no_internal_newlines - || ( $rOpts_keep_interior_semicolons - && $Ktoken_vars < $K_last ) - || ( $next_nonblank_token eq '}' ) - ); + if ( !$self->[_in_brace_tabbing_disagreement_] ) { + $self->[_in_brace_tabbing_disagreement_] = $line_number; } + if ( !$self->[_first_brace_tabbing_disagreement_] ) { + $self->[_first_brace_tabbing_disagreement_] = $line_number; + } + } - #----------- - # handle '{' - #----------- - elsif ($is_opening_BLOCK) { + if ( !$self->[_in_tabbing_disagreement_] ) { + $self->[_tabbing_disagreement_count_]++; - # Tentatively output this token. This is required before - # calling starting_one_line_block. We may have to unstore - # it, though, if we have to break before it. - $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); + if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) { + write_logfile_entry( +"Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n" + ); + } + $self->[_in_tabbing_disagreement_] = $line_number; + $self->[_first_tabbing_disagreement_] = $line_number + unless ( $self->[_first_tabbing_disagreement_] ); + } + } + else { - # Look ahead to see if we might form a one-line block.. - my $too_long = - $self->starting_one_line_block( $Ktoken_vars, - $K_last_nonblank_code, $K_last ); - $self->clear_breakpoint_undo_stack(); + $self->[_in_brace_tabbing_disagreement_] = 0 if ($is_closing_block); - # to simplify the logic below, set a flag to indicate if - # this opening brace is far from the keyword which introduces it - my $keyword_on_same_line = 1; - if ( - $max_index_to_go >= 0 - && defined($K_last_nonblank_code) - && $rLL->[$K_last_nonblank_code]->[_TYPE_] eq ')' - && ( ( $rtoken_vars->[_LEVEL_] < $levels_to_go[0] ) - || $too_long ) - ) - { - $keyword_on_same_line = 0; - } + my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_]; + if ($in_tabbing_disagreement) { - # Break before '{' if requested with -bl or -bli flag - my $want_break = $self->[_rbrace_left_]->{$type_sequence}; + if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) { + write_logfile_entry( +"End indentation disagreement from input line $in_tabbing_disagreement\n" + ); - # But do not break if this token is welded to the left - if ( $total_weld_count - && defined( $self->[_rK_weld_left_]->{$Ktoken_vars} ) ) + if ( $self->[_tabbing_disagreement_count_] == MAX_NAG_MESSAGES ) { - $want_break = 0; + write_logfile_entry( + "No further tabbing disagreements will be noted\n"); } + } + $self->[_in_tabbing_disagreement_] = 0; - # Break BEFORE an opening '{' ... - if ( + } + } + return; +} ## end sub compare_indentation_levels - # if requested - $want_break +################################################### +# CODE SECTION 8: Utilities for setting breakpoints +################################################### - # and we were unable to start looking for a block, - && $index_start_one_line_block == UNDEFINED_INDEX +{ ## begin closure set_forced_breakpoint - # or if it will not be on same line as its keyword, so that - # it will be outdented (eval.t, overload.t), and the user - # has not insisted on keeping it on the right - || ( !$keyword_on_same_line - && !$rOpts_opening_brace_always_on_right ) - ) - { + my @forced_breakpoint_undo_stack; - # but only if allowed - unless ($nobreak_BEFORE_BLOCK) { + # These are global vars for efficiency: + # my $forced_breakpoint_count; + # my $forced_breakpoint_undo_count; + # my $index_max_forced_break; - # since we already stored this token, we must unstore it - $self->unstore_token_to_go(); + # Break before or after certain tokens based on user settings + my %break_before_or_after_token; - # then output the line - $self->end_batch() if ( $max_index_to_go >= 0 ); + BEGIN { - # and now store this token at the start of a new line - $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); - } - } + # Updated to use all operators. This fixes case b1054 + # Here is the previous simplified version: + ## my @q = qw( . : ? and or xor && || ); + my @q = @all_operators; - # now output this line - $self->end_batch() - if ( $max_index_to_go >= 0 && !$no_internal_newlines ); - } + push @q, ','; + @break_before_or_after_token{@q} = (1) x scalar(@q); + } ## end BEGIN - #----------- - # handle '}' - #----------- - elsif ($is_closing_BLOCK) { + sub set_fake_breakpoint { - my $next_nonblank_token_type = 'b'; - my $next_nonblank_token = EMPTY_STRING; - my $Knnb; - if ( $Ktoken_vars < $K_last ) { - $Knnb = $Ktoken_vars + 1; - $Knnb++ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b' ); - $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_]; - $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_]; - } + # Just bump up the breakpoint count as a signal that there are breaks. + # This is useful if we have breaks but may want to postpone deciding + # where to make them. + $forced_breakpoint_count++; + return; + } ## end sub set_fake_breakpoint - # If there is a pending one-line block .. - if ( $index_start_one_line_block != UNDEFINED_INDEX ) { + use constant DEBUG_FORCE => 0; - # Fix for b1208: if a side comment follows this closing - # brace then we must include its length in the length test - # ... unless the -issl flag is set (fixes b1307-1309). - # Assume a minimum of 1 blank space to the comment. - my $added_length = 0; - if ( $has_side_comment - && !$rOpts_ignore_side_comment_lengths - && $next_nonblank_token_type eq '#' ) - { - $added_length = 1 + $rLL->[$K_last]->[_TOKEN_LENGTH_]; - } + sub set_forced_breakpoint { + my ( $self, $i ) = @_; - # we have to terminate it if.. - if ( + # Set a breakpoint AFTER the token at index $i in the _to_go arrays. - # it is too long (final length may be different from - # initial estimate). note: must allow 1 space for this - # token - $self->excess_line_length( $index_start_one_line_block, - $max_index_to_go ) + $added_length >= 0 + # Exceptions: + # - If the token at index $i is a blank, backup to $i-1 to + # get to the previous nonblank token. + # - For certain tokens, the break may be placed BEFORE the token + # at index $i, depending on user break preference settings. + # - If a break is made after an opening token, then a break will + # also be made before the corresponding closing token. - # or if it has too many semicolons - || ( $semicolons_before_block_self_destruct == 0 - && defined($K_last_nonblank_code) - && $rLL->[$K_last_nonblank_code]->[_TYPE_] ne ';' ) - ) - { - destroy_one_line_block(); - } - } + # Returns '$i_nonblank': + # = index of the token after which the breakpoint was actually placed + # = undef if breakpoint was not set. + my $i_nonblank; - # put a break before this closing curly brace if appropriate - $self->end_batch() - if ( $max_index_to_go >= 0 - && !$nobreak_BEFORE_BLOCK - && $index_start_one_line_block == UNDEFINED_INDEX ); + if ( !defined($i) || $i < 0 ) { - # store the closing curly brace - $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); + # Calls with bad index $i are harmless but waste time and should + # be caught and eliminated during code development. + if (DEVEL_MODE) { + my ( $a, $b, $c ) = caller(); + Fault( +"Bad call to forced breakpoint from $a $b $c ; called with i=$i; please fix\n" + ); + } + return; + } - # ok, we just stored a closing curly brace. Often, but - # not always, we want to end the line immediately. - # So now we have to check for special cases. + # Break after token $i + $i_nonblank = $self->set_forced_breakpoint_AFTER($i); - # if this '}' successfully ends a one-line block.. - my $is_one_line_block = 0; - my $keep_going = 0; - if ( $index_start_one_line_block != UNDEFINED_INDEX ) { + # If we break at an opening container..break at the closing + my $set_closing; + if ( defined($i_nonblank) + && $is_opening_sequence_token{ $tokens_to_go[$i_nonblank] } ) + { + $set_closing = 1; + $self->set_closing_breakpoint($i_nonblank); + } - # Remember the type of token just before the - # opening brace. It would be more general to use - # a stack, but this will work for one-line blocks. - $is_one_line_block = - $types_to_go[$index_start_one_line_block]; + DEBUG_FORCE && do { + my ( $a, $b, $c ) = caller(); + my $msg = +"FORCE $forced_breakpoint_count after call from $a $c with i=$i max=$max_index_to_go"; + if ( !defined($i_nonblank) ) { + $i = EMPTY_STRING unless defined($i); + $msg .= " but could not set break after i='$i'\n"; + } + else { + my $nobr = $nobreak_to_go[$i_nonblank]; + $nobr = 0 if ( !defined($nobr) ); + $msg .= <undo_forced_breakpoint_stack(0); + return $i_nonblank; + } ## end sub set_forced_breakpoint - # For -lp, extend the nobreak to include a trailing - # terminal ','. This is because the -lp indentation was - # not known when making one-line blocks, so we may be able - # to move the line back to fit. Otherwise we may create a - # needlessly stranded comma on the next line. - my $iend_nobreak = $max_index_to_go - 1; - if ( $rOpts_line_up_parentheses - && $next_nonblank_token_type eq ',' - && $Knnb eq $K_last ) - { - my $p_seqno = $parent_seqno_to_go[$max_index_to_go]; - my $is_excluded = - $self->[_ris_excluded_lp_container_]->{$p_seqno}; - $iend_nobreak = $max_index_to_go if ( !$is_excluded ); - } + sub set_forced_breakpoint_AFTER { + my ( $self, $i ) = @_; - $self->set_nobreaks( $index_start_one_line_block, - $iend_nobreak ); + # This routine is only called by sub set_forced_breakpoint and + # sub set_closing_breakpoint. - # save starting block indexes so that sub correct_lp can - # check and adjust -lp indentation (c098) - push @{$ri_starting_one_line_block}, - $index_start_one_line_block; + # Set a breakpoint AFTER the token at index $i in the _to_go arrays. - # then re-initialize for the next one-line block - destroy_one_line_block(); + # Exceptions: + # - If the token at index $i is a blank, backup to $i-1 to + # get to the previous nonblank token. + # - For certain tokens, the break may be placed BEFORE the token + # at index $i, depending on user break preference settings. - # then decide if we want to break after the '}' .. - # We will keep going to allow certain brace followers as in: - # do { $ifclosed = 1; last } unless $losing; - # - # But make a line break if the curly ends a - # significant block: - if ( - ( - $is_block_without_semicolon{$block_type} + # Returns: + # - the index of the token after which the break was set, or + # - undef if no break was set - # Follow users break point for - # one line block types U & G, such as a 'try' block - || $is_one_line_block =~ /^[UG]$/ - && $Ktoken_vars == $K_last - ) + return unless ( defined($i) && $i >= 0 ); - # if needless semicolon follows we handle it later - && $next_nonblank_token ne ';' - ) - { - $self->end_batch() - unless ($no_internal_newlines); - } - } + # Back up at a blank so we have a token to examine. + # This was added to fix for cases like b932 involving an '=' break. + if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- } - # set string indicating what we need to look for brace follower - # tokens - if ( $is_if_unless_elsif_else{$block_type} ) { - $rbrace_follower = undef; - } - elsif ( $block_type eq 'do' ) { - $rbrace_follower = \%is_do_follower; - if ( - $self->tight_paren_follows( $K_to_go[0], $Ktoken_vars ) - ) - { - $rbrace_follower = { ')' => 1 }; - } - } + # Never break between welded tokens + return + if ( $total_weld_count + && $self->[_rK_weld_right_]->{ $K_to_go[$i] } ); - # added eval for borris.t - elsif ($is_sort_map_grep_eval{$block_type} - || $is_one_line_block eq 'G' ) - { - $rbrace_follower = undef; - $keep_going = 1; - } + my $token = $tokens_to_go[$i]; + my $type = $types_to_go[$i]; - # anonymous sub - elsif ( $self->[_ris_asub_block_]->{$type_sequence} ) { - if ($is_one_line_block) { + # For certain tokens, use user settings to decide if we break before or + # after it + if ( $break_before_or_after_token{$token} + && ( $type eq $token || $type eq 'k' ) ) + { + if ( $want_break_before{$token} && $i >= 0 ) { $i-- } + } - $rbrace_follower = \%is_anon_sub_1_brace_follower; + # breaks are forced before 'if' and 'unless' + elsif ( $is_if_unless{$token} && $type eq 'k' ) { $i-- } - # Exceptions to help keep -lp intact, see git #74 ... - # Exception 1: followed by '}' on this line - if ( $Ktoken_vars < $K_last - && $next_nonblank_token eq '}' ) - { - $rbrace_follower = undef; - $keep_going = 1; - } + if ( $i >= 0 && $i <= $max_index_to_go ) { + my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1; - # Exception 2: followed by '}' on next line if -lp set. - # The -lp requirement allows the formatting to follow - # old breaks when -lp is not used, minimizing changes. - # Fixes issue c087. - elsif ($Ktoken_vars == $K_last - && $rOpts_line_up_parentheses ) - { - my $K_closing_container = - $self->[_K_closing_container_]; - my $K_opening_container = - $self->[_K_opening_container_]; - my $p_seqno = $parent_seqno_to_go[$max_index_to_go]; - my $Kc = $K_closing_container->{$p_seqno}; - my $is_excluded = - $self->[_ris_excluded_lp_container_]->{$p_seqno}; - if ( defined($Kc) - && $rLL->[$Kc]->[_TOKEN_] eq '}' - && !$is_excluded - && $Kc - $Ktoken_vars <= 2 ) - { - $rbrace_follower = undef; - $keep_going = 1; - } - } - } - else { - $rbrace_follower = \%is_anon_sub_brace_follower; - } - } + if ( $i_nonblank >= 0 + && !$nobreak_to_go[$i_nonblank] + && !$forced_breakpoint_to_go[$i_nonblank] ) + { + $forced_breakpoint_to_go[$i_nonblank] = 1; - # None of the above: specify what can follow a closing - # brace of a block which is not an - # if/elsif/else/do/sort/map/grep/eval - # Testfiles: - # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t - else { - $rbrace_follower = \%is_other_brace_follower; + if ( $i_nonblank > $index_max_forced_break ) { + $index_max_forced_break = $i_nonblank; } + $forced_breakpoint_count++; + $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] + = $i_nonblank; - # See if an elsif block is followed by another elsif or else; - # complain if not. - if ( $block_type eq 'elsif' ) { + # success + return $i_nonblank; + } + } + return; + } ## end sub set_forced_breakpoint_AFTER - if ( $next_nonblank_token_type eq 'b' ) { # end of line? - $looking_for_else = 1; # ok, check on next line - } - else { - ## /^(elsif|else)$/ - if ( !$is_elsif_else{$next_nonblank_token} ) { - write_logfile_entry("No else block :(\n"); - } - } - } + sub clear_breakpoint_undo_stack { + my ($self) = @_; + $forced_breakpoint_undo_count = 0; + return; + } - # keep going after certain block types (map,sort,grep,eval) - # added eval for borris.t - if ($keep_going) { + use constant DEBUG_UNDOBP => 0; - # keep going - } + sub undo_forced_breakpoint_stack { - # if no more tokens, postpone decision until re-entering - elsif ( ( $next_nonblank_token_type eq 'b' ) - && $rOpts_add_newlines ) - { - unless ($rbrace_follower) { - $self->end_batch() - unless ( $no_internal_newlines - || $max_index_to_go < 0 ); - } - } - elsif ($rbrace_follower) { + my ( $self, $i_start ) = @_; - unless ( $rbrace_follower->{$next_nonblank_token} ) { - $self->end_batch() - unless ( $no_internal_newlines - || $max_index_to_go < 0 ); - } - $rbrace_follower = undef; - } + # Given $i_start, a non-negative index the 'undo stack' of breakpoints, + # remove all breakpoints from the top of the 'undo stack' down to and + # including index $i_start. - else { - $self->end_batch() - unless ( $no_internal_newlines - || $max_index_to_go < 0 ); - } + # The 'undo stack' is a stack of all breakpoints made for a batch of + # code. - } ## end treatment of closing block token + if ( $i_start < 0 ) { + $i_start = 0; + my ( $a, $b, $c ) = caller(); - #------------------------------ - # handle here_doc target string - #------------------------------ - elsif ( $type eq 'h' ) { + # Bad call, can only be due to a recent programming change. + Fault( +"Program Bug: undo_forced_breakpoint_stack from $a $c has bad i=$i_start " + ) if (DEVEL_MODE); + return; + } - # no newlines after seeing here-target - $no_internal_newlines = 2; - ## destroy_one_line_block(); # deleted to fix case b529 - $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); + while ( $forced_breakpoint_undo_count > $i_start ) { + my $i = + $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ]; + if ( $i >= 0 && $i <= $max_index_to_go ) { + $forced_breakpoint_to_go[$i] = 0; + $forced_breakpoint_count--; + + DEBUG_UNDOBP && do { + my ( $a, $b, $c ) = caller(); + print STDOUT +"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"; + }; } - #----------------------------- - # handle all other token types - #----------------------------- + # shouldn't happen, but not a critical error else { + if (DEVEL_MODE) { + my ( $a, $b, $c ) = caller(); + Fault(<store_token_to_go( $Ktoken_vars, $rtoken_vars ); +{ ## begin closure set_closing_breakpoint - # break after a label if requested - if ( $rOpts_break_after_labels - && $type eq 'J' - && $rOpts_break_after_labels == 1 ) - { - $self->end_batch() - unless ($no_internal_newlines); - } + my %postponed_breakpoint; + + sub initialize_postponed_breakpoint { + %postponed_breakpoint = (); + return; + } + + sub has_postponed_breakpoint { + my ($seqno) = @_; + return $postponed_breakpoint{$seqno}; + } + + sub set_closing_breakpoint { + + # set a breakpoint at a matching closing token + my ( $self, $i_break ) = @_; + + if ( defined( $mate_index_to_go[$i_break] ) ) { + + # Don't reduce the '2' in the statement below. + # Test files: attrib.t, BasicLyx.pm.html + if ( $mate_index_to_go[$i_break] > $i_break + 2 ) { + + # break before } ] and ), but sub set_forced_breakpoint will decide + # to break before or after a ? and : + my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1; + $self->set_forced_breakpoint_AFTER( + $mate_index_to_go[$i_break] - $inc ); } + } + else { + my $type_sequence = $type_sequence_to_go[$i_break]; + if ($type_sequence) { + $postponed_breakpoint{$type_sequence} = 1; + } + } + return; + } ## end sub set_closing_breakpoint +} ## end closure set_closing_breakpoint - # remember previous nonblank, non-comment OUTPUT token - $K_last_nonblank_code = $Ktoken_vars; +######################################### +# CODE SECTION 9: Process batches of code +######################################### - } ## end of loop over all tokens in this line +{ ## begin closure grind_batch_of_CODE - # if there is anything left in the output buffer ... - if ( $max_index_to_go >= 0 ) { + # The routines in this closure begin the processing of a 'batch' of code. - my $type = $rLL->[$K_last]->[_TYPE_]; - my $break_flag = $self->[_rbreak_after_Klast_]->{$K_last}; + # A variable to keep track of consecutive nonblank lines so that we can + # insert occasional blanks + my @nonblank_lines_at_depth; - # we have to flush .. - if ( + # A variable to remember maximum size of previous batches; this is needed + # by the logical padding routine + my $peak_batch_size; + my $batch_count; - # if there is a side comment... - $type eq '#' + # variables to keep track of indentation of unmatched containers. + my %saved_opening_indentation; - # if this line ends in a quote - # NOTE: This is critically important for insuring that quoted - # lines do not get processed by things like -sot and -sct - || $in_quote + sub initialize_grind_batch_of_CODE { + @nonblank_lines_at_depth = (); + $peak_batch_size = 0; + $batch_count = 0; + %saved_opening_indentation = (); + return; + } ## end sub initialize_grind_batch_of_CODE - # if this is a VERSION statement - || $CODE_type eq 'VER' + # sub grind_batch_of_CODE receives sections of code which are the longest + # possible lines without a break. In other words, it receives what is left + # after applying all breaks forced by blank lines, block comments, side + # comments, pod text, and structural braces. Its job is to break this code + # down into smaller pieces, if necessary, which fit within the maximum + # allowed line length. Then it sends the resulting lines of code on down + # the pipeline to the VerticalAligner package, breaking the code into + # continuation lines as necessary. The batch of tokens are in the "to_go" + # arrays. The name 'grind' is slightly suggestive of a machine continually + # breaking down long lines of code, but mainly it is unique and easy to + # remember and find with an editor search. - # to keep a label at the end of a line - || ( $type eq 'J' && $rOpts_break_after_labels != 2 ) + # The two routines 'process_line_of_CODE' and 'grind_batch_of_CODE' work + # together in the following way: - # if we have a hard break request - || $break_flag && $break_flag != 2 + # - 'process_line_of_CODE' receives the original INPUT lines one-by-one and + # combines them into the largest sequences of tokens which might form a new + # line. + # - 'grind_batch_of_CODE' determines which tokens will form the OUTPUT + # lines. - # if we are instructed to keep all old line breaks - || !$rOpts->{'delete-old-newlines'} + # So sub 'process_line_of_CODE' builds up the longest possible continuous + # sequences of tokens, regardless of line length, and then + # grind_batch_of_CODE breaks these sequences back down into the new output + # lines. - # if this is a line of the form 'use overload'. A break here in - # the input file is a good break because it will allow the - # operators which follow to be formatted well. Without this - # break the formatting with -ci=4 -xci is poor, for example. + # Sub 'grind_batch_of_CODE' ships its output lines to the vertical aligner. - # use overload - # '+' => sub { - # print length $_[2], "\n"; - # my ( $x, $y ) = _order(@_); - # Number::Roman->new( int $x + $y ); - # }, - # '-' => sub { - # my ( $x, $y ) = _order(@_); - # Number::Roman->new( int $x - $y ); - # }; - || ( $max_index_to_go == 2 - && $types_to_go[0] eq 'k' - && $tokens_to_go[0] eq 'use' - && $tokens_to_go[$max_index_to_go] eq 'overload' ) - ) - { - destroy_one_line_block(); - $self->end_batch(); - } + use constant DEBUG_GRIND => 0; + + sub check_grind_input { + + # Check for valid input to sub grind_batch_of_CODE. An error here + # would most likely be due to an error in 'sub store_token_to_go'. + my ($self) = @_; + + # Be sure there are tokens in the batch + if ( $max_index_to_go < 0 ) { + Fault(<[_Klimit_]; - else { + # The local batch tokens must be a continuous part of the global token + # array. + my $KK; + foreach my $ii ( 0 .. $max_index_to_go ) { - # Check for a soft break request - if ( $break_flag && $break_flag == 2 ) { - $self->set_forced_breakpoint($max_index_to_go); - } + my $Km = $KK; - # mark old line breakpoints in current output stream - if ( !$rOpts_ignore_old_breakpoints - || $self->[_ris_essential_old_breakpoint_]->{$K_last} ) - { - my $jobp = $max_index_to_go; - if ( $types_to_go[$max_index_to_go] eq 'b' - && $max_index_to_go > 0 ) - { - $jobp--; - } - $old_breakpoint_to_go[$jobp] = 1; - } + $KK = $K_to_go[$ii]; + if ( !defined($KK) || $KK < 0 || $KK > $Klimit ) { + $KK = '(undef)' unless defined($KK); + Fault(< 0 && $KK != $Km + 1 ) { + my $im = $ii - 1; + Fault(< #; + push @q, ','; + @quick_filter{@q} = (1) x scalar(@q); + } - # Input parameters: - # $K_to_go_0 = first token index K of this output batch (=K_to_go[0]) - # $K_ic = index of the closing do brace (=K_to_go[$max_index_to_go]) - # Return parameter: - # false if we want a break after the closing do brace - # true if we do not want a break after the closing do brace + sub grind_batch_of_CODE { - # We are at the closing brace of a 'do' block. See if this brace is - # followed by a closing paren, and if so, set a flag which indicates - # that we do not want a line break between the '}' and ')'. + my ($self) = @_; - # xxxxx ( ...... do { ... } ) { - # ^-------looking at this brace, K_ic + #----------------------------------------------------------------- + # This sub directs the formatting of one complete batch of tokens. + # The tokens of the batch are in the '_to_go' arrays. + #----------------------------------------------------------------- - # Subscript notation: - # _i = inner container (braces in this case) - # _o = outer container (parens in this case) - # _io = inner opening = '{' - # _ic = inner closing = '}' - # _oo = outer opening = '(' - # _oc = outer closing = ')' + my $this_batch = $self->[_this_batch_]; + $this_batch->[_peak_batch_size_] = $peak_batch_size; + $this_batch->[_batch_count_] = ++$batch_count; - # |--K_oo |--K_oc = outer container - # xxxxx ( ...... do { ...... } ) { - # |--K_io |--K_ic = inner container + $self->check_grind_input() if (DEVEL_MODE); - # In general, the safe thing to do is return a 'false' value - # if the statement appears to be complex. This will have - # the downstream side-effect of opening up outer containers - # to help make complex code readable. But for simpler - # do blocks it can be preferable to keep the code compact - # by returning a 'true' value. + # This routine is only called from sub flush_batch_of_code, so that + # routine is a better spot for debugging. + DEBUG_GRIND && do { + my $token = my $type = EMPTY_STRING; + if ( $max_index_to_go >= 0 ) { + $token = $tokens_to_go[$max_index_to_go]; + $type = $types_to_go[$max_index_to_go]; + } + my $output_str = EMPTY_STRING; + if ( $max_index_to_go > 20 ) { + my $mm = $max_index_to_go - 10; + $output_str = + join( EMPTY_STRING, @tokens_to_go[ 0 .. 10 ] ) . " ... " + . join( EMPTY_STRING, + @tokens_to_go[ $mm .. $max_index_to_go ] ); + } + else { + $output_str = join EMPTY_STRING, + @tokens_to_go[ 0 .. $max_index_to_go ]; + } + print STDERR <[_rLL_]; + # Remove any trailing blank, which is possible (c192 has example) + if ( $max_index_to_go >= 0 && $types_to_go[$max_index_to_go] eq 'b' ) { + $max_index_to_go -= 1; + } - # we should only be called at a closing block - my $seqno_i = $rLL->[$K_ic]->[_TYPE_SEQUENCE_]; - return unless ($seqno_i); # shouldn't happen; + return if ( $max_index_to_go < 0 ); - # This only applies if the next nonblank is a ')' - my $K_oc = $self->K_next_nonblank($K_ic); - return unless defined($K_oc); - my $token_next = $rLL->[$K_oc]->[_TOKEN_]; - return unless ( $token_next eq ')' ); + if ($rOpts_line_up_parentheses) { + $self->set_lp_indentation(); + } - my $seqno_o = $rLL->[$K_oc]->[_TYPE_SEQUENCE_]; - my $K_io = $self->[_K_opening_container_]->{$seqno_i}; - my $K_oo = $self->[_K_opening_container_]->{$seqno_o}; - return unless ( defined($K_io) && defined($K_oo) ); + #-------------------------------------------------- + # Shortcut for block comments + # Note that this shortcut does not work for -lp yet + #-------------------------------------------------- + elsif ( !$max_index_to_go && $types_to_go[0] eq '#' ) { + my $ibeg = 0; + $this_batch->[_ri_first_] = [$ibeg]; + $this_batch->[_ri_last_] = [$ibeg]; - # RULE 1: Do not break before a closing signature paren - # (regardless of complexity). This is a fix for issue git#22. - # Looking for something like: - # sub xxx ( ... do { ... } ) { - # ^----- next block_type - my $K_test = $self->K_next_nonblank($K_oc); - if ( defined($K_test) && $rLL->[$K_test]->[_TYPE_] eq '{' ) { - my $seqno_test = $rLL->[$K_test]->[_TYPE_SEQUENCE_]; - if ($seqno_test) { - if ( $self->[_ris_asub_block_]->{$seqno_test} - || $self->[_ris_sub_block_]->{$seqno_test} ) - { - return 1; - } + $self->convey_batch_to_vertical_aligner(); + + my $level = $levels_to_go[$ibeg]; + $self->[_last_line_leading_type_] = $types_to_go[$ibeg]; + $self->[_last_line_leading_level_] = $level; + $nonblank_lines_at_depth[$level] = 1; + return; } - } - # RULE 2: Break if the contents within braces appears to be 'complex'. We - # base this decision on the number of tokens between braces. + #------------- + # Normal route + #------------- - # xxxxx ( ... do { ... } ) { - # ^^^^^^ + my $rLL = $self->[_rLL_]; - # Although very simple, it has the advantages of (1) being insensitive to - # changes in lengths of identifier names, (2) easy to understand, implement - # and test. A test case for this is 't/snippets/long_line.in'. + #------------------------------------------------------- + # Loop over the batch to initialize some batch variables + #------------------------------------------------------- + my $comma_count_in_batch = 0; + my @colon_list; + my @ix_seqno_controlling_ci; + my %comma_arrow_count; + my $comma_arrow_count_contained = 0; + my @unmatched_closing_indexes_in_this_batch; + my @unmatched_opening_indexes_in_this_batch; - # Example: $K_ic - $K_oo = 9 [Pass Rule 2] - # if ( do { $2 !~ /&/ } ) { ... } + my @i_for_semicolon; + foreach my $i ( 0 .. $max_index_to_go ) { - # Example: $K_ic - $K_oo = 10 [Pass Rule 2] - # for ( split /\s*={70,}\s*/, do { local $/; }) { ... } + if ( $types_to_go[$i] eq 'b' ) { + $inext_to_go[$i] = $inext_to_go[ $i - 1 ] = $i + 1; + next; + } - # Example: $K_ic - $K_oo = 20 [Fail Rule 2] - # test_zero_args( "do-returned list slice", do { ( 10, 11 )[ 2, 3 ]; }); + $inext_to_go[$i] = $i + 1; - return if ( $K_ic - $K_io > 16 ); + # This is an optional shortcut to save a bit of time by skipping + # most tokens. Note: the filter may need to be updated if the + # next 'if' tests are ever changed to include more token types. + next if ( !$quick_filter{ $types_to_go[$i] } ); - # RULE 3: break if the code between the opening '(' and the '{' is 'complex' - # As with the previous rule, we decide based on the token count + my $type = $types_to_go[$i]; - # xxxxx ( ... do { ... } ) { - # ^^^^^^^^ + # gather info needed by sub break_long_lines + if ( $type_sequence_to_go[$i] ) { + my $seqno = $type_sequence_to_go[$i]; + my $token = $tokens_to_go[$i]; - # Example: $K_ic - $K_oo = 9 [Pass Rule 2] - # $K_io - $K_oo = 4 [Pass Rule 3] - # if ( do { $2 !~ /&/ } ) { ... } + # remember indexes of any tokens controlling xci + # in this batch. This list is needed by sub undo_ci. + if ( $self->[_ris_seqno_controlling_ci_]->{$seqno} ) { + push @ix_seqno_controlling_ci, $i; + } - # Example: $K_ic - $K_oo = 10 [Pass rule 2] - # $K_io - $K_oo = 9 [Pass rule 3] - # for ( split /\s*={70,}\s*/, do { local $/; }) { ... } + if ( $is_opening_sequence_token{$token} ) { + if ( $self->[_rbreak_container_]->{$seqno} ) { + $self->set_forced_breakpoint($i); + } + push @unmatched_opening_indexes_in_this_batch, $i; + if ( $type eq '?' ) { + push @colon_list, $type; + } + } + elsif ( $is_closing_sequence_token{$token} ) { - return if ( $K_io - $K_oo > 9 ); + if ( $i > 0 && $self->[_rbreak_container_]->{$seqno} ) { + $self->set_forced_breakpoint( $i - 1 ); + } - # RULE 4: Break if we have already broken this batch of output tokens - return if ( $K_oo < $K_to_go_0 ); + my $i_mate = pop @unmatched_opening_indexes_in_this_batch; + if ( defined($i_mate) && $i_mate >= 0 ) { + if ( $type_sequence_to_go[$i_mate] == + $type_sequence_to_go[$i] ) + { + $mate_index_to_go[$i] = $i_mate; + $mate_index_to_go[$i_mate] = $i; + my $cac = $comma_arrow_count{$seqno}; + $comma_arrow_count_contained += $cac if ($cac); + } + else { + push @unmatched_opening_indexes_in_this_batch, + $i_mate; + push @unmatched_closing_indexes_in_this_batch, $i; + } + } + else { + push @unmatched_closing_indexes_in_this_batch, $i; + } + if ( $type eq ':' ) { + push @colon_list, $type; + } + } ## end elsif ( $is_closing_sequence_token...) - # RULE 5: Break if input is not on one line - # For example, we will set the flag for the following expression - # written in one line: + } ## end if ($seqno) - # This has: $K_ic - $K_oo = 10 [Pass rule 2] - # $K_io - $K_oo = 8 [Pass rule 3] - # $self->debug( 'Error: ' . do { local $/; <$err> } ); + elsif ( $type eq ',' ) { $comma_count_in_batch++; } + elsif ( $type eq '=>' ) { + if (@unmatched_opening_indexes_in_this_batch) { + my $j = $unmatched_opening_indexes_in_this_batch[-1]; + my $seqno = $type_sequence_to_go[$j]; + $comma_arrow_count{$seqno}++; + } + } + elsif ( $type eq 'f' ) { + push @i_for_semicolon, $i; + } - # but we break after the brace if it is on multiple lines on input, since - # the user may prefer it on multiple lines: + } ## end for ( my $i = 0 ; $i <=...) - # [Fail rule 5] - # $self->debug( - # 'Error: ' . do { local $/; <$err> } - # ); + # Break at a single interior C-style for semicolon in this batch (c154) + if ( @i_for_semicolon && @i_for_semicolon == 1 ) { + my $i = $i_for_semicolon[0]; + my $inext = $inext_to_go[$i]; + if ( $inext <= $max_index_to_go && $types_to_go[$inext] ne '#' ) { + $self->set_forced_breakpoint($i); + } + } - if ( !$rOpts_ignore_old_breakpoints ) { - my $iline_oo = $rLL->[$K_oo]->[_LINE_INDEX_]; - my $iline_oc = $rLL->[$K_oc]->[_LINE_INDEX_]; - return if ( $iline_oo != $iline_oc ); - } + my $is_unbalanced_batch = @unmatched_opening_indexes_in_this_batch + + @unmatched_closing_indexes_in_this_batch; - # OK to keep the paren tight - return 1; -} ## end sub tight_paren_follows + if (@unmatched_opening_indexes_in_this_batch) { + $this_batch->[_runmatched_opening_indexes_] = + \@unmatched_opening_indexes_in_this_batch; + } -my %is_brace_semicolon_colon; + if (@ix_seqno_controlling_ci) { + $this_batch->[_rix_seqno_controlling_ci_] = + \@ix_seqno_controlling_ci; + } -BEGIN { - my @q = qw( { } ; : ); - @is_brace_semicolon_colon{@q} = (1) x scalar(@q); -} + #------------------------ + # Set special breakpoints + #------------------------ + # If this line ends in a code block brace, set breaks at any + # previous closing code block braces to breakup a chain of code + # blocks on one line. This is very rare but can happen for + # user-defined subs. For example we might be looking at this: + # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR { + my $saw_good_break; # flag to force breaks even if short line + if ( + + # looking for opening or closing block brace + $block_type_to_go[$max_index_to_go] + + # never any good breaks if just one token + && $max_index_to_go > 0 + + # but not one of these which are never duplicated on a line: + # until|while|for|if|elsif|else + && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] + } + ) + { + my $lev = $nesting_depth_to_go[$max_index_to_go]; -sub starting_one_line_block { + # Walk backwards from the end and + # set break at any closing block braces at the same level. + # But quit if we are not in a chain of blocks. + foreach my $i ( reverse( 0 .. $max_index_to_go - 1 ) ) { + last if ( $levels_to_go[$i] < $lev ); # stop at a lower level + next if ( $levels_to_go[$i] > $lev ); # skip past higher level - # after seeing an opening curly brace, look for the closing brace and see - # if the entire block will fit on a line. This routine is not always right - # so a check is made later (at the closing brace) to make sure we really - # have a one-line block. We have to do this preliminary check, though, - # because otherwise we would always break at a semicolon within a one-line - # block if the block contains multiple statements. + if ( $block_type_to_go[$i] ) { + if ( $tokens_to_go[$i] eq '}' ) { + $self->set_forced_breakpoint($i); + $saw_good_break = 1; + } + } - my ( $self, $Kj, $K_last_nonblank, $K_last ) = @_; + # quit if we see anything besides words, function, blanks + # at this level + elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last } + } + } - my $rbreak_container = $self->[_rbreak_container_]; - my $rshort_nested = $self->[_rshort_nested_]; - my $rLL = $self->[_rLL_]; - my $K_opening_container = $self->[_K_opening_container_]; - my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; + #----------------------------------------------- + # insertion of any blank lines before this batch + #----------------------------------------------- - # kill any current block - we can only go 1 deep - destroy_one_line_block(); + my $imin = 0; + my $imax = $max_index_to_go; - # return value: - # 1=distance from start of block to opening brace exceeds line length - # 0=otherwise + # trim any blank tokens - for safety, but should not be necessary + if ( $types_to_go[$imin] eq 'b' ) { $imin++ } + if ( $types_to_go[$imax] eq 'b' ) { $imax-- } - my $i_start = 0; + if ( $imin > $imax ) { + if (DEVEL_MODE) { + my $K0 = $K_to_go[0]; + my $lno = EMPTY_STRING; + if ( defined($K0) ) { $lno = $rLL->[$K0]->[_LINE_INDEX_] + 1 } + Fault(<[_last_line_leading_type_]; + my $last_line_leading_level = $self->[_last_line_leading_level_]; - # Return if block should be broken - my $type_sequence_j = $rLL->[$Kj]->[_TYPE_SEQUENCE_]; - if ( $rbreak_container->{$type_sequence_j} ) { - return 0; - } + my $leading_type = $types_to_go[0]; + my $leading_level = $levels_to_go[0]; - my $ris_bli_container = $self->[_ris_bli_container_]; - my $is_bli = $ris_bli_container->{$type_sequence_j}; + # add blank line(s) before certain key types but not after a comment + if ( $last_line_leading_type ne '#' ) { + my $blank_count = 0; + my $leading_token = $tokens_to_go[0]; - my $block_type = $rblock_type_of_seqno->{$type_sequence_j}; - $block_type = EMPTY_STRING unless ( defined($block_type) ); + # break before certain key blocks except one-liners + if ( $leading_type eq 'k' ) { + if ( $leading_token eq 'BEGIN' || $leading_token eq 'END' ) { + $blank_count = $rOpts->{'blank-lines-before-subs'} + if ( terminal_type_i( 0, $max_index_to_go ) ne '}' ); + } - my $previous_nonblank_token = EMPTY_STRING; - my $i_last_nonblank = -1; - if ( defined($K_last_nonblank) ) { - $i_last_nonblank = $K_last_nonblank - $K_to_go[0]; - if ( $i_last_nonblank >= 0 ) { - $previous_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_]; - } - } + # Break before certain block types if we haven't had a + # break at this level for a while. This is the + # difficult decision.. + elsif ($last_line_leading_type ne 'b' + && $is_if_unless_while_until_for_foreach{$leading_token} ) + { + my $lc = $nonblank_lines_at_depth[$last_line_leading_level]; + if ( !defined($lc) ) { $lc = 0 } - # find the starting keyword for this block (such as 'if', 'else', ...) - if ( - $max_index_to_go == 0 - ##|| $block_type =~ /^[\{\}\;\:]$/ - || $is_brace_semicolon_colon{$block_type} - || substr( $block_type, 0, 7 ) eq 'package' - ) - { - $i_start = $max_index_to_go; - } + # patch for RT #128216: no blank line inserted at a level + # change + if ( $levels_to_go[0] != $last_line_leading_level ) { + $lc = 0; + } - # the previous nonblank token should start these block types - elsif ( - $i_last_nonblank >= 0 - && ( $previous_nonblank_token eq $block_type - || $self->[_ris_asub_block_]->{$type_sequence_j} - || $self->[_ris_sub_block_]->{$type_sequence_j} - || substr( $block_type, -2, 2 ) eq '()' ) - ) - { - $i_start = $i_last_nonblank; + if ( $rOpts->{'blanks-before-blocks'} + && $lc >= $rOpts->{'long-block-line-count'} + && $self->consecutive_nonblank_lines() >= + $rOpts->{'long-block-line-count'} + && terminal_type_i( 0, $max_index_to_go ) ne '}' ) + { + $blank_count = 1; + } + } + } - # For signatures and extended syntax ... - # If this brace follows a parenthesized list, we should look back to - # find the keyword before the opening paren because otherwise we might - # form a one line block which stays intact, and cause the parenthesized - # expression to break open. That looks bad. - if ( $tokens_to_go[$i_start] eq ')' ) { + # blank lines before subs except declarations and one-liners + elsif ( $leading_type eq 'i' ) { + my $special_identifier = + $self->[_ris_special_identifier_token_]->{$leading_token}; + if ($special_identifier) { + ## $leading_token =~ /$SUB_PATTERN/ + if ( $special_identifier eq 'sub' ) { + + $blank_count = $rOpts->{'blank-lines-before-subs'} + if ( terminal_type_i( 0, $max_index_to_go ) !~ + /^[\;\}\,]$/ ); + } - # Find the opening paren - my $K_start = $K_to_go[$i_start]; - return 0 unless defined($K_start); - my $seqno = $type_sequence_to_go[$i_start]; - return 0 unless ($seqno); - my $K_opening = $K_opening_container->{$seqno}; - return 0 unless defined($K_opening); - my $i_opening = $i_start + ( $K_opening - $K_start ); + # break before all package declarations + ## substr( $leading_token, 0, 8 ) eq 'package ' + elsif ( $special_identifier eq 'package' ) { - # give up if not on this line - return 0 unless ( $i_opening >= 0 ); - $i_start = $i_opening; ##$index_max_forced_break + 1; + # ... except in a very short eval block + my $pseqno = $parent_seqno_to_go[0]; + $blank_count = $rOpts->{'blank-lines-before-packages'} + if ( + !$self->[_ris_short_broken_eval_block_]->{$pseqno} + ); + } + } + } - # go back one token before the opening paren - if ( $i_start > 0 ) { $i_start-- } - if ( $types_to_go[$i_start] eq 'b' && $i_start > 0 ) { $i_start--; } - my $lev = $levels_to_go[$i_start]; - if ( $lev > $rLL->[$Kj]->[_LEVEL_] ) { return 0 } - } - } + # Check for blank lines wanted before a closing brace + elsif ( $leading_token eq '}' ) { + if ( $rOpts->{'blank-lines-before-closing-block'} + && $block_type_to_go[0] + && $block_type_to_go[0] =~ + /$blank_lines_before_closing_block_pattern/ ) + { + my $nblanks = $rOpts->{'blank-lines-before-closing-block'}; + if ( $nblanks > $blank_count ) { + $blank_count = $nblanks; + } + } + } - elsif ( $previous_nonblank_token eq ')' ) { + if ($blank_count) { - # For something like "if (xxx) {", the keyword "if" will be - # just after the most recent break. This will be 0 unless - # we have just killed a one-line block and are starting another. - # (doif.t) - # Note: cannot use inext_index_to_go[] here because that array - # is still being constructed. - $i_start = $index_max_forced_break + 1; - if ( $types_to_go[$i_start] eq 'b' ) { - $i_start++; + # future: send blank line down normal path to VerticalAligner? + $self->flush_vertical_aligner(); + my $file_writer_object = $self->[_file_writer_object_]; + $file_writer_object->require_blank_code_lines($blank_count); + } } - # Patch to avoid breaking short blocks defined with extended_syntax: - # Strip off any trailing () which was added in the parser to mark - # the opening keyword. For example, in the following - # create( TypeFoo $e) {$bubba} - # the blocktype would be marked as create() - my $stripped_block_type = $block_type; - if ( substr( $block_type, -2, 2 ) eq '()' ) { - $stripped_block_type = substr( $block_type, 0, -2 ); + # update blank line variables and count number of consecutive + # non-blank, non-comment lines at this level + if ( $leading_level == $last_line_leading_level + && $leading_type ne '#' + && defined( $nonblank_lines_at_depth[$leading_level] ) ) + { + $nonblank_lines_at_depth[$leading_level]++; } - unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) { - return 0; + else { + $nonblank_lines_at_depth[$leading_level] = 1; } - } - # patch for SWITCH/CASE to retain one-line case/when blocks - elsif ( $block_type eq 'case' || $block_type eq 'when' ) { + $self->[_last_line_leading_type_] = $leading_type; + $self->[_last_line_leading_level_] = $leading_level; - # Note: cannot use inext_index_to_go[] here because that array - # is still being constructed. - $i_start = $index_max_forced_break + 1; - if ( $types_to_go[$i_start] eq 'b' ) { - $i_start++; - } - unless ( $tokens_to_go[$i_start] eq $block_type ) { - return 0; - } - } + #-------------------------- + # scan lists and long lines + #-------------------------- - else { - return 1; - } + # Flag to remember if we called sub 'pad_array_to_go'. + # Some routines (break_lists(), break_long_lines() ) need some + # extra tokens added at the end of the batch. Most batches do not + # use these routines, so we will avoid calling 'pad_array_to_go' + # unless it is needed. + my $called_pad_array_to_go; - my $pos = total_line_length( $i_start, $max_index_to_go ) - 1; + # set all forced breakpoints for good list formatting + my $is_long_line; + my $multiple_old_lines_in_batch; + if ( $max_index_to_go > 0 ) { + $is_long_line = + $self->excess_line_length( $imin, $max_index_to_go ) > 0; - my $maximum_line_length = - $maximum_line_length_at_level[ $levels_to_go[$i_start] ]; + my $Kbeg = $K_to_go[0]; + my $Kend = $K_to_go[$max_index_to_go]; + $multiple_old_lines_in_batch = + $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_]; + } - # see if block starting location is too great to even start - if ( $pos > $maximum_line_length ) { - return 1; - } + my $rbond_strength_bias = []; + if ( + $is_long_line + || $multiple_old_lines_in_batch - # See if everything to the closing token will fit on one line - # This is part of an update to fix cases b562 .. b983 - my $K_closing = $self->[_K_closing_container_]->{$type_sequence_j}; - return 0 unless ( defined($K_closing) ); - my $container_length = $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] - - $rLL->[$Kj]->[_CUMULATIVE_LENGTH_]; + # must always call break_lists() with unbalanced batches because + # it is maintaining some stacks + || $is_unbalanced_batch - my $excess = $pos + 1 + $container_length - $maximum_line_length; + # call break_lists if we might want to break at commas + || ( + $comma_count_in_batch + && ( $rOpts_maximum_fields_per_table > 0 + && $rOpts_maximum_fields_per_table <= $comma_count_in_batch + || $rOpts_comma_arrow_breakpoints == 0 ) + ) - # Add a small tolerance for welded tokens (case b901) - if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence_j) ) { - $excess += 2; - } + # call break_lists if user may want to break open some one-line + # hash references + || ( $comma_arrow_count_contained + && $rOpts_comma_arrow_breakpoints != 3 ) + ) + { + # add a couple of extra terminal blank tokens + $self->pad_array_to_go(); + $called_pad_array_to_go = 1; + + my $sgb = $self->break_lists( $is_long_line, $rbond_strength_bias ); + $saw_good_break ||= $sgb; + } + + # let $ri_first and $ri_last be references to lists of + # first and last tokens of line fragments to output.. + my ( $ri_first, $ri_last ); - if ( $excess > 0 ) { + #----------------------------- + # a single token uses one line + #----------------------------- + if ( !$max_index_to_go ) { + $ri_first = [$imin]; + $ri_last = [$imax]; + } - # line is too long... there is no chance of forming a one line block - # if the excess is more than 1 char - return 0 if ( $excess > 1 ); + # for multiple tokens + else { - # ... and give up if it is not a one-line block on input. - # note: for a one-line block on input, it may be possible to keep - # it as a one-line block (by removing a needless semicolon ). - my $K_start = $K_to_go[$i_start]; - my $ldiff = - $rLL->[$K_closing]->[_LINE_INDEX_] - $rLL->[$K_start]->[_LINE_INDEX_]; - return 0 if ($ldiff); - } + #------------------------- + # write a single line if.. + #------------------------- + if ( + ( - foreach my $Ki ( $Kj + 1 .. $K_last ) { + # this line is 'short' + !$is_long_line - # old whitespace could be arbitrarily large, so don't use it - if ( $rLL->[$Ki]->[_TYPE_] eq 'b' ) { $pos += 1 } - else { $pos += $rLL->[$Ki]->[_TOKEN_LENGTH_] } + # and we didn't see a good breakpoint + && !$saw_good_break - # ignore some small blocks - my $type_sequence_i = $rLL->[$Ki]->[_TYPE_SEQUENCE_]; - my $nobreak = $rshort_nested->{$type_sequence_i}; + # and we don't already have an interior breakpoint + && !$forced_breakpoint_count + ) - # Return false result if we exceed the maximum line length, - if ( $pos > $maximum_line_length ) { - return 0; - } + # or, we aren't allowed to add any newlines + || !$rOpts_add_newlines - # keep going for non-containers - elsif ( !$type_sequence_i ) { + ) + { + $ri_first = [$imin]; + $ri_last = [$imax]; + } - } + #----------------------------- + # otherwise use multiple lines + #----------------------------- + else { - # return if we encounter another opening brace before finding the - # closing brace. - elsif ($rLL->[$Ki]->[_TOKEN_] eq '{' - && $rLL->[$Ki]->[_TYPE_] eq '{' - && $rblock_type_of_seqno->{$type_sequence_i} - && !$nobreak ) - { - return 0; - } + # add a couple of extra terminal blank tokens if we haven't + # already done so + $self->pad_array_to_go() unless ($called_pad_array_to_go); - # if we find our closing brace.. - elsif ($rLL->[$Ki]->[_TOKEN_] eq '}' - && $rLL->[$Ki]->[_TYPE_] eq '}' - && $rblock_type_of_seqno->{$type_sequence_i} - && !$nobreak ) - { + ( $ri_first, $ri_last, my $rbond_strength_to_go ) = + $self->break_long_lines( $saw_good_break, \@colon_list, + $rbond_strength_bias ); - # be sure any trailing comment also fits on the line - my $Ki_nonblank = $Ki; - if ( $Ki_nonblank < $K_last ) { - $Ki_nonblank++; - if ( $rLL->[$Ki_nonblank]->[_TYPE_] eq 'b' - && $Ki_nonblank < $K_last ) - { - $Ki_nonblank++; - } - } + $self->break_all_chain_tokens( $ri_first, $ri_last ); - # Patch for one-line sort/map/grep/eval blocks with side comments: - # We will ignore the side comment length for sort/map/grep/eval - # because this can lead to statements which change every time - # perltidy is run. Here is an example from Denis Moskowitz which - # oscillates between these two states without this patch: + $self->break_equals( $ri_first, $ri_last ) + if @{$ri_first} >= 3; -## -------- -## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf -## @baz; -## -## grep { -## $_->foo ne 'bar' -## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf -## @baz; -## -------- + # now we do a correction step to clean this up a bit + # (The only time we would not do this is for debugging) + $self->recombine_breakpoints( $ri_first, $ri_last, + $rbond_strength_to_go ) + if ( $rOpts_recombine && @{$ri_first} > 1 ); - # When the first line is input it gets broken apart by the main - # line break logic in sub process_line_of_CODE. - # When the second line is input it gets recombined by - # process_line_of_CODE and passed to the output routines. The - # output routines (break_long_lines) do not break it apart - # because the bond strengths are set to the highest possible value - # for grep/map/eval/sort blocks, so the first version gets output. - # It would be possible to fix this by changing bond strengths, - # but they are high to prevent errors in older versions of perl. - # See c100 for eval test. - if ( $Ki < $K_last - && $rLL->[$K_last]->[_TYPE_] eq '#' - && $rLL->[$K_last]->[_LEVEL_] == $rLL->[$Ki]->[_LEVEL_] - && !$rOpts_ignore_side_comment_lengths - && !$is_sort_map_grep_eval{$block_type} - && $K_last - $Ki_nonblank <= 2 ) - { - # Only include the side comment for if/else/elsif/unless if it - # immediately follows (because the current '$rbrace_follower' - # logic for these will give an immediate brake after these - # closing braces). So for example a line like this - # if (...) { ... } ; # very long comment...... - # will already break like this: - # if (...) { ... } - # ; # very long comment...... - # so we do not need to include the length of the comment, which - # would break the block. Project 'bioperl' has coding like this. - ## !~ /^(if|else|elsif|unless)$/ - if ( !$is_if_unless_elsif_else{$block_type} - || $K_last == $Ki_nonblank ) - { - $Ki_nonblank = $K_last; - $pos += $rLL->[$Ki_nonblank]->[_TOKEN_LENGTH_]; + $self->insert_final_ternary_breaks( $ri_first, $ri_last ) + if (@colon_list); + } - if ( $Ki_nonblank > $Ki + 1 ) { + $self->insert_breaks_before_list_opening_containers( $ri_first, + $ri_last ) + if ( %break_before_container_types && $max_index_to_go > 0 ); - # source whitespace could be anything, assume - # at least one space before the hash on output - if ( $rLL->[ $Ki + 1 ]->[_TYPE_] eq 'b' ) { - $pos += 1; - } - else { $pos += $rLL->[ $Ki + 1 ]->[_TOKEN_LENGTH_] } - } + # Check for a phantom semicolon at the end of the batch + if ( !$token_lengths_to_go[$imax] && $types_to_go[$imax] eq ';' ) { + $self->unmask_phantom_token($imax); + } - if ( $pos >= $maximum_line_length ) { - return 0; - } - } + if ( $rOpts_one_line_block_semicolons == 0 ) { + $self->delete_one_line_semicolons( $ri_first, $ri_last ); } - # ok, it's a one-line block - create_one_line_block( $i_start, 20 ); - return 0; + # Remember the largest batch size processed. This is needed by the + # logical padding routine to avoid padding the first nonblank token + if ( $max_index_to_go > $peak_batch_size ) { + $peak_batch_size = $max_index_to_go; + } } - # just keep going for other characters - else { + #------------------- + # -lp corrector step + #------------------- + if ($rOpts_line_up_parentheses) { + $self->correct_lp_indentation( $ri_first, $ri_last ); } - } - # We haven't hit the closing brace, but there is still space. So the - # question here is, should we keep going to look at more lines in hopes of - # forming a new one-line block, or should we stop right now. The problem - # with continuing is that we will not be able to honor breaks before the - # opening brace if we continue. + #-------------------- + # ship this batch out + #-------------------- + $this_batch->[_ri_first_] = $ri_first; + $this_batch->[_ri_last_] = $ri_last; - # Typically we will want to keep trying to make one-line blocks for things - # like sort/map/grep/eval. But it is not always a good idea to make as - # many one-line blocks as possible, so other types are not done. The user - # can always use -mangle. + $self->convey_batch_to_vertical_aligner(); - # If we want to keep going, we will create a new one-line block. - # The blocks which we can keep going are in a hash, but we never want - # to continue if we are at a '-bli' block. - if ( $want_one_line_block{$block_type} && !$is_bli ) { - create_one_line_block( $i_start, 1 ); - } - return 0; -} ## end sub starting_one_line_block + #------------------------------------------------------------------- + # Write requested number of blank lines after an opening block brace + #------------------------------------------------------------------- + if ($rOpts_blank_lines_after_opening_block) { + my $iterm = $imax; + if ( $types_to_go[$iterm] eq '#' && $iterm > $imin ) { + $iterm -= 1; + if ( $types_to_go[$iterm] eq 'b' && $iterm > $imin ) { + $iterm -= 1; + } + } -sub unstore_token_to_go { + if ( $types_to_go[$iterm] eq '{' + && $block_type_to_go[$iterm] + && $block_type_to_go[$iterm] =~ + /$blank_lines_after_opening_block_pattern/ ) + { + my $nblanks = $rOpts_blank_lines_after_opening_block; + $self->flush_vertical_aligner(); + my $file_writer_object = $self->[_file_writer_object_]; + $file_writer_object->require_blank_code_lines($nblanks); + } + } - # remove most recent token from output stream - my $self = shift; - if ( $max_index_to_go > 0 ) { - $max_index_to_go--; - } - else { - $max_index_to_go = UNDEFINED_INDEX; + return; + } ## end sub grind_batch_of_CODE + + sub iprev_to_go { + my ($i) = @_; + return $i - 1 > 0 + && $types_to_go[ $i - 1 ] eq 'b' ? $i - 2 : $i - 1; } - return; -} ## end sub unstore_token_to_go -sub compare_indentation_levels { + sub unmask_phantom_token { + my ( $self, $iend ) = @_; - # Check to see if output line tabbing agrees with input line - # this can be very useful for debugging a script which has an extra - # or missing brace. + # Turn a phantom token into a real token. - my ( $self, $K_first, $guessed_indentation_level, $line_number ) = @_; - return unless ( defined($K_first) ); + # Input parameter: + # $iend = the index in the output batch array of this token. - my $rLL = $self->[_rLL_]; + # Phantom tokens are specially marked token types (such as ';') with + # no token text which only become real tokens if they occur at the end + # of an output line. At one time phantom ',' tokens were handled + # here, but now they are processed elsewhere. - my $structural_indentation_level = $rLL->[$K_first]->[_LEVEL_]; - my $radjusted_levels = $self->[_radjusted_levels_]; - if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) { - $structural_indentation_level = $radjusted_levels->[$K_first]; - } + my $rLL = $self->[_rLL_]; + my $KK = $K_to_go[$iend]; + my $line_number = 1 + $rLL->[$KK]->[_LINE_INDEX_]; - # record max structural depth for log file - if ( $structural_indentation_level > $self->[_maximum_BLOCK_level_] ) { - $self->[_maximum_BLOCK_level_] = $structural_indentation_level; - $self->[_maximum_BLOCK_level_at_line_] = $line_number; - } + my $type = $types_to_go[$iend]; + return unless ( $type eq ';' ); + my $tok = $type; + my $tok_len = length($tok); + if ( $want_left_space{$type} != WS_NO ) { + $tok = SPACE . $tok; + $tok_len += 1; + } - my $type_sequence = $rLL->[$K_first]->[_TYPE_SEQUENCE_]; - my $is_closing_block = - $type_sequence - && $self->[_rblock_type_of_seqno_]->{$type_sequence} - && $rLL->[$K_first]->[_TYPE_] eq '}'; + $tokens_to_go[$iend] = $tok; + $token_lengths_to_go[$iend] = $tok_len; - if ( $guessed_indentation_level ne $structural_indentation_level ) { - $self->[_last_tabbing_disagreement_] = $line_number; + $rLL->[$KK]->[_TOKEN_] = $tok; + $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len; - if ($is_closing_block) { + $self->note_added_semicolon($line_number); - if ( !$self->[_in_brace_tabbing_disagreement_] ) { - $self->[_in_brace_tabbing_disagreement_] = $line_number; - } - if ( !$self->[_first_brace_tabbing_disagreement_] ) { - $self->[_first_brace_tabbing_disagreement_] = $line_number; - } + # This changes the summed lengths of the rest of this batch + foreach ( $iend .. $max_index_to_go ) { + $summed_lengths_to_go[ $_ + 1 ] += $tok_len; } + return; + } ## end sub unmask_phantom_token - if ( !$self->[_in_tabbing_disagreement_] ) { - $self->[_tabbing_disagreement_count_]++; + sub save_opening_indentation { - if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) { - write_logfile_entry( -"Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n" - ); + # This should be called after each batch of tokens is output. It + # saves indentations of lines of all unmatched opening tokens. + # These will be used by sub get_opening_indentation. + + my ( $self, $ri_first, $ri_last, $rindentation_list, + $runmatched_opening_indexes ) + = @_; + + $runmatched_opening_indexes = [] + if ( !defined($runmatched_opening_indexes) ); + + # QW INDENTATION PATCH 1: + # Also save indentation for multiline qw quotes + my @i_qw; + my $seqno_qw_opening; + if ( $types_to_go[$max_index_to_go] eq 'q' ) { + my $KK = $K_to_go[$max_index_to_go]; + $seqno_qw_opening = + $self->[_rstarting_multiline_qw_seqno_by_K_]->{$KK}; + if ($seqno_qw_opening) { + push @i_qw, $max_index_to_go; } - $self->[_in_tabbing_disagreement_] = $line_number; - $self->[_first_tabbing_disagreement_] = $line_number - unless ( $self->[_first_tabbing_disagreement_] ); } - } - else { - - $self->[_in_brace_tabbing_disagreement_] = 0 if ($is_closing_block); - my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_]; - if ($in_tabbing_disagreement) { + # we need to save indentations of any unmatched opening tokens + # in this batch because we may need them in a subsequent batch. + foreach ( @{$runmatched_opening_indexes}, @i_qw ) { - if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) { - write_logfile_entry( -"End indentation disagreement from input line $in_tabbing_disagreement\n" - ); + my $seqno = $type_sequence_to_go[$_]; - if ( $self->[_tabbing_disagreement_count_] == MAX_NAG_MESSAGES ) - { - write_logfile_entry( - "No further tabbing disagreements will be noted\n"); + if ( !$seqno ) { + if ( $seqno_qw_opening && $_ == $max_index_to_go ) { + $seqno = $seqno_qw_opening; + } + else { + + # shouldn't happen + $seqno = 'UNKNOWN'; + DEVEL_MODE && Fault("unable to find sequence number\n"); } } - $self->[_in_tabbing_disagreement_] = 0; + $saved_opening_indentation{$seqno} = [ + lookup_opening_indentation( + $_, $ri_first, $ri_last, $rindentation_list + ) + ]; } - } - return; -} ## end sub compare_indentation_levels + return; + } ## end sub save_opening_indentation -################################################### -# CODE SECTION 8: Utilities for setting breakpoints -################################################### + sub get_saved_opening_indentation { + my ($seqno) = @_; + my ( $indent, $offset, $is_leading, $exists ) = ( 0, 0, 0, 0 ); -{ ## begin closure set_forced_breakpoint + if ($seqno) { + if ( $saved_opening_indentation{$seqno} ) { + ( $indent, $offset, $is_leading ) = + @{ $saved_opening_indentation{$seqno} }; + $exists = 1; + } + } - my @forced_breakpoint_undo_stack; + # some kind of serious error it doesn't exist + # (example is badfile.t) - # These are global vars for efficiency: - # my $forced_breakpoint_count; - # my $forced_breakpoint_undo_count; - # my $index_max_forced_break; + return ( $indent, $offset, $is_leading, $exists ); + } ## end sub get_saved_opening_indentation +} ## end closure grind_batch_of_CODE - # Break before or after certain tokens based on user settings - my %break_before_or_after_token; +sub lookup_opening_indentation { - BEGIN { + # get the indentation of the line in the current output batch + # which output a selected opening token + # + # given: + # $i_opening - index of an opening token in the current output batch + # whose line indentation we need + # $ri_first - reference to list of the first index $i for each output + # line in this batch + # $ri_last - reference to list of the last index $i for each output line + # in this batch + # $rindentation_list - reference to a list containing the indentation + # used for each line. (NOTE: the first slot in + # this list is the last returned line number, and this is + # followed by the list of indentations). + # + # return + # -the indentation of the line which contained token $i_opening + # -and its offset (number of columns) from the start of the line - # Updated to use all operators. This fixes case b1054 - # Here is the previous simplified version: - ## my @q = qw( . : ? and or xor && || ); - my @q = @all_operators; + my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_; - push @q, ','; - @break_before_or_after_token{@q} = (1) x scalar(@q); - } + if ( !@{$ri_last} ) { - # This is no longer called - global vars - moved into initialize_batch_vars - sub initialize_forced_breakpoint_vars { - $forced_breakpoint_count = 0; - $index_max_forced_break = UNDEFINED_INDEX; - $forced_breakpoint_undo_count = 0; - ##@forced_breakpoint_undo_stack = (); # not needed - return; + # An error here implies a bug introduced by a recent program change. + # Every batch of code has lines, so this should never happen. + if (DEVEL_MODE) { + Fault("Error in opening_indentation: no lines"); + } + return ( 0, 0, 0 ); } - sub set_fake_breakpoint { + my $nline = $rindentation_list->[0]; # line number of previous lookup - # Just bump up the breakpoint count as a signal that there are breaks. - # This is useful if we have breaks but may want to postpone deciding - # where to make them. - $forced_breakpoint_count++; - return; + # reset line location if necessary + $nline = 0 if ( $i_opening < $ri_start->[$nline] ); + + # find the correct line + unless ( $i_opening > $ri_last->[-1] ) { + while ( $i_opening > $ri_last->[$nline] ) { $nline++; } } - use constant DEBUG_FORCE => 0; + # Error - token index is out of bounds - shouldn't happen + # A program bug has been introduced in one of the calling routines. + # We better stop here. + else { + my $i_last_line = $ri_last->[-1]; + if (DEVEL_MODE) { + Fault(< $i_last_line = max index of last line +This batch has max index = $max_index_to_go, +EOM + } + $nline = $#{$ri_last}; + } - sub set_forced_breakpoint { - my ( $self, $i ) = @_; + $rindentation_list->[0] = + $nline; # save line number to start looking next call + my $ibeg = $ri_start->[$nline]; + my $offset = token_sequence_length( $ibeg, $i_opening ) - 1; + my $is_leading = ( $ibeg == $i_opening ); + return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading ); +} ## end sub lookup_opening_indentation - # Set a breakpoint AFTER the token at index $i in the _to_go arrays. +sub terminal_type_i { - # Exceptions: - # - If the token at index $i is a blank, backup to $i-1 to - # get to the previous nonblank token. - # - For certain tokens, the break may be placed BEFORE the token - # at index $i, depending on user break preference settings. - # - If a break is made after an opening token, then a break will - # also be made before the corresponding closing token. + # returns type of last token on this line (terminal token), as follows: + # returns # for a full-line comment + # returns ' ' for a blank line + # otherwise returns final token type - # Returns '$i_nonblank': - # = index of the token after which the breakpoint was actually placed - # = undef if breakpoint was not set. - my $i_nonblank; + my ( $ibeg, $iend ) = @_; - if ( !defined($i) || $i < 0 ) { + # Start at the end and work backwards + my $i = $iend; + my $type_i = $types_to_go[$i]; - # Calls with bad index $i are harmless but waste time and should - # be caught and eliminated during code development. - if (DEVEL_MODE) { - my ( $a, $b, $c ) = caller(); - Fault( -"Bad call to forced breakpoint from $a $b $c ; called with i=$i; please fix\n" - ); - } - return; + # Check for side comment + if ( $type_i eq '#' ) { + $i--; + if ( $i < $ibeg ) { + return wantarray ? ( $type_i, $ibeg ) : $type_i; } + $type_i = $types_to_go[$i]; + } - # Break after token $i - $i_nonblank = $self->set_forced_breakpoint_AFTER($i); - - # If we break at an opening container..break at the closing - my $set_closing; - if ( defined($i_nonblank) - && $is_opening_sequence_token{ $tokens_to_go[$i_nonblank] } ) - { - $set_closing = 1; - $self->set_closing_breakpoint($i_nonblank); + # Skip past a blank + if ( $type_i eq 'b' ) { + $i--; + if ( $i < $ibeg ) { + return wantarray ? ( $type_i, $ibeg ) : $type_i; } + $type_i = $types_to_go[$i]; + } - DEBUG_FORCE && do { - my ( $a, $b, $c ) = caller(); - my $msg = -"FORCE $forced_breakpoint_count after call from $a $c with i=$i max=$max_index_to_go"; - if ( !defined($i_nonblank) ) { - $i = EMPTY_STRING unless defined($i); - $msg .= " but could not set break after i='$i'\n"; - } - else { - $msg .= <=0 in sub write_line, so it should + # not be possible to get here unless the code has a bracing error + # which leaves a closing brace with zero nesting depth. + unless ( get_saw_brace_error() ) { + if (DEVEL_MODE) { + Fault(<= 0 ); +sub break_all_chain_tokens { - # Back up at a blank so we have a token to examine. - # This was added to fix for cases like b932 involving an '=' break. - if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- } + # scan the current breakpoints looking for breaks at certain "chain + # operators" (. : && || + etc) which often occur repeatedly in a long + # statement. If we see a break at any one, break at all similar tokens + # within the same container. + # + my ( $self, $ri_left, $ri_right ) = @_; - # Never break between welded tokens - return - if ( $total_weld_count - && $self->[_rK_weld_right_]->{ $K_to_go[$i] } ); + my %saw_chain_type; + my %left_chain_type; + my %right_chain_type; + my %interior_chain_type; + my $nmax = @{$ri_right} - 1; - my $token = $tokens_to_go[$i]; - my $type = $types_to_go[$i]; + # scan the left and right end tokens of all lines + my $count = 0; + for my $n ( 0 .. $nmax ) { + my $il = $ri_left->[$n]; + my $ir = $ri_right->[$n]; + my $typel = $types_to_go[$il]; + my $typer = $types_to_go[$ir]; + $typel = '+' if ( $typel eq '-' ); # treat + and - the same + $typer = '+' if ( $typer eq '-' ); + $typel = '*' if ( $typel eq '/' ); # treat * and / the same + $typer = '*' if ( $typer eq '/' ); - # For certain tokens, use user settings to decide if we break before or - # after it - if ( $break_before_or_after_token{$token} - && ( $type eq $token || $type eq 'k' ) ) - { - if ( $want_break_before{$token} && $i >= 0 ) { $i-- } + my $keyl = $typel eq 'k' ? $tokens_to_go[$il] : $typel; + my $keyr = $typer eq 'k' ? $tokens_to_go[$ir] : $typer; + if ( $is_chain_operator{$keyl} && $want_break_before{$typel} ) { + next if ( $typel eq '?' ); + push @{ $left_chain_type{$keyl} }, $il; + $saw_chain_type{$keyl} = 1; + $count++; } + if ( $is_chain_operator{$keyr} && !$want_break_before{$typer} ) { + next if ( $typer eq '?' ); + push @{ $right_chain_type{$keyr} }, $ir; + $saw_chain_type{$keyr} = 1; + $count++; + } + } + return unless $count; - # breaks are forced before 'if' and 'unless' - elsif ( $is_if_unless{$token} && $type eq 'k' ) { $i-- } - - if ( $i >= 0 && $i <= $max_index_to_go ) { - my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1; - - if ( $i_nonblank >= 0 - && $nobreak_to_go[$i_nonblank] == 0 - && !$forced_breakpoint_to_go[$i_nonblank] ) - { - $forced_breakpoint_to_go[$i_nonblank] = 1; - - if ( $i_nonblank > $index_max_forced_break ) { - $index_max_forced_break = $i_nonblank; - } - $forced_breakpoint_count++; - $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] - = $i_nonblank; - - # success - return $i_nonblank; + # now look for any interior tokens of the same types + $count = 0; + my $has_interior_dot_or_plus; + for my $n ( 0 .. $nmax ) { + my $il = $ri_left->[$n]; + my $ir = $ri_right->[$n]; + foreach my $i ( $il + 1 .. $ir - 1 ) { + my $type = $types_to_go[$i]; + my $key = $type eq 'k' ? $tokens_to_go[$i] : $type; + $key = '+' if ( $key eq '-' ); + $key = '*' if ( $key eq '/' ); + if ( $saw_chain_type{$key} ) { + push @{ $interior_chain_type{$key} }, $i; + $count++; + $has_interior_dot_or_plus ||= ( $key eq '.' || $key eq '+' ); } } - return; - } ## end sub set_forced_breakpoint_AFTER - - sub clear_breakpoint_undo_stack { - my ($self) = @_; - $forced_breakpoint_undo_count = 0; - return; } + return unless $count; - use constant DEBUG_UNDOBP => 0; - - sub undo_forced_breakpoint_stack { - - my ( $self, $i_start ) = @_; + my @keys = keys %saw_chain_type; - # Given $i_start, a non-negative index the 'undo stack' of breakpoints, - # remove all breakpoints from the top of the 'undo stack' down to and - # including index $i_start. + # quit if just ONE continuation line with leading . For example-- + # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{' + # . $contents; + # Fixed for b1399. + if ( $has_interior_dot_or_plus && $nmax == 1 && @keys == 1 ) { + return; + } - # The 'undo stack' is a stack of all breakpoints made for a batch of - # code. + # now make a list of all new break points + my @insert_list; - if ( $i_start < 0 ) { - $i_start = 0; - my ( $a, $b, $c ) = caller(); + # loop over all chain types + foreach my $key (@keys) { - # Bad call, can only be due to a recent programming change. - Fault( -"Program Bug: undo_forced_breakpoint_stack from $a $c has bad i=$i_start " - ) if (DEVEL_MODE); - return; - } + # loop over all interior chain tokens + foreach my $itest ( @{ $interior_chain_type{$key} } ) { - while ( $forced_breakpoint_undo_count > $i_start ) { - my $i = - $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ]; - if ( $i >= 0 && $i <= $max_index_to_go ) { - $forced_breakpoint_to_go[$i] = 0; - $forced_breakpoint_count--; + # loop over all left end tokens of same type + if ( $left_chain_type{$key} ) { + next if $nobreak_to_go[ $itest - 1 ]; + foreach my $i ( @{ $left_chain_type{$key} } ) { + next unless $self->in_same_container_i( $i, $itest ); + push @insert_list, $itest - 1; - DEBUG_UNDOBP && do { - my ( $a, $b, $c ) = caller(); - print STDOUT -"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"; - }; + # Break at matching ? if this : is at a different level. + # For example, the ? before $THRf_DEAD in the following + # should get a break if its : gets a break. + # + # my $flags = + # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE + # : ( $_ & 4 ) ? $THRf_R_DETACHED + # : $THRf_R_JOINABLE; + if ( $key eq ':' + && $levels_to_go[$i] != $levels_to_go[$itest] ) + { + my $i_question = $mate_index_to_go[$itest]; + if ( defined($i_question) && $i_question > 0 ) { + push @insert_list, $i_question - 1; + } + } + last; + } } - # shouldn't happen, but not a critical error - else { - DEBUG_UNDOBP && do { - my ( $a, $b, $c ) = caller(); - print STDOUT -"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go"; - }; + # loop over all right end tokens of same type + if ( $right_chain_type{$key} ) { + next if $nobreak_to_go[$itest]; + foreach my $i ( @{ $right_chain_type{$key} } ) { + next unless $self->in_same_container_i( $i, $itest ); + push @insert_list, $itest; + + # break at matching ? if this : is at a different level + if ( $key eq ':' + && $levels_to_go[$i] != $levels_to_go[$itest] ) + { + my $i_question = $mate_index_to_go[$itest]; + if ( defined($i_question) ) { + push @insert_list, $i_question; + } + } + last; + } } } - return; - } ## end sub undo_forced_breakpoint_stack -} ## end closure set_forced_breakpoint - -{ ## begin closure set_closing_breakpoint - - my %postponed_breakpoint; - - sub initialize_postponed_breakpoint { - %postponed_breakpoint = (); - return; } - sub has_postponed_breakpoint { - my ($seqno) = @_; - return $postponed_breakpoint{$seqno}; + # insert any new break points + if (@insert_list) { + $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); } + return; +} ## end sub break_all_chain_tokens - sub set_closing_breakpoint { - - # set a breakpoint at a matching closing token - my ( $self, $i_break ) = @_; +sub insert_additional_breaks { - if ( $mate_index_to_go[$i_break] >= 0 ) { + # this routine will add line breaks at requested locations after + # sub break_long_lines has made preliminary breaks. - # Don't reduce the '2' in the statement below. - # Test files: attrib.t, BasicLyx.pm.html - if ( $mate_index_to_go[$i_break] > $i_break + 2 ) { + my ( $self, $ri_break_list, $ri_first, $ri_last ) = @_; + my $i_f; + my $i_l; + my $line_number = 0; + foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) { - # break before } ] and ), but sub set_forced_breakpoint will decide - # to break before or after a ? and : - my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1; - $self->set_forced_breakpoint_AFTER( - $mate_index_to_go[$i_break] - $inc ); - } - } - else { - my $type_sequence = $type_sequence_to_go[$i_break]; - if ($type_sequence) { - my $closing_token = $matching_token{ $tokens_to_go[$i_break] }; - $postponed_breakpoint{$type_sequence} = 1; - } - } - return; - } ## end sub set_closing_breakpoint -} ## end closure set_closing_breakpoint + next if ( $nobreak_to_go[$i_break_left] ); -######################################### -# CODE SECTION 9: Process batches of code -######################################### + $i_f = $ri_first->[$line_number]; + $i_l = $ri_last->[$line_number]; + while ( $i_break_left >= $i_l ) { + $line_number++; -{ ## begin closure grind_batch_of_CODE + # shouldn't happen unless caller passes bad indexes + if ( $line_number >= @{$ri_last} ) { + if (DEVEL_MODE) { + Fault(<[$line_number]; + $i_l = $ri_last->[$line_number]; + } - # The routines in this closure begin the processing of a 'batch' of code. + # Do not leave a blank at the end of a line; back up if necessary + if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- } - # A variable to keep track of consecutive nonblank lines so that we can - # insert occasional blanks - my @nonblank_lines_at_depth; + my $i_break_right = $inext_to_go[$i_break_left]; + if ( $i_break_left >= $i_f + && $i_break_left < $i_l + && $i_break_right > $i_f + && $i_break_right <= $i_l ) + { + splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) ); + splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) ); + } + } + return; +} ## end sub insert_additional_breaks - # A variable to remember maximum size of previous batches; this is needed - # by the logical padding routine - my $peak_batch_size; - my $batch_count; +{ ## begin closure in_same_container_i + my $ris_break_token; + my $ris_comma_token; - # variables to keep track of unbalanced containers. - my %saved_opening_indentation; - my @unmatched_opening_indexes_in_this_batch; + BEGIN { - sub initialize_grind_batch_of_CODE { - @nonblank_lines_at_depth = (); - $peak_batch_size = 0; - $batch_count = 0; - %saved_opening_indentation = (); - return; - } + # all cases break on seeing commas at same level + my @q = qw( => ); + push @q, ','; + @{$ris_comma_token}{@q} = (1) x scalar(@q); - # sub grind_batch_of_CODE receives sections of code which are the longest - # possible lines without a break. In other words, it receives what is left - # after applying all breaks forced by blank lines, block comments, side - # comments, pod text, and structural braces. Its job is to break this code - # down into smaller pieces, if necessary, which fit within the maximum - # allowed line length. Then it sends the resulting lines of code on down - # the pipeline to the VerticalAligner package, breaking the code into - # continuation lines as necessary. The batch of tokens are in the "to_go" - # arrays. The name 'grind' is slightly suggestive of a machine continually - # breaking down long lines of code, but mainly it is unique and easy to - # remember and find with an editor search. + # Non-ternary text also breaks on seeing any of qw(? : || or ) + # Example: we would not want to break at any of these .'s + # : "$str" + push @q, qw( or || ? : ); + @{$ris_break_token}{@q} = (1) x scalar(@q); + } ## end BEGIN - # The two routines 'process_line_of_CODE' and 'grind_batch_of_CODE' work - # together in the following way: + sub in_same_container_i { - # - 'process_line_of_CODE' receives the original INPUT lines one-by-one and - # combines them into the largest sequences of tokens which might form a new - # line. - # - 'grind_batch_of_CODE' determines which tokens will form the OUTPUT - # lines. + # Check to see if tokens at i1 and i2 are in the same container, and + # not separated by certain characters: => , ? : || or + # This is an interface between the _to_go arrays to the rLL array + my ( $self, $i1, $i2 ) = @_; - # So sub 'process_line_of_CODE' builds up the longest possible continuous - # sequences of tokens, regardless of line length, and then - # grind_batch_of_CODE breaks these sequences back down into the new output - # lines. + # quick check + my $parent_seqno_1 = $parent_seqno_to_go[$i1]; + return if ( $parent_seqno_to_go[$i2] ne $parent_seqno_1 ); - # Sub 'grind_batch_of_CODE' ships its output lines to the vertical aligner. + if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) } + my $K1 = $K_to_go[$i1]; + my $K2 = $K_to_go[$i2]; + my $rLL = $self->[_rLL_]; - use constant DEBUG_GRIND => 0; + my $depth_1 = $nesting_depth_to_go[$i1]; + return if ( $depth_1 < 0 ); - sub check_grind_input { + # Shouldn't happen since i1 and i2 have same parent: + return unless ( $nesting_depth_to_go[$i2] == $depth_1 ); - # Check for valid input to sub grind_batch_of_CODE. An error here - # would most likely be due to an error in 'sub store_token_to_go'. - my ($self) = @_; + # Select character set to scan for + my $type_1 = $types_to_go[$i1]; + my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token; - # Be sure there are tokens in the batch - if ( $max_index_to_go < 0 ) { - Fault(<[$KK]->[_KNEXT_SEQ_ITEM_]; + last if !defined($KK); + last if ( $KK >= $K2 ); + my $ii = $i1 + $KK - $K1; + my $depth_i = $nesting_depth_to_go[$ii]; + return if ( $depth_i < $depth_1 ); + next if ( $depth_i > $depth_1 ); + if ( $type_1 ne ':' ) { + my $tok_i = $tokens_to_go[$ii]; + return if ( $tok_i eq '?' || $tok_i eq ':' ); + } } - my $Klimit = $self->[_Klimit_]; - # The local batch tokens must be a continuous part of the global token - # array. - my $KK; - foreach my $ii ( 0 .. $max_index_to_go ) { + # Slow loop checking for certain characters - my $Km = $KK; + #----------------------------------------------------- + # This is potentially a slow routine and not critical. + # For safety just give up for large differences. + # See test file 'infinite_loop.txt' + #----------------------------------------------------- + return if ( $i2 - $i1 > 200 ); - $KK = $K_to_go[$ii]; - if ( !defined($KK) || $KK < 0 || $KK > $Klimit ) { - $KK = '(undef)' unless defined($KK); - Fault(< 0 && $KK != $Km + 1 ) { - my $im = $ii - 1; - Fault(< $depth_1 ); + return if ( $depth_i < $depth_1 ); + my $tok_i = $tokens_to_go[$ii]; + return if ( $rbreak->{$tok_i} ); } - return; - } ## end sub check_grind_input + return 1; + } ## end sub in_same_container_i +} ## end closure in_same_container_i - sub grind_batch_of_CODE { +sub break_equals { - my ($self) = @_; + # Look for assignment operators that could use a breakpoint. + # For example, in the following snippet + # + # $HOME = $ENV{HOME} + # || $ENV{LOGDIR} + # || $pw[7] + # || die "no home directory for user $<"; + # + # we could break at the = to get this, which is a little nicer: + # $HOME = + # $ENV{HOME} + # || $ENV{LOGDIR} + # || $pw[7] + # || die "no home directory for user $<"; + # + # The logic here follows the logic in set_logical_padding, which + # will add the padding in the second line to improve alignment. + # + my ( $self, $ri_left, $ri_right ) = @_; + my $nmax = @{$ri_right} - 1; + return unless ( $nmax >= 2 ); - my $this_batch = $self->[_this_batch_]; - $batch_count++; + # scan the left ends of first two lines + my $tokbeg = EMPTY_STRING; + my $depth_beg; + for my $n ( 1 .. 2 ) { + my $il = $ri_left->[$n]; + my $typel = $types_to_go[$il]; + my $tokenl = $tokens_to_go[$il]; + my $keyl = $typel eq 'k' ? $tokenl : $typel; - $self->check_grind_input() if (DEVEL_MODE); + my $has_leading_op = $is_chain_operator{$keyl}; + return unless ($has_leading_op); + if ( $n > 1 ) { + return + unless ( $tokenl eq $tokbeg + && $nesting_depth_to_go[$il] eq $depth_beg ); + } + $tokbeg = $tokenl; + $depth_beg = $nesting_depth_to_go[$il]; + } - # This routine is only called from sub flush_batch_of_code, so that - # routine is a better spot for debugging. - DEBUG_GRIND && do { - my $token = my $type = EMPTY_STRING; - if ( $max_index_to_go >= 0 ) { - $token = $tokens_to_go[$max_index_to_go]; - $type = $types_to_go[$max_index_to_go]; - } - my $output_str = EMPTY_STRING; - if ( $max_index_to_go > 20 ) { - my $mm = $max_index_to_go - 10; - $output_str = - join( EMPTY_STRING, @tokens_to_go[ 0 .. 10 ] ) . " ... " - . join( EMPTY_STRING, - @tokens_to_go[ $mm .. $max_index_to_go ] ); + # now look for any interior tokens of the same types + my $il = $ri_left->[0]; + my $ir = $ri_right->[0]; + + # now make a list of all new break points + my @insert_list; + foreach my $i ( reverse( $il + 1 .. $ir - 1 ) ) { + my $type = $types_to_go[$i]; + if ( $is_assignment{$type} + && $nesting_depth_to_go[$i] eq $depth_beg ) + { + if ( $want_break_before{$type} ) { + push @insert_list, $i - 1; } else { - $output_str = join EMPTY_STRING, - @tokens_to_go[ 0 .. $max_index_to_go ]; + push @insert_list, $i; } - print STDERR <set_lp_indentation() - if ($rOpts_line_up_parentheses); - - #---------------------------- - # Shortcut for block comments - #---------------------------- - if ( - $max_index_to_go == 0 - && $types_to_go[0] eq '#' + } + } - # this shortcut does not work for -lp yet - && !$rOpts_line_up_parentheses - ) - { - my $ibeg = 0; - $this_batch->[_ri_first_] = [$ibeg]; - $this_batch->[_ri_last_] = [$ibeg]; - $this_batch->[_peak_batch_size_] = $peak_batch_size; - $this_batch->[_do_not_pad_] = 0; - $this_batch->[_batch_count_] = $batch_count; - $this_batch->[_rix_seqno_controlling_ci_] = []; + # Break after a 'return' followed by a chain of operators + # return ( $^O !~ /win32|dos/i ) + # && ( $^O ne 'VMS' ) + # && ( $^O ne 'OS2' ) + # && ( $^O ne 'MacOS' ); + # To give: + # return + # ( $^O !~ /win32|dos/i ) + # && ( $^O ne 'VMS' ) + # && ( $^O ne 'OS2' ) + # && ( $^O ne 'MacOS' ); + my $i = 0; + if ( $types_to_go[$i] eq 'k' + && $tokens_to_go[$i] eq 'return' + && $ir > $il + && $nesting_depth_to_go[$i] eq $depth_beg ) + { + push @insert_list, $i; + } - $self->convey_batch_to_vertical_aligner(); + return unless (@insert_list); - my $level = $levels_to_go[$ibeg]; - $self->[_last_last_line_leading_level_] = - $self->[_last_line_leading_level_]; - $self->[_last_line_leading_type_] = $types_to_go[$ibeg]; - $self->[_last_line_leading_level_] = $level; - $nonblank_lines_at_depth[$level] = 1; - return; + # One final check... + # scan second and third lines and be sure there are no assignments + # we want to avoid breaking at an = to make something like this: + # unless ( $icon = + # $html_icons{"$type-$state"} + # or $icon = $html_icons{$type} + # or $icon = $html_icons{$state} ) + for my $n ( 1 .. 2 ) { + my $il_n = $ri_left->[$n]; + my $ir_n = $ri_right->[$n]; + foreach my $i ( $il_n + 1 .. $ir_n ) { + my $type = $types_to_go[$i]; + return + if ( $is_assignment{$type} + && $nesting_depth_to_go[$i] eq $depth_beg ); } + } - #------------- - # Normal route - #------------- - - my $rLL = $self->[_rLL_]; - my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_]; - my $rwant_container_open = $self->[_rwant_container_open_]; - - #------------------------------------------------------- - # Loop over the batch to initialize some batch variables - #------------------------------------------------------- - my $comma_count_in_batch = 0; - my $ilast_nonblank = -1; - my @colon_list; - my @ix_seqno_controlling_ci; - my %comma_arrow_count; - my $comma_arrow_count_contained = 0; - my @unmatched_closing_indexes_in_this_batch; + # ok, insert any new break point + if (@insert_list) { + $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); + } + return; +} ## end sub break_equals - @unmatched_opening_indexes_in_this_batch = (); +{ ## begin closure recombine_breakpoints - foreach my $i ( 0 .. $max_index_to_go ) { - $iprev_to_go[$i] = $ilast_nonblank; - $inext_to_go[$i] = $i + 1; + # This routine is called once per batch to see if it would be better + # to combine some of the lines into which the batch has been broken. - my $type = $types_to_go[$i]; - if ( $type ne 'b' ) { - if ( $ilast_nonblank >= 0 ) { - $inext_to_go[$ilast_nonblank] = $i; + my %is_amp_amp; + my %is_math_op; + my %is_plus_minus; + my %is_mult_div; - # just in case there are two blanks in a row (shouldn't - # happen) - if ( ++$ilast_nonblank < $i ) { - $inext_to_go[$ilast_nonblank] = $i; - } - } - $ilast_nonblank = $i; + BEGIN { - # This is a good spot to efficiently collect information needed - # for breaking lines... + my @q; + @q = qw( && || ); + @is_amp_amp{@q} = (1) x scalar(@q); - # gather info needed by sub break_long_lines - if ( $type_sequence_to_go[$i] ) { - my $seqno = $type_sequence_to_go[$i]; - my $token = $tokens_to_go[$i]; + @q = qw( + - * / ); + @is_math_op{@q} = (1) x scalar(@q); - # remember indexes of any tokens controlling xci - # in this batch. This list is needed by sub undo_ci. - if ( $ris_seqno_controlling_ci->{$seqno} ) { - push @ix_seqno_controlling_ci, $i; - } + @q = qw( + - ); + @is_plus_minus{@q} = (1) x scalar(@q); - if ( $is_opening_sequence_token{$token} ) { - if ( $rwant_container_open->{$seqno} ) { - $self->set_forced_breakpoint($i); - } - push @unmatched_opening_indexes_in_this_batch, $i; - if ( $type eq '?' ) { - push @colon_list, $type; - } - } - elsif ( $is_closing_sequence_token{$token} ) { + @q = qw( * / ); + @is_mult_div{@q} = (1) x scalar(@q); + } ## end BEGIN - if ( $i > 0 && $rwant_container_open->{$seqno} ) { - $self->set_forced_breakpoint( $i - 1 ); - } + sub Debug_dump_breakpoints { - my $i_mate = - pop @unmatched_opening_indexes_in_this_batch; - if ( defined($i_mate) && $i_mate >= 0 ) { - if ( $type_sequence_to_go[$i_mate] == - $type_sequence_to_go[$i] ) - { - $mate_index_to_go[$i] = $i_mate; - $mate_index_to_go[$i_mate] = $i; - if ( $comma_arrow_count{$seqno} ) { - $comma_arrow_count_contained += - $comma_arrow_count{$seqno}; - } - } - else { - push @unmatched_opening_indexes_in_this_batch, - $i_mate; - push @unmatched_closing_indexes_in_this_batch, - $i; - } - } - else { - push @unmatched_closing_indexes_in_this_batch, $i; - } - if ( $type eq ':' ) { - push @colon_list, $type; - } - } ## end elsif ( $is_closing_sequence_token...) + # Debug routine to dump current breakpoints...not normally called + # We are given indexes to the current lines: + # $ri_beg = ref to array of BEGinning indexes of each line + # $ri_end = ref to array of ENDing indexes of each line + my ( $self, $ri_beg, $ri_end, $msg ) = @_; + print STDERR "----Dumping breakpoints from: $msg----\n"; + for my $n ( 0 .. @{$ri_end} - 1 ) { + my $ibeg = $ri_beg->[$n]; + my $iend = $ri_end->[$n]; + my $text = EMPTY_STRING; + foreach my $i ( $ibeg .. $iend ) { + $text .= $tokens_to_go[$i]; + } + print STDERR "$n ($ibeg:$iend) $text\n"; + } + print STDERR "----\n"; + return; + } ## end sub Debug_dump_breakpoints - } ## end if ($seqno) + sub delete_one_line_semicolons { - elsif ( $type eq ',' ) { $comma_count_in_batch++; } - elsif ( $tokens_to_go[$i] eq '=>' ) { - if (@unmatched_opening_indexes_in_this_batch) { - my $j = $unmatched_opening_indexes_in_this_batch[-1]; - my $seqno = $type_sequence_to_go[$j]; - $comma_arrow_count{$seqno}++; - } - } - } ## end if ( $type ne 'b' ) - } ## end for ( my $i = 0 ; $i <=...) + my ( $self, $ri_beg, $ri_end ) = @_; + my $rLL = $self->[_rLL_]; + my $K_opening_container = $self->[_K_opening_container_]; - my $is_unbalanced_batch = @unmatched_opening_indexes_in_this_batch + - @unmatched_closing_indexes_in_this_batch; + # Walk down the lines of this batch and delete any semicolons + # terminating one-line blocks; + my $nmax = @{$ri_end} - 1; - #------------------------ - # Set special breakpoints - #------------------------ - # If this line ends in a code block brace, set breaks at any - # previous closing code block braces to breakup a chain of code - # blocks on one line. This is very rare but can happen for - # user-defined subs. For example we might be looking at this: - # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR { - my $saw_good_break = 0; # flag to force breaks even if short line - if ( + foreach my $n ( 0 .. $nmax ) { + my $i_beg = $ri_beg->[$n]; + my $i_e = $ri_end->[$n]; + my $K_beg = $K_to_go[$i_beg]; + my $K_e = $K_to_go[$i_e]; + my $K_end = $K_e; + my $type_end = $rLL->[$K_end]->[_TYPE_]; + if ( $type_end eq '#' ) { + $K_end = $self->K_previous_nonblank($K_end); + if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; } + } - # looking for opening or closing block brace - $block_type_to_go[$max_index_to_go] + # we are looking for a line ending in closing brace + next + unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' ); - # never any good breaks if just one token - && $max_index_to_go > 0 + # ...and preceded by a semicolon on the same line + my $K_semicolon = $self->K_previous_nonblank($K_end); + next unless defined($K_semicolon); + my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg ); + next if ( $i_semicolon <= $i_beg ); + next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' ); - # but not one of these which are never duplicated on a line: - # until|while|for|if|elsif|else - && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] + # Safety check - shouldn't happen - not critical + # This is not worth throwing a Fault, except in DEVEL_MODE + if ( $types_to_go[$i_semicolon] ne ';' ) { + DEVEL_MODE + && Fault("unexpected type looking for semicolon"); + next; } - ) - { - my $lev = $nesting_depth_to_go[$max_index_to_go]; - # Walk backwards from the end and - # set break at any closing block braces at the same level. - # But quit if we are not in a chain of blocks. - foreach my $i ( reverse( 0 .. $max_index_to_go - 1 ) ) { - last if ( $levels_to_go[$i] < $lev ); # stop at a lower level - next if ( $levels_to_go[$i] > $lev ); # skip past higher level + # ... with the corresponding opening brace on the same line + my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_]; + my $K_opening = $K_opening_container->{$type_sequence}; + next unless ( defined($K_opening) ); + my $i_opening = $i_beg + ( $K_opening - $K_beg ); + next if ( $i_opening < $i_beg ); - if ( $block_type_to_go[$i] ) { - if ( $tokens_to_go[$i] eq '}' ) { - $self->set_forced_breakpoint($i); - $saw_good_break = 1; - } + # ... and only one semicolon between these braces + my $semicolon_count = 0; + foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) { + if ( $rLL->[$K]->[_TYPE_] eq ';' ) { + $semicolon_count++; + last; } - - # quit if we see anything besides words, function, blanks - # at this level - elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last } } - } - - #----------------------------------------------- - # insertion of any blank lines before this batch - #----------------------------------------------- - - my $imin = 0; - my $imax = $max_index_to_go; - - # trim any blank tokens - if ( $types_to_go[$imin] eq 'b' ) { $imin++ } - if ( $types_to_go[$imax] eq 'b' ) { $imax-- } + next if ($semicolon_count); - if ( $imin > $imax ) { - if (DEVEL_MODE) { - my $K0 = $K_to_go[0]; - my $lno = EMPTY_STRING; - if ( defined($K0) ) { $lno = $rLL->[$K0]->[_LINE_INDEX_] + 1 } - Fault(<[$K_semicolon]->[_TOKEN_] = EMPTY_STRING; + $rLL->[$K_semicolon]->[_TOKEN_LENGTH_] = 0; + foreach ( $i_semicolon .. $max_index_to_go ) { + $summed_lengths_to_go[ $_ + 1 ] -= $len; } - return; } + return; + } ## end sub delete_one_line_semicolons - my $last_line_leading_type = $self->[_last_line_leading_type_]; - my $last_line_leading_level = $self->[_last_line_leading_level_]; - my $last_last_line_leading_level = - $self->[_last_last_line_leading_level_]; + use constant DEBUG_RECOMBINE => 0; - # add a blank line before certain key types but not after a comment - if ( $last_line_leading_type ne '#' ) { - my $want_blank = 0; - my $leading_token = $tokens_to_go[$imin]; - my $leading_type = $types_to_go[$imin]; + sub recombine_breakpoints { - # break before certain key blocks except one-liners - if ( $leading_type eq 'k' ) { - if ( $leading_token eq 'BEGIN' || $leading_token eq 'END' ) { - $want_blank = $rOpts->{'blank-lines-before-subs'} - if ( terminal_type_i( $imin, $imax ) ne '}' ); - } + my ( $self, $ri_beg, $ri_end, $rbond_strength_to_go ) = @_; - # Break before certain block types if we haven't had a - # break at this level for a while. This is the - # difficult decision.. - elsif ($last_line_leading_type ne 'b' - && $is_if_unless_while_until_for_foreach{$leading_token} ) - { - my $lc = $nonblank_lines_at_depth[$last_line_leading_level]; - if ( !defined($lc) ) { $lc = 0 } + # This sub implements the 'recombine' operation on a batch. + # Its task is to combine some of these lines back together to + # improve formatting. The need for this arises because + # sub 'break_long_lines' is very liberal in setting line breaks + # for long lines, always setting breaks at good breakpoints, even + # when that creates small lines. Sometimes small line fragments + # are produced which would look better if they were combined. - # patch for RT #128216: no blank line inserted at a level - # change - if ( $levels_to_go[$imin] != $last_line_leading_level ) { - $lc = 0; - } + # Input parameters: + # $ri_beg = ref to array of BEGinning indexes of each line + # $ri_end = ref to array of ENDing indexes of each line + # $rbond_strength_to_go = array of bond strengths pulling + # tokens together, used to decide where best to recombine lines. - $want_blank = - $rOpts->{'blanks-before-blocks'} - && $lc >= $rOpts->{'long-block-line-count'} - && $self->consecutive_nonblank_lines() >= - $rOpts->{'long-block-line-count'} - && terminal_type_i( $imin, $imax ) ne '}'; - } - } + #------------------------------------------------------------------- + # Do nothing under extreme stress; use <= 2 for c171. + # (NOTE: New optimizations make this unnecessary. But removing this + # check is not really useful because this condition only occurs in + # test runs, and another formatting pass will fix things anyway.) + # This routine has a long history of improvements. Some past + # relevant issues are : c118, c167, c171, c186, c187, c193, c200. + #------------------------------------------------------------------- + return if ( $high_stress_level <= 2 ); + + my $nmax_start = @{$ri_end} - 1; + return if ( $nmax_start <= 0 ); - # blank lines before subs except declarations and one-liners - elsif ( $leading_type eq 'i' ) { - if ( + my $iend_max = $ri_end->[$nmax_start]; + if ( $types_to_go[$iend_max] eq '#' ) { + $iend_max = iprev_to_go($iend_max); + } + my $has_terminal_semicolon = + $iend_max >= 0 && $types_to_go[$iend_max] eq ';'; - # quick check - ( - substr( $leading_token, 0, 3 ) eq 'sub' - || $rOpts_sub_alias_list - ) + #-------------------------------------------------------------------- + # Break into the smallest possible sub-sections to improve efficiency + #-------------------------------------------------------------------- - # slow check - && $leading_token =~ /$SUB_PATTERN/ - ) - { - $want_blank = $rOpts->{'blank-lines-before-subs'} - if ( terminal_type_i( $imin, $imax ) !~ /^[\;\}\,]$/ ); - } + # Also make a list of all good joining tokens between the lines + # n-1 and n. + my @joint; - # break before all package declarations - elsif ( substr( $leading_token, 0, 8 ) eq 'package ' ) { - $want_blank = $rOpts->{'blank-lines-before-packages'}; - } - } + my $rsections = []; + my $nbeg_sec = 0; + my $nend_sec; + my $nmax_section = 0; + foreach my $nn ( 1 .. $nmax_start ) { + my $ibeg_1 = $ri_beg->[ $nn - 1 ]; + my $iend_1 = $ri_end->[ $nn - 1 ]; + my $iend_2 = $ri_end->[$nn]; + my $ibeg_2 = $ri_beg->[$nn]; - # Check for blank lines wanted before a closing brace - elsif ( $leading_token eq '}' ) { - if ( $rOpts->{'blank-lines-before-closing-block'} - && $block_type_to_go[$imin] - && $block_type_to_go[$imin] =~ - /$blank_lines_before_closing_block_pattern/ ) + # Define certain good joint tokens + my ( $itok, $itokp, $itokm ); + foreach my $itest ( $iend_1, $ibeg_2 ) { + my $type = $types_to_go[$itest]; + if ( $is_math_op{$type} + || $is_amp_amp{$type} + || $is_assignment{$type} + || $type eq ':' ) { - my $nblanks = $rOpts->{'blank-lines-before-closing-block'}; - if ( $nblanks > $want_blank ) { - $want_blank = $nblanks; - } + $itok = $itest; } } - if ($want_blank) { + # joint[$nn] = index of joint character + $joint[$nn] = $itok; - # future: send blank line down normal path to VerticalAligner - $self->flush_vertical_aligner(); - my $file_writer_object = $self->[_file_writer_object_]; - $file_writer_object->require_blank_code_lines($want_blank); + # Update the section list + my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 ); + if ( + $excess <= 1 + + # The number 5 here is an arbitrary small number intended + # to keep most small matches in one sub-section. + || ( defined($nend_sec) + && ( $nn < 5 || $nmax_start - $nn < 5 ) ) + ) + { + $nend_sec = $nn; + } + else { + if ( defined($nend_sec) ) { + push @{$rsections}, [ $nbeg_sec, $nend_sec ]; + my $num = $nend_sec - $nbeg_sec; + if ( $num > $nmax_section ) { $nmax_section = $num } + $nbeg_sec = $nn; + $nend_sec = undef; + } + $nbeg_sec = $nn; } } - # update blank line variables and count number of consecutive - # non-blank, non-comment lines at this level - $last_last_line_leading_level = $last_line_leading_level; - $last_line_leading_level = $levels_to_go[$imin]; - if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 } - $last_line_leading_type = $types_to_go[$imin]; - if ( $last_line_leading_level == $last_last_line_leading_level - && $last_line_leading_type ne 'b' - && $last_line_leading_type ne '#' - && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) ) - { - $nonblank_lines_at_depth[$last_line_leading_level]++; - } - else { - $nonblank_lines_at_depth[$last_line_leading_level] = 1; + if ( defined($nend_sec) ) { + push @{$rsections}, [ $nbeg_sec, $nend_sec ]; + my $num = $nend_sec - $nbeg_sec; + if ( $num > $nmax_section ) { $nmax_section = $num } } - $self->[_last_line_leading_type_] = $last_line_leading_type; - $self->[_last_line_leading_level_] = $last_line_leading_level; - $self->[_last_last_line_leading_level_] = $last_last_line_leading_level; - - #-------------------------- - # scan lists and long lines - #-------------------------- - - # Flag to remember if we called sub 'pad_array_to_go'. - # Some routines (break_lists(), break_long_lines() ) need some - # extra tokens added at the end of the batch. Most batches do not - # use these routines, so we will avoid calling 'pad_array_to_go' - # unless it is needed. - my $called_pad_array_to_go; + my $num_sections = @{$rsections}; - # set all forced breakpoints for good list formatting - my $is_long_line = $max_index_to_go > 0 - && $self->excess_line_length( $imin, $max_index_to_go ) > 0; + if ( DEBUG_RECOMBINE > 1 ) { + print STDERR < 0 ) { - my $Kbeg = $K_to_go[0]; - my $Kend = $K_to_go[$max_index_to_go]; - $old_line_count_in_batch += - $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_]; + if ( DEBUG_RECOMBINE > 0 ) { + my $max = 0; + print STDERR + "-----\n$num_sections sections found for nmax=$nmax_start\n"; + foreach my $sect ( @{$rsections} ) { + my ( $nbeg, $nend ) = @{$sect}; + my $num = $nend - $nbeg; + if ( $num > $max ) { $max = $num } + print STDERR "$nbeg $nend\n"; + } + print STDERR "max size=$max of $nmax_start lines\n"; } - my $rbond_strength_bias = []; - if ( - $is_long_line - || $old_line_count_in_batch > 1 + # Loop over all sub-sections. Note that we have to work backwards + # from the end of the batch since the sections use original line + # numbers, and the line numbers change as we go. + while ( my $section = pop @{$rsections} ) { + my ( $nbeg, $nend ) = @{$section}; + $self->recombine_section_loop( + { + _ri_beg => $ri_beg, + _ri_end => $ri_end, + _nbeg => $nbeg, + _nend => $nend, + _rjoint => \@joint, + _rbond_strength_to_go => $rbond_strength_to_go, + _has_terminal_semicolon => $has_terminal_semicolon, + } + ); + } - # must always call break_lists() with unbalanced batches because - # it is maintaining some stacks - || $is_unbalanced_batch + return; + } ## end sub recombine_breakpoints - # call break_lists if we might want to break at commas - || ( - $comma_count_in_batch - && ( $rOpts_maximum_fields_per_table > 0 - && $rOpts_maximum_fields_per_table <= $comma_count_in_batch - || $rOpts_comma_arrow_breakpoints == 0 ) - ) + sub recombine_section_loop { + my ( $self, $rhash ) = @_; - # call break_lists if user may want to break open some one-line - # hash references - || ( $comma_arrow_count_contained - && $rOpts_comma_arrow_breakpoints != 3 ) - ) - { - # add a couple of extra terminal blank tokens - $self->pad_array_to_go(); - $called_pad_array_to_go = 1; + # Recombine breakpoints for one section of lines in the current batch - my $sgb = $self->break_lists( $is_long_line, $rbond_strength_bias ); - $saw_good_break ||= $sgb; - } + # Given: + # $ri_beg, $ri_end = ref to arrays with token indexes of the first + # and last line + # $nbeg, $nend = line numbers bounding this section + # $rjoint = ref to array of good joining tokens per line - # let $ri_first and $ri_last be references to lists of - # first and last tokens of line fragments to output.. - my ( $ri_first, $ri_last ); + # Update: $ri_beg, $ri_end, $rjoint if lines are joined - #------------------------- - # write a single line if.. - #------------------------- - if ( + # Returns: + # nothing - # we aren't allowed to add any newlines - !$rOpts_add_newlines + #------------- + # Definitions: + #------------- + # $rhash = { - # or, - || ( + # _ri_beg = ref to array with starting token index by line + # _ri_end = ref to array with ending token index by line + # _nbeg = first line number of this section + # _nend = last line number of this section + # _rjoint = ref to array of good joining tokens for each line + # _rbond_strength_to_go = array of bond strengths + # _has_terminal_semicolon = true if last line of batch has ';' - # this line is 'short' - !$is_long_line + # _num_freeze = fixed number of lines at end of this batch + # _optimization_on = true during final optimization loop + # _num_compares = total number of line compares made so far + # _pair_list = list of line pairs in optimal search order - # and we didn't see a good breakpoint - && !$saw_good_break + # }; - # and we don't already have an interior breakpoint - && !$forced_breakpoint_count - ) - ) - { - @{$ri_first} = ($imin); - @{$ri_last} = ($imax); - } + my $ri_beg = $rhash->{_ri_beg}; + my $ri_end = $rhash->{_ri_end}; + + # Line index range of this section: + my $nbeg = $rhash->{_nbeg}; # stays constant + my $nend = $rhash->{_nend}; # will decrease + + # $nmax_batch = starting number of lines in the full batch + # $num_freeze = number of lines following this section to leave alone + my $nmax_batch = @{$ri_end} - 1; + $rhash->{_num_freeze} = $nmax_batch - $nend; + + # Setup the list of line pairs to test. This stores the following + # values for each line pair: + # [ $n=index of the second line of the pair, $bs=bond strength] + my @pair_list; + my $rbond_strength_to_go = $rhash->{_rbond_strength_to_go}; + foreach my $n ( $nbeg + 1 .. $nend ) { + my $iend_1 = $ri_end->[ $n - 1 ]; + my $ibeg_2 = $ri_beg->[$n]; + my $bs_tweak = 0; + if ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) { $bs_tweak = 0.25 } + my $bs = $rbond_strength_to_go->[$iend_1] + $bs_tweak; + push @pair_list, [ $n, $bs ]; + } + + # Any order for testing is possible, but optimization is only possible + # if we sort the line pairs on decreasing joint strength. + @pair_list = + sort { $b->[1] <=> $a->[1] || $a->[0] <=> $b->[0] } @pair_list; + $rhash->{_rpair_list} = \@pair_list; + + #---------------- + # Iteration limit + #---------------- + + # This was originally an O(n-squared) loop which required a check on + # the maximum number of iterations for safety. It is now a very fast + # loop which runs in O(n) time, but a check on total number of + # iterations is retained to guard against future programming errors. + + # Most cases require roughly 1 comparison per line pair (1 full pass). + # The upper bound is estimated to be about 3 comparisons per line pair + # unless optimization is deactivated. The approximate breakdown is: + # 1 pass with 1 compare per joint to do any special cases, plus + # 1 pass with up to 2 compares per joint in optimization mode + # The most extreme cases in my collection are: + # camel1.t - needs 2.7 compares per line (12 without optimization) + # ternary.t - needs 2.8 compares per line (12 without optimization) + # So a value of MAX_COMPARE_RATIO = 3 looks like an upper bound as + # long as optimization is used. A value of 20 should allow all code to + # pass even if optimization is turned off for testing. + + # The OPTIMIZE_OK flag should be true except for testing. + use constant MAX_COMPARE_RATIO => 20; + use constant OPTIMIZE_OK => 1; + + my $num_pairs = $nend - $nbeg + 1; + my $max_compares = MAX_COMPARE_RATIO * $num_pairs; + + # Always start with optimization off + $rhash->{_num_compares} = 0; + $rhash->{_optimization_on} = 0; + $rhash->{_ix_best_last} = 0; - #----------------------------- - # otherwise use multiple lines - #----------------------------- - else { + #-------------------------------------------- + # loop until there are no more recombinations + #-------------------------------------------- + my $nmax_last = $nmax_batch + 1; + while (1) { - # add a couple of extra terminal blank tokens if we haven't - # already done so - $self->pad_array_to_go() unless ($called_pad_array_to_go); + # Stop when the number of lines in the batch does not decrease + $nmax_batch = @{$ri_end} - 1; + if ( $nmax_batch >= $nmax_last ) { + last; + } + $nmax_last = $nmax_batch; - ( $ri_first, $ri_last, my $rbond_strength_to_go ) = - $self->break_long_lines( $saw_good_break, \@colon_list, - $rbond_strength_bias ); + #----------------------------------------- + # inner loop to find next best combination + #----------------------------------------- + $self->recombine_inner_loop($rhash); - $self->break_all_chain_tokens( $ri_first, $ri_last ); + # Iteration limit check: + if ( $rhash->{_num_compares} > $max_compares ) { - $self->break_equals( $ri_first, $ri_last ); + # See note above; should only get here on a programming error + if (DEVEL_MODE) { + my $ibeg = $ri_beg->[$nbeg]; + my $Kbeg = $K_to_go[$ibeg]; + my $lno = $self->[_rLL_]->[$Kbeg]->[_LINE_INDEX_]; + Fault(<{_num_compares} exceeds max=$max_compares, near line $lno +EOM + } + last; + } - # now we do a correction step to clean this up a bit - # (The only time we would not do this is for debugging) - $self->recombine_breakpoints( $ri_first, $ri_last, - $rbond_strength_to_go ) - if ( $rOpts_recombine && @{$ri_first} > 1 ); + } ## end iteration loop - $self->insert_final_ternary_breaks( $ri_first, $ri_last ) - if (@colon_list); + if (DEBUG_RECOMBINE) { + my $ratio = sprintf "%0.3f", $rhash->{_num_compares} / $num_pairs; + print STDERR +"exiting recombine_inner_loop with $nmax_last lines, opt=$rhash->{_optimization_on}, starting pairs=$num_pairs, num_compares=$rhash->{_num_compares}, ratio=$ratio\n"; } - $self->insert_breaks_before_list_opening_containers( $ri_first, - $ri_last ) - if ( %break_before_container_types && $max_index_to_go > 0 ); + return; + } ## end sub recombine_section_loop - #------------------- - # -lp corrector step - #------------------- - my $do_not_pad = 0; - if ($rOpts_line_up_parentheses) { - $do_not_pad = $self->correct_lp_indentation( $ri_first, $ri_last ); - } + sub recombine_inner_loop { + my ( $self, $rhash ) = @_; - #-------------------------- - # unmask phantom semicolons - #-------------------------- - if ( !$tokens_to_go[$imax] && $types_to_go[$imax] eq ';' ) { - my $i = $imax; - my $tok = ';'; - my $tok_len = 1; - if ( $want_left_space{';'} != WS_NO ) { - $tok = ' ;'; - $tok_len = 2; - } - $tokens_to_go[$i] = $tok; - $token_lengths_to_go[$i] = $tok_len; - my $KK = $K_to_go[$i]; - $rLL->[$KK]->[_TOKEN_] = $tok; - $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len; - my $line_number = 1 + $rLL->[$KK]->[_LINE_INDEX_]; - $self->note_added_semicolon($line_number); + # This is the inner loop of the recombine operation. We look at all of + # the remaining joints in this section and select the best joint to be + # recombined. If a recombination is made, the number of lines + # in this section will be reduced by one. - foreach ( $imax .. $max_index_to_go ) { - $summed_lengths_to_go[ $_ + 1 ] += $tok_len; - } - } + # Returns: nothing - if ( $rOpts_one_line_block_semicolons == 0 ) { - $self->delete_one_line_semicolons( $ri_first, $ri_last ); - } + my $rK_weld_right = $self->[_rK_weld_right_]; + my $rK_weld_left = $self->[_rK_weld_left_]; - #-------------------- - # ship this batch out - #-------------------- - $this_batch->[_ri_first_] = $ri_first; - $this_batch->[_ri_last_] = $ri_last; - $this_batch->[_peak_batch_size_] = $peak_batch_size; - $this_batch->[_do_not_pad_] = $do_not_pad; - $this_batch->[_batch_count_] = $batch_count; - $this_batch->[_rix_seqno_controlling_ci_] = \@ix_seqno_controlling_ci; + my $ri_beg = $rhash->{_ri_beg}; + my $ri_end = $rhash->{_ri_end}; + my $nbeg = $rhash->{_nbeg}; + my $rjoint = $rhash->{_rjoint}; + my $rbond_strength_to_go = $rhash->{_rbond_strength_to_go}; + my $rpair_list = $rhash->{_rpair_list}; - $self->convey_batch_to_vertical_aligner(); + # This will remember the best joint: + my $n_best = 0; + my $bs_best = 0.; + my $ix_best = 0; + my $num_bs = 0; - #------------------------------------------------------------------- - # Write requested number of blank lines after an opening block brace - #------------------------------------------------------------------- - if ($rOpts_blank_lines_after_opening_block) { - my $iterm = $imax; - if ( $types_to_go[$iterm] eq '#' && $iterm > $imin ) { - $iterm -= 1; - if ( $types_to_go[$iterm] eq 'b' && $iterm > $imin ) { - $iterm -= 1; - } - } + # The range of lines in this group is $nbeg to $nstop + my $nmax = @{$ri_end} - 1; + my $nstop = $nmax - $rhash->{_num_freeze}; + my $num_joints = $nstop - $nbeg; - if ( $types_to_go[$iterm] eq '{' - && $block_type_to_go[$iterm] - && $block_type_to_go[$iterm] =~ - /$blank_lines_after_opening_block_pattern/ ) - { - my $nblanks = $rOpts_blank_lines_after_opening_block; - $self->flush_vertical_aligner(); - my $file_writer_object = $self->[_file_writer_object_]; - $file_writer_object->require_blank_code_lines($nblanks); - } + # Turn off optimization if just two joints remain to allow + # special two-line logic to be checked (c193) + if ( $rhash->{_optimization_on} && $num_joints <= 2 ) { + $rhash->{_optimization_on} = 0; } - # Remember the largest batch size processed. This is needed by the - # logical padding routine to avoid padding the first nonblank token - if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) { - $peak_batch_size = $max_index_to_go; - } + # Start where we ended the last search + my $ix_start = $rhash->{_ix_best_last}; - return; - } ## end sub grind_batch_of_CODE + # Keep the starting index in bounds + $ix_start = max( 0, $ix_start ); - sub save_opening_indentation { + # Make a search order list which cycles around to visit + # all line pairs. + my $ix_max = @{$rpair_list} - 1; + my @ix_list = ( $ix_start .. $ix_max, 0 .. $ix_start - 1 ); + my $ix_last = $ix_list[-1]; - # This should be called after each batch of tokens is output. It - # saves indentations of lines of all unmatched opening tokens. - # These will be used by sub get_opening_indentation. + #------------------------- + # loop over all line pairs + #------------------------- + my $incomplete_loop; + foreach my $ix (@ix_list) { + my $item = $rpair_list->[$ix]; + my ( $n, $bs ) = @{$item}; + + # This flag will be true if we 'last' out of this loop early. + # We cannot turn on optimization if this is true. + $incomplete_loop = $ix != $ix_last; + + # Update the count of the number of times through this inner loop + $rhash->{_num_compares}++; + + #---------------------------------------------------------- + # If we join the current pair of lines, + # line $n-1 will become the left part of the joined line + # line $n will become the right part of the joined line + # + # Here are Indexes of the endpoint tokens of the two lines: + # + # -----line $n-1--- | -----line $n----- + # $ibeg_1 $iend_1 | $ibeg_2 $iend_2 + # ^ + # | + # We want to decide if we should remove the line break + # between the tokens at $iend_1 and $ibeg_2 + # + # We will apply a number of ad-hoc tests to see if joining + # here will look ok. The code will just move to the next + # pair if the join doesn't look good. If we get through + # the gauntlet of tests, the lines will be recombined. + #---------------------------------------------------------- + # + # beginning and ending tokens of the lines we are working on + my $ibeg_1 = $ri_beg->[ $n - 1 ]; + my $iend_1 = $ri_end->[ $n - 1 ]; + my $iend_2 = $ri_end->[$n]; + my $ibeg_2 = $ri_beg->[$n]; - my ( $self, $ri_first, $ri_last, $rindentation_list ) = @_; + # The combined line cannot be too long + my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 ); + next if ( $excess > 0 ); - # QW INDENTATION PATCH 1: - # Also save indentation for multiline qw quotes - my @i_qw; - my $seqno_qw_opening; - if ( $types_to_go[$max_index_to_go] eq 'q' ) { - my $KK = $K_to_go[$max_index_to_go]; - $seqno_qw_opening = - $self->[_rstarting_multiline_qw_seqno_by_K_]->{$KK}; - if ($seqno_qw_opening) { - push @i_qw, $max_index_to_go; - } - } + my $type_iend_1 = $types_to_go[$iend_1]; + my $type_iend_2 = $types_to_go[$iend_2]; + my $type_ibeg_1 = $types_to_go[$ibeg_1]; + my $type_ibeg_2 = $types_to_go[$ibeg_2]; - # we need to save indentations of any unmatched opening tokens - # in this batch because we may need them in a subsequent batch. - foreach ( @unmatched_opening_indexes_in_this_batch, @i_qw ) { + DEBUG_RECOMBINE > 1 && do { + print STDERR +"RECOMBINE: ix=$ix iend1=$iend_1 iend2=$iend_2 n=$n nmax=$nmax if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n"; + }; - my $seqno = $type_sequence_to_go[$_]; + # If line $n is the last line, we set some flags and + # do any special checks for it + my $this_line_is_semicolon_terminated; + if ( $n == $nmax ) { - if ( !$seqno ) { - if ( $seqno_qw_opening && $_ == $max_index_to_go ) { - $seqno = $seqno_qw_opening; - } - else { + if ( $type_ibeg_2 eq '{' ) { - # shouldn't happen - $seqno = 'UNKNOWN'; + # join isolated ')' and '{' if requested (git #110) + if ( $rOpts_cuddled_paren_brace + && $type_iend_1 eq '}' + && $iend_1 == $ibeg_1 + && $ibeg_2 == $iend_2 ) + { + if ( $tokens_to_go[$iend_1] eq ')' + && $tokens_to_go[$ibeg_2] eq '{' ) + { + $n_best = $n; + $ix_best = $ix; + last; + } + } + + # otherwise, a terminal '{' should stay where it is + # unless preceded by a fat comma + next if ( $type_iend_1 ne '=>' ); } - } - $saved_opening_indentation{$seqno} = [ - lookup_opening_indentation( - $_, $ri_first, $ri_last, $rindentation_list - ) - ]; - } - return; - } ## end sub save_opening_indentation + $this_line_is_semicolon_terminated = + $rhash->{_has_terminal_semicolon}; - sub get_saved_opening_indentation { - my ($seqno) = @_; - my ( $indent, $offset, $is_leading, $exists ) = ( 0, 0, 0, 0 ); + } - if ($seqno) { - if ( $saved_opening_indentation{$seqno} ) { - ( $indent, $offset, $is_leading ) = - @{ $saved_opening_indentation{$seqno} }; - $exists = 1; + #---------------------------------------------------------- + # Recombine Section 0: + # Examine the special token joining this line pair, if any. + # Put as many tests in this section to avoid duplicate code + # and to make formatting independent of whether breaks are + # to the left or right of an operator. + #---------------------------------------------------------- + + my $itok = $rjoint->[$n]; + if ($itok) { + my $ok_0 = recombine_section_0( $itok, $ri_beg, $ri_end, $n ); + next if ( !$ok_0 ); } - } - # some kind of serious error it doesn't exist - # (example is badfile.t) + #---------------------------------------------------------- + # Recombine Section 1: + # Join welded nested containers immediately + #---------------------------------------------------------- - return ( $indent, $offset, $is_leading, $exists ); - } ## end sub get_saved_opening_indentation -} ## end closure grind_batch_of_CODE + if ( + $total_weld_count + && ( $type_sequence_to_go[$iend_1] + && defined( $rK_weld_right->{ $K_to_go[$iend_1] } ) + || $type_sequence_to_go[$ibeg_2] + && defined( $rK_weld_left->{ $K_to_go[$ibeg_2] } ) ) + ) + { + $n_best = $n; + $ix_best = $ix; + last; + } -sub lookup_opening_indentation { + #---------------------------------------------------------- + # Recombine Section 2: + # Examine token at $iend_1 (right end of first line of pair) + #---------------------------------------------------------- + + my ( $ok_2, $skip_Section_3 ) = + recombine_section_2( $ri_beg, $ri_end, $n, + $this_line_is_semicolon_terminated ); + next if ( !$ok_2 ); + + #---------------------------------------------------------- + # Recombine Section 3: + # Examine token at $ibeg_2 (left end of second line of pair) + #---------------------------------------------------------- + + # Join lines identified above as capable of + # causing an outdented line with leading closing paren. + # Note that we are skipping the rest of this section + # and the rest of the loop to do the join. + if ($skip_Section_3) { + $forced_breakpoint_to_go[$iend_1] = 0; + $n_best = $n; + $ix_best = $ix; + $incomplete_loop = 1; + last; + } - # get the indentation of the line in the current output batch - # which output a selected opening token - # - # given: - # $i_opening - index of an opening token in the current output batch - # whose line indentation we need - # $ri_first - reference to list of the first index $i for each output - # line in this batch - # $ri_last - reference to list of the last index $i for each output line - # in this batch - # $rindentation_list - reference to a list containing the indentation - # used for each line. (NOTE: the first slot in - # this list is the last returned line number, and this is - # followed by the list of indentations). - # - # return - # -the indentation of the line which contained token $i_opening - # -and its offset (number of columns) from the start of the line + my ( $ok_3, $bs_tweak ) = + recombine_section_3( $ri_beg, $ri_end, $n, + $this_line_is_semicolon_terminated ); + next if ( !$ok_3 ); - my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_; + #---------------------------------------------------------- + # Recombine Section 4: + # Combine the lines if we arrive here and it is possible + #---------------------------------------------------------- - if ( !@{$ri_last} ) { + # honor hard breakpoints + next if ( $forced_breakpoint_to_go[$iend_1] ); - # An error here implies a bug introduced by a recent program change. - # Every batch of code has lines, so this should never happen. - if (DEVEL_MODE) { - Fault("Error in opening_indentation: no lines"); - } - return ( 0, 0, 0 ); - } + if (DEVEL_MODE) { - my $nline = $rindentation_list->[0]; # line number of previous lookup + # This fault can only occur if an array index error has been + # introduced by a recent programming change. + my $bs_check = $rbond_strength_to_go->[$iend_1] + $bs_tweak; + if ( $bs_check != $bs ) { + Fault(<[$nline] ); + # Require a few extra spaces before recombining lines if we + # are at an old breakpoint unless this is a simple list or + # terminal line. The goal is to avoid oscillating between + # two quasi-stable end states. For example this snippet + # caused problems: - # find the correct line - unless ( $i_opening > $ri_last->[-1] ) { - while ( $i_opening > $ri_last->[$nline] ) { $nline++; } - } +## my $this = +## bless { +## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]" +## }, +## $type; + next + if ( $old_breakpoint_to_go[$iend_1] + && !$this_line_is_semicolon_terminated + && $n < $nmax + && $excess + 4 > 0 + && $type_iend_2 ne ',' ); + + # do not recombine if we would skip in indentation levels + if ( $n < $nmax ) { + my $if_next = $ri_beg->[ $n + 1 ]; + next + if ( + $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2] + && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next] - # Error - token index is out of bounds - shouldn't happen - # A program bug has been introduced in one of the calling routines. - # We better stop here. - else { - my $i_last_line = $ri_last->[-1]; - if (DEVEL_MODE) { - Fault(< $i_last_line = max index of last line -This batch has max index = $max_index_to_go, -EOM - } - $nline = $#{$ri_last}; - } + # but an isolated 'if (' is undesirable + && !( + $n == 1 + && $iend_1 - $ibeg_1 <= 2 + && $type_ibeg_1 eq 'k' + && $tokens_to_go[$ibeg_1] eq 'if' + && $tokens_to_go[$iend_1] ne '(' + ) + ); + } - $rindentation_list->[0] = - $nline; # save line number to start looking next call - my $ibeg = $ri_start->[$nline]; - my $offset = token_sequence_length( $ibeg, $i_opening ) - 1; - my $is_leading = ( $ibeg == $i_opening ); - return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading ); -} ## end sub lookup_opening_indentation + ## OLD: honor no-break's + ## next if ( $bs >= NO_BREAK - 1 ); # removed for b1257 -sub terminal_type_i { + # remember the pair with the greatest bond strength + if ( !$n_best ) { - # returns type of last token on this line (terminal token), as follows: - # returns # for a full-line comment - # returns ' ' for a blank line - # otherwise returns final token type + # First good joint ... + $n_best = $n; + $ix_best = $ix; + $bs_best = $bs; + $num_bs = 1; - my ( $ibeg, $iend ) = @_; + # In optimization mode: stop on the first acceptable joint + # because we already know it has the highest strength + if ( $rhash->{_optimization_on} == 1 ) { + last; + } + } + else { - # Start at the end and work backwards - my $i = $iend; - my $type_i = $types_to_go[$i]; + # Second and later joints .. + $num_bs++; - # Check for side comment - if ( $type_i eq '#' ) { - $i--; - if ( $i < $ibeg ) { - return wantarray ? ( $type_i, $ibeg ) : $type_i; - } - $type_i = $types_to_go[$i]; - } + # save maximum strength; in case of a tie select min $n + if ( $bs > $bs_best || $bs == $bs_best && $n < $n_best ) { + $n_best = $n; + $ix_best = $ix; + $bs_best = $bs; + } + } - # Skip past a blank - if ( $type_i eq 'b' ) { - $i--; - if ( $i < $ibeg ) { - return wantarray ? ( $type_i, $ibeg ) : $type_i; - } - $type_i = $types_to_go[$i]; - } + } ## end loop over all line pairs - # Found it..make sure it is a BLOCK termination, - # but hide a terminal } after sort/map/grep/eval/do because it is not - # necessarily the end of the line. (terminal.t) - my $block_type = $block_type_to_go[$i]; - if ( - $type_i eq '}' - && ( !$block_type - || $is_sort_map_grep_eval_do{$block_type} ) - ) - { - $type_i = 'b'; - } - return wantarray ? ( $type_i, $i ) : $type_i; -} ## end sub terminal_type_i + #--------------------------------------------------- + # recombine the pair with the greatest bond strength + #--------------------------------------------------- + if ($n_best) { + DEBUG_RECOMBINE > 1 + && print "BEST: nb=$n_best nbeg=$nbeg stop=$nstop bs=$bs_best\n"; + splice @{$ri_beg}, $n_best, 1; + splice @{$ri_end}, $n_best - 1, 1; + splice @{$rjoint}, $n_best, 1; + + splice @{$rpair_list}, $ix_best, 1; + + # Update the line indexes in the pair list: + # Old $n values greater than the best $n decrease by 1 + # because of the splice we just did. + foreach my $item ( @{$rpair_list} ) { + my $n_old = $item->[0]; + if ( $n_old > $n_best ) { $item->[0] -= 1 } + } + + # Store the index of this location for starting the next search. + # We must subtract 1 to get an updated index because the splice + # above just removed the best pair. + # BUT CAUTION: if this is the first pair in the pair list, then + # this produces an invalid index. So this index must be tested + # before use in the next pass through the outer loop. + $rhash->{_ix_best_last} = $ix_best - 1; + + # Turn on optimization if ... + if ( -sub pad_array_to_go { + # it is not already on, and + !$rhash->{_optimization_on} - # To simplify coding in break_lists and set_bond_strengths, it helps to - # create some extra blank tokens at the end of the arrays. We also add - # some undef's to help guard against using invalid data. - my ($self) = @_; - $K_to_go[ $max_index_to_go + 1 ] = undef; - $tokens_to_go[ $max_index_to_go + 1 ] = EMPTY_STRING; - $tokens_to_go[ $max_index_to_go + 2 ] = EMPTY_STRING; - $tokens_to_go[ $max_index_to_go + 3 ] = undef; - $types_to_go[ $max_index_to_go + 1 ] = 'b'; - $types_to_go[ $max_index_to_go + 2 ] = 'b'; - $types_to_go[ $max_index_to_go + 3 ] = undef; - $nesting_depth_to_go[ $max_index_to_go + 2 ] = undef; - $nesting_depth_to_go[ $max_index_to_go + 1 ] = - $nesting_depth_to_go[$max_index_to_go]; + # we have not taken a shortcut to get here, and + && !$incomplete_loop - # /^[R\}\)\]]$/ - if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) { - if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) { + # we have seen a good break on strength, and + && $num_bs - # Nesting depths are set to be >=0 in sub write_line, so it should - # not be possible to get here unless the code has a bracing error - # which leaves a closing brace with zero nesting depth. - unless ( get_saw_brace_error() ) { - if (DEVEL_MODE) { - Fault(<{_optimization_on} = 1; + if (DEBUG_RECOMBINE) { + my $num_compares = $rhash->{_num_compares}; + my $pair_count = @ix_list; + print STDERR +"Entering optimization phase at $num_compares compares, pair count = $pair_count\n"; } } } - else { - $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1; - } - } + return; + } ## end sub recombine_inner_loop - # /^[L\{\(\[]$/ - elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) { - $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1; - } - return; -} ## end sub pad_array_to_go + sub recombine_section_0 { + my ( $itok, $ri_beg, $ri_end, $n ) = @_; -sub break_all_chain_tokens { + # Recombine Section 0: + # Examine special candidate joining token $itok - # scan the current breakpoints looking for breaks at certain "chain - # operators" (. : && || + etc) which often occur repeatedly in a long - # statement. If we see a break at any one, break at all similar tokens - # within the same container. - # - my ( $self, $ri_left, $ri_right ) = @_; + # Given: + # $itok = index of token at a possible join of lines $n-1 and $n - my %saw_chain_type; - my %left_chain_type; - my %right_chain_type; - my %interior_chain_type; - my $nmax = @{$ri_right} - 1; + # Return: + # true => ok to combine + # false => do not combine lines - # scan the left and right end tokens of all lines - my $count = 0; - for my $n ( 0 .. $nmax ) { - my $il = $ri_left->[$n]; - my $ir = $ri_right->[$n]; - my $typel = $types_to_go[$il]; - my $typer = $types_to_go[$ir]; - $typel = '+' if ( $typel eq '-' ); # treat + and - the same - $typer = '+' if ( $typer eq '-' ); - $typel = '*' if ( $typel eq '/' ); # treat * and / the same - $typer = '*' if ( $typer eq '/' ); + # Here are Indexes of the endpoint tokens of the two lines: + # + # -----line $n-1--- | -----line $n----- + # $ibeg_1 $iend_1 | $ibeg_2 $iend_2 + # ^ ^ + # | | + # ------------$itok is one of these tokens - my $keyl = $typel eq 'k' ? $tokens_to_go[$il] : $typel; - my $keyr = $typer eq 'k' ? $tokens_to_go[$ir] : $typer; - if ( $is_chain_operator{$keyl} && $want_break_before{$typel} ) { - next if ( $typel eq '?' ); - push @{ $left_chain_type{$keyl} }, $il; - $saw_chain_type{$keyl} = 1; - $count++; - } - if ( $is_chain_operator{$keyr} && !$want_break_before{$typer} ) { - next if ( $typer eq '?' ); - push @{ $right_chain_type{$keyr} }, $ir; - $saw_chain_type{$keyr} = 1; - $count++; - } - } - return unless $count; + # Put as many tests in this section to avoid duplicate code + # and to make formatting independent of whether breaks are + # to the left or right of an operator. - # now look for any interior tokens of the same types - $count = 0; - for my $n ( 0 .. $nmax ) { - my $il = $ri_left->[$n]; - my $ir = $ri_right->[$n]; - foreach my $i ( $il + 1 .. $ir - 1 ) { - my $type = $types_to_go[$i]; - my $key = $type eq 'k' ? $tokens_to_go[$i] : $type; - $key = '+' if ( $key eq '-' ); - $key = '*' if ( $key eq '/' ); - if ( $saw_chain_type{$key} ) { - push @{ $interior_chain_type{$key} }, $i; - $count++; - } - } - } - return unless $count; + my $nmax = @{$ri_end} - 1; + my $ibeg_1 = $ri_beg->[ $n - 1 ]; + my $iend_1 = $ri_end->[ $n - 1 ]; + my $ibeg_2 = $ri_beg->[$n]; + my $iend_2 = $ri_end->[$n]; + + if ($itok) { + + my $type = $types_to_go[$itok]; + + if ( $type eq ':' ) { + + # do not join at a colon unless it disobeys the + # break request + if ( $itok eq $iend_1 ) { + return unless $want_break_before{$type}; + } + else { + return if $want_break_before{$type}; + } + } ## end if ':' - # now make a list of all new break points - my @insert_list; + # handle math operators + - * / + elsif ( $is_math_op{$type} ) { - # loop over all chain types - foreach my $key ( keys %saw_chain_type ) { + # Combine these lines if this line is a single + # number, or if it is a short term with same + # operator as the previous line. For example, in + # the following code we will combine all of the + # short terms $A, $B, $C, $D, $E, $F, together + # instead of leaving them one per line: + # my $time = + # $A * $B * $C * $D * $E * $F * + # ( 2. * $eps * $sigma * $area ) * + # ( 1. / $tcold**3 - 1. / $thot**3 ); - # quit if just ONE continuation line with leading . For example-- - # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{' - # . $contents; - last if ( $nmax == 1 && $key =~ /^[\.\+]$/ ); + # This can be important in math-intensive code. - # loop over all interior chain tokens - foreach my $itest ( @{ $interior_chain_type{$key} } ) { + my $good_combo; - # loop over all left end tokens of same type - if ( $left_chain_type{$key} ) { - next if $nobreak_to_go[ $itest - 1 ]; - foreach my $i ( @{ $left_chain_type{$key} } ) { - next unless $self->in_same_container_i( $i, $itest ); - push @insert_list, $itest - 1; + my $itokp = min( $inext_to_go[$itok], $iend_2 ); + my $itokpp = min( $inext_to_go[$itokp], $iend_2 ); + my $itokm = max( iprev_to_go($itok), $ibeg_1 ); + my $itokmm = max( iprev_to_go($itokm), $ibeg_1 ); - # Break at matching ? if this : is at a different level. - # For example, the ? before $THRf_DEAD in the following - # should get a break if its : gets a break. - # - # my $flags = - # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE - # : ( $_ & 4 ) ? $THRf_R_DETACHED - # : $THRf_R_JOINABLE; - if ( $key eq ':' - && $levels_to_go[$i] != $levels_to_go[$itest] ) - { - my $i_question = $mate_index_to_go[$itest]; - if ( $i_question > 0 ) { - push @insert_list, $i_question - 1; - } + # check for a number on the right + if ( $types_to_go[$itokp] eq 'n' ) { + + # ok if nothing else on right + if ( $itokp == $iend_2 ) { + $good_combo = 1; + } + else { + + # look one more token to right.. + # okay if math operator or some termination + $good_combo = + ( ( $itokpp == $iend_2 ) + && $is_math_op{ $types_to_go[$itokpp] } ) + || $types_to_go[$itokpp] =~ /^[#,;]$/; } - last; } - } - # loop over all right end tokens of same type - if ( $right_chain_type{$key} ) { - next if $nobreak_to_go[$itest]; - foreach my $i ( @{ $right_chain_type{$key} } ) { - next unless $self->in_same_container_i( $i, $itest ); - push @insert_list, $itest; + # check for a number on the left + if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) { - # break at matching ? if this : is at a different level - if ( $key eq ':' - && $levels_to_go[$i] != $levels_to_go[$itest] ) - { - my $i_question = $mate_index_to_go[$itest]; - if ( $i_question >= 0 ) { - push @insert_list, $i_question; - } + # okay if nothing else to left + if ( $itokm == $ibeg_1 ) { + $good_combo = 1; + } + + # otherwise look one more token to left + else { + + # okay if math operator, comma, or assignment + $good_combo = ( $itokmm == $ibeg_1 ) + && ( $is_math_op{ $types_to_go[$itokmm] } + || $types_to_go[$itokmm] =~ /^[,]$/ + || $is_assignment{ $types_to_go[$itokmm] } ); } - last; } - } - } - } - # insert any new break points - if (@insert_list) { - $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); - } - return; -} ## end sub break_all_chain_tokens + # look for a single short token either side of the + # operator + if ( !$good_combo ) { -sub insert_additional_breaks { + # Slight adjustment factor to make results + # independent of break before or after operator + # in long summed lists. (An operator and a + # space make two spaces). + my $two = ( $itok eq $iend_1 ) ? 2 : 0; - # this routine will add line breaks at requested locations after - # sub break_long_lines has made preliminary breaks. + $good_combo = - my ( $self, $ri_break_list, $ri_first, $ri_last ) = @_; - my $i_f; - my $i_l; - my $line_number = 0; - foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) { + # numbers or id's on both sides of this joint + $types_to_go[$itokp] =~ /^[in]$/ + && $types_to_go[$itokm] =~ /^[in]$/ - next if ( $nobreak_to_go[$i_break_left] ); + # one of the two lines must be short: + && ( + ( + # no more than 2 nonblank tokens right + # of joint + $itokpp == $iend_2 - $i_f = $ri_first->[$line_number]; - $i_l = $ri_last->[$line_number]; - while ( $i_break_left >= $i_l ) { - $line_number++; + # short + && token_sequence_length( $itokp, $iend_2 ) < + $two + $rOpts_short_concatenation_item_length + ) + || ( + # no more than 2 nonblank tokens left of + # joint + $itokmm == $ibeg_1 - # shouldn't happen unless caller passes bad indexes - if ( $line_number >= @{$ri_last} ) { - if (DEVEL_MODE) { - Fault(<[$line_number]; - $i_l = $ri_last->[$line_number]; - } - # Do not leave a blank at the end of a line; back up if necessary - if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- } + # it is also good to combine if we can reduce to 2 + # lines + if ( !$good_combo ) { - my $i_break_right = $inext_to_go[$i_break_left]; - if ( $i_break_left >= $i_f - && $i_break_left < $i_l - && $i_break_right > $i_f - && $i_break_right <= $i_l ) - { - splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) ); - splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) ); - } - } - return; -} ## end sub insert_additional_breaks + # index on other line where same token would be + # in a long chain. + my $iother = ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1; -{ ## begin closure in_same_container_i - my $ris_break_token; - my $ris_comma_token; + $good_combo = + $n == 2 + && $n == $nmax + && $types_to_go[$iother] ne $type; + } - BEGIN { + return unless ($good_combo); - # all cases break on seeing commas at same level - my @q = qw( => ); - push @q, ','; - @{$ris_comma_token}{@q} = (1) x scalar(@q); + } ## end math - # Non-ternary text also breaks on seeing any of qw(? : || or ) - # Example: we would not want to break at any of these .'s - # : "$str" - push @q, qw( or || ? : ); - @{$ris_break_token}{@q} = (1) x scalar(@q); - } + elsif ( $is_amp_amp{$type} ) { + ##TBD + } ## end &&, || - sub in_same_container_i { + elsif ( $is_assignment{$type} ) { + ##TBD + } ## end assignment + } - # Check to see if tokens at i1 and i2 are in the same container, and - # not separated by certain characters: => , ? : || or - # This is an interface between the _to_go arrays to the rLL array - my ( $self, $i1, $i2 ) = @_; + # ok to combine lines + return 1; + } ## end sub recombine_section_0 - # quick check - my $parent_seqno_1 = $parent_seqno_to_go[$i1]; - return if ( $parent_seqno_to_go[$i2] ne $parent_seqno_1 ); + sub recombine_section_2 { - if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) } - my $K1 = $K_to_go[$i1]; - my $K2 = $K_to_go[$i2]; - my $rLL = $self->[_rLL_]; + my ( $ri_beg, $ri_end, $n, $this_line_is_semicolon_terminated ) = @_; - my $depth_1 = $nesting_depth_to_go[$i1]; - return if ( $depth_1 < 0 ); + # Recombine Section 2: + # Examine token at $iend_1 (right end of first line of pair) - # Shouldn't happen since i1 and i2 have same parent: - return unless ( $nesting_depth_to_go[$i2] == $depth_1 ); + # Here are Indexes of the endpoint tokens of the two lines: + # + # -----line $n-1--- | -----line $n----- + # $ibeg_1 $iend_1 | $ibeg_2 $iend_2 + # ^ + # | + # -----Section 2 looks at this token - # Select character set to scan for - my $type_1 = $types_to_go[$i1]; - my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token; + # Returns: + # (nothing) => do not join lines + # 1, skip_Section_3 => ok to join lines + + # $skip_Section_3 is a flag for skipping the next section + my $skip_Section_3 = 0; + + my $nmax = @{$ri_end} - 1; + my $ibeg_1 = $ri_beg->[ $n - 1 ]; + my $iend_1 = $ri_end->[ $n - 1 ]; + my $iend_2 = $ri_end->[$n]; + my $ibeg_2 = $ri_beg->[$n]; + my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1; + my $ibeg_nmax = $ri_beg->[$nmax]; + + my $type_iend_1 = $types_to_go[$iend_1]; + my $type_iend_2 = $types_to_go[$iend_2]; + my $type_ibeg_1 = $types_to_go[$ibeg_1]; + my $type_ibeg_2 = $types_to_go[$ibeg_2]; + + # an isolated '}' may join with a ';' terminated segment + if ( $type_iend_1 eq '}' ) { + + # Check for cases where combining a semicolon terminated + # statement with a previous isolated closing paren will + # allow the combined line to be outdented. This is + # generally a good move. For example, we can join up + # the last two lines here: + # ( + # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, + # $size, $atime, $mtime, $ctime, $blksize, $blocks + # ) + # = stat($file); + # + # to get: + # ( + # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, + # $size, $atime, $mtime, $ctime, $blksize, $blocks + # ) = stat($file); + # + # which makes the parens line up. + # + # Another example, from Joe Matarazzo, probably looks best + # with the 'or' clause appended to the trailing paren: + # $self->some_method( + # PARAM1 => 'foo', + # PARAM2 => 'bar' + # ) or die "Some_method didn't work"; + # + # But we do not want to do this for something like the -lp + # option where the paren is not outdentable because the + # trailing clause will be far to the right. + # + # The logic here is synchronized with the logic in sub + # sub get_final_indentation, which actually does + # the outdenting. + # + my $combine_ok = $this_line_is_semicolon_terminated + + # only one token on last line + && $ibeg_1 == $iend_1 + + # must be structural paren + && $tokens_to_go[$iend_1] eq ')' + + # style must allow outdenting, + && !$closing_token_indentation{')'} + + # but leading colons probably line up with a + # previous colon or question (count could be wrong). + && $type_ibeg_2 ne ':' + + # only one step in depth allowed. this line must not + # begin with a ')' itself. + && ( $nesting_depth_to_go[$iend_1] == + $nesting_depth_to_go[$iend_2] + 1 ); + + # But only combine leading '&&', '||', if no previous && || : + # seen. This count includes these tokens at all levels. The + # idea is that seeing these at any level can make it hard to read + # formatting if we recombine. + if ( $is_amp_amp{$type_ibeg_2} ) { + foreach my $n_t ( reverse( 0 .. $n - 2 ) ) { + my $ibeg_t = $ri_beg->[$n_t]; + my $type_t = $types_to_go[$ibeg_t]; + if ( $is_amp_amp{$type_t} || $type_t eq ':' ) { + $combine_ok = 0; + last; + } + } + } - # Fast preliminary loop to verify that tokens are in the same container - my $KK = $K1; - while (1) { - $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_]; - last if !defined($KK); - last if ( $KK >= $K2 ); - my $ii = $i1 + $KK - $K1; - my $depth_i = $nesting_depth_to_go[$ii]; - return if ( $depth_i < $depth_1 ); - next if ( $depth_i > $depth_1 ); - if ( $type_1 ne ':' ) { - my $tok_i = $tokens_to_go[$ii]; - return if ( $tok_i eq '?' || $tok_i eq ':' ); + $skip_Section_3 ||= $combine_ok; + + # YVES patch 2 of 2: + # Allow cuddled eval chains, like this: + # eval { + # #STUFF; + # 1; # return true + # } or do { + # #handle error + # }; + # This patch works together with a patch in + # setting adjusted indentation (where the closing eval + # brace is outdented if possible). + # The problem is that an 'eval' block has continuation + # indentation and it looks better to undo it in some + # cases. If we do not use this patch we would get: + # eval { + # #STUFF; + # 1; # return true + # } + # or do { + # #handle error + # }; + # The alternative, for uncuddled style, is to create + # a patch in get_final_indentation which undoes + # the indentation of a leading line like 'or do {'. + # This doesn't work well with -icb through + if ( + $block_type_to_go[$iend_1] + && $rOpts_brace_follower_vertical_tightness > 0 + && ( + + # -bfvt=1, allow cuddled eval chains [default] + ( + $tokens_to_go[$iend_2] eq '{' + && $block_type_to_go[$iend_1] eq 'eval' + && !ref( $leading_spaces_to_go[$iend_1] ) + && !$rOpts_indent_closing_brace + ) + + # -bfvt=2, allow most brace followers [part of git #110] + || ( $rOpts_brace_follower_vertical_tightness > 1 + && $ibeg_1 == $iend_1 ) + + ) + + && ( + ( $type_ibeg_2 =~ /^(\&\&|\|\|)$/ ) + || ( $type_ibeg_2 eq 'k' + && $is_and_or{ $tokens_to_go[$ibeg_2] } ) + || $is_if_unless{ $tokens_to_go[$ibeg_2] } + ) + ) + { + $skip_Section_3 ||= 1; } - } - # Slow loop checking for certain characters + return + unless ( + $skip_Section_3 - #----------------------------------------------------- - # This is potentially a slow routine and not critical. - # For safety just give up for large differences. - # See test file 'infinite_loop.txt' - #----------------------------------------------------- - return if ( $i2 - $i1 > 200 ); + # handle '.' and '?' specially below + || ( $type_ibeg_2 =~ /^[\.\?]$/ ) - foreach my $ii ( $i1 + 1 .. $i2 - 1 ) { + # fix for c054 (unusual -pbp case) + || $type_ibeg_2 eq '==' - my $depth_i = $nesting_depth_to_go[$ii]; - next if ( $depth_i > $depth_1 ); - return if ( $depth_i < $depth_1 ); - my $tok_i = $tokens_to_go[$ii]; - return if ( $rbreak->{$tok_i} ); + ); } - return 1; - } ## end sub in_same_container_i -} ## end closure in_same_container_i -sub break_equals { + elsif ( $type_iend_1 eq '{' ) { - # Look for assignment operators that could use a breakpoint. - # For example, in the following snippet - # - # $HOME = $ENV{HOME} - # || $ENV{LOGDIR} - # || $pw[7] - # || die "no home directory for user $<"; - # - # we could break at the = to get this, which is a little nicer: - # $HOME = - # $ENV{HOME} - # || $ENV{LOGDIR} - # || $pw[7] - # || die "no home directory for user $<"; - # - # The logic here follows the logic in set_logical_padding, which - # will add the padding in the second line to improve alignment. - # - my ( $self, $ri_left, $ri_right ) = @_; - my $nmax = @{$ri_right} - 1; - return unless ( $nmax >= 2 ); + # YVES + # honor breaks at opening brace + # Added to prevent recombining something like this: + # } || eval { package main; + return if ( $forced_breakpoint_to_go[$iend_1] ); + } - # scan the left ends of first two lines - my $tokbeg = EMPTY_STRING; - my $depth_beg; - for my $n ( 1 .. 2 ) { - my $il = $ri_left->[$n]; - my $typel = $types_to_go[$il]; - my $tokenl = $tokens_to_go[$il]; - my $keyl = $typel eq 'k' ? $tokenl : $typel; + # do not recombine lines with ending &&, ||, + elsif ( $is_amp_amp{$type_iend_1} ) { + return unless ( $want_break_before{$type_iend_1} ); + } - my $has_leading_op = $is_chain_operator{$keyl}; - return unless ($has_leading_op); - if ( $n > 1 ) { + # Identify and recombine a broken ?/: chain + elsif ( $type_iend_1 eq '?' ) { + + # Do not recombine different levels return - unless ( $tokenl eq $tokbeg - && $nesting_depth_to_go[$il] eq $depth_beg ); + if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] ); + + # do not recombine unless next line ends in : + return unless $type_iend_2 eq ':'; } - $tokbeg = $tokenl; - $depth_beg = $nesting_depth_to_go[$il]; - } - # now look for any interior tokens of the same types - my $il = $ri_left->[0]; - my $ir = $ri_right->[0]; + # for lines ending in a comma... + elsif ( $type_iend_1 eq ',' ) { - # now make a list of all new break points - my @insert_list; - foreach my $i ( reverse( $il + 1 .. $ir - 1 ) ) { - my $type = $types_to_go[$i]; - if ( $is_assignment{$type} - && $nesting_depth_to_go[$i] eq $depth_beg ) - { - if ( $want_break_before{$type} ) { - push @insert_list, $i - 1; + # Do not recombine at comma which is following the + # input bias. + # NOTE: this could be controlled by a special flag, + # but it seems to work okay. + return if ( $old_breakpoint_to_go[$iend_1] ); + + # An isolated '},' may join with an identifier + ';' + # This is useful for the class of a 'bless' statement + # (bless.t) + if ( $type_ibeg_1 eq '}' + && $type_ibeg_2 eq 'i' ) + { + return + unless ( ( $ibeg_1 == ( $iend_1 - 1 ) ) + && ( $iend_2 == ( $ibeg_2 + 1 ) ) + && $this_line_is_semicolon_terminated ); + + # override breakpoint + $forced_breakpoint_to_go[$iend_1] = 0; } + + # but otherwise .. else { - push @insert_list, $i; + + # do not recombine after a comma unless this will + # leave just 1 more line + return unless ( $n + 1 >= $nmax ); + + # do not recombine if there is a change in + # indentation depth + return + if ( $levels_to_go[$iend_1] != $levels_to_go[$iend_2] ); + + # do not recombine a "complex expression" after a + # comma. "complex" means no parens. + my $saw_paren; + foreach my $ii ( $ibeg_2 .. $iend_2 ) { + if ( $tokens_to_go[$ii] eq '(' ) { + $saw_paren = 1; + last; + } + } + return if $saw_paren; } } - } - # Break after a 'return' followed by a chain of operators - # return ( $^O !~ /win32|dos/i ) - # && ( $^O ne 'VMS' ) - # && ( $^O ne 'OS2' ) - # && ( $^O ne 'MacOS' ); - # To give: - # return - # ( $^O !~ /win32|dos/i ) - # && ( $^O ne 'VMS' ) - # && ( $^O ne 'OS2' ) - # && ( $^O ne 'MacOS' ); - my $i = 0; - if ( $types_to_go[$i] eq 'k' - && $tokens_to_go[$i] eq 'return' - && $ir > $il - && $nesting_depth_to_go[$i] eq $depth_beg ) - { - push @insert_list, $i; - } + # opening paren.. + elsif ( $type_iend_1 eq '(' ) { - return unless (@insert_list); + # No longer doing this + } - # One final check... - # scan second and third lines and be sure there are no assignments - # we want to avoid breaking at an = to make something like this: - # unless ( $icon = - # $html_icons{"$type-$state"} - # or $icon = $html_icons{$type} - # or $icon = $html_icons{$state} ) - for my $n ( 1 .. 2 ) { - my $il_n = $ri_left->[$n]; - my $ir_n = $ri_right->[$n]; - foreach my $i ( $il_n + 1 .. $ir_n ) { - my $type = $types_to_go[$i]; - return - if ( $is_assignment{$type} - && $nesting_depth_to_go[$i] eq $depth_beg ); + elsif ( $type_iend_1 eq ')' ) { + + # No longer doing this } - } - # ok, insert any new break point - if (@insert_list) { - $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); - } - return; -} ## end sub break_equals + # keep a terminal for-semicolon + elsif ( $type_iend_1 eq 'f' ) { + return; + } -{ ## begin closure recombine_breakpoints + # if '=' at end of line ... + elsif ( $is_assignment{$type_iend_1} ) { - # This routine is called once per batch to see if it would be better - # to combine some of the lines into which the batch has been broken. + # keep break after = if it was in input stream + # this helps prevent 'blinkers' + return + if ( + $old_breakpoint_to_go[$iend_1] - my %is_amp_amp; - my %is_math_op; - my %is_plus_minus; - my %is_mult_div; + # don't strand an isolated '=' + && $iend_1 != $ibeg_1 + ); - BEGIN { + my $is_short_quote = + ( $type_ibeg_2 eq 'Q' + && $ibeg_2 == $iend_2 + && token_sequence_length( $ibeg_2, $ibeg_2 ) < + $rOpts_short_concatenation_item_length ); + my $is_ternary = ( + $type_ibeg_1 eq '?' && ( $ibeg_3 >= 0 + && $types_to_go[$ibeg_3] eq ':' ) + ); - my @q; - @q = qw( && || ); - @is_amp_amp{@q} = (1) x scalar(@q); + # always join an isolated '=', a short quote, or if this + # will put ?/: at start of adjacent lines + if ( $ibeg_1 != $iend_1 + && !$is_short_quote + && !$is_ternary ) + { + return + unless ( + ( - @q = qw( + - * / ); - @is_math_op{@q} = (1) x scalar(@q); + # unless we can reduce this to two lines + $nmax < $n + 2 - @q = qw( + - ); - @is_plus_minus{@q} = (1) x scalar(@q); + # or three lines, the last with a leading + # semicolon + || ( $nmax == $n + 2 + && $types_to_go[$ibeg_nmax] eq ';' ) - @q = qw( * / ); - @is_mult_div{@q} = (1) x scalar(@q); - } + # or the next line ends with a here doc + || $type_iend_2 eq 'h' - sub Debug_dump_breakpoints { + # or the next line ends in an open paren or + # brace and the break hasn't been forced + # [dima.t] + || ( !$forced_breakpoint_to_go[$iend_1] + && $type_iend_2 eq '{' ) + ) - # Debug routine to dump current breakpoints...not normally called - # We are given indexes to the current lines: - # $ri_beg = ref to array of BEGinning indexes of each line - # $ri_end = ref to array of ENDing indexes of each line - my ( $self, $ri_beg, $ri_end, $msg ) = @_; - print STDERR "----Dumping breakpoints from: $msg----\n"; - for my $n ( 0 .. @{$ri_end} - 1 ) { - my $ibeg = $ri_beg->[$n]; - my $iend = $ri_end->[$n]; - my $text = EMPTY_STRING; - foreach my $i ( $ibeg .. $iend ) { - $text .= $tokens_to_go[$i]; + # do not recombine if the two lines might align + # well this is a very approximate test for this + && ( + + # RT#127633 - the leading tokens are not + # operators + ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] ) + + # or they are different + || ( $ibeg_3 >= 0 + && $type_ibeg_2 ne $types_to_go[$ibeg_3] ) + ) + ); + + if ( + + # Recombine if we can make two lines + $nmax >= $n + 2 + + # -lp users often prefer this: + # my $title = function($env, $env, $sysarea, + # "bubba Borrower Entry"); + # so we will recombine if -lp is used we have + # ending comma + && !( + $ibeg_3 > 0 + && ref( $leading_spaces_to_go[$ibeg_3] ) + && $type_iend_2 eq ',' + ) + ) + { + + # otherwise, scan the rhs line up to last token for + # complexity. Note that we are not counting the last token + # in case it is an opening paren. + my $ok = simple_rhs( $ri_end, $n, $nmax, $ibeg_2, $iend_2 ); + return if ( !$ok ); + + } + } + + unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) { + $forced_breakpoint_to_go[$iend_1] = 0; } - print STDERR "$n ($ibeg:$iend) $text\n"; } - print STDERR "----\n"; - return; - } ## end sub Debug_dump_breakpoints - sub delete_one_line_semicolons { + # for keywords.. + elsif ( $type_iend_1 eq 'k' ) { - my ( $self, $ri_beg, $ri_end ) = @_; - my $rLL = $self->[_rLL_]; - my $K_opening_container = $self->[_K_opening_container_]; + # make major control keywords stand out + # (recombine.t) + return + if ( - # Walk down the lines of this batch and delete any semicolons - # terminating one-line blocks; - my $nmax = @{$ri_end} - 1; + #/^(last|next|redo|return)$/ + $is_last_next_redo_return{ $tokens_to_go[$iend_1] } - foreach my $n ( 0 .. $nmax ) { - my $i_beg = $ri_beg->[$n]; - my $i_e = $ri_end->[$n]; - my $K_beg = $K_to_go[$i_beg]; - my $K_e = $K_to_go[$i_e]; - my $K_end = $K_e; - my $type_end = $rLL->[$K_end]->[_TYPE_]; - if ( $type_end eq '#' ) { - $K_end = $self->K_previous_nonblank($K_end); - if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; } + # but only if followed by multiple lines + && $n < $nmax + ); + + if ( $is_and_or{ $tokens_to_go[$iend_1] } ) { + return + unless $want_break_before{ $tokens_to_go[$iend_1] }; } + } + elsif ( $type_iend_1 eq '.' ) { - # we are looking for a line ending in closing brace - next - unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' ); + # NOTE: the logic here should match that of section 3 so that + # line breaks are independent of choice of break before or after. + # It would be nice to combine them in section 0, but the + # special junction case ') .' makes that difficult. + # This section added to fix issues c172, c174. + my $i_next_nonblank = $ibeg_2; + my $summed_len_1 = $summed_lengths_to_go[ $iend_1 + 1 ] - + $summed_lengths_to_go[$ibeg_1]; + my $summed_len_2 = $summed_lengths_to_go[ $iend_2 + 1 ] - + $summed_lengths_to_go[$ibeg_2]; + my $iend_1_minus = max( $ibeg_1, iprev_to_go($iend_1) ); - # ...and preceded by a semicolon on the same line - my $K_semicolon = $self->K_previous_nonblank($K_end); - next unless defined($K_semicolon); - my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg ); - next if ( $i_semicolon <= $i_beg ); - next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' ); + return + unless ( - # Safety check - shouldn't happen - not critical - # This is not worth throwing a Fault, except in DEVEL_MODE - if ( $types_to_go[$i_semicolon] ne ';' ) { - DEVEL_MODE - && Fault("unexpected type looking for semicolon"); - next; + # ... unless there is just one and we can reduce + # this to two lines if we do. For example, this + # + # + # $bodyA .= + # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;' + # + # looks better than this: + # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;' . + # '$args .= $pat;' + + # check for 2 lines, not in a long broken '.' chain + ( $n == 2 && $n == $nmax && $type_iend_1 ne $type_iend_2 ) + + # ... or this would strand a short quote , like this + # "some long quote" . + # "\n"; + || ( + $types_to_go[$i_next_nonblank] eq 'Q' + && $i_next_nonblank >= $iend_2 - 2 + && $token_lengths_to_go[$i_next_nonblank] < + $rOpts_short_concatenation_item_length + + # additional constraints to fix c167 + && ( $types_to_go[$iend_1_minus] ne 'Q' + || $summed_len_2 < $summed_len_1 ) + ) + ); + } + return ( 1, $skip_Section_3 ); + } ## end sub recombine_section_2 + + sub simple_rhs { + + my ( $ri_end, $n, $nmax, $ibeg_2, $iend_2 ) = @_; + + # Scan line ibeg_2 to $iend_2 up to last token for complexity. + # We are not counting the last token in case it is an opening paren. + # Return: + # true if rhs is simple, ok to recombine + # false otherwise + + my $tv = 0; + my $depth = $nesting_depth_to_go[$ibeg_2]; + foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) { + if ( $nesting_depth_to_go[$i] != $depth ) { + $tv++; + last if ( $tv > 1 ); } + $depth = $nesting_depth_to_go[$i]; + } - # ... with the corresponding opening brace on the same line - my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_]; - my $K_opening = $K_opening_container->{$type_sequence}; - next unless ( defined($K_opening) ); - my $i_opening = $i_beg + ( $K_opening - $K_beg ); - next if ( $i_opening < $i_beg ); + # ok to recombine if no level changes before + # last token + if ( $tv > 0 ) { - # ... and only one semicolon between these braces - my $semicolon_count = 0; - foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) { - if ( $rLL->[$K]->[_TYPE_] eq ';' ) { - $semicolon_count++; - last; + # otherwise, do not recombine if more than + # two level changes. + return if ( $tv > 1 ); + + # check total complexity of the two + # adjacent lines that will occur if we do + # this join + my $istop = + ( $n < $nmax ) + ? $ri_end->[ $n + 1 ] + : $iend_2; + foreach my $i ( $iend_2 .. $istop ) { + if ( $nesting_depth_to_go[$i] != $depth ) { + $tv++; + last if ( $tv > 2 ); } + $depth = $nesting_depth_to_go[$i]; } - next if ($semicolon_count); - # ...ok, then make the semicolon invisible - my $len = $token_lengths_to_go[$i_semicolon]; - $tokens_to_go[$i_semicolon] = EMPTY_STRING; - $token_lengths_to_go[$i_semicolon] = 0; - $rLL->[$K_semicolon]->[_TOKEN_] = EMPTY_STRING; - $rLL->[$K_semicolon]->[_TOKEN_LENGTH_] = 0; - foreach ( $i_semicolon .. $max_index_to_go ) { - $summed_lengths_to_go[ $_ + 1 ] -= $len; - } + # do not recombine if total is more than 2 + # level changes + return if ( $tv > 2 ); } - return; - } ## end sub delete_one_line_semicolons + return 1; + } ## end sub simple_rhs - use constant DEBUG_RECOMBINE => 0; + sub recombine_section_3 { - sub recombine_breakpoints { + my ( $ri_beg, $ri_end, $n, $this_line_is_semicolon_terminated ) = @_; - # We are given indexes to the current lines: - # $ri_beg = ref to array of BEGinning indexes of each line - # $ri_end = ref to array of ENDing indexes of each line - my ( $self, $ri_beg, $ri_end, $rbond_strength_to_go ) = @_; + # Recombine Section 3: + # Examine token at $ibeg_2 (right end of first line of pair) - # sub break_long_lines is very liberal in setting line breaks - # for long lines, always setting breaks at good breakpoints, even - # when that creates small lines. Sometimes small line fragments - # are produced which would look better if they were combined. - # That's the task of this routine. + # Here are Indexes of the endpoint tokens of the two lines: + # + # -----line $n-1--- | -----line $n----- + # $ibeg_1 $iend_1 | $ibeg_2 $iend_2 + # ^ + # | + # -----Section 3 looks at this token - # do nothing under extreme stress - return if ( $stress_level_alpha < 1 && !DEVEL_MODE ); + # Returns: + # (nothing) => do not join lines + # 1, bs_tweak => ok to join lines + + # $bstweak is a small tolerance to add to bond strengths + my $bs_tweak = 0; + + my $nmax = @{$ri_end} - 1; + my $ibeg_1 = $ri_beg->[ $n - 1 ]; + my $iend_1 = $ri_end->[ $n - 1 ]; + my $iend_2 = $ri_end->[$n]; + my $ibeg_2 = $ri_beg->[$n]; + + my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1; + my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1; + my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1; + my $ibeg_nmax = $ri_beg->[$nmax]; + + my $type_iend_1 = $types_to_go[$iend_1]; + my $type_iend_2 = $types_to_go[$iend_2]; + my $type_ibeg_1 = $types_to_go[$ibeg_1]; + my $type_ibeg_2 = $types_to_go[$ibeg_2]; + + # handle lines with leading &&, || + if ( $is_amp_amp{$type_ibeg_2} ) { + + # ok to recombine if it follows a ? or : + # and is followed by an open paren.. + my $ok = + ( $is_ternary{$type_ibeg_1} && $tokens_to_go[$iend_2] eq '(' ) + + # or is followed by a ? or : at same depth + # + # We are looking for something like this. We can + # recombine the && line with the line above to make the + # structure more clear: + # return + # exists $G->{Attr}->{V} + # && exists $G->{Attr}->{V}->{$u} + # ? %{ $G->{Attr}->{V}->{$u} } + # : (); + # + # We should probably leave something like this alone: + # return + # exists $G->{Attr}->{E} + # && exists $G->{Attr}->{E}->{$u} + # && exists $G->{Attr}->{E}->{$u}->{$v} + # ? %{ $G->{Attr}->{E}->{$u}->{$v} } + # : (); + # so that we either have all of the &&'s (or ||'s) + # on one line, as in the first example, or break at + # each one as in the second example. However, it + # sometimes makes things worse to check for this because + # it prevents multiple recombinations. So this is not done. + || ( $ibeg_3 >= 0 + && $is_ternary{ $types_to_go[$ibeg_3] } + && $nesting_depth_to_go[$ibeg_3] == + $nesting_depth_to_go[$ibeg_2] ); + + # Combine a trailing && term with an || term: fix for + # c060 This is rare but can happen. + $ok ||= 1 + if ( $ibeg_3 < 0 + && $type_ibeg_2 eq '&&' + && $type_ibeg_1 eq '||' + && $nesting_depth_to_go[$ibeg_2] == + $nesting_depth_to_go[$ibeg_1] ); + + return if !$ok && $want_break_before{$type_ibeg_2}; + $forced_breakpoint_to_go[$iend_1] = 0; + + # tweak the bond strength to give this joint priority + # over ? and : + $bs_tweak = 0.25; + } + + # Identify and recombine a broken ?/: chain + elsif ( $type_ibeg_2 eq '?' ) { + + # Do not recombine different levels + my $lev = $levels_to_go[$ibeg_2]; + return if ( $lev ne $levels_to_go[$ibeg_1] ); + + # Do not recombine a '?' if either next line or + # previous line does not start with a ':'. The reasons + # are that (1) no alignment of the ? will be possible + # and (2) the expression is somewhat complex, so the + # '?' is harder to see in the interior of the line. + my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':'; + my $precedes_colon = $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':'; + return unless ( $follows_colon || $precedes_colon ); + + # we will always combining a ? line following a : line + if ( !$follows_colon ) { + + # ...otherwise recombine only if it looks like a + # chain. we will just look at a few nearby lines + # to see if this looks like a chain. + my $local_count = 0; + foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) { + $local_count++ + if $ii >= 0 + && $types_to_go[$ii] eq ':' + && $levels_to_go[$ii] == $lev; + } + return unless ( $local_count > 1 ); + } + $forced_breakpoint_to_go[$iend_1] = 0; + } + + # do not recombine lines with leading '.' + elsif ( $type_ibeg_2 eq '.' ) { + my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 ); + my $summed_len_1 = $summed_lengths_to_go[ $iend_1 + 1 ] - + $summed_lengths_to_go[$ibeg_1]; + my $summed_len_2 = $summed_lengths_to_go[ $iend_2 + 1 ] - + $summed_lengths_to_go[$ibeg_2]; - my $rK_weld_right = $self->[_rK_weld_right_]; - my $rK_weld_left = $self->[_rK_weld_left_]; + return + unless ( - my $nmax_start = @{$ri_end} - 1; - return if ( $nmax_start <= 0 ); + # ... unless there is just one and we can reduce + # this to two lines if we do. For example, this + # + # + # $bodyA .= + # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;' + # + # looks better than this: + # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;' + # . '$args .= $pat;' - # Make a list of all good joining tokens between the lines - # n-1 and n. - my @joint; + ( $n == 2 && $n == $nmax && $type_ibeg_1 ne $type_ibeg_2 ) - # Break the total batch sub-sections with lengths short enough to - # recombine - my $rsections = []; - my $nbeg_sec = 0; - my $nend_sec; - my $nmax_section = 0; - foreach my $nn ( 1 .. $nmax_start ) { - my $ibeg_1 = $ri_beg->[ $nn - 1 ]; - my $iend_1 = $ri_end->[ $nn - 1 ]; - my $iend_2 = $ri_end->[$nn]; - my $ibeg_2 = $ri_beg->[$nn]; + # ... or this would strand a short quote , like this + # . "some long quote" + # . "\n"; + || ( + $types_to_go[$i_next_nonblank] eq 'Q' + && $i_next_nonblank >= $iend_2 - 1 + && $token_lengths_to_go[$i_next_nonblank] < + $rOpts_short_concatenation_item_length + + # additional constraints to fix c167 + && ( + $types_to_go[$iend_1] ne 'Q' + + # allow a term shorter than the previous term + || $summed_len_2 < $summed_len_1 + + # or allow a short semicolon-terminated term if this + # makes two lines (see c169) + || ( $n == 2 + && $n == $nmax + && $this_line_is_semicolon_terminated ) + ) + ) + ); + } + + # handle leading keyword.. + elsif ( $type_ibeg_2 eq 'k' ) { + + # handle leading "or" + if ( $tokens_to_go[$ibeg_2] eq 'or' ) { + return + unless ( + $this_line_is_semicolon_terminated + && ( + $type_ibeg_1 eq '}' + || ( + + # following 'if' or 'unless' or 'or' + $type_ibeg_1 eq 'k' + && $is_if_unless{ $tokens_to_go[$ibeg_1] } + + # important: only combine a very simple + # or statement because the step below + # may have combined a trailing 'and' + # with this or, and we do not want to + # then combine everything together + && ( $iend_2 - $ibeg_2 <= 7 ) + ) + ) + ); + + #X: RT #81854 + $forced_breakpoint_to_go[$iend_1] = 0 + unless ( $old_breakpoint_to_go[$iend_1] ); + } + + # handle leading 'and' and 'xor' + elsif ($tokens_to_go[$ibeg_2] eq 'and' + || $tokens_to_go[$ibeg_2] eq 'xor' ) + { + + # Decide if we will combine a single terminal 'and' + # after an 'if' or 'unless'. + + # This looks best with the 'and' on the same + # line as the 'if': + # + # $a = 1 + # if $seconds and $nu < 2; + # + # But this looks better as shown: + # + # $a = 1 + # if !$this->{Parents}{$_} + # or $this->{Parents}{$_} eq $_; + # + return + unless ( + $this_line_is_semicolon_terminated + && ( - # Define the joint variable - my ( $itok, $itokp, $itokm ); - foreach my $itest ( $iend_1, $ibeg_2 ) { - my $type = $types_to_go[$itest]; - if ( $is_math_op{$type} - || $is_amp_amp{$type} - || $is_assignment{$type} - || $type eq ':' ) - { - $itok = $itest; - } + # following 'if' or 'unless' or 'or' + $type_ibeg_1 eq 'k' + && ( $is_if_unless{ $tokens_to_go[$ibeg_1] } + || $tokens_to_go[$ibeg_1] eq 'or' ) + ) + ); } - $joint[$nn] = [$itok]; - # Update the section list - my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 ); - if ( - $excess <= 1 + # handle leading "if" and "unless" + elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) { - # The number 5 here is an arbitrary small number intended - # to keep most small matches in one sub-section. - || ( defined($nend_sec) - && ( $nn < 5 || $nmax_start - $nn < 5 ) ) - ) - { - $nend_sec = $nn; + # Combine something like: + # next + # if ( $lang !~ /${l}$/i ); + # into: + # next if ( $lang !~ /${l}$/i ); + return + unless ( + $this_line_is_semicolon_terminated + + # previous line begins with 'and' or 'or' + && $type_ibeg_1 eq 'k' + && $is_and_or{ $tokens_to_go[$ibeg_1] } + + ); } + + # handle all other leading keywords else { - if ( defined($nend_sec) ) { - push @{$rsections}, [ $nbeg_sec, $nend_sec ]; - my $num = $nend_sec - $nbeg_sec; - if ( $num > $nmax_section ) { $nmax_section = $num } - $nbeg_sec = $nn; - $nend_sec = undef; + + # keywords look best at start of lines, + # but combine things like "1 while" + unless ( $is_assignment{$type_iend_1} ) { + return + if ( ( $type_iend_1 ne 'k' ) + && ( $tokens_to_go[$ibeg_2] ne 'while' ) ); } - $nbeg_sec = $nn; } } - if ( defined($nend_sec) ) { - push @{$rsections}, [ $nbeg_sec, $nend_sec ]; - my $num = $nend_sec - $nbeg_sec; - if ( $num > $nmax_section ) { $nmax_section = $num } - } - my $num_sections = @{$rsections}; + # similar treatment of && and || as above for 'and' and + # 'or': NOTE: This block of code is currently bypassed + # because of a previous block but is retained for possible + # future use. + elsif ( $is_amp_amp{$type_ibeg_2} ) { - # This is potentially an O(n-squared) loop, but not critical, so we can - # put a finite limit on the total number of iterations. This is - # suggested by issue c118, which pushed about 5.e5 lines through here - # and caused an excessive run time. - - # Three lines of defense have been put in place to prevent excessive - # run times: - # 1. do nothing if formatting under stress (c118 was under stress) - # 2. break into small sub-sections to decrease the maximum n-squared. - # 3. put a finite limit on the number of iterations. - - # Testing shows that most batches only require one or two iterations. - # A very large batch which is broken into sub-sections can require one - # iteration per section. This suggests the limit here, which allows - # up to 10 iterations plus one pass per sub-section. - my $it_count = 0; - my $it_count_max = - 10 + int( 1000 / ( 1 + $nmax_section ) ) + $num_sections; + # maybe looking at something like: + # unless $TEXTONLY || $item =~ m%|p>|a|img)%i; - if ( DEBUG_RECOMBINE > 1 ) { - my $max = 0; - print STDERR - "-----\n$num_sections sections found for nmax=$nmax_start\n"; - foreach my $sect ( @{$rsections} ) { - my ( $nbeg, $nend ) = @{$sect}; - my $num = $nend - $nbeg; - if ( $num > $max ) { $max = $num } - print STDERR "$nbeg $nend\n"; - } - print STDERR "max size=$max of $nmax_start lines\n"; + return + unless ( + $this_line_is_semicolon_terminated + + # previous line begins with an 'if' or 'unless' + # keyword + && $type_ibeg_1 eq 'k' + && $is_if_unless{ $tokens_to_go[$ibeg_1] } + + ); } - # Loop over all sub-sections. Note that we have to work backwards - # from the end of the batch since the sections use original line - # numbers, and the line numbers change as we go. - while ( my $section = pop @{$rsections} ) { - my ( $nbeg, $nend ) = @{$section}; + # handle line with leading = or similar + elsif ( $is_assignment{$type_ibeg_2} ) { + return unless ( $n == 1 || $n == $nmax ); + return if ( $old_breakpoint_to_go[$iend_1] ); + return + unless ( - # number of ending lines to leave untouched in this pass - my $nmax_sec = @{$ri_end} - 1; - my $num_freeze = $nmax_sec - $nend; + # unless we can reduce this to two lines + $nmax == 2 - my $more_to_do = 1; + # or three lines, the last with a leading semicolon + || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' ) - # We keep looping over all of the lines of this batch - # until there are no more possible recombinations - my $nmax_last = $nmax_sec + 1; - my $reverse = 0; + # or the next line ends with a here doc + || $type_iend_2 eq 'h' - while ($more_to_do) { + # or this is a short line ending in ; + || ( $n == $nmax + && $this_line_is_semicolon_terminated ) + ); + $forced_breakpoint_to_go[$iend_1] = 0; + } + return ( 1, $bs_tweak ); + } ## end sub recombine_section_3 - # Safety check for excess total iterations - $it_count++; - if ( $it_count > $it_count_max ) { - goto RETURN; - } +} ## end closure recombine_breakpoints - my $n_best = 0; - my $bs_best; - my $nmax = @{$ri_end} - 1; +sub insert_final_ternary_breaks { - # Safety check for infinite loop: the line count must decrease - unless ( $nmax < $nmax_last ) { + my ( $self, $ri_left, $ri_right ) = @_; - # Shouldn't happen because splice below decreases nmax on - # each iteration. An error can only be due to a recent - # programming change. We better stop here. - if (DEVEL_MODE) { - Fault( -"Program bug-infinite loop in recombine breakpoints\n" - ); - } - $more_to_do = 0; + # Called once per batch to look for and do any final line breaks for + # long ternary chains + + my $nmax = @{$ri_right} - 1; + + # scan the left and right end tokens of all lines + my $i_first_colon = -1; + for my $n ( 0 .. $nmax ) { + my $il = $ri_left->[$n]; + my $ir = $ri_right->[$n]; + my $typel = $types_to_go[$il]; + my $typer = $types_to_go[$ir]; + return if ( $typel eq '?' ); + return if ( $typer eq '?' ); + if ( $typel eq ':' ) { $i_first_colon = $il; last; } + elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; } + } + + # For long ternary chains, + # if the first : we see has its ? is in the interior + # of a preceding line, then see if there are any good + # breakpoints before the ?. + if ( $i_first_colon > 0 ) { + my $i_question = $mate_index_to_go[$i_first_colon]; + if ( defined($i_question) && $i_question > 0 ) { + my @insert_list; + foreach my $ii ( reverse( 0 .. $i_question - 1 ) ) { + my $token = $tokens_to_go[$ii]; + my $type = $types_to_go[$ii]; + + # For now, a good break is either a comma or, + # in a long chain, a 'return'. + # Patch for RT #126633: added the $nmax>1 check to avoid + # breaking after a return for a simple ternary. For longer + # chains the break after return allows vertical alignment, so + # it is still done. So perltidy -wba='?' will not break + # immediately after the return in the following statement: + # sub x { + # return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' : + # 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb'; + # } + if ( + ( + $type eq ',' + || $type eq 'k' && ( $nmax > 1 && $token eq 'return' ) + ) + && $self->in_same_container_i( $ii, $i_question ) + ) + { + push @insert_list, $ii; last; } - $nmax_last = $nmax; - $more_to_do = 0; - my $skip_Section_3; - my $leading_amp_count = 0; - my $this_line_is_semicolon_terminated; + } + + # insert any new break points + if (@insert_list) { + $self->insert_additional_breaks( \@insert_list, $ri_left, + $ri_right ); + } + } + } + return; +} ## end sub insert_final_ternary_breaks - # loop over all remaining lines in this batch - my $nstop = $nmax - $num_freeze; - for my $iter ( $nbeg + 1 .. $nstop ) { +sub insert_breaks_before_list_opening_containers { - # alternating sweep direction gives symmetric results - # for recombining lines which exceed the line length - # such as eval {{{{.... }}}} - my $n; - if ($reverse) { $n = $nbeg + 1 + $nstop - $iter; } - else { $n = $iter } + my ( $self, $ri_left, $ri_right ) = @_; - #---------------------------------------------------------- - # If we join the current pair of lines, - # line $n-1 will become the left part of the joined line - # line $n will become the right part of the joined line - # - # Here are Indexes of the endpoint tokens of the two lines: - # - # -----line $n-1--- | -----line $n----- - # $ibeg_1 $iend_1 | $ibeg_2 $iend_2 - # ^ - # | - # We want to decide if we should remove the line break - # between the tokens at $iend_1 and $ibeg_2 - # - # We will apply a number of ad-hoc tests to see if joining - # here will look ok. The code will just issue a 'next' - # command if the join doesn't look good. If we get through - # the gauntlet of tests, the lines will be recombined. - #---------------------------------------------------------- - # - # beginning and ending tokens of the lines we are working on - my $ibeg_1 = $ri_beg->[ $n - 1 ]; - my $iend_1 = $ri_end->[ $n - 1 ]; - my $iend_2 = $ri_end->[$n]; - my $ibeg_2 = $ri_beg->[$n]; - my $ibeg_nmax = $ri_beg->[$nmax]; - - # combined line cannot be too long - my $excess = - $self->excess_line_length( $ibeg_1, $iend_2, 1 ); - next if ( $excess > 0 ); - - my $type_iend_1 = $types_to_go[$iend_1]; - my $type_iend_2 = $types_to_go[$iend_2]; - my $type_ibeg_1 = $types_to_go[$ibeg_1]; - my $type_ibeg_2 = $types_to_go[$ibeg_2]; - - # terminal token of line 2 if any side comment is ignored: - my $iend_2t = $iend_2; - my $type_iend_2t = $type_iend_2; - - # some beginning indexes of other lines, which may not exist - my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1; - my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1; - my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1; - - my $bs_tweak = 0; - - #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] - - # $nesting_depth_to_go[$ibeg_1] ); - - DEBUG_RECOMBINE > 1 && do { - print STDERR -"RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n"; - }; + # This routine is called once per batch to implement the parameters + # --break-before-hash-brace, etc. - # If line $n is the last line, we set some flags and - # do any special checks for it - if ( $n == $nmax ) { + # Nothing to do if none of these parameters has been set + return unless %break_before_container_types; - # a terminal '{' should stay where it is - # unless preceded by a fat comma - next if ( $type_ibeg_2 eq '{' && $type_iend_1 ne '=>' ); + my $nmax = @{$ri_right} - 1; + return unless ( $nmax >= 0 ); - if ( $type_iend_2 eq '#' - && $iend_2 - $ibeg_2 >= 2 - && $types_to_go[ $iend_2 - 1 ] eq 'b' ) - { - $iend_2t = $iend_2 - 2; - $type_iend_2t = $types_to_go[$iend_2t]; - } + my $rLL = $self->[_rLL_]; - $this_line_is_semicolon_terminated = - $type_iend_2t eq ';'; - } + my $rbreak_before_container_by_seqno = + $self->[_rbreak_before_container_by_seqno_]; + my $rK_weld_left = $self->[_rK_weld_left_]; - #---------------------------------------------------------- - # Recombine Section 0: - # Examine the special token joining this line pair, if any. - # Put as many tests in this section to avoid duplicate code - # and to make formatting independent of whether breaks are - # to the left or right of an operator. - #---------------------------------------------------------- + # scan the ends of all lines + my @insert_list; + for my $n ( 0 .. $nmax ) { + my $il = $ri_left->[$n]; + my $ir = $ri_right->[$n]; + next unless ( $ir > $il ); + my $Kl = $K_to_go[$il]; + my $Kr = $K_to_go[$ir]; + my $Kend = $Kr; + my $type_end = $rLL->[$Kr]->[_TYPE_]; + + # Backup before any side comment + if ( $type_end eq '#' ) { + $Kend = $self->K_previous_nonblank($Kr); + next unless defined($Kend); + $type_end = $rLL->[$Kend]->[_TYPE_]; + } - my ($itok) = @{ $joint[$n] }; - if ($itok) { + # Backup to the start of any weld; fix for b1173. + if ($total_weld_count) { + my $Kend_test = $rK_weld_left->{$Kend}; + if ( defined($Kend_test) && $Kend_test > $Kl ) { + $Kend = $Kend_test; + $Kend_test = $rK_weld_left->{$Kend}; + } - my $type = $types_to_go[$itok]; + # Do not break if we did not back up to the start of a weld + # (shouldn't happen) + next if ( defined($Kend_test) ); + } - if ( $type eq ':' ) { + my $token = $rLL->[$Kend]->[_TOKEN_]; + next unless ( $is_opening_token{$token} ); + next unless ( $Kl < $Kend - 1 ); - # do not join at a colon unless it disobeys the - # break request - if ( $itok eq $iend_1 ) { - next unless $want_break_before{$type}; - } - else { - $leading_amp_count++; - next if $want_break_before{$type}; - } - } ## end if ':' + my $seqno = $rLL->[$Kend]->[_TYPE_SEQUENCE_]; + next unless ( defined($seqno) ); + + # Use the flag which was previously set + next unless ( $rbreak_before_container_by_seqno->{$seqno} ); - # handle math operators + - * / - elsif ( $is_math_op{$type} ) { + # Install a break before this opening token. + my $Kbreak = $self->K_previous_nonblank($Kend); + my $ibreak = $Kbreak - $Kl + $il; + next if ( $ibreak < $il ); + next if ( $nobreak_to_go[$ibreak] ); + push @insert_list, $ibreak; + } - # Combine these lines if this line is a single - # number, or if it is a short term with same - # operator as the previous line. For example, in - # the following code we will combine all of the - # short terms $A, $B, $C, $D, $E, $F, together - # instead of leaving them one per line: - # my $time = - # $A * $B * $C * $D * $E * $F * - # ( 2. * $eps * $sigma * $area ) * - # ( 1. / $tcold**3 - 1. / $thot**3 ); + # insert any new break points + if (@insert_list) { + $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); + } + return; +} ## end sub insert_breaks_before_list_opening_containers - # This can be important in math-intensive code. +sub note_added_semicolon { + my ( $self, $line_number ) = @_; + $self->[_last_added_semicolon_at_] = $line_number; + if ( $self->[_added_semicolon_count_] == 0 ) { + $self->[_first_added_semicolon_at_] = $line_number; + } + $self->[_added_semicolon_count_]++; + write_logfile_entry("Added ';' here\n"); + return; +} ## end sub note_added_semicolon - my $good_combo; +sub note_deleted_semicolon { + my ( $self, $line_number ) = @_; + $self->[_last_deleted_semicolon_at_] = $line_number; + if ( $self->[_deleted_semicolon_count_] == 0 ) { + $self->[_first_deleted_semicolon_at_] = $line_number; + } + $self->[_deleted_semicolon_count_]++; + write_logfile_entry("Deleted unnecessary ';' at line $line_number\n"); + return; +} ## end sub note_deleted_semicolon - my $itokp = min( $inext_to_go[$itok], $iend_2 ); - my $itokpp = min( $inext_to_go[$itokp], $iend_2 ); - my $itokm = max( $iprev_to_go[$itok], $ibeg_1 ); - my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 ); +sub note_embedded_tab { + my ( $self, $line_number ) = @_; + $self->[_embedded_tab_count_]++; + $self->[_last_embedded_tab_at_] = $line_number; + if ( !$self->[_first_embedded_tab_at_] ) { + $self->[_first_embedded_tab_at_] = $line_number; + } - # check for a number on the right - if ( $types_to_go[$itokp] eq 'n' ) { + if ( $self->[_embedded_tab_count_] <= MAX_NAG_MESSAGES ) { + write_logfile_entry("Embedded tabs in quote or pattern\n"); + } + return; +} ## end sub note_embedded_tab - # ok if nothing else on right - if ( $itokp == $iend_2 ) { - $good_combo = 1; - } - else { - - # look one more token to right.. - # okay if math operator or some termination - $good_combo = - ( ( $itokpp == $iend_2 ) - && $is_math_op{ $types_to_go[$itokpp] - } ) - || $types_to_go[$itokpp] =~ /^[#,;]$/; - } - } +use constant DEBUG_CORRECT_LP => 0; - # check for a number on the left - if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) { +sub correct_lp_indentation { - # okay if nothing else to left - if ( $itokm == $ibeg_1 ) { - $good_combo = 1; - } + # When the -lp option is used, we need to make a last pass through + # each line to correct the indentation positions in case they differ + # from the predictions. This is necessary because perltidy uses a + # predictor/corrector method for aligning with opening parens. The + # predictor is usually good, but sometimes stumbles. The corrector + # tries to patch things up once the actual opening paren locations + # are known. + my ( $self, $ri_first, $ri_last ) = @_; - # otherwise look one more token to left - else { + # first remove continuation indentation if appropriate + my $max_line = @{$ri_first} - 1; - # okay if math operator, comma, or assignment - $good_combo = ( $itokmm == $ibeg_1 ) - && ( $is_math_op{ $types_to_go[$itokmm] } - || $types_to_go[$itokmm] =~ /^[,]$/ - || $is_assignment{ $types_to_go[$itokmm] - } ); - } - } + #--------------------------------------------------------------------------- + # PASS 1: reduce indentation if necessary at any long one-line blocks (c098) + #--------------------------------------------------------------------------- - # look for a single short token either side of the - # operator - if ( !$good_combo ) { + # The point is that sub 'starting_one_line_block' made one-line blocks based + # on default indentation, not -lp indentation. So some of the one-line + # blocks may be too long when given -lp indentation. We will fix that now + # if possible, using the list of these closing block indexes. + my $ri_starting_one_line_block = + $self->[_this_batch_]->[_ri_starting_one_line_block_]; + if ( @{$ri_starting_one_line_block} ) { + $self->correct_lp_indentation_pass_1( $ri_first, $ri_last, + $ri_starting_one_line_block ); + } - # Slight adjustment factor to make results - # independent of break before or after operator - # in long summed lists. (An operator and a - # space make two spaces). - my $two = ( $itok eq $iend_1 ) ? 2 : 0; + #------------------------------------------------------------------- + # PASS 2: look for and fix other problems in each line of this batch + #------------------------------------------------------------------- - $good_combo = + # look at each output line ... + foreach my $line ( 0 .. $max_line ) { + my $ibeg = $ri_first->[$line]; + my $iend = $ri_last->[$line]; - # numbers or id's on both sides of this joint - $types_to_go[$itokp] =~ /^[in]$/ - && $types_to_go[$itokm] =~ /^[in]$/ + # looking at each token in this output line ... + foreach my $i ( $ibeg .. $iend ) { - # one of the two lines must be short: - && ( - ( - # no more than 2 nonblank tokens right - # of joint - $itokpp == $iend_2 - - # short - && token_sequence_length( - $itokp, $iend_2 - ) < $two + - $rOpts_short_concatenation_item_length - ) - || ( - # no more than 2 nonblank tokens left of - # joint - $itokmm == $ibeg_1 - - # short - && token_sequence_length( - $ibeg_1, $itokm - ) < 2 - $two + - $rOpts_short_concatenation_item_length - ) + # How many space characters to place before this token + # for special alignment. Actual padding is done in the + # continue block. - ) + # looking for next unvisited indentation item ... + my $indentation = $leading_spaces_to_go[$i]; - # keep pure terms; don't mix +- with */ - && !( - $is_plus_minus{$type} - && ( $is_mult_div{ $types_to_go[$itokmm] } - || $is_mult_div{ $types_to_go[$itokpp] } - ) - ) - && !( - $is_mult_div{$type} - && ( $is_plus_minus{ $types_to_go[$itokmm] } - || $is_plus_minus{ $types_to_go[$itokpp] - } ) - ) + # This is just for indentation objects (c098) + next unless ( ref($indentation) ); - ; - } + # Visit each indentation object just once + next if ( $indentation->get_marked() ); - # it is also good to combine if we can reduce to 2 - # lines - if ( !$good_combo ) { + # Mark first visit + $indentation->set_marked(1); - # index on other line where same token would be - # in a long chain. - my $iother = - ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1; + # Skip indentation objects which do not align with container tokens + my $align_seqno = $indentation->get_align_seqno(); + next unless ($align_seqno); - $good_combo = - $n == 2 - && $n == $nmax - && $types_to_go[$iother] ne $type; - } + # Skip a container which is entirely on this line + my $Ko = $self->[_K_opening_container_]->{$align_seqno}; + my $Kc = $self->[_K_closing_container_]->{$align_seqno}; + if ( defined($Ko) && defined($Kc) ) { + next if ( $Ko >= $K_to_go[$ibeg] && $Kc <= $K_to_go[$iend] ); + } - next unless ($good_combo); + # Note on flag '$do_not_pad': + # We want to avoid a situation like this, where the aligner + # inserts whitespace before the '=' to align it with a previous + # '=', because otherwise the parens might become mis-aligned in a + # situation like this, where the '=' has become aligned with the + # previous line, pushing the opening '(' forward beyond where we + # want it. + # + # $mkFloor::currentRoom = ''; + # $mkFloor::c_entry = $c->Entry( + # -width => '10', + # -relief => 'sunken', + # ... + # ); + # + # We leave it to the aligner to decide how to do this. + if ( $line == 1 && $i == $ibeg ) { + $self->[_this_batch_]->[_do_not_pad_] = 1; + } - } ## end math + #-------------------------------------------- + # Now see what the error is and try to fix it + #-------------------------------------------- + my $closing_index = $indentation->get_closed(); + my $predicted_pos = $indentation->get_spaces(); - elsif ( $is_amp_amp{$type} ) { - ##TBD - } ## end &&, || + # Find actual position: + my $actual_pos; - elsif ( $is_assignment{$type} ) { - ##TBD - } ## end assignment - } + if ( $i == $ibeg ) { - #---------------------------------------------------------- - # Recombine Section 1: - # Join welded nested containers immediately - #---------------------------------------------------------- + # Case 1: token is first character of of batch - table lookup + if ( $line == 0 ) { - if ( - $total_weld_count - && ( $type_sequence_to_go[$iend_1] - && defined( $rK_weld_right->{ $K_to_go[$iend_1] } ) - || $type_sequence_to_go[$ibeg_2] - && defined( $rK_weld_left->{ $K_to_go[$ibeg_2] } ) ) - ) - { - $n_best = $n; - last; - } + $actual_pos = $predicted_pos; - $reverse = 0; + my ( $indent, $offset, $is_leading, $exists ) = + get_saved_opening_indentation($align_seqno); + if ( defined($indent) ) { - #---------------------------------------------------------- - # Recombine Section 2: - # Examine token at $iend_1 (right end of first line of pair) - #---------------------------------------------------------- + # NOTE: we could use '1' here if no space after + # opening and '2' if want space; it is hardwired at 1 + # like -gnu-style. But it is probably best to leave + # this alone because changing it would change + # formatting of much existing code without any + # significant benefit. + $actual_pos = get_spaces($indent) + $offset + 1; + } + } - # an isolated '}' may join with a ';' terminated segment - if ( $type_iend_1 eq '}' ) { - - # Check for cases where combining a semicolon terminated - # statement with a previous isolated closing paren will - # allow the combined line to be outdented. This is - # generally a good move. For example, we can join up - # the last two lines here: - # ( - # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, - # $size, $atime, $mtime, $ctime, $blksize, $blocks - # ) - # = stat($file); - # - # to get: - # ( - # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, - # $size, $atime, $mtime, $ctime, $blksize, $blocks - # ) = stat($file); - # - # which makes the parens line up. - # - # Another example, from Joe Matarazzo, probably looks best - # with the 'or' clause appended to the trailing paren: - # $self->some_method( - # PARAM1 => 'foo', - # PARAM2 => 'bar' - # ) or die "Some_method didn't work"; - # - # But we do not want to do this for something like the -lp - # option where the paren is not outdentable because the - # trailing clause will be far to the right. - # - # The logic here is synchronized with the logic in sub - # sub final_indentation_adjustment, which actually does - # the outdenting. - # - $skip_Section_3 ||= $this_line_is_semicolon_terminated - - # only one token on last line - && $ibeg_1 == $iend_1 - - # must be structural paren - && $tokens_to_go[$iend_1] eq ')' - - # style must allow outdenting, - && !$closing_token_indentation{')'} - - # only leading '&&', '||', and ':' if no others seen - # (but note: our count made below could be wrong - # due to intervening comments) - && ( $leading_amp_count == 0 - || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ ) - - # but leading colons probably line up with a - # previous colon or question (count could be wrong). - && $type_ibeg_2 ne ':' - - # only one step in depth allowed. this line must not - # begin with a ')' itself. - && ( $nesting_depth_to_go[$iend_1] == - $nesting_depth_to_go[$iend_2] + 1 ); - - # YVES patch 2 of 2: - # Allow cuddled eval chains, like this: - # eval { - # #STUFF; - # 1; # return true - # } or do { - # #handle error - # }; - # This patch works together with a patch in - # setting adjusted indentation (where the closing eval - # brace is outdented if possible). - # The problem is that an 'eval' block has continuation - # indentation and it looks better to undo it in some - # cases. If we do not use this patch we would get: - # eval { - # #STUFF; - # 1; # return true - # } - # or do { - # #handle error - # }; - # The alternative, for uncuddled style, is to create - # a patch in final_indentation_adjustment which undoes - # the indentation of a leading line like 'or do {'. - # This doesn't work well with -icb through - if ( - $block_type_to_go[$iend_1] eq 'eval' - && !ref( $leading_spaces_to_go[$iend_1] ) - && !$rOpts_indent_closing_brace - && $tokens_to_go[$iend_2] eq '{' - && ( - ( $type_ibeg_2 =~ /^(\&\&|\|\|)$/ ) - || ( $type_ibeg_2 eq 'k' - && $is_and_or{ $tokens_to_go[$ibeg_2] } ) - || $is_if_unless{ $tokens_to_go[$ibeg_2] } - ) - ) - { - $skip_Section_3 ||= 1; - } + # Case 2: token starts a new line - use length of previous line + else { - next - unless ( - $skip_Section_3 + my $ibegm = $ri_first->[ $line - 1 ]; + my $iendm = $ri_last->[ $line - 1 ]; + $actual_pos = total_line_length( $ibegm, $iendm ); - # handle '.' and '?' specially below - || ( $type_ibeg_2 =~ /^[\.\?]$/ ) + # follow -pt style + ++$actual_pos + if ( $types_to_go[ $iendm + 1 ] eq 'b' ); - # fix for c054 (unusual -pbp case) - || $type_ibeg_2 eq '==' + } + } - ); - } + # Case 3: $i>$ibeg: token is mid-line - use length to previous token + else { - elsif ( $type_iend_1 eq '{' ) { + $actual_pos = total_line_length( $ibeg, $i - 1 ); - # YVES - # honor breaks at opening brace - # Added to prevent recombining something like this: - # } || eval { package main; - next if $forced_breakpoint_to_go[$iend_1]; + # for mid-line token, we must check to see if all + # additional lines have continuation indentation, + # and remove it if so. Otherwise, we do not get + # good alignment. + if ( $closing_index > $iend ) { + my $ibeg_next = $ri_first->[ $line + 1 ]; + if ( $ci_levels_to_go[$ibeg_next] > 0 ) { + $self->undo_lp_ci( $line, $i, $closing_index, + $ri_first, $ri_last ); } + } + } - # do not recombine lines with ending &&, ||, - elsif ( $is_amp_amp{$type_iend_1} ) { - next unless $want_break_before{$type_iend_1}; - } + # By how many spaces (plus or minus) would we need to increase the + # indentation to get alignment with the opening token? + my $move_right = $actual_pos - $predicted_pos; - # Identify and recombine a broken ?/: chain - elsif ( $type_iend_1 eq '?' ) { + if (DEBUG_CORRECT_LP) { + my $tok = substr( $tokens_to_go[$i], 0, 8 ); + my $avail = $self->get_available_spaces_to_go($ibeg); + print +"CORRECT_LP for seq=$align_seqno, predicted pos=$predicted_pos actual=$actual_pos => move right=$move_right available=$avail i=$i max=$max_index_to_go tok=$tok\n"; + } - # Do not recombine different levels - next - if ( - $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] ); + # nothing more to do if no error to correct (gnu2.t) + if ( $move_right == 0 ) { + $indentation->set_recoverable_spaces($move_right); + next; + } - # do not recombine unless next line ends in : - next unless $type_iend_2 eq ':'; - } + # Get any collapsed length defined for -xlp + my $collapsed_length = + $self->[_rcollapsed_length_by_seqno_]->{$align_seqno}; + $collapsed_length = 0 unless ( defined($collapsed_length) ); - # for lines ending in a comma... - elsif ( $type_iend_1 eq ',' ) { + if (DEBUG_CORRECT_LP) { + print +"CORRECT_LP for seq=$align_seqno, collapsed length is $collapsed_length\n"; + } - # Do not recombine at comma which is following the - # input bias. - # TODO: might be best to make a special flag - next if ( $old_breakpoint_to_go[$iend_1] ); + # if we have not seen closure for this indentation in this batch, + # and do not have a collapsed length estimate, we can only pass on + # a request to the vertical aligner + if ( $closing_index < 0 && !$collapsed_length ) { + $indentation->set_recoverable_spaces($move_right); + next; + } - # An isolated '},' may join with an identifier + ';' - # This is useful for the class of a 'bless' statement - # (bless.t) - if ( $type_ibeg_1 eq '}' - && $type_ibeg_2 eq 'i' ) - { - next - unless ( ( $ibeg_1 == ( $iend_1 - 1 ) ) - && ( $iend_2 == ( $ibeg_2 + 1 ) ) - && $this_line_is_semicolon_terminated ); + # If necessary, look ahead to see if there is really any leading + # whitespace dependent on this whitespace, and also find the + # longest line using this whitespace. Since it is always safe to + # move left if there are no dependents, we only need to do this if + # we may have dependent nodes or need to move right. - # override breakpoint - $forced_breakpoint_to_go[$iend_1] = 0; - } + my $have_child = $indentation->get_have_child(); + my %saw_indentation; + my $line_count = 1; + $saw_indentation{$indentation} = $indentation; - # but otherwise .. - else { + # How far can we move right before we hit the limit? + # let $right_margen = the number of spaces that we can increase + # the current indentation before hitting the maximum line length. + my $right_margin = 0; - # do not recombine after a comma unless this will - # leave just 1 more line - next unless ( $n + 1 >= $nmax ); + if ( $have_child || $move_right > 0 ) { + $have_child = 0; - # do not recombine if there is a change in - # indentation depth - next - if ( $levels_to_go[$iend_1] != - $levels_to_go[$iend_2] ); - - # do not recombine a "complex expression" after a - # comma. "complex" means no parens. - my $saw_paren; - foreach my $ii ( $ibeg_2 .. $iend_2 ) { - if ( $tokens_to_go[$ii] eq '(' ) { - $saw_paren = 1; - last; - } - } - next if $saw_paren; - } - } + # include estimated collapsed length for incomplete containers + my $max_length = 0; + if ( $Kc > $K_to_go[$max_index_to_go] ) { + $max_length = $collapsed_length + $predicted_pos; + } - # opening paren.. - elsif ( $type_iend_1 eq '(' ) { + if ( $i == $ibeg ) { + my $length = total_line_length( $ibeg, $iend ); + if ( $length > $max_length ) { $max_length = $length } + } - # No longer doing this - } + # look ahead at the rest of the lines of this batch.. + foreach my $line_t ( $line + 1 .. $max_line ) { + my $ibeg_t = $ri_first->[$line_t]; + my $iend_t = $ri_last->[$line_t]; + last if ( $closing_index <= $ibeg_t ); - elsif ( $type_iend_1 eq ')' ) { + # remember all different indentation objects + my $indentation_t = $leading_spaces_to_go[$ibeg_t]; + $saw_indentation{$indentation_t} = $indentation_t; + $line_count++; - # No longer doing this + # remember longest line in the group + my $length_t = total_line_length( $ibeg_t, $iend_t ); + if ( $length_t > $max_length ) { + $max_length = $length_t; } + } - # keep a terminal for-semicolon - elsif ( $type_iend_1 eq 'f' ) { - next; - } + $right_margin = + $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] - + $max_length; + if ( $right_margin < 0 ) { $right_margin = 0 } + } - # if '=' at end of line ... - elsif ( $is_assignment{$type_iend_1} ) { + my $first_line_comma_count = + grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ]; + my $comma_count = $indentation->get_comma_count(); + my $arrow_count = $indentation->get_arrow_count(); - # keep break after = if it was in input stream - # this helps prevent 'blinkers' - next - if ( - $old_breakpoint_to_go[$iend_1] + # This is a simple approximate test for vertical alignment: + # if we broke just after an opening paren, brace, bracket, + # and there are 2 or more commas in the first line, + # and there are no '=>'s, + # then we are probably vertically aligned. We could set + # an exact flag in sub break_lists, but this is good + # enough. + my $indentation_count = keys %saw_indentation; + my $is_vertically_aligned = + ( $i == $ibeg + && $first_line_comma_count > 1 + && $indentation_count == 1 + && ( $arrow_count == 0 || $arrow_count == $line_count ) ); - # don't strand an isolated '=' - && $iend_1 != $ibeg_1 - ); + # Make the move if possible .. + if ( - my $is_short_quote = - ( $type_ibeg_2 eq 'Q' - && $ibeg_2 == $iend_2 - && token_sequence_length( $ibeg_2, $ibeg_2 ) < - $rOpts_short_concatenation_item_length ); - my $is_ternary = ( - $type_ibeg_1 eq '?' && ( $ibeg_3 >= 0 - && $types_to_go[$ibeg_3] eq ':' ) - ); + # we can always move left + $move_right < 0 - # always join an isolated '=', a short quote, or if this - # will put ?/: at start of adjacent lines - if ( $ibeg_1 != $iend_1 - && !$is_short_quote - && !$is_ternary ) - { - next - unless ( - ( + # -xlp - # unless we can reduce this to two lines - $nmax < $n + 2 + # incomplete container + || ( $rOpts_extended_line_up_parentheses + && $Kc > $K_to_go[$max_index_to_go] ) + || $closing_index < 0 - # or three lines, the last with a leading - # semicolon - || ( $nmax == $n + 2 - && $types_to_go[$ibeg_nmax] eq ';' ) + # but we should only move right if we are sure it will + # not spoil vertical alignment + || ( $comma_count == 0 ) + || ( $comma_count > 0 && !$is_vertically_aligned ) + ) + { + my $move = + ( $move_right <= $right_margin ) + ? $move_right + : $right_margin; - # or the next line ends with a here doc - || $type_iend_2 eq 'h' + if (DEBUG_CORRECT_LP) { + print + "CORRECT_LP for seq=$align_seqno, moving $move spaces\n"; + } - # or the next line ends in an open paren or - # brace and the break hasn't been forced - # [dima.t] - || ( !$forced_breakpoint_to_go[$iend_1] - && $type_iend_2 eq '{' ) - ) + foreach ( keys %saw_indentation ) { + $saw_indentation{$_} + ->permanently_decrease_available_spaces( -$move ); + } + } - # do not recombine if the two lines might align - # well this is a very approximate test for this - && ( + # Otherwise, record what we want and the vertical aligner + # will try to recover it. + else { + $indentation->set_recoverable_spaces($move_right); + } + } ## end loop over tokens in a line + } ## end loop over lines + return; +} ## end sub correct_lp_indentation - # RT#127633 - the leading tokens are not - # operators - ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] ) +sub correct_lp_indentation_pass_1 { + my ( $self, $ri_first, $ri_last, $ri_starting_one_line_block ) = @_; - # or they are different - || ( $ibeg_3 >= 0 - && $type_ibeg_2 ne - $types_to_go[$ibeg_3] ) - ) - ); + # So some of the one-line blocks may be too long when given -lp + # indentation. We will fix that now if possible, using the list of these + # closing block indexes. - if ( + my @ilist = @{$ri_starting_one_line_block}; + return unless (@ilist); - # Recombine if we can make two lines - $nmax >= $n + 2 - - # -lp users often prefer this: - # my $title = function($env, $env, $sysarea, - # "bubba Borrower Entry"); - # so we will recombine if -lp is used we have - # ending comma - && !( - $ibeg_3 > 0 - && ref( $leading_spaces_to_go[$ibeg_3] ) - && $type_iend_2 eq ',' - ) - ) - { + my $max_line = @{$ri_first} - 1; + my $inext = shift(@ilist); - # otherwise, scan the rhs line up to last token - # for complexity. Note that we are not - # counting the last token in case it is an - # opening paren. - my $tv = 0; - my $depth = $nesting_depth_to_go[$ibeg_2]; - foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) { - if ( $nesting_depth_to_go[$i] != $depth ) { - $tv++; - last if ( $tv > 1 ); - } - $depth = $nesting_depth_to_go[$i]; - } + # loop over lines, checking length of each with a one-line block + my ( $ibeg, $iend ); + foreach my $line ( 0 .. $max_line ) { + $iend = $ri_last->[$line]; + next if ( $inext > $iend ); + $ibeg = $ri_first->[$line]; - # ok to recombine if no level changes before - # last token - if ( $tv > 0 ) { - - # otherwise, do not recombine if more than - # two level changes. - next if ( $tv > 1 ); - - # check total complexity of the two - # adjacent lines that will occur if we do - # this join - my $istop = - ( $n < $nmax ) - ? $ri_end->[ $n + 1 ] - : $iend_2; - foreach my $i ( $iend_2 .. $istop ) { - if ( - $nesting_depth_to_go[$i] != $depth ) - { - $tv++; - last if ( $tv > 2 ); - } - $depth = $nesting_depth_to_go[$i]; - } - - # do not recombine if total is more than 2 - # level changes - next if ( $tv > 2 ); - } - } - } + # This is just for lines with indentation objects (c098) + my $excess = + ref( $leading_spaces_to_go[$ibeg] ) + ? $self->excess_line_length( $ibeg, $iend ) + : 0; - unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) { - $forced_breakpoint_to_go[$iend_1] = 0; - } - } + if ( $excess > 0 ) { + my $available_spaces = $self->get_available_spaces_to_go($ibeg); - # for keywords.. - elsif ( $type_iend_1 eq 'k' ) { + if ( $available_spaces > 0 ) { + my $delete_want = min( $available_spaces, $excess ); + my $deleted_spaces = + $self->reduce_lp_indentation( $ibeg, $delete_want ); + $available_spaces = $self->get_available_spaces_to_go($ibeg); + } + } - # make major control keywords stand out - # (recombine.t) - next - if ( + # skip forward to next one-line block to check + while (@ilist) { + $inext = shift @ilist; + next if ( $inext <= $iend ); + last if ( $inext > $iend ); + } + last if ( $inext <= $iend ); + } + return; +} ## end sub correct_lp_indentation_pass_1 - #/^(last|next|redo|return)$/ - $is_last_next_redo_return{ $tokens_to_go[$iend_1] } +sub undo_lp_ci { - # but only if followed by multiple lines - && $n < $nmax - ); + # If there is a single, long parameter within parens, like this: + # + # $self->command( "/msg " + # . $infoline->chan + # . " You said $1, but did you know that it's square was " + # . $1 * $1 . " ?" ); + # + # we can remove the continuation indentation of the 2nd and higher lines + # to achieve this effect, which is more pleasing: + # + # $self->command("/msg " + # . $infoline->chan + # . " You said $1, but did you know that it's square was " + # . $1 * $1 . " ?"); - if ( $is_and_or{ $tokens_to_go[$iend_1] } ) { - next - unless $want_break_before{ $tokens_to_go[$iend_1] - }; - } - } + my ( $self, $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = + @_; + my $max_line = @{$ri_first} - 1; - #---------------------------------------------------------- - # Recombine Section 3: - # Examine token at $ibeg_2 (left end of second line of pair) - #---------------------------------------------------------- + # must be multiple lines + return unless $max_line > $line_open; - # join lines identified above as capable of - # causing an outdented line with leading closing paren - # Note that we are skipping the rest of this section - # and the rest of the loop to do the join - if ($skip_Section_3) { - $forced_breakpoint_to_go[$iend_1] = 0; - $n_best = $n; - last; - } + my $lev_start = $levels_to_go[$i_start]; + my $ci_start_plus = 1 + $ci_levels_to_go[$i_start]; - # handle lines with leading &&, || - elsif ( $is_amp_amp{$type_ibeg_2} ) { + # see if all additional lines in this container have continuation + # indentation + my $line_1 = 1 + $line_open; + my $n = $line_open; - $leading_amp_count++; + while ( ++$n <= $max_line ) { + my $ibeg = $ri_first->[$n]; + my $iend = $ri_last->[$n]; + if ( $ibeg eq $closing_index ) { $n--; last } + return if ( $lev_start != $levels_to_go[$ibeg] ); + return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] ); + last if ( $closing_index <= $iend ); + } - # ok to recombine if it follows a ? or : - # and is followed by an open paren.. - my $ok = - ( $is_ternary{$type_ibeg_1} - && $tokens_to_go[$iend_2] eq '(' ) + # we can reduce the indentation of all continuation lines + my $continuation_line_count = $n - $line_open; + @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] = + (0) x ($continuation_line_count); + @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] = + @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ]; + return; +} ## end sub undo_lp_ci - # or is followed by a ? or : at same depth - # - # We are looking for something like this. We can - # recombine the && line with the line above to make the - # structure more clear: - # return - # exists $G->{Attr}->{V} - # && exists $G->{Attr}->{V}->{$u} - # ? %{ $G->{Attr}->{V}->{$u} } - # : (); - # - # We should probably leave something like this alone: - # return - # exists $G->{Attr}->{E} - # && exists $G->{Attr}->{E}->{$u} - # && exists $G->{Attr}->{E}->{$u}->{$v} - # ? %{ $G->{Attr}->{E}->{$u}->{$v} } - # : (); - # so that we either have all of the &&'s (or ||'s) - # on one line, as in the first example, or break at - # each one as in the second example. However, it - # sometimes makes things worse to check for this because - # it prevents multiple recombinations. So this is not done. - || ( $ibeg_3 >= 0 - && $is_ternary{ $types_to_go[$ibeg_3] } - && $nesting_depth_to_go[$ibeg_3] == - $nesting_depth_to_go[$ibeg_2] ); - - # Combine a trailing && term with an || term: fix for - # c060 This is rare but can happen. - $ok ||= 1 - if ( $ibeg_3 < 0 - && $type_ibeg_2 eq '&&' - && $type_ibeg_1 eq '||' - && $nesting_depth_to_go[$ibeg_2] == - $nesting_depth_to_go[$ibeg_1] ); - - next if !$ok && $want_break_before{$type_ibeg_2}; - $forced_breakpoint_to_go[$iend_1] = 0; - - # tweak the bond strength to give this joint priority - # over ? and : - $bs_tweak = 0.25; - } +############################################### +# CODE SECTION 10: Code to break long statments +############################################### - # Identify and recombine a broken ?/: chain - elsif ( $type_ibeg_2 eq '?' ) { - - # Do not recombine different levels - my $lev = $levels_to_go[$ibeg_2]; - next if ( $lev ne $levels_to_go[$ibeg_1] ); - - # Do not recombine a '?' if either next line or - # previous line does not start with a ':'. The reasons - # are that (1) no alignment of the ? will be possible - # and (2) the expression is somewhat complex, so the - # '?' is harder to see in the interior of the line. - my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':'; - my $precedes_colon = - $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':'; - next unless ( $follows_colon || $precedes_colon ); - - # we will always combining a ? line following a : line - if ( !$follows_colon ) { - - # ...otherwise recombine only if it looks like a - # chain. we will just look at a few nearby lines - # to see if this looks like a chain. - my $local_count = 0; - foreach - my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) - { - $local_count++ - if $ii >= 0 - && $types_to_go[$ii] eq ':' - && $levels_to_go[$ii] == $lev; - } - next unless ( $local_count > 1 ); - } - $forced_breakpoint_to_go[$iend_1] = 0; - } +use constant DEBUG_BREAK_LINES => 0; - # do not recombine lines with leading '.' - elsif ( $type_ibeg_2 eq '.' ) { - my $i_next_nonblank = - min( $inext_to_go[$ibeg_2], $iend_2 ); - next - unless ( - - # ... unless there is just one and we can reduce - # this to two lines if we do. For example, this - # - # - # $bodyA .= - # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;' - # - # looks better than this: - # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;' - # . '$args .= $pat;' - - ( - $n == 2 - && $n == $nmax - && $type_ibeg_1 ne $type_ibeg_2 - ) +sub break_long_lines { - # ... or this would strand a short quote , like this - # . "some long quote" - # . "\n"; + #----------------------------------------------------------- + # Break a batch of tokens into lines which do not exceed the + # maximum line length. + #----------------------------------------------------------- - || ( $types_to_go[$i_next_nonblank] eq 'Q' - && $i_next_nonblank >= $iend_2 - 1 - && $token_lengths_to_go[$i_next_nonblank] < - $rOpts_short_concatenation_item_length ) - ); - } + my ( $self, $saw_good_break, $rcolon_list, $rbond_strength_bias ) = @_; - # handle leading keyword.. - elsif ( $type_ibeg_2 eq 'k' ) { + # Input parameters: + # $saw_good_break - a flag set by break_lists + # $rcolon_list - ref to a list of all the ? and : tokens in the batch, + # in order. + # $rbond_strength_bias - small bond strength bias values set by break_lists - # handle leading "or" - if ( $tokens_to_go[$ibeg_2] eq 'or' ) { - next - unless ( - $this_line_is_semicolon_terminated - && ( - $type_ibeg_1 eq '}' - || ( - - # following 'if' or 'unless' or 'or' - $type_ibeg_1 eq 'k' - && $is_if_unless{ $tokens_to_go[$ibeg_1] - } - - # important: only combine a very simple - # or statement because the step below - # may have combined a trailing 'and' - # with this or, and we do not want to - # then combine everything together - && ( $iend_2 - $ibeg_2 <= 7 ) - ) - ) - ); + # Output: returns references to the arrays: + # @i_first + # @i_last + # which contain the indexes $i of the first and last tokens on each + # line. - #X: RT #81854 - $forced_breakpoint_to_go[$iend_1] = 0 - unless ( $old_breakpoint_to_go[$iend_1] ); - } + # In addition, the array: + # $forced_breakpoint_to_go[$i] + # may be updated to be =1 for any index $i after which there must be + # a break. This signals later routines not to undo the breakpoint. - # handle leading 'and' and 'xor' - elsif ($tokens_to_go[$ibeg_2] eq 'and' - || $tokens_to_go[$ibeg_2] eq 'xor' ) - { + # Method: + # This routine is called if a statement is longer than the maximum line + # length, or if a preliminary scanning located desirable break points. + # Sub break_lists has already looked at these tokens and set breakpoints + # (in array $forced_breakpoint_to_go[$i]) where it wants breaks (for + # example after commas, after opening parens, and before closing parens). + # This routine will honor these breakpoints and also add additional + # breakpoints as necessary to keep the line length below the maximum + # requested. It bases its decision on where the 'bond strength' is + # lowest. - # Decide if we will combine a single terminal 'and' - # after an 'if' or 'unless'. - - # This looks best with the 'and' on the same - # line as the 'if': - # - # $a = 1 - # if $seconds and $nu < 2; - # - # But this looks better as shown: - # - # $a = 1 - # if !$this->{Parents}{$_} - # or $this->{Parents}{$_} eq $_; - # - next - unless ( - $this_line_is_semicolon_terminated - && ( + my @i_first = (); # the first index to output + my @i_last = (); # the last index to output + my @i_colon_breaks = (); # needed to decide if we have to break at ?'s + if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 } - # following 'if' or 'unless' or 'or' - $type_ibeg_1 eq 'k' - && ( $is_if_unless{ $tokens_to_go[$ibeg_1] } - || $tokens_to_go[$ibeg_1] eq 'or' ) - ) - ); - } + # Get the 'bond strengths' between tokens + my $rbond_strength_to_go = $self->set_bond_strengths(); - # handle leading "if" and "unless" - elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) { + # Add any comma bias set by break_lists + if ( @{$rbond_strength_bias} ) { + foreach my $item ( @{$rbond_strength_bias} ) { + my ( $ii, $bias ) = @{$item}; + if ( $ii >= 0 && $ii <= $max_index_to_go ) { + $rbond_strength_to_go->[$ii] += $bias; + } + elsif (DEVEL_MODE) { + my $KK = $K_to_go[0]; + my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_]; + Fault( +"Bad bond strength bias near line $lno: i=$ii must be between 0 and $max_index_to_go\n" + ); + } + } + } - # Combine something like: - # next - # if ( $lang !~ /${l}$/i ); - # into: - # next if ( $lang !~ /${l}$/i ); - next - unless ( - $this_line_is_semicolon_terminated + my $imin = 0; + my $imax = $max_index_to_go; + if ( $types_to_go[$imin] eq 'b' ) { $imin++ } + if ( $types_to_go[$imax] eq 'b' ) { $imax-- } - # previous line begins with 'and' or 'or' - && $type_ibeg_1 eq 'k' - && $is_and_or{ $tokens_to_go[$ibeg_1] } + my $i_begin = $imin; + my $last_break_strength = NO_BREAK; + my $i_last_break = -1; + my $line_count = 0; - ); - } + # see if any ?/:'s are in order + my $colons_in_order = 1; + my $last_tok = EMPTY_STRING; + foreach ( @{$rcolon_list} ) { + if ( $_ eq $last_tok ) { $colons_in_order = 0; last } + $last_tok = $_; + } - # handle all other leading keywords - else { + # This is a sufficient but not necessary condition for colon chain + my $is_colon_chain = ( $colons_in_order && @{$rcolon_list} > 2 ); - # keywords look best at start of lines, - # but combine things like "1 while" - unless ( $is_assignment{$type_iend_1} ) { - next - if ( ( $type_iend_1 ne 'k' ) - && ( $tokens_to_go[$ibeg_2] ne 'while' ) ); - } - } - } + #------------------------------------------ + # BEGINNING of main loop to set breakpoints + # Keep iterating until we reach the end + #------------------------------------------ + while ( $i_begin <= $imax ) { - # similar treatment of && and || as above for 'and' and - # 'or': NOTE: This block of code is currently bypassed - # because of a previous block but is retained for possible - # future use. - elsif ( $is_amp_amp{$type_ibeg_2} ) { + #------------------------------------------------------------------ + # Find the best next breakpoint based on token-token bond strengths + #------------------------------------------------------------------ + my ( $i_lowest, $lowest_strength, $leading_alignment_type, $Msg ) = + $self->break_lines_inner_loop( - # maybe looking at something like: - # unless $TEXTONLY || $item =~ m%|p>|a|img)%i; + $i_begin, + $i_last_break, + $imax, + $last_break_strength, + $line_count, + $rbond_strength_to_go, + $saw_good_break, - next - unless ( - $this_line_is_semicolon_terminated + ); - # previous line begins with an 'if' or 'unless' - # keyword - && $type_ibeg_1 eq 'k' - && $is_if_unless{ $tokens_to_go[$ibeg_1] } + # Now make any adjustments required by ternary breakpoint rules + if ( @{$rcolon_list} ) { - ); - } + my $i_next_nonblank = $inext_to_go[$i_lowest]; - # handle line with leading = or similar - elsif ( $is_assignment{$type_ibeg_2} ) { - next unless ( $n == 1 || $n == $nmax ); - next if ( $old_breakpoint_to_go[$iend_1] ); - next - unless ( + #------------------------------------------------------- + # ?/: rule 1 : if a break here will separate a '?' on this + # line from its closing ':', then break at the '?' instead. + # But do not break a sequential chain of ?/: statements + #------------------------------------------------------- + if ( !$is_colon_chain ) { + foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) { + next unless ( $tokens_to_go[$i] eq '?' ); - # unless we can reduce this to two lines - $nmax == 2 + # do not break if statement is broken by side comment + next + if ( $tokens_to_go[$max_index_to_go] eq '#' + && terminal_type_i( 0, $max_index_to_go ) !~ + /^[\;\}]$/ ); - # or three lines, the last with a leading semicolon - || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' ) + # no break needed if matching : is also on the line + next + if ( defined( $mate_index_to_go[$i] ) + && $mate_index_to_go[$i] <= $i_next_nonblank ); - # or the next line ends with a here doc - || $type_iend_2 eq 'h' + $i_lowest = $i; + if ( $want_break_before{'?'} ) { $i_lowest-- } + $i_next_nonblank = $inext_to_go[$i_lowest]; + last; + } + } - # or this is a short line ending in ; - || ( $n == $nmax - && $this_line_is_semicolon_terminated ) - ); - $forced_breakpoint_to_go[$iend_1] = 0; - } + my $next_nonblank_type = $types_to_go[$i_next_nonblank]; - #---------------------------------------------------------- - # Recombine Section 4: - # Combine the lines if we arrive here and it is possible - #---------------------------------------------------------- + #------------------------------------------------------------- + # ?/: rule 2 : if we break at a '?', then break at its ':' + # + # Note: this rule is also in sub break_lists to handle a break + # at the start and end of a line (in case breaks are dictated + # by side comments). + #------------------------------------------------------------- + if ( $next_nonblank_type eq '?' ) { + $self->set_closing_breakpoint($i_next_nonblank); + } + elsif ( $types_to_go[$i_lowest] eq '?' ) { + $self->set_closing_breakpoint($i_lowest); + } - # honor hard breakpoints - next if ( $forced_breakpoint_to_go[$iend_1] > 0 ); + #-------------------------------------------------------- + # ?/: rule 3 : if we break at a ':' then we save + # its location for further work below. We may need to go + # back and break at its '?'. + #-------------------------------------------------------- + if ( $next_nonblank_type eq ':' ) { + push @i_colon_breaks, $i_next_nonblank; + } + elsif ( $types_to_go[$i_lowest] eq ':' ) { + push @i_colon_breaks, $i_lowest; + } - my $bs = $rbond_strength_to_go->[$iend_1] + $bs_tweak; + # here we should set breaks for all '?'/':' pairs which are + # separated by this line + } - # Require a few extra spaces before recombining lines if we are - # at an old breakpoint unless this is a simple list or terminal - # line. The goal is to avoid oscillating between two - # quasi-stable end states. For example this snippet caused - # problems: -## my $this = -## bless { -## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]" -## }, -## $type; - next - if ( $old_breakpoint_to_go[$iend_1] - && !$this_line_is_semicolon_terminated - && $n < $nmax - && $excess + 4 > 0 - && $type_iend_2 ne ',' ); - - # do not recombine if we would skip in indentation levels - if ( $n < $nmax ) { - my $if_next = $ri_beg->[ $n + 1 ]; - next - if ( - $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2] - && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next] - - # but an isolated 'if (' is undesirable - && !( - $n == 1 - && $iend_1 - $ibeg_1 <= 2 - && $type_ibeg_1 eq 'k' - && $tokens_to_go[$ibeg_1] eq 'if' - && $tokens_to_go[$iend_1] ne '(' - ) - ); - } + # guard against infinite loop (should never happen) + if ( $i_lowest <= $i_last_break ) { + DEVEL_MODE + && Fault("i_lowest=$i_lowest <= i_last_break=$i_last_break\n"); + $i_lowest = $imax; + } - # honor no-break's - ## next if ( $bs >= NO_BREAK - 1 ); # removed for b1257 + DEBUG_BREAK_LINES + && print STDOUT +"BREAK: best is i = $i_lowest strength = $lowest_strength;\nReason>> $Msg\n"; - # remember the pair with the greatest bond strength - if ( !$n_best ) { - $n_best = $n; - $bs_best = $bs; - } - else { + $line_count++; - if ( $bs > $bs_best ) { - $n_best = $n; - $bs_best = $bs; - } - } - } + # save this line segment, after trimming blanks at the ends + push( @i_first, + ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin ); + push( @i_last, + ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest ); - # recombine the pair with the greatest bond strength - if ($n_best) { - splice @{$ri_beg}, $n_best, 1; - splice @{$ri_end}, $n_best - 1, 1; - splice @joint, $n_best, 1; + # set a forced breakpoint at a container opening, if necessary, to + # signal a break at a closing container. Excepting '(' for now. + if ( + ( + $tokens_to_go[$i_lowest] eq '{' + || $tokens_to_go[$i_lowest] eq '[' + ) + && !$forced_breakpoint_to_go[$i_lowest] + ) + { + $self->set_closing_breakpoint($i_lowest); + } - # keep going if we are still making progress - $more_to_do++; - } - } # end iteration loop + # get ready to find the next breakpoint + $last_break_strength = $lowest_strength; + $i_last_break = $i_lowest; + $i_begin = $i_lowest + 1; - } # end loop over sections + # skip past a blank + if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) { + $i_begin++; + } + } - RETURN: + #------------------------------------------------- + # END of main loop to set continuation breakpoints + #------------------------------------------------- - if (DEBUG_RECOMBINE) { - my $nmax_last = @{$ri_end} - 1; - print STDERR -"exiting recombine with $nmax_last lines, starting lines=$nmax_start, iterations=$it_count, max_it=$it_count_max numsec=$num_sections\n"; - } - return; - } ## end sub recombine_breakpoints -} ## end closure recombine_breakpoints + #----------------------------------------------------------- + # ?/: rule 4 -- if we broke at a ':', then break at + # corresponding '?' unless this is a chain of ?: expressions + #----------------------------------------------------------- + if (@i_colon_breaks) { + my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 ); + if ( !$is_chain ) { + $self->do_colon_breaks( \@i_colon_breaks, \@i_first, \@i_last ); + } + } -sub insert_final_ternary_breaks { + return ( \@i_first, \@i_last, $rbond_strength_to_go ); +} ## end sub break_long_lines - my ( $self, $ri_left, $ri_right ) = @_; +# small bond strength numbers to help break ties +use constant TINY_BIAS => 0.0001; +use constant MAX_BIAS => 0.001; - # Called once per batch to look for and do any final line breaks for - # long ternary chains +sub break_lines_inner_loop { - my $nmax = @{$ri_right} - 1; + #----------------------------------------------------------------- + # Find the best next breakpoint in index range ($i_begin .. $imax) + # which, if possible, does not exceed the maximum line length. + #----------------------------------------------------------------- - # scan the left and right end tokens of all lines - my $count = 0; - my $i_first_colon = -1; - for my $n ( 0 .. $nmax ) { - my $il = $ri_left->[$n]; - my $ir = $ri_right->[$n]; - my $typel = $types_to_go[$il]; - my $typer = $types_to_go[$ir]; - return if ( $typel eq '?' ); - return if ( $typer eq '?' ); - if ( $typel eq ':' ) { $i_first_colon = $il; last; } - elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; } - } + my ( + $self, # - # For long ternary chains, - # if the first : we see has its ? is in the interior - # of a preceding line, then see if there are any good - # breakpoints before the ?. - if ( $i_first_colon > 0 ) { - my $i_question = $mate_index_to_go[$i_first_colon]; - if ( $i_question > 0 ) { - my @insert_list; - foreach my $ii ( reverse( 0 .. $i_question - 1 ) ) { - my $token = $tokens_to_go[$ii]; - my $type = $types_to_go[$ii]; + $i_begin, + $i_last_break, + $imax, + $last_break_strength, + $line_count, + $rbond_strength_to_go, + $saw_good_break, - # For now, a good break is either a comma or, - # in a long chain, a 'return'. - # Patch for RT #126633: added the $nmax>1 check to avoid - # breaking after a return for a simple ternary. For longer - # chains the break after return allows vertical alignment, so - # it is still done. So perltidy -wba='?' will not break - # immediately after the return in the following statement: - # sub x { - # return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' : - # 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb'; - # } - if ( - ( - $type eq ',' - || $type eq 'k' && ( $nmax > 1 && $token eq 'return' ) - ) - && $self->in_same_container_i( $ii, $i_question ) - ) - { - push @insert_list, $ii; - last; - } - } + ) = @_; - # insert any new break points - if (@insert_list) { - $self->insert_additional_breaks( \@insert_list, $ri_left, - $ri_right ); - } + # Given: + # $i_begin = first index of range + # $i_last_break = index of previous break + # $imax = last index of range + # $last_break_strength = bond strength of last break + # $line_count = number of output lines so far + # $rbond_strength_to_go = ref to array of bond strengths + # $saw_good_break = true if old line had a good breakpoint + + # Returns: + # $i_lowest = index of best breakpoint + # $lowest_strength = 'bond strength' at best breakpoint + # $leading_alignment_type = special token type after break + # $Msg = string of debug info + + my $Msg = EMPTY_STRING; + my $strength = NO_BREAK; + my $i_test = $i_begin - 1; + my $i_lowest = -1; + my $starting_sum = $summed_lengths_to_go[$i_begin]; + my $lowest_strength = NO_BREAK; + my $leading_alignment_type = EMPTY_STRING; + my $leading_spaces = leading_spaces_to_go($i_begin); + my $maximum_line_length = + $maximum_line_length_at_level[ $levels_to_go[$i_begin] ]; + DEBUG_BREAK_LINES + && do { + $Msg .= "updating leading spaces to be $leading_spaces at i=$i_begin\n"; + }; + + # Do not separate an isolated bare word from an opening paren. + # Alternate Fix #2 for issue b1299. This waits as long as possible + # to make the decision. + if ( $types_to_go[$i_begin] eq 'i' + && substr( $tokens_to_go[$i_begin], 0, 1 ) =~ /\w/ ) + { + my $i_next_nonblank = $inext_to_go[$i_begin]; + if ( $tokens_to_go[$i_next_nonblank] eq '(' ) { + $rbond_strength_to_go->[$i_begin] = NO_BREAK; } } - return; -} ## end sub insert_final_ternary_breaks -sub insert_breaks_before_list_opening_containers { + # Avoid a break which would strand a single punctuation + # token. For example, we do not want to strand a leading + # '.' which is followed by a long quoted string. + # But note that we do want to do this with -extrude (l=1) + # so please test any changes to this code on -extrude. + if ( + ( $i_begin < $imax ) + && ( $tokens_to_go[$i_begin] eq $types_to_go[$i_begin] ) + && !$forced_breakpoint_to_go[$i_begin] + && !( + + # Allow break after a closing eval brace. This is an + # approximate way to simulate a forced breakpoint made in + # Section B below. No differences have been found, but if + # necessary the full logic of Section B could be used here + # (see c165). + $tokens_to_go[$i_begin] eq '}' + && $block_type_to_go[$i_begin] + && $block_type_to_go[$i_begin] eq 'eval' + ) + && ( + ( + $leading_spaces + + $summed_lengths_to_go[ $i_begin + 1 ] - + $starting_sum + ) < $maximum_line_length + ) + ) + { + $i_test = min( $imax, $inext_to_go[$i_begin] ) - 1; + DEBUG_BREAK_LINES && do { + $Msg .= " :skip ahead at i=$i_test"; + }; + } - my ( $self, $ri_left, $ri_right ) = @_; + #------------------------------------------------------- + # Begin INNER_LOOP over the indexes in the _to_go arrays + #------------------------------------------------------- + while ( ++$i_test <= $imax ) { + my $type = $types_to_go[$i_test]; + my $token = $tokens_to_go[$i_test]; + my $i_next_nonblank = $inext_to_go[$i_test]; + my $next_nonblank_type = $types_to_go[$i_next_nonblank]; + my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; + my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; - # This routine is called once per batch to implement the parameters - # --break-before-hash-brace, etc. + #--------------------------------------------------------------- + # Section A: Get token-token strength and handle any adjustments + #--------------------------------------------------------------- - # Nothing to do if none of these parameters has been set - return unless %break_before_container_types; + # adjustments to the previous bond strength may have been made, and + # we must keep the bond strength of a token and its following blank + # the same; + my $last_strength = $strength; + $strength = $rbond_strength_to_go->[$i_test]; + if ( $type eq 'b' ) { $strength = $last_strength } - my $nmax = @{$ri_right} - 1; - return unless ( $nmax >= 0 ); + # reduce strength a bit to break ties at an old comma breakpoint ... + if ( - my $rLL = $self->[_rLL_]; + $old_breakpoint_to_go[$i_test] - my $rbreak_before_container_by_seqno = - $self->[_rbreak_before_container_by_seqno_]; - my $rK_weld_left = $self->[_rK_weld_left_]; + # Patch: limited to just commas to avoid blinking states + && $type eq ',' - # scan the ends of all lines - my @insert_list; - for my $n ( 0 .. $nmax ) { - my $il = $ri_left->[$n]; - my $ir = $ri_right->[$n]; - next unless ( $ir > $il ); - my $Kl = $K_to_go[$il]; - my $Kr = $K_to_go[$ir]; - my $Kend = $Kr; - my $type_end = $rLL->[$Kr]->[_TYPE_]; + # which is a 'good' breakpoint, meaning ... + # we don't want to break before it + && !$want_break_before{$type} - # Backup before any side comment - if ( $type_end eq '#' ) { - $Kend = $self->K_previous_nonblank($Kr); - next unless defined($Kend); - $type_end = $rLL->[$Kend]->[_TYPE_]; + # and either we want to break before the next token + # or the next token is not short (i.e. not a '*', '/' etc.) + && $i_next_nonblank <= $imax + && ( $want_break_before{$next_nonblank_type} + || $token_lengths_to_go[$i_next_nonblank] > 2 + || $next_nonblank_type eq ',' + || $is_opening_type{$next_nonblank_type} ) + ) + { + $strength -= TINY_BIAS; + DEBUG_BREAK_LINES && do { $Msg .= " :-bias at i=$i_test" }; } - # Backup to the start of any weld; fix for b1173. - if ($total_weld_count) { - my $Kend_test = $rK_weld_left->{$Kend}; - if ( defined($Kend_test) && $Kend_test > $Kl ) { - $Kend = $Kend_test; - $Kend_test = $rK_weld_left->{$Kend}; + # otherwise increase strength a bit if this token would be at the + # maximum line length. This is necessary to avoid blinking + # in the above example when the -iob flag is added. + else { + my $len = + $leading_spaces + + $summed_lengths_to_go[ $i_test + 1 ] - + $starting_sum; + if ( $len >= $maximum_line_length ) { + $strength += TINY_BIAS; + DEBUG_BREAK_LINES && do { $Msg .= " :+bias at i=$i_test" }; } - - # Do not break if we did not back up to the start of a weld - # (shouldn't happen) - next if ( defined($Kend_test) ); } - my $token = $rLL->[$Kend]->[_TOKEN_]; - next unless ( $is_opening_token{$token} ); - next unless ( $Kl < $Kend - 1 ); - - my $seqno = $rLL->[$Kend]->[_TYPE_SEQUENCE_]; - next unless ( defined($seqno) ); + #------------------------------------- + # Section B: Handle forced breakpoints + #------------------------------------- + my $must_break; - # Use the flag which was previously set - next unless ( $rbreak_before_container_by_seqno->{$seqno} ); + # Force an immediate break at certain operators + # with lower level than the start of the line, + # unless we've already seen a better break. + # + # Note on an issue with a preceding '?' : - # Install a break before this opening token. - my $Kbreak = $self->K_previous_nonblank($Kend); - my $ibreak = $Kbreak - $Kl + $il; - next if ( $ibreak < $il ); - next if ( $nobreak_to_go[$ibreak] ); - push @insert_list, $ibreak; - } + # There may be a break at a previous ? if the line is long. Because + # of this we do not want to force a break if there is a previous ? on + # this line. For now the best way to do this is to not break if we + # have seen a lower strength point, which is probably a ?. + # + # Example of unwanted breaks we are avoiding at a '.' following a ? + # from pod2html using perltidy -gnu: + # ) + # ? "\n<A NAME=\"" + # . $value + # . "\">\n$text</A>\n" + # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n"; + if ( + ( $strength <= $lowest_strength ) + && ( $nesting_depth_to_go[$i_begin] > + $nesting_depth_to_go[$i_next_nonblank] ) + && ( + $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/ + || ( + $next_nonblank_type eq 'k' - # insert any new break points - if (@insert_list) { - $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); - } - return; -} ## end sub insert_breaks_before_list_opening_containers + ## /^(and|or)$/ # note: includes 'xor' now + && $is_and_or{$next_nonblank_token} + ) + ) + ) + { + $self->set_forced_breakpoint($i_next_nonblank); + DEBUG_BREAK_LINES + && do { $Msg .= " :Forced break at i=$i_next_nonblank" }; + } -sub note_added_semicolon { - my ( $self, $line_number ) = @_; - $self->[_last_added_semicolon_at_] = $line_number; - if ( $self->[_added_semicolon_count_] == 0 ) { - $self->[_first_added_semicolon_at_] = $line_number; - } - $self->[_added_semicolon_count_]++; - write_logfile_entry("Added ';' here\n"); - return; -} ## end sub note_added_semicolon + if ( -sub note_deleted_semicolon { - my ( $self, $line_number ) = @_; - $self->[_last_deleted_semicolon_at_] = $line_number; - if ( $self->[_deleted_semicolon_count_] == 0 ) { - $self->[_first_deleted_semicolon_at_] = $line_number; - } - $self->[_deleted_semicolon_count_]++; - write_logfile_entry("Deleted unnecessary ';' at line $line_number\n"); - return; -} ## end sub note_deleted_semicolon + # Try to put a break where requested by break_lists + $forced_breakpoint_to_go[$i_test] -sub note_embedded_tab { - my ( $self, $line_number ) = @_; - $self->[_embedded_tab_count_]++; - $self->[_last_embedded_tab_at_] = $line_number; - if ( !$self->[_first_embedded_tab_at_] ) { - $self->[_first_embedded_tab_at_] = $line_number; - } + # break between ) { in a continued line so that the '{' can + # be outdented + # See similar logic in break_lists which catches instances + # where a line is just something like ') {'. We have to + # be careful because the corresponding block keyword might + # not be on the first line, such as 'for' here: + # + # eval { + # for ("a") { + # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ } + # } + # }; + # + || ( + $line_count + && ( $token eq ')' ) + && ( $next_nonblank_type eq '{' ) + && ($next_nonblank_block_type) + && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] ) - if ( $self->[_embedded_tab_count_] <= MAX_NAG_MESSAGES ) { - write_logfile_entry("Embedded tabs in quote or pattern\n"); - } - return; -} ## end sub note_embedded_tab + # RT #104427: Dont break before opening sub brace because + # sub block breaks handled at higher level, unless + # it looks like the preceding list is long and broken + && !( -use constant DEBUG_CORRECT_LP => 0; + ( + $next_nonblank_block_type =~ /$SUB_PATTERN/ + || $next_nonblank_block_type =~ /$ASUB_PATTERN/ + ) + && ( $nesting_depth_to_go[$i_begin] == + $nesting_depth_to_go[$i_next_nonblank] ) + ) -sub correct_lp_indentation { + && !$rOpts_opening_brace_always_on_right + ) - # When the -lp option is used, we need to make a last pass through - # each line to correct the indentation positions in case they differ - # from the predictions. This is necessary because perltidy uses a - # predictor/corrector method for aligning with opening parens. The - # predictor is usually good, but sometimes stumbles. The corrector - # tries to patch things up once the actual opening paren locations - # are known. - my ( $self, $ri_first, $ri_last ) = @_; - my $K_opening_container = $self->[_K_opening_container_]; - my $K_closing_container = $self->[_K_closing_container_]; - my $do_not_pad = 0; - - # Note on flag '$do_not_pad': - # We want to avoid a situation like this, where the aligner inserts - # whitespace before the '=' to align it with a previous '=', because - # otherwise the parens might become mis-aligned in a situation like - # this, where the '=' has become aligned with the previous line, - # pushing the opening '(' forward beyond where we want it. - # - # $mkFloor::currentRoom = ''; - # $mkFloor::c_entry = $c->Entry( - # -width => '10', - # -relief => 'sunken', - # ... - # ); - # - # We leave it to the aligner to decide how to do this. + # There is an implied forced break at a terminal opening brace + || ( ( $type eq '{' ) && ( $i_test == $imax ) ) + ) + { - # first remove continuation indentation if appropriate - my $rLL = $self->[_rLL_]; - my $max_line = @{$ri_first} - 1; + # Forced breakpoints must sometimes be overridden, for example + # because of a side comment causing a NO_BREAK. It is easier + # to catch this here than when they are set. + if ( $strength < NO_BREAK - 1 ) { + $strength = $lowest_strength - TINY_BIAS; + $must_break = 1; + DEBUG_BREAK_LINES + && do { $Msg .= " :set must_break at i=$i_next_nonblank" }; + } + } - #--------------------------------------------------------------------------- - # PASS 1: reduce indentation if necessary at any long one-line blocks (c098) - #--------------------------------------------------------------------------- + # quit if a break here would put a good terminal token on + # the next line and we already have a possible break + if ( + ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' ) + && !$must_break + && ( + ( + $leading_spaces + + $summed_lengths_to_go[ $i_next_nonblank + 1 ] - + $starting_sum + ) > $maximum_line_length + ) + ) + { + if ( $i_lowest >= 0 ) { + DEBUG_BREAK_LINES && do { + $Msg .= " :quit at good terminal='$next_nonblank_type'"; + }; + last; + } + } - # The point is that sub 'starting_one_line_block' made one-line blocks based - # on default indentation, not -lp indentation. So some of the one-line - # blocks may be too long when given -lp indentation. We will fix that now - # if possible, using the list of these closing block indexes. - my $ri_starting_one_line_block = - $self->[_this_batch_]->[_ri_starting_one_line_block_]; - if ( @{$ri_starting_one_line_block} ) { - my @ilist = @{$ri_starting_one_line_block}; - my $inext = shift(@ilist); + #------------------------------------------------------------ + # Section C: Look for the lowest bond strength between tokens + #------------------------------------------------------------ + if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) ) { - # loop over lines, checking length of each with a one-line block - my ( $ibeg, $iend ); - foreach my $line ( 0 .. $max_line ) { - $iend = $ri_last->[$line]; - next if ( $inext > $iend ); - $ibeg = $ri_first->[$line]; + # break at previous best break if it would have produced + # a leading alignment of certain common tokens, and it + # is different from the latest candidate break + if ($leading_alignment_type) { + DEBUG_BREAK_LINES && do { + $Msg .= + " :last at leading_alignment='$leading_alignment_type'"; + }; + last; + } - # This is just for lines with indentation objects (c098) - my $excess = - ref( $leading_spaces_to_go[$ibeg] ) - ? $self->excess_line_length( $ibeg, $iend ) - : 0; + # Force at least one breakpoint if old code had good + # break It is only called if a breakpoint is required or + # desired. This will probably need some adjustments + # over time. A goal is to try to be sure that, if a new + # side comment is introduced into formatted text, then + # the same breakpoints will occur. scbreak.t + if ( + $i_test == $imax # we are at the end + && !$forced_breakpoint_count + && $saw_good_break # old line had good break + && $type =~ /^[#;\{]$/ # and this line ends in + # ';' or side comment + && $i_last_break < 0 # and we haven't made a break + && $i_lowest >= 0 # and we saw a possible break + && $i_lowest < $imax - 1 # (but not just before this ;) + && $strength - $lowest_strength < 0.5 * WEAK # and it's good + ) + { - if ( $excess > 0 ) { - my $available_spaces = $self->get_available_spaces_to_go($ibeg); + DEBUG_BREAK_LINES && do { + $Msg .= " :last at good old break\n"; + }; + last; + } - if ( $available_spaces > 0 ) { - my $delete_want = min( $available_spaces, $excess ); - my $deleted_spaces = - $self->reduce_lp_indentation( $ibeg, $delete_want ); - $available_spaces = - $self->get_available_spaces_to_go($ibeg); + # Do not skip past an important break point in a short final + # segment. For example, without this check we would miss the + # break at the final / in the following code: + # + # $depth_stop = + # ( $tau * $mass_pellet * $q_0 * + # ( 1. - exp( -$t_stop / $tau ) ) - + # 4. * $pi * $factor * $k_ice * + # ( $t_melt - $t_ice ) * + # $r_pellet * + # $t_stop ) / + # ( $rho_ice * $Qs * $pi * $r_pellet**2 ); + # + if ( + $line_count > 2 + && $i_lowest >= 0 # and we saw a possible break + && $i_lowest < $i_test + && $i_test > $imax - 2 + && $nesting_depth_to_go[$i_begin] > + $nesting_depth_to_go[$i_lowest] + && $lowest_strength < $last_break_strength - .5 * WEAK + ) + { + # Make this break for math operators for now + my $ir = $inext_to_go[$i_lowest]; + my $il = iprev_to_go($ir); + if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/ + || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ ) + { + DEBUG_BREAK_LINES && do { + $Msg .= " :last-noskip_short"; + }; + last; } } - # skip forward to next one-line block to check - while (@ilist) { - $inext = shift @ilist; - next if ( $inext <= $iend ); - last if ( $inext > $iend ); + # Update the minimum bond strength location + $lowest_strength = $strength; + $i_lowest = $i_test; + if ($must_break) { + DEBUG_BREAK_LINES && do { + $Msg .= " :last-must_break"; + }; + last; } - last if ( $inext <= $iend ); - } - } - - #------------------------------------------------------------------- - # PASS 2: look for and fix other problems in each line of this batch - #------------------------------------------------------------------- - - # look at each output line ... - my ( $ibeg, $iend ); - foreach my $line ( 0 .. $max_line ) { - $ibeg = $ri_first->[$line]; - $iend = $ri_last->[$line]; - # looking at each token in this output line ... - foreach my $i ( $ibeg .. $iend ) { + # set flags to remember if a break here will produce a + # leading alignment of certain common tokens + if ( $line_count > 0 + && $i_test < $imax + && ( $lowest_strength - $last_break_strength <= MAX_BIAS ) ) + { + my $i_last_end = iprev_to_go($i_begin); + my $tok_beg = $tokens_to_go[$i_begin]; + my $type_beg = $types_to_go[$i_begin]; + if ( - # How many space characters to place before this token - # for special alignment. Actual padding is done in the - # continue block. + # check for leading alignment of certain tokens + ( + $tok_beg eq $next_nonblank_token + && $is_chain_operator{$tok_beg} + && ( $type_beg eq 'k' + || $type_beg eq $tok_beg ) + && $nesting_depth_to_go[$i_begin] >= + $nesting_depth_to_go[$i_next_nonblank] + ) - # looking for next unvisited indentation item ... - my $indentation = $leading_spaces_to_go[$i]; + || ( $tokens_to_go[$i_last_end] eq $token + && $is_chain_operator{$token} + && ( $type eq 'k' || $type eq $token ) + && $nesting_depth_to_go[$i_last_end] >= + $nesting_depth_to_go[$i_test] ) + ) + { + $leading_alignment_type = $next_nonblank_type; + } + } + } - # This is just for indentation objects (c098) - next unless ( ref($indentation) ); + #----------------------------------------------------------- + # Section D: See if the maximum line length will be exceeded + #----------------------------------------------------------- - # Visit each indentation object just once - next if ( $indentation->get_marked() ); + # Quit if there are no more tokens to test + last if ( $i_test >= $imax ); - # Mark first visit - $indentation->set_marked(1); + # Keep going if we have not reached the limit + my $excess = + $leading_spaces + + $summed_lengths_to_go[ $i_test + 2 ] - + $starting_sum - + $maximum_line_length; - # Skip indentation objects which do not align with container tokens - my $align_seqno = $indentation->get_align_seqno(); - next unless ($align_seqno); + if ( $excess < 0 ) { + next; + } + elsif ( $excess == 0 ) { - # Skip a container which is entirely on this line - my $Ko = $K_opening_container->{$align_seqno}; - my $Kc = $K_closing_container->{$align_seqno}; - if ( defined($Ko) && defined($Kc) ) { - next if ( $Ko >= $K_to_go[$ibeg] && $Kc <= $K_to_go[$iend] ); + # To prevent blinkers we will avoid leaving a token exactly at + # the line length limit unless it is the last token or one of + # several "good" types. + # + # The following code was a blinker with -pbp before this + # modification: + # $last_nonblank_token eq '(' + # && $is_indirect_object_taker{ $paren_type + # [$paren_depth] } + # The issue causing the problem is that if the + # term [$paren_depth] gets broken across a line then + # the whitespace routine doesn't see both opening and closing + # brackets and will format like '[ $paren_depth ]'. This + # leads to an oscillation in length depending if we break + # before the closing bracket or not. + if ( $i_test + 1 < $imax + && $next_nonblank_type ne ',' + && !$is_closing_type{$next_nonblank_type} ) + { + # too long + DEBUG_BREAK_LINES && do { + $Msg .= " :too_long"; + } } - - if ( $line == 1 && $i == $ibeg ) { - $do_not_pad = 1; + else { + next; } + } + else { + # too long + } - #-------------------------------------------- - # Now see what the error is and try to fix it - #-------------------------------------------- - my $closing_index = $indentation->get_closed(); - my $predicted_pos = $indentation->get_spaces(); - - # Find actual position: - my $actual_pos; + # a break here makes the line too long ... - if ( $i == $ibeg ) { + DEBUG_BREAK_LINES && do { + my $ltok = $token; + my $rtok = + $next_nonblank_token ? $next_nonblank_token : EMPTY_STRING; + my $i_testp2 = $i_test + 2; + if ( $i_testp2 > $max_index_to_go + 1 ) { + $i_testp2 = $max_index_to_go + 1; + } + if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) } + if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) } + print STDOUT +"BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] str=$strength $ltok $rtok\n"; + }; - # Case 1: token is first character of of batch - table lookup - if ( $line == 0 ) { + # Exception: allow one extra terminal token after exceeding line length + # if it would strand this token. + if ( $i_lowest == $i_test + && $token_lengths_to_go[$i_test] > 1 + && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' ) + && $rOpts_fuzzy_line_length ) + { + DEBUG_BREAK_LINES && do { + $Msg .= " :do_not_strand next='$next_nonblank_type'"; + }; + next; + } - $actual_pos = $predicted_pos; + # Stop if here if we have a solution and the line will be too long + if ( $i_lowest >= 0 ) { + DEBUG_BREAK_LINES && do { + $Msg .= +" :Done-too_long && i_lowest=$i_lowest at itest=$i_test, imax=$imax"; + }; + last; + } + } - my ( $indent, $offset, $is_leading, $exists ) = - get_saved_opening_indentation($align_seqno); - if ( defined($indent) ) { + #----------------------------------------------------- + # End INNER_LOOP over the indexes in the _to_go arrays + #----------------------------------------------------- - # FIXME: should use '1' here if no space after opening - # and '2' if want space; hardwired at 1 like -gnu-style - $actual_pos = get_spaces($indent) + $offset + 1; - } - } + # Be sure we return an index in the range ($ibegin .. $imax). + # We will break at imax if no other break was found. + if ( $i_lowest < 0 ) { $i_lowest = $imax } - # Case 2: token starts a new line - use length of previous line - else { + return ( $i_lowest, $lowest_strength, $leading_alignment_type, $Msg ); +} ## end sub break_lines_inner_loop - my $ibegm = $ri_first->[ $line - 1 ]; - my $iendm = $ri_last->[ $line - 1 ]; - $actual_pos = total_line_length( $ibegm, $iendm ); +sub do_colon_breaks { + my ( $self, $ri_colon_breaks, $ri_first, $ri_last ) = @_; - # follow -pt style - ++$actual_pos - if ( $types_to_go[ $iendm + 1 ] eq 'b' ); + # using a simple method for deciding if we are in a ?/: chain -- + # this is a chain if it has multiple ?/: pairs all in order; + # otherwise not. + # Note that if line starts in a ':' we count that above as a break - } + my @insert_list = (); + foreach ( @{$ri_colon_breaks} ) { + my $i_question = $mate_index_to_go[$_]; + if ( defined($i_question) ) { + if ( $want_break_before{'?'} ) { + $i_question = iprev_to_go($i_question); } - # Case 3: $i>$ibeg: token is mid-line - use length to previous token - else { - - $actual_pos = total_line_length( $ibeg, $i - 1 ); - - # for mid-line token, we must check to see if all - # additional lines have continuation indentation, - # and remove it if so. Otherwise, we do not get - # good alignment. - if ( $closing_index > $iend ) { - my $ibeg_next = $ri_first->[ $line + 1 ]; - if ( $ci_levels_to_go[$ibeg_next] > 0 ) { - $self->undo_lp_ci( $line, $i, $closing_index, - $ri_first, $ri_last ); - } - } + if ( $i_question >= 0 ) { + push @insert_list, $i_question; } + } + $self->insert_additional_breaks( \@insert_list, $ri_first, $ri_last ); + } + return; +} ## end sub do_colon_breaks - # By how many spaces (plus or minus) would we need to increase the - # indentation to get alignment with the opening token? - my $move_right = $actual_pos - $predicted_pos; - - if (DEBUG_CORRECT_LP) { - my $tok = substr( $tokens_to_go[$i], 0, 8 ); - my $avail = $self->get_available_spaces_to_go($ibeg); - print -"CORRECT_LP for seq=$align_seqno, predicted pos=$predicted_pos actual=$actual_pos => move right=$move_right available=$avail i=$i max=$max_index_to_go tok=$tok\n"; - } +########################################### +# CODE SECTION 11: Code to break long lists +########################################### - # nothing more to do if no error to correct (gnu2.t) - if ( $move_right == 0 ) { - $indentation->set_recoverable_spaces($move_right); - next; - } +{ ## begin closure break_lists - # Get any collapsed length defined for -xlp - my $collapsed_length = - $self->[_rcollapsed_length_by_seqno_]->{$align_seqno}; - $collapsed_length = 0 unless ( defined($collapsed_length) ); + # These routines and variables are involved in finding good + # places to break long lists. - if (DEBUG_CORRECT_LP) { - print -"CORRECT_LP for seq=$align_seqno, collapsed length is $collapsed_length\n"; - } + use constant DEBUG_BREAK_LISTS => 0; - # if we have not seen closure for this indentation in this batch, - # and do not have a collapsed length estimate, we can only pass on - # a request to the vertical aligner - if ( $closing_index < 0 && !$collapsed_length ) { - $indentation->set_recoverable_spaces($move_right); - next; - } + my ( - # If necessary, look ahead to see if there is really any leading - # whitespace dependent on this whitespace, and also find the - # longest line using this whitespace. Since it is always safe to - # move left if there are no dependents, we only need to do this if - # we may have dependent nodes or need to move right. + $block_type, + $current_depth, + $depth, + $i, + $i_last_colon, + $i_line_end, + $i_line_start, + $i_last_nonblank_token, + $last_nonblank_block_type, + $last_nonblank_token, + $last_nonblank_type, + $last_old_breakpoint_count, + $minimum_depth, + $next_nonblank_block_type, + $next_nonblank_token, + $next_nonblank_type, + $old_breakpoint_count, + $starting_breakpoint_count, + $starting_depth, + $token, + $type, + $type_sequence, - my $have_child = $indentation->get_have_child(); - my %saw_indentation; - my $line_count = 1; - $saw_indentation{$indentation} = $indentation; + ); - # How far can we move right before we hit the limit? - # let $right_margen = the number of spaces that we can increase - # the current indentation before hitting the maximum line length. - my $right_margin = 0; + my ( - if ( $have_child || $move_right > 0 ) { - $have_child = 0; + @breakpoint_stack, + @breakpoint_undo_stack, + @comma_index, + @container_type, + @identifier_count_stack, + @index_before_arrow, + @interrupted_list, + @item_count_stack, + @last_comma_index, + @last_dot_index, + @last_nonblank_type, + @old_breakpoint_count_stack, + @opening_structure_index_stack, + @rfor_semicolon_list, + @has_old_logical_breakpoints, + @rand_or_list, + @i_equals, + @override_cab3, + @type_sequence_stack, - # include estimated collapsed length for incomplete containers - my $max_length = 0; - if ( $Kc > $K_to_go[$max_index_to_go] ) { - $max_length = $collapsed_length + $predicted_pos; - } + ); - if ( $i == $ibeg ) { - my $length = total_line_length( $ibeg, $iend ); - if ( $length > $max_length ) { $max_length = $length } - } + # these arrays must retain values between calls + my ( @has_broken_sublist, @dont_align, @want_comma_break ); - # look ahead at the rest of the lines of this batch.. - foreach my $line_t ( $line + 1 .. $max_line ) { - my $ibeg_t = $ri_first->[$line_t]; - my $iend_t = $ri_last->[$line_t]; - last if ( $closing_index <= $ibeg_t ); + my $length_tol; + my $lp_tol_boost; - # remember all different indentation objects - my $indentation_t = $leading_spaces_to_go[$ibeg_t]; - $saw_indentation{$indentation_t} = $indentation_t; - $line_count++; + sub initialize_break_lists { + @dont_align = (); + @has_broken_sublist = (); + @want_comma_break = (); - # remember longest line in the group - my $length_t = total_line_length( $ibeg_t, $iend_t ); - if ( $length_t > $max_length ) { - $max_length = $length_t; - } - } + #--------------------------------------------------- + # Set tolerances to prevent formatting instabilities + #--------------------------------------------------- - $right_margin = - $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] - - $max_length; - if ( $right_margin < 0 ) { $right_margin = 0 } - } + # Define tolerances to use when checking if closed + # containers will fit on one line. This is necessary to avoid + # formatting instability. The basic tolerance is based on the + # following: - my $first_line_comma_count = - grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ]; - my $comma_count = $indentation->get_comma_count(); - my $arrow_count = $indentation->get_arrow_count(); + # - Always allow for at least one extra space after a closing token so + # that we do not strand a comma or semicolon. (oneline.t). - # This is a simple approximate test for vertical alignment: - # if we broke just after an opening paren, brace, bracket, - # and there are 2 or more commas in the first line, - # and there are no '=>'s, - # then we are probably vertically aligned. We could set - # an exact flag in sub break_lists, but this is good - # enough. - my $indentation_count = keys %saw_indentation; - my $is_vertically_aligned = - ( $i == $ibeg - && $first_line_comma_count > 1 - && $indentation_count == 1 - && ( $arrow_count == 0 || $arrow_count == $line_count ) ); + # - Use an increased line length tolerance when -ci > -i to avoid + # blinking states (case b923 and others). + $length_tol = + 1 + max( 0, $rOpts_continuation_indentation - $rOpts_indent_columns ); - # Make the move if possible .. - if ( + # In addition, it may be necessary to use a few extra tolerance spaces + # when -lp is used and/or when -xci is used. The history of this + # so far is as follows: - # we can always move left - $move_right < 0 + # FIX1: At least 3 characters were been found to be required for -lp + # to fixes cases b1059 b1063 b1117. - # -xlp + # FIX2: Further testing showed that we need a total of 3 extra spaces + # when -lp is set for non-lists, and at least 2 spaces when -lp and + # -xci are set. + # Fixes cases b1063 b1103 b1134 b1135 b1136 b1138 b1140 b1143 b1144 + # b1145 b1146 b1147 b1148 b1151 b1152 b1153 b1154 b1156 b1157 b1164 + # b1165 - # incomplete container - || ( $rOpts_extended_line_up_parentheses - && $Kc > $K_to_go[$max_index_to_go] ) - || $closing_index < 0 + # FIX3: To fix cases b1169 b1170 b1171, an update was made in sub + # 'find_token_starting_list' to go back before an initial blank space. + # This fixed these three cases, and allowed the tolerances to be + # reduced to continue to fix all other known cases of instability. + # This gives the current tolerance formulation. - # but we should only move right if we are sure it will - # not spoil vertical alignment - || ( $comma_count == 0 ) - || ( $comma_count > 0 && !$is_vertically_aligned ) - ) - { - my $move = - ( $move_right <= $right_margin ) - ? $move_right - : $right_margin; + $lp_tol_boost = 0; - if (DEBUG_CORRECT_LP) { - print - "CORRECT_LP for seq=$align_seqno, moving $move spaces\n"; - } + if ($rOpts_line_up_parentheses) { - foreach ( keys %saw_indentation ) { - $saw_indentation{$_} - ->permanently_decrease_available_spaces( -$move ); - } + # boost tol for combination -lp -xci + if ($rOpts_extended_continuation_indentation) { + $lp_tol_boost = 2; } - # Otherwise, record what we want and the vertical aligner - # will try to recover it. + # boost tol for combination -lp and any -vtc > 0, but only for + # non-list containers else { - $indentation->set_recoverable_spaces($move_right); + foreach ( keys %closing_vertical_tightness ) { + next + unless ( $closing_vertical_tightness{$_} ); + $lp_tol_boost = 1; # Fixes B1193; + last; + } } - } ## end loop over tokens in a line - } ## end loop over lines - return $do_not_pad; -} ## end sub correct_lp_indentation + } -sub undo_lp_ci { + # Define a level where list formatting becomes highly stressed and + # needs to be simplified. Introduced for case b1262. + # $list_stress_level = min($stress_level_alpha, $stress_level_beta + 2); + # This is now '$high_stress_level'. - # If there is a single, long parameter within parens, like this: - # - # $self->command( "/msg " - # . $infoline->chan - # . " You said $1, but did you know that it's square was " - # . $1 * $1 . " ?" ); - # - # we can remove the continuation indentation of the 2nd and higher lines - # to achieve this effect, which is more pleasing: - # - # $self->command("/msg " - # . $infoline->chan - # . " You said $1, but did you know that it's square was " - # . $1 * $1 . " ?"); + return; + } ## end sub initialize_break_lists - my ( $self, $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = - @_; - my $max_line = @{$ri_first} - 1; + # routine to define essential variables when we go 'up' to + # a new depth + sub check_for_new_minimum_depth { + my ( $self, $depth_t, $seqno ) = @_; + if ( $depth_t < $minimum_depth ) { - # must be multiple lines - return unless $max_line > $line_open; + $minimum_depth = $depth_t; - my $lev_start = $levels_to_go[$i_start]; - my $ci_start_plus = 1 + $ci_levels_to_go[$i_start]; + # these arrays need not retain values between calls + $type_sequence_stack[$depth_t] = $seqno; + $override_cab3[$depth_t] = undef; + if ( $rOpts_comma_arrow_breakpoints == 3 && $seqno ) { + $override_cab3[$depth_t] = $self->[_roverride_cab3_]->{$seqno}; + } + $breakpoint_stack[$depth_t] = $starting_breakpoint_count; + $container_type[$depth_t] = EMPTY_STRING; + $identifier_count_stack[$depth_t] = 0; + $index_before_arrow[$depth_t] = -1; + $interrupted_list[$depth_t] = 1; + $item_count_stack[$depth_t] = 0; + $last_nonblank_type[$depth_t] = EMPTY_STRING; + $opening_structure_index_stack[$depth_t] = -1; - # see if all additional lines in this container have continuation - # indentation - my $line_1 = 1 + $line_open; - my $n = $line_open; + $breakpoint_undo_stack[$depth_t] = undef; + $comma_index[$depth_t] = undef; + $last_comma_index[$depth_t] = undef; + $last_dot_index[$depth_t] = undef; + $old_breakpoint_count_stack[$depth_t] = undef; + $has_old_logical_breakpoints[$depth_t] = 0; + $rand_or_list[$depth_t] = []; + $rfor_semicolon_list[$depth_t] = []; + $i_equals[$depth_t] = -1; - while ( ++$n <= $max_line ) { - my $ibeg = $ri_first->[$n]; - my $iend = $ri_last->[$n]; - if ( $ibeg eq $closing_index ) { $n--; last } - return if ( $lev_start != $levels_to_go[$ibeg] ); - return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] ); - last if ( $closing_index <= $iend ); - } + # these arrays must retain values between calls + if ( !defined( $has_broken_sublist[$depth_t] ) ) { + $dont_align[$depth_t] = 0; + $has_broken_sublist[$depth_t] = 0; + $want_comma_break[$depth_t] = 0; + } + } + return; + } ## end sub check_for_new_minimum_depth - # we can reduce the indentation of all continuation lines - my $continuation_line_count = $n - $line_open; - @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] = - (0) x ($continuation_line_count); - @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] = - @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ]; - return; -} ## end sub undo_lp_ci + # routine to decide which commas to break at within a container; + # returns: + # $bp_count = number of comma breakpoints set + # $do_not_break_apart = a flag indicating if container need not + # be broken open + sub set_comma_breakpoints { -############################################### -# CODE SECTION 10: Code to break long statments -############################################### + my ( $self, $dd, $rbond_strength_bias ) = @_; + my $bp_count = 0; + my $do_not_break_apart = 0; -sub break_long_lines { + # anything to do? + if ( $item_count_stack[$dd] ) { - #----------------------------------------------------------- - # Break a batch of tokens into lines which do not exceed the - # maximum line length. - #----------------------------------------------------------- + # Do not break a list unless there are some non-line-ending commas. + # This avoids getting different results with only non-essential + # commas, and fixes b1192. + my $seqno = $type_sequence_stack[$dd]; - # Define an array of indexes for inserting newline characters to - # keep the line lengths below the maximum desired length. There is - # an implied break after the last token, so it need not be included. + my $real_comma_count = + $seqno ? $self->[_rtype_count_by_seqno_]->{$seqno}->{','} : 1; - # Method: - # This routine is part of series of routines which adjust line - # lengths. It is only called if a statement is longer than the - # maximum line length, or if a preliminary scanning located - # desirable break points. Sub break_lists has already looked at - # these tokens and set breakpoints (in array - # $forced_breakpoint_to_go[$i]) where it wants breaks (for example - # after commas, after opening parens, and before closing parens). - # This routine will honor these breakpoints and also add additional - # breakpoints as necessary to keep the line length below the maximum - # requested. It bases its decision on where the 'bond strength' is - # lowest. + # handle commas not in containers... + if ( $dont_align[$dd] ) { + $self->do_uncontained_comma_breaks( $dd, $rbond_strength_bias ); + } - # Output: returns references to the arrays: - # @i_first - # @i_last - # which contain the indexes $i of the first and last tokens on each - # line. + # handle commas within containers... + elsif ($real_comma_count) { + my $fbc = $forced_breakpoint_count; - # In addition, the array: - # $forced_breakpoint_to_go[$i] - # may be updated to be =1 for any index $i after which there must be - # a break. This signals later routines not to undo the breakpoint. + # always open comma lists not preceded by keywords, + # barewords, identifiers (that is, anything that doesn't + # look like a function call) + my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/; - my ( $self, $saw_good_break, $rcolon_list, $rbond_strength_bias ) = @_; + $self->table_maker( + { + depth => $dd, + i_opening_paren => $opening_structure_index_stack[$dd], + i_closing_paren => $i, + item_count => $item_count_stack[$dd], + identifier_count => $identifier_count_stack[$dd], + rcomma_index => $comma_index[$dd], + next_nonblank_type => $next_nonblank_type, + list_type => $container_type[$dd], + interrupted => $interrupted_list[$dd], + rdo_not_break_apart => \$do_not_break_apart, + must_break_open => $must_break_open, + has_broken_sublist => $has_broken_sublist[$dd], + } + ); + $bp_count = $forced_breakpoint_count - $fbc; + $do_not_break_apart = 0 if $must_break_open; + } + } + return ( $bp_count, $do_not_break_apart ); + } ## end sub set_comma_breakpoints - # @{$rcolon_list} is a list of all the ? and : tokens in the batch, in - # order. + # These types are excluded at breakpoints to prevent blinking + # Switched from excluded to included as part of fix for b1214 + my %is_uncontained_comma_break_included_type; - use constant DEBUG_BREAK_LINES => 0; + BEGIN { - my @i_first = (); # the first index to output - my @i_last = (); # the last index to output - my @i_colon_breaks = (); # needed to decide if we have to break at ?'s - if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 } + my @q = qw< k R } ) ] Y Z U w i q Q . + = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=>; + @is_uncontained_comma_break_included_type{@q} = (1) x scalar(@q); + } ## end BEGIN - my $rbond_strength_to_go = $self->set_bond_strengths(); + sub do_uncontained_comma_breaks { - # Add any comma bias set by break_lists - if ( @{$rbond_strength_bias} ) { - foreach my $item ( @{$rbond_strength_bias} ) { - my ( $ii, $bias ) = @{$item}; - if ( $ii >= 0 && $ii <= $max_index_to_go ) { - $rbond_strength_to_go->[$ii] += $bias; - } - elsif (DEVEL_MODE) { - my $KK = $K_to_go[0]; - my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_]; - Fault( -"Bad bond strength bias near line $lno: i=$ii must be between 0 and $max_index_to_go\n" - ); + # Handle commas not in containers... + # This is a catch-all routine for commas that we + # don't know what to do with because the don't fall + # within containers. We will bias the bond strength + # to break at commas which ended lines in the input + # file. This usually works better than just trying + # to put as many items on a line as possible. A + # downside is that if the input file is garbage it + # won't work very well. However, the user can always + # prevent following the old breakpoints with the + # -iob flag. + my ( $self, $dd, $rbond_strength_bias ) = @_; + + # Check added for issue c131; an error here would be due to an + # error initializing @comma_index when entering depth $dd. + if (DEVEL_MODE) { + foreach my $ii ( @{ $comma_index[$dd] } ) { + if ( $ii < 0 || $ii > $max_index_to_go ) { + my $KK = $K_to_go[0]; + my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_]; + Fault(< 2 ); + if ( $old_breakpoint_to_go[$ii] ) { + $old_comma_break_count++; - my $Msg = EMPTY_STRING; + # Store the bias info for use by sub set_bond_strength + push @{$rbond_strength_bias}, [ $ii, $bias ]; - #------------------------------------------------------- - # BEGINNING of main loop to set continuation breakpoints - # Keep iterating until we reach the end - #------------------------------------------------------- - while ( $i_begin <= $imax ) { - my $lowest_strength = NO_BREAK; - my $starting_sum = $summed_lengths_to_go[$i_begin]; - my $i_lowest = -1; - my $i_test = -1; - my $lowest_next_token = EMPTY_STRING; - my $lowest_next_type = 'b'; - my $i_lowest_next_nonblank = -1; - my $maximum_line_length = - $maximum_line_length_at_level[ $levels_to_go[$i_begin] ]; - - # Do not separate an isolated bare word from an opening paren. - # Alternate Fix #2 for issue b1299. This waits as long as possible - # to make the decision. - if ( $types_to_go[$i_begin] eq 'i' - && substr( $tokens_to_go[$i_begin], 0, 1 ) =~ /\w/ ) - { - my $i_next_nonblank = $inext_to_go[$i_begin]; - if ( $tokens_to_go[$i_next_nonblank] eq '(' ) { - $rbond_strength_to_go->[$i_begin] = NO_BREAK; + # reduce bias magnitude to force breaks in order + $bias *= 0.99; } } - #------------------------------------------------------- - # BEGINNING of inner loop to find the best next breakpoint - #------------------------------------------------------- - my $strength = NO_BREAK; - $i_test = $i_begin - 1; - while ( ++$i_test <= $imax ) { - my $type = $types_to_go[$i_test]; - my $token = $tokens_to_go[$i_test]; - my $next_type = $types_to_go[ $i_test + 1 ]; - my $next_token = $tokens_to_go[ $i_test + 1 ]; - my $i_next_nonblank = $inext_to_go[$i_test]; - my $next_nonblank_type = $types_to_go[$i_next_nonblank]; - my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; - my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; - - # adjustments to the previous bond strength may have been made, and - # we must keep the bond strength of a token and its following blank - # the same; - my $last_strength = $strength; - $strength = $rbond_strength_to_go->[$i_test]; - if ( $type eq 'b' ) { $strength = $last_strength } - - # reduce strength a bit to break ties at an old comma breakpoint ... - if ( - - $old_breakpoint_to_go[$i_test] - - # Patch: limited to just commas to avoid blinking states - && $type eq ',' + # Also put a break before the first comma if + # (1) there was a break there in the input, and + # (2) there was exactly one old break before the first comma break + # (3) OLD: there are multiple old comma breaks + # (3) NEW: there are one or more old comma breaks (see return example) + # (4) the first comma is at the starting level ... + # ... fixes cases b064 b065 b068 b210 b747 + # (5) the batch does not start with a ci>0 [ignore a ci change by -xci] + # ... fixes b1220. If ci>0 we are in the middle of a snippet, + # maybe because -boc has been forcing out previous lines. - # which is a 'good' breakpoint, meaning ... - # we don't want to break before it - && !$want_break_before{$type} + # For example, we will follow the user and break after + # 'print' in this snippet: + # print + # "conformability (Not the same dimension)\n", + # "\t", $have, " is ", text_unit($hu), "\n", + # "\t", $want, " is ", text_unit($wu), "\n", + # ; + # + # Another example, just one comma, where we will break after + # the return: + # return + # $x * cos($a) - $y * sin($a), + # $x * sin($a) + $y * cos($a); - # and either we want to break before the next token - # or the next token is not short (i.e. not a '*', '/' etc.) - && $i_next_nonblank <= $imax - && ( $want_break_before{$next_nonblank_type} - || $token_lengths_to_go[$i_next_nonblank] > 2 - || $next_nonblank_type eq ',' - || $is_opening_type{$next_nonblank_type} ) - ) - { - $strength -= $tiny_bias; - DEBUG_BREAK_LINES && do { $Msg .= " :-bias at i=$i_test" }; - } + # Breaking a print statement: + # print SAVEOUT + # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "", + # ( $? & 128 ) ? " -- core dumped" : "", "\n"; + # + # But we will not force a break after the opening paren here + # (causes a blinker): + # $heap->{stream}->set_output_filter( + # poe::filter::reference->new('myotherfreezer') ), + # ; + # + my $i_first_comma = $comma_index[$dd]->[0]; + my $level_comma = $levels_to_go[$i_first_comma]; + my $ci_start = $ci_levels_to_go[0]; - # otherwise increase strength a bit if this token would be at the - # maximum line length. This is necessary to avoid blinking - # in the above example when the -iob flag is added. - else { - my $len = - $leading_spaces + - $summed_lengths_to_go[ $i_test + 1 ] - - $starting_sum; - if ( $len >= $maximum_line_length ) { - $strength += $tiny_bias; - DEBUG_BREAK_LINES && do { $Msg .= " :+bias at i=$i_test" }; + # Here we want to use the value of ci before any -xci adjustment + if ( $ci_start && $rOpts_extended_continuation_indentation ) { + my $K0 = $K_to_go[0]; + if ( $self->[_rseqno_controlling_my_ci_]->{$K0} ) { $ci_start = 0 } + } + if ( !$ci_start + && $old_breakpoint_to_go[$i_first_comma] + && $level_comma == $levels_to_go[0] ) + { + my $ibreak = -1; + my $obp_count = 0; + foreach my $ii ( reverse( 0 .. $i_first_comma - 1 ) ) { + if ( $old_breakpoint_to_go[$ii] ) { + $obp_count++; + last if ( $obp_count > 1 ); + $ibreak = $ii + if ( $levels_to_go[$ii] == $level_comma ); } } - my $must_break = 0; - - # Force an immediate break at certain operators - # with lower level than the start of the line, - # unless we've already seen a better break. - # - #------------------------------------ - # Note on an issue with a preceding ? - #------------------------------------ - # We don't include a ? in the above list, but there may - # be a break at a previous ? if the line is long. - # Because of this we do not want to force a break if - # there is a previous ? on this line. For now the best way - # to do this is to not break if we have seen a lower strength - # point, which is probably a ?. - # - # Example of unwanted breaks we are avoiding at a '.' following a ? - # from pod2html using perltidy -gnu: - # ) - # ? "\n<A NAME=\"" - # . $value - # . "\">\n$text</A>\n" - # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n"; - if ( - ( $strength <= $lowest_strength ) - && ( $nesting_depth_to_go[$i_begin] > - $nesting_depth_to_go[$i_next_nonblank] ) - && ( - $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/ - || ( - $next_nonblank_type eq 'k' - - ## /^(and|or)$/ # note: includes 'xor' now - && $is_and_or{$next_nonblank_token} - ) - ) - ) + # Changed rule from multiple old commas to just one here: + if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 ) { - $self->set_forced_breakpoint($i_next_nonblank); - DEBUG_BREAK_LINES - && do { $Msg .= " :Forced break at i=$i_next_nonblank" }; - } + my $ibreak_m = $ibreak; + $ibreak_m-- if ( $types_to_go[$ibreak_m] eq 'b' ); + if ( $ibreak_m >= 0 ) { - if ( + # In order to avoid blinkers we have to be fairly + # restrictive: - # Try to put a break where requested by break_lists - $forced_breakpoint_to_go[$i_test] + # OLD Rules: + # Rule 1: Do not to break before an opening token + # Rule 2: avoid breaking at ternary operators + # (see b931, which is similar to the above print example) + # Rule 3: Do not break at chain operators to fix case b1119 + # - The previous test was '$typem !~ /^[\(\{\[L\?\:]$/' - # break between ) { in a continued line so that the '{' can - # be outdented - # See similar logic in break_lists which catches instances - # where a line is just something like ') {'. We have to - # be careful because the corresponding block keyword might - # not be on the first line, such as 'for' here: - # - # eval { - # for ("a") { - # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ } - # } - # }; - # - || ( - $line_count - && ( $token eq ')' ) - && ( $next_nonblank_type eq '{' ) - && ($next_nonblank_block_type) - && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] ) - - # RT #104427: Dont break before opening sub brace because - # sub block breaks handled at higher level, unless - # it looks like the preceding list is long and broken - && !( + # NEW Rule, replaced above rules after case b1214: + # only break at one of the included types - ( - $next_nonblank_block_type =~ /$SUB_PATTERN/ - || $next_nonblank_block_type =~ /$ASUB_PATTERN/ - ) - && ( $nesting_depth_to_go[$i_begin] == - $nesting_depth_to_go[$i_next_nonblank] ) - ) + # Be sure to test any changes to these rules against runs + # with -l=0 such as the 'bbvt' test (perltidyrc_colin) + # series. + my $type_m = $types_to_go[$ibreak_m]; - && !$rOpts_opening_brace_always_on_right - ) + # Switched from excluded to included for b1214. If necessary + # the token could also be checked if type_m eq 'k' + if ( $is_uncontained_comma_break_included_type{$type_m} ) { - # There is an implied forced break at a terminal opening brace - || ( ( $type eq '{' ) && ( $i_test == $imax ) ) - ) - { + # Rule added to fix b1449: + # Do not break before a '?' if -nbot is set + # Otherwise, we may alternately arrive here and + # set the break, or not, depending on the input. + my $no_break; + my $ibreak_p = $inext_to_go[$ibreak_m]; + if ( !$rOpts_break_at_old_ternary_breakpoints + && $ibreak_p <= $max_index_to_go ) + { + my $type_p = $types_to_go[$ibreak_p]; + $no_break = $type_p eq '?'; + } - # Forced breakpoints must sometimes be overridden, for example - # because of a side comment causing a NO_BREAK. It is easier - # to catch this here than when they are set. - if ( $strength < NO_BREAK - 1 ) { - $strength = $lowest_strength - $tiny_bias; - $must_break = 1; - DEBUG_BREAK_LINES - && do { $Msg .= " :set must_break at i=$i_next_nonblank" }; + $self->set_forced_breakpoint($ibreak) + if ( !$no_break ); + } } } + } + return; + } ## end sub do_uncontained_comma_breaks - # quit if a break here would put a good terminal token on - # the next line and we already have a possible break - if ( - !$must_break - && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' ) - && ( - ( - $leading_spaces + - $summed_lengths_to_go[ $i_next_nonblank + 1 ] - - $starting_sum - ) > $maximum_line_length - ) - ) - { - if ( $i_lowest >= 0 ) { - DEBUG_BREAK_LINES && do { - $Msg .= " :quit at good terminal='$next_nonblank_type'"; - }; - last; - } - } + my %is_logical_container; + my %quick_filter; - # Avoid a break which would strand a single punctuation - # token. For example, we do not want to strand a leading - # '.' which is followed by a long quoted string. - # But note that we do want to do this with -extrude (l=1) - # so please test any changes to this code on -extrude. - if ( - !$must_break - && ( $i_test == $i_begin ) - && ( $i_test < $imax ) - && ( $token eq $type ) - && ( - ( - $leading_spaces + - $summed_lengths_to_go[ $i_test + 1 ] - - $starting_sum - ) < $maximum_line_length - ) - ) - { - $i_test = min( $imax, $inext_to_go[$i_test] ); - DEBUG_BREAK_LINES && do { - $Msg .= " :redo at i=$i_test"; - }; - redo; - } + BEGIN { + my @q = qw# if elsif unless while and or err not && | || ? : ! #; + @is_logical_container{@q} = (1) x scalar(@q); - if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) ) - { + # This filter will allow most tokens to skip past a section of code + %quick_filter = %is_assignment; + @q = qw# => . ; < > ~ #; + push @q, ','; + push @q, 'f'; # added for ';' for issue c154 + @quick_filter{@q} = (1) x scalar(@q); + } ## end BEGIN - # break at previous best break if it would have produced - # a leading alignment of certain common tokens, and it - # is different from the latest candidate break - if ($leading_alignment_type) { - DEBUG_BREAK_LINES && do { - $Msg .= -" :last at leading_alignment='$leading_alignment_type'"; - }; - last; - } + sub set_for_semicolon_breakpoints { + my ( $self, $dd ) = @_; + foreach ( @{ $rfor_semicolon_list[$dd] } ) { + $self->set_forced_breakpoint($_); + } + return; + } ## end sub set_for_semicolon_breakpoints - # Force at least one breakpoint if old code had good - # break It is only called if a breakpoint is required or - # desired. This will probably need some adjustments - # over time. A goal is to try to be sure that, if a new - # side comment is introduced into formatted text, then - # the same breakpoints will occur. scbreak.t - if ( - $i_test == $imax # we are at the end - && !$forced_breakpoint_count - && $saw_good_break # old line had good break - && $type =~ /^[#;\{]$/ # and this line ends in - # ';' or side comment - && $i_last_break < 0 # and we haven't made a break - && $i_lowest >= 0 # and we saw a possible break - && $i_lowest < $imax - 1 # (but not just before this ;) - && $strength - $lowest_strength < 0.5 * WEAK # and it's good - ) - { + sub set_logical_breakpoints { + my ( $self, $dd ) = @_; + if ( + $item_count_stack[$dd] == 0 + && $is_logical_container{ $container_type[$dd] } - DEBUG_BREAK_LINES && do { - $Msg .= " :last at good old break\n"; - }; - last; - } + || $has_old_logical_breakpoints[$dd] + ) + { - # Do not skip past an important break point in a short final - # segment. For example, without this check we would miss the - # break at the final / in the following code: - # - # $depth_stop = - # ( $tau * $mass_pellet * $q_0 * - # ( 1. - exp( -$t_stop / $tau ) ) - - # 4. * $pi * $factor * $k_ice * - # ( $t_melt - $t_ice ) * - # $r_pellet * - # $t_stop ) / - # ( $rho_ice * $Qs * $pi * $r_pellet**2 ); - # - if ( - $line_count > 2 - && $i_lowest >= 0 # and we saw a possible break - && $i_lowest < $i_test - && $i_test > $imax - 2 - && $nesting_depth_to_go[$i_begin] > - $nesting_depth_to_go[$i_lowest] - && $lowest_strength < $last_break_strength - .5 * WEAK - ) - { - # Make this break for math operators for now - my $ir = $inext_to_go[$i_lowest]; - my $il = $iprev_to_go[$ir]; - if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/ - || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ ) - { - DEBUG_BREAK_LINES && do { - $Msg .= " :last-noskip_short"; - }; - last; + # Look for breaks in this order: + # 0 1 2 3 + # or and || && + foreach my $i ( 0 .. 3 ) { + if ( $rand_or_list[$dd][$i] ) { + foreach ( @{ $rand_or_list[$dd][$i] } ) { + $self->set_forced_breakpoint($_); } - } - # Update the minimum bond strength location - $lowest_strength = $strength; - $i_lowest = $i_test; - $lowest_next_token = $next_nonblank_token; - $lowest_next_type = $next_nonblank_type; - $i_lowest_next_nonblank = $i_next_nonblank; - if ($must_break) { - DEBUG_BREAK_LINES && do { - $Msg .= " :last-must_break"; - }; + # break at any 'if' and 'unless' too + foreach ( @{ $rand_or_list[$dd][4] } ) { + $self->set_forced_breakpoint($_); + } + $rand_or_list[$dd] = []; last; } + } + } + return; + } ## end sub set_logical_breakpoints - # set flags to remember if a break here will produce a - # leading alignment of certain common tokens - if ( $line_count > 0 - && $i_test < $imax - && ( $lowest_strength - $last_break_strength <= $max_bias ) - ) - { - my $i_last_end = $iprev_to_go[$i_begin]; - my $tok_beg = $tokens_to_go[$i_begin]; - my $type_beg = $types_to_go[$i_begin]; - if ( + sub is_unbreakable_container { - # check for leading alignment of certain tokens - ( - $tok_beg eq $next_nonblank_token - && $is_chain_operator{$tok_beg} - && ( $type_beg eq 'k' - || $type_beg eq $tok_beg ) - && $nesting_depth_to_go[$i_begin] >= - $nesting_depth_to_go[$i_next_nonblank] - ) + # never break a container of one of these types + # because bad things can happen (map1.t) + my $dd = shift; + return $is_sort_map_grep{ $container_type[$dd] }; + } ## end sub is_unbreakable_container + + sub break_lists { + + my ( $self, $is_long_line, $rbond_strength_bias ) = @_; + + #-------------------------------------------------------------------- + # This routine is called once per batch, if the batch is a list, to + # set line breaks so that hierarchical structure can be displayed and + # so that list items can be vertically aligned. The output of this + # routine is stored in the array @forced_breakpoint_to_go, which is + # used by sub 'break_long_lines' to set final breakpoints. This is + # probably the most complex routine in perltidy, so I have + # broken it into pieces and over-commented it. + #-------------------------------------------------------------------- + + $starting_depth = $nesting_depth_to_go[0]; + + $block_type = SPACE; + $current_depth = $starting_depth; + $i = -1; + $i_last_colon = -1; + $i_line_end = -1; + $i_line_start = -1; + $last_nonblank_token = ';'; + $last_nonblank_type = ';'; + $last_nonblank_block_type = SPACE; + $last_old_breakpoint_count = 0; + $minimum_depth = $current_depth + 1; # forces update in check below + $old_breakpoint_count = 0; + $starting_breakpoint_count = $forced_breakpoint_count; + $token = ';'; + $type = ';'; + $type_sequence = EMPTY_STRING; - || ( $tokens_to_go[$i_last_end] eq $token - && $is_chain_operator{$token} - && ( $type eq 'k' || $type eq $token ) - && $nesting_depth_to_go[$i_last_end] >= - $nesting_depth_to_go[$i_test] ) - ) - { - $leading_alignment_token = $next_nonblank_token; - $leading_alignment_type = $next_nonblank_type; - } - } - } + my $total_depth_variation = 0; + my $i_old_assignment_break; + my $depth_last = $starting_depth; + my $comma_follows_last_closing_token; - my $too_long = ( $i_test >= $imax ); - if ( !$too_long ) { - my $next_length = - $leading_spaces + - $summed_lengths_to_go[ $i_test + 2 ] - - $starting_sum; - $too_long = $next_length > $maximum_line_length; + $self->check_for_new_minimum_depth( $current_depth, + $parent_seqno_to_go[0] ) + if ( $current_depth < $minimum_depth ); - # To prevent blinkers we will avoid leaving a token exactly at - # the line length limit unless it is the last token or one of - # several "good" types. - # - # The following code was a blinker with -pbp before this - # modification: -## $last_nonblank_token eq '(' -## && $is_indirect_object_taker{ $paren_type -## [$paren_depth] } - # The issue causing the problem is that if the - # term [$paren_depth] gets broken across a line then - # the whitespace routine doesn't see both opening and closing - # brackets and will format like '[ $paren_depth ]'. This - # leads to an oscillation in length depending if we break - # before the closing bracket or not. - if ( !$too_long - && $i_test + 1 < $imax - && $next_nonblank_type ne ',' - && !$is_closing_type{$next_nonblank_type} ) - { - $too_long = $next_length >= $maximum_line_length; - DEBUG_BREAK_LINES && do { - $Msg .= " :too_long=$too_long" if ($too_long); - } - } - } + my $i_want_previous_break = -1; - DEBUG_BREAK_LINES && do { - my $ltok = $token; - my $rtok = - $next_nonblank_token ? $next_nonblank_token : EMPTY_STRING; - my $i_testp2 = $i_test + 2; - if ( $i_testp2 > $max_index_to_go + 1 ) { - $i_testp2 = $max_index_to_go + 1; - } - if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) } - if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) } - print STDOUT -"BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n"; - }; + my $saw_good_breakpoint; - # allow one extra terminal token after exceeding line length - # if it would strand this token. - if ( $rOpts_fuzzy_line_length - && $too_long - && $i_lowest == $i_test - && $token_lengths_to_go[$i_test] > 1 - && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' ) - ) - { - $too_long = 0; - DEBUG_BREAK_LINES && do { - $Msg .= " :do_not_strand next='$next_nonblank_type'"; - }; + #---------------------------------------- + # Main loop over all tokens in this batch + #---------------------------------------- + while ( ++$i <= $max_index_to_go ) { + if ( $type ne 'b' ) { + $i_last_nonblank_token = $i - 1; + $last_nonblank_type = $type; + $last_nonblank_token = $token; + $last_nonblank_block_type = $block_type; } + $type = $types_to_go[$i]; + $block_type = $block_type_to_go[$i]; + $token = $tokens_to_go[$i]; + $type_sequence = $type_sequence_to_go[$i]; - # we are done if... - if ( + my $i_next_nonblank = $inext_to_go[$i]; + $next_nonblank_type = $types_to_go[$i_next_nonblank]; + $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; + $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; - # ... no more space and we have a break - $too_long && $i_lowest >= 0 + #------------------------------------------- + # Loop Section A: Look for special breakpoints... + #------------------------------------------- - # ... or no more tokens - || $i_test == $imax - ) - { - DEBUG_BREAK_LINES && do { - $Msg .= -" :Done-too_long=$too_long or i_lowest=$i_lowest or $i_test==imax"; - }; - last; + # set break if flag was set + if ( $i_want_previous_break >= 0 ) { + $self->set_forced_breakpoint($i_want_previous_break); + $i_want_previous_break = -1; } - } - #------------------------------------------------------- - # END of inner loop to find the best next breakpoint - # Now decide exactly where to put the breakpoint - #------------------------------------------------------- + $last_old_breakpoint_count = $old_breakpoint_count; - # it's always ok to break at imax if no other break was found - if ( $i_lowest < 0 ) { $i_lowest = $imax } + # Check for a good old breakpoint .. + if ( $old_breakpoint_to_go[$i] ) { + ( $i_want_previous_break, $i_old_assignment_break ) = + $self->examine_old_breakpoint( $i_next_nonblank, + $i_want_previous_break, $i_old_assignment_break ); + } - # semi-final index calculation - my $i_next_nonblank = $inext_to_go[$i_lowest]; - my $next_nonblank_type = $types_to_go[$i_next_nonblank]; - my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; + next if ( $type eq 'b' ); - #------------------------------------------------------- - # ?/: rule 1 : if a break here will separate a '?' on this - # line from its closing ':', then break at the '?' instead. - #------------------------------------------------------- - foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) { - next unless ( $tokens_to_go[$i] eq '?' ); + $depth = $nesting_depth_to_go[ $i + 1 ]; - # do not break if probable sequence of ?/: statements - next if ($is_colon_chain); + $total_depth_variation += abs( $depth - $depth_last ); + $depth_last = $depth; - # do not break if statement is broken by side comment - next - if ( $tokens_to_go[$max_index_to_go] eq '#' - && terminal_type_i( 0, $max_index_to_go ) !~ /^[\;\}]$/ ); + # safety check - be sure we always break after a comment + # Shouldn't happen .. an error here probably means that the + # nobreak flag did not get turned off correctly during + # formatting. + if ( $type eq '#' ) { + if ( $i != $max_index_to_go ) { + if (DEVEL_MODE) { + Fault(<set_forced_breakpoint($i); + } ## end if ( $i != $max_index_to_go) + } ## end if ( $type eq '#' ) - # no break needed if matching : is also on the line - next - if ( $mate_index_to_go[$i] >= 0 - && $mate_index_to_go[$i] <= $i_next_nonblank ); + # Force breakpoints at certain tokens in long lines. + # Note that such breakpoints will be undone later if these tokens + # are fully contained within parens on a line. + if ( - $i_lowest = $i; - if ( $want_break_before{'?'} ) { $i_lowest-- } - last; - } + # break before a keyword within a line + $type eq 'k' + && $i > 0 - #------------------------------------------------------- - # END of inner loop to find the best next breakpoint: - # Break the line after the token with index i=$i_lowest - #------------------------------------------------------- + # if one of these keywords: + && $is_if_unless_while_until_for_foreach{$token} - # final index calculation - $i_next_nonblank = $inext_to_go[$i_lowest]; - $next_nonblank_type = $types_to_go[$i_next_nonblank]; - $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; + # but do not break at something like '1 while' + && ( $last_nonblank_type ne 'n' || $i > 2 ) - DEBUG_BREAK_LINES - && print STDOUT -"BREAK: best is i = $i_lowest strength = $lowest_strength;\nReason>> $Msg\n"; - $Msg = EMPTY_STRING; + # and let keywords follow a closing 'do' brace + && ( !$last_nonblank_block_type + || $last_nonblank_block_type ne 'do' ) - #------------------------------------------------------- - # ?/: rule 2 : if we break at a '?', then break at its ':' - # - # Note: this rule is also in sub break_lists to handle a break - # at the start and end of a line (in case breaks are dictated - # by side comments). - #------------------------------------------------------- - if ( $next_nonblank_type eq '?' ) { - $self->set_closing_breakpoint($i_next_nonblank); - } - elsif ( $types_to_go[$i_lowest] eq '?' ) { - $self->set_closing_breakpoint($i_lowest); - } + && ( + $is_long_line - #------------------------------------------------------- - # ?/: rule 3 : if we break at a ':' then we save - # its location for further work below. We may need to go - # back and break at its '?'. - #------------------------------------------------------- - if ( $next_nonblank_type eq ':' ) { - push @i_colon_breaks, $i_next_nonblank; - } - elsif ( $types_to_go[$i_lowest] eq ':' ) { - push @i_colon_breaks, $i_lowest; - } + # or container is broken (by side-comment, etc) + || ( + $next_nonblank_token eq '(' + && ( !defined( $mate_index_to_go[$i_next_nonblank] ) + || $mate_index_to_go[$i_next_nonblank] < $i ) + ) + ) + ) + { + $self->set_forced_breakpoint( $i - 1 ); + } - # here we should set breaks for all '?'/':' pairs which are - # separated by this line + # remember locations of '||' and '&&' for possible breaks if we + # decide this is a long logical expression. + if ( $type eq '||' ) { + push @{ $rand_or_list[$depth][2] }, $i; + ++$has_old_logical_breakpoints[$depth] + if ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_logical_breakpoints ); + } + elsif ( $type eq '&&' ) { + push @{ $rand_or_list[$depth][3] }, $i; + ++$has_old_logical_breakpoints[$depth] + if ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_logical_breakpoints ); + } + elsif ( $type eq 'f' ) { + push @{ $rfor_semicolon_list[$depth] }, $i; + } + elsif ( $type eq 'k' ) { + if ( $token eq 'and' ) { + push @{ $rand_or_list[$depth][1] }, $i; + ++$has_old_logical_breakpoints[$depth] + if ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_logical_breakpoints ); + } - $line_count++; + # break immediately at 'or's which are probably not in a logical + # block -- but we will break in logical breaks below so that + # they do not add to the forced_breakpoint_count + elsif ( $token eq 'or' ) { + push @{ $rand_or_list[$depth][0] }, $i; + ++$has_old_logical_breakpoints[$depth] + if ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_logical_breakpoints ); + if ( $is_logical_container{ $container_type[$depth] } ) { + } + else { + if ($is_long_line) { $self->set_forced_breakpoint($i) } + elsif ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_logical_breakpoints ) + { + $saw_good_breakpoint = 1; + } + } + } + elsif ( $token eq 'if' || $token eq 'unless' ) { + push @{ $rand_or_list[$depth][4] }, $i; + if ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_logical_breakpoints ) + { + $self->set_forced_breakpoint($i); + } + } + } + elsif ( $is_assignment{$type} ) { + $i_equals[$depth] = $i; + } - # save this line segment, after trimming blanks at the ends - push( @i_first, - ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin ); - push( @i_last, - ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest ); + #----------------------------------------- + # Loop Section B: Handle a sequenced token + #----------------------------------------- + if ($type_sequence) { + $self->break_lists_type_sequence; + } - # set a forced breakpoint at a container opening, if necessary, to - # signal a break at a closing container. Excepting '(' for now. - if ( - ( - $tokens_to_go[$i_lowest] eq '{' - || $tokens_to_go[$i_lowest] eq '[' - ) - && !$forced_breakpoint_to_go[$i_lowest] - ) - { - $self->set_closing_breakpoint($i_lowest); - } + #------------------------------------------ + # Loop Section C: Handle Increasing Depth.. + #------------------------------------------ - # get ready to go again - $i_begin = $i_lowest + 1; - $last_break_strength = $lowest_strength; - $i_last_break = $i_lowest; - $leading_alignment_token = EMPTY_STRING; - $leading_alignment_type = EMPTY_STRING; - $lowest_next_token = EMPTY_STRING; - $lowest_next_type = 'b'; + # hardened against bad input syntax: depth jump must be 1 and type + # must be opening..fixes c102 + if ( $depth == $current_depth + 1 && $is_opening_type{$type} ) { + $self->break_lists_increasing_depth(); + } - if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) { - $i_begin++; - } + #------------------------------------------ + # Loop Section D: Handle Decreasing Depth.. + #------------------------------------------ - # update indentation size - if ( $i_begin <= $imax ) { - $leading_spaces = leading_spaces_to_go($i_begin); - DEBUG_BREAK_LINES - && print STDOUT - "updating leading spaces to be $leading_spaces at i=$i_begin\n"; - } - } + # hardened against bad input syntax: depth jump must be 1 and type + # must be closing .. fixes c102 + elsif ( $depth == $current_depth - 1 && $is_closing_type{$type} ) { - #------------------------------------------------------- - # END of main loop to set continuation breakpoints - # Now go back and make any necessary corrections - #------------------------------------------------------- + $self->break_lists_decreasing_depth(); - #------------------------------------------------------- - # ?/: rule 4 -- if we broke at a ':', then break at - # corresponding '?' unless this is a chain of ?: expressions - #------------------------------------------------------- - if (@i_colon_breaks) { + $comma_follows_last_closing_token = + $next_nonblank_type eq ',' || $next_nonblank_type eq '=>'; - # using a simple method for deciding if we are in a ?/: chain -- - # this is a chain if it has multiple ?/: pairs all in order; - # otherwise not. - # Note that if line starts in a ':' we count that above as a break - my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 ); + } - unless ($is_chain) { - my @insert_list = (); - foreach (@i_colon_breaks) { - my $i_question = $mate_index_to_go[$_]; - if ( $i_question >= 0 ) { - if ( $want_break_before{'?'} ) { - $i_question = $iprev_to_go[$i_question]; - } + #---------------------------------- + # Loop Section E: Handle this token + #---------------------------------- - if ( $i_question >= 0 ) { - push @insert_list, $i_question; - } - } - $self->insert_additional_breaks( \@insert_list, \@i_first, - \@i_last ); - } - } - } - return ( \@i_first, \@i_last, $rbond_strength_to_go ); -} ## end sub break_long_lines + $current_depth = $depth; -########################################### -# CODE SECTION 11: Code to break long lists -########################################### + # most token types can skip the rest of this loop + next unless ( $quick_filter{$type} ); -{ ## begin closure break_lists + # handle comma-arrow + if ( $type eq '=>' ) { + next if ( $last_nonblank_type eq '=>' ); + next if $rOpts_break_at_old_comma_breakpoints; + next + if ( $rOpts_comma_arrow_breakpoints == 3 + && !defined( $override_cab3[$depth] ) ); + $want_comma_break[$depth] = 1; + $index_before_arrow[$depth] = $i_last_nonblank_token; + next; + } - # These routines and variables are involved in finding good - # places to break long lists. + elsif ( $type eq '.' ) { + $last_dot_index[$depth] = $i; + } - use constant DEBUG_BREAK_LISTS => 0; + # Turn off comma alignment if we are sure that this is not a list + # environment. To be safe, we will do this if we see certain + # non-list tokens, such as ';', '=', and also the environment is + # not a list. + ## $type =~ /^[\;\<\>\~f]$/ || $is_assignment{$type} + elsif ( $is_non_list_type{$type} + && !$self->is_in_list_by_i($i) ) + { + $dont_align[$depth] = 1; + $want_comma_break[$depth] = 0; + $index_before_arrow[$depth] = -1; - my ( - $block_type, $current_depth, - $depth, $i, - $i_last_nonblank_token, $last_nonblank_token, - $last_nonblank_type, $last_nonblank_block_type, - $last_old_breakpoint_count, $minimum_depth, - $next_nonblank_block_type, $next_nonblank_token, - $next_nonblank_type, $old_breakpoint_count, - $starting_breakpoint_count, $starting_depth, - $token, $type, - $type_sequence, - ); + # no special comma breaks in C-style 'for' terms (c154) + if ( $type eq 'f' ) { $last_comma_index[$depth] = undef } + } - my ( - @breakpoint_stack, @breakpoint_undo_stack, - @comma_index, @container_type, - @identifier_count_stack, @index_before_arrow, - @interrupted_list, @item_count_stack, - @last_comma_index, @last_dot_index, - @last_nonblank_type, @old_breakpoint_count_stack, - @opening_structure_index_stack, @rfor_semicolon_list, - @has_old_logical_breakpoints, @rand_or_list, - @i_equals, @override_cab3, - @type_sequence_stack, - ); + # now just handle any commas + next if ( $type ne ',' ); + $self->study_comma($comma_follows_last_closing_token); - # these arrays must retain values between calls - my ( @has_broken_sublist, @dont_align, @want_comma_break ); + } ## end while ( ++$i <= $max_index_to_go) - my $length_tol; - my $lp_tol_boost; - my $list_stress_level; + #------------------------------------------- + # END of loop over all tokens in this batch + # Now set breaks for any unfinished lists .. + #------------------------------------------- - sub initialize_break_lists { - @dont_align = (); - @has_broken_sublist = (); - @want_comma_break = (); + foreach my $dd ( reverse( $minimum_depth .. $current_depth ) ) { - #--------------------------------------------------- - # Set tolerances to prevent formatting instabilities - #--------------------------------------------------- + $interrupted_list[$dd] = 1; + $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth ); + $self->set_comma_breakpoints( $dd, $rbond_strength_bias ) + if ( $item_count_stack[$dd] ); + $self->set_logical_breakpoints($dd) + if ( $has_old_logical_breakpoints[$dd] ); + $self->set_for_semicolon_breakpoints($dd); - # Define tolerances to use when checking if closed - # containers will fit on one line. This is necessary to avoid - # formatting instability. The basic tolerance is based on the - # following: + # break open container... + my $i_opening = $opening_structure_index_stack[$dd]; + if ( defined($i_opening) && $i_opening >= 0 ) { + $self->set_forced_breakpoint($i_opening) + unless ( + is_unbreakable_container($dd) - # - Always allow for at least one extra space after a closing token so - # that we do not strand a comma or semicolon. (oneline.t). + # Avoid a break which would place an isolated ' or " + # on a line + || ( $type eq 'Q' + && $i_opening >= $max_index_to_go - 2 + && ( $token eq "'" || $token eq '"' ) ) + ); + } + } ## end for ( my $dd = $current_depth...) - # - Use an increased line length tolerance when -ci > -i to avoid - # blinking states (case b923 and others). - $length_tol = - 1 + max( 0, $rOpts_continuation_indentation - $rOpts_indent_columns ); + #---------------------------------------- + # Return the flag '$saw_good_breakpoint'. + #---------------------------------------- + # This indicates if the input file had some good breakpoints. This + # flag will be used to force a break in a line shorter than the + # allowed line length. + if ( $has_old_logical_breakpoints[$current_depth] ) { + $saw_good_breakpoint = 1; + } - # In addition, it may be necessary to use a few extra tolerance spaces - # when -lp is used and/or when -xci is used. The history of this - # so far is as follows: + # A complex line with one break at an = has a good breakpoint. + # This is not complex ($total_depth_variation=0): + # $res1 + # = 10; + # + # This is complex ($total_depth_variation=6): + # $res2 = + # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert')); - # FIX1: At least 3 characters were been found to be required for -lp - # to fixes cases b1059 b1063 b1117. + # The check ($i_old_.. < $max_index_to_go) was added to fix b1333 + elsif ($i_old_assignment_break + && $total_depth_variation > 4 + && $old_breakpoint_count == 1 + && $i_old_assignment_break < $max_index_to_go ) + { + $saw_good_breakpoint = 1; + } - # FIX2: Further testing showed that we need a total of 3 extra spaces - # when -lp is set for non-lists, and at least 2 spaces when -lp and - # -xci are set. - # Fixes cases b1063 b1103 b1134 b1135 b1136 b1138 b1140 b1143 b1144 - # b1145 b1146 b1147 b1148 b1151 b1152 b1153 b1154 b1156 b1157 b1164 - # b1165 + return $saw_good_breakpoint; + } ## end sub break_lists - # FIX3: To fix cases b1169 b1170 b1171, an update was made in sub - # 'find_token_starting_list' to go back before an initial blank space. - # This fixed these three cases, and allowed the tolerances to be - # reduced to continue to fix all other known cases of instability. - # This gives the current tolerance formulation. + sub study_comma { - $lp_tol_boost = 0; + # study and store info for a list comma - if ($rOpts_line_up_parentheses) { + my ( $self, $comma_follows_last_closing_token ) = @_; - # boost tol for combination -lp -xci - if ($rOpts_extended_continuation_indentation) { - $lp_tol_boost = 2; - } + $last_dot_index[$depth] = undef; + $last_comma_index[$depth] = $i; - # boost tol for combination -lp and any -vtc > 0, but only for - # non-list containers - else { - foreach ( keys %closing_vertical_tightness ) { - next - unless ( $closing_vertical_tightness{$_} ); - $lp_tol_boost = 1; # Fixes B1193; - last; + # break here if this comma follows a '=>' + # but not if there is a side comment after the comma + if ( $want_comma_break[$depth] ) { + + if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) { + if ($rOpts_comma_arrow_breakpoints) { + $want_comma_break[$depth] = 0; + return; } } - } - # Define a level where list formatting becomes highly stressed and - # needs to be simplified. Introduced for case b1262. - $list_stress_level = min( $stress_level_alpha, $stress_level_beta + 2 ); + $self->set_forced_breakpoint($i) + unless ( $next_nonblank_type eq '#' ); - return; - } ## end sub initialize_break_lists + # break before the previous token if it looks safe + # Example of something that we will not try to break before: + # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt}, + # Also we don't want to break at a binary operator (like +): + # $c->createOval( + # $x + $R, $y + + # $R => $x - $R, + # $y - $R, -fill => 'black', + # ); + my $ibreak = $index_before_arrow[$depth] - 1; + if ( $ibreak > 0 + && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ ) + { + if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- } + if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- } + if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) { + + # don't break before a comma, as in the following: + # ( LONGER_THAN,=> 1, + # EIGHTY_CHARACTERS,=> 2, + # CAUSES_FORMATTING,=> 3, + # LIKE_THIS,=> 4, + # ); + # This example is for -tso but should be general rule + if ( $tokens_to_go[ $ibreak + 1 ] ne '->' + && $tokens_to_go[ $ibreak + 1 ] ne ',' ) + { + $self->set_forced_breakpoint($ibreak); + } + } + } - # routine to define essential variables when we go 'up' to - # a new depth - sub check_for_new_minimum_depth { - my ( $self, $depth_t, $seqno ) = @_; - if ( $depth_t < $minimum_depth ) { + $want_comma_break[$depth] = 0; + $index_before_arrow[$depth] = -1; - $minimum_depth = $depth_t; + # handle list which mixes '=>'s and ','s: + # treat any list items so far as an interrupted list + $interrupted_list[$depth] = 1; + return; + } - # these arrays need not retain values between calls - $type_sequence_stack[$depth_t] = $seqno; - $override_cab3[$depth_t] = - $rOpts_comma_arrow_breakpoints == 3 - && $seqno - && $self->[_roverride_cab3_]->{$seqno}; + # Break after all commas above starting depth... + # But only if the last closing token was followed by a comma, + # to avoid breaking a list operator (issue c119) + if ( $depth < $starting_depth + && $comma_follows_last_closing_token + && !$dont_align[$depth] ) + { + $self->set_forced_breakpoint($i) + unless ( $next_nonblank_type eq '#' ); + return; + } - $override_cab3[$depth_t] = undef; - $breakpoint_stack[$depth_t] = $starting_breakpoint_count; - $container_type[$depth_t] = EMPTY_STRING; - $identifier_count_stack[$depth_t] = 0; - $index_before_arrow[$depth_t] = -1; - $interrupted_list[$depth_t] = 1; - $item_count_stack[$depth_t] = 0; - $last_nonblank_type[$depth_t] = EMPTY_STRING; - $opening_structure_index_stack[$depth_t] = -1; + # add this comma to the list.. + my $item_count = $item_count_stack[$depth]; + if ( $item_count == 0 ) { - $breakpoint_undo_stack[$depth_t] = undef; - $comma_index[$depth_t] = undef; - $last_comma_index[$depth_t] = undef; - $last_dot_index[$depth_t] = undef; - $old_breakpoint_count_stack[$depth_t] = undef; - $has_old_logical_breakpoints[$depth_t] = 0; - $rand_or_list[$depth_t] = []; - $rfor_semicolon_list[$depth_t] = []; - $i_equals[$depth_t] = -1; + # but do not form a list with no opening structure + # for example: - # these arrays must retain values between calls - if ( !defined( $has_broken_sublist[$depth_t] ) ) { - $dont_align[$depth_t] = 0; - $has_broken_sublist[$depth_t] = 0; - $want_comma_break[$depth_t] = 0; + # open INFILE_COPY, ">$input_file_copy" + # or die ("very long message"); + if ( ( $opening_structure_index_stack[$depth] < 0 ) + && $self->is_in_block_by_i($i) ) + { + $dont_align[$depth] = 1; } } + + $comma_index[$depth][$item_count] = $i; + ++$item_count_stack[$depth]; + if ( $last_nonblank_type =~ /^[iR\]]$/ ) { + $identifier_count_stack[$depth]++; + } return; - } ## end sub check_for_new_minimum_depth + } ## end sub study_comma - # routine to decide which commas to break at within a container; - # returns: - # $bp_count = number of comma breakpoints set - # $do_not_break_apart = a flag indicating if container need not - # be broken open - sub set_comma_breakpoints { + my %poor_types; + my %poor_keywords; + my %poor_next_types; + my %poor_next_keywords; - my ( $self, $dd, $rbond_strength_bias ) = @_; - my $bp_count = 0; - my $do_not_break_apart = 0; + BEGIN { - # Do not break a list unless there are some non-line-ending commas. - # This avoids getting different results with only non-essential commas, - # and fixes b1192. - my $seqno = $type_sequence_stack[$dd]; - my $real_comma_count = - $seqno ? $self->[_rtype_count_by_seqno_]->{$seqno}->{','} : 1; + # Setup filters for detecting very poor breaks to ignore. + # b1097: old breaks after type 'L' and before 'R' are poor + # b1450: old breaks at 'eq' and related operators are poor + my @q = qw(== <= >= !=); - # anything to do? - if ( $item_count_stack[$dd] ) { + @{poor_types}{@q} = (1) x scalar(@q); + @{poor_next_types}{@q} = (1) x scalar(@q); + $poor_types{'L'} = 1; + $poor_next_types{'R'} = 1; - # handle commas not in containers... - if ( $dont_align[$dd] ) { - $self->do_uncontained_comma_breaks( $dd, $rbond_strength_bias ); - } + @q = qw(eq ne le ge lt gt); + @{poor_keywords}{@q} = (1) x scalar(@q); + @{poor_next_keywords}{@q} = (1) x scalar(@q); + } ## end BEGIN - # handle commas within containers... - elsif ($real_comma_count) { - my $fbc = $forced_breakpoint_count; + sub examine_old_breakpoint { - # always open comma lists not preceded by keywords, - # barewords, identifiers (that is, anything that doesn't - # look like a function call) - my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/; + my ( $self, $i_next_nonblank, $i_want_previous_break, + $i_old_assignment_break ) + = @_; - $self->set_comma_breakpoints_do( - { - depth => $dd, - i_opening_paren => $opening_structure_index_stack[$dd], - i_closing_paren => $i, - item_count => $item_count_stack[$dd], - identifier_count => $identifier_count_stack[$dd], - rcomma_index => $comma_index[$dd], - next_nonblank_type => $next_nonblank_type, - list_type => $container_type[$dd], - interrupted => $interrupted_list[$dd], - rdo_not_break_apart => \$do_not_break_apart, - must_break_open => $must_break_open, - has_broken_sublist => $has_broken_sublist[$dd], - } - ); - $bp_count = $forced_breakpoint_count - $fbc; - $do_not_break_apart = 0 if $must_break_open; - } + # Look at an old breakpoint and set/update certain flags: + + # Given indexes of three tokens in this batch: + # $i_next_nonblank - index of the next nonblank token + # $i_want_previous_break - we want a break before this index + # $i_old_assignment_break - the index of an '=' or equivalent + # Update: + # $old_breakpoint_count - a counter to increment unless poor break + # Update and return: + # $i_want_previous_break + # $i_old_assignment_break + + #----------------------- + # Filter out poor breaks + #----------------------- + # Just return if this is a poor break and pretend it does not exist. + # Otherwise, poor breaks made under stress can cause instability. + my $poor_break; + if ( $type eq 'k' ) { $poor_break ||= $poor_keywords{$token} } + else { $poor_break ||= $poor_types{$type} } + + if ( $next_nonblank_type eq 'k' ) { + $poor_break ||= $poor_next_keywords{$next_nonblank_token}; } - return ( $bp_count, $do_not_break_apart ); - } ## end sub set_comma_breakpoints + else { $poor_break ||= $poor_next_types{$next_nonblank_type} } - # These types are excluded at breakpoints to prevent blinking - # Switched from excluded to included as part of fix for b1214 - my %is_uncontained_comma_break_included_type; + # Also ignore any high stress level breaks; fixes b1395 + $poor_break ||= $levels_to_go[$i] >= $high_stress_level; + if ($poor_break) { goto RETURN } - BEGIN { + #-------------------------------------------- + # Not a poor break, so continue to examine it + #-------------------------------------------- + $old_breakpoint_count++; + $i_line_end = $i; + $i_line_start = $i_next_nonblank; + + #--------------------------------------- + # Do we want to break before this token? + #--------------------------------------- + + # Break before certain keywords if user broke there and + # this is a 'safe' break point. The idea is to retain + # any preferred breaks for sequential list operations, + # like a schwartzian transform. + if ($rOpts_break_at_old_keyword_breakpoints) { + if ( + $next_nonblank_type eq 'k' + && $is_keyword_returning_list{$next_nonblank_token} + && ( $type =~ /^[=\)\]\}Riw]$/ + || $type eq 'k' && $is_keyword_returning_list{$token} ) + ) + { - my @q = qw< k R } ) ] Y Z U w i q Q . - = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=>; - @is_uncontained_comma_break_included_type{@q} = (1) x scalar(@q); - } + # we actually have to set this break next time through + # the loop because if we are at a closing token (such + # as '}') which forms a one-line block, this break might + # get undone. - sub do_uncontained_comma_breaks { + # But do not do this at an '=' if: + # - the user wants breaks before an equals (b434 b903) + # - or -naws is set (can be unstable, see b1354) + my $skip = $type eq '=' + && ( $want_break_before{$type} + || !$rOpts_add_whitespace ); - # Handle commas not in containers... - # This is a catch-all routine for commas that we - # don't know what to do with because the don't fall - # within containers. We will bias the bond strength - # to break at commas which ended lines in the input - # file. This usually works better than just trying - # to put as many items on a line as possible. A - # downside is that if the input file is garbage it - # won't work very well. However, the user can always - # prevent following the old breakpoints with the - # -iob flag. - my ( $self, $dd, $rbond_strength_bias ) = @_; + $i_want_previous_break = $i + unless ($skip); - # Check added for issue c131; an error here would be due to an - # error initializing @comma_index when entering depth $dd. - if (DEVEL_MODE) { - foreach my $ii ( @{ $comma_index[$dd] } ) { - if ( $ii < 0 || $ii > $max_index_to_go ) { - my $KK = $K_to_go[0]; - my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_]; - Fault(<0 [ignore a ci change by -xci] - # ... fixes b1220. If ci>0 we are in the middle of a snippet, - # maybe because -boc has been forcing out previous lines. + if ( $type eq ':' ) { + $i_last_colon = $i; - # For example, we will follow the user and break after - # 'print' in this snippet: - # print - # "conformability (Not the same dimension)\n", - # "\t", $have, " is ", text_unit($hu), "\n", - # "\t", $want, " is ", text_unit($wu), "\n", - # ; - # - # Another example, just one comma, where we will break after - # the return: - # return - # $x * cos($a) - $y * sin($a), - # $x * sin($a) + $y * cos($a); + # retain break at a ':' line break + if ( ( $i == $i_line_start || $i == $i_line_end ) + && $rOpts_break_at_old_ternary_breakpoints + && $levels_to_go[$i] < $high_stress_level ) + { - # Breaking a print statement: - # print SAVEOUT - # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "", - # ( $? & 128 ) ? " -- core dumped" : "", "\n"; - # - # But we will not force a break after the opening paren here - # (causes a blinker): - # $heap->{stream}->set_output_filter( - # poe::filter::reference->new('myotherfreezer') ), - # ; - # - my $i_first_comma = $comma_index[$dd]->[0]; - my $level_comma = $levels_to_go[$i_first_comma]; - my $ci_start = $ci_levels_to_go[0]; + $self->set_forced_breakpoint($i); - # Here we want to use the value of ci before any -xci adjustment - if ( $ci_start && $rOpts_extended_continuation_indentation ) { - my $K0 = $K_to_go[0]; - if ( $self->[_rseqno_controlling_my_ci_]->{$K0} ) { $ci_start = 0 } - } - if ( !$ci_start - && $old_breakpoint_to_go[$i_first_comma] - && $level_comma == $levels_to_go[0] ) - { - my $ibreak = -1; - my $obp_count = 0; - foreach my $ii ( reverse( 0 .. $i_first_comma - 1 ) ) { - if ( $old_breakpoint_to_go[$ii] ) { - $obp_count++; - last if ( $obp_count > 1 ); - $ibreak = $ii - if ( $levels_to_go[$ii] == $level_comma ); + # Break at a previous '=', but only if it is before + # the mating '?'. Mate_index test fixes b1287. + my $ieq = $i_equals[$depth]; + my $mix = $mate_index_to_go[$i]; + if ( !defined($mix) ) { $mix = -1 } + if ( $ieq > 0 && $ieq < $mix ) { + $self->set_forced_breakpoint( $i_equals[$depth] ); + $i_equals[$depth] = -1; + } } } - # Changed rule from multiple old commas to just one here: - if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 ) - { - my $ibreak_m = $ibreak; - $ibreak_m-- if ( $types_to_go[$ibreak_m] eq 'b' ); - if ( $ibreak_m >= 0 ) { + # handle any postponed closing breakpoints + if ( has_postponed_breakpoint($type_sequence) ) { + my $inc = ( $type eq ':' ) ? 0 : 1; + if ( $i >= $inc ) { + $self->set_forced_breakpoint( $i - $inc ); + } + } + } - # In order to avoid blinkers we have to be fairly - # restrictive: + # must be opening token, one of { ( [ ? + else { - # OLD Rules: - # Rule 1: Do not to break before an opening token - # Rule 2: avoid breaking at ternary operators - # (see b931, which is similar to the above print example) - # Rule 3: Do not break at chain operators to fix case b1119 - # - The previous test was '$typem !~ /^[\(\{\[L\?\:]$/' + # set breaks at ?/: if they will get separated (and are + # not a ?/: chain), or if the '?' is at the end of the + # line + if ( $token eq '?' ) { + my $i_colon = $mate_index_to_go[$i]; + if ( + !defined($i_colon) # the ':' is not in this batch + || $i == 0 # this '?' is the first token of the line + || $i == $max_index_to_go # or this '?' is the last token + ) + { - # NEW Rule, replaced above rules after case b1214: - # only break at one of the included types + # don't break if # this has a side comment, and + # don't break at a '?' if preceded by ':' on + # this line of previous ?/: pair on this line. + # This is an attempt to preserve a chain of ?/: + # expressions (elsif2.t). + if ( + ( + $i_last_colon < 0 + || $parent_seqno_to_go[$i_last_colon] != + $parent_seqno_to_go[$i] + ) + && $tokens_to_go[$max_index_to_go] ne '#' + ) + { + $self->set_forced_breakpoint($i); + } + $self->set_closing_breakpoint($i); + } + } - # Be sure to test any changes to these rules against runs - # with -l=0 such as the 'bbvt' test (perltidyrc_colin) - # series. - my $type_m = $types_to_go[$ibreak_m]; + # must be one of { ( [ + else { - # Switched from excluded to included for b1214. If necessary - # the token could also be checked if type_m eq 'k' - if ( $is_uncontained_comma_break_included_type{$type_m} ) { - $self->set_forced_breakpoint($ibreak); + # do requested -lp breaks at the OPENING token for BROKEN + # blocks. NOTE: this can be done for both -lp and -xlp, + # but only -xlp can really take advantage of this. So this + # is currently restricted to -xlp to avoid excess changes to + # existing -lp formatting. + if ( $rOpts_extended_line_up_parentheses + && !defined( $mate_index_to_go[$i] ) ) + { + my $lp_object = + $self->[_rlp_object_by_seqno_]->{$type_sequence}; + if ($lp_object) { + my $K_begin_line = $lp_object->get_K_begin_line(); + my $i_begin_line = $K_begin_line - $K_to_go[0]; + $self->set_forced_lp_break( $i_begin_line, $i ); } } } } return; - } ## end sub do_uncontained_comma_breaks + } ## end sub break_lists_type_sequence - my %is_logical_container; - my %quick_filter; + sub break_lists_increasing_depth { - BEGIN { - my @q = qw# if elsif unless while and or err not && | || ? : ! #; - @is_logical_container{@q} = (1) x scalar(@q); + my ($self) = @_; - # This filter will allow most tokens to skip past a section of code - %quick_filter = %is_assignment; - @q = qw# => . ; < > ~ #; - push @q, ','; - @quick_filter{@q} = (1) x scalar(@q); - } + #-------------------------------------------- + # prepare for a new list when depth increases + # token $i is a '(','{', or '[' + #-------------------------------------------- - sub set_for_semicolon_breakpoints { - my ( $self, $dd ) = @_; - foreach ( @{ $rfor_semicolon_list[$dd] } ) { - $self->set_forced_breakpoint($_); - } - return; - } + #---------------------------------------------------------- + # BEGIN initialize depth arrays + # ... use the same order as sub check_for_new_minimum_depth + #---------------------------------------------------------- + $type_sequence_stack[$depth] = $type_sequence; + + $override_cab3[$depth] = undef; + if ( $rOpts_comma_arrow_breakpoints == 3 && $type_sequence ) { + $override_cab3[$depth] = + $self->[_roverride_cab3_]->{$type_sequence}; + } + + $breakpoint_stack[$depth] = $forced_breakpoint_count; + $container_type[$depth] = + + # k => && || ? : . + $is_container_label_type{$last_nonblank_type} + ? $last_nonblank_token + : EMPTY_STRING; + $identifier_count_stack[$depth] = 0; + $index_before_arrow[$depth] = -1; + $interrupted_list[$depth] = 0; + $item_count_stack[$depth] = 0; + $last_nonblank_type[$depth] = $last_nonblank_type; + $opening_structure_index_stack[$depth] = $i; + + $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count; + $comma_index[$depth] = undef; + $last_comma_index[$depth] = undef; + $last_dot_index[$depth] = undef; + $old_breakpoint_count_stack[$depth] = $old_breakpoint_count; + $has_old_logical_breakpoints[$depth] = 0; + $rand_or_list[$depth] = []; + $rfor_semicolon_list[$depth] = []; + $i_equals[$depth] = -1; + + # if line ends here then signal closing token to break + if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' ) { + $self->set_closing_breakpoint($i); + } + + # Not all lists of values should be vertically aligned.. + $dont_align[$depth] = + + # code BLOCKS are handled at a higher level + ##( $block_type ne EMPTY_STRING ) + $block_type + + # certain paren lists + || ( $type eq '(' ) && ( + + # it does not usually look good to align a list of + # identifiers in a parameter list, as in: + # my($var1, $var2, ...) + # (This test should probably be refined, for now I'm just + # testing for any keyword) + ( $last_nonblank_type eq 'k' ) + + # a trailing '(' usually indicates a non-list + || ( $next_nonblank_type eq '(' ) + ); + $has_broken_sublist[$depth] = 0; + $want_comma_break[$depth] = 0; - sub set_logical_breakpoints { - my ( $self, $dd ) = @_; + #---------------------------- + # END initialize depth arrays + #---------------------------- + + # patch to outdent opening brace of long if/for/.. + # statements (like this one). See similar coding in + # set_continuation breaks. We have also catch it here for + # short line fragments which otherwise will not go through + # break_long_lines. if ( - $item_count_stack[$dd] == 0 - && $is_logical_container{ $container_type[$dd] } + $block_type - || $has_old_logical_breakpoints[$dd] + # if we have the ')' but not its '(' in this batch.. + && ( $last_nonblank_token eq ')' ) + && !defined( $mate_index_to_go[$i_last_nonblank_token] ) + + # and user wants brace to left + && !$rOpts_opening_brace_always_on_right + + && ( $type eq '{' ) # should be true + && ( $token eq '{' ) # should be true ) { + $self->set_forced_breakpoint( $i - 1 ); + } - # Look for breaks in this order: - # 0 1 2 3 - # or and || && - foreach my $i ( 0 .. 3 ) { - if ( $rand_or_list[$dd][$i] ) { - foreach ( @{ $rand_or_list[$dd][$i] } ) { - $self->set_forced_breakpoint($_); - } + return; + } ## end sub break_lists_increasing_depth + + sub break_lists_decreasing_depth { + + my ( $self, $rbond_strength_bias ) = @_; + + # We have arrived at a closing container token in sub break_lists: + # the token at index $i is one of these: ')','}', ']' + # A number of important breakpoints for this container can now be set + # based on the information that we have collected. This includes: + # - breaks at commas to format tables + # - breaks at certain logical operators and other good breakpoints + # - breaks at opening and closing containers if needed by selected + # formatting styles + # These breaks are made by calling sub 'set_forced_breakpoint' + + $self->check_for_new_minimum_depth( $depth, $parent_seqno_to_go[$i] ) + if ( $depth < $minimum_depth ); + + # force all outer logical containers to break after we see on + # old breakpoint + $has_old_logical_breakpoints[$depth] ||= + $has_old_logical_breakpoints[$current_depth]; + + # Patch to break between ') {' if the paren list is broken. + # There is similar logic in break_long_lines for + # non-broken lists. + if ( $token eq ')' + && $next_nonblank_block_type + && $interrupted_list[$current_depth] + && $next_nonblank_type eq '{' + && !$rOpts_opening_brace_always_on_right ) + { + $self->set_forced_breakpoint($i); + } - # break at any 'if' and 'unless' too - foreach ( @{ $rand_or_list[$dd][4] } ) { - $self->set_forced_breakpoint($_); - } - $rand_or_list[$dd] = []; - last; - } - } +#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n"; + + #----------------------------------------------------------------- + # Set breaks at commas to display a table of values if appropriate + #----------------------------------------------------------------- + my ( $bp_count, $do_not_break_apart ) = ( 0, 0 ); + ( $bp_count, $do_not_break_apart ) = + $self->set_comma_breakpoints( $current_depth, $rbond_strength_bias ) + if ( $item_count_stack[$current_depth] ); + + #----------------------------------------------------------- + # Now set flags needed to decide if we should break open the + # container ... This is a long rambling section which has + # grown over time to handle all situations. + #----------------------------------------------------------- + my $i_opening = $opening_structure_index_stack[$current_depth]; + my $saw_opening_structure = ( $i_opening >= 0 ); + my $lp_object; + if ( $rOpts_line_up_parentheses && $saw_opening_structure ) { + $lp_object = $self->[_rlp_object_by_seqno_] + ->{ $type_sequence_to_go[$i_opening] }; + } + + # this term is long if we had to break at interior commas.. + my $is_long_term = $bp_count > 0; + + # If this is a short container with one or more comma arrows, + # then we will mark it as a long term to open it if requested. + # $rOpts_comma_arrow_breakpoints = + # 0 - open only if comma precedes closing brace + # 1 - stable: except for one line blocks + # 2 - try to form 1 line blocks + # 3 - ignore => + # 4 - always open up if vt=0 + # 5 - stable: even for one line blocks if vt=0 + + my $cab_flag = $rOpts_comma_arrow_breakpoints; + + # replace -cab=3 if overriden + if ( $cab_flag == 3 && $type_sequence ) { + my $test_cab = $self->[_roverride_cab3_]->{$type_sequence}; + if ( defined($test_cab) ) { $cab_flag = $test_cab } + } + + # PATCH: Modify the -cab flag if we are not processing a list: + # We only want the -cab flag to apply to list containers, so + # for non-lists we use the default and stable -cab=5 value. + # Fixes case b939a. + if ( $type_sequence && !$self->[_ris_list_by_seqno_]->{$type_sequence} ) + { + $cab_flag = 5; + } + + # Ignore old breakpoints when under stress. + # Fixes b1203 b1204 as well as b1197-b1200. + # But not if -lp: fixes b1264, b1265. NOTE: rechecked with + # b1264 to see if this check is still required at all, and + # these still require a check, but at higher level beta+3 + # instead of beta: b1193 b780 + if ( $saw_opening_structure + && !$lp_object + && $levels_to_go[$i_opening] >= $high_stress_level ) + { + $cab_flag = 2; + + # Do not break hash braces under stress (fixes b1238) + $do_not_break_apart ||= $types_to_go[$i_opening] eq 'L'; + + # This option fixes b1235, b1237, b1240 with old and new + # -lp, but formatting is nicer with next option. + ## $is_long_term ||= + ## $levels_to_go[$i_opening] > $stress_level_beta + 1; + + # This option fixes b1240 but not b1235, b1237 with new -lp, + # but this gives better formatting than the previous option. + # TODO: see if stress_level_alpha should also be considered + $do_not_break_apart ||= + $levels_to_go[$i_opening] > $stress_level_beta; } - return; - } ## end sub set_logical_breakpoints - sub is_unbreakable_container { + if ( !$is_long_term + && $saw_opening_structure + && $is_opening_token{ $tokens_to_go[$i_opening] } + && $index_before_arrow[ $depth + 1 ] > 0 + && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] } ) + { + $is_long_term = + $cab_flag == 4 + || $cab_flag == 0 && $last_nonblank_token eq ',' + || $cab_flag == 5 && $old_breakpoint_to_go[$i_opening]; + } - # never break a container of one of these types - # because bad things can happen (map1.t) - my $dd = shift; - return $is_sort_map_grep{ $container_type[$dd] }; - } + # mark term as long if the length between opening and closing + # parens exceeds allowed line length + if ( !$is_long_term && $saw_opening_structure ) { - sub break_lists { + my $i_opening_minus = $self->find_token_starting_list($i_opening); - my ( $self, $is_long_line, $rbond_strength_bias ) = @_; + my $excess = $self->excess_line_length( $i_opening_minus, $i ); - #---------------------------------------------------------------------- - # This routine is called once per batch, if the batch is a list, to set - # line breaks so that hierarchical structure can be displayed and so - # that list items can be vertically aligned. The output of this - # routine is stored in the array @forced_breakpoint_to_go, which is - # used by sub 'break_long_lines' to set final breakpoints. - #---------------------------------------------------------------------- + # Use standard spaces for indentation of lists in -lp mode + # if it gives a longer line length. This helps to avoid an + # instability due to forming and breaking one-line blocks. + # This fixes case b1314. + my $indentation = $leading_spaces_to_go[$i_opening_minus]; + if ( ref($indentation) + && $self->[_ris_broken_container_]->{$type_sequence} ) + { + my $lp_spaces = $indentation->get_spaces(); + my $std_spaces = $indentation->get_standard_spaces(); + my $diff = $std_spaces - $lp_spaces; + if ( $diff > 0 ) { $excess += $diff } + } - my $rLL = $self->[_rLL_]; - my $ris_list_by_seqno = $self->[_ris_list_by_seqno_]; - my $ris_broken_container = $self->[_ris_broken_container_]; - my $rbreak_before_container_by_seqno = - $self->[_rbreak_before_container_by_seqno_]; + my $tol = $length_tol; - $starting_depth = $nesting_depth_to_go[0]; + # boost tol for an -lp container + if ( + $lp_tol_boost + && $lp_object + && ( $rOpts_extended_continuation_indentation + || !$self->[_ris_list_by_seqno_]->{$type_sequence} ) + ) + { + $tol += $lp_tol_boost; + } - $block_type = SPACE; - $current_depth = $starting_depth; - $i = -1; - $last_nonblank_token = ';'; - $last_nonblank_type = ';'; - $last_nonblank_block_type = SPACE; - $last_old_breakpoint_count = 0; - $minimum_depth = $current_depth + 1; # forces update in check below - $old_breakpoint_count = 0; - $starting_breakpoint_count = $forced_breakpoint_count; - $token = ';'; - $type = ';'; - $type_sequence = EMPTY_STRING; + # Patch to avoid blinking with -bbxi=2 and -cab=2 + # in which variations in -ci cause unstable formatting + # in edge cases. We just always add one ci level so that + # the formatting is independent of the -BBX results. + # Fixes cases b1137 b1149 b1150 b1155 b1158 b1159 b1160 + # b1161 b1166 b1167 b1168 + if ( !$ci_levels_to_go[$i_opening] + && $self->[_rbreak_before_container_by_seqno_]->{$type_sequence} + ) + { + $tol += $rOpts_continuation_indentation; + } - my $total_depth_variation = 0; - my $i_old_assignment_break; - my $depth_last = $starting_depth; - my $comma_follows_last_closing_token; + $is_long_term = $excess + $tol > 0; - $self->check_for_new_minimum_depth( $current_depth, - $parent_seqno_to_go[0] ); + } - my $want_previous_breakpoint = -1; + # We've set breaks after all comma-arrows. Now we have to + # undo them if this can be a one-line block + # (the only breakpoints set will be due to comma-arrows) - my $saw_good_breakpoint; - my $i_line_end = -1; - my $i_line_start = -1; - my $i_last_colon = -1; + if ( - #---------------------------------------- - # Main loop over all tokens in this batch - #---------------------------------------- - while ( ++$i <= $max_index_to_go ) { - if ( $type ne 'b' ) { - $i_last_nonblank_token = $i - 1; - $last_nonblank_type = $type; - $last_nonblank_token = $token; - $last_nonblank_block_type = $block_type; - } ## end if ( $type ne 'b' ) - $type = $types_to_go[$i]; - $block_type = $block_type_to_go[$i]; - $token = $tokens_to_go[$i]; - $type_sequence = $type_sequence_to_go[$i]; - my $next_type = $types_to_go[ $i + 1 ]; - my $next_token = $tokens_to_go[ $i + 1 ]; - my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); - $next_nonblank_type = $types_to_go[$i_next_nonblank]; - $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; - $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; + # user doesn't require breaking after all comma-arrows + ( $cab_flag != 0 ) && ( $cab_flag != 4 ) - # set break if flag was set - if ( $want_previous_breakpoint >= 0 ) { - $self->set_forced_breakpoint($want_previous_breakpoint); - $want_previous_breakpoint = -1; - } + # and if the opening structure is in this batch + && $saw_opening_structure - $last_old_breakpoint_count = $old_breakpoint_count; + # and either on the same old line + && ( + $old_breakpoint_count_stack[$current_depth] == + $last_old_breakpoint_count + + # or user wants to form long blocks with arrows + || $cab_flag == 2 + ) + + # and we made breakpoints between the opening and closing + && ( $breakpoint_undo_stack[$current_depth] < + $forced_breakpoint_undo_count ) + + # and this block is short enough to fit on one line + # Note: use < because need 1 more space for possible comma + && !$is_long_term + + ) + { + $self->undo_forced_breakpoint_stack( + $breakpoint_undo_stack[$current_depth] ); + } + + # now see if we have any comma breakpoints left + my $has_comma_breakpoints = + ( $breakpoint_stack[$current_depth] != $forced_breakpoint_count ); + + # update broken-sublist flag of the outer container + $has_broken_sublist[$depth] = + $has_broken_sublist[$depth] + || $has_broken_sublist[$current_depth] + || $is_long_term + || $has_comma_breakpoints; + + # Having come to the closing ')', '}', or ']', now we have to decide + # if we should 'open up' the structure by placing breaks at the + # opening and closing containers. This is a tricky decision. Here + # are some of the basic considerations: + # + # -If this is a BLOCK container, then any breakpoints will have + # already been set (and according to user preferences), so we need do + # nothing here. + # + # -If we have a comma-separated list for which we can align the list + # items, then we need to do so because otherwise the vertical aligner + # cannot currently do the alignment. + # + # -If this container does itself contain a container which has been + # broken open, then it should be broken open to properly show the + # structure. + # + # -If there is nothing to align, and no other reason to break apart, + # then do not do it. + # + # We will not break open the parens of a long but 'simple' logical + # expression. For example: + # + # This is an example of a simple logical expression and its formatting: + # + # if ( $bigwasteofspace1 && $bigwasteofspace2 + # || $bigwasteofspace3 && $bigwasteofspace4 ) + # + # Most people would prefer this than the 'spacey' version: + # + # if ( + # $bigwasteofspace1 && $bigwasteofspace2 + # || $bigwasteofspace3 && $bigwasteofspace4 + # ) + # + # To illustrate the rules for breaking logical expressions, consider: + # + # FULLY DENSE: + # if ( $opt_excl + # and ( exists $ids_excl_uc{$id_uc} + # or grep $id_uc =~ /$_/, @ids_excl_uc )) + # + # This is on the verge of being difficult to read. The current + # default is to open it up like this: + # + # DEFAULT: + # if ( + # $opt_excl + # and ( exists $ids_excl_uc{$id_uc} + # or grep $id_uc =~ /$_/, @ids_excl_uc ) + # ) + # + # This is a compromise which tries to avoid being too dense and to + # spacey. A more spaced version would be: + # + # SPACEY: + # if ( + # $opt_excl + # and ( + # exists $ids_excl_uc{$id_uc} + # or grep $id_uc =~ /$_/, @ids_excl_uc + # ) + # ) + # + # Some people might prefer the spacey version -- an option could be + # added. The innermost expression contains a long block '( exists + # $ids_... ')'. + # + # Here is how the logic goes: We will force a break at the 'or' that + # the innermost expression contains, but we will not break apart its + # opening and closing containers because (1) it contains no + # multi-line sub-containers itself, and (2) there is no alignment to + # be gained by breaking it open like this + # + # and ( + # exists $ids_excl_uc{$id_uc} + # or grep $id_uc =~ /$_/, @ids_excl_uc + # ) + # + # (although this looks perfectly ok and might be good for long + # expressions). The outer 'if' container, though, contains a broken + # sub-container, so it will be broken open to avoid too much density. + # Also, since it contains no 'or's, there will be a forced break at + # its 'and'. + + # Handle the experimental flag --break-open-compact-parens + # NOTE: This flag is not currently used and may eventually be removed. + # If this flag is set, we will implement it by + # pretending we did not see the opening structure, since in that case + # parens always get opened up. + if ( $saw_opening_structure + && $rOpts_break_open_compact_parens ) + { - # Fixed for case b1097 to not consider old breaks at highly - # stressed locations, such as types 'L' and 'R'. It might be - # useful to generalize this concept in the future by looking at - # actual bond strengths. - if ( $old_breakpoint_to_go[$i] - && $type ne 'L' - && $next_nonblank_type ne 'R' ) + # This parameter is a one-character flag, as follows: + # '0' matches no parens -> break open NOT OK + # '1' matches all parens -> break open OK + # Other values are same as used by the weld-exclusion-list + my $flag = $rOpts_break_open_compact_parens; + if ( $flag eq '*' + || $flag eq '1' ) { - $i_line_end = $i; - $i_line_start = $i_next_nonblank; - - $old_breakpoint_count++; - - # Break before certain keywords if user broke there and - # this is a 'safe' break point. The idea is to retain - # any preferred breaks for sequential list operations, - # like a schwartzian transform. - if ($rOpts_break_at_old_keyword_breakpoints) { - if ( - $next_nonblank_type eq 'k' - && $is_keyword_returning_list{$next_nonblank_token} - && ( $type =~ /^[=\)\]\}Riw]$/ - || $type eq 'k' - && $is_keyword_returning_list{$token} ) - ) - { + $saw_opening_structure = 0; + } + else { - # we actually have to set this break next time through - # the loop because if we are at a closing token (such - # as '}') which forms a one-line block, this break might - # get undone. + # NOTE: $seqno will be equal to closure var $type_sequence here + my $seqno = $type_sequence_to_go[$i_opening]; + $saw_opening_structure = + !$self->match_paren_control_flag( $seqno, $flag ); + } + } - # And do not do this at an equals if the user wants - # breaks before an equals (blinker cases b434 b903) - unless ( $type eq '=' && $want_break_before{$type} ) { - $want_previous_breakpoint = $i; - } - } ## end if ( $next_nonblank_type...) - } ## end if ($rOpts_break_at_old_keyword_breakpoints) + # Set some more flags telling something about this container.. + my $is_simple_logical_expression; + if ( $item_count_stack[$current_depth] == 0 + && $saw_opening_structure + && $tokens_to_go[$i_opening] eq '(' + && $is_logical_container{ $container_type[$current_depth] } ) + { - # Break before attributes if user broke there - if ($rOpts_break_at_old_attribute_breakpoints) { - if ( $next_nonblank_type eq 'A' ) { - $want_previous_breakpoint = $i; - } - } + # This seems to be a simple logical expression with + # no existing breakpoints. Set a flag to prevent + # opening it up. + if ( !$has_comma_breakpoints ) { + $is_simple_logical_expression = 1; + } - # remember an = break as possible good break point - if ( $is_assignment{$type} ) { - $i_old_assignment_break = $i; - } - elsif ( $is_assignment{$next_nonblank_type} ) { - $i_old_assignment_break = $i_next_nonblank; - } - } ## end if ( $old_breakpoint_to_go...) + #--------------------------------------------------- + # This seems to be a simple logical expression with + # breakpoints (broken sublists, for example). Break + # at all 'or's and '||'s. + #--------------------------------------------------- + else { + $self->set_logical_breakpoints($current_depth); + } + } - next if ( $type eq 'b' ); - $depth = $nesting_depth_to_go[ $i + 1 ]; + # break long terms at any C-style for semicolons (c154) + if ( $is_long_term + && @{ $rfor_semicolon_list[$current_depth] } ) + { + $self->set_for_semicolon_breakpoints($current_depth); - $total_depth_variation += abs( $depth - $depth_last ); - $depth_last = $depth; + # and open up a long 'for' or 'foreach' container to allow + # leading term alignment unless -lp is used. + $has_comma_breakpoints = 1 unless ($lp_object); + } - # safety check - be sure we always break after a comment - # Shouldn't happen .. an error here probably means that the - # nobreak flag did not get turned off correctly during - # formatting. - if ( $type eq '#' ) { - if ( $i != $max_index_to_go ) { - if (DEVEL_MODE) { - Fault(<set_forced_breakpoint($i); - } ## end if ( $i != $max_index_to_go) - } ## end if ( $type eq '#' ) + #---------------------------------------------------------------- + # FINALLY: Break open container according to the flags which have + # been set. + #---------------------------------------------------------------- + if ( - # Force breakpoints at certain tokens in long lines. - # Note that such breakpoints will be undone later if these tokens - # are fully contained within parens on a line. - if ( + # breaks for code BLOCKS are handled at a higher level + !$block_type - # break before a keyword within a line - $type eq 'k' - && $i > 0 + # we do not need to break at the top level of an 'if' + # type expression + && !$is_simple_logical_expression - # if one of these keywords: - && $is_if_unless_while_until_for_foreach{$token} + ## modification to keep ': (' containers vertically tight; + ## but probably better to let user set -vt=1 to avoid + ## inconsistency with other paren types + ## && ($container_type[$current_depth] ne ':') - # but do not break at something like '1 while' - && ( $last_nonblank_type ne 'n' || $i > 2 ) + # otherwise, we require one of these reasons for breaking: + && ( - # and let keywords follow a closing 'do' brace - && $last_nonblank_block_type ne 'do' + # - this term has forced line breaks + $has_comma_breakpoints - && ( - $is_long_line + # - the opening container is separated from this batch + # for some reason (comment, blank line, code block) + # - this is a non-paren container spanning multiple lines + || !$saw_opening_structure - # or container is broken (by side-comment, etc) - || ( $next_nonblank_token eq '(' - && $mate_index_to_go[$i_next_nonblank] < $i ) - ) - ) - { - $self->set_forced_breakpoint( $i - 1 ); - } ## end if ( $type eq 'k' && $i...) + # - this is a long block contained in another breakable + # container + || $is_long_term && !$self->is_in_block_by_i($i_opening) + ) + ) + { - # remember locations of '||' and '&&' for possible breaks if we - # decide this is a long logical expression. - if ( $type eq '||' ) { - push @{ $rand_or_list[$depth][2] }, $i; - ++$has_old_logical_breakpoints[$depth] - if ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_logical_breakpoints ); - } ## end elsif ( $type eq '||' ) - elsif ( $type eq '&&' ) { - push @{ $rand_or_list[$depth][3] }, $i; - ++$has_old_logical_breakpoints[$depth] - if ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_logical_breakpoints ); - } ## end elsif ( $type eq '&&' ) - elsif ( $type eq 'f' ) { - push @{ $rfor_semicolon_list[$depth] }, $i; + # do special -lp breaks at the CLOSING token for INTACT + # blocks (because we might not do them if the block does + # not break open) + if ($lp_object) { + my $K_begin_line = $lp_object->get_K_begin_line(); + my $i_begin_line = $K_begin_line - $K_to_go[0]; + $self->set_forced_lp_break( $i_begin_line, $i_opening ); } - elsif ( $type eq 'k' ) { - if ( $token eq 'and' ) { - push @{ $rand_or_list[$depth][1] }, $i; - ++$has_old_logical_breakpoints[$depth] - if ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_logical_breakpoints ); - } ## end if ( $token eq 'and' ) - # break immediately at 'or's which are probably not in a logical - # block -- but we will break in logical breaks below so that - # they do not add to the forced_breakpoint_count - elsif ( $token eq 'or' ) { - push @{ $rand_or_list[$depth][0] }, $i; - ++$has_old_logical_breakpoints[$depth] - if ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_logical_breakpoints ); - if ( $is_logical_container{ $container_type[$depth] } ) { - } - else { - if ($is_long_line) { $self->set_forced_breakpoint($i) } - elsif ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_logical_breakpoints ) + # break after opening structure. + # note: break before closing structure will be automatic + if ( $minimum_depth <= $current_depth ) { + + if ( $i_opening >= 0 ) { + if ( !$do_not_break_apart + && !is_unbreakable_container($current_depth) ) + { + $self->set_forced_breakpoint($i_opening); + + # Do not let brace types L/R use vertical tightness + # flags to recombine if we have to break on length + # because instability is possible if both vt and vtc + # flags are set ... see issue b1444. + if ( $is_long_term + && $types_to_go[$i_opening] eq 'L' + && $opening_vertical_tightness{'{'} + && $closing_vertical_tightness{'}'} ) { - $saw_good_breakpoint = 1; + my $seqno = $type_sequence_to_go[$i_opening]; + if ($seqno) { + $self->[_rbreak_container_]->{$seqno} = 1; + } } - } ## end else [ if ( $is_logical_container...)] - } ## end elsif ( $token eq 'or' ) - elsif ( $token eq 'if' || $token eq 'unless' ) { - push @{ $rand_or_list[$depth][4] }, $i; - if ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_logical_breakpoints ) - { - $self->set_forced_breakpoint($i); } - } ## end elsif ( $token eq 'if' ||...) - } ## end elsif ( $type eq 'k' ) - elsif ( $is_assignment{$type} ) { - $i_equals[$depth] = $i; - } - - if ($type_sequence) { - - # handle any postponed closing breakpoints - if ( $is_closing_sequence_token{$token} ) { - if ( $type eq ':' ) { - $i_last_colon = $i; + } - # retain break at a ':' line break - if ( ( $i == $i_line_start || $i == $i_line_end ) - && $rOpts_break_at_old_ternary_breakpoints - && $levels_to_go[$i] < $list_stress_level ) - { + # break at ',' of lower depth level before opening token + if ( $last_comma_index[$depth] ) { + $self->set_forced_breakpoint( $last_comma_index[$depth] ); + } - $self->set_forced_breakpoint($i); + # break at '.' of lower depth level before opening token + if ( $last_dot_index[$depth] ) { + $self->set_forced_breakpoint( $last_dot_index[$depth] ); + } - # Break at a previous '=', but only if it is before - # the mating '?'. Mate_index test fixes b1287. - my $ieq = $i_equals[$depth]; - if ( $ieq > 0 && $ieq < $mate_index_to_go[$i] ) { - $self->set_forced_breakpoint( - $i_equals[$depth] ); - $i_equals[$depth] = -1; - } - } ## end if ( ( $i == $i_line_start...)) - } ## end if ( $type eq ':' ) - if ( has_postponed_breakpoint($type_sequence) ) { - my $inc = ( $type eq ':' ) ? 0 : 1; - if ( $i >= $inc ) { - $self->set_forced_breakpoint( $i - $inc ); - } - } - } ## end if ( $is_closing_sequence_token{$token} ) + # break before opening structure if preceded by another + # closing structure and a comma. This is normally + # done by the previous closing brace, but not + # if it was a one-line block. + if ( $i_opening > 2 ) { + my $i_prev = + ( $types_to_go[ $i_opening - 1 ] eq 'b' ) + ? $i_opening - 2 + : $i_opening - 1; - # set breaks at ?/: if they will get separated (and are - # not a ?/: chain), or if the '?' is at the end of the - # line - elsif ( $token eq '?' ) { - my $i_colon = $mate_index_to_go[$i]; + my $type_prev = $types_to_go[$i_prev]; + my $token_prev = $tokens_to_go[$i_prev]; if ( - $i_colon <= 0 # the ':' is not in this batch - || $i == 0 # this '?' is the first token of the line - || $i == - $max_index_to_go # or this '?' is the last token + $type_prev eq ',' + && ( $types_to_go[ $i_prev - 1 ] eq ')' + || $types_to_go[ $i_prev - 1 ] eq '}' ) ) { + $self->set_forced_breakpoint($i_prev); + } - # don't break if # this has a side comment, and - # don't break at a '?' if preceded by ':' on - # this line of previous ?/: pair on this line. - # This is an attempt to preserve a chain of ?/: - # expressions (elsif2.t). - if ( - ( - $i_last_colon < 0 - || $parent_seqno_to_go[$i_last_colon] != - $parent_seqno_to_go[$i] - ) - && $tokens_to_go[$max_index_to_go] ne '#' - ) - { - $self->set_forced_breakpoint($i); - } - $self->set_closing_breakpoint($i); - } ## end if ( $i_colon <= 0 ||...) - } ## end elsif ( $token eq '?' ) - - elsif ( $is_opening_token{$token} ) { - - # do requested -lp breaks at the OPENING token for BROKEN - # blocks. NOTE: this can be done for both -lp and -xlp, - # but only -xlp can really take advantage of this. So this - # is currently restricted to -xlp to avoid excess changes to - # existing -lp formatting. - if ( $rOpts_extended_line_up_parentheses - && $mate_index_to_go[$i] < 0 ) + # also break before something like ':(' or '?(' + # if appropriate. + elsif ($type_prev =~ /^([k\:\?]|&&|\|\|)$/ + && $want_break_before{$token_prev} ) { - my $lp_object = - $self->[_rlp_object_by_seqno_]->{$type_sequence}; - if ($lp_object) { - my $K_begin_line = $lp_object->get_K_begin_line(); - my $i_begin_line = $K_begin_line - $K_to_go[0]; - $self->set_forced_lp_break( $i_begin_line, $i ); - } + $self->set_forced_breakpoint($i_prev); } } + } - } ## end if ($type_sequence) + # break after comma following closing structure + if ( $types_to_go[ $i + 1 ] eq ',' ) { + $self->set_forced_breakpoint( $i + 1 ); + } -#print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n"; + # break before an '=' following closing structure + if ( + $is_assignment{$next_nonblank_type} + && ( $breakpoint_stack[$current_depth] != + $forced_breakpoint_count ) + ) + { + $self->set_forced_breakpoint($i); + } - #------------------------------------------------------------ - # Handle Increasing Depth.. - # - # prepare for a new list when depth increases - # token $i is a '(','{', or '[' - #------------------------------------------------------------ - # hardened against bad input syntax: depth jump must be 1 and type - # must be opening..fixes c102 - if ( $depth == $current_depth + 1 && $is_opening_type{$type} ) { + # break at any comma before the opening structure Added + # for -lp, but seems to be good in general. It isn't + # obvious how far back to look; the '5' below seems to + # work well and will catch the comma in something like + # push @list, myfunc( $param, $param, .. - #---------------------------------------------------------- - # BEGIN initialize depth arrays - # ... use the same order as sub check_for_new_minimum_depth - #---------------------------------------------------------- - $type_sequence_stack[$depth] = $type_sequence; - $override_cab3[$depth] = - $rOpts_comma_arrow_breakpoints == 3 - && $type_sequence - && $self->[_roverride_cab3_]->{$type_sequence}; - - $breakpoint_stack[$depth] = $forced_breakpoint_count; - $container_type[$depth] = - - # k => && || ? : . - $is_container_label_type{$last_nonblank_type} - ? $last_nonblank_token - : EMPTY_STRING; - $identifier_count_stack[$depth] = 0; - $index_before_arrow[$depth] = -1; - $interrupted_list[$depth] = 0; - $item_count_stack[$depth] = 0; - $last_nonblank_type[$depth] = $last_nonblank_type; - $opening_structure_index_stack[$depth] = $i; - - $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count; - $comma_index[$depth] = undef; - $last_comma_index[$depth] = undef; - $last_dot_index[$depth] = undef; - $old_breakpoint_count_stack[$depth] = $old_breakpoint_count; - $has_old_logical_breakpoints[$depth] = 0; - $rand_or_list[$depth] = []; - $rfor_semicolon_list[$depth] = []; - $i_equals[$depth] = -1; - - # if line ends here then signal closing token to break - if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' ) - { - $self->set_closing_breakpoint($i); + my $icomma = $last_comma_index[$depth]; + if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) { + unless ( $forced_breakpoint_to_go[$icomma] ) { + $self->set_forced_breakpoint($icomma); } + } + } - # Not all lists of values should be vertically aligned.. - $dont_align[$depth] = - - # code BLOCKS are handled at a higher level - ( $block_type ne EMPTY_STRING ) + #----------------------------------------------------------- + # Break open a logical container open if it was already open + #----------------------------------------------------------- + elsif ($is_simple_logical_expression + && $has_old_logical_breakpoints[$current_depth] ) + { + $self->set_logical_breakpoints($current_depth); + } - # certain paren lists - || ( $type eq '(' ) && ( + # Handle long container which does not get opened up + elsif ($is_long_term) { - # it does not usually look good to align a list of - # identifiers in a parameter list, as in: - # my($var1, $var2, ...) - # (This test should probably be refined, for now I'm just - # testing for any keyword) - ( $last_nonblank_type eq 'k' ) + # must set fake breakpoint to alert outer containers that + # they are complex + set_fake_breakpoint(); + } - # a trailing '(' usually indicates a non-list - || ( $next_nonblank_type eq '(' ) - ); - $has_broken_sublist[$depth] = 0; - $want_comma_break[$depth] = 0; + return; + } ## end sub break_lists_decreasing_depth +} ## end closure break_lists - #------------------------------------- - # END initialize depth arrays - #------------------------------------- +my %is_kwiZ; +my %is_key_type; - # patch to outdent opening brace of long if/for/.. - # statements (like this one). See similar coding in - # set_continuation breaks. We have also catch it here for - # short line fragments which otherwise will not go through - # break_long_lines. - if ( - $block_type +BEGIN { - # if we have the ')' but not its '(' in this batch.. - && ( $last_nonblank_token eq ')' ) - && $mate_index_to_go[$i_last_nonblank_token] < 0 + # Added 'w' to fix b1172 + my @q = qw(k w i Z ->); + @is_kwiZ{@q} = (1) x scalar(@q); - # and user wants brace to left - && !$rOpts_opening_brace_always_on_right + # added = for b1211 + @q = qw<( [ { L R } ] ) = b>; + push @q, ','; + @is_key_type{@q} = (1) x scalar(@q); +} ## end BEGIN - && ( $type eq '{' ) # should be true - && ( $token eq '{' ) # should be true - ) - { - $self->set_forced_breakpoint( $i - 1 ); - } ## end if ( $block_type && ( ...)) - } ## end if ( $depth > $current_depth) +use constant DEBUG_FIND_START => 0; - #------------------------------------------------------------ - # Handle Decreasing Depth.. - # - # finish off any old list when depth decreases - # token $i is a ')','}', or ']' - #------------------------------------------------------------ - # hardened against bad input syntax: depth jump must be 1 and type - # must be closing .. fixes c102 - elsif ( $depth == $current_depth - 1 && $is_closing_type{$type} ) { +sub find_token_starting_list { - $self->check_for_new_minimum_depth( $depth, - $parent_seqno_to_go[$i] ); + # When testing to see if a block will fit on one line, some + # previous token(s) may also need to be on the line; particularly + # if this is a sub call. So we will look back at least one + # token. + my ( $self, $i_opening_paren ) = @_; - $comma_follows_last_closing_token = - $next_nonblank_type eq ',' || $next_nonblank_type eq '=>'; + # This will be the return index + my $i_opening_minus = $i_opening_paren; - # force all outer logical containers to break after we see on - # old breakpoint - $has_old_logical_breakpoints[$depth] ||= - $has_old_logical_breakpoints[$current_depth]; - - # Patch to break between ') {' if the paren list is broken. - # There is similar logic in break_long_lines for - # non-broken lists. - if ( $token eq ')' - && $next_nonblank_block_type - && $interrupted_list[$current_depth] - && $next_nonblank_type eq '{' - && !$rOpts_opening_brace_always_on_right ) - { - $self->set_forced_breakpoint($i); - } ## end if ( $token eq ')' && ... + if ( $i_opening_minus <= 0 ) { + return $i_opening_minus; + } -#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n"; + my $im1 = $i_opening_paren - 1; + my ( $iprev_nb, $type_prev_nb ) = ( $im1, $types_to_go[$im1] ); + if ( $type_prev_nb eq 'b' && $iprev_nb > 0 ) { + $iprev_nb -= 1; + $type_prev_nb = $types_to_go[$iprev_nb]; + } - # set breaks at commas if necessary - my ( $bp_count, $do_not_break_apart ) = - $self->set_comma_breakpoints( $current_depth, - $rbond_strength_bias ); + if ( $type_prev_nb eq ',' ) { - my $i_opening = $opening_structure_index_stack[$current_depth]; - my $saw_opening_structure = ( $i_opening >= 0 ); - my $lp_object; - if ( $rOpts_line_up_parentheses && $saw_opening_structure ) { - $lp_object = $self->[_rlp_object_by_seqno_] - ->{ $type_sequence_to_go[$i_opening] }; - } - - # this term is long if we had to break at interior commas.. - my $is_long_term = $bp_count > 0; - - # If this is a short container with one or more comma arrows, - # then we will mark it as a long term to open it if requested. - # $rOpts_comma_arrow_breakpoints = - # 0 - open only if comma precedes closing brace - # 1 - stable: except for one line blocks - # 2 - try to form 1 line blocks - # 3 - ignore => - # 4 - always open up if vt=0 - # 5 - stable: even for one line blocks if vt=0 - - # PATCH: Modify the -cab flag if we are not processing a list: - # We only want the -cab flag to apply to list containers, so - # for non-lists we use the default and stable -cab=5 value. - # Fixes case b939a. - my $cab_flag = $rOpts_comma_arrow_breakpoints; - if ( $type_sequence && !$ris_list_by_seqno->{$type_sequence} ) { - $cab_flag = 5; - } - - # Ignore old breakpoints when under stress. - # Fixes b1203 b1204 as well as b1197-b1200. - # But not if -lp: fixes b1264, b1265. NOTE: rechecked with - # b1264 to see if this check is still required at all, and - # these still require a check, but at higher level beta+3 - # instead of beta: b1193 b780 - if ( $saw_opening_structure - && !$lp_object - && $levels_to_go[$i_opening] >= $list_stress_level ) - { - $cab_flag = 2; + # a previous comma is a good break point + # $i_opening_minus = $i_opening_paren; + } - # Do not break hash braces under stress (fixes b1238) - $do_not_break_apart ||= $types_to_go[$i_opening] eq 'L'; + elsif ( + $tokens_to_go[$i_opening_paren] eq '(' - # This option fixes b1235, b1237, b1240 with old and new - # -lp, but formatting is nicer with next option. - ## $is_long_term ||= - ## $levels_to_go[$i_opening] > $stress_level_beta + 1; + # non-parens added here to fix case b1186 + || $is_kwiZ{$type_prev_nb} + ) + { + $i_opening_minus = $im1; - # This option fixes b1240 but not b1235, b1237 with new -lp, - # but this gives better formatting than the previous option. - $do_not_break_apart ||= - $levels_to_go[$i_opening] > $stress_level_beta; - } + # Walk back to improve length estimate... + # FIX for cases b1169 b1170 b1171: start walking back + # at the previous nonblank. This makes the result insensitive + # to the flag --space-function-paren, and similar. + # previous loop: for ( my $j = $im1 ; $j >= 0 ; $j-- ) { + foreach my $j ( reverse( 0 .. $iprev_nb ) ) { + if ( $is_key_type{ $types_to_go[$j] } ) { - if ( !$is_long_term - && $saw_opening_structure - && $is_opening_token{ $tokens_to_go[$i_opening] } - && $index_before_arrow[ $depth + 1 ] > 0 - && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] } - ) - { - $is_long_term = - $cab_flag == 4 - || $cab_flag == 0 && $last_nonblank_token eq ',' - || $cab_flag == 5 && $old_breakpoint_to_go[$i_opening]; - } ## end if ( !$is_long_term &&...) - - # mark term as long if the length between opening and closing - # parens exceeds allowed line length - if ( !$is_long_term && $saw_opening_structure ) { - - my $i_opening_minus = - $self->find_token_starting_list($i_opening); - - my $excess = - $self->excess_line_length( $i_opening_minus, $i ); - - # Use standard spaces for indentation of lists in -lp mode - # if it gives a longer line length. This helps to avoid an - # instability due to forming and breaking one-line blocks. - # This fixes case b1314. - my $indentation = $leading_spaces_to_go[$i_opening_minus]; - if ( ref($indentation) - && $ris_broken_container->{$type_sequence} ) - { - my $lp_spaces = $indentation->get_spaces(); - my $std_spaces = $indentation->get_standard_spaces(); - my $diff = $std_spaces - $lp_spaces; - if ( $diff > 0 ) { $excess += $diff } - } + # fix for b1211 + if ( $types_to_go[$j] eq '=' ) { $i_opening_minus = $j } + last; + } + $i_opening_minus = $j; + } + if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ } + } - my $tol = $length_tol; + DEBUG_FIND_START && print < im=$i_opening_minus tok=$tokens_to_go[$i_opening_minus] +EOM - # boost tol for an -lp container - if ( - $lp_tol_boost - && $lp_object - && ( $rOpts_extended_continuation_indentation - || !$ris_list_by_seqno->{$type_sequence} ) - ) - { - $tol += $lp_tol_boost; - } + return $i_opening_minus; +} ## end sub find_token_starting_list - # Patch to avoid blinking with -bbxi=2 and -cab=2 - # in which variations in -ci cause unstable formatting - # in edge cases. We just always add one ci level so that - # the formatting is independent of the -BBX results. - # Fixes cases b1137 b1149 b1150 b1155 b1158 b1159 b1160 - # b1161 b1166 b1167 b1168 - if ( !$ci_levels_to_go[$i_opening] - && $rbreak_before_container_by_seqno->{$type_sequence} ) - { - $tol += $rOpts->{'continuation-indentation'}; - } +{ ## begin closure table_maker - $is_long_term = $excess + $tol > 0; + my %is_keyword_with_special_leading_term; - } ## end if ( !$is_long_term &&...) + BEGIN { - # We've set breaks after all comma-arrows. Now we have to - # undo them if this can be a one-line block - # (the only breakpoints set will be due to comma-arrows) + # These keywords have prototypes which allow a special leading item + # followed by a list + my @q = qw( + chmod + formline + grep + join + kill + map + pack + printf + push + sprintf + unshift + ); + @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q); + } ## end BEGIN - if ( + use constant DEBUG_SPARSE => 0; - # user doesn't require breaking after all comma-arrows - ( $cab_flag != 0 ) && ( $cab_flag != 4 ) + sub table_maker { - # and if the opening structure is in this batch - && $saw_opening_structure + # Given a list of comma-separated items, set breakpoints at some of + # the commas, if necessary, to make it easy to read. + # This is done by making calls to 'set_forced_breakpoint'. + # This is a complex routine because there are many special cases. - # and either on the same old line - && ( - $old_breakpoint_count_stack[$current_depth] == - $last_old_breakpoint_count + # Returns: nothing - # or user wants to form long blocks with arrows - || $cab_flag == 2 + # The numerous variables involved are contained three hashes: + # $rhash_IN : For contents see the calling routine + # $rhash_A: For contents see return from sub 'table_layout_A' + # $rhash_B: For contents see return from sub 'table_layout_B' - # if -cab=3 is overridden then use -cab=2 behavior - || $cab_flag == 3 && $override_cab3[$current_depth] - ) + my ( $self, $rhash_IN ) = @_; - # and we made breakpoints between the opening and closing - && ( $breakpoint_undo_stack[$current_depth] < - $forced_breakpoint_undo_count ) + # Find lengths of all list items needed for calculating page layout + my $rhash_A = table_layout_A($rhash_IN); + return if ( !defined($rhash_A) ); - # and this block is short enough to fit on one line - # Note: use < because need 1 more space for possible comma - && !$is_long_term + # Some variables received from caller... + my $i_closing_paren = $rhash_IN->{i_closing_paren}; + my $i_opening_paren = $rhash_IN->{i_opening_paren}; + my $has_broken_sublist = $rhash_IN->{has_broken_sublist}; + my $interrupted = $rhash_IN->{interrupted}; - ) - { - $self->undo_forced_breakpoint_stack( - $breakpoint_undo_stack[$current_depth] ); - } ## end if ( ( $rOpts_comma_arrow_breakpoints...)) - - # now see if we have any comma breakpoints left - my $has_comma_breakpoints = - ( $breakpoint_stack[$current_depth] != - $forced_breakpoint_count ); - - # update broken-sublist flag of the outer container - $has_broken_sublist[$depth] = - $has_broken_sublist[$depth] - || $has_broken_sublist[$current_depth] - || $is_long_term - || $has_comma_breakpoints; - -# Having come to the closing ')', '}', or ']', now we have to decide if we -# should 'open up' the structure by placing breaks at the opening and -# closing containers. This is a tricky decision. Here are some of the -# basic considerations: -# -# -If this is a BLOCK container, then any breakpoints will have already -# been set (and according to user preferences), so we need do nothing here. -# -# -If we have a comma-separated list for which we can align the list items, -# then we need to do so because otherwise the vertical aligner cannot -# currently do the alignment. -# -# -If this container does itself contain a container which has been broken -# open, then it should be broken open to properly show the structure. -# -# -If there is nothing to align, and no other reason to break apart, -# then do not do it. -# -# We will not break open the parens of a long but 'simple' logical expression. -# For example: -# -# This is an example of a simple logical expression and its formatting: -# -# if ( $bigwasteofspace1 && $bigwasteofspace2 -# || $bigwasteofspace3 && $bigwasteofspace4 ) -# -# Most people would prefer this than the 'spacey' version: -# -# if ( -# $bigwasteofspace1 && $bigwasteofspace2 -# || $bigwasteofspace3 && $bigwasteofspace4 -# ) -# -# To illustrate the rules for breaking logical expressions, consider: -# -# FULLY DENSE: -# if ( $opt_excl -# and ( exists $ids_excl_uc{$id_uc} -# or grep $id_uc =~ /$_/, @ids_excl_uc )) -# -# This is on the verge of being difficult to read. The current default is to -# open it up like this: -# -# DEFAULT: -# if ( -# $opt_excl -# and ( exists $ids_excl_uc{$id_uc} -# or grep $id_uc =~ /$_/, @ids_excl_uc ) -# ) -# -# This is a compromise which tries to avoid being too dense and to spacey. -# A more spaced version would be: -# -# SPACEY: -# if ( -# $opt_excl -# and ( -# exists $ids_excl_uc{$id_uc} -# or grep $id_uc =~ /$_/, @ids_excl_uc -# ) -# ) -# -# Some people might prefer the spacey version -- an option could be added. The -# innermost expression contains a long block '( exists $ids_... ')'. -# -# Here is how the logic goes: We will force a break at the 'or' that the -# innermost expression contains, but we will not break apart its opening and -# closing containers because (1) it contains no multi-line sub-containers itself, -# and (2) there is no alignment to be gained by breaking it open like this -# -# and ( -# exists $ids_excl_uc{$id_uc} -# or grep $id_uc =~ /$_/, @ids_excl_uc -# ) -# -# (although this looks perfectly ok and might be good for long expressions). The -# outer 'if' container, though, contains a broken sub-container, so it will be -# broken open to avoid too much density. Also, since it contains no 'or's, there -# will be a forced break at its 'and'. - - # Open-up if parens if requested. We do this by pretending we - # did not see the opening structure, since in that case parens - # always get opened up. - if ( $saw_opening_structure - && $rOpts_break_open_compact_parens ) - { + #----------------------------------------- + # Section A: Handle some special cases ... + #----------------------------------------- - # This parameter is a one-character flag, as follows: - # '0' matches no parens -> break open NOT OK - # '1' matches all parens -> break open OK - # Other values are same as used by the weld-exclusion-list - my $flag = $rOpts_break_open_compact_parens; - if ( $flag eq '*' - || $flag eq '1' ) - { - $saw_opening_structure = 0; - } - else { - my $KK = $K_to_go[$i_opening]; - $saw_opening_structure = - !$self->match_paren_flag( $KK, $flag ); - } - } + #------------------------------------------------------------- + # Special Case A1: Compound List Rule 1: + # Break at (almost) every comma for a list containing a broken + # sublist. This has higher priority than the Interrupted List + # Rule. + #------------------------------------------------------------- + if ($has_broken_sublist) { - # set some flags telling something about this container.. - my $is_simple_logical_expression = 0; - if ( $item_count_stack[$current_depth] == 0 - && $saw_opening_structure - && $tokens_to_go[$i_opening] eq '(' - && $is_logical_container{ $container_type[$current_depth] } - ) - { + $self->apply_broken_sublist_rule( $rhash_A, $interrupted ); - # This seems to be a simple logical expression with - # no existing breakpoints. Set a flag to prevent - # opening it up. - if ( !$has_comma_breakpoints ) { - $is_simple_logical_expression = 1; - } + return; + } - # This seems to be a simple logical expression with - # breakpoints (broken sublists, for example). Break - # at all 'or's and '||'s. - else { - $self->set_logical_breakpoints($current_depth); - } - } ## end if ( $item_count_stack...) + #-------------------------------------------------------------- + # Special Case A2: Interrupted List Rule: + # A list is forced to use old breakpoints if it was interrupted + # by side comments or blank lines, or requested by user. + #-------------------------------------------------------------- + if ( $rOpts_break_at_old_comma_breakpoints + || $interrupted + || $i_opening_paren < 0 ) + { + my $i_first_comma = $rhash_A->{_i_first_comma}; + my $i_true_last_comma = $rhash_A->{_i_true_last_comma}; + $self->copy_old_breakpoints( $i_first_comma, $i_true_last_comma ); + return; + } - if ( $is_long_term - && @{ $rfor_semicolon_list[$current_depth] } ) - { - $self->set_for_semicolon_breakpoints($current_depth); + #----------------------------------------------------------------- + # Special Case A3: If it fits on one line, return and let the line + # break logic decide if and where to break. + #----------------------------------------------------------------- - # open up a long 'for' or 'foreach' container to allow - # leading term alignment unless -lp is used. - $has_comma_breakpoints = 1 unless ($lp_object); - } ## end if ( $is_long_term && ...) + # The -bbxi=2 parameters can add an extra hidden level of indentation + # so they need a tolerance to avoid instability. Fixes b1259, 1260. + my $opening_token = $tokens_to_go[$i_opening_paren]; + my $tol = 0; + if ( $break_before_container_types{$opening_token} + && $container_indentation_options{$opening_token} + && $container_indentation_options{$opening_token} == 2 ) + { + $tol = $rOpts_indent_columns; - if ( + # use greater of -ci and -i (fix for case b1334) + if ( $tol < $rOpts_continuation_indentation ) { + $tol = $rOpts_continuation_indentation; + } + } - # breaks for code BLOCKS are handled at a higher level - !$block_type + my $i_opening_minus = $self->find_token_starting_list($i_opening_paren); + my $excess = + $self->excess_line_length( $i_opening_minus, $i_closing_paren ); + return if ( $excess + $tol <= 0 ); - # we do not need to break at the top level of an 'if' - # type expression - && !$is_simple_logical_expression + #--------------------------------------- + # Section B: Handle a multiline list ... + #--------------------------------------- - ## modification to keep ': (' containers vertically tight; - ## but probably better to let user set -vt=1 to avoid - ## inconsistency with other paren types - ## && ($container_type[$current_depth] ne ':') + $self->break_multiline_list( $rhash_IN, $rhash_A, $i_opening_minus ); + return; - # otherwise, we require one of these reasons for breaking: - && ( + } ## end sub table_maker - # - this term has forced line breaks - $has_comma_breakpoints + sub apply_broken_sublist_rule { - # - the opening container is separated from this batch - # for some reason (comment, blank line, code block) - # - this is a non-paren container spanning multiple lines - || !$saw_opening_structure + my ( $self, $rhash_A, $interrupted ) = @_; - # - this is a long block contained in another breakable - # container - || $is_long_term && !$self->is_in_block_by_i($i_opening) - ) - ) - { + my $ritem_lengths = $rhash_A->{_ritem_lengths}; + my $ri_term_begin = $rhash_A->{_ri_term_begin}; + my $ri_term_end = $rhash_A->{_ri_term_end}; + my $ri_term_comma = $rhash_A->{_ri_term_comma}; + my $item_count = $rhash_A->{_item_count_A}; + my $i_first_comma = $rhash_A->{_i_first_comma}; + my $i_true_last_comma = $rhash_A->{_i_true_last_comma}; - # do special -lp breaks at the CLOSING token for INTACT - # blocks (because we might not do them if the block does - # not break open) - if ($lp_object) { - my $K_begin_line = $lp_object->get_K_begin_line(); - my $i_begin_line = $K_begin_line - $K_to_go[0]; - $self->set_forced_lp_break( $i_begin_line, $i_opening ); - } + # Break at every comma except for a comma between two + # simple, small terms. This prevents long vertical + # columns of, say, just 0's. + my $small_length = 10; # 2 + actual maximum length wanted - # break after opening structure. - # note: break before closing structure will be automatic - if ( $minimum_depth <= $current_depth ) { + # We'll insert a break in long runs of small terms to + # allow alignment in uniform tables. + my $skipped_count = 0; + my $columns = table_columns_available($i_first_comma); + my $fields = int( $columns / $small_length ); + if ( $rOpts_maximum_fields_per_table + && $fields > $rOpts_maximum_fields_per_table ) + { + $fields = $rOpts_maximum_fields_per_table; + } + my $max_skipped_count = $fields - 1; + + my $is_simple_last_term = 0; + my $is_simple_next_term = 0; + foreach my $j ( 0 .. $item_count ) { + $is_simple_last_term = $is_simple_next_term; + $is_simple_next_term = 0; + if ( $j < $item_count + && $ri_term_end->[$j] == $ri_term_begin->[$j] + && $ritem_lengths->[$j] <= $small_length ) + { + $is_simple_next_term = 1; + } + next if $j == 0; + if ( $is_simple_last_term + && $is_simple_next_term + && $skipped_count < $max_skipped_count ) + { + $skipped_count++; + } + else { + $skipped_count = 0; + my $i_tc = $ri_term_comma->[ $j - 1 ]; + last unless defined $i_tc; + $self->set_forced_breakpoint($i_tc); + } + } - if ( $i_opening >= 0 ) { - $self->set_forced_breakpoint($i_opening) - unless ( $do_not_break_apart - || is_unbreakable_container($current_depth) ); - } + # always break at the last comma if this list is + # interrupted; we wouldn't want to leave a terminal '{', for + # example. + if ($interrupted) { + $self->set_forced_breakpoint($i_true_last_comma); + } + return; + } ## end sub apply_broken_sublist_rule - # break at ',' of lower depth level before opening token - if ( $last_comma_index[$depth] ) { - $self->set_forced_breakpoint( - $last_comma_index[$depth] ); - } + sub set_emergency_comma_breakpoints { - # break at '.' of lower depth level before opening token - if ( $last_dot_index[$depth] ) { - $self->set_forced_breakpoint( - $last_dot_index[$depth] ); - } + my ( - # break before opening structure if preceded by another - # closing structure and a comma. This is normally - # done by the previous closing brace, but not - # if it was a one-line block. - if ( $i_opening > 2 ) { - my $i_prev = - ( $types_to_go[ $i_opening - 1 ] eq 'b' ) - ? $i_opening - 2 - : $i_opening - 1; + $self, # - if ( - $types_to_go[$i_prev] eq ',' - && ( $types_to_go[ $i_prev - 1 ] eq ')' - || $types_to_go[ $i_prev - 1 ] eq '}' ) - ) - { - $self->set_forced_breakpoint($i_prev); - } + $number_of_fields_best, + $rhash_IN, + $comma_count, + $i_first_comma, - # also break before something like ':(' or '?(' - # if appropriate. - elsif ( - $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ ) - { - my $token_prev = $tokens_to_go[$i_prev]; - if ( $want_break_before{$token_prev} ) { - $self->set_forced_breakpoint($i_prev); - } - } ## end elsif ( $types_to_go[$i_prev...]) - } ## end if ( $i_opening > 2 ) - } ## end if ( $minimum_depth <=...) + ) = @_; - # break after comma following closing structure - if ( $next_type eq ',' ) { - $self->set_forced_breakpoint( $i + 1 ); - } + # The number of fields worked out to be negative, so we + # have to make an emergency fix. - # break before an '=' following closing structure - if ( - $is_assignment{$next_nonblank_type} - && ( $breakpoint_stack[$current_depth] != - $forced_breakpoint_count ) - ) - { - $self->set_forced_breakpoint($i); - } ## end if ( $is_assignment{$next_nonblank_type...}) - - # break at any comma before the opening structure Added - # for -lp, but seems to be good in general. It isn't - # obvious how far back to look; the '5' below seems to - # work well and will catch the comma in something like - # push @list, myfunc( $param, $param, .. - - my $icomma = $last_comma_index[$depth]; - if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) { - unless ( $forced_breakpoint_to_go[$icomma] ) { - $self->set_forced_breakpoint($icomma); - } - } - } ## end logic to open up a container + my $rcomma_index = $rhash_IN->{rcomma_index}; + my $next_nonblank_type = $rhash_IN->{next_nonblank_type}; + my $rdo_not_break_apart = $rhash_IN->{rdo_not_break_apart}; + my $must_break_open = $rhash_IN->{must_break_open}; - # Break open a logical container open if it was already open - elsif ($is_simple_logical_expression - && $has_old_logical_breakpoints[$current_depth] ) - { - $self->set_logical_breakpoints($current_depth); - } + # are we an item contained in an outer list? + my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/; - # Handle long container which does not get opened up - elsif ($is_long_term) { + # In many cases, it may be best to not force a break if there is just + # one comma, because the standard continuation break logic will do a + # better job without it. - # must set fake breakpoint to alert outer containers that - # they are complex - set_fake_breakpoint(); - } ## end elsif ($is_long_term) + # In the common case that all but one of the terms can fit + # on a single line, it may look better not to break open the + # containing parens. Consider, for example - } ## end elsif ( $depth < $current_depth) + # $color = + # join ( '/', + # sort { $color_value{$::a} <=> $color_value{$::b}; } + # keys %colors ); - #------------------------------------------------------------ - # Handle this token - #------------------------------------------------------------ + # which will look like this with the container broken: - $current_depth = $depth; + # $color = join ( + # '/', + # sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors + # ); - # most token types can skip the rest of this loop - next unless ( $quick_filter{$type} ); + # Here is an example of this rule for a long last term: - # handle comma-arrow - if ( $type eq '=>' ) { - next if ( $last_nonblank_type eq '=>' ); - next if $rOpts_break_at_old_comma_breakpoints; - next - if ( $rOpts_comma_arrow_breakpoints == 3 - && !$override_cab3[$depth] ); - $want_comma_break[$depth] = 1; - $index_before_arrow[$depth] = $i_last_nonblank_token; - next; - } ## end if ( $type eq '=>' ) + # log_message( 0, 256, 128, + # "Number of routes in adj-RIB-in to be considered: $peercount" ); - elsif ( $type eq '.' ) { - $last_dot_index[$depth] = $i; - } + # And here is an example with a long first term: - # Turn off alignment if we are sure that this is not a list - # environment. To be safe, we will do this if we see certain - # non-list tokens, such as ';', and also the environment is - # not a list. Note that '=' could be in any of the = operators - # (lextest.t). We can't just use the reported environment - # because it can be incorrect in some cases. - elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} ) - && !$self->is_in_list_by_i($i) ) - { - $dont_align[$depth] = 1; - $want_comma_break[$depth] = 0; - $index_before_arrow[$depth] = -1; - } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...)) + # $s = sprintf( + # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)", + # $r, $pu, $ps, $cu, $cs, $tt + # ) + # if $style eq 'all'; - # now just handle any commas - next unless ( $type eq ',' ); + my $i_last_comma = $rcomma_index->[ $comma_count - 1 ]; - $last_dot_index[$depth] = undef; - $last_comma_index[$depth] = $i; + my $long_last_term = $self->excess_line_length( 0, $i_last_comma ) <= 0; + my $long_first_term = + $self->excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= + 0; - # break here if this comma follows a '=>' - # but not if there is a side comment after the comma - if ( $want_comma_break[$depth] ) { + # break at every comma ... + if ( - if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) { - if ($rOpts_comma_arrow_breakpoints) { - $want_comma_break[$depth] = 0; - next; - } - } + # if requested by user or is best looking + $number_of_fields_best == 1 - $self->set_forced_breakpoint($i) - unless ( $next_nonblank_type eq '#' ); + # or if this is a sublist of a larger list + || $in_hierarchical_list - # break before the previous token if it looks safe - # Example of something that we will not try to break before: - # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt}, - # Also we don't want to break at a binary operator (like +): - # $c->createOval( - # $x + $R, $y + - # $R => $x - $R, - # $y - $R, -fill => 'black', - # ); - my $ibreak = $index_before_arrow[$depth] - 1; - if ( $ibreak > 0 - && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ ) - { - if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- } - if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- } - if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) { - - # don't break pointer calls, such as the following: - # File::Spec->curdir => 1, - # (This is tokenized as adjacent 'w' tokens) - ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) { - - # And don't break before a comma, as in the following: - # ( LONGER_THAN,=> 1, - # EIGHTY_CHARACTERS,=> 2, - # CAUSES_FORMATTING,=> 3, - # LIKE_THIS,=> 4, - # ); - # This example is for -tso but should be general rule - if ( $tokens_to_go[ $ibreak + 1 ] ne '->' - && $tokens_to_go[ $ibreak + 1 ] ne ',' ) - { - $self->set_forced_breakpoint($ibreak); - } - } ## end if ( $types_to_go[$ibreak...]) - } ## end if ( $ibreak > 0 && $tokens_to_go...) + # or if multiple commas and we don't have a long first or last + # term + || ( $comma_count > 1 + && !( $long_last_term || $long_first_term ) ) + ) + { + foreach ( 0 .. $comma_count - 1 ) { + $self->set_forced_breakpoint( $rcomma_index->[$_] ); + } + } + elsif ($long_last_term) { - $want_comma_break[$depth] = 0; - $index_before_arrow[$depth] = -1; + $self->set_forced_breakpoint($i_last_comma); + ${$rdo_not_break_apart} = 1 unless $must_break_open; + } + elsif ($long_first_term) { - # handle list which mixes '=>'s and ','s: - # treat any list items so far as an interrupted list - $interrupted_list[$depth] = 1; - next; - } ## end if ( $want_comma_break...) - - # Break after all commas above starting depth... - # But only if the last closing token was followed by a comma, - # to avoid breaking a list operator (issue c119) - if ( $depth < $starting_depth - && $comma_follows_last_closing_token - && !$dont_align[$depth] ) - { - $self->set_forced_breakpoint($i) - unless ( $next_nonblank_type eq '#' ); - next; - } + $self->set_forced_breakpoint($i_first_comma); + } + else { - # add this comma to the list.. - my $item_count = $item_count_stack[$depth]; - if ( $item_count == 0 ) { + # let breaks be defined by default bond strength logic + } + return; + } ## end sub set_emergency_comma_breakpoints + + sub break_multiline_list { + my ( $self, $rhash_IN, $rhash_A, $i_opening_minus ) = @_; + + # Overriden variables + my $item_count = $rhash_A->{_item_count_A}; + my $identifier_count = $rhash_A->{_identifier_count_A}; + + # Derived variables: + my $ritem_lengths = $rhash_A->{_ritem_lengths}; + my $ri_term_begin = $rhash_A->{_ri_term_begin}; + my $ri_term_end = $rhash_A->{_ri_term_end}; + my $ri_term_comma = $rhash_A->{_ri_term_comma}; + my $rmax_length = $rhash_A->{_rmax_length}; + my $comma_count = $rhash_A->{_comma_count}; + my $i_effective_last_comma = $rhash_A->{_i_effective_last_comma}; + my $first_term_length = $rhash_A->{_first_term_length}; + my $i_first_comma = $rhash_A->{_i_first_comma}; + my $i_last_comma = $rhash_A->{_i_last_comma}; + my $i_true_last_comma = $rhash_A->{_i_true_last_comma}; + + # Veriables received from caller + my $i_opening_paren = $rhash_IN->{i_opening_paren}; + my $i_closing_paren = $rhash_IN->{i_closing_paren}; + my $rcomma_index = $rhash_IN->{rcomma_index}; + my $next_nonblank_type = $rhash_IN->{next_nonblank_type}; + my $list_type = $rhash_IN->{list_type}; + my $interrupted = $rhash_IN->{interrupted}; + my $rdo_not_break_apart = $rhash_IN->{rdo_not_break_apart}; + my $must_break_open = $rhash_IN->{must_break_open}; +## NOTE: these input vars from caller use the values from rhash_A (see above): +## my $item_count = $rhash_IN->{item_count}; +## my $identifier_count = $rhash_IN->{identifier_count}; + + # NOTE: i_opening_paren changes value below so we need to get these here + my $opening_is_in_block = $self->is_in_block_by_i($i_opening_paren); + my $opening_token = $tokens_to_go[$i_opening_paren]; - # but do not form a list with no opening structure - # for example: + #--------------------------------------------------------------- + # Section B1: Determine '$number_of_fields' = the best number of + # fields to use if this is to be formatted as a table. + #--------------------------------------------------------------- - # open INFILE_COPY, ">$input_file_copy" - # or die ("very long message"); - if ( ( $opening_structure_index_stack[$depth] < 0 ) - && $self->is_in_block_by_i($i) ) - { - $dont_align[$depth] = 1; - } - } ## end if ( $item_count == 0 ) + # Now we know that this block spans multiple lines; we have to set + # at least one breakpoint -- real or fake -- as a signal to break + # open any outer containers. + set_fake_breakpoint(); - $comma_index[$depth][$item_count] = $i; - ++$item_count_stack[$depth]; - if ( $last_nonblank_type =~ /^[iR\]]$/ ) { - $identifier_count_stack[$depth]++; - } - } ## end while ( ++$i <= $max_index_to_go) + # Set a flag indicating if we need to break open to keep -lp + # items aligned. This is necessary if any of the list terms + # exceeds the available space after the '('. + my $need_lp_break_open = $must_break_open; + my $is_lp_formatting = ref( $leading_spaces_to_go[$i_first_comma] ); + if ( $is_lp_formatting && !$must_break_open ) { + my $columns_if_unbroken = + $maximum_line_length_at_level[ $levels_to_go[$i_opening_minus] ] + - total_line_length( $i_opening_minus, $i_opening_paren ); + $need_lp_break_open = + ( $rmax_length->[0] > $columns_if_unbroken ) + || ( $rmax_length->[1] > $columns_if_unbroken ) + || ( $first_term_length > $columns_if_unbroken ); + } - #------------------------------------------- - # end of loop over all tokens in this batch - #------------------------------------------- + my $hash_B = + $self->table_layout_B( $rhash_IN, $rhash_A, $is_lp_formatting ); + return if ( !defined($hash_B) ); + + # Updated variables + $i_first_comma = $hash_B->{_i_first_comma_B}; + $i_opening_paren = $hash_B->{_i_opening_paren_B}; + $item_count = $hash_B->{_item_count_B}; + + # New variables + my $columns = $hash_B->{_columns}; + my $formatted_columns = $hash_B->{_formatted_columns}; + my $formatted_lines = $hash_B->{_formatted_lines}; + my $max_width = $hash_B->{_max_width}; + my $new_identifier_count = $hash_B->{_new_identifier_count}; + my $number_of_fields = $hash_B->{_number_of_fields}; + my $odd_or_even = $hash_B->{_odd_or_even}; + my $packed_columns = $hash_B->{_packed_columns}; + my $packed_lines = $hash_B->{_packed_lines}; + my $pair_width = $hash_B->{_pair_width}; + my $ri_ragged_break_list = $hash_B->{_ri_ragged_break_list}; + my $use_separate_first_term = $hash_B->{_use_separate_first_term}; - # set breaks for any unfinished lists .. - foreach my $dd ( reverse( $minimum_depth .. $current_depth ) ) { + # are we an item contained in an outer list? + my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/; - $interrupted_list[$dd] = 1; - $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth ); - $self->set_comma_breakpoints( $dd, $rbond_strength_bias ); - $self->set_logical_breakpoints($dd) - if ( $has_old_logical_breakpoints[$dd] ); - $self->set_for_semicolon_breakpoints($dd); + my $unused_columns = $formatted_columns - $packed_columns; - # break open container... - my $i_opening = $opening_structure_index_stack[$dd]; - if ( defined($i_opening) && $i_opening >= 0 ) { - $self->set_forced_breakpoint($i_opening) - unless ( - is_unbreakable_container($dd) + # set some empirical parameters to help decide if we should try to + # align; high sparsity does not look good, especially with few lines + my $sparsity = ($unused_columns) / ($formatted_columns); + my $max_allowed_sparsity = + ( $item_count < 3 ) ? 0.1 + : ( $packed_lines == 1 ) ? 0.15 + : ( $packed_lines == 2 ) ? 0.4 + : 0.7; - # Avoid a break which would place an isolated ' or " - # on a line - || ( $type eq 'Q' - && $i_opening >= $max_index_to_go - 2 - && ( $token eq "'" || $token eq '"' ) ) - ); - } - } ## end for ( my $dd = $current_depth...) + my $two_line_word_wrap_ok; + if ( $opening_token eq '(' ) { - # Return a flag indicating if the input file had some good breakpoints. - # This flag will be used to force a break in a line shorter than the - # allowed line length. - if ( $has_old_logical_breakpoints[$current_depth] ) { - $saw_good_breakpoint = 1; - } + # default is to allow wrapping of short paren lists + $two_line_word_wrap_ok = 1; - # A complex line with one break at an = has a good breakpoint. - # This is not complex ($total_depth_variation=0): - # $res1 - # = 10; - # - # This is complex ($total_depth_variation=6): - # $res2 = - # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert')); + # but turn off word wrap where requested + if ($rOpts_break_open_compact_parens) { - # The check ($i_old_.. < $max_index_to_go) was added to fix b1333 - elsif ($i_old_assignment_break - && $total_depth_variation > 4 - && $old_breakpoint_count == 1 - && $i_old_assignment_break < $max_index_to_go ) - { - $saw_good_breakpoint = 1; - } ## end elsif ( $i_old_assignment_break...) + # This parameter is a one-character flag, as follows: + # '0' matches no parens -> break open NOT OK -> word wrap OK + # '1' matches all parens -> break open OK -> word wrap NOT OK + # Other values are the same as used by the weld-exclusion-list + my $flag = $rOpts_break_open_compact_parens; + if ( $flag eq '*' + || $flag eq '1' ) + { + $two_line_word_wrap_ok = 0; + } + elsif ( $flag eq '0' ) { + $two_line_word_wrap_ok = 1; + } + else { + my $seqno = $type_sequence_to_go[$i_opening_paren]; + $two_line_word_wrap_ok = + !$self->match_paren_control_flag( $seqno, $flag ); + } + } + } - return $saw_good_breakpoint; - } ## end sub break_lists -} ## end closure break_lists + #------------------------------------------------------------------- + # Section B2: Check for shortcut methods, which avoid treating + # a list as a table for relatively small parenthesized lists. These + # are usually easier to read if not formatted as tables. + #------------------------------------------------------------------- + if ( + $packed_lines <= 2 # probably can fit in 2 lines + && $item_count < 9 # doesn't have too many items + && $opening_is_in_block # not a sub-container + && $two_line_word_wrap_ok # ok to wrap this paren list + ) + { -my %is_kwiZ; -my %is_key_type; + # Section B2A: Shortcut method 1: for -lp and just one comma: + # This is a no-brainer, just break at the comma. + if ( + $is_lp_formatting # -lp + && $item_count == 2 # two items, one comma + && !$must_break_open + ) + { + my $i_break = $rcomma_index->[0]; + $self->set_forced_breakpoint($i_break); + ${$rdo_not_break_apart} = 1; + return; -BEGIN { + } - # Added 'w' to fix b1172 - my @q = qw(k w i Z ->); - @is_kwiZ{@q} = (1) x scalar(@q); + # Section B2B: Shortcut method 2 is for most small ragged lists + # which might look best if not displayed as a table. + if ( + ( $number_of_fields == 2 && $item_count == 3 ) + || ( + $new_identifier_count > 0 # isn't all quotes + && $sparsity > 0.15 + ) # would be fairly spaced gaps if aligned + ) + { - # added = for b1211 - @q = qw<( [ { L R } ] ) = b>; - push @q, ','; - @is_key_type{@q} = (1) x scalar(@q); -} + my $break_count = $self->set_ragged_breakpoints( $ri_term_comma, + $ri_ragged_break_list ); + ++$break_count if ($use_separate_first_term); -use constant DEBUG_FIND_START => 0; + # NOTE: we should really use the true break count here, + # which can be greater if there are large terms and + # little space, but usually this will work well enough. + unless ($must_break_open) { -sub find_token_starting_list { + if ( $break_count <= 1 ) { + ${$rdo_not_break_apart} = 1; + } + elsif ( $is_lp_formatting && !$need_lp_break_open ) { + ${$rdo_not_break_apart} = 1; + } + } + return; + } - # When testing to see if a block will fit on one line, some - # previous token(s) may also need to be on the line; particularly - # if this is a sub call. So we will look back at least one - # token. - my ( $self, $i_opening_paren ) = @_; + } ## end shortcut methods - # This will be the return index - my $i_opening_minus = $i_opening_paren; + # debug stuff + DEBUG_SPARSE && do { - goto RETURN if ( $i_opening_minus <= 0 ); + # How many spaces across the page will we fill? + my $columns_per_line = + ( int $number_of_fields / 2 ) * $pair_width + + ( $number_of_fields % 2 ) * $max_width; - my $im1 = $i_opening_paren - 1; - my ( $iprev_nb, $type_prev_nb ) = ( $im1, $types_to_go[$im1] ); - if ( $type_prev_nb eq 'b' && $iprev_nb > 0 ) { - $iprev_nb -= 1; - $type_prev_nb = $types_to_go[$iprev_nb]; - } + print STDOUT +"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n"; - if ( $type_prev_nb eq ',' ) { + }; - # a previous comma is a good break point - # $i_opening_minus = $i_opening_paren; - } + #------------------------------------------------------------------ + # Section B3: Compound List Rule 2: + # If this list is too long for one line, and it is an item of a + # larger list, then we must format it, regardless of sparsity + # (ian.t). One reason that we have to do this is to trigger + # Compound List Rule 1, above, which causes breaks at all commas of + # all outer lists. In this way, the structure will be properly + # displayed. + #------------------------------------------------------------------ - elsif ( - $tokens_to_go[$i_opening_paren] eq '(' + # Decide if this list is too long for one line unless broken + my $total_columns = table_columns_available($i_opening_paren); + my $too_long = $packed_columns > $total_columns; - # non-parens added here to fix case b1186 - || $is_kwiZ{$type_prev_nb} - ) - { - $i_opening_minus = $im1; + # For a paren list, include the length of the token just before the + # '(' because this is likely a sub call, and we would have to + # include the sub name on the same line as the list. This is still + # imprecise, but not too bad. (steve.t) + if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) { - # Walk back to improve length estimate... - # FIX for cases b1169 b1170 b1171: start walking back - # at the previous nonblank. This makes the result insensitive - # to the flag --space-function-paren, and similar. - # previous loop: for ( my $j = $im1 ; $j >= 0 ; $j-- ) { - foreach my $j ( reverse( 0 .. $iprev_nb ) ) { - if ( $is_key_type{ $types_to_go[$j] } ) { + $too_long = $self->excess_line_length( $i_opening_minus, + $i_effective_last_comma + 1 ) > 0; + } - # fix for b1211 - if ( $types_to_go[$j] eq '=' ) { $i_opening_minus = $j } - last; + # TODO: For an item after a '=>', try to include the length of the + # thing before the '=>'. This is crude and should be improved by + # actually looking back token by token. + if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) { + my $i_opening_minus_test = $i_opening_paren - 4; + if ( $i_opening_minus >= 0 ) { + $too_long = $self->excess_line_length( $i_opening_minus_test, + $i_effective_last_comma + 1 ) > 0; } - $i_opening_minus = $j; } - if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ } - } - RETURN: + # Always break lists contained in '[' and '{' if too long for 1 line, + # and always break lists which are too long and part of a more complex + # structure. + my $must_break_open_container = $must_break_open + || ( $too_long + && ( $in_hierarchical_list || !$two_line_word_wrap_ok ) ); - DEBUG_FIND_START && print < im=$i_opening_minus tok=$tokens_to_go[$i_opening_minus] -EOM + #-------------------------------------------------------------------- + # Section B4: A table will work here. But do not attempt to align + # columns if this is a tiny table or it would be too spaced. It + # seems that the more packed lines we have, the sparser the list that + # can be allowed and still look ok. + #-------------------------------------------------------------------- - return $i_opening_minus; -} ## end sub find_token_starting_list + if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines ) + || ( $formatted_lines < 2 ) + || ( $unused_columns > $max_allowed_sparsity * $formatted_columns ) + ) + { + #---------------------------------------------------------------- + # Section B4A: too sparse: would not look good aligned in a table + #---------------------------------------------------------------- -{ ## begin closure set_comma_breakpoints_do + # use old breakpoints if this is a 'big' list + if ( $packed_lines > 2 && $item_count > 10 ) { + write_logfile_entry("List sparse: using old breakpoints\n"); + $self->copy_old_breakpoints( $i_first_comma, $i_last_comma ); + } - my %is_keyword_with_special_leading_term; + # let the continuation logic handle it if 2 lines + else { - BEGIN { + my $break_count = $self->set_ragged_breakpoints( $ri_term_comma, + $ri_ragged_break_list ); + ++$break_count if ($use_separate_first_term); - # These keywords have prototypes which allow a special leading item - # followed by a list - my @q = - qw(formline grep kill map printf sprintf push chmod join pack unshift); - @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q); - } + unless ($must_break_open_container) { + if ( $break_count <= 1 ) { + ${$rdo_not_break_apart} = 1; + } + elsif ( $is_lp_formatting && !$need_lp_break_open ) { + ${$rdo_not_break_apart} = 1; + } + } + } + return; + } - use constant DEBUG_SPARSE => 0; + #-------------------------------------------- + # Section B4B: Go ahead and format as a table + #-------------------------------------------- + $self->write_formatted_table( $number_of_fields, $comma_count, + $rcomma_index, $use_separate_first_term ); + + return; + } ## end sub break_multiline_list - sub set_comma_breakpoints_do { + sub table_layout_A { - # Given a list with some commas, set breakpoints at some of the - # commas, if necessary, to make it easy to read. + my ($rhash_IN) = @_; - my ( $self, $rinput_hash ) = @_; + # Find lengths of all list items needed to calculate page layout - my $depth = $rinput_hash->{depth}; - my $i_opening_paren = $rinput_hash->{i_opening_paren}; - my $i_closing_paren = $rinput_hash->{i_closing_paren}; - my $item_count = $rinput_hash->{item_count}; - my $identifier_count = $rinput_hash->{identifier_count}; - my $rcomma_index = $rinput_hash->{rcomma_index}; - my $next_nonblank_type = $rinput_hash->{next_nonblank_type}; - my $list_type = $rinput_hash->{list_type}; - my $interrupted = $rinput_hash->{interrupted}; - my $rdo_not_break_apart = $rinput_hash->{rdo_not_break_apart}; - my $must_break_open = $rinput_hash->{must_break_open}; - my $has_broken_sublist = $rinput_hash->{has_broken_sublist}; + # Returns: + # - nothing if this list is empty, or + # - a ref to a hash containg some derived parameters + + my $i_opening_paren = $rhash_IN->{i_opening_paren}; + my $i_closing_paren = $rhash_IN->{i_closing_paren}; + my $identifier_count = $rhash_IN->{identifier_count}; + my $rcomma_index = $rhash_IN->{rcomma_index}; + my $item_count = $rhash_IN->{item_count}; # nothing to do if no commas seen return if ( $item_count < 1 ); @@ -19901,21 +23478,21 @@ EOM my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ]; my $i_last_comma = $i_true_last_comma; if ( $i_last_comma >= $max_index_to_go ) { - $i_last_comma = $rcomma_index->[ --$item_count - 1 ]; + $item_count -= 1; return if ( $item_count < 1 ); + $i_last_comma = $rcomma_index->[ $item_count - 1 ]; } - my $is_lp_formatting = ref( $leading_spaces_to_go[$i_first_comma] ); - #--------------------------------------------------------------- - # find lengths of all items in the list to calculate page layout - #--------------------------------------------------------------- my $comma_count = $item_count; - my @item_lengths; - my @i_term_begin; - my @i_term_end; - my @i_term_comma; + + my $ritem_lengths = []; + my $ri_term_begin = []; + my $ri_term_end = []; + my $ri_term_comma = []; + + my $rmax_length = [ 0, 0 ]; + my $i_prev_plus; - my @max_length = ( 0, 0 ); my $first_term_length; my $i = $i_opening_paren; my $is_odd = 1; @@ -19926,27 +23503,29 @@ EOM $i = $rcomma_index->[$j]; my $i_term_end = - ( $i == 0 || $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1; + ( $i == 0 || $types_to_go[ $i - 1 ] eq 'b' ) + ? $i - 2 + : $i - 1; my $i_term_begin = ( $types_to_go[$i_prev_plus] eq 'b' ) ? $i_prev_plus + 1 : $i_prev_plus; - push @i_term_begin, $i_term_begin; - push @i_term_end, $i_term_end; - push @i_term_comma, $i; + push @{$ri_term_begin}, $i_term_begin; + push @{$ri_term_end}, $i_term_end; + push @{$ri_term_comma}, $i; # note: currently adding 2 to all lengths (for comma and space) my $length = 2 + token_sequence_length( $i_term_begin, $i_term_end ); - push @item_lengths, $length; + push @{$ritem_lengths}, $length; if ( $j == 0 ) { $first_term_length = $length; } else { - if ( $length > $max_length[$is_odd] ) { - $max_length[$is_odd] = $length; + if ( $length > $rmax_length->[$is_odd] ) { + $rmax_length->[$is_odd] = $length; } } } @@ -19970,15 +23549,15 @@ EOM # add 2 to length because other lengths include a comma and a blank $last_item_length += 2; - push @item_lengths, $last_item_length; - push @i_term_begin, $i_b + 1; - push @i_term_end, $i_e; - push @i_term_comma, undef; + push @{$ritem_lengths}, $last_item_length; + push @{$ri_term_begin}, $i_b + 1; + push @{$ri_term_end}, $i_e; + push @{$ri_term_comma}, undef; my $i_odd = $item_count % 2; - if ( $last_item_length > $max_length[$i_odd] ) { - $max_length[$i_odd] = $last_item_length; + if ( $last_item_length > $rmax_length->[$i_odd] ) { + $rmax_length->[$i_odd] = $last_item_length; } $item_count++; @@ -19989,155 +23568,78 @@ EOM } } - #--------------------------------------------------------------- - # End of length calculations - #--------------------------------------------------------------- - - #--------------------------------------------------------------- - # Compound List Rule 1: - # Break at (almost) every comma for a list containing a broken - # sublist. This has higher priority than the Interrupted List - # Rule. - #--------------------------------------------------------------- - if ($has_broken_sublist) { - - # Break at every comma except for a comma between two - # simple, small terms. This prevents long vertical - # columns of, say, just 0's. - my $small_length = 10; # 2 + actual maximum length wanted - - # We'll insert a break in long runs of small terms to - # allow alignment in uniform tables. - my $skipped_count = 0; - my $columns = table_columns_available($i_first_comma); - my $fields = int( $columns / $small_length ); - if ( $rOpts_maximum_fields_per_table - && $fields > $rOpts_maximum_fields_per_table ) - { - $fields = $rOpts_maximum_fields_per_table; - } - my $max_skipped_count = $fields - 1; - - my $is_simple_last_term = 0; - my $is_simple_next_term = 0; - foreach my $j ( 0 .. $item_count ) { - $is_simple_last_term = $is_simple_next_term; - $is_simple_next_term = 0; - if ( $j < $item_count - && $i_term_end[$j] == $i_term_begin[$j] - && $item_lengths[$j] <= $small_length ) - { - $is_simple_next_term = 1; - } - next if $j == 0; - if ( $is_simple_last_term - && $is_simple_next_term - && $skipped_count < $max_skipped_count ) - { - $skipped_count++; - } - else { - $skipped_count = 0; - my $i_tc = $i_term_comma[ $j - 1 ]; - last unless defined $i_tc; - $self->set_forced_breakpoint($i_tc); - } - } - - # always break at the last comma if this list is - # interrupted; we wouldn't want to leave a terminal '{', for - # example. - if ($interrupted) { - $self->set_forced_breakpoint($i_true_last_comma); - } - return; - } - -#my ( $a, $b, $c ) = caller(); -#print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count -#i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n"; -#print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n"; - - #--------------------------------------------------------------- - # Interrupted List Rule: - # A list is forced to use old breakpoints if it was interrupted - # by side comments or blank lines, or requested by user. - #--------------------------------------------------------------- - if ( $rOpts_break_at_old_comma_breakpoints - || $interrupted - || $i_opening_paren < 0 ) - { - $self->copy_old_breakpoints( $i_first_comma, $i_true_last_comma ); - return; + # be sure we do not extend beyond the current list length + if ( $i_effective_last_comma >= $max_index_to_go ) { + $i_effective_last_comma = $max_index_to_go - 1; } - #--------------------------------------------------------------- - # Looks like a list of items. We have to look at it and size it up. - #--------------------------------------------------------------- - - my $opening_token = $tokens_to_go[$i_opening_paren]; - my $opening_is_in_block = $self->is_in_block_by_i($i_opening_paren); - - #------------------------------------------------------------------- - # Return if this will fit on one line - #------------------------------------------------------------------- - - # The -bbxi=2 parameters can add an extra hidden level of indentation; - # this needs a tolerance to avoid instability. Fixes b1259, 1260. - my $tol = 0; - if ( $break_before_container_types{$opening_token} - && $container_indentation_options{$opening_token} - && $container_indentation_options{$opening_token} == 2 ) - { - $tol = $rOpts_indent_columns; + # Return the hash of derived variables. + return { + + # Updated variables + _item_count_A => $item_count, + _identifier_count_A => $identifier_count, + + # New variables + _ritem_lengths => $ritem_lengths, + _ri_term_begin => $ri_term_begin, + _ri_term_end => $ri_term_end, + _ri_term_comma => $ri_term_comma, + _rmax_length => $rmax_length, + _comma_count => $comma_count, + _i_effective_last_comma => $i_effective_last_comma, + _first_term_length => $first_term_length, + _i_first_comma => $i_first_comma, + _i_last_comma => $i_last_comma, + _i_true_last_comma => $i_true_last_comma, + }; - # use greater of -ci and -i (fix for case b1334) - if ( $tol < $rOpts_continuation_indentation ) { - $tol = $rOpts_continuation_indentation; - } - } + } ## end sub table_layout_A - my $i_opening_minus = $self->find_token_starting_list($i_opening_paren); - return - unless $self->excess_line_length( $i_opening_minus, $i_closing_paren ) - + $tol > 0; + sub table_layout_B { - #------------------------------------------------------------------- - # Now we know that this block spans multiple lines; we have to set - # at least one breakpoint -- real or fake -- as a signal to break - # open any outer containers. - #------------------------------------------------------------------- - set_fake_breakpoint(); + my ( $self, $rhash_IN, $rhash_A, $is_lp_formatting ) = @_; - # be sure we do not extend beyond the current list length - if ( $i_effective_last_comma >= $max_index_to_go ) { - $i_effective_last_comma = $max_index_to_go - 1; - } + # Determine variables for the best table layout, including + # the best number of fields. - # Set a flag indicating if we need to break open to keep -lp - # items aligned. This is necessary if any of the list terms - # exceeds the available space after the '('. - my $need_lp_break_open = $must_break_open; - if ( $is_lp_formatting && !$must_break_open ) { - my $columns_if_unbroken = - $maximum_line_length_at_level[ $levels_to_go[$i_opening_minus] ] - - total_line_length( $i_opening_minus, $i_opening_paren ); - $need_lp_break_open = - ( $max_length[0] > $columns_if_unbroken ) - || ( $max_length[1] > $columns_if_unbroken ) - || ( $first_term_length > $columns_if_unbroken ); - } + # Returns: + # - nothing if nothing more to do + # - a ref to a hash containg some derived parameters + + # Variables from caller + my $i_opening_paren = $rhash_IN->{i_opening_paren}; + my $list_type = $rhash_IN->{list_type}; + my $next_nonblank_type = $rhash_IN->{next_nonblank_type}; + my $rcomma_index = $rhash_IN->{rcomma_index}; + my $rdo_not_break_apart = $rhash_IN->{rdo_not_break_apart}; + + # Table size variables + my $comma_count = $rhash_A->{_comma_count}; + my $first_term_length = $rhash_A->{_first_term_length}; + my $i_effective_last_comma = $rhash_A->{_i_effective_last_comma}; + my $i_first_comma = $rhash_A->{_i_first_comma}; + my $identifier_count = $rhash_A->{_identifier_count_A}; + my $item_count = $rhash_A->{_item_count_A}; + my $ri_term_begin = $rhash_A->{_ri_term_begin}; + my $ri_term_comma = $rhash_A->{_ri_term_comma}; + my $ri_term_end = $rhash_A->{_ri_term_end}; + my $ritem_lengths = $rhash_A->{_ritem_lengths}; + my $rmax_length = $rhash_A->{_rmax_length}; # Specify if the list must have an even number of fields or not. # It is generally safest to assume an even number, because the # list items might be a hash list. But if we can be sure that # it is not a hash, then we can allow an odd number for more # flexibility. - my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count - - if ( $identifier_count >= $item_count - 1 + # 1 = odd field count ok, 2 = want even count + my $odd_or_even = 2; + if ( + $identifier_count >= $item_count - 1 || $is_assignment{$next_nonblank_type} - || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ ) + || ( $list_type + && $list_type ne '=>' + && $list_type !~ /^[\:\?]$/ ) ) { $odd_or_even = 1; @@ -20146,12 +23648,12 @@ EOM # do we have a long first term which should be # left on a line by itself? my $use_separate_first_term = ( - $odd_or_even == 1 # only if we can use 1 field/line - && $item_count > 3 # need several items + $odd_or_even == 1 # only if we can use 1 field/line + && $item_count > 3 # need several items && $first_term_length > - 2 * $max_length[0] - 2 # need long first term + 2 * $rmax_length->[0] - 2 # need long first term && $first_term_length > - 2 * $max_length[1] - 2 # need long first term + 2 * $rmax_length->[1] - 2 # need long first term ); # or do we know from the type of list that the first term should @@ -20181,29 +23683,37 @@ EOM if ($use_separate_first_term) { # ..set a break and update starting values - $use_separate_first_term = 1; $self->set_forced_breakpoint($i_first_comma); + $item_count--; + + #--------------------------------------------------------------- + # Section B1A: Stop if one item remains ($i_first_comma = undef) + #--------------------------------------------------------------- + # Fix for b1442: use '$item_count' here instead of '$comma_count' + # to make the result independent of any trailing comma. + return if ( $item_count <= 1 ); + $i_opening_paren = $i_first_comma; $i_first_comma = $rcomma_index->[1]; - $item_count--; - return if $comma_count == 1; - shift @item_lengths; - shift @i_term_begin; - shift @i_term_end; - shift @i_term_comma; + shift @{$ritem_lengths}; + shift @{$ri_term_begin}; + shift @{$ri_term_end}; + shift @{$ri_term_comma}; } # if not, update the metrics to include the first term else { - if ( $first_term_length > $max_length[0] ) { - $max_length[0] = $first_term_length; + if ( $first_term_length > $rmax_length->[0] ) { + $rmax_length->[0] = $first_term_length; } } # Field width parameters - my $pair_width = ( $max_length[0] + $max_length[1] ); + my $pair_width = ( $rmax_length->[0] + $rmax_length->[1] ); my $max_width = - ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1]; + ( $rmax_length->[0] > $rmax_length->[1] ) + ? $rmax_length->[0] + : $rmax_length->[1]; # Number of free columns across the page width for laying out tables my $columns = table_columns_available($i_first_comma); @@ -20215,10 +23725,9 @@ EOM # paren, but in some cases we might not. if ( $rOpts_variable_maximum_line_length && $tokens_to_go[$i_opening_paren] eq '(' - && @i_term_begin ) - ##&& !$old_breakpoint_to_go[$i_opening_paren] ) ## in b1210 patch + && @{$ri_term_begin} ) { - my $ib = $i_term_begin[0]; + my $ib = $ri_term_begin->[0]; my $type = $types_to_go[$ib]; # So far, the only known instance of this problem is when @@ -20239,19 +23748,19 @@ EOM } } - # Estimated maximum number of fields which fit this space - # This will be our first guess + # Estimated maximum number of fields which fit this space. + # This will be our first guess: my $number_of_fields_max = maximum_number_of_fields( $columns, $odd_or_even, $max_width, $pair_width ); my $number_of_fields = $number_of_fields_max; - # Find the best-looking number of fields - # and make this our second guess if possible + # Find the best-looking number of fields. + # This will be our second guess, if possible. my ( $number_of_fields_best, $ri_ragged_break_list, $new_identifier_count ) - = $self->study_list_complexity( \@i_term_begin, \@i_term_end, - \@item_lengths, $max_width ); + = $self->study_list_complexity( $ri_term_begin, $ri_term_end, + $ritem_lengths, $max_width ); if ( $number_of_fields_best != 0 && $number_of_fields_best < $number_of_fields_max ) @@ -20259,10 +23768,15 @@ EOM $number_of_fields = $number_of_fields_best; } - # ---------------------------------------------------------------------- - # If we are crowded and the -lp option is being used, try to - # undo some indentation - # ---------------------------------------------------------------------- + # fix b1427 + elsif ($number_of_fields_best > 1 + && $number_of_fields_best > $number_of_fields_max ) + { + $number_of_fields_best = $number_of_fields_max; + } + + # If we are crowded and the -lp option is being used, try + # to undo some indentation if ( $is_lp_formatting && ( @@ -20272,46 +23786,19 @@ EOM ) ) { - my $available_spaces = - $self->get_available_spaces_to_go($i_first_comma); - if ( $available_spaces > 0 ) { - - my $spaces_wanted = $max_width - $columns; # for 1 field - - if ( $number_of_fields_best == 0 ) { - $number_of_fields_best = - get_maximum_fields_wanted( \@item_lengths ); - } - - if ( $number_of_fields_best != 1 ) { - my $spaces_wanted_2 = - 1 + $pair_width - $columns; # for 2 fields - if ( $available_spaces > $spaces_wanted_2 ) { - $spaces_wanted = $spaces_wanted_2; - } - } - - if ( $spaces_wanted > 0 ) { - my $deleted_spaces = - $self->reduce_lp_indentation( $i_first_comma, - $spaces_wanted ); - - # redo the math - if ( $deleted_spaces > 0 ) { - $columns = table_columns_available($i_first_comma); - $number_of_fields_max = - maximum_number_of_fields( $columns, $odd_or_even, - $max_width, $pair_width ); - $number_of_fields = $number_of_fields_max; + ( $number_of_fields, $number_of_fields_best, $columns ) = + $self->lp_table_fix( + + $columns, + $i_first_comma, + $max_width, + $number_of_fields, + $number_of_fields_best, + $odd_or_even, + $pair_width, + $ritem_lengths, - if ( $number_of_fields_best == 1 - && $number_of_fields >= 1 ) - { - $number_of_fields = $number_of_fields_best; - } - } - } - } + ); } # try for one column if two won't work @@ -20334,97 +23821,30 @@ EOM if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero my $packed_lines = 1 + int( $packed_columns / $columns ); - # are we an item contained in an outer list? - my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/; - + #----------------------------------------------------------------- + # Section B1B: Stop here if we did not compute a positive number of + # fields. In this case we just have to bail out. + #----------------------------------------------------------------- if ( $number_of_fields <= 0 ) { -# #--------------------------------------------------------------- -# # We're in trouble. We can't find a single field width that works. -# # There is no simple answer here; we may have a single long list -# # item, or many. -# #--------------------------------------------------------------- -# -# In many cases, it may be best to not force a break if there is just one -# comma, because the standard continuation break logic will do a better -# job without it. -# -# In the common case that all but one of the terms can fit -# on a single line, it may look better not to break open the -# containing parens. Consider, for example -# -# $color = -# join ( '/', -# sort { $color_value{$::a} <=> $color_value{$::b}; } -# keys %colors ); -# -# which will look like this with the container broken: -# -# $color = join ( -# '/', -# sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors -# ); -# -# Here is an example of this rule for a long last term: -# -# log_message( 0, 256, 128, -# "Number of routes in adj-RIB-in to be considered: $peercount" ); -# -# And here is an example with a long first term: -# -# $s = sprintf( -# "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)", -# $r, $pu, $ps, $cu, $cs, $tt -# ) -# if $style eq 'all'; - - $i_last_comma = $rcomma_index->[ $comma_count - 1 ]; - - my $long_last_term = - $self->excess_line_length( 0, $i_last_comma ) <= 0; - my $long_first_term = - $self->excess_line_length( $i_first_comma + 1, $max_index_to_go ) - <= 0; - - # break at every comma ... - if ( - - # if requested by user or is best looking - $number_of_fields_best == 1 - - # or if this is a sublist of a larger list - || $in_hierarchical_list - - # or if multiple commas and we don't have a long first or last - # term - || ( $comma_count > 1 - && !( $long_last_term || $long_first_term ) ) - ) - { - foreach ( 0 .. $comma_count - 1 ) { - $self->set_forced_breakpoint( $rcomma_index->[$_] ); - } - } - elsif ($long_last_term) { - - $self->set_forced_breakpoint($i_last_comma); - ${$rdo_not_break_apart} = 1 unless $must_break_open; - } - elsif ($long_first_term) { + $self->set_emergency_comma_breakpoints( - $self->set_forced_breakpoint($i_first_comma); - } - else { + $number_of_fields_best, + $rhash_IN, + $comma_count, + $i_first_comma, - # let breaks be defined by default bond strength logic - } + ); return; } - # -------------------------------------------------------- - # We have a tentative field count that seems to work. + #------------------------------------------------------------------ + # Section B1B: We have a tentative field count that seems to work. + # Now we must look more closely to determine if a table layout will + # actually look okay. + #------------------------------------------------------------------ + # How many lines will this require? - # -------------------------------------------------------- my $formatted_lines = $item_count / ($number_of_fields); if ( $formatted_lines != int $formatted_lines ) { $formatted_lines = 1 + int $formatted_lines; @@ -20433,14 +23853,8 @@ EOM # So far we've been trying to fill out to the right margin. But # compact tables are easier to read, so let's see if we can use fewer # fields without increasing the number of lines. - $number_of_fields = - compactify_table( $item_count, $number_of_fields, $formatted_lines, - $odd_or_even ); - - # How many spaces across the page will we fill? - my $columns_per_line = - ( int $number_of_fields / 2 ) * $pair_width + - ( $number_of_fields % 2 ) * $max_width; + $number_of_fields = compactify_table( $item_count, $number_of_fields, + $formatted_lines, $odd_or_even ); my $formatted_columns; @@ -20456,208 +23870,104 @@ EOM $formatted_columns = $packed_columns; } - my $unused_columns = $formatted_columns - $packed_columns; - - # set some empirical parameters to help decide if we should try to - # align; high sparsity does not look good, especially with few lines - my $sparsity = ($unused_columns) / ($formatted_columns); - my $max_allowed_sparsity = - ( $item_count < 3 ) ? 0.1 - : ( $packed_lines == 1 ) ? 0.15 - : ( $packed_lines == 2 ) ? 0.4 - : 0.7; - - my $two_line_word_wrap_ok; - if ( $opening_token eq '(' ) { - - # default is to allow wrapping of short paren lists - $two_line_word_wrap_ok = 1; - - # but turn off word wrap where requested - if ($rOpts_break_open_compact_parens) { - - # This parameter is a one-character flag, as follows: - # '0' matches no parens -> break open NOT OK -> word wrap OK - # '1' matches all parens -> break open OK -> word wrap NOT OK - # Other values are the same as used by the weld-exclusion-list - my $flag = $rOpts_break_open_compact_parens; - if ( $flag eq '*' - || $flag eq '1' ) - { - $two_line_word_wrap_ok = 0; - } - elsif ( $flag eq '0' ) { - $two_line_word_wrap_ok = 1; - } - else { - my $KK = $K_to_go[$i_opening_paren]; - $two_line_word_wrap_ok = - !$self->match_paren_flag( $KK, $flag ); - } - } - } - - # Begin check for shortcut methods, which avoid treating a list - # as a table for relatively small parenthesized lists. These - # are usually easier to read if not formatted as tables. - if ( - $packed_lines <= 2 # probably can fit in 2 lines - && $item_count < 9 # doesn't have too many items - && $opening_is_in_block # not a sub-container - && $two_line_word_wrap_ok # ok to wrap this paren list - ##&& $opening_token eq '(' # is paren list - ) - { - - # Shortcut method 1: for -lp and just one comma: - # This is a no-brainer, just break at the comma. - if ( - $is_lp_formatting # -lp - && $item_count == 2 # two items, one comma - && !$must_break_open - ) - { - my $i_break = $rcomma_index->[0]; - $self->set_forced_breakpoint($i_break); - ${$rdo_not_break_apart} = 1; - return; - - } - - # method 2 is for most small ragged lists which might look - # best if not displayed as a table. - if ( - ( $number_of_fields == 2 && $item_count == 3 ) - || ( - $new_identifier_count > 0 # isn't all quotes - && $sparsity > 0.15 - ) # would be fairly spaced gaps if aligned - ) - { - - my $break_count = $self->set_ragged_breakpoints( \@i_term_comma, - $ri_ragged_break_list ); - ++$break_count if ($use_separate_first_term); - - # NOTE: we should really use the true break count here, - # which can be greater if there are large terms and - # little space, but usually this will work well enough. - unless ($must_break_open) { - - if ( $break_count <= 1 ) { - ${$rdo_not_break_apart} = 1; - } - elsif ( $is_lp_formatting && !$need_lp_break_open ) { - ${$rdo_not_break_apart} = 1; - } - } - return; - } - - } ## end shortcut methods - - # debug stuff - DEBUG_SPARSE && do { - print STDOUT -"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n"; - + # Construce hash_B: + return { + + # Updated variables + _i_first_comma_B => $i_first_comma, + _i_opening_paren_B => $i_opening_paren, + _item_count_B => $item_count, + + # New variables + _columns => $columns, + _formatted_columns => $formatted_columns, + _formatted_lines => $formatted_lines, + _max_width => $max_width, + _new_identifier_count => $new_identifier_count, + _number_of_fields => $number_of_fields, + _odd_or_even => $odd_or_even, + _packed_columns => $packed_columns, + _packed_lines => $packed_lines, + _pair_width => $pair_width, + _ri_ragged_break_list => $ri_ragged_break_list, + _use_separate_first_term => $use_separate_first_term, }; + } ## end sub table_layout_B - #--------------------------------------------------------------- - # Compound List Rule 2: - # If this list is too long for one line, and it is an item of a - # larger list, then we must format it, regardless of sparsity - # (ian.t). One reason that we have to do this is to trigger - # Compound List Rule 1, above, which causes breaks at all commas of - # all outer lists. In this way, the structure will be properly - # displayed. - #--------------------------------------------------------------- - - # Decide if this list is too long for one line unless broken - my $total_columns = table_columns_available($i_opening_paren); - my $too_long = $packed_columns > $total_columns; + sub lp_table_fix { - # For a paren list, include the length of the token just before the - # '(' because this is likely a sub call, and we would have to - # include the sub name on the same line as the list. This is still - # imprecise, but not too bad. (steve.t) - if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) { + # try to undo some -lp indentation to improve table formatting - $too_long = $self->excess_line_length( $i_opening_minus, - $i_effective_last_comma + 1 ) > 0; - } + my ( - # FIXME: For an item after a '=>', try to include the length of the - # thing before the '=>'. This is crude and should be improved by - # actually looking back token by token. - if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) { - my $i_opening_minus_test = $i_opening_paren - 4; - if ( $i_opening_minus >= 0 ) { - $too_long = $self->excess_line_length( $i_opening_minus_test, - $i_effective_last_comma + 1 ) > 0; - } - } + $self, # - # Always break lists contained in '[' and '{' if too long for 1 line, - # and always break lists which are too long and part of a more complex - # structure. - my $must_break_open_container = $must_break_open - || ( $too_long - && ( $in_hierarchical_list || !$two_line_word_wrap_ok ) ); + $columns, + $i_first_comma, + $max_width, + $number_of_fields, + $number_of_fields_best, + $odd_or_even, + $pair_width, + $ritem_lengths, -#print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n"; + ) = @_; - #--------------------------------------------------------------- - # The main decision: - # Now decide if we will align the data into aligned columns. Do not - # attempt to align columns if this is a tiny table or it would be - # too spaced. It seems that the more packed lines we have, the - # sparser the list that can be allowed and still look ok. - #--------------------------------------------------------------- + my $available_spaces = + $self->get_available_spaces_to_go($i_first_comma); + if ( $available_spaces > 0 ) { - if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines ) - || ( $formatted_lines < 2 ) - || ( $unused_columns > $max_allowed_sparsity * $formatted_columns ) - ) - { + my $spaces_wanted = $max_width - $columns; # for 1 field - #--------------------------------------------------------------- - # too sparse: would look ugly if aligned in a table; - #--------------------------------------------------------------- + if ( $number_of_fields_best == 0 ) { + $number_of_fields_best = + get_maximum_fields_wanted($ritem_lengths); + } - # use old breakpoints if this is a 'big' list - if ( $packed_lines > 2 && $item_count > 10 ) { - write_logfile_entry("List sparse: using old breakpoints\n"); - $self->copy_old_breakpoints( $i_first_comma, $i_last_comma ); + if ( $number_of_fields_best != 1 ) { + my $spaces_wanted_2 = 1 + $pair_width - $columns; # for 2 fields + if ( $available_spaces > $spaces_wanted_2 ) { + $spaces_wanted = $spaces_wanted_2; + } } - # let the continuation logic handle it if 2 lines - else { + if ( $spaces_wanted > 0 ) { + my $deleted_spaces = + $self->reduce_lp_indentation( $i_first_comma, + $spaces_wanted ); - my $break_count = $self->set_ragged_breakpoints( \@i_term_comma, - $ri_ragged_break_list ); - ++$break_count if ($use_separate_first_term); + # redo the math + if ( $deleted_spaces > 0 ) { + $columns = table_columns_available($i_first_comma); + $number_of_fields = + maximum_number_of_fields( $columns, $odd_or_even, + $max_width, $pair_width ); - unless ($must_break_open_container) { - if ( $break_count <= 1 ) { - ${$rdo_not_break_apart} = 1; - } - elsif ( $is_lp_formatting && !$need_lp_break_open ) { - ${$rdo_not_break_apart} = 1; + if ( $number_of_fields_best == 1 + && $number_of_fields >= 1 ) + { + $number_of_fields = $number_of_fields_best; } } } - return; } + return ( $number_of_fields, $number_of_fields_best, $columns ); + } ## end sub lp_table_fix + + sub write_formatted_table { + + # Write a table of comma separated items with fixed number of fields + my ( $self, $number_of_fields, $comma_count, $rcomma_index, + $use_separate_first_term ) + = @_; - #--------------------------------------------------------------- - # go ahead and format as a table - #--------------------------------------------------------------- write_logfile_entry( "List: auto formatting with $number_of_fields fields/row\n"); my $j_first_break = - $use_separate_first_term ? $number_of_fields : $number_of_fields - 1; + $use_separate_first_term + ? $number_of_fields + : $number_of_fields - 1; my $j = $j_first_break; while ( $j < $comma_count ) { @@ -20666,8 +23976,9 @@ EOM $j += $number_of_fields; } return; - } ## end sub set_comma_breakpoints_do -} ## end closure set_comma_breakpoints_do + } ## end sub write_formatted_table + +} ## end closure set_comma_breakpoint_final sub study_list_complexity { @@ -20750,7 +24061,7 @@ sub study_list_complexity { && $i_last_last_break != $i - 2 ) { - ## FIXME: don't strand a small term + ## TODO: don't strand a small term pop @i_ragged_break_list; push @i_ragged_break_list, $i - 2; push @i_ragged_break_list, $i - 1; @@ -20915,11 +24226,28 @@ sub copy_old_breakpoints { my ( $self, $i_first_comma, $i_last_comma ) = @_; for my $i ( $i_first_comma .. $i_last_comma ) { if ( $old_breakpoint_to_go[$i] ) { - $self->set_forced_breakpoint($i); + + # If the comma style is under certain controls, and if this is a + # comma breakpoint with the comma is at the beginning of the next + # line, then we must pass that index instead. This will allow sub + # set_forced_breakpoints to check and follow the user settings. This + # produces a uniform style and can prevent instability (b1422). + # + # The flag '$controlled_comma_style' will be set if the user + # entered any of -wbb=',' -wba=',' -kbb=',' -kba=','. It is not + # needed or set for the -boc flag. + my $ibreak = $i; + if ( $types_to_go[$ibreak] ne ',' && $controlled_comma_style ) { + my $index = $inext_to_go[$ibreak]; + if ( $index > $ibreak && $types_to_go[$index] eq ',' ) { + $ibreak = $index; + } + } + $self->set_forced_breakpoint($ibreak); } } return; -} +} ## end sub copy_old_breakpoints sub set_nobreaks { my ( $self, $i, $j ) = @_; @@ -20936,11 +24264,12 @@ sub set_nobreaks { # shouldn't happen; non-critical error else { - 0 && do { + if (DEVEL_MODE) { my ( $a, $b, $c ) = caller(); - print STDOUT - "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"; - }; + Fault(<get_spaces() : $indentation; -} +} ## end sub get_spaces sub get_recoverable_spaces { @@ -21027,7 +24356,7 @@ sub get_recoverable_spaces { # to get them to line up with their opening parens my $indentation = shift; return ref($indentation) ? $indentation->get_recoverable_spaces() : 0; -} +} ## end sub get_recoverable_spaces sub get_available_spaces_to_go { @@ -21052,9 +24381,6 @@ sub get_available_spaces_to_go { # an -lp indentation level. This survives between batches. my $lp_position_predictor; - # A level at which the lp format becomes too highly stressed to continue - my $lp_cutoff_level; - BEGIN { # Index names for the -lp stack variables. @@ -21068,7 +24394,7 @@ sub get_available_spaces_to_go { _lp_container_seqno_ => $i++, _lp_space_count_ => $i++, }; - } + } ## end BEGIN sub initialize_lp_vars { @@ -21077,10 +24403,9 @@ sub get_available_spaces_to_go { $lp_position_predictor = 0; $max_lp_stack = 0; - $lp_cutoff_level = min( $stress_level_alpha, $stress_level_beta + 2 ); # we can turn off -lp if all levels will be at or above the cutoff - if ( $lp_cutoff_level <= 1 ) { + if ( $high_stress_level <= 1 ) { $rOpts_line_up_parentheses = 0; $rOpts_extended_line_up_parentheses = 0; } @@ -21111,44 +24436,58 @@ sub get_available_spaces_to_go { @hash_test2{@q} = (1) x scalar(@q); @q = qw( . || && ); @hash_test3{@q} = (1) x scalar(@q); - } + } ## end BEGIN + + # shared variables, re-initialized for each batch + my $rlp_object_list; + my $max_lp_object_list; + my %lp_comma_count; + my %lp_arrow_count; + my $space_count; + my $current_level; + my $current_ci_level; + my $ii_begin_line; + my $in_lp_mode; + my $stack_changed; + my $K_last_nonblank; + my $last_nonblank_token; + my $last_nonblank_type; + my $last_last_nonblank_type; sub set_lp_indentation { + my ($self) = @_; + #------------------------------------------------------------------ # Define the leading whitespace for all tokens in the current batch # when the -lp formatting is selected. #------------------------------------------------------------------ - my ($self) = @_; - return unless ($rOpts_line_up_parentheses); return unless ( defined($max_index_to_go) && $max_index_to_go >= 0 ); # List of -lp indentation objects created in this batch - my $rlp_object_list = []; - my $max_lp_object_list = UNDEFINED_INDEX; - - my %last_lp_equals; - my %lp_comma_count; - my %lp_arrow_count; - my $ii_begin_line = 0; - - my $rLL = $self->[_rLL_]; - my $Klimit = $self->[_Klimit_]; - my $rbreak_container = $self->[_rbreak_container_]; - my $rshort_nested = $self->[_rshort_nested_]; - my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_]; - my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; - my $starting_in_quote = $self->[_this_batch_]->[_starting_in_quote_]; - my $K_closing_container = $self->[_K_closing_container_]; - my $rlp_object_by_seqno = $self->[_rlp_object_by_seqno_]; - my $radjusted_levels = $self->[_radjusted_levels_]; - my $rbreak_before_container_by_seqno = - $self->[_rbreak_before_container_by_seqno_]; - my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_]; + $rlp_object_list = []; + $max_lp_object_list = -1; + + %lp_comma_count = (); + %lp_arrow_count = (); + $space_count = undef; + $current_level = undef; + $current_ci_level = undef; + $ii_begin_line = 0; + $in_lp_mode = 0; + $stack_changed = 1; + $K_last_nonblank = undef; + $last_nonblank_token = EMPTY_STRING; + $last_nonblank_type = EMPTY_STRING; + $last_last_nonblank_type = EMPTY_STRING; + + my %last_lp_equals = (); + + my $rLL = $self->[_rLL_]; + my $starting_in_quote = $self->[_this_batch_]->[_starting_in_quote_]; - my $nws = @{$radjusted_levels}; my $imin = 0; # The 'starting_in_quote' flag means that the first token is the first @@ -21159,7 +24498,6 @@ sub get_available_spaces_to_go { $imin += 1; } - my $K_last_nonblank; my $Kpnb = $K_to_go[0] - 1; if ( $Kpnb > 0 && $rLL->[$Kpnb]->[_TYPE_] eq 'b' ) { $Kpnb -= 1; @@ -21168,39 +24506,21 @@ sub get_available_spaces_to_go { $K_last_nonblank = $Kpnb; } - my $last_nonblank_token = EMPTY_STRING; - my $last_nonblank_type = EMPTY_STRING; - my $last_last_nonblank_type = EMPTY_STRING; - if ( defined($K_last_nonblank) ) { $last_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_]; $last_nonblank_type = $rLL->[$K_last_nonblank]->[_TYPE_]; } - my ( $space_count, $current_level, $current_ci_level, $in_lp_mode ); - my $stack_changed = 1; - #----------------------------------- # Loop over all tokens in this batch #----------------------------------- foreach my $ii ( $imin .. $max_index_to_go ) { - my $KK = $K_to_go[$ii]; - my $type = $types_to_go[$ii]; - my $token = $tokens_to_go[$ii]; - my $level = $levels_to_go[$ii]; - my $ci_level = $ci_levels_to_go[$ii]; - my $total_depth = $nesting_depth_to_go[$ii]; - my $standard_spaces = $leading_spaces_to_go[$ii]; - - #-------------------------------------------------- - # Adjust levels if necessary to recycle whitespace: - #-------------------------------------------------- - if ( defined($radjusted_levels) && @{$radjusted_levels} == $Klimit ) - { - $level = $radjusted_levels->[$KK]; - if ( $level < 0 ) { $level = 0 } # note: this should not happen - } + my $type = $types_to_go[$ii]; + my $token = $tokens_to_go[$ii]; + my $level = $levels_to_go[$ii]; + my $ci_level = $ci_levels_to_go[$ii]; + my $total_depth = $nesting_depth_to_go[$ii]; # get the top state from the stack if it has changed if ($stack_changed) { @@ -21218,687 +24538,766 @@ sub get_available_spaces_to_go { $stack_changed = 0; } - #------------------------------ - # update the position predictor - #------------------------------ + #------------------------------------------------------------ + # Break at a previous '=' if necessary to control line length + #------------------------------------------------------------ if ( $type eq '{' || $type eq '(' ) { - $lp_comma_count{ $total_depth + 1 } = 0; $lp_arrow_count{ $total_depth + 1 } = 0; # If we come to an opening token after an '=' token of some # type, see if it would be helpful to 'break' after the '=' to # save space - my $last_equals = $last_lp_equals{$total_depth}; - - # Skip an empty set of parens, such as after channel(): - # my $exchange = $self->_channel()->exchange( - # This fixes issues b1318 b1322 b1323 b1328 - # TODO: maybe also skip parens with just one token? - my $is_empty_container; - if ( $last_equals && $ii < $max_index_to_go ) { - my $seqno = $type_sequence_to_go[$ii]; - my $inext_nb = $ii + 1; - $inext_nb++ - if ( $types_to_go[$inext_nb] eq 'b' ); - my $seqno_nb = $type_sequence_to_go[$inext_nb]; - $is_empty_container = - $seqno && $seqno_nb && $seqno_nb == $seqno; - } - - if ( $last_equals - && $last_equals > $ii_begin_line - && !$is_empty_container ) - { + my $ii_last_equals = $last_lp_equals{$total_depth}; + if ($ii_last_equals) { + $self->lp_equals_break_check( $ii, $ii_last_equals ); + } + } + + #------------------------ + # Handle decreasing depth + #------------------------ + # Note that one token may have both decreasing and then increasing + # depth. For example, (level, ci) can go from (1,1) to (2,0). So, + # in this example we would first go back to (1,0) then up to (2,0) + # in a single call. + if ( $level < $current_level || $ci_level < $current_ci_level ) { + $self->lp_decreasing_depth($ii); + } - my $seqno = $type_sequence_to_go[$ii]; + #------------------------ + # handle increasing depth + #------------------------ + if ( $level > $current_level || $ci_level > $current_ci_level ) { + $self->lp_increasing_depth($ii); + } - # find the position if we break at the '=' - my $i_test = $last_equals; + #------------------ + # Handle all tokens + #------------------ + if ( $type ne 'b' ) { - # Fix for issue b1229, check for break before - if ( $want_break_before{ $types_to_go[$i_test] } ) { - if ( $i_test > 0 ) { $i_test-- } - } - elsif ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ } + # Count commas and look for non-list characters. Once we see a + # non-list character, we give up and don't look for any more + # commas. + if ( $type eq '=>' ) { + $lp_arrow_count{$total_depth}++; - my $test_position = total_line_length( $i_test, $ii ); - my $mll = - $maximum_line_length_at_level[ $levels_to_go[$i_test] ]; + # remember '=>' like '=' for estimating breaks (but see + # above note for b1035) + $last_lp_equals{$total_depth} = $ii; + } + + elsif ( $type eq ',' ) { + $lp_comma_count{$total_depth}++; + } - #------------------------------------------------------ - # Break if structure will reach the maximum line length - #------------------------------------------------------ + elsif ( $is_assignment{$type} ) { + $last_lp_equals{$total_depth} = $ii; + } - # Historically, -lp just used one-half line length here - my $len_increase = $rOpts_maximum_line_length / 2; + # this token might start a new line if .. + if ( + $ii > $ii_begin_line - # For -xlp, we can also use the pre-computed lengths - my $min_len = $rcollapsed_length_by_seqno->{$seqno}; - if ( $min_len && $min_len > $len_increase ) { - $len_increase = $min_len; - } + && ( - if ( + # this is the first nonblank token of the line + $ii == 1 && $types_to_go[0] eq 'b' - # if we might exceed the maximum line length - $lp_position_predictor + $len_increase > $mll + # or previous character was one of these: + # /^([\:\?\,f])$/ + || $hash_test2{$last_nonblank_type} - # if a -bbx flag WANTS a break before this opening token - || ( $seqno - && $rbreak_before_container_by_seqno->{$seqno} ) + # or previous character was opening and this is not + # closing + || ( $last_nonblank_type eq '{' && $type ne '}' ) + || ( $last_nonblank_type eq '(' and $type ne ')' ) - # or we are beyond the 1/4 point and there was an old - # break at an assignment (not '=>') [fix for b1035] + # or this token is one of these: + # /^([\.]|\|\||\&\&)$/ + || $hash_test3{$type} + + # or this is a closing structure + || ( $last_nonblank_type eq '}' + && $last_nonblank_token eq $last_nonblank_type ) + + # or previous token was keyword 'return' + || ( + $last_nonblank_type eq 'k' + && ( $last_nonblank_token eq 'return' + && $type ne '{' ) + ) + + # or starting a new line at certain keywords is fine + || ( $type eq 'k' + && $is_if_unless_and_or_last_next_redo_return{ + $token} ) + + # or this is after an assignment after a closing + # structure || ( - $lp_position_predictor > - $mll - $rOpts_maximum_line_length * 3 / 4 - && $types_to_go[$last_equals] ne '=>' + $is_assignment{$last_nonblank_type} && ( - $old_breakpoint_to_go[$last_equals] - || ( $last_equals > 0 - && $old_breakpoint_to_go[ $last_equals - 1 ] - ) - || ( $last_equals > 1 - && $types_to_go[ $last_equals - 1 ] eq 'b' - && $old_breakpoint_to_go[ $last_equals - 2 ] + # /^[\}\)\]]$/ + $hash_test1{$last_last_nonblank_type} + + # and it is significantly to the right + || $lp_position_predictor > ( + $maximum_line_length_at_level[$level] - + $rOpts_maximum_line_length / 2 ) ) ) - ) - { + ) + ) + { + check_for_long_gnu_style_lines($ii); + $ii_begin_line = $ii; - # then make the switch -- note that we do not set a - # real breakpoint here because we may not really need - # one; sub break_lists will do that if necessary. + # back up 1 token if we want to break before that type + # otherwise, we may strand tokens like '?' or ':' on a line + if ( $ii_begin_line > 0 ) { + my $wbb = + $last_nonblank_type eq 'k' + ? $want_break_before{$last_nonblank_token} + : $want_break_before{$last_nonblank_type}; + $ii_begin_line-- if ($wbb); + } + } - my $Kc = $K_closing_container->{$seqno}; - if ( + $K_last_nonblank = $K_to_go[$ii]; + $last_last_nonblank_type = $last_nonblank_type; + $last_nonblank_type = $type; + $last_nonblank_token = $token; - # For -lp, only if the closing token is in this - # batch (c117). Otherwise it cannot be done by sub - # break_lists. - defined($Kc) && $Kc <= $K_to_go[$max_index_to_go] + } ## end if ( $type ne 'b' ) - # For -xlp, we only need one nonblank token after - # the opening token. - || $rOpts_extended_line_up_parentheses - ) - { - $ii_begin_line = $i_test + 1; - $lp_position_predictor = $test_position; - - #-------------------------------------------------- - # Fix for an opening container terminating a batch: - #-------------------------------------------------- - # To get alignment of a -lp container with its - # contents, we have to put a break after $i_test. - # For $ii<$max_index_to_go, this will be done by - # sub break_lists based on the indentation object. - # But for $ii=$max_index_to_go, the indentation - # object for this seqno will not be created until - # the next batch, so we have to set a break at - # $i_test right now in order to get one. - if ( $ii == $max_index_to_go - && !$block_type_to_go[$ii] - && $type eq '{' - && $seqno - && !$ris_excluded_lp_container->{$seqno} ) - { - $self->set_forced_lp_break( $ii_begin_line, - $ii ); - } - } - } - } - } ## end update position predictor + # remember the predicted position of this token on the output line + if ( $ii > $ii_begin_line ) { - #------------------------ - # Handle decreasing depth - #------------------------ - # Note that one token may have both decreasing and then increasing - # depth. For example, (level, ci) can go from (1,1) to (2,0). So, - # in this example we would first go back to (1,0) then up to (2,0) - # in a single call. - if ( $level < $current_level || $ci_level < $current_ci_level ) { + ## NOTE: this is a critical loop - the following call has been + ## expanded for about 2x speedup: + ## $lp_position_predictor = + ## total_line_length( $ii_begin_line, $ii ); - # loop to find the first entry at or completely below this level - while (1) { - if ($max_lp_stack) { + my $indentation = $leading_spaces_to_go[$ii_begin_line]; + if ( ref($indentation) ) { + $indentation = $indentation->get_spaces(); + } + $lp_position_predictor = + $indentation + + $summed_lengths_to_go[ $ii + 1 ] - + $summed_lengths_to_go[$ii_begin_line]; + } + else { + $lp_position_predictor = + $space_count + $token_lengths_to_go[$ii]; + } - # save index of token which closes this level - if ( $rLP->[$max_lp_stack]->[_lp_object_] ) { - my $lp_object = - $rLP->[$max_lp_stack]->[_lp_object_]; + # Store the indentation object for this token. + # This allows us to manipulate the leading whitespace + # (in case we have to reduce indentation to fit a line) without + # having to change any token values. - $lp_object->set_closed($ii); + #--------------------------------------------------------------- + # replace leading whitespace with indentation objects where used + #--------------------------------------------------------------- + if ( $rLP->[$max_lp_stack]->[_lp_object_] ) { + my $lp_object = $rLP->[$max_lp_stack]->[_lp_object_]; + $leading_spaces_to_go[$ii] = $lp_object; + if ( $max_lp_stack > 0 + && $ci_level + && $rLP->[ $max_lp_stack - 1 ]->[_lp_object_] ) + { + $reduced_spaces_to_go[$ii] = + $rLP->[ $max_lp_stack - 1 ]->[_lp_object_]; + } + else { + $reduced_spaces_to_go[$ii] = $lp_object; + } + } + } ## end loop over all tokens in this batch - my $comma_count = 0; - my $arrow_count = 0; - if ( $type eq '}' || $type eq ')' ) { - $comma_count = $lp_comma_count{$total_depth}; - $arrow_count = $lp_arrow_count{$total_depth}; - $comma_count = 0 unless $comma_count; - $arrow_count = 0 unless $arrow_count; - } + undo_incomplete_lp_indentation() + if ( !$rOpts_extended_line_up_parentheses ); - $lp_object->set_comma_count($comma_count); - $lp_object->set_arrow_count($arrow_count); + return; + } ## end sub set_lp_indentation - # Undo any extra indentation if we saw no commas - my $available_spaces = - $lp_object->get_available_spaces(); - my $K_start = $lp_object->get_K_begin_line(); + sub lp_equals_break_check { + + my ( $self, $ii, $ii_last_equals ) = @_; + + # If we come to an opening token after an '=' token of some + # type, see if it would be helpful to 'break' after the '=' to + # save space. + + # Given: + # $ii = index of an opening token in the output batch + # $ii_begin_line = index of token starting next output line + # Update: + # $lp_position_predictor - updated position predictor + # $ii_begin_line = updated starting token index + + # Skip an empty set of parens, such as after channel(): + # my $exchange = $self->_channel()->exchange( + # This fixes issues b1318 b1322 b1323 b1328 + my $is_empty_container; + if ( $ii_last_equals && $ii < $max_index_to_go ) { + my $seqno = $type_sequence_to_go[$ii]; + my $inext_nb = $ii + 1; + $inext_nb++ + if ( $types_to_go[$inext_nb] eq 'b' ); + my $seqno_nb = $type_sequence_to_go[$inext_nb]; + $is_empty_container = $seqno && $seqno_nb && $seqno_nb == $seqno; + } + + if ( $ii_last_equals + && $ii_last_equals > $ii_begin_line + && !$is_empty_container ) + { - if ( $available_spaces > 0 - && $K_start >= $K_to_go[0] - && ( $comma_count <= 0 || $arrow_count > 0 ) ) - { + my $seqno = $type_sequence_to_go[$ii]; - my $i = $lp_object->get_lp_item_index(); + # find the position if we break at the '=' + my $i_test = $ii_last_equals; - # Safety check for a valid stack index. It - # should be ok because we just checked that the - # index K of the token associated with this - # indentation is in this batch. - if ( $i < 0 || $i > $max_lp_object_list ) { - if (DEVEL_MODE) { - my $lno = $rLL->[$KK]->[_LINE_INDEX_]; - Fault(<=0 and <= max=$max_lp_object_list -EOM - } - } - else { - if ( $arrow_count == 0 ) { - $rlp_object_list->[$i] - ->permanently_decrease_available_spaces - ($available_spaces); - } - else { - $rlp_object_list->[$i] - ->tentatively_decrease_available_spaces - ($available_spaces); - } - foreach - my $j ( $i + 1 .. $max_lp_object_list ) - { - $rlp_object_list->[$j] - ->decrease_SPACES($available_spaces); - } - } - } - } + # Fix for issue b1229, check if want break before this token + # Fix for issue b1356, if i_test is a blank, the leading spaces may + # be incorrect (if it was an interline blank). + # Fix for issue b1357 .. b1370, i_test must be prev nonblank + # ( the ci value for blanks can vary ) + # See also case b223 + # Fix for issue b1371-b1374 : all of these and the above are fixed + # by simply backing up one index and setting the leading spaces of + # a blank equal to that of the equals. + if ( $want_break_before{ $types_to_go[$i_test] } ) { + $i_test -= 1; + $leading_spaces_to_go[$i_test] = + $leading_spaces_to_go[$ii_last_equals] + if ( $types_to_go[$i_test] eq 'b' ); + } + elsif ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ } - # go down one level - --$max_lp_stack; - - my $rLP_top = $rLP->[$max_lp_stack]; - my $ci_lev = $rLP_top->[_lp_ci_level_]; - my $lev = $rLP_top->[_lp_level_]; - my $spaces = $rLP_top->[_lp_space_count_]; - if ( $rLP_top->[_lp_object_] ) { - my $lp_obj = $rLP_top->[_lp_object_]; - ( $spaces, $lev, $ci_lev ) = - @{ $lp_obj->get_spaces_level_ci() }; - } + my $test_position = total_line_length( $i_test, $ii ); + my $mll = $maximum_line_length_at_level[ $levels_to_go[$i_test] ]; - # stop when we reach a level at or below the current - # level - if ( $lev <= $level && $ci_lev <= $ci_level ) { - $space_count = $spaces; - $current_level = $lev; - $current_ci_level = $ci_lev; - last; - } - } + #------------------------------------------------------ + # Break if structure will reach the maximum line length + #------------------------------------------------------ - # reached bottom of stack .. should never happen because - # only negative levels can get here, and $level was forced - # to be positive above. - else { + # Historically, -lp just used one-half line length here + my $len_increase = $rOpts_maximum_line_length / 2; - # non-fatal, keep going except in DEVEL_MODE - if (DEVEL_MODE) { -##program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp - Fault(<[_rcollapsed_length_by_seqno_]->{$seqno}; + if ( $min_len && $min_len > $len_increase ) { + $len_increase = $min_len; + } - #------------------------ - # handle increasing depth - #------------------------ - if ( $level > $current_level || $ci_level > $current_ci_level ) { + if ( - $stack_changed = 1; - - # Compute the standard incremental whitespace. This will be - # the minimum incremental whitespace that will be used. This - # choice results in a smooth transition between the gnu-style - # and the standard style. - my $standard_increment = - ( $level - $current_level ) * - $rOpts_indent_columns + - ( $ci_level - $current_ci_level ) * - $rOpts_continuation_indentation; - - # Now we have to define how much extra incremental space - # ("$available_space") we want. This extra space will be - # reduced as necessary when long lines are encountered or when - # it becomes clear that we do not have a good list. - my $available_spaces = 0; - my $align_seqno = 0; - - my $last_nonblank_seqno; - my $last_nonblank_block_type; - if ( defined($K_last_nonblank) ) { - $last_nonblank_seqno = - $rLL->[$K_last_nonblank]->[_TYPE_SEQUENCE_]; - $last_nonblank_block_type = - $last_nonblank_seqno - ? $rblock_type_of_seqno->{$last_nonblank_seqno} - : undef; - } - - $in_lp_mode = $rLP->[$max_lp_stack]->[_lp_object_]; - - #----------------------------------------------- - # Initialize indentation spaces on empty stack.. - #----------------------------------------------- - if ( $max_lp_stack == 0 ) { - $space_count = $level * $rOpts_indent_columns; - } - - #---------------------------------------- - # Add the standard space increment if ... - #---------------------------------------- - elsif ( + # if we might exceed the maximum line length + $lp_position_predictor + $len_increase > $mll - # if this is a BLOCK, add the standard increment - $last_nonblank_block_type + # if a -bbx flag WANTS a break before this opening token + || ( $seqno + && $self->[_rbreak_before_container_by_seqno_]->{$seqno} ) - # or if this is not a sequenced item - || !$last_nonblank_seqno + # or we are beyond the 1/4 point and there was an old + # break at an assignment (not '=>') [fix for b1035] + || ( + $lp_position_predictor > + $mll - $rOpts_maximum_line_length * 3 / 4 + && $types_to_go[$ii_last_equals] ne '=>' + && ( + $old_breakpoint_to_go[$ii_last_equals] + || ( $ii_last_equals > 0 + && $old_breakpoint_to_go[ $ii_last_equals - 1 ] ) + || ( $ii_last_equals > 1 + && $types_to_go[ $ii_last_equals - 1 ] eq 'b' + && $old_breakpoint_to_go[ $ii_last_equals - 2 ] ) + ) + ) + ) + { - # or this container is excluded by user rules - # or contains here-docs or multiline qw text - || defined($last_nonblank_seqno) - && $ris_excluded_lp_container->{$last_nonblank_seqno} + # then make the switch -- note that we do not set a + # real breakpoint here because we may not really need + # one; sub break_lists will do that if necessary. - # or if last nonblank token was not structural indentation - || $last_nonblank_type ne '{' + my $Kc = $self->[_K_closing_container_]->{$seqno}; + if ( - # and do not start -lp under stress .. fixes b1244, b1255 - || !$in_lp_mode && $level >= $lp_cutoff_level + # For -lp, only if the closing token is in this + # batch (c117). Otherwise it cannot be done by sub + # break_lists. + defined($Kc) && $Kc <= $K_to_go[$max_index_to_go] + # For -xlp, we only need one nonblank token after + # the opening token. + || $rOpts_extended_line_up_parentheses ) { - - # If we have entered lp mode, use the top lp object to get - # the current indentation spaces because it may have - # changed. Fixes b1285, b1286. - if ($in_lp_mode) { - $space_count = $in_lp_mode->get_spaces(); + $ii_begin_line = $i_test + 1; + $lp_position_predictor = $test_position; + + #-------------------------------------------------- + # Fix for an opening container terminating a batch: + #-------------------------------------------------- + # To get alignment of a -lp container with its + # contents, we have to put a break after $i_test. + # For $ii<$max_index_to_go, this will be done by + # sub break_lists based on the indentation object. + # But for $ii=$max_index_to_go, the indentation + # object for this seqno will not be created until + # the next batch, so we have to set a break at + # $i_test right now in order to get one. + if ( $ii == $max_index_to_go + && !$block_type_to_go[$ii] + && $types_to_go[$ii] eq '{' + && $seqno + && !$self->[_ris_excluded_lp_container_]->{$seqno} ) + { + $self->set_forced_lp_break( $ii_begin_line, $ii ); } - $space_count += $standard_increment; } + } + } + return; + } ## end sub lp_equals_break_check - #--------------------------------------------------------------- - # -lp mode: try to use space to the first non-blank level change - #--------------------------------------------------------------- - else { - - # see how much space we have available - my $test_space_count = $lp_position_predictor; - my $excess = 0; - my $min_len = - $rcollapsed_length_by_seqno->{$last_nonblank_seqno}; - my $next_opening_too_far; - - if ( defined($min_len) ) { - $excess = - $test_space_count + - $min_len - - $maximum_line_length_at_level[$level]; - if ( $excess > 0 ) { - $test_space_count -= $excess; - - # will the next opening token be a long way out? - $next_opening_too_far = - $lp_position_predictor + $excess > - $maximum_line_length_at_level[$level]; - } - } + sub lp_decreasing_depth { + my ( $self, $ii ) = @_; - my $rLP_top = $rLP->[$max_lp_stack]; - my $min_gnu_indentation = $rLP_top->[_lp_space_count_]; - if ( $rLP_top->[_lp_object_] ) { - $min_gnu_indentation = - $rLP_top->[_lp_object_]->get_spaces(); - } - $available_spaces = - $test_space_count - $min_gnu_indentation; + my $rLL = $self->[_rLL_]; - # Do not startup -lp indentation mode if no space ... - # ... or if it puts the opening far to the right - if ( !$in_lp_mode - && ( $available_spaces <= 0 || $next_opening_too_far ) ) - { - $space_count += $standard_increment; - $available_spaces = 0; - } + my $level = $levels_to_go[$ii]; + my $ci_level = $ci_levels_to_go[$ii]; - # Use -lp mode - else { - $space_count = $test_space_count; + # loop to find the first entry at or completely below this level + while (1) { - $in_lp_mode = 1; - if ( $available_spaces >= $standard_increment ) { - $min_gnu_indentation += $standard_increment; - } - elsif ( $available_spaces > 1 ) { - $min_gnu_indentation += $available_spaces + 1; - } - ##elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) { - elsif ( $is_opening_token{$last_nonblank_token} ) { - if ( ( $tightness{$last_nonblank_token} < 2 ) ) { - $min_gnu_indentation += 2; - } - else { - $min_gnu_indentation += 1; - } - } - else { - $min_gnu_indentation += $standard_increment; - } - $available_spaces = $space_count - $min_gnu_indentation; + # Be sure we have not hit the stack bottom - should never + # happen because only negative levels can get here, and + # $level was forced to be positive above. + if ( !$max_lp_stack ) { - if ( $available_spaces < 0 ) { - $space_count = $min_gnu_indentation; - $available_spaces = 0; - } - $align_seqno = $last_nonblank_seqno; - } + # non-fatal, just keep going except in DEVEL_MODE + if (DEVEL_MODE) { + Fault(<[$max_lp_stack]->[_lp_object_] ) { - $rLP->[$max_lp_stack]->[_lp_object_]->set_have_child(1); - $in_lp_mode = 1; - } + # save index of token which closes this level + if ( $rLP->[$max_lp_stack]->[_lp_object_] ) { + my $lp_object = $rLP->[$max_lp_stack]->[_lp_object_]; - #---------------------------------------- - # Create indentation object if in lp-mode - #---------------------------------------- - ++$max_lp_stack; - my $lp_object; - if ($in_lp_mode) { - - # A negative level implies not to store the item in the - # item_list - my $lp_item_index = 0; - if ( $level >= 0 ) { - $lp_item_index = ++$max_lp_object_list; - } + $lp_object->set_closed($ii); - my $K_begin_line = 0; - if ( $ii_begin_line >= 0 - && $ii_begin_line <= $max_index_to_go ) - { - $K_begin_line = $K_to_go[$ii_begin_line]; - } + my $comma_count = 0; + my $arrow_count = 0; + my $type = $types_to_go[$ii]; + if ( $type eq '}' || $type eq ')' ) { + my $total_depth = $nesting_depth_to_go[$ii]; + $comma_count = $lp_comma_count{$total_depth}; + $arrow_count = $lp_arrow_count{$total_depth}; + $comma_count = 0 unless $comma_count; + $arrow_count = 0 unless $arrow_count; + } - # Minor Fix: when creating indentation at a side - # comment we don't know what the space to the actual - # next code token will be. We will allow a space for - # sub correct_lp to move it in if necessary. - if ( $type eq '#' - && $max_index_to_go > 0 - && $align_seqno ) - { - $available_spaces += 1; - } + $lp_object->set_comma_count($comma_count); + $lp_object->set_arrow_count($arrow_count); - $lp_object = Perl::Tidy::IndentationItem->new( - spaces => $space_count, - level => $level, - ci_level => $ci_level, - available_spaces => $available_spaces, - lp_item_index => $lp_item_index, - align_seqno => $align_seqno, - stack_depth => $max_lp_stack, - K_begin_line => $K_begin_line, - standard_spaces => $standard_spaces, - ); + # Undo any extra indentation if we saw no commas + my $available_spaces = $lp_object->get_available_spaces(); + my $K_start = $lp_object->get_K_begin_line(); - DEBUG_LP && do { - my $tok_beg = $rLL->[$K_begin_line]->[_TOKEN_]; - print STDERR < 0 + && $K_start >= $K_to_go[0] + && ( $comma_count <= 0 || $arrow_count > 0 ) ) + { - if ( $level >= 0 ) { - $rlp_object_list->[$max_lp_object_list] = - $lp_object; - } + my $i = $lp_object->get_lp_item_index(); - ##if ( $last_nonblank_token =~ /^[\{\[\(]$/ - if ( $is_opening_token{$last_nonblank_token} - && $last_nonblank_seqno ) - { - $rlp_object_by_seqno->{$last_nonblank_seqno} = - $lp_object; - } + # Safety check for a valid stack index. It + # should be ok because we just checked that the + # index K of the token associated with this + # indentation is in this batch. + if ( $i < 0 || $i > $max_lp_object_list ) { + my $KK = $K_to_go[$ii]; + my $lno = $rLL->[$KK]->[_LINE_INDEX_]; + DEVEL_MODE && Fault(<=0 and <= max=$max_lp_object_list +EOM + last; } - #------------------------------------ - # Store this indentation on the stack - #------------------------------------ - $rLP->[$max_lp_stack]->[_lp_ci_level_] = $ci_level; - $rLP->[$max_lp_stack]->[_lp_level_] = $level; - $rLP->[$max_lp_stack]->[_lp_object_] = $lp_object; - $rLP->[$max_lp_stack]->[_lp_container_seqno_] = - $last_nonblank_seqno; - $rLP->[$max_lp_stack]->[_lp_space_count_] = $space_count; - - # If the opening paren is beyond the half-line length, then - # we will use the minimum (standard) indentation. This will - # help avoid problems associated with running out of space - # near the end of a line. As a result, in deeply nested - # lists, there will be some indentations which are limited - # to this minimum standard indentation. But the most deeply - # nested container will still probably be able to shift its - # parameters to the right for proper alignment, so in most - # cases this will not be noticeable. - if ( $available_spaces > 0 && $lp_object ) { - my $halfway = - $maximum_line_length_at_level[$level] - - $rOpts_maximum_line_length / 2; - $lp_object->tentatively_decrease_available_spaces( - $available_spaces) - if ( $space_count > $halfway ); + if ( $arrow_count == 0 ) { + $rlp_object_list->[$i] + ->permanently_decrease_available_spaces( + $available_spaces); + } + else { + $rlp_object_list->[$i] + ->tentatively_decrease_available_spaces( + $available_spaces); + } + foreach my $j ( $i + 1 .. $max_lp_object_list ) { + $rlp_object_list->[$j] + ->decrease_SPACES($available_spaces); } } - } ## end increasing depth + } - #------------------ - # Handle all tokens - #------------------ - if ( $type ne 'b' ) { + # go down one level + --$max_lp_stack; + + my $rLP_top = $rLP->[$max_lp_stack]; + my $ci_lev = $rLP_top->[_lp_ci_level_]; + my $lev = $rLP_top->[_lp_level_]; + my $spaces = $rLP_top->[_lp_space_count_]; + if ( $rLP_top->[_lp_object_] ) { + my $lp_obj = $rLP_top->[_lp_object_]; + ( $spaces, $lev, $ci_lev ) = + @{ $lp_obj->get_spaces_level_ci() }; + } + + # stop when we reach a level at or below the current + # level + if ( $lev <= $level && $ci_lev <= $ci_level ) { + $space_count = $spaces; + $current_level = $lev; + $current_ci_level = $ci_lev; + last; + } + } + return; + } ## end sub lp_decreasing_depth + + sub lp_increasing_depth { + my ( $self, $ii ) = @_; + + my $rLL = $self->[_rLL_]; + + my $type = $types_to_go[$ii]; + my $level = $levels_to_go[$ii]; + my $ci_level = $ci_levels_to_go[$ii]; + + $stack_changed = 1; + + # Compute the standard incremental whitespace. This will be + # the minimum incremental whitespace that will be used. This + # choice results in a smooth transition between the gnu-style + # and the standard style. + my $standard_increment = + ( $level - $current_level ) * $rOpts_indent_columns + + ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation; + + # Now we have to define how much extra incremental space + # ("$available_space") we want. This extra space will be + # reduced as necessary when long lines are encountered or when + # it becomes clear that we do not have a good list. + my $available_spaces = 0; + my $align_seqno = 0; + my $K_extra_space; + + my $last_nonblank_seqno; + my $last_nonblank_block_type; + if ( defined($K_last_nonblank) ) { + $last_nonblank_seqno = $rLL->[$K_last_nonblank]->[_TYPE_SEQUENCE_]; + $last_nonblank_block_type = + $last_nonblank_seqno + ? $self->[_rblock_type_of_seqno_]->{$last_nonblank_seqno} + : undef; + } + + $in_lp_mode = $rLP->[$max_lp_stack]->[_lp_object_]; + + #----------------------------------------------- + # Initialize indentation spaces on empty stack.. + #----------------------------------------------- + if ( $max_lp_stack == 0 ) { + $space_count = $level * $rOpts_indent_columns; + } - # Count commas and look for non-list characters. Once we see a - # non-list character, we give up and don't look for any more - # commas. - if ( $type eq '=>' ) { - $lp_arrow_count{$total_depth}++; + #---------------------------------------- + # Add the standard space increment if ... + #---------------------------------------- + elsif ( - # remember '=>' like '=' for estimating breaks (but see - # above note for b1035) - $last_lp_equals{$total_depth} = $ii; - } + # if this is a BLOCK, add the standard increment + $last_nonblank_block_type - elsif ( $type eq ',' ) { - $lp_comma_count{$total_depth}++; - } + # or if this is not a sequenced item + || !$last_nonblank_seqno - elsif ( $is_assignment{$type} ) { - $last_lp_equals{$total_depth} = $ii; - } + # or this container is excluded by user rules + # or contains here-docs or multiline qw text + || defined($last_nonblank_seqno) + && $self->[_ris_excluded_lp_container_]->{$last_nonblank_seqno} - # this token might start a new line if .. - if ( + # or if last nonblank token was not structural indentation + || $last_nonblank_type ne '{' - # this is the first nonblank token of the line - $ii == 1 && $types_to_go[0] eq 'b' + # and do not start -lp under stress .. fixes b1244, b1255 + || !$in_lp_mode && $level >= $high_stress_level - # or previous character was one of these: - # /^([\:\?\,f])$/ - || $hash_test2{$last_nonblank_type} + ) + { - # or previous character was opening and this is not closing - || ( $last_nonblank_type eq '{' && $type ne '}' ) - || ( $last_nonblank_type eq '(' and $type ne ')' ) + # If we have entered lp mode, use the top lp object to get + # the current indentation spaces because it may have + # changed. Fixes b1285, b1286. + if ($in_lp_mode) { + $space_count = $in_lp_mode->get_spaces(); + } + $space_count += $standard_increment; + } - # or this token is one of these: - # /^([\.]|\|\||\&\&)$/ - || $hash_test3{$type} + #--------------------------------------------------------------- + # -lp mode: try to use space to the first non-blank level change + #--------------------------------------------------------------- + else { - # or this is a closing structure - || ( $last_nonblank_type eq '}' - && $last_nonblank_token eq $last_nonblank_type ) + # see how much space we have available + my $test_space_count = $lp_position_predictor; + my $excess = 0; + my $min_len = + $self->[_rcollapsed_length_by_seqno_]->{$last_nonblank_seqno}; + my $next_opening_too_far; - # or previous token was keyword 'return' - || ( - $last_nonblank_type eq 'k' - && ( $last_nonblank_token eq 'return' - && $type ne '{' ) - ) + if ( defined($min_len) ) { + $excess = + $test_space_count + + $min_len - + $maximum_line_length_at_level[$level]; + if ( $excess > 0 ) { + $test_space_count -= $excess; - # or starting a new line at certain keywords is fine - || ( $type eq 'k' - && $is_if_unless_and_or_last_next_redo_return{$token} ) + # will the next opening token be a long way out? + $next_opening_too_far = + $lp_position_predictor + $excess > + $maximum_line_length_at_level[$level]; + } + } - # or this is after an assignment after a closing structure - || ( - $is_assignment{$last_nonblank_type} - && ( - # /^[\}\)\]]$/ - $hash_test1{$last_last_nonblank_type} + my $rLP_top = $rLP->[$max_lp_stack]; + my $min_gnu_indentation = $rLP_top->[_lp_space_count_]; + if ( $rLP_top->[_lp_object_] ) { + $min_gnu_indentation = $rLP_top->[_lp_object_]->get_spaces(); + } + $available_spaces = $test_space_count - $min_gnu_indentation; - # and it is significantly to the right - || $lp_position_predictor > ( - $maximum_line_length_at_level[$level] - - $rOpts_maximum_line_length / 2 - ) - ) - ) - ) - { - check_for_long_gnu_style_lines( $ii, $rlp_object_list ); - $ii_begin_line = $ii; + # Do not startup -lp indentation mode if no space ... + # ... or if it puts the opening far to the right + if ( !$in_lp_mode + && ( $available_spaces <= 0 || $next_opening_too_far ) ) + { + $space_count += $standard_increment; + $available_spaces = 0; + } - # back up 1 token if we want to break before that type - # otherwise, we may strand tokens like '?' or ':' on a line - if ( $ii_begin_line > 0 ) { - if ( $last_nonblank_type eq 'k' ) { + # Use -lp mode + else { + $space_count = $test_space_count; - if ( $want_break_before{$last_nonblank_token} ) { - $ii_begin_line--; - } - } - elsif ( $want_break_before{$last_nonblank_type} ) { - $ii_begin_line--; - } - } - } ## end if ( $ii == 1 && $types_to_go...) + $in_lp_mode = 1; + if ( $available_spaces >= $standard_increment ) { + $min_gnu_indentation += $standard_increment; + } + elsif ( $available_spaces > 1 ) { + $min_gnu_indentation += $available_spaces + 1; - $K_last_nonblank = $KK; + # The "+1" space can cause mis-alignment if there is no + # blank space between the opening paren and the next + # nonblank token (i.e., -pt=2) and the container does not + # get broken open. So we will mark this token for later + # space removal by sub 'xlp_tweak' if this container + # remains intact (issue git #106). + if ( + $type ne 'b' - $last_last_nonblank_type = $last_nonblank_type; - $last_nonblank_type = $type; - $last_nonblank_token = $token; + # Skip if the maximum line length is exceeded here + && $excess <= 0 - } ## end if ( $type ne 'b' ) + # This is only for level changes, not ci level changes. + # But note: this test is here out of caution but I have + # not found a case where it is actually necessary. + && $is_opening_token{$last_nonblank_token} - # remember the predicted position of this token on the output line - if ( $ii > $ii_begin_line ) { + # Be sure we are at consecutive nonblanks. This test + # should be true, but it guards against future coding + # changes to level values assigned to blank spaces. + && $ii > 0 + && $types_to_go[ $ii - 1 ] ne 'b' - ## NOTE: this is a critical loop - the following call has been - ## expanded for about 2x speedup: - ## $lp_position_predictor = - ## total_line_length( $ii_begin_line, $ii ); + ) + { + $K_extra_space = $K_to_go[$ii]; + } + } + elsif ( $is_opening_token{$last_nonblank_token} ) { + if ( ( $tightness{$last_nonblank_token} < 2 ) ) { + $min_gnu_indentation += 2; + } + else { + $min_gnu_indentation += 1; + } + } + else { + $min_gnu_indentation += $standard_increment; + } + $available_spaces = $space_count - $min_gnu_indentation; - my $indentation = $leading_spaces_to_go[$ii_begin_line]; - if ( ref($indentation) ) { - $indentation = $indentation->get_spaces(); + if ( $available_spaces < 0 ) { + $space_count = $min_gnu_indentation; + $available_spaces = 0; } - $lp_position_predictor = - $indentation + - $summed_lengths_to_go[ $ii + 1 ] - - $summed_lengths_to_go[$ii_begin_line]; - } - else { - $lp_position_predictor = - $space_count + $token_lengths_to_go[$ii]; + $align_seqno = $last_nonblank_seqno; } + } - # Store the indentation object for this token. - # This allows us to manipulate the leading whitespace - # (in case we have to reduce indentation to fit a line) without - # having to change any token values. + #------------------------------------------- + # update the state, but not on a blank token + #------------------------------------------- + if ( $type ne 'b' ) { - #--------------------------------------------------------------- - # replace leading whitespace with indentation objects where used - #--------------------------------------------------------------- if ( $rLP->[$max_lp_stack]->[_lp_object_] ) { - my $lp_object = $rLP->[$max_lp_stack]->[_lp_object_]; - $leading_spaces_to_go[$ii] = $lp_object; - if ( $max_lp_stack > 0 - && $ci_level - && $rLP->[ $max_lp_stack - 1 ]->[_lp_object_] ) + $rLP->[$max_lp_stack]->[_lp_object_]->set_have_child(1); + $in_lp_mode = 1; + } + + #---------------------------------------- + # Create indentation object if in lp-mode + #---------------------------------------- + ++$max_lp_stack; + my $lp_object; + if ($in_lp_mode) { + + # A negative level implies not to store the item in the + # item_list + my $lp_item_index = 0; + if ( $level >= 0 ) { + $lp_item_index = ++$max_lp_object_list; + } + + my $K_begin_line = 0; + if ( $ii_begin_line >= 0 + && $ii_begin_line <= $max_index_to_go ) { - $reduced_spaces_to_go[$ii] = - $rLP->[ $max_lp_stack - 1 ]->[_lp_object_]; + $K_begin_line = $K_to_go[$ii_begin_line]; } - else { - $reduced_spaces_to_go[$ii] = $lp_object; + + # Minor Fix: when creating indentation at a side + # comment we don't know what the space to the actual + # next code token will be. We will allow a space for + # sub correct_lp to move it in if necessary. + if ( $type eq '#' + && $max_index_to_go > 0 + && $align_seqno ) + { + $available_spaces += 1; + } + + my $standard_spaces = $leading_spaces_to_go[$ii]; + $lp_object = Perl::Tidy::IndentationItem->new( + spaces => $space_count, + level => $level, + ci_level => $ci_level, + available_spaces => $available_spaces, + lp_item_index => $lp_item_index, + align_seqno => $align_seqno, + stack_depth => $max_lp_stack, + K_begin_line => $K_begin_line, + standard_spaces => $standard_spaces, + K_extra_space => $K_extra_space, + ); + + DEBUG_LP && do { + my $tok_beg = $rLL->[$K_begin_line]->[_TOKEN_]; + my $token = $tokens_to_go[$ii]; + print STDERR <= 0 ) { + $rlp_object_list->[$max_lp_object_list] = $lp_object; } - } - } ## end loop over all tokens in this batch - undo_incomplete_lp_indentation($rlp_object_list) - if ( !$rOpts_extended_line_up_parentheses ); + if ( $is_opening_token{$last_nonblank_token} + && $last_nonblank_seqno ) + { + $self->[_rlp_object_by_seqno_]->{$last_nonblank_seqno} = + $lp_object; + } + } + #------------------------------------ + # Store this indentation on the stack + #------------------------------------ + $rLP->[$max_lp_stack]->[_lp_ci_level_] = $ci_level; + $rLP->[$max_lp_stack]->[_lp_level_] = $level; + $rLP->[$max_lp_stack]->[_lp_object_] = $lp_object; + $rLP->[$max_lp_stack]->[_lp_container_seqno_] = + $last_nonblank_seqno; + $rLP->[$max_lp_stack]->[_lp_space_count_] = $space_count; + + # If the opening paren is beyond the half-line length, then + # we will use the minimum (standard) indentation. This will + # help avoid problems associated with running out of space + # near the end of a line. As a result, in deeply nested + # lists, there will be some indentations which are limited + # to this minimum standard indentation. But the most deeply + # nested container will still probably be able to shift its + # parameters to the right for proper alignment, so in most + # cases this will not be noticeable. + if ( $available_spaces > 0 && $lp_object ) { + my $halfway = + $maximum_line_length_at_level[$level] - + $rOpts_maximum_line_length / 2; + $lp_object->tentatively_decrease_available_spaces( + $available_spaces) + if ( $space_count > $halfway ); + } + } return; - } ## end sub set_lp_indentation + } ## end sub lp_increasing_depth sub check_for_long_gnu_style_lines { # look at the current estimated maximum line length, and # remove some whitespace if it exceeds the desired maximum - my ( $mx_index_to_go, $rlp_object_list ) = @_; - - my $max_lp_object_list = @{$rlp_object_list} - 1; + my ($ii_to_go) = @_; # nothing can be done if no stack items defined for this line return if ( $max_lp_object_list < 0 ); - # see if we have exceeded the maximum desired line length + # See if we have exceeded the maximum desired line length .. # keep 2 extra free because they are needed in some cases # (result of trial-and-error testing) + my $tol = 2; + + # But reduce tol to 0 at a terminal comma; fixes b1432 + if ( $tokens_to_go[$ii_to_go] eq ',' + && $ii_to_go < $max_index_to_go ) + { + my $in = $ii_to_go + 1; + if ( $types_to_go[$in] eq 'b' && $in < $max_index_to_go ) { $in++ } + if ( $is_closing_token{ $tokens_to_go[$in] } ) { + $tol = 0; + } + } + my $spaces_needed = $lp_position_predictor - - $maximum_line_length_at_level[ $levels_to_go[$mx_index_to_go] ] + 2; + $maximum_line_length_at_level[ $levels_to_go[$ii_to_go] ] + + $tol; return if ( $spaces_needed <= 0 ); @@ -21990,9 +25389,6 @@ EOM # was always done because it could cause problems otherwise, but recent # improvements allow fairly good results to be obtained by skipping # this step with the -xlp flag. - my ($rlp_object_list) = @_; - - my $max_lp_object_list = @{$rlp_object_list} - 1; # nothing to do if no stack items defined for this line return if ( $max_lp_object_list < 0 ); @@ -22176,31 +25572,28 @@ sub convey_batch_to_vertical_aligner { # have been defined. Here we prepare the lines for passing to the vertical # aligner. We do the following tasks: # - mark certain vertical alignment tokens, such as '=', in each line - # - make minor indentation adjustments + # - make final indentation adjustments # - do logical padding: insert extra blank spaces to help display certain # logical constructions + # - send the line to the vertical aligner - my $this_batch = $self->[_this_batch_]; - my $ri_first = $this_batch->[_ri_first_]; - my $ri_last = $this_batch->[_ri_last_]; + my $rLL = $self->[_rLL_]; + my $Klimit = $self->[_Klimit_]; + my $ris_list_by_seqno = $self->[_ris_list_by_seqno_]; + my $this_batch = $self->[_this_batch_]; + + my $do_not_pad = $this_batch->[_do_not_pad_]; + my $starting_in_quote = $this_batch->[_starting_in_quote_]; + my $ending_in_quote = $this_batch->[_ending_in_quote_]; + my $is_static_block_comment = $this_batch->[_is_static_block_comment_]; + my $batch_CODE_type = $this_batch->[_batch_CODE_type_]; + my $ri_first = $this_batch->[_ri_first_]; + my $ri_last = $this_batch->[_ri_last_]; $self->check_convey_batch_input( $ri_first, $ri_last ) if (DEVEL_MODE); my $n_last_line = @{$ri_first} - 1; - my $do_not_pad = $this_batch->[_do_not_pad_]; - my $peak_batch_size = $this_batch->[_peak_batch_size_]; - my $starting_in_quote = $this_batch->[_starting_in_quote_]; - my $ending_in_quote = $this_batch->[_ending_in_quote_]; - my $is_static_block_comment = $this_batch->[_is_static_block_comment_]; - my $rix_seqno_controlling_ci = $this_batch->[_rix_seqno_controlling_ci_]; - my $batch_CODE_type = $this_batch->[_batch_CODE_type_]; - - my $rLL = $self->[_rLL_]; - my $Klimit = $self->[_Klimit_]; - my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; - my $ris_list_by_seqno = $self->[_ris_list_by_seqno_]; - my $ibeg_next = $ri_first->[0]; my $iend_next = $ri_last->[0]; @@ -22208,29 +25601,37 @@ sub convey_batch_to_vertical_aligner { my $type_end_next = $types_to_go[$iend_next]; my $token_beg_next = $tokens_to_go[$ibeg_next]; - my $is_block_comment = $max_index_to_go == 0 && $types_to_go[0] eq '#'; - my $rindentation_list = [0]; # ref to indentations for each line - my ( $cscw_block_comment, $closing_side_comment ); + my ( $cscw_block_comment, $closing_side_comment, $is_block_comment ); + + if ( !$max_index_to_go && $type_beg_next eq '#' ) { + $is_block_comment = 1; + } + if ($rOpts_closing_side_comments) { ( $closing_side_comment, $cscw_block_comment ) = $self->add_closing_side_comment( $ri_first, $ri_last ); } - # flush before a long if statement to avoid unwanted alignment - if ( $n_last_line > 0 - && $type_beg_next eq 'k' - && $is_if_unless{$token_beg_next} ) - { - $self->flush_vertical_aligner(); + if ( $n_last_line > 0 || $rOpts_extended_continuation_indentation ) { + $self->undo_ci( $ri_first, $ri_last, + $this_batch->[_rix_seqno_controlling_ci_] ); } - $self->undo_ci( $ri_first, $ri_last, $rix_seqno_controlling_ci ) - if ( $n_last_line > 0 || $rOpts_extended_continuation_indentation ); + # for multi-line batches ... + if ( $n_last_line > 0 ) { + + # flush before a long if statement to avoid unwanted alignment + $self->flush_vertical_aligner() + if ( $type_beg_next eq 'k' + && $is_if_unless{$token_beg_next} ); - $self->set_logical_padding( $ri_first, $ri_last, $peak_batch_size, - $starting_in_quote ) - if ( $n_last_line > 0 && $rOpts_logical_padding ); + $self->set_logical_padding( $ri_first, $ri_last, $starting_in_quote ) + if ($rOpts_logical_padding); + + $self->xlp_tweak( $ri_first, $ri_last ) + if ($rOpts_extended_line_up_parentheses); + } if (DEVEL_MODE) { $self->check_batch_summed_lengths() } @@ -22243,7 +25644,7 @@ sub convey_batch_to_vertical_aligner { # ---------------------------------------------- # loop to send each line to the vertical aligner # ---------------------------------------------- - my ( $type_beg, $type_end, $token_beg ); + my ( $type_beg, $type_end, $token_beg, $ljump ); for my $n ( 0 .. $n_last_line ) { @@ -22279,10 +25680,8 @@ sub convey_batch_to_vertical_aligner { my $Kend_code = $batch_CODE_type && $batch_CODE_type ne 'VER' ? undef : $Kend; - # $ljump is a level jump needed by 'sub final_indentation_adjustment' - my $ljump = 0; - - # Get some vars on line [n+1], if any: + # Get some vars on line [n+1], if any, + # and define $ljump = level jump needed by 'sub get_final_indentation' if ( $n < $n_last_line ) { $ibeg_next = $ri_first->[ $n + 1 ]; $iend_next = $ri_last->[ $n + 1 ]; @@ -22311,6 +25710,9 @@ sub convey_batch_to_vertical_aligner { $ljump = $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_]; } + else { + $ljump = 0; + } # --------------------------------------------- # get the vertical alignment info for this line @@ -22343,12 +25745,28 @@ EOM # -------------------------------------- # get the final indentation of this line # -------------------------------------- - my ( $indentation, $lev, $level_end, $terminal_type, - $terminal_block_type, $is_semicolon_terminated, $is_outdented_line ) - = $self->final_indentation_adjustment( $ibeg, $iend, $rfields, - $rpatterns, $ri_first, $ri_last, - $rindentation_list, $ljump, $starting_in_quote, - $is_static_block_comment, ); + my ( + + $indentation, + $lev, + $level_end, + $i_terminal, + $is_outdented_line, + + ) = $self->get_final_indentation( + + $ibeg, + $iend, + $rfields, + $rpatterns, + $ri_first, + $ri_last, + $rindentation_list, + $ljump, + $starting_in_quote, + $is_static_block_comment, + + ); # -------------------------------- # define flag 'outdent_long_lines' @@ -22415,7 +25833,7 @@ EOM my $seqno_m = $rLL->[$Km]->[_TYPE_SEQUENCE_]; if ($seqno_m) { - $block_type_m = $rblock_type_of_seqno->{$seqno_m}; + $block_type_m = $self->[_rblock_type_of_seqno_]->{$seqno_m}; } } @@ -22442,7 +25860,8 @@ EOM $rvao_args->{rvertical_tightness_flags} = $self->set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last, $ending_in_quote, $closing_side_comment ) - if ( !$is_block_comment ); + unless ( $is_block_comment + || $self->[_no_vertical_tightness_flags_] ); # ---------------------------------- # define 'is_terminal_ternary' flag @@ -22467,6 +25886,7 @@ EOM my $is_terminal_ternary = 0; my $last_leading_type = $n > 0 ? $type_beg_last : ':'; + my $terminal_type = $types_to_go[$i_terminal]; if ( $terminal_type ne ';' && $n_last_line > $n && $level_end == $lev ) @@ -22573,19 +25993,11 @@ EOM # This flag tells the vertical aligner to reset the side comment # location if we are entering a new block from level 0. This is # intended to keep side comments from drifting too far to the right. - if ( $terminal_block_type + if ( $block_type_to_go[$i_terminal] && $nesting_depth_end > $nesting_depth_beg ) { - my $level_adj = $lev; - my $radjusted_levels = $self->[_radjusted_levels_]; - if ( defined($radjusted_levels) && @{$radjusted_levels} == @{$rLL} ) - { - $level_adj = $radjusted_levels->[$Kbeg]; - if ( $level_adj < 0 ) { $level_adj = 0 } - } - if ( $level_adj == 0 ) { - $rvao_args->{forget_side_comment} = 1; - } + $rvao_args->{forget_side_comment} = + !$self->[_radjusted_levels_]->[$Kbeg]; } # ----------------------------------- @@ -22608,41 +26020,43 @@ EOM $do_not_pad = 0; - # Set flag indicating if this line ends in an opening - # token and is very short, so that a blank line is not - # needed if the subsequent line is a comment. - # Examples of what we are looking for: - # { - # && ( - # BEGIN { - # default { - # sub { - $self->[_last_output_short_opening_token_] - - # line ends in opening token - # /^[\{\(\[L]$/ - = $is_opening_type{$type_end} - - # and either - && ( - # line has either single opening token - $Kend == $Kbeg - - # or is a single token followed by opening token. - # Note that sub identifiers have blanks like 'sub doit' - # $token_beg !~ /\s+/ - || ( $Kend - $Kbeg <= 2 && index( $token_beg, SPACE ) < 0 ) - ) + } ## end of loop to output each line - # and limit total to 10 character widths - && token_sequence_length( $ibeg, $iend ) <= 10; + # Set flag indicating if the last line ends in an opening + # token and is very short, so that a blank line is not + # needed if the subsequent line is a comment. + # Examples of what we are looking for: + # { + # && ( + # BEGIN { + # default { + # sub { + $self->[_last_output_short_opening_token_] + + # line ends in opening token + # /^[\{\(\[L]$/ + = $is_opening_type{$type_end} + + # and either + && ( + # line has either single opening token + $iend_next == $ibeg_next + + # or is a single token followed by opening token. + # Note that sub identifiers have blanks like 'sub doit' + # $token_beg !~ /\s+/ + || ( $iend_next - $ibeg_next <= 2 && index( $token_beg, SPACE ) < 0 ) + ) - } ## end of loop to output each line + # and limit total to 10 character widths + && token_sequence_length( $ibeg_next, $iend_next ) <= 10; # remember indentation of lines containing opening containers for - # later use by sub final_indentation_adjustment - $self->save_opening_indentation( $ri_first, $ri_last, $rindentation_list ) - if ( !$is_block_comment ); + # later use by sub get_final_indentation + $self->save_opening_indentation( $ri_first, $ri_last, + $rindentation_list, $this_batch->[_runmatched_opening_indexes_] ) + if ( $this_batch->[_runmatched_opening_indexes_] + || $types_to_go[$max_index_to_go] eq 'q' ); # output any new -cscw block comment if ($cscw_block_comment) { @@ -22661,7 +26075,7 @@ sub check_batch_summed_lengths { # Verify that the summed lengths are correct. We want to be sure that # errors have not been introduced by programming changes. Summed lengths - # are defined in sub $store_token. Operations like padding and unmasking + # are defined in sub store_token. Operations like padding and unmasking # semicolons can change token lengths, but those operations are expected to # update the summed lengths when they make changes. So the summed lengths # should always be correct. @@ -22671,7 +26085,12 @@ sub check_batch_summed_lengths { my $len_tok_i = $token_lengths_to_go[$i]; my $KK = $K_to_go[$i]; my $len_tok_K; - if ( defined($KK) ) { $len_tok_K = $rLL->[$KK]->[_TOKEN_LENGTH_] } + + # For --indent-only, there is not always agreement between + # token lengths in _rLL_ and token_lengths_to_go, so skip that check. + if ( defined($KK) && !$rOpts_indent_only ) { + $len_tok_K = $rLL->[$KK]->[_TOKEN_LENGTH_]; + } if ( $len_by_sum != $len_tok_i || defined($len_tok_K) && $len_by_sum != $len_tok_K ) { @@ -22723,23 +26142,33 @@ EOM # eq and ne were removed from this list to improve alignment chances @q = qw(if unless and or err for foreach while until); @is_vertical_alignment_keyword{@q} = (1) x scalar(@q); - } + } ## end BEGIN + + my $ralignment_type_to_go; + my $ralignment_counts; + my $ralignment_hash_by_line; sub set_vertical_alignment_markers { - # This routine takes the first step toward vertical alignment of the - # lines of output text. It looks for certain tokens which can serve as - # vertical alignment markers (such as an '='). - # + my ( $self, $ri_first, $ri_last ) = @_; + + #---------------------------------------------------------------------- + # This routine looks at output lines for certain tokens which can serve + # as vertical alignment markers (such as an '='). + #---------------------------------------------------------------------- + + # Input parameters: + # $ri_first = ref to list of starting line indexes in _to_go arrays + # $ri_last = ref to list of ending line indexes in _to_go arrays + # Method: We look at each token $i in this output batch and set # $ralignment_type_to_go->[$i] equal to those tokens at which we would # accept vertical alignment. - my ( $self, $ri_first, $ri_last ) = @_; - - my $ralignment_type_to_go; - my $ralignment_counts = []; - my $ralignment_hash_by_line = []; + # Initialize closure (and return) variables: + $ralignment_type_to_go = []; + $ralignment_counts = []; + $ralignment_hash_by_line = []; # NOTE: closing side comments can insert up to 2 additional tokens # beyond the original $max_index_to_go, so we need to check ri_last for @@ -22754,14 +26183,9 @@ EOM # - and nothing to do if we aren't allowed to change whitespace. # ----------------------------------------------------------------- if ( $max_i <= 0 || !$rOpts_add_whitespace ) { - return ( $ralignment_type_to_go, $ralignment_counts, - $ralignment_hash_by_line ); + goto RETURN; } - my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_]; - my $ris_function_call_paren = $self->[_ris_function_call_paren_]; - my $rLL = $self->[_rLL_]; - # ------------------------------- # First handle any side comment. # ------------------------------- @@ -22781,7 +26205,7 @@ EOM my $do_not_align = ( # it is any specially marked side comment - ( defined($KK) && $rspecial_side_comment_type->{$KK} ) + ( defined($KK) && $self->[_rspecial_side_comment_type_]->{$KK} ) # or it is a static side comment || ( $rOpts->{'static-side-comments'} @@ -22826,16 +26250,12 @@ EOM # Nothing more to do on this line if -nvc is set # ---------------------------------------------- if ( !$rOpts_valign_code ) { - return ( $ralignment_type_to_go, $ralignment_counts, - $ralignment_hash_by_line ); + goto RETURN; } # ------------------------------------- # Loop over each line of this batch ... # ------------------------------------- - my $last_vertical_alignment_BEFORE_index; - my $vert_last_nonblank_type; - my $vert_last_nonblank_token; foreach my $line ( 0 .. $max_line ) { @@ -22847,313 +26267,339 @@ EOM # back up before any side comment if ( $iend > $i_terminal ) { $iend = $i_terminal } - my $level_beg = $levels_to_go[$ibeg]; - my $token_beg = $tokens_to_go[$ibeg]; - my $type_beg = $types_to_go[$ibeg]; - my $type_beg_special_char = - ( $type_beg eq '.' || $type_beg eq ':' || $type_beg eq '?' ); - - $last_vertical_alignment_BEFORE_index = -1; - $vert_last_nonblank_type = $type_beg; - $vert_last_nonblank_token = $token_beg; - - # ---------------------------------------------------------------- - # Initialization code merged from 'sub delete_needless_alignments' - # ---------------------------------------------------------------- - my $i_good_paren = -1; - my $i_elsif_close = $ibeg - 1; - my $i_elsif_open = $iend + 1; - my @imatch_list; - if ( $type_beg eq 'k' ) { - - # Initialization for paren patch: mark a location of a paren we - # should keep, such as one following something like a leading - # 'if', 'elsif', - $i_good_paren = $ibeg + 1; - if ( $types_to_go[$i_good_paren] eq 'b' ) { - $i_good_paren++; - } - - # Initialization for 'elsif' patch: remember the paren range of - # an elsif, and do not make alignments within them because this - # can cause loss of padding and overall brace alignment in the - # vertical aligner. - if ( $token_beg eq 'elsif' - && $i_good_paren < $iend - && $tokens_to_go[$i_good_paren] eq '(' ) + #---------------------------------- + # Loop over all tokens on this line + #---------------------------------- + $self->set_vertical_alignment_markers_token_loop( $line, $ibeg, + $iend ); + } + + RETURN: + return ( $ralignment_type_to_go, $ralignment_counts, + $ralignment_hash_by_line ); + } ## end sub set_vertical_alignment_markers + + sub set_vertical_alignment_markers_token_loop { + my ( $self, $line, $ibeg, $iend ) = @_; + + # Set vertical alignment markers for the tokens on one line + # of the current output batch. This is done by updating the + # three closure variables: + # $ralignment_type_to_go + # $ralignment_counts + # $ralignment_hash_by_line + + # Input parameters: + # $line = index of this line in the current batch + # $ibeg, $iend = index range of tokens to check in the _to_go arrays + + my $level_beg = $levels_to_go[$ibeg]; + my $token_beg = $tokens_to_go[$ibeg]; + my $type_beg = $types_to_go[$ibeg]; + my $type_beg_special_char = + ( $type_beg eq '.' || $type_beg eq ':' || $type_beg eq '?' ); + + my $last_vertical_alignment_BEFORE_index = -1; + my $vert_last_nonblank_type = $type_beg; + my $vert_last_nonblank_token = $token_beg; + + # ---------------------------------------------------------------- + # Initialization code merged from 'sub delete_needless_alignments' + # ---------------------------------------------------------------- + my $i_good_paren = -1; + my $i_elsif_close = $ibeg - 1; + my $i_elsif_open = $iend + 1; + my @imatch_list; + if ( $type_beg eq 'k' ) { + + # Initialization for paren patch: mark a location of a paren we + # should keep, such as one following something like a leading + # 'if', 'elsif', + $i_good_paren = $ibeg + 1; + if ( $types_to_go[$i_good_paren] eq 'b' ) { + $i_good_paren++; + } + + # Initialization for 'elsif' patch: remember the paren range of + # an elsif, and do not make alignments within them because this + # can cause loss of padding and overall brace alignment in the + # vertical aligner. + if ( $token_beg eq 'elsif' + && $i_good_paren < $iend + && $tokens_to_go[$i_good_paren] eq '(' ) + { + $i_elsif_open = $i_good_paren; + $i_elsif_close = $mate_index_to_go[$i_good_paren]; + if ( !defined($i_elsif_close) ) { $i_elsif_close = -1 } + } + } ## end if ( $type_beg eq 'k' ) + + # -------------------------------------------- + # Loop over each token in this output line ... + # -------------------------------------------- + foreach my $i ( $ibeg + 1 .. $iend ) { + + next if ( $types_to_go[$i] eq 'b' ); + + my $type = $types_to_go[$i]; + my $token = $tokens_to_go[$i]; + my $alignment_type = EMPTY_STRING; + + # ---------------------------------------------- + # Check for 'paren patch' : Remove excess parens + # ---------------------------------------------- + + # Excess alignment of parens can prevent other good alignments. + # For example, note the parens in the first two rows of the + # following snippet. They would normally get marked for + # alignment and aligned as follows: + + # my $w = $columns * $cell_w + ( $columns + 1 ) * $border; + # my $h = $rows * $cell_h + ( $rows + 1 ) * $border; + # my $img = new Gimp::Image( $w, $h, RGB ); + + # This causes unnecessary paren alignment and prevents the + # third equals from aligning. If we remove the unwanted + # alignments we get: + + # my $w = $columns * $cell_w + ( $columns + 1 ) * $border; + # my $h = $rows * $cell_h + ( $rows + 1 ) * $border; + # my $img = new Gimp::Image( $w, $h, RGB ); + + # A rule for doing this which works well is to remove alignment + # of parens whose containers do not contain other aligning + # tokens, with the exception that we always keep alignment of + # the first opening paren on a line (for things like 'if' and + # 'elsif' statements). + if ( $token eq ')' && @imatch_list ) { + + # undo the corresponding opening paren if: + # - it is at the top of the stack + # - and not the first overall opening paren + # - does not follow a leading keyword on this line + my $imate = $mate_index_to_go[$i]; + if ( !defined($imate) ) { $imate = -1 } + if ( $imatch_list[-1] eq $imate + && ( $ibeg > 1 || @imatch_list > 1 ) + && $imate > $i_good_paren ) { - $i_elsif_open = $i_good_paren; - $i_elsif_close = $mate_index_to_go[$i_good_paren]; - } - } ## end if ( $type_beg eq 'k' ) - - # -------------------------------------------- - # Loop over each token in this output line ... - # -------------------------------------------- - foreach my $i ( $ibeg + 1 .. $iend ) { - - next if ( $types_to_go[$i] eq 'b' ); - - my $type = $types_to_go[$i]; - my $token = $tokens_to_go[$i]; - my $alignment_type = EMPTY_STRING; - - # ---------------------------------------------- - # Check for 'paren patch' : Remove excess parens - # ---------------------------------------------- - - # Excess alignment of parens can prevent other good alignments. - # For example, note the parens in the first two rows of the - # following snippet. They would normally get marked for - # alignment and aligned as follows: - - # my $w = $columns * $cell_w + ( $columns + 1 ) * $border; - # my $h = $rows * $cell_h + ( $rows + 1 ) * $border; - # my $img = new Gimp::Image( $w, $h, RGB ); - - # This causes unnecessary paren alignment and prevents the - # third equals from aligning. If we remove the unwanted - # alignments we get: - - # my $w = $columns * $cell_w + ( $columns + 1 ) * $border; - # my $h = $rows * $cell_h + ( $rows + 1 ) * $border; - # my $img = new Gimp::Image( $w, $h, RGB ); - - # A rule for doing this which works well is to remove alignment - # of parens whose containers do not contain other aligning - # tokens, with the exception that we always keep alignment of - # the first opening paren on a line (for things like 'if' and - # 'elsif' statements). - if ( $token eq ')' && @imatch_list ) { - - # undo the corresponding opening paren if: - # - it is at the top of the stack - # - and not the first overall opening paren - # - does not follow a leading keyword on this line - my $imate = $mate_index_to_go[$i]; - if ( $imatch_list[-1] eq $imate - && ( $ibeg > 1 || @imatch_list > 1 ) - && $imate > $i_good_paren ) - { - if ( $ralignment_type_to_go->[$imate] ) { - $ralignment_type_to_go->[$imate] = EMPTY_STRING; - $ralignment_counts->[$line]--; - delete $ralignment_hash_by_line->[$line]->{$imate}; - } - pop @imatch_list; + if ( $ralignment_type_to_go->[$imate] ) { + $ralignment_type_to_go->[$imate] = EMPTY_STRING; + $ralignment_counts->[$line]--; + delete $ralignment_hash_by_line->[$line]->{$imate}; } + pop @imatch_list; } + } - # do not align tokens at lower level than start of line - # except for side comments - if ( $levels_to_go[$i] < $level_beg ) { - next; - } + # do not align tokens at lower level than start of line + # except for side comments + if ( $levels_to_go[$i] < $level_beg ) { + next; + } - #-------------------------------------------------------- - # First see if we want to align BEFORE this token - #-------------------------------------------------------- + #-------------------------------------------------------- + # First see if we want to align BEFORE this token + #-------------------------------------------------------- - # The first possible token that we can align before - # is index 2 because: 1) it doesn't normally make sense to - # align before the first token and 2) the second - # token must be a blank if we are to align before - # the third - if ( $i < $ibeg + 2 ) { } + # The first possible token that we can align before + # is index 2 because: 1) it doesn't normally make sense to + # align before the first token and 2) the second + # token must be a blank if we are to align before + # the third + if ( $i < $ibeg + 2 ) { } - # must follow a blank token - elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { } + # must follow a blank token + elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { } - # otherwise, do not align two in a row to create a - # blank field - elsif ( $last_vertical_alignment_BEFORE_index == $i - 2 ) { } + # otherwise, do not align two in a row to create a + # blank field + elsif ( $last_vertical_alignment_BEFORE_index == $i - 2 ) { } - # align before one of these keywords - # (within a line, since $i>1) - elsif ( $type eq 'k' ) { + # align before one of these keywords + # (within a line, since $i>1) + elsif ( $type eq 'k' ) { - # /^(if|unless|and|or|eq|ne)$/ - if ( $is_vertical_alignment_keyword{$token} ) { - $alignment_type = $token; - } + # /^(if|unless|and|or|eq|ne)$/ + if ( $is_vertical_alignment_keyword{$token} ) { + $alignment_type = $token; } + } - # align qw in a 'use' statement (issue git #93) - elsif ( $type eq 'q' ) { - if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] eq 'use' ) { - $alignment_type = $type; - } + # align qw in a 'use' statement (issue git #93) + elsif ( $type eq 'q' ) { + if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] eq 'use' ) { + $alignment_type = $type; } + } - # align before one of these types.. - elsif ( $is_vertical_alignment_type{$type} - && !$is_not_vertical_alignment_token{$token} ) + # align before one of these types.. + elsif ( $is_vertical_alignment_type{$type} + && !$is_not_vertical_alignment_token{$token} ) + { + $alignment_type = $token; + + # Do not align a terminal token. Although it might + # occasionally look ok to do this, this has been found to be + # a good general rule. The main problems are: + # (1) that the terminal token (such as an = or :) might get + # moved far to the right where it is hard to see because + # nothing follows it, and + # (2) doing so may prevent other good alignments. + # Current exceptions are && and || and => + if ( $i == $iend ) { + $alignment_type = EMPTY_STRING + unless ( $is_terminal_alignment_type{$type} ); + } + + # Do not align leading ': (' or '. ('. This would prevent + # alignment in something like the following: + # $extra_space .= + # ( $input_line_number < 10 ) ? " " + # : ( $input_line_number < 100 ) ? " " + # : ""; + # or + # $code = + # ( $case_matters ? $accessor : " lc($accessor) " ) + # . ( $yesno ? " eq " : " ne " ) + + # Also, do not align a ( following a leading ? so we can + # align something like this: + # $converter{$_}->{ushortok} = + # $PDL::IO::Pic::biggrays + # ? ( m/GIF/ ? 0 : 1 ) + # : ( m/GIF|RAST|IFF/ ? 0 : 1 ); + if ( $type_beg_special_char + && $i == $ibeg + 2 + && $types_to_go[ $i - 1 ] eq 'b' ) { - $alignment_type = $token; + $alignment_type = EMPTY_STRING; + } - # Do not align a terminal token. Although it might - # occasionally look ok to do this, this has been found to be - # a good general rule. The main problems are: - # (1) that the terminal token (such as an = or :) might get - # moved far to the right where it is hard to see because - # nothing follows it, and - # (2) doing so may prevent other good alignments. - # Current exceptions are && and || and => - if ( $i == $iend ) { - $alignment_type = EMPTY_STRING - unless ( $is_terminal_alignment_type{$type} ); - } + # Certain tokens only align at the same level as the + # initial line level + if ( $is_low_level_alignment_token{$token} + && $levels_to_go[$i] != $level_beg ) + { + $alignment_type = EMPTY_STRING; + } - # Do not align leading ': (' or '. ('. This would prevent - # alignment in something like the following: - # $extra_space .= - # ( $input_line_number < 10 ) ? " " - # : ( $input_line_number < 100 ) ? " " - # : ""; - # or - # $code = - # ( $case_matters ? $accessor : " lc($accessor) " ) - # . ( $yesno ? " eq " : " ne " ) - - # Also, do not align a ( following a leading ? so we can - # align something like this: - # $converter{$_}->{ushortok} = - # $PDL::IO::Pic::biggrays - # ? ( m/GIF/ ? 0 : 1 ) - # : ( m/GIF|RAST|IFF/ ? 0 : 1 ); - if ( $type_beg_special_char - && $i == $ibeg + 2 - && $types_to_go[ $i - 1 ] eq 'b' ) - { - $alignment_type = EMPTY_STRING; - } + if ( $token eq '(' ) { - # Certain tokens only align at the same level as the - # initial line level - if ( $is_low_level_alignment_token{$token} - && $levels_to_go[$i] != $level_beg ) + # For a paren after keyword, only align if-like parens, + # such as: + # if ( $a ) { &a } + # elsif ( $b ) { &b } + # ^-------------------aligned parens + if ( $vert_last_nonblank_type eq 'k' + && !$is_if_unless_elsif{$vert_last_nonblank_token} ) { $alignment_type = EMPTY_STRING; } - # For a paren after keyword, only align something like this: - # if ( $a ) { &a } - # elsif ( $b ) { &b } - if ( $token eq '(' ) { - - if ( $vert_last_nonblank_type eq 'k' ) { - $alignment_type = EMPTY_STRING - unless - $is_if_unless_elsif{$vert_last_nonblank_token}; - ##unless $vert_last_nonblank_token =~ /^(if|unless|elsif)$/; - } - - # Do not align a spaced-function-paren if requested. - # Issue git #53, #73. - if ( !$rOpts_function_paren_vertical_alignment ) { - my $seqno = $type_sequence_to_go[$i]; - if ( $ris_function_call_paren->{$seqno} ) { - $alignment_type = EMPTY_STRING; - } - } - - # make () align with qw in a 'use' statement (git #93) - if ( $tokens_to_go[0] eq 'use' - && $types_to_go[0] eq 'k' - && $mate_index_to_go[$i] == $i + 1 ) - { - $alignment_type = 'q'; - } + # Do not align a spaced-function-paren if requested. + # Issue git #53, #73. + if ( !$rOpts_function_paren_vertical_alignment ) { + my $seqno = $type_sequence_to_go[$i]; + $alignment_type = EMPTY_STRING + if ( $self->[_ris_function_call_paren_]->{$seqno} ); } - # be sure the alignment tokens are unique - # This didn't work well: reason not determined - # if ($token ne $type) {$alignment_type .= $type} - } - - # NOTE: This is deactivated because it causes the previous - # if/elsif alignment to fail - #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i]) - #{ $alignment_type = $type; } + # make () align with qw in a 'use' statement (git #93) + if ( $tokens_to_go[0] eq 'use' + && $types_to_go[0] eq 'k' + && defined( $mate_index_to_go[$i] ) + && $mate_index_to_go[$i] == $i + 1 ) + { + $alignment_type = 'q'; - if ($alignment_type) { - $last_vertical_alignment_BEFORE_index = $i; + ## Note on discussion git #101. We could make this + ## a separate type '()' to separate it from qw's: + ## $alignment_type = + ## $rOpts_valign_empty_parens_with_qw ? 'q' : '()'; + } } - #-------------------------------------------------------- - # Next see if we want to align AFTER the previous nonblank - #-------------------------------------------------------- + # be sure the alignment tokens are unique + # This experiment didn't work well: reason not determined + # if ($token ne $type) {$alignment_type .= $type} + } - # We want to line up ',' and interior ';' tokens, with the added - # space AFTER these tokens. (Note: interior ';' is included - # because it may occur in short blocks). - elsif ( + # NOTE: This is deactivated because it causes the previous + # if/elsif alignment to fail + #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i]) + #{ $alignment_type = $type; } - # we haven't already set it - ##!$alignment_type + if ($alignment_type) { + $last_vertical_alignment_BEFORE_index = $i; + } - # previous token IS one of these: - ( - $vert_last_nonblank_type eq ',' - || $vert_last_nonblank_type eq ';' - ) + #-------------------------------------------------------- + # Next see if we want to align AFTER the previous nonblank + #-------------------------------------------------------- - # and its not the first token of the line - ## && $i > $ibeg + # We want to line up ',' and interior ';' tokens, with the added + # space AFTER these tokens. (Note: interior ';' is included + # because it may occur in short blocks). + elsif ( - # and it follows a blank - && $types_to_go[ $i - 1 ] eq 'b' + # previous token IS one of these: + ( + $vert_last_nonblank_type eq ',' + || $vert_last_nonblank_type eq ';' + ) - # and it's NOT one of these - && !$is_closing_token{$type} + # and it follows a blank + && $types_to_go[ $i - 1 ] eq 'b' - # then go ahead and align - ) + # and it's NOT one of these + && !$is_closing_token{$type} - { - $alignment_type = $vert_last_nonblank_type; - } + # then go ahead and align + ) - #----------------------- - # Set the alignment type - #----------------------- - if ($alignment_type) { + { + $alignment_type = $vert_last_nonblank_type; + } - # but do not align the opening brace of an anonymous sub - if ( $token eq '{' - && $block_type_to_go[$i] =~ /$ASUB_PATTERN/ ) - { + #----------------------- + # Set the alignment type + #----------------------- + if ($alignment_type) { - } + # but do not align the opening brace of an anonymous sub + if ( $token eq '{' + && $block_type_to_go[$i] + && $block_type_to_go[$i] =~ /$ASUB_PATTERN/ ) + { - # and do not make alignments within 'elsif' parens - elsif ( $i > $i_elsif_open && $i < $i_elsif_close ) { + } - } + # and do not make alignments within 'elsif' parens + elsif ( $i > $i_elsif_open && $i < $i_elsif_close ) { - # and ignore any tokens which have leading padded spaces - # example: perl527/lop.t - elsif ( substr( $alignment_type, 0, 1 ) eq SPACE ) { + } - } + # and ignore any tokens which have leading padded spaces + # example: perl527/lop.t + elsif ( substr( $alignment_type, 0, 1 ) eq SPACE ) { - else { - $ralignment_type_to_go->[$i] = $alignment_type; - $ralignment_hash_by_line->[$line]->{$i} = - $alignment_type; - $ralignment_counts->[$line]++; - push @imatch_list, $i; - } } - $vert_last_nonblank_type = $type; - $vert_last_nonblank_token = $token; + else { + $ralignment_type_to_go->[$i] = $alignment_type; + $ralignment_hash_by_line->[$line]->{$i} = $alignment_type; + $ralignment_counts->[$line]++; + push @imatch_list, $i; + } } + + $vert_last_nonblank_type = $type; + $vert_last_nonblank_token = $token; } + return; + } ## end sub set_vertical_alignment_markers_token_loop - return ( $ralignment_type_to_go, $ralignment_counts, - $ralignment_hash_by_line ); - } ## end sub set_vertical_alignment_markers } ## end closure set_vertical_alignment_markers sub make_vertical_alignments { @@ -23190,8 +26636,16 @@ sub make_vertical_alignments { #--------------------------------------------------------- # Step 1: Define the alignment tokens for the entire batch #--------------------------------------------------------- - my ( $ralignment_type_to_go, $ralignment_counts, $ralignment_hash_by_line ) - = $self->set_vertical_alignment_markers( $ri_first, $ri_last ); + my ( $ralignment_type_to_go, $ralignment_counts, $ralignment_hash_by_line ); + + # We only need to make this call if vertical alignment of code is + # requested or if a line might have a side comment. + if ( $rOpts_valign_code + || $types_to_go[$max_index_to_go] eq '#' ) + { + ( $ralignment_type_to_go, $ralignment_counts, $ralignment_hash_by_line ) + = $self->set_vertical_alignment_markers( $ri_first, $ri_last ); + } #---------------------------------------------- # Step 2: Break each line into alignment fields @@ -23254,7 +26708,6 @@ sub get_seqno { # Undo continuation indentation in certain sequences my ( $self, $ri_first, $ri_last, $rix_seqno_controlling_ci ) = @_; my ( $line_1, $line_2, $lev_last ); - my $this_line_is_semicolon_terminated; my $max_line = @{$ri_first} - 1; my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_]; @@ -23267,7 +26720,7 @@ sub get_seqno { # On a very large list test case, this new coding dropped the run time # of this routine from 30 seconds to 169 milliseconds. my @i_controlling_ci; - if ( @{$rix_seqno_controlling_ci} ) { + if ( $rix_seqno_controlling_ci && @{$rix_seqno_controlling_ci} ) { my @tmp = reverse @{$rix_seqno_controlling_ci}; my $ix_next = pop @tmp; foreach my $line ( 0 .. $max_line ) { @@ -23340,20 +26793,21 @@ sub get_seqno { # chain continues... # check for chain ending at end of a statement - if ( $line == $max_line ) { + my $is_semicolon_terminated = ( + $line == $max_line + && ( + $types_to_go[$iend] eq ';' - # see of this line ends a statement - $this_line_is_semicolon_terminated = - $types_to_go[$iend] eq ';' + # with possible side comment + || ( $types_to_go[$iend] eq '#' + && $iend - $ibeg >= 2 + && $types_to_go[ $iend - 2 ] eq ';' + && $types_to_go[ $iend - 1 ] eq 'b' ) + ) + ); - # with possible side comment - || ( $types_to_go[$iend] eq '#' - && $iend - $ibeg >= 2 - && $types_to_go[ $iend - 2 ] eq ';' - && $types_to_go[ $iend - 1 ] eq 'b' ); - } $line_2 = $line - if ($this_line_is_semicolon_terminated); + if ($is_semicolon_terminated); } else { @@ -23404,7 +26858,7 @@ sub get_seqno { # SECTION 2: Undo ci at cuddled blocks #------------------------------------- - # Note that sub final_indentation_adjustment will be called later to + # Note that sub get_final_indentation will be called later to # actually do this, but for now we will tentatively mark cuddled # lines with ci=0 so that the the -xci loop which follows will be # correct at cuddles. @@ -23421,8 +26875,14 @@ sub get_seqno { $terminal_type = $types_to_go[ $iend - 2 ]; } } - if ( $terminal_type eq '{' ) { - my $Kbeg = $K_to_go[$ibeg]; + + # Patch for rt144979, part 2. Coordinated with part 1. + # Skip cuddled braces. + my $seqno_beg = $type_sequence_to_go[$ibeg]; + my $is_cuddled_closing_brace = $seqno_beg + && $self->[_ris_cuddled_closing_brace_]->{$seqno_beg}; + + if ( $terminal_type eq '{' && !$is_cuddled_closing_brace ) { $ci_levels_to_go[$ibeg] = 0; } } @@ -23491,8 +26951,7 @@ sub get_seqno { # &Error_OutOfRange; # } # - my ( $self, $ri_first, $ri_last, $peak_batch_size, $starting_in_quote ) - = @_; + my ( $self, $ri_first, $ri_last, $starting_in_quote ) = @_; my $max_line = @{$ri_first} - 1; my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces, @@ -23684,36 +27143,32 @@ sub get_seqno { # : $i == 2 ? ( "Then", "Rarity" ) # : ( "Then", "Name" ); - if ( $max_line > 1 ) { - my $leading_token = $tokens_to_go[$ibeg_next]; - my $tokens_differ; - - # never indent line 1 of a '.' series because - # previous line is most likely at same level. - # TODO: we should also look at the leading_spaces - # of the last output line and skip if it is same - # as this line. - next if ( $leading_token eq '.' ); - - my $count = 1; - foreach my $l ( 2 .. 3 ) { - last if ( $line + $l > $max_line ); - my $ibeg_next_next = $ri_first->[ $line + $l ]; - if ( $tokens_to_go[$ibeg_next_next] ne - $leading_token ) - { - $tokens_differ = 1; - last; - } - $count++; - } - next if ($tokens_differ); - next if ( $count < 3 && $leading_token ne ':' ); - $ipad = $ibeg; - } - else { - next; + next if ( $max_line <= 1 ); + + my $leading_token = $tokens_to_go[$ibeg_next]; + my $tokens_differ; + + # never indent line 1 of a '.' series because + # previous line is most likely at same level. + # TODO: we should also look at the leading_spaces + # of the last output line and skip if it is same + # as this line. + next if ( $leading_token eq '.' ); + + my $count = 1; + foreach my $l ( 2 .. 3 ) { + last if ( $line + $l > $max_line ); + $count++; + my $ibeg_next_next = $ri_first->[ $line + $l ]; + next + if ( $tokens_to_go[$ibeg_next_next] eq + $leading_token ); + $tokens_differ = 1; + last; } + next if ($tokens_differ); + next if ( $count < 3 && $leading_token ne ':' ); + $ipad = $ibeg; } } } @@ -23726,6 +27181,7 @@ sub get_seqno { # find any unclosed container next unless ( $type_sequence_to_go[$i] + && defined( $mate_index_to_go[$i] ) && $mate_index_to_go[$i] > $iend ); # find next nonblank token to pad @@ -23752,26 +27208,10 @@ sub get_seqno { # an editor. In that case either the user will see and # fix the problem or it will be corrected next time the # entire file is processed with perltidy. + my $this_batch = $self->[_this_batch_]; + my $peak_batch_size = $this_batch->[_peak_batch_size_]; next if ( $ipad == 0 && $peak_batch_size <= 1 ); -## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT -## IT DID MORE HARM THAN GOOD -## ceil( -## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000 -## / $upem -## ), -## # do not put leading padding for just 2 lines of math -## if ( $ipad == $ibeg -## && $line > 0 -## && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ] -## && $is_math_op{$type_next} -## && $line + 2 <= $max_line ) -## { -## my $ibeg_next_next = $ri_first->[ $line + 2 ]; -## my $type_next_next = $types_to_go[$ibeg_next_next]; -## next if !$is_math_op{$type_next_next}; -## } - # next line must not be at greater depth my $iend_next = $ri_last->[ $line + 1 ]; next @@ -24002,6 +27442,9 @@ sub pad_token { $tok = SPACE x $pad_spaces . $tok; $tok_len += $pad_spaces; } + elsif ( $pad_spaces == 0 ) { + return; + } elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq SPACE ) { $tok = EMPTY_STRING; $tok_len = 0; @@ -24009,6 +27452,8 @@ sub pad_token { else { # shouldn't happen + DEVEL_MODE + && Fault("unexpected request for pad spaces = $pad_spaces\n"); return; } @@ -24024,6 +27469,64 @@ sub pad_token { return; } ## end sub pad_token +sub xlp_tweak { + + # Remove one indentation space from unbroken containers marked with + # 'K_extra_space'. These are mostly two-line lists with short names + # formatted with -xlp -pt=2. + # + # Before this fix (extra space in line 2): + # is($module->VERSION, $expected, + # "$main_module->VERSION matches $module->VERSION ($expected)"); + # + # After this fix: + # is($module->VERSION, $expected, + # "$main_module->VERSION matches $module->VERSION ($expected)"); + # + # Notes: + # - This fixes issue git #106 + # - This must be called after 'set_logical_padding'. + # - This is currently only applied to -xlp. It would also work for -lp + # but that style is essentially frozen. + + my ( $self, $ri_first, $ri_last ) = @_; + + # Must be 2 or more lines + return unless ( @{$ri_first} > 1 ); + + # Pull indentation object from start of second line + my $ibeg_1 = $ri_first->[1]; + my $lp_object = $leading_spaces_to_go[$ibeg_1]; + return if ( !ref($lp_object) ); + + # This only applies to an indentation object with a marked token + my $K_extra_space = $lp_object->get_K_extra_space(); + return unless ($K_extra_space); + + # Look for the marked token within the first line of this batch + my $ibeg_0 = $ri_first->[0]; + my $iend_0 = $ri_last->[0]; + my $ii = $ibeg_0 + $K_extra_space - $K_to_go[$ibeg_0]; + return if ( $ii <= $ibeg_0 || $ii > $iend_0 ); + + # Skip padded tokens, they have already been aligned + my $tok = $tokens_to_go[$ii]; + return if ( substr( $tok, 0, 1 ) eq SPACE ); + + # Skip 'if'-like statements, this does not improve them + return + if ( $types_to_go[$ibeg_0] eq 'k' + && $is_if_unless_elsif{ $tokens_to_go[$ibeg_0] } ); + + # Looks okay, reduce indentation by 1 space if possible + my $spaces = $lp_object->get_spaces(); + if ( $spaces > 0 ) { + $lp_object->decrease_SPACES(1); + } + + return; +} ## end sub xlp_tweak + { ## begin closure make_alignment_patterns my %keyword_map; @@ -24093,8 +27596,8 @@ sub pad_token { @{is_binary_type}{@q} = (1) x scalar(@q); # token keywords which prevent using leading word as a container name - @_ = qw(and or err eq ne cmp); - @is_binary_keyword{@_} = (1) x scalar(@_); + @q = qw(and or err eq ne cmp); + @is_binary_keyword{@q} = (1) x scalar(@q); # Some common function calls whose args can be aligned. These do not # give good alignments if the lengths differ significantly. @@ -24104,15 +27607,30 @@ sub pad_token { ##'is_deeply' => 'is', # poor; names lengths too different ); - } + } ## end BEGIN sub make_alignment_patterns { - # Here we do some important preliminary work for the - # vertical aligner. We create four arrays for one - # output line. These arrays contain strings that can - # be tested by the vertical aligner to see if - # consecutive lines can be aligned vertically. + my ( $self, $ibeg, $iend, $ralignment_type_to_go, $alignment_count, + $ralignment_hash ) + = @_; + + #------------------------------------------------------------------ + # This sub creates arrays of vertical alignment info for one output + # line. + #------------------------------------------------------------------ + + # Input parameters: + # $ibeg, $iend - index range of this line in the _to_go arrays + # $ralignment_type_to_go - alignment type of tokens, like '=', if any + # $alignment_count - number of alignment tokens in the line + # $ralignment_hash - this contains all of the alignments for this + # line. It is not yet used but is available for future coding in + # case there is a need to do a preliminary scan of alignment tokens. + + # The arrays which are created contain strings that can be tested by + # the vertical aligner to see if consecutive lines can be aligned + # vertically. # # The four arrays are indexed on the vertical # alignment fields and are: @@ -24129,13 +27647,6 @@ sub pad_token { # allowed, even when the alignment tokens match. # @field_lengths - the display width of each field - my ( $self, $ibeg, $iend, $ralignment_type_to_go, $alignment_count, - $ralignment_hash ) - = @_; - - # The var $ralignment_hash contains all of the alignments for this - # line. It is not yet used but is available for future coding in case - # there is a need to do a preliminary scan of the alignment tokens. if (DEVEL_MODE) { my $new_count = 0; if ( defined($ralignment_hash) ) { @@ -24157,8 +27668,9 @@ sub pad_token { # Shortcut for lines without alignments # ------------------------------------- if ( !$alignment_count ) { - my $rtokens = []; - my $rfield_lengths = [ $summed_lengths_to_go[ $iend + 1 ] - + my $rtokens = []; + my $rfield_lengths = + [ $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg] ]; my $rpatterns; my $rfields; @@ -24177,6 +27689,8 @@ sub pad_token { my $i_start = $ibeg; my $depth = 0; + my $i_depth_prev = $i_start; + my $depth_prev = $depth; my %container_name = ( 0 => EMPTY_STRING ); my @tokens = (); @@ -24194,7 +27708,7 @@ sub pad_token { if ( $ibeg == 0 && $iend == $max_index_to_go ) { my $iterm = $max_index_to_go; if ( $types_to_go[$iterm] eq '#' ) { - $iterm = $iprev_to_go[$iterm]; + $iterm = iprev_to_go($iterm); } # Alignment lines ending like '=> sub {'; fixes issue c093 @@ -24207,95 +27721,37 @@ sub pad_token { && !$is_my_local_our{ $tokens_to_go[$ibeg] } && $levels_to_go[$ibeg] eq $levels_to_go[$iterm] ) { - - # Make a container name by combining all leading barewords, - # keywords and functions. - my $name = EMPTY_STRING; - my $count = 0; - my $count_max; - my $iname_end; - my $ilast_blank; - for ( $ibeg .. $iterm ) { - my $type = $types_to_go[$_]; - - if ( $type eq 'b' ) { - $ilast_blank = $_; - next; - } - - my $token = $tokens_to_go[$_]; - - # Give up if we find an opening paren, binary operator or - # comma within or after the proposed container name. - if ( $token eq '(' - || $is_binary_type{$type} - || $type eq 'k' && $is_binary_keyword{$token} ) - { - $name = EMPTY_STRING; - last; - } - - # The container name is only built of certain types: - last if ( !$is_kwU{$type} ); - - # Normally it is made of one word, but two words for 'use' - if ( $count == 0 ) { - if ( $type eq 'k' - && $is_use_like{ $tokens_to_go[$_] } ) - { - $count_max = 2; - } - else { - $count_max = 1; - } - } - elsif ( defined($count_max) && $count >= $count_max ) { - last; - } - - if ( defined( $name_map{$token} ) ) { - $token = $name_map{$token}; - } - - $name .= SPACE . $token; - $iname_end = $_; - $count++; - } - - # Require a space after the container name token(s) - if ( $name - && defined($ilast_blank) - && $ilast_blank > $iname_end ) - { - $name = substr( $name, 1 ); - $container_name{'0'} = $name; - } + $container_name{'0'} = + make_uncontained_comma_name( $iterm, $ibeg, $iend ); } } - # -------------------- - # Loop over all tokens - # -------------------- + #-------------------------------- + # Begin main loop over all tokens + #-------------------------------- my $j = 0; # field index $patterns[0] = EMPTY_STRING; my %token_count; for my $i ( $ibeg .. $iend ) { - # Keep track of containers balanced on this line only. + #------------------------------------------------------------- + # Part 1: keep track of containers balanced on this line only. + #------------------------------------------------------------- # These are used below to prevent unwanted cross-line alignments. # Unbalanced containers already avoid aligning across # container boundaries. - - my $type = $types_to_go[$i]; - my $token = $tokens_to_go[$i]; - my $depth_last = $depth; + my $type = $types_to_go[$i]; if ( $type_sequence_to_go[$i] ) { + my $token = $tokens_to_go[$i]; if ( $is_opening_token{$token} ) { # if container is balanced on this line... my $i_mate = $mate_index_to_go[$i]; + if ( !defined($i_mate) ) { $i_mate = -1 } if ( $i_mate > $i && $i_mate <= $iend ) { + $i_depth_prev = $i; + $depth_prev = $depth; $depth++; # Append the previous token name to make the container name @@ -24315,10 +27771,8 @@ sub pad_token { # is_d( { foo => $a, bar => $a }, { foo => $b, bar => $c } ); # is_d( [ \$a, \$a ], [ \$b, \$c ] ); - my $name = $token; - if ( $token eq '(' ) { - $name = $self->make_paren_name($i); - } + my $name = + $token eq '(' ? $self->make_paren_name($i) : $token; # name cannot be '.', so change to something else if so if ( $name eq '.' ) { $name = 'dot' } @@ -24354,32 +27808,7 @@ sub pad_token { # if we are not aligning on this paren... if ( !$ralignment_type_to_go->[$i] ) { - # Sum length from previous alignment - my $len = token_sequence_length( $i_start, $i - 1 ); - - # Minor patch: do not include the length of any '!'. - # Otherwise, commas in the following line will not - # match - # ok( 20, tapprox( ( pdl 2, 3 ), ( pdl 2, 3 ) ) ); - # ok( 21, !tapprox( ( pdl 2, 3 ), ( pdl 2, 4 ) ) ); - if ( grep { $_ eq '!' } - @types_to_go[ $i_start .. $i - 1 ] ) - { - $len -= 1; - } - - if ( $i_start == $ibeg ) { - - # For first token, use distance from start of - # line but subtract off the indentation due to - # level. Otherwise, results could vary with - # indentation. - $len += - leading_spaces_to_go($ibeg) - - $levels_to_go[$i_start] * - $rOpts_indent_columns; - if ( $len < 0 ) { $len = 0 } - } + my $len = length_tag( $i, $ibeg, $i_start ); # tack this length onto the container name to try # to make a unique token name @@ -24389,12 +27818,16 @@ sub pad_token { } ## end if ( $is_opening_token...) elsif ( $is_closing_type{$token} ) { + $i_depth_prev = $i; + $depth_prev = $depth; $depth-- if $depth > 0; } } ## end if ( $type_sequence_to_go...) - # if we find a new synchronization token, we are done with - # a field + #------------------------------------------------------------ + # Part 2: if we find a new synchronization token, we are done + # with a field + #------------------------------------------------------------ if ( $i > $i_start && $ralignment_type_to_go->[$i] ) { my $tok = my $raw_tok = $ralignment_type_to_go->[$i]; @@ -24415,6 +27848,7 @@ sub pad_token { # If we are at an opening token which increased depth, we have # to use the name from the previous depth. + my $depth_last = $i == $i_depth_prev ? $depth_prev : $depth; my $depth_p = ( $depth_last < $depth ? $depth_last : $depth ); if ( $container_name{$depth_p} ) { @@ -24497,7 +27931,9 @@ sub pad_token { $patterns[$j] = EMPTY_STRING; } ## end if ( new synchronization token - # continue accumulating tokens + #----------------------------------------------- + # Part 3: continue accumulating the next pattern + #----------------------------------------------- # for keywords we have to use the actual text if ( $type eq 'k' ) { @@ -24568,15 +28004,18 @@ sub pad_token { # everything else else { $patterns[$j] .= $type; - } - # remove any zero-level name at first fat comma - if ( $depth == 0 && $type eq '=>' ) { - $container_name{$depth} = EMPTY_STRING; + # remove any zero-level name at first fat comma + if ( $depth == 0 && $type eq '=>' ) { + $container_name{$depth} = EMPTY_STRING; + } } + } ## end for my $i ( $ibeg .. $iend) - # done with this line .. join text of tokens to make the last field + #--------------------------------------------------------------- + # End of main loop .. join text of tokens to make the last field + #--------------------------------------------------------------- push( @fields, join( EMPTY_STRING, @tokens_to_go[ $i_start .. $iend ] ) ); push @field_lengths, @@ -24585,6 +28024,108 @@ sub pad_token { return [ \@tokens, \@fields, \@patterns, \@field_lengths ]; } ## end sub make_alignment_patterns + sub make_uncontained_comma_name { + my ( $iterm, $ibeg, $iend ) = @_; + + # Make a container name by combining all leading barewords, + # keywords and functions. + my $name = EMPTY_STRING; + my $count = 0; + my $count_max; + my $iname_end; + my $ilast_blank; + for ( $ibeg .. $iterm ) { + my $type = $types_to_go[$_]; + + if ( $type eq 'b' ) { + $ilast_blank = $_; + next; + } + + my $token = $tokens_to_go[$_]; + + # Give up if we find an opening paren, binary operator or + # comma within or after the proposed container name. + if ( $token eq '(' + || $is_binary_type{$type} + || $type eq 'k' && $is_binary_keyword{$token} ) + { + $name = EMPTY_STRING; + last; + } + + # The container name is only built of certain types: + last if ( !$is_kwU{$type} ); + + # Normally it is made of one word, but two words for 'use' + if ( $count == 0 ) { + if ( $type eq 'k' + && $is_use_like{ $tokens_to_go[$_] } ) + { + $count_max = 2; + } + else { + $count_max = 1; + } + } + elsif ( defined($count_max) && $count >= $count_max ) { + last; + } + + if ( defined( $name_map{$token} ) ) { + $token = $name_map{$token}; + } + + $name .= SPACE . $token; + $iname_end = $_; + $count++; + } + + # Require a space after the container name token(s) + if ( $name + && defined($ilast_blank) + && $ilast_blank > $iname_end ) + { + $name = substr( $name, 1 ); + } + return $name; + } ## end sub make_uncontained_comma_name + + sub length_tag { + + my ( $i, $ibeg, $i_start ) = @_; + + # Generate a line length to be used as a tag for rejecting bad + # alignments. The tag is the length of the line from the previous + # matching token, or beginning of line, to the function name. This + # will allow the vertical aligner to reject undesirable matches. + + # The basic method: sum length from previous alignment + my $len = token_sequence_length( $i_start, $i - 1 ); + + # Minor patch: do not include the length of any '!'. + # Otherwise, commas in the following line will not + # match + # ok( 20, tapprox( ( pdl 2, 3 ), ( pdl 2, 3 ) ) ); + # ok( 21, !tapprox( ( pdl 2, 3 ), ( pdl 2, 4 ) ) ); + if ( grep { $_ eq '!' } @types_to_go[ $i_start .. $i - 1 ] ) { + $len -= 1; + } + + if ( $i_start == $ibeg ) { + + # For first token, use distance from start of + # line but subtract off the indentation due to + # level. Otherwise, results could vary with + # indentation. + $len += + leading_spaces_to_go($ibeg) - + $levels_to_go[$i_start] * $rOpts_indent_columns; + } + if ( $len < 0 ) { $len = 0 } + return $len; + } ## end sub length_tag + } ## end closure make_alignment_patterns sub make_paren_name { @@ -24617,23 +28158,40 @@ sub make_paren_name { return $name; } ## end sub make_paren_name -{ ## begin closure final_indentation_adjustment +{ ## begin closure get_final_indentation my ( $last_indentation_written, $last_unadjusted_indentation, $last_leading_token ); - sub initialize_final_indentation_adjustment { + sub initialize_get_final_indentation { $last_indentation_written = 0; $last_unadjusted_indentation = 0; $last_leading_token = EMPTY_STRING; return; - } + } ## end sub initialize_get_final_indentation - sub final_indentation_adjustment { + sub get_final_indentation { - #-------------------------------------------------------------------- - # This routine sets the final indentation of a line in the Formatter. - #-------------------------------------------------------------------- + my ( + $self, # + + $ibeg, + $iend, + $rfields, + $rpatterns, + $ri_first, + $ri_last, + $rindentation_list, + $level_jump, + $starting_in_quote, + $is_static_block_comment, + + ) = @_; + + #-------------------------------------------------------------- + # This routine makes any necessary adjustments to get the final + # indentation of a line in the Formatter. + #-------------------------------------------------------------- # It starts with the basic indentation which has been defined for the # leading token, and then takes into account any options that the user @@ -24656,22 +28214,6 @@ sub make_paren_name { # undo_ci, which was processed earlier, so care has to be taken to # keep them coordinated. - my ( - $self, $ibeg, - $iend, $rfields, - $rpatterns, $ri_first, - $ri_last, $rindentation_list, - $level_jump, $starting_in_quote, - $is_static_block_comment, - ) = @_; - - my $rLL = $self->[_rLL_]; - my $Klimit = $self->[_Klimit_]; - my $ris_bli_container = $self->[_ris_bli_container_]; - my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_]; - my $rwant_reduced_ci = $self->[_rwant_reduced_ci_]; - my $rK_weld_left = $self->[_rK_weld_left_]; - # Find the last code token of this line my $i_terminal = $iend; my $terminal_type = $types_to_go[$iend]; @@ -24684,19 +28226,15 @@ sub make_paren_name { } } - my $terminal_block_type = $block_type_to_go[$i_terminal]; - my $is_outdented_line = 0; + my $is_outdented_line; my $type_beg = $types_to_go[$ibeg]; my $token_beg = $tokens_to_go[$ibeg]; - my $block_type_beg = $block_type_to_go[$ibeg]; my $level_beg = $levels_to_go[$ibeg]; + my $block_type_beg = $block_type_to_go[$ibeg]; my $leading_spaces_beg = $leading_spaces_to_go[$ibeg]; - my $K_beg = $K_to_go[$ibeg]; my $seqno_beg = $type_sequence_to_go[$ibeg]; - my $ibeg_weld_fix = $ibeg; my $is_closing_type_beg = $is_closing_type{$type_beg}; - my $is_bli_beg = $seqno_beg ? $ris_bli_container->{$seqno_beg} : 0; # QW INDENTATION PATCH 3: my $seqno_qw_closing; @@ -24724,7 +28262,7 @@ sub make_paren_name { # } # - # MOJO: Set a flag if this lines begins with ')->' + # MOJO patch: Set a flag if this lines begins with ')->' my $leading_paren_arrow = ( $is_closing_type_beg && $token_beg eq ')' @@ -24748,661 +28286,788 @@ sub make_paren_name { # 2 - vertically align with opening token # 3 - indent #--------------------------------------------------------- + my $adjust_indentation = 0; - my $default_adjust_indentation = $adjust_indentation; + my $default_adjust_indentation = 0; + # Parameters needed for option 2, aligning with opening token: my ( $opening_indentation, $opening_offset, $is_leading, $opening_exists ); - # Honor any flag to reduce -ci set by the -bbxi=n option - if ( $seqno_beg && $rwant_reduced_ci->{$seqno_beg} ) { + #------------------------------------- + # Section 1A: + # if line starts with a sequenced item + #------------------------------------- + if ( $seqno_beg || $seqno_qw_closing ) { + + # This can be tedious so we let a sub do it + ( + $adjust_indentation, + $default_adjust_indentation, + $opening_indentation, + $opening_offset, + $is_leading, + $opening_exists, + + ) = $self->get_closing_token_indentation( + + $ibeg, + $iend, + $ri_first, + $ri_last, + $rindentation_list, + $level_jump, + $i_terminal, + $is_semicolon_terminated, + $seqno_qw_closing, + + ); + } + + #-------------------------------------------------------- + # Section 1B: + # if at ');', '};', '>;', and '];' of a terminal qw quote + #-------------------------------------------------------- + elsif ( + substr( $rpatterns->[0], 0, 2 ) eq 'qb' + && substr( $rfields->[0], -1, 1 ) eq ';' + ## $rpatterns->[0] =~ /^qb*;$/ + && $rfields->[0] =~ /^([\)\}\]\>]);$/ + ) + { + if ( $closing_token_indentation{$1} == 0 ) { + $adjust_indentation = 1; + } + else { + $adjust_indentation = 3; + } + } + + #--------------------------------------------------------- + # Section 2: set indentation according to flag set above + # + # Select the indentation object to define leading + # whitespace. If we are outdenting something like '} } );' + # then we want to use one level below the last token + # ($i_terminal) in order to get it to fully outdent through + # all levels. + #--------------------------------------------------------- + my $indentation; + my $lev; + my $level_end = $levels_to_go[$iend]; + + #------------------------------------ + # Section 2A: adjust_indentation == 0 + # No change in indentation + #------------------------------------ + if ( $adjust_indentation == 0 ) { + $indentation = $leading_spaces_beg; + $lev = $level_beg; + } + + #------------------------------------------------------------------- + # Secton 2B: adjust_indentation == 1 + # Change the indentation to be that of a different token on the line + #------------------------------------------------------------------- + elsif ( $adjust_indentation == 1 ) { + + # Previously, the indentation of the terminal token was used: + # OLD CODING: + # $indentation = $reduced_spaces_to_go[$i_terminal]; + # $lev = $levels_to_go[$i_terminal]; + + # Generalization for MOJO patch: + # Use the lowest level indentation of the tokens on the line. + # For example, here we can use the indentation of the ending ';': + # } until ($selection > 0 and $selection < 10); # ok to use ';' + # But this will not outdent if we use the terminal indentation: + # )->then( sub { # use indentation of the ->, not the { + # Warning: reduced_spaces_to_go[] may be a reference, do not + # do numerical checks with it + + my $i_ind = $ibeg; + $indentation = $reduced_spaces_to_go[$i_ind]; + $lev = $levels_to_go[$i_ind]; + while ( $i_ind < $i_terminal ) { + $i_ind++; + if ( $levels_to_go[$i_ind] < $lev ) { + $indentation = $reduced_spaces_to_go[$i_ind]; + $lev = $levels_to_go[$i_ind]; + } + } + } + + #-------------------------------------------------------------- + # Secton 2C: adjust_indentation == 2 + # Handle indented closing token which aligns with opening token + #-------------------------------------------------------------- + elsif ( $adjust_indentation == 2 ) { + + # handle option to align closing token with opening token + $lev = $level_beg; + + # calculate spaces needed to align with opening token + my $space_count = + get_spaces($opening_indentation) + $opening_offset; + + # Indent less than the previous line. + # + # Problem: For -lp we don't exactly know what it was if there + # were recoverable spaces sent to the aligner. A good solution + # would be to force a flush of the vertical alignment buffer, so + # that we would know. For now, this rule is used for -lp: + # + # When the last line did not start with a closing token we will + # be optimistic that the aligner will recover everything wanted. + # + # This rule will prevent us from breaking a hierarchy of closing + # tokens, and in a worst case will leave a closing paren too far + # indented, but this is better than frequently leaving it not + # indented enough. + my $last_spaces = get_spaces($last_indentation_written); - # if this is an opening, it must be alone on the line ... - if ( $is_closing_type{$type_beg} || $ibeg == $i_terminal ) { - $adjust_indentation = 1; + if ( ref($last_indentation_written) + && !$is_closing_token{$last_leading_token} ) + { + $last_spaces += + get_recoverable_spaces($last_indentation_written); } - # ... or a single welded unit (fix for b1173) - elsif ($total_weld_count) { - my $Kterm = $K_to_go[$i_terminal]; - my $Kterm_test = $rK_weld_left->{$Kterm}; - if ( defined($Kterm_test) && $Kterm_test >= $K_beg ) { - $Kterm = $Kterm_test; - } - if ( $Kterm == $K_beg ) { $adjust_indentation = 1 } - } - } + # reset the indentation to the new space count if it works + # only options are all or none: nothing in-between looks good + $lev = $level_beg; - # Update the $is_bli flag as we go. It is initially 1. - # We note seeing a leading opening brace by setting it to 2. - # If we get to the closing brace without seeing the opening then we - # turn it off. This occurs if the opening brace did not get output - # at the start of a line, so we will then indent the closing brace - # in the default way. - if ( $is_bli_beg && $is_bli_beg == 1 ) { - my $K_opening_container = $self->[_K_opening_container_]; - my $K_opening = $K_opening_container->{$seqno_beg}; - if ( $K_beg eq $K_opening ) { - $ris_bli_container->{$seqno_beg} = $is_bli_beg = 2; + my $diff = $last_spaces - $space_count; + if ( $diff > 0 ) { + $indentation = $space_count; } - else { $is_bli_beg = 0 } - } + else { - # QW PATCH for the combination -lp -wn - # For -lp formatting use $ibeg_weld_fix to get around the problem - # that with -lp type formatting the opening and closing tokens to not - # have sequence numbers. - if ( $seqno_qw_closing && $total_weld_count ) { - my $i_plus = $inext_to_go[$ibeg]; - if ( $i_plus <= $max_index_to_go ) { - my $K_plus = $K_to_go[$i_plus]; - if ( defined( $rK_weld_left->{$K_plus} ) ) { - $ibeg_weld_fix = $i_plus; + # We need to fix things ... but there is no good way to do it. + # The best solution is for the user to use a longer maximum + # line length. We could get a smooth variation if we just move + # the paren in using + # $space_count -= ( 1 - $diff ); + # But unfortunately this can give a rather unbalanced look. + + # For -xlp we currently allow a tolerance of one indentation + # level and then revert to a simpler default. This will jump + # suddenly but keeps a balanced look. + if ( $rOpts_extended_line_up_parentheses + && $diff >= -$rOpts_indent_columns + && $space_count > $leading_spaces_beg ) + { + $indentation = $space_count; + } + + # Otherwise revert to defaults + elsif ( $default_adjust_indentation == 0 ) { + $indentation = $leading_spaces_beg; + } + elsif ( $default_adjust_indentation == 1 ) { + $indentation = $reduced_spaces_to_go[$i_terminal]; + $lev = $levels_to_go[$i_terminal]; } } } - # if we are at a closing token of some type.. - if ( $is_closing_type_beg || $seqno_qw_closing ) { - - # get the indentation of the line containing the corresponding - # opening token - ( - $opening_indentation, $opening_offset, - $is_leading, $opening_exists - ) - = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first, - $ri_last, $rindentation_list, $seqno_qw_closing ); + #------------------------------------------------------------- + # Secton 2D: adjust_indentation == 3 + # Full indentation of closing tokens (-icb and -icp or -cti=2) + #------------------------------------------------------------- + else { - my $terminal_is_in_list = $self->is_in_list_by_i($i_terminal); + # handle -icb (indented closing code block braces) + # Updated method for indented block braces: indent one full level if + # there is no continuation indentation. This will occur for major + # structures such as sub, if, else, but not for things like map + # blocks. + # + # Note: only code blocks without continuation indentation are + # handled here (if, else, unless, ..). In the following snippet, + # the terminal brace of the sort block will have continuation + # indentation as shown so it will not be handled by the coding + # here. We would have to undo the continuation indentation to do + # this, but it probably looks ok as is. This is a possible future + # update for semicolon terminated lines. + # + # if ($sortby eq 'date' or $sortby eq 'size') { + # @files = sort { + # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby} + # or $a cmp $b + # } @files; + # } + # + if ( $block_type_beg + && $ci_levels_to_go[$i_terminal] == 0 ) + { + my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] ); + $indentation = $spaces + $rOpts_indent_columns; - # First set the default behavior: - if ( + # NOTE: for -lp we could create a new indentation object, but + # there is probably no need to do it + } - # default behavior is to outdent closing lines - # of the form: "); }; ]; )->xxx;" - $is_semicolon_terminated + # handle -icp and any -icb block braces which fall through above + # test such as the 'sort' block mentioned above. + else { - # and 'cuddled parens' of the form: ")->pack(" - # Bug fix for RT #123749]: the types here were - # incorrectly '(' and ')'. Corrected to be '{' and '}' - || ( - $terminal_type eq '{' - && $type_beg eq '}' - && ( $nesting_depth_to_go[$iend] + 1 == - $nesting_depth_to_go[$ibeg] ) - ) + # There are currently two ways to handle -icp... + # One way is to use the indentation of the previous line: + # $indentation = $last_indentation_written; - # remove continuation indentation for any line like - # } ... { - # or without ending '{' and unbalanced, such as - # such as '}->{$operator}' - || ( - $type_beg eq '}' + # The other way is to use the indentation that the previous line + # would have had if it hadn't been adjusted: + $indentation = $last_unadjusted_indentation; - && ( $types_to_go[$iend] eq '{' - || $levels_to_go[$iend] < $level_beg ) - ) + # Current method: use the minimum of the two. This avoids + # inconsistent indentation. + if ( get_spaces($last_indentation_written) < + get_spaces($indentation) ) + { + $indentation = $last_indentation_written; + } + } - # and when the next line is at a lower indentation level... + # use previous indentation but use own level + # to cause list to be flushed properly + $lev = $level_beg; + } - # PATCH #1: and only if the style allows undoing continuation - # for all closing token types. We should really wait until - # the indentation of the next line is known and then make - # a decision, but that would require another pass. + #------------------------------------------------------------- + # Remember indentation except for multi-line quotes, which get + # no indentation + #------------------------------------------------------------- + if ( !( $ibeg == 0 && $starting_in_quote ) ) { + $last_indentation_written = $indentation; + $last_unadjusted_indentation = $leading_spaces_beg; + $last_leading_token = $token_beg; - # PATCH #2: and not if this token is under -xci control - || ( $level_jump < 0 - && !$some_closing_token_indentation - && !$rseqno_controlling_my_ci->{$K_beg} ) + # Patch to make a line which is the end of a qw quote work with the + # -lp option. Make $token_beg look like a closing token as some + # type even if it is not. This variable will become + # $last_leading_token at the end of this loop. Then, if the -lp + # style is selected, and the next line is also a + # closing token, it will not get more indentation than this line. + # We need to do this because qw quotes (at present) only get + # continuation indentation, not one level of indentation, so we + # need to turn off the -lp indentation. - # Patch for -wn=2, multiple welded closing tokens - || ( $i_terminal > $ibeg - && $is_closing_type{ $types_to_go[$iend] } ) + # ... a picture is worth a thousand words: - # Alternate Patch for git #51, isolated closing qw token not - # outdented if no-delete-old-newlines is set. This works, but - # a more general patch elsewhere fixes the real problem: ljump. - # || ( $seqno_qw_closing && $ibeg == $i_terminal ) + # perltidy -wn -gnu (Without this patch): + # ok(defined( + # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112 + # 2981014)]) + # )); - ) + # perltidy -wn -gnu (With this patch): + # ok(defined( + # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112 + # 2981014)]) + # )); + if ( $seqno_qw_closing + && ( length($token_beg) > 1 || $token_beg eq '>' ) ) { - $adjust_indentation = 1; + $last_leading_token = ')'; } + } - # outdent something like '),' - if ( - $terminal_type eq ',' + #--------------------------------------------------------------------- + # Rule: lines with leading closing tokens should not be outdented more + # than the line which contained the corresponding opening token. + #--------------------------------------------------------------------- - # Removed this constraint for -wn - # OLD: allow just one character before the comma - # && $i_terminal == $ibeg + 1 + # Updated per bug report in alex_bug.pl: we must not + # mess with the indentation of closing logical braces, so + # we must treat something like '} else {' as if it were + # an isolated brace + my $is_isolated_block_brace = $block_type_beg + && ( $i_terminal == $ibeg + || $is_if_elsif_else_unless_while_until_for_foreach{$block_type_beg} + ); - # require LIST environment; otherwise, we may outdent too much - - # this can happen in calls without parentheses (overload.t); - && $terminal_is_in_list - ) - { - $adjust_indentation = 1; - } + # only do this for a ':; which is aligned with its leading '?' + my $is_unaligned_colon = $type_beg eq ':' && !$is_leading; - # undo continuation indentation of a terminal closing token if - # it is the last token before a level decrease. This will allow - # a closing token to line up with its opening counterpart, and - # avoids an indentation jump larger than 1 level. - if ( $i_terminal == $ibeg - && $is_closing_type_beg - && defined($K_beg) - && $K_beg < $Klimit ) - { - my $K_plus = $K_beg + 1; - my $type_plus = $rLL->[$K_plus]->[_TYPE_]; + if ( + defined($opening_indentation) + && !$leading_paren_arrow # MOJO patch + && !$is_isolated_block_brace + && !$is_unaligned_colon + ) + { + if ( get_spaces($opening_indentation) > get_spaces($indentation) ) { + $indentation = $opening_indentation; + } + } - if ( $type_plus eq 'b' && $K_plus < $Klimit ) { - $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_]; - } + #---------------------------------------------------- + # remember the indentation of each line of this batch + #---------------------------------------------------- + push @{$rindentation_list}, $indentation; - if ( $type_plus eq '#' && $K_plus < $Klimit ) { - $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_]; - if ( $type_plus eq 'b' && $K_plus < $Klimit ) { - $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_]; - } + #--------------------------------------------- + # outdent lines with certain leading tokens... + #--------------------------------------------- + if ( - # Note: we have skipped past just one comment (perhaps a - # side comment). There could be more, and we could easily - # skip past all the rest with the following code, or with a - # while loop. It would be rare to have to do this, and - # those block comments would still be indented, so it would - # to leave them indented. So it seems best to just stop at - # a maximum of one comment. - ##if ($type_plus eq '#') { - ## $K_plus = $self->K_next_code($K_plus); - ##} - } + # must be first word of this batch + $ibeg == 0 - if ( !$is_bli_beg && defined($K_plus) ) { - my $lev = $level_beg; - my $level_next = $rLL->[$K_plus]->[_LEVEL_]; + # and ... + && ( - # and do not undo ci if it was set by the -xci option - $adjust_indentation = 1 - if ( $level_next < $lev - && !$rseqno_controlling_my_ci->{$K_beg} ); - } + # certain leading keywords if requested + $rOpts_outdent_keywords + && $type_beg eq 'k' + && $outdent_keyword{$token_beg} - # Patch for RT #96101, in which closing brace of anonymous subs - # was not outdented. We should look ahead and see if there is - # a level decrease at the next token (i.e., a closing token), - # but right now we do not have that information. For now - # we see if we are in a list, and this works well. - # See test files 'sub*.t' for good test cases. - if ( $terminal_is_in_list - && !$rOpts_indent_closing_brace - && $block_type_beg - && $block_type_beg =~ /$ASUB_PATTERN/ ) - { - ( - $opening_indentation, $opening_offset, - $is_leading, $opening_exists - ) - = $self->get_opening_indentation( $ibeg, $ri_first, - $ri_last, $rindentation_list ); - my $indentation = $leading_spaces_beg; - if ( defined($opening_indentation) - && get_spaces($indentation) > - get_spaces($opening_indentation) ) - { - $adjust_indentation = 1; - } - } - } + # or labels if requested + || $rOpts_outdent_labels && $type_beg eq 'J' - # YVES patch 1 of 2: - # Undo ci of line with leading closing eval brace, - # but not beyond the indentation of the line with - # the opening brace. - if ( - $block_type_beg eq 'eval' - ##&& !$rOpts_line_up_parentheses - && !ref($leading_spaces_beg) - && !$rOpts_indent_closing_brace - ) - { - ( - $opening_indentation, $opening_offset, - $is_leading, $opening_exists - ) - = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last, - $rindentation_list ); - my $indentation = $leading_spaces_beg; - if ( defined($opening_indentation) - && get_spaces($indentation) > - get_spaces($opening_indentation) ) - { - $adjust_indentation = 1; + # or static block comments if requested + || $is_static_block_comment + && $rOpts_outdent_static_block_comments + ) + ) + { + my $space_count = leading_spaces_to_go($ibeg); + if ( $space_count > 0 ) { + $space_count -= $rOpts_continuation_indentation; + $is_outdented_line = 1; + if ( $space_count < 0 ) { $space_count = 0 } + + # do not promote a spaced static block comment to non-spaced; + # this is not normally necessary but could be for some + # unusual user inputs (such as -ci = -i) + if ( $type_beg eq '#' && $space_count == 0 ) { + $space_count = 1; } + + $indentation = $space_count; } + } - # patch for issue git #40: -bli setting has priority - $adjust_indentation = 0 if ($is_bli_beg); + return ( - $default_adjust_indentation = $adjust_indentation; + $indentation, + $lev, + $level_end, + $i_terminal, + $is_outdented_line, - # Now modify default behavior according to user request: - # handle option to indent non-blocks of the form ); }; ]; - # But don't do special indentation to something like ')->pack(' - if ( !$block_type_beg ) { + ); + } ## end sub get_final_indentation - # Note that logical padding has already been applied, so we may - # need to remove some spaces to get a valid hash key. - my $tok = $token_beg; - my $cti = $closing_token_indentation{$tok}; + sub get_closing_token_indentation { - # Fix the value of 'cti' for an isolated non-welded closing qw - # delimiter. - if ( $seqno_qw_closing && $ibeg_weld_fix == $ibeg ) { + # Determine indentation adjustment for a line with a leading closing + # token - i.e. one of these: ) ] } : - # A quote delimiter which is not a container will not have - # a cti value defined. In this case use the style of a - # paren. For example - # my @fars = ( - # qw< - # far - # farfar - # farfars-far - # >, - # ); - if ( !defined($cti) && length($tok) == 1 ) { + my ( + $self, # + + $ibeg, + $iend, + $ri_first, + $ri_last, + $rindentation_list, + $level_jump, + $i_terminal, + $is_semicolon_terminated, + $seqno_qw_closing, - # something other than ')', '}', ']' ; use flag for ')' - $cti = $closing_token_indentation{')'}; + ) = @_; - # But for now, do not outdent non-container qw - # delimiters because it would would change existing - # formatting. - if ( $tok ne '>' ) { $cti = 3 } - } + my $adjust_indentation = 0; + my $default_adjust_indentation = $adjust_indentation; + my $terminal_type = $types_to_go[$i_terminal]; - # A non-welded closing qw cannot currently use -cti=1 - # because that option requires a sequence number to find - # the opening indentation, and qw quote delimiters are not - # sequenced items. - if ( defined($cti) && $cti == 1 ) { $cti = 0 } - } + my $type_beg = $types_to_go[$ibeg]; + my $token_beg = $tokens_to_go[$ibeg]; + my $level_beg = $levels_to_go[$ibeg]; + my $block_type_beg = $block_type_to_go[$ibeg]; + my $leading_spaces_beg = $leading_spaces_to_go[$ibeg]; + my $seqno_beg = $type_sequence_to_go[$ibeg]; + my $is_closing_type_beg = $is_closing_type{$type_beg}; - if ( !defined($cti) ) { + my ( + $opening_indentation, $opening_offset, + $is_leading, $opening_exists + ); - # $cti may not be defined for several reasons. - # -padding may have been applied so the character - # has a length > 1 - # - we may have welded to a closing quote token. - # Here is an example (perltidy -wn): - # __PACKAGE__->load_components( qw( - # > Core - # > - # > ) ); - $adjust_indentation = 0; + # Honor any flag to reduce -ci set by the -bbxi=n option + if ( $seqno_beg && $self->[_rwant_reduced_ci_]->{$seqno_beg} ) { - } - elsif ( $cti == 1 ) { - if ( $i_terminal <= $ibeg + 1 - || $is_semicolon_terminated ) - { - $adjust_indentation = 2; - } - else { - $adjust_indentation = 0; - } - } - elsif ( $cti == 2 ) { - if ($is_semicolon_terminated) { - $adjust_indentation = 3; - } - else { - $adjust_indentation = 0; - } - } - elsif ( $cti == 3 ) { - $adjust_indentation = 3; - } + # if this is an opening, it must be alone on the line ... + if ( $is_closing_type{$type_beg} || $ibeg == $i_terminal ) { + $adjust_indentation = 1; } - # handle option to indent blocks - else { - if ( - $rOpts_indent_closing_brace - && ( - $i_terminal == $ibeg # isolated terminal '}' - || $is_semicolon_terminated - ) - ) # } xxxx ; - { - $adjust_indentation = 3; + # ... or a single welded unit (fix for b1173) + elsif ($total_weld_count) { + my $K_beg = $K_to_go[$ibeg]; + my $Kterm = $K_to_go[$i_terminal]; + my $Kterm_test = $self->[_rK_weld_left_]->{$Kterm}; + if ( defined($Kterm_test) && $Kterm_test >= $K_beg ) { + $Kterm = $Kterm_test; } + if ( $Kterm == $K_beg ) { $adjust_indentation = 1 } } } - # if at ');', '};', '>;', and '];' of a terminal qw quote - elsif ( - substr( $rpatterns->[0], 0, 2 ) eq 'qb' - && substr( $rfields->[0], -1, 1 ) eq ';' - ##&& $rpatterns->[0] =~ /^qb*;$/ - && $rfields->[0] =~ /^([\)\}\]\>]);$/ - ) - { - if ( $closing_token_indentation{$1} == 0 ) { - $adjust_indentation = 1; + my $ris_bli_container = $self->[_ris_bli_container_]; + my $is_bli_beg = $seqno_beg ? $ris_bli_container->{$seqno_beg} : 0; + + # Update the $is_bli flag as we go. It is initially 1. + # We note seeing a leading opening brace by setting it to 2. + # If we get to the closing brace without seeing the opening then we + # turn it off. This occurs if the opening brace did not get output + # at the start of a line, so we will then indent the closing brace + # in the default way. + if ( $is_bli_beg && $is_bli_beg == 1 ) { + my $K_opening_container = $self->[_K_opening_container_]; + my $K_opening = $K_opening_container->{$seqno_beg}; + my $K_beg = $K_to_go[$ibeg]; + if ( $K_beg eq $K_opening ) { + $ris_bli_container->{$seqno_beg} = $is_bli_beg = 2; } - else { - $adjust_indentation = 3; + else { $is_bli_beg = 0 } + } + + # QW PATCH for the combination -lp -wn + # For -lp formatting use $ibeg_weld_fix to get around the problem + # that with -lp type formatting the opening and closing tokens to not + # have sequence numbers. + my $ibeg_weld_fix = $ibeg; + if ( $seqno_qw_closing && $total_weld_count ) { + my $i_plus = $inext_to_go[$ibeg]; + if ( $i_plus <= $max_index_to_go ) { + my $K_plus = $K_to_go[$i_plus]; + if ( defined( $self->[_rK_weld_left_]->{$K_plus} ) ) { + $ibeg_weld_fix = $i_plus; + } } } - # if line begins with a ':', align it with any - # previous line leading with corresponding ? - elsif ( $type_beg eq ':' ) { + # if we are at a closing token of some type.. + if ( $is_closing_type_beg || $seqno_qw_closing ) { + + my $K_beg = $K_to_go[$ibeg]; + + # get the indentation of the line containing the corresponding + # opening token ( $opening_indentation, $opening_offset, $is_leading, $opening_exists ) - = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last, - $rindentation_list ); - if ($is_leading) { $adjust_indentation = 2; } - } - - #--------------------------------------------------------- - # Section 2: set indentation according to flag set above - # - # Select the indentation object to define leading - # whitespace. If we are outdenting something like '} } );' - # then we want to use one level below the last token - # ($i_terminal) in order to get it to fully outdent through - # all levels. - #--------------------------------------------------------- - my $indentation; - my $lev; - my $level_end = $levels_to_go[$iend]; - - if ( $adjust_indentation == 0 ) { - $indentation = $leading_spaces_beg; - $lev = $level_beg; - } - elsif ( $adjust_indentation == 1 ) { + = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first, + $ri_last, $rindentation_list, $seqno_qw_closing ); - # Change the indentation to be that of a different token on the line - # Previously, the indentation of the terminal token was used: - # OLD CODING: - # $indentation = $reduced_spaces_to_go[$i_terminal]; - # $lev = $levels_to_go[$i_terminal]; + # Patch for rt144979, part 1. Coordinated with part 2. + # Do not undo ci for a cuddled closing brace control; it + # needs to be treated exactly the same ci as an isolated + # closing brace. + my $is_cuddled_closing_brace = $seqno_beg + && $self->[_ris_cuddled_closing_brace_]->{$seqno_beg}; - # Generalization for MOJO: - # Use the lowest level indentation of the tokens on the line. - # For example, here we can use the indentation of the ending ';': - # } until ($selection > 0 and $selection < 10); # ok to use ';' - # But this will not outdent if we use the terminal indentation: - # )->then( sub { # use indentation of the ->, not the { - # Warning: reduced_spaces_to_go[] may be a reference, do not - # do numerical checks with it + # First set the default behavior: + if ( - my $i_ind = $ibeg; - $indentation = $reduced_spaces_to_go[$i_ind]; - $lev = $levels_to_go[$i_ind]; - while ( $i_ind < $i_terminal ) { - $i_ind++; - if ( $levels_to_go[$i_ind] < $lev ) { - $indentation = $reduced_spaces_to_go[$i_ind]; - $lev = $levels_to_go[$i_ind]; - } - } - } + # default behavior is to outdent closing lines + # of the form: "); }; ]; )->xxx;" + $is_semicolon_terminated - # handle indented closing token which aligns with opening token - elsif ( $adjust_indentation == 2 ) { + # and 'cuddled parens' of the form: ")->pack(". Bug fix for RT + # #123749]: the TYPES here were incorrectly ')' and '('. The + # corrected TYPES are '}' and '{'. But skip a cuddled block. + || ( + $terminal_type eq '{' + && $type_beg eq '}' + && ( $nesting_depth_to_go[$iend] + 1 == + $nesting_depth_to_go[$ibeg] ) + && !$is_cuddled_closing_brace + ) - # handle option to align closing token with opening token - $lev = $level_beg; + # remove continuation indentation for any line like + # } ... { + # or without ending '{' and unbalanced, such as + # such as '}->{$operator}' + || ( + $type_beg eq '}' - # calculate spaces needed to align with opening token - my $space_count = - get_spaces($opening_indentation) + $opening_offset; + && ( $types_to_go[$iend] eq '{' + || $levels_to_go[$iend] < $level_beg ) - # Indent less than the previous line. - # - # Problem: For -lp we don't exactly know what it was if there - # were recoverable spaces sent to the aligner. A good solution - # would be to force a flush of the vertical alignment buffer, so - # that we would know. For now, this rule is used for -lp: - # - # When the last line did not start with a closing token we will - # be optimistic that the aligner will recover everything wanted. - # - # This rule will prevent us from breaking a hierarchy of closing - # tokens, and in a worst case will leave a closing paren too far - # indented, but this is better than frequently leaving it not - # indented enough. - my $last_spaces = get_spaces($last_indentation_written); + # but not if a cuddled block + && !$is_cuddled_closing_brace + ) - if ( ref($last_indentation_written) - && !$is_closing_token{$last_leading_token} ) - { - $last_spaces += - get_recoverable_spaces($last_indentation_written); - } + # and when the next line is at a lower indentation level... - # reset the indentation to the new space count if it works - # only options are all or none: nothing in-between looks good - $lev = $level_beg; + # PATCH #1: and only if the style allows undoing continuation + # for all closing token types. We should really wait until + # the indentation of the next line is known and then make + # a decision, but that would require another pass. - my $diff = $last_spaces - $space_count; - if ( $diff > 0 ) { - $indentation = $space_count; - } - else { + # PATCH #2: and not if this token is under -xci control + || ( $level_jump < 0 + && !$some_closing_token_indentation + && !$self->[_rseqno_controlling_my_ci_]->{$K_beg} ) - # We need to fix things ... but there is no good way to do it. - # The best solution is for the user to use a longer maximum - # line length. We could get a smooth variation if we just move - # the paren in using - # $space_count -= ( 1 - $diff ); - # But unfortunately this can give a rather unbalanced look. + # Patch for -wn=2, multiple welded closing tokens + || ( $i_terminal > $ibeg + && $is_closing_type{ $types_to_go[$iend] } ) - # For -xlp we currently allow a tolerance of one indentation - # level and then revert to a simpler default. This will jump - # suddenly but keeps a balanced look. - if ( $rOpts_extended_line_up_parentheses - && $diff >= -$rOpts_indent_columns - && $space_count > $leading_spaces_beg ) - { - $indentation = $space_count; - } + # Alternate Patch for git #51, isolated closing qw token not + # outdented if no-delete-old-newlines is set. This works, but + # a more general patch elsewhere fixes the real problem: ljump. + # || ( $seqno_qw_closing && $ibeg == $i_terminal ) - # Otherwise revert to defaults - elsif ( $default_adjust_indentation == 0 ) { - $indentation = $leading_spaces_beg; - } - elsif ( $default_adjust_indentation == 1 ) { - $indentation = $reduced_spaces_to_go[$i_terminal]; - $lev = $levels_to_go[$i_terminal]; - } + ) + { + $adjust_indentation = 1; } - } - # Full indentation of closing tokens (-icb and -icp or -cti=2) - else { + # outdent something like '),' + if ( + $terminal_type eq ',' - # handle -icb (indented closing code block braces) - # Updated method for indented block braces: indent one full level if - # there is no continuation indentation. This will occur for major - # structures such as sub, if, else, but not for things like map - # blocks. - # - # Note: only code blocks without continuation indentation are - # handled here (if, else, unless, ..). In the following snippet, - # the terminal brace of the sort block will have continuation - # indentation as shown so it will not be handled by the coding - # here. We would have to undo the continuation indentation to do - # this, but it probably looks ok as is. This is a possible future - # update for semicolon terminated lines. - # - # if ($sortby eq 'date' or $sortby eq 'size') { - # @files = sort { - # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby} - # or $a cmp $b - # } @files; - # } - # - if ( $block_type_beg - && $ci_levels_to_go[$i_terminal] == 0 ) - { - my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] ); - $indentation = $spaces + $rOpts_indent_columns; + # Removed this constraint for -wn + # OLD: allow just one character before the comma + # && $i_terminal == $ibeg + 1 - # NOTE: for -lp we could create a new indentation object, but - # there is probably no need to do it + # require LIST environment; otherwise, we may outdent too much - + # this can happen in calls without parentheses (overload.t); + && $self->is_in_list_by_i($i_terminal) + ) + { + $adjust_indentation = 1; } - # handle -icp and any -icb block braces which fall through above - # test such as the 'sort' block mentioned above. - else { - - # There are currently two ways to handle -icp... - # One way is to use the indentation of the previous line: - # $indentation = $last_indentation_written; - - # The other way is to use the indentation that the previous line - # would have had if it hadn't been adjusted: - $indentation = $last_unadjusted_indentation; + # undo continuation indentation of a terminal closing token if + # it is the last token before a level decrease. This will allow + # a closing token to line up with its opening counterpart, and + # avoids an indentation jump larger than 1 level. + my $rLL = $self->[_rLL_]; + my $Klimit = $self->[_Klimit_]; + if ( $i_terminal == $ibeg + && $is_closing_type_beg + && defined($K_beg) + && $K_beg < $Klimit ) + { + my $K_plus = $K_beg + 1; + my $type_plus = $rLL->[$K_plus]->[_TYPE_]; - # Current method: use the minimum of the two. This avoids - # inconsistent indentation. - if ( get_spaces($last_indentation_written) < - get_spaces($indentation) ) - { - $indentation = $last_indentation_written; + if ( $type_plus eq 'b' && $K_plus < $Klimit ) { + $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_]; } - } - # use previous indentation but use own level - # to cause list to be flushed properly - $lev = $level_beg; - } + if ( $type_plus eq '#' && $K_plus < $Klimit ) { + $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_]; + if ( $type_plus eq 'b' && $K_plus < $Klimit ) { + $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_]; + } - # remember indentation except for multi-line quotes, which get - # no indentation - unless ( $ibeg == 0 && $starting_in_quote ) { - $last_indentation_written = $indentation; - $last_unadjusted_indentation = $leading_spaces_beg; - $last_leading_token = $token_beg; + # Note: we have skipped past just one comment (perhaps a + # side comment). There could be more, and we could easily + # skip past all the rest with the following code, or with a + # while loop. It would be rare to have to do this, and + # those block comments would still be indented, so it would + # to leave them indented. So it seems best to just stop at + # a maximum of one comment. + ##if ($type_plus eq '#') { + ## $K_plus = $self->K_next_code($K_plus); + ##} + } - # Patch to make a line which is the end of a qw quote work with the - # -lp option. Make $token_beg look like a closing token as some - # type even if it is not. This variable will become - # $last_leading_token at the end of this loop. Then, if the -lp - # style is selected, and the next line is also a - # closing token, it will not get more indentation than this line. - # We need to do this because qw quotes (at present) only get - # continuation indentation, not one level of indentation, so we - # need to turn off the -lp indentation. + if ( !$is_bli_beg && defined($K_plus) ) { + my $lev = $level_beg; + my $level_next = $rLL->[$K_plus]->[_LEVEL_]; - # ... a picture is worth a thousand words: + # and do not undo ci if it was set by the -xci option + $adjust_indentation = 1 + if ( $level_next < $lev + && !$self->[_rseqno_controlling_my_ci_]->{$K_beg} ); + } - # perltidy -wn -gnu (Without this patch): - # ok(defined( - # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112 - # 2981014)]) - # )); + # Patch for RT #96101, in which closing brace of anonymous subs + # was not outdented. We should look ahead and see if there is + # a level decrease at the next token (i.e., a closing token), + # but right now we do not have that information. For now + # we see if we are in a list, and this works well. + # See test files 'sub*.t' for good test cases. + if ( !$rOpts_indent_closing_brace + && $block_type_beg + && $self->[_ris_asub_block_]->{$seqno_beg} + && $self->is_in_list_by_i($i_terminal) ) + { + ( + $opening_indentation, $opening_offset, + $is_leading, $opening_exists + ) + = $self->get_opening_indentation( $ibeg, $ri_first, + $ri_last, $rindentation_list ); + my $indentation = $leading_spaces_beg; + if ( defined($opening_indentation) + && get_spaces($indentation) > + get_spaces($opening_indentation) ) + { + $adjust_indentation = 1; + } + } + } - # perltidy -wn -gnu (With this patch): - # ok(defined( - # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112 - # 2981014)]) - # )); - if ( $seqno_qw_closing - && ( length($token_beg) > 1 || $token_beg eq '>' ) ) + # YVES patch 1 of 2: + # Undo ci of line with leading closing eval brace, + # but not beyond the indentation of the line with + # the opening brace. + if ( $block_type_beg + && $block_type_beg eq 'eval' + && !ref($leading_spaces_beg) + && !$rOpts_indent_closing_brace ) { - $last_leading_token = ')'; + ( + $opening_indentation, $opening_offset, + $is_leading, $opening_exists + ) + = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last, + $rindentation_list ); + my $indentation = $leading_spaces_beg; + if ( defined($opening_indentation) + && get_spaces($indentation) > + get_spaces($opening_indentation) ) + { + $adjust_indentation = 1; + } } - } - # be sure lines with leading closing tokens are not outdented more - # than the line which contained the corresponding opening token. + # patch for issue git #40: -bli setting has priority + $adjust_indentation = 0 if ($is_bli_beg); - #-------------------------------------------------------- - # updated per bug report in alex_bug.pl: we must not - # mess with the indentation of closing logical braces so - # we must treat something like '} else {' as if it were - # an isolated brace - #-------------------------------------------------------- - my $is_isolated_block_brace = $block_type_beg - && ( $i_terminal == $ibeg - || $is_if_elsif_else_unless_while_until_for_foreach{$block_type_beg} - ); + $default_adjust_indentation = $adjust_indentation; - # only do this for a ':; which is aligned with its leading '?' - my $is_unaligned_colon = $type_beg eq ':' && !$is_leading; + # Now modify default behavior according to user request: + # handle option to indent non-blocks of the form ); }; ]; + # But don't do special indentation to something like ')->pack(' + if ( !$block_type_beg ) { - if ( - defined($opening_indentation) - && !$leading_paren_arrow # MOJO - && !$is_isolated_block_brace - && !$is_unaligned_colon - ) - { - if ( get_spaces($opening_indentation) > get_spaces($indentation) ) { - $indentation = $opening_indentation; - } - } + # Note that logical padding has already been applied, so we may + # need to remove some spaces to get a valid hash key. + my $tok = $token_beg; + my $cti = $closing_token_indentation{$tok}; - # remember the indentation of each line of this batch - push @{$rindentation_list}, $indentation; + # Fix the value of 'cti' for an isolated non-welded closing qw + # delimiter. + if ( $seqno_qw_closing && $ibeg_weld_fix == $ibeg ) { - # outdent lines with certain leading tokens... - if ( + # A quote delimiter which is not a container will not have + # a cti value defined. In this case use the style of a + # paren. For example + # my @fars = ( + # qw< + # far + # farfar + # farfars-far + # >, + # ); + if ( !defined($cti) && length($tok) == 1 ) { - # must be first word of this batch - $ibeg == 0 + # something other than ')', '}', ']' ; use flag for ')' + $cti = $closing_token_indentation{')'}; - # and ... - && ( + # But for now, do not outdent non-container qw + # delimiters because it would would change existing + # formatting. + if ( $tok ne '>' ) { $cti = 3 } + } - # certain leading keywords if requested - $rOpts_outdent_keywords - && $type_beg eq 'k' - && $outdent_keyword{$token_beg} + # A non-welded closing qw cannot currently use -cti=1 + # because that option requires a sequence number to find + # the opening indentation, and qw quote delimiters are not + # sequenced items. + if ( defined($cti) && $cti == 1 ) { $cti = 0 } + } - # or labels if requested - || $rOpts_outdent_labels && $type_beg eq 'J' + if ( !defined($cti) ) { - # or static block comments if requested - || $is_static_block_comment - && $rOpts_outdent_static_block_comments - ) - ) - { - my $space_count = leading_spaces_to_go($ibeg); - if ( $space_count > 0 ) { - $space_count -= $rOpts_continuation_indentation; - $is_outdented_line = 1; - if ( $space_count < 0 ) { $space_count = 0 } + # $cti may not be defined for several reasons. + # -padding may have been applied so the character + # has a length > 1 + # - we may have welded to a closing quote token. + # Here is an example (perltidy -wn): + # __PACKAGE__->load_components( qw( + # > Core + # > + # > ) ); + $adjust_indentation = 0; - # do not promote a spaced static block comment to non-spaced; - # this is not normally necessary but could be for some - # unusual user inputs (such as -ci = -i) - if ( $type_beg eq '#' && $space_count == 0 ) { - $space_count = 1; } + elsif ( $cti == 1 ) { + if ( $i_terminal <= $ibeg + 1 + || $is_semicolon_terminated ) + { + $adjust_indentation = 2; + } + else { + $adjust_indentation = 0; + } + } + elsif ( $cti == 2 ) { + if ($is_semicolon_terminated) { + $adjust_indentation = 3; + } + else { + $adjust_indentation = 0; + } + } + elsif ( $cti == 3 ) { + $adjust_indentation = 3; + } + } - $indentation = $space_count; + # handle option to indent blocks + else { + if ( + $rOpts_indent_closing_brace + && ( + $i_terminal == $ibeg # isolated terminal '}' + || $is_semicolon_terminated + ) + ) # } xxxx ; + { + $adjust_indentation = 3; + } } + } ## end if ( $is_closing_type_beg || $seqno_qw_closing ) + + # if line begins with a ':', align it with any + # previous line leading with corresponding ? + elsif ( $type_beg eq ':' ) { + ( + $opening_indentation, $opening_offset, + $is_leading, $opening_exists + ) + = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last, + $rindentation_list ); + if ($is_leading) { $adjust_indentation = 2; } } - return ( $indentation, $lev, $level_end, $terminal_type, - $terminal_block_type, $is_semicolon_terminated, - $is_outdented_line ); - } ## end sub final_indentation_adjustment -} ## end closure final_indentation_adjustment + return ( + + $adjust_indentation, + $default_adjust_indentation, + $opening_indentation, + $opening_offset, + $is_leading, + $opening_exists, + + ); + } ## end sub get_closing_token_indentation +} ## end closure get_final_indentation sub get_opening_indentation { @@ -25419,7 +29084,7 @@ sub get_opening_indentation { # $rindentation_list - reference to a list containing the indentation # used for each line. # $qw_seqno - optional sequence number to use if normal seqno not defined - # (TODO: would be more general to just look this up from index i) + # (NOTE: would be more general to just look this up from index i) # # return: # -the indentation of the line which contained the opening token @@ -25451,6 +29116,61 @@ sub get_opening_indentation { return ( $indent, $offset, $is_leading, $exists ); } ## end sub get_opening_indentation +sub examine_vertical_tightness_flags { + my ($self) = @_; + + # For efficiency, we will set a flag to skip all calls to sub + # 'set_vertical_tightness_flags' if vertical tightness is not possible with + # the user input parameters. If vertical tightness is possible, we will + # simply leave the flag undefined and return. + + # Vertical tightness is never possible with --freeze-whitespace + if ($rOpts_freeze_whitespace) { + $self->[_no_vertical_tightness_flags_] = 1; + return; + } + + # This sub is coordinated with sub set_vertical_tightness_flags. + # The Section numbers in the following comments are the sections + # in sub set_vertical_tightness_flags: + + # Examine controls for Section 1a: + return if ($rOpts_line_up_parentheses); + + foreach my $key ( keys %opening_vertical_tightness ) { + return if ( $opening_vertical_tightness{$key} ); + } + + # Examine controls for Section 1b: + foreach my $key ( keys %closing_vertical_tightness ) { + return if ( $closing_vertical_tightness{$key} ); + } + + # Examine controls for Section 1c: + foreach my $key ( keys %opening_token_right ) { + return if ( $opening_token_right{$key} ); + } + + # Examine controls for Section 1d: + foreach my $key ( keys %stack_opening_token ) { + return if ( $stack_opening_token{$key} ); + } + foreach my $key ( keys %stack_closing_token ) { + return if ( $stack_closing_token{$key} ); + } + + # Examine controls for Section 2: + return if ($rOpts_block_brace_vertical_tightness); + + # Examine controls for Section 3: + return if ($rOpts_stack_closing_block_brace); + + # None of the controls used for vertical tightness are set, so + # we can skip all calls to sub set_vertical_tightness_flags + $self->[_no_vertical_tightness_flags_] = 1; + return; +} ## end sub examine_vertical_tightness_flags + sub set_vertical_tightness_flags { my ( $self, $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last, @@ -25458,6 +29178,8 @@ sub set_vertical_tightness_flags { = @_; # Define vertical tightness controls for the nth line of a batch. + # Note: do not call this sub for a block comment or if + # $rOpts_freeze_whitespace is set. # These parameters are passed to the vertical aligner to indicated # if we should combine this line with the next line to achieve the @@ -25487,11 +29209,6 @@ sub set_vertical_tightness_flags { # continually increase if we allowed it when the -fws flag is set. # See case b499 for an example. - # Speedup: just return for a comment - if ( $max_index_to_go == 0 && $types_to_go[0] eq '#' ) { - return; - } - # Define these values... my $vt_type = 0; my $vt_opening_flag = 0; @@ -25503,13 +29220,11 @@ sub set_vertical_tightness_flags { my $vt_min_lines = 0; my $vt_max_lines = 0; - goto RETURN - if ($rOpts_freeze_whitespace); - # Uses these global parameters: # $rOpts_block_brace_tightness # $rOpts_block_brace_vertical_tightness # $rOpts_stack_closing_block_brace + # $rOpts_line_up_parentheses # %opening_vertical_tightness # %closing_vertical_tightness # %opening_token_right @@ -25560,17 +29275,20 @@ sub set_vertical_tightness_flags { if ( $self->[_rK_weld_left_]->{ $K_to_go[$iend_next] } && $is_closing_type{$type_end_next} ); - # Avoid conflict of -bom and -pt=1 or -pt=2, fixes b1270 - # See similar patch above for $cvt. + # The flag '_rbreak_container_' avoids conflict of -bom and -pt=1 + # or -pt=2; fixes b1270. See similar patch above for $cvt. my $seqno = $type_sequence_to_go[$iend]; - if ( $ovt && $self->[_rwant_container_open_]->{$seqno} ) { + if ( $ovt + && $seqno + && $self->[_rbreak_container_]->{$seqno} ) + { $ovt = 0; } - if ( $ovt == 2 - && $self->[_rreduce_vertical_tightness_by_seqno_]->{$seqno} ) - { - $ovt = 1; + # The flag '_rmax_vertical_tightness_' avoids welding conflicts. + if ( defined( $self->[_rmax_vertical_tightness_]->{$seqno} ) ) { + $ovt = + min( $ovt, $self->[_rmax_vertical_tightness_]->{$seqno} ); } unless ( @@ -25603,13 +29321,12 @@ sub set_vertical_tightness_flags { && $is_closing_token{$token_next} && $types_to_go[$iend] ne '#' ) # for safety, shouldn't happen! { - my $ovt = $opening_vertical_tightness{$token_next}; my $cvt = $closing_vertical_tightness{$token_next}; # Avoid conflict of -bom and -pvt=1 or -pvt=2, fixes b977, b1303 # See similar patch above for $ovt. my $seqno = $type_sequence_to_go[$ibeg_next]; - if ( $cvt && $self->[_rwant_container_open_]->{$seqno} ) { + if ( $cvt && $self->[_rbreak_container_]->{$seqno} ) { $cvt = 0; } @@ -25628,6 +29345,17 @@ sub set_vertical_tightness_flags { $cvt = 1; } + # Fix for b1379, b1380, b1381, b1382, b1384 part 2, + # instablility with adding and deleting trailing commas: + # Reducing -cvt=2 to =1 fixes stability for -wtc=b in b1379,1380. + # Reducing -cvt>0 to =0 fixes stability for -wtc=b in b1381,1382. + # Reducing -cvt>0 to =0 fixes stability for -wtc=m in b1384 + if ( $cvt + && $self->[_ris_bare_trailing_comma_by_seqno_]->{$seqno} ) + { + $cvt = 0; + } + if ( # Never append a trailing line like ')->pack(' because it @@ -25647,6 +29375,7 @@ sub set_vertical_tightness_flags { # allow closing up 2-line method calls || ( $rOpts_line_up_parentheses && $token_next eq ')' + && $type_sequence_to_go[$ibeg_next] && $self->[_rlp_object_by_seqno_] ->{ $type_sequence_to_go[$ibeg_next] } ) ) @@ -25683,6 +29412,7 @@ sub set_vertical_tightness_flags { my $seqno_ibeg_next = $type_sequence_to_go[$ibeg_next]; if ( $rOpts_line_up_parentheses && $total_weld_count + && $seqno_ibeg_next && $self->[_rlp_object_by_seqno_]->{$seqno_ibeg_next} && $self->is_welded_at_seqno($seqno_ibeg_next) ) { @@ -25740,15 +29470,20 @@ sub set_vertical_tightness_flags { && $token_end ne '||' && $token_end ne '&&' # Keep break after '=' if -lp. Fixes b964 b1040 b1062 b1083 b1089. + # Generalized from '=' to $is_assignment to fix b1375. && !( - $token_end eq '=' + $is_assignment{ $types_to_go[$iend] } && $rOpts_line_up_parentheses + && $type_sequence_to_go[$ibeg_next] && $self->[_rlp_object_by_seqno_] ->{ $type_sequence_to_go[$ibeg_next] } ) # looks bad if we align vertically with the wrong container && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next] + + # give -kba priority over -otr (b1445) + && !$self->[_rbreak_after_Klast_]->{ $K_to_go[$iend] } ) { my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0; @@ -25782,7 +29517,7 @@ sub set_vertical_tightness_flags { my $seq_next = $type_sequence_to_go[$ibeg_next]; $stackable = $stack_closing_token{$token_beg_next} unless ( $block_type_to_go[$ibeg_next] - || $seq_next && $self->[_rwant_container_open_]->{$seq_next} ); + || $seq_next && $self->[_rbreak_container_]->{$seq_next} ); } elsif ($is_opening_token{$token_end} && $is_opening_token{$token_beg_next} ) @@ -25831,6 +29566,7 @@ sub set_vertical_tightness_flags { elsif ($rOpts_block_brace_vertical_tightness && $ibeg eq $iend && $types_to_go[$iend] eq '{' + && $block_type_to_go[$iend] && $block_type_to_go[$iend] =~ /$block_brace_vertical_tightness_pattern/ ) { @@ -25866,16 +29602,22 @@ sub set_vertical_tightness_flags { # get the sequence numbers of the ends of this line $vt_seqno_beg = $type_sequence_to_go[$ibeg]; - if ( !$vt_seqno_beg && $types_to_go[$ibeg] eq 'q' ) { - $vt_seqno_beg = $self->get_seqno( $ibeg, $ending_in_quote ); + if ( !$vt_seqno_beg ) { + if ( $types_to_go[$ibeg] eq 'q' ) { + $vt_seqno_beg = $self->get_seqno( $ibeg, $ending_in_quote ); + } + else { $vt_seqno_beg = EMPTY_STRING } } $vt_seqno_end = $type_sequence_to_go[$iend]; - if ( !$vt_seqno_end && $types_to_go[$iend] eq 'q' ) { - $vt_seqno_end = $self->get_seqno( $iend, $ending_in_quote ); + if ( !$vt_seqno_end ) { + if ( $types_to_go[$iend] eq 'q' ) { + $vt_seqno_end = $self->get_seqno( $iend, $ending_in_quote ); + } + else { $vt_seqno_end = EMPTY_STRING } } - RETURN: + if ( !defined($vt_seqno) ) { $vt_seqno = EMPTY_STRING } my $rvertical_tightness_flags = { _vt_type => $vt_type, @@ -26012,10 +29754,12 @@ sub set_vertical_tightness_flags { && ( ( $i + 1 <= $max_index_to_go + && $block_type_to_go[ $i + 1 ] && $block_type_to_go[ $i + 1 ] eq $accumulating_text_for_block ) || ( $i + 2 <= $max_index_to_go + && $block_type_to_go[ $i + 2 ] && $block_type_to_go[ $i + 2 ] eq $accumulating_text_for_block ) ) @@ -26070,6 +29814,7 @@ sub set_vertical_tightness_flags { my $type = $types_to_go[$i]; my $block_type = $block_type_to_go[$i]; my $token = $tokens_to_go[$i]; + $block_type = EMPTY_STRING unless ($block_type); # remember last nonblank token type if ( $type ne '#' && $type ne 'b' ) { @@ -26300,7 +30045,7 @@ sub set_vertical_tightness_flags { ')' => '(', ']' => '[', ); - } + } ## end BEGIN sub balance_csc_text { @@ -26400,7 +30145,7 @@ sub add_closing_side_comment { # ..and the corresponding opening brace must is not in this batch # (because we do not need to tag one-line blocks, although this # should also be caught with a positive -csci value) - && $mate_index_to_go[$i_terminal] < 0 + && !defined( $mate_index_to_go[$i_terminal] ) # ..and either && ( @@ -26500,38 +30245,35 @@ sub add_closing_side_comment { } $cscw_block_comment = "## perltidy -cscw $timestamp: $tokens_to_go[$max_index_to_go]"; -## "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]"; } } - else { - # No differences.. we can safely delete old comment if we - # are below the threshold - if ( $block_line_count < - $rOpts->{'closing-side-comment-interval'} ) + # No differences.. we can safely delete old comment if we + # are below the threshold + elsif ( $block_line_count < + $rOpts->{'closing-side-comment-interval'} ) + { + # Since the line breaks have already been set, we have + # to remove the token from the _to_go array and also + # from the line range (this fixes issue c081). + # Note that we can only get here if -cscw has been set + # because otherwise the old comment is already deleted. + $token = undef; + my $ibeg = $ri_first->[-1]; + my $iend = $ri_last->[-1]; + if ( $iend > $ibeg + && $iend == $max_index_to_go + && $types_to_go[$max_index_to_go] eq '#' ) { - # Since the line breaks have already been set, we have - # to remove the token from the _to_go array and also - # from the line range (this fixes issue c081). - # Note that we can only get here if -cscw has been set - # because otherwise the old comment is already deleted. - $token = undef; - my $ibeg = $ri_first->[-1]; - my $iend = $ri_last->[-1]; + $iend--; + $max_index_to_go--; if ( $iend > $ibeg - && $iend == $max_index_to_go - && $types_to_go[$max_index_to_go] eq '#' ) + && $types_to_go[$max_index_to_go] eq 'b' ) { $iend--; $max_index_to_go--; - if ( $iend > $ibeg - && $types_to_go[$max_index_to_go] eq 'b' ) - { - $iend--; - $max_index_to_go--; - } - $ri_last->[-1] = $iend; } + $ri_last->[-1] = $iend; } } } @@ -26573,7 +30315,7 @@ sub wrapup { # This is the last routine called when a file is formatted. # Flush buffer and write any informative messages - my $self = shift; + my ( $self, $severe_error ) = @_; $self->flush(); my $file_writer_object = $self->[_file_writer_object_]; @@ -26712,7 +30454,10 @@ sub wrapup { $file_writer_object->report_line_length_errors(); - $self->[_converged_] = $file_writer_object->get_convergence_check() + # Define the formatter self-check for convergence. + $self->[_converged_] = + $severe_error + || $file_writer_object->get_convergence_check() || $rOpts->{'indent-only'}; return;