+ # This provides a quick way to create a new token by
+ # slightly modifying an existing token.
+ my ( $rold_token, $type, $token ) = @_;
+ if ( !defined($token) ) {
+ if ( $type eq 'b' ) {
+ $token = SPACE;
+ }
+ elsif ( $type eq 'q' ) {
+ $token = EMPTY_STRING;
+ }
+ elsif ( $type eq '->' ) {
+ $token = '->';
+ }
+ elsif ( $type eq ';' ) {
+ $token = ';';
+ }
+ elsif ( $type eq ',' ) {
+ $token = ',';
+ }
+ else {
+
+ # Unexpected type ... this sub will work as long as both $token and
+ # $type are defined, but we should catch any unexpected types during
+ # development.
+ if (DEVEL_MODE) {
+ Fault(<<EOM);
+sub 'copy_token_as_type' received token type '$type' but expects just one of: 'b' 'q' '->' or ';'
+EOM
+ }
+
+ # Shouldn't get here
+ $token = $type;
+ }
+ }
+
+ my @rnew_token = @{$rold_token};
+ $rnew_token[_TYPE_] = $type;
+ $rnew_token[_TOKEN_] = $token;
+ $rnew_token[_TYPE_SEQUENCE_] = EMPTY_STRING;
+ return \@rnew_token;
+} ## end sub copy_token_as_type
+
+sub K_next_code {
+ my ( $self, $KK, $rLL ) = @_;
+
+ # return the index K of the next nonblank, non-comment token
+ return unless ( defined($KK) && $KK >= 0 );
+
+ # use the standard array unless given otherwise
+ $rLL = $self->[_rLL_] unless ( defined($rLL) );
+ my $Num = @{$rLL};
+ my $Knnb = $KK + 1;
+ while ( $Knnb < $Num ) {
+ if ( !defined( $rLL->[$Knnb] ) ) {
+
+ # We seem to have encountered a gap in our array.
+ # This shouldn't happen because sub write_line() pushed
+ # items into the $rLL array.
+ Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
+ return;
+ }
+ if ( $rLL->[$Knnb]->[_TYPE_] ne 'b'
+ && $rLL->[$Knnb]->[_TYPE_] ne '#' )
+ {
+ return $Knnb;
+ }
+ $Knnb++;
+ }
+ return;
+} ## end sub K_next_code
+
+sub K_next_nonblank {
+ my ( $self, $KK, $rLL ) = @_;
+
+ # return the index K of the next nonblank token, or
+ # return undef if none
+ return unless ( defined($KK) && $KK >= 0 );
+
+ # The third arg allows this routine to be used on any array. This is
+ # useful in sub respace_tokens when we are copying tokens from an old $rLL
+ # to a new $rLL array. But usually the third arg will not be given and we
+ # will just use the $rLL array in $self.
+ $rLL = $self->[_rLL_] unless ( defined($rLL) );
+ my $Num = @{$rLL};
+ my $Knnb = $KK + 1;
+ return unless ( $Knnb < $Num );
+ return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
+ return unless ( ++$Knnb < $Num );
+ return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' );
+
+ # Backup loop. Very unlikely to get here; it means we have neighboring
+ # blanks in the token stream.
+ $Knnb++;
+ while ( $Knnb < $Num ) {
+
+ # Safety check, this fault shouldn't happen: The $rLL array is the
+ # main array of tokens, so all entries should be used. It is
+ # initialized in sub write_line, and then re-initialized by sub
+ # store_token() within sub respace_tokens. Tokens are pushed on
+ # so there shouldn't be any gaps.
+ if ( !defined( $rLL->[$Knnb] ) ) {
+ Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE);
+ return;
+ }
+ if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
+ $Knnb++;
+ }
+ return;
+} ## end sub K_next_nonblank
+
+sub K_previous_code {
+
+ # return the index K of the previous nonblank, non-comment token
+ # Call with $KK=undef to start search at the top of the array
+ my ( $self, $KK, $rLL ) = @_;
+
+ # use the standard array unless given otherwise
+ $rLL = $self->[_rLL_] unless ( defined($rLL) );
+ my $Num = @{$rLL};
+ if ( !defined($KK) ) { $KK = $Num }
+ elsif ( $KK > $Num ) {
+
+ # This fault can be caused by a programming error in which a bad $KK is
+ # given. The caller should make the first call with KK_new=undef to
+ # avoid this error.
+ Fault(
+"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
+ ) if (DEVEL_MODE);
+ return;
+ }
+ my $Kpnb = $KK - 1;
+ while ( $Kpnb >= 0 ) {
+ if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b'
+ && $rLL->[$Kpnb]->[_TYPE_] ne '#' )
+ {
+ return $Kpnb;
+ }
+ $Kpnb--;
+ }
+ return;
+} ## end sub K_previous_code
+
+sub K_previous_nonblank {
+
+ # return index of previous nonblank token before item K;
+ # Call with $KK=undef to start search at the top of the array
+ my ( $self, $KK, $rLL ) = @_;
+
+ # use the standard array unless given otherwise
+ $rLL = $self->[_rLL_] unless ( defined($rLL) );
+ my $Num = @{$rLL};
+ if ( !defined($KK) ) { $KK = $Num }
+ elsif ( $KK > $Num ) {
+
+ # This fault can be caused by a programming error in which a bad $KK is
+ # given. The caller should make the first call with KK_new=undef to
+ # avoid this error.
+ Fault(
+"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
+ ) if (DEVEL_MODE);
+ return;
+ }
+ my $Kpnb = $KK - 1;
+ return unless ( $Kpnb >= 0 );
+ return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
+ return unless ( --$Kpnb >= 0 );
+ return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' );
+
+ # Backup loop. We should not get here unless some routine
+ # slipped repeated blanks into the token stream.
+ return unless ( --$Kpnb >= 0 );
+ while ( $Kpnb >= 0 ) {
+ if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
+ $Kpnb--;
+ }
+ return;
+} ## end sub K_previous_nonblank
+
+sub parent_seqno_by_K {
+
+ # Return the sequence number of the parent container of token K, if any.
+
+ my ( $self, $KK ) = @_;
+ my $rLL = $self->[_rLL_];
+
+ # The task is to jump forward to the next container token
+ # and use the sequence number of either it or its parent.
+
+ # For example, consider the following with seqno=5 of the '[' and ']'
+ # being called with index K of the first token of each line:
+
+ # # result
+ # push @tests, # -
+ # [ # -
+ # sub { 99 }, 'do {&{%s} for 1,2}', # 5
+ # '(&{})(&{})', undef, # 5
+ # [ 2, 2, 0 ], 0 # 5
+ # ]; # -
+
+ # NOTE: The ending parent will be SEQ_ROOT for a balanced file. For
+ # unbalanced files, last sequence number will either be undefined or it may
+ # be at a deeper level. In either case we will just return SEQ_ROOT to
+ # have a defined value and allow formatting to proceed.
+ my $parent_seqno = SEQ_ROOT;
+ my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+ if ($type_sequence) {
+ $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
+ }
+ else {
+ my $Kt = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_];
+ if ( defined($Kt) ) {
+ $type_sequence = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
+ my $type = $rLL->[$Kt]->[_TYPE_];
+
+ # if next container token is closing, it is the parent seqno
+ if ( $is_closing_type{$type} ) {
+ $parent_seqno = $type_sequence;
+ }
+
+ # otherwise we want its parent container
+ else {
+ $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
+ }
+ }
+ }
+ $parent_seqno = SEQ_ROOT unless ( defined($parent_seqno) );
+ return $parent_seqno;
+} ## end sub parent_seqno_by_K
+
+sub is_in_block_by_i {
+ my ( $self, $i ) = @_;
+
+ # returns true if
+ # token at i is contained in a BLOCK
+ # or is at root level
+ # or there is some kind of error (i.e. unbalanced file)
+ # returns false otherwise
+
+ if ( $i < 0 ) {
+ DEVEL_MODE && Fault("Bad call, i='$i'\n");
+ return 1;
+ }
+
+ my $seqno = $parent_seqno_to_go[$i];
+ return 1 if ( !$seqno || $seqno eq SEQ_ROOT );
+ return 1 if ( $self->[_rblock_type_of_seqno_]->{$seqno} );
+ return;
+} ## end sub is_in_block_by_i
+
+sub is_in_list_by_i {
+ my ( $self, $i ) = @_;
+
+ # returns true if token at i is contained in a LIST
+ # returns false otherwise
+ my $seqno = $parent_seqno_to_go[$i];
+ return unless ( $seqno && $seqno ne SEQ_ROOT );
+ if ( $self->[_ris_list_by_seqno_]->{$seqno} ) {
+ return 1;
+ }
+ return;
+} ## end sub is_in_list_by_i
+
+sub is_list_by_K {
+
+ # Return true if token K is in a list
+ my ( $self, $KK ) = @_;
+
+ my $parent_seqno = $self->parent_seqno_by_K($KK);
+ return unless defined($parent_seqno);
+ return $self->[_ris_list_by_seqno_]->{$parent_seqno};
+}
+
+sub is_list_by_seqno {
+
+ # Return true if the immediate contents of a container appears to be a
+ # list.
+ my ( $self, $seqno ) = @_;
+ return unless defined($seqno);
+ return $self->[_ris_list_by_seqno_]->{$seqno};
+}
+
+sub resync_lines_and_tokens {
+
+ my $self = shift;
+
+ # Re-construct the arrays of tokens associated with the original input
+ # lines since they have probably changed due to inserting and deleting
+ # blanks and a few other tokens.
+
+ # Return paremeters:
+ # set severe_error = true if processing needs to terminate
+ my $severe_error;
+ my $rqw_lines = [];
+
+ my $rLL = $self->[_rLL_];
+ my $Klimit = $self->[_Klimit_];
+ my $rlines = $self->[_rlines_];
+ my @Krange_code_without_comments;
+ my @Klast_valign_code;
+
+ # This is the next token and its line index:
+ my $Knext = 0;
+ my $Kmax = defined($Klimit) ? $Klimit : -1;
+
+ # Verify that old line indexes are in still order. If this error occurs,
+ # check locations where sub 'respace_tokens' creates new tokens (like
+ # blank spaces). It must have set a bad old line index.
+ if ( DEVEL_MODE && defined($Klimit) ) {
+ my $iline = $rLL->[0]->[_LINE_INDEX_];
+ foreach my $KK ( 1 .. $Klimit ) {
+ my $iline_last = $iline;
+ $iline = $rLL->[$KK]->[_LINE_INDEX_];
+ if ( $iline < $iline_last ) {
+ my $KK_m = $KK - 1;
+ my $token_m = $rLL->[$KK_m]->[_TOKEN_];
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ my $type_m = $rLL->[$KK_m]->[_TYPE_];
+ my $type = $rLL->[$KK]->[_TYPE_];
+ Fault(<<EOM);
+Line indexes out of order at index K=$KK:
+at KK-1 =$KK_m: old line=$iline_last, type='$type_m', token='$token_m'
+at KK =$KK: old line=$iline, type='$type', token='$token',
+EOM
+ }
+ }
+ }
+
+ my $iline = -1;
+ foreach my $line_of_tokens ( @{$rlines} ) {
+ $iline++;
+ my $line_type = $line_of_tokens->{_line_type};
+ if ( $line_type eq 'CODE' ) {
+
+ # Get the old number of tokens on this line
+ my $rK_range_old = $line_of_tokens->{_rK_range};
+ my ( $Kfirst_old, $Klast_old ) = @{$rK_range_old};
+ my $Kdiff_old = 0;
+ if ( defined($Kfirst_old) ) {
+ $Kdiff_old = $Klast_old - $Kfirst_old;
+ }
+
+ # Find the range of NEW K indexes for the line:
+ # $Kfirst = index of first token on line
+ # $Klast = index of last token on line
+ my ( $Kfirst, $Klast );
+
+ my $Knext_beg = $Knext; # this will be $Kfirst if we find tokens
+
+ # Optimization: Although the actual K indexes may be completely
+ # changed after respacing, the number of tokens on any given line
+ # will often be nearly unchanged. So we will see if we can start
+ # our search by guessing that the new line has the same number
+ # of tokens as the old line.
+ my $Knext_guess = $Knext + $Kdiff_old;
+ if ( $Knext_guess > $Knext
+ && $Knext_guess < $Kmax
+ && $rLL->[$Knext_guess]->[_LINE_INDEX_] <= $iline )
+ {
+
+ # the guess is good, so we can start our search here
+ $Knext = $Knext_guess + 1;
+ }
+
+ while ($Knext <= $Kmax
+ && $rLL->[$Knext]->[_LINE_INDEX_] <= $iline )
+ {
+ $Knext++;
+ }
+
+ if ( $Knext > $Knext_beg ) {
+
+ $Klast = $Knext - 1;
+
+ # Delete any terminal blank token
+ if ( $rLL->[$Klast]->[_TYPE_] eq 'b' ) { $Klast -= 1 }
+
+ if ( $Klast < $Knext_beg ) {
+ $Klast = undef;
+ }
+ else {
+
+ $Kfirst = $Knext_beg;
+
+ # Save ranges of non-comment code. This will be used by
+ # sub keep_old_line_breaks.
+ if ( $rLL->[$Kfirst]->[_TYPE_] ne '#' ) {
+ push @Krange_code_without_comments, [ $Kfirst, $Klast ];
+ }
+
+ # Only save ending K indexes of code types which are blank
+ # or 'VER'. These will be used for a convergence check.
+ # See related code in sub 'convey_batch_to_vertical_aligner'
+ my $CODE_type = $line_of_tokens->{_code_type};
+ if ( !$CODE_type
+ || $CODE_type eq 'VER' )
+ {
+ push @Klast_valign_code, $Klast;
+ }
+ }
+ }
+
+ # It is only safe to trim the actual line text if the input
+ # line had a terminal blank token. Otherwise, we may be
+ # in a quote.
+ if ( $line_of_tokens->{_ended_in_blank_token} ) {
+ $line_of_tokens->{_line_text} =~ s/\s+$//;
+ }
+ $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
+
+ # Deleting semicolons can create new empty code lines
+ # which should be marked as blank
+ if ( !defined($Kfirst) ) {
+ my $CODE_type = $line_of_tokens->{_code_type};
+ if ( !$CODE_type ) {
+ $line_of_tokens->{_code_type} = 'BL';
+ }
+ }
+ else {
+
+ #---------------------------------------------------
+ # save indexes of all lines with a 'q' at either end
+ # for later use by sub find_multiline_qw
+ #---------------------------------------------------
+ if ( $rLL->[$Kfirst]->[_TYPE_] eq 'q'
+ || $rLL->[$Klast]->[_TYPE_] eq 'q' )
+ {
+ push @{$rqw_lines}, $iline;
+ }
+ }
+ }
+ }
+
+ # There shouldn't be any nodes beyond the last one. This routine is
+ # relinking lines and tokens after the tokens have been respaced. A fault
+ # here indicates some kind of bug has been introduced into the above loops.
+ # There is not good way to keep going; we better stop here.
+ if ( $Knext <= $Kmax ) {
+ Fault_Warn(
+ "unexpected tokens at end of file when reconstructing lines");
+ $severe_error = 1;
+ return ( $severe_error, $rqw_lines );
+ }
+ $self->[_rKrange_code_without_comments_] = \@Krange_code_without_comments;
+
+ # Setup the convergence test in the FileWriter based on line-ending indexes
+ my $file_writer_object = $self->[_file_writer_object_];
+ $file_writer_object->setup_convergence_test( \@Klast_valign_code );
+
+ # Mark essential old breakpoints if combination -iob -lp is used. These
+ # two options do not work well together, but we can avoid turning -iob off
+ # by ignoring -iob at certain essential line breaks.
+ # Fixes cases b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058
+ if ( $rOpts_ignore_old_breakpoints && $rOpts_line_up_parentheses ) {
+ my %is_assignment_or_fat_comma = %is_assignment;
+ $is_assignment_or_fat_comma{'=>'} = 1;
+ my $ris_essential_old_breakpoint =
+ $self->[_ris_essential_old_breakpoint_];
+ my ( $Kfirst, $Klast );
+ foreach my $line_of_tokens ( @{$rlines} ) {
+ my $line_type = $line_of_tokens->{_line_type};
+ if ( $line_type ne 'CODE' ) {
+ ( $Kfirst, $Klast ) = ( undef, undef );
+ next;
+ }
+ my ( $Kfirst_prev, $Klast_prev ) = ( $Kfirst, $Klast );
+ ( $Kfirst, $Klast ) = @{ $line_of_tokens->{_rK_range} };
+
+ next unless defined($Klast_prev);
+ next unless defined($Kfirst);
+ my $type_last = $rLL->[$Klast_prev]->[_TOKEN_];
+ my $type_first = $rLL->[$Kfirst]->[_TOKEN_];
+ next
+ unless ( $is_assignment_or_fat_comma{$type_last}
+ || $is_assignment_or_fat_comma{$type_first} );
+ $ris_essential_old_breakpoint->{$Klast_prev} = 1;
+ }
+ }
+ return ( $severe_error, $rqw_lines );
+} ## end sub resync_lines_and_tokens
+
+sub keep_old_line_breaks {
+
+ # Called once per file to find and mark any old line breaks which
+ # should be kept. We will be translating the input hashes into
+ # token indexes.
+
+ # A flag is set as follows:
+ # = 1 make a hard break (flush the current batch)
+ # best for something like leading commas (-kbb=',')
+ # = 2 make a soft break (keep building current batch)
+ # best for something like leading ->
+
+ my ($self) = @_;
+
+ my $rLL = $self->[_rLL_];
+ my $rKrange_code_without_comments =
+ $self->[_rKrange_code_without_comments_];
+ my $rbreak_before_Kfirst = $self->[_rbreak_before_Kfirst_];
+ my $rbreak_after_Klast = $self->[_rbreak_after_Klast_];
+ my $rwant_container_open = $self->[_rwant_container_open_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $ris_broken_container = $self->[_ris_broken_container_];
+ my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
+
+ # This code moved here from sub break_lists to fix b1120
+ if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
+ foreach my $item ( @{$rKrange_code_without_comments} ) {
+ my ( $Kfirst, $Klast ) = @{$item};
+ my $type = $rLL->[$Kfirst]->[_TYPE_];
+ my $token = $rLL->[$Kfirst]->[_TOKEN_];
+
+ # leading '->' use a value of 2 which causes a soft
+ # break rather than a hard break
+ if ( $type eq '->' ) {
+ $rbreak_before_Kfirst->{$Kfirst} = 2;
+ }
+
+ # leading ')->' use a special flag to insure that both
+ # opening and closing parens get opened
+ # Fix for b1120: only for parens, not braces
+ elsif ( $token eq ')' ) {
+ my $Kn = $self->K_next_nonblank($Kfirst);
+ next
+ unless ( defined($Kn)
+ && $Kn <= $Klast
+ && $rLL->[$Kn]->[_TYPE_] eq '->' );
+ my $seqno = $rLL->[$Kfirst]->[_TYPE_SEQUENCE_];
+ next unless ($seqno);
+
+ # Note: in previous versions there was a fix here to avoid
+ # instability between conflicting -bom and -pvt or -pvtc flags.
+ # The fix skipped -bom for a small line difference. But this
+ # was troublesome, and instead the fix has been moved to
+ # sub set_vertical_tightness_flags where priority is given to
+ # the -bom flag over -pvt and -pvtc flags. Both opening and
+ # closing paren flags are involved because even though -bom only
+ # requests breaking before the closing paren, automated logic
+ # opens the opening paren when the closing paren opens.
+ # Relevant cases are b977, b1215, b1270, b1303
+
+ $rwant_container_open->{$seqno} = 1;
+ }
+ }
+ }
+
+ return unless ( %keep_break_before_type || %keep_break_after_type );
+
+ my $check_for_break = sub {
+ my ( $KK, $rkeep_break_hash, $rbreak_hash ) = @_;
+ my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+
+ # non-container tokens use the type as the key
+ if ( !$seqno ) {
+ my $type = $rLL->[$KK]->[_TYPE_];
+ if ( $rkeep_break_hash->{$type} ) {
+ $rbreak_hash->{$KK} = 1;
+ }
+ }
+
+ # container tokens use the token as the key
+ else {
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ my $flag = $rkeep_break_hash->{$token};
+ if ($flag) {
+
+ my $match = $flag eq '1' || $flag eq '*';
+
+ # check for special matching codes
+ if ( !$match ) {
+ if ( $token eq '(' || $token eq ')' ) {
+ $match =
+ $self->match_paren_control_flag( $seqno, $flag );
+ }
+ elsif ( $token eq '{' || $token eq '}' ) {
+
+ # These tentative codes 'b' and 'B' for brace types are
+ # placeholders for possible future brace types. They
+ # are not documented and may be changed.
+ my $block_type =
+ $self->[_rblock_type_of_seqno_]->{$seqno};
+ if ( $flag eq 'b' ) { $match = $block_type }
+ elsif ( $flag eq 'B' ) { $match = !$block_type }
+ else {
+ # unknown code - no match
+ }
+ }
+ }
+ $rbreak_hash->{$KK} = 1 if ($match);
+ }
+ }
+ };
+
+ foreach my $item ( @{$rKrange_code_without_comments} ) {
+ my ( $Kfirst, $Klast ) = @{$item};
+ $check_for_break->(
+ $Kfirst, \%keep_break_before_type, $rbreak_before_Kfirst
+ );
+ $check_for_break->(
+ $Klast, \%keep_break_after_type, $rbreak_after_Klast
+ );
+ }
+ return;
+} ## end sub keep_old_line_breaks
+
+sub weld_containers {
+
+ # Called once per file to do any welding operations requested by --weld*
+ # flags.
+ my ($self) = @_;
+
+ # This count is used to eliminate needless calls for weld checks elsewhere
+ $total_weld_count = 0;
+
+ return if ( $rOpts->{'indent-only'} );
+ return unless ($rOpts_add_newlines);
+
+ # Important: sub 'weld_cuddled_blocks' must be called before
+ # sub 'weld_nested_containers'. This is because the cuddled option needs to
+ # use the original _LEVEL_ values of containers, but the weld nested
+ # containers changes _LEVEL_ of welded containers.
+
+ # Here is a good test case to be sure that both cuddling and welding
+ # are working and not interfering with each other: <<snippets/ce_wn1.in>>
+
+ # perltidy -wn -ce
+
+ # if ($BOLD_MATH) { (
+ # $labels, $comment,
+ # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
+ # ) } else { (
+ # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
+ # $after
+ # ) }
+
+ $self->weld_cuddled_blocks() if ( %{$rcuddled_block_types} );
+
+ if ( $rOpts->{'weld-nested-containers'} ) {
+
+ $self->weld_nested_containers();
+
+ $self->weld_nested_quotes();
+ }
+
+ #-------------------------------------------------------------
+ # All welding is done. Finish setting up weld data structures.
+ #-------------------------------------------------------------
+
+ my $rLL = $self->[_rLL_];
+ my $rK_weld_left = $self->[_rK_weld_left_];
+ my $rK_weld_right = $self->[_rK_weld_right_];
+ my $rweld_len_right_at_K = $self->[_rweld_len_right_at_K_];
+
+ my @K_multi_weld;
+ my @keys = keys %{$rK_weld_right};
+ $total_weld_count = @keys;
+
+ # First pass to process binary welds.
+ # This loop is processed in unsorted order for efficiency.
+ foreach my $Kstart (@keys) {
+ my $Kend = $rK_weld_right->{$Kstart};
+
+ # An error here would be due to an incorrect initialization introduced
+ # in one of the above weld routines, like sub weld_nested.
+ if ( $Kend <= $Kstart ) {
+ Fault("Bad weld link: Kend=$Kend <= Kstart=$Kstart\n")
+ if (DEVEL_MODE);
+ next;
+ }
+
+ # Set weld values for all tokens this welded pair
+ foreach ( $Kstart + 1 .. $Kend ) {
+ $rK_weld_left->{$_} = $Kstart;
+ }
+ foreach my $Kx ( $Kstart .. $Kend - 1 ) {
+ $rK_weld_right->{$Kx} = $Kend;
+ $rweld_len_right_at_K->{$Kx} =
+ $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
+ }
+
+ # Remember the leftmost index of welds which continue to the right
+ if ( defined( $rK_weld_right->{$Kend} )
+ && !defined( $rK_weld_left->{$Kstart} ) )
+ {
+ push @K_multi_weld, $Kstart;
+ }
+ }
+
+ # Second pass to process chains of welds (these are rare).
+ # This has to be processed in sorted order.
+ if (@K_multi_weld) {
+ my $Kend = -1;
+ foreach my $Kstart ( sort { $a <=> $b } @K_multi_weld ) {
+
+ # Skip any interior K which was originally missing a left link
+ next if ( $Kstart <= $Kend );
+
+ # Find the end of this chain
+ $Kend = $rK_weld_right->{$Kstart};
+ my $Knext = $rK_weld_right->{$Kend};
+ while ( defined($Knext) ) {
+ $Kend = $Knext;
+ $Knext = $rK_weld_right->{$Kend};
+ }
+
+ # Set weld values this chain
+ foreach ( $Kstart + 1 .. $Kend ) {
+ $rK_weld_left->{$_} = $Kstart;
+ }
+ foreach my $Kx ( $Kstart .. $Kend - 1 ) {
+ $rK_weld_right->{$Kx} = $Kend;
+ $rweld_len_right_at_K->{$Kx} =
+ $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
+ }
+ }
+ }
+
+ return;
+} ## end sub weld_containers
+
+sub cumulative_length_before_K {
+ my ( $self, $KK ) = @_;
+ my $rLL = $self->[_rLL_];
+ return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
+}
+
+sub weld_cuddled_blocks {
+ my ($self) = @_;
+
+ # Called once per file to handle cuddled formatting
+
+ my $rK_weld_left = $self->[_rK_weld_left_];
+ my $rK_weld_right = $self->[_rK_weld_right_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+
+ # This routine implements the -cb flag by finding the appropriate
+ # closing and opening block braces and welding them together.
+ return unless ( %{$rcuddled_block_types} );
+
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
+
+ my $rbreak_container = $self->[_rbreak_container_];
+ my $ris_cuddled_closing_brace = $self->[_ris_cuddled_closing_brace_];
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+
+ my $is_broken_block = sub {
+
+ # a block is broken if the input line numbers of the braces differ
+ # we can only cuddle between broken blocks
+ my ($seqno) = @_;
+ my $K_opening = $K_opening_container->{$seqno};
+ return unless ( defined($K_opening) );
+ my $K_closing = $K_closing_container->{$seqno};
+ return unless ( defined($K_closing) );
+ return $rbreak_container->{$seqno}
+ || $rLL->[$K_closing]->[_LINE_INDEX_] !=
+ $rLL->[$K_opening]->[_LINE_INDEX_];
+ };
+
+ # A stack to remember open chains at all levels: This is a hash rather than
+ # an array for safety because negative levels can occur in files with
+ # errors. This allows us to keep processing with negative levels.
+ # $in_chain{$level} = [$chain_type, $type_sequence];
+ my %in_chain;
+ my $CBO = $rOpts->{'cuddled-break-option'};
+
+ # loop over structure items to find cuddled pairs
+ my $level = 0;
+ my $KNEXT = $self->[_K_first_seq_item_];
+ while ( defined($KNEXT) ) {
+ my $KK = $KNEXT;
+ $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
+ my $rtoken_vars = $rLL->[$KK];
+ my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+ if ( !$type_sequence ) {
+ next if ( $KK == 0 ); # first token in file may not be container
+
+ # A fault here implies that an error was made in the little loop at
+ # the bottom of sub 'respace_tokens' which set the values of
+ # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the
+ # loop control lines above.
+ Fault("sequence = $type_sequence not defined at K=$KK")
+ if (DEVEL_MODE);
+ next;
+ }
+
+ # NOTE: we must use the original levels here. They can get changed
+ # by sub 'weld_nested_containers', so this routine must be called
+ # before sub 'weld_nested_containers'.
+ my $last_level = $level;
+ $level = $rtoken_vars->[_LEVEL_];
+
+ if ( $level < $last_level ) { $in_chain{$last_level} = undef }
+ elsif ( $level > $last_level ) { $in_chain{$level} = undef }
+
+ # We are only looking at code blocks
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $type = $rtoken_vars->[_TYPE_];
+ next unless ( $type eq $token );
+
+ if ( $token eq '{' ) {
+
+ my $block_type = $rblock_type_of_seqno->{$type_sequence};
+ if ( !$block_type ) {
+
+ # patch for unrecognized block types which may not be labeled
+ my $Kp = $self->K_previous_nonblank($KK);
+ while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
+ $Kp = $self->K_previous_nonblank($Kp);
+ }
+ next unless $Kp;
+ $block_type = $rLL->[$Kp]->[_TOKEN_];
+ }
+ if ( $in_chain{$level} ) {
+
+ # we are in a chain and are at an opening block brace.
+ # See if we are welding this opening brace with the previous
+ # block brace. Get their identification numbers:
+ my $closing_seqno = $in_chain{$level}->[1];
+ my $opening_seqno = $type_sequence;
+
+ # The preceding block must be on multiple lines so that its
+ # closing brace will start a new line.
+ if ( !$is_broken_block->($closing_seqno) ) {
+ next unless ( $CBO == 2 );
+ $rbreak_container->{$closing_seqno} = 1;
+ }
+
+ # We can weld the closing brace to its following word ..
+ my $Ko = $K_closing_container->{$closing_seqno};
+ my $Kon;
+ if ( defined($Ko) ) {
+ $Kon = $self->K_next_nonblank($Ko);
+ }
+
+ # ..unless it is a comment
+ if ( defined($Kon) && $rLL->[$Kon]->[_TYPE_] ne '#' ) {
+
+ # OK to weld these two tokens...
+ $rK_weld_right->{$Ko} = $Kon;
+ $rK_weld_left->{$Kon} = $Ko;
+
+ # Set flag that we want to break the next container
+ # so that the cuddled line is balanced.
+ $rbreak_container->{$opening_seqno} = 1
+ if ($CBO);
+
+ # Remember which braces are cuddled.
+ # The closing brace is used to set adjusted indentations.
+ # The opening brace is not yet used but might eventually
+ # be needed in setting adjusted indentation.
+ $ris_cuddled_closing_brace->{$closing_seqno} = 1;
+
+ }
+
+ }
+ else {
+
+ # We are not in a chain. Start a new chain if we see the
+ # starting block type.
+ if ( $rcuddled_block_types->{$block_type} ) {
+ $in_chain{$level} = [ $block_type, $type_sequence ];
+ }
+ else {
+ $block_type = '*';
+ $in_chain{$level} = [ $block_type, $type_sequence ];
+ }
+ }
+ }
+ elsif ( $token eq '}' ) {
+ if ( $in_chain{$level} ) {
+
+ # We are in a chain at a closing brace. See if this chain
+ # continues..
+ my $Knn = $self->K_next_code($KK);
+ next unless $Knn;
+
+ my $chain_type = $in_chain{$level}->[0];
+ my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
+ if (
+ $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
+ )
+ {
+
+ # Note that we do not weld yet because we must wait until
+ # we we are sure that an opening brace for this follows.
+ $in_chain{$level}->[1] = $type_sequence;
+ }
+ else { $in_chain{$level} = undef }
+ }
+ }
+ }
+ return;
+} ## end sub weld_cuddled_blocks
+
+sub find_nested_pairs {
+ my $self = shift;
+
+ # This routine is called once per file to do preliminary work needed for
+ # the --weld-nested option. This information is also needed for adding
+ # semicolons.
+
+ my $rLL = $self->[_rLL_];
+ return unless ( defined($rLL) && @{$rLL} );
+ my $Num = @{$rLL};
+
+ my $K_opening_container = $self->[_K_opening_container_];
+ my $K_closing_container = $self->[_K_closing_container_];
+ my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
+
+ # We define an array of pairs of nested containers
+ my @nested_pairs;
+
+ # Names of calling routines can either be marked as 'i' or 'w',
+ # and they may invoke a sub call with an '->'. We will consider
+ # any consecutive string of such types as a single unit when making
+ # weld decisions. We also allow a leading !
+ my $is_name_type = {
+ 'i' => 1,
+ 'w' => 1,
+ 'U' => 1,
+ '->' => 1,
+ '!' => 1,
+ };
+
+ # Loop over all closing container tokens
+ foreach my $inner_seqno ( keys %{$K_closing_container} ) {
+ my $K_inner_closing = $K_closing_container->{$inner_seqno};
+
+ # See if it is immediately followed by another, outer closing token
+ my $K_outer_closing = $K_inner_closing + 1;
+ $K_outer_closing += 1
+ if ( $K_outer_closing < $Num
+ && $rLL->[$K_outer_closing]->[_TYPE_] eq 'b' );
+
+ next unless ( $K_outer_closing < $Num );
+ my $outer_seqno = $rLL->[$K_outer_closing]->[_TYPE_SEQUENCE_];
+ next unless ($outer_seqno);
+ my $token_outer_closing = $rLL->[$K_outer_closing]->[_TOKEN_];
+ next unless ( $is_closing_token{$token_outer_closing} );
+
+ # Simple filter: No commas or semicolons in the outer container
+ my $rtype_count = $self->[_rtype_count_by_seqno_]->{$outer_seqno};
+ if ($rtype_count) {
+ next if ( $rtype_count->{','} || $rtype_count->{';'} );
+ }
+
+ # Now we have to check the opening tokens.
+ my $K_outer_opening = $K_opening_container->{$outer_seqno};
+ my $K_inner_opening = $K_opening_container->{$inner_seqno};
+ next unless defined($K_outer_opening) && defined($K_inner_opening);
+
+ my $inner_blocktype = $rblock_type_of_seqno->{$inner_seqno};
+ my $outer_blocktype = $rblock_type_of_seqno->{$outer_seqno};
+
+ # Verify that the inner opening token is the next container after the
+ # outer opening token.
+ my $K_io_check = $rLL->[$K_outer_opening]->[_KNEXT_SEQ_ITEM_];
+ next unless defined($K_io_check);
+ if ( $K_io_check != $K_inner_opening ) {
+
+ # The inner opening container does not immediately follow the outer
+ # opening container, but we may still allow a weld if they are
+ # separated by a sub signature. For example, we may have something
+ # like this, where $K_io_check may be at the first 'x' instead of
+ # 'io'. So we need to hop over the signature and see if we arrive
+ # at 'io'.
+
+ # oo io
+ # | x x |
+ # $obj->then( sub ( $code ) {
+ # ...
+ # return $c->render(text => '', status => $code);
+ # } );
+ # | |
+ # ic oc
+
+ next if ( !$inner_blocktype || $inner_blocktype ne 'sub' );
+ next if $rLL->[$K_io_check]->[_TOKEN_] ne '(';
+ my $seqno_signature = $rLL->[$K_io_check]->[_TYPE_SEQUENCE_];
+ next unless defined($seqno_signature);
+ my $K_signature_closing = $K_closing_container->{$seqno_signature};
+ next unless defined($K_signature_closing);
+ my $K_test = $rLL->[$K_signature_closing]->[_KNEXT_SEQ_ITEM_];
+ next
+ unless ( defined($K_test) && $K_test == $K_inner_opening );
+
+ # OK, we have arrived at 'io' in the above diagram. We should put
+ # a limit on the length or complexity of the signature here. There
+ # is no perfect way to do this, one way is to put a limit on token
+ # count. For consistency with older versions, we should allow a
+ # signature with a single variable to weld, but not with
+ # multiple variables. A single variable as in 'sub ($code) {' can
+ # have a $Kdiff of 2 to 4, depending on spacing.
+
+ # But two variables like 'sub ($v1,$v2) {' can have a diff of 4 to
+ # 7, depending on spacing. So to keep formatting consistent with
+ # previous versions, we will also avoid welding if there is a comma
+ # in the signature.
+
+ my $Kdiff = $K_signature_closing - $K_io_check;
+ next if ( $Kdiff > 4 );
+
+ # backup comma count test; but we cannot get here with Kdiff<=4
+ my $rtc = $self->[_rtype_count_by_seqno_]->{$seqno_signature};
+ next if ( $rtc && $rtc->{','} );
+ }
+
+ # Yes .. this is a possible nesting pair.
+ # They can be separated by a small amount.
+ my $K_diff = $K_inner_opening - $K_outer_opening;
+
+ # Count the number of nonblank characters separating them.
+ # Note: the $nonblank_count includes the inner opening container
+ # but not the outer opening container, so it will be >= 1.
+ if ( $K_diff < 0 ) { next } # Shouldn't happen
+ my $nonblank_count = 0;
+ my $type;
+ my $is_name;
+
+ # Here is an example of a long identifier chain which counts as a
+ # single nonblank here (this spans about 10 K indexes):
+ # if ( !Boucherot::SetOfConnections->new->handler->execute(
+ # ^--K_o_o ^--K_i_o
+ # @array) )
+ my $Kn_first = $K_outer_opening;
+ my $Kn_last_nonblank;
+ my $saw_comment;
+
+ foreach my $Kn ( $K_outer_opening + 1 .. $K_inner_opening ) {
+ next if ( $rLL->[$Kn]->[_TYPE_] eq 'b' );
+ if ( !$nonblank_count ) { $Kn_first = $Kn }
+ if ( $Kn eq $K_inner_opening ) { $nonblank_count++; last; }
+ $Kn_last_nonblank = $Kn;
+
+ # skip chain of identifier tokens
+ my $last_type = $type;
+ my $last_is_name = $is_name;
+ $type = $rLL->[$Kn]->[_TYPE_];
+ if ( $type eq '#' ) { $saw_comment = 1; last }
+ $is_name = $is_name_type->{$type};
+ next if ( $is_name && $last_is_name );
+
+ # do not count a possible leading - of bareword hash key
+ next if ( $type eq 'm' && !$last_type );
+
+ $nonblank_count++;
+ last if ( $nonblank_count > 2 );
+ }
+
+ # Do not weld across a comment .. fix for c058.
+ next if ($saw_comment);
+
+ # Patch for b1104: do not weld to a paren preceded by sort/map/grep
+ # because the special line break rules may cause a blinking state
+ if ( defined($Kn_last_nonblank)
+ && $rLL->[$K_inner_opening]->[_TOKEN_] eq '('
+ && $rLL->[$Kn_last_nonblank]->[_TYPE_] eq 'k' )
+ {
+ my $token = $rLL->[$Kn_last_nonblank]->[_TOKEN_];
+
+ # Turn off welding at sort/map/grep (
+ if ( $is_sort_map_grep{$token} ) { $nonblank_count = 10 }
+ }
+
+ my $token_oo = $rLL->[$K_outer_opening]->[_TOKEN_];
+
+ if (
+
+ # 1: adjacent opening containers, like: do {{
+ $nonblank_count == 1
+
+ # 2. anonymous sub + prototype or sig: )->then( sub ($code) {
+ # ... but it seems best not to stack two structural blocks, like
+ # this
+ # sub make_anon_with_my_sub { sub {
+ # because it probably hides the structure a little too much.
+ || ( $inner_blocktype
+ && $inner_blocktype eq 'sub'
+ && $rLL->[$Kn_first]->[_TOKEN_] eq 'sub'
+ && !$outer_blocktype )
+
+ # 3. short item following opening paren, like: fun( yyy (
+ || $nonblank_count == 2 && $token_oo eq '('
+
+ # 4. weld around fat commas, if requested (git #108), such as
+ # elf->call_method( method_name_foo => {
+ || ( $type eq '=>'
+ && $nonblank_count <= 3
+ && %weld_fat_comma_rules
+ && $weld_fat_comma_rules{$token_oo} )
+ )
+ {
+ push @nested_pairs,
+ [ $inner_seqno, $outer_seqno, $K_inner_closing ];
+ }
+ next;
+ }
+
+ # The weld routine expects the pairs in order in the form
+ # [$seqno_inner, $seqno_outer]
+ # And they must be in the same order as the inner closing tokens
+ # (otherwise, welds of three or more adjacent tokens will not work). The K
+ # value of this inner closing token has temporarily been stored for
+ # sorting.
+ @nested_pairs =
+
+ # Drop the K index after sorting (it would cause trouble downstream)
+ map { [ $_->[0], $_->[1] ] }
+
+ # Sort on the K values
+ sort { $a->[2] <=> $b->[2] } @nested_pairs;
+
+ return \@nested_pairs;
+} ## end sub find_nested_pairs
+
+sub match_paren_control_flag {
+
+ # Decide if this paren is excluded by user request:
+ # undef matches no parens
+ # '*' matches all parens
+ # 'k' matches only if the previous nonblank token is a perl builtin
+ # keyword (such as 'if', 'while'),
+ # 'K' matches if 'k' does not, meaning if the previous token is not a
+ # keyword.
+ # 'f' matches if the previous token is a function other than a keyword.
+ # 'F' matches if 'f' does not.
+ # 'w' matches if either 'k' or 'f' match.
+ # 'W' matches if 'w' does not.
+ my ( $self, $seqno, $flag, $rLL ) = @_;
+
+ # Input parameters:
+ # $seqno = sequence number of the container (should be paren)
+ # $flag = the flag which defines what matches
+ # $rLL = an optional alternate token list needed for respace operations
+ $rLL = $self->[_rLL_] unless ( defined($rLL) );
+
+ return 0 unless ( defined($flag) );
+ return 0 if $flag eq '0';
+ return 1 if $flag eq '1';
+ return 1 if $flag eq '*';
+ return 0 unless ($seqno);
+ my $K_opening = $self->[_K_opening_container_]->{$seqno};
+ return unless ( defined($K_opening) );
+
+ my ( $is_f, $is_k, $is_w );
+ my $Kp = $self->K_previous_nonblank( $K_opening, $rLL );
+ if ( defined($Kp) ) {
+ my $type_p = $rLL->[$Kp]->[_TYPE_];
+
+ # keyword?
+ $is_k = $type_p eq 'k';
+
+ # function call?