+ # handle line of code..
+ if ( $line_type eq 'CODE' ) {
+
+ my $CODE_type = $line_of_tokens->{_code_type};
+ $in_format_skipping_section = $CODE_type eq 'FS';
+
+ # Handle blank lines
+ if ( $CODE_type eq 'BL' ) {
+
+ # If keep-old-blank-lines is zero, we delete all
+ # old blank lines and let the blank line rules generate any
+ # needed blanks.
+ if ($rOpts_keep_old_blank_lines) {
+ $self->flush();
+ $file_writer_object->write_blank_code_line(
+ $rOpts_keep_old_blank_lines == 2 );
+ $last_line_leading_type = 'b';
+ }
+ next;
+ }
+ else {
+
+ # let logger see all non-blank lines of code
+ my $output_line_number = get_output_line_number();
+ ##$vertical_aligner_object->get_output_line_number();
+ black_box( $line_of_tokens, $output_line_number );
+ }
+
+ # Handle Format Skipping (FS) and Verbatim (VB) Lines
+ if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
+ $self->write_unindented_line("$input_line");
+ $file_writer_object->reset_consecutive_blank_lines();
+ next;
+ }
+
+ # Handle all other lines of code
+ $self->print_line_of_tokens($line_of_tokens);
+ }
+
+ # handle line of non-code..
+ else {
+
+ # set special flags
+ my $skip_line = 0;
+ my $tee_line = 0;
+ if ( $line_type =~ /^POD/ ) {
+
+ # Pod docs should have a preceding blank line. But stay
+ # out of __END__ and __DATA__ sections, because
+ # the user may be using this section for any purpose whatsoever
+ if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
+ if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; }
+ if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
+ if ( !$skip_line
+ && !$in_format_skipping_section
+ && $line_type eq 'POD_START'
+ && !$saw_END_or_DATA_ )
+ {
+ $self->want_blank_line();
+ }
+ }
+
+ # leave the blank counters in a predictable state
+ # after __END__ or __DATA__
+ elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
+ $file_writer_object->reset_consecutive_blank_lines();
+ $saw_END_or_DATA_ = 1;
+ }
+
+ # write unindented non-code line
+ if ( !$skip_line ) {
+ if ($tee_line) { $file_writer_object->tee_on() }
+ $self->write_unindented_line($input_line);
+ if ($tee_line) { $file_writer_object->tee_off() }
+ }
+ }
+ }
+ return;
+}
+
+{ ## Beginning of routine to check line hashes
+
+ my %valid_line_hash;
+
+ BEGIN {
+
+ # These keys are defined for each line in the formatter
+ # Each line must have exactly these quantities
+ my @valid_line_keys = qw(
+ _curly_brace_depth
+ _ending_in_quote
+ _guessed_indentation_level
+ _line_number
+ _line_text
+ _line_type
+ _paren_depth
+ _quote_character
+ _rK_range
+ _square_bracket_depth
+ _starting_in_quote
+ _ended_in_blank_token
+ _code_type
+
+ _ci_level_0
+ _level_0
+ _nesting_blocks_0
+ _nesting_tokens_0
+ );
+
+ @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys);
+ }
+
+ sub check_line_hashes {
+ my $self = shift;
+ $self->check_self_hash();
+ my $rlines = $self->{rlines};
+ foreach my $rline ( @{$rlines} ) {
+ my $iline = $rline->{_line_number};
+ my $line_type = $rline->{_line_type};
+ check_keys( $rline, \%valid_line_hash,
+ "Checkpoint: line number =$iline, line_type=$line_type", 1 );
+ }
+ return;
+ }
+
+} ## End check line hashes
+
+sub write_line {
+
+ # We are caching tokenized lines as they arrive and converting them to the
+ # format needed for the final formatting.
+ my ( $self, $line_of_tokens_old ) = @_;
+ my $rLL = $self->{rLL};
+ my $Klimit = $self->{Klimit};
+ my $rlines_new = $self->{rlines};
+
+ my $Kfirst;
+ my $line_of_tokens = {};
+ foreach my $key (
+ qw(
+ _curly_brace_depth
+ _ending_in_quote
+ _guessed_indentation_level
+ _line_number
+ _line_text
+ _line_type
+ _paren_depth
+ _quote_character
+ _square_bracket_depth
+ _starting_in_quote
+ )
+ )
+ {
+ $line_of_tokens->{$key} = $line_of_tokens_old->{$key};
+ }
+
+ # Data needed by Logger
+ $line_of_tokens->{_level_0} = 0;
+ $line_of_tokens->{_ci_level_0} = 0;
+ $line_of_tokens->{_nesting_blocks_0} = "";
+ $line_of_tokens->{_nesting_tokens_0} = "";
+
+ # Needed to avoid trimming quotes
+ $line_of_tokens->{_ended_in_blank_token} = undef;
+
+ my $line_type = $line_of_tokens_old->{_line_type};
+ my $input_line_no = $line_of_tokens_old->{_line_number} - 1;
+ if ( $line_type eq 'CODE' ) {
+
+ my $rtokens = $line_of_tokens_old->{_rtokens};
+ my $rtoken_type = $line_of_tokens_old->{_rtoken_type};
+ my $rblock_type = $line_of_tokens_old->{_rblock_type};
+ my $rcontainer_type = $line_of_tokens_old->{_rcontainer_type};
+ my $rcontainer_environment =
+ $line_of_tokens_old->{_rcontainer_environment};
+ my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence};
+ my $rlevels = $line_of_tokens_old->{_rlevels};
+ my $rslevels = $line_of_tokens_old->{_rslevels};
+ my $rci_levels = $line_of_tokens_old->{_rci_levels};
+ my $rnesting_blocks = $line_of_tokens_old->{_rnesting_blocks};
+ my $rnesting_tokens = $line_of_tokens_old->{_rnesting_tokens};
+
+ my $jmax = @{$rtokens} - 1;
+ if ( $jmax >= 0 ) {
+ $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
+ foreach my $j ( 0 .. $jmax ) {
+ my @tokary;
+ @tokary[
+ _TOKEN_, _TYPE_,
+ _BLOCK_TYPE_, _CONTAINER_TYPE_,
+ _CONTAINER_ENVIRONMENT_, _TYPE_SEQUENCE_,
+ _LEVEL_, _LEVEL_TRUE_,
+ _SLEVEL_, _CI_LEVEL_,
+ _LINE_INDEX_,
+ ]
+ = (
+ $rtokens->[$j], $rtoken_type->[$j],
+ $rblock_type->[$j], $rcontainer_type->[$j],
+ $rcontainer_environment->[$j], $rtype_sequence->[$j],
+ $rlevels->[$j], $rlevels->[$j],
+ $rslevels->[$j], $rci_levels->[$j],
+ $input_line_no,
+ );
+ ##push @token_array, \@tokary;
+ push @{$rLL}, \@tokary;
+ }
+
+ #$Klast=@{$rLL}-1;
+ $Klimit = @{$rLL} - 1;
+
+ # Need to remember if we can trim the input line
+ $line_of_tokens->{_ended_in_blank_token} =
+ $rtoken_type->[$jmax] eq 'b';
+
+ $line_of_tokens->{_level_0} = $rlevels->[0];
+ $line_of_tokens->{_ci_level_0} = $rci_levels->[0];
+ $line_of_tokens->{_nesting_blocks_0} = $rnesting_blocks->[0];
+ $line_of_tokens->{_nesting_tokens_0} = $rnesting_tokens->[0];
+ }
+ }
+
+ $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ];
+ $line_of_tokens->{_code_type} = "";
+ $self->{Klimit} = $Klimit;
+
+ push @{$rlines_new}, $line_of_tokens;
+ return;
+}
+
+BEGIN {
+
+ # initialize these global hashes, which control the use of
+ # whitespace around tokens:
+ #
+ # %binary_ws_rules
+ # %want_left_space
+ # %want_right_space
+ # %space_after_keyword
+ #
+ # Many token types are identical to the tokens themselves.
+ # See the tokenizer for a complete list. Here are some special types:
+ # k = perl keyword
+ # f = semicolon in for statement
+ # m = unary minus
+ # p = unary plus
+ # Note that :: is excluded since it should be contained in an identifier
+ # Note that '->' is excluded because it never gets space
+ # parentheses and brackets are excluded since they are handled specially
+ # curly braces are included but may be overridden by logic, such as
+ # newline logic.
+
+ # NEW_TOKENS: create a whitespace rule here. This can be as
+ # simple as adding your new letter to @spaces_both_sides, for
+ # example.
+
+ my @q;
+
+ @q = qw" L { ( [ ";
+ @is_opening_type{@q} = (1) x scalar(@q);
+
+ @q = qw" R } ) ] ";
+ @is_closing_type{@q} = (1) x scalar(@q);
+
+ my @spaces_both_sides = qw"
+ + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
+ .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
+ &&= ||= //= <=> A k f w F n C Y U G v
+ ";
+
+ my @spaces_left_side = qw"
+ t ! ~ m p { \ h pp mm Z j
+ ";
+ push( @spaces_left_side, '#' ); # avoids warning message
+
+ my @spaces_right_side = qw"
+ ; } ) ] R J ++ -- **=
+ ";
+ push( @spaces_right_side, ',' ); # avoids warning message
+
+ # Note that we are in a BEGIN block here. Later in processing
+ # the values of %want_left_space and %want_right_space
+ # may be overridden by any user settings specified by the
+ # -wls and -wrs parameters. However the binary_whitespace_rules
+ # are hardwired and have priority.
+ @want_left_space{@spaces_both_sides} =
+ (1) x scalar(@spaces_both_sides);
+ @want_right_space{@spaces_both_sides} =
+ (1) x scalar(@spaces_both_sides);
+ @want_left_space{@spaces_left_side} =
+ (1) x scalar(@spaces_left_side);
+ @want_right_space{@spaces_left_side} =
+ (-1) x scalar(@spaces_left_side);
+ @want_left_space{@spaces_right_side} =
+ (-1) x scalar(@spaces_right_side);
+ @want_right_space{@spaces_right_side} =
+ (1) x scalar(@spaces_right_side);
+ $want_left_space{'->'} = WS_NO;
+ $want_right_space{'->'} = WS_NO;
+ $want_left_space{'**'} = WS_NO;
+ $want_right_space{'**'} = WS_NO;
+ $want_right_space{'CORE::'} = WS_NO;
+
+ # These binary_ws_rules are hardwired and have priority over the above
+ # settings. It would be nice to allow adjustment by the user,
+ # but it would be complicated to specify.
+ #
+ # hash type information must stay tightly bound
+ # as in : ${xxxx}
+ $binary_ws_rules{'i'}{'L'} = WS_NO;
+ $binary_ws_rules{'i'}{'{'} = WS_YES;
+ $binary_ws_rules{'k'}{'{'} = WS_YES;
+ $binary_ws_rules{'U'}{'{'} = WS_YES;
+ $binary_ws_rules{'i'}{'['} = WS_NO;
+ $binary_ws_rules{'R'}{'L'} = WS_NO;
+ $binary_ws_rules{'R'}{'{'} = WS_NO;
+ $binary_ws_rules{'t'}{'L'} = WS_NO;
+ $binary_ws_rules{'t'}{'{'} = WS_NO;
+ $binary_ws_rules{'}'}{'L'} = WS_NO;
+ $binary_ws_rules{'}'}{'{'} = WS_NO;
+ $binary_ws_rules{'$'}{'L'} = WS_NO;
+ $binary_ws_rules{'$'}{'{'} = WS_NO;
+ $binary_ws_rules{'@'}{'L'} = WS_NO;
+ $binary_ws_rules{'@'}{'{'} = WS_NO;
+ $binary_ws_rules{'='}{'L'} = WS_YES;
+ $binary_ws_rules{'J'}{'J'} = WS_YES;
+
+ # the following includes ') {'
+ # as in : if ( xxx ) { yyy }
+ $binary_ws_rules{']'}{'L'} = WS_NO;
+ $binary_ws_rules{']'}{'{'} = WS_NO;
+ $binary_ws_rules{')'}{'{'} = WS_YES;
+ $binary_ws_rules{')'}{'['} = WS_NO;
+ $binary_ws_rules{']'}{'['} = WS_NO;
+ $binary_ws_rules{']'}{'{'} = WS_NO;
+ $binary_ws_rules{'}'}{'['} = WS_NO;
+ $binary_ws_rules{'R'}{'['} = WS_NO;
+
+ $binary_ws_rules{']'}{'++'} = WS_NO;
+ $binary_ws_rules{']'}{'--'} = WS_NO;
+ $binary_ws_rules{')'}{'++'} = WS_NO;
+ $binary_ws_rules{')'}{'--'} = WS_NO;
+
+ $binary_ws_rules{'R'}{'++'} = WS_NO;
+ $binary_ws_rules{'R'}{'--'} = WS_NO;
+
+ $binary_ws_rules{'i'}{'Q'} = WS_YES;
+ $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
+
+ # FIXME: we could to split 'i' into variables and functions
+ # and have no space for functions but space for variables. For now,
+ # I have a special patch in the special rules below
+ $binary_ws_rules{'i'}{'('} = WS_NO;
+
+ $binary_ws_rules{'w'}{'('} = WS_NO;
+ $binary_ws_rules{'w'}{'{'} = WS_YES;
+} ## end BEGIN block
+
+sub set_whitespace_flags {
+
+ # This routine examines each pair of nonblank tokens and
+ # sets a flag indicating if white space is needed.
+ #
+ # $rwhitespace_flags->[$j] is a flag indicating whether a white space
+ # BEFORE token $j is needed, with the following values:
+ #
+ # WS_NO = -1 do not want a space before token $j
+ # WS_OPTIONAL= 0 optional space or $j is a whitespace
+ # WS_YES = 1 want a space before token $j
+ #
+
+ my $self = shift;
+ my $rLL = $self->{rLL};
+
+ my $rwhitespace_flags = [];
+
+ my ( $last_token, $last_type, $last_block_type, $last_input_line_no,
+ $token, $type, $block_type, $input_line_no );
+ my $j_tight_closing_paren = -1;
+
+ $token = ' ';
+ $type = 'b';
+ $block_type = '';
+ $input_line_no = 0;
+ $last_token = ' ';
+ $last_type = 'b';
+ $last_block_type = '';
+ $last_input_line_no = 0;
+
+ my $jmax = @{$rLL} - 1;
+
+ my ($ws);
+
+ # This is some logic moved to a sub to avoid deep nesting of if stmts
+ my $ws_in_container = sub {
+
+ my ($j) = @_;
+ my $ws = WS_YES;
+ if ( $j + 1 > $jmax ) { return (WS_NO) }
+
+ # Patch to count '-foo' as single token so that
+ # each of $a{-foo} and $a{foo} and $a{'foo'} do
+ # not get spaces with default formatting.
+ my $j_here = $j;
+ ++$j_here
+ if ( $token eq '-'
+ && $last_token eq '{'
+ && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
+
+ # $j_next is where a closing token should be if
+ # the container has a single token
+ if ( $j_here + 1 > $jmax ) { return (WS_NO) }
+ my $j_next =
+ ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
+ ? $j_here + 2
+ : $j_here + 1;
+
+ if ( $j_next > $jmax ) { return WS_NO }
+ my $tok_next = $rLL->[$j_next]->[_TOKEN_];
+ my $type_next = $rLL->[$j_next]->[_TYPE_];
+
+ # for tightness = 1, if there is just one token
+ # within the matching pair, we will keep it tight
+ if (
+ $tok_next eq $matching_token{$last_token}
+
+ # but watch out for this: [ [ ] (misc.t)
+ && $last_token ne $token
+
+ # double diamond is usually spaced
+ && $token ne '<<>>'
+
+ )
+ {
+
+ # remember where to put the space for the closing paren
+ $j_tight_closing_paren = $j_next;
+ return (WS_NO);
+ }
+ return (WS_YES);
+ };
+
+ # main loop over all tokens to define the whitespace flags
+ for ( my $j = 0 ; $j <= $jmax ; $j++ ) {
+
+ my $rtokh = $rLL->[$j];
+
+ # Set a default
+ $rwhitespace_flags->[$j] = WS_OPTIONAL;
+
+ if ( $rtokh->[_TYPE_] eq 'b' ) {
+ next;
+ }
+
+ # set a default value, to be changed as needed
+ $ws = undef;
+ $last_token = $token;
+ $last_type = $type;
+ $last_block_type = $block_type;
+ $last_input_line_no = $input_line_no;
+ $token = $rtokh->[_TOKEN_];
+ $type = $rtokh->[_TYPE_];
+ $block_type = $rtokh->[_BLOCK_TYPE_];
+ $input_line_no = $rtokh->[_LINE_INDEX_];
+
+ #---------------------------------------------------------------
+ # Whitespace Rules Section 1:
+ # Handle space on the inside of opening braces.
+ #---------------------------------------------------------------
+
+ # /^[L\{\(\[]$/
+ if ( $is_opening_type{$last_type} ) {
+
+ $j_tight_closing_paren = -1;
+
+ # let us keep empty matched braces together: () {} []
+ # except for BLOCKS
+ if ( $token eq $matching_token{$last_token} ) {
+ if ($block_type) {
+ $ws = WS_YES;
+ }
+ else {
+ $ws = WS_NO;
+ }
+ }
+ else {
+
+ # we're considering the right of an opening brace
+ # tightness = 0 means always pad inside with space
+ # tightness = 1 means pad inside if "complex"
+ # tightness = 2 means never pad inside with space
+
+ my $tightness;
+ if ( $last_type eq '{'
+ && $last_token eq '{'
+ && $last_block_type )
+ {
+ $tightness = $rOpts_block_brace_tightness;
+ }
+ else { $tightness = $tightness{$last_token} }
+
+ #=============================================================
+ # Patch for test problem fabrice_bug.pl
+ # We must always avoid spaces around a bare word beginning
+ # with ^ as in:
+ # my $before = ${^PREMATCH};
+ # Because all of the following cause an error in perl:
+ # my $before = ${ ^PREMATCH };
+ # my $before = ${ ^PREMATCH};
+ # my $before = ${^PREMATCH };
+ # So if brace tightness flag is -bt=0 we must temporarily reset
+ # to bt=1. Note that here we must set tightness=1 and not 2 so
+ # that the closing space
+ # is also avoided (via the $j_tight_closing_paren flag in coding)
+ if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
+
+ #=============================================================
+
+ if ( $tightness <= 0 ) {
+ $ws = WS_YES;
+ }
+ elsif ( $tightness > 1 ) {
+ $ws = WS_NO;
+ }
+ else {
+ $ws = $ws_in_container->($j);
+ }
+ }
+ } # end setting space flag inside opening tokens
+ my $ws_1;
+ $ws_1 = $ws
+ if FORMATTER_DEBUG_FLAG_WHITE;
+
+ #---------------------------------------------------------------
+ # Whitespace Rules Section 2:
+ # Handle space on inside of closing brace pairs.
+ #---------------------------------------------------------------
+
+ # /[\}\)\]R]/
+ if ( $is_closing_type{$type} ) {
+
+ if ( $j == $j_tight_closing_paren ) {
+
+ $j_tight_closing_paren = -1;
+ $ws = WS_NO;
+ }
+ else {
+
+ if ( !defined($ws) ) {
+
+ my $tightness;
+ if ( $type eq '}' && $token eq '}' && $block_type ) {
+ $tightness = $rOpts_block_brace_tightness;
+ }
+ else { $tightness = $tightness{$token} }
+
+ $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
+ }
+ }
+ } # end setting space flag inside closing tokens
+
+ my $ws_2;
+ $ws_2 = $ws
+ if FORMATTER_DEBUG_FLAG_WHITE;
+
+ #---------------------------------------------------------------
+ # Whitespace Rules Section 3:
+ # Use the binary rule table.
+ #---------------------------------------------------------------
+ if ( !defined($ws) ) {
+ $ws = $binary_ws_rules{$last_type}{$type};
+ }
+ my $ws_3;
+ $ws_3 = $ws
+ if FORMATTER_DEBUG_FLAG_WHITE;
+
+ #---------------------------------------------------------------
+ # Whitespace Rules Section 4:
+ # Handle some special cases.
+ #---------------------------------------------------------------
+ if ( $token eq '(' ) {
+
+ # This will have to be tweaked as tokenization changes.
+ # We usually want a space at '} (', for example:
+ # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
+ #
+ # But not others:
+ # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
+ # At present, the above & block is marked as type L/R so this case
+ # won't go through here.
+ if ( $last_type eq '}' ) { $ws = WS_YES }
+
+ # NOTE: some older versions of Perl had occasional problems if
+ # spaces are introduced between keywords or functions and opening
+ # parens. So the default is not to do this except is certain
+ # cases. The current Perl seems to tolerate spaces.
+
+ # Space between keyword and '('
+ elsif ( $last_type eq 'k' ) {
+ $ws = WS_NO
+ unless ( $rOpts_space_keyword_paren
+ || $space_after_keyword{$last_token} );
+ }
+
+ # Space between function and '('
+ # -----------------------------------------------------
+ # 'w' and 'i' checks for something like:
+ # myfun( &myfun( ->myfun(
+ # -----------------------------------------------------
+ elsif (( $last_type =~ /^[wUG]$/ )
+ || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
+ {
+ $ws = WS_NO unless ($rOpts_space_function_paren);
+ }
+
+ # space between something like $i and ( in
+ # for $i ( 0 .. 20 ) {
+ # FIXME: eventually, type 'i' needs to be split into multiple
+ # token types so this can be a hardwired rule.
+ elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
+ $ws = WS_YES;
+ }
+
+ # allow constant function followed by '()' to retain no space
+ elsif ($last_type eq 'C'
+ && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
+ {
+ $ws = WS_NO;
+ }
+ }
+
+ # patch for SWITCH/CASE: make space at ']{' optional
+ # since the '{' might begin a case or when block
+ elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
+ $ws = WS_OPTIONAL;
+ }
+
+ # keep space between 'sub' and '{' for anonymous sub definition
+ if ( $type eq '{' ) {
+ if ( $last_token eq 'sub' ) {
+ $ws = WS_YES;
+ }
+
+ # this is needed to avoid no space in '){'
+ if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
+
+ # avoid any space before the brace or bracket in something like
+ # @opts{'a','b',...}
+ if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
+ $ws = WS_NO;
+ }
+ }
+
+ elsif ( $type eq 'i' ) {
+
+ # never a space before ->
+ if ( $token =~ /^\-\>/ ) {
+ $ws = WS_NO;
+ }
+ }
+
+ # retain any space between '-' and bare word
+ elsif ( $type eq 'w' || $type eq 'C' ) {
+ $ws = WS_OPTIONAL if $last_type eq '-';
+
+ # never a space before ->
+ if ( $token =~ /^\-\>/ ) {
+ $ws = WS_NO;
+ }
+ }
+
+ # retain any space between '-' and bare word
+ # example: avoid space between 'USER' and '-' here:
+ # $myhash{USER-NAME}='steve';
+ elsif ( $type eq 'm' || $type eq '-' ) {
+ $ws = WS_OPTIONAL if ( $last_type eq 'w' );
+ }
+
+ # always space before side comment
+ elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
+
+ # always preserver whatever space was used after a possible
+ # filehandle (except _) or here doc operator
+ if (
+ $type ne '#'
+ && ( ( $last_type eq 'Z' && $last_token ne '_' )
+ || $last_type eq 'h' )
+ )
+ {
+ $ws = WS_OPTIONAL;
+ }
+
+ # space_backslash_quote; RT #123774
+ # allow a space between a backslash and single or double quote
+ # to avoid fooling html formatters
+ elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ ) {
+ if ($rOpts_space_backslash_quote) {
+ if ( $rOpts_space_backslash_quote == 1 ) {
+ $ws = WS_OPTIONAL;
+ }
+ elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES }
+ else { } # shouldnt happen
+ }
+ else {
+ $ws = WS_NO;
+ }
+ }
+
+ my $ws_4;
+ $ws_4 = $ws
+ if FORMATTER_DEBUG_FLAG_WHITE;
+
+ #---------------------------------------------------------------
+ # Whitespace Rules Section 5:
+ # Apply default rules not covered above.
+ #---------------------------------------------------------------
+
+ # If we fall through to here, look at the pre-defined hash tables for
+ # the two tokens, and:
+ # if (they are equal) use the common value
+ # if (either is zero or undef) use the other
+ # if (either is -1) use it
+ # That is,
+ # left vs right
+ # 1 vs 1 --> 1
+ # 0 vs 0 --> 0
+ # -1 vs -1 --> -1
+ #
+ # 0 vs -1 --> -1
+ # 0 vs 1 --> 1
+ # 1 vs 0 --> 1
+ # -1 vs 0 --> -1
+ #
+ # -1 vs 1 --> -1
+ # 1 vs -1 --> -1
+ if ( !defined($ws) ) {
+ my $wl = $want_left_space{$type};
+ my $wr = $want_right_space{$last_type};
+ if ( !defined($wl) ) { $wl = 0 }
+ if ( !defined($wr) ) { $wr = 0 }
+ $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
+ }
+
+ if ( !defined($ws) ) {
+ $ws = 0;
+ write_diagnostics(
+ "WS flag is undefined for tokens $last_token $token\n");
+ }
+
+ # Treat newline as a whitespace. Otherwise, we might combine
+ # 'Send' and '-recipients' here according to the above rules:
+ # my $msg = new Fax::Send
+ # -recipients => $to,
+ # -data => $data;
+ if ( $ws == 0 && $input_line_no != $last_input_line_no ) { $ws = 1 }
+
+ if ( ( $ws == 0 )
+ && $j > 0
+ && $j < $jmax
+ && ( $last_type !~ /^[Zh]$/ ) )
+ {
+
+ # If this happens, we have a non-fatal but undesirable
+ # hole in the above rules which should be patched.
+ write_diagnostics(
+ "WS flag is zero for tokens $last_token $token\n");
+ }
+
+ $rwhitespace_flags->[$j] = $ws;
+
+ FORMATTER_DEBUG_FLAG_WHITE && do {
+ my $str = substr( $last_token, 0, 15 );
+ $str .= ' ' x ( 16 - length($str) );
+ if ( !defined($ws_1) ) { $ws_1 = "*" }
+ if ( !defined($ws_2) ) { $ws_2 = "*" }
+ if ( !defined($ws_3) ) { $ws_3 = "*" }
+ if ( !defined($ws_4) ) { $ws_4 = "*" }
+ print STDOUT
+"NEW WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
+ };
+ } ## end main loop
+
+ if ( $rOpts->{'tight-secret-operators'} ) {
+ new_secret_operator_whitespace( $rLL, $rwhitespace_flags );
+ }
+ return $rwhitespace_flags;
+} ## end sub set_whitespace_flags
+
+sub respace_tokens {
+
+ my $self = shift;
+ return if $rOpts->{'indent-only'};
+
+ # This routine makes all necessary changes to the tokenization after the
+ # file has been read. This consists mostly of inserting and deleting spaces
+ # according to the selected parameters. In a few cases non-space characters
+ # are added, deleted or modified.
+
+ # The old tokens are copied one-by-one, with changes, from the old
+ # linear storage array to a new array.
+
+ my $rLL = $self->{rLL};
+ my $Klimit_old = $self->{Klimit};
+ my $rlines = $self->{rlines};
+ my $rpaired_to_inner_container = $self->{rpaired_to_inner_container};
+
+ my $rLL_new = []; # This is the new array
+ my $KK = 0;
+ my $rtoken_vars;
+ my $Kmax = @{$rLL} - 1;
+
+ # Set the whitespace flags, which indicate the token spacing preference.
+ my $rwhitespace_flags = $self->set_whitespace_flags();
+
+ # we will be setting token lengths as we go
+ my $cumulative_length = 0;
+
+ # We also define these hash indexes giving container token array indexes
+ # as a function of the container sequence numbers. For example,
+ my $K_opening_container = {}; # opening [ { or (
+ my $K_closing_container = {}; # closing ] } or )
+ my $K_opening_ternary = {}; # opening ? of ternary
+ my $K_closing_ternary = {}; # closing : of ternary
+
+ # List of new K indexes of phantom semicolons
+ # This will be needed if we want to undo them for iterations
+ my $rK_phantom_semicolons = [];
+
+ # Temporary hashes for adding semicolons
+ ##my $rKfirst_new = {};
+
+ # a sub to link preceding nodes forward to a new node type
+ my $link_back = sub {
+ my ( $Ktop, $key ) = @_;
+
+ my $Kprev = $Ktop - 1;
+ while ( $Kprev >= 0
+ && !defined( $rLL_new->[$Kprev]->[$key] ) )
+ {
+ $rLL_new->[$Kprev]->[$key] = $Ktop;
+ $Kprev -= 1;
+ }
+ };
+
+ # A sub to store each token in the new array
+ # All new tokens must be stored by this sub so that it can update
+ # all data structures on the fly.
+ my $last_nonblank_type = ';';
+ my $store_token = sub {
+ my ($item) = @_;
+
+ # This will be the index of this item in the new array
+ my $KK_new = @{$rLL_new};
+
+ # check for a sequenced item (i.e., container or ?/:)
+ my $type_sequence = $item->[_TYPE_SEQUENCE_];
+ if ($type_sequence) {
+
+ $link_back->( $KK_new, _KNEXT_SEQ_ITEM_ );
+
+ my $token = $item->[_TOKEN_];
+ if ( $is_opening_token{$token} ) {
+
+ $K_opening_container->{$type_sequence} = $KK_new;
+ }
+ elsif ( $is_closing_token{$token} ) {
+
+ $K_closing_container->{$type_sequence} = $KK_new;
+ }
+
+ # These are not yet used but could be useful
+ else {
+ if ( $token eq '?' ) {
+ $K_opening_ternary->{$type_sequence} = $KK;
+ }
+ elsif ( $token eq ':' ) {
+ $K_closing_ternary->{$type_sequence} = $KK;
+ }
+ else {
+ # shouldn't happen
+ print STDERR "Ugh: shouldn't happen\n";
+ }
+ }
+ }
+
+ # Save the length sum to just BEFORE this token
+ $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
+
+ # now set the length of this token
+ my $token_length = length( $item->[_TOKEN_] );
+
+ # and update the cumulative length
+ $cumulative_length += $token_length;
+
+ my $type = $item->[_TYPE_];
+ if ( $type ne 'b' ) { $last_nonblank_type = $type }
+
+ # and finally, add this item to the new array
+ push @{$rLL_new}, $item;
+ };
+
+ my $add_phantom_semicolon = sub {
+
+ my ($KK) = @_;
+
+ my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
+ return unless ( defined($Kp) );
+
+ # we are only adding semicolons for certain block types
+ my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
+ return
+ unless ( $ok_to_add_semicolon_for_block_type{$block_type}
+ || $block_type =~ /^(sub|package)/
+ || $block_type =~ /^\w+\:$/ );
+
+ my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+
+ my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
+ my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
+
+ # Do not add a semicolon if...
+ return
+ if (
+
+ # it would follow a comment (and be isolated)
+ $previous_nonblank_type eq '#'
+
+ # it follows a code block ( because they are not always wanted
+ # there and may add clutter)
+ || $rLL_new->[$Kp]->[_BLOCK_TYPE_]
+
+ # it would follow a label
+ || $previous_nonblank_type eq 'J'
+
+ # it would be inside a 'format' statement (and cause syntax error)
+ || ( $previous_nonblank_type eq 'k'
+ && $previous_nonblank_token =~ /format/ )
+
+ # if it would prevent welding two containers
+ || $rpaired_to_inner_container->{$type_sequence}
+
+ );
+
+ # We will insert an empty semicolon here as a placeholder.
+ # Later, if it becomes the last token on a line, we will bring it to life.
+ # The advantage of doing this is that (1) we just have to check line endings,
+ # and (2) the phantom semicolon has zero width and therefore won't cause
+ # needless breaks of one-line blocks.
+ my $Ktop = -1;
+ if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
+ && $want_left_space{';'} == WS_NO )
+ {
+
+ # convert the blank into a semicolon..
+ # be careful: we are working on the new stack top
+ # on a token which has been stored.
+ my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', ' ' );
+
+ # Convert the existing blank to a semicolon
+ $rLL_new->[$Ktop]->[_TOKEN_] = ''; # zero length
+ $rLL_new->[$Ktop]->[_TYPE_] = ';';
+ $rLL_new->[$Ktop]->[_SLEVEL_] =
+ $rLL->[$KK]->[_SLEVEL_];
+
+ push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
+
+ # Then store a new blank
+ $store_token->($rcopy);
+ }
+ else {
+
+ # insert a new token
+ my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', '' );
+ $rcopy->[_SLEVEL_] = $rLL->[$KK]->[_SLEVEL_];
+ $store_token->($rcopy);
+ push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
+ }
+ };
+
+ my $check_Q = sub {
+
+ # Check that a quote looks okay
+ # This works but needs to by sync'd with the log file output
+ my ( $KK, $Kfirst ) = @_;
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ note_embedded_tab() if ( $token =~ "\t" );
+
+ my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
+ return unless ( defined($Kp) );
+ my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
+ my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
+
+ my $previous_nonblank_type_2 = 'b';
+ my $previous_nonblank_token_2 = "";
+ my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
+ if ( defined($Kpp) ) {
+ $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_];
+ $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
+ }
+
+ my $Kn = $self->K_next_nonblank($KK);
+ my $next_nonblank_token = "";
+ if ( defined($Kn) ) {
+ $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
+ }
+
+ my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
+ my $type_0 = $rLL->[$Kfirst]->[_TYPE_];
+
+ # make note of something like '$var = s/xxx/yyy/;'
+ # in case it should have been '$var =~ s/xxx/yyy/;'
+ if (
+ $token =~ /^(s|tr|y|m|\/)/
+ && $previous_nonblank_token =~ /^(=|==|!=)$/
+
+ # preceded by simple scalar
+ && $previous_nonblank_type_2 eq 'i'
+ && $previous_nonblank_token_2 =~ /^\$/
+
+ # followed by some kind of termination
+ # (but give complaint if we can not see far enough ahead)
+ && $next_nonblank_token =~ /^[; \)\}]$/
+
+ # scalar is not declared
+ && !( $type_0 eq 'k' && $token_0 =~ /^(my|our|local)$/ )
+ )
+ {
+ my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
+ complain(
+"Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
+ );
+ }
+ };
+
+ # Main loop over all lines of the file
+ my $last_K_out;
+ my $CODE_type = "";
+ my $line_type = "";
+ foreach my $line_of_tokens ( @{$rlines} ) {
+
+ $input_line_number = $line_of_tokens->{_line_number};
+ my $last_line_type = $line_type;
+ $line_type = $line_of_tokens->{_line_type};
+ next unless ( $line_type eq 'CODE' );
+ my $last_CODE_type = $CODE_type;
+ $CODE_type = $line_of_tokens->{_code_type};
+ my $rK_range = $line_of_tokens->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ next unless defined($Kfirst);
+
+ # Check for correct sequence of token indexes...
+ # An error here means that sub write_line() did not correctly
+ # package the tokenized lines as it received them.
+ if ( defined($last_K_out) ) {
+ if ( $Kfirst != $last_K_out + 1 ) {
+ Fault(
+ "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
+ );
+ }
+ }
+ else {
+ if ( $Kfirst != 0 ) {
+ Fault("Program Bug: first K is $Kfirst but should be 0");
+ }
+ }
+ $last_K_out = $Klast;
+
+ # Handle special lines of code
+ if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
+
+ # CODE_types are as follows.
+ # 'BL' = Blank Line
+ # 'VB' = Verbatim - line goes out verbatim
+ # 'FS' = Format Skipping - line goes out verbatim, no blanks
+ # 'IO' = Indent Only - only indentation may be changed
+ # 'NIN' = No Internal Newlines - line does not get broken
+ # 'HSC'=Hanging Side Comment - fix this hanging side comment
+ # 'BC'=Block Comment - an ordinary full line comment
+ # 'SBC'=Static Block Comment - a block comment which does not get
+ # indented
+ # 'SBCX'=Static Block Comment Without Leading Space
+ # 'DEL'=Delete this line
+ # 'VER'=VERSION statement
+ # '' or (undefined) - no restructions
+
+ # For a hanging side comment we insert an empty quote before
+ # the comment so that it becomes a normal side comment and
+ # will be aligned by the vertical aligner
+ if ( $CODE_type eq 'HSC' ) {
+
+ # Safety Check: This must be a line with one token (a comment)
+ my $rtoken_vars = $rLL->[$Kfirst];
+ if ( $Kfirst == $Klast && $rtoken_vars->[_TYPE_] eq '#' ) {
+
+ # Note that even if the flag 'noadd-whitespace' is set, we will
+ # make an exception here and allow a blank to be inserted to push the comment
+ # to the right. We can think of this as an adjustment of indentation
+ # rather than whitespace between tokens. This will also prevent the hanging
+ # side comment from getting converted to a block comment if whitespace
+ # gets deleted, as for example with the -extrude and -mangle options.
+ my $rcopy = copy_token_as_type( $rtoken_vars, 'q', '' );
+ $store_token->($rcopy);
+ $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' );
+ $store_token->($rcopy);
+ $store_token->($rtoken_vars);
+ next;
+ }
+ else {
+
+ # This line was mis-marked by sub scan_comment
+ Fault(
+ "Program bug. A hanging side comment has been mismarked"
+ );
+ }
+ }
+
+ # Copy tokens unchanged
+ foreach my $KK ( $Kfirst .. $Klast ) {
+ $store_token->( $rLL->[$KK] );
+ }
+ next;
+ }
+
+ # Handle normal line..
+
+ # Insert any essential whitespace between lines
+ # if last line was normal CODE
+ my $type_next = $rLL->[$Kfirst]->[_TYPE_];
+ my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
+ my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
+ if ( $last_line_type eq 'CODE'
+ && $type_next ne 'b'
+ && defined($Kp) )
+ {
+ my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
+ my $type_p = $rLL_new->[$Kp]->[_TYPE_];
+
+ my ( $token_pp, $type_pp );
+ my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
+ if ( defined($Kpp) ) {
+ $token_pp = $rLL_new->[$Kpp]->[_TOKEN_];
+ $type_pp = $rLL_new->[$Kpp]->[_TYPE_];
+ }
+ else {
+ $token_pp = ";";
+ $type_pp = ';';
+ }
+
+ if (
+ is_essential_whitespace(
+ $token_pp, $type_pp, $token_p,
+ $type_p, $token_next, $type_next,
+ )
+ )
+ {
+
+ # Copy this first token as blank, but use previous line number
+ my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', ' ' );
+ $rcopy->[_LINE_INDEX_] =
+ $rLL_new->[-1]->[_LINE_INDEX_];
+ $store_token->($rcopy);
+ }
+ }
+
+ # loop to copy all tokens on this line, with any changes
+ my $type_sequence;
+ for ( my $KK = $Kfirst ; $KK <= $Klast ; $KK++ ) {
+ $rtoken_vars = $rLL->[$KK];
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $type = $rtoken_vars->[_TYPE_];
+ my $last_type_sequence = $type_sequence;
+ $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+
+ # Handle a blank space ...
+ if ( $type eq 'b' ) {
+
+ # Delete it if not wanted by whitespace rules
+ # or we are deleting all whitespace
+ # Note that whitespace flag is a flag indicating whether a
+ # white space BEFORE the token is needed
+ next if ( $KK >= $Kmax ); # skip terminal blank
+ my $Knext = $KK + 1;
+ my $ws = $rwhitespace_flags->[$Knext];
+ if ( $ws == -1
+ || $rOpts_delete_old_whitespace )
+ {
+
+ # FIXME: maybe switch to using _new
+ my $Kp = $self->K_previous_nonblank($KK);
+ next unless defined($Kp);
+ my $token_p = $rLL->[$Kp]->[_TOKEN_];
+ my $type_p = $rLL->[$Kp]->[_TYPE_];
+
+ my ( $token_pp, $type_pp );
+
+ #my $Kpp = $K_previous_nonblank->($Kp);
+ my $Kpp = $self->K_previous_nonblank($Kp);
+ if ( defined($Kpp) ) {
+ $token_pp = $rLL->[$Kpp]->[_TOKEN_];
+ $type_pp = $rLL->[$Kpp]->[_TYPE_];
+ }
+ else {
+ $token_pp = ";";
+ $type_pp = ';';
+ }
+ my $token_next = $rLL->[$Knext]->[_TOKEN_];
+ my $type_next = $rLL->[$Knext]->[_TYPE_];
+
+ my $do_not_delete = is_essential_whitespace(
+ $token_pp, $type_pp, $token_p,
+ $type_p, $token_next, $type_next,
+ );
+
+ next unless ($do_not_delete);
+ }
+
+ # make it just one character if allowed
+ if ($rOpts_add_whitespace) {
+ $rtoken_vars->[_TOKEN_] = ' ';
+ }
+ $store_token->($rtoken_vars);
+ next;
+ }
+
+ # Handle a nonblank token...
+
+ # Modify certain tokens here for whitespace
+ # The following is not yet done, but could be:
+ # sub (x x x)
+ if ( $type =~ /^[wit]$/ ) {
+
+ # Examples:
+ # change '$ var' to '$var' etc
+ # '-> new' to '->new'
+ if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
+ $token =~ s/\s*//g;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
+
+ # Split identifiers with leading arrows, inserting blanks if
+ # necessary. It is easier and safer here than in the
+ # tokenizer. For example '->new' becomes two tokens, '->' and
+ # 'new' with a possible blank between.
+ #
+ # Note: there is a related patch in sub set_whitespace_flags
+ if ( $token =~ /^\-\>(.*)$/ && $1 ) {
+ my $token_save = $1;
+ my $type_save = $type;
+
+ # store a blank to left of arrow if necessary
+ my $Kprev = $self->K_previous_nonblank($KK);
+ if ( defined($Kprev)
+ && $rLL->[$Kprev]->[_TYPE_] ne 'b'
+ && $rOpts_add_whitespace
+ && $want_left_space{'->'} == WS_YES )
+ {
+ my $rcopy =
+ copy_token_as_type( $rtoken_vars, 'b', ' ' );
+ $store_token->($rcopy);
+ }
+
+ # then store the arrow
+ my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' );
+ $store_token->($rcopy);
+
+ # then reset the current token to be the remainder,
+ # and reset the whitespace flag according to the arrow
+ $token = $rtoken_vars->[_TOKEN_] = $token_save;
+ $type = $rtoken_vars->[_TYPE_] = $type_save;
+ $store_token->($rtoken_vars);
+ next;
+ }
+
+ if ( $token =~ /$SUB_PATTERN/ ) {
+ $token =~ s/\s+/ /g;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
+
+ # trim identifiers of trailing blanks which can occur
+ # under some unusual circumstances, such as if the
+ # identifier 'witch' has trailing blanks on input here:
+ #
+ # sub
+ # witch
+ # () # prototype may be on new line ...
+ # ...
+ if ( $type eq 'i' ) {
+ $token =~ s/\s+$//g;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
+ }
+
+ # change 'LABEL :' to 'LABEL:'
+ elsif ( $type eq 'J' ) {
+ $token =~ s/\s+//g;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
+
+ # patch to add space to something like "x10"
+ # This avoids having to split this token in the pre-tokenizer
+ elsif ( $type eq 'n' ) {
+ if ( $token =~ /^x\d+/ ) {
+ $token =~ s/x/x /;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
+ }
+
+ # check a quote for problems
+ elsif ( $type eq 'Q' ) {
+
+ # This is ready to go but is commented out because there is
+ # still identical logic in sub break_lines.
+ # $check_Q->($KK, $Kfirst);
+ }
+
+ # trim blanks from right of qw quotes
+ # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this)
+ elsif ( $type eq 'q' ) {
+ $token =~ s/\s*$//;
+ $rtoken_vars->[_TOKEN_] = $token;
+ note_embedded_tab() if ( $token =~ "\t" );
+ }
+
+ elsif ($type_sequence) {
+
+ # if ( $is_opening_token{$token} ) {
+ # }
+
+ if ( $is_closing_token{$token} ) {
+
+ # Insert a tentative missing semicolon if the next token is
+ # a closing block brace
+ if (
+ $type eq '}'
+ && $token eq '}'
+
+ # not preceded by a ';'
+ && $last_nonblank_type ne ';'
+
+ # and this is not a VERSION stmt (is all one line, we are not
+ # inserting semicolons on one-line blocks)
+ && $CODE_type ne 'VER'
+
+ # and we are allowed to add semicolons
+ && $rOpts->{'add-semicolons'}
+ )
+ {
+ $add_phantom_semicolon->($KK);
+ }
+ }
+ }
+
+ # Insert any needed whitespace
+ if ( @{$rLL_new}
+ && $rLL_new->[-1]->[_TYPE_] ne 'b'
+ && $rOpts_add_whitespace )
+ {
+ my $ws = $rwhitespace_flags->[$KK];
+ if ( $ws == 1 ) {
+
+ my $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' );
+ $rcopy->[_LINE_INDEX_] =
+ $rLL_new->[-1]->[_LINE_INDEX_];
+ $store_token->($rcopy);
+ }
+ }
+ $store_token->($rtoken_vars);
+ } # End token loop
+ } # End line loop
+
+ # Reset memory to be the new array
+ $self->{rLL} = $rLL_new;
+ $self->set_rLL_max_index();
+ $self->{K_opening_container} = $K_opening_container;
+ $self->{K_closing_container} = $K_closing_container;
+ $self->{K_opening_ternary} = $K_opening_ternary;
+ $self->{K_closing_ternary} = $K_closing_ternary;
+ $self->{rK_phantom_semicolons} = $rK_phantom_semicolons;
+
+ # make sure the new array looks okay
+ $self->check_token_array();
+
+ # reset the token limits of each line
+ $self->resync_lines_and_tokens();
+
+ return;
+}
+
+{ # scan_comments
+
+ my $Last_line_had_side_comment;
+ my $In_format_skipping_section;
+ my $Saw_VERSION_in_this_file;
+
+ sub scan_comments {
+ my $self = shift;
+ my $rlines = $self->{rlines};
+
+ $Last_line_had_side_comment = undef;
+ $In_format_skipping_section = undef;
+ $Saw_VERSION_in_this_file = undef;
+
+ # Loop over all lines
+ foreach my $line_of_tokens ( @{$rlines} ) {
+ my $line_type = $line_of_tokens->{_line_type};
+ next unless ( $line_type eq 'CODE' );
+ my $CODE_type = $self->get_CODE_type($line_of_tokens);
+ $line_of_tokens->{_code_type} = $CODE_type;
+ }
+ return;
+ }
+
+ sub get_CODE_type {
+ my ( $self, $line_of_tokens ) = @_;
+
+ # We are looking at a line of code and setting a flag to
+ # describe any special processing that it requires
+
+ # Possible CODE_types are as follows.
+ # 'BL' = Blank Line
+ # 'VB' = Verbatim - line goes out verbatim
+ # 'IO' = Indent Only - line goes out unchanged except for indentation
+ # 'NIN' = No Internal Newlines - line does not get broken
+ # 'HSC'=Hanging Side Comment - fix this hanging side comment
+ # 'BC'=Block Comment - an ordinary full line comment
+ # 'SBC'=Static Block Comment - a block comment which does not get
+ # indented
+ # 'SBCX'=Static Block Comment Without Leading Space
+ # 'DEL'=Delete this line
+ # 'VER'=VERSION statement
+ # '' or (undefined) - no restructions
+
+ my $rLL = $self->{rLL};
+ my $Klimit = $self->{Klimit};
+
+ my $CODE_type = $rOpts->{'indent-only'} ? 'IO' : "";
+ my $no_internal_newlines = 1 - $rOpts_add_newlines;
+ if ( !$CODE_type && $no_internal_newlines ) { $CODE_type = 'NIN' }
+
+ # extract what we need for this line..
+
+ # Global value for error messages:
+ $input_line_number = $line_of_tokens->{_line_number};
+
+ my $rK_range = $line_of_tokens->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ my $jmax = -1;
+ if ( defined($Kfirst) ) { $jmax = $Klast - $Kfirst }
+ my $input_line = $line_of_tokens->{_line_text};
+ my $in_continued_quote = my $starting_in_quote =
+ $line_of_tokens->{_starting_in_quote};
+ my $in_quote = $line_of_tokens->{_ending_in_quote};
+ my $ending_in_quote = $in_quote;
+ my $guessed_indentation_level =
+ $line_of_tokens->{_guessed_indentation_level};
+
+ my $is_static_block_comment = 0;
+
+ # Handle a continued quote..
+ if ($in_continued_quote) {
+
+ # A line which is entirely a quote or pattern must go out
+ # verbatim. Note: the \n is contained in $input_line.
+ if ( $jmax <= 0 ) {
+ if ( ( $input_line =~ "\t" ) ) {
+ note_embedded_tab();
+ }
+ $Last_line_had_side_comment = 0;
+ return 'VB';
+ }
+ }
+
+ my $is_block_comment =
+ ( $jmax == 0 && $rLL->[$Kfirst]->[_TYPE_] eq '#' );
+
+ # Write line verbatim if we are in a formatting skip section
+ if ($In_format_skipping_section) {
+ $Last_line_had_side_comment = 0;
+
+ # Note: extra space appended to comment simplifies pattern matching
+ if ( $is_block_comment
+ && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
+ /$format_skipping_pattern_end/o )
+ {
+ $In_format_skipping_section = 0;
+ write_logfile_entry("Exiting formatting skip section\n");
+ }
+ return 'FS';
+ }
+
+ # See if we are entering a formatting skip section
+ if ( $rOpts_format_skipping
+ && $is_block_comment
+ && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
+ /$format_skipping_pattern_begin/o )
+ {
+ $In_format_skipping_section = 1;
+ write_logfile_entry("Entering formatting skip section\n");
+ $Last_line_had_side_comment = 0;
+ return 'FS';
+ }
+
+ # ignore trailing blank tokens (they will get deleted later)
+ if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
+ $jmax--;
+ }
+
+ # Handle a blank line..
+ if ( $jmax < 0 ) {
+ $Last_line_had_side_comment = 0;
+ return 'BL';
+ }
+
+ # see if this is a static block comment (starts with ## by default)
+ my $is_static_block_comment_without_leading_space = 0;
+ if ( $is_block_comment
+ && $rOpts->{'static-block-comments'}
+ && $input_line =~ /$static_block_comment_pattern/o )
+ {
+ $is_static_block_comment = 1;
+ $is_static_block_comment_without_leading_space =
+ substr( $input_line, 0, 1 ) eq '#';
+ }
+
+ # Check for comments which are line directives
+ # Treat exactly as static block comments without leading space
+ # reference: perlsyn, near end, section Plain Old Comments (Not!)
+ # example: '# line 42 "new_filename.plx"'
+ if (
+ $is_block_comment
+ && $input_line =~ /^\# \s*
+ line \s+ (\d+) \s*
+ (?:\s("?)([^"]+)\2)? \s*
+ $/x
+ )
+ {
+ $is_static_block_comment = 1;
+ $is_static_block_comment_without_leading_space = 1;
+ }
+
+ # look for hanging side comment
+ if (
+ $is_block_comment
+ && $Last_line_had_side_comment # last line had side comment
+ && $input_line =~ /^\s/ # there is some leading space
+ && !$is_static_block_comment # do not make static comment hanging
+ && $rOpts->{'hanging-side-comments'} # user is allowing
+ # hanging side comments
+ # like this
+ )
+ {
+ $Last_line_had_side_comment = 1;
+ return 'HSC';
+ }
+
+ # remember if this line has a side comment
+ $Last_line_had_side_comment =
+ ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq '#' );
+
+ # Handle a block (full-line) comment..
+ if ($is_block_comment) {
+
+ if ( $rOpts->{'delete-block-comments'} ) { return 'DEL' }
+
+ # TRIM COMMENTS -- This could be turned off as a option
+ $rLL->[$Kfirst]->[_TOKEN_] =~ s/\s*$//; # trim right end
+
+ if ($is_static_block_comment_without_leading_space) {
+ return 'SBCX';
+ }
+ elsif ($is_static_block_comment) {
+ return 'SBC';
+ }
+ else {
+ return 'BC';
+ }
+ }
+
+=pod
+ # NOTE: This does not work yet. Version in print-line-of-tokens
+ # is Still used until fixed
+
+ # compare input/output indentation except for continuation lines
+ # (because they have an unknown amount of initial blank space)
+ # and lines which are quotes (because they may have been outdented)
+ # Note: this test is placed here because we know the continuation flag
+ # at this point, which allows us to avoid non-meaningful checks.
+ my $structural_indentation_level = $rLL->[$Kfirst]->[_LEVEL_];
+ compare_indentation_levels( $guessed_indentation_level,
+ $structural_indentation_level )
+ unless ( $rLL->[$Kfirst]->[_CI_LEVEL_] > 0
+ || $guessed_indentation_level == 0
+ && $rLL->[$Kfirst]->[_TYPE_] eq 'Q' );
+=cut
+
+ # Patch needed for MakeMaker. Do not break a statement
+ # in which $VERSION may be calculated. See MakeMaker.pm;
+ # this is based on the coding in it.
+ # The first line of a file that matches this will be eval'd:
+ # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
+ # Examples:
+ # *VERSION = \'1.01';
+ # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
+ # We will pass such a line straight through without breaking
+ # it unless -npvl is used.
+
+ # Patch for problem reported in RT #81866, where files
+ # had been flattened into a single line and couldn't be
+ # tidied without -npvl. There are two parts to this patch:
+ # First, it is not done for a really long line (80 tokens for now).
+ # Second, we will only allow up to one semicolon
+ # before the VERSION. We need to allow at least one semicolon
+ # for statements like this:
+ # require Exporter; our $VERSION = $Exporter::VERSION;
+ # where both statements must be on a single line for MakeMaker
+
+ my $is_VERSION_statement = 0;
+ if ( !$Saw_VERSION_in_this_file
+ && $jmax < 80
+ && $input_line =~
+ /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
+ {
+ $Saw_VERSION_in_this_file = 1;
+ write_logfile_entry("passing VERSION line; -npvl deactivates\n");
+ $CODE_type = 'VER';
+ }
+ return $CODE_type;
+ }
+}
+
+sub find_nested_pairs {
+ my $self = shift;
+
+ my $rLL = $self->{rLL};
+ return unless ( defined($rLL) && @{$rLL} );
+
+ # We define an array of pairs of nested containers
+ my @nested_pairs;
+
+ # We also set the following hash values to identify container pairs for
+ # which the opening and closing tokens are adjacent in the token stream:
+ # $rpaired_to_inner_container->{$seqno_out}=$seqno_in where $seqno_out and
+ # $seqno_in are the seqence numbers of the outer and inner containers of
+ # the pair We need these later to decide if we can insert a missing
+ # semicolon
+ my $rpaired_to_inner_container = {};
+
+ # This local hash remembers if an outer container has a close following
+ # inner container;
+ # The key is the outer sequence number
+ # The value is the token_hash of the inner container
+
+ my %has_close_following_opening;
+
+ # Names of calling routines can either be marked as 'i' or 'w',
+ # and they may invoke a sub call with an '->'. We will consider
+ # any consecutive string of such types as a single unit when making
+ # weld decisions. We also allow a leading !
+ my $is_name_type = {
+ 'i' => 1,
+ 'w' => 1,
+ 'U' => 1,
+ '->' => 1,
+ '!' => 1,
+ };
+
+ my $is_name = sub {
+ my $type = shift;
+ return $type && $is_name_type->{$type};
+ };
+
+ my $last_container;
+ my $last_last_container;
+ my $last_nonblank_token_vars;
+ my $last_count;
+
+ my $nonblank_token_count = 0;
+
+ # loop over all tokens
+ foreach my $rtoken_vars ( @{$rLL} ) {
+
+ my $type = $rtoken_vars->[_TYPE_];
+
+ next if ( $type eq 'b' );
+
+ # long identifier-like items are counted as a single item
+ $nonblank_token_count++
+ unless ( $is_name->($type)
+ && $is_name->( $last_nonblank_token_vars->[_TYPE_] ) );
+
+ my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+ if ($type_sequence) {
+
+ my $token = $rtoken_vars->[_TOKEN_];
+
+ if ( $is_opening_token{$token} ) {
+
+ # following previous opening token ...
+ if ( $last_container
+ && $is_opening_token{ $last_container->[_TOKEN_] } )
+ {
+
+ # adjacent to this one
+ my $tok_diff = $nonblank_token_count - $last_count;
+
+ my $last_tok = $last_nonblank_token_vars->[_TOKEN_];
+
+ if ( $tok_diff == 1
+ || $tok_diff == 2 && $last_container->[_TOKEN_] eq '(' )
+ {
+
+ # remember this pair...
+ my $outer_seqno = $last_container->[_TYPE_SEQUENCE_];
+ my $inner_seqno = $type_sequence;
+ $has_close_following_opening{$outer_seqno} =
+ $rtoken_vars;
+ }
+ }
+ }
+
+ elsif ( $is_closing_token{$token} ) {
+
+ # if the corresponding opening token had an adjacent opening
+ if ( $has_close_following_opening{$type_sequence}
+ && $is_closing_token{ $last_container->[_TOKEN_] }
+ && $has_close_following_opening{$type_sequence}
+ ->[_TYPE_SEQUENCE_] == $last_container->[_TYPE_SEQUENCE_] )
+ {
+
+ # The closing weld tokens must be adjacent
+ # NOTE: so intermediate commas and semicolons
+ # can currently block a weld. This is something
+ # that could be fixed in the future by including
+ # a flag to delete un-necessary commas and semicolons.
+ my $tok_diff = $nonblank_token_count - $last_count;
+
+ if ( $tok_diff == 1 ) {
+
+ # This is a closely nested pair ..
+ my $inner_seqno = $last_container->[_TYPE_SEQUENCE_];
+ my $outer_seqno = $type_sequence;
+ $rpaired_to_inner_container->{$outer_seqno} =
+ $inner_seqno;
+
+ push @nested_pairs, [ $inner_seqno, $outer_seqno ];
+ }
+ }
+ }
+
+ $last_last_container = $last_container;
+ $last_container = $rtoken_vars;
+ $last_count = $nonblank_token_count;
+ }
+ $last_nonblank_token_vars = $rtoken_vars;
+ }
+ $self->{rnested_pairs} = \@nested_pairs;
+ $self->{rpaired_to_inner_container} = $rpaired_to_inner_container;
+ return;
+}
+
+sub dump_tokens {
+
+ # a debug routine, not normally used
+ my ( $self, $msg ) = @_;
+ my $rLL = $self->{rLL};
+ my $nvars = @{$rLL};
+ print STDERR "$msg\n";
+ print STDERR "ntokens=$nvars\n";
+ print STDERR "K\t_TOKEN_\t_TYPE_\n";
+ my $K = 0;
+ foreach my $item ( @{$rLL} ) {
+ print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n";
+ $K++;
+ }
+}
+
+sub K_next_nonblank {
+ my ( $self, $KK, $rLL ) = @_;
+
+ # return the index K of the next nonblank token
+ return unless ( defined($KK) && $KK >= 0 );
+ $rLL = $self->{rLL} unless ( defined($rLL) );
+ my $Num = @{$rLL};
+ my $Knnb = $KK + 1;
+ while ( $Knnb < $Num ) {
+ if ( !defined( $rLL->[$Knnb] ) ) {
+ Fault("Undefined entry for k=$Knnb");
+ }
+ if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
+ $Knnb++;
+ }
+ return;
+}
+
+sub K_previous_nonblank {
+
+ # return index of previous nonblank token before item K
+ # Call with $KK=undef to start search at the top of the array
+ my ( $self, $KK, $rLL ) = @_;
+ $rLL = $self->{rLL} unless ( defined($rLL) );
+ my $Num = @{$rLL};
+ if ( !defined($KK) ) { $KK = $Num }
+ elsif ( $KK > $Num ) {
+
+ # The caller should make the first call with KK_new=undef to
+ # avoid this error
+ Fault(
+"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
+ );
+ }
+ my $Kpnb = $KK - 1;
+ while ( $Kpnb >= 0 ) {
+ if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
+ $Kpnb--;
+ }
+ return;
+}
+
+sub weld_containers {
+
+ # do any welding operations
+ my $self = shift;
+
+ # initialize weld length hashes needed later for checking line lengths
+ # TODO: These should eventually be stored in $self rather than be package vars
+ %weld_len_left_closing = ();
+ %weld_len_right_closing = ();
+ %weld_len_left_opening = ();
+ %weld_len_right_opening = ();
+
+ return if ( $rOpts->{'indent-only'} );
+ return unless ($rOpts_add_newlines);
+
+ $self->weld_nested_containers()
+ if $rOpts->{'weld-nested-containers'};
+
+ # Note that these two calls are order-dependent.
+ # sub weld_nested_containers() must be called before sub
+ # weld_cuddled_blocks(). This is because it is more complex and could
+ # overwrite the %weld_len_... hash values written by weld_cuddled_blocks().
+ # sub weld_cuddled_blocks(), on the other hand, is much simpler and will
+ # not overwrite the values written by weld_nested_containers. But
+ # note that weld_nested_containers() changes the _LEVEL_ values, so
+ # weld_cuddled_blocks must use the _TRUE_LEVEL_ values instead.
+
+ # Here is a good test case to Be sure that both cuddling and welding
+ # are working and not interfering with each other:
+
+ # perltidy -wn -cb -cbl='if-elsif-else'
+
+ # if ($BOLD_MATH) { (
+ # $labels, $comment,
+ # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
+ # ) } else { (
+ # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
+ # $after
+ # ) }
+
+ $self->weld_cuddled_blocks()
+ if $rOpts->{'cuddled-blocks'};
+
+ return;
+}
+
+sub weld_cuddled_blocks {
+ my $self = shift;
+
+ # This routine implements the -cb flag by finding the appropriate
+ # closing and opening block braces and welding them together.
+
+ my $rLL = $self->{rLL};
+ return unless ( defined($rLL) && @{$rLL} );
+ my $rbreak_container = $self->{rbreak_container};
+
+ my $K_opening_container = $self->{K_opening_container};
+ my $K_closing_container = $self->{K_closing_container};
+
+ my $length_to_opening_seqno = sub {
+ my ($seqno) = @_;
+ my $KK = $K_opening_container->{$seqno};
+ my $lentot = $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
+ return $lentot;
+ };
+ my $length_to_closing_seqno = sub {
+ my ($seqno) = @_;
+ my $KK = $K_closing_container->{$seqno};
+ my $lentot = $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
+ return $lentot;
+ };
+
+ my $is_broken_block = sub {
+
+ # a block is broken if the input line numbers of the braces differ
+ # we can only cuddle between broken blocks
+ my ($seqno) = @_;
+ my $K_opening = $K_opening_container->{$seqno};
+ return unless ( defined($K_opening) );
+ my $K_closing = $K_closing_container->{$seqno};
+ return unless ( defined($K_closing) );
+ return $rbreak_container->{$seqno}
+ || $rLL->[$K_closing]->[_LINE_INDEX_] !=
+ $rLL->[$K_opening]->[_LINE_INDEX_];
+ };
+
+ # A stack to remember open chains at all levels:
+ # $in_chain[$level] = [$chain_type, $type_sequence];
+ my @in_chain;
+ my $CBO = $rOpts->{'cuddled-break-option'};
+
+ # loop over structure items to find cuddled pairs
+ my $level = 0;
+ my $KK = 0;
+ while ( defined( $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] ) ) {
+ my $rtoken_vars = $rLL->[$KK];
+ my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+ if ( !$type_sequence ) {
+ Fault("sequence = $type_sequence not defined");
+ }
+
+ # We use the original levels because they get changed by sub
+ # 'weld_nested_containers'. So if this were to be called before that
+ # routine, the levels would be wrong and things would go bad.
+ my $last_level = $level;
+ $level = $rtoken_vars->[_LEVEL_TRUE_];
+
+ if ( $level < $last_level ) { $in_chain[$last_level] = undef }
+ elsif ( $level > $last_level ) { $in_chain[$level] = undef }
+
+ # We are only looking at code blocks
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $type = $rtoken_vars->[_TYPE_];
+ next unless ( $type eq $token );
+
+ if ( $token eq '{' ) {
+
+ my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
+ if ( !$block_type ) {
+
+ # patch for unrecognized block types which may not be labeled
+ my $Kp = $self->K_previous_nonblank($KK);
+ while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
+ $Kp = $self->K_previous_nonblank($Kp);
+ }
+ next unless $Kp;
+ $block_type = $rLL->[$Kp]->[_TOKEN_];
+ }
+ if ( $in_chain[$level] ) {
+
+ # we are in a chain and are at an opening block brace.
+ # See if we are welding this opening brace with the previous
+ # block brace. Get their identification numbers:
+ my $closing_seqno = $in_chain[$level]->[1];
+ my $opening_seqno = $type_sequence;
+
+ # The preceding block must be on multiple lines so that its
+ # closing brace will start a new line.
+ if ( !$is_broken_block->($closing_seqno) ) {
+ next unless ( $CBO == 2 );
+ $rbreak_container->{$closing_seqno} = 1;
+ }
+
+ # we will let the trailing block be either broken or intact
+ ## && $is_broken_block->($opening_seqno);
+
+ # We can weld the closing brace to its following word ..
+ my $Ko = $K_closing_container->{$closing_seqno};
+ my $Kon = $self->K_next_nonblank($Ko);
+
+ # ..unless it is a comment
+ if ( $rLL->[$Kon]->[_TYPE_] ne '#' ) {
+ my $dlen =
+ $rLL->[ $Kon + 1 ]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[$Ko]->[_CUMULATIVE_LENGTH_];
+ $weld_len_right_closing{$closing_seqno} = $dlen;
+
+ # Set flag that we want to break the next container
+ # so that the cuddled line is balanced.
+ $rbreak_container->{$opening_seqno} = 1
+ if ($CBO);
+ }
+
+ }
+ else {
+
+ # We are not in a chain. Start a new chain if we see the
+ # starting block type.
+ if ( $rcuddled_block_types->{$block_type} ) {
+ $in_chain[$level] = [ $block_type, $type_sequence ];
+ }
+ else {
+ $block_type = '*';
+ $in_chain[$level] = [ $block_type, $type_sequence ];
+ }
+ }
+ }
+ elsif ( $token eq '}' ) {
+ if ( $in_chain[$level] ) {
+
+ # We are in a chain at a closing brace. See if this chain
+ # continues..
+ my $Knn = $self->K_next_nonblank($KK);
+
+ # skip past comments
+ while ( $Knn && $rLL->[$Knn]->[_TYPE_] eq '#' ) {
+ $Knn = $self->K_next_nonblank($Knn);
+ }
+ next unless $Knn;
+
+ my $chain_type = $in_chain[$level]->[0];
+ my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
+ if (
+ $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
+ )
+ {
+
+ # Note that we do not weld yet because we must wait until
+ # we we are sure that an opening brace for this follows.
+ $in_chain[$level]->[1] = $type_sequence;
+ }
+ else { $in_chain[$level] = undef }
+ }
+ }
+ }
+
+ return;
+}
+
+sub weld_nested_containers {
+ my $self = shift;
+
+ # This routine implements the -wn flag by "welding together"
+ # the nested closing and opening tokens which were previously
+ # identified by sub 'find_nested_pairs'. "welding" simply
+ # involves setting certain hash values which will be checked
+ # later during formatting.
+
+ my $rLL = $self->{rLL};
+ my $Klimit = $self->get_rLL_max_index();
+ my $rnested_pairs = $self->{rnested_pairs};
+ my $rlines = $self->{rlines};
+ my $K_opening_container = $self->{K_opening_container};
+ my $K_closing_container = $self->{K_closing_container};
+
+ # Return unless there are nested pairs to weld
+ return unless defined($rnested_pairs) && @{$rnested_pairs};
+
+ # This array will hold the sequence numbers of the tokens to be welded.
+ my @welds;
+
+ # Variables needed for estimating line lengths
+ my $starting_indent;
+ my $starting_lentot;
+
+ # A tolerance to the length for length estimates. In some rare cases
+ # this can avoid problems where a final weld slightly exceeds the
+ # line length and gets broken in a bad spot.
+ my $length_tol = 1;
+
+ my $excess_length_to = sub {
+ my ($rtoken_hash) = @_;
+
+ # Estimate the length from the line start to a given token
+ my $length = $rtoken_hash->[_CUMULATIVE_LENGTH_] - $starting_lentot;
+
+ my $excess_length =
+ $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
+ return ($excess_length);
+ };
+ my $length_to_opening_seqno = sub {
+ my ($seqno) = @_;
+ my $KK = $K_opening_container->{$seqno};
+ my $lentot = $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
+ return $lentot;
+ };
+ my $length_to_closing_seqno = sub {
+ my ($seqno) = @_;
+ my $KK = $K_closing_container->{$seqno};
+ my $lentot = $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
+ return $lentot;
+ };
+
+ # Abbreviations:
+ # _oo=outer opening, i.e. first of { {
+ # _io=inner opening, i.e. second of { {
+ # _oc=outer closing, i.e. second of } {
+ # _ic=inner closing, i.e. first of } }
+
+ my $previous_pair;
+
+ # We are working from outermost to innermost pairs so that
+ # level changes will be complete when we arrive at the inner pairs.
+
+ while ( my $item = pop( @{$rnested_pairs} ) ) {
+ my ( $inner_seqno, $outer_seqno ) = @{$item};
+
+ my $Kouter_opening = $K_opening_container->{$outer_seqno};
+ my $Kinner_opening = $K_opening_container->{$inner_seqno};
+ my $Kouter_closing = $K_closing_container->{$outer_seqno};
+ my $Kinner_closing = $K_closing_container->{$inner_seqno};
+
+ my $outer_opening = $rLL->[$Kouter_opening];
+ my $inner_opening = $rLL->[$Kinner_opening];
+ my $outer_closing = $rLL->[$Kouter_closing];
+ my $inner_closing = $rLL->[$Kinner_closing];
+
+ my $iline_oo = $outer_opening->[_LINE_INDEX_];
+ my $iline_io = $inner_opening->[_LINE_INDEX_];
+
+ # Set flag saying if this pair starts a new weld
+ my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
+
+ # Set flag saying if this pair is adjacent to the previous nesting pair
+ # (even if previous pair was rejected as a weld)
+ my $touch_previous_pair =
+ defined($previous_pair) && $outer_seqno == $previous_pair->[0];
+ $previous_pair = $item;
+
+ # Set a flag if we should not weld. It sometimes looks best not to weld
+ # when the opening and closing tokens are very close. However, there
+ # is a danger that we will create a "blinker", which oscillates between
+ # two semi-stable states, if we do not weld. So the rules for
+ # not welding have to be carefully defined and tested.
+ my $do_not_weld;
+ if ( !$touch_previous_pair ) {
+
+ # If this pair is not adjacent to the previous pair (skipped or
+ # not), then measure lengths from the start of line of oo
+
+ my $rK_range = $rlines->[$iline_oo]->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ $starting_lentot = $rLL->[$Kfirst]->[_CUMULATIVE_LENGTH_];
+ $starting_indent = 0;
+ if ( !$rOpts_variable_maximum_line_length ) {
+ my $level = $rLL->[$Kfirst]->[_LEVEL_];
+ $starting_indent = $rOpts_indent_columns * $level;
+ }
+
+ # DO-NOT-WELD RULE 1:
+ # Do not weld something that looks like the start of a two-line
+ # function call, like this:
+ # $trans->add_transformation(
+ # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
+ # We will look for a semicolon after the closing paren.
+
+ # We want to weld something complex, like this though
+ # my $compass = uc( opposite_direction( line_to_canvas_direction(
+ # @{ $coords[0] }, @{ $coords[1] } ) ) );
+ # Otherwise we will get a 'blinker'
+
+ my $iline_oc = $outer_closing->[_LINE_INDEX_];
+ if ( $iline_oc <= $iline_oo + 1 ) {
+
+ # Look for following semicolon...
+ my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
+ my $next_nonblank_type =
+ defined($Knext_nonblank)
+ ? $rLL->[$Knext_nonblank]->[_TYPE_]
+ : 'b';
+ if ( $next_nonblank_type eq ';' ) {
+
+ # Then do not weld if no other containers between inner
+ # opening and closing.
+ my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
+ if ( $Knext_seq_item == $Kinner_closing ) {
+ $do_not_weld ||= 1;
+ }
+ }
+ }
+ }
+
+ my $iline_ic = $inner_closing->[_LINE_INDEX_];
+
+ # DO-NOT-WELD RULE 2:
+ # Do not weld an opening paren to an inner one line brace block
+ # We will just use old line numbers for this test and require
+ # iterations if necessary for convergence
+
+ # For example, otherwise we could cause the opening paren
+ # in the following example to separate from the caller name
+ # as here:
+
+ # $_[0]->code_handler
+ # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
+
+ # Here is another example where we do not want to weld:
+ # $wrapped->add_around_modifier(
+ # sub { push @tracelog => 'around 1'; $_[0]->(); } );
+
+ # If the one line sub block gets broken due to length or by the
+ # user, then we can weld. The result will then be:
+ # $wrapped->add_around_modifier( sub {
+ # push @tracelog => 'around 1';
+ # $_[0]->();
+ # } );
+
+ if ( $iline_ic == $iline_io ) {
+
+ my $token_oo = $outer_opening->[_TOKEN_];
+ my $block_type_io = $inner_opening->[_BLOCK_TYPE_];
+ my $token_io = $inner_opening->[_TOKEN_];
+ $do_not_weld ||= $token_oo eq '(' && $token_io eq '{';
+ }
+
+ # DO-NOT-WELD RULE 3:
+ # Do not weld if this makes our line too long
+ $do_not_weld ||= $excess_length_to->($inner_opening) > 0;
+
+ if ($do_not_weld) {
+
+ # After neglecting a pair, we start measuring from start of point io
+ $starting_lentot = $inner_opening->[_CUMULATIVE_LENGTH_];
+ $starting_indent = 0;
+ if ( !$rOpts_variable_maximum_line_length ) {
+ my $level = $inner_opening->[_LEVEL_];
+ $starting_indent = $rOpts_indent_columns * $level;
+ }
+
+ # Normally, a broken pair should not decrease indentation of
+ # intermediate tokens:
+ ## if ( $last_pair_broken ) { next }
+ # However, for long strings of welded tokens, such as '{{{{{{...'
+ # we will allow broken pairs to also remove indentation.
+ # This will keep very long strings of opening and closing
+ # braces from marching off to the right. We will do this if the
+ # number of tokens in a weld before the broken weld is 4 or more.
+ # This rule will mainly be needed for test scripts, since typical
+ # welds have fewer than about 4 welded tokens.
+ if ( !@welds || @{ $welds[-1] } < 4 ) { next }
+ }
+
+ # otherwise start new weld ...
+ elsif ($starting_new_weld) {
+ push @welds, $item;
+ }
+
+ # ... or extend current weld
+ else {
+ unshift @{ $welds[-1] }, $inner_seqno;
+ }