# simple as adding your new letter to @spaces_both_sides, for
# example.
- # fix for c250: added space rules new package type 'P'
+ # fix for c250: added space rules new package type 'P' and sub type 'S'
my @spaces_both_sides = qw#
+ - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
.= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
- &&= ||= //= <=> A k f w F n C Y U G v P
+ &&= ||= //= <=> A k f w F n C Y U G v P S
#;
my @spaces_left_side = qw<
$right_bond_strength{'CORE::'} = NO_BREAK;
# Fix for c250: added strengths for new type 'P'
+ # Note: these are working okay, but may eventually need to be
+ # adjusted or even removed.
$left_bond_strength{'P'} = NOMINAL;
$right_bond_strength{'P'} = NOMINAL;
$binary_bond_strength{'t'}{'L{'} = NO_BREAK;
$binary_bond_strength{'i'}{'L{'} = NO_BREAK;
+ # Fix for c250: set strength for new 'S' to be same as 'i'
+ # testfile is test11/Hub.pm
+ $binary_bond_strength{'S'}{'L{'} = NO_BREAK;
+
# As a defensive measure, do not break between a '(' and a
# filehandle. In some cases, this can cause an error. For
# example, the following program works:
my %wU;
my %wiq;
-my %is_witP;
+my %is_witPS;
my %is_sigil;
my %is_nonlist_keyword;
my %is_nonlist_type;
@q = qw(w i q Q G C Z);
@{wiq}{@q} = (1) x scalar(@q);
- @q = qw(w i t P); # Fix for c250: added new type 'P', formerly 'i'
- @{is_witP}{@q} = (1) x scalar(@q);
+ @q = qw(w i t P S); # Fix for c250: added new types 'P', 'S', formerly 'i'
+ @{is_witPS}{@q} = (1) x scalar(@q);
@q = qw($ & % * @);
@{is_sigil}{@q} = (1) x scalar(@q);
# Modify certain tokens here for whitespace
# The following is not yet done, but could be:
# sub (x x x)
- # ( $type =~ /^[wit]$/ )
- elsif ( $is_witP{$type} ) {
+ # ( $type =~ /^[witPS]$/ )
+ elsif ( $is_witPS{$type} ) {
# index() is several times faster than a regex test with \s here
## $token =~ /\s/
}
}
- # Trim certain spaces in identifiers
- if ( $type eq 'i' ) {
-
- if ( $token =~ /$SUB_PATTERN/ ) {
-
- # -spp = 0 : no space before opening prototype paren
- # -spp = 1 : stable (follow input spacing)
- # -spp = 2 : always space before opening prototype paren
- if ( !defined($rOpts_space_prototype_paren)
- || $rOpts_space_prototype_paren == 1 )
- {
- ## default: stable
- }
- elsif ( $rOpts_space_prototype_paren == 0 ) {
- $token =~ s/\s+\(/\(/;
- }
- elsif ( $rOpts_space_prototype_paren == 2 ) {
- $token =~ s/\(/ (/;
- }
+ # trim identifiers of trailing blanks which can occur
+ # under some unusual circumstances, such as if the
+ # identifier 'witch' has trailing blanks on input here:
+ #
+ # sub
+ # witch
+ # () # prototype may be on new line ...
+ # ...
+ my $ord_ch = ord( substr( $token, -1, 1 ) );
+ if (
- # one space max, and no tabs
- $token =~ s/\s+/ /g;
- $rtoken_vars->[_TOKEN_] = $token;
+ # quick check for possible ending space
+ $ord_ch > 0 && ( $ord_ch < ORD_PRINTABLE_MIN
+ || $ord_ch > ORD_PRINTABLE_MAX )
+ )
+ {
+ $token =~ s/\s+$//g;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
- $self->[_ris_special_identifier_token_]->{$token} =
- 'sub';
+ # Fixed for c250 to use 'S' for sub definitions
+ if ( $type eq 'S' ) {
+ # -spp = 0 : no space before opening prototype paren
+ # -spp = 1 : stable (follow input spacing)
+ # -spp = 2 : always space before opening prototype paren
+ if ( !defined($rOpts_space_prototype_paren)
+ || $rOpts_space_prototype_paren == 1 )
+ {
+ ## default: stable
+ }
+ elsif ( $rOpts_space_prototype_paren == 0 ) {
+ $token =~ s/\s+\(/\(/;
+ }
+ elsif ( $rOpts_space_prototype_paren == 2 ) {
+ $token =~ s/\(/ (/;
}
- # trim identifiers of trailing blanks which can occur
- # under some unusual circumstances, such as if the
- # identifier 'witch' has trailing blanks on input here:
- #
- # sub
- # witch
- # () # prototype may be on new line ...
- # ...
- my $ord_ch = ord( substr( $token, -1, 1 ) );
- if (
+ # one space max, and no tabs
+ $token =~ s/\s+/ /g;
+ $rtoken_vars->[_TOKEN_] = $token;
- # quick check for possible ending space
- $ord_ch > 0 && ( $ord_ch < ORD_PRINTABLE_MIN
- || $ord_ch > ORD_PRINTABLE_MAX )
- )
- {
- $token =~ s/\s+$//g;
- $rtoken_vars->[_TOKEN_] = $token;
- }
+ $self->[_ris_special_identifier_token_]->{$token} = 'sub';
}
# and trim spaces in package statements (added for c250)
}
# blank lines before subs except declarations and one-liners
- # Fix for c250: added new type 'P'
- elsif ( $leading_type eq 'i' || $leading_type eq 'P' ) {
+ # Fix for c250: added new type 'P', changed 'i' to 'S'
+ elsif ( $leading_type eq 'S' || $leading_type eq 'P' ) {
my $special_identifier =
$self->[_ris_special_identifier_token_]->{$leading_token};
if ($special_identifier) {
# Do not separate an isolated bare word from an opening paren.
# Alternate Fix #2 for issue b1299. This waits as long as possible
# to make the decision.
+ # Note for fix #c250: to keep line breaks unchanged under -extrude when
+ # switching from 'i' to 'S' for subs, we would have to also check 'S', i.e.
+ # =~/^[Si]$/. But this was never necessary at a sub signature, so we leave
+ # it alone and allow the new version to be different for --extrude. For a
+ # test file run perl527/signatures.t with --extrude.
if ( $types_to_go[$i_begin] eq 'i'
&& substr( $tokens_to_go[$i_begin], 0, 1 ) =~ /\w/ )
{
# always open comma lists not preceded by keywords,
# barewords, identifiers (that is, anything that doesn't
# look like a function call)
- my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
+ # c250: added new sub identifier type 'S'
+ my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiUS]$/;
$self->table_maker(
{
# Added 'package' (can be 'class') for --use-feature=class (rt145706)
if ( substr( $statement_type, 0, 3 ) eq 'sub' ) {
$last_nonblank_token = $statement_type;
- $last_nonblank_type = 'i';
+ $last_nonblank_type = 'S'; # c250 change
$statement_type = EMPTY_STRING;
}
elsif ( substr( $statement_type, 0, 7 ) eq 'package' ) {
# this pre-token will start an output token
push( @{$routput_token_list}, $i_tok );
- # The search for the full token ends in one of 5 main END NODES
+ # The search for the full token ends in one of 5 main END NODES:
#-----------------------
# END NODE 1: whitespace
next;
}
- #-----------------------------
- # END NODE 5: all other tokens
- #-----------------------------
+ #------------------------------------------
+ # END NODE 5: everything else (punctuation)
+ #------------------------------------------
my $code = $tokenization_code->{$tok};
if ($code) {
$code->($self);
# note: this is identical to '@value_requestor_type' defined later.
# Fix for c250: add new type 'P' for package (expecting VERSION or {}
# after package NAMESPACE, so expecting TERM)
+ # Fix for c250: add new type 'S' for sub (not expecting operator)
my @q = qw(
- ; ! + x & ? F J - p / Y : % f U ~ A G j L P * . | ^ < = [ m { \ > t
+ ; ! + x & ? F J - p / Y : % f U ~ A G j L P S * . | ^ < = [ m { \ > t
|| >= != mm *= => .. !~ == && |= .= pp -= =~ += <= %= ^= x= ~~ ** << /=
&= // >> ~. &. |. ^.
... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~
# 'i' is currently excluded because it might be a package
# 'q' is currently excluded because it might be a prototype
# Fix for c030: removed '->' from this list:
- # Fix for c250: added 'i' after new type 'P' added
+ # Fix for c250: added 'i' because new type 'P' was added
@q = qw( -- C h R ++ ] Q <> i ); ## n v q );
push @q, ')';
@{op_expected_table}{@q} = (OPERATOR) x scalar(@q);
# Types 'n', 'v', 'q' also depend on context.
# identifier...
- # Fix for c250: type 'i' and new type 'P' are in the hash table now
+ # Fix for c250: removed coding for type 'i' because 'i' and new type 'P'
+ # are now done by hash table lookup
# keyword...
if ( $last_nonblank_type eq 'k' ) {
}
# or a sub or package BLOCK
- # Fixed for c250 to include new package type 'P'
- # FIXME: this could use optimization
+ # Fixed for c250 to include new package type 'P', and change 'i' to 'S'
elsif (
- (
- $last_nonblank_type eq 'i'
- || $last_nonblank_type eq 't'
- || $last_nonblank_type eq 'P'
- )
- && $last_nonblank_token =~ /^(sub|package)\b/
+ $last_nonblank_type eq 'P'
+ || $last_nonblank_type eq 'S'
+ || ( $last_nonblank_type eq 't'
+ && substr( $last_nonblank_token, 0, 3 ) eq 'sub' )
)
{
return $last_nonblank_token;
}
# or a sub alias
+ # FIXME: see if this is really needed after the c250 update
elsif (( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
&& ( $is_sub{$last_nonblank_token} ) )
{
my $pos = pos($input_line);
my $numc = $pos - $pos_beg;
$tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
- $type = 'i';
+ $type = 'S'; ## Fix for c250, was 'i';
# remember the sub name in case another call is needed to
# get the prototype
# Patch part #1 to fixes cases b994 and b1053:
# Mark an anonymous sub keyword without prototype as type 'k', i.e.
# 'sub : lvalue { ...'
- $type = 'i';
+ $type = 'S'; ## C250, was 'i';
if ( $tok eq 'sub' && !$proto ) { $type = 'k' }
}
C user-defined constant or constant function (with void prototype = ())
U user-defined function taking parameters
G user-defined function taking block parameter (like grep/map/eval)
- M (unused, but reserved for subroutine definition name)
- P package definition
+ S sub definition (reported as type 'i' in older versions)
+ P package definition (reported as type 'i' in older versions)
t type indicater such as %,$,@,*,&,sub
w bare word (perhaps a subroutine call)
i identifier of some type (with leading %, $, @, *, &, sub, -> )
# make a hash of all valid token types for self-checking the tokenizer
# (adding NEW_TOKENS : select a new character and add to this list)
- # fix for c250: added new token type 'P'
+ # fix for c250: added new token type 'P' and 'S'
my @valid_token_types = qw#
- A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v P
+ A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v P S
{ } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
#;
push( @valid_token_types, @digraphs );