From 017fd07c0dd21eb34f5829a560189933affc4e5b Mon Sep 17 00:00:00 2001 From: Steve Hancock Date: Sat, 17 Oct 2020 07:04:32 -0700 Subject: [PATCH] fixed problem parsing multi-line signatures with comments --- lib/Perl/Tidy/Formatter.pm | 18 +++++++++++--- lib/Perl/Tidy/Tokenizer.pm | 50 +++++++++++++++++++++++++++----------- local-docs/BugLog.pod | 27 ++++++++++++++++++++ 3 files changed, 78 insertions(+), 17 deletions(-) diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm index 5b96965f..ebcda5e8 100644 --- a/lib/Perl/Tidy/Formatter.pm +++ b/lib/Perl/Tidy/Formatter.pm @@ -5883,6 +5883,7 @@ sub find_nested_pairs { # if ( !Boucherot::SetOfConnections->new->handler->execute( # ^--K_o_o ^--K_i_o # @array) ) + my $Kn_first = $K_outer_opening; for ( my $Kn = $K_outer_opening + 1 ; $Kn <= $K_inner_opening ; @@ -5890,6 +5891,7 @@ sub find_nested_pairs { ) { next if ( $rLL->[$Kn]->[_TYPE_] eq 'b' ); + if ( !$nonblank_count ) { $Kn_first = $Kn } if ( $Kn eq $K_inner_opening ) { $nonblank_count++; last; } # skip chain of identifier tokens @@ -5903,9 +5905,19 @@ sub find_nested_pairs { last if ( $nonblank_count > 2 ); } - if ( $nonblank_count == 1 - || $nonblank_count == 2 - && $rLL->[$K_outer_opening]->[_TOKEN_] eq '(' ) + if ( + + # adjacent opening containers, like: do {{ + $nonblank_count == 1 + + # short item following opening paren, like: fun( yyy ( + || ( $nonblank_count == 2 + && $rLL->[$K_outer_opening]->[_TOKEN_] eq '(' ) + + # anonymous sub + prototype or sig: )->then( sub ($code) { + || ( $rLL->[$K_inner_opening]->[_BLOCK_TYPE_] eq 'sub' + && $rLL->[$Kn_first]->[_TOKEN_] eq 'sub' ) + ) { push @nested_pairs, [ $inner_seqno, $outer_seqno, $K_inner_closing ]; diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm index 502a70ff..9375abac 100644 --- a/lib/Perl/Tidy/Tokenizer.pm +++ b/lib/Perl/Tidy/Tokenizer.pm @@ -4510,6 +4510,17 @@ sub operator_expected { { $op_expected = OPERATOR; + # Patch: The following snippet from 'signatures.t' splits the $ from + # the variable name with a side comment. To avoid an error message we + # can mark this special case as UNKNOWN. + # sub t086 + # ( #foo))) + # $ #foo))) + # a #foo))) <-This 'a' is split from its $ + # ) #foo))) + # { $a.$b } + if ( $last_nonblank_token eq '$' ) { $op_expected = UNKNOWN } + # in a 'use' statement, numbers and v-strings are not true # numbers, so to avoid incorrect error messages, we will # mark them as unknown for now (use.t) @@ -5434,14 +5445,18 @@ sub guess_if_pattern_or_division { if ($in_quote) { - # we didn't find an ending / on this line, so we bias towards division + # we didn't find an ending / on this line, so we bias towards + # division if ( $divide_expected >= 0 ) { $is_pattern = 0; $msg .= "division (no ending / on this line)\n"; } else { - # going down the rabbit hole... + # assuming a multi-line pattern ... this is risky, but division + # does not seem possible. If this fails, it would either be due + # to a syntax error in the code, or the division_expected logic + # needs to be fixed. $msg = "multi-line pattern (division not possible)\n"; $is_pattern = 1; } @@ -6043,8 +6058,7 @@ sub scan_identifier_do { my $in_prototype_or_signature = $container_type =~ /^sub\b/; # these flags will be used to help figure out the type: - ##my $saw_alpha = ( $tok =~ /^\w/ ); # This was slow - my $saw_alpha; + my $saw_alpha; my $saw_type; # allow old package separator (') except in 'use' statement @@ -6085,7 +6099,7 @@ sub scan_identifier_do { } elsif ( $tok =~ /^\w/ ) { $id_scan_state = ':'; - $saw_alpha = 1; + $saw_alpha = 1; } elsif ( $tok eq '->' ) { $id_scan_state = '$'; @@ -6113,7 +6127,6 @@ sub scan_identifier_do { my $i_save = $i; while ( $i < $max_token_index ) { - ##$i_save = $i unless ( $tok =~ /^\s*$/ ); # This was a slow statement if ($tok_is_blank) { $tok_is_blank = undef } else { $i_save = $i } @@ -6171,9 +6184,17 @@ sub scan_identifier_do { } # $# and POSTDEFREF ->$# - elsif ( ( $tok eq '#' ) && ( $identifier =~ /\$$/ ) ) { # $#array + elsif ( + ( $tok eq '#' ) + && ( $identifier =~ /\$$/ ) + + # a # inside a prototype or signature can only start a comment + && !$in_prototype_or_signature + ) + { # $#array $identifier .= $tok; # keep same state, a $ could follow } + elsif ( $tok eq '{' ) { # check for something like ${#} or ${©} @@ -6262,7 +6283,7 @@ sub scan_identifier_do { } else { # something else - if ( $in_prototype_or_signature && $tok =~ /^[\),=]/ ) { + if ( $in_prototype_or_signature && $tok =~ /^[\),=#]/ ) { $id_scan_state = ''; $i = $i_save; $type = 'i'; # probably punctuation variable @@ -6337,12 +6358,10 @@ sub scan_identifier_do { $id_scan_state = ':'; # now need :: $saw_alpha = 1; } - ##elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) { elsif ( $tok_is_blank && $identifier =~ /^sub / ) { $id_scan_state = '('; $identifier .= $tok; } - ##elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) { elsif ( $tok eq '(' && $identifier =~ /^sub / ) { $id_scan_state = ')'; $identifier .= $tok; @@ -6381,12 +6400,10 @@ sub scan_identifier_do { $identifier .= $tok; } } - ##elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) { elsif ( $tok_is_blank && $identifier =~ /^sub / ) { $id_scan_state = '('; $identifier .= $tok; } - ##elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) { elsif ( $tok eq '(' && $identifier =~ /^sub / ) { $id_scan_state = ')'; $identifier .= $tok; @@ -6658,9 +6675,15 @@ sub scan_identifier_do { # does not look like a prototype, we assume it is a SIGNATURE and we # will stop and let the the standard tokenizer handle it. In # particular, we stop if we see any nested parens, braces, or commas. + # Note, a valid prototype cannot contain any alphabetic character + # see https://perldoc.perl.org/perlsub + # But it appears that an underscore may be valid now my $saw_opening_paren = $input_line =~ /\G\s*\(/; if ( - $input_line =~ m/\G(\s*\([^\)\(\}\{\,]*\))? # PROTO + ## FIXME: this should be the future version after some + ## problems are resolved + ## $input_line =~ m/\G(\s*\([^\)\(\}\{\,#A-Za-z]*\))? # PROTO + $input_line =~ m/\G(\s*\([^\)\(\}\{\,#]*\))? # PROTO (\s*:)? # ATTRS leading ':' /gcx && ( $1 || $2 ) @@ -8446,4 +8469,3 @@ BEGIN { @is_keyword{@Keywords} = (1) x scalar(@Keywords); } 1; - diff --git a/local-docs/BugLog.pod b/local-docs/BugLog.pod index 55a1a093..1f0b7229 100644 --- a/local-docs/BugLog.pod +++ b/local-docs/BugLog.pod @@ -1,5 +1,32 @@ =head1 Issues fixed after release 20201001 +=item b + +Problems with parsing prototypes and signatures were found during testing and +fixed. For example the following snippet was mis-parsed because of the hash +mark. + + sub test ( # comment ))) + $x, $x) { $x+$y } + + +Complex signature expressions such as the following are now parsed without +error: + + sub t086 + ( #foo))) + $ #foo))) + a #foo))) + ) #foo))) + { $a.$b } + +The parenthesized expression in the snippet below was parsed as a prototype +rather than a signature. This was fixed. + + sub echo ( $message = 'Hello World!' ) { + print "$message\n"; + } + =item b The following line caused a tokenization error in which the two slashes -- 2.39.5