From: Steve Hancock <perltidy@users.sourceforge.net> Date: Sun, 5 Sep 2021 19:55:33 +0000 (-0700) Subject: fix unusual parsing issue, c065 X-Git-Tag: 20210717.02~8 X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=f242f784abb09457ca39ff722dbbed038fd76ac0;p=perltidy.git fix unusual parsing issue, c065 --- diff --git a/BUGS.md b/BUGS.md index a64b8ae1..c88aa6e4 100644 --- a/BUGS.md +++ b/BUGS.md @@ -29,15 +29,6 @@ it part of the standard Perl distribution. But for example the following line which uses double brackets to contain single brackets does not render correctly. -## Two iterations are sometimes needed - -Usually the code produced by perltidy on the first pass does not change if it -is run again, but sometimes a second pass will produce some small additional -change. This mainly happens if a major style change is made, particularly when -perltidy is untangling complex ternary statements. Use the iteration parameter -**-it=2** if it is important that the results be unchanged on subsequent passes, -but note that this doubles the run time. - ## Perltidy does not look for here-document targets inside of quoted strings For example, consider the following script diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm index 9eb7364f..1ff1641d 100644 --- a/lib/Perl/Tidy/Formatter.pm +++ b/lib/Perl/Tidy/Formatter.pm @@ -2661,12 +2661,6 @@ sub set_whitespace_flags { $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr; } - if ( !defined($ws) ) { - $ws = 0; - write_diagnostics( - "WS flag is undefined for tokens $last_token $token\n"); - } - # Treat newline as a whitespace. Otherwise, we might combine # 'Send' and '-recipients' here according to the above rules: # <<snippets/space3.in>> @@ -6446,12 +6440,18 @@ sub respace_tokens { } } - # patch to add space to something like "x10" - # This avoids having to split this token in the pre-tokenizer + # Old patch to add space to something like "x10". 
+ # Note: This is now done in the Tokenizer, but this code remains + # for reference. elsif ( $type eq 'n' ) { if ( substr( $token, 0, 1 ) eq 'x' && $token =~ /^x\d+/ ) { $token =~ s/x/x /; $rtoken_vars->[_TOKEN_] = $token; + if (DEVEL_MODE) { + Fault(<<EOM); +Near line $input_line_number, Unexpected need to split a token '$token' - this should now be done by the Tokenizer +EOM + } } } diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm index 354c8154..c986c17a 100644 --- a/lib/Perl/Tidy/Tokenizer.pm +++ b/lib/Perl/Tidy/Tokenizer.pm @@ -23,6 +23,9 @@ use strict; use warnings; our $VERSION = '20210717.01'; +# this can be turned on for extra checking during development +use constant DEVEL_MODE => 0; + use Perl::Tidy::LineBuffer; use Carp; @@ -1585,6 +1588,42 @@ sub prepare_for_a_new_file { return; } + sub split_current_pretoken { + + # Split the current pretoken at index $i into two parts. + # $numc = number of characters in the first part; must be fewer than + # the number of characters in the pretoken. + # i.e., numc=1 to split off just the first character. + # + # The part we split will become the current token; the remainder will + # appear as the subsequent token. 
+ + # returns undef if error + # returns new initial token if successful + + my ($numc) = @_; + + # Do not try to split more characters than we have + if ( !$tok || $numc >= length($tok) ) { + my $len = length($tok); + if (DEVEL_MODE) { + Die(<<EOM); +Code bug: bad call to 'split_current_pretoken': numc=$numc >= len=$len at token='$tok' +EOM + } + return; + } + my $tok_new = substr( $tok, 0, $numc ); + my $new_pos = $rtoken_map->[$i] + $numc; + splice @{$rtoken_map}, $i + 1, 0, $new_pos; + splice @{$rtokens}, $i + 1, 0, substr( $tok, $numc ); + splice @{$rtoken_type}, $i + 1, 0, 'd'; + $tok = $tok_new; + $rtokens->[$i] = $tok_new; + $max_token_index++; + return $tok_new; + } + sub get_indentation_level { # patch to avoid reporting error if indented if is not terminated @@ -1712,6 +1751,23 @@ sub prepare_for_a_new_file { ( $i, $tok, $type, $id_scan_state, $identifier ) = scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens, $max_token_index, $expecting, $paren_type[$paren_depth] ); + + # Check for something like a keyword joined to a special variable, like + # '$^One$0'. This is very rare and tricky to program around, so + # we issue a warning message and let the user fix it by hand. + if ( $tok && length($tok) > 3 && substr( $tok, 1, 1 ) eq '^' ) { + my $sigil = substr( $tok, 0, 1 ); + if ( $sigil =~ /^[\$\&\%\*\@]$/ ) { + my $var = substr( $tok, 0, 3 ); + my $excess = substr( $tok, 3 ); + interrupt_logfile(); + warning(<<EOM); +$input_line_number: Unexpected characters '$excess' after special variable '$var'. +This version of perltidy does not allow letters or digits immediately after a special variable +EOM + resume_logfile(); + } + } return; } @@ -3757,6 +3813,9 @@ EOM # is a syntax error. if ( $expecting == OPERATOR && $tok =~ /^x\d+$/ ) { $type = 'n'; + if ( split_current_pretoken(1) ) { + $type = 'x'; + } } else { @@ -3843,12 +3902,23 @@ EOM $type = 'x'; } } - - # NOTE: mark something like x4 as an integer for now - # It gets fixed downstream. 
This is easier than - # splitting the pretoken. else { + + # Split a pretoken like 'x10' into 'x' and '10'. + # Note: In previous versions of perltidy it was marked + # as a number, $type = 'n', and fixed downstream by the + # Formatter. Note that there can still be trouble if + # the remaining token is not all digits; for example + # $snake_says = 'hi' . 's' x2if (1); which gives a + # pretoken 'x2if'. This will cause an + # error message and require that the user insert + # blanks. One way to fix this would be to make a + # leading 'x' followed by a digit a separate pretoken, + # but it does not seem worth the effort. $type = 'n'; + if ( split_current_pretoken(1) ) { + $type = 'x'; + } } } elsif ( $tok_kw eq 'CORE::' ) { diff --git a/local-docs/BugLog.pod b/local-docs/BugLog.pod index 1acb6c0b..07e557e1 100644 --- a/local-docs/BugLog.pod +++ b/local-docs/BugLog.pod @@ -2,6 +2,47 @@ =over 4 +=item B<Handle unusual parsing problem issue c066> + +This issue is illustrated with the following line (rt80058): + + $^One$0 + +Running perltidy generates a warning message which is caused by the lack of +space before the 'ne'. This update gives a better warning message. + +4 Sep 2021. + +=item B<Fix unusual parsing problem issue c065> + +Testing produced an unusual parsing problem in perltidy which can be illustrated +with the following simple script: + + my $str = 'a'x2.2; + print $str,"\n"; + +Normally an integer would follow the 'x' operator, but Perl seems to accept +any valid number and truncates it to an integer. So in this case the number +2.2 is truncated to 2 and the output is 'aa'. + +But perltidy, with default parameters, formats this as + + my $str = 'a' x 2 . 2; + print $str,"\n"; + +which prints the result "aa2". The problem is fixed with this update. +With the update, the result is + + my $str = 'a' x 2.2; + print $str, "\n"; + +which is equivalent to the original script. + +This fixes issue c065. + +4 Sep 2021. 
+ + =item B<Fix formatting instability issues b1195, b1196> Testing with random parameters produced two similar cases of unstable