From f242f784abb09457ca39ff722dbbed038fd76ac0 Mon Sep 17 00:00:00 2001
From: Steve Hancock <perltidy@users.sourceforge.net>
Date: Sun, 5 Sep 2021 12:55:33 -0700
Subject: [PATCH] fix unusual parsing issue, c065

---
 BUGS.md                    |  9 -----
 lib/Perl/Tidy/Formatter.pm | 16 ++++----
 lib/Perl/Tidy/Tokenizer.pm | 78 ++++++++++++++++++++++++++++++++++++--
 local-docs/BugLog.pod      | 41 ++++++++++++++++++++
 4 files changed, 123 insertions(+), 21 deletions(-)

diff --git a/BUGS.md b/BUGS.md
index a64b8ae1..c88aa6e4 100644
--- a/BUGS.md
+++ b/BUGS.md
@@ -29,15 +29,6 @@ it part of the standard Perl distribution.  But for example the following line
 
 which uses double brackets to contain single brackets does not render correctly.
 
-## Two iterations are sometimes needed
-
-Usually the code produced by perltidy on the first pass does not change if it
-is run again, but sometimes a second pass will produce some small additional
-change.  This mainly happens if a major style change is made, particularly when
-perltidy is untangling complex ternary statements. Use the iteration parameter
-**-it=2** if it is important that the results be unchanged on subsequent passes,
-but note that this doubles the run time.
-
 ## Perltidy does not look for here-document targets inside of quoted strings
 
 For example, consider the following script
diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm
index 9eb7364f..1ff1641d 100644
--- a/lib/Perl/Tidy/Formatter.pm
+++ b/lib/Perl/Tidy/Formatter.pm
@@ -2661,12 +2661,6 @@ sub set_whitespace_flags {
             $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
         }
 
-        if ( !defined($ws) ) {
-            $ws = 0;
-            write_diagnostics(
-                "WS flag is undefined for tokens $last_token $token\n");
-        }
-
         # Treat newline as a whitespace. Otherwise, we might combine
         # 'Send' and '-recipients' here according to the above rules:
         # <<snippets/space3.in>>
@@ -6446,12 +6440,18 @@ sub respace_tokens {
                 }
             }
 
-            # patch to add space to something like "x10"
-            # This avoids having to split this token in the pre-tokenizer
+            # Old patch to add space to something like "x10".
+            # Note: This is now done in the Tokenizer, but this code remains
+            # for reference.
             elsif ( $type eq 'n' ) {
                 if ( substr( $token, 0, 1 ) eq 'x' && $token =~ /^x\d+/ ) {
                     $token =~ s/x/x /;
                     $rtoken_vars->[_TOKEN_] = $token;
+                    if (DEVEL_MODE) {
+                        Fault(<<EOM);
+Near line $input_line_number, Unexpected need to split a token '$token' - this should now be done by the Tokenizer
+EOM
+                    }
                 }
             }
 
diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm
index 354c8154..c986c17a 100644
--- a/lib/Perl/Tidy/Tokenizer.pm
+++ b/lib/Perl/Tidy/Tokenizer.pm
@@ -23,6 +23,9 @@ use strict;
 use warnings;
 our $VERSION = '20210717.01';
 
+# this can be turned on for extra checking during development
+use constant DEVEL_MODE => 0;
+
 use Perl::Tidy::LineBuffer;
 use Carp;
 
@@ -1585,6 +1588,42 @@ sub prepare_for_a_new_file {
         return;
     }
 
+    sub split_current_pretoken {
+
+        # Split the current pretoken (index $i, text $tok) into two pretokens.
+        #   $numc = number of characters in the first part; must be fewer than
+        #           the number of characters in the pretoken.
+        #           i.e., numc=1 to split off just the first character.
+        #
+        # The part we split off becomes the current token; the remainder is
+        # inserted as the next pretoken, at index $i+1, with pretoken type 'd'.
+
+        # Returns nothing (undef in scalar context) on a bad call.
+        # Returns the new, shortened current token if successful.
+
+        my ($numc) = @_;
+
+        # Refuse to split off more characters than we have (Die if DEVEL_MODE)
+        if ( !$tok || $numc >= length($tok) ) {
+            my $len = length($tok);
+            if (DEVEL_MODE) {
+                Die(<<EOM);
+Code bug: bad call to 'split_current_pretoken': numc=$numc >= len=$len at token='$tok'
+EOM
+            }
+            return;
+        }
+        my $tok_new = substr( $tok, 0, $numc );
+        my $new_pos = $rtoken_map->[$i] + $numc;
+        splice @{$rtoken_map},  $i + 1, 0, $new_pos;
+        splice @{$rtokens},     $i + 1, 0, substr( $tok, $numc );
+        splice @{$rtoken_type}, $i + 1, 0, 'd';
+        $tok = $tok_new;
+        $rtokens->[$i] = $tok_new;
+        $max_token_index++;    # one pretoken was inserted above
+        return $tok_new;
+    }
+
     sub get_indentation_level {
 
         # patch to avoid reporting error if indented if is not terminated
@@ -1712,6 +1751,23 @@ sub prepare_for_a_new_file {
         ( $i, $tok, $type, $id_scan_state, $identifier ) =
           scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
             $max_token_index, $expecting, $paren_type[$paren_depth] );
+
+        # Check for something like a keyword joined to a special variable, like
+        # '$^One$0'. This is very rare and tricky to program around, so
+        # we issue a warning message and let the user fix it by hand.
+        if ( $tok && length($tok) > 3 && substr( $tok, 1, 1 ) eq '^' ) {
+            my $sigil = substr( $tok, 0, 1 );
+            if ( $sigil =~ /^[\$\&\%\*\@]$/ ) {
+                my $var    = substr( $tok, 0, 3 );
+                my $excess = substr( $tok, 3 );
+                interrupt_logfile();
+                warning(<<EOM);
+$input_line_number: Unexpected characters '$excess' after special variable '$var'.
+This version of perltidy does not allow letters or digits immediately after a special variable
+EOM
+                resume_logfile();
+            }
+        }
         return;
     }
 
@@ -3757,6 +3813,9 @@ EOM
                            # is a syntax error.
                             if ( $expecting == OPERATOR && $tok =~ /^x\d+$/ ) {
                                 $type = 'n';
+                                if ( split_current_pretoken(1) ) {
+                                    $type = 'x';
+                                }
                             }
                             else {
 
@@ -3843,12 +3902,23 @@ EOM
                             $type = 'x';
                         }
                     }
-
-                    # NOTE: mark something like x4 as an integer for now
-                    # It gets fixed downstream.  This is easier than
-                    # splitting the pretoken.
                     else {
+
+                        # Split a pretoken like 'x10' into 'x' and '10'.
+                        # Note: In previous versions of perltidy it was marked
+                        # as a number, $type = 'n', and fixed downstream by the
+                        # Formatter. Note that there can still be trouble if
+                        # the remaining token is not all digits; for example
+                        # $snake_says = 'hi' . 's' x2if (1); which gives a
+                        # pretoken 'x2if'.  This will cause an
+                        # error message and require that the user insert
+                        # blanks.  One way to fix this would be to make a
+                        # leading 'x' followed by a digit a separate pretoken,
+                        # but it does not seem worth the effort.
                         $type = 'n';
+                        if ( split_current_pretoken(1) ) {
+                            $type = 'x';
+                        }
                     }
                 }
                 elsif ( $tok_kw eq 'CORE::' ) {
diff --git a/local-docs/BugLog.pod b/local-docs/BugLog.pod
index 1acb6c0b..07e557e1 100644
--- a/local-docs/BugLog.pod
+++ b/local-docs/BugLog.pod
@@ -2,6 +2,47 @@
 
 =over 4
 
+=item B<Handle unusual parsing problem issue c066>
+
+This issue is illustrated with the following line (rt80058):
+
+   $^One$0
+
+Running perltidy generates a warning message which is caused by the lack of
+space before the 'ne'. This update gives a better warning message.
+
+4 Sep 2021.
+
+=item B<Fix unusual parsing problem issue c065>
+
+Testing produced an unusual parsing problem in perltidy which can be illustrated
+with the following simple script:
+
+    my $str = 'a'x2.2;
+    print $str,"\n";
+
+Normally an integer would follow the 'x' operator, but Perl seems to accept
+any valid number and truncates it to an integer.  So in this case the number
+2.2 is truncated to 2 and the output is 'aa'.
+
+But perltidy, with default parameters, formats this as
+
+    my $str = 'a' x 2 . 2;
+    print $str,"\n";
+
+which prints the result "aa2".  The problem is fixed with this update.
+With the update, the result is
+
+    my $str = 'a' x 2.2;
+    print $str, "\n";
+
+which is equivalent to the original script.
+
+This fixes issue c065.
+
+4 Sep 2021.
+
+
 =item B<Fix formatting instability issues b1195, b1196>
 
 Testing with random parameters produced two similar cases of unstable
-- 
2.39.5