fix unusual parsing issue, c065

author Steve Hancock <perltidy@users.sourceforge.net>

Sun, 5 Sep 2021 19:55:33 +0000 (12:55 -0700)

committer Steve Hancock <perltidy@users.sourceforge.net>

Sun, 5 Sep 2021 19:55:33 +0000 (12:55 -0700)
author Steve Hancock <perltidy@users.sourceforge.net>
Sun, 5 Sep 2021 19:55:33 +0000 (12:55 -0700)
committer Steve Hancock <perltidy@users.sourceforge.net>
Sun, 5 Sep 2021 19:55:33 +0000 (12:55 -0700)
diff --git a/BUGS.md b/BUGS.md

index a64b8ae11edc011eed778c5a2b1b7fa7cda26066..c88aa6e478be471e4c57a7ff8b4c6befe2400636 100644 (file)
--- a/BUGS.md
+++ b/BUGS.md
@@ -29,15 +29,6 @@ it part of the standard Perl distribution.  But for example the following line
  
  which uses double brackets to contain single brackets does not render correctly.
  
-## Two iterations are sometimes needed
-
-Usually the code produced by perltidy on the first pass does not change if it
-is run again, but sometimes a second pass will produce some small additional
-change.  This mainly happens if a major style change is made, particularly when
-perltidy is untangling complex ternary statements. Use the iteration parameter
-**-it=2** if it is important that the results be unchanged on subsequent passes,
-but note that this doubles the run time.
-
  ## Perltidy does not look for here-document targets inside of quoted strings
  
  For example, consider the following script
diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm

index 9eb7364fd4a9d3e2fcef348e3c17af15d608b950..1ff1641d7f036e6cd1ebcec3384cba953b1eb057 100644 (file)
--- a/lib/Perl/Tidy/Formatter.pm
+++ b/lib/Perl/Tidy/Formatter.pm
@@ -2661,12 +2661,6 @@ sub set_whitespace_flags {
              $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
          }
  
-        if ( !defined($ws) ) {
-            $ws = 0;
-            write_diagnostics(
-                "WS flag is undefined for tokens $last_token $token\n");
-        }
-
          # Treat newline as a whitespace. Otherwise, we might combine
          # 'Send' and '-recipients' here according to the above rules:
          # <<snippets/space3.in>>
@@ -6446,12 +6440,18 @@ sub respace_tokens {
                  }
              }
  
-            # patch to add space to something like "x10"
-            # This avoids having to split this token in the pre-tokenizer
+            # Old patch to add space to something like "x10".
+            # Note: This is now done in the Tokenizer, but this code remains
+            # for reference.
              elsif ( $type eq 'n' ) {
                  if ( substr( $token, 0, 1 ) eq 'x' && $token =~ /^x\d+/ ) {
                      $token =~ s/x/x /;
                      $rtoken_vars->[_TOKEN_] = $token;
+                    if (DEVEL_MODE) {
+                        Fault(<<EOM);
+Near line $input_line_number, Unexpected need to split a token '$token' - this should now be done by the Tokenizer
+EOM
+                    }
                  }
              }
  
diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm

index 354c8154358771ed5e0e050a341134e7088c62e3..c986c17a6500ce517bca75dcb6cd38999d9506a8 100644 (file)
--- a/lib/Perl/Tidy/Tokenizer.pm
+++ b/lib/Perl/Tidy/Tokenizer.pm
@@ -23,6 +23,9 @@ use strict;
  use warnings;
  our $VERSION = '20210717.01';
  
+# this can be turned on for extra checking during development
+use constant DEVEL_MODE => 0;
+
  use Perl::Tidy::LineBuffer;
  use Carp;
  
@@ -1585,6 +1588,42 @@ sub prepare_for_a_new_file {
          return;
      }
  
+    sub split_current_pretoken {
+
+        # Split the current pretoken at index $i into two parts.
+        #   $numc = number of characters in the first part; must be fewer than
+        #           the number of characters in the pretoken.
+        #           i.e., numc=1 to split off just the first character.
+        #
+        # The part we split will become the current token; the remainder will
+        # appear as the subsequent token.
+
+        # returns undef if error
+        # returns new initial token if successful
+
+        my ($numc) = @_;
+
+        # Do not try to split more characters than we have
+        if ( !$tok || $numc >= length($tok) ) {
+            my $len = length($tok);
+            if (DEVEL_MODE) {
+                Die(<<EOM);
+Code bug: bad call to 'split_current_pretoken': numc=$numc >= len=$len at token='$tok'
+EOM
+            }
+            return;
+        }
+        my $tok_new = substr( $tok, 0, $numc );
+        my $new_pos = $rtoken_map->[$i] + $numc;
+        splice @{$rtoken_map},  $i + 1, 0, $new_pos;
+        splice @{$rtokens},     $i + 1, 0, substr( $tok, $numc );
+        splice @{$rtoken_type}, $i + 1, 0, 'd';
+        $tok = $tok_new;
+        $rtokens->[$i] = $tok_new;
+        $max_token_index++;
+        return $tok_new;
+    }
+
      sub get_indentation_level {
  
          # patch to avoid reporting error if indented if is not terminated
@@ -1712,6 +1751,23 @@ sub prepare_for_a_new_file {
          ( $i, $tok, $type, $id_scan_state, $identifier ) =
            scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
              $max_token_index, $expecting, $paren_type[$paren_depth] );
+
+        # Check for something like a keyword joined to a special variable, like
+        # '$^One$0'. This is very rare and tricky to program around, so
+        # we issue a warning message and let the user fix it by hand.
+        if ( $tok && length($tok) > 3 && substr( $tok, 1, 1 ) eq '^' ) {
+            my $sigil = substr( $tok, 0, 1 );
+            if ( $sigil =~ /^[\$\&\%\*\@]$/ ) {
+                my $var    = substr( $tok, 0, 3 );
+                my $excess = substr( $tok, 3 );
+                interrupt_logfile();
+                warning(<<EOM);
+$input_line_number: Unexpected characters '$excess' after special variable '$var'.
+This version of perltidy does not allow letters or digits immediately after a special variable
+EOM
+                resume_logfile();
+            }
+        }
          return;
      }
  
@@ -3757,6 +3813,9 @@ EOM
                             # is a syntax error.
                              if ( $expecting == OPERATOR && $tok =~ /^x\d+$/ ) {
                                  $type = 'n';
+                                if ( split_current_pretoken(1) ) {
+                                    $type = 'x';
+                                }
                              }
                              else {
  
@@ -3843,12 +3902,23 @@ EOM
                              $type = 'x';
                          }
                      }
-
-                    # NOTE: mark something like x4 as an integer for now
-                    # It gets fixed downstream.  This is easier than
-                    # splitting the pretoken.
                      else {
+
+                        # Split a pretoken like 'x10' into 'x' and '10'.
+                        # Note: In previous versions of perltidy it was marked
+                        # as a number, $type = 'n', and fixed downstream by the
+                        # Formatter. Note that there can still be trouble if
+                        # the remaining token is not all digits; for example
+                        # $snake_says = 'hi' . 's' x2if (1); which gives a
+                        # pretoken 'x2if'.  This will cause an
+                        # error message and require that the user insert
+                        # blanks.  One way to fix this would be to make a
+                        # leading 'x' followed by a digit a separate pretoken,
+                        # but it does not seem worth the effort.
                          $type = 'n';
+                        if ( split_current_pretoken(1) ) {
+                            $type = 'x';
+                        }
                      }
                  }
                  elsif ( $tok_kw eq 'CORE::' ) {
diff --git a/local-docs/BugLog.pod b/local-docs/BugLog.pod

index 1acb6c0bf4fb214df835115fc1bf3167cf1aada7..07e557e178e01fa09d50758c8ba64d3a07b119d4 100644 (file)
--- a/local-docs/BugLog.pod
+++ b/local-docs/BugLog.pod
@@ -2,6 +2,47 @@
  
  =over 4
  
+=item B<Handle unusual parsing problem issue c066>
+
+This issue is illustrated with the following line (rt80058):
+
+   $^One$0
+
+Running perltidy generates a warning message which is caused by the lack of
+space before the 'ne'. This update gives a better warning message.
+
+4 Sep 2021.
+
+=item B<Fix unusual parsing problem issue c065>
+
+Testing produced an unusual parsing problem in perltidy which can be illustrated
+with the following simple script:
+
+    my $str = 'a'x2.2;
+    print $str,"\n";
+
+Normally an integer would follow the 'x' operator, but Perl seems to accept
+any valid number and truncates it to an integer.  So in this case the number
+2.2 is truncated to 2 and the output is 'aa'.
+
+But perltidy, with default parameters, formats this as
+
+    my $str = 'a' x 2 . 2;
+    print $str, "\n";
+
+which prints the result "aa2".  The problem is fixed with this update.
+With the update, the result is
+
+    my $str = 'a' x 2.2;
+    print $str, "\n";
+
+which is equivalent to the original script.
+
+This fixes issue c065.
+
+4 Sep 2021.
+
+
  =item B<Fix formatting instability issues b1195, b1196>
  
  Testing with random parameters produced two similar cases of unstable
author	Steve Hancock <perltidy@users.sourceforge.net>
	Sun, 5 Sep 2021 19:55:33 +0000 (12:55 -0700)
committer	Steve Hancock <perltidy@users.sourceforge.net>
	Sun, 5 Sep 2021 19:55:33 +0000 (12:55 -0700)
BUGS.md		patch \| blob \| history
lib/Perl/Tidy/Formatter.pm		patch \| blob \| history
lib/Perl/Tidy/Tokenizer.pm		patch \| blob \| history
local-docs/BugLog.pod		patch \| blob \| history