From 2ef16fbb004c2133d85758bca3feaea9ffb897e8 Mon Sep 17 00:00:00 2001
From: Steve Hancock <perltidy@users.sourceforge.net>
Date: Mon, 5 Jul 2021 18:33:08 -0700
Subject: [PATCH] Fix error parsing format statement

---
 dev-bin/side_comment_test.pl |  2 +-
 lib/Perl/Tidy/Tokenizer.pm   | 16 ++++++++++++----
 local-docs/BugLog.pod        | 22 +++++++++++++++++++++-
 3 files changed, 34 insertions(+), 6 deletions(-)
diff --git a/dev-bin/side_comment_test.pl b/dev-bin/side_comment_test.pl
index 26249fd7..d6809a3d 100755
--- a/dev-bin/side_comment_test.pl
+++ b/dev-bin/side_comment_test.pl
@@ -75,7 +75,7 @@ sub add_side_comments {
         next if ( $line eq '.' );
 
         # Optional: Avoid problems involving guessing if / starts a pattern
-        # next if ( $line eq '/' );
+        next if ( $line eq '/' );
 
         # Try to skip here targets; see note above
         next if ( $line =~ /^\s*[A-Z_0-9=\.\-]+\s*$/ );
diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm
index bf3e1573..ae4615eb 100644
--- a/lib/Perl/Tidy/Tokenizer.pm
+++ b/lib/Perl/Tidy/Tokenizer.pm
@@ -3017,8 +3017,8 @@ EOM
     @is_use_require{@_} = (1) x scalar(@_);
 
     # This hash holds the array index in $tokenizer_self for these keywords:
-    my %is_format_END_DATA = (
-        'format'   => _in_format_,
+    # Fix for issue c035: removed 'format' from this hash
+    my %is_END_DATA = (
         '__END__'  => _in_end_,
         '__DATA__' => _in_data_,
     );
@@ -3943,15 +3943,23 @@ EOM
                     scan_id();
                 }
 
+                # Fix for c035: 'format' is now handled separately from __END__
+                # and __DATA__, and only where a new statement can begin.
+                elsif ( $tok_kw eq 'format' && new_statement_ok() ) {
+                    $type = ';';    # make tokenizer look for TERM next
+                    $tokenizer_self->[_in_format_] = 1;
+                    last;
+                }
+
                 # Note on token types for format, __DATA__, __END__:
                 # It simplifies things to give these type ';', so that when we
                 # start rescanning we will be expecting a token of type TERM.
                 # We will switch to type 'k' before outputting the tokens.
-                elsif ( $is_format_END_DATA{$tok_kw} ) {
+                elsif ( $is_END_DATA{$tok_kw} ) {
                     $type = ';';    # make tokenizer look for TERM next
 
                     # Remember that we are in one of these three sections
-                    $tokenizer_self->[ $is_format_END_DATA{$tok_kw} ] = 1;
+                    $tokenizer_self->[ $is_END_DATA{$tok_kw} ] = 1;
                     last;
                 }
 
diff --git a/local-docs/BugLog.pod b/local-docs/BugLog.pod
index 65700785..4177298b 100644
--- a/local-docs/BugLog.pod
+++ b/local-docs/BugLog.pod
@@ -2,6 +2,26 @@
 
 =over 4
 
+=item B<Fix error parsing format statement>
+
+The following test script caused an error when perltidy took 'format' to
+start a format statement.
+
+    my$ascii#sc#
+    =#sc#
+    $formatter#sc#
+    ->#sc#
+    format#sc#
+    (#sc#
+    $html#sc#
+    )#sc#
+    ;#sc#
+
+This was fixed by requiring a format statement to begin where a new statement
+can occur. This fixes issue c035.
+
+5 Jul 2021.
+
 =item B<Fix some incorrect error messages due to side comments>
 
 Testing with large numbers of side comments caused perltidy to produce some
@@ -19,7 +39,7 @@ The following snippet is an example.
 
 This update fixes cases c029 and c030.
 
-4 Jul 2021.
+4 Jul 2021, caffc2c.
 
 =item B<Fix undefined var ref involving --format-skipping>
 
-- 
2.39.5