From 1badc49d547bd707e8b569f66f7911cb5628fbb9 Mon Sep 17 00:00:00 2001
From: Steve Hancock <perltidy@users.sourceforge.net>
Date: Sun, 2 Apr 2023 21:18:21 -0700
Subject: [PATCH] update markup language filter

---
 lib/Perl/Tidy/Tokenizer.pm | 44 ++++++++++++++++++++++----------------
 1 file changed, 26 insertions(+), 18 deletions(-)

diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm
index cd0a670a..47c69b6f 100644
--- a/lib/Perl/Tidy/Tokenizer.pm
+++ b/lib/Perl/Tidy/Tokenizer.pm
@@ -9348,29 +9348,37 @@ EOM
                     "Possible tokinization error..please check this line\n");
             }
 
-            # See if this looks like html...
-            my $is_html_tag =
+            # Check for accidental formatting of a markup language doc...
+            # Formatting will be skipped if we set _html_tag_count_ and
+            # also set a warning of any kind.
+            my $is_html_tag;
+            my $is_first_string =
+              $i_beg == 0 && $self->[_last_line_number_] == 1;
+
+            # html comment '<!...' of any type
+            if ( $str =~ /^<\s*!/ ) {
+                $is_html_tag = 1;
+                if ($is_first_string) {
+                    $self->warning(
+"looks like a markup language, continuing error checks\n"
+                    );
+                }
+            }
 
-              # something that looks like an html comment '<!...'
-              $str =~ /^<\s*!/
+            # html end tag, something like </h1>
+            elsif ( $str =~ /^<\s*\/\w+\s*>$/ ) {
+                $is_html_tag = 1;
+            }
 
-              # or possible html end tag, something like </h1>
-              || $str =~ /^<\s*\/\w+\s*>$/;
+            # xml prolog?
+            elsif ( $str =~ /^<\?xml\s.*\?>$/i && $is_first_string ) {
+                $is_html_tag = 1;
+                $self->warning(
+                    "looks like a markup language, continuing error checks\n");
+            }
 
             if ($is_html_tag) {
-
                 $self->[_html_tag_count_]++;
-
-                # Issue a warning on seeing '<!' at the start of a file;
-                # this will insure that the file is ouput verbatim.
-                if (   $self->[_last_line_number_] == 1
-                    && $i_beg == 0
-                    && $str =~ /^<\s*!/ )
-                {
-                    $self->warning(
-"looks like a markup language, continuing error checks\n"
-                    );
-                }
             }
 
             # count blanks on inside of brackets
-- 
2.39.5