From 1badc49d547bd707e8b569f66f7911cb5628fbb9 Mon Sep 17 00:00:00 2001 From: Steve Hancock <perltidy@users.sourceforge.net> Date: Sun, 2 Apr 2023 21:18:21 -0700 Subject: [PATCH] update markup language filter --- lib/Perl/Tidy/Tokenizer.pm | 44 ++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm index cd0a670a..47c69b6f 100644 --- a/lib/Perl/Tidy/Tokenizer.pm +++ b/lib/Perl/Tidy/Tokenizer.pm @@ -9348,29 +9348,37 @@ EOM "Possible tokinization error..please check this line\n"); } - # See if this looks like html... - my $is_html_tag = + # Check for accidental formatting of a markup language doc... + # Formatting will be skipped if we set _html_tag_count_ and + # also set a warning of any kind. + my $is_html_tag; + my $is_first_string = + $i_beg == 0 && $self->[_last_line_number_] == 1; + + # html comment '<!...' of any type + if ( $str =~ /^<\s*!/ ) { + $is_html_tag = 1; + if ($is_first_string) { + $self->warning( +"looks like a markup language, continuing error checks\n" + ); + } + } - # something that looks like an html comment '<!...' - $str =~ /^<\s*!/ + # html end tag, something like </h1> + elsif ( $str =~ /^<\s*\/\w+\s*>$/ ) { + $is_html_tag = 1; + } - # or possible html end tag, something like </h1> - || $str =~ /^<\s*\/\w+\s*>$/; + # xml prolog? + elsif ( $str =~ /^<\?xml\s.*\?>$/i && $is_first_string ) { + $is_html_tag = 1; + $self->warning( + "looks like a markup language, continuing error checks\n"); + } if ($is_html_tag) { - $self->[_html_tag_count_]++; - - # Issue a warning on seeing '<!' at the start of a file; - # this will insure that the file is ouput verbatim. - if ( $self->[_last_line_number_] == 1 - && $i_beg == 0 - && $str =~ /^<\s*!/ ) - { - $self->warning( -"looks like a markup language, continuing error checks\n" - ); - } } # count blanks on inside of brackets -- 2.39.5