From 41797ca5f978b8664016c6c1493e89861692ba88 Mon Sep 17 00:00:00 2001
From: Steve Hancock <perltidy@users.sourceforge.net>
Date: Thu, 21 Sep 2023 20:33:11 -0700
Subject: [PATCH] improve tokenizer efficiency

---
 lib/Perl/Tidy.pm           | 42 ++++++++++++----------
 lib/Perl/Tidy/Formatter.pm |  4 +--
 lib/Perl/Tidy/Tokenizer.pm | 71 ++++++++++++++++++++++----------------
 3 files changed, 66 insertions(+), 51 deletions(-)

diff --git a/lib/Perl/Tidy.pm b/lib/Perl/Tidy.pm
index 6880b2ad..8b47cedf 100644
--- a/lib/Perl/Tidy.pm
+++ b/lib/Perl/Tidy.pm
@@ -3480,25 +3480,29 @@ sub generate_options {
     $add_option->( 'stack-opening-hash-brace',                'sohb',  '!' );
     $add_option->( 'stack-opening-paren',                     'sop',   '!' );
     $add_option->( 'stack-opening-square-bracket',            'sosb',  '!' );
-    $add_option->( 'vertical-tightness',                      'vt',    '=i' );
-    $add_option->( 'vertical-tightness-closing',              'vtc',   '=i' );
-    $add_option->( 'want-break-after',                        'wba',   '=s' );
-    $add_option->( 'want-break-before',                       'wbb',   '=s' );
-    $add_option->( 'break-after-all-operators',               'baao',  '!' );
-    $add_option->( 'break-before-all-operators',              'bbao',  '!' );
-    $add_option->( 'keep-interior-semicolons',                'kis',   '!' );
-    $add_option->( 'one-line-block-semicolons',               'olbs',  '=i' );
-    $add_option->( 'one-line-block-nesting',                  'olbn',  '=i' );
-    $add_option->( 'one-line-block-exclusion-list',           'olbxl', '=s' );
-    $add_option->( 'break-before-hash-brace',                 'bbhb',  '=i' );
-    $add_option->( 'break-before-hash-brace-and-indent',      'bbhbi', '=i' );
-    $add_option->( 'break-before-square-bracket',             'bbsb',  '=i' );
-    $add_option->( 'break-before-square-bracket-and-indent',  'bbsbi', '=i' );
-    $add_option->( 'break-before-paren',                      'bbp',   '=i' );
-    $add_option->( 'break-before-paren-and-indent',           'bbpi',  '=i' );
-    $add_option->( 'brace-left-list',                         'bll',   '=s' );
-    $add_option->( 'brace-left-exclusion-list',               'blxl',  '=s' );
-    $add_option->( 'break-after-labels',                      'bal',   '=i' );
+
+    # FIXME: --vt and --vtc are actually expansions now, so these two lines
+    # should eventually be removed.
+    $add_option->( 'vertical-tightness',         'vt',  '=i' );
+    $add_option->( 'vertical-tightness-closing', 'vtc', '=i' );
+
+    $add_option->( 'want-break-after',                       'wba',   '=s' );
+    $add_option->( 'want-break-before',                      'wbb',   '=s' );
+    $add_option->( 'break-after-all-operators',              'baao',  '!' );
+    $add_option->( 'break-before-all-operators',             'bbao',  '!' );
+    $add_option->( 'keep-interior-semicolons',               'kis',   '!' );
+    $add_option->( 'one-line-block-semicolons',              'olbs',  '=i' );
+    $add_option->( 'one-line-block-nesting',                 'olbn',  '=i' );
+    $add_option->( 'one-line-block-exclusion-list',          'olbxl', '=s' );
+    $add_option->( 'break-before-hash-brace',                'bbhb',  '=i' );
+    $add_option->( 'break-before-hash-brace-and-indent',     'bbhbi', '=i' );
+    $add_option->( 'break-before-square-bracket',            'bbsb',  '=i' );
+    $add_option->( 'break-before-square-bracket-and-indent', 'bbsbi', '=i' );
+    $add_option->( 'break-before-paren',                     'bbp',   '=i' );
+    $add_option->( 'break-before-paren-and-indent',          'bbpi',  '=i' );
+    $add_option->( 'brace-left-list',                        'bll',   '=s' );
+    $add_option->( 'brace-left-exclusion-list',              'blxl',  '=s' );
+    $add_option->( 'break-after-labels',                     'bal',   '=i' );
 
     # This was an experiment mentioned in git #78, originally named -bopl. I
     # expanded it to also open logical blocks, based on git discussion #100,
diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm
index 95e866ec..c4f28800 100644
--- a/lib/Perl/Tidy/Formatter.pm
+++ b/lib/Perl/Tidy/Formatter.pm
@@ -6941,7 +6941,7 @@ sub follow_if_chain {
         push @seqno_list, $seqno;
 
         # Update info for this block
-        my $block_type = $rblock_type_of_seqno->{$seqno};
+        $block_type = $rblock_type_of_seqno->{$seqno};
         if ( $block_type eq 'elsif' ) { $elsif_count++ }
         my $item = $rlevel_info->{$seqno};
         if ( defined($item) ) {
@@ -7014,7 +7014,7 @@ sub follow_if_chain {
     }
 
     # check count
-    return unless ( $elsif_count >= $elsif_count_min );
+    return if ( $elsif_count < $elsif_count_min );
 
     # Store the chain
     my $K_opening = $K_opening_container->{$seqno_if};
diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm
index 63005061..9275bb38 100644
--- a/lib/Perl/Tidy/Tokenizer.pm
+++ b/lib/Perl/Tidy/Tokenizer.pm
@@ -120,6 +120,7 @@ my (
     %is_keyword,
     %is_my_our_state,
     %is_package,
+    %matching_end_token,
 
     # INITIALIZER: sub check_options
     $code_skipping_pattern_begin,
@@ -5609,7 +5610,11 @@ EOM
 
         $self->[_in_quote_] = $in_quote;
         $self->[_quote_target_] =
-          $in_quote ? matching_end_token($quote_character) : EMPTY_STRING;
+            $in_quote
+          ? $matching_end_token{$quote_character}
+              ? $matching_end_token{$quote_character}
+              : $quote_character
+          : EMPTY_STRING;
         $self->[_rhere_target_list_] = $rhere_target_list;
 
         return;
@@ -9781,6 +9786,17 @@ sub do_quote {
     );
 } ## end sub do_quote
 
+# Some possible non-word quote delimiters, for preliminary checking
+my %is_punct_char;
+
+BEGIN {
+
+    my @q = qw# / " ' { } ( ) [ ] < > ; + - * | % ! x ~ = ? : . ^ & #;
+    push @q, '#';
+    push @q, ',';
+    @is_punct_char{@q} = (1) x scalar(@q);
+}
+
 sub follow_quoted_string {
 
     # scan for a specific token, skipping escaped characters
@@ -9820,12 +9836,20 @@ sub follow_quoted_string {
 "QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
     };
 
-    # get the corresponding end token
-    if ( $beginning_tok !~ /^\s*$/ ) {
-        $end_tok = matching_end_token($beginning_tok);
+    # for a non-blank token, get the corresponding end token
+    if (
+        $is_punct_char{$beginning_tok}
+        || ( length($beginning_tok)
+            && $beginning_tok !~ /^\s+$/ )
+      )
+    {
+        $end_tok =
+            $matching_end_token{$beginning_tok}
+          ? $matching_end_token{$beginning_tok}
+          : $beginning_tok;
     }
 
-    # a blank token means we must find and use the first non-blank one
+    # for a blank token, find and use the first non-blank one
     else {
         my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
 
@@ -9847,7 +9871,10 @@ sub follow_quoted_string {
                         $beginning_tok = $tok;
                         $quote_pos     = 0;
                     }
-                    $end_tok     = matching_end_token($beginning_tok);
+                    $end_tok =
+                        $matching_end_token{$beginning_tok}
+                      ? $matching_end_token{$beginning_tok}
+                      : $beginning_tok;
                     $quote_depth = 1;
                     last;
                 }
@@ -9869,7 +9896,7 @@ sub follow_quoted_string {
     # Case 1 (rare): loop for case of alphanumeric quote delimiter..
     # "quote_pos" is the position the current word to begin searching
     #----------------------------------------------------------------
-    if ( $beginning_tok =~ /\w/ ) {
+    if ( !$is_punct_char{$beginning_tok} && $beginning_tok =~ /\w/ ) {
 
         # Note this because it is not recommended practice except
         # for obfuscated perl contests
@@ -10156,29 +10183,6 @@ sub show_tokens {
     return;
 } ## end sub show_tokens
 
-{    ## closure for sub matching end token
-    my %matching_end_token;
-
-    BEGIN {
-        %matching_end_token = (
-            '{' => '}',
-            '(' => ')',
-            '[' => ']',
-            '<' => '>',
-        );
-    } ## end BEGIN
-
-    sub matching_end_token {
-
-        # return closing character for a pattern
-        my $beginning_token = shift;
-        if ( $matching_end_token{$beginning_token} ) {
-            return $matching_end_token{$beginning_token};
-        }
-        return ($beginning_token);
-    } ## end sub matching_end_token
-}
-
 sub dump_token_types {
     my ( $class, $fh ) = @_;
 
@@ -10917,5 +10921,12 @@ BEGIN {
     #  __DATA__ __END__
 
     @is_keyword{@Keywords} = (1) x scalar(@Keywords);
+
+    %matching_end_token = (
+        '{' => '}',
+        '(' => ')',
+        '[' => ']',
+        '<' => '>',
+    );
 } ## end BEGIN
 1;
-- 
2.39.5