From: Steve Hancock <perltidy@users.sourceforge.net>
Date: Sat, 2 Apr 2022 03:01:57 +0000 (-0700)
Subject: convert some regexes to hashes based on NYTProf
X-Git-Tag: 20220217.03~21
X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=89da64b6dc0cb78287c06890515d3af3bce59c01;p=perltidy.git

convert some regexes to hashes based on NYTProf
---

diff --git a/docs/eos_flag.md b/docs/eos_flag.md
index 9ddbb9f3..11bda6d6 100644
--- a/docs/eos_flag.md
+++ b/docs/eos_flag.md
@@ -140,7 +140,7 @@ single-byte characters or for storing an encoded string of multi-byte
 characters.  The 'C' mode is needed for actually working with multi-byte
 characters.  Thinking of a Perl script as a single long string of text, we can
 look at the mode of the text of a source script as it is processed by perltidy
-at three points as it is processed by perltidy:
+at three points:
 
     - when it enters as a source
     - at the intermediate stage as it is processed
@@ -189,7 +189,7 @@ destination is a string:
 
 The first three of these may start at either a file or a string, and the last one only starts at a string.
 
-From this we can see that, if **-eos** is set, then only cases 1, 3, and 8 can occur.  In that case the starting and ending states have the same storage mode for all routes through perltidy which end at a string.  This verfies that perltidy will work well as a filter in all cases when the **-eos** flag is set, which is the goal here.
+From this we can see that, if **-eos** is set, then only cases 1, 3, and 8 can occur.  In that case the starting and ending states have the same storage mode for all routes through perltidy which end at a string.  This verifies that perltidy will work well as a filter in all cases when the **-eos** flag is set, which is the goal here.
 
 The last case in this table, the C->C->C route, corresponds to programs which
 pass decoded strings to perltidy. This is a common usage pattern, and this
diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm
index 944a5acd..1932741c 100644
--- a/lib/Perl/Tidy/Formatter.pm
+++ b/lib/Perl/Tidy/Formatter.pm
@@ -205,6 +205,7 @@ my (
     %is_if_unless_while_until_for_foreach,
     %is_last_next_redo_return,
     %is_if_unless,
+    %is_if_unless_elsif,
     %is_and_or,
     %is_chain_operator,
     %is_block_without_semicolon,
@@ -591,6 +592,9 @@ BEGIN {
     @q = qw(if unless);
     @is_if_unless{@q} = (1) x scalar(@q);
 
+    @q = qw(if unless elsif);
+    @is_if_unless_elsif{@q} = (1) x scalar(@q);
+
     @q = qw(and or err);
     @is_and_or{@q} = (1) x scalar(@q);
 
@@ -13135,7 +13139,8 @@ EOM
                         $rbrace_follower = { ')' => 1 };
                     }
                 }
-                elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
+                ##elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
+                elsif ( $is_if_unless_elsif{$block_type} ) {
                     $rbrace_follower = \%is_if_brace_follower;
                 }
                 elsif ( $block_type eq 'else' ) {
@@ -21393,7 +21398,8 @@ EOM
                         elsif ( $available_spaces > 1 ) {
                             $min_gnu_indentation += $available_spaces + 1;
                         }
-                        elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
+                        ##elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
+                        elsif ( $is_opening_token{$last_nonblank_token} ) {
                             if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
                                 $min_gnu_indentation += 2;
                             }
@@ -21479,7 +21485,8 @@ EOM
                               $lp_object;
                         }
 
-                        if (   $last_nonblank_token =~ /^[\{\[\(]$/
+                        ##if (   $last_nonblank_token =~ /^[\{\[\(]$/
+                        if (   $is_opening_token{$last_nonblank_token}
                             && $last_nonblank_seqno )
                         {
                             $rlp_object_by_seqno->{$last_nonblank_seqno} =
@@ -22003,9 +22010,12 @@ sub convey_batch_to_vertical_aligner {
     }
 
     # flush before a long if statement to avoid unwanted alignment
-    if (   $n_last_line > 0
+    if (
+           $n_last_line > 0
         && $type_beg_next eq 'k'
-        && $token_beg_next =~ /^(if|unless)$/ )
+        && $is_if_unless{$token_beg_next}
+        ## && $token_beg_next =~ /^(if|unless)$/ )
+      )
     {
         $self->flush_vertical_aligner();
     }
@@ -22823,8 +22833,9 @@ EOM
 
                         if ( $vert_last_nonblank_type eq 'k' ) {
                             $alignment_type = ""
-                              unless $vert_last_nonblank_token =~
-                              /^(if|unless|elsif)$/;
+                              unless
+                              $is_if_unless_elsif{$vert_last_nonblank_token};
+                            ##unless $vert_last_nonblank_token =~ /^(if|unless|elsif)$/;
                         }
 
                         # Do not align a spaced-function-paren if requested.
diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm
index a66f403d..0fc72d4c 100644
--- a/lib/Perl/Tidy/Tokenizer.pm
+++ b/lib/Perl/Tidy/Tokenizer.pm
@@ -3215,39 +3215,40 @@ EOM
     # semicolon
     # patched for SWITCH/CASE/
     my %is_zero_continuation_block_type;
-    @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
+    my @q;
+    @q = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
       if elsif else unless while until for foreach switch case given when);
-    @is_zero_continuation_block_type{@_} = (1) x scalar(@_);
+    @is_zero_continuation_block_type{@q} = (1) x scalar(@q);
 
     my %is_logical_container;
-    @_ = qw(if elsif unless while and or err not && !  || for foreach);
-    @is_logical_container{@_} = (1) x scalar(@_);
+    @q = qw(if elsif unless while and or err not && !  || for foreach);
+    @is_logical_container{@q} = (1) x scalar(@q);
 
     my %is_binary_type;
-    @_ = qw(|| &&);
-    @is_binary_type{@_} = (1) x scalar(@_);
+    @q = qw(|| &&);
+    @is_binary_type{@q} = (1) x scalar(@q);
 
     my %is_binary_keyword;
-    @_ = qw(and or err eq ne cmp);
-    @is_binary_keyword{@_} = (1) x scalar(@_);
+    @q = qw(and or err eq ne cmp);
+    @is_binary_keyword{@q} = (1) x scalar(@q);
 
     # 'L' is token for opening { at hash key
     my %is_opening_type;
-    @_ = qw< L { ( [ >;
-    @is_opening_type{@_} = (1) x scalar(@_);
+    @q = qw< L { ( [ >;
+    @is_opening_type{@q} = (1) x scalar(@q);
 
     # 'R' is token for closing } at hash key
     my %is_closing_type;
-    @_ = qw< R } ) ] >;
-    @is_closing_type{@_} = (1) x scalar(@_);
+    @q = qw< R } ) ] >;
+    @is_closing_type{@q} = (1) x scalar(@q);
 
     my %is_redo_last_next_goto;
-    @_ = qw(redo last next goto);
-    @is_redo_last_next_goto{@_} = (1) x scalar(@_);
+    @q = qw(redo last next goto);
+    @is_redo_last_next_goto{@q} = (1) x scalar(@q);
 
     my %is_use_require;
-    @_ = qw(use require);
-    @is_use_require{@_} = (1) x scalar(@_);
+    @q = qw(use require);
+    @is_use_require{@q} = (1) x scalar(@q);
 
     # This hash holds the array index in $tokenizer_self for these keywords:
     # Fix for issue c035: removed 'format' from this hash
@@ -3256,6 +3257,11 @@ EOM
         '__DATA__' => _in_data_,
     );
 
+    my %is_list_end_type;
+    @q = qw( ; { } );
+    push @q, ',';
+    @is_list_end_type{@q} = (1) x scalar(@q);
+
     # original ref: camel 3 p 147,
     # but perl may accept undocumented flags
     # perl 5.10 adds 'p' (preserve)
@@ -5031,7 +5037,10 @@ EOM
                     #     );
                     elsif ( $tok eq ')' ) {
                         $in_statement_continuation = 1
-                          if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
+                          if (
+                            $is_list_end_type{ $routput_container_type->[$i] }
+                          );
+                        ##if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
                     }
 
                     elsif ( $tok eq ';' ) { $in_statement_continuation = 0 }