Convert some regexes to hash lookups based on NYTProf profiling results

author Steve Hancock <perltidy@users.sourceforge.net>

Sat, 2 Apr 2022 03:01:57 +0000 (20:01 -0700)

committer Steve Hancock <perltidy@users.sourceforge.net>

Sat, 2 Apr 2022 03:01:57 +0000 (20:01 -0700)
author Steve Hancock <perltidy@users.sourceforge.net>
Sat, 2 Apr 2022 03:01:57 +0000 (20:01 -0700)
committer Steve Hancock <perltidy@users.sourceforge.net>
Sat, 2 Apr 2022 03:01:57 +0000 (20:01 -0700)
diff --git a/docs/eos_flag.md b/docs/eos_flag.md

index 9ddbb9f3c2a7e1dc7fd2361a5dab19264640b387..11bda6d67df9fb1d7d16aa2007e9d767a022ed1b 100644 (file)
--- a/docs/eos_flag.md
+++ b/docs/eos_flag.md
@@ -140,7 +140,7 @@ single-byte characters or for storing an encoded string of multi-byte
  characters.  The 'C' mode is needed for actually working with multi-byte
  characters.  Thinking of a Perl script as a single long string of text, we can
  look at the mode of the text of a source script as it is processed by perltidy
-at three points as it is processed by perltidy:
+at three points:
  
      - when it enters as a source
      - at the intermediate stage as it is processed
@@ -189,7 +189,7 @@ destination is a string:
  
  The first three of these may start at either a file or a string, and the last one only starts at a string.
  
-From this we can see that, if **-eos** is set, then only cases 1, 3, and 8 can occur.  In that case the starting and ending states have the same storage mode for all routes through perltidy which end at a string.  This verfies that perltidy will work well as a filter in all cases when the **-eos** flag is set, which is the goal here.
+From this we can see that, if **-eos** is set, then only cases 1, 3, and 8 can occur.  In that case the starting and ending states have the same storage mode for all routes through perltidy which end at a string.  This verifies that perltidy will work well as a filter in all cases when the **-eos** flag is set, which is the goal here.
  
  The last case in this table, the C->C->C route, corresponds to programs which
  pass decoded strings to perltidy. This is a common usage pattern, and this
diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm

index 944a5acddb8bdeb1efefced4686f11d3d1db1644..1932741c97a45ede75a2a07686bd4a23d2fa03e5 100644 (file)
--- a/lib/Perl/Tidy/Formatter.pm
+++ b/lib/Perl/Tidy/Formatter.pm
@@ -205,6 +205,7 @@ my (
      %is_if_unless_while_until_for_foreach,
      %is_last_next_redo_return,
      %is_if_unless,
+    %is_if_unless_elsif,
      %is_and_or,
      %is_chain_operator,
      %is_block_without_semicolon,
@@ -591,6 +592,9 @@ BEGIN {
      @q = qw(if unless);
      @is_if_unless{@q} = (1) x scalar(@q);
  
+    @q = qw(if unless elsif);
+    @is_if_unless_elsif{@q} = (1) x scalar(@q);
+
      @q = qw(and or err);
      @is_and_or{@q} = (1) x scalar(@q);
  
@@ -13135,7 +13139,8 @@ EOM
                          $rbrace_follower = { ')' => 1 };
                      }
                  }
-                elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
+                ##elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
+                elsif ( $is_if_unless_elsif{$block_type} ) {
                      $rbrace_follower = \%is_if_brace_follower;
                  }
                  elsif ( $block_type eq 'else' ) {
@@ -21393,7 +21398,8 @@ EOM
                          elsif ( $available_spaces > 1 ) {
                              $min_gnu_indentation += $available_spaces + 1;
                          }
-                        elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
+                        ##elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
+                        elsif ( $is_opening_token{$last_nonblank_token} ) {
                              if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
                                  $min_gnu_indentation += 2;
                              }
@@ -21479,7 +21485,8 @@ EOM
                                $lp_object;
                          }
  
-                        if (   $last_nonblank_token =~ /^[\{\[\(]$/
+                        ##if (   $last_nonblank_token =~ /^[\{\[\(]$/
+                        if (   $is_opening_token{$last_nonblank_token}
                              && $last_nonblank_seqno )
                          {
                              $rlp_object_by_seqno->{$last_nonblank_seqno} =
@@ -22003,9 +22010,12 @@ sub convey_batch_to_vertical_aligner {
      }
  
      # flush before a long if statement to avoid unwanted alignment
-    if (   $n_last_line > 0
+    if (
+           $n_last_line > 0
          && $type_beg_next eq 'k'
-        && $token_beg_next =~ /^(if|unless)$/ )
+        && $is_if_unless{$token_beg_next}
+        ## && $token_beg_next =~ /^(if|unless)$/ )
+      )
      {
          $self->flush_vertical_aligner();
      }
@@ -22823,8 +22833,9 @@ EOM
  
                          if ( $vert_last_nonblank_type eq 'k' ) {
                              $alignment_type = ""
-                              unless $vert_last_nonblank_token =~
-                              /^(if|unless|elsif)$/;
+                              unless
+                              $is_if_unless_elsif{$vert_last_nonblank_token};
+                            ##unless $vert_last_nonblank_token =~ /^(if|unless|elsif)$/;
                          }
  
                          # Do not align a spaced-function-paren if requested.
diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm

index a66f403dda281c8554cd01afcacfb715b29bc357..0fc72d4c30c21235ac641fc13d360480b0dd37aa 100644 (file)
--- a/lib/Perl/Tidy/Tokenizer.pm
+++ b/lib/Perl/Tidy/Tokenizer.pm
@@ -3215,39 +3215,40 @@ EOM
      # semicolon
      # patched for SWITCH/CASE/
      my %is_zero_continuation_block_type;
-    @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
+    my @q;
+    @q = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
        if elsif else unless while until for foreach switch case given when);
-    @is_zero_continuation_block_type{@_} = (1) x scalar(@_);
+    @is_zero_continuation_block_type{@q} = (1) x scalar(@q);
  
      my %is_logical_container;
-    @_ = qw(if elsif unless while and or err not && !  || for foreach);
-    @is_logical_container{@_} = (1) x scalar(@_);
+    @q = qw(if elsif unless while and or err not && !  || for foreach);
+    @is_logical_container{@q} = (1) x scalar(@q);
  
      my %is_binary_type;
-    @_ = qw(|| &&);
-    @is_binary_type{@_} = (1) x scalar(@_);
+    @q = qw(|| &&);
+    @is_binary_type{@q} = (1) x scalar(@q);
  
      my %is_binary_keyword;
-    @_ = qw(and or err eq ne cmp);
-    @is_binary_keyword{@_} = (1) x scalar(@_);
+    @q = qw(and or err eq ne cmp);
+    @is_binary_keyword{@q} = (1) x scalar(@q);
  
      # 'L' is token for opening { at hash key
      my %is_opening_type;
-    @_ = qw< L { ( [ >;
-    @is_opening_type{@_} = (1) x scalar(@_);
+    @q = qw< L { ( [ >;
+    @is_opening_type{@q} = (1) x scalar(@q);
  
      # 'R' is token for closing } at hash key
      my %is_closing_type;
-    @_ = qw< R } ) ] >;
-    @is_closing_type{@_} = (1) x scalar(@_);
+    @q = qw< R } ) ] >;
+    @is_closing_type{@q} = (1) x scalar(@q);
  
      my %is_redo_last_next_goto;
-    @_ = qw(redo last next goto);
-    @is_redo_last_next_goto{@_} = (1) x scalar(@_);
+    @q = qw(redo last next goto);
+    @is_redo_last_next_goto{@q} = (1) x scalar(@q);
  
      my %is_use_require;
-    @_ = qw(use require);
-    @is_use_require{@_} = (1) x scalar(@_);
+    @q = qw(use require);
+    @is_use_require{@q} = (1) x scalar(@q);
  
      # This hash holds the array index in $tokenizer_self for these keywords:
      # Fix for issue c035: removed 'format' from this hash
@@ -3256,6 +3257,11 @@ EOM
          '__DATA__' => _in_data_,
      );
  
+    my %is_list_end_type;
+    @q = qw( ; { } );
+    push @q, ',';
+    @is_list_end_type{@q} = (1) x scalar(@q);
+
      # original ref: camel 3 p 147,
      # but perl may accept undocumented flags
      # perl 5.10 adds 'p' (preserve)
@@ -5031,7 +5037,10 @@ EOM
                      #     );
                      elsif ( $tok eq ')' ) {
                          $in_statement_continuation = 1
-                          if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
+                          if (
+                            $is_list_end_type{ $routput_container_type->[$i] }
+                          );
+                        ##if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
                      }
  
                      elsif ( $tok eq ';' ) { $in_statement_continuation = 0 }
author	Steve Hancock <perltidy@users.sourceforge.net>
	Sat, 2 Apr 2022 03:01:57 +0000 (20:01 -0700)
committer	Steve Hancock <perltidy@users.sourceforge.net>
	Sat, 2 Apr 2022 03:01:57 +0000 (20:01 -0700)
docs/eos_flag.md		patch | blob | history
lib/Perl/Tidy/Formatter.pm		patch | blob | history
lib/Perl/Tidy/Tokenizer.pm		patch | blob | history