From: Steve Hancock Date: Sat, 2 Apr 2022 03:01:57 +0000 (-0700) Subject: convert some regexes to hashes based on NYTProf X-Git-Tag: 20220217.03~21 X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=89da64b6dc0cb78287c06890515d3af3bce59c01;p=perltidy.git convert some regexes to hashes based on NYTProf --- diff --git a/docs/eos_flag.md b/docs/eos_flag.md index 9ddbb9f3..11bda6d6 100644 --- a/docs/eos_flag.md +++ b/docs/eos_flag.md @@ -140,7 +140,7 @@ single-byte characters or for storing an encoded string of multi-byte characters. The 'C' mode is needed for actually working with multi-byte characters. Thinking of a Perl script as a single long string of text, we can look at the mode of the text of a source script as it is processed by perltidy -at three points as it is processed by perltidy: +at three points: - when it enters as a source - at the intermediate stage as it is processed @@ -189,7 +189,7 @@ destination is a string: The first three of these may start at either a file or a string, and the last one only starts at a string. -From this we can see that, if **-eos** is set, then only cases 1, 3, and 8 can occur. In that case the starting and ending states have the same storage mode for all routes through perltidy which end at a string. This verfies that perltidy will work well as a filter in all cases when the **-eos** flag is set, which is the goal here. +From this we can see that, if **-eos** is set, then only cases 1, 3, and 8 can occur. In that case the starting and ending states have the same storage mode for all routes through perltidy which end at a string. This verifies that perltidy will work well as a filter in all cases when the **-eos** flag is set, which is the goal here. The last case in this table, the C->C->C route, corresponds to programs which pass decoded strings to perltidy. This is a common usage pattern, and this diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm index 944a5acd..1932741c 100644 --- a/lib/Perl/Tidy/Formatter.pm +++ b/lib/Perl/Tidy/Formatter.pm @@ -205,6 +205,7 @@ my ( %is_if_unless_while_until_for_foreach, %is_last_next_redo_return, %is_if_unless, + %is_if_unless_elsif, %is_and_or, %is_chain_operator, %is_block_without_semicolon, @@ -591,6 +592,9 @@ BEGIN { @q = qw(if unless); @is_if_unless{@q} = (1) x scalar(@q); + @q = qw(if unless elsif); + @is_if_unless_elsif{@q} = (1) x scalar(@q); + @q = qw(and or err); @is_and_or{@q} = (1) x scalar(@q); @@ -13135,7 +13139,8 @@ EOM $rbrace_follower = { ')' => 1 }; } } - elsif ( $block_type =~ /^(if|elsif|unless)$/ ) { + ##elsif ( $block_type =~ /^(if|elsif|unless)$/ ) { + elsif ( $is_if_unless_elsif{$block_type} ) { $rbrace_follower = \%is_if_brace_follower; } elsif ( $block_type eq 'else' ) { @@ -21393,7 +21398,8 @@ EOM elsif ( $available_spaces > 1 ) { $min_gnu_indentation += $available_spaces + 1; } - elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) { + ##elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) { + elsif ( $is_opening_token{$last_nonblank_token} ) { if ( ( $tightness{$last_nonblank_token} < 2 ) ) { $min_gnu_indentation += 2; } @@ -21479,7 +21485,8 @@ EOM $lp_object; } - if ( $last_nonblank_token =~ /^[\{\[\(]$/ + ##if ( $last_nonblank_token =~ /^[\{\[\(]$/ + if ( $is_opening_token{$last_nonblank_token} && $last_nonblank_seqno ) { $rlp_object_by_seqno->{$last_nonblank_seqno} = @@ -22003,9 +22010,12 @@ sub convey_batch_to_vertical_aligner { } # flush before a long if statement to avoid unwanted alignment - if ( $n_last_line > 0 + if ( + $n_last_line > 0 && $type_beg_next eq 'k' - && $token_beg_next =~ /^(if|unless)$/ ) + && $is_if_unless{$token_beg_next} + ## && $token_beg_next =~ /^(if|unless)$/ ) + ) { $self->flush_vertical_aligner(); } @@ -22823,8 +22833,9 @@ EOM if ( $vert_last_nonblank_type eq 'k' ) { $alignment_type = "" - unless $vert_last_nonblank_token =~ - /^(if|unless|elsif)$/; + unless + $is_if_unless_elsif{$vert_last_nonblank_token}; + ##unless $vert_last_nonblank_token =~ /^(if|unless|elsif)$/; } # Do not align a spaced-function-paren if requested. diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm index a66f403d..0fc72d4c 100644 --- a/lib/Perl/Tidy/Tokenizer.pm +++ b/lib/Perl/Tidy/Tokenizer.pm @@ -3215,39 +3215,40 @@ EOM # semicolon # patched for SWITCH/CASE/ my %is_zero_continuation_block_type; - @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ; + my @q; + @q = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ; if elsif else unless while until for foreach switch case given when); - @is_zero_continuation_block_type{@_} = (1) x scalar(@_); + @is_zero_continuation_block_type{@q} = (1) x scalar(@q); my %is_logical_container; - @_ = qw(if elsif unless while and or err not && ! || for foreach); - @is_logical_container{@_} = (1) x scalar(@_); + @q = qw(if elsif unless while and or err not && ! || for foreach); + @is_logical_container{@q} = (1) x scalar(@q); my %is_binary_type; - @_ = qw(|| &&); - @is_binary_type{@_} = (1) x scalar(@_); + @q = qw(|| &&); + @is_binary_type{@q} = (1) x scalar(@q); my %is_binary_keyword; - @_ = qw(and or err eq ne cmp); - @is_binary_keyword{@_} = (1) x scalar(@_); + @q = qw(and or err eq ne cmp); + @is_binary_keyword{@q} = (1) x scalar(@q); # 'L' is token for opening { at hash key my %is_opening_type; - @_ = qw< L { ( [ >; - @is_opening_type{@_} = (1) x scalar(@_); + @q = qw< L { ( [ >; + @is_opening_type{@q} = (1) x scalar(@q); # 'R' is token for closing } at hash key my %is_closing_type; - @_ = qw< R } ) ] >; - @is_closing_type{@_} = (1) x scalar(@_); + @q = qw< R } ) ] >; + @is_closing_type{@q} = (1) x scalar(@q); my %is_redo_last_next_goto; - @_ = qw(redo last next goto); - @is_redo_last_next_goto{@_} = (1) x scalar(@_); + @q = qw(redo last next goto); + @is_redo_last_next_goto{@q} = (1) x scalar(@q); my %is_use_require; - @_ = qw(use require); - @is_use_require{@_} = (1) x scalar(@_); + @q = qw(use require); + @is_use_require{@q} = (1) x scalar(@q); # This hash holds the array index in $tokenizer_self for these keywords: # Fix for issue c035: removed 'format' from this hash @@ -3256,6 +3257,11 @@ EOM '__DATA__' => _in_data_, ); + my %is_list_end_type; + @q = qw( ; { } ); + push @q, ','; + @is_list_end_type{@q} = (1) x scalar(@q); + # original ref: camel 3 p 147, # but perl may accept undocumented flags # perl 5.10 adds 'p' (preserve) @@ -5031,7 +5037,10 @@ EOM # ); elsif ( $tok eq ')' ) { $in_statement_continuation = 1 - if $routput_container_type->[$i] =~ /^[;,\{\}]$/; + if ( + $is_list_end_type{ $routput_container_type->[$i] } + ); + ##if $routput_container_type->[$i] =~ /^[;,\{\}]$/; } elsif ( $tok eq ';' ) { $in_statement_continuation = 0 }