From: Steve Hancock Date: Mon, 6 Sep 2021 14:52:00 +0000 (-0700) Subject: minor code cleanups and optimizations X-Git-Tag: 20210717.02~7 X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=1ccd872d8e2e3d8b6ded3b6801264da6ba7bef51;p=perltidy.git minor code cleanups and optimizations --- diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm index 1ff1641d..f49e683c 100644 --- a/lib/Perl/Tidy/Formatter.pm +++ b/lib/Perl/Tidy/Formatter.pm @@ -1119,15 +1119,6 @@ sub consecutive_nonblank_lines { $vao->get_cached_line_count(); } -sub trim { - - # trim leading and trailing whitespace from a string - my $str = shift; - $str =~ s/\s+$//; - $str =~ s/^\s+//; - return $str; -} - sub max { my (@vals) = @_; my $max = shift @vals; @@ -2290,25 +2281,7 @@ sub set_whitespace_flags { } }; - my $ws_opening_container_override = sub { - my ( $ws, $sequence_number ) = @_; - return $ws unless (%opening_container_inside_ws); - if ($sequence_number) { - my $ws_override = $opening_container_inside_ws{$sequence_number}; - if ($ws_override) { $ws = $ws_override } - } - return $ws; - }; - - my $ws_closing_container_override = sub { - my ( $ws, $sequence_number ) = @_; - return $ws unless (%closing_container_inside_ws); - if ($sequence_number) { - my $ws_override = $closing_container_inside_ws{$sequence_number}; - if ($ws_override) { $ws = $ws_override } - } - return $ws; - }; + my ( $ws_1, $ws_2, $ws_3, $ws_4 ); # main loop over all tokens to define the whitespace flags for ( my $j = 0 ; $j <= $jmax ; $j++ ) { @@ -2400,10 +2373,12 @@ sub set_whitespace_flags { } # check for special cases which override the above rules - $ws = $ws_opening_container_override->( $ws, $last_seqno ); + if ( %opening_container_inside_ws && $last_seqno ) { + my $ws_override = $opening_container_inside_ws{$last_seqno}; + if ($ws_override) { $ws = $ws_override } + } } # end setting space flag inside opening tokens - my $ws_1; $ws_1 = $ws if DEBUG_WHITE; @@ -2435,11 +2410,13 @@ sub set_whitespace_flags { } # check for special cases which override the above rules - $ws = $ws_closing_container_override->( $ws, $seqno ); + if ( %closing_container_inside_ws && $seqno ) { + my $ws_override = $closing_container_inside_ws{$seqno}; + if ($ws_override) { $ws = $ws_override } + } } # end setting space flag inside closing tokens - my $ws_2; $ws_2 = $ws if DEBUG_WHITE; @@ -2450,7 +2427,6 @@ sub set_whitespace_flags { if ( !defined($ws) ) { $ws = $binary_ws_rules{$last_type}{$type}; } - my $ws_3; $ws_3 = $ws if DEBUG_WHITE; @@ -2626,7 +2602,6 @@ sub set_whitespace_flags { } } - my $ws_4; $ws_4 = $ws if DEBUG_WHITE; @@ -2671,7 +2646,7 @@ sub set_whitespace_flags { $rwhitespace_flags->[$j] = $ws; - DEBUG_WHITE && do { + if (DEBUG_WHITE) { my $str = substr( $last_token, 0, 15 ); $str .= ' ' x ( 16 - length($str) ); if ( !defined($ws_1) ) { $ws_1 = "*" } @@ -2680,7 +2655,10 @@ sub set_whitespace_flags { if ( !defined($ws_4) ) { $ws_4 = "*" } print STDOUT "NEW WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n"; - }; + + # reset for next pass + $ws_1 = $ws_2 = $ws_3 = $ws_4 = undef; + } } ## end main loop if ( $rOpts->{'tight-secret-operators'} ) { @@ -6182,28 +6160,16 @@ sub respace_tokens { || $rOpts_delete_old_whitespace ) { - my $Kp = $self->K_previous_nonblank($KK); - next unless defined($Kp); - my $token_p = $rLL->[$Kp]->[_TOKEN_]; - my $type_p = $rLL->[$Kp]->[_TYPE_]; - - my ( $token_pp, $type_pp ); - - my $Kpp = $self->K_previous_nonblank($Kp); - if ( defined($Kpp) ) { - $token_pp = $rLL->[$Kpp]->[_TOKEN_]; - $type_pp = $rLL->[$Kpp]->[_TYPE_]; - } - else { - $token_pp = ";"; - $type_pp = ';'; - } my $token_next = $rLL->[$Knext]->[_TOKEN_]; my $type_next = $rLL->[$Knext]->[_TYPE_]; my $do_not_delete = is_essential_whitespace( - $token_pp, $type_pp, $token_p, - $type_p, $token_next, $type_next, + $last_last_nonblank_code_token, + $last_last_nonblank_code_type, + $last_nonblank_code_token, + $last_nonblank_code_type, + $token_next, + $type_next, ); # Note that repeated blanks will get filtered out here @@ -6400,8 +6366,8 @@ sub respace_tokens { { # This looks like a deletable semicolon, but even if a - # semicolon can be deleted it is necessarily best to do so. - # We apply these additional rules for deletion: + # semicolon can be deleted it is not necessarily best to do + # so. We apply these additional rules for deletion: # - Always ok to delete a ';' at the end of a line # - Never delete a ';' before a '#' because it would # promote it to a block comment. @@ -11388,7 +11354,7 @@ EOM $self->set_forced_breakpoint($max_index_to_go); } else { - $self->end_batch(); + $self->end_batch() if ( $max_index_to_go >= 0 ); } } } @@ -12503,7 +12469,7 @@ sub compare_indentation_levels { if (DEVEL_MODE) { my ( $a, $b, $c ) = caller(); Fault( -"Bad call to forced breakpoint from $a $b $c ; called with i=$i\n" +"Bad call to forced breakpoint from $a $b $c ; called with i=$i; please fix\n" ); } return; @@ -12540,6 +12506,8 @@ EOM } print STDOUT $msg; }; + + return; } sub set_forced_breakpoint_AFTER { diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm index c986c17a..0e6796a1 100644 --- a/lib/Perl/Tidy/Tokenizer.pm +++ b/lib/Perl/Tidy/Tokenizer.pm @@ -1588,40 +1588,70 @@ sub prepare_for_a_new_file { return; } - sub split_current_pretoken { + sub split_x_pretoken { - # Split the current pretoken at index $i into two parts. - # $numc = number of characters in the first part; must be fewer than - # the number of characters in the pretoken. - # i.e., numc=1 to split off just the first character. - # - # The part we split will become the current token; the remainder will - # be appear as the subsequent token. - - # returns undef if error - # returns new initial token if successful + # Given a token which has been parsed as a word with leading 'x' + # followed by one or more digits, split off the 'x' (which is now known + # to be an operator) and insert the remainder back into the pretoken + # stream with appropriate settings. - my ($numc) = @_; + # Examples: + # $tok => $tok_0 $tok_1 $tok_2 + # 'x10' => 'x' '10' + # 'x10if' => 'x' '10' 'if' + + # return 1 if successful + # return undef if error (shouldn't happen) + + if ( $tok && $tok =~ /^x(\d+)(.*)$/ ) { + + # Split $tok into up to 3 tokens: + my $tok_0 = 'x'; + my $tok_1 = $1; + my $tok_2 = $2 ? $2 : ""; + + my $len_0 = length($tok_0); + my $len_1 = length($tok_1); + my $len_2 = length($tok_2); + + my $pre_type_0 = 'w'; + my $pre_type_1 = 'd'; + my $pre_type_2 = 'w'; + + my $pos_0 = $rtoken_map->[$i]; + my $pos_1 = $pos_0 + $len_0; + my $pos_2 = $pos_1 + $len_1; + + # Splice in the digits + splice @{$rtoken_map}, $i + 1, 0, $pos_1; + splice @{$rtokens}, $i + 1, 0, $tok_1; + splice @{$rtoken_type}, $i + 1, 0, $pre_type_1; + $max_token_index++; + + # Splice in any trailing word + if ($len_2) { + splice @{$rtoken_map}, $i + 2, 0, $pos_2; + splice @{$rtokens}, $i + 2, 0, $tok_2; + splice @{$rtoken_type}, $i + 2, 0, $pre_type_2; + $max_token_index++; + } + + # The first token, 'x', becomes the current token + $tok = $tok_0; + $rtokens->[$i] = $tok; + $type = 'x'; + return 1; + } + else { - # Do not try to split more characters than we have - if ( !$tok || $numc >= length($tok) ) { - my $len = length($tok); + # Shouldn't get here if (DEVEL_MODE) { Die(<= len=$len at token='$tok' +Bad arg '$tok' passed to sub split_x_pretoken(); please fix EOM } - return; } - my $tok_new = substr( $tok, 0, $numc ); - my $new_pos = $rtoken_map->[$i] + $numc; - splice @{$rtoken_map}, $i + 1, 0, $new_pos; - splice @{$rtokens}, $i + 1, 0, substr( $tok, $numc ); - splice @{$rtoken_type}, $i + 1, 0, 'd'; - $tok = $tok_new; - $rtokens->[$i] = $tok_new; - $max_token_index++; - return $tok_new; + return; } sub get_indentation_level { @@ -3811,9 +3841,15 @@ EOM # a key with 18 a's. But something like # push @array, a x18; # is a syntax error. - if ( $expecting == OPERATOR && $tok =~ /^x\d+$/ ) { + if ( + $expecting == OPERATOR + && substr( $tok, 0, 1 ) eq 'x' + && ( length($tok) == 1 + || substr( $tok, 1, 1 ) =~ /^\d/ ) + ) + { $type = 'n'; - if ( split_current_pretoken(1) ) { + if ( split_x_pretoken() ) { $type = 'x'; } } @@ -3887,12 +3923,15 @@ EOM } # handle operator x (now we know it isn't $x=) - if ( $expecting == OPERATOR + if ( + $expecting == OPERATOR && substr( $tok, 0, 1 ) eq 'x' - && $tok =~ /^x\d*$/ ) + && ( length($tok) == 1 + || substr( $tok, 1, 1 ) =~ /^\d/ ) + ) { - if ( $tok eq 'x' ) { + if ( $tok eq 'x' ) { if ( $rtokens->[ $i + 1 ] eq '=' ) { # x= $tok = 'x='; $type = $tok; @@ -3907,16 +3946,9 @@ EOM # Split a pretoken like 'x10' into 'x' and '10'. # Note: In previous versions of perltidy it was marked # as a number, $type = 'n', and fixed downstream by the - # Formatter. Note that there can still be trouble if - # the remaining token is not all digits; for example - # $snake_says = 'hi' . 's' x2if (1); which gives a - # pretoken 'x2if'. This will cause an - # error message and require that the user insert - # blanks. One way to fix this would be to make a - # leading 'x' followed by a digit a separate pretoken, - # but it does not seem worth the effort. + # Formatter. $type = 'n'; - if ( split_current_pretoken(1) ) { + if ( split_x_pretoken() ) { $type = 'x'; } }