improve tokenizer efficiency

author Steve Hancock <perltidy@users.sourceforge.net>

Fri, 22 Sep 2023 03:33:11 +0000 (20:33 -0700)

committer Steve Hancock <perltidy@users.sourceforge.net>

Fri, 22 Sep 2023 03:33:11 +0000 (20:33 -0700)
author Steve Hancock <perltidy@users.sourceforge.net>
Fri, 22 Sep 2023 03:33:11 +0000 (20:33 -0700)
committer Steve Hancock <perltidy@users.sourceforge.net>
Fri, 22 Sep 2023 03:33:11 +0000 (20:33 -0700)
diff --git a/lib/Perl/Tidy.pm b/lib/Perl/Tidy.pm

index 6880b2ad00a0f771982d3943f19ae206bfebd633..8b47cedfe74c03bb4496dea1dd288f13b55d586c 100644 (file)
--- a/lib/Perl/Tidy.pm
+++ b/lib/Perl/Tidy.pm
@@ -3480,25 +3480,29 @@ sub generate_options {
      $add_option->( 'stack-opening-hash-brace',                'sohb',  '!' );
      $add_option->( 'stack-opening-paren',                     'sop',   '!' );
      $add_option->( 'stack-opening-square-bracket',            'sosb',  '!' );
-    $add_option->( 'vertical-tightness',                      'vt',    '=i' );
-    $add_option->( 'vertical-tightness-closing',              'vtc',   '=i' );
-    $add_option->( 'want-break-after',                        'wba',   '=s' );
-    $add_option->( 'want-break-before',                       'wbb',   '=s' );
-    $add_option->( 'break-after-all-operators',               'baao',  '!' );
-    $add_option->( 'break-before-all-operators',              'bbao',  '!' );
-    $add_option->( 'keep-interior-semicolons',                'kis',   '!' );
-    $add_option->( 'one-line-block-semicolons',               'olbs',  '=i' );
-    $add_option->( 'one-line-block-nesting',                  'olbn',  '=i' );
-    $add_option->( 'one-line-block-exclusion-list',           'olbxl', '=s' );
-    $add_option->( 'break-before-hash-brace',                 'bbhb',  '=i' );
-    $add_option->( 'break-before-hash-brace-and-indent',      'bbhbi', '=i' );
-    $add_option->( 'break-before-square-bracket',             'bbsb',  '=i' );
-    $add_option->( 'break-before-square-bracket-and-indent',  'bbsbi', '=i' );
-    $add_option->( 'break-before-paren',                      'bbp',   '=i' );
-    $add_option->( 'break-before-paren-and-indent',           'bbpi',  '=i' );
-    $add_option->( 'brace-left-list',                         'bll',   '=s' );
-    $add_option->( 'brace-left-exclusion-list',               'blxl',  '=s' );
-    $add_option->( 'break-after-labels',                      'bal',   '=i' );
+
+    # FIXME: --vt and --vtc are actually expansions now, so these two lines
+    # should eventually be removed.
+    $add_option->( 'vertical-tightness',         'vt',  '=i' );
+    $add_option->( 'vertical-tightness-closing', 'vtc', '=i' );
+
+    $add_option->( 'want-break-after',                       'wba',   '=s' );
+    $add_option->( 'want-break-before',                      'wbb',   '=s' );
+    $add_option->( 'break-after-all-operators',              'baao',  '!' );
+    $add_option->( 'break-before-all-operators',             'bbao',  '!' );
+    $add_option->( 'keep-interior-semicolons',               'kis',   '!' );
+    $add_option->( 'one-line-block-semicolons',              'olbs',  '=i' );
+    $add_option->( 'one-line-block-nesting',                 'olbn',  '=i' );
+    $add_option->( 'one-line-block-exclusion-list',          'olbxl', '=s' );
+    $add_option->( 'break-before-hash-brace',                'bbhb',  '=i' );
+    $add_option->( 'break-before-hash-brace-and-indent',     'bbhbi', '=i' );
+    $add_option->( 'break-before-square-bracket',            'bbsb',  '=i' );
+    $add_option->( 'break-before-square-bracket-and-indent', 'bbsbi', '=i' );
+    $add_option->( 'break-before-paren',                     'bbp',   '=i' );
+    $add_option->( 'break-before-paren-and-indent',          'bbpi',  '=i' );
+    $add_option->( 'brace-left-list',                        'bll',   '=s' );
+    $add_option->( 'brace-left-exclusion-list',              'blxl',  '=s' );
+    $add_option->( 'break-after-labels',                     'bal',   '=i' );
  
      # This was an experiment mentioned in git #78, originally named -bopl. I
      # expanded it to also open logical blocks, based on git discussion #100,
diff --git a/lib/Perl/Tidy/Formatter.pm b/lib/Perl/Tidy/Formatter.pm

index 95e866ec7736827c61ebe2aea9a98ad5427a04cb..c4f288008d7a01570eeef1a64990ca7d47a98c37 100644 (file)
--- a/lib/Perl/Tidy/Formatter.pm
+++ b/lib/Perl/Tidy/Formatter.pm
@@ -6941,7 +6941,7 @@ sub follow_if_chain {
          push @seqno_list, $seqno;
  
          # Update info for this block
-        my $block_type = $rblock_type_of_seqno->{$seqno};
+        $block_type = $rblock_type_of_seqno->{$seqno};
          if ( $block_type eq 'elsif' ) { $elsif_count++ }
          my $item = $rlevel_info->{$seqno};
          if ( defined($item) ) {
@@ -7014,7 +7014,7 @@ sub follow_if_chain {
      }
  
      # check count
-    return unless ( $elsif_count >= $elsif_count_min );
+    return if ( $elsif_count < $elsif_count_min );
  
      # Store the chain
      my $K_opening = $K_opening_container->{$seqno_if};
diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm

index 63005061b54d8fff7414490270638675b6d6b842..9275bb38e7b4953f543b7697d6972023e3cd1bb2 100644 (file)
--- a/lib/Perl/Tidy/Tokenizer.pm
+++ b/lib/Perl/Tidy/Tokenizer.pm
@@ -120,6 +120,7 @@ my (
      %is_keyword,
      %is_my_our_state,
      %is_package,
+    %matching_end_token,
  
      # INITIALIZER: sub check_options
      $code_skipping_pattern_begin,
@@ -5609,7 +5610,11 @@ EOM
  
          $self->[_in_quote_] = $in_quote;
          $self->[_quote_target_] =
-          $in_quote ? matching_end_token($quote_character) : EMPTY_STRING;
+            $in_quote
+          ? $matching_end_token{$quote_character}
+              ? $matching_end_token{$quote_character}
+              : $quote_character
+          : EMPTY_STRING;
          $self->[_rhere_target_list_] = $rhere_target_list;
  
          return;
@@ -9781,6 +9786,17 @@ sub do_quote {
      );
  } ## end sub do_quote
  
+# Some possible non-word quote delimiters, for preliminary checking
+my %is_punct_char;
+
+BEGIN {
+
+    my @q = qw# / " ' { } ( ) [ ] < > ; + - * | % ! x ~ = ? : . ^ & #;
+    push @q, '#';
+    push @q, ',';
+    @is_punct_char{@q} = (1) x scalar(@q);
+}
+
  sub follow_quoted_string {
  
      # scan for a specific token, skipping escaped characters
@@ -9820,12 +9836,20 @@ sub follow_quoted_string {
  "QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
      };
  
-    # get the corresponding end token
-    if ( $beginning_tok !~ /^\s*$/ ) {
-        $end_tok = matching_end_token($beginning_tok);
+    # for a non-blank token, get the corresponding end token
+    if (
+        $is_punct_char{$beginning_tok}
+        || ( length($beginning_tok)
+            && $beginning_tok !~ /^\s+$/ )
+      )
+    {
+        $end_tok =
+            $matching_end_token{$beginning_tok}
+          ? $matching_end_token{$beginning_tok}
+          : $beginning_tok;
      }
  
-    # a blank token means we must find and use the first non-blank one
+    # for a blank token, find and use the first non-blank one
      else {
          my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
  
@@ -9847,7 +9871,10 @@ sub follow_quoted_string {
                          $beginning_tok = $tok;
                          $quote_pos     = 0;
                      }
-                    $end_tok     = matching_end_token($beginning_tok);
+                    $end_tok =
+                        $matching_end_token{$beginning_tok}
+                      ? $matching_end_token{$beginning_tok}
+                      : $beginning_tok;
                      $quote_depth = 1;
                      last;
                  }
@@ -9869,7 +9896,7 @@ sub follow_quoted_string {
      # Case 1 (rare): loop for case of alphanumeric quote delimiter..
      # "quote_pos" is the position the current word to begin searching
      #----------------------------------------------------------------
-    if ( $beginning_tok =~ /\w/ ) {
+    if ( !$is_punct_char{$beginning_tok} && $beginning_tok =~ /\w/ ) {
  
          # Note this because it is not recommended practice except
          # for obfuscated perl contests
@@ -10156,29 +10183,6 @@ sub show_tokens {
      return;
  } ## end sub show_tokens
  
-{    ## closure for sub matching end token
-    my %matching_end_token;
-
-    BEGIN {
-        %matching_end_token = (
-            '{' => '}',
-            '(' => ')',
-            '[' => ']',
-            '<' => '>',
-        );
-    } ## end BEGIN
-
-    sub matching_end_token {
-
-        # return closing character for a pattern
-        my $beginning_token = shift;
-        if ( $matching_end_token{$beginning_token} ) {
-            return $matching_end_token{$beginning_token};
-        }
-        return ($beginning_token);
-    } ## end sub matching_end_token
-}
-
  sub dump_token_types {
      my ( $class, $fh ) = @_;
  
@@ -10917,5 +10921,12 @@ BEGIN {
      #  __DATA__ __END__
  
      @is_keyword{@Keywords} = (1) x scalar(@Keywords);
+
+    %matching_end_token = (
+        '{' => '}',
+        '(' => ')',
+        '[' => ']',
+        '<' => '>',
+    );
  } ## end BEGIN
  1;
author	Steve Hancock <perltidy@users.sourceforge.net>
	Fri, 22 Sep 2023 03:33:11 +0000 (20:33 -0700)
committer	Steve Hancock <perltidy@users.sourceforge.net>
	Fri, 22 Sep 2023 03:33:11 +0000 (20:33 -0700)
lib/Perl/Tidy.pm		patch | blob | history
lib/Perl/Tidy/Formatter.pm		patch | blob | history
lib/Perl/Tidy/Tokenizer.pm		patch | blob | history