rewrote logic to guess if divide or pattern

author Steve Hancock <perltidy@users.sourceforge.net>

Thu, 15 Oct 2020 03:01:27 +0000 (20:01 -0700)

committer Steve Hancock <perltidy@users.sourceforge.net>

Thu, 15 Oct 2020 03:01:27 +0000 (20:01 -0700)
author Steve Hancock <perltidy@users.sourceforge.net>
Thu, 15 Oct 2020 03:01:27 +0000 (20:01 -0700)
committer Steve Hancock <perltidy@users.sourceforge.net>
Thu, 15 Oct 2020 03:01:27 +0000 (20:01 -0700)
diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm

index d1a988989176fce2587b502b2f6580ef5e574e5b..568515239524abcd495cd7e6b68a86efd0b3d133 100644 (file)
--- a/lib/Perl/Tidy/Tokenizer.pm
+++ b/lib/Perl/Tidy/Tokenizer.pm
@@ -5403,6 +5403,20 @@ sub guess_if_pattern_or_division {
          $i = $ibeg + 1;
          my $next_token = $rtokens->[$i];    # first token after slash
  
+        # One of the things we can look at is the spacing around the slash.
+        # There # are four possible spacings around the first slash:
+        #
+        #     return pi/two;#/;     -/-
+        #     return pi/ two;#/;    -/+
+        #     return pi / two;#/;   +/+
+        #     return pi /two;#/;    +/-   <-- possible pattern
+        #
+        # Spacing rule: a space before the slash but not after the slash
+        # usually indicates a pattern.  We can use this to break ties. 
+       
+        my $is_pattern_by_spacing =
+          ( $i > 1 && $next_token ne ' ' && $rtokens->[ $i - 2 ] eq ' ' );
+
          # look for a possible ending / on this line..
          my $in_quote        = 1;
          my $quote_depth     = 0;
@@ -5418,50 +5432,89 @@ sub guess_if_pattern_or_division {
  
          if ($in_quote) {
  
-            # we didn't find an ending / on this line,
-            # so we bias towards division
+            # we didn't find an ending / on this line, so we bias towards division
              if ( $divide_expected >= 0 ) {
                  $is_pattern = 0;
                  $msg .= "division (no ending / on this line)\n";
              }
              else {
+
+                # going down the rabbit hole...
                  $msg        = "multi-line pattern (division not possible)\n";
                  $is_pattern = 1;
              }
-
          }
  
-        # we found an ending /, so we bias towards a pattern
+        # we found an ending /, so we bias slightly towards a pattern
          else {
  
-            if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
+            my $pattern_expected =
+              pattern_expected( $i, $rtokens, $max_token_index );
  
+            if ( $pattern_expected >= 0 ) {
+
+                # pattern looks possible...
                  if ( $divide_expected >= 0 ) {
  
-                    if ( $i - $ibeg > 60 ) {
-                        $msg .= "division (matching / too distant)\n";
+                    # Both pattern and divide can work here...
+
+                    # A very common bare word in math expressions is 'pi'
+                    if ( $last_nonblank_token eq 'pi' ) {
+                        $msg .= "division (pattern works too but saw 'pi')\n";
                          $is_pattern = 0;
                      }
-                    else {
-                        $msg .= "pattern (but division possible too)\n";
+
+                    # A very common bare word in pattern expressions is 'ok'
+                    elsif ( $last_nonblank_token eq 'ok' ) {
+                        $msg .= "pattern (division works too but saw 'ok')\n";
+                        $is_pattern = 1;
+                    }
+
+                   # If one rule is more definite, use it 
+                    elsif ( $divide_expected > $pattern_expected ) {
+                        $msg .=
+                          "division (more likely based on following tokens)\n";
+                        $is_pattern = 0;
+                    }
+
+                    # otherwise, use the spacing rule
+                    elsif ($is_pattern_by_spacing) {
+                        $msg .=
+"pattern (guess on spacing, but division possible too)\n";
                          $is_pattern = 1;
                      }
+                    else {
+                        $msg .=
+"division (guess on spacing, but pattern is possible too)\n";
+                        $is_pattern = 0;
+                    }
                  }
+
+                # divide_expected < 0 means divide can not work here
                  else {
                      $is_pattern = 1;
                      $msg .= "pattern (division not possible)\n";
                  }
              }
+
+            # pattern does not look possible...
              else {
  
                  if ( $divide_expected >= 0 ) {
                      $is_pattern = 0;
                      $msg .= "division (pattern not possible)\n";
                  }
+
+                # Neither pattern nor divide look possible...go by spacing
                  else {
-                    $is_pattern = 1;
-                    $msg .=
-                      "pattern (uncertain, but division would not work here)\n";
+                    if ($is_pattern_by_spacing) {
+                        $msg .= "pattern (guess on spacing)\n";
+                        $is_pattern = 1;
+                    }
+                    else {
+                        $msg .= "division (guess on spacing)\n";
+                        $is_pattern = 0;
+                    }
                  }
              }
          }
@@ -6885,16 +6938,29 @@ sub find_angle_operator_termination {
                  report_possible_bug();
              }
  
+            # count blanks on inside of brackets
+            my $blank_count = 0;
+            $blank_count++ if ( $str =~ /<\s+/ );
+            $blank_count++ if ( $str =~ /\s+>/ );
+
              # Now let's see where we stand....
              # OK if math op not possible
              if ( $expecting == TERM ) {
              }
  
-            # OK if there are no more than 2 pre-tokens inside
+            # OK if there are no more than 2 non-blank pre-tokens inside
              # (not possible to write 2 token math between < and >)
              # This catches most common cases
-            elsif ( $i <= $i_beg + 3 ) {
-                write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
+            elsif ( $i <= $i_beg + 3 + $blank_count ) {
+
+                # No longer any need to document this common case
+                ## write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
+            }
+
+            # OK if there is some kind of identifier inside
+            #   print $fh <tvg::INPUT>;
+            elsif ( $str =~ /^<\s*\$?(\w|::|\s)+\s*>$/ ) {
+                write_diagnostics("ANGLE (contains identifier): $str\n");
              }
  
              # Not sure..
diff --git a/local-docs/BugLog.pod b/local-docs/BugLog.pod

index de3a5b3e384dd1a15f8f4a130d31e58deea85404..55a1a093cc2b2b657771266c26094da52522d9da 100644 (file)
--- a/local-docs/BugLog.pod
+++ b/local-docs/BugLog.pod
@@ -1,5 +1,17 @@
  =head1 Issues fixed after release 20201001
  
+=item b<improve guess for pattern or division>
+
+The following line caused a tokenization error in which the two slashes
+were parsed as a pattern.  
+
+   my $masksize = ceil( Opcode::opcodes / 8 );    # /
+
+This problem was discovered in random testing.  When a slash follows a bareword
+whose prototype is not known to perltidy, it has to guess whether the slash
+starts a pattern or is a division.  The guessing logic was rewritten and 
+improved.
+
  =item b<fix -bos to keep isolated semicolon breaks after block braces>
  
  The flag B<-bos>,  or B<--break-at-old-semicolon-breakpoints>, keeps breaks
author	Steve Hancock <perltidy@users.sourceforge.net>
	Thu, 15 Oct 2020 03:01:27 +0000 (20:01 -0700)
committer	Steve Hancock <perltidy@users.sourceforge.net>
	Thu, 15 Oct 2020 03:01:27 +0000 (20:01 -0700)
lib/Perl/Tidy/Tokenizer.pm		patch \| blob \| history
local-docs/BugLog.pod		patch \| blob \| history