From afebe2f5529492eccfdde549ec4f91497001522f Mon Sep 17 00:00:00 2001
From: Steve Hancock <perltidy@users.sourceforge.net>
Date: Wed, 14 Oct 2020 20:01:27 -0700
Subject: [PATCH] rewrote logic to guess if divide or pattern

---
 lib/Perl/Tidy/Tokenizer.pm | 96 ++++++++++++++++++++++++++++++++------
 local-docs/BugLog.pod      | 12 +++++
 2 files changed, 93 insertions(+), 15 deletions(-)

diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm
index d1a98898..56851523 100644
--- a/lib/Perl/Tidy/Tokenizer.pm
+++ b/lib/Perl/Tidy/Tokenizer.pm
@@ -5403,6 +5403,20 @@ sub guess_if_pattern_or_division {
         $i = $ibeg + 1;
         my $next_token = $rtokens->[$i];    # first token after slash
 
+        # One of the things we can look at is the spacing around the slash.
+        # There # are four possible spacings around the first slash:
+        #
+        #     return pi/two;#/;     -/-
+        #     return pi/ two;#/;    -/+
+        #     return pi / two;#/;   +/+
+        #     return pi /two;#/;    +/-   <-- possible pattern
+        #
+        # Spacing rule: a space before the slash but not after the slash
+        # usually indicates a pattern.  We can use this to break ties. 
+       
+        my $is_pattern_by_spacing =
+          ( $i > 1 && $next_token ne ' ' && $rtokens->[ $i - 2 ] eq ' ' );
+
         # look for a possible ending / on this line..
         my $in_quote        = 1;
         my $quote_depth     = 0;
@@ -5418,50 +5432,89 @@ sub guess_if_pattern_or_division {
 
         if ($in_quote) {
 
-            # we didn't find an ending / on this line,
-            # so we bias towards division
+            # we didn't find an ending / on this line, so we bias towards division
             if ( $divide_expected >= 0 ) {
                 $is_pattern = 0;
                 $msg .= "division (no ending / on this line)\n";
             }
             else {
+
+                # going down the rabbit hole...
                 $msg        = "multi-line pattern (division not possible)\n";
                 $is_pattern = 1;
             }
-
         }
 
-        # we found an ending /, so we bias towards a pattern
+        # we found an ending /, so we bias slightly towards a pattern
         else {
 
-            if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
+            my $pattern_expected =
+              pattern_expected( $i, $rtokens, $max_token_index );
 
+            if ( $pattern_expected >= 0 ) {
+
+                # pattern looks possible...
                 if ( $divide_expected >= 0 ) {
 
-                    if ( $i - $ibeg > 60 ) {
-                        $msg .= "division (matching / too distant)\n";
+                    # Both pattern and divide can work here...
+
+                    # A very common bare word in math expressions is 'pi'
+                    if ( $last_nonblank_token eq 'pi' ) {
+                        $msg .= "division (pattern works too but saw 'pi')\n";
                         $is_pattern = 0;
                     }
-                    else {
-                        $msg .= "pattern (but division possible too)\n";
+
+                    # A very common bare word in pattern expressions is 'ok'
+                    elsif ( $last_nonblank_token eq 'ok' ) {
+                        $msg .= "pattern (division works too but saw 'ok')\n";
+                        $is_pattern = 1;
+                    }
+
+		    # If one rule is more definite, use it 
+                    elsif ( $divide_expected > $pattern_expected ) {
+                        $msg .=
+                          "division (more likely based on following tokens)\n";
+                        $is_pattern = 0;
+                    }
+
+                    # otherwise, use the spacing rule
+                    elsif ($is_pattern_by_spacing) {
+                        $msg .=
+"pattern (guess on spacing, but division possible too)\n";
                         $is_pattern = 1;
                     }
+                    else {
+                        $msg .=
+"division (guess on spacing, but pattern is possible too)\n";
+                        $is_pattern = 0;
+                    }
                 }
+
+                # divide_expected < 0 means divide can not work here
                 else {
                     $is_pattern = 1;
                     $msg .= "pattern (division not possible)\n";
                 }
             }
+
+            # pattern does not look possible...
             else {
 
                 if ( $divide_expected >= 0 ) {
                     $is_pattern = 0;
                     $msg .= "division (pattern not possible)\n";
                 }
+
+                # Neither pattern nor divide look possible...go by spacing
                 else {
-                    $is_pattern = 1;
-                    $msg .=
-                      "pattern (uncertain, but division would not work here)\n";
+                    if ($is_pattern_by_spacing) {
+                        $msg .= "pattern (guess on spacing)\n";
+                        $is_pattern = 1;
+                    }
+                    else {
+                        $msg .= "division (guess on spacing)\n";
+                        $is_pattern = 0;
+                    }
                 }
             }
         }
@@ -6885,16 +6938,29 @@ sub find_angle_operator_termination {
                 report_possible_bug();
             }
 
+            # count blanks on inside of brackets
+            my $blank_count = 0;
+            $blank_count++ if ( $str =~ /<\s+/ );
+            $blank_count++ if ( $str =~ /\s+>/ );
+
             # Now let's see where we stand....
             # OK if math op not possible
             if ( $expecting == TERM ) {
             }
 
-            # OK if there are no more than 2 pre-tokens inside
+            # OK if there are no more than 2 non-blank pre-tokens inside
             # (not possible to write 2 token math between < and >)
             # This catches most common cases
-            elsif ( $i <= $i_beg + 3 ) {
-                write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
+            elsif ( $i <= $i_beg + 3 + $blank_count ) {
+
+                # No longer any need to document this common case
+                ## write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
+            }
+
+            # OK if there is some kind of identifier inside
+            #   print $fh <tvg::INPUT>;
+            elsif ( $str =~ /^<\s*\$?(\w|::|\s)+\s*>$/ ) {
+                write_diagnostics("ANGLE (contains identifier): $str\n");
             }
 
             # Not sure..
diff --git a/local-docs/BugLog.pod b/local-docs/BugLog.pod
index de3a5b3e..55a1a093 100644
--- a/local-docs/BugLog.pod
+++ b/local-docs/BugLog.pod
@@ -1,5 +1,17 @@
 =head1 Issues fixed after release 20201001
 
+=item b<improve guess for pattern or division>
+
+The following line caused a tokenization error in which the two slashes
+were parsed as a pattern.  
+
+   my $masksize = ceil( Opcode::opcodes / 8 );    # /
+
+This problem was discovered in random testing.  When a slash follows a bareword
+whose prototype is not known to perltidy, it has to guess whether the slash
+starts a pattern or is a division.  The guessing logic was rewritten and 
+improved.
+
 =item b<fix -bos to keep isolated semicolon breaks after block braces>
 
 The flag B<-bos>,  or B<--break-at-old-semicolon-breakpoints>, keeps breaks
-- 
2.39.5