-########################################################################
+#####################################################################
#
-# the Perl::Tidy::Tokenizer package is essentially a filter which
+# The Perl::Tidy::Tokenizer package is essentially a filter which
# reads lines of perl source code from a source object and provides
# corresponding tokenized lines through its get_line() method. Lines
# flow from the source_object to the caller like this:
# The Tokenizer returns a reference to a data structure 'line_of_tokens'
# containing one tokenized line for each call to its get_line() method.
#
-# WARNING: This is not a real class yet. Only one tokenizer my be used.
+# WARNING: This is not a real class. Only one tokenizer may be used.
#
########################################################################
package Perl::Tidy::Tokenizer;
use strict;
use warnings;
-our $VERSION = '20200110';
+our $VERSION = '20210717';
use Perl::Tidy::LineBuffer;
-
-BEGIN {
-
- # Caution: these debug flags produce a lot of output
- # They should all be 0 except when debugging small scripts
-
- use constant TOKENIZER_DEBUG_FLAG_EXPECT => 0;
- use constant TOKENIZER_DEBUG_FLAG_NSCAN => 0;
- use constant TOKENIZER_DEBUG_FLAG_QUOTE => 0;
- use constant TOKENIZER_DEBUG_FLAG_SCAN_ID => 0;
- use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;
-
- my $debug_warning = sub {
- print STDOUT "TOKENIZER_DEBUGGING with key $_[0]\n";
- };
-
- TOKENIZER_DEBUG_FLAG_EXPECT && $debug_warning->('EXPECT');
- TOKENIZER_DEBUG_FLAG_NSCAN && $debug_warning->('NSCAN');
- TOKENIZER_DEBUG_FLAG_QUOTE && $debug_warning->('QUOTE');
- TOKENIZER_DEBUG_FLAG_SCAN_ID && $debug_warning->('SCAN_ID');
- TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE');
-
-}
-
use Carp;
# PACKAGE VARIABLES for processing an entire FILE.
+# These must be package variables because most may get localized during
+# processing. Most are initialized in sub prepare_for_a_new_file.
use vars qw{
$tokenizer_self
%is_block_function
%is_block_list_function
%saw_function_definition
+ %saw_use_module
$brace_depth
$paren_depth
@starting_line_of_current_depth
};
-# GLOBAL CONSTANTS for routines in this package
+# GLOBAL CONSTANTS for routines in this package,
+# Initialized in a BEGIN block.
use vars qw{
%is_indirect_object_taker
%is_block_operator
@opening_brace_names
@closing_brace_names
%is_keyword_taking_list
- %is_keyword_taking_optional_args
+ %is_keyword_taking_optional_arg
+ %is_keyword_rejecting_slash_as_pattern_delimiter
+ %is_keyword_rejecting_question_as_pattern_delimiter
%is_q_qq_qw_qx_qr_s_y_tr_m
%is_sub
%is_package
+ %is_comma_question_colon
+ %other_line_endings
+ $code_skipping_pattern_begin
+ $code_skipping_pattern_end
};
+# GLOBAL VARIABLES which are constant after being configured by user-supplied
+# parameters. They remain constant as a file is being processed.
+my (
+
+ $rOpts_code_skipping,
+ $code_skipping_pattern_begin,
+ $code_skipping_pattern_end,
+);
+
# possible values of operator_expected()
use constant TERM => -1;
use constant UNKNOWN => 0;
# Maximum number of little messages; probably need not be changed.
use constant MAX_NAG_MESSAGES => 6;
-{
+BEGIN {
+
+ # Array index names for $self
+ my $i = 0;
+ use constant {
+ _rhere_target_list_ => $i++,
+ _in_here_doc_ => $i++,
+ _here_doc_target_ => $i++,
+ _here_quote_character_ => $i++,
+ _in_data_ => $i++,
+ _in_end_ => $i++,
+ _in_format_ => $i++,
+ _in_error_ => $i++,
+ _in_pod_ => $i++,
+ _in_skipped_ => $i++,
+ _in_attribute_list_ => $i++,
+ _in_quote_ => $i++,
+ _quote_target_ => $i++,
+ _line_start_quote_ => $i++,
+ _starting_level_ => $i++,
+ _know_starting_level_ => $i++,
+ _tabsize_ => $i++,
+ _indent_columns_ => $i++,
+ _look_for_hash_bang_ => $i++,
+ _trim_qw_ => $i++,
+ _continuation_indentation_ => $i++,
+ _outdent_labels_ => $i++,
+ _last_line_number_ => $i++,
+ _saw_perl_dash_P_ => $i++,
+ _saw_perl_dash_w_ => $i++,
+ _saw_use_strict_ => $i++,
+ _saw_v_string_ => $i++,
+ _hit_bug_ => $i++,
+ _look_for_autoloader_ => $i++,
+ _look_for_selfloader_ => $i++,
+ _saw_autoloader_ => $i++,
+ _saw_selfloader_ => $i++,
+ _saw_hash_bang_ => $i++,
+ _saw_end_ => $i++,
+ _saw_data_ => $i++,
+ _saw_negative_indentation_ => $i++,
+ _started_tokenizing_ => $i++,
+ _line_buffer_object_ => $i++,
+ _debugger_object_ => $i++,
+ _diagnostics_object_ => $i++,
+ _logger_object_ => $i++,
+ _unexpected_error_count_ => $i++,
+ _started_looking_for_here_target_at_ => $i++,
+ _nearly_matched_here_target_at_ => $i++,
+ _line_of_text_ => $i++,
+ _rlower_case_labels_at_ => $i++,
+ _extended_syntax_ => $i++,
+ _maximum_level_ => $i++,
+ _true_brace_error_count_ => $i++,
+ _rOpts_maximum_level_errors_ => $i++,
+ _rOpts_maximum_unexpected_errors_ => $i++,
+ _rOpts_logfile_ => $i++,
+ _rOpts_ => $i++,
+ };
+}
+
+{ ## closure for subs to count instances
# methods to count instances
my $_count = 0;
return;
}
+sub AUTOLOAD {
+
+ # Catch any undefined sub calls so that we are sure to get
+ # some diagnostic information. This sub should never be called
+ # except for a programming error.
+ our $AUTOLOAD;
+ return if ( $AUTOLOAD =~ /\bDESTROY$/ );
+ my ( $pkg, $fname, $lno ) = caller();
+ my $my_package = __PACKAGE__;
+ print STDERR <<EOM;
+======================================================================
+Error detected in package '$my_package', version $VERSION
+Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
+Called from package: '$pkg'
+Called from File '$fname' at line '$lno'
+This error is probably due to a recent programming change
+======================================================================
+EOM
+ exit 1;
+}
+
+sub Die {
+ my ($msg) = @_;
+ Perl::Tidy::Die($msg);
+ croak "unexpected return from Perl::Tidy::Die";
+}
+
+sub bad_pattern {
+
+ # See if a pattern will compile. We have to use a string eval here,
+ # but it should be safe because the pattern has been constructed
+ # by this program.
+ my ($pattern) = @_;
+ eval "'##'=~/$pattern/";
+ return $@;
+}
+
+sub make_code_skipping_pattern {
+ my ( $rOpts, $opt_name, $default ) = @_;
+ my $param = $rOpts->{$opt_name};
+ unless ($param) { $param = $default }
+ $param =~ s/^\s*//; # allow leading spaces to be like format-skipping
+ if ( $param !~ /^#/ ) {
+ Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
+ }
+ my $pattern = '^\s*' . $param . '\b';
+ if ( bad_pattern($pattern) ) {
+ Die(
+"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
+ );
+ }
+ return $pattern;
+}
+
sub check_options {
# Check Tokenizer parameters
$is_sub{$word} = 1;
}
}
+
+ $rOpts_code_skipping = $rOpts->{'code-skipping'};
+ $code_skipping_pattern_begin =
+ make_code_skipping_pattern( $rOpts, 'code-skipping-begin', '#<<V' );
+ $code_skipping_pattern_end =
+ make_code_skipping_pattern( $rOpts, 'code-skipping-end', '#>>V' );
return;
}
look_for_selfloader => 1,
starting_line_number => 1,
extended_syntax => 0,
+ rOpts => {},
);
my %args = ( %defaults, @args );
# we are given an object with a get_line() method to supply source lines
my $source_object = $args{source_object};
+ my $rOpts = $args{rOpts};
# we create another object with a get_line() and peek_ahead() method
my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object);
# Tokenizer state data is as follows:
- # _rhere_target_list reference to list of here-doc targets
- # _here_doc_target the target string for a here document
- # _here_quote_character the type of here-doc quoting (" ' ` or none)
- # to determine if interpolation is done
- # _quote_target character we seek if chasing a quote
- # _line_start_quote line where we started looking for a long quote
- # _in_here_doc flag indicating if we are in a here-doc
- # _in_pod flag set if we are in pod documentation
- # _in_error flag set if we saw severe error (binary in script)
- # _in_data flag set if we are in __DATA__ section
- # _in_end flag set if we are in __END__ section
- # _in_format flag set if we are in a format description
- # _in_attribute_list flag telling if we are looking for attributes
- # _in_quote flag telling if we are chasing a quote
- # _starting_level indentation level of first line
- # _line_buffer_object object with get_line() method to supply source code
- # _diagnostics_object place to write debugging information
- # _unexpected_error_count error count used to limit output
- # _lower_case_labels_at line numbers where lower case labels seen
- # _hit_bug program bug detected
- $tokenizer_self = {
- _rhere_target_list => [],
- _in_here_doc => 0,
- _here_doc_target => "",
- _here_quote_character => "",
- _in_data => 0,
- _in_end => 0,
- _in_format => 0,
- _in_error => 0,
- _in_pod => 0,
- _in_attribute_list => 0,
- _in_quote => 0,
- _quote_target => "",
- _line_start_quote => -1,
- _starting_level => $args{starting_level},
- _know_starting_level => defined( $args{starting_level} ),
- _tabsize => $args{tabsize},
- _indent_columns => $args{indent_columns},
- _look_for_hash_bang => $args{look_for_hash_bang},
- _trim_qw => $args{trim_qw},
- _continuation_indentation => $args{continuation_indentation},
- _outdent_labels => $args{outdent_labels},
- _last_line_number => $args{starting_line_number} - 1,
- _saw_perl_dash_P => 0,
- _saw_perl_dash_w => 0,
- _saw_use_strict => 0,
- _saw_v_string => 0,
- _hit_bug => 0,
- _look_for_autoloader => $args{look_for_autoloader},
- _look_for_selfloader => $args{look_for_selfloader},
- _saw_autoloader => 0,
- _saw_selfloader => 0,
- _saw_hash_bang => 0,
- _saw_end => 0,
- _saw_data => 0,
- _saw_negative_indentation => 0,
- _started_tokenizing => 0,
- _line_buffer_object => $line_buffer_object,
- _debugger_object => $args{debugger_object},
- _diagnostics_object => $args{diagnostics_object},
- _logger_object => $args{logger_object},
- _unexpected_error_count => 0,
- _started_looking_for_here_target_at => 0,
- _nearly_matched_here_target_at => undef,
- _line_text => "",
- _rlower_case_labels_at => undef,
- _extended_syntax => $args{extended_syntax},
- };
+ # _rhere_target_list_ reference to list of here-doc targets
+ # _here_doc_target_ the target string for a here document
+ # _here_quote_character_ the type of here-doc quoting (" ' ` or none)
+ # to determine if interpolation is done
+ # _quote_target_ character we seek if chasing a quote
+ # _line_start_quote_ line where we started looking for a long quote
+ # _in_here_doc_ flag indicating if we are in a here-doc
+ # _in_pod_ flag set if we are in pod documentation
+ # _in_skipped_ flag set if we are in a skipped section
+ # _in_error_ flag set if we saw severe error (binary in script)
+ # _in_data_ flag set if we are in __DATA__ section
+ # _in_end_ flag set if we are in __END__ section
+ # _in_format_ flag set if we are in a format description
+ # _in_attribute_list_ flag telling if we are looking for attributes
+ # _in_quote_ flag telling if we are chasing a quote
+ # _starting_level_ indentation level of first line
+ # _line_buffer_object_ object with get_line() method to supply source code
+ # _diagnostics_object_ place to write debugging information
+ # _unexpected_error_count_ error count used to limit output
+ # _lower_case_labels_at_ line numbers where lower case labels seen
+ # _hit_bug_ program bug detected
+
+ my $self = [];
+ $self->[_rhere_target_list_] = [];
+ $self->[_in_here_doc_] = 0;
+ $self->[_here_doc_target_] = "";
+ $self->[_here_quote_character_] = "";
+ $self->[_in_data_] = 0;
+ $self->[_in_end_] = 0;
+ $self->[_in_format_] = 0;
+ $self->[_in_error_] = 0;
+ $self->[_in_pod_] = 0;
+ $self->[_in_skipped_] = 0;
+ $self->[_in_attribute_list_] = 0;
+ $self->[_in_quote_] = 0;
+ $self->[_quote_target_] = "";
+ $self->[_line_start_quote_] = -1;
+ $self->[_starting_level_] = $args{starting_level};
+ $self->[_know_starting_level_] = defined( $args{starting_level} );
+ $self->[_tabsize_] = $args{tabsize};
+ $self->[_indent_columns_] = $args{indent_columns};
+ $self->[_look_for_hash_bang_] = $args{look_for_hash_bang};
+ $self->[_trim_qw_] = $args{trim_qw};
+ $self->[_continuation_indentation_] = $args{continuation_indentation};
+ $self->[_outdent_labels_] = $args{outdent_labels};
+ $self->[_last_line_number_] = $args{starting_line_number} - 1;
+ $self->[_saw_perl_dash_P_] = 0;
+ $self->[_saw_perl_dash_w_] = 0;
+ $self->[_saw_use_strict_] = 0;
+ $self->[_saw_v_string_] = 0;
+ $self->[_hit_bug_] = 0;
+ $self->[_look_for_autoloader_] = $args{look_for_autoloader};
+ $self->[_look_for_selfloader_] = $args{look_for_selfloader};
+ $self->[_saw_autoloader_] = 0;
+ $self->[_saw_selfloader_] = 0;
+ $self->[_saw_hash_bang_] = 0;
+ $self->[_saw_end_] = 0;
+ $self->[_saw_data_] = 0;
+ $self->[_saw_negative_indentation_] = 0;
+ $self->[_started_tokenizing_] = 0;
+ $self->[_line_buffer_object_] = $line_buffer_object;
+ $self->[_debugger_object_] = $args{debugger_object};
+ $self->[_diagnostics_object_] = $args{diagnostics_object};
+ $self->[_logger_object_] = $args{logger_object};
+ $self->[_unexpected_error_count_] = 0;
+ $self->[_started_looking_for_here_target_at_] = 0;
+ $self->[_nearly_matched_here_target_at_] = undef;
+ $self->[_line_of_text_] = "";
+ $self->[_rlower_case_labels_at_] = undef;
+ $self->[_extended_syntax_] = $args{extended_syntax};
+ $self->[_maximum_level_] = 0;
+ $self->[_true_brace_error_count_] = 0;
+ $self->[_rOpts_maximum_level_errors_] = $rOpts->{'maximum-level-errors'};
+ $self->[_rOpts_maximum_unexpected_errors_] =
+ $rOpts->{'maximum-unexpected-errors'};
+ $self->[_rOpts_logfile_] = $rOpts->{'logfile'};
+ $self->[_rOpts_] = $rOpts;
+ bless $self, $class;
+
+ $tokenizer_self = $self;
prepare_for_a_new_file();
find_starting_indentation_level();
- bless $tokenizer_self, $class;
-
# This is not a full class yet, so die if an attempt is made to
# create more than one object.
"Attempt to create more than 1 object in $class, which is not a true class yet\n";
}
- return $tokenizer_self;
+ return $self;
}
# interface to Perl::Tidy::Logger routines
sub warning {
my $msg = shift;
- my $logger_object = $tokenizer_self->{_logger_object};
+ my $logger_object = $tokenizer_self->[_logger_object_];
if ($logger_object) {
$logger_object->warning($msg);
}
sub complain {
my $msg = shift;
- my $logger_object = $tokenizer_self->{_logger_object};
+ my $logger_object = $tokenizer_self->[_logger_object_];
if ($logger_object) {
$logger_object->complain($msg);
}
sub write_logfile_entry {
my $msg = shift;
- my $logger_object = $tokenizer_self->{_logger_object};
+ my $logger_object = $tokenizer_self->[_logger_object_];
if ($logger_object) {
$logger_object->write_logfile_entry($msg);
}
}
sub interrupt_logfile {
- my $logger_object = $tokenizer_self->{_logger_object};
+ my $logger_object = $tokenizer_self->[_logger_object_];
if ($logger_object) {
$logger_object->interrupt_logfile();
}
}
sub resume_logfile {
- my $logger_object = $tokenizer_self->{_logger_object};
+ my $logger_object = $tokenizer_self->[_logger_object_];
if ($logger_object) {
$logger_object->resume_logfile();
}
}
sub increment_brace_error {
- my $logger_object = $tokenizer_self->{_logger_object};
+ my $logger_object = $tokenizer_self->[_logger_object_];
if ($logger_object) {
$logger_object->increment_brace_error();
}
}
sub report_definite_bug {
- $tokenizer_self->{_hit_bug} = 1;
- my $logger_object = $tokenizer_self->{_logger_object};
+ $tokenizer_self->[_hit_bug_] = 1;
+ my $logger_object = $tokenizer_self->[_logger_object_];
if ($logger_object) {
$logger_object->report_definite_bug();
}
sub brace_warning {
my $msg = shift;
- my $logger_object = $tokenizer_self->{_logger_object};
+ my $logger_object = $tokenizer_self->[_logger_object_];
if ($logger_object) {
$logger_object->brace_warning($msg);
}
}
sub get_saw_brace_error {
- my $logger_object = $tokenizer_self->{_logger_object};
+ my $logger_object = $tokenizer_self->[_logger_object_];
if ($logger_object) {
return $logger_object->get_saw_brace_error();
}
}
sub get_unexpected_error_count {
- my ($self) = shift;
- return $self->{_unexpected_error_count};
+ my ($self) = @_;
+ return $self->[_unexpected_error_count_];
}
# interface to Perl::Tidy::Diagnostics routines
sub write_diagnostics {
my $msg = shift;
- if ( $tokenizer_self->{_diagnostics_object} ) {
- $tokenizer_self->{_diagnostics_object}->write_diagnostics($msg);
+ if ( $tokenizer_self->[_diagnostics_object_] ) {
+ $tokenizer_self->[_diagnostics_object_]->write_diagnostics($msg);
}
return;
}
+sub get_maximum_level {
+ return $tokenizer_self->[_maximum_level_];
+}
+
sub report_tokenization_errors {
- my $self = shift;
- my $severe_error = $self->{_in_error};
+ my ($self) = @_;
+
+ # Report any tokenization errors and return a flag '$severe_error'.
+ # Set $severe_error = 1 if the tokenization errors are so severe that
+ # the formatter should not attempt to format the file. Instead, it will
+ # just output the file verbatim.
+
+ # set severe error flag if tokenizer has encountered file reading problems
+ # (i.e. unexpected binary characters)
+ my $severe_error = $self->[_in_error_];
+
+ my $maxle = $self->[_rOpts_maximum_level_errors_];
+ my $maxue = $self->[_rOpts_maximum_unexpected_errors_];
+ $maxle = 1 unless defined($maxle);
+ $maxue = 0 unless defined($maxue);
my $level = get_indentation_level();
- if ( $level != $tokenizer_self->{_starting_level} ) {
+ if ( $level != $tokenizer_self->[_starting_level_] ) {
warning("final indentation level: $level\n");
+ my $level_diff = $tokenizer_self->[_starting_level_] - $level;
+ if ( $level_diff < 0 ) { $level_diff = -$level_diff }
+
+ # Set severe error flag if the level error is greater than 1.
+ # The formatter can function for any level error but it is probably
+ # best not to attempt formatting for a high level error.
+ if ( $maxle >= 0 && $level_diff > $maxle ) {
+ $severe_error = 1;
+ warning(<<EOM);
+Formatting will be skipped because level error '$level_diff' exceeds -maxle=$maxle; use -maxle=-1 to force formatting
+EOM
+ }
}
check_final_nesting_depths();
- if ( $tokenizer_self->{_look_for_hash_bang}
- && !$tokenizer_self->{_saw_hash_bang} )
+ # Likewise, large numbers of brace errors usually indicate non-perl
+ # scirpts, so set the severe error flag at a low number. This is similar
+ # to the level check, but different because braces may balance but be
+ # incorrectly interlaced.
+ if ( $tokenizer_self->[_true_brace_error_count_] > 2 ) {
+ $severe_error = 1;
+ }
+
+ if ( $tokenizer_self->[_look_for_hash_bang_]
+ && !$tokenizer_self->[_saw_hash_bang_] )
{
warning(
"hit EOF without seeing hash-bang line; maybe don't need -x?\n");
}
- if ( $tokenizer_self->{_in_format} ) {
+ if ( $tokenizer_self->[_in_format_] ) {
warning("hit EOF while in format description\n");
}
- if ( $tokenizer_self->{_in_pod} ) {
+ if ( $tokenizer_self->[_in_skipped_] ) {
+ write_logfile_entry(
+ "hit EOF while in lines skipped with --code-skipping\n");
+ }
+
+ if ( $tokenizer_self->[_in_pod_] ) {
# Just write log entry if this is after __END__ or __DATA__
# because this happens to often, and it is not likely to be
# a parsing error.
- if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
+ if ( $tokenizer_self->[_saw_data_] || $tokenizer_self->[_saw_end_] ) {
write_logfile_entry(
"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
);
}
- if ( $tokenizer_self->{_in_here_doc} ) {
+ if ( $tokenizer_self->[_in_here_doc_] ) {
$severe_error = 1;
- my $here_doc_target = $tokenizer_self->{_here_doc_target};
+ my $here_doc_target = $tokenizer_self->[_here_doc_target_];
my $started_looking_for_here_target_at =
- $tokenizer_self->{_started_looking_for_here_target_at};
+ $tokenizer_self->[_started_looking_for_here_target_at_];
if ($here_doc_target) {
warning(
"hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
);
}
my $nearly_matched_here_target_at =
- $tokenizer_self->{_nearly_matched_here_target_at};
+ $tokenizer_self->[_nearly_matched_here_target_at_];
if ($nearly_matched_here_target_at) {
warning(
"NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
}
}
- if ( $tokenizer_self->{_in_quote} ) {
+ # Something is seriously wrong if we ended inside a quote
+ if ( $tokenizer_self->[_in_quote_] ) {
$severe_error = 1;
- my $line_start_quote = $tokenizer_self->{_line_start_quote};
- my $quote_target = $tokenizer_self->{_quote_target};
+ my $line_start_quote = $tokenizer_self->[_line_start_quote_];
+ my $quote_target = $tokenizer_self->[_quote_target_];
my $what =
- ( $tokenizer_self->{_in_attribute_list} )
+ ( $tokenizer_self->[_in_attribute_list_] )
? "attribute list"
: "quote/pattern";
warning(
);
}
- if ( $tokenizer_self->{_hit_bug} ) {
+ if ( $tokenizer_self->[_hit_bug_] ) {
$severe_error = 1;
}
- my $logger_object = $tokenizer_self->{_logger_object};
-
-# TODO: eventually may want to activate this to cause file to be output verbatim
- if (0) {
-
- # Set the severe error for a fairly high warning count because
- # some of the warnings do not harm formatting, such as duplicate
- # sub names.
- my $warning_count = $logger_object->{_warning_count};
- if ( $warning_count > 50 ) {
- $severe_error = 1;
- }
-
- # Brace errors are significant, so set the severe error flag at
- # a low number.
- my $saw_brace_error = $logger_object->{_saw_brace_error};
- if ( $saw_brace_error > 2 ) {
- $severe_error = 1;
- }
+ # Multiple "unexpected" type tokenization errors usually indicate parsing
+ # non-perl scripts, or that something is seriously wrong, so we should
+ # avoid formatting them. This can happen for example if we run perltidy on
+ # a shell script or an html file. But unfortunately this check can
+ # interfere with some extended syntaxes, such as RPerl, so it has to be off
+ # by default.
+ my $ue_count = $tokenizer_self->[_unexpected_error_count_];
+ if ( $maxue > 0 && $ue_count > $maxue ) {
+ warning(<<EOM);
+Formatting will be skipped since unexpected token count = $ue_count > -maxue=$maxue; use -maxue=0 to force formatting
+EOM
+ $severe_error = 1;
}
- unless ( $tokenizer_self->{_saw_perl_dash_w} ) {
+ unless ( $tokenizer_self->[_saw_perl_dash_w_] ) {
if ( $] < 5.006 ) {
write_logfile_entry("Suggest including '-w parameter'\n");
}
}
}
- if ( $tokenizer_self->{_saw_perl_dash_P} ) {
+ if ( $tokenizer_self->[_saw_perl_dash_P_] ) {
write_logfile_entry("Use of -P parameter for defines is discouraged\n");
}
- unless ( $tokenizer_self->{_saw_use_strict} ) {
+ unless ( $tokenizer_self->[_saw_use_strict_] ) {
write_logfile_entry("Suggest including 'use strict;'\n");
}
# it is suggested that labels have at least one upper case character
# for legibility and to avoid code breakage as new keywords are introduced
- if ( $tokenizer_self->{_rlower_case_labels_at} ) {
+ if ( $tokenizer_self->[_rlower_case_labels_at_] ) {
my @lower_case_labels_at =
- @{ $tokenizer_self->{_rlower_case_labels_at} };
+ @{ $tokenizer_self->[_rlower_case_labels_at_] };
write_logfile_entry(
"Suggest using upper case characters in label(s)\n");
local $" = ')(';
# warn if this version can't handle v-strings
my $tok = shift;
- unless ( $tokenizer_self->{_saw_v_string} ) {
- $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number};
+ unless ( $tokenizer_self->[_saw_v_string_] ) {
+ $tokenizer_self->[_saw_v_string_] =
+ $tokenizer_self->[_last_line_number_];
}
if ( $] < 5.006 ) {
warning(
}
sub get_input_line_number {
- return $tokenizer_self->{_last_line_number};
+ return $tokenizer_self->[_last_line_number_];
}
# returns the next tokenized line
# USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth,
# $square_bracket_depth, $paren_depth
- my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
- $tokenizer_self->{_line_text} = $input_line;
+ my $input_line = $tokenizer_self->[_line_buffer_object_]->get_line();
+ $tokenizer_self->[_line_of_text_] = $input_line;
return unless ($input_line);
- my $input_line_number = ++$tokenizer_self->{_last_line_number};
+ my $input_line_number = ++$tokenizer_self->[_last_line_number_];
+
+ my $write_logfile_entry = sub {
+ my ($msg) = @_;
+ write_logfile_entry("Line $input_line_number: $msg");
+ };
# Find and remove what characters terminate this line, including any
# control r
my $input_line_separator = "";
if ( chomp($input_line) ) { $input_line_separator = $/ }
- # TODO: what other characters should be included here?
- if ( $input_line =~ s/((\r|\035|\032)+)$// ) {
- $input_line_separator = $2 . $input_line_separator;
+ # The first test here very significantly speeds things up, but be sure to
+ # keep the regex and hash %other_line_endings the same.
+ if ( $other_line_endings{ substr( $input_line, -1 ) } ) {
+ if ( $input_line =~ s/((\r|\035|\032)+)$// ) {
+ $input_line_separator = $2 . $input_line_separator;
+ }
}
# for backwards compatibility we keep the line text terminated with
# a newline character
$input_line .= "\n";
- $tokenizer_self->{_line_text} = $input_line; # update
+ $tokenizer_self->[_line_of_text_] = $input_line; # update
# create a data structure describing this line which will be
# returned to the caller.
_line_type => 'EOF',
_line_text => $input_line,
_line_number => $input_line_number,
- _rtoken_type => undef,
- _rtokens => undef,
- _rlevels => undef,
- _rslevels => undef,
- _rblock_type => undef,
- _rcontainer_type => undef,
- _rcontainer_environment => undef,
- _rtype_sequence => undef,
- _rnesting_tokens => undef,
- _rci_levels => undef,
- _rnesting_blocks => undef,
_guessed_indentation_level => 0,
- _starting_in_quote => 0, # to be set by subroutine
- _ending_in_quote => 0,
- _curly_brace_depth => $brace_depth,
- _square_bracket_depth => $square_bracket_depth,
- _paren_depth => $paren_depth,
- _quote_character => '',
+ _curly_brace_depth => $brace_depth,
+ _square_bracket_depth => $square_bracket_depth,
+ _paren_depth => $paren_depth,
+ _quote_character => '',
+## _rtoken_type => undef,
+## _rtokens => undef,
+## _rlevels => undef,
+## _rslevels => undef,
+## _rblock_type => undef,
+## _rcontainer_type => undef,
+## _rcontainer_environment => undef,
+## _rtype_sequence => undef,
+## _rnesting_tokens => undef,
+## _rci_levels => undef,
+## _rnesting_blocks => undef,
+## _starting_in_quote => 0,
+## _ending_in_quote => 0,
};
# must print line unchanged if we are in a here document
- if ( $tokenizer_self->{_in_here_doc} ) {
+ if ( $tokenizer_self->[_in_here_doc_] ) {
$line_of_tokens->{_line_type} = 'HERE';
- my $here_doc_target = $tokenizer_self->{_here_doc_target};
- my $here_quote_character = $tokenizer_self->{_here_quote_character};
+ my $here_doc_target = $tokenizer_self->[_here_doc_target_];
+ my $here_quote_character = $tokenizer_self->[_here_quote_character_];
my $candidate_target = $input_line;
chomp $candidate_target;
$candidate_target =~ s/^\s*//;
}
if ( $candidate_target eq $here_doc_target ) {
- $tokenizer_self->{_nearly_matched_here_target_at} = undef;
- $line_of_tokens->{_line_type} = 'HERE_END';
- write_logfile_entry("Exiting HERE document $here_doc_target\n");
+ $tokenizer_self->[_nearly_matched_here_target_at_] = undef;
+ $line_of_tokens->{_line_type} = 'HERE_END';
+ $write_logfile_entry->("Exiting HERE document $here_doc_target\n");
- my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
+ my $rhere_target_list = $tokenizer_self->[_rhere_target_list_];
if ( @{$rhere_target_list} ) { # there can be multiple here targets
( $here_doc_target, $here_quote_character ) =
@{ shift @{$rhere_target_list} };
- $tokenizer_self->{_here_doc_target} = $here_doc_target;
- $tokenizer_self->{_here_quote_character} =
+ $tokenizer_self->[_here_doc_target_] = $here_doc_target;
+ $tokenizer_self->[_here_quote_character_] =
$here_quote_character;
- write_logfile_entry(
+ $write_logfile_entry->(
"Entering HERE document $here_doc_target\n");
- $tokenizer_self->{_nearly_matched_here_target_at} = undef;
- $tokenizer_self->{_started_looking_for_here_target_at} =
+ $tokenizer_self->[_nearly_matched_here_target_at_] = undef;
+ $tokenizer_self->[_started_looking_for_here_target_at_] =
$input_line_number;
}
else {
- $tokenizer_self->{_in_here_doc} = 0;
- $tokenizer_self->{_here_doc_target} = "";
- $tokenizer_self->{_here_quote_character} = "";
+ $tokenizer_self->[_in_here_doc_] = 0;
+ $tokenizer_self->[_here_doc_target_] = "";
+ $tokenizer_self->[_here_quote_character_] = "";
}
}
$candidate_target =~ s/\s*$//;
$candidate_target =~ s/^\s*//;
if ( $candidate_target eq $here_doc_target ) {
- $tokenizer_self->{_nearly_matched_here_target_at} =
+ $tokenizer_self->[_nearly_matched_here_target_at_] =
$input_line_number;
}
}
return $line_of_tokens;
}
- # must print line unchanged if we are in a format section
- elsif ( $tokenizer_self->{_in_format} ) {
+ # Print line unchanged if we are in a format section
+ elsif ( $tokenizer_self->[_in_format_] ) {
if ( $input_line =~ /^\.[\s#]*$/ ) {
- write_logfile_entry("Exiting format section\n");
- $tokenizer_self->{_in_format} = 0;
- $line_of_tokens->{_line_type} = 'FORMAT_END';
+
+ # Decrement format depth count at a '.' after a 'format'
+ $tokenizer_self->[_in_format_]--;
+
+ # This is the end when count reaches 0
+ if ( !$tokenizer_self->[_in_format_] ) {
+ $write_logfile_entry->("Exiting format section\n");
+ $line_of_tokens->{_line_type} = 'FORMAT_END';
+ }
}
else {
$line_of_tokens->{_line_type} = 'FORMAT';
+ if ( $input_line =~ /^\s*format\s+\w+/ ) {
+
+ # Increment format depth count at a 'format' within a 'format'
+ # This is a simple way to handle nested formats (issue c019).
+ $tokenizer_self->[_in_format_]++;
+ }
}
return $line_of_tokens;
}
# must print line unchanged if we are in pod documentation
- elsif ( $tokenizer_self->{_in_pod} ) {
+ elsif ( $tokenizer_self->[_in_pod_] ) {
$line_of_tokens->{_line_type} = 'POD';
if ( $input_line =~ /^=cut/ ) {
$line_of_tokens->{_line_type} = 'POD_END';
- write_logfile_entry("Exiting POD section\n");
- $tokenizer_self->{_in_pod} = 0;
+ $write_logfile_entry->("Exiting POD section\n");
+ $tokenizer_self->[_in_pod_] = 0;
}
- if ( $input_line =~ /^\#\!.*perl\b/ ) {
+ if ( $input_line =~ /^\#\!.*perl\b/ && !$tokenizer_self->[_in_end_] ) {
warning(
"Hash-bang in pod can cause older versions of perl to fail! \n"
);
return $line_of_tokens;
}
+ # print line unchanged if in skipped section
+ elsif ( $tokenizer_self->[_in_skipped_] ) {
+
+ # NOTE: marked as the existing type 'FORMAT' to keep html working
+ $line_of_tokens->{_line_type} = 'FORMAT';
+ if ( $input_line =~ /$code_skipping_pattern_end/ ) {
+ $write_logfile_entry->("Exiting code-skipping section\n");
+ $tokenizer_self->[_in_skipped_] = 0;
+ }
+ return $line_of_tokens;
+ }
+
# must print line unchanged if we have seen a severe error (i.e., we
# are seeing illegal tokens and cannot continue. Syntax errors do
# not pass this route). Calling routine can decide what to do, but
# the default can be to just pass all lines as if they were after __END__
- elsif ( $tokenizer_self->{_in_error} ) {
+ elsif ( $tokenizer_self->[_in_error_] ) {
$line_of_tokens->{_line_type} = 'ERROR';
return $line_of_tokens;
}
# print line unchanged if we are __DATA__ section
- elsif ( $tokenizer_self->{_in_data} ) {
+ elsif ( $tokenizer_self->[_in_data_] ) {
# ...but look for POD
# Note that the _in_data and _in_end flags remain set
# so that we return to that state after seeing the
# end of a pod section
- if ( $input_line =~ /^=(?!cut)/ ) {
+ if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) {
$line_of_tokens->{_line_type} = 'POD_START';
- write_logfile_entry("Entering POD section\n");
- $tokenizer_self->{_in_pod} = 1;
+ $write_logfile_entry->("Entering POD section\n");
+ $tokenizer_self->[_in_pod_] = 1;
return $line_of_tokens;
}
else {
}
# print line unchanged if we are in __END__ section
- elsif ( $tokenizer_self->{_in_end} ) {
+ elsif ( $tokenizer_self->[_in_end_] ) {
# ...but look for POD
# Note that the _in_data and _in_end flags remain set
# so that we return to that state after seeing the
# end of a pod section
- if ( $input_line =~ /^=(?!cut)/ ) {
+ if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) {
$line_of_tokens->{_line_type} = 'POD_START';
- write_logfile_entry("Entering POD section\n");
- $tokenizer_self->{_in_pod} = 1;
+ $write_logfile_entry->("Entering POD section\n");
+ $tokenizer_self->[_in_pod_] = 1;
return $line_of_tokens;
}
else {
}
# check for a hash-bang line if we haven't seen one
- if ( !$tokenizer_self->{_saw_hash_bang} ) {
+ if ( !$tokenizer_self->[_saw_hash_bang_] ) {
if ( $input_line =~ /^\#\!.*perl\b/ ) {
- $tokenizer_self->{_saw_hash_bang} = $input_line_number;
+ $tokenizer_self->[_saw_hash_bang_] = $input_line_number;
# check for -w and -P flags
if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
- $tokenizer_self->{_saw_perl_dash_P} = 1;
+ $tokenizer_self->[_saw_perl_dash_P_] = 1;
}
if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
- $tokenizer_self->{_saw_perl_dash_w} = 1;
+ $tokenizer_self->[_saw_perl_dash_w_] = 1;
}
if (
- ( $input_line_number > 1 )
+ $input_line_number > 1
# leave any hash bang in a BEGIN block alone
# i.e. see 'debugger-duck_type.t'
$last_nonblank_block_type
&& $last_nonblank_block_type eq 'BEGIN'
)
- && ( !$tokenizer_self->{_look_for_hash_bang} )
+ && !$tokenizer_self->[_look_for_hash_bang_]
+
+ # Try to avoid giving a false alarm at a simple comment.
+ # These look like valid hash-bang lines:
+
+ #!/usr/bin/perl -w
+ #! /usr/bin/perl -w
+ #!c:\perl\bin\perl.exe
+
+ # These are comments:
+ #! I love perl
+ #! sunos does not yet provide a /usr/bin/perl
+
+ # Comments typically have multiple spaces, which suggests
+ # the filter
+ && $input_line =~ /^\#\!(\s+)?(\S+)?perl/
)
{
# this is helpful for VMS systems; we may have accidentally
# tokenized some DCL commands
- if ( $tokenizer_self->{_started_tokenizing} ) {
+ if ( $tokenizer_self->[_started_tokenizing_] ) {
warning(
"There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
);
}
# wait for a hash-bang before parsing if the user invoked us with -x
- if ( $tokenizer_self->{_look_for_hash_bang}
- && !$tokenizer_self->{_saw_hash_bang} )
+ if ( $tokenizer_self->[_look_for_hash_bang_]
+ && !$tokenizer_self->[_saw_hash_bang_] )
{
$line_of_tokens->{_line_type} = 'SYSTEM';
return $line_of_tokens;
# now we know that it is ok to tokenize the line...
# the line tokenizer will modify any of these private variables:
- # _rhere_target_list
- # _in_data
- # _in_end
- # _in_format
- # _in_error
- # _in_pod
- # _in_quote
- my $ending_in_quote_last = $tokenizer_self->{_in_quote};
+ # _rhere_target_list_
+ # _in_data_
+ # _in_end_
+ # _in_format_
+ # _in_error_
+ # _in_skipped_
+ # _in_pod_
+ # _in_quote_
+ my $ending_in_quote_last = $tokenizer_self->[_in_quote_];
tokenize_this_line($line_of_tokens);
# Now finish defining the return structure and return it
- $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote};
+ $line_of_tokens->{_ending_in_quote} = $tokenizer_self->[_in_quote_];
# handle severe error (binary data in script)
- if ( $tokenizer_self->{_in_error} ) {
- $tokenizer_self->{_in_quote} = 0; # to avoid any more messages
+ if ( $tokenizer_self->[_in_error_] ) {
+ $tokenizer_self->[_in_quote_] = 0; # to avoid any more messages
warning("Giving up after error\n");
$line_of_tokens->{_line_type} = 'ERROR';
- reset_indentation_level(0); # avoid error messages
+ reset_indentation_level(0); # avoid error messages
return $line_of_tokens;
}
# handle start of pod documentation
- if ( $tokenizer_self->{_in_pod} ) {
+ if ( $tokenizer_self->[_in_pod_] ) {
# This gets tricky..above a __DATA__ or __END__ section, perl
# accepts '=cut' as the start of pod section. But afterwards,
# only pod utilities see it and they may ignore an =cut without
# leading =head. In any case, this isn't good.
if ( $input_line =~ /^=cut\b/ ) {
- if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
+ if ( $tokenizer_self->[_saw_data_] || $tokenizer_self->[_saw_end_] )
+ {
complain("=cut while not in pod ignored\n");
- $tokenizer_self->{_in_pod} = 0;
+ $tokenizer_self->[_in_pod_] = 0;
$line_of_tokens->{_line_type} = 'POD_END';
}
else {
$line_of_tokens->{_line_type} = 'POD_START';
- complain(
+ warning(
"=cut starts a pod section .. this can fool pod utilities.\n"
);
- write_logfile_entry("Entering POD section\n");
+ $write_logfile_entry->("Entering POD section\n");
}
}
else {
$line_of_tokens->{_line_type} = 'POD_START';
- write_logfile_entry("Entering POD section\n");
+ $write_logfile_entry->("Entering POD section\n");
}
return $line_of_tokens;
}
- # update indentation levels for log messages
- if ( $input_line !~ /^\s*$/ ) {
+ # handle start of skipped section
+ if ( $tokenizer_self->[_in_skipped_] ) {
+
+ # NOTE: marked as the existing type 'FORMAT' to keep html working
+ $line_of_tokens->{_line_type} = 'FORMAT';
+ $write_logfile_entry->("Entering code-skipping section\n");
+ return $line_of_tokens;
+ }
+
+ # Update indentation levels for log messages.
+ # Skip blank lines and also block comments, unless a logfile is requested.
+ # Note that _line_of_text_ is the input line but trimmed from left to right.
+ my $lot = $tokenizer_self->[_line_of_text_];
+ if ( $lot && ( $self->[_rOpts_logfile_] || substr( $lot, 0, 1 ) ne '#' ) ) {
my $rlevels = $line_of_tokens->{_rlevels};
$line_of_tokens->{_guessed_indentation_level} =
guess_old_indentation_level($input_line);
}
# see if this line contains here doc targets
- my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
+ my $rhere_target_list = $tokenizer_self->[_rhere_target_list_];
if ( @{$rhere_target_list} ) {
my ( $here_doc_target, $here_quote_character ) =
@{ shift @{$rhere_target_list} };
- $tokenizer_self->{_in_here_doc} = 1;
- $tokenizer_self->{_here_doc_target} = $here_doc_target;
- $tokenizer_self->{_here_quote_character} = $here_quote_character;
- write_logfile_entry("Entering HERE document $here_doc_target\n");
- $tokenizer_self->{_started_looking_for_here_target_at} =
+ $tokenizer_self->[_in_here_doc_] = 1;
+ $tokenizer_self->[_here_doc_target_] = $here_doc_target;
+ $tokenizer_self->[_here_quote_character_] = $here_quote_character;
+ $write_logfile_entry->("Entering HERE document $here_doc_target\n");
+ $tokenizer_self->[_started_looking_for_here_target_at_] =
$input_line_number;
}
# NOTE: __END__ and __DATA__ statements are written unformatted
# because they can theoretically contain additional characters
# which are not tokenized (and cannot be read with <DATA> either!).
- if ( $tokenizer_self->{_in_data} ) {
+ if ( $tokenizer_self->[_in_data_] ) {
$line_of_tokens->{_line_type} = 'DATA_START';
- write_logfile_entry("Starting __DATA__ section\n");
- $tokenizer_self->{_saw_data} = 1;
+ $write_logfile_entry->("Starting __DATA__ section\n");
+ $tokenizer_self->[_saw_data_] = 1;
# keep parsing after __DATA__ if use SelfLoader was seen
- if ( $tokenizer_self->{_saw_selfloader} ) {
- $tokenizer_self->{_in_data} = 0;
- write_logfile_entry(
+ if ( $tokenizer_self->[_saw_selfloader_] ) {
+ $tokenizer_self->[_in_data_] = 0;
+ $write_logfile_entry->(
"SelfLoader seen, continuing; -nlsl deactivates\n");
}
return $line_of_tokens;
}
- elsif ( $tokenizer_self->{_in_end} ) {
+ elsif ( $tokenizer_self->[_in_end_] ) {
$line_of_tokens->{_line_type} = 'END_START';
- write_logfile_entry("Starting __END__ section\n");
- $tokenizer_self->{_saw_end} = 1;
+ $write_logfile_entry->("Starting __END__ section\n");
+ $tokenizer_self->[_saw_end_] = 1;
# keep parsing after __END__ if use AutoLoader was seen
- if ( $tokenizer_self->{_saw_autoloader} ) {
- $tokenizer_self->{_in_end} = 0;
- write_logfile_entry(
+ if ( $tokenizer_self->[_saw_autoloader_] ) {
+ $tokenizer_self->[_in_end_] = 0;
+ $write_logfile_entry->(
"AutoLoader seen, continuing; -nlal deactivates\n");
}
return $line_of_tokens;
$line_of_tokens->{_line_type} = 'CODE';
# remember if we have seen any real code
- if ( !$tokenizer_self->{_started_tokenizing}
+ if ( !$tokenizer_self->[_started_tokenizing_]
&& $input_line !~ /^\s*$/
&& $input_line !~ /^\s*#/ )
{
- $tokenizer_self->{_started_tokenizing} = 1;
+ $tokenizer_self->[_started_tokenizing_] = 1;
}
- if ( $tokenizer_self->{_debugger_object} ) {
- $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens);
+ if ( $tokenizer_self->[_debugger_object_] ) {
+ $tokenizer_self->[_debugger_object_]
+ ->write_debug_entry($line_of_tokens);
}
# Note: if keyword 'format' occurs in this line code, it is still CODE
# (keyword 'format' need not start a line)
- if ( $tokenizer_self->{_in_format} ) {
- write_logfile_entry("Entering format section\n");
+ if ( $tokenizer_self->[_in_format_] ) {
+ $write_logfile_entry->("Entering format section\n");
}
- if ( $tokenizer_self->{_in_quote}
- and ( $tokenizer_self->{_line_start_quote} < 0 ) )
+ if ( $tokenizer_self->[_in_quote_]
+ and ( $tokenizer_self->[_line_start_quote_] < 0 ) )
{
#if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
- if (
- ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ )
+ if ( ( my $quote_target = $tokenizer_self->[_quote_target_] ) !~
+ /^\s*$/ )
{
- $tokenizer_self->{_line_start_quote} = $input_line_number;
- write_logfile_entry(
+ $tokenizer_self->[_line_start_quote_] = $input_line_number;
+ $write_logfile_entry->(
"Start multi-line quote or pattern ending in $quote_target\n");
}
}
- elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
- && !$tokenizer_self->{_in_quote} )
+ elsif ( ( $tokenizer_self->[_line_start_quote_] >= 0 )
+ && !$tokenizer_self->[_in_quote_] )
{
- $tokenizer_self->{_line_start_quote} = -1;
- write_logfile_entry("End of multi-line quote or pattern\n");
+ $tokenizer_self->[_line_start_quote_] = -1;
+ $write_logfile_entry->("End of multi-line quote or pattern\n");
}
# we are returning a line of CODE
my $starting_level = 0;
# use value if given as parameter
- if ( $tokenizer_self->{_know_starting_level} ) {
- $starting_level = $tokenizer_self->{_starting_level};
+ if ( $tokenizer_self->[_know_starting_level_] ) {
+ $starting_level = $tokenizer_self->[_starting_level_];
}
# if we know there is a hash_bang line, the level must be zero
- elsif ( $tokenizer_self->{_look_for_hash_bang} ) {
- $tokenizer_self->{_know_starting_level} = 1;
+ elsif ( $tokenizer_self->[_look_for_hash_bang_] ) {
+ $tokenizer_self->[_know_starting_level_] = 1;
}
# otherwise figure it out from the input file
# keep looking at lines until we find a hash bang or piece of code
my $msg = "";
while ( $line =
- $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
+ $tokenizer_self->[_line_buffer_object_]->peek_ahead( $i++ ) )
{
# if first line is #! then assume starting level is zero
$msg = "Line $i implies starting-indentation-level = $starting_level\n";
write_logfile_entry("$msg");
}
- $tokenizer_self->{_starting_level} = $starting_level;
+ $tokenizer_self->[_starting_level_] = $starting_level;
reset_indentation_level($starting_level);
return;
}
# If there are leading tabs, we use the tab scheme for this run, if
# any, so that the code will remain stable when editing.
- if ($1) { $spaces += length($1) * $tokenizer_self->{_tabsize} }
+ if ($1) { $spaces += length($1) * $tokenizer_self->[_tabsize_] }
if ($2) { $spaces += length($2) }
# correct for outdented labels
- if ( $3 && $tokenizer_self->{'_outdent_labels'} ) {
- $spaces += $tokenizer_self->{_continuation_indentation};
+ if ( $3 && $tokenizer_self->[_outdent_labels_] ) {
+ $spaces += $tokenizer_self->[_continuation_indentation_];
}
}
# compute indentation using the value of -i for this run.
# If -i=0 is used for this run (which is possible) it doesn't matter
# what we do here but we'll guess that the old run used 4 spaces per level.
- my $indent_columns = $tokenizer_self->{_indent_columns};
+ my $indent_columns = $tokenizer_self->[_indent_columns_];
$indent_columns = 4 if ( !$indent_columns );
$level = int( $spaces / $indent_columns );
return ($level);
my $fh = *STDOUT;
foreach my $pkg ( keys %is_user_function ) {
- print $fh "\nnon-constant subs in package $pkg\n";
+ $fh->print("\nnon-constant subs in package $pkg\n");
foreach my $sub ( keys %{ $is_user_function{$pkg} } ) {
my $msg = "";
if ( $is_block_function{$pkg}{$sub} ) {
$msg = 'block';
}
- print $fh "$sub $msg\n";
+ $fh->print("$sub $msg\n");
}
}
foreach my $pkg ( keys %is_constant ) {
- print $fh "\nconstants and constant subs in package $pkg\n";
+ $fh->print("\nconstants and constant subs in package $pkg\n");
foreach my $sub ( keys %{ $is_constant{$pkg} } ) {
- print $fh "$sub\n";
+ $fh->print("$sub\n");
}
}
return;
}
-sub ones_count {
-
- # count number of 1's in a string of 1's and 0's
- # example: ones_count("010101010101") gives 6
- my $str = shift;
- return $str =~ tr/1/0/;
-}
-
sub prepare_for_a_new_file {
# previous tokens needed to determine what to expect next
%is_block_function = ();
%is_block_list_function = ();
%saw_function_definition = ();
+ %saw_use_module = ();
# variables used to track depths of various containers
# and report nesting errors
- $paren_depth = 0;
- $brace_depth = 0;
- $square_bracket_depth = 0;
- @current_depth = (0) x scalar @closing_brace_names;
- $total_depth = 0;
- @total_depth = ();
- @nesting_sequence_number = ( 0 .. @closing_brace_names - 1 );
- @current_sequence_number = ();
+ $paren_depth = 0;
+ $brace_depth = 0;
+ $square_bracket_depth = 0;
+ @current_depth = (0) x scalar @closing_brace_names;
+ $total_depth = 0;
+ @total_depth = ();
+ @nesting_sequence_number = ( 0 .. @closing_brace_names - 1 );
+ @current_sequence_number = ();
+
+ @paren_type = ();
+ @paren_semicolon_count = ();
+ @paren_structural_type = ();
+ @brace_type = ();
+ @brace_structural_type = ();
+ @brace_context = ();
+ @brace_package = ();
+ @square_bracket_type = ();
+ @square_bracket_structural_type = ();
+ @depth_array = ();
+ @nested_ternary_flag = ();
+ @nested_statement_type = ();
+ @starting_line_of_current_depth = ();
+
$paren_type[$paren_depth] = '';
$paren_semicolon_count[$paren_depth] = 0;
$paren_structural_type[$brace_depth] = '';
return;
}
-{ # begin tokenize_this_line
+{ ## closure for sub tokenize_this_line
use constant BRACE => 0;
use constant SQUARE_BRACKET => 1;
write_logfile_entry("scanning replacement text for here-doc targets\n");
# save the logger object for error messages
- my $logger_object = $tokenizer_self->{_logger_object};
+ my $logger_object = $tokenizer_self->[_logger_object_];
# localize all package variables
local (
# make a new tokenizer
my $rOpts = {};
my $rpending_logfile_message;
- my $source_object =
- Perl::Tidy::LineSource->new( \$replacement_text, $rOpts,
- $rpending_logfile_message );
+ my $source_object = Perl::Tidy::LineSource->new(
+ input_file => \$replacement_text,
+ rOpts => $rOpts,
+ rpending_logfile_message => $rpending_logfile_message,
+ );
my $tokenizer = Perl::Tidy::Tokenizer->new(
source_object => $source_object,
logger_object => $logger_object,
# remove any here doc targets
my $rht = undef;
- if ( $tokenizer_self->{_in_here_doc} ) {
+ if ( $tokenizer_self->[_in_here_doc_] ) {
$rht = [];
push @{$rht},
[
- $tokenizer_self->{_here_doc_target},
- $tokenizer_self->{_here_quote_character}
+ $tokenizer_self->[_here_doc_target_],
+ $tokenizer_self->[_here_quote_character_]
];
- if ( $tokenizer_self->{_rhere_target_list} ) {
- push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} };
- $tokenizer_self->{_rhere_target_list} = undef;
+ if ( $tokenizer_self->[_rhere_target_list_] ) {
+ push @{$rht}, @{ $tokenizer_self->[_rhere_target_list_] };
+ $tokenizer_self->[_rhere_target_list_] = undef;
}
- $tokenizer_self->{_in_here_doc} = undef;
+ $tokenizer_self->[_in_here_doc_] = undef;
}
# now its safe to report errors
return;
}
+ use constant VERIFY_FASTSCAN => 0;
+ my %fast_scan_context;
+
+ BEGIN {
+ %fast_scan_context = (
+ '$' => SCALAR_CONTEXT,
+ '*' => SCALAR_CONTEXT,
+ '@' => LIST_CONTEXT,
+ '%' => LIST_CONTEXT,
+ '&' => UNKNOWN_CONTEXT,
+ );
+ }
+
+ sub scan_identifier_fast {
+
+ # This is a wrapper for sub scan_identifier. It does a fast preliminary
+ # scan for certain common identifiers:
+ # '$var', '@var', %var, *var, &var, '@{...}', '%{...}'
+ # If it does not find one of these, or this is a restart, it calls the
+ # original scanner directly.
+
+ # This gives the same results as the full scanner in about 1/4 the
+ # total runtime for a typical input stream.
+
+ my $i_begin = $i;
+ my $tok_begin = $tok;
+ my $fast_scan_type;
+
+ ###############################
+ # quick scan with leading sigil
+ ###############################
+ if ( !$id_scan_state
+ && $i + 1 <= $max_token_index
+ && $fast_scan_context{$tok} )
+ {
+ $context = $fast_scan_context{$tok};
+
+ # look for $var, @var, ...
+ if ( $rtoken_type->[ $i + 1 ] eq 'w' ) {
+ my $pretype_next = "";
+ my $i_next = $i + 2;
+ if ( $i_next <= $max_token_index ) {
+ if ( $rtoken_type->[$i_next] eq 'b'
+ && $i_next < $max_token_index )
+ {
+ $i_next += 1;
+ }
+ $pretype_next = $rtoken_type->[$i_next];
+ }
+ if ( $pretype_next ne ':' && $pretype_next ne "'" ) {
+
+ # Found type 'i' like '$var', '@var', or '%var'
+ $identifier = $tok . $rtokens->[ $i + 1 ];
+ $tok = $identifier;
+ $type = 'i';
+ $i = $i + 1;
+ $fast_scan_type = $type;
+ }
+ }
+
+ # Look for @{ or %{ .
+ # But we must let the full scanner handle things like '${' because it
+ # may keep going to get a complete identifier like '${#}' .
+ elsif (
+ $rtoken_type->[ $i + 1 ] eq '{'
+ && ( $tok_begin eq '@'
+ || $tok_begin eq '%' )
+ )
+ {
+
+ $identifier = $tok;
+ $type = 't';
+ $fast_scan_type = $type;
+ }
+ }
+
+ ############################
+ # Quick scan with leading ->
+ # Look for ->[ and ->{
+ ############################
+ elsif (
+ $tok eq '->'
+ && $i < $max_token_index
+ && ( $rtokens->[ $i + 1 ] eq '{'
+ || $rtokens->[ $i + 1 ] eq '[' )
+ )
+ {
+ $type = $tok;
+ $fast_scan_type = $type;
+ $identifier = $tok;
+ $context = UNKNOWN_CONTEXT;
+ }
+
+ #######################################
+ # Verify correctness during development
+ #######################################
+ if ( VERIFY_FASTSCAN && $fast_scan_type ) {
+
+ # We will call the full method
+ my $identifier_simple = $identifier;
+ my $tok_simple = $tok;
+ my $fast_scan_type = $type;
+ my $i_simple = $i;
+ my $context_simple = $context;
+
+ $tok = $tok_begin;
+ $i = $i_begin;
+ scan_identifier();
+
+ if ( $tok ne $tok_simple
+ || $type ne $fast_scan_type
+ || $i != $i_simple
+ || $identifier ne $identifier_simple
+ || $id_scan_state
+ || $context ne $context_simple )
+ {
+ print STDERR <<EOM;
+scan_identifier_fast differs from scan_identifier:
+simple: i=$i_simple, tok=$tok_simple, type=$fast_scan_type, ident=$identifier_simple, context='$context_simple'
+full: i=$i, tok=$tok, type=$type, ident=$identifier, context='$context' state=$id_scan_state
+EOM
+ }
+ }
+
+ ###################################################
+ # call full scanner if fast method did not succeed
+ ###################################################
+ if ( !$fast_scan_type ) {
+ scan_identifier();
+ }
+ return;
+ }
+
sub scan_id {
( $i, $tok, $type, $id_scan_state ) =
scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
return $number;
}
+ use constant VERIFY_FASTNUM => 0;
+
+ sub scan_number_fast {
+
+ # This is a wrapper for sub scan_number. It does a fast preliminary
+ # scan for a simple integer. It calls the original scan_number if it
+ # does not find one.
+
+ my $i_begin = $i;
+ my $tok_begin = $tok;
+ my $number;
+
+ ##################################
+ # Quick check for (signed) integer
+ ##################################
+
+ # This will be the string of digits:
+ my $i_d = $i;
+ my $tok_d = $tok;
+ my $typ_d = $rtoken_type->[$i_d];
+
+ # check for signed integer
+ my $sign = "";
+ if ( $typ_d ne 'd'
+ && ( $typ_d eq '+' || $typ_d eq '-' )
+ && $i_d < $max_token_index )
+ {
+ $sign = $tok_d;
+ $i_d++;
+ $tok_d = $rtokens->[$i_d];
+ $typ_d = $rtoken_type->[$i_d];
+ }
+
+ # Handle integers
+ if (
+ $typ_d eq 'd'
+ && (
+ $i_d == $max_token_index
+ || ( $i_d < $max_token_index
+ && $rtoken_type->[ $i_d + 1 ] ne '.'
+ && $rtoken_type->[ $i_d + 1 ] ne 'w' )
+ )
+ )
+ {
+ # Let the full scanner handle multi-digit integers beginning with
+ # '0' because there could be error messages. For example, '009' is
+ # not a valid number.
+
+ if ( $tok_d eq '0' || substr( $tok_d, 0, 1 ) ne '0' ) {
+ $number = $sign . $tok_d;
+ $type = 'n';
+ $i = $i_d;
+ }
+ }
+
+ #######################################
+ # Verify correctness during development
+ #######################################
+ if ( VERIFY_FASTNUM && defined($number) ) {
+
+ # We will call the full method
+ my $type_simple = $type;
+ my $i_simple = $i;
+ my $number_simple = $number;
+
+ $tok = $tok_begin;
+ $i = $i_begin;
+ $number = scan_number();
+
+ if ( $type ne $type_simple
+ || ( $i != $i_simple && $i <= $max_token_index )
+ || $number ne $number_simple )
+ {
+ print STDERR <<EOM;
+scan_number_fast differs from scan_number:
+simple: i=$i_simple, type=$type_simple, number=$number_simple
+full: i=$i, type=$type, number=$number
+EOM
+ }
+ }
+
+ #########################################
+ # call full scanner if may not be integer
+ #########################################
+ if ( !defined($number) ) {
+ $number = scan_number();
+ }
+ return $number;
+ }
+
# a sub to warn if token found where term expected
sub error_if_expecting_TERM {
if ( $expecting == TERM ) {
$rtoken_map, $rtoken_type, $input_line );
if ( $i_tok == 0 ) {
interrupt_logfile();
- warning("Missing ';' above?\n");
+ warning("Missing ';' or ',' above?\n");
resume_logfile();
}
return 1;
@_ = qw(for foreach);
@is_for_foreach{@_} = (1) x scalar(@_);
- my %is_my_our;
- @_ = qw(my our);
- @is_my_our{@_} = (1) x scalar(@_);
+ my %is_my_our_state;
+ @_ = qw(my our state);
+ @is_my_our_state{@_} = (1) x scalar(@_);
# These keywords may introduce blocks after parenthesized expressions,
# in the form:
qw(if elsif unless while until for foreach switch case given when catch);
@is_blocktype_with_paren{@_} = (1) x scalar(@_);
+ my %is_case_default;
+ @_ = qw(case default);
+ @is_case_default{@_} = (1) x scalar(@_);
+
# ------------------------------------------------------------
# begin hash of code for handling most token types
# ------------------------------------------------------------
# start looking for a scalar
error_if_expecting_OPERATOR("Scalar")
if ( $expecting == OPERATOR );
- scan_identifier();
+ scan_identifier_fast();
if ( $identifier eq '$^W' ) {
- $tokenizer_self->{_saw_perl_dash_w} = 1;
+ $tokenizer_self->[_saw_perl_dash_w_] = 1;
}
# Check for identifier in indirect object slot
# /^(print|printf|sort|exec|system)$/
if (
$is_indirect_object_taker{$last_nonblank_token}
-
|| ( ( $last_nonblank_token eq '(' )
&& $is_indirect_object_taker{ $paren_type[$paren_depth] } )
- || ( $last_nonblank_type =~ /^[Uw]$/ ) # possible object
+ || ( $last_nonblank_type eq 'w'
+ || $last_nonblank_type eq 'U' ) # possible object
)
{
$type = 'Z';
if (
$expecting == OPERATOR
- # be sure this is not a method call of the form
+ # Be sure this is not a method call of the form
# &method(...), $method->(..), &{method}(...),
# $ref[2](list) is ok & short for $ref[2]->(list)
# NOTE: at present, braces in something like &{ xxx }
- # are not marked as a block, we might have a method call
- && $last_nonblank_token !~ /^([\]\}\&]|\-\>)/
+ # are not marked as a block, we might have a method call.
+ # Added ')' to fix case c017, something like ()()()
+ && $last_nonblank_token !~ /^([\]\}\)\&]|\-\>)/
)
{
my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens,
$max_token_index );
- if ( $next_nonblank_token ne ')' ) {
+
+ # Patch for c029: give up error check if
+ # a side comment follows
+ if ( $next_nonblank_token ne ')'
+ && $next_nonblank_token ne '#' )
+ {
my $hint;
+
error_if_expecting_OPERATOR('(');
if ( $last_nonblank_type eq 'C' ) {
complain("Repeated ','s \n");
}
+ # Note that we have to check both token and type here because a
+ # comma following a qw list can have last token='(' but type = 'q'
+ elsif ( $last_nonblank_token eq '(' && $last_nonblank_type eq '{' )
+ {
+ warning("Unexpected leading ',' after a '('\n");
+ }
+
# patch for operator_expected: note if we are in the list (use.t)
if ( $statement_type eq 'use' ) { $statement_type = '_use' }
-## FIXME: need to move this elsewhere, perhaps check after a '('
-## elsif ($last_nonblank_token eq '(') {
-## warning("Leading ','s illegal in some versions of perl\n");
-## }
+
},
';' => sub {
$context = UNKNOWN_CONTEXT;
# a pattern cannot follow certain keywords which take optional
# arguments, like 'shift' and 'pop'. See also '?'.
- if ( $last_nonblank_type eq 'k'
- && $is_keyword_taking_optional_args{$last_nonblank_token} )
+ if (
+ $last_nonblank_type eq 'k'
+ && $is_keyword_rejecting_slash_as_pattern_delimiter{
+ $last_nonblank_token}
+ )
{
$is_pattern = 0;
}
$type = $tok;
}
- #DEBUG - collecting info on what tokens follow a divide
- # for development of guessing algorithm
- #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) {
- # #write_diagnostics( "DIVIDE? $input_line\n" );
- #}
+ #DEBUG - collecting info on what tokens follow a divide
+ # for development of guessing algorithm
+ #if ( is_possible_numerator( $i, $rtokens, $max_token_index ) < 0 ) {
+ # #write_diagnostics( "DIVIDE? $input_line\n" );
+ #}
}
},
'{' => sub {
# ATTRS: for a '{' following an attribute list, reset
# things to look like we just saw the sub name
- if ( $statement_type =~ /^sub/ ) {
+ if ( $statement_type =~ /^sub\b/ ) {
$last_nonblank_token = $statement_type;
$last_nonblank_type = 'i';
$statement_type = "";
# check for syntax error here;
unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
- if ( $tokenizer_self->{'_extended_syntax'} ) {
+ if ( $tokenizer_self->[_extended_syntax_] ) {
# we append a trailing () to mark this as an unknown
# block type. This allows perltidy to format some
# For example we probably don't want & as sub call here:
# Fcntl::S_IRUSR & $mode;
if ( $expecting == TERM || $next_type ne 'b' ) {
- scan_identifier();
+ scan_identifier_fast();
}
}
else {
find_angle_operator_termination( $input_line, $i, $rtoken_map,
$expecting, $max_token_index );
- if ( $type eq '<' && $expecting == TERM ) {
- error_if_expecting_TERM();
- interrupt_logfile();
- warning("Unterminated <> operator?\n");
- resume_logfile();
- }
+ ## This message is not very helpful and quite confusing if the above
+ ## routine decided not to write a message with the line number.
+ ## if ( $type eq '<' && $expecting == TERM ) {
+ ## error_if_expecting_TERM();
+ ## interrupt_logfile();
+ ## warning("Unterminated <> operator?\n");
+ ## resume_logfile();
+ ## }
+
}
else {
}
# Patch for rt #126965
# a pattern cannot follow certain keywords which take optional
# arguments, like 'shift' and 'pop'. See also '/'.
- if ( $last_nonblank_type eq 'k'
- && $is_keyword_taking_optional_args{$last_nonblank_token} )
+ if (
+ $last_nonblank_type eq 'k'
+ && $is_keyword_rejecting_question_as_pattern_delimiter{
+ $last_nonblank_token}
+ )
{
$is_pattern = 0;
}
elsif ( $expecting == UNKNOWN ) {
# In older versions of Perl, a bare ? can be a pattern
- # delimiter. Sometime after Perl 5.10 this seems to have
- # been dropped, but we have to support it in order to format
- # older programs. For example, the following line worked
+ # delimiter. In perl version 5.22 this was
+ # dropped, but we have to support it in order to format
+ # older programs. See:
+ ## https://perl.developpez.com/documentations/en/5.22.0/perl5211delta.html
+ # For example, the following line worked
# at one time:
# ?(.*)? && (print $1,"\n");
# In current versions it would have to be written with slashes:
},
'*' => sub { # typeglob, or multiply?
+ if ( $expecting == UNKNOWN && $last_nonblank_type eq 'Z' ) {
+ if ( $next_type ne 'b'
+ && $next_type ne '('
+ && $next_type ne '#' ) # Fix c036
+ {
+ $expecting = TERM;
+ }
+ }
if ( $expecting == TERM ) {
- scan_identifier();
+ scan_identifier_fast();
}
else {
}
# ATTRS: check for a ':' which introduces an attribute list
- # (this might eventually get its own token type)
+ # either after a 'sub' keyword or within a paren list
elsif ( $statement_type =~ /^sub\b/ ) {
$type = 'A';
$in_attribute_list = 1;
}
+ # Within a signature, unless we are in a ternary. For example,
+ # from 't/filter_example.t':
+ # method foo4 ( $class: $bar ) { $class->bar($bar) }
+ elsif ( $paren_type[$paren_depth] =~ /^sub\b/
+ && !is_balanced_closing_container(QUESTION_COLON) )
+ {
+ $type = 'A';
+ $in_attribute_list = 1;
+ }
+
# check for scalar attribute, such as
# my $foo : shared = 1;
- elsif ($is_my_our{$statement_type}
+ elsif ($is_my_our_state{$statement_type}
&& $current_depth[QUESTION_COLON] == 0 )
{
$type = 'A';
$in_attribute_list = 1;
}
+ # Look for Switch::Plain syntax if an error would otherwise occur
+ # here. Note that we do not need to check if the extended syntax
+ # flag is set because otherwise an error would occur, and we would
+ # then have to output a message telling the user to set the
+ # extended syntax flag to avoid the error.
+ # case 1: {
+ # default: {
+ # default:
+ # Note that the line 'default:' will be parsed as a label elsewhere.
+ elsif ( $is_case_default{$statement_type}
+ && !is_balanced_closing_container(QUESTION_COLON) )
+ {
+ # mark it as a perltidy label type
+ $type = 'J';
+ }
+
# otherwise, it should be part of a ?/: operator
else {
( $type_sequence, $indent_flag ) =
'+' => sub { # what kind of plus?
if ( $expecting == TERM ) {
- my $number = scan_number();
+ my $number = scan_number_fast();
# unary plus is safest assumption if not a number
if ( !defined($number) ) { $type = 'p'; }
error_if_expecting_OPERATOR("Array")
if ( $expecting == OPERATOR );
- scan_identifier();
+ scan_identifier_fast();
},
'%' => sub { # hash or modulo?
- # first guess is hash if no following blank
+ # first guess is hash if no following blank or paren
if ( $expecting == UNKNOWN ) {
- if ( $next_type ne 'b' ) { $expecting = TERM }
+ if ( $next_type ne 'b' && $next_type ne '(' ) {
+ $expecting = TERM;
+ }
}
if ( $expecting == TERM ) {
- scan_identifier();
+ scan_identifier_fast();
}
},
'[' => sub {
}
}
elsif ( $expecting == TERM ) {
- my $number = scan_number();
+ my $number = scan_number_fast();
# maybe part of bareword token? unary is safest
if ( !defined($number) ) { $type = 'm'; }
# FIXME: this should work but will not catch errors
# because we also have to be sure that previous token is
# a type character ($,@,%).
- if ( $last_nonblank_token eq '{'
- && ( $next_tok =~ /^[A-Za-z_]/ ) )
+ if ( $last_nonblank_token eq '{'
+ && ( $next_tok !~ /^\d/ )
+ && ( $next_tok =~ /^\w/ ) )
{
if ( $next_tok eq 'W' ) {
- $tokenizer_self->{_saw_perl_dash_w} = 1;
+ $tokenizer_self->[_saw_perl_dash_w_] = 1;
}
$tok = $tok . $next_tok;
$i = $i + 1;
scan_bare_identifier();
},
'<<' => sub { # maybe a here-doc?
- return
- unless ( $i < $max_token_index )
- ; # here-doc not possible if end of line
+
+## This check removed because it could be a deprecated here-doc with
+## no specified target. See example in log 16 Sep 2020.
+## return
+## unless ( $i < $max_token_index )
+## ; # here-doc not possible if end of line
if ( $expecting != OPERATOR ) {
my ( $found_target, $here_doc_target, $here_quote_character,
my $truncated = substr( $here_doc_target, 0, 80 );
complain("Long here-target: '$truncated' ...\n");
}
+ elsif ( !$here_doc_target ) {
+ warning(
+ 'Use of bare << to mean <<"" is deprecated' . "\n" )
+ unless ($here_quote_character);
+ }
elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
complain(
"Unconventional here-target: '$here_doc_target'\n");
# if -> points to a bare word, we must scan for an identifier,
# otherwise something like ->y would look like the y operator
- scan_identifier();
+
+ # NOTE: this will currently allow things like
+ # '->@array' '->*VAR' '->%hash'
+ # to get parsed as identifiers, even though these are not currently
+ # allowed syntax. To catch syntax errors like this we could first
+ # check that the next character and skip this call if it is one of
+ # ' @ % * '. A disadvantage with doing this is that this would
+ # have to be fixed if the perltidy syntax is ever extended to make
+ # any of these valid. So for now this check is not done.
+ scan_identifier_fast();
},
# type = 'pp' for pre-increment, '++' for post-increment
'++' => sub {
- if ( $expecting == TERM ) { $type = 'pp' }
+ if ( $expecting == TERM ) { $type = 'pp' }
elsif ( $expecting == UNKNOWN ) {
+
my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens, $max_token_index );
+
+ # Fix for c042: look past a side comment
+ if ( $next_nonblank_token eq '#' ) {
+ ( $next_nonblank_token, $i_next ) =
+ find_next_nonblank_token( $max_token_index,
+ $rtokens, $max_token_index );
+ }
+
if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
}
},
# type = 'mm' for pre-decrement, '--' for post-decrement
'--' => sub {
- if ( $expecting == TERM ) { $type = 'mm' }
+ if ( $expecting == TERM ) { $type = 'mm' }
elsif ( $expecting == UNKNOWN ) {
my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens, $max_token_index );
+
+ # Fix for c042: look past a side comment
+ if ( $next_nonblank_token eq '#' ) {
+ ( $next_nonblank_token, $i_next ) =
+ find_next_nonblank_token( $max_token_index,
+ $rtokens, $max_token_index );
+ }
+
if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
}
},
'&&' => sub {
error_if_expecting_TERM()
- if ( $expecting == TERM );
+ if ( $expecting == TERM && $last_nonblank_token ne ',' ); #c015
},
'||' => sub {
error_if_expecting_TERM()
- if ( $expecting == TERM );
+ if ( $expecting == TERM && $last_nonblank_token ne ',' ); #c015
},
'//' => sub {
@_ = qw(use require);
@is_use_require{@_} = (1) x scalar(@_);
- # This hash holds the hash key in $tokenizer_self for these keywords:
- my %is_format_END_DATA = (
- 'format' => '_in_format',
- '__END__' => '_in_end',
- '__DATA__' => '_in_data',
+ # This hash holds the array index in $tokenizer_self for these keywords:
+ # Fix for issue c035: removed 'format' from this hash
+ my %is_END_DATA = (
+ '__END__' => _in_end_,
+ '__DATA__' => _in_data_,
);
# original ref: camel 3 p 147,
'qx' => 1,
);
+ use constant DEBUG_TOKENIZE => 0;
+
sub tokenize_this_line {
# This routine breaks a line of perl code into tokens which are of use in
$line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q';
# check for pod documentation
- if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
+ if ( substr( $untrimmed_input_line, 0, 1 ) eq '='
+ && $untrimmed_input_line =~ /^=[A-Za-z_]/ )
+ {
# must not be in multi-line quote
# and must not be in an equation
- if ( !$in_quote && ( operator_expected( 'b', '=', 'b' ) == TERM ) )
+ if ( !$in_quote
+ && ( operator_expected( [ 'b', '=', 'b' ] ) == TERM ) )
{
- $tokenizer_self->{_in_pod} = 1;
+ $tokenizer_self->[_in_pod_] = 1;
return;
}
}
chomp $input_line;
+ # Set a flag to indicate if we might be at an __END__ or __DATA__ line
+ # This will be used below to avoid quoting a bare word followed by
+ # a fat comma.
+ my $is_END_or_DATA;
+
# trim start of this line unless we are continuing a quoted line
# do not trim end because we might end in a quote (test: deken4.pl)
# Perl::Tidy::Formatter will delete needless trailing blanks
unless ( $in_quote && ( $quote_type eq 'Q' ) ) {
- $input_line =~ s/^\s*//; # trim left end
- }
+ $input_line =~ s/^\s+//; # trim left end
- # Set a flag to indicate if we might be at an __END__ or __DATA__ line
- # This will be used below to avoid quoting a bare word followed by
- # a fat comma.
- my $is_END_or_DATA = $input_line =~ /^\s*__(END|DATA)__\s*$/;
+ $is_END_or_DATA = substr( $input_line, 0, 1 ) eq '_'
+ && $input_line =~ /^\s*__(END|DATA)__\s*$/;
+ }
# update the copy of the line for use in error messages
# This must be exactly what we give the pre_tokenizer
- $tokenizer_self->{_line_text} = $input_line;
+ $tokenizer_self->[_line_of_text_] = $input_line;
# re-initialize for the main loop
$routput_token_list = []; # stack of output token indexes
# stage 1 is a very simple pre-tokenization
my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
- # a little optimization for a full-line comment
- if ( !$in_quote && ( $input_line =~ /^#/ ) ) {
- $max_tokens_wanted = 1 # no use tokenizing a comment
+ # optimize for a full-line comment
+ if ( !$in_quote && substr( $input_line, 0, 1 ) eq '#' ) {
+ $max_tokens_wanted = 1; # no use tokenizing a comment
+
+ # and check for skipped section
+ if ( $rOpts_code_skipping
+ && $input_line =~ /$code_skipping_pattern_begin/ )
+ {
+ $tokenizer_self->[_in_skipped_] = 1;
+ return;
+ }
}
# start by breaking the line into pre-tokens
$routput_token_type->[$i] = $type;
}
- $tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
+ $tok = $quote_character if ($quote_character);
# scan for the end of the quote or pattern
(
}
# For an 'e' quote modifier we must scan the replacement
- # text for here-doc targets.
- if ($saw_modifier_e) {
+ # text for here-doc targets...
+ # but if the modifier starts a new line we can skip
+ # this because either the here doc will be fully
+ # contained in the replacement text (so we can
+ # ignore it) or Perl will not find it.
+ # See test 'here2.in'.
+ if ( $saw_modifier_e && $i_tok >= 0 ) {
my $rht = scan_replacement_text($qs1);
# Change type from 'Q' to 'h' for quotes with
# here-doc targets so that the formatter (see sub
- # print_line_of_tokens) will not make any line
+ # process_line_of_CODE) will not make any line
# breaks after this point.
if ($rht) {
push @{$rhere_target_list}, @{$rht};
}
}
- unless ( $tok =~ /^\s*$/ || $tok eq 'CORE::' ) {
+ unless ( $type eq 'b' || $tok eq 'CORE::' ) {
# try to catch some common errors
if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
$last_nonblank_container_type = $container_type;
$last_nonblank_type_sequence = $type_sequence;
$last_nonblank_i = $i_tok;
+
+ # Patch for c030: Fix things in case a '->' got separated from
+ # the subsequent identifier by a side comment. We need the
+ # last_nonblank_token to have a leading -> to avoid triggering
+ # an operator expected error message at the next '('. See also
+ # fix for git #63.
+ if ( $last_last_nonblank_token eq '->' ) {
+ if ( $last_nonblank_type eq 'w'
+ || $last_nonblank_type eq 'i'
+ && substr( $last_nonblank_token, 0, 1 ) eq '$' )
+ {
+ $last_nonblank_token = '->' . $last_nonblank_token;
+ $last_nonblank_type = 'i';
+ }
+ }
}
# store previous token type
}
my $pre_tok = $rtokens->[$i]; # get the next pre-token
my $pre_type = $rtoken_type->[$i]; # and type
- $tok = $pre_tok;
- $type = $pre_type; # to be modified as necessary
+ $tok = $pre_tok;
+ $type = $pre_type; # to be modified as necessary
$block_type = ""; # blank for all tokens except code block braces
$container_type = ""; # blank for all tokens except some parens
$type_sequence = ""; # blank for all tokens except ?/:
# continue gathering identifier if necessary
# but do not start on blanks and comments
- if ( $id_scan_state && $pre_type !~ /[b#]/ ) {
+ if ( $id_scan_state && $pre_type ne 'b' && $pre_type ne '#' ) {
if ( $is_sub{$id_scan_state} || $is_package{$id_scan_state} ) {
scan_id();
if ( $test_tok eq '//' && $last_nonblank_type ne 'Z' ) {
my $next_type = $rtokens->[ $i + 1 ];
my $expecting =
- operator_expected( $prev_type, $tok, $next_type );
+ operator_expected( [ $prev_type, $tok, $next_type ] );
# Patched for RT#101547, was 'unless ($expecting==OPERATOR)'
$combine_ok = 0 if ( $expecting == TERM );
$next_tok = $rtokens->[ $i + 1 ];
$next_type = $rtoken_type->[ $i + 1 ];
- TOKENIZER_DEBUG_FLAG_TOKENIZE && do {
+ DEBUG_TOKENIZE && do {
local $" = ')(';
my @debug_list = (
$last_nonblank_token, $tok,
print STDOUT "TOKENIZE:(@debug_list)\n";
};
- # turn off attribute list on first non-blank, non-bareword
- if ( $pre_type ne 'w' ) { $in_attribute_list = 0 }
+ # Turn off attribute list on first non-blank, non-bareword.
+ # Added '#' to fix c038.
+ if ( $pre_type ne 'w' && $pre_type ne '#' ) {
+ $in_attribute_list = 0;
+ }
###############################################################
# We have the next token, $tok.
###############################################################
if ( $pre_type eq 'w' ) {
- $expecting = operator_expected( $prev_type, $tok, $next_type );
+ $expecting =
+ operator_expected( [ $prev_type, $tok, $next_type ] );
+
+ # Patch for c043, part 3: A bareword after '->' expects a TERM
+ # FIXME: It would be cleaner to give method calls a new type 'M'
+ # and update sub operator_expected to handle this.
+ if ( $last_nonblank_type eq '->' ) {
+ $expecting = TERM;
+ }
+
my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens, $max_token_index );
# treat bare word followed by open paren like qw(
if ( $next_nonblank_token eq '(' ) {
+
+ # For something like:
+ # : prototype($$)
+ # we should let do_scan_sub see it so that it can see
+ # the prototype. All other attributes get parsed as a
+ # quoted string.
+ if ( $tok eq 'prototype' ) {
+ $id_scan_state = 'prototype';
+
+ # start just after the word 'prototype'
+ my $i_beg = $i + 1;
+ ( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
+ {
+ input_line => $input_line,
+ i => $i,
+ i_beg => $i_beg,
+ tok => $tok,
+ type => $type,
+ rtokens => $rtokens,
+ rtoken_map => $rtoken_map,
+ id_scan_state => $id_scan_state,
+ max_token_index => $max_token_index
+ }
+ );
+
+ # If successful, mark as type 'q' to be consistent with other
+ # attributes. Note that type 'w' would also work.
+ if ( $i > $i_beg ) {
+ $type = 'q';
+ next;
+ }
+
+ # If not successful, continue and parse as a quote.
+ }
+
+ # All other attribute lists must be parsed as quotes
+ # (see 'signatures.t' for good examples)
$in_quote = $quote_items{'q'};
$allowed_quote_modifiers = $quote_modifiers{'q'};
$type = 'q';
$type = 'v';
report_v_string($tok);
}
- else { $type = 'w' }
+ else {
+
+ # Bareword followed by a fat comma ... see 'git18.in'
+ # If tok is something like 'x17' then it could
+ # actually be operator x followed by number 17.
+ # For example, here:
+ # 123x17 => [ 792, 1224 ],
+ # (a key of 123 repeated 17 times, perhaps not
+ # what was intended). We will mark x17 as type
+ # 'n' and it will be split. If the previous token
+                    # was also a bareword then it is not very clear what is
+                    # going on. In this case we will not be sure that
+ # an operator is expected, so we just mark it as a
+ # bareword. Perl is a little murky in what it does
+ # with stuff like this, and its behavior can change
+ # over time. Something like
+ # a x18 => [792, 1224], will compile as
+ # a key with 18 a's. But something like
+ # push @array, a x18;
+ # is a syntax error.
+ if ( $expecting == OPERATOR && $tok =~ /^x\d+$/ ) {
+ $type = 'n';
+ }
+ else {
+
+ # git #18
+ $type = 'w';
+ error_if_expecting_OPERATOR();
+ }
+ }
next;
}
next;
}
+            # Scan a bare word following a -> as an identifier; it could
+ # have a long package name. Fixes c037, c041.
+ if ( $last_nonblank_token eq '->' ) {
+ scan_bare_identifier();
+
+ # Patch for c043, part 4; use type 'w' after a '->'.
+ # This is just a safety check on sub scan_bare_identifier,
+ # which should get this case correct.
+ $type = 'w';
+ next;
+ }
+
# a bare word immediately followed by :: is not a keyword;
# use $tok_kw when testing for keywords to avoid a mistake
my $tok_kw = $tok;
$tok_kw .= '::';
}
+ # Decide if 'sub :' can be the start of a sub attribute list.
+ # We will decide based on if the colon is followed by a
+ # bareword which is not a keyword.
+ my $sub_attribute_ok_here;
+ if ( $is_sub{$tok_kw}
+ && $expecting != OPERATOR
+ && $next_nonblank_token eq ':' )
+ {
+ my ( $nn_nonblank_token, $i_nn ) =
+ find_next_nonblank_token( $i_next + 1,
+ $rtokens, $max_token_index );
+ $sub_attribute_ok_here =
+ $nn_nonblank_token =~ /^\w/
+ && $nn_nonblank_token !~ /^\d/
+ && !$is_keyword{$nn_nonblank_token};
+ }
+
# handle operator x (now we know it isn't $x=)
- if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {
+ if ( $expecting == OPERATOR
+ && substr( $tok, 0, 1 ) eq 'x'
+ && $tok =~ /^x\d*$/ )
+ {
if ( $tok eq 'x' ) {
if ( $rtokens->[ $i + 1 ] eq '=' ) { # x=
}
}
- # FIXME: Patch: mark something like x4 as an integer for now
+ # NOTE: mark something like x4 as an integer for now
# It gets fixed downstream. This is easier than
# splitting the pretoken.
else {
elsif ( ( $tok eq 'strict' )
and ( $last_nonblank_token eq 'use' ) )
{
- $tokenizer_self->{_saw_use_strict} = 1;
+ $tokenizer_self->[_saw_use_strict_] = 1;
scan_bare_identifier();
}
elsif ( ( $tok eq 'warnings' )
and ( $last_nonblank_token eq 'use' ) )
{
- $tokenizer_self->{_saw_perl_dash_w} = 1;
+ $tokenizer_self->[_saw_perl_dash_w_] = 1;
# scan as identifier, so that we pick up something like:
# use warnings::register
elsif (
$tok eq 'AutoLoader'
- && $tokenizer_self->{_look_for_autoloader}
+ && $tokenizer_self->[_look_for_autoloader_]
&& (
$last_nonblank_token eq 'use'
)
{
write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
- $tokenizer_self->{_saw_autoloader} = 1;
- $tokenizer_self->{_look_for_autoloader} = 0;
+ $tokenizer_self->[_saw_autoloader_] = 1;
+ $tokenizer_self->[_look_for_autoloader_] = 0;
scan_bare_identifier();
}
elsif (
$tok eq 'SelfLoader'
- && $tokenizer_self->{_look_for_selfloader}
+ && $tokenizer_self->[_look_for_selfloader_]
&& ( $last_nonblank_token eq 'use'
|| $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
|| $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
)
{
write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
- $tokenizer_self->{_saw_selfloader} = 1;
- $tokenizer_self->{_look_for_selfloader} = 0;
+ $tokenizer_self->[_saw_selfloader_] = 1;
+ $tokenizer_self->[_look_for_selfloader_] = 0;
scan_bare_identifier();
}
}
}
- # FIXME: could check for error in which next token is
- # not a word (number, punctuation, ..)
else {
$is_constant{$current_package}{$next_nonblank_token}
= 1;
# of leading and trailing whitespace. So they are given a
# separate type, 'q', unless requested otherwise.
$type =
- ( $tok eq 'qw' && $tokenizer_self->{_trim_qw} )
+ ( $tok eq 'qw' && $tokenizer_self->[_trim_qw_] )
? 'q'
: 'Q';
$quote_type = $type;
elsif (
( $next_nonblank_token eq ':' )
&& ( $rtokens->[ $i_next + 1 ] ne ':' )
- && ( $i_next <= $max_token_index ) # colon on same line
+ && ( $i_next <= $max_token_index ) # colon on same line
+ && !$sub_attribute_ok_here # like 'sub : lvalue' ?
&& label_ok()
)
{
if ( $tok !~ /[A-Z]/ ) {
- push @{ $tokenizer_self->{_rlower_case_labels_at} },
+ push @{ $tokenizer_self->[_rlower_case_labels_at_] },
$input_line_number;
}
$type = 'J';
next;
}
- # 'sub' || 'package'
- elsif ( $is_sub{$tok_kw} || $is_package{$tok_kw} ) {
+ # 'sub' or alias
+ elsif ( $is_sub{$tok_kw} ) {
+ error_if_expecting_OPERATOR()
+ if ( $expecting == OPERATOR );
+ initialize_subname();
+ scan_id();
+ }
+
+ # 'package'
+ elsif ( $is_package{$tok_kw} ) {
error_if_expecting_OPERATOR()
if ( $expecting == OPERATOR );
scan_id();
}
+ # Fix for c035: split 'format' from 'is_format_END_DATA' to be
+ # more restrictive. Require a new statement to be ok here.
+ elsif ( $tok_kw eq 'format' && new_statement_ok() ) {
+ $type = ';'; # make tokenizer look for TERM next
+ $tokenizer_self->[_in_format_] = 1;
+ last;
+ }
+
# Note on token types for format, __DATA__, __END__:
# It simplifies things to give these type ';', so that when we
# start rescanning we will be expecting a token of type TERM.
# We will switch to type 'k' before outputting the tokens.
- elsif ( $is_format_END_DATA{$tok_kw} ) {
+ elsif ( $is_END_DATA{$tok_kw} ) {
$type = ';'; # make tokenizer look for TERM next
- $tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1;
+
+ # Remember that we are in one of these three sections
+ $tokenizer_self->[ $is_END_DATA{$tok_kw} ] = 1;
last;
}
}
# remember my and our to check for trailing ": shared"
- elsif ( $is_my_our{$tok} ) {
+ elsif ( $is_my_our_state{$tok} ) {
$statement_type = $tok;
}
}
# patch for SWITCH/CASE if 'case' and 'when are
- # treated as keywords.
- elsif ( $tok eq 'when' || $tok eq 'case' ) {
+ # treated as keywords. Also 'default' for Switch::Plain
+ elsif ($tok eq 'when'
+ || $tok eq 'case'
+ || $tok eq 'default' )
+ {
$statement_type = $tok; # next '{' is block
}
else {
scan_bare_identifier();
+
+ if ( $statement_type eq 'use'
+ && $last_nonblank_token eq 'use' )
+ {
+ $saw_use_module{$current_package}->{$tok} = 1;
+ }
+
if ( $type eq 'w' ) {
if ( $expecting == OPERATOR ) {
+ # Patch to avoid error message for RPerl overloaded
+ # operator functions: use overload
+ # '+' => \&sse_add,
+ # '-' => \&sse_sub,
+ # '*' => \&sse_mul,
+ # '/' => \&sse_div;
+ # FIXME: this should eventually be generalized
+ if ( $saw_use_module{$current_package}->{'RPerl'}
+ && $tok =~ /^sse_(mul|div|add|sub)$/ )
+ {
+
+ }
+
+ # Fix part 1 for git #63 in which a comment falls
+ # between an -> and the following word. An
+ # alternate fix would be to change operator_expected
+ # to return an UNKNOWN for this type.
+ elsif ( $last_nonblank_type eq '->' ) {
+
+ }
+
# don't complain about possible indirect object
# notation.
# For example:
# This will call A::new but we have a 'new' in
# main:: which looks like a constant.
#
- if ( $last_nonblank_type eq 'C' ) {
+ elsif ( $last_nonblank_type eq 'C' ) {
if ( $tok !~ /::$/ ) {
complain(<<EOM);
Expecting operator after '$last_nonblank_token' but found bare word '$tok'
# functions
$next_tok = $rtokens->[ $i + 1 ];
if ( $next_tok eq '(' ) {
- $type = 'U';
+
+ # Fix part 2 for git #63. Leave type as 'w' to keep
+ # the type the same as if the -> were not separated
+ $type = 'U' unless ( $last_nonblank_type eq '->' );
}
# underscore after file test operator is file handle
)
{
$statement_type = $tok; # next '{' is block
- $type = 'k'; # for keyword syntax coloring
+ $type = 'k'; # for keyword syntax coloring
}
# patch for SWITCH/CASE if switch and given not keywords
# section 2: strings of digits
###############################################################
elsif ( $pre_type eq 'd' ) {
- $expecting = operator_expected( $prev_type, $tok, $next_type );
+ $expecting =
+ operator_expected( [ $prev_type, $tok, $next_type ] );
error_if_expecting_OPERATOR("Number")
if ( $expecting == OPERATOR );
- my $number = scan_number();
+
+ my $number = scan_number_fast();
if ( !defined($number) ) {
# shouldn't happen - we should always get a number
my $code = $tokenization_code->{$tok};
if ($code) {
$expecting =
- operator_expected( $prev_type, $tok, $next_type );
+ operator_expected( [ $prev_type, $tok, $next_type ] );
$code->();
redo if $in_quote;
}
my $container_environment = '';
my $im = -1; # previous $i value
my $num;
- my $ci_string_sum = ones_count($ci_string_in_tokenizer);
+
+ # Count the number of '1's in the string (previously sub ones_count)
+ my $ci_string_sum = ( my $str = $ci_string_in_tokenizer ) =~ tr/1/0/;
# Computing Token Indentation
#
my $val = ord($type);
warning(
"unexpected character decimal $val ($type) in script\n");
- $tokenizer_self->{_in_error} = 1;
+ $tokenizer_self->[_in_error_] = 1;
}
# ----------------------------------------------------------------
push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
$level_in_tokenizer++;
+ if ( $level_in_tokenizer > $tokenizer_self->[_maximum_level_] )
+ {
+ $tokenizer_self->[_maximum_level_] = $level_in_tokenizer;
+ }
+
if ($forced_indentation_flag) {
# break BEFORE '?' when there is forced indentation
$ci_string_in_tokenizer .=
( $intervening_secondary_structure != 0 ) ? '1' : '0';
- $ci_string_sum = ones_count($ci_string_in_tokenizer);
+ $ci_string_sum =
+ ( my $str = $ci_string_in_tokenizer ) =~ tr/1/0/;
$continuation_string_in_tokenizer .=
( $in_statement_continuation > 0 ) ? '1' : '0';
)
{
$total_ci += $in_statement_continuation
- unless ( $ci_string_in_tokenizer =~ /1$/ );
+ unless ( substr( $ci_string_in_tokenizer, -1 ) eq '1' );
}
$ci_string_i = $total_ci;
if ( length($nesting_block_string) > 1 )
{ # true for valid script
chop $nesting_block_string;
- $nesting_block_flag = ( $nesting_block_string =~ /1$/ );
+ $nesting_block_flag =
+ substr( $nesting_block_string, -1 ) eq '1';
chop $nesting_list_string;
- $nesting_list_flag = ( $nesting_list_string =~ /1$/ );
+ $nesting_list_flag =
+ substr( $nesting_list_string, -1 ) eq '1';
chop $ci_string_in_tokenizer;
- $ci_string_sum = ones_count($ci_string_in_tokenizer);
+ $ci_string_sum =
+ ( my $str = $ci_string_in_tokenizer ) =~ tr/1/0/;
$in_statement_continuation =
chop $continuation_string_in_tokenizer;
elsif (
$is_zero_continuation_block_type{
$routput_block_type->[$i]
- } )
+ }
+ )
{
$in_statement_continuation = 0;
}
elsif (
$is_not_zero_continuation_block_type{
$routput_block_type->[$i]
- } )
+ }
+ )
{
}
# commas, this simplifies the -lp indentation logic, which
# counts commas. For ?: it makes them stand out.
if ($nesting_list_flag) {
- if ( $type =~ /^[,\?\:]$/ ) {
+ ## $type =~ /^[,\?\:]$/
+ if ( $is_comma_question_colon{$type} ) {
$in_statement_continuation = 0;
}
}
}
if ( $level_in_tokenizer < 0 ) {
- unless ( $tokenizer_self->{_saw_negative_indentation} ) {
- $tokenizer_self->{_saw_negative_indentation} = 1;
+ unless ( $tokenizer_self->[_saw_negative_indentation_] ) {
+ $tokenizer_self->[_saw_negative_indentation_] = 1;
warning("Starting negative indentation\n");
}
}
push( @tokens, substr( $input_line, $rtoken_map->[$im], $num ) );
}
- $tokenizer_self->{_in_attribute_list} = $in_attribute_list;
- $tokenizer_self->{_in_quote} = $in_quote;
- $tokenizer_self->{_quote_target} =
+ $tokenizer_self->[_in_attribute_list_] = $in_attribute_list;
+ $tokenizer_self->[_in_quote_] = $in_quote;
+ $tokenizer_self->[_quote_target_] =
$in_quote ? matching_end_token($quote_character) : "";
- $tokenizer_self->{_rhere_target_list} = $rhere_target_list;
+ $tokenizer_self->[_rhere_target_list_] = $rhere_target_list;
$line_of_tokens->{_rtoken_type} = \@token_type;
$line_of_tokens->{_rtokens} = \@tokens;
# Tokenizer routines which assist in identifying token types
#######################################################################
+# hash lookup table of operator expected values
+my %op_expected_table;
+
+# exceptions to perl's weird parsing rules after type 'Z'
+my %is_weird_parsing_rule_exception;
+
+BEGIN {
+
+ # Always expecting TERM following these types:
+ # note: this is identical to '@value_requestor_type' defined later.
+ my @q = qw(
+ ; ! + x & ? F J - p / Y : % f U ~ A G j L * . | ^ < = [ m { \ > t
+ || >= != mm *= => .. !~ == && |= .= pp -= =~ += <= %= ^= x= ~~ ** << /=
+ &= // >> ~. &. |. ^.
+ ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~
+ );
+ push @q, ',';
+ push @q, '('; # for completeness, not currently a token type
+ @{op_expected_table}{@q} = (TERM) x scalar(@q);
+
+ # Always UNKNOWN following these types:
+ # Fix for c030: added '->' to this list
+ @q = qw( w -> );
+ @{op_expected_table}{@q} = (UNKNOWN) x scalar(@q);
+
+ # Always expecting OPERATOR ...
+ # 'n' and 'v' are currently excluded because they might be VERSION numbers
+ # 'i' is currently excluded because it might be a package
+ # 'q' is currently excluded because it might be a prototype
+ # Fix for c030: removed '->' from this list:
+ @q = qw( -- C h R ++ ] Q <> ); ## n v q i );
+ push @q, ')';
+ @{op_expected_table}{@q} = (OPERATOR) x scalar(@q);
+
+ # Fix for git #62: added '*' and '%'
+ @q = qw( < ? * % );
+ @{is_weird_parsing_rule_exception}{@q} = (OPERATOR) x scalar(@q);
+
+}
+
+use constant DEBUG_OPERATOR_EXPECTED => 0;
+
sub operator_expected {
+ # Returns a parameter indicating what types of tokens can occur next
+
+ # Call format:
+ # $op_expected = operator_expected( [ $prev_type, $tok, $next_type ] );
+ # where
+ # $prev_type is the type of the previous token (blank or not)
+ # $tok is the current token
+ # $next_type is the type of the next token (blank or not)
+
# Many perl symbols have two or more meanings. For example, '<<'
# can be a shift operator or a here-doc operator. The
# interpretation of these symbols depends on the current state of
# the tokenizer, which may either be expecting a term or an
- # operator. For this example, a << would be a shift if an operator
- # is expected, and a here-doc if a term is expected. This routine
+ # operator. For this example, a << would be a shift if an OPERATOR
+ # is expected, and a here-doc if a TERM is expected. This routine
# is called to make this decision for any current token. It returns
# one of three possible values:
#
# UNKNOWN, because a wrong guess can spoil the formatting of a
# script.
#
- # adding NEW_TOKENS: it is critically important that this routine be
+ # Adding NEW_TOKENS: it is critically important that this routine be
# updated to allow it to determine if an operator or term is to be
# expected after the new token. Doing this simply involves adding
# the new token character to one of the regexes in this routine or
# USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
# $statement_type
- my ( $prev_type, $tok, $next_type ) = @_;
+ # When possible, token types should be selected such that we can determine
+ # the 'operator_expected' value by a simple hash lookup. If there are
+ # exceptions, that is an indication that a new type is needed.
- my $op_expected = UNKNOWN;
+ my ($rarg) = @_;
-##print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";
+ my $msg = "";
-# Note: function prototype is available for token type 'U' for future
-# program development. It contains the leading and trailing parens,
-# and no blanks. It might be used to eliminate token type 'C', for
-# example (prototype = '()'). Thus:
-# if ($last_nonblank_type eq 'U') {
-# print "previous token=$last_nonblank_token type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
-# }
+ ##############
+ # Table lookup
+ ##############
- # A possible filehandle (or object) requires some care...
- if ( $last_nonblank_type eq 'Z' ) {
+ # Many types can be obtained by a table lookup given the previous type.
+ # This typically handles half or more of the calls.
+ my $op_expected = $op_expected_table{$last_nonblank_type};
+ if ( defined($op_expected) ) {
+ $msg = "Table lookup";
+ goto RETURN;
+ }
- # angle.t
- if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
- $op_expected = UNKNOWN;
- }
+ ######################
+ # Handle special cases
+ ######################
- # For possible file handle like "$a", Perl uses weird parsing rules.
- # For example:
- # print $a/2,"/hi"; - division
- # print $a / 2,"/hi"; - division
- # print $a/ 2,"/hi"; - division
- # print $a /2,"/hi"; - pattern (and error)!
- elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
- $op_expected = TERM;
- }
+ $op_expected = UNKNOWN;
+ my ( $prev_type, $tok, $next_type ) = @{$rarg};
- # Note when an operation is being done where a
- # filehandle might be expected, since a change in whitespace
- # could change the interpretation of the statement.
- else {
- if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
+ # Types 'k', '}' and 'Z' depend on context
+ # FIXME: Types 'i', 'n', 'v', 'q' currently also temporarily depend on
+ # context but that dependence could eventually be eliminated with better
+ # token type definition
- # Do not complain in 'use' statements, which have special syntax.
- # For example, from RT#130344:
- # use lib $FindBin::Bin . '/lib';
- if ( $statement_type ne 'use' ) {
- complain("operator in print statement not recommended\n");
- }
- $op_expected = OPERATOR;
- }
+ # identifier...
+ if ( $last_nonblank_type eq 'i' ) {
+ $op_expected = OPERATOR;
+
+ # FIXME: it would be cleaner to make this a special type
+ # expecting VERSION or {} after package NAMESPACE
+ # TODO: maybe mark these words as type 'Y'?
+ if ( $statement_type =~ /^package\b/
+ && $last_nonblank_token =~ /^package\b/ )
+ {
+ $op_expected = TERM;
}
}
- # Check for smartmatch operator before preceding brace or square bracket.
- # For example, at the ? after the ] in the following expressions we are
- # expecting an operator:
- #
- # qr/3/ ~~ ['1234'] ? 1 : 0;
- # map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a;
- elsif ( $last_nonblank_type eq '}' && $last_nonblank_token eq '~~' ) {
- $op_expected = OPERATOR;
- }
+ # keyword...
+ elsif ( $last_nonblank_type eq 'k' ) {
+ $op_expected = TERM;
+ if ( $expecting_operator_token{$last_nonblank_token} ) {
+ $op_expected = OPERATOR;
+ }
+ elsif ( $expecting_term_token{$last_nonblank_token} ) {
- # handle something after 'do' and 'eval'
- elsif ( $is_block_operator{$last_nonblank_token} ) {
+ # Exceptions from TERM:
- # something like $a = eval "expression";
- # ^
- if ( $last_nonblank_type eq 'k' ) {
- $op_expected = TERM; # expression or list mode following keyword
- }
+ # // may follow perl functions which may be unary operators
+ # see test file dor.t (defined or);
+ if (
+ $tok eq '/'
+ && $next_type eq '/'
+ && $is_keyword_rejecting_slash_as_pattern_delimiter{
+ $last_nonblank_token}
+ )
+ {
+ $op_expected = OPERATOR;
+ }
- # something like $a = do { BLOCK } / 2;
- # or this ? after a smartmatch anonynmous hash or array reference:
- # qr/3/ ~~ ['1234'] ? 1 : 0;
- # ^
- else {
- $op_expected = OPERATOR; # block mode following }
+ # Patch to allow a ? following 'split' to be a deprecated pattern
+ # delimiter. This patch is coordinated with the omission of split
+ # from the list
+ # %is_keyword_rejecting_question_as_pattern_delimiter. This patch
+ # will force perltidy to guess.
+ elsif ($tok eq '?'
+ && $last_nonblank_token eq 'split' )
+ {
+ $op_expected = UNKNOWN;
+ }
}
- }
+ } ## end type 'k'
- # handle bare word..
- elsif ( $last_nonblank_type eq 'w' ) {
+ # closing container token...
+
+ # Note that the actual token for type '}' may also be a ')'.
+
+ # Also note that $last_nonblank_token is not the token corresponding to
+ # $last_nonblank_type when the type is a closing container. In that
+ # case it is the token before the corresponding opening container token.
+ # So for example, for this snippet
+ # $a = do { BLOCK } / 2;
+ # the $last_nonblank_token is 'do' when $last_nonblank_type eq '}'.
- # unfortunately, we can't tell what type of token to expect next
- # after most bare words
+ elsif ( $last_nonblank_type eq '}' ) {
$op_expected = UNKNOWN;
- }
- # operator, but not term possible after these types
- # Note: moved ')' from type to token because parens in list context
- # get marked as '{' '}' now. This is a minor glitch in the following:
- # my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
- #
- elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
- || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
- {
- $op_expected = OPERATOR;
+ # handle something after 'do' and 'eval'
+ if ( $is_block_operator{$last_nonblank_token} ) {
- # in a 'use' statement, numbers and v-strings are not true
- # numbers, so to avoid incorrect error messages, we will
- # mark them as unknown for now (use.t)
- # TODO: it would be much nicer to create a new token V for VERSION
- # number in a use statement. Then this could be a check on type V
- # and related patches which change $statement_type for '=>'
- # and ',' could be removed. Further, it would clean things up to
- # scan the 'use' statement with a separate subroutine.
- if ( ( $statement_type eq 'use' )
- && ( $last_nonblank_type =~ /^[nv]$/ ) )
- {
- $op_expected = UNKNOWN;
+ # something like $a = do { BLOCK } / 2;
+ $op_expected = OPERATOR; # block mode following }
}
- # expecting VERSION or {} after package NAMESPACE
- elsif ($statement_type =~ /^package\b/
- && $last_nonblank_token =~ /^package\b/ )
- {
- $op_expected = TERM;
- }
- }
+ elsif ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) {
+ $op_expected = OPERATOR;
+ if ( $last_nonblank_token eq '$' ) { $op_expected = UNKNOWN }
- # no operator after many keywords, such as "die", "warn", etc
- elsif ( $expecting_term_token{$last_nonblank_token} ) {
+ }
- # // may follow perl functions which may be unary operators
- # see test file dor.t (defined or);
- if ( $tok eq '/'
- && $next_type eq '/'
- && $last_nonblank_type eq 'k'
- && $is_keyword_taking_optional_args{$last_nonblank_token} )
- {
+ # Check for smartmatch operator before preceding brace or square
+ # bracket. For example, at the ? after the ] in the following
+ # expressions we are expecting an operator:
+ #
+ # qr/3/ ~~ ['1234'] ? 1 : 0;
+ # map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a;
+ elsif ( $last_nonblank_token eq '~~' ) {
$op_expected = OPERATOR;
}
+
+ # A right brace here indicates the end of a simple block. All
+ # non-structural right braces have type 'R' all braces associated with
+ # block operator keywords have been given those keywords as
+ # "last_nonblank_token" and caught above. (This statement is order
+ # dependent, and must come after checking $last_nonblank_token).
else {
- $op_expected = TERM;
- }
- }
- # no operator after things like + - ** (i.e., other operators)
- elsif ( $expecting_term_types{$last_nonblank_type} ) {
- $op_expected = TERM;
- }
+ # patch for dor.t (defined or).
+ if ( $tok eq '/'
+ && $next_type eq '/'
+ && $last_nonblank_token eq ']' )
+ {
+ $op_expected = OPERATOR;
+ }
+
+ # Patch for RT #116344: misparse a ternary operator after an
+ # anonymous hash, like this:
+ # return ref {} ? 1 : 0;
+ # The right brace should really be marked type 'R' in this case,
+ # and it is safest to return an UNKNOWN here. Expecting a TERM will
+ # cause the '?' to always be interpreted as a pattern delimiter
+ # rather than introducing a ternary operator.
+ elsif ( $tok eq '?' ) {
+ $op_expected = UNKNOWN;
+ }
+ else {
+ $op_expected = TERM;
+ }
+ }
+ } ## end type '}'
- # a few operators, like "time", have an empty prototype () and so
- # take no parameters but produce a value to operate on
- elsif ( $expecting_operator_token{$last_nonblank_token} ) {
+ # number or v-string...
+ # An exception is for VERSION numbers in a 'use' statement. It has the format
+ # use Module VERSION LIST
+ # We could avoid this exception by writing a special sub to parse 'use'
+ # statements and perhaps mark these numbers with a new type V (for VERSION)
+ elsif ( $last_nonblank_type =~ /^[nv]$/ ) {
$op_expected = OPERATOR;
+ if ( $statement_type eq 'use' ) {
+ $op_expected = UNKNOWN;
+ }
}
- # post-increment and decrement produce values to be operated on
- elsif ( $expecting_operator_types{$last_nonblank_type} ) {
+ # quote...
+ # FIXME: labeled prototype words should probably be given type 'A' or maybe
+ # 'J'; not 'q'; or maybe mark as type 'Y'
+ elsif ( $last_nonblank_type eq 'q' ) {
$op_expected = OPERATOR;
+ if ( $last_nonblank_token eq 'prototype' )
+ ##|| $last_nonblank_token eq 'switch' )
+ {
+ $op_expected = TERM;
+ }
}
- # no value to operate on after sub block
- elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
+ # file handle or similar
+ elsif ( $last_nonblank_type eq 'Z' ) {
- # a right brace here indicates the end of a simple block.
- # all non-structural right braces have type 'R'
- # all braces associated with block operator keywords have been given those
- # keywords as "last_nonblank_token" and caught above.
- # (This statement is order dependent, and must come after checking
- # $last_nonblank_token).
- elsif ( $last_nonblank_type eq '}' ) {
+ $op_expected = UNKNOWN;
- # patch for dor.t (defined or).
- if ( $tok eq '/'
- && $next_type eq '/'
- && $last_nonblank_token eq ']' )
- {
- $op_expected = OPERATOR;
+ # angle.t
+ if ( $last_nonblank_token =~ /^\w/ ) {
+ $op_expected = UNKNOWN;
}
- # Patch for RT #116344: misparse a ternary operator after an anonymous
- # hash, like this:
- # return ref {} ? 1 : 0;
- # The right brace should really be marked type 'R' in this case, and
- # it is safest to return an UNKNOWN here. Expecting a TERM will
- # cause the '?' to always be interpreted as a pattern delimiter
- # rather than introducing a ternary operator.
- elsif ( $tok eq '?' ) {
+ # The 'weird parsing rules' of the next section do not work for '<' and '?'
+ # It is best to mark them as unknown. Test case:
+ # print $fh <DATA>;
+ elsif ( $is_weird_parsing_rule_exception{$tok} ) {
$op_expected = UNKNOWN;
}
- else {
+
+ # For possible file handle like "$a", Perl uses weird parsing rules.
+ # For example:
+ # print $a/2,"/hi"; - division
+ # print $a / 2,"/hi"; - division
+ # print $a/ 2,"/hi"; - division
+ # print $a /2,"/hi"; - pattern (and error)!
+ # Some examples where this logic works okay, for '&','*','+':
+ # print $fh &xsi_protos(@mods);
+ # my $x = new $CompressClass *FH;
+ # print $OUT +( $count % 15 ? ", " : "\n\t" );
+ elsif ($prev_type eq 'b'
+ && $next_type ne 'b' )
+ {
$op_expected = TERM;
}
+
+ # Note that '?' and '<' have been moved above
+ # ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
+ elsif ( $tok =~ /^([x\/\+\-\*\%\&\.]|\>\>)$/ ) {
+
+ # Do not complain in 'use' statements, which have special syntax.
+ # For example, from RT#130344:
+ # use lib $FindBin::Bin . '/lib';
+ if ( $statement_type ne 'use' ) {
+ complain(
+"operator in possible indirect object location not recommended\n"
+ );
+ }
+ $op_expected = OPERATOR;
+ }
}
- # something else..what did I forget?
+ # anything else...
else {
-
- # collecting diagnostics on unknown operator types..see what was missed
$op_expected = UNKNOWN;
- write_diagnostics(
-"OP: unknown after type=$last_nonblank_type token=$last_nonblank_token\n"
- );
}
- TOKENIZER_DEBUG_FLAG_EXPECT && do {
+ RETURN:
+
+ DEBUG_OPERATOR_EXPECTED && do {
print STDOUT
-"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
+"OPERATOR_EXPECTED: $msg: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
};
+
return $op_expected;
-}
+
+} ## end of sub operator_expected
sub new_statement_ok {
# check bareword
elsif ( $last_nonblank_type eq 'w' ) {
+
+ # check for syntax 'use MODULE LIST'
+ # This fixes b1022 b1025 b1027 b1028 b1029 b1030 b1031
+ return "" if ( $statement_type eq 'use' );
+
return decide_if_code_block( $i, $rtokens, $rtoken_type,
$max_token_index );
}
$code_block_type = "";
}
}
+
+ if ($code_block_type) {
+
+ # Patch for cases b1085 b1128: It is uncertain if this is a block.
+ # If this brace follows a bareword, then append a space as a signal
+ # to the formatter that this may not be a block brace. To find the
+ # corresponding code in Formatter.pm search for 'b1085'.
+ $code_block_type .= " " if ( $code_block_type =~ /^\w/ );
+ }
}
return $code_block_type;
$rpretoken_type, $input_line )
= @_;
- if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) {
+ if ( ++$tokenizer_self->[_unexpected_error_count_] <= MAX_NAG_MESSAGES ) {
my $msg = "found $found where $expecting expected";
my $pos = $rpretoken_map->[$i_tok];
interrupt_logfile();
- my $input_line_number = $tokenizer_self->{_last_line_number};
+ my $input_line_number = $tokenizer_self->[_last_line_number_];
my ( $offset, $numbered_line, $underline ) =
make_numbered_line( $input_line_number, $input_line, $pos );
$underline = write_on_underline( $underline, $pos - $offset, '^' );
$current_depth[$aa]++;
$total_depth++;
$total_depth[$aa][ $current_depth[$aa] ] = $total_depth;
- my $input_line_number = $tokenizer_self->{_last_line_number};
- my $input_line = $tokenizer_self->{_line_text};
+ my $input_line_number = $tokenizer_self->[_last_line_number_];
+ my $input_line = $tokenizer_self->[_line_of_text_];
# Sequence numbers increment by number of items. This keeps
# a unique set of numbers but still allows the relative location
return ( $seqno, $indent );
}
+sub is_balanced_closing_container {
+
+ # Return true if a closing container can go here without error
+ # Return false if not
+ my ($aa) = @_;
+
+ # cannot close if there was no opening
+ return unless ( $current_depth[$aa] > 0 );
+
+ # check that any other brace types $bb contained within would be balanced
+ for my $bb ( 0 .. @closing_brace_names - 1 ) {
+ next if ( $bb == $aa );
+ return
+ unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] ==
+ $current_depth[$bb] );
+ }
+
+ # OK, everything will be balanced
+ return 1;
+}
+
sub decrease_nesting_depth {
my ( $aa, $pos ) = @_;
# @current_sequence_number, @depth_array, @starting_line_of_current_depth
# $statement_type
my $seqno = 0;
- my $input_line_number = $tokenizer_self->{_last_line_number};
- my $input_line = $tokenizer_self->{_line_text};
+ my $input_line_number = $tokenizer_self->[_last_line_number_];
+ my $input_line = $tokenizer_self->[_line_of_text_];
my $outdent = 0;
$total_depth--;
indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
}
increment_brace_error();
+
+ # keep track of errors in braces alone (ignoring ternary nesting errors)
+ $tokenizer_self->[_true_brace_error_count_]++
+ if ( $closing_brace_names[$aa] ne "':'" );
}
return ( $seqno, $outdent );
}
my $i = 0;
my ( $rpre_tokens, $rmap, $rpre_types );
- while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
+ while ( $line =
+ $tokenizer_self->[_line_buffer_object_]->peek_ahead( $i++ ) )
{
- $line =~ s/^\s*//; # trim leading blanks
+ $line =~ s/^\s*//; # trim leading blanks
next if ( length($line) <= 0 ); # skip blank
next if ( $line =~ /^#/ ); # skip comment
( $rpre_tokens, $rmap, $rpre_types ) =
my $line;
my $i = 0;
- while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
+ while ( $line =
+ $tokenizer_self->[_line_buffer_object_]->peek_ahead( $i++ ) )
{
- $line =~ s/^\s*//; # trim leading blanks
+ $line =~ s/^\s*//; # trim leading blanks
next if ( length($line) <= 0 ); # skip blank
next if ( $line =~ /^#/ ); # skip comment
my ( $rtok, $rmap, $rtype ) =
}
last;
}
- return $rtokens;
+ return;
}
#########i#############################################################
# msg = a warning or diagnostic message
# USES GLOBAL VARIABLES: $last_nonblank_token
- # FIXME: this needs to be rewritten
-
my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
my $is_pattern = 0;
my $msg = "guessing that ? after $last_nonblank_token starts a ";
return ( $is_pattern, $msg );
}
+my %is_known_constant;
+my %is_known_function;
+
+BEGIN {
+
+ # Constants like 'pi' in Trig.pm are common
+ my @q = qw(pi pi2 pi4 pip2 pip4);
+ @{is_known_constant}{@q} = (1) x scalar(@q);
+
+ # parenless calls of 'ok' are common
+ @q = qw( ok );
+ @{is_known_function}{@q} = (1) x scalar(@q);
+}
+
sub guess_if_pattern_or_division {
# this routine is called when we have encountered a / following an
}
else {
my $ibeg = $i;
- my $divide_expected =
- numerator_expected( $i, $rtokens, $max_token_index );
+ my $divide_possible =
+ is_possible_numerator( $i, $rtokens, $max_token_index );
+
+ if ( $divide_possible < 0 ) {
+ $msg = "pattern (division not possible here)\n";
+ $is_pattern = 1;
+ goto RETURN;
+ }
+
$i = $ibeg + 1;
my $next_token = $rtokens->[$i]; # first token after slash
+ # One of the things we can look at is the spacing around the slash.
+ # There are four possible spacings around the first slash:
+ #
+ # return pi/two;#/; -/-
+ # return pi/ two;#/; -/+
+ # return pi / two;#/; +/+
+ # return pi /two;#/; +/- <-- possible pattern
+ #
+ # Spacing rule: a space before the slash but not after the slash
+ # usually indicates a pattern. We can use this to break ties.
+
+ my $is_pattern_by_spacing =
+ ( $i > 1 && $next_token ne ' ' && $rtokens->[ $i - 2 ] eq ' ' );
+
# look for a possible ending / on this line..
my $in_quote = 1;
my $quote_depth = 0;
if ($in_quote) {
- # we didn't find an ending / on this line,
- # so we bias towards division
- if ( $divide_expected >= 0 ) {
+ # we didn't find an ending / on this line, so we bias towards
+ # division
+ if ( $divide_possible >= 0 ) {
$is_pattern = 0;
$msg .= "division (no ending / on this line)\n";
}
else {
+
+ # assuming a multi-line pattern ... this is risky, but division
+ # does not seem possible. If this fails, it would either be due
+ # to a syntax error in the code, or the divide_possible logic
+ # needs to be fixed.
$msg = "multi-line pattern (division not possible)\n";
$is_pattern = 1;
}
-
}
- # we found an ending /, so we bias towards a pattern
+ # we found an ending /, so we bias slightly towards a pattern
else {
- if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
+ my $pattern_expected =
+ pattern_expected( $i, $rtokens, $max_token_index );
+
+ if ( $pattern_expected >= 0 ) {
+
+ # pattern looks possible...
+ if ( $divide_possible >= 0 ) {
+
+ # Both pattern and divide can work here...
- if ( $divide_expected >= 0 ) {
+ # Increase weight of divide if a pure number follows
+ $divide_possible += $next_token =~ /^\d+$/;
- if ( $i - $ibeg > 60 ) {
- $msg .= "division (matching / too distant)\n";
+ # Check for known constants in the numerator, like 'pi'
+ if ( $is_known_constant{$last_nonblank_token} ) {
+ $msg .=
+"division (pattern works too but saw known constant '$last_nonblank_token')\n";
$is_pattern = 0;
}
- else {
- $msg .= "pattern (but division possible too)\n";
+
+ # A very common bare word in pattern expressions is 'ok'
+ elsif ( $is_known_function{$last_nonblank_token} ) {
+ $msg .=
+"pattern (division works too but saw '$last_nonblank_token')\n";
+ $is_pattern = 1;
+ }
+
+ # If one rule is more definite, use it
+ elsif ( $divide_possible > $pattern_expected ) {
+ $msg .=
+ "division (more likely based on following tokens)\n";
+ $is_pattern = 0;
+ }
+
+ # otherwise, use the spacing rule
+ elsif ($is_pattern_by_spacing) {
+ $msg .=
+"pattern (guess on spacing, but division possible too)\n";
$is_pattern = 1;
}
+ else {
+ $msg .=
+"division (guess on spacing, but pattern is possible too)\n";
+ $is_pattern = 0;
+ }
}
+
+ # divide_possible < 0 means divide can not work here
else {
$is_pattern = 1;
$msg .= "pattern (division not possible)\n";
}
}
+
+ # pattern does not look possible...
else {
- if ( $divide_expected >= 0 ) {
+ if ( $divide_possible >= 0 ) {
$is_pattern = 0;
$msg .= "division (pattern not possible)\n";
}
+
+ # Neither pattern nor divide look possible...go by spacing
else {
- $is_pattern = 1;
- $msg .=
- "pattern (uncertain, but division would not work here)\n";
+ if ($is_pattern_by_spacing) {
+ $msg .= "pattern (guess on spacing)\n";
+ $is_pattern = 1;
+ }
+ else {
+ $msg .= "division (guess on spacing)\n";
+ $is_pattern = 0;
+ }
}
}
}
}
+
+ RETURN:
return ( $is_pattern, $msg );
}
my $k = 0;
my $msg = "checking <<";
- while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
+ while ( $line =
+ $tokenizer_self->[_line_buffer_object_]->peek_ahead( $k++ ) )
{
chomp $line;
else {
$package = $current_package;
- if ( $is_keyword{$tok} ) {
+ # patched for c043, part 1: keyword does not follow '->'
+ if ( $is_keyword{$tok} && $last_nonblank_type ne '->' ) {
$type = 'k';
}
}
- # if it is a bareword..
- if ( $type eq 'w' ) {
+ # if it is a bareword.. patched for c043, part 2: not following '->'
+ if ( $type eq 'w' && $last_nonblank_type ne '->' ) {
# check for v-string with leading 'v' type character
# (This seems to have precedence over filehandle, type 'Y')
elsif ( $is_block_function{$package}{$sub_name} ) {
$type = 'G';
}
-
elsif ( $is_block_list_function{$package}{$sub_name} ) {
$type = 'G';
}
)
{
- # may not be indirect object unless followed by a space
- if ( $input_line =~ m/\G\s+/gc ) {
+ # may not be indirect object unless followed by a space;
+ # updated 2021-01-16 to consider newline to be a space.
+ # updated for case b990 to look for either ';' or space
+ if ( pos($input_line) == length($input_line)
+ || $input_line =~ m/\G[;\s]/gc )
+ {
$type = 'Y';
# Abandon Hope ...
my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
$max_token_index )
= @_;
+ use constant DEBUG_NSCAN => 0;
my $type = '';
my ( $i_beg, $pos_beg );
if ( $is_sub{$id_scan_state} ) {
( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
- $input_line, $i, $i_beg,
- $tok, $type, $rtokens,
- $rtoken_map, $id_scan_state, $max_token_index
+ {
+ input_line => $input_line,
+ i => $i,
+ i_beg => $i_beg,
+ tok => $tok,
+ type => $type,
+ rtokens => $rtokens,
+ rtoken_map => $rtoken_map,
+ id_scan_state => $id_scan_state,
+ max_token_index => $max_token_index
+ }
);
}
report_definite_bug();
}
- TOKENIZER_DEBUG_FLAG_NSCAN && do {
+ DEBUG_NSCAN && do {
print STDOUT
"NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
};
pos($input_line) = $pos_beg;
# handle non-blank line; package name, if any, must follow
- if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
+ if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w*)/gc ) {
$package = $1;
$package = ( defined($1) && $1 ) ? $1 : 'main';
$package =~ s/\'/::/g;
# knows that the number is in a package statement.
# Examples of valid primitive tokens that might follow are:
# 1235 . ; { } v3 v
- if ( $next_nonblank_token =~ /^([v\.\d;\{\}])|v\d|\d+$/ ) {
+ # FIX: added a '#' since a side comment may also follow
+ if ( $next_nonblank_token =~ /^([v\.\d;\{\}\#])|v\d|\d+$/ ) {
$statement_type = $tok;
}
else {
# scan state, id_scan_state. It updates id_scan_state based upon
# current id_scan_state and token, and returns an updated
# id_scan_state and the next index after the identifier.
+
# USES GLOBAL VARIABLES: $context, $last_nonblank_token,
# $last_nonblank_type
my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
$expecting, $container_type )
= @_;
+ use constant DEBUG_SCAN_ID => 0;
my $i_begin = $i;
my $type = '';
my $tok_begin = $rtokens->[$i_begin];
my $identifier_begin = $identifier;
my $tok = $tok_begin;
my $message = "";
+ my $tok_is_blank; # a flag to speed things up
- my $in_prototype_or_signature = $container_type =~ /^sub/;
+ my $in_prototype_or_signature =
+ $container_type && $container_type =~ /^sub\b/;
# these flags will be used to help figure out the type:
- my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
+ my $saw_alpha;
my $saw_type;
# allow old package separator (') except in 'use' statement
my $allow_tick = ( $last_nonblank_token ne 'use' );
+ #########################################################
# get started by defining a type and a state if necessary
- unless ($id_scan_state) {
+ #########################################################
+
+ if ( !$id_scan_state ) {
$context = UNKNOWN_CONTEXT;
# fixup for digraph
elsif ( $tok eq '::' ) {
$id_scan_state = 'A';
}
- elsif ( $tok =~ /^[A-Za-z_]/ ) {
+ elsif ( $tok =~ /^\w/ ) {
$id_scan_state = ':';
+ $saw_alpha = 1;
}
elsif ( $tok eq '->' ) {
$id_scan_state = '$';
}
else {
$i--;
- $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
+ $saw_alpha = ( $tok =~ /^\w/ );
+ $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
}
- # now loop to gather the identifier
+ ###############################
+ # loop to gather the identifier
+ ###############################
+
my $i_save = $i;
while ( $i < $max_token_index ) {
- $i_save = $i unless ( $tok =~ /^\s*$/ );
- $tok = $rtokens->[ ++$i ];
+ my $last_tok_is_blank = $tok_is_blank;
+ if ($tok_is_blank) { $tok_is_blank = undef }
+ else { $i_save = $i }
+
+ $tok = $rtokens->[ ++$i ];
+ # patch to make digraph :: if necessary
if ( ( $tok eq ':' ) && ( $rtokens->[ $i + 1 ] eq ':' ) ) {
$tok = '::';
$i++;
}
- if ( $id_scan_state eq '$' ) { # starting variable name
+ ########################
+ # Starting variable name
+ ########################
+
+ if ( $id_scan_state eq '$' ) {
if ( $tok eq '$' ) {
last;
}
}
+ elsif ( $tok =~ /^\w/ ) { # alphanumeric ..
+ $saw_alpha = 1;
+ $id_scan_state = ':'; # now need ::
+ $identifier .= $tok;
+ }
+ elsif ( $tok eq '::' ) {
+ $id_scan_state = 'A';
+ $identifier .= $tok;
+ }
# POSTDEFREF ->@ ->% ->& ->*
elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) {
$identifier .= $tok;
}
- elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric ..
- $saw_alpha = 1;
- $id_scan_state = ':'; # now need ::
- $identifier .= $tok;
- }
elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
$saw_alpha = 1;
$id_scan_state = ':'; # now need ::
# howdy::123::bubba();
#
}
- elsif ( $tok =~ /^[0-9]/ ) { # numeric
- $saw_alpha = 1;
- $id_scan_state = ':'; # now need ::
- $identifier .= $tok;
- }
- elsif ( $tok eq '::' ) {
- $id_scan_state = 'A';
- $identifier .= $tok;
- }
+ elsif ( $tok eq '#' ) {
- # $# and POSTDEFREF ->$#
- elsif ( ( $tok eq '#' ) && ( $identifier =~ /\$$/ ) ) { # $#array
- $identifier .= $tok; # keep same state, a $ could follow
- }
- elsif ( $tok eq '{' ) {
-
- # check for something like ${#} or ${©}
+ # side comment or identifier?
if (
- (
- $identifier eq '$'
- || $identifier eq '@'
- || $identifier eq '$#'
- )
- && $i + 2 <= $max_token_index
- && $rtokens->[ $i + 2 ] eq '}'
- && $rtokens->[ $i + 1 ] !~ /[\s\w]/
+
+ # A '#' starts a comment if it follows a space. For example,
+ # the following is equivalent to $ans=40.
+ # my $ #
+ # ans = 40;
+ !$last_tok_is_blank
+
+ # a # inside a prototype or signature can only start a
+ # comment
+ && !$in_prototype_or_signature
+
+ # these are valid punctuation vars: *# %# @# $#
+ # May also be '$#array' or POSTDEFREF ->$#
+ && ( $identifier =~ /^[\%\@\$\*]$/ || $identifier =~ /\$$/ )
+
+ )
+ {
+ $identifier .= $tok; # keep same state, a $ could follow
+ }
+ else {
+
+ # otherwise it is a side comment
+ if ( $identifier eq '->' ) { }
+ elsif ( $id_scan_state eq '$' ) { $type = 't' }
+ else { $type = 'i' }
+ $i = $i_save;
+ $id_scan_state = '';
+ last;
+ }
+ }
+
+ elsif ( $tok eq '{' ) {
+
+ # check for something like ${#} or ${©}
+ if (
+ (
+ $identifier eq '$'
+ || $identifier eq '@'
+ || $identifier eq '$#'
+ )
+ && $i + 2 <= $max_token_index
+ && $rtokens->[ $i + 2 ] eq '}'
+ && $rtokens->[ $i + 1 ] !~ /[\s\w]/
)
{
my $next2 = $rtokens->[ $i + 2 ];
# space ok after leading $ % * & @
elsif ( $tok =~ /^\s*$/ ) {
+ $tok_is_blank = 1;
+
if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
if ( length($identifier) > 1 ) {
}
else { # something else
- if ( $in_prototype_or_signature && $tok =~ /^[\),=]/ ) {
+ if ( $in_prototype_or_signature && $tok =~ /^[\),=#]/ ) {
+
+ # We might be in an extrusion of
+ # sub foo2 ( $first, $, $third ) {
+ # looking at a line starting with a comma, like
+ # $
+ # ,
+ # in this case the comma ends the signature variable
+ # '$' which will have been previously marked type 't'
+ # rather than 'i'.
+ if ( $i == $i_begin ) {
+ $identifier = "";
+ $type = "";
+ }
+
+ # at a # we have to mark as type 't' because more may
+ # follow, otherwise, in a signature we can let '$' be an
+ # identifier here for better formatting.
+ # See 'mangle4.in' for a test case.
+ else {
+ $type = 'i';
+ if ( $id_scan_state eq '$' && $tok eq '#' ) {
+ $type = 't';
+ }
+ $i = $i_save;
+ }
$id_scan_state = '';
- $i = $i_save;
- $type = 'i'; # probably punctuation variable
last;
}
}
# POSTDEFREF: Postfix reference ->$* ->%* ->@* ->** ->&* ->$#*
- elsif ( $tok eq '*' && $identifier =~ /([\@\%\$\*\&]|\$\#)$/ ) {
+ elsif ($tok eq '*'
+ && $identifier =~ /\-\>([\@\%\$\*\&]|\$\#)$/ )
+ {
$identifier .= $tok;
}
# You would have to use
# $a = ${$:};
+ # '$$' alone is punctuation variable for PID
$i = $i_save;
if ( $tok eq '{' ) { $type = 't' }
else { $type = 'i' }
last;
}
}
- elsif ( $id_scan_state eq '&' ) { # starting sub call?
- if ( $tok =~ /^[\$A-Za-z_]/ ) { # alphanumeric ..
- $id_scan_state = ':'; # now need ::
- $saw_alpha = 1;
- $identifier .= $tok;
- }
- elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
- $id_scan_state = ':'; # now need ::
- $saw_alpha = 1;
- $identifier .= $tok;
- }
- elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
- $id_scan_state = ':'; # now need ::
- $saw_alpha = 1;
- $identifier .= $tok;
- }
- elsif ( $tok =~ /^\s*$/ ) { # allow space
- }
- elsif ( $tok eq '::' ) { # leading ::
- $id_scan_state = 'A'; # accept alpha next
- $identifier .= $tok;
- }
- elsif ( $tok eq '{' ) {
- if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
- $i = $i_save;
- $id_scan_state = '';
- last;
- }
- else {
+ ###################################
+ # looking for alphanumeric after ::
+ ###################################
- # punctuation variable?
- # testfile: cunningham4.pl
- #
- # We have to be careful here. If we are in an unknown state,
- # we will reject the punctuation variable. In the following
- # example the '&' is a binary operator but we are in an unknown
- # state because there is no sigil on 'Prima', so we don't
- # know what it is. But it is a bad guess that
- # '&~' is a function variable.
- # $self->{text}->{colorMap}->[
- # Prima::PodView::COLOR_CODE_FOREGROUND
- # & ~tb::COLOR_INDEX ] =
- # $sec->{ColorCode}
- if ( $identifier eq '&' && $expecting ) {
- $identifier .= $tok;
- }
- else {
- $identifier = '';
- $i = $i_save;
- $type = '&';
- }
- $id_scan_state = '';
- last;
- }
- }
- elsif ( $id_scan_state eq 'A' ) { # looking for alpha (after ::)
+ elsif ( $id_scan_state eq 'A' ) {
+
+ $tok_is_blank = $tok =~ /^\s*$/;
- if ( $tok =~ /^[A-Za-z_]/ ) { # found it
+ if ( $tok =~ /^\w/ ) { # found it
$identifier .= $tok;
- $id_scan_state = ':'; # now need ::
+ $id_scan_state = ':'; # now need ::
$saw_alpha = 1;
}
elsif ( $tok eq "'" && $allow_tick ) {
$identifier .= $tok;
- $id_scan_state = ':'; # now need ::
- $saw_alpha = 1;
- }
- elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
- $identifier .= $tok;
- $id_scan_state = ':'; # now need ::
+ $id_scan_state = ':'; # now need ::
$saw_alpha = 1;
}
- elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
+ elsif ( $tok_is_blank && $identifier =~ /^sub / ) {
$id_scan_state = '(';
$identifier .= $tok;
}
- elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
+ elsif ( $tok eq '(' && $identifier =~ /^sub / ) {
$id_scan_state = ')';
$identifier .= $tok;
}
last;
}
}
+
+ ###################################
+ # looking for :: after alphanumeric
+ ###################################
+
elsif ( $id_scan_state eq ':' ) { # looking for :: after alpha
+ $tok_is_blank = $tok =~ /^\s*$/;
+
if ( $tok eq '::' ) { # got it
$identifier .= $tok;
$id_scan_state = 'A'; # now require alpha
}
- elsif ( $tok =~ /^[A-Za-z_]/ ) { # more alphanumeric is ok here
- $identifier .= $tok;
- $id_scan_state = ':'; # now need ::
- $saw_alpha = 1;
- }
- elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
+ elsif ( $tok =~ /^\w/ ) { # more alphanumeric is ok here
$identifier .= $tok;
- $id_scan_state = ':'; # now need ::
+ $id_scan_state = ':'; # now need ::
$saw_alpha = 1;
}
elsif ( $tok eq "'" && $allow_tick ) { # tick
$identifier .= $tok;
}
}
- elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
+ elsif ( $tok_is_blank && $identifier =~ /^sub / ) {
$id_scan_state = '(';
$identifier .= $tok;
}
- elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
+ elsif ( $tok eq '(' && $identifier =~ /^sub / ) {
$id_scan_state = ')';
$identifier .= $tok;
}
last;
}
}
- elsif ( $id_scan_state eq '(' ) { # looking for ( of prototype
- if ( $tok eq '(' ) { # got it
+ ##############################
+ # looking for '(' of prototype
+ ##############################
+
+ elsif ( $id_scan_state eq '(' ) {
+
+ if ( $tok eq '(' ) { # got it
$identifier .= $tok;
- $id_scan_state = ')'; # now find the end of it
+ $id_scan_state = ')'; # now find the end of it
}
- elsif ( $tok =~ /^\s*$/ ) { # blank - keep going
+ elsif ( $tok =~ /^\s*$/ ) { # blank - keep going
$identifier .= $tok;
+ $tok_is_blank = 1;
}
else {
- $id_scan_state = ''; # that's all - no prototype
+ $id_scan_state = ''; # that's all - no prototype
$i = $i_save;
last;
}
}
- elsif ( $id_scan_state eq ')' ) { # looking for ) to end
- if ( $tok eq ')' ) { # got it
+ ##############################
+ # looking for ')' of prototype
+ ##############################
+
+ elsif ( $id_scan_state eq ')' ) {
+
+ $tok_is_blank = $tok =~ /^\s*$/;
+
+ if ( $tok eq ')' ) { # got it
$identifier .= $tok;
- $id_scan_state = ''; # all done
+ $id_scan_state = ''; # all done
last;
}
elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
$identifier .= $tok;
}
}
- else { # can get here due to error in initialization
+
+ ###################
+ # Starting sub call
+ ###################
+
+ elsif ( $id_scan_state eq '&' ) {
+
+ if ( $tok =~ /^[\$\w]/ ) { # alphanumeric ..
+ $id_scan_state = ':'; # now need ::
+ $saw_alpha = 1;
+ $identifier .= $tok;
+ }
+ elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
+ $id_scan_state = ':'; # now need ::
+ $saw_alpha = 1;
+ $identifier .= $tok;
+ }
+ elsif ( $tok =~ /^\s*$/ ) { # allow space
+ $tok_is_blank = 1;
+ }
+ elsif ( $tok eq '::' ) { # leading ::
+ $id_scan_state = 'A'; # accept alpha next
+ $identifier .= $tok;
+ }
+ elsif ( $tok eq '{' ) {
+ if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
+ $i = $i_save;
+ $id_scan_state = '';
+ last;
+ }
+ else {
+
+ # punctuation variable?
+ # testfile: cunningham4.pl
+ #
+ # We have to be careful here. If we are in an unknown state,
+ # we will reject the punctuation variable. In the following
+ # example the '&' is a binary operator but we are in an unknown
+ # state because there is no sigil on 'Prima', so we don't
+ # know what it is. But it is a bad guess that
+ # '&~' is a function variable.
+ # $self->{text}->{colorMap}->[
+ # Prima::PodView::COLOR_CODE_FOREGROUND
+ # & ~tb::COLOR_INDEX ] =
+ # $sec->{ColorCode}
+
+ # Fix for case c033: a '#' here starts a side comment
+ if ( $identifier eq '&' && $expecting && $tok ne '#' ) {
+ $identifier .= $tok;
+ }
+ else {
+ $identifier = '';
+ $i = $i_save;
+ $type = '&';
+ }
+ $id_scan_state = '';
+ last;
+ }
+ }
+
+ ######################
+ # unknown state - quit
+ ######################
+
+ else { # can get here due to error in initialization
$id_scan_state = '';
$i = $i_save;
last;
}
- }
+ } ## end of main loop
if ( $id_scan_state eq ')' ) {
warning("Hit end of line while seeking ) to end prototype\n");
if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
$id_scan_state = '';
}
+
+ # Patch: the deprecated variable $# does not combine with anything on the
+ # next line.
+ if ( $identifier eq '$#' ) { $id_scan_state = '' }
+
if ( $i < 0 ) { $i = 0 }
- unless ($type) {
+ # Be sure a token type is defined
+ if ( !$type ) {
if ($saw_type) {
} # this can happen on a restart
}
+ # See if we formed an identifier...
if ($identifier) {
$tok = $identifier;
if ($message) { write_logfile_entry($message) }
}
+
+ # did not find an identifier, back up
else {
$tok = $tok_begin;
$i = $i_begin;
}
- TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
+ DEBUG_SCAN_ID && do {
my ( $a, $b, $c ) = caller;
print STDOUT
"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
return ( $i, $tok, $type, $id_scan_state, $identifier );
}
-{
+{ ## closure for sub do_scan_sub
+
+ my %warn_if_lexical;
+
+ BEGIN {
+
+ # lexical subs with these names can cause parsing errors in this version
+ my @q = qw( m q qq qr qw qx s tr y );
+ @{warn_if_lexical}{@q} = (1) x scalar(@q);
+ }
# saved package and subnames in case prototype is on separate line
my ( $package_saved, $subname_saved );
+ # initialize subname each time a new 'sub' keyword is encountered
+ sub initialize_subname {
+ $package_saved = "";
+ $subname_saved = "";
+ return;
+ }
+
+ use constant {
+ SUB_CALL => 1,
+ PAREN_CALL => 2,
+ PROTOTYPE_CALL => 3,
+ };
+
sub do_scan_sub {
- # do_scan_sub parses a sub name and prototype
- # it is called with $i_beg equal to the index of the first nonblank
- # token following a 'sub' token.
+ # do_scan_sub parses a sub name and prototype.
+
+ # At present there are three basic CALL TYPES which are
+ # distinguished by the starting value of '$tok':
+ # 1. $tok='sub', id_scan_state='sub'
+ # it is called with $i_beg equal to the index of the first nonblank
+ # token following a 'sub' token.
+ # 2. $tok='(', id_scan_state='sub',
+ # it is called with $i_beg equal to the index of a '(' which may
+ # start a prototype.
+ # 3. $tok='prototype', id_scan_state='prototype'
+ # it is called with $i_beg equal to the index of a '(' which is
+ # preceded by ': prototype' and has $id_scan_state eq 'prototype'
+
+ # Examples:
+
+ # A single type 1 call will get both the sub and prototype
+ # sub foo1 ( $$ ) { }
+ # ^
+
+ # The subname will be obtained with a 'sub' call
+ # The prototype on line 2 will be obtained with a '(' call
+ # sub foo1
+ # ^ <---call type 1
+ # ( $$ ) { }
+ # ^ <---call type 2
+
+ # The subname will be obtained with a 'sub' call
+ # The prototype will be obtained with a 'prototype' call
+ # sub foo1 ( $x, $y ) : prototype ( $$ ) { }
+ # ^ <---type 1 ^ <---type 3
# TODO: add future error checks to be sure we have a valid
# sub name. For example, 'sub &doit' is wrong. Also, be sure
# $in_attribute_list, %saw_function_definition,
# $statement_type
- my (
- $input_line, $i, $i_beg,
- $tok, $type, $rtokens,
- $rtoken_map, $id_scan_state, $max_token_index
- ) = @_;
+ my ($rinput_hash) = @_;
+
+ my $input_line = $rinput_hash->{input_line};
+ my $i = $rinput_hash->{i};
+ my $i_beg = $rinput_hash->{i_beg};
+ my $tok = $rinput_hash->{tok};
+ my $type = $rinput_hash->{type};
+ my $rtokens = $rinput_hash->{rtokens};
+ my $rtoken_map = $rinput_hash->{rtoken_map};
+ my $id_scan_state = $rinput_hash->{id_scan_state};
+ my $max_token_index = $rinput_hash->{max_token_index};
+
+ my $i_entry = $i;
+
+ # Determine the CALL TYPE
+ # 1=sub
+ # 2=(
+ # 3=prototype
+ my $call_type =
+ $tok eq 'prototype' ? PROTOTYPE_CALL
+ : $tok eq '(' ? PAREN_CALL
+ : SUB_CALL;
+
$id_scan_state = ""; # normally we get everything in one call
- my $subname = undef;
- my $package = undef;
+ my $subname = $subname_saved;
+ my $package = $package_saved;
my $proto = undef;
my $attrs = undef;
my $match;
my $pos_beg = $rtoken_map->[$i_beg];
pos($input_line) = $pos_beg;
- # Look for the sub NAME
+ # Look for the sub NAME if this is a SUB call
if (
- $input_line =~ m/\G\s*
+ $call_type == SUB_CALL
+ && $input_line =~ m/\G\s*
((?:\w*(?:'|::))*) # package - something that ends in :: or '
(\w+) # NAME - required
/gcx
$match = 1;
$subname = $2;
- $package = ( defined($1) && $1 ) ? $1 : $current_package;
- $package =~ s/\'/::/g;
- if ( $package =~ /^\:/ ) { $package = 'main' . $package }
- $package =~ s/::$//;
+ my $is_lexical_sub =
+ $last_nonblank_type eq 'k' && $last_nonblank_token eq 'my';
+ if ( $is_lexical_sub && $1 ) {
+ warning("'my' sub $subname cannot be in package '$1'\n");
+ $is_lexical_sub = 0;
+ }
+
+ if ($is_lexical_sub) {
+
+ # lexical subs use the block sequence number as a package name
+ my $seqno =
+ $current_sequence_number[BRACE][ $current_depth[BRACE] ];
+ $seqno = 1 unless ( defined($seqno) );
+ $package = $seqno;
+ if ( $warn_if_lexical{$subname} ) {
+ warning(
+"'my' sub '$subname' matches a builtin name and may not be handled correctly in this perltidy version.\n"
+ );
+ }
+ }
+ else {
+ $package = ( defined($1) && $1 ) ? $1 : $current_package;
+ $package =~ s/\'/::/g;
+ if ( $package =~ /^\:/ ) { $package = 'main' . $package }
+ $package =~ s/::$//;
+ }
+
my $pos = pos($input_line);
my $numc = $pos - $pos_beg;
$tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
$type = 'i';
+
+ # remember the sub name in case another call is needed to
+ # get the prototype
+ $package_saved = $package;
+ $subname_saved = $subname;
}
- # Now look for PROTO ATTRS
+ # Now look for PROTO ATTRS for all call types
# Look for prototype/attributes which are usually on the same
# line as the sub name but which might be on a separate line.
# For example, we might have an anonymous sub with attributes,
# does not look like a prototype, we assume it is a SIGNATURE and we
# will stop and let the the standard tokenizer handle it. In
# particular, we stop if we see any nested parens, braces, or commas.
+ # Also note, a valid prototype cannot contain any alphabetic character
+ # -- see https://perldoc.perl.org/perlsub
+ # But it appears that an underscore is valid in a prototype, so the
+ # regex below uses [A-Za-z] rather than \w
+ # This is the old regex which has been replaced:
+ # $input_line =~ m/\G(\s*\([^\)\(\}\{\,#]*\))? # PROTO
my $saw_opening_paren = $input_line =~ /\G\s*\(/;
if (
- $input_line =~ m/\G(\s*\([^\)\(\}\{\,]*\))? # PROTO
+ $input_line =~ m/\G(\s*\([^\)\(\}\{\,#A-Za-z]*\))? # PROTO
(\s*:)? # ATTRS leading ':'
/gcx
&& ( $1 || $2 )
$proto = $1;
$attrs = $2;
- # If we also found the sub name on this call then append PROTO.
- # This is not necessary but for compatibility with previous
- # versions when the -csc flag is used:
- if ( $match && $proto ) {
+ # Append the prototype to the starting token if it is 'sub' or
+ # 'prototype'. This is not necessary but for compatibility with
+ # previous versions when the -csc flag is used:
+ if ( $proto && ( $match || $call_type == PROTOTYPE_CALL ) ) {
$tok .= $proto;
}
- $match ||= 1;
- # Handle prototype on separate line from subname
- if ($subname_saved) {
- $package = $package_saved;
- $subname = $subname_saved;
- $tok = $last_nonblank_token;
+ # If we just entered the sub at an opening paren on this call, not
+ # a following :prototype, label it with the previous token. This is
+ # necessary to propagate the sub name to its opening block.
+ elsif ( $call_type == PAREN_CALL ) {
+ $tok = $last_nonblank_token;
}
+
+ $match ||= 1;
+
+ # Patch part #1 to fixes cases b994 and b1053:
+ # Mark an anonymous sub keyword without prototype as type 'k', i.e.
+ # 'sub : lvalue { ...'
$type = 'i';
+ if ( $tok eq 'sub' && !$proto ) { $type = 'k' }
}
if ($match) {
$max_token_index );
if ($error) { warning("Possibly invalid sub\n") }
+ # Patch part #2 to fixes cases b994 and b1053:
+ # Do not let spaces be part of the token of an anonymous sub keyword
+ # which we marked as type 'k' above...i.e. for something like:
+ # 'sub : lvalue { ...'
+ # Back up and let it be parsed as a blank
+ if ( $type eq 'k'
+ && $attrs
+ && $i > $i_entry
+ && substr( $rtokens->[$i], 0, 1 ) eq ' ' )
+ {
+ $i--;
+ }
+
# check for multiple definitions of a sub
( $next_nonblank_token, my $i_next ) =
find_next_nonblank_token_on_this_line( $i, $rtokens,
$next_nonblank_token = '}';
}
}
- $package_saved = "";
- $subname_saved = "";
# See what's next...
if ( $next_nonblank_token eq '{' ) {
# Check for multiple definitions of a sub, but
# it is ok to have multiple sub BEGIN, etc,
# so we do not complain if name is all caps
- if ( $saw_function_definition{$package}{$subname}
+ if ( $saw_function_definition{$subname}{$package}
&& $subname !~ /^[A-Z]+$/ )
{
- my $lno = $saw_function_definition{$package}{$subname};
- warning(
+ my $lno = $saw_function_definition{$subname}{$package};
+ if ( $package =~ /^\d/ ) {
+ warning(
+"already saw definition of lexical 'sub $subname' at line $lno\n"
+ );
+
+ }
+ else {
+ warning(
"already saw definition of 'sub $subname' in package '$package' at line $lno\n"
- );
+ );
+ }
}
- $saw_function_definition{$package}{$subname} =
- $tokenizer_self->{_last_line_number};
+ $saw_function_definition{$subname}{$package} =
+ $tokenizer_self->[_last_line_number_];
}
}
elsif ( $next_nonblank_token eq ';' ) {
# Setting 'statement_type' causes any ':'s to introduce
# attributes.
elsif ( $next_nonblank_token eq ':' ) {
- $statement_type = $tok;
+ if ( $call_type == SUB_CALL ) {
+ $statement_type =
+ substr( $tok, 0, 3 ) eq 'sub' ? $tok : 'sub';
+ }
}
# if we stopped before an open paren ...
# Otherwise, we assume it is a SIGNATURE rather than a
# PROTOTYPE and let the normal tokenizer handle it as a list
if ( !$saw_opening_paren ) {
- $id_scan_state = 'sub'; # we must come back to get proto
- $package_saved = $package;
- $subname_saved = $subname;
+ $id_scan_state = 'sub'; # we must come back to get proto
+ }
+ if ( $call_type == SUB_CALL ) {
+ $statement_type =
+ substr( $tok, 0, 3 ) eq 'sub' ? $tok : 'sub';
}
- $statement_type = $tok;
}
- elsif ($next_nonblank_token) { # EOF technically ok
+ elsif ($next_nonblank_token) { # EOF technically ok
$subname = "" unless defined($subname);
warning(
"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
check_prototype( $proto, $package, $subname );
}
- # no match but line not blank
+ # no match to either sub name or prototype, but line not blank
else {
+
}
return ( $i, $tok, $type, $id_scan_state );
}
sub find_next_nonblank_token {
my ( $i, $rtokens, $max_token_index ) = @_;
+ # Returns the next nonblank token after the token at index $i
+ # To skip past a side comment, and any subsequent block comments
+ # and blank lines, call with i=$max_token_index
+
if ( $i >= $max_token_index ) {
if ( !peeked_ahead() ) {
peeked_ahead(1);
- $rtokens =
- peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
+ peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
}
}
+
my $next_nonblank_token = $rtokens->[ ++$i ];
+ return ( " ", $i ) unless defined($next_nonblank_token);
if ( $next_nonblank_token =~ /^\s*$/ ) {
$next_nonblank_token = $rtokens->[ ++$i ];
+ return ( " ", $i ) unless defined($next_nonblank_token);
}
return ( $next_nonblank_token, $i );
}
-sub numerator_expected {
+sub is_possible_numerator {
- # this is a filter for a possible numerator, in support of guessing
- # for the / pattern delimiter token.
- # returns -
+ # Look at the next non-comment character and decide if it could be a
+ # numerator. Return
# 1 - yes
# 0 - can't tell
# -1 - no
- # Note: I am using the convention that variables ending in
- # _expected have these 3 possible values.
+
my ( $i, $rtokens, $max_token_index ) = @_;
- my $numerator_expected = 0;
+ my $is_possible_numerator = 0;
my $next_token = $rtokens->[ $i + 1 ];
if ( $next_token eq '=' ) { $i++; } # handle /=
my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens, $max_token_index );
+ if ( $next_nonblank_token eq '#' ) {
+ ( $next_nonblank_token, $i_next ) =
+ find_next_nonblank_token( $max_token_index, $rtokens,
+ $max_token_index );
+ }
+
if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
- $numerator_expected = 1;
+ $is_possible_numerator = 1;
+ }
+ elsif ( $next_nonblank_token =~ /^\s*$/ ) {
+ $is_possible_numerator = 0;
}
else {
-
- if ( $next_nonblank_token =~ /^\s*$/ ) {
- $numerator_expected = 0;
- }
- else {
- $numerator_expected = -1;
- }
+ $is_possible_numerator = -1;
}
- return $numerator_expected;
+
+ return $is_possible_numerator;
}
-sub pattern_expected {
+{ ## closure for sub pattern_expected
+ my %pattern_test;
- # This is the start of a filter for a possible pattern.
- # It looks at the token after a possible pattern and tries to
- # determine if that token could end a pattern.
- # returns -
- # 1 - yes
- # 0 - can't tell
- # -1 - no
- my ( $i, $rtokens, $max_token_index ) = @_;
- my $is_pattern = 0;
+ BEGIN {
- my $next_token = $rtokens->[ $i + 1 ];
- if ( $next_token =~ /^[msixpodualgc]/ ) { $i++; } # skip possible modifier
- my ( $next_nonblank_token, $i_next ) =
- find_next_nonblank_token( $i, $rtokens, $max_token_index );
+ # List of tokens which may follow a pattern. Note that we will not
+ # have formed digraphs at this point, so we will see '&' instead of
+ # '&&' and '|' instead of '||'
- # list of tokens which may follow a pattern
- # (can probably be expanded)
- if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
- {
- $is_pattern = 1;
+ # /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/
+ my @q = qw( & && | || ? : + - * and or while if unless);
+ push @q, ')', '}', ']', '>', ',', ';';
+ @{pattern_test}{@q} = (1) x scalar(@q);
}
- else {
- if ( $next_nonblank_token =~ /^\s*$/ ) {
- $is_pattern = 0;
+ sub pattern_expected {
+
+ # This a filter for a possible pattern.
+ # It looks at the token after a possible pattern and tries to
+ # determine if that token could end a pattern.
+ # returns -
+ # 1 - yes
+ # 0 - can't tell
+ # -1 - no
+ my ( $i, $rtokens, $max_token_index ) = @_;
+ my $is_pattern = 0;
+
+ my $next_token = $rtokens->[ $i + 1 ];
+ if ( $next_token =~ /^[msixpodualgc]/ ) {
+ $i++;
+ } # skip possible modifier
+ my ( $next_nonblank_token, $i_next ) =
+ find_next_nonblank_token( $i, $rtokens, $max_token_index );
+
+ if ( $pattern_test{$next_nonblank_token} ) {
+ $is_pattern = 1;
}
else {
- $is_pattern = -1;
+
+ # Added '#' to fix issue c044
+ if ( $next_nonblank_token =~ /^\s*$/
+ || $next_nonblank_token eq '#' )
+ {
+ $is_pattern = 0;
+ }
+ else {
+ $is_pattern = -1;
+ }
}
+ return $is_pattern;
}
- return $is_pattern;
}
sub find_next_nonblank_token_on_this_line {
report_possible_bug();
}
+ # count blanks on inside of brackets
+ my $blank_count = 0;
+ $blank_count++ if ( $str =~ /<\s+/ );
+ $blank_count++ if ( $str =~ /\s+>/ );
+
# Now let's see where we stand....
# OK if math op not possible
if ( $expecting == TERM ) {
}
- # OK if there are no more than 2 pre-tokens inside
+ # OK if there are no more than 2 non-blank pre-tokens inside
# (not possible to write 2 token math between < and >)
# This catches most common cases
- elsif ( $i <= $i_beg + 3 ) {
- write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
+ elsif ( $i <= $i_beg + 3 + $blank_count ) {
+
+ # No longer any need to document this common case
+ ## write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
+ }
+
+ # OK if there is some kind of identifier inside
+ # print $fh <tvg::INPUT>;
+ elsif ( $str =~ /^<\s*\$?(\w|::|\s)+\s*>$/ ) {
+ write_diagnostics("ANGLE (contains identifier): $str\n");
}
# Not sure..
# handle v-string without leading 'v' character ('Two Dot' rule)
# (vstring.t)
- # TODO: v-strings may contain underscores
+ # Here is the format prior to including underscores:
+ ## if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
pos($input_line) = $pos_beg;
- if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
+ if ( $input_line =~ /\G((\d[_\d]*)?\.[\d_]+(\.[\d_]+)+)/g ) {
$pos = pos($input_line);
my $numc = $pos - $pos_beg;
$number = substr( $input_line, $pos_beg, $numc );
# handle octal, hex, binary
if ( !defined($number) ) {
pos($input_line) = $pos_beg;
- if ( $input_line =~
- /\G[+-]?0(([xX][0-9a-fA-F_]+)|([0-7_]+)|([bB][01_]+))/g )
+
+ # Perl 5.22 added floating point literals, like '0x0.b17217f7d1cf78p0'
+ # For reference, the format prior to hex floating point is:
+ # /\G[+-]?0(([xX][0-9a-fA-F_]+)|([0-7_]+)|([bB][01_]+))/g )
+ # (hex) (octal) (binary)
+ if (
+ $input_line =~
+
+ /\G[+-]?0( # leading [signed] 0
+
+ # a hex float, i.e. '0x0.b17217f7d1cf78p0'
+ ([xX][0-9a-fA-F_]* # X and optional leading digits
+ (\.([0-9a-fA-F][0-9a-fA-F_]*)?)? # optional decimal and fraction
+ [Pp][+-]?[0-9a-fA-F] # REQUIRED exponent with digit
+ [0-9a-fA-F_]*) # optional Additional exponent digits
+
+ # or hex integer
+ |([xX][0-9a-fA-F_]+)
+
+ # or octal fraction
+ |([oO]?[0-7_]+ # string of octal digits
+ (\.([0-7][0-7_]*)?)? # optional decimal and fraction
+ [Pp][+-]?[0-7] # REQUIRED exponent, no underscore
+ [0-7_]*) # Additional exponent digits with underscores
+
+ # or octal integer
+ |([oO]?[0-7_]+) # string of octal digits
+
+ # or a binary float
+ |([bB][01_]* # 'b' with string of binary digits
+ (\.([01][01_]*)?)? # optional decimal and fraction
+ [Pp][+-]?[01] # Required exponent indicator, no underscore
+ [01_]*) # additional exponent bits
+
+ # or binary integer
+ |([bB][01_]+) # 'b' with string of binary digits
+
+ )/gx
+ )
{
$pos = pos($input_line);
my $numc = $pos - $pos_beg;
my $i = $i_beg - 1;
my $quoted_string = "";
- TOKENIZER_DEBUG_FLAG_QUOTE && do {
+ 0 && do {
print STDOUT
"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
};
}
}
else {
- $quoted_string .= substr( $tok, $old_pos );
+ if ( $old_pos <= length($tok) ) {
+ $quoted_string .= substr( $tok, $old_pos );
+ }
}
}
}
return;
}
-{
+{ ## closure for sub matching end token
my %matching_end_token;
BEGIN {
# This should be the latest list of token types in use
# adding NEW_TOKENS: add a comment here
- print $fh <<'END_OF_LIST';
+ $fh->print(<<'END_OF_LIST');
Here is a list of the token types currently used for lines of type 'CODE'.
For the following tokens, the "type" of a token is just the token itself.
# These tokens may precede a code block
# patched for SWITCH/CASE/CATCH. Actually these could be removed
- # now and we could let the extended-syntax coding handle them
+ # now and we could let the extended-syntax coding handle them.
+ # Added 'default' for Switch::Plain.
@q =
qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
unless do while until eval for foreach map grep sort
- switch case given when catch try finally);
+ switch case given when default catch try finally);
@is_code_block_token{@q} = (1) x scalar(@q);
# I'll build the list of keywords incrementally
elsif
eof
eq
+ evalbytes
exec
exists
exit
exp
+ fc
fcntl
fileno
flock
sqrt
srand
stat
+ state
study
substr
symlink
switch
case
+ default
given
when
err
say
+ isa
catch
);
# patched above for SWITCH/CASE given/when err say
# 'err' is a fairly safe addition.
- # TODO: 'default' still needed if appropriate
- # 'use feature' seen, but perltidy works ok without it.
- # Concerned that 'default' could break code.
+ # Added 'default' for Switch::Plain. Note that we could also have
+ # a separate set of keywords to include if we see 'use Switch::Plain'
push( @Keywords, @value_requestor );
# These are treated the same but are not keywords:
my @value_requestor_type = qw#
L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
**= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
- <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~
+ <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~ <<~
f F pp mm Y p m U J G j >> << ^ t
~. ^. |. &. ^.= |.= &.=
#;
@q = qw(q qq qw qx qr s y tr m);
@is_q_qq_qw_qx_qr_s_y_tr_m{@q} = (1) x scalar(@q);
- @q = qw(sub);
- @is_sub{@q} = (1) x scalar(@q);
-
@q = qw(package);
@is_package{@q} = (1) x scalar(@q);
+ @q = qw( ? : );
+ push @q, ',';
+ @is_comma_question_colon{@q} = (1) x scalar(@q);
+
+ # Hash of other possible line endings which may occur.
+ # Keep these coordinated with the regex where this is used.
+ # Note: chr(13) = chr(015)="\r".
+ @q = ( chr(13), chr(29), chr(26) );
+ @other_line_endings{@q} = (1) x scalar(@q);
+
# These keywords are handled specially in the tokenizer code:
my @special_keywords = qw(
do
splice
split
sprintf
+ state
substr
syscall
sysopen
@is_keyword_taking_list{@keyword_taking_list} =
(1) x scalar(@keyword_taking_list);
- # perl functions which may be unary operators
- my @keyword_taking_optional_args = qw(
+ # perl functions which may be unary operators.
+
+ # This list is used to decide if a pattern delimited by slashes, /pattern/,
+ # can follow one of these keywords.
+ @q = qw(
+ chomp eof eval fc lc pop shift uc undef
+ );
+
+ @is_keyword_rejecting_slash_as_pattern_delimiter{@q} =
+ (1) x scalar(@q);
+
+ # These are keywords for which an arg may optionally be omitted. They are
+ # currently only used to disambiguate a ? used as a ternary from one used
+ # as a (deprecated) pattern delimiter. In the future, they might be used
+ # to give a warning about ambiguous syntax before a /.
+ # Note: split has been omitted (see note below).
+ my @keywords_taking_optional_arg = qw(
+ abs
+ alarm
+ caller
+ chdir
chomp
+ chop
+ chr
+ chroot
+ close
+ cos
+ defined
+ die
eof
eval
+ evalbytes
+ exit
+ exp
+ fc
+ getc
+ glob
+ gmtime
+ hex
+ int
+ last
lc
+ lcfirst
+ length
+ localtime
+ log
+ lstat
+ mkdir
+ next
+ oct
+ ord
pop
+ pos
+ print
+ printf
+ prototype
+ quotemeta
+ rand
+ readline
+ readlink
+ readpipe
+ redo
+ ref
+ require
+ reset
+ reverse
+ rmdir
+ say
+ select
shift
+ sin
+ sleep
+ sqrt
+ srand
+ stat
+ study
+ tell
uc
+ ucfirst
+ umask
undef
+ unlink
+ warn
+ write
);
- @is_keyword_taking_optional_args{@keyword_taking_optional_args} =
- (1) x scalar(@keyword_taking_optional_args);
+ @is_keyword_taking_optional_arg{@keywords_taking_optional_arg} =
+ (1) x scalar(@keywords_taking_optional_arg);
+
+ # This list is used to decide if a pattern delimited by question marks,
+ # ?pattern?, can follow one of these keywords. Note that from perl 5.22
+ # on, a ?pattern? is not recognized, so we can be much more strict than
+ # with a /pattern/. Note that 'split' is not in this list. In current
+ # versions of perl a question following split must be a ternary, but
+ # in older versions it could be a pattern. The guessing algorithm will
+ # decide. We are combining two lists here to simplify the test.
+ @q = ( @keywords_taking_optional_arg, @operator_requestor );
+ @is_keyword_rejecting_question_as_pattern_delimiter{@q} =
+ (1) x scalar(@q);
# These are not used in any way yet
# my @unused_keywords = qw(
@is_keyword{@Keywords} = (1) x scalar(@Keywords);
}
1;
-