New upstream version 20221112

[perltidy.git] / lib / Perl / Tidy / Tokenizer.pm
diff --git a/lib/Perl/Tidy/Tokenizer.pm b/lib/Perl/Tidy/Tokenizer.pm

index c012e47f9d89b0a0c068a375f21c8ece343ced89..41c684afb4498f88128553e126948e6eedb6f081 100644 (file)
--- a/lib/Perl/Tidy/Tokenizer.pm
+++ b/lib/Perl/Tidy/Tokenizer.pm
@@ -21,11 +21,23 @@
  package Perl::Tidy::Tokenizer;
  use strict;
  use warnings;
-our $VERSION = '20210717';
+use English qw( -no_match_vars );
+
+our $VERSION = '20221112';
  
  use Perl::Tidy::LineBuffer;
  use Carp;
  
+use constant DEVEL_MODE   => 0;
+use constant EMPTY_STRING => q{};
+use constant SPACE        => q{ };
+
+# Decimal values of some ascii characters for quick checks
+use constant ORD_TAB           => 9;
+use constant ORD_SPACE         => 32;
+use constant ORD_PRINTABLE_MIN => 33;
+use constant ORD_PRINTABLE_MAX => 126;
+
  # PACKAGE VARIABLES for processing an entire FILE.
  # These must be package variables because most may get localized during
  # processing.  Most are initialized in sub prepare_for_a_new_file.
@@ -55,6 +67,7 @@ use vars qw{
    @current_depth
    @total_depth
    $total_depth
+  $next_sequence_number
    @nesting_sequence_number
    @current_sequence_number
    @paren_type
@@ -82,12 +95,16 @@ use vars qw{
    %expecting_term_types
    %expecting_term_token
    %is_digraph
+  %can_start_digraph
    %is_file_test_operator
    %is_trigraph
    %is_tetragraph
    %is_valid_token_type
    %is_keyword
    %is_code_block_token
+  %is_sort_map_grep_eval_do
+  %is_sort_map_grep
+  %is_grep_alias
    %really_want_term
    @opening_brace_names
    @closing_brace_names
@@ -95,11 +112,16 @@ use vars qw{
    %is_keyword_taking_optional_arg
    %is_keyword_rejecting_slash_as_pattern_delimiter
    %is_keyword_rejecting_question_as_pattern_delimiter
+  %is_q_qq_qx_qr_s_y_tr_m
    %is_q_qq_qw_qx_qr_s_y_tr_m
    %is_sub
    %is_package
    %is_comma_question_colon
+  %is_if_elsif_unless
+  %is_if_elsif_unless_case_when
    %other_line_endings
+  %is_END_DATA_format_sub
+  %is_semicolon_or_t
    $code_skipping_pattern_begin
    $code_skipping_pattern_end
  };
@@ -128,7 +150,8 @@ use constant MAX_NAG_MESSAGES => 6;
  
  BEGIN {
  
-    # Array index names for $self
+    # Array index names for $self.
+    # Do not combine with other BEGIN blocks (c101).
      my $i = 0;
      use constant {
          _rhere_target_list_                  => $i++,
@@ -221,7 +244,7 @@ This error is probably due to a recent programming change
  ======================================================================
  EOM
      exit 1;
-}
+} ## end sub AUTOLOAD
  
  sub Die {
      my ($msg) = @_;
@@ -229,14 +252,43 @@ sub Die {
      croak "unexpected return from Perl::Tidy::Die";
  }
  
+sub Fault {
+    my ($msg) = @_;
+
+    # Report an error which should not occur unless a bug was introduced by
+    # a recent program change, and pass the report to Die. Please comment
+    # each call to explain why it should not occur and where to look to fix
+    # it. caller(0) locates the call to Fault itself: $filename0, $line0.
+    my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
+    my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
+    my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
+    my $input_stream_name = get_input_stream_name();
+
+    Die(<<EOM);
+==============================================================================
+While operating on input stream with name: '$input_stream_name'
+A fault was detected at line $line0 of sub '$subroutine1'
+in file '$filename0'
+which was called from line $line1 of sub '$subroutine2'
+Message: '$msg'
+This is probably an error introduced by a recent programming change.
+Perl::Tidy::Tokenizer.pm reports VERSION='$VERSION'.
+==============================================================================
+EOM
+
+    # We shouldn't get here, but this return is to keep Perl-Critic from
+    # complaining.
+    return;
+} ## end sub Fault
+
  sub bad_pattern {
  
      # See if a pattern will compile. We have to use a string eval here,
      # but it should be safe because the pattern has been constructed
      # by this program.
      my ($pattern) = @_;
-    eval "'##'=~/$pattern/";
-    return $@;
+    my $ok = eval "'##'=~/$pattern/";    # undef only if the eval itself failed
+    return !defined($ok) || $EVAL_ERROR;    # true means the pattern is bad
  }
  
  sub make_code_skipping_pattern {
@@ -254,7 +306,7 @@ sub make_code_skipping_pattern {
          );
      }
      return $pattern;
-}
+} ## end sub make_code_skipping_pattern
  
  sub check_options {
  
@@ -264,6 +316,13 @@ sub check_options {
      %is_sub = ();
      $is_sub{'sub'} = 1;
  
+    %is_END_DATA_format_sub = (
+        '__END__'  => 1,
+        '__DATA__' => 1,
+        'format'   => 1,
+        'sub'      => 1,
+    );
+
      # Install any aliases to 'sub'
      if ( $rOpts->{'sub-alias-list'} ) {
  
@@ -272,17 +331,27 @@ sub check_options {
          # for example, it might be 'sub method fun'
          my @sub_alias_list = split /\s+/, $rOpts->{'sub-alias-list'};
          foreach my $word (@sub_alias_list) {
-            $is_sub{$word} = 1;
+            $is_sub{$word}                 = 1;
+            $is_END_DATA_format_sub{$word} = 1;
          }
      }
  
+    %is_grep_alias = ();
+    if ( $rOpts->{'grep-alias-list'} ) {
+
+        # Note that 'grep-alias-list' has been preprocessed to be a trimmed,
+        # space-separated list
+        my @q = split /\s+/, $rOpts->{'grep-alias-list'};
+        @{is_grep_alias}{@q} = (1) x scalar(@q);
+    }
+
      $rOpts_code_skipping = $rOpts->{'code-skipping'};
      $code_skipping_pattern_begin =
        make_code_skipping_pattern( $rOpts, 'code-skipping-begin', '#<<V' );
      $code_skipping_pattern_end =
        make_code_skipping_pattern( $rOpts, 'code-skipping-end', '#>>V' );
      return;
-}
+} ## end sub check_options
  
  sub new {
  
@@ -341,8 +410,8 @@ sub new {
      my $self = [];
      $self->[_rhere_target_list_]        = [];
      $self->[_in_here_doc_]              = 0;
-    $self->[_here_doc_target_]          = "";
-    $self->[_here_quote_character_]     = "";
+    $self->[_here_doc_target_]          = EMPTY_STRING;
+    $self->[_here_quote_character_]     = EMPTY_STRING;
      $self->[_in_data_]                  = 0;
      $self->[_in_end_]                   = 0;
      $self->[_in_format_]                = 0;
@@ -351,7 +420,7 @@ sub new {
      $self->[_in_skipped_]               = 0;
      $self->[_in_attribute_list_]        = 0;
      $self->[_in_quote_]                 = 0;
-    $self->[_quote_target_]             = "";
+    $self->[_quote_target_]             = EMPTY_STRING;
      $self->[_line_start_quote_]         = -1;
      $self->[_starting_level_]           = $args{starting_level};
      $self->[_know_starting_level_]      = defined( $args{starting_level} );
@@ -383,7 +452,7 @@ sub new {
      $self->[_unexpected_error_count_]   = 0;
      $self->[_started_looking_for_here_target_at_] = 0;
      $self->[_nearly_matched_here_target_at_]      = undef;
-    $self->[_line_of_text_]                       = "";
+    $self->[_line_of_text_]                       = EMPTY_STRING;
      $self->[_rlower_case_labels_at_]              = undef;
      $self->[_extended_syntax_]                    = $args{extended_syntax};
      $self->[_maximum_level_]                      = 0;
@@ -393,12 +462,17 @@ sub new {
        $rOpts->{'maximum-unexpected-errors'};
      $self->[_rOpts_logfile_] = $rOpts->{'logfile'};
      $self->[_rOpts_]         = $rOpts;
+
+    # These vars are used for guessing indentation and must be positive
+    $self->[_tabsize_]        = 8 if ( !$self->[_tabsize_] );
+    $self->[_indent_columns_] = 4 if ( !$self->[_indent_columns_] );
+
      bless $self, $class;
  
      $tokenizer_self = $self;
  
      prepare_for_a_new_file();
-    find_starting_indentation_level();
+    $self->find_starting_indentation_level();
  
      # This is not a full class yet, so die if an attempt is made to
      # create more than one object.
@@ -410,7 +484,7 @@ sub new {
  
      return $self;
  
-}
+} ## end sub new
  
  # interface to Perl::Tidy::Logger routines
  sub warning {
@@ -422,14 +496,25 @@ sub warning {
      return;
  }
  
+sub get_input_stream_name {
+
+    # Ask the logger object, if there is one, for the name of the input
+    # stream; without a logger the name is an empty string.
+    my $logger_object = $tokenizer_self->[_logger_object_];
+    return EMPTY_STRING if ( !$logger_object );
+    return scalar $logger_object->get_input_stream_name();
+}
+
  sub complain {
      my $msg           = shift;
      my $logger_object = $tokenizer_self->[_logger_object_];
      if ($logger_object) {
+        my $input_line_number = $tokenizer_self->[_last_line_number_] + 1;
+        $msg = "Line $input_line_number: $msg";
          $logger_object->complain($msg);
      }
      return;
-}
+} ## end sub complain
  
  sub write_logfile_entry {
      my $msg           = shift;
@@ -515,7 +600,7 @@ sub report_tokenization_errors {
      my ($self) = @_;
  
      # Report any tokenization errors and return a flag '$severe_error'.
-    # Set $severe_error = 1 if the tokenizations errors are so severe that
+    # Set $severe_error = 1 if the tokenization errors are so severe that
      # the formatter should not attempt to format the file. Instead, it will
      # just output the file verbatim.
  
@@ -548,7 +633,7 @@ EOM
      check_final_nesting_depths();
  
      # Likewise, large numbers of brace errors usually indicate non-perl
-    # scirpts, so set the severe error flag at a low number.  This is similar
+    # scripts, so set the severe error flag at a low number.  This is similar
      # to the level check, but different because braces may balance but be
      # incorrectly interlaced.
      if ( $tokenizer_self->[_true_brace_error_count_] > 2 ) {
@@ -601,9 +686,10 @@ EOM
              );
          }
          else {
-            warning(
-"hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n"
-            );
+            warning(<<EOM);
+Hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string.
+  (Perl will match to the end of file but this may not be intended).
+EOM
          }
          my $nearly_matched_here_target_at =
            $tokenizer_self->[_nearly_matched_here_target_at_];
@@ -670,11 +756,11 @@ EOM
            @{ $tokenizer_self->[_rlower_case_labels_at_] };
          write_logfile_entry(
              "Suggest using upper case characters in label(s)\n");
-        local $" = ')(';
+        local $LIST_SEPARATOR = ')(';
          write_logfile_entry("  defined at line(s): (@lower_case_labels_at)\n");
      }
      return $severe_error;
-}
+} ## end sub report_tokenization_errors
  
  sub report_v_string {
  
@@ -690,36 +776,47 @@ sub report_v_string {
          );
      }
      return;
+} ## end sub report_v_string
+
+sub is_valid_token_type {
+    # Return the %is_valid_token_type entry for the given token type.
+    return $is_valid_token_type{ $_[0] };
  }
  
  sub get_input_line_number {
      return $tokenizer_self->[_last_line_number_];
  }
  
+sub log_numbered_msg {
+    my ( $self, $msg ) = @_;
+    # Send $msg to the logfile, prefixed with the number of the last input
+    # line read by this tokenizer.
+    my $prefix = 'Line ' . $self->[_last_line_number_] . ': ';
+    write_logfile_entry( $prefix . $msg );
+    return;
+}
+
  # returns the next tokenized line
  sub get_line {
  
      my $self = shift;
  
-    # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth,
-    # $square_bracket_depth, $paren_depth
+    # USES GLOBAL VARIABLES:
+    #   $brace_depth, $square_bracket_depth, $paren_depth
  
-    my $input_line = $tokenizer_self->[_line_buffer_object_]->get_line();
-    $tokenizer_self->[_line_of_text_] = $input_line;
+    my $input_line = $self->[_line_buffer_object_]->get_line();
+    $self->[_line_of_text_] = $input_line;
  
      return unless ($input_line);
  
-    my $input_line_number = ++$tokenizer_self->[_last_line_number_];
-
-    my $write_logfile_entry = sub {
-        my ($msg) = @_;
-        write_logfile_entry("Line $input_line_number: $msg");
-    };
+    my $input_line_number = ++$self->[_last_line_number_];
  
      # Find and remove what characters terminate this line, including any
      # control r
-    my $input_line_separator = "";
-    if ( chomp($input_line) ) { $input_line_separator = $/ }
+    my $input_line_separator = EMPTY_STRING;
+    if ( chomp($input_line) ) {
+        $input_line_separator = $INPUT_RECORD_SEPARATOR;
+    }
  
      # The first test here very significantly speeds things up, but be sure to
      # keep the regex and hash %other_line_endings the same.
@@ -732,7 +829,7 @@ sub get_line {
      # for backwards compatibility we keep the line text terminated with
      # a newline character
      $input_line .= "\n";
-    $tokenizer_self->[_line_of_text_] = $input_line;    # update
+    $self->[_line_of_text_] = $input_line;
  
      # create a data structure describing this line which will be
      # returned to the caller.
@@ -747,6 +844,8 @@ sub get_line {
      #   HERE_END       - last line of here-doc (target word)
      #   FORMAT         - format section
      #   FORMAT_END     - last line of format section, '.'
+    #   SKIP           - code skipping section
+    #   SKIP_END       - last line of code skipping section, '#>>V'
      #   DATA_START     - __DATA__ line
      #   DATA           - unidentified text following __DATA__
      #   END_START      - __END__ line
@@ -769,28 +868,24 @@ sub get_line {
          _curly_brace_depth         => $brace_depth,
          _square_bracket_depth      => $square_bracket_depth,
          _paren_depth               => $paren_depth,
-        _quote_character           => '',
-##        _rtoken_type               => undef,
-##        _rtokens                   => undef,
-##        _rlevels                   => undef,
-##        _rslevels                  => undef,
-##        _rblock_type               => undef,
-##        _rcontainer_type           => undef,
-##        _rcontainer_environment    => undef,
-##        _rtype_sequence            => undef,
-##        _rnesting_tokens           => undef,
-##        _rci_levels                => undef,
-##        _rnesting_blocks           => undef,
-##        _starting_in_quote         => 0,
-##        _ending_in_quote           => 0,
+        _quote_character           => EMPTY_STRING,
+## Skip these needless initializations for efficiency:
+##      _rtoken_type               => undef,
+##      _rtokens                   => undef,
+##      _rlevels                   => undef,
+##      _rblock_type               => undef,
+##      _rtype_sequence            => undef,
+##      _rci_levels                => undef,
+##      _starting_in_quote         => 0,
+##      _ending_in_quote           => 0,
      };
  
      # must print line unchanged if we are in a here document
-    if ( $tokenizer_self->[_in_here_doc_] ) {
+    if ( $self->[_in_here_doc_] ) {
  
          $line_of_tokens->{_line_type} = 'HERE';
-        my $here_doc_target      = $tokenizer_self->[_here_doc_target_];
-        my $here_quote_character = $tokenizer_self->[_here_quote_character_];
+        my $here_doc_target      = $self->[_here_doc_target_];
+        my $here_quote_character = $self->[_here_quote_character_];
          my $candidate_target     = $input_line;
          chomp $candidate_target;
  
@@ -800,27 +895,26 @@ sub get_line {
              $candidate_target =~ s/^\s*//;
          }
          if ( $candidate_target eq $here_doc_target ) {
-            $tokenizer_self->[_nearly_matched_here_target_at_] = undef;
+            $self->[_nearly_matched_here_target_at_] = undef;
              $line_of_tokens->{_line_type} = 'HERE_END';
-            $write_logfile_entry->("Exiting HERE document $here_doc_target\n");
+            $self->log_numbered_msg("Exiting HERE document $here_doc_target\n");
  
-            my $rhere_target_list = $tokenizer_self->[_rhere_target_list_];
+            my $rhere_target_list = $self->[_rhere_target_list_];
              if ( @{$rhere_target_list} ) {  # there can be multiple here targets
                  ( $here_doc_target, $here_quote_character ) =
                    @{ shift @{$rhere_target_list} };
-                $tokenizer_self->[_here_doc_target_] = $here_doc_target;
-                $tokenizer_self->[_here_quote_character_] =
-                  $here_quote_character;
-                $write_logfile_entry->(
+                $self->[_here_doc_target_]      = $here_doc_target;
+                $self->[_here_quote_character_] = $here_quote_character;
+                $self->log_numbered_msg(
                      "Entering HERE document $here_doc_target\n");
-                $tokenizer_self->[_nearly_matched_here_target_at_] = undef;
-                $tokenizer_self->[_started_looking_for_here_target_at_] =
+                $self->[_nearly_matched_here_target_at_] = undef;
+                $self->[_started_looking_for_here_target_at_] =
                    $input_line_number;
              }
              else {
-                $tokenizer_self->[_in_here_doc_]          = 0;
-                $tokenizer_self->[_here_doc_target_]      = "";
-                $tokenizer_self->[_here_quote_character_] = "";
+                $self->[_in_here_doc_]          = 0;
+                $self->[_here_doc_target_]      = EMPTY_STRING;
+                $self->[_here_quote_character_] = EMPTY_STRING;
              }
          }
  
@@ -830,24 +924,23 @@ sub get_line {
              $candidate_target =~ s/\s*$//;
              $candidate_target =~ s/^\s*//;
              if ( $candidate_target eq $here_doc_target ) {
-                $tokenizer_self->[_nearly_matched_here_target_at_] =
-                  $input_line_number;
+                $self->[_nearly_matched_here_target_at_] = $input_line_number;
              }
          }
          return $line_of_tokens;
      }
  
      # Print line unchanged if we are in a format section
-    elsif ( $tokenizer_self->[_in_format_] ) {
+    elsif ( $self->[_in_format_] ) {
  
          if ( $input_line =~ /^\.[\s#]*$/ ) {
  
              # Decrement format depth count at a '.' after a 'format'
-            $tokenizer_self->[_in_format_]--;
+            $self->[_in_format_]--;
  
              # This is the end when count reaches 0
-            if ( !$tokenizer_self->[_in_format_] ) {
-                $write_logfile_entry->("Exiting format section\n");
+            if ( !$self->[_in_format_] ) {
+                $self->log_numbered_msg("Exiting format section\n");
                  $line_of_tokens->{_line_type} = 'FORMAT_END';
              }
          }
@@ -857,22 +950,22 @@ sub get_line {
  
                  # Increment format depth count at a 'format' within a 'format'
                  # This is a simple way to handle nested formats (issue c019).
-                $tokenizer_self->[_in_format_]++;
+                $self->[_in_format_]++;
              }
          }
          return $line_of_tokens;
      }
  
      # must print line unchanged if we are in pod documentation
-    elsif ( $tokenizer_self->[_in_pod_] ) {
+    elsif ( $self->[_in_pod_] ) {
  
          $line_of_tokens->{_line_type} = 'POD';
          if ( $input_line =~ /^=cut/ ) {
              $line_of_tokens->{_line_type} = 'POD_END';
-            $write_logfile_entry->("Exiting POD section\n");
-            $tokenizer_self->[_in_pod_] = 0;
+            $self->log_numbered_msg("Exiting POD section\n");
+            $self->[_in_pod_] = 0;
          }
-        if ( $input_line =~ /^\#\!.*perl\b/ && !$tokenizer_self->[_in_end_] ) {
+        if ( $input_line =~ /^\#\!.*perl\b/ && !$self->[_in_end_] ) {
              warning(
                  "Hash-bang in pod can cause older versions of perl to fail! \n"
              );
@@ -882,13 +975,13 @@ sub get_line {
      }
  
      # print line unchanged if in skipped section
-    elsif ( $tokenizer_self->[_in_skipped_] ) {
+    elsif ( $self->[_in_skipped_] ) {
  
-        # NOTE: marked as the existing type 'FORMAT' to keep html working
-        $line_of_tokens->{_line_type} = 'FORMAT';
+        $line_of_tokens->{_line_type} = 'SKIP';
          if ( $input_line =~ /$code_skipping_pattern_end/ ) {
-            $write_logfile_entry->("Exiting code-skipping section\n");
-            $tokenizer_self->[_in_skipped_] = 0;
+            $line_of_tokens->{_line_type} = 'SKIP_END';
+            $self->log_numbered_msg("Exiting code-skipping section\n");
+            $self->[_in_skipped_] = 0;
          }
          return $line_of_tokens;
      }
@@ -897,13 +990,13 @@ sub get_line {
      # are seeing illegal tokens and cannot continue.  Syntax errors do
      # not pass this route).  Calling routine can decide what to do, but
      # the default can be to just pass all lines as if they were after __END__
-    elsif ( $tokenizer_self->[_in_error_] ) {
+    elsif ( $self->[_in_error_] ) {
          $line_of_tokens->{_line_type} = 'ERROR';
          return $line_of_tokens;
      }
  
      # print line unchanged if we are __DATA__ section
-    elsif ( $tokenizer_self->[_in_data_] ) {
+    elsif ( $self->[_in_data_] ) {
  
          # ...but look for POD
          # Note that the _in_data and _in_end flags remain set
@@ -911,8 +1004,8 @@ sub get_line {
          # end of a pod section
          if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) {
              $line_of_tokens->{_line_type} = 'POD_START';
-            $write_logfile_entry->("Entering POD section\n");
-            $tokenizer_self->[_in_pod_] = 1;
+            $self->log_numbered_msg("Entering POD section\n");
+            $self->[_in_pod_] = 1;
              return $line_of_tokens;
          }
          else {
@@ -922,7 +1015,7 @@ sub get_line {
      }
  
      # print line unchanged if we are in __END__ section
-    elsif ( $tokenizer_self->[_in_end_] ) {
+    elsif ( $self->[_in_end_] ) {
  
          # ...but look for POD
          # Note that the _in_data and _in_end flags remain set
@@ -930,8 +1023,8 @@ sub get_line {
          # end of a pod section
          if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) {
              $line_of_tokens->{_line_type} = 'POD_START';
-            $write_logfile_entry->("Entering POD section\n");
-            $tokenizer_self->[_in_pod_] = 1;
+            $self->log_numbered_msg("Entering POD section\n");
+            $self->[_in_pod_] = 1;
              return $line_of_tokens;
          }
          else {
@@ -941,17 +1034,17 @@ sub get_line {
      }
  
      # check for a hash-bang line if we haven't seen one
-    if ( !$tokenizer_self->[_saw_hash_bang_] ) {
+    if ( !$self->[_saw_hash_bang_] ) {
          if ( $input_line =~ /^\#\!.*perl\b/ ) {
-            $tokenizer_self->[_saw_hash_bang_] = $input_line_number;
+            $self->[_saw_hash_bang_] = $input_line_number;
  
              # check for -w and -P flags
              if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
-                $tokenizer_self->[_saw_perl_dash_P_] = 1;
+                $self->[_saw_perl_dash_P_] = 1;
              }
  
              if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
-                $tokenizer_self->[_saw_perl_dash_w_] = 1;
+                $self->[_saw_perl_dash_w_] = 1;
              }
  
              if (
@@ -963,7 +1056,7 @@ sub get_line {
                         $last_nonblank_block_type
                      && $last_nonblank_block_type eq 'BEGIN'
                  )
-                && !$tokenizer_self->[_look_for_hash_bang_]
+                && !$self->[_look_for_hash_bang_]
  
                  # Try to avoid giving a false alarm at a simple comment.
                  # These look like valid hash-bang lines:
@@ -984,7 +1077,7 @@ sub get_line {
  
                  # this is helpful for VMS systems; we may have accidentally
                  # tokenized some DCL commands
-                if ( $tokenizer_self->[_started_tokenizing_] ) {
+                if ( $self->[_started_tokenizing_] ) {
                      warning(
  "There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
                      );
@@ -1004,8 +1097,8 @@ sub get_line {
      }
  
      # wait for a hash-bang before parsing if the user invoked us with -x
-    if ( $tokenizer_self->[_look_for_hash_bang_]
-        && !$tokenizer_self->[_saw_hash_bang_] )
+    if ( $self->[_look_for_hash_bang_]
+        && !$self->[_saw_hash_bang_] )
      {
          $line_of_tokens->{_line_type} = 'SYSTEM';
          return $line_of_tokens;
@@ -1028,112 +1121,99 @@ sub get_line {
      #        _in_skipped_
      #        _in_pod_
      #        _in_quote_
-    my $ending_in_quote_last = $tokenizer_self->[_in_quote_];
-    tokenize_this_line($line_of_tokens);
+    my $ending_in_quote_last = $self->[_in_quote_];
+    $self->tokenize_this_line($line_of_tokens);
  
      # Now finish defining the return structure and return it
-    $line_of_tokens->{_ending_in_quote} = $tokenizer_self->[_in_quote_];
+    $line_of_tokens->{_ending_in_quote} = $self->[_in_quote_];
  
      # handle severe error (binary data in script)
-    if ( $tokenizer_self->[_in_error_] ) {
-        $tokenizer_self->[_in_quote_] = 0;    # to avoid any more messages
+    if ( $self->[_in_error_] ) {
+        $self->[_in_quote_] = 0;    # to avoid any more messages
          warning("Giving up after error\n");
          $line_of_tokens->{_line_type} = 'ERROR';
-        reset_indentation_level(0);           # avoid error messages
+        reset_indentation_level(0);    # avoid error messages
          return $line_of_tokens;
      }
  
      # handle start of pod documentation
-    if ( $tokenizer_self->[_in_pod_] ) {
+    if ( $self->[_in_pod_] ) {
  
          # This gets tricky..above a __DATA__ or __END__ section, perl
          # accepts '=cut' as the start of pod section. But afterwards,
          # only pod utilities see it and they may ignore an =cut without
          # leading =head.  In any case, this isn't good.
          if ( $input_line =~ /^=cut\b/ ) {
-            if ( $tokenizer_self->[_saw_data_] || $tokenizer_self->[_saw_end_] )
-            {
+            if ( $self->[_saw_data_] || $self->[_saw_end_] ) {
                  complain("=cut while not in pod ignored\n");
-                $tokenizer_self->[_in_pod_] = 0;
+                $self->[_in_pod_] = 0;
                  $line_of_tokens->{_line_type} = 'POD_END';
              }
              else {
                  $line_of_tokens->{_line_type} = 'POD_START';
                  warning(
  "=cut starts a pod section .. this can fool pod utilities.\n"
-                );
-                $write_logfile_entry->("Entering POD section\n");
+                ) unless (DEVEL_MODE);
+                $self->log_numbered_msg("Entering POD section\n");
              }
          }
  
          else {
              $line_of_tokens->{_line_type} = 'POD_START';
-            $write_logfile_entry->("Entering POD section\n");
+            $self->log_numbered_msg("Entering POD section\n");
          }
  
          return $line_of_tokens;
      }
  
      # handle start of skipped section
-    if ( $tokenizer_self->[_in_skipped_] ) {
+    if ( $self->[_in_skipped_] ) {
  
-        # NOTE: marked as the existing type 'FORMAT' to keep html working
-        $line_of_tokens->{_line_type} = 'FORMAT';
-        $write_logfile_entry->("Entering code-skipping section\n");
+        $line_of_tokens->{_line_type} = 'SKIP';
+        $self->log_numbered_msg("Entering code-skipping section\n");
          return $line_of_tokens;
      }
  
-    # Update indentation levels for log messages.
-    # Skip blank lines and also block comments, unless a logfile is requested.
-    # Note that _line_of_text_ is the input line but trimmed from left to right.
-    my $lot = $tokenizer_self->[_line_of_text_];
-    if ( $lot && ( $self->[_rOpts_logfile_] || substr( $lot, 0, 1 ) ne '#' ) ) {
-        my $rlevels = $line_of_tokens->{_rlevels};
-        $line_of_tokens->{_guessed_indentation_level} =
-          guess_old_indentation_level($input_line);
-    }
-
      # see if this line contains here doc targets
-    my $rhere_target_list = $tokenizer_self->[_rhere_target_list_];
+    my $rhere_target_list = $self->[_rhere_target_list_];
      if ( @{$rhere_target_list} ) {
  
          my ( $here_doc_target, $here_quote_character ) =
            @{ shift @{$rhere_target_list} };
-        $tokenizer_self->[_in_here_doc_]          = 1;
-        $tokenizer_self->[_here_doc_target_]      = $here_doc_target;
-        $tokenizer_self->[_here_quote_character_] = $here_quote_character;
-        $write_logfile_entry->("Entering HERE document $here_doc_target\n");
-        $tokenizer_self->[_started_looking_for_here_target_at_] =
-          $input_line_number;
+        $self->[_in_here_doc_]          = 1;
+        $self->[_here_doc_target_]      = $here_doc_target;
+        $self->[_here_quote_character_] = $here_quote_character;
+        $self->log_numbered_msg("Entering HERE document $here_doc_target\n");
+        $self->[_started_looking_for_here_target_at_] = $input_line_number;
      }
  
      # NOTE: __END__ and __DATA__ statements are written unformatted
      # because they can theoretically contain additional characters
      # which are not tokenized (and cannot be read with <DATA> either!).
-    if ( $tokenizer_self->[_in_data_] ) {
+    if ( $self->[_in_data_] ) {
          $line_of_tokens->{_line_type} = 'DATA_START';
-        $write_logfile_entry->("Starting __DATA__ section\n");
-        $tokenizer_self->[_saw_data_] = 1;
+        $self->log_numbered_msg("Starting __DATA__ section\n");
+        $self->[_saw_data_] = 1;
  
          # keep parsing after __DATA__ if use SelfLoader was seen
-        if ( $tokenizer_self->[_saw_selfloader_] ) {
-            $tokenizer_self->[_in_data_] = 0;
-            $write_logfile_entry->(
+        if ( $self->[_saw_selfloader_] ) {
+            $self->[_in_data_] = 0;
+            $self->log_numbered_msg(
                  "SelfLoader seen, continuing; -nlsl deactivates\n");
          }
  
          return $line_of_tokens;
      }
  
-    elsif ( $tokenizer_self->[_in_end_] ) {
+    elsif ( $self->[_in_end_] ) {
          $line_of_tokens->{_line_type} = 'END_START';
-        $write_logfile_entry->("Starting __END__ section\n");
-        $tokenizer_self->[_saw_end_] = 1;
+        $self->log_numbered_msg("Starting __END__ section\n");
+        $self->[_saw_end_] = 1;
  
          # keep parsing after __END__ if use AutoLoader was seen
-        if ( $tokenizer_self->[_saw_autoloader_] ) {
-            $tokenizer_self->[_in_end_] = 0;
-            $write_logfile_entry->(
+        if ( $self->[_saw_autoloader_] ) {
+            $self->[_in_end_] = 0;
+            $self->log_numbered_msg(
                  "AutoLoader seen, continuing; -nlal deactivates\n");
          }
          return $line_of_tokens;
@@ -1143,47 +1223,44 @@ sub get_line {
      $line_of_tokens->{_line_type} = 'CODE';
  
      # remember if we have seen any real code
-    if (  !$tokenizer_self->[_started_tokenizing_]
+    if (  !$self->[_started_tokenizing_]
          && $input_line !~ /^\s*$/
          && $input_line !~ /^\s*#/ )
      {
-        $tokenizer_self->[_started_tokenizing_] = 1;
+        $self->[_started_tokenizing_] = 1;
      }
  
-    if ( $tokenizer_self->[_debugger_object_] ) {
-        $tokenizer_self->[_debugger_object_]
-          ->write_debug_entry($line_of_tokens);
+    if ( $self->[_debugger_object_] ) {
+        $self->[_debugger_object_]->write_debug_entry($line_of_tokens);
      }
  
      # Note: if keyword 'format' occurs in this line code, it is still CODE
      # (keyword 'format' need not start a line)
-    if ( $tokenizer_self->[_in_format_] ) {
-        $write_logfile_entry->("Entering format section\n");
+    if ( $self->[_in_format_] ) {
+        $self->log_numbered_msg("Entering format section\n");
      }
  
-    if ( $tokenizer_self->[_in_quote_]
-        and ( $tokenizer_self->[_line_start_quote_] < 0 ) )
+    if ( $self->[_in_quote_]
+        and ( $self->[_line_start_quote_] < 0 ) )
      {
  
          #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
-        if ( ( my $quote_target = $tokenizer_self->[_quote_target_] ) !~
-            /^\s*$/ )
-        {
-            $tokenizer_self->[_line_start_quote_] = $input_line_number;
-            $write_logfile_entry->(
+        if ( ( my $quote_target = $self->[_quote_target_] ) !~ /^\s*$/ ) {
+            $self->[_line_start_quote_] = $input_line_number;
+            $self->log_numbered_msg(
                  "Start multi-line quote or pattern ending in $quote_target\n");
          }
      }
-    elsif ( ( $tokenizer_self->[_line_start_quote_] >= 0 )
-        && !$tokenizer_self->[_in_quote_] )
+    elsif ( ( $self->[_line_start_quote_] >= 0 )
+        && !$self->[_in_quote_] )
      {
-        $tokenizer_self->[_line_start_quote_] = -1;
-        $write_logfile_entry->("End of multi-line quote or pattern\n");
+        $self->[_line_start_quote_] = -1;
+        $self->log_numbered_msg("End of multi-line quote or pattern\n");
      }
  
      # we are returning a line of CODE
      return $line_of_tokens;
-}
+} ## end sub get_line
  
  sub find_starting_indentation_level {
  
@@ -1193,17 +1270,17 @@ sub find_starting_indentation_level {
      # example) it may not be zero.  The user may specify this with the
      # -sil=n parameter but normally doesn't so we have to guess.
      #
-    # USES GLOBAL VARIABLES: $tokenizer_self
+    my ($self) = @_;
      my $starting_level = 0;
  
      # use value if given as parameter
-    if ( $tokenizer_self->[_know_starting_level_] ) {
-        $starting_level = $tokenizer_self->[_starting_level_];
+    if ( $self->[_know_starting_level_] ) {
+        $starting_level = $self->[_starting_level_];
      }
  
      # if we know there is a hash_bang line, the level must be zero
-    elsif ( $tokenizer_self->[_look_for_hash_bang_] ) {
-        $tokenizer_self->[_know_starting_level_] = 1;
+    elsif ( $self->[_look_for_hash_bang_] ) {
+        $self->[_know_starting_level_] = 1;
      }
  
      # otherwise figure it out from the input file
@@ -1212,10 +1289,8 @@ sub find_starting_indentation_level {
          my $i = 0;
  
          # keep looking at lines until we find a hash bang or piece of code
-        my $msg = "";
-        while ( $line =
-            $tokenizer_self->[_line_buffer_object_]->peek_ahead( $i++ ) )
-        {
+        my $msg = EMPTY_STRING;
+        while ( $line = $self->[_line_buffer_object_]->peek_ahead( $i++ ) ) {
  
              # if first line is #! then assume starting level is zero
              if ( $i == 1 && $line =~ /^\#\!/ ) {
@@ -1230,10 +1305,10 @@ sub find_starting_indentation_level {
          $msg = "Line $i implies starting-indentation-level = $starting_level\n";
          write_logfile_entry("$msg");
      }
-    $tokenizer_self->[_starting_level_] = $starting_level;
+    $self->[_starting_level_] = $starting_level;
      reset_indentation_level($starting_level);
      return;
-}
+} ## end sub find_starting_indentation_level
  
  sub guess_old_indentation_level {
      my ($line) = @_;
@@ -1278,7 +1353,7 @@ sub guess_old_indentation_level {
      $indent_columns = 4 if ( !$indent_columns );
      $level          = int( $spaces / $indent_columns );
      return ($level);
-}
+} ## end sub guess_old_indentation_level
  
  # This is a currently unused debug routine
  sub dump_functions {
@@ -1288,7 +1363,7 @@ sub dump_functions {
          $fh->print("\nnon-constant subs in package $pkg\n");
  
          foreach my $sub ( keys %{ $is_user_function{$pkg} } ) {
-            my $msg = "";
+            my $msg = EMPTY_STRING;
              if ( $is_block_list_function{$pkg}{$sub} ) {
                  $msg = 'block_list';
              }
@@ -1308,17 +1383,17 @@ sub dump_functions {
          }
      }
      return;
-}
+} ## end sub dump_functions
  
  sub prepare_for_a_new_file {
  
      # previous tokens needed to determine what to expect next
      $last_nonblank_token      = ';';    # the only possible starting state which
      $last_nonblank_type       = ';';    # will make a leading brace a code block
-    $last_nonblank_block_type = '';
+    $last_nonblank_block_type = EMPTY_STRING;
  
      # scalars for remembering statement types across multiple lines
-    $statement_type    = '';            # '' or 'use' or 'sub..' or 'case..'
+    $statement_type    = EMPTY_STRING;    # '' or 'use' or 'sub..' or 'case..'
      $in_attribute_list = 0;
  
      # scalars for remembering where we are in the file
@@ -1326,9 +1401,9 @@ sub prepare_for_a_new_file {
      $context         = UNKNOWN_CONTEXT;
  
      # hashes used to remember function information
-    %is_constant             = ();      # user-defined constants
-    %is_user_function        = ();      # user-defined functions
-    %user_function_prototype = ();      # their prototypes
+    %is_constant             = ();        # user-defined constants
+    %is_user_function        = ();        # user-defined functions
+    %user_function_prototype = ();        # their prototypes
      %is_block_function       = ();
      %is_block_list_function  = ();
      %saw_function_definition = ();
@@ -1344,6 +1419,7 @@ sub prepare_for_a_new_file {
      @total_depth             = ();
      @nesting_sequence_number = ( 0 .. @closing_brace_names - 1 );
      @current_sequence_number = ();
+    $next_sequence_number    = 2;    # The value 1 is reserved for SEQ_ROOT
  
      @paren_type                     = ();
      @paren_semicolon_count          = ();
@@ -1359,19 +1435,19 @@ sub prepare_for_a_new_file {
      @nested_statement_type          = ();
      @starting_line_of_current_depth = ();
  
-    $paren_type[$paren_depth]            = '';
+    $paren_type[$paren_depth]            = EMPTY_STRING;
      $paren_semicolon_count[$paren_depth] = 0;
-    $paren_structural_type[$brace_depth] = '';
+    $paren_structural_type[$brace_depth] = EMPTY_STRING;
      $brace_type[$brace_depth] = ';';    # identify opening brace as code block
-    $brace_structural_type[$brace_depth]                   = '';
+    $brace_structural_type[$brace_depth]                   = EMPTY_STRING;
      $brace_context[$brace_depth]                           = UNKNOWN_CONTEXT;
      $brace_package[$paren_depth]                           = $current_package;
-    $square_bracket_type[$square_bracket_depth]            = '';
-    $square_bracket_structural_type[$square_bracket_depth] = '';
+    $square_bracket_type[$square_bracket_depth]            = EMPTY_STRING;
+    $square_bracket_structural_type[$square_bracket_depth] = EMPTY_STRING;
  
      initialize_tokenizer_state();
      return;
-}
+} ## end sub prepare_for_a_new_file
  
  {    ## closure for sub tokenize_this_line
  
@@ -1409,7 +1485,7 @@ sub prepare_for_a_new_file {
      # TV4: SCALARS for multi-line identifiers and
      # statements. These are initialized with a subroutine call
      # and continually updated as lines are processed.
-    my ( $id_scan_state, $identifier, $want_paren, $indented_if_level );
+    my ( $id_scan_state, $identifier, $want_paren );
  
      # TV5: SCALARS for tracking indentation level.
      # Initialized once and continually updated as lines are
@@ -1444,27 +1520,26 @@ sub prepare_for_a_new_file {
          # TV3:
          $in_quote                = 0;
          $quote_type              = 'Q';
-        $quote_character         = "";
+        $quote_character         = EMPTY_STRING;
          $quote_pos               = 0;
          $quote_depth             = 0;
-        $quoted_string_1         = "";
-        $quoted_string_2         = "";
-        $allowed_quote_modifiers = "";
+        $quoted_string_1         = EMPTY_STRING;
+        $quoted_string_2         = EMPTY_STRING;
+        $allowed_quote_modifiers = EMPTY_STRING;
  
          # TV4:
-        $id_scan_state     = '';
-        $identifier        = '';
-        $want_paren        = "";
-        $indented_if_level = 0;
+        $id_scan_state = EMPTY_STRING;
+        $identifier    = EMPTY_STRING;
+        $want_paren    = EMPTY_STRING;
  
          # TV5:
-        $nesting_token_string             = "";
-        $nesting_type_string              = "";
-        $nesting_block_string             = '1';    # initially in a block
-        $nesting_block_flag               = 1;
-        $nesting_list_string              = '0';    # initially not in a list
-        $nesting_list_flag                = 0;      # initially not in a list
-        $ci_string_in_tokenizer           = "";
+        $nesting_token_string   = EMPTY_STRING;
+        $nesting_type_string    = EMPTY_STRING;
+        $nesting_block_string   = '1';            # initially in a block
+        $nesting_block_flag     = 1;
+        $nesting_list_string    = '0';            # initially not in a list
+        $nesting_list_flag      = 0;              # initially not in a list
+        $ci_string_in_tokenizer = EMPTY_STRING;
          $continuation_string_in_tokenizer = "0";
          $in_statement_continuation        = 0;
          $level_in_tokenizer               = 0;
@@ -1472,16 +1547,16 @@ sub prepare_for_a_new_file {
          $rslevel_stack                    = [];
  
          # TV6:
-        $last_nonblank_container_type      = '';
-        $last_nonblank_type_sequence       = '';
+        $last_nonblank_container_type      = EMPTY_STRING;
+        $last_nonblank_type_sequence       = EMPTY_STRING;
          $last_last_nonblank_token          = ';';
          $last_last_nonblank_type           = ';';
-        $last_last_nonblank_block_type     = '';
-        $last_last_nonblank_container_type = '';
-        $last_last_nonblank_type_sequence  = '';
-        $last_nonblank_prototype           = "";
+        $last_last_nonblank_block_type     = EMPTY_STRING;
+        $last_last_nonblank_container_type = EMPTY_STRING;
+        $last_last_nonblank_type_sequence  = EMPTY_STRING;
+        $last_nonblank_prototype           = EMPTY_STRING;
          return;
-    }
+    } ## end sub initialize_tokenizer_state
  
      sub save_tokenizer_state {
  
@@ -1508,8 +1583,7 @@ sub prepare_for_a_new_file {
              $quoted_string_2, $allowed_quote_modifiers,
          ];
  
-        my $rTV4 =
-          [ $id_scan_state, $identifier, $want_paren, $indented_if_level ];
+        my $rTV4 = [ $id_scan_state, $identifier, $want_paren ];
  
          my $rTV5 = [
              $nesting_token_string,      $nesting_type_string,
@@ -1531,7 +1605,7 @@ sub prepare_for_a_new_file {
              $last_nonblank_prototype,
          ];
          return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ];
-    }
+    } ## end sub save_tokenizer_state
  
      sub restore_tokenizer_state {
          my ($rstate) = @_;
@@ -1549,7 +1623,7 @@ sub prepare_for_a_new_file {
          (
              $routput_token_list,    $routput_token_type,
              $routput_block_type,    $routput_container_type,
-            $routput_type_sequence, $routput_type_sequence,
+            $routput_type_sequence, $routput_indent_flag,
          ) = @{$rTV2};
  
          (
@@ -1557,8 +1631,7 @@ sub prepare_for_a_new_file {
              $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
          ) = @{$rTV3};
  
-        ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) =
-          @{$rTV4};
+        ( $id_scan_state, $identifier, $want_paren ) = @{$rTV4};
  
          (
              $nesting_token_string,      $nesting_type_string,
@@ -1580,12 +1653,93 @@ sub prepare_for_a_new_file {
              $last_nonblank_prototype,
          ) = @{$rTV6};
          return;
-    }
+    } ## end sub restore_tokenizer_state
  
-    sub get_indentation_level {
+    sub split_pretoken {
+
+        my ($numc) = @_;
  
-        # patch to avoid reporting error if indented if is not terminated
-        if ($indented_if_level) { return $level_in_tokenizer - 1 }
+     # Split the leading $numc characters from the current token (at index=$i)
+     # which is pre-type 'w' and insert the remainder back into the pretoken
+     # stream with appropriate settings.  Since we are splitting a pre-type 'w',
+     # there are three cases, depending on if the remainder starts with a digit:
+     # Case 1: remainder is type 'd', all digits
+     # Case 2: remainder is type 'd' and type 'w': digits and other characters
+     # Case 3: remainder is type 'w'
+
+        # Examples, for $numc=1:
+        #   $tok    => $tok_0 $tok_1 $tok_2
+        #   'x10'   => 'x'    '10'                # case 1
+        #   'x10if' => 'x'    '10'   'if'         # case 2
+        #   'One'   => 'O'            'ne'        # case 3
+
+        # where:
+        #   $tok_1 is a possible string of digits (pre-type 'd')
+        #   $tok_2 is a possible word (pre-type 'w')
+
+        # return 1 if successful
+        # return undef if error (shouldn't happen)
+
+        # Calling routine should update '$type' and '$tok' if successful.
+
+        my $pretoken = $rtokens->[$i];
+        if (   $pretoken
+            && length($pretoken) > $numc
+            && substr( $pretoken, $numc ) =~ /^(\d*)(.*)$/ )
+        {
+
+            # Split $tok into up to 3 tokens:
+            my $tok_0 = substr( $pretoken, 0, $numc );
+            my $tok_1 = defined($1) ? $1 : EMPTY_STRING;
+            my $tok_2 = defined($2) ? $2 : EMPTY_STRING;
+
+            my $len_0 = length($tok_0);
+            my $len_1 = length($tok_1);
+            my $len_2 = length($tok_2);
+
+            my $pre_type_0 = 'w';
+            my $pre_type_1 = 'd';
+            my $pre_type_2 = 'w';
+
+            my $pos_0 = $rtoken_map->[$i];
+            my $pos_1 = $pos_0 + $len_0;
+            my $pos_2 = $pos_1 + $len_1;
+
+            my $isplice = $i + 1;
+
+            # Splice in any digits
+            if ($len_1) {
+                splice @{$rtoken_map},  $isplice, 0, $pos_1;
+                splice @{$rtokens},     $isplice, 0, $tok_1;
+                splice @{$rtoken_type}, $isplice, 0, $pre_type_1;
+                $max_token_index++;
+                $isplice++;
+            }
+
+            # Splice in any trailing word
+            if ($len_2) {
+                splice @{$rtoken_map},  $isplice, 0, $pos_2;
+                splice @{$rtokens},     $isplice, 0, $tok_2;
+                splice @{$rtoken_type}, $isplice, 0, $pre_type_2;
+                $max_token_index++;
+            }
+
+            $rtokens->[$i] = $tok_0;
+            return 1;
+        }
+        else {
+
+            # Shouldn't get here
+            if (DEVEL_MODE) {
+                Fault(<<EOM);
+While working near line number $input_line_number, bad arg '$tok' passed to sub split_pretoken()
+EOM
+            }
+        }
+        return;
+    } ## end sub split_pretoken
+
+    sub get_indentation_level {
          return $level_in_tokenizer;
      }
  
@@ -1605,6 +1759,125 @@ sub prepare_for_a_new_file {
      # end of tokenizer variable access and manipulation routines
      # ------------------------------------------------------------
  
+    #------------------------------
+    # beginning of tokenizer hashes
+    #------------------------------
+
+    my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
+
+    # These block types terminate statements and do not need a trailing
+    # semicolon
+    # patched for SWITCH/CASE/
+    my %is_zero_continuation_block_type;
+    my @q;
+    @q = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
+      if elsif else unless while until for foreach switch case given when);
+    @is_zero_continuation_block_type{@q} = (1) x scalar(@q);
+
+    my %is_logical_container;
+    @q = qw(if elsif unless while and or err not && !  || for foreach);
+    @is_logical_container{@q} = (1) x scalar(@q);
+
+    my %is_binary_type;
+    @q = qw(|| &&);
+    @is_binary_type{@q} = (1) x scalar(@q);
+
+    my %is_binary_keyword;
+    @q = qw(and or err eq ne cmp);
+    @is_binary_keyword{@q} = (1) x scalar(@q);
+
+    # 'L' is token for opening { at hash key
+    my %is_opening_type;
+    @q = qw< L { ( [ >;
+    @is_opening_type{@q} = (1) x scalar(@q);
+
+    # 'R' is token for closing } at hash key
+    my %is_closing_type;
+    @q = qw< R } ) ] >;
+    @is_closing_type{@q} = (1) x scalar(@q);
+
+    my %is_redo_last_next_goto;
+    @q = qw(redo last next goto);
+    @is_redo_last_next_goto{@q} = (1) x scalar(@q);
+
+    my %is_use_require;
+    @q = qw(use require);
+    @is_use_require{@q} = (1) x scalar(@q);
+
+    # This hash holds the array index in $tokenizer_self for these keywords:
+    # Fix for issue c035: removed 'format' from this hash
+    my %is_END_DATA = (
+        '__END__'  => _in_end_,
+        '__DATA__' => _in_data_,
+    );
+
+    my %is_list_end_type;
+    @q = qw( ; { } );
+    push @q, ',';
+    @is_list_end_type{@q} = (1) x scalar(@q);
+
+    # original ref: camel 3 p 147,
+    # but perl may accept undocumented flags
+    # perl 5.10 adds 'p' (preserve)
+    # Perl version 5.22 added 'n'
+    # From http://perldoc.perl.org/perlop.html we have
+    # /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc
+    # s/PATTERN/REPLACEMENT/msixpodualngcer
+    # y/SEARCHLIST/REPLACEMENTLIST/cdsr
+    # tr/SEARCHLIST/REPLACEMENTLIST/cdsr
+    # qr/STRING/msixpodualn
+    my %quote_modifiers = (
+        's'  => '[msixpodualngcer]',
+        'y'  => '[cdsr]',
+        'tr' => '[cdsr]',
+        'm'  => '[msixpodualngc]',
+        'qr' => '[msixpodualn]',
+        'q'  => EMPTY_STRING,
+        'qq' => EMPTY_STRING,
+        'qw' => EMPTY_STRING,
+        'qx' => EMPTY_STRING,
+    );
+
+    # table showing how many quoted things to look for after quote operator..
+    # s, y, tr have 2 (pattern and replacement)
+    # others have 1 (pattern only)
+    my %quote_items = (
+        's'  => 2,
+        'y'  => 2,
+        'tr' => 2,
+        'm'  => 1,
+        'qr' => 1,
+        'q'  => 1,
+        'qq' => 1,
+        'qw' => 1,
+        'qx' => 1,
+    );
+
+    my %is_for_foreach;
+    @q = qw(for foreach);
+    @is_for_foreach{@q} = (1) x scalar(@q);
+
+    my %is_my_our_state;
+    @q = qw(my our state);
+    @is_my_our_state{@q} = (1) x scalar(@q);
+
+    # These keywords may introduce blocks after parenthesized expressions,
+    # in the form:
+    # keyword ( .... ) { BLOCK }
+    # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
+    my %is_blocktype_with_paren;
+    @q =
+      qw(if elsif unless while until for foreach switch case given when catch);
+    @is_blocktype_with_paren{@q} = (1) x scalar(@q);
+
+    my %is_case_default;
+    @q = qw(case default);
+    @is_case_default{@q} = (1) x scalar(@q);
+
+    #------------------------
+    # end of tokenizer hashes
+    #------------------------
+
      # ------------------------------------------------------------
      # beginning of various scanner interface routines
      # ------------------------------------------------------------
@@ -1646,7 +1919,7 @@ sub prepare_for_a_new_file {
              @brace_package,                  @square_bracket_type,
              @square_bracket_structural_type, @depth_array,
              @starting_line_of_current_depth, @nested_ternary_flag,
-            @nested_statement_type,
+            @nested_statement_type,          $next_sequence_number,
          );
  
          # save all lexical variables
@@ -1654,12 +1927,10 @@ sub prepare_for_a_new_file {
          _decrement_count();    # avoid error check for multiple tokenizers
  
          # make a new tokenizer
-        my $rOpts = {};
-        my $rpending_logfile_message;
+        my $rOpts         = {};
          my $source_object = Perl::Tidy::LineSource->new(
-            input_file               => \$replacement_text,
-            rOpts                    => $rOpts,
-            rpending_logfile_message => $rpending_logfile_message,
+            input_file => \$replacement_text,
+            rOpts      => $rOpts,
          );
          my $tokenizer = Perl::Tidy::Tokenizer->new(
              source_object        => $source_object,
@@ -1696,7 +1967,7 @@ sub prepare_for_a_new_file {
  
          # return the here doc targets
          return $rht;
-    }
+    } ## end sub scan_replacement_text
  
      sub scan_bare_identifier {
          ( $i, $tok, $type, $prototype ) =
@@ -1706,11 +1977,40 @@ sub prepare_for_a_new_file {
      }
  
      sub scan_identifier {
-        ( $i, $tok, $type, $id_scan_state, $identifier ) =
-          scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
+        (
+            $i, $tok, $type, $id_scan_state, $identifier,
+            my $split_pretoken_flag
+          )
+          = scan_complex_identifier( $i, $id_scan_state, $identifier, $rtokens,
              $max_token_index, $expecting, $paren_type[$paren_depth] );
+
+        # Check for signal to fix a special variable adjacent to a keyword,
+        # such as '$^One$0'.
+        if ($split_pretoken_flag) {
+
+            # Try to fix it by splitting the pretoken
+            if (   $i > 0
+                && $rtokens->[ $i - 1 ] eq '^'
+                && split_pretoken(1) )
+            {
+                $identifier = substr( $identifier, 0, 3 );
+                $tok        = $identifier;
+            }
+            else {
+
+                # This shouldn't happen ...
+                my $var    = substr( $tok, 0, 3 );
+                my $excess = substr( $tok, 3 );
+                interrupt_logfile();
+                warning(<<EOM);
+$input_line_number: Trouble parsing at characters '$excess' after special variable '$var'.
+A space may be needed after '$var'. 
+EOM
+                resume_logfile();
+            }
+        }
          return;
-    }
+    } ## end sub scan_identifier
  
      use constant VERIFY_FASTSCAN => 0;
      my %fast_scan_context;
@@ -1725,7 +2025,7 @@ sub prepare_for_a_new_file {
          );
      }
  
-    sub scan_identifier_fast {
+    sub scan_simple_identifier {
  
          # This is a wrapper for sub scan_identifier. It does a fast preliminary
          # scan for certain common identifiers:
@@ -1736,24 +2036,41 @@ sub prepare_for_a_new_file {
          # This gives the same results as the full scanner in about 1/4 the
          # total runtime for a typical input stream.
  
+        # Notation:
+        #     $var * 2
+        #     ^^   ^
+        #     ||   |
+        #     ||   ---- $i_next [= next nonblank pretoken ]
+        #     |----$i_plus_1 [= a bareword ]
+        #     ---$i_begin [= a sigil]
+
          my $i_begin   = $i;
          my $tok_begin = $tok;
+        my $i_plus_1  = $i + 1;
          my $fast_scan_type;
  
-        ###############################
+        #-------------------------------------------------------
+        # Do full scan for anything following a pointer, such as
+        #      $cref->&*;    # a postderef
+        #-------------------------------------------------------
+        if ( $last_nonblank_token eq '->' ) {
+
+        }
+
+        #------------------------------
          # quick scan with leading sigil
-        ###############################
-        if (  !$id_scan_state
-            && $i + 1 <= $max_token_index
+        #------------------------------
+        elsif ( !$id_scan_state
+            && $i_plus_1 <= $max_token_index
              && $fast_scan_context{$tok} )
          {
              $context = $fast_scan_context{$tok};
  
              # look for $var, @var, ...
-            if ( $rtoken_type->[ $i + 1 ] eq 'w' ) {
-                my $pretype_next = "";
-                my $i_next       = $i + 2;
-                if ( $i_next <= $max_token_index ) {
+            if ( $rtoken_type->[$i_plus_1] eq 'w' ) {
+                my $pretype_next = EMPTY_STRING;
+                if ( $i_plus_1 < $max_token_index ) {
+                    my $i_next = $i_plus_1 + 1;
                      if (   $rtoken_type->[$i_next] eq 'b'
                          && $i_next < $max_token_index )
                      {
@@ -1764,10 +2081,10 @@ sub prepare_for_a_new_file {
                  if ( $pretype_next ne ':' && $pretype_next ne "'" ) {
  
                      # Found type 'i' like '$var', '@var', or '%var'
-                    $identifier     = $tok . $rtokens->[ $i + 1 ];
+                    $identifier     = $tok . $rtokens->[$i_plus_1];
                      $tok            = $identifier;
                      $type           = 'i';
-                    $i              = $i + 1;
+                    $i              = $i_plus_1;
                      $fast_scan_type = $type;
                  }
              }
@@ -1776,7 +2093,7 @@ sub prepare_for_a_new_file {
              # But we must let the full scanner handle things ${ because it may
              # keep going to get a complete identifier like '${#}'  .
              elsif (
-                $rtoken_type->[ $i + 1 ] eq '{'
+                $rtoken_type->[$i_plus_1] eq '{'
                  && (   $tok_begin eq '@'
                      || $tok_begin eq '%' )
                )
@@ -1788,15 +2105,15 @@ sub prepare_for_a_new_file {
              }
          }
  
-        ############################
+        #---------------------------
          # Quick scan with leading ->
          # Look for ->[ and ->{
-        ############################
+        #---------------------------
          elsif (
                 $tok eq '->'
              && $i < $max_token_index
-            && (   $rtokens->[ $i + 1 ] eq '{'
-                || $rtokens->[ $i + 1 ] eq '[' )
+            && (   $rtokens->[$i_plus_1] eq '{'
+                || $rtokens->[$i_plus_1] eq '[' )
            )
          {
              $type           = $tok;
@@ -1805,15 +2122,14 @@ sub prepare_for_a_new_file {
              $context        = UNKNOWN_CONTEXT;
          }
  
-        #######################################
+        #--------------------------------------
          # Verify correctness during development
-        #######################################
+        #--------------------------------------
          if ( VERIFY_FASTSCAN && $fast_scan_type ) {
  
              # We will call the full method
              my $identifier_simple = $identifier;
              my $tok_simple        = $tok;
-            my $fast_scan_type    = $type;
              my $i_simple          = $i;
              my $context_simple    = $context;
  
@@ -1829,21 +2145,21 @@ sub prepare_for_a_new_file {
                  || $context ne $context_simple )
              {
                  print STDERR <<EOM;
-scan_identifier_fast differs from scan_identifier:
+scan_simple_identifier differs from scan_identifier:
  simple:  i=$i_simple, tok=$tok_simple, type=$fast_scan_type, ident=$identifier_simple, context='$context_simple
  full:    i=$i, tok=$tok, type=$type, ident=$identifier, context='$context state=$id_scan_state
  EOM
              }
          }
  
-        ###################################################
+        #-------------------------------------------------
          # call full scanner if fast method did not succeed
-        ###################################################
+        #-------------------------------------------------
          if ( !$fast_scan_type ) {
              scan_identifier();
          }
          return;
-    }
+    } ## end sub scan_simple_identifier
  
      sub scan_id {
          ( $i, $tok, $type, $id_scan_state ) =
@@ -1872,9 +2188,9 @@ EOM
          my $tok_begin = $tok;
          my $number;
  
-        ##################################
+        #---------------------------------
          # Quick check for (signed) integer
-        ##################################
+        #---------------------------------
  
          # This will be the string of digits:
          my $i_d   = $i;
@@ -1882,7 +2198,7 @@ EOM
          my $typ_d = $rtoken_type->[$i_d];
  
          # check for signed integer
-        my $sign = "";
+        my $sign = EMPTY_STRING;
          if (   $typ_d ne 'd'
              && ( $typ_d eq '+' || $typ_d eq '-' )
              && $i_d < $max_token_index )
@@ -1915,9 +2231,9 @@ EOM
              }
          }
  
-        #######################################
+        #--------------------------------------
          # Verify correctness during development
-        #######################################
+        #--------------------------------------
          if ( VERIFY_FASTNUM && defined($number) ) {
  
              # We will call the full method
@@ -1941,14 +2257,14 @@ EOM
              }
          }
  
-        #########################################
+        #----------------------------------------
          # call full scanner if may not be integer
-        #########################################
+        #----------------------------------------
          if ( !defined($number) ) {
              $number = scan_number();
          }
          return $number;
-    }
+    } ## end sub scan_number_fast
  
      # a sub to warn if token found where term expected
      sub error_if_expecting_TERM {
@@ -1960,7 +2276,7 @@ EOM
              }
          }
          return;
-    }
+    } ## end sub error_if_expecting_TERM
  
      # a sub to warn if token found where operator expected
      sub error_if_expecting_OPERATOR {
@@ -1977,1113 +2293,2027 @@ EOM
              return 1;
          }
          return;
-    }
+    } ## end sub error_if_expecting_OPERATOR
  
      # ------------------------------------------------------------
      # end scanner interfaces
      # ------------------------------------------------------------
  
-    my %is_for_foreach;
-    @_ = qw(for foreach);
-    @is_for_foreach{@_} = (1) x scalar(@_);
+    #------------------
+    # Tokenization subs
+    #------------------
+    sub do_GREATER_THAN_SIGN {
  
-    my %is_my_our_state;
-    @_ = qw(my our state);
-    @is_my_our_state{@_} = (1) x scalar(@_);
+        # '>' : only a syntax check; warn if a term was expected here
+        error_if_expecting_TERM()
+          if ( $expecting == TERM );
+        return;
+    } ## end sub do_GREATER_THAN_SIGN
  
-    # These keywords may introduce blocks after parenthesized expressions,
-    # in the form:
-    # keyword ( .... ) { BLOCK }
-    # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
-    my %is_blocktype_with_paren;
-    @_ =
-      qw(if elsif unless while until for foreach switch case given when catch);
-    @is_blocktype_with_paren{@_} = (1) x scalar(@_);
+    sub do_VERTICAL_LINE {
  
-    my %is_case_default;
-    @_ = qw(case default);
-    @is_case_default{@_} = (1) x scalar(@_);
+        # '|' : only a syntax check; warn if a term was expected here
+        error_if_expecting_TERM()
+          if ( $expecting == TERM );
+        return;
+    } ## end sub do_VERTICAL_LINE
  
-    # ------------------------------------------------------------
-    # begin hash of code for handling most token types
-    # ------------------------------------------------------------
-    my $tokenization_code = {
+    sub do_DOLLAR_SIGN {
  
-        # no special code for these types yet, but syntax checks
-        # could be added
-
-##      '!'   => undef,
-##      '!='  => undef,
-##      '!~'  => undef,
-##      '%='  => undef,
-##      '&&=' => undef,
-##      '&='  => undef,
-##      '+='  => undef,
-##      '-='  => undef,
-##      '..'  => undef,
-##      '..'  => undef,
-##      '...' => undef,
-##      '.='  => undef,
-##      '<<=' => undef,
-##      '<='  => undef,
-##      '<=>' => undef,
-##      '<>'  => undef,
-##      '='   => undef,
-##      '=='  => undef,
-##      '=~'  => undef,
-##      '>='  => undef,
-##      '>>'  => undef,
-##      '>>=' => undef,
-##      '\\'  => undef,
-##      '^='  => undef,
-##      '|='  => undef,
-##      '||=' => undef,
-##      '//=' => undef,
-##      '~'   => undef,
-##      '~~'  => undef,
-##      '!~~'  => undef,
-
-        '>' => sub {
-            error_if_expecting_TERM()
-              if ( $expecting == TERM );
-        },
-        '|' => sub {
-            error_if_expecting_TERM()
-              if ( $expecting == TERM );
-        },
-        '$' => sub {
-
-            # start looking for a scalar
-            error_if_expecting_OPERATOR("Scalar")
-              if ( $expecting == OPERATOR );
-            scan_identifier_fast();
+        # '$' : scalar sigil; the identifier may be retyped 'Z' (see below)
+        # start looking for a scalar; it is an error where an operator is due
+        error_if_expecting_OPERATOR("Scalar")
+          if ( $expecting == OPERATOR );
+        scan_simple_identifier();
  
-            if ( $identifier eq '$^W' ) {
-                $tokenizer_self->[_saw_perl_dash_w_] = 1;
-            }
+        if ( $identifier eq '$^W' ) {
+            $tokenizer_self->[_saw_perl_dash_w_] = 1;    # note use of $^W
+        }
  
-            # Check for identifier in indirect object slot
-            # (vorboard.pl, sort.t).  Something like:
-            #   /^(print|printf|sort|exec|system)$/
-            if (
-                $is_indirect_object_taker{$last_nonblank_token}
-                || ( ( $last_nonblank_token eq '(' )
-                    && $is_indirect_object_taker{ $paren_type[$paren_depth] } )
-                || (   $last_nonblank_type eq 'w'
-                    || $last_nonblank_type eq 'U' )    # possible object
-              )
-            {
-                $type = 'Z';
-            }
-        },
-        '(' => sub {
+        # Check for identifier in indirect object slot
+        # (vorboard.pl, sort.t).  The previous token is something like:
+        #   /^(print|printf|sort|exec|system)$/
+        if (
+               $is_indirect_object_taker{$last_nonblank_token}
+            && $last_nonblank_type eq 'k'    # and it must be type 'k'
+            || ( ( $last_nonblank_token eq '(' )
+                && $is_indirect_object_taker{ $paren_type[$paren_depth] } )
+            || (   $last_nonblank_type eq 'w'
+                || $last_nonblank_type eq 'U' )    # possible object
+          )
+        {
  
-            ++$paren_depth;
-            $paren_semicolon_count[$paren_depth] = 0;
-            if ($want_paren) {
-                $container_type = $want_paren;
-                $want_paren     = "";
-            }
-            elsif ( $statement_type =~ /^sub\b/ ) {
-                $container_type = $statement_type;
-            }
-            else {
-                $container_type = $last_nonblank_token;
+            # An identifier followed by '->' is not indirect object;
+            # fixes b1175, b1176 ($i_next is not used here)
+            my ( $next_nonblank_type, $i_next ) =
+              find_next_noncomment_type( $i, $rtokens, $max_token_index );
+            $type = 'Z' if ( $next_nonblank_type ne '->' );
+        }
+        return;
+    } ## end sub do_DOLLAR_SIGN
  
-                # We can check for a syntax error here of unexpected '(',
-                # but this is going to get messy...
-                if (
-                    $expecting == OPERATOR
+    sub do_LEFT_PARENTHESIS {
  
-                    # Be sure this is not a method call of the form
-                    # &method(...), $method->(..), &{method}(...),
-                    # $ref[2](list) is ok & short for $ref[2]->(list)
-                    # NOTE: at present, braces in something like &{ xxx }
-                    # are not marked as a block, we might have a method call.
-                    # Added ')' to fix case c017, something like ()()()
-                    && $last_nonblank_token !~ /^([\]\}\)\&]|\-\>)/
+        # '(' : open a paren level; record its container and structural type
+        ++$paren_depth;
+        $paren_semicolon_count[$paren_depth] = 0;
+        if ($want_paren) {
+            $container_type = $want_paren;
+            $want_paren     = EMPTY_STRING;
+        }
+        elsif ( $statement_type =~ /^sub\b/ ) {
+            $container_type = $statement_type;
+        }
+        else {
+            $container_type = $last_nonblank_token;
  
-                  )
-                {
+            # We can check for a syntax error here of unexpected '(',
+            # but this is going to get messy...
+            if (
+                $expecting == OPERATOR
+
+                # Be sure this is not a method call of the form
+                # &method(...), $method->(..), &{method}(...),
+                # $ref[2](list) is ok & short for $ref[2]->(list)
+                # NOTE: at present, braces in something like &{ xxx }
+                # are not marked as a block, we might have a method call.
+                # Added ')' to fix case c017, something like ()()()
+                && $last_nonblank_token !~ /^([\]\}\)\&]|\-\>)/
+              )
+            {
  
-                    # ref: camel 3 p 703.
-                    if ( $last_last_nonblank_token eq 'do' ) {
-                        complain(
+                # ref: camel 3 p 703.
+                if ( $last_last_nonblank_token eq 'do' ) {
+                    complain(
  "do SUBROUTINE is deprecated; consider & or -> notation\n"
-                        );
-                    }
-                    else {
+                    );
+                }
+                else {
  
-                        # if this is an empty list, (), then it is not an
-                        # error; for example, we might have a constant pi and
-                        # invoke it with pi() or just pi;
-                        my ( $next_nonblank_token, $i_next ) =
-                          find_next_nonblank_token( $i, $rtokens,
-                            $max_token_index );
+                    # if this is an empty list, (), then it is not an
+                    # error; for example, we might have a constant pi and
+                    # invoke it with pi() or just pi;
+                    my ( $next_nonblank_token, $i_next ) =
+                      find_next_nonblank_token( $i, $rtokens,
+                        $max_token_index );
  
-                        # Patch for c029: give up error check if
-                        # a side comment follows
-                        if (   $next_nonblank_token ne ')'
-                            && $next_nonblank_token ne '#' )
-                        {
-                            my $hint;
+                    # Patch for c029: give up error check if
+                    # a side comment follows
+                    if (   $next_nonblank_token ne ')'
+                        && $next_nonblank_token ne '#' )
+                    {
+                        my $hint;
  
-                            error_if_expecting_OPERATOR('(');
+                        error_if_expecting_OPERATOR('(');
  
-                            if ( $last_nonblank_type eq 'C' ) {
+                        if ( $last_nonblank_type eq 'C' ) {
+                            $hint =
+                              "$last_nonblank_token has a void prototype\n";
+                        }
+                        elsif ( $last_nonblank_type eq 'i' ) {
+                            if (   $i_tok > 0
+                                && $last_nonblank_token =~ /^\$/ )
+                            {
                                  $hint =
-                                  "$last_nonblank_token has a void prototype\n";
-                            }
-                            elsif ( $last_nonblank_type eq 'i' ) {
-                                if (   $i_tok > 0
-                                    && $last_nonblank_token =~ /^\$/ )
-                                {
-                                    $hint =
-"Do you mean '$last_nonblank_token->(' ?\n";
-                                }
-                            }
-                            if ($hint) {
-                                interrupt_logfile();
-                                warning($hint);
-                                resume_logfile();
+                                  "Do you mean '$last_nonblank_token->(' ?\n";
                              }
-                        } ## end if ( $next_nonblank_token...
-                    } ## end else [ if ( $last_last_nonblank_token...
-                } ## end if ( $expecting == OPERATOR...
-            }
-            $paren_type[$paren_depth] = $container_type;
-            ( $type_sequence, $indent_flag ) =
-              increase_nesting_depth( PAREN, $rtoken_map->[$i_tok] );
-
-            # propagate types down through nested parens
-            # for example: the second paren in 'if ((' would be structural
-            # since the first is.
-
-            if ( $last_nonblank_token eq '(' ) {
-                $type = $last_nonblank_type;
+                        }
+                        if ($hint) {
+                            interrupt_logfile();
+                            warning($hint);
+                            resume_logfile();
+                        }
+                    } ## end if ( $next_nonblank_token...
+                } ## end else [ if ( $last_last_nonblank_token...
+            } ## end if ( $expecting == OPERATOR...
+        }
+
+        # Do not update container type at ') ('; fix for git #105.  This will
+        # propagate the container type onward so that any subsequent brace gets
+        # correctly marked.  I have implemented this as a general rule, which
+        # should be safe, but if necessary it could be restricted to certain
+        # container statement types such as 'for'.
+        $paren_type[$paren_depth] = $container_type
+          if ( $last_nonblank_token ne ')' );
+
+        ( $type_sequence, $indent_flag ) =
+          increase_nesting_depth( PAREN, $rtoken_map->[$i_tok] );
+
+        # propagate types down through nested parens
+        # for example: the second paren in 'if ((' would be structural
+        # since the first is.
+
+        if ( $last_nonblank_token eq '(' ) {
+            $type = $last_nonblank_type;
+        }
+
+        #     We exclude parens as structural after a ',' because it
+        #     causes subtle problems with continuation indentation for
+        #     something like this, where the first 'or' will not get
+        #     indented.
+        #
+        #         assert(
+        #             __LINE__,
+        #             ( not defined $check )
+        #               or ref $check
+        #               or $check eq "new"
+        #               or $check eq "old",
+        #         );
+        #
+        #     Likewise, we exclude parens where a statement can start
+        #     because of problems with continuation indentation, like
+        #     these:
+        #
+        #         ($firstline =~ /^#\!.*perl/)
+        #         and (print $File::Find::name, "\n")
+        #           and (return 1);
+        #
+        #         (ref($usage_fref) =~ /CODE/)
+        #         ? &$usage_fref
+        #           : (&blast_usage, &blast_params, &blast_general_params);
+
+        else {
+            $type = '{';
+        }
+
+        if ( $last_nonblank_type eq ')' ) {
+            warning(
+                "Syntax error? found token '$last_nonblank_type' then '('\n");
+        }
+        $paren_structural_type[$paren_depth] = $type;    # read back at the ')'
+        return;
+
+    } ## end sub do_LEFT_PARENTHESIS
+
+    sub do_RIGHT_PARENTHESIS {
+
+        # ')' : close a paren level using the values stored at this depth
+        ( $type_sequence, $indent_flag ) =
+          decrease_nesting_depth( PAREN, $rtoken_map->[$i_tok] );
+
+        if ( $paren_structural_type[$paren_depth] eq '{' ) {
+            $type = '}';
+        }
+
+        $container_type = $paren_type[$paren_depth];
+
+        # restore statement type as 'sub' at closing paren of a signature
+        # so that a subsequent ':' is identified as an attribute
+        if ( $container_type =~ /^sub\b/ ) {
+            $statement_type = $container_type;
+        }
+
+        #    container is one of /^(for|foreach)$/: check the ';' count
+        if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {
+            my $num_sc = $paren_semicolon_count[$paren_depth];
+            if ( $num_sc > 0 && $num_sc != 2 ) {
+                warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
              }
+        }
  
-            #     We exclude parens as structural after a ',' because it
-            #     causes subtle problems with continuation indentation for
-            #     something like this, where the first 'or' will not get
-            #     indented.
-            #
-            #         assert(
-            #             __LINE__,
-            #             ( not defined $check )
-            #               or ref $check
-            #               or $check eq "new"
-            #               or $check eq "old",
-            #         );
-            #
-            #     Likewise, we exclude parens where a statement can start
-            #     because of problems with continuation indentation, like
-            #     these:
-            #
-            #         ($firstline =~ /^#\!.*perl/)
-            #         and (print $File::Find::name, "\n")
-            #           and (return 1);
+        if ( $paren_depth > 0 ) { $paren_depth-- }    # do not go below zero
+        return;
+    } ## end sub do_RIGHT_PARENTHESIS
+
+    sub do_COMMA {
+
+        # Handle a ',' token: report suspicious comma placement and note
+        # when we are within the list of a 'use' statement.
+        if ( $last_nonblank_type ne ',' ) {
+
+            # Note that we have to check both token and type here because a
+            # comma following a qw list can have last token='(' but type = 'q'
+            warning("Unexpected leading ',' after a '('\n")
+              if ( $last_nonblank_token eq '(' && $last_nonblank_type eq '{' );
+        }
+        else {
+            complain("Repeated ','s \n");
+        }
+
+        # patch for operator_expected: note if we are in the list (use.t)
+        $statement_type = '_use' if ( $statement_type eq 'use' );
+        return;
+
+    } ## end sub do_COMMA
+
+    sub do_SEMICOLON {
+
+        # ';' : reset statement state; a ';' of a 'for (;;)' becomes type 'f'
+        $context        = UNKNOWN_CONTEXT;
+        $statement_type = EMPTY_STRING;
+        $want_paren     = EMPTY_STRING;
+
+        #    container is one of /^(for|foreach)$/
+        if ( $is_for_foreach{ $paren_type[$paren_depth] } )
+        {    # mark ; in for loop
+
+            # Be careful: we do not want a semicolon such as the
+            # following to be included:
              #
-            #         (ref($usage_fref) =~ /CODE/)
-            #         ? &$usage_fref
-            #           : (&blast_usage, &blast_params, &blast_general_params);
+            #    for (sort {strcoll($a,$b);} keys %investments) {
+
+            if (   $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]
+                && $square_bracket_depth ==
+                $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )
+            {
+
+                $type = 'f';
+                $paren_semicolon_count[$paren_depth]++;    # checked at the ')'
+            }
+        }
+        return;
+    } ## end sub do_SEMICOLON
+
+    sub do_QUOTATION_MARK {
+
+        # Handle '"', the start of a quoted string.
+        # A string is not allowed where an operator is expected.
+        if ( $expecting == OPERATOR ) {
+            error_if_expecting_OPERATOR("String");
+        }
+
+        # Start a quote of type 'Q' which accepts no trailing modifiers
+        $allowed_quote_modifiers = EMPTY_STRING;
+        $type                    = 'Q';
+        $in_quote                = 1;
+        return;
+    } ## end sub do_QUOTATION_MARK
+
+    sub do_APOSTROPHE {
+
+        # Handle "'", the start of a quoted string.
+        # A string is not allowed where an operator is expected.
+        if ( $expecting == OPERATOR ) {
+            error_if_expecting_OPERATOR("String");
+        }
+
+        # Start a quote of type 'Q' which accepts no trailing modifiers
+        $allowed_quote_modifiers = EMPTY_STRING;
+        $type                    = 'Q';
+        $in_quote                = 1;
+        return;
+    } ## end sub do_APOSTROPHE
+
+    sub do_BACKTICK {
+
+        # Handle '`', the start of a quoted string.
+        # A string is not allowed where an operator is expected.
+        if ( $expecting == OPERATOR ) {
+            error_if_expecting_OPERATOR("String");
+        }
+
+        # Start a quote of type 'Q' which accepts no trailing modifiers
+        $allowed_quote_modifiers = EMPTY_STRING;
+        $type                    = 'Q';
+        $in_quote                = 1;
+        return;
+    } ## end sub do_BACKTICK
+
+    sub do_SLASH {
+
+        # Handle '/': decide if it starts a pattern or is a divide
+        # operator, and form the '/=' token if necessary.
+        my $is_pattern;
+
+        # a pattern cannot follow certain keywords which take optional
+        # arguments, like 'shift' and 'pop'. See also '?'.
+        if ( $last_nonblank_type eq 'k'
+            && $is_keyword_rejecting_slash_as_pattern_delimiter{
+                $last_nonblank_token} )
+        {
+            $is_pattern = 0;
+        }
+
+        # the expected token type is known, so it decides the question
+        elsif ( $expecting != UNKNOWN ) {
+            $is_pattern = ( $expecting == TERM );
+        }
+
+        # indeterminate, must guess..
+        else {
+            my $msg;
+            ( $is_pattern, $msg ) =
+              guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
+                $max_token_index );
+
+            if ($msg) {
+                write_diagnostics("DIVIDE:$msg\n");
+                write_logfile_entry($msg);
+            }
+        }
+
+        # not a pattern; check for a /= token
+        if ( !$is_pattern ) {
+
+            # form token /=
+            if ( $rtokens->[ $i + 1 ] eq '=' ) {
+                $i++;
+                $tok = $type = '/=';
+            }
+
+           #DEBUG - collecting info on what tokens follow a divide
+           # for development of guessing algorithm
+           #if ( is_possible_numerator( $i, $rtokens, $max_token_index ) < 0 ) {
+           #    #write_diagnostics( "DIVIDE? $input_line\n" );
+           #}
+            return;
+        }
+
+        # a pattern: start a quote with these allowed trailing modifiers
+        $allowed_quote_modifiers = '[msixpodualngc]';
+        $type                    = 'Q';
+        $in_quote                = 1;
+        return;
+    } ## end sub do_SLASH
+
+    sub do_LEFT_CURLY_BRACKET {
+
+        # '{' : classify an opening curly brace and store its state by depth.
+        # if we just saw a ')', we will label this block with
+        # its type.  We need to do this to allow sub
+        # code_block_type to determine if this brace starts a
+        # code block or anonymous hash.  (The type of a paren
+        # pair is the preceding token, such as 'if', 'else',
+        # etc).
+        $container_type = EMPTY_STRING;
+
+        # ATTRS: for a '{' following an attribute list, reset
+        # things to look like we just saw the sub name
+        if ( $statement_type =~ /^sub\b/ ) {
+            $last_nonblank_token = $statement_type;
+            $last_nonblank_type  = 'i';
+            $statement_type      = EMPTY_STRING;
+        }
+
+        # patch for SWITCH/CASE: hide these keywords from an immediately
+        # following opening brace
+        elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
+            && $statement_type eq $last_nonblank_token )
+        {
+            $last_nonblank_token = ";";
+        }
+
+        elsif ( $last_nonblank_token eq ')' ) {
+            $last_nonblank_token = $paren_type[ $paren_depth + 1 ];
+
+            # defensive move in case of a nesting error (pbug.t)
+            # in which this ')' had no previous '('
+            # this nesting error will have been caught
+            if ( !defined($last_nonblank_token) ) {
+                $last_nonblank_token = 'if';
+            }
+
+            # check for syntax error here;
+            unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
+                if ( $tokenizer_self->[_extended_syntax_] ) {
+
+                    # we append a trailing () to mark this as an unknown
+                    # block type.  This allows perltidy to format some
+                    # common extensions of perl syntax.
+                    # This is used by sub code_block_type
+                    $last_nonblank_token .= '()';
+                }
+                else {
+                    my $list =
+                      join( SPACE, sort keys %is_blocktype_with_paren );
+                    warning(
+"syntax error at ') {', didn't see one of: <<$list>>; If this code is okay try using the -xs flag\n"
+                    );
+                }
+            }
+        }
+
+        # patch for paren-less for/foreach glitch, part 2.
+        # see note below under 'qw'
+        elsif ($last_nonblank_token eq 'qw'
+            && $is_for_foreach{$want_paren} )
+        {
+            $last_nonblank_token = $want_paren;
+            if ( $last_last_nonblank_token eq $want_paren ) {
+                warning(
+"syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
+                );
+
+            }
+            $want_paren = EMPTY_STRING;
+        }
+
+        # now identify which of the three possible types of
+        # curly braces we have: hash index container, anonymous
+        # hash reference, or code block.
+
+        # non-structural (hash index) curly brace pair
+        # get marked 'L' and 'R'
+        if ( is_non_structural_brace() ) {
+            $type = 'L';
+
+            # patch for SWITCH/CASE:
+            # allow paren-less identifier after 'when'
+            # if the brace is preceded by a space
+            if (   $statement_type eq 'when'
+                && $last_nonblank_type eq 'i'
+                && $last_last_nonblank_type eq 'k'
+                && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
+            {
+                $type       = '{';
+                $block_type = $statement_type;
+            }
+        }
+
+        # code and anonymous hash have the same type, '{', but are
+        # distinguished by 'block_type',
+        # which will be blank for an anonymous hash
+        else {
+
+            $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type,
+                $max_token_index );
+
+            # patch to promote bareword type to function taking block
+            if (   $block_type
+                && $last_nonblank_type eq 'w'
+                && $last_nonblank_i >= 0 )
+            {
+                if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
+                    $routput_token_type->[$last_nonblank_i] =
+                      $is_grep_alias{$block_type} ? 'k' : 'G';
+                }
+            }
+
+            # patch for SWITCH/CASE: if we find a stray opening block brace
+            # where we might accept a 'case' or 'when' block, then take it
+            if (   $statement_type eq 'case'
+                || $statement_type eq 'when' )
+            {
+                if ( !$block_type || $block_type eq '}' ) {
+                    $block_type = $statement_type;
+                }
+            }
+        }
+
+        $brace_type[ ++$brace_depth ]        = $block_type;    # new depth
+        $brace_package[$brace_depth]         = $current_package;
+        $brace_structural_type[$brace_depth] = $type;
+        $brace_context[$brace_depth]         = $context;
+        ( $type_sequence, $indent_flag ) =
+          increase_nesting_depth( BRACE, $rtoken_map->[$i_tok] );
+        return;
+    } ## end sub do_LEFT_CURLY_BRACKET
+
+    sub do_RIGHT_CURLY_BRACKET {
+
+        # Handle '}': close the current brace level, using the values
+        # which are stored for this depth.
+        $block_type = $brace_type[$brace_depth];
+        $statement_type = EMPTY_STRING if ($block_type);
+
+        # The stored package can be undefined on a brace error (caught
+        # elsewhere); the current package is then left unchanged.
+        my $saved_package = $brace_package[$brace_depth];
+        $current_package = $saved_package if ( defined($saved_package) );
+
+        ( $type_sequence, $indent_flag ) =
+          decrease_nesting_depth( BRACE, $rtoken_map->[$i_tok] );
+
+        # the partner of an opening brace of type 'L' is type 'R'
+        $type = 'R' if ( $brace_structural_type[$brace_depth] eq 'L' );
+
+        # propagate type information for 'do' and 'eval' blocks, and also
+        # for smartmatch operator.  This is necessary to enable us to know
+        # if an operator or term is expected next.
+        $tok = $block_type if ( $is_block_operator{$block_type} );
+
+        $context = $brace_context[$brace_depth];
+        $brace_depth-- if ( $brace_depth > 0 );
+        return;
+    } ## end sub do_RIGHT_CURLY_BRACKET
+
+    sub do_AMPERSAND {
+
+        # Handle '&', which may be the start of a sub call.
+        # We have to check for sub call unless we are sure we
+        # are expecting an operator.  This example from s2p
+        # got mistaken as a q operator in an early version:
+        #   print BODY &q(<<'EOT');
+        return if ( $expecting == OPERATOR );
+
+        # But only look for a sub call if we are expecting a term or
+        # if there is no existing space after the &.
+        # For example we probably don't want & as sub call here:
+        #    Fcntl::S_IRUSR & $mode;
+        my $may_be_sub_call = ( $expecting == TERM || $next_type ne 'b' );
+        if ($may_be_sub_call) {
+            scan_simple_identifier();
+        }
+        return;
+    } ## end sub do_AMPERSAND
+
+    sub do_LESS_THAN_SIGN {
+
+        # Handle '<', which is either an angle operator or a less than.
+        # Nothing needs to be done here if an operator is expected.
+        return if ( $expecting == OPERATOR );
+
+        # otherwise look for the end of a possible angle operator
+        ( $i, $type ) =
+          find_angle_operator_termination( $input_line, $i, $rtoken_map,
+            $expecting, $max_token_index );
+
+        ##  This message is not very helpful and quite confusing if the above
+        ##  routine decided not to write a message with the line number.
+        ##  if ( $type eq '<' && $expecting == TERM ) {
+        ##      error_if_expecting_TERM();
+        ##      interrupt_logfile();
+        ##      warning("Unterminated <> operator?\n");
+        ##      resume_logfile();
+        ##  }
+
+        return;
+    } ## end sub do_LESS_THAN_SIGN
+
+    sub do_QUESTION_MARK {
+
+        # Handle '?', which is either the start of a conditional or a
+        # pattern delimiter.
+        my $is_pattern;
+
+        if (
+
+            # Patch for rt #126965
+            # a pattern cannot follow certain keywords which take optional
+            # arguments, like 'shift' and 'pop'. See also '/'.
+            (
+                $last_nonblank_type eq 'k'
+                && $is_keyword_rejecting_question_as_pattern_delimiter{
+                    $last_nonblank_token}
+            )
+
+            # patch for RT#131288, user constant function without prototype
+            # last type is 'U' followed by ?.
+            || $last_nonblank_type =~ /^[FUY]$/
+          )
+        {
+            $is_pattern = 0;
+        }
+
+        # the expected token type is known, so it decides the question
+        elsif ( $expecting != UNKNOWN ) {
+            $is_pattern = ( $expecting == TERM );
+        }
+        else {
+
+            # In older versions of Perl, a bare ? can be a pattern
+            # delimiter.  In perl version 5.22 this was
+            # dropped, but we have to support it in order to format
+            # older programs. See:
+            ## https://perl.developpez.com/documentations/en/5.22.0/perl5211delta.html
+            # For example, the following line worked
+            # at one time:
+            #      ?(.*)? && (print $1,"\n");
+            # In current versions it would have to be written with slashes:
+            #      /(.*)/ && (print $1,"\n");
+            my $msg;
+            ( $is_pattern, $msg ) =
+              guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
+                $max_token_index );
+
+            write_logfile_entry($msg) if ($msg);
+        }
+
+        # not a pattern: this '?' opens a new ?/: nesting level
+        if ( !$is_pattern ) {
+            ( $type_sequence, $indent_flag ) =
+              increase_nesting_depth( QUESTION_COLON, $rtoken_map->[$i_tok] );
+            return;
+        }
+
+        # a pattern: start a quote with these allowed trailing modifiers
+        $allowed_quote_modifiers = '[msixpodualngc]';
+        $type                    = 'Q';
+        $in_quote                = 1;
+        return;
+    } ## end sub do_QUESTION_MARK
+
+    sub do_STAR {
+
+        # Handle '*', which is either the sigil of a typeglob or the
+        # start of one of the operators '*', '*=', '**', '**='.
+
+        # After a token of type 'Z' with the expected type unknown, guess
+        # that a term follows unless the next token type is one of these
+        # ('#' was added as a fix for c036).
+        if (   $expecting == UNKNOWN
+            && $last_nonblank_type eq 'Z'
+            && $next_type ne 'b'
+            && $next_type ne '('
+            && $next_type ne '#' )
+        {
+            $expecting = TERM;
+        }
+
+        # a term is expected: scan this as an identifier
+        if ( $expecting == TERM ) {
+            scan_simple_identifier();
+            return;
+        }
+
+        # otherwise form the longest operator starting at this '*'
+        my $following_tok = $rtokens->[ $i + 1 ];
+        if ( $following_tok eq '=' ) {
+            $i++;
+            $tok = $type = '*=';
+        }
+        elsif ( $following_tok eq '*' ) {
+            $i++;
+            $tok = $type = '**';
+            if ( $rtokens->[ $i + 1 ] eq '=' ) {
+                $i++;
+                $tok = $type = '**=';
+            }
+        }
+        return;
+    } ## end sub do_STAR
+
+    sub do_DOT {
+
+        # Handle '.': find out what kind of '.' this is.
+        # Nothing needs to be done here if an operator is expected.
+        return if ( $expecting == OPERATOR );
+
+        # Otherwise try to scan a number; it is an error if the type is
+        # still '.' afterward and a term was expected.
+        scan_number();
+        if ( $type eq '.' && $expecting == TERM ) {
+            error_if_expecting_TERM();
+        }
+        return;
+    } ## end sub do_DOT
+
+    sub do_COLON {
+
+        # Handle ':', which may be a label, the start of an attribute
+        # list, or part of a ternary.
+
+        # if this is the first nonblank character, call it a label
+        # since perl seems to just swallow it
+        if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
+            $type = 'J';
+        }
+
+        # ATTRS: check for a ':' which introduces an attribute list
+        elsif (
+
+            # either after a 'sub' keyword ...
+            $statement_type =~ /^sub\b/
+
+            # ... or within a signature, unless we are in a ternary.  For
+            # example, from 't/filter_example.t':
+            #    method foo4 ( $class: $bar ) { $class->bar($bar) }
+            || ( $paren_type[$paren_depth] =~ /^sub\b/
+                && !is_balanced_closing_container(QUESTION_COLON) )
+
+            # ... or for a scalar attribute, such as
+            #    my $foo : shared = 1;
+            || (   $is_my_our_state{$statement_type}
+                && $current_depth[QUESTION_COLON] == 0 )
+          )
+        {
+            $in_attribute_list = 1;
+            $type              = 'A';
+        }
+
+        # Look for Switch::Plain syntax if an error would otherwise occur
+        # here. Note that we do not need to check if the extended syntax
+        # flag is set because otherwise an error would occur, and we would
+        # then have to output a message telling the user to set the
+        # extended syntax flag to avoid the error.
+        #  case 1: {
+        #  default: {
+        #  default:
+        # Note that the line 'default:' will be parsed as a label elsewhere.
+        elsif ( $is_case_default{$statement_type}
+            && !is_balanced_closing_container(QUESTION_COLON) )
+        {
+            # mark it as a perltidy label type
+            $type = 'J';
+        }
+
+        # otherwise, it should be part of a ?/: operator
+        else {
+            ( $type_sequence, $indent_flag ) =
+              decrease_nesting_depth( QUESTION_COLON, $rtoken_map->[$i_tok] );
+            warning("Syntax error near ? :\n")
+              if ( $last_nonblank_token eq '?' );
+        }
+        return;
+    } ## end sub do_COLON
+
+    sub do_PLUS_SIGN {
+
+        # '+' = sign of a number, unary plus (type 'p'), or something else?
+        if ( $expecting == TERM ) {
+            my $number = scan_number_fast();
+
+            # unary plus is safest assumption if not a number
+            if ( !defined($number) ) { $type = 'p'; }
+        }
+        elsif ( $expecting == OPERATOR ) {    # leave $type unchanged
+        }
+        else {    # undecided: call it unary plus if next type is 'w'
+            if ( $next_type eq 'w' ) { $type = 'p' }
+        }
+        return;
+    } ## end sub do_PLUS_SIGN
+
+    sub do_AT_SIGN {
+
+        # '@' = sigil for array?
+        error_if_expecting_OPERATOR("Array")
+          if ( $expecting == OPERATOR );
+        scan_simple_identifier();
+        return;
+    } ## end sub do_AT_SIGN
+
+    sub do_PERCENT_SIGN {
+
+        # '%' = hash or modulo?
+        # if undecided, first guess is hash if no following blank or paren
+        if ( $expecting == UNKNOWN ) {
+            if ( $next_type ne 'b' && $next_type ne '(' ) {
+                $expecting = TERM;
+            }
+        }
+        if ( $expecting == TERM ) {    # treat the '%' as a sigil
+            scan_simple_identifier();
+        }
+        return;
+    } ## end sub do_PERCENT_SIGN
+
+    sub do_LEFT_SQUARE_BRACKET {
+
+        # '[': note the preceding token and open a new SQUARE_BRACKET level
+        $square_bracket_type[ ++$square_bracket_depth ] = $last_nonblank_token;
+        ( $type_sequence, $indent_flag ) =
+          increase_nesting_depth( SQUARE_BRACKET, $rtoken_map->[$i_tok] );
+
+        # It may seem odd, but structural square brackets have type '{'
+        # and '}' to simplify indentation logic. The type is saved for ']'.
+        if ( !is_non_structural_brace() ) {
+            $type = '{';
+        }
+        $square_bracket_structural_type[$square_bracket_depth] = $type;
+        return;
+    } ## end sub do_LEFT_SQUARE_BRACKET
+
+    sub do_RIGHT_SQUARE_BRACKET {
+
+        # ']': close a SQUARE_BRACKET level; type '}' if '[' had type '{'
+        ( $type_sequence, $indent_flag ) =
+          decrease_nesting_depth( SQUARE_BRACKET, $rtoken_map->[$i_tok] );
+
+        if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' ) {
+            $type = '}';
+        }
+
+        # propagate type information for smartmatch operator.  This is
+        # necessary to enable us to know if an operator or term is expected
+        # next.
+        if ( $square_bracket_type[$square_bracket_depth] eq '~~' ) {
+            $tok = $square_bracket_type[$square_bracket_depth];
+        }
+
+        if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
+        return;
+    } ## end sub do_RIGHT_SQUARE_BRACKET
+
+    sub do_MINUS_SIGN {
+
+        # '-' = file test (type 'F'), unary minus (type 'm'), number, or ?
+        if ( ( $expecting != OPERATOR )
+            && $is_file_test_operator{$next_tok} )
+        {
+            my ( $next_nonblank_token, $i_next ) =
+              find_next_nonblank_token( $i + 1, $rtokens, $max_token_index );
+
+            # check for a quoted word like "-w=>xx";
+            # it is sufficient to just check for a following '='
+            if ( $next_nonblank_token eq '=' ) {
+                $type = 'm';
+            }
+            else {    # combine '-' and the next token into a type 'F' token
+                $i++;
+                $tok .= $next_tok;
+                $type = 'F';
+            }
+        }
+        elsif ( $expecting == TERM ) {
+            my $number = scan_number_fast();
+
+            # maybe part of bareword token? unary is safest
+            if ( !defined($number) ) { $type = 'm'; }
+
+        }
+        elsif ( $expecting == OPERATOR ) {    # leave $type unchanged
+        }
+        else {    # undecided: call it unary minus if next type is 'w'
+
+            if ( $next_type eq 'w' ) {
+                $type = 'm';
+            }
+        }
+        return;
+    } ## end sub do_MINUS_SIGN
+
+    sub do_CARAT_SIGN {
+
+        # '^'
+        # check for special variables like ${^WARNING_BITS}
+        if ( $expecting == TERM ) {
+
+            if (   $last_nonblank_token eq '{'
+                && ( $next_tok !~ /^\d/ )
+                && ( $next_tok =~ /^\w/ ) )
+            {
+
+                if ( $next_tok eq 'W' ) {
+                    $tokenizer_self->[_saw_perl_dash_w_] = 1;
+                }
+                $tok  = $tok . $next_tok;
+                $i    = $i + 1;
+                $type = 'w';
+
+                # Optional coding to try to catch syntax errors. This can
+                # be removed if it ever causes incorrect warning messages.
+                # The '{^' should be preceded by either a type or '$#'
+                # Examples:
+                #   $#{^CAPTURE}       ok
+                #   *${^LAST_FH}{NAME} ok
+                #   @{^HOWDY}          ok
+                #   $hash{^HOWDY}      error
+
+                # Note that a type sigil '$' may be tokenized as 'Z'
+                # after something like 'print', so allow type 'Z'
+                if (   $last_last_nonblank_type ne 't'
+                    && $last_last_nonblank_type ne 'Z'
+                    && $last_last_nonblank_token ne '$#' )
+                {
+                    warning("Possible syntax error near '{^'\n");
+                }
+            }
  
              else {
-                $type = '{';
+                unless ( error_if_expecting_TERM() ) {
+
+                    # Something like this is valid but strange:
+                    # undef ^I;
+                    complain("The '^' seems unusual here\n");
+                }
+            }
+        }
+        return;
+    } ## end sub do_CARAT_SIGN
+
+    sub do_DOUBLE_COLON {
+
+        #  '::' = probably a sub call
+        scan_bare_identifier();
+        return;
+    } ## end sub do_DOUBLE_COLON
+
+    sub do_LEFT_SHIFT {
+
+        # '<<' = maybe a here-doc?
+
+##      This check removed because it could be a deprecated here-doc with
+##      no specified target.  See example in log 16 Sep 2020.
+##            return
+##              unless ( $i < $max_token_index )
+##              ;          # here-doc not possible if end of line
+
+        if ( $expecting != OPERATOR ) {
+            my ( $found_target, $here_doc_target, $here_quote_character,
+                $saw_error );
+            (
+                $found_target, $here_doc_target, $here_quote_character, $i,
+                $saw_error
+              )
+              = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
+                $max_token_index );
+
+            if ($found_target) {
+                push @{$rhere_target_list},
+                  [ $here_doc_target, $here_quote_character ];
+                $type = 'h';
+                if ( length($here_doc_target) > 80 ) {
+                    my $truncated = substr( $here_doc_target, 0, 80 );
+                    complain("Long here-target: '$truncated' ...\n");
+                }
+                elsif ( !$here_doc_target ) {
+                    warning(
+                        'Use of bare << to mean <<"" is deprecated' . "\n" )
+                      unless ($here_quote_character);
+                }
+                elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
+                    complain(
+                        "Unconventional here-target: '$here_doc_target'\n");
+                }
+            }
+            elsif ( $expecting == TERM ) {
+                unless ($saw_error) {
+
+                    # shouldn't happen..arriving here implies an error in
+                    # the logic in sub 'find_here_doc'
+                    if (DEVEL_MODE) {
+                        Fault(<<EOM);
+Program bug; didn't find here doc target
+EOM
+                    }
+                    warning(
+                        "Possible program error: didn't find here doc target\n"
+                    );
+                    report_definite_bug();
+                }
              }
+        }
+        else {
+        }
+        return;
+    } ## end sub do_LEFT_SHIFT
  
-            if ( $last_nonblank_type eq ')' ) {
-                warning(
-                    "Syntax error? found token '$last_nonblank_type' then '('\n"
-                );
-            }
-            $paren_structural_type[$paren_depth] = $type;
+    sub do_NEW_HERE_DOC {
  
-        },
-        ')' => sub {
-            ( $type_sequence, $indent_flag ) =
-              decrease_nesting_depth( PAREN, $rtoken_map->[$i_tok] );
+        # '<<~' = a here-doc, new type added in v26
+        return
+          unless ( $i < $max_token_index )
+          ;    # here-doc not possible if end of line
+        if ( $expecting != OPERATOR ) {
+            my ( $found_target, $here_doc_target, $here_quote_character,
+                $saw_error );
+            (
+                $found_target, $here_doc_target, $here_quote_character, $i,
+                $saw_error
+              )
+              = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
+                $max_token_index );
  
-            if ( $paren_structural_type[$paren_depth] eq '{' ) {
-                $type = '}';
-            }
+            if ($found_target) {
  
-            $container_type = $paren_type[$paren_depth];
+                if ( length($here_doc_target) > 80 ) {
+                    my $truncated = substr( $here_doc_target, 0, 80 );
+                    complain("Long here-target: '$truncated' ...\n");
+                }
+                elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
+                    complain(
+                        "Unconventional here-target: '$here_doc_target'\n");
+                }
  
-            # restore statement type as 'sub' at closing paren of a signature
-            # so that a subsequent ':' is identified as an attribute
-            if ( $container_type =~ /^sub\b/ ) {
-                $statement_type = $container_type;
+                # Note that we put a leading space on the here quote
+                # character to indicate that it may be preceded by spaces
+                $here_quote_character = SPACE . $here_quote_character;
+                push @{$rhere_target_list},
+                  [ $here_doc_target, $here_quote_character ];
+                $type = 'h';
              }
+            elsif ( $expecting == TERM ) {
+                unless ($saw_error) {
  
-            #    /^(for|foreach)$/
-            if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {
-                my $num_sc = $paren_semicolon_count[$paren_depth];
-                if ( $num_sc > 0 && $num_sc != 2 ) {
-                    warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
+                    # shouldn't happen..arriving here implies an error in
+                    # the logic in sub 'find_here_doc'
+                    if (DEVEL_MODE) {
+                        Fault(<<EOM);
+Program bug; didn't find here doc target
+EOM
+                    }
+                    warning(
+                        "Possible program error: didn't find here doc target\n"
+                    );
+                    report_definite_bug();
                  }
              }
+        }
+        else {
+            error_if_expecting_OPERATOR();
+        }
+        return;
+    } ## end sub do_NEW_HERE_DOC
  
-            if ( $paren_depth > 0 ) { $paren_depth-- }
-        },
-        ',' => sub {
-            if ( $last_nonblank_type eq ',' ) {
-                complain("Repeated ','s \n");
-            }
+    sub do_POINTER {
  
-            # Note that we have to check both token and type here because a
-            # comma following a qw list can have last token='(' but type = 'q'
-            elsif ( $last_nonblank_token eq '(' && $last_nonblank_type eq '{' )
-            {
-                warning("Unexpected leading ',' after a '('\n");
+        #  '->'
+        return;
+    } ## end sub do_POINTER
+
+    sub do_PLUS_PLUS {
+
+        # '++'
+        # type = 'pp' for pre-increment, '++' for post-increment
+        if    ( $expecting == TERM ) { $type = 'pp' }
+        elsif ( $expecting == UNKNOWN ) {
+
+            my ( $next_nonblank_token, $i_next ) =
+              find_next_nonblank_token( $i, $rtokens, $max_token_index );
+
+            # Fix for c042: look past a side comment
+            if ( $next_nonblank_token eq '#' ) {
+                ( $next_nonblank_token, $i_next ) =
+                  find_next_nonblank_token( $max_token_index,
+                    $rtokens, $max_token_index );
              }
  
-            # patch for operator_expected: note if we are in the list (use.t)
-            if ( $statement_type eq 'use' ) { $statement_type = '_use' }
+            if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
+        }
+        return;
+    } ## end sub do_PLUS_PLUS
  
-        },
-        ';' => sub {
-            $context        = UNKNOWN_CONTEXT;
-            $statement_type = '';
-            $want_paren     = "";
+    sub do_FAT_COMMA {
  
-            #    /^(for|foreach)$/
-            if ( $is_for_foreach{ $paren_type[$paren_depth] } )
-            {    # mark ; in for loop
+        # '=>'
+        if ( $last_nonblank_type eq $tok ) {
+            complain("Repeated '=>'s \n");
+        }
  
-                # Be careful: we do not want a semicolon such as the
-                # following to be included:
-                #
-                #    for (sort {strcoll($a,$b);} keys %investments) {
+        # patch for operator_expected: note if we are in the list (use.t)
+        # TODO: make version numbers a new token type
+        if ( $statement_type eq 'use' ) { $statement_type = '_use' }
+        return;
+    } ## end sub do_FAT_COMMA
  
-                if (   $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]
-                    && $square_bracket_depth ==
-                    $depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )
-                {
+    sub do_MINUS_MINUS {
  
-                    $type = 'f';
-                    $paren_semicolon_count[$paren_depth]++;
-                }
-            }
+        # '--'
+        # type = 'mm' for pre-decrement, '--' for post-decrement
  
-        },
-        '"' => sub {
-            error_if_expecting_OPERATOR("String")
-              if ( $expecting == OPERATOR );
-            $in_quote                = 1;
-            $type                    = 'Q';
-            $allowed_quote_modifiers = "";
-        },
-        "'" => sub {
-            error_if_expecting_OPERATOR("String")
-              if ( $expecting == OPERATOR );
-            $in_quote                = 1;
-            $type                    = 'Q';
-            $allowed_quote_modifiers = "";
-        },
-        '`' => sub {
-            error_if_expecting_OPERATOR("String")
-              if ( $expecting == OPERATOR );
-            $in_quote                = 1;
-            $type                    = 'Q';
-            $allowed_quote_modifiers = "";
-        },
-        '/' => sub {
-            my $is_pattern;
+        if    ( $expecting == TERM ) { $type = 'mm' }
+        elsif ( $expecting == UNKNOWN ) {
+            my ( $next_nonblank_token, $i_next ) =
+              find_next_nonblank_token( $i, $rtokens, $max_token_index );
  
-            # a pattern cannot follow certain keywords which take optional
-            # arguments, like 'shift' and 'pop'. See also '?'.
-            if (
-                $last_nonblank_type eq 'k'
-                && $is_keyword_rejecting_slash_as_pattern_delimiter{
-                    $last_nonblank_token}
-              )
-            {
-                $is_pattern = 0;
+            # Fix for c042: look past a side comment
+            if ( $next_nonblank_token eq '#' ) {
+                ( $next_nonblank_token, $i_next ) =
+                  find_next_nonblank_token( $max_token_index,
+                    $rtokens, $max_token_index );
              }
-            elsif ( $expecting == UNKNOWN ) {    # indeterminate, must guess..
-                my $msg;
-                ( $is_pattern, $msg ) =
-                  guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
-                    $max_token_index );
  
-                if ($msg) {
-                    write_diagnostics("DIVIDE:$msg\n");
-                    write_logfile_entry($msg);
-                }
-            }
-            else { $is_pattern = ( $expecting == TERM ) }
+            if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
+        }
+        return;
+    } ## end sub do_MINUS_MINUS
  
-            if ($is_pattern) {
-                $in_quote                = 1;
-                $type                    = 'Q';
-                $allowed_quote_modifiers = '[msixpodualngc]';
-            }
-            else {    # not a pattern; check for a /= token
+    sub do_LOGICAL_AND {
  
-                if ( $rtokens->[ $i + 1 ] eq '=' ) {    # form token /=
-                    $i++;
-                    $tok  = '/=';
-                    $type = $tok;
-                }
+        # '&&'
+        error_if_expecting_TERM()
+          if ( $expecting == TERM && $last_nonblank_token ne ',' );    #c015
+        return;
+    }
  
-           #DEBUG - collecting info on what tokens follow a divide
-           # for development of guessing algorithm
-           #if ( is_possible_numerator( $i, $rtokens, $max_token_index ) < 0 ) {
-           #    #write_diagnostics( "DIVIDE? $input_line\n" );
-           #}
-            }
-        },
-        '{' => sub {
-
-            # if we just saw a ')', we will label this block with
-            # its type.  We need to do this to allow sub
-            # code_block_type to determine if this brace starts a
-            # code block or anonymous hash.  (The type of a paren
-            # pair is the preceding token, such as 'if', 'else',
-            # etc).
-            $container_type = "";
-
-            # ATTRS: for a '{' following an attribute list, reset
-            # things to look like we just saw the sub name
-            if ( $statement_type =~ /^sub\b/ ) {
-                $last_nonblank_token = $statement_type;
-                $last_nonblank_type  = 'i';
-                $statement_type      = "";
-            }
-
-            # patch for SWITCH/CASE: hide these keywords from an immediately
-            # following opening brace
-            elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
-                && $statement_type eq $last_nonblank_token )
-            {
-                $last_nonblank_token = ";";
-            }
+    sub do_LOGICAL_OR {
  
-            elsif ( $last_nonblank_token eq ')' ) {
-                $last_nonblank_token = $paren_type[ $paren_depth + 1 ];
+        # '||'
+        error_if_expecting_TERM()
+          if ( $expecting == TERM && $last_nonblank_token ne ',' );    #c015
+        return;
+    }
  
-                # defensive move in case of a nesting error (pbug.t)
-                # in which this ')' had no previous '('
-                # this nesting error will have been caught
-                if ( !defined($last_nonblank_token) ) {
-                    $last_nonblank_token = 'if';
-                }
+    sub do_SLASH_SLASH {
  
-                # check for syntax error here;
-                unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
-                    if ( $tokenizer_self->[_extended_syntax_] ) {
+        # '//'
+        error_if_expecting_TERM()
+          if ( $expecting == TERM );
+        return;
+    }
  
-                        # we append a trailing () to mark this as an unknown
-                        # block type.  This allows perltidy to format some
-                        # common extensions of perl syntax.
-                        # This is used by sub code_block_type
-                        $last_nonblank_token .= '()';
-                    }
-                    else {
-                        my $list =
-                          join( ' ', sort keys %is_blocktype_with_paren );
-                        warning(
-"syntax error at ') {', didn't see one of: <<$list>>; If this code is okay try using the -xs flag\n"
-                        );
-                    }
-                }
-            }
+    sub do_DIGITS {
  
-            # patch for paren-less for/foreach glitch, part 2.
-            # see note below under 'qw'
-            elsif ($last_nonblank_token eq 'qw'
-                && $is_for_foreach{$want_paren} )
-            {
-                $last_nonblank_token = $want_paren;
-                if ( $last_last_nonblank_token eq $want_paren ) {
-                    warning(
-"syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
-                    );
+        # 'd' = string of digits
+        error_if_expecting_OPERATOR("Number")
+          if ( $expecting == OPERATOR );
  
-                }
-                $want_paren = "";
+        my $number = scan_number_fast();
+        if ( !defined($number) ) {
+
+            # shouldn't happen - we should always get a number
+            if (DEVEL_MODE) {
+                Fault(<<EOM);
+non-number beginning with digit--program bug
+EOM
              }
+            warning(
+                "Unexpected error condition: non-number beginning with digit\n"
+            );
+            report_definite_bug();
+        }
+        return;
+    } ## end sub do_DIGITS
  
-            # now identify which of the three possible types of
-            # curly braces we have: hash index container, anonymous
-            # hash reference, or code block.
+    sub do_ATTRIBUTE_LIST {
  
-            # non-structural (hash index) curly brace pair
-            # get marked 'L' and 'R'
-            if ( is_non_structural_brace() ) {
-                $type = 'L';
+        my ($next_nonblank_token) = @_;
  
-                # patch for SWITCH/CASE:
-                # allow paren-less identifier after 'when'
-                # if the brace is preceded by a space
-                if (   $statement_type eq 'when'
-                    && $last_nonblank_type eq 'i'
-                    && $last_last_nonblank_type eq 'k'
-                    && ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
-                {
-                    $type       = '{';
-                    $block_type = $statement_type;
-                }
-            }
+        # Called at a bareword encountered while in an attribute list
+        # returns 'is_attribute':
+        #    true if attribute found
+        #    false if not an attribute (continue parsing bareword)
  
-            # code and anonymous hash have the same type, '{', but are
-            # distinguished by 'block_type',
-            # which will be blank for an anonymous hash
-            else {
+        # treat bare word followed by open paren like qw(
+        if ( $next_nonblank_token eq '(' ) {
  
-                $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type,
-                    $max_token_index );
+            # For something like:
+            #     : prototype($$)
+            # we should let do_scan_sub see it so that it can see
+            # the prototype.  All other attributes get parsed as a
+            # quoted string.
+            if ( $tok eq 'prototype' ) {
+                $id_scan_state = 'prototype';
  
-                # patch to promote bareword type to function taking block
-                if (   $block_type
-                    && $last_nonblank_type eq 'w'
-                    && $last_nonblank_i >= 0 )
-                {
-                    if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
-                        $routput_token_type->[$last_nonblank_i] = 'G';
+                # start just after the word 'prototype'
+                my $i_beg = $i + 1;
+                ( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
+                    {
+                        input_line      => $input_line,
+                        i               => $i,
+                        i_beg           => $i_beg,
+                        tok             => $tok,
+                        type            => $type,
+                        rtokens         => $rtokens,
+                        rtoken_map      => $rtoken_map,
+                        id_scan_state   => $id_scan_state,
+                        max_token_index => $max_token_index,
                      }
-                }
+                );
  
-                # patch for SWITCH/CASE: if we find a stray opening block brace
-                # where we might accept a 'case' or 'when' block, then take it
-                if (   $statement_type eq 'case'
-                    || $statement_type eq 'when' )
-                {
-                    if ( !$block_type || $block_type eq '}' ) {
-                        $block_type = $statement_type;
-                    }
+                # If successful, mark as type 'q' to be consistent
+                # with other attributes.  Type 'w' would also work.
+                if ( $i > $i_beg ) {
+                    $type = 'q';
+                    return 1;
                  }
-            }
  
-            $brace_type[ ++$brace_depth ]        = $block_type;
-            $brace_package[$brace_depth]         = $current_package;
-            $brace_structural_type[$brace_depth] = $type;
-            $brace_context[$brace_depth]         = $context;
-            ( $type_sequence, $indent_flag ) =
-              increase_nesting_depth( BRACE, $rtoken_map->[$i_tok] );
-        },
-        '}' => sub {
-            $block_type = $brace_type[$brace_depth];
-            if ($block_type) { $statement_type = '' }
-            if ( defined( $brace_package[$brace_depth] ) ) {
-                $current_package = $brace_package[$brace_depth];
+                # If not successful, continue and parse as a quote.
              }
  
-            # can happen on brace error (caught elsewhere)
-            else {
-            }
-            ( $type_sequence, $indent_flag ) =
-              decrease_nesting_depth( BRACE, $rtoken_map->[$i_tok] );
+            # All other attribute lists must be parsed as quotes
+            # (see 'signatures.t' for good examples)
+            $in_quote                = $quote_items{'q'};
+            $allowed_quote_modifiers = $quote_modifiers{'q'};
+            $type                    = 'q';
+            $quote_type              = 'q';
+            return 1;
+        }
  
-            if ( $brace_structural_type[$brace_depth] eq 'L' ) {
-                $type = 'R';
-            }
+        # handle bareword not followed by open paren
+        else {
+            $type = 'w';
+            return 1;
+        }
  
-            # propagate type information for 'do' and 'eval' blocks, and also
-            # for smartmatch operator.  This is necessary to enable us to know
-            # if an operator or term is expected next.
-            if ( $is_block_operator{$block_type} ) {
-                $tok = $block_type;
-            }
+        # attribute not found
+        return;
+    } ## end sub do_ATTRIBUTE_LIST
  
-            $context = $brace_context[$brace_depth];
-            if ( $brace_depth > 0 ) { $brace_depth--; }
-        },
-        '&' => sub {    # maybe sub call? start looking
+    sub do_QUOTED_BAREWORD {
  
-            # We have to check for sub call unless we are sure we
-            # are expecting an operator.  This example from s2p
-            # got mistaken as a q operator in an early version:
-            #   print BODY &q(<<'EOT');
-            if ( $expecting != OPERATOR ) {
+        # find type of a bareword followed by a '=>'
+        if ( $is_constant{$current_package}{$tok} ) {
+            $type = 'C';
+        }
+        elsif ( $is_user_function{$current_package}{$tok} ) {
+            $type      = 'U';
+            $prototype = $user_function_prototype{$current_package}{$tok};
+        }
+        elsif ( $tok =~ /^v\d+$/ ) {
+            $type = 'v';
+            report_v_string($tok);
+        }
+        else {
  
-                # But only look for a sub call if we are expecting a term or
-                # if there is no existing space after the &.
-                # For example we probably don't want & as sub call here:
-                #    Fcntl::S_IRUSR & $mode;
-                if ( $expecting == TERM || $next_type ne 'b' ) {
-                    scan_identifier_fast();
+            # Bareword followed by a fat comma - see 'git18.in'
+            # If tok is something like 'x17' then it could
+            # actually be operator x followed by number 17.
+            # For example, here:
+            #     123x17 => [ 792, 1224 ],
+            # (a key of 123 repeated 17 times, perhaps not
+            # what was intended). We will mark x17 as type
+            # 'n' and it will be split. If the previous token
+            # was also a bareword then it is not very clear what is
+            # going on.  In this case we will not be sure that
+            # an operator is expected, so we just mark it as a
+            # bareword.  Perl is a little murky in what it does
+            # with stuff like this, and its behavior can change
+            # over time.  Something like
+            #    a x18 => [792, 1224], will compile as
+            # a key with 18 a's.  But something like
+            #    push @array, a x18;
+            # is a syntax error.
+            if (
+                   $expecting == OPERATOR
+                && substr( $tok, 0, 1 ) eq 'x'
+                && ( length($tok) == 1
+                    || substr( $tok, 1, 1 ) =~ /^\d/ )
+              )
+            {
+                $type = 'n';
+                if ( split_pretoken(1) ) {
+                    $type = 'x';
+                    $tok  = 'x';
                  }
              }
              else {
-            }
-        },
-        '<' => sub {    # angle operator or less than?
  
-            if ( $expecting != OPERATOR ) {
-                ( $i, $type ) =
-                  find_angle_operator_termination( $input_line, $i, $rtoken_map,
-                    $expecting, $max_token_index );
+                # git #18
+                $type = 'w';
+                error_if_expecting_OPERATOR();
+            }
+        }
+        return;
+    } ## end sub do_QUOTED_BAREWORD
  
-                ##  This message is not very helpful and quite confusing if the above
-                ##  routine decided not to write a message with the line number.
-                ##  if ( $type eq '<' && $expecting == TERM ) {
-                ##      error_if_expecting_TERM();
-                ##      interrupt_logfile();
-                ##      warning("Unterminated <> operator?\n");
-                ##      resume_logfile();
-                ##  }
+    sub do_X_OPERATOR {    # handle words 'x' and 'x<digits>' as operator x
  
+        if ( $tok eq 'x' ) {
+            if ( $rtokens->[ $i + 1 ] eq '=' ) {    # x= : absorb the '='
+                $tok  = 'x=';
+                $type = $tok;
+                $i++;
              }
              else {
+                $type = 'x';
              }
-        },
-        '?' => sub {    # ?: conditional or starting pattern?
-
-            my $is_pattern;
+        }
+        else {
  
-            # Patch for rt #126965
-            # a pattern cannot follow certain keywords which take optional
-            # arguments, like 'shift' and 'pop'. See also '/'.
-            if (
-                $last_nonblank_type eq 'k'
-                && $is_keyword_rejecting_question_as_pattern_delimiter{
-                    $last_nonblank_token}
-              )
-            {
-                $is_pattern = 0;
+            # Split a pretoken like 'x10' into 'x' and '10'.
+            # Note: In previous versions of perltidy it was marked
+            # as a number, $type = 'n', and fixed downstream by the
+            # Formatter.  Type 'n' remains only if split_pretoken is false.
+            $type = 'n';
+            if ( split_pretoken(1) ) {
+                $type = 'x';
+                $tok  = 'x';
              }
+        }
+        return;
+    } ## end sub do_X_OPERATOR
  
-            # patch for RT#131288, user constant function without prototype
-            # last type is 'U' followed by ?.
-            elsif ( $last_nonblank_type =~ /^[FUY]$/ ) {
-                $is_pattern = 0;
-            }
-            elsif ( $expecting == UNKNOWN ) {
-
-                # In older versions of Perl, a bare ? can be a pattern
-                # delimiter.  In perl version 5.22 this was
-                # dropped, but we have to support it in order to format
-                # older programs. See:
-                ## https://perl.developpez.com/documentations/en/5.22.0/perl5211delta.html
-                # For example, the following line worked
-                # at one time:
-                #      ?(.*)? && (print $1,"\n");
-                # In current versions it would have to be written with slashes:
-                #      /(.*)/ && (print $1,"\n");
-                my $msg;
-                ( $is_pattern, $msg ) =
-                  guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
-                    $max_token_index );
+    sub do_USE_CONSTANT {    # handle the word 'constant' after 'use'
+        scan_bare_identifier();
+        my ( $next_nonblank_tok2, $i_next2 ) =
+          find_next_nonblank_token( $i, $rtokens, $max_token_index );
  
-                if ($msg) { write_logfile_entry($msg) }
-            }
-            else { $is_pattern = ( $expecting == TERM ) }
+        if ($next_nonblank_tok2) {
  
-            if ($is_pattern) {
-                $in_quote                = 1;
-                $type                    = 'Q';
-                $allowed_quote_modifiers = '[msixpodualngc]';
-            }
-            else {
-                ( $type_sequence, $indent_flag ) =
-                  increase_nesting_depth( QUESTION_COLON,
-                    $rtoken_map->[$i_tok] );
-            }
-        },
-        '*' => sub {    # typeglob, or multiply?
+            if ( $is_keyword{$next_nonblank_tok2} ) {
  
-            if ( $expecting == UNKNOWN && $last_nonblank_type eq 'Z' ) {
-                if (   $next_type ne 'b'
-                    && $next_type ne '('
-                    && $next_type ne '#' )    # Fix c036
-                {
-                    $expecting = TERM;
-                }
-            }
-            if ( $expecting == TERM ) {
-                scan_identifier_fast();
-            }
-            else {
+                # Assume qw is used as a quote and okay, as in:
+                #  use constant qw{ DEBUG 0 };
+                # Not worth trying to parse for just a warning, so a
+                # keyword in this position is never recorded as a constant.
  
-                if ( $rtokens->[ $i + 1 ] eq '=' ) {
-                    $tok  = '*=';
-                    $type = $tok;
-                    $i++;
-                }
-                elsif ( $rtokens->[ $i + 1 ] eq '*' ) {
-                    $tok  = '**';
-                    $type = $tok;
-                    $i++;
-                    if ( $rtokens->[ $i + 1 ] eq '=' ) {
-                        $tok  = '**=';
-                        $type = $tok;
-                        $i++;
-                    }
+                # NOTE: This warning is deactivated (by the leading '0 &&')
+                # because recent versions of perl do not complain here, but
+                # the coding is retained for reference.
+                if ( 0 && $next_nonblank_tok2 ne 'qw' ) {
+                    warning(
+"Attempting to define constant '$next_nonblank_tok2' which is a perl keyword\n"
+                    );
                  }
              }
-        },
-        '.' => sub {    # what kind of . ?
  
-            if ( $expecting != OPERATOR ) {
-                scan_number();
-                if ( $type eq '.' ) {
-                    error_if_expecting_TERM()
-                      if ( $expecting == TERM );
-                }
-            }
              else {
+                $is_constant{$current_package}{$next_nonblank_tok2} = 1;
              }
-        },
-        ':' => sub {
+        }
+        return;
+    } ## end sub do_USE_CONSTANT
  
-            # if this is the first nonblank character, call it a label
-            # since perl seems to just swallow it
-            if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
-                $type = 'J';
-            }
+    sub do_KEYWORD {
  
-            # ATTRS: check for a ':' which introduces an attribute list
-            # either after a 'sub' keyword or within a paren list
-            elsif ( $statement_type =~ /^sub\b/ ) {
-                $type              = 'A';
-                $in_attribute_list = 1;
+        # found a keyword - set type 'k' and any associated flags
+        $type = 'k';
+
+        # Since for and foreach may not be followed immediately
+        # by an opening paren, we have to remember which keyword
+        # is associated with the next '('
+        if ( $is_for_foreach{$tok} ) {
+            if ( new_statement_ok() ) {
+                $want_paren = $tok;
              }
+        }
+
+        # recognize 'use' statements, which are special
+        elsif ( $is_use_require{$tok} ) {
+            $statement_type = $tok;
+            error_if_expecting_OPERATOR()
+              if ( $expecting == OPERATOR );
+        }
+
+        # remember my and our to check for trailing ": shared"
+        elsif ( $is_my_our_state{$tok} ) {
+            $statement_type = $tok;
+        }
+
+        # Check for misplaced 'elsif' and 'else', but allow isolated
+        # else or elsif blocks to be formatted.  This is indicated
+        # by a last nonblank token of ';'
+        elsif ( $tok eq 'elsif' ) {
+            if (
+                $last_nonblank_token ne ';'
  
-            # Within a signature, unless we are in a ternary.  For example,
-            # from 't/filter_example.t':
-            #    method foo4 ( $class: $bar ) { $class->bar($bar) }
-            elsif ( $paren_type[$paren_depth] =~ /^sub\b/
-                && !is_balanced_closing_container(QUESTION_COLON) )
+                ## !~ /^(if|elsif|unless)$/
+                && !$is_if_elsif_unless{$last_nonblank_block_type}
+              )
              {
-                $type              = 'A';
-                $in_attribute_list = 1;
+                warning(
+                    "expecting '$tok' to follow one of 'if|elsif|unless'\n");
              }
+        }
+        elsif ( $tok eq 'else' ) {
  
-            # check for scalar attribute, such as
-            # my $foo : shared = 1;
-            elsif ($is_my_our_state{$statement_type}
-                && $current_depth[QUESTION_COLON] == 0 )
+            # patched for SWITCH/CASE
+            if (
+                $last_nonblank_token ne ';'
+
+                ## !~ /^(if|elsif|unless|case|when)$/
+                && !$is_if_elsif_unless_case_when{$last_nonblank_block_type}
+
+                # patch to avoid an unwanted error message for
+                # the case of a parenless 'case' (RT 105484):
+                # switch ( 1 ) { case x { 2 } else { } }
+                ## !~ /^(if|elsif|unless|case|when)$/
+                && !$is_if_elsif_unless_case_when{$statement_type}
+              )
              {
-                $type              = 'A';
-                $in_attribute_list = 1;
+                warning(
+"expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
+                );
              }
+        }
+
+        # patch for SWITCH/CASE if 'case' and 'when' are
+        # treated as keywords.  Also 'default' for Switch::Plain
+        elsif ($tok eq 'when'
+            || $tok eq 'case'
+            || $tok eq 'default' )
+        {
+            $statement_type = $tok;    # next '{' is block
+        }
+
+        # feature 'err' was removed in Perl 5.10.  So mark this as
+        # a bareword unless an operator is expected (see c158).
+        elsif ( $tok eq 'err' ) {
+            if ( $expecting != OPERATOR ) { $type = 'w' }
+        }
+
+        return;
+    } ## end sub do_KEYWORD
+
+    sub do_QUOTE_OPERATOR {    # start a quote for a quote-operator word
+
+        if ( $expecting == OPERATOR ) {
+
+            # Be careful not to call an error for a qw quote
+            # where a parenthesized list is allowed.  For example,
+            # it could also be a for/foreach construct such as
+            #
+            #    foreach my $key qw\Uno Due Tres Quadro\ {
+            #        print "Set $key\n";
+            #    }
+            #
+
+            # Or it could be a function call.
+            # NOTE: Braces in something like &{ xxx } are not
+            # marked as a block, we might have a method call.
+            # &method(...), $method->(..), &{method}(...),
+            # $ref[2](list) is ok & short for $ref[2]->(list)
+            #
+            # See notes in 'sub code_block_type' and
+            # 'sub is_non_structural_brace'
  
-            # Look for Switch::Plain syntax if an error would otherwise occur
-            # here. Note that we do not need to check if the extended syntax
-            # flag is set because otherwise an error would occur, and we would
-            # then have to output a message telling the user to set the
-            # extended syntax flag to avoid the error.
-            #  case 1: {
-            #  default: {
-            #  default:
-            # Note that the line 'default:' will be parsed as a label elsewhere.
-            elsif ( $is_case_default{$statement_type}
-                && !is_balanced_closing_container(QUESTION_COLON) )
+            unless (
+                $tok eq 'qw'
+                && (   $last_nonblank_token =~ /^([\]\}\&]|\-\>)/
+                    || $is_for_foreach{$want_paren} )
+              )
              {
-                # mark it as a perltidy label type
-                $type = 'J';
+                error_if_expecting_OPERATOR();
              }
+        }
+        $in_quote                = $quote_items{$tok};
+        $allowed_quote_modifiers = $quote_modifiers{$tok};
+
+        # All quote types are 'Q' except possibly qw quotes.
+        # qw quotes are special in that they may generally be trimmed
+        # of leading and trailing whitespace.  So they are given a
+        # separate type, 'q', unless the _trim_qw_ flag is false.
+        $type =
+          ( $tok eq 'qw' && $tokenizer_self->[_trim_qw_] )
+          ? 'q'
+          : 'Q';
+        $quote_type = $type;
+        return;
+    } ## end sub do_QUOTE_OPERATOR
+
+    sub do_UNKNOWN_BAREWORD {    # scan a bareword and refine its type
+
+        my ($next_nonblank_token) = @_;
+
+        scan_bare_identifier();
+
+        if (   $statement_type eq 'use'
+            && $last_nonblank_token eq 'use' )
+        {
+            $saw_use_module{$current_package}->{$tok} = 1;
+        }
+
+        if ( $type eq 'w' ) {
+
+            if ( $expecting == OPERATOR ) {
+
+                # Patch to avoid error message for RPerl overloaded
+                # operator functions: use overload
+                #    '+' => \&sse_add,
+                #    '-' => \&sse_sub,
+                #    '*' => \&sse_mul,
+                #    '/' => \&sse_div;
+                # TODO: this could eventually be generalized
+                if (   $saw_use_module{$current_package}->{'RPerl'}
+                    && $tok =~ /^sse_(mul|div|add|sub)$/ )
+                {
  
-            # otherwise, it should be part of a ?/: operator
-            else {
-                ( $type_sequence, $indent_flag ) =
-                  decrease_nesting_depth( QUESTION_COLON,
-                    $rtoken_map->[$i_tok] );
-                if ( $last_nonblank_token eq '?' ) {
-                    warning("Syntax error near ? :\n");
                  }
-            }
-        },
-        '+' => sub {    # what kind of plus?
  
-            if ( $expecting == TERM ) {
-                my $number = scan_number_fast();
+                # Fix part 1 for git #63 in which a comment falls
+                # between an -> and the following word.  An
+                # alternate fix would be to change operator_expected
+                # to return an UNKNOWN for this type.  No error is issued.
+                elsif ( $last_nonblank_type eq '->' ) {
  
-                # unary plus is safest assumption if not a number
-                if ( !defined($number) ) { $type = 'p'; }
-            }
-            elsif ( $expecting == OPERATOR ) {
-            }
-            else {
-                if ( $next_type eq 'w' ) { $type = 'p' }
-            }
-        },
-        '@' => sub {
+                }
  
-            error_if_expecting_OPERATOR("Array")
-              if ( $expecting == OPERATOR );
-            scan_identifier_fast();
-        },
-        '%' => sub {    # hash or modulo?
-
-            # first guess is hash if no following blank or paren
-            if ( $expecting == UNKNOWN ) {
-                if ( $next_type ne 'b' && $next_type ne '(' ) {
-                    $expecting = TERM;
+                # don't complain about possible indirect object
+                # notation.
+                # For example:
+                #   package main;
+                #   sub new($) { ... }
+                #   $b = new A::;  # calls A::new
+                #   $c = new A;    # same thing but suspicious
+                # This will call A::new but we have a 'new' in
+                # main:: which looks like a constant.
+                #
+                elsif ( $last_nonblank_type eq 'C' ) {
+                    if ( $tok !~ /::$/ ) {
+                        complain(<<EOM);
+Expecting operator after '$last_nonblank_token' but found bare word '$tok'
+       Maybe indirectet object notation?
+EOM
+                    }
+                }
+                else {
+                    error_if_expecting_OPERATOR("bareword");
                  }
              }
-            if ( $expecting == TERM ) {
-                scan_identifier_fast();
-            }
-        },
-        '[' => sub {
-            $square_bracket_type[ ++$square_bracket_depth ] =
-              $last_nonblank_token;
-            ( $type_sequence, $indent_flag ) =
-              increase_nesting_depth( SQUARE_BRACKET, $rtoken_map->[$i_tok] );
  
-            # It may seem odd, but structural square brackets have
-            # type '{' and '}'.  This simplifies the indentation logic.
-            if ( !is_non_structural_brace() ) {
-                $type = '{';
-            }
-            $square_bracket_structural_type[$square_bracket_depth] = $type;
-        },
-        ']' => sub {
-            ( $type_sequence, $indent_flag ) =
-              decrease_nesting_depth( SQUARE_BRACKET, $rtoken_map->[$i_tok] );
+            # mark bare words immediately followed by a paren as
+            # functions (type 'U'), except for the two cases noted below
+            $next_tok = $rtokens->[ $i + 1 ];
+            if ( $next_tok eq '(' ) {
  
-            if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
-            {
-                $type = '}';
-            }
+                # Patch for issue c151, where we are processing a snippet and
+                # have not seen that SPACE is a constant.  In this case 'x' is
+                # probably an operator. The only disadvantage with an incorrect
+                # guess is that the space after it may be incorrect. For example
+                #   $str .= SPACE x ( 16 - length($str) ); See also b1410.
+                if ( $tok eq 'x' && $last_nonblank_type eq 'w' ) { $type = 'x' }
+
+                # Fix part 2 for git #63.  Leave type as 'w' to keep
+                # the type the same as if the -> were not separated
+                elsif ( $last_nonblank_type ne '->' ) { $type = 'U' }
  
-            # propagate type information for smartmatch operator.  This is
-            # necessary to enable us to know if an operator or term is expected
-            # next.
-            if ( $square_bracket_type[$square_bracket_depth] eq '~~' ) {
-                $tok = $square_bracket_type[$square_bracket_depth];
              }
  
-            if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
-        },
-        '-' => sub {    # what kind of minus?
+            # underscore after file test operator is file handle
+            if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
+                $type = 'Z';
+            }
  
-            if ( ( $expecting != OPERATOR )
-                && $is_file_test_operator{$next_tok} )
+            # patch for SWITCH/CASE if 'case' and 'when' are
+            # not treated as keywords:
+            if (
+                ( $tok eq 'case' && $brace_type[$brace_depth] eq 'switch' )
+                || (   $tok eq 'when'
+                    && $brace_type[$brace_depth] eq 'given' )
+              )
              {
-                my ( $next_nonblank_token, $i_next ) =
-                  find_next_nonblank_token( $i + 1, $rtokens,
-                    $max_token_index );
+                $statement_type = $tok;    # next '{' is block
+                $type           = 'k';     # for keyword syntax coloring
+            }
+            if ( $next_nonblank_token eq '(' ) {
  
-                # check for a quoted word like "-w=>xx";
-                # it is sufficient to just check for a following '='
-                if ( $next_nonblank_token eq '=' ) {
-                    $type = 'm';
+                # patch for SWITCH/CASE if switch and given not keywords
+                # Switch is not a perl 5 keyword, but we will gamble
+                # and mark switch followed by paren as a keyword.  This
+                # is only necessary to get html syntax coloring nice,
+                # and does not commit this as being a switch/case.
+                if ( $tok eq 'switch' || $tok eq 'given' ) {
+                    $type = 'k';    # for keyword syntax coloring
                  }
-                else {
-                    $i++;
-                    $tok .= $next_tok;
-                    $type = 'F';
+
+                # mark 'x' as operator for something like this (see b1410)
+                #  my $line = join( LD_X, map { LD_H x ( $_ + 2 ) } @$widths );
+                elsif ( $tok eq 'x' && $last_nonblank_type eq 'w' ) {
+                    $type = 'x';
                  }
              }
-            elsif ( $expecting == TERM ) {
-                my $number = scan_number_fast();
+        }
+        return;
+    } ## end sub do_UNKNOWN_BAREWORD
  
-                # maybe part of bareword token? unary is safest
-                if ( !defined($number) ) { $type = 'm'; }
+    sub sub_attribute_ok_here {
  
-            }
-            elsif ( $expecting == OPERATOR ) {
-            }
-            else {
+        my ( $tok_kw, $next_nonblank_token, $i_next ) = @_;
  
-                if ( $next_type eq 'w' ) {
-                    $type = 'm';
-                }
-            }
-        },
+        # Decide if 'sub :' can be the start of a sub attribute list.
+        # We will decide based on whether the colon is followed by a
+        # bareword which does not start with a digit and is not a keyword.
+        # Changed inext+1 to inext to fix case b1190.
+        my $sub_attribute_ok_here;    # remains false unless set below
+        if (   $is_sub{$tok_kw}
+            && $expecting != OPERATOR
+            && $next_nonblank_token eq ':' )
+        {
+            my ( $nn_nonblank_token, $i_nn ) =
+              find_next_nonblank_token( $i_next, $rtokens, $max_token_index );
+            $sub_attribute_ok_here =
+                 $nn_nonblank_token =~ /^\w/
+              && $nn_nonblank_token !~ /^\d/
+              && !$is_keyword{$nn_nonblank_token};
+        }
+        return $sub_attribute_ok_here;
+    } ## end sub sub_attribute_ok_here
  
-        '^' => sub {
+    sub do_BAREWORD {
  
-            # check for special variables like ${^WARNING_BITS}
-            if ( $expecting == TERM ) {
+        my ($is_END_or_DATA) = @_;
  
-                # FIXME: this should work but will not catch errors
-                # because we also have to be sure that previous token is
-                # a type character ($,@,%).
-                if (   $last_nonblank_token eq '{'
-                    && ( $next_tok !~ /^\d/ )
-                    && ( $next_tok =~ /^\w/ ) )
-                {
+        # handle a bareword token:
+        # returns
+        #    true if this token ends the current line
+        #    false otherwise
  
-                    if ( $next_tok eq 'W' ) {
-                        $tokenizer_self->[_saw_perl_dash_w_] = 1;
-                    }
-                    $tok  = $tok . $next_tok;
-                    $i    = $i + 1;
-                    $type = 'w';
-                }
+        my ( $next_nonblank_token, $i_next ) =
+          find_next_nonblank_token( $i, $rtokens, $max_token_index );
  
-                else {
-                    unless ( error_if_expecting_TERM() ) {
+        # a bare word immediately followed by :: is not a keyword;
+        # use $tok_kw when testing for keywords to avoid a mistake
+        my $tok_kw = $tok;
+        if (   $rtokens->[ $i + 1 ] eq ':'
+            && $rtokens->[ $i + 2 ] eq ':' )
+        {
+            $tok_kw .= '::';
+        }
  
-                        # Something like this is valid but strange:
-                        # undef ^I;
-                        complain("The '^' seems unusual here\n");
-                    }
-                }
-            }
-        },
+        if ($in_attribute_list) {
+            my $is_attribute = do_ATTRIBUTE_LIST($next_nonblank_token);
+            return if ($is_attribute);
+        }
+
+        #----------------------------------------
+        # Starting final if-elsif- chain of tests
+        #----------------------------------------
  
-        '::' => sub {    # probably a sub call
+        # This is the return flag:
+        #   true => this is the last token on the line
+        #   false => keep tokenizing the line
+        my $is_last;
+
+        # The following blocks of code must update these vars:
+        # $type - the final token type, must always be set
+
+        # In addition, if additional pretokens are added:
+        # $tok  - the final token
+        # $i    - the index of the last pretoken
+
+        # They may also need to check and set various flags
+
+        # Scan a bare word following a -> as an identifier; it could
+        # have a long package name.  Fixes c037, c041.
+        if ( $last_nonblank_token eq '->' ) {
              scan_bare_identifier();
-        },
-        '<<' => sub {    # maybe a here-doc?
  
-##      This check removed because it could be a deprecated here-doc with
-##      no specified target.  See example in log 16 Sep 2020.
-##            return
-##              unless ( $i < $max_token_index )
-##              ;          # here-doc not possible if end of line
+            # a bareward after '->' gets type 'i'
+            $type = 'i';
+        }
  
-            if ( $expecting != OPERATOR ) {
-                my ( $found_target, $here_doc_target, $here_quote_character,
-                    $saw_error );
-                (
-                    $found_target, $here_doc_target, $here_quote_character, $i,
-                    $saw_error
-                  )
-                  = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
-                    $max_token_index );
+        # Quote a word followed by => operator
+        # unless the word __END__ or __DATA__ and the only word on
+        # the line.
+        elsif ( !$is_END_or_DATA
+            && $next_nonblank_token eq '='
+            && $rtokens->[ $i_next + 1 ] eq '>' )
+        {
+            do_QUOTED_BAREWORD();
+        }
  
-                if ($found_target) {
-                    push @{$rhere_target_list},
-                      [ $here_doc_target, $here_quote_character ];
-                    $type = 'h';
-                    if ( length($here_doc_target) > 80 ) {
-                        my $truncated = substr( $here_doc_target, 0, 80 );
-                        complain("Long here-target: '$truncated' ...\n");
-                    }
-                    elsif ( !$here_doc_target ) {
-                        warning(
-                            'Use of bare << to mean <<"" is deprecated' . "\n" )
-                          unless ($here_quote_character);
-                    }
-                    elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
-                        complain(
-                            "Unconventional here-target: '$here_doc_target'\n");
-                    }
-                }
-                elsif ( $expecting == TERM ) {
-                    unless ($saw_error) {
+        # quote a bare word within braces..like xxx->{s}; note that we
+        # must be sure this is not a structural brace, to avoid
+        # mistaking {s} in the following for a quoted bare word:
+        #     for(@[){s}bla}BLA}
+        # Also treat q in something like var{-q} as a bare word, not
+        # a quote operator
+        elsif (
+            $next_nonblank_token eq '}'
+            && (
+                $last_nonblank_type eq 'L'
+                || (   $last_nonblank_type eq 'm'
+                    && $last_last_nonblank_type eq 'L' )
+            )
+          )
+        {
+            $type = 'w';
+        }
  
-                        # shouldn't happen..
-                        warning("Program bug; didn't find here doc target\n");
-                        report_definite_bug();
-                    }
-                }
-            }
-            else {
-            }
-        },
-        '<<~' => sub {    # a here-doc, new type added in v26
-            return
-              unless ( $i < $max_token_index )
-              ;           # here-doc not possible if end of line
-            if ( $expecting != OPERATOR ) {
-                my ( $found_target, $here_doc_target, $here_quote_character,
-                    $saw_error );
-                (
-                    $found_target, $here_doc_target, $here_quote_character, $i,
-                    $saw_error
-                  )
-                  = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
-                    $max_token_index );
+        # handle operator x (now we know it isn't $x=)
+        elsif (
+               $expecting == OPERATOR
+            && substr( $tok, 0, 1 ) eq 'x'
+            && ( length($tok) == 1
+                || substr( $tok, 1, 1 ) =~ /^\d/ )
+          )
+        {
+            do_X_OPERATOR();
+        }
+        elsif ( $tok_kw eq 'CORE::' ) {
+            $type = $tok = $tok_kw;
+            $i += 2;
+        }
+        elsif ( ( $tok eq 'strict' )
+            and ( $last_nonblank_token eq 'use' ) )
+        {
+            $tokenizer_self->[_saw_use_strict_] = 1;
+            scan_bare_identifier();
+        }
  
-                if ($found_target) {
+        elsif ( ( $tok eq 'warnings' )
+            and ( $last_nonblank_token eq 'use' ) )
+        {
+            $tokenizer_self->[_saw_perl_dash_w_] = 1;
  
-                    if ( length($here_doc_target) > 80 ) {
-                        my $truncated = substr( $here_doc_target, 0, 80 );
-                        complain("Long here-target: '$truncated' ...\n");
-                    }
-                    elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
-                        complain(
-                            "Unconventional here-target: '$here_doc_target'\n");
-                    }
+            # scan as identifier, so that we pick up something like:
+            # use warnings::register
+            scan_bare_identifier();
+        }
  
-                    # Note that we put a leading space on the here quote
-                    # character indicate that it may be preceded by spaces
-                    $here_quote_character = " " . $here_quote_character;
-                    push @{$rhere_target_list},
-                      [ $here_doc_target, $here_quote_character ];
-                    $type = 'h';
-                }
-                elsif ( $expecting == TERM ) {
-                    unless ($saw_error) {
+        elsif (
+               $tok eq 'AutoLoader'
+            && $tokenizer_self->[_look_for_autoloader_]
+            && (
+                $last_nonblank_token eq 'use'
  
-                        # shouldn't happen..
-                        warning("Program bug; didn't find here doc target\n");
-                        report_definite_bug();
-                    }
-                }
-            }
-            else {
-            }
-        },
-        '->' => sub {
+                # these regexes are from AutoSplit.pm, which we want
+                # to mimic
+                || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/
+                || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/
+            )
+          )
+        {
+            write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
+            $tokenizer_self->[_saw_autoloader_]      = 1;
+            $tokenizer_self->[_look_for_autoloader_] = 0;
+            scan_bare_identifier();
+        }
  
-            # if -> points to a bare word, we must scan for an identifier,
-            # otherwise something like ->y would look like the y operator
+        elsif (
+               $tok eq 'SelfLoader'
+            && $tokenizer_self->[_look_for_selfloader_]
+            && (   $last_nonblank_token eq 'use'
+                || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
+                || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
+          )
+        {
+            write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
+            $tokenizer_self->[_saw_selfloader_]      = 1;
+            $tokenizer_self->[_look_for_selfloader_] = 0;
+            scan_bare_identifier();
+        }
  
-            # NOTE: this will currently allow things like
-            #     '->@array'    '->*VAR'  '->%hash'
-            # to get parsed as identifiers, even though these are not currently
-            # allowed syntax.  To catch syntax errors like this we could first
-            # check that the next character and skip this call if it is one of
-            # ' @ % * '.  A disadvantage with doing this is that this would
-            # have to be fixed if the perltidy syntax is ever extended to make
-            # any of these valid.  So for now this check is not done.
-            scan_identifier_fast();
-        },
+        elsif ( ( $tok eq 'constant' )
+            and ( $last_nonblank_token eq 'use' ) )
+        {
+            do_USE_CONSTANT();
+        }
  
-        # type = 'pp' for pre-increment, '++' for post-increment
-        '++' => sub {
-            if    ( $expecting == TERM ) { $type = 'pp' }
-            elsif ( $expecting == UNKNOWN ) {
-
-                my ( $next_nonblank_token, $i_next ) =
-                  find_next_nonblank_token( $i, $rtokens, $max_token_index );
-
-                # Fix for c042: look past a side comment
-                if ( $next_nonblank_token eq '#' ) {
-                    ( $next_nonblank_token, $i_next ) =
-                      find_next_nonblank_token( $max_token_index,
-                        $rtokens, $max_token_index );
-                }
+        # various quote operators
+        elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
+            do_QUOTE_OPERATOR();
+        }
  
-                if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
-            }
-        },
+        # check for a statement label
+        elsif (
+               ( $next_nonblank_token eq ':' )
+            && ( $rtokens->[ $i_next + 1 ] ne ':' )
+            && ( $i_next <= $max_token_index )    # colon on same line
  
-        '=>' => sub {
-            if ( $last_nonblank_type eq $tok ) {
-                complain("Repeated '=>'s \n");
+            # like 'sub : lvalue' ?
+            && !sub_attribute_ok_here( $tok_kw, $next_nonblank_token, $i_next )
+            && label_ok()
+          )
+        {
+            if ( $tok !~ /[A-Z]/ ) {
+                push @{ $tokenizer_self->[_rlower_case_labels_at_] },
+                  $input_line_number;
              }
+            $type = 'J';
+            $tok .= ':';
+            $i = $i_next;
+        }
  
-            # patch for operator_expected: note if we are in the list (use.t)
-            # TODO: make version numbers a new token type
-            if ( $statement_type eq 'use' ) { $statement_type = '_use' }
-        },
+        #      'sub' or alias
+        elsif ( $is_sub{$tok_kw} ) {
+            error_if_expecting_OPERATOR()
+              if ( $expecting == OPERATOR );
+            initialize_subname();
+            scan_id();
+        }
  
-        # type = 'mm' for pre-decrement, '--' for post-decrement
-        '--' => sub {
-
-            if    ( $expecting == TERM ) { $type = 'mm' }
-            elsif ( $expecting == UNKNOWN ) {
-                my ( $next_nonblank_token, $i_next ) =
-                  find_next_nonblank_token( $i, $rtokens, $max_token_index );
-
-                # Fix for c042: look past a side comment
-                if ( $next_nonblank_token eq '#' ) {
-                    ( $next_nonblank_token, $i_next ) =
-                      find_next_nonblank_token( $max_token_index,
-                        $rtokens, $max_token_index );
-                }
+        #      'package'
+        elsif ( $is_package{$tok_kw} ) {
+            error_if_expecting_OPERATOR()
+              if ( $expecting == OPERATOR );
+            scan_id();
+        }
  
-                if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
-            }
-        },
+        # Fix for c035: split 'format' from 'is_format_END_DATA' to be
+        # more restrictive. Require a new statement to be ok here.
+        elsif ( $tok_kw eq 'format' && new_statement_ok() ) {
+            $type = ';';    # make tokenizer look for TERM next
+            $tokenizer_self->[_in_format_] = 1;
+            $is_last = 1;                          ## is last token on this line
+        }
  
-        '&&' => sub {
-            error_if_expecting_TERM()
-              if ( $expecting == TERM && $last_nonblank_token ne ',' );    #c015
-        },
+        # Note on token types for format, __DATA__, __END__:
+        # It simplifies things to give these type ';', so that when we
+        # start rescanning we will be expecting a token of type TERM.
+        # We will switch to type 'k' before outputting the tokens.
+        elsif ( $is_END_DATA{$tok_kw} ) {
+            $type = ';';    # make tokenizer look for TERM next
  
-        '||' => sub {
-            error_if_expecting_TERM()
-              if ( $expecting == TERM && $last_nonblank_token ne ',' );    #c015
-        },
+            # Remember that we are in one of these three sections
+            $tokenizer_self->[ $is_END_DATA{$tok_kw} ] = 1;
+            $is_last = 1;    ## is last token on this line
+        }
  
-        '//' => sub {
-            error_if_expecting_TERM()
-              if ( $expecting == TERM );
-        },
-    };
+        elsif ( $is_keyword{$tok_kw} ) {
+            do_KEYWORD();
+        }
  
-    # ------------------------------------------------------------
-    # end hash of code for handling individual token types
-    # ------------------------------------------------------------
+        # check for inline label following
+        #         /^(redo|last|next|goto)$/
+        elsif (( $last_nonblank_type eq 'k' )
+            && ( $is_redo_last_next_goto{$last_nonblank_token} ) )
+        {
+            $type = 'j';
+        }
  
-    my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
+        # something else --
+        else {
+            do_UNKNOWN_BAREWORD($next_nonblank_token);
+        }
  
-    # These block types terminate statements and do not need a trailing
-    # semicolon
-    # patched for SWITCH/CASE/
-    my %is_zero_continuation_block_type;
-    @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
-      if elsif else unless while until for foreach switch case given when);
-    @is_zero_continuation_block_type{@_} = (1) x scalar(@_);
+        return $is_last;
  
-    my %is_not_zero_continuation_block_type;
-    @_ = qw(sort grep map do eval);
-    @is_not_zero_continuation_block_type{@_} = (1) x scalar(@_);
+    } ## end sub do_BAREWORD
  
-    my %is_logical_container;
-    @_ = qw(if elsif unless while and or err not && !  || for foreach);
-    @is_logical_container{@_} = (1) x scalar(@_);
+    sub do_FOLLOW_QUOTE {
  
-    my %is_binary_type;
-    @_ = qw(|| &&);
-    @is_binary_type{@_} = (1) x scalar(@_);
+        # Continue following a quote on a new line
+        #
+        # This sub takes no arguments and returns nothing.  All input and
+        # output is through variables declared outside of it ($i, $type,
+        # $in_quote, $quote_type, the other $quote_* scan state, ...).
+        #
+        # On return the caller can check:
+        #   $in_quote - still true if the end of the quote was not found
+        #   $i        - advanced beyond the pre-tokens consumed here; it
+        #               may be greater than $max_token_index
+        #   $type     - $quote_type, or 'h' if here-doc targets were found
+        #               in the replacement text of an 'e' modifier
+        $type = $quote_type;
  
-    my %is_binary_keyword;
-    @_ = qw(and or err eq ne cmp);
-    @is_binary_keyword{@_} = (1) x scalar(@_);
+        unless ( @{$routput_token_list} ) {    # initialize if continuation line
+            push( @{$routput_token_list}, $i );
+            $routput_token_type->[$i] = $type;
  
-    # 'L' is token for opening { at hash key
-    my %is_opening_type;
-    @_ = qw< L { ( [ >;
-    @is_opening_type{@_} = (1) x scalar(@_);
+        }
  
-    # 'R' is token for closing } at hash key
-    my %is_closing_type;
-    @_ = qw< R } ) ] >;
-    @is_closing_type{@_} = (1) x scalar(@_);
+        # scan for the end of the quote or pattern; the first seven values
+        # passed to do_quote are handed back updated in the same order
+        (
+            $i,
+            $in_quote,
+            $quote_character,
+            $quote_pos,
+            $quote_depth,
+            $quoted_string_1,
+            $quoted_string_2,
+
+        ) = do_quote(
+
+            $i,
+            $in_quote,
+            $quote_character,
+            $quote_pos,
+            $quote_depth,
+            $quoted_string_1,
+            $quoted_string_2,
+            $rtokens,
+            $rtoken_map,
+            $max_token_index,
  
-    my %is_redo_last_next_goto;
-    @_ = qw(redo last next goto);
-    @is_redo_last_next_goto{@_} = (1) x scalar(@_);
+        );
  
-    my %is_use_require;
-    @_ = qw(use require);
-    @is_use_require{@_} = (1) x scalar(@_);
+        # all done if we didn't find it
+        if ($in_quote) { return }
+
+        # save the first quoted string for rescanning; it is only used
+        # below if an 'e' quote modifier is seen
+        my $qs1 = $quoted_string_1;
+
+        # re-initialize for next search
+        $quote_character = EMPTY_STRING;
+        $quote_pos       = 0;
+        $quote_type      = 'Q';
+        $quoted_string_1 = EMPTY_STRING;
+        $quoted_string_2 = EMPTY_STRING;
+        if ( ++$i > $max_token_index ) { return }
+
+        # look for any modifiers
+        if ($allowed_quote_modifiers) {
+
+            # check for exact quote modifiers
+            if ( $rtokens->[$i] =~ /^[A-Za-z_]/ ) {
+                my $str = $rtokens->[$i];
+                my $saw_modifier_e;
+
+                # Step through $str from its start, one match of
+                # $allowed_quote_modifiers per pass, and note if the
+                # character just before pos() is an 'e'.  The /c flag
+                # keeps pos() where it is when the match finally fails.
+                while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
+                    my $pos  = pos($str);
+                    my $char = substr( $str, $pos - 1, 1 );
+                    $saw_modifier_e ||= ( $char eq 'e' );
+                }
+
+                # For an 'e' quote modifier we must scan the replacement
+                # text for here-doc targets...
+                # but if the modifier starts a new line we can skip
+                # this because either the here doc will be fully
+                # contained in the replacement text (so we can
+                # ignore it) or Perl will not find it.
+                # See test 'here2.in'.
+                if ( $saw_modifier_e && $i_tok >= 0 ) {
+
+                    my $rht = scan_replacement_text($qs1);
+
+                    # Change type from 'Q' to 'h' for quotes with
+                    # here-doc targets so that the formatter (see sub
+                    # process_line_of_CODE) will not make any line
+                    # breaks after this point.
+                    if ($rht) {
+                        push @{$rhere_target_list}, @{$rht};
+                        $type = 'h';
+
+                        # NOTE(review): this test looks unreachable because
+                        # the enclosing 'if' requires $i_tok >= 0, unless
+                        # scan_replacement_text changes $i_tok - confirm.
+                        if ( $i_tok < 0 ) {
+                            my $ilast = $routput_token_list->[-1];
+                            $routput_token_type->[$ilast] = $type;
+                        }
+                    }
+                }
  
-    # This hash holds the array index in $tokenizer_self for these keywords:
-    # Fix for issue c035: removed 'format' from this hash
-    my %is_END_DATA = (
-        '__END__'  => _in_end_,
-        '__DATA__' => _in_data_,
-    );
+                # pos() is only defined here if at least one modifier
+                # character was matched by the loop above
+                if ( defined( pos($str) ) ) {
  
-    # original ref: camel 3 p 147,
-    # but perl may accept undocumented flags
-    # perl 5.10 adds 'p' (preserve)
-    # Perl version 5.22 added 'n'
-    # From http://perldoc.perl.org/perlop.html we have
-    # /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc
-    # s/PATTERN/REPLACEMENT/msixpodualngcer
-    # y/SEARCHLIST/REPLACEMENTLIST/cdsr
-    # tr/SEARCHLIST/REPLACEMENTLIST/cdsr
-    # qr/STRING/msixpodualn
-    my %quote_modifiers = (
-        's'  => '[msixpodualngcer]',
-        'y'  => '[cdsr]',
-        'tr' => '[cdsr]',
-        'm'  => '[msixpodualngc]',
-        'qr' => '[msixpodualn]',
-        'q'  => "",
-        'qq' => "",
-        'qw' => "",
-        'qx' => "",
-    );
+                    # matched the whole word, so step past it
+                    if ( pos($str) == length($str) ) {
+                        if ( ++$i > $max_token_index ) { return }
+                    }
  
-    # table showing how many quoted things to look for after quote operator..
-    # s, y, tr have 2 (pattern and replacement)
-    # others have 1 (pattern only)
-    my %quote_items = (
-        's'  => 2,
-        'y'  => 2,
-        'tr' => 2,
-        'm'  => 1,
-        'qr' => 1,
-        'q'  => 1,
-        'qq' => 1,
-        'qw' => 1,
-        'qx' => 1,
-    );
+                    # Looks like a joined quote modifier
+                    # and keyword, maybe something like
+                    # s/xxx/yyy/gefor @k=...
+                    # Example is "galgen.pl".  Would have to split
+                    # the word and insert a new token in the
+                    # pre-token list.  This is so rare that I haven't
+                    # done it.  Will just issue a warning citation.
+
+                    # This error might also be triggered if my quote
+                    # modifier characters are incomplete
+                    else {
+                        warning(<<EOM);
+
+Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
+Please put a space between quote modifiers and trailing keywords.
+EOM
+
+                        # print "token $rtokens->[$i]\n";
+                        # my $num = length($str) - pos($str);
+                        # $rtokens->[$i]=substr($rtokens->[$i],pos($str),$num);
+                        # print "continuing with new token $rtokens->[$i]\n";
+
+                        # skipping past this token does least damage
+                        if ( ++$i > $max_token_index ) { return }
+                    }
+                }
+                else {
+
+                    # No modifier character matched at all; the word is
+                    # left in place ($i is not advanced) and only noted
+                    # in the log.
+                    # example file: rokicki4.pl
+                    # This error might also be triggered if my quote
+                    # modifier characters are incomplete
+                    write_logfile_entry(
+                        "Note: found word $str at quote modifier location\n");
+                }
+            }
+
+            # re-initialize
+            $allowed_quote_modifiers = EMPTY_STRING;
+        }
+        return;
+    } ## end sub do_FOLLOW_QUOTE
+
+    # ------------------------------------------------------------
+    # begin hash of code for handling most token types
+    # ------------------------------------------------------------
+    my $tokenization_code = {
+
+        # Dispatch table: each key is the text of a token and each value
+        # is a reference to the named sub (do_*) which handles it.
+        # Tokens with no entry here get no special handling from this
+        # table; see the commented-out list at the end.
+
+        '>'   => \&do_GREATER_THAN_SIGN,
+        '|'   => \&do_VERTICAL_LINE,
+        '$'   => \&do_DOLLAR_SIGN,
+        '('   => \&do_LEFT_PARENTHESIS,
+        ')'   => \&do_RIGHT_PARENTHESIS,
+        ','   => \&do_COMMA,
+        ';'   => \&do_SEMICOLON,
+        '"'   => \&do_QUOTATION_MARK,
+        "'"   => \&do_APOSTROPHE,
+        '`'   => \&do_BACKTICK,
+        '/'   => \&do_SLASH,
+        '{'   => \&do_LEFT_CURLY_BRACKET,
+        '}'   => \&do_RIGHT_CURLY_BRACKET,
+        '&'   => \&do_AMPERSAND,
+        '<'   => \&do_LESS_THAN_SIGN,
+        '?'   => \&do_QUESTION_MARK,
+        '*'   => \&do_STAR,
+        '.'   => \&do_DOT,
+        ':'   => \&do_COLON,
+        '+'   => \&do_PLUS_SIGN,
+        '@'   => \&do_AT_SIGN,
+        '%'   => \&do_PERCENT_SIGN,
+        '['   => \&do_LEFT_SQUARE_BRACKET,
+        ']'   => \&do_RIGHT_SQUARE_BRACKET,
+        '-'   => \&do_MINUS_SIGN,
+        '^'   => \&do_CARAT_SIGN,
+        '::'  => \&do_DOUBLE_COLON,
+        '<<'  => \&do_LEFT_SHIFT,
+        '<<~' => \&do_NEW_HERE_DOC,
+        '->'  => \&do_POINTER,
+        '++'  => \&do_PLUS_PLUS,
+        '=>'  => \&do_FAT_COMMA,
+        '--'  => \&do_MINUS_MINUS,
+        '&&'  => \&do_LOGICAL_AND,
+        '||'  => \&do_LOGICAL_OR,
+        '//'  => \&do_SLASH_SLASH,
+
+        # No special code for these types yet, but syntax checks
+        # could be added.
+        ##  '!'   => undef,
+        ##  '!='  => undef,
+        ##  '!~'  => undef,
+        ##  '%='  => undef,
+        ##  '&&=' => undef,
+        ##  '&='  => undef,
+        ##  '+='  => undef,
+        ##  '-='  => undef,
+        ##  '..'  => undef,
+        ##  '..'  => undef,
+        ##  '...' => undef,
+        ##  '.='  => undef,
+        ##  '<<=' => undef,
+        ##  '<='  => undef,
+        ##  '<=>' => undef,
+        ##  '<>'  => undef,
+        ##  '='   => undef,
+        ##  '=='  => undef,
+        ##  '=~'  => undef,
+        ##  '>='  => undef,
+        ##  '>>'  => undef,
+        ##  '>>=' => undef,
+        ##  '\\'  => undef,
+        ##  '^='  => undef,
+        ##  '|='  => undef,
+        ##  '||=' => undef,
+        ##  '//=' => undef,
+        ##  '~'   => undef,
+        ##  '~~'  => undef,
+        ##  '!~~' => undef,
+
+    };
+
+    # ------------------------------------------------------------
+    # end hash of code for handling individual token types
+    # ------------------------------------------------------------
  
      use constant DEBUG_TOKENIZE => 0;
  
@@ -3182,30 +4412,23 @@ EOM
    #
    # -----------------------------------------------------------------------
  
-        my $line_of_tokens = shift;
+        my ( $self, $line_of_tokens ) = @_;
          my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
  
-        # patch while coding change is underway
-        # make callers private data to allow access
-        # $tokenizer_self = $caller_tokenizer_self;
-
-        # extract line number for use in error messages
+        # Extract line number for use in error messages
          $input_line_number = $line_of_tokens->{_line_number};
  
-        # reinitialize for multi-line quote
-        $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q';
-
-        # check for pod documentation
+        # Check for pod documentation
          if ( substr( $untrimmed_input_line, 0, 1 ) eq '='
              && $untrimmed_input_line =~ /^=[A-Za-z_]/ )
          {
  
-            # must not be in multi-line quote
+            # Must not be in multi-line quote
              # and must not be in an equation
              if ( !$in_quote
                  && ( operator_expected( [ 'b', '=', 'b' ] ) == TERM ) )
              {
-                $tokenizer_self->[_in_pod_] = 1;
+                $self->[_in_pod_] = 1;
                  return;
              }
          }
@@ -3219,19 +4442,85 @@ EOM
          # a fat comma.
          my $is_END_or_DATA;
  
-        # trim start of this line unless we are continuing a quoted line
-        # do not trim end because we might end in a quote (test: deken4.pl)
-        # Perl::Tidy::Formatter will delete needless trailing blanks
-        unless ( $in_quote && ( $quote_type eq 'Q' ) ) {
-            $input_line =~ s/^\s+//;    # trim left end
+        # Reinitialize the multi-line quote flag
+        if ( $in_quote && $quote_type eq 'Q' ) {
+            $line_of_tokens->{_starting_in_quote} = 1;
+        }
+        else {
+            $line_of_tokens->{_starting_in_quote} = 0;
+
+            # Trim start of this line unless we are continuing a quoted line.
+            # Do not trim end because we might end in a quote (test: deken4.pl)
+            # Perl::Tidy::Formatter will delete needless trailing blanks
+            $input_line =~ s/^(\s+)//;
+
+            # Calculate a guessed level for nonblank lines to avoid calls to
+            #    sub guess_old_indentation_level()
+            if ( length($input_line) && $1 ) {
+                my $leading_spaces = $1;
+                my $spaces         = length($leading_spaces);
+
+                # handle leading tabs
+                if ( ord( substr( $leading_spaces, 0, 1 ) ) == ORD_TAB
+                    && $leading_spaces =~ /^(\t+)/ )
+                {
+                    my $tabsize = $self->[_tabsize_];
+                    $spaces += length($1) * ( $tabsize - 1 );
+                }
+
+                my $indent_columns = $self->[_indent_columns_];
+                $line_of_tokens->{_guessed_indentation_level} =
+                  int( $spaces / $indent_columns );
+            }
  
              $is_END_or_DATA = substr( $input_line, 0, 1 ) eq '_'
-              && $input_line =~ /^\s*__(END|DATA)__\s*$/;
+              && $input_line =~ /^__(END|DATA)__\s*$/;
+        }
+
+        # Optimize for a full-line comment.
+        if ( !$in_quote ) {
+            if ( substr( $input_line, 0, 1 ) eq '#' ) {
+
+                # and check for skipped section
+                if (   $rOpts_code_skipping
+                    && $input_line =~ /$code_skipping_pattern_begin/ )
+                {
+                    $self->[_in_skipped_] = 1;
+                    return;
+                }
+
+                # Optional fast processing of a block comment
+                my $ci_string_sum =
+                  ( my $str = $ci_string_in_tokenizer ) =~ tr/1/0/;
+                my $ci_string_i = $ci_string_sum + $in_statement_continuation;
+                $line_of_tokens->{_line_type}        = 'CODE';
+                $line_of_tokens->{_rtokens}          = [$input_line];
+                $line_of_tokens->{_rtoken_type}      = ['#'];
+                $line_of_tokens->{_rlevels}          = [$level_in_tokenizer];
+                $line_of_tokens->{_rci_levels}       = [$ci_string_i];
+                $line_of_tokens->{_rblock_type}      = [EMPTY_STRING];
+                $line_of_tokens->{_nesting_tokens_0} = $nesting_token_string;
+                $line_of_tokens->{_nesting_blocks_0} = $nesting_block_string;
+                return;
+            }
+
+            # Optimize handling of a blank line
+            if ( !length($input_line) ) {
+                $line_of_tokens->{_line_type}        = 'CODE';
+                $line_of_tokens->{_rtokens}          = [];
+                $line_of_tokens->{_rtoken_type}      = [];
+                $line_of_tokens->{_rlevels}          = [];
+                $line_of_tokens->{_rci_levels}       = [];
+                $line_of_tokens->{_rblock_type}      = [];
+                $line_of_tokens->{_nesting_tokens_0} = $nesting_token_string;
+                $line_of_tokens->{_nesting_blocks_0} = $nesting_block_string;
+                return;
+            }
          }
  
          # update the copy of the line for use in error messages
          # This must be exactly what we give the pre_tokenizer
-        $tokenizer_self->[_line_of_text_] = $input_line;
+        $self->[_line_of_text_] = $input_line;
  
          # re-initialize for the main loop
          $routput_token_list     = [];    # stack of output token indexes
@@ -3252,177 +4541,69 @@ EOM
          $indent_flag     = 0;
          $peeked_ahead    = 0;
  
-        # tokenization is done in two stages..
-        # stage 1 is a very simple pre-tokenization
-        my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
+        $self->tokenizer_main_loop($is_END_or_DATA);
  
-        # optimize for a full-line comment
-        if ( !$in_quote && substr( $input_line, 0, 1 ) eq '#' ) {
-            $max_tokens_wanted = 1;    # no use tokenizing a comment
+        #-----------------------------------------------
+        # all done tokenizing this line ...
+        # now prepare the final list of tokens and types
+        #-----------------------------------------------
  
-            # and check for skipped section
-            if (   $rOpts_code_skipping
-                && $input_line =~ /$code_skipping_pattern_begin/ )
-            {
-                $tokenizer_self->[_in_skipped_] = 1;
-                return;
-            }
-        }
+        $self->tokenizer_wrapup_line($line_of_tokens);
+
+        return;
+    } ## end sub tokenize_this_line
+
+    sub tokenizer_main_loop {
+
+        my ( $self, $is_END_or_DATA ) = @_;
+
+        #---------------------------------
+        # Break one input line into tokens
+        #---------------------------------
+
+        # Input parameter:
+        #   $is_END_or_DATA is true for a __END__ or __DATA__ line
  
          # start by breaking the line into pre-tokens
+        my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
          ( $rtokens, $rtoken_map, $rtoken_type ) =
            pre_tokenize( $input_line, $max_tokens_wanted );
  
          $max_token_index = scalar( @{$rtokens} ) - 1;
-        push( @{$rtokens}, ' ', ' ', ' ' );  # extra whitespace simplifies logic
+        push( @{$rtokens}, SPACE, SPACE, SPACE )
+          ;                        # extra whitespace simplifies logic
          push( @{$rtoken_map},  0,   0,   0 );     # shouldn't be referenced
          push( @{$rtoken_type}, 'b', 'b', 'b' );
  
          # initialize for main loop
+        if (0) { #<<< this is not necessary
          foreach my $ii ( 0 .. $max_token_index + 3 ) {
-            $routput_token_type->[$ii]     = "";
-            $routput_block_type->[$ii]     = "";
-            $routput_container_type->[$ii] = "";
-            $routput_type_sequence->[$ii]  = "";
+            $routput_token_type->[$ii]     = EMPTY_STRING;
+            $routput_block_type->[$ii]     = EMPTY_STRING;
+            $routput_container_type->[$ii] = EMPTY_STRING;
+            $routput_type_sequence->[$ii]  = EMPTY_STRING;
              $routput_indent_flag->[$ii]    = 0;
          }
+        }
+
          $i     = -1;
          $i_tok = -1;
  
-        # ------------------------------------------------------------
+        #-----------------------------
          # begin main tokenization loop
-        # ------------------------------------------------------------
+        #-----------------------------
  
          # we are looking at each pre-token of one line and combining them
          # into tokens
          while ( ++$i <= $max_token_index ) {
  
-            if ($in_quote) {    # continue looking for end of a quote
-                $type = $quote_type;
-
-                unless ( @{$routput_token_list} )
-                {               # initialize if continuation line
-                    push( @{$routput_token_list}, $i );
-                    $routput_token_type->[$i] = $type;
-
-                }
-                $tok = $quote_character if ($quote_character);
-
-                # scan for the end of the quote or pattern
-                (
-                    $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
-                    $quoted_string_1, $quoted_string_2
-                  )
-                  = do_quote(
-                    $i,               $in_quote,    $quote_character,
-                    $quote_pos,       $quote_depth, $quoted_string_1,
-                    $quoted_string_2, $rtokens,     $rtoken_map,
-                    $max_token_index
-                  );
-
-                # all done if we didn't find it
-                last if ($in_quote);
-
-                # save pattern and replacement text for rescanning
-                my $qs1 = $quoted_string_1;
-                my $qs2 = $quoted_string_2;
-
-                # re-initialize for next search
-                $quote_character = '';
-                $quote_pos       = 0;
-                $quote_type      = 'Q';
-                $quoted_string_1 = "";
-                $quoted_string_2 = "";
-                last if ( ++$i > $max_token_index );
-
-                # look for any modifiers
-                if ($allowed_quote_modifiers) {
-
-                    # check for exact quote modifiers
-                    if ( $rtokens->[$i] =~ /^[A-Za-z_]/ ) {
-                        my $str = $rtokens->[$i];
-                        my $saw_modifier_e;
-                        while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
-                            my $pos  = pos($str);
-                            my $char = substr( $str, $pos - 1, 1 );
-                            $saw_modifier_e ||= ( $char eq 'e' );
-                        }
-
-                        # For an 'e' quote modifier we must scan the replacement
-                        # text for here-doc targets...
-                        # but if the modifier starts a new line we can skip
-                        # this because either the here doc will be fully
-                        # contained in the replacement text (so we can
-                        # ignore it) or Perl will not find it.
-                        # See test 'here2.in'.
-                        if ( $saw_modifier_e && $i_tok >= 0 ) {
-
-                            my $rht = scan_replacement_text($qs1);
-
-                            # Change type from 'Q' to 'h' for quotes with
-                            # here-doc targets so that the formatter (see sub
-                            # process_line_of_CODE) will not make any line
-                            # breaks after this point.
-                            if ($rht) {
-                                push @{$rhere_target_list}, @{$rht};
-                                $type = 'h';
-                                if ( $i_tok < 0 ) {
-                                    my $ilast = $routput_token_list->[-1];
-                                    $routput_token_type->[$ilast] = $type;
-                                }
-                            }
-                        }
-
-                        if ( defined( pos($str) ) ) {
-
-                            # matched
-                            if ( pos($str) == length($str) ) {
-                                last if ( ++$i > $max_token_index );
-                            }
-
-                            # Looks like a joined quote modifier
-                            # and keyword, maybe something like
-                            # s/xxx/yyy/gefor @k=...
-                            # Example is "galgen.pl".  Would have to split
-                            # the word and insert a new token in the
-                            # pre-token list.  This is so rare that I haven't
-                            # done it.  Will just issue a warning citation.
-
-                            # This error might also be triggered if my quote
-                            # modifier characters are incomplete
-                            else {
-                                warning(<<EOM);
-
-Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
-Please put a space between quote modifiers and trailing keywords.
-EOM
-
-                         # print "token $rtokens->[$i]\n";
-                         # my $num = length($str) - pos($str);
-                         # $rtokens->[$i]=substr($rtokens->[$i],pos($str),$num);
-                         # print "continuing with new token $rtokens->[$i]\n";
-
-                                # skipping past this token does least damage
-                                last if ( ++$i > $max_token_index );
-                            }
-                        }
-                        else {
-
-                            # example file: rokicki4.pl
-                            # This error might also be triggered if my quote
-                            # modifier characters are incomplete
-                            write_logfile_entry(
-"Note: found word $str at quote modifier location\n"
-                            );
-                        }
-                    }
-
-                    # re-initialize
-                    $allowed_quote_modifiers = "";
-                }
+            # continue looking for the end of a quote
+            if ($in_quote) {
+                do_FOLLOW_QUOTE();
+                last if ( $in_quote || $i > $max_token_index );
              }
  
-            unless ( $type eq 'b' || $tok eq 'CORE::' ) {
+            if ( $type ne 'b' && $tok ne 'CORE::' ) {
  
                  # try to catch some common errors
                  if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
@@ -3435,20 +4616,27 @@ EOM
                      }
                  }
  
-                $last_last_nonblank_token      = $last_nonblank_token;
-                $last_last_nonblank_type       = $last_nonblank_type;
-                $last_last_nonblank_block_type = $last_nonblank_block_type;
-                $last_last_nonblank_container_type =
-                  $last_nonblank_container_type;
-                $last_last_nonblank_type_sequence =
-                  $last_nonblank_type_sequence;
-                $last_nonblank_token          = $tok;
-                $last_nonblank_type           = $type;
-                $last_nonblank_prototype      = $prototype;
-                $last_nonblank_block_type     = $block_type;
-                $last_nonblank_container_type = $container_type;
-                $last_nonblank_type_sequence  = $type_sequence;
-                $last_nonblank_i              = $i_tok;
+                # fix c090, only rotate vars if a new token will be stored
+                if ( $i_tok >= 0 ) {
+                    $last_last_nonblank_token      = $last_nonblank_token;
+                    $last_last_nonblank_type       = $last_nonblank_type;
+                    $last_last_nonblank_block_type = $last_nonblank_block_type;
+                    $last_last_nonblank_container_type =
+                      $last_nonblank_container_type;
+                    $last_last_nonblank_type_sequence =
+                      $last_nonblank_type_sequence;
+
+                    # Fix part #3 for git82: propagate type 'Z' through L-R pair
+                    unless ( $type eq 'R' && $last_nonblank_type eq 'Z' ) {
+                        $last_nonblank_token = $tok;
+                        $last_nonblank_type  = $type;
+                    }
+                    $last_nonblank_prototype      = $prototype;
+                    $last_nonblank_block_type     = $block_type;
+                    $last_nonblank_container_type = $container_type;
+                    $last_nonblank_type_sequence  = $type_sequence;
+                    $last_nonblank_i              = $i_tok;
+                }
  
                  # Patch for c030: Fix things in case a '->' got separated from
                  # the subsequent identifier by a side comment.  We need the
@@ -3457,8 +4645,7 @@ EOM
                  # fix for git #63.
                  if ( $last_last_nonblank_token eq '->' ) {
                      if (   $last_nonblank_type eq 'w'
-                        || $last_nonblank_type eq 'i'
-                        && substr( $last_nonblank_token, 0, 1 ) eq '$' )
+                        || $last_nonblank_type eq 'i' )
                      {
                          $last_nonblank_token = '->' . $last_nonblank_token;
                          $last_nonblank_type  = 'i';
@@ -3474,23 +4661,37 @@ EOM
                  $routput_type_sequence->[$i_tok]  = $type_sequence;
                  $routput_indent_flag->[$i_tok]    = $indent_flag;
              }
-            my $pre_tok  = $rtokens->[$i];        # get the next pre-token
-            my $pre_type = $rtoken_type->[$i];    # and type
-            $tok        = $pre_tok;
-            $type       = $pre_type;              # to be modified as necessary
-            $block_type = "";    # blank for all tokens except code block braces
-            $container_type = "";    # blank for all tokens except some parens
-            $type_sequence  = "";    # blank for all tokens except ?/:
-            $indent_flag    = 0;
-            $prototype = "";    # blank for all tokens except user defined subs
-            $i_tok     = $i;
+
+            # get the next pre-token and type
+            # $tok and $type will be modified to make the output token
+            my $pre_tok  = $tok  = $rtokens->[$i];      # get the next pre-token
+            my $pre_type = $type = $rtoken_type->[$i];  # and type
+
+            # remember the starting index of this token; we will be updating $i
+            $i_tok = $i;
+
+            # re-initialize various flags for the next output token
+            $block_type     &&= EMPTY_STRING;
+            $container_type &&= EMPTY_STRING;
+            $type_sequence  &&= EMPTY_STRING;
+            $indent_flag    &&= 0;
+            $prototype      &&= EMPTY_STRING;
  
              # this pre-token will start an output token
              push( @{$routput_token_list}, $i_tok );
  
+            #--------------------------
+            # handle a whitespace token
+            #--------------------------
+            next if ( $pre_type eq 'b' );
+
+            #-----------------
+            # handle a comment
+            #-----------------
+            last if ( $pre_type eq '#' );
+
              # continue gathering identifier if necessary
-            # but do not start on blanks and comments
-            if ( $id_scan_state && $pre_type ne 'b' && $pre_type ne '#' ) {
+            if ($id_scan_state) {
  
                  if ( $is_sub{$id_scan_state} || $is_package{$id_scan_state} ) {
                      scan_id();
@@ -3499,7 +4700,25 @@ EOM
                      scan_identifier();
                  }
  
-                last if ($id_scan_state);
+                if ($id_scan_state) {
+
+                    # Still scanning ...
+                    # Check for side comment between sub and prototype (c061)
+
+                    # done if nothing left to scan on this line
+                    last if ( $i > $max_token_index );
+
+                    my ( $next_nonblank_token, $i_next ) =
+                      find_next_nonblank_token_on_this_line( $i, $rtokens,
+                        $max_token_index );
+
+                    # done if it was just some trailing space
+                    last if ( $i_next > $max_token_index );
+
+                    # something remains on the line ... must be a side comment
+                    next;
+                }
+
                  next if ( ( $i > 0 ) || $type );
  
                  # didn't find any token; start over
@@ -3507,753 +4726,146 @@ EOM
                  $tok  = $pre_tok;
              }
  
-            # handle whitespace tokens..
-            next if ( $type eq 'b' );
-            my $prev_tok  = $i > 0 ? $rtokens->[ $i - 1 ]     : ' ';
+            my $prev_tok  = $i > 0 ? $rtokens->[ $i - 1 ]     : SPACE;
              my $prev_type = $i > 0 ? $rtoken_type->[ $i - 1 ] : 'b';
  
-            # Build larger tokens where possible, since we are not in a quote.
-            #
-            # First try to assemble digraphs.  The following tokens are
-            # excluded and handled specially:
+            #-----------------------------------------------------------
+            # Combine pre-tokens into digraphs and trigraphs if possible
+            #-----------------------------------------------------------
+
+            # See if we can make a digraph...
+            # The following tokens are excluded and handled specially:
              # '/=' is excluded because the / might start a pattern.
              # 'x=' is excluded since it might be $x=, with $ on previous line
              # '**' and *= might be typeglobs of punctuation variables
              # I have allowed tokens starting with <, such as <=,
              # because I don't think these could be valid angle operators.
              # test file: storrs4.pl
-            my $test_tok   = $tok . $rtokens->[ $i + 1 ];
-            my $combine_ok = $is_digraph{$test_tok};
-
-            # check for special cases which cannot be combined
-            if ($combine_ok) {
-
-                # '//' must be defined_or operator if an operator is expected.
-                # TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
-                # could be migrated here for clarity
-
-              # Patch for RT#102371, misparsing a // in the following snippet:
-              #     state $b //= ccc();
-              # The solution is to always accept the digraph (or trigraph) after
-              # token type 'Z' (possible file handle).  The reason is that
-              # sub operator_expected gives TERM expected here, which is
-              # wrong in this case.
-                if ( $test_tok eq '//' && $last_nonblank_type ne 'Z' ) {
-                    my $next_type = $rtokens->[ $i + 1 ];
-                    my $expecting =
-                      operator_expected( [ $prev_type, $tok, $next_type ] );
-
-                    # Patched for RT#101547, was 'unless ($expecting==OPERATOR)'
-                    $combine_ok = 0 if ( $expecting == TERM );
-                }
-
-                # Patch for RT #114359: Missparsing of "print $x ** 0.5;
-                # Accept the digraphs '**' only after type 'Z'
-                # Otherwise postpone the decision.
-                if ( $test_tok eq '**' ) {
-                    if ( $last_nonblank_type ne 'Z' ) { $combine_ok = 0 }
-                }
-            }
-
-            if (
-                $combine_ok
-
-                && ( $test_tok ne '/=' )    # might be pattern
-                && ( $test_tok ne 'x=' )    # might be $x
-                && ( $test_tok ne '*=' )    # typeglob?
-
-                # Moved above as part of fix for
-                # RT #114359: Missparsing of "print $x ** 0.5;
-                # && ( $test_tok ne '**' )    # typeglob?
-              )
-            {
-                $tok = $test_tok;
-                $i++;
-
-                # Now try to assemble trigraphs.  Note that all possible
-                # perl trigraphs can be constructed by appending a character
-                # to a digraph.
-                $test_tok = $tok . $rtokens->[ $i + 1 ];
-
-                if ( $is_trigraph{$test_tok} ) {
-                    $tok = $test_tok;
-                    $i++;
-                }
-
-                # The only current tetragraph is the double diamond operator
-                # and its first three characters are not a trigraph, so
-                # we do can do a special test for it
-                elsif ( $test_tok eq '<<>' ) {
-                    $test_tok .= $rtokens->[ $i + 2 ];
-                    if ( $is_tetragraph{$test_tok} ) {
-                        $tok = $test_tok;
-                        $i += 2;
-                    }
-                }
-            }
-
-            $type      = $tok;
-            $next_tok  = $rtokens->[ $i + 1 ];
-            $next_type = $rtoken_type->[ $i + 1 ];
-
-            DEBUG_TOKENIZE && do {
-                local $" = ')(';
-                my @debug_list = (
-                    $last_nonblank_token,      $tok,
-                    $next_tok,                 $brace_depth,
-                    $brace_type[$brace_depth], $paren_depth,
-                    $paren_type[$paren_depth]
-                );
-                print STDOUT "TOKENIZE:(@debug_list)\n";
-            };
-
-            # Turn off attribute list on first non-blank, non-bareword.
-            # Added '#' to fix c038.
-            if ( $pre_type ne 'w' && $pre_type ne '#' ) {
-                $in_attribute_list = 0;
-            }
-
-            ###############################################################
-            # We have the next token, $tok.
-            # Now we have to examine this token and decide what it is
-            # and define its $type
-            #
-            # section 1: bare words
-            ###############################################################
-
-            if ( $pre_type eq 'w' ) {
-                $expecting =
-                  operator_expected( [ $prev_type, $tok, $next_type ] );
-
-                # Patch for c043, part 3: A bareword after '->' expects a TERM
-                # FIXME: It would be cleaner to give method calls a new type 'M'
-                # and update sub operator_expected to handle this.
-                if ( $last_nonblank_type eq '->' ) {
-                    $expecting = TERM;
-                }
-
-                my ( $next_nonblank_token, $i_next ) =
-                  find_next_nonblank_token( $i, $rtokens, $max_token_index );
-
-                # ATTRS: handle sub and variable attributes
-                if ($in_attribute_list) {
-
-                    # treat bare word followed by open paren like qw(
-                    if ( $next_nonblank_token eq '(' ) {
-
-                        # For something like:
-                        #     : prototype($$)
-                        # we should let do_scan_sub see it so that it can see
-                        # the prototype.  All other attributes get parsed as a
-                        # quoted string.
-                        if ( $tok eq 'prototype' ) {
-                            $id_scan_state = 'prototype';
-
-                            # start just after the word 'prototype'
-                            my $i_beg = $i + 1;
-                            ( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
-                                {
-                                    input_line      => $input_line,
-                                    i               => $i,
-                                    i_beg           => $i_beg,
-                                    tok             => $tok,
-                                    type            => $type,
-                                    rtokens         => $rtokens,
-                                    rtoken_map      => $rtoken_map,
-                                    id_scan_state   => $id_scan_state,
-                                    max_token_index => $max_token_index
-                                }
-                            );
-
-                   # If successful, mark as type 'q' to be consistent with other
-                   # attributes.  Note that type 'w' would also work.
-                            if ( $i > $i_beg ) {
-                                $type = 'q';
-                                next;
-                            }
-
-                            # If not successful, continue and parse as a quote.
-                        }
-
-                        # All other attribute lists must be parsed as quotes
-                        # (see 'signatures.t' for good examples)
-                        $in_quote                = $quote_items{'q'};
-                        $allowed_quote_modifiers = $quote_modifiers{'q'};
-                        $type                    = 'q';
-                        $quote_type              = 'q';
-                        next;
-                    }
-
-                    # handle bareword not followed by open paren
-                    else {
-                        $type = 'w';
-                        next;
-                    }
-                }
-
-                # quote a word followed by => operator
-                # unless the word __END__ or __DATA__ and the only word on
-                # the line.
-                if ( !$is_END_or_DATA && $next_nonblank_token eq '=' ) {
-
-                    if ( $rtokens->[ $i_next + 1 ] eq '>' ) {
-                        if ( $is_constant{$current_package}{$tok} ) {
-                            $type = 'C';
-                        }
-                        elsif ( $is_user_function{$current_package}{$tok} ) {
-                            $type = 'U';
-                            $prototype =
-                              $user_function_prototype{$current_package}{$tok};
-                        }
-                        elsif ( $tok =~ /^v\d+$/ ) {
-                            $type = 'v';
-                            report_v_string($tok);
-                        }
-                        else {
-
-                           # Bareword followed by a fat comma ... see 'git18.in'
-                           # If tok is something like 'x17' then it could
-                           # actually be operator x followed by number 17.
-                           # For example, here:
-                           #     123x17 => [ 792, 1224 ],
-                           # (a key of 123 repeated 17 times, perhaps not
-                           # what was intended). We will mark x17 as type
-                           # 'n' and it will be split. If the previous token
-                           # was also a bareword then it is not very clear is
-                           # going on.  In this case we will not be sure that
-                           # an operator is expected, so we just mark it as a
-                           # bareword.  Perl is a little murky in what it does
-                           # with stuff like this, and its behavior can change
-                           # over time.  Something like
-                           #    a x18 => [792, 1224], will compile as
-                           # a key with 18 a's.  But something like
-                           #    push @array, a x18;
-                           # is a syntax error.
-                            if ( $expecting == OPERATOR && $tok =~ /^x\d+$/ ) {
-                                $type = 'n';
-                            }
-                            else {
-
-                                # git #18
-                                $type = 'w';
-                                error_if_expecting_OPERATOR();
-                            }
-                        }
-
-                        next;
-                    }
-                }
-
-     # quote a bare word within braces..like xxx->{s}; note that we
-     # must be sure this is not a structural brace, to avoid
-     # mistaking {s} in the following for a quoted bare word:
-     #     for(@[){s}bla}BLA}
-     # Also treat q in something like var{-q} as a bare word, not qoute operator
-                if (
-                    $next_nonblank_token eq '}'
-                    && (
-                        $last_nonblank_type eq 'L'
-                        || (   $last_nonblank_type eq 'm'
-                            && $last_last_nonblank_type eq 'L' )
-                    )
-                  )
-                {
-                    $type = 'w';
-                    next;
-                }
-
-                # Scan a bare word following a -> as an identifir; it could
-                # have a long package name.  Fixes c037, c041.
-                if ( $last_nonblank_token eq '->' ) {
-                    scan_bare_identifier();
-
-                    # Patch for c043, part 4; use type 'w' after a '->'.
-                    # This is just a safety check on sub scan_bare_identifier,
-                    # which should get this case correct.
-                    $type = 'w';
-                    next;
-                }
-
-                # a bare word immediately followed by :: is not a keyword;
-                # use $tok_kw when testing for keywords to avoid a mistake
-                my $tok_kw = $tok;
-                if (   $rtokens->[ $i + 1 ] eq ':'
-                    && $rtokens->[ $i + 2 ] eq ':' )
-                {
-                    $tok_kw .= '::';
-                }
-
-                # Decide if 'sub :' can be the start of a sub attribute list.
-                # We will decide based on if the colon is followed by a
-                # bareword which is not a keyword.
-                my $sub_attribute_ok_here;
-                if (   $is_sub{$tok_kw}
-                    && $expecting != OPERATOR
-                    && $next_nonblank_token eq ':' )
-                {
-                    my ( $nn_nonblank_token, $i_nn ) =
-                      find_next_nonblank_token( $i_next + 1,
-                        $rtokens, $max_token_index );
-                    $sub_attribute_ok_here =
-                         $nn_nonblank_token =~ /^\w/
-                      && $nn_nonblank_token !~ /^\d/
-                      && !$is_keyword{$nn_nonblank_token};
-                }
-
-                # handle operator x (now we know it isn't $x=)
-                if (   $expecting == OPERATOR
-                    && substr( $tok, 0, 1 ) eq 'x'
-                    && $tok =~ /^x\d*$/ )
-                {
-                    if ( $tok eq 'x' ) {
-
-                        if ( $rtokens->[ $i + 1 ] eq '=' ) {    # x=
-                            $tok  = 'x=';
-                            $type = $tok;
-                            $i++;
-                        }
-                        else {
-                            $type = 'x';
-                        }
-                    }
-
-                    # NOTE: mark something like x4 as an integer for now
-                    # It gets fixed downstream.  This is easier than
-                    # splitting the pretoken.
-                    else {
-                        $type = 'n';
-                    }
-                }
-                elsif ( $tok_kw eq 'CORE::' ) {
-                    $type = $tok = $tok_kw;
-                    $i += 2;
-                }
-                elsif ( ( $tok eq 'strict' )
-                    and ( $last_nonblank_token eq 'use' ) )
-                {
-                    $tokenizer_self->[_saw_use_strict_] = 1;
-                    scan_bare_identifier();
-                }
-
-                elsif ( ( $tok eq 'warnings' )
-                    and ( $last_nonblank_token eq 'use' ) )
-                {
-                    $tokenizer_self->[_saw_perl_dash_w_] = 1;
-
-                    # scan as identifier, so that we pick up something like:
-                    # use warnings::register
-                    scan_bare_identifier();
-                }
-
-                elsif (
-                       $tok eq 'AutoLoader'
-                    && $tokenizer_self->[_look_for_autoloader_]
-                    && (
-                        $last_nonblank_token eq 'use'
-
-                        # these regexes are from AutoSplit.pm, which we want
-                        # to mimic
-                        || $input_line =~ /^\s*(use|require)\s+AutoLoader\b/
-                        || $input_line =~ /\bISA\s*=.*\bAutoLoader\b/
-                    )
-                  )
-                {
-                    write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
-                    $tokenizer_self->[_saw_autoloader_]      = 1;
-                    $tokenizer_self->[_look_for_autoloader_] = 0;
-                    scan_bare_identifier();
-                }
-
-                elsif (
-                       $tok eq 'SelfLoader'
-                    && $tokenizer_self->[_look_for_selfloader_]
-                    && (   $last_nonblank_token eq 'use'
-                        || $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
-                        || $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
-                  )
-                {
-                    write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
-                    $tokenizer_self->[_saw_selfloader_]      = 1;
-                    $tokenizer_self->[_look_for_selfloader_] = 0;
-                    scan_bare_identifier();
-                }
-
-                elsif ( ( $tok eq 'constant' )
-                    and ( $last_nonblank_token eq 'use' ) )
-                {
-                    scan_bare_identifier();
-                    my ( $next_nonblank_token, $i_next ) =
-                      find_next_nonblank_token( $i, $rtokens,
-                        $max_token_index );
-
-                    if ($next_nonblank_token) {
-
-                        if ( $is_keyword{$next_nonblank_token} ) {
-
-                            # Assume qw is used as a quote and okay, as in:
-                            #  use constant qw{ DEBUG 0 };
-                            # Not worth trying to parse for just a warning
-
-                            # NOTE: This warning is deactivated because recent
-                            # versions of perl do not complain here, but
-                            # the coding is retained for reference.
-                            if ( 0 && $next_nonblank_token ne 'qw' ) {
-                                warning(
-"Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
-                                );
-                            }
-                        }
-
-                        else {
-                            $is_constant{$current_package}{$next_nonblank_token}
-                              = 1;
-                        }
-                    }
-                }
-
-                # various quote operators
-                elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
-##NICOL PATCH
-                    if ( $expecting == OPERATOR ) {
-
-                        # Be careful not to call an error for a qw quote
-                        # where a parenthesized list is allowed.  For example,
-                        # it could also be a for/foreach construct such as
-                        #
-                        #    foreach my $key qw\Uno Due Tres Quadro\ {
-                        #        print "Set $key\n";
-                        #    }
-                        #
-
-                        # Or it could be a function call.
-                        # NOTE: Braces in something like &{ xxx } are not
-                        # marked as a block, we might have a method call.
-                        # &method(...), $method->(..), &{method}(...),
-                        # $ref[2](list) is ok & short for $ref[2]->(list)
-                        #
-                        # See notes in 'sub code_block_type' and
-                        # 'sub is_non_structural_brace'
-
-                        unless (
-                            $tok eq 'qw'
-                            && (   $last_nonblank_token =~ /^([\]\}\&]|\-\>)/
-                                || $is_for_foreach{$want_paren} )
-                          )
-                        {
-                            error_if_expecting_OPERATOR();
-                        }
-                    }
-                    $in_quote                = $quote_items{$tok};
-                    $allowed_quote_modifiers = $quote_modifiers{$tok};
-
-                   # All quote types are 'Q' except possibly qw quotes.
-                   # qw quotes are special in that they may generally be trimmed
-                   # of leading and trailing whitespace.  So they are given a
-                   # separate type, 'q', unless requested otherwise.
-                    $type =
-                      ( $tok eq 'qw' && $tokenizer_self->[_trim_qw_] )
-                      ? 'q'
-                      : 'Q';
-                    $quote_type = $type;
-                }
-
-                # check for a statement label
-                elsif (
-                       ( $next_nonblank_token eq ':' )
-                    && ( $rtokens->[ $i_next + 1 ] ne ':' )
-                    && ( $i_next <= $max_token_index )   # colon on same line
-                    && !$sub_attribute_ok_here           # like 'sub : lvalue' ?
-                    && label_ok()
-                  )
-                {
-                    if ( $tok !~ /[A-Z]/ ) {
-                        push @{ $tokenizer_self->[_rlower_case_labels_at_] },
-                          $input_line_number;
-                    }
-                    $type = 'J';
-                    $tok .= ':';
-                    $i = $i_next;
-                    next;
-                }
-
-                #      'sub' or alias
-                elsif ( $is_sub{$tok_kw} ) {
-                    error_if_expecting_OPERATOR()
-                      if ( $expecting == OPERATOR );
-                    initialize_subname();
-                    scan_id();
-                }
-
-                #      'package'
-                elsif ( $is_package{$tok_kw} ) {
-                    error_if_expecting_OPERATOR()
-                      if ( $expecting == OPERATOR );
-                    scan_id();
-                }
-
-                # Fix for c035: split 'format' from 'is_format_END_DATA' to be
-                # more restrictive. Require a new statement to be ok here.
-                elsif ( $tok_kw eq 'format' && new_statement_ok() ) {
-                    $type = ';';    # make tokenizer look for TERM next
-                    $tokenizer_self->[_in_format_] = 1;
-                    last;
-                }
-
-                # Note on token types for format, __DATA__, __END__:
-                # It simplifies things to give these type ';', so that when we
-                # start rescanning we will be expecting a token of type TERM.
-                # We will switch to type 'k' before outputting the tokens.
-                elsif ( $is_END_DATA{$tok_kw} ) {
-                    $type = ';';    # make tokenizer look for TERM next
-
-                    # Remember that we are in one of these three sections
-                    $tokenizer_self->[ $is_END_DATA{$tok_kw} ] = 1;
-                    last;
-                }
-
-                elsif ( $is_keyword{$tok_kw} ) {
-                    $type = 'k';
-
-                    # Since for and foreach may not be followed immediately
-                    # by an opening paren, we have to remember which keyword
-                    # is associated with the next '('
-                    if ( $is_for_foreach{$tok} ) {
-                        if ( new_statement_ok() ) {
-                            $want_paren = $tok;
-                        }
-                    }
-
-                    # recognize 'use' statements, which are special
-                    elsif ( $is_use_require{$tok} ) {
-                        $statement_type = $tok;
-                        error_if_expecting_OPERATOR()
-                          if ( $expecting == OPERATOR );
-                    }
-
-                    # remember my and our to check for trailing ": shared"
-                    elsif ( $is_my_our_state{$tok} ) {
-                        $statement_type = $tok;
-                    }
-
-                    # Check for misplaced 'elsif' and 'else', but allow isolated
-                    # else or elsif blocks to be formatted.  This is indicated
-                    # by a last noblank token of ';'
-                    elsif ( $tok eq 'elsif' ) {
-                        if (   $last_nonblank_token ne ';'
-                            && $last_nonblank_block_type !~
-                            /^(if|elsif|unless)$/ )
-                        {
-                            warning(
-"expecting '$tok' to follow one of 'if|elsif|unless'\n"
-                            );
-                        }
-                    }
-                    elsif ( $tok eq 'else' ) {
-
-                        # patched for SWITCH/CASE
-                        if (
-                               $last_nonblank_token ne ';'
-                            && $last_nonblank_block_type !~
-                            /^(if|elsif|unless|case|when)$/
-
-                            # patch to avoid an unwanted error message for
-                            # the case of a parenless 'case' (RT 105484):
-                            # switch ( 1 ) { case x { 2 } else { } }
-                            && $statement_type !~
-                            /^(if|elsif|unless|case|when)$/
-                          )
-                        {
-                            warning(
-"expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
-                            );
-                        }
-                    }
-                    elsif ( $tok eq 'continue' ) {
-                        if (   $last_nonblank_token ne ';'
-                            && $last_nonblank_block_type !~
-                            /(^(\{|\}|;|while|until|for|foreach)|:$)/ )
-                        {
-
-                            # note: ';' '{' and '}' in list above
-                            # because continues can follow bare blocks;
-                            # ':' is labeled block
-                            #
-                            ############################################
-                            # NOTE: This check has been deactivated because
-                            # continue has an alternative usage for given/when
-                            # blocks in perl 5.10
-                            ## warning("'$tok' should follow a block\n");
-                            ############################################
-                        }
-                    }
+            if (   $can_start_digraph{$tok}
+                && $i < $max_token_index
+                && $is_digraph{ $tok . $rtokens->[ $i + 1 ] } )
+            {
  
-                    # patch for SWITCH/CASE if 'case' and 'when are
-                    # treated as keywords.  Also 'default' for Switch::Plain
-                    elsif ($tok eq 'when'
-                        || $tok eq 'case'
-                        || $tok eq 'default' )
-                    {
-                        $statement_type = $tok;    # next '{' is block
-                    }
+                my $combine_ok = 1;
+                my $test_tok   = $tok . $rtokens->[ $i + 1 ];
  
-                    #
-                    # indent trailing if/unless/while/until
-                    # outdenting will be handled by later indentation loop
-## DEACTIVATED: unfortunately this can cause some unwanted indentation like:
-##$opt_o = 1
-##  if !(
-##             $opt_b
-##          || $opt_c
-##          || $opt_d
-##          || $opt_f
-##          || $opt_i
-##          || $opt_l
-##          || $opt_o
-##          || $opt_x
-##  );
-##                    if (   $tok =~ /^(if|unless|while|until)$/
-##                        && $next_nonblank_token ne '(' )
-##                    {
-##                        $indent_flag = 1;
-##                    }
-                }
+                # check for special cases which cannot be combined
  
-                # check for inline label following
-                #         /^(redo|last|next|goto)$/
-                elsif (( $last_nonblank_type eq 'k' )
-                    && ( $is_redo_last_next_goto{$last_nonblank_token} ) )
-                {
-                    $type = 'j';
-                    next;
-                }
+                # '//' must be defined_or operator if an operator is expected.
+                # TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
+                # could be migrated here for clarity
  
-                # something else --
-                else {
+                # Patch for RT#102371, misparsing a // in the following snippet:
+                #     state $b //= ccc();
+                # The solution is to always accept the digraph (or trigraph)
+                # after type 'Z' (possible file handle).  The reason is that
+                # sub operator_expected gives TERM expected here, which is
+                # wrong in this case.
+                if ( $test_tok eq '//' && $last_nonblank_type ne 'Z' ) {
  
-                    scan_bare_identifier();
+                    # note that here $tok = '/' and the next tok and type is '/'
+                    $expecting = operator_expected( [ $prev_type, $tok, '/' ] );
  
-                    if (   $statement_type eq 'use'
-                        && $last_nonblank_token eq 'use' )
-                    {
-                        $saw_use_module{$current_package}->{$tok} = 1;
-                    }
+                    # Patched for RT#101547, was 'unless ($expecting==OPERATOR)'
+                    $combine_ok = 0 if ( $expecting == TERM );
+                }
  
-                    if ( $type eq 'w' ) {
+                # Patch for RT #114359: Missparsing of "print $x ** 0.5;
+                # Accept the digraphs '**' only after type 'Z'
+                # Otherwise postpone the decision.
+                if ( $test_tok eq '**' ) {
+                    if ( $last_nonblank_type ne 'Z' ) { $combine_ok = 0 }
+                }
  
-                        if ( $expecting == OPERATOR ) {
+                if (
  
-                            # Patch to avoid error message for RPerl overloaded
-                            # operator functions: use overload
-                            #    '+' => \&sse_add,
-                            #    '-' => \&sse_sub,
-                            #    '*' => \&sse_mul,
-                            #    '/' => \&sse_div;
-                            # FIXME: this should eventually be generalized
-                            if (   $saw_use_module{$current_package}->{'RPerl'}
-                                && $tok =~ /^sse_(mul|div|add|sub)$/ )
-                            {
+                    # still ok to combine?
+                    $combine_ok
  
-                            }
+                    && ( $test_tok ne '/=' )    # might be pattern
+                    && ( $test_tok ne 'x=' )    # might be $x
+                    && ( $test_tok ne '*=' )    # typeglob?
  
-                            # Fix part 1 for git #63 in which a comment falls
-                            # between an -> and the following word.  An
-                            # alternate fix would be to change operator_expected
-                            # to return an UNKNOWN for this type.
-                            elsif ( $last_nonblank_type eq '->' ) {
+                    # Moved above as part of fix for
+                    # RT #114359: Missparsing of "print $x ** 0.5;
+                    # && ( $test_tok ne '**' )    # typeglob?
+                  )
+                {
+                    $tok = $test_tok;
+                    $i++;
  
-                            }
+                    # Now try to assemble trigraphs.  Note that all possible
+                    # perl trigraphs can be constructed by appending a character
+                    # to a digraph.
+                    $test_tok = $tok . $rtokens->[ $i + 1 ];
  
-                            # don't complain about possible indirect object
-                            # notation.
-                            # For example:
-                            #   package main;
-                            #   sub new($) { ... }
-                            #   $b = new A::;  # calls A::new
-                            #   $c = new A;    # same thing but suspicious
-                            # This will call A::new but we have a 'new' in
-                            # main:: which looks like a constant.
-                            #
-                            elsif ( $last_nonblank_type eq 'C' ) {
-                                if ( $tok !~ /::$/ ) {
-                                    complain(<<EOM);
-Expecting operator after '$last_nonblank_token' but found bare word '$tok'
-       Maybe indirectet object notation?
-EOM
-                                }
-                            }
-                            else {
-                                error_if_expecting_OPERATOR("bareword");
-                            }
+                    if ( $is_trigraph{$test_tok} ) {
+                        $tok = $test_tok;
+                        $i++;
+                    }
+
+                    # The only current tetragraph is the double diamond operator
+                    # and its first three characters are not a trigraph, so
+                    # we can do a special test for it
+                    elsif ( $test_tok eq '<<>' ) {
+                        $test_tok .= $rtokens->[ $i + 2 ];
+                        if ( $is_tetragraph{$test_tok} ) {
+                            $tok = $test_tok;
+                            $i += 2;
                          }
+                    }
+                }
+            }
  
-                        # mark bare words immediately followed by a paren as
-                        # functions
-                        $next_tok = $rtokens->[ $i + 1 ];
-                        if ( $next_tok eq '(' ) {
+            $type      = $tok;
+            $next_tok  = $rtokens->[ $i + 1 ];
+            $next_type = $rtoken_type->[ $i + 1 ];
  
-                            # Fix part 2 for git #63.  Leave type as 'w' to keep
-                            # the type the same as if the -> were not separated
-                            $type = 'U' unless ( $last_nonblank_type eq '->' );
-                        }
+            DEBUG_TOKENIZE && do {
+                local $LIST_SEPARATOR = ')(';
+                my @debug_list = (
+                    $last_nonblank_token,      $tok,
+                    $next_tok,                 $brace_depth,
+                    $brace_type[$brace_depth], $paren_depth,
+                    $paren_type[$paren_depth],
+                );
+                print STDOUT "TOKENIZE:(@debug_list)\n";
+            };
  
-                        # underscore after file test operator is file handle
-                        if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
-                            $type = 'Z';
-                        }
+            # Turn off attribute list on first non-blank, non-bareword.
+            # Added '#' to fix c038 (later moved above).
+            if ( $in_attribute_list && $pre_type ne 'w' ) {
+                $in_attribute_list = 0;
+            }
  
-                        # patch for SWITCH/CASE if 'case' and 'when are
-                        # not treated as keywords:
-                        if (
-                            (
-                                   $tok eq 'case'
-                                && $brace_type[$brace_depth] eq 'switch'
-                            )
-                            || (   $tok eq 'when'
-                                && $brace_type[$brace_depth] eq 'given' )
-                          )
-                        {
-                            $statement_type = $tok;    # next '{' is block
-                            $type           = 'k'; # for keyword syntax coloring
-                        }
+            #--------------------------------------------------------
+            # We have the next token, $tok.
+            # Now we have to examine this token and decide what it is
+            # and define its $type
+            #
+            # section 1: bare words
+            #--------------------------------------------------------
  
-                        # patch for SWITCH/CASE if switch and given not keywords
-                        # Switch is not a perl 5 keyword, but we will gamble
-                        # and mark switch followed by paren as a keyword.  This
-                        # is only necessary to get html syntax coloring nice,
-                        # and does not commit this as being a switch/case.
-                        if ( $next_nonblank_token eq '('
-                            && ( $tok eq 'switch' || $tok eq 'given' ) )
-                        {
-                            $type = 'k';    # for keyword syntax coloring
-                        }
-                    }
-                }
+            if ( $pre_type eq 'w' ) {
+                $expecting =
+                  operator_expected( [ $prev_type, $tok, $next_type ] );
+                my $is_last = do_BAREWORD($is_END_or_DATA);
+                last if ($is_last);
              }
  
-            ###############################################################
+            #-----------------------------
              # section 2: strings of digits
-            ###############################################################
+            #-----------------------------
              elsif ( $pre_type eq 'd' ) {
                  $expecting =
                    operator_expected( [ $prev_type, $tok, $next_type ] );
-                error_if_expecting_OPERATOR("Number")
-                  if ( $expecting == OPERATOR );
-
-                my $number = scan_number_fast();
-                if ( !defined($number) ) {
-
-                    # shouldn't happen - we should always get a number
-                    warning("non-number beginning with digit--program bug\n");
-                    report_definite_bug();
-                }
+                do_DIGITS();
              }
  
-            ###############################################################
+            #----------------------------
              # section 3: all other tokens
-            ###############################################################
-
+            #----------------------------
              else {
-                last if ( $tok eq '#' );
                  my $code = $tokenization_code->{$tok};
                  if ($code) {
                      $expecting =
@@ -4268,6 +4880,7 @@ EOM
          # end of main tokenization loop
          # -----------------------------
  
+        # Store the final token
          if ( $i_tok >= 0 ) {
              $routput_token_type->[$i_tok]     = $type;
              $routput_block_type->[$i_tok]     = $block_type;
@@ -4276,7 +4889,8 @@ EOM
              $routput_indent_flag->[$i_tok]    = $indent_flag;
          }
  
-        unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
+        # Remember last nonblank values
+        if ( $type ne 'b' && $type ne '#' ) {
              $last_last_nonblank_token          = $last_nonblank_token;
              $last_last_nonblank_type           = $last_nonblank_type;
              $last_last_nonblank_block_type     = $last_nonblank_block_type;
@@ -4299,539 +4913,455 @@ EOM
              }
          }
  
-        # all done tokenizing this line ...
-        # now prepare the final list of tokens and types
+        $self->[_in_attribute_list_] = $in_attribute_list;
+        $self->[_in_quote_]          = $in_quote;
+        $self->[_quote_target_] =
+          $in_quote ? matching_end_token($quote_character) : EMPTY_STRING;
+        $self->[_rhere_target_list_] = $rhere_target_list;
  
-        my @token_type     = ();   # stack of output token types
-        my @block_type     = ();   # stack of output code block types
-        my @container_type = ();   # stack of output code container types
-        my @type_sequence  = ();   # stack of output type sequence numbers
-        my @tokens         = ();   # output tokens
-        my @levels         = ();   # structural brace levels of output tokens
-        my @slevels        = ();   # secondary nesting levels of output tokens
-        my @nesting_tokens = ();   # string of tokens leading to this depth
-        my @nesting_types  = ();   # string of token types leading to this depth
-        my @nesting_blocks = ();   # string of block types leading to this depth
-        my @nesting_lists  = ();   # string of list types leading to this depth
-        my @ci_string = ();  # string needed to compute continuation indentation
-        my @container_environment = ();    # BLOCK or LIST
-        my $container_environment = '';
-        my $im                    = -1;    # previous $i value
-        my $num;
+        return;
+    } ## end sub tokenizer_main_loop
  
-        # Count the number of '1's in the string (previously sub ones_count)
-        my $ci_string_sum = ( my $str = $ci_string_in_tokenizer ) =~ tr/1/0/;
+    sub tokenizer_wrapup_line {
+        my ( $self, $line_of_tokens ) = @_;
  
-# Computing Token Indentation
-#
-#     The final section of the tokenizer forms tokens and also computes
-#     parameters needed to find indentation.  It is much easier to do it
-#     in the tokenizer than elsewhere.  Here is a brief description of how
-#     indentation is computed.  Perl::Tidy computes indentation as the sum
-#     of 2 terms:
-#
-#     (1) structural indentation, such as if/else/elsif blocks
-#     (2) continuation indentation, such as long parameter call lists.
-#
-#     These are occasionally called primary and secondary indentation.
-#
-#     Structural indentation is introduced by tokens of type '{', although
-#     the actual tokens might be '{', '(', or '['.  Structural indentation
-#     is of two types: BLOCK and non-BLOCK.  Default structural indentation
-#     is 4 characters if the standard indentation scheme is used.
-#
-#     Continuation indentation is introduced whenever a line at BLOCK level
-#     is broken before its termination.  Default continuation indentation
-#     is 2 characters in the standard indentation scheme.
-#
-#     Both types of indentation may be nested arbitrarily deep and
-#     interlaced.  The distinction between the two is somewhat arbitrary.
-#
-#     For each token, we will define two variables which would apply if
-#     the current statement were broken just before that token, so that
-#     that token started a new line:
-#
-#     $level = the structural indentation level,
-#     $ci_level = the continuation indentation level
-#
-#     The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces),
-#     assuming defaults.  However, in some special cases it is customary
-#     to modify $ci_level from this strict value.
-#
-#     The total structural indentation is easy to compute by adding and
-#     subtracting 1 from a saved value as types '{' and '}' are seen.  The
-#     running value of this variable is $level_in_tokenizer.
-#
-#     The total continuation is much more difficult to compute, and requires
-#     several variables.  These variables are:
-#
-#     $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
-#       each indentation level, if there are intervening open secondary
-#       structures just prior to that level.
-#     $continuation_string_in_tokenizer = a string of 1's and 0's indicating
-#       if the last token at that level is "continued", meaning that it
-#       is not the first token of an expression.
-#     $nesting_block_string = a string of 1's and 0's indicating, for each
-#       indentation level, if the level is of type BLOCK or not.
-#     $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
-#     $nesting_list_string = a string of 1's and 0's indicating, for each
-#       indentation level, if it is appropriate for list formatting.
-#       If so, continuation indentation is used to indent long list items.
-#     $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
-#     @{$rslevel_stack} = a stack of total nesting depths at each
-#       structural indentation level, where "total nesting depth" means
-#       the nesting depth that would occur if every nesting token -- '{', '[',
-#       and '(' -- , regardless of context, is used to compute a nesting
-#       depth.
-
-        #my $nesting_block_flag = ($nesting_block_string =~ /1$/);
-        #my $nesting_list_flag = ($nesting_list_string =~ /1$/);
-
-        my ( $ci_string_i, $level_i, $nesting_block_string_i,
-            $nesting_list_string_i, $nesting_token_string_i,
-            $nesting_type_string_i, );
-
-        foreach my $i ( @{$routput_token_list} )
-        {    # scan the list of pre-tokens indexes
-
-            # self-checking for valid token types
-            my $type                    = $routput_token_type->[$i];
-            my $forced_indentation_flag = $routput_indent_flag->[$i];
-
-            # See if we should undo the $forced_indentation_flag.
-            # Forced indentation after 'if', 'unless', 'while' and 'until'
-            # expressions without trailing parens is optional and doesn't
-            # always look good.  It is usually okay for a trailing logical
-            # expression, but if the expression is a function call, code block,
-            # or some kind of list it puts in an unwanted extra indentation
-            # level which is hard to remove.
-            #
-            # Example where extra indentation looks ok:
-            # return 1
-            #   if $det_a < 0 and $det_b > 0
-            #       or $det_a > 0 and $det_b < 0;
-            #
-            # Example where extra indentation is not needed because
-            # the eval brace also provides indentation:
-            # print "not " if defined eval {
-            #     reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4;
-            # };
-            #
-            # The following rule works fairly well:
-            #   Undo the flag if the end of this line, or start of the next
-            #   line, is an opening container token or a comma.
-            # This almost always works, but if not after another pass it will
-            # be stable.
-            if ( $forced_indentation_flag && $type eq 'k' ) {
-                my $ixlast  = -1;
-                my $ilast   = $routput_token_list->[$ixlast];
-                my $toklast = $routput_token_type->[$ilast];
-                if ( $toklast eq '#' ) {
-                    $ixlast--;
-                    $ilast   = $routput_token_list->[$ixlast];
-                    $toklast = $routput_token_type->[$ilast];
-                }
-                if ( $toklast eq 'b' ) {
-                    $ixlast--;
-                    $ilast   = $routput_token_list->[$ixlast];
-                    $toklast = $routput_token_type->[$ilast];
-                }
-                if ( $toklast =~ /^[\{,]$/ ) {
-                    $forced_indentation_flag = 0;
-                }
-                else {
-                    ( $toklast, my $i_next ) =
-                      find_next_nonblank_token( $max_token_index, $rtokens,
-                        $max_token_index );
-                    if ( $toklast =~ /^[\{,]$/ ) {
-                        $forced_indentation_flag = 0;
-                    }
-                }
-            }
+        #---------------------------------------------------------
+        # Package a line of tokens for shipping back to the caller
+        #---------------------------------------------------------
  
-            # if we are already in an indented if, see if we should outdent
-            if ($indented_if_level) {
+        # Most of the remaining work involves defining the two indentation
+        # parameters that the formatter needs for each token:
+        # - $level    = structural indentation level and
+        # - $ci_level = continuation indentation level
  
-                # don't try to nest trailing if's - shouldn't happen
-                if ( $type eq 'k' ) {
-                    $forced_indentation_flag = 0;
-                }
+        # The method for setting the indentation level is straightforward.
+        # But the method used to define the continuation indentation is
+        # complicated because it has evolved over a long time by trial and
+        # error. It could undoubtedly be simplified but it works okay as is.
  
-                # check for the normal case - outdenting at next ';'
-                elsif ( $type eq ';' ) {
-                    if ( $level_in_tokenizer == $indented_if_level ) {
-                        $forced_indentation_flag = -1;
-                        $indented_if_level       = 0;
-                    }
-                }
+        # Here is a brief description of how indentation is computed.
+        # Perl::Tidy computes indentation as the sum of 2 terms:
+        #
+        # (1) structural indentation, such as if/else/elsif blocks
+        # (2) continuation indentation, such as long parameter call lists.
+        #
+        # These are occasionally called primary and secondary indentation.
+        #
+        # Structural indentation is introduced by tokens of type '{',
+        # although the actual tokens might be '{', '(', or '['.  Structural
+        # indentation is of two types: BLOCK and non-BLOCK.  Default
+        # structural indentation is 4 characters if the standard indentation
+        # scheme is used.
+        #
+        # Continuation indentation is introduced whenever a line at BLOCK
+        # level is broken before its termination.  Default continuation
+        # indentation is 2 characters in the standard indentation scheme.
+        #
+        # Both types of indentation may be nested arbitrarily deep and
+        # interlaced.  The distinction between the two is somewhat arbitrary.
+        #
+        # For each token, we will define two variables which would apply if
+        # the current statement were broken just before that token, so that
+        # that token started a new line:
+        #
+        # $level = the structural indentation level,
+        # $ci_level = the continuation indentation level
+        #
+        # The total indentation will be $level * (4 spaces) + $ci_level * (2
+        # spaces), assuming defaults.  However, in some special cases it is
+        # customary to modify $ci_level from this strict value.
+        #
+        # The total structural indentation is easy to compute by adding and
+        # subtracting 1 from a saved value as types '{' and '}' are seen.
+        # The running value of this variable is $level_in_tokenizer.
+        #
+        # The total continuation is much more difficult to compute, and
+        # requires several variables.  These variables are:
+        #
+        # $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
+        #   each indentation level, if there are intervening open secondary
+        #   structures just prior to that level.
+        # $continuation_string_in_tokenizer = a string of 1's and 0's
+        #   indicating if the last token at that level is "continued", meaning
+        #   that it is not the first token of an expression.
+        # $nesting_block_string = a string of 1's and 0's indicating, for each
+        #   indentation level, if the level is of type BLOCK or not.
+        # $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
+        # $nesting_list_string = a string of 1's and 0's indicating, for each
+        #   indentation level, if it is appropriate for list formatting.
+        #   If so, continuation indentation is used to indent long list items.
+        # $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
+        # @{$rslevel_stack} = a stack of total nesting depths at each
+        #   structural indentation level, where "total nesting depth" means
+        #   the nesting depth that would occur if every nesting token
+        #   -- '{', '[', and '(' -- , regardless of context, is used to
+        #   compute a nesting depth.
+
+        # Notes on the Continuation Indentation
+        #
+        # There is a sort of chicken-and-egg problem with continuation
+        # indentation.  The formatter can't make decisions on line breaks
+        # without knowing what 'ci' will be at arbitrary locations.
+        #
+        # But a problem with setting the continuation indentation (ci) here
+        # in the tokenizer is that we do not know where line breaks will
+        # actually be.  As a result, we don't know if we should propagate
+        # continuation indentation to higher levels of structure.
+        #
+        # For nesting of only structural indentation, we never need to do
+        # this.  For example, in a long if statement, like this
+        #
+        #   if ( !$output_block_type[$i]
+        #     && ($in_statement_continuation) )
+        #   {           <--outdented
+        #       do_something();
+        #   }
+        #
+        # the second line has ci but we do not normally give the lines within
+        # the BLOCK any ci.  This would be true if we had blocks nested
+        # arbitrarily deeply.
+        #
+        # But consider something like this, where we have created a break
+        # after an opening paren on line 1, and the paren is not (currently)
+        # a structural indentation token:
+        #
+        # my $file = $menubar->Menubutton(
+        #   qw/-text File -underline 0 -menuitems/ => [
+        #       [
+        #           Cascade    => '~View',
+        #           -menuitems => [
+        #           ...
+        #
+        # The second line has ci, so it would seem reasonable to propagate
+        # it down, giving the third line 1 ci + 1 indentation.  This
+        # suggests the following rule, which is currently used to
+        # propagate ci down: if there are any non-structural opening
+        # parens (or brackets, or braces), before an opening structural
+        # brace, then ci is propagated down, and otherwise
+        # not.  The variable $intervening_secondary_structure contains this
+        # information for the current token, and the string
+        # "$ci_string_in_tokenizer" is a stack of previous values of this
+        # variable.
+
+        my @token_type    = ();    # stack of output token types
+        my @block_type    = ();    # stack of output code block types
+        my @type_sequence = ();    # stack of output type sequence numbers
+        my @tokens        = ();    # output tokens
+        my @levels        = ();    # structural brace levels of output tokens
+        my @ci_string = ();  # string needed to compute continuation indentation
  
-                # handle case of missing semicolon
-                elsif ( $type eq '}' ) {
-                    if ( $level_in_tokenizer == $indented_if_level ) {
-                        $indented_if_level = 0;
+        # Count the number of '1's in the string (previously sub ones_count)
+        my $ci_string_sum = ( my $str = $ci_string_in_tokenizer ) =~ tr/1/0/;
  
-                        # TBD: This could be a subroutine call
-                        $level_in_tokenizer--;
-                        if ( @{$rslevel_stack} > 1 ) {
-                            pop( @{$rslevel_stack} );
-                        }
-                        if ( length($nesting_block_string) > 1 )
-                        {    # true for valid script
-                            chop $nesting_block_string;
-                            chop $nesting_list_string;
-                        }
+        $line_of_tokens->{_nesting_tokens_0} = $nesting_token_string;
  
-                    }
-                }
-            }
+        my ( $ci_string_i, $level_i );
  
-            my $tok = $rtokens->[$i];  # the token, but ONLY if same as pretoken
+        #-----------------
+        # Loop over tokens
+        #-----------------
+        my $rtoken_map_im;
+        foreach my $i ( @{$routput_token_list} ) {
+
+            my $type_i = $routput_token_type->[$i];
              $level_i = $level_in_tokenizer;
  
-            # This can happen by running perltidy on non-scripts
-            # although it could also be bug introduced by programming change.
-            # Perl silently accepts a 032 (^Z) and takes it as the end
-            if ( !$is_valid_token_type{$type} ) {
-                my $val = ord($type);
-                warning(
-                    "unexpected character decimal $val ($type) in script\n");
-                $tokenizer_self->[_in_error_] = 1;
+            # Quick handling of indentation levels for blanks and comments
+            if ( $type_i eq 'b' || $type_i eq '#' ) {
+                $ci_string_i = $ci_string_sum + $in_statement_continuation;
              }
  
-            # ----------------------------------------------------------------
-            # TOKEN TYPE PATCHES
-            #  output __END__, __DATA__, and format as type 'k' instead of ';'
-            # to make html colors correct, etc.
-            my $fix_type = $type;
-            if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' }
-
-            # output anonymous 'sub' as keyword
-            if ( $type eq 't' && $is_sub{$tok} ) { $fix_type = 'k' }
-
-            # -----------------------------------------------------------------
-
-            $nesting_token_string_i = $nesting_token_string;
-            $nesting_type_string_i  = $nesting_type_string;
-            $nesting_block_string_i = $nesting_block_string;
-            $nesting_list_string_i  = $nesting_list_string;
-
-            # set primary indentation levels based on structural braces
-            # Note: these are set so that the leading braces have a HIGHER
-            # level than their CONTENTS, which is convenient for indentation
-            # Also, define continuation indentation for each token.
-            if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 )
-            {
+            # All other types
+            else {
  
-                # use environment before updating
-                $container_environment =
-                    $nesting_block_flag ? 'BLOCK'
-                  : $nesting_list_flag  ? 'LIST'
-                  :                       "";
-
-                # if the difference between total nesting levels is not 1,
-                # there are intervening non-structural nesting types between
-                # this '{' and the previous unclosed '{'
-                my $intervening_secondary_structure = 0;
-                if ( @{$rslevel_stack} ) {
-                    $intervening_secondary_structure =
-                      $slevel_in_tokenizer - $rslevel_stack->[-1];
-                }
+                # $tok_i is the PRE-token.  It only equals the token for symbols
+                my $tok_i = $rtokens->[$i];
  
-     # Continuation Indentation
-     #
-     # Having tried setting continuation indentation both in the formatter and
-     # in the tokenizer, I can say that setting it in the tokenizer is much,
-     # much easier.  The formatter already has too much to do, and can't
-     # make decisions on line breaks without knowing what 'ci' will be at
-     # arbitrary locations.
-     #
-     # But a problem with setting the continuation indentation (ci) here
-     # in the tokenizer is that we do not know where line breaks will actually
-     # be.  As a result, we don't know if we should propagate continuation
-     # indentation to higher levels of structure.
-     #
-     # For nesting of only structural indentation, we never need to do this.
-     # For example, in a long if statement, like this
-     #
-     #   if ( !$output_block_type[$i]
-     #     && ($in_statement_continuation) )
-     #   {           <--outdented
-     #       do_something();
-     #   }
-     #
-     # the second line has ci but we do normally give the lines within the BLOCK
-     # any ci.  This would be true if we had blocks nested arbitrarily deeply.
-     #
-     # But consider something like this, where we have created a break after
-     # an opening paren on line 1, and the paren is not (currently) a
-     # structural indentation token:
-     #
-     # my $file = $menubar->Menubutton(
-     #   qw/-text File -underline 0 -menuitems/ => [
-     #       [
-     #           Cascade    => '~View',
-     #           -menuitems => [
-     #           ...
-     #
-     # The second line has ci, so it would seem reasonable to propagate it
-     # down, giving the third line 1 ci + 1 indentation.  This suggests the
-     # following rule, which is currently used to propagating ci down: if there
-     # are any non-structural opening parens (or brackets, or braces), before
-     # an opening structural brace, then ci is propagated down, and otherwise
-     # not.  The variable $intervening_secondary_structure contains this
-     # information for the current token, and the string
-     # "$ci_string_in_tokenizer" is a stack of previous values of this
-     # variable.
-
-                # save the current states
-                push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
-                $level_in_tokenizer++;
-
-                if ( $level_in_tokenizer > $tokenizer_self->[_maximum_level_] )
+                # Check for an invalid token type..
+                # This can happen by running perltidy on non-scripts, although
+                # it could also be a bug introduced by a programming change.
+                # Perl silently accepts a 032 (^Z) and takes it as the end
+                if ( !$is_valid_token_type{$type_i} ) {
+                    my $val = ord($type_i);
+                    warning(
+"unexpected character decimal $val ($type_i) in script\n"
+                    );
+                    $self->[_in_error_] = 1;
+                }
+
+                # $ternary_indentation_flag indicates that we need a change
+                # in level at a nested ternary, as follows
+                #     1 => at a nested ternary ?
+                #    -1 => at a nested ternary :
+                #     0 => otherwise
+                my $ternary_indentation_flag = $routput_indent_flag->[$i];
+
+                #-------------------------------------------
+                # Section 1: handle a level-increasing token
+                #-------------------------------------------
+                # set primary indentation levels based on structural braces
+                # Note: these are set so that the leading braces have a HIGHER
+                # level than their CONTENTS, which is convenient for indentation
+                # Also, define continuation indentation for each token.
+                if (   $type_i eq '{'
+                    || $type_i eq 'L'
+                    || $ternary_indentation_flag > 0 )
                  {
-                    $tokenizer_self->[_maximum_level_] = $level_in_tokenizer;
-                }
-
-                if ($forced_indentation_flag) {
  
-                    # break BEFORE '?' when there is forced indentation
-                    if ( $type eq '?' ) { $level_i = $level_in_tokenizer; }
-                    if ( $type eq 'k' ) {
-                        $indented_if_level = $level_in_tokenizer;
+                    # if the difference between total nesting levels is not 1,
+                    # there are intervening non-structural nesting types between
+                    # this '{' and the previous unclosed '{'
+                    my $intervening_secondary_structure = 0;
+                    if ( @{$rslevel_stack} ) {
+                        $intervening_secondary_structure =
+                          $slevel_in_tokenizer - $rslevel_stack->[-1];
                      }
  
-                    # do not change container environment here if we are not
-                    # at a real list. Adding this check prevents "blinkers"
-                    # often near 'unless" clauses, such as in the following
-                    # code:
-##          next
-##            unless -e (
-##                    $archive =
-##                      File::Spec->catdir( $_, "auto", $root, "$sub$lib_ext" )
-##            );
-
-                    $nesting_block_string .= "$nesting_block_flag";
-                }
-                else {
+                    # save the current states
+                    push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
+                    $level_in_tokenizer++;
  
-                    if ( $routput_block_type->[$i] ) {
-                        $nesting_block_flag = 1;
-                        $nesting_block_string .= '1';
+                    if ( $level_in_tokenizer > $self->[_maximum_level_] ) {
+                        $self->[_maximum_level_] = $level_in_tokenizer;
                      }
+
+                    if ($ternary_indentation_flag) {
+
+                        # break BEFORE '?' in a nested ternary
+                        if ( $type_i eq '?' ) {
+                            $level_i = $level_in_tokenizer;
+                        }
+
+                        $nesting_block_string .= "$nesting_block_flag";
+                    } ## end if ($ternary_indentation_flag)
                      else {
-                        $nesting_block_flag = 0;
-                        $nesting_block_string .= '0';
+
+                        if ( $routput_block_type->[$i] ) {
+                            $nesting_block_flag = 1;
+                            $nesting_block_string .= '1';
+                        }
+                        else {
+                            $nesting_block_flag = 0;
+                            $nesting_block_string .= '0';
+                        }
                      }
-                }
  
-                # we will use continuation indentation within containers
-                # which are not blocks and not logical expressions
-                my $bit = 0;
-                if ( !$routput_block_type->[$i] ) {
+                    # we will use continuation indentation within containers
+                    # which are not blocks and not logical expressions
+                    my $bit = 0;
+                    if ( !$routput_block_type->[$i] ) {
  
-                    # propagate flag down at nested open parens
-                    if ( $routput_container_type->[$i] eq '(' ) {
-                        $bit = 1 if $nesting_list_flag;
-                    }
+                        # propagate flag down at nested open parens
+                        if ( $routput_container_type->[$i] eq '(' ) {
+                            $bit = 1 if $nesting_list_flag;
+                        }
  
                    # use list continuation if not a logical grouping
                    # /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/
-                    else {
-                        $bit = 1
-                          unless
-                          $is_logical_container{ $routput_container_type->[$i]
-                          };
+                        else {
+                            $bit = 1
+                              unless
+                              $is_logical_container{ $routput_container_type
+                                  ->[$i] };
+                        }
                      }
-                }
-                $nesting_list_string .= $bit;
-                $nesting_list_flag = $bit;
-
-                $ci_string_in_tokenizer .=
-                  ( $intervening_secondary_structure != 0 ) ? '1' : '0';
-                $ci_string_sum =
-                  ( my $str = $ci_string_in_tokenizer ) =~ tr/1/0/;
-                $continuation_string_in_tokenizer .=
-                  ( $in_statement_continuation > 0 ) ? '1' : '0';
-
-   #  Sometimes we want to give an opening brace continuation indentation,
-   #  and sometimes not.  For code blocks, we don't do it, so that the leading
-   #  '{' gets outdented, like this:
-   #
-   #   if ( !$output_block_type[$i]
-   #     && ($in_statement_continuation) )
-   #   {           <--outdented
-   #
-   #  For other types, we will give them continuation indentation.  For example,
-   #  here is how a list looks with the opening paren indented:
-   #
-   #     @LoL =
-   #       ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],
-   #         [ "homer", "marge", "bart" ], );
-   #
-   #  This looks best when 'ci' is one-half of the indentation  (i.e., 2 and 4)
-
-                my $total_ci = $ci_string_sum;
-                if (
-                    !$routput_block_type->[$i]    # patch: skip for BLOCK
-                    && ($in_statement_continuation)
-                    && !( $forced_indentation_flag && $type eq ':' )
-                  )
-                {
-                    $total_ci += $in_statement_continuation
-                      unless ( substr( $ci_string_in_tokenizer, -1 ) eq '1' );
-                }
+                    $nesting_list_string .= $bit;
+                    $nesting_list_flag = $bit;
  
-                $ci_string_i               = $total_ci;
-                $in_statement_continuation = 0;
-            }
+                    $ci_string_in_tokenizer .=
+                      ( $intervening_secondary_structure != 0 ) ? '1' : '0';
+                    $ci_string_sum =
+                      ( my $str = $ci_string_in_tokenizer ) =~ tr/1/0/;
+                    $continuation_string_in_tokenizer .=
+                      ( $in_statement_continuation > 0 ) ? '1' : '0';
  
-            elsif ($type eq '}'
-                || $type eq 'R'
-                || $forced_indentation_flag < 0 )
-            {
+                    #  Sometimes we want to give an opening brace
+                    #  continuation indentation, and sometimes not.  For code
+                    #  blocks, we don't do it, so that the leading '{' gets
+                    #  outdented, like this:
+                    #
+                    #   if ( !$output_block_type[$i]
+                    #     && ($in_statement_continuation) )
+                    #   {           <--outdented
+                    #
+                    #  For other types, we will give them continuation
+                    #  indentation.  For example, here is how a list looks
+                    #  with the opening paren indented:
+                    #
+                    #  @LoL =
+                    #    ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],
+                    #      [ "homer", "marge", "bart" ], );
+                    #
+                    #  This looks best when 'ci' is one-half of the
+                    #  indentation  (i.e., 2 and 4)
+
+                    my $total_ci = $ci_string_sum;
+                    if (
+                        !$routput_block_type->[$i]    # patch: skip for BLOCK
+                        && ($in_statement_continuation)
+                        && !( $ternary_indentation_flag && $type_i eq ':' )
+                      )
+                    {
+                        $total_ci += $in_statement_continuation
+                          unless (
+                            substr( $ci_string_in_tokenizer, -1 ) eq '1' );
+                    }
  
-                # only a nesting error in the script would prevent popping here
-                if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
+                    $ci_string_i               = $total_ci;
+                    $in_statement_continuation = 0;
+                } ## end if ( $type_i eq '{' ||...})
  
-                $level_i = --$level_in_tokenizer;
+                #-------------------------------------------
+                # Section 2: handle a level-decreasing token
+                #-------------------------------------------
+                elsif ($type_i eq '}'
+                    || $type_i eq 'R'
+                    || $ternary_indentation_flag < 0 )
+                {
  
-                # restore previous level values
-                if ( length($nesting_block_string) > 1 )
-                {    # true for valid script
-                    chop $nesting_block_string;
-                    $nesting_block_flag =
-                      substr( $nesting_block_string, -1 ) eq '1';
-                    chop $nesting_list_string;
-                    $nesting_list_flag =
-                      substr( $nesting_list_string, -1 ) eq '1';
+                    # only a nesting error in the script would prevent
+                    # popping here
+                    if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
  
-                    chop $ci_string_in_tokenizer;
-                    $ci_string_sum =
-                      ( my $str = $ci_string_in_tokenizer ) =~ tr/1/0/;
+                    $level_i = --$level_in_tokenizer;
  
-                    $in_statement_continuation =
-                      chop $continuation_string_in_tokenizer;
+                    if ( $level_in_tokenizer < 0 ) {
+                        unless ( $self->[_saw_negative_indentation_] ) {
+                            $self->[_saw_negative_indentation_] = 1;
+                            warning("Starting negative indentation\n");
+                        }
+                    }
  
-                    # zero continuation flag at terminal BLOCK '}' which
-                    # ends a statement.
-                    if ( $routput_block_type->[$i] ) {
+                    # restore previous level values
+                    if ( length($nesting_block_string) > 1 )
+                    {    # true for valid script
+                        chop $nesting_block_string;
+                        $nesting_block_flag =
+                          substr( $nesting_block_string, -1 ) eq '1';
+                        chop $nesting_list_string;
+                        $nesting_list_flag =
+                          substr( $nesting_list_string, -1 ) eq '1';
+
+                        chop $ci_string_in_tokenizer;
+                        $ci_string_sum =
+                          ( my $str = $ci_string_in_tokenizer ) =~ tr/1/0/;
+
+                        $in_statement_continuation =
+                          chop $continuation_string_in_tokenizer;
+
+                        # zero continuation flag at terminal BLOCK '}' which
+                        # ends a statement.
+                        my $block_type_i = $routput_block_type->[$i];
+                        if ($block_type_i) {
+
+                            # ...These include non-anonymous subs
+                            # note: could be sub ::abc { or sub 'abc
+                            if ( substr( $block_type_i, 0, 3 ) eq 'sub'
+                                && $block_type_i =~ m/^sub\s*/gc )
+                            {
  
-                        # ...These include non-anonymous subs
-                        # note: could be sub ::abc { or sub 'abc
-                        if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) {
+                                # note: older versions of perl require the /gc
+                                # modifier here or else the \G does not work.
+                                $in_statement_continuation = 0
+                                  if ( $block_type_i =~ /\G('|::|\w)/gc );
+                            }
  
-                         # note: older versions of perl require the /gc modifier
-                         # here or else the \G does not work.
-                            if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc )
+                            # ...and include all block types except user subs
+                            # with block prototypes and these:
+                            # (sort|grep|map|do|eval)
+                            elsif (
+                                $is_zero_continuation_block_type{$block_type_i}
+                              )
                              {
                                  $in_statement_continuation = 0;
                              }
-                        }
  
-# ...and include all block types except user subs with
-# block prototypes and these: (sort|grep|map|do|eval)
-# /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
-                        elsif (
-                            $is_zero_continuation_block_type{
-                                $routput_block_type->[$i]
+                            # ..but these are not terminal types:
+                            #     /^(sort|grep|map|do|eval)$/ )
+                            elsif ($is_sort_map_grep_eval_do{$block_type_i}
+                                || $is_grep_alias{$block_type_i} )
+                            {
                              }
-                          )
-                        {
-                            $in_statement_continuation = 0;
-                        }
  
-                        # ..but these are not terminal types:
-                        #     /^(sort|grep|map|do|eval)$/ )
-                        elsif (
-                            $is_not_zero_continuation_block_type{
-                                $routput_block_type->[$i]
+                            # ..and a block introduced by a label
+                            # /^\w+\s*:$/gc ) {
+                            elsif ( $block_type_i =~ /:$/ ) {
+                                $in_statement_continuation = 0;
                              }
-                          )
-                        {
-                        }
-
-                        # ..and a block introduced by a label
-                        # /^\w+\s*:$/gc ) {
-                        elsif ( $routput_block_type->[$i] =~ /:$/ ) {
-                            $in_statement_continuation = 0;
-                        }
  
-                        # user function with block prototype
-                        else {
-                            $in_statement_continuation = 0;
+                            # user function with block prototype
+                            else {
+                                $in_statement_continuation = 0;
+                            }
+                        } ## end if ($block_type_i)
+
+                        # If we are in a list, then
+                        # we must set continuation indentation at the closing
+                        # paren of something like this (paren after $check):
+                        #     assert(
+                        #         __LINE__,
+                        #         ( not defined $check )
+                        #           or ref $check
+                        #           or $check eq "new"
+                        #           or $check eq "old",
+                        #     );
+                        elsif ( $tok_i eq ')' ) {
+                            $in_statement_continuation = 1
+                              if (
+                                $is_list_end_type{
+                                    $routput_container_type->[$i]
+                                }
+                              );
+                            ##if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
                          }
-                    }
+                    } ## end if ( length($nesting_block_string...))
  
-                    # If we are in a list, then
-                    # we must set continuation indentation at the closing
-                    # paren of something like this (paren after $check):
-                    #     assert(
-                    #         __LINE__,
-                    #         ( not defined $check )
-                    #           or ref $check
-                    #           or $check eq "new"
-                    #           or $check eq "old",
-                    #     );
-                    elsif ( $tok eq ')' ) {
-                        $in_statement_continuation = 1
-                          if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
-                    }
-
-                    elsif ( $tok eq ';' ) { $in_statement_continuation = 0 }
-                }
-
-                # use environment after updating
-                $container_environment =
-                    $nesting_block_flag ? 'BLOCK'
-                  : $nesting_list_flag  ? 'LIST'
-                  :                       "";
-                $ci_string_i = $ci_string_sum + $in_statement_continuation;
-                $nesting_block_string_i = $nesting_block_string;
-                $nesting_list_string_i  = $nesting_list_string;
-            }
+                    $ci_string_i = $ci_string_sum + $in_statement_continuation;
+                } ## end elsif ( $type_i eq '}' ||...{)
  
-            # not a structural indentation type..
-            else {
+                #-----------------------------------------
+                # Section 3: handle a constant level token
+                #-----------------------------------------
+                else {
  
-                $container_environment =
-                    $nesting_block_flag ? 'BLOCK'
-                  : $nesting_list_flag  ? 'LIST'
-                  :                       "";
-
-                # zero the continuation indentation at certain tokens so
-                # that they will be at the same level as its container.  For
-                # commas, this simplifies the -lp indentation logic, which
-                # counts commas.  For ?: it makes them stand out.
-                if ($nesting_list_flag) {
-                    ##      $type =~ /^[,\?\:]$/
-                    if ( $is_comma_question_colon{$type} ) {
+                    # zero the continuation indentation at certain tokens so
+                    # that they will be at the same level as its container.  For
+                    # commas, this simplifies the -lp indentation logic, which
+                    # counts commas.  For ?: it makes them stand out.
+                    if (
+                        $nesting_list_flag
+                        ##      $type_i =~ /^[,\?\:]$/
+                        && $is_comma_question_colon{$type_i}
+                      )
+                    {
                          $in_statement_continuation = 0;
                      }
-                }
  
-                # be sure binary operators get continuation indentation
-                if (
-                    $container_environment
-                    && (   $type eq 'k' && $is_binary_keyword{$tok}
-                        || $is_binary_type{$type} )
-                  )
-                {
-                    $in_statement_continuation = 1;
-                }
+                    # Be sure binary operators get continuation indentation.
+                    # Note: the check on $nesting_block_flag is only needed
+                    # to add ci to binary operators following a 'try' block,
+                    # or similar extended syntax block operator (see c158).
+                    if (
+                           !$in_statement_continuation
+                        && ( $nesting_block_flag || $nesting_list_flag )
+                        && (   $type_i eq 'k' && $is_binary_keyword{$tok_i}
+                            || $is_binary_type{$type_i} )
+                      )
+                    {
+                        $in_statement_continuation = 1;
+                    }
  
-                # continuation indentation is sum of any open ci from previous
-                # levels plus the current level
-                $ci_string_i = $ci_string_sum + $in_statement_continuation;
+                    # continuation indentation is sum of any open ci from
+                    # previous levels plus the current level
+                    $ci_string_i = $ci_string_sum + $in_statement_continuation;
  
-                # update continuation flag ...
-                # if this isn't a blank or comment..
-                if ( $type ne 'b' && $type ne '#' ) {
+                    # update continuation flag ...
  
-                    # and we are in a BLOCK
+                    # if we are in a BLOCK
                      if ($nesting_block_flag) {
  
                          # the next token after a ';' and label starts a new stmt
-                        if ( $type eq ';' || $type eq 'J' ) {
+                        if ( $type_i eq ';' || $type_i eq 'J' ) {
                              $in_statement_continuation = 0;
                          }
  
@@ -4858,7 +5388,7 @@ EOM
                          # as a non block, to simplify formatting. But these
                          # are actually blocks and can have semicolons.
                          # See code_block_type() and is_non_structural_brace().
-                        elsif ( $type eq ',' || $type eq ';' ) {
+                        elsif ( $type_i eq ',' || $type_i eq ';' ) {
                              $in_statement_continuation = 0;
                          }
  
@@ -4866,98 +5396,123 @@ EOM
                          else {
                              $in_statement_continuation = 1;
                          }
-                    }
+                    } ## end else [ if ($nesting_block_flag)]
+
+                } ## end else [ if ( $type_i eq '{' ||...})]
+
+                #-------------------------------------------
+                # Section 4: operations common to all levels
+                #-------------------------------------------
+
+                # set secondary nesting levels based on all containment token
+                # types.  Note: these are set so that the nesting depth is the
+                # depth of the PREVIOUS TOKEN, which is convenient for setting
+                # the strength of token bonds
+
+                #    /^[L\{\(\[]$/
+                if ( $is_opening_type{$type_i} ) {
+                    $slevel_in_tokenizer++;
+                    $nesting_token_string .= $tok_i;
+                    $nesting_type_string  .= $type_i;
                  }
-            }
  
-            if ( $level_in_tokenizer < 0 ) {
-                unless ( $tokenizer_self->[_saw_negative_indentation_] ) {
-                    $tokenizer_self->[_saw_negative_indentation_] = 1;
-                    warning("Starting negative indentation\n");
+                #       /^[R\}\)\]]$/
+                elsif ( $is_closing_type{$type_i} ) {
+                    $slevel_in_tokenizer--;
+                    my $char = chop $nesting_token_string;
+
+                    if ( $char ne $matching_start_token{$tok_i} ) {
+                        $nesting_token_string .= $char . $tok_i;
+                        $nesting_type_string  .= $type_i;
+                    }
+                    else {
+                        chop $nesting_type_string;
+                    }
                  }
-            }
  
-            # set secondary nesting levels based on all containment token types
-            # Note: these are set so that the nesting depth is the depth
-            # of the PREVIOUS TOKEN, which is convenient for setting
-            # the strength of token bonds
-            my $slevel_i = $slevel_in_tokenizer;
+                # apply token type patch:
+                # - output anonymous 'sub' as keyword (type 'k')
+                # - output __END__, __DATA__, and format as type 'k' instead
+                #   of ';' to make html colors correct, etc.
+                # The following hash tests are equivalent to these older tests:
+                #   if ( $type_i eq 't' && $is_sub{$tok_i} ) { $fix_type = 'k' }
+                #   if ( $type_i eq ';' && $tok_i =~ /\w/ ) { $fix_type = 'k' }
+                if (   $is_END_DATA_format_sub{$tok_i}
+                    && $is_semicolon_or_t{$type_i} )
+                {
+                    $type_i = 'k';
+                }
+            } ## end else [ if ( $type_i eq 'b' ||...)]
  
-            #    /^[L\{\(\[]$/
-            if ( $is_opening_type{$type} ) {
-                $slevel_in_tokenizer++;
-                $nesting_token_string .= $tok;
-                $nesting_type_string  .= $type;
-            }
+            #--------------------------------
+            # Store the values for this token
+            #--------------------------------
+            push( @ci_string,     $ci_string_i );
+            push( @levels,        $level_i );
+            push( @block_type,    $routput_block_type->[$i] );
+            push( @type_sequence, $routput_type_sequence->[$i] );
+            push( @token_type,    $type_i );
  
-            #       /^[R\}\)\]]$/
-            elsif ( $is_closing_type{$type} ) {
-                $slevel_in_tokenizer--;
-                my $char = chop $nesting_token_string;
+            # Form and store the PREVIOUS token
+            if ( defined($rtoken_map_im) ) {
+                my $numc =
+                  $rtoken_map->[$i] - $rtoken_map_im;    # how many characters
  
-                if ( $char ne $matching_start_token{$tok} ) {
-                    $nesting_token_string .= $char . $tok;
-                    $nesting_type_string  .= $type;
+                if ( $numc > 0 ) {
+                    push( @tokens,
+                        substr( $input_line, $rtoken_map_im, $numc ) );
                  }
                  else {
-                    chop $nesting_type_string;
+
+                    # Should not happen unless @{$rtoken_map} is corrupted
+                    DEVEL_MODE
+                      && Fault(
+                        "number of characters is '$numc' but should be >0\n");
                  }
              }
  
-            push( @block_type,            $routput_block_type->[$i] );
-            push( @ci_string,             $ci_string_i );
-            push( @container_environment, $container_environment );
-            push( @container_type,        $routput_container_type->[$i] );
-            push( @levels,                $level_i );
-            push( @nesting_tokens,        $nesting_token_string_i );
-            push( @nesting_types,         $nesting_type_string_i );
-            push( @slevels,               $slevel_i );
-            push( @token_type,            $fix_type );
-            push( @type_sequence,         $routput_type_sequence->[$i] );
-            push( @nesting_blocks,        $nesting_block_string );
-            push( @nesting_lists,         $nesting_list_string );
-
-            # now form the previous token
-            if ( $im >= 0 ) {
-                $num =
-                  $rtoken_map->[$i] - $rtoken_map->[$im];  # how many characters
-
-                if ( $num > 0 ) {
-                    push( @tokens,
-                        substr( $input_line, $rtoken_map->[$im], $num ) );
-                }
+            # or grab some values for the leading token (needed for log output)
+            else {
+                $line_of_tokens->{_nesting_blocks_0} = $nesting_block_string;
              }
-            $im = $i;
-        }
  
-        $num = length($input_line) - $rtoken_map->[$im];   # make the last token
-        if ( $num > 0 ) {
-            push( @tokens, substr( $input_line, $rtoken_map->[$im], $num ) );
-        }
+            $rtoken_map_im = $rtoken_map->[$i];
+        } ## end foreach my $i ( @{$routput_token_list...})
+
+        #------------------------
+        # End loop over tokens
+        #------------------------
+
+        # Form and store the final token of this line
+        if ( defined($rtoken_map_im) ) {
+            my $numc = length($input_line) - $rtoken_map_im;
+            if ( $numc > 0 ) {
+                push( @tokens, substr( $input_line, $rtoken_map_im, $numc ) );
+            }
+            else {
  
-        $tokenizer_self->[_in_attribute_list_] = $in_attribute_list;
-        $tokenizer_self->[_in_quote_]          = $in_quote;
-        $tokenizer_self->[_quote_target_] =
-          $in_quote ? matching_end_token($quote_character) : "";
-        $tokenizer_self->[_rhere_target_list_] = $rhere_target_list;
+                # Should not happen unless @{$rtoken_map} is corrupted
+                DEVEL_MODE
+                  && Fault(
+                    "Number of Characters is '$numc' but should be >0\n");
+            }
+        }
  
-        $line_of_tokens->{_rtoken_type}            = \@token_type;
-        $line_of_tokens->{_rtokens}                = \@tokens;
-        $line_of_tokens->{_rblock_type}            = \@block_type;
-        $line_of_tokens->{_rcontainer_type}        = \@container_type;
-        $line_of_tokens->{_rcontainer_environment} = \@container_environment;
-        $line_of_tokens->{_rtype_sequence}         = \@type_sequence;
-        $line_of_tokens->{_rlevels}                = \@levels;
-        $line_of_tokens->{_rslevels}               = \@slevels;
-        $line_of_tokens->{_rnesting_tokens}        = \@nesting_tokens;
-        $line_of_tokens->{_rci_levels}             = \@ci_string;
-        $line_of_tokens->{_rnesting_blocks}        = \@nesting_blocks;
+        #----------------------------------------------------------
+        # Wrap up this line of tokens for shipping to the Formatter
+        #----------------------------------------------------------
+        $line_of_tokens->{_rtoken_type}    = \@token_type;
+        $line_of_tokens->{_rtokens}        = \@tokens;
+        $line_of_tokens->{_rblock_type}    = \@block_type;
+        $line_of_tokens->{_rtype_sequence} = \@type_sequence;
+        $line_of_tokens->{_rlevels}        = \@levels;
+        $line_of_tokens->{_rci_levels}     = \@ci_string;
  
          return;
-    }
-}    # end tokenize_this_line
+    } ## end sub tokenizer_wrapup_line
+} ## end tokenize_this_line
  
-#########i#############################################################
+#######################################################################
  # Tokenizer routines which assist in identifying token types
  #######################################################################
  
@@ -4967,6 +5522,10 @@ my %op_expected_table;
  # exceptions to perl's weird parsing rules after type 'Z'
  my %is_weird_parsing_rule_exception;
  
+my %is_paren_dollar;
+
+my %is_n_v;
+
  BEGIN {
  
      # Always expecting TERM following these types:
@@ -4978,12 +5537,13 @@ BEGIN {
        ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~
      );
      push @q, ',';
-    push @q, '(';    # for completeness, not currently a token type
+    push @q, '(';     # for completeness, not currently a token type
+    push @q, '->';    # was previously in UNKNOWN
      @{op_expected_table}{@q} = (TERM) x scalar(@q);
  
-    # Always UNKNOWN following these types:
-    # Fix for c030: added '->' to this list
-    @q = qw( w -> );
+    # Always UNKNOWN following these types;
+    # previously had '->' in this list for c030
+    @q = qw( w );
      @{op_expected_table}{@q} = (UNKNOWN) x scalar(@q);
  
      # Always expecting OPERATOR ...
@@ -4997,7 +5557,13 @@ BEGIN {
  
      # Fix for git #62: added '*' and '%'
      @q = qw( < ? * % );
-    @{is_weird_parsing_rule_exception}{@q} = (OPERATOR) x scalar(@q);
+    @{is_weird_parsing_rule_exception}{@q} = (1) x scalar(@q);
+
+    @q = qw<) $>;
+    @{is_paren_dollar}{@q} = (1) x scalar(@q);
+
+    @q = qw( n v );
+    @{is_n_v}{@q} = (1) x scalar(@q);
  
  }
  
@@ -5054,40 +5620,39 @@ sub operator_expected {
  
      my ($rarg) = @_;
  
-    my $msg = "";
-
-    ##############
+    #-------------
      # Table lookup
-    ##############
+    #-------------
  
      # Many types are can be obtained by a table lookup given the previous type.
      # This typically handles half or more of the calls.
      my $op_expected = $op_expected_table{$last_nonblank_type};
      if ( defined($op_expected) ) {
-        $msg = "Table lookup";
-        goto RETURN;
+        DEBUG_OPERATOR_EXPECTED
+          && print STDOUT
+"OPERATOR_EXPECTED: Table Lookup; returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
+        return $op_expected;
      }
  
-    ######################
+    #---------------------
      # Handle special cases
-    ######################
+    #---------------------
  
      $op_expected = UNKNOWN;
      my ( $prev_type, $tok, $next_type ) = @{$rarg};
  
      # Types 'k', '}' and 'Z' depend on context
-    # FIXME: Types 'i', 'n', 'v', 'q' currently also temporarily depend on
-    # context but that dependence could eventually be eliminated with better
-    # token type definition
+    # Types 'i', 'n', 'v', 'q' currently also depend on context.
  
      # identifier...
      if ( $last_nonblank_type eq 'i' ) {
          $op_expected = OPERATOR;
  
-        # FIXME: it would be cleaner to make this a special type
-        # expecting VERSION or {} after package NAMESPACE
-        # TODO: maybe mark these words as type 'Y'?
-        if (   $statement_type =~ /^package\b/
+        # TODO: it would be cleaner to make this a special type
+        # expecting VERSION or {} after package NAMESPACE;
+        # maybe mark these words as type 'Y'?
+        if (   substr( $last_nonblank_token, 0, 7 ) eq 'package'
+            && $statement_type      =~ /^package\b/
              && $last_nonblank_token =~ /^package\b/ )
          {
              $op_expected = TERM;
@@ -5116,7 +5681,7 @@ sub operator_expected {
                  $op_expected = OPERATOR;
              }
  
-            # Patch to allow a ? following 'split' to be a depricated pattern
+            # Patch to allow a ? following 'split' to be a deprecated pattern
              # delimiter.  This patch is coordinated with the omission of split
              # from the list
              # %is_keyword_rejecting_question_as_pattern_delimiter. This patch
@@ -5150,10 +5715,12 @@ sub operator_expected {
              $op_expected = OPERATOR;    # block mode following }
          }
  
-        elsif ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) {
+        #       $last_nonblank_token =~ /^(\)|\$|\-\>)/
+        elsif ( $is_paren_dollar{ substr( $last_nonblank_token, 0, 1 ) }
+            || substr( $last_nonblank_token, 0, 2 ) eq '->' )
+        {
              $op_expected = OPERATOR;
              if ( $last_nonblank_token eq '$' ) { $op_expected = UNKNOWN }
-
          }
  
          # Check for smartmatch operator before preceding brace or square
@@ -5202,7 +5769,8 @@ sub operator_expected {
      #     use Module VERSION LIST
      # We could avoid this exception by writing a special sub to parse 'use'
      # statements and perhaps mark these numbers with a new type V (for VERSION)
-    elsif ( $last_nonblank_type =~ /^[nv]$/ ) {
+    ##elsif ( $last_nonblank_type =~ /^[nv]$/ ) {
+    elsif ( $is_n_v{$last_nonblank_type} ) {
          $op_expected = OPERATOR;
          if ( $statement_type eq 'use' ) {
              $op_expected = UNKNOWN;
@@ -5210,13 +5778,11 @@ sub operator_expected {
      }
  
      # quote...
-    # FIXME: labeled prototype words should probably be given type 'A' or maybe
-    # 'J'; not 'q'; or maybe mark as type 'Y'
+    # TODO: labeled prototype words would better be given type 'A' or maybe
+    # 'J'; not 'q'; or maybe mark as type 'Y'?
      elsif ( $last_nonblank_type eq 'q' ) {
          $op_expected = OPERATOR;
-        if ( $last_nonblank_token eq 'prototype' )
-          ##|| $last_nonblank_token eq 'switch' )
-        {
+        if ( $last_nonblank_token eq 'prototype' ) {
              $op_expected = TERM;
          }
      }
@@ -5231,6 +5797,14 @@ sub operator_expected {
              $op_expected = UNKNOWN;
          }
  
+        # Exception to weird parsing rules for 'x(' ... see case b1205:
+        # In something like 'print $vv x(...' the x is an operator;
+        # Likewise in 'print $vv x$ww' the x is an operator (case b1207)
+        # otherwise x follows the weird parsing rules.
+        elsif ( $tok eq 'x' && $next_type =~ /^[\(\$\@\%]$/ ) {
+            $op_expected = OPERATOR;
+        }
+
          # The 'weird parsing rules' of next section do not work for '<' and '?'
          # It is best to mark them as unknown.  Test case:
          #  print $fh <DATA>;
@@ -5275,16 +5849,13 @@ sub operator_expected {
          $op_expected = UNKNOWN;
      }
  
-  RETURN:
-
-    DEBUG_OPERATOR_EXPECTED && do {
-        print STDOUT
-"OPERATOR_EXPECTED: $msg: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
-    };
+    DEBUG_OPERATOR_EXPECTED
+      && print STDOUT
+"OPERATOR_EXPECTED: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
  
      return $op_expected;
  
-} ## end of sub operator_expected
+} ## end sub operator_expected
  
  sub new_statement_ok {
  
@@ -5295,7 +5866,7 @@ sub new_statement_ok {
  
        || $last_nonblank_type eq 'J';    # or we follow a label
  
-}
+} ## end sub new_statement_ok
  
  sub label_ok {
  
@@ -5317,7 +5888,7 @@ sub label_ok {
      else {
          return ( $last_nonblank_type eq ';' || $last_nonblank_type eq 'J' );
      }
-}
+} ## end sub label_ok
  
  sub code_block_type {
  
@@ -5349,7 +5920,7 @@ sub code_block_type {
  
          # cannot start a code block within an anonymous hash
          else {
-            return "";
+            return EMPTY_STRING;
          }
      }
  
@@ -5379,11 +5950,11 @@ sub code_block_type {
          }
      }
  
-    ################################################################
+    #--------------------------------------------------------------
      # NOTE: braces after type characters start code blocks, but for
      # simplicity these are not identified as such.  See also
      # sub is_non_structural_brace.
-    ################################################################
+    #--------------------------------------------------------------
  
  ##    elsif ( $last_nonblank_type eq 't' ) {
  ##       return $last_nonblank_token;
@@ -5397,17 +5968,22 @@ sub code_block_type {
  # otherwise, look at previous token.  This must be a code block if
  # it follows any of these:
  # /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
-    elsif ( $is_code_block_token{$last_nonblank_token} ) {
+    elsif ($is_code_block_token{$last_nonblank_token}
+        || $is_grep_alias{$last_nonblank_token} )
+    {
  
          # Bug Patch: Note that the opening brace after the 'if' in the following
          # snippet is an anonymous hash ref and not a code block!
          #   print 'hi' if { x => 1, }->{x};
          # We can identify this situation because the last nonblank type
-        # will be a keyword (instead of a closing peren)
-        if (   $last_nonblank_token =~ /^(if|unless)$/
-            && $last_nonblank_type eq 'k' )
+        # will be a keyword (instead of a closing paren)
+        if (
+            $last_nonblank_type eq 'k'
+            && (   $last_nonblank_token eq 'if'
+                || $last_nonblank_token eq 'unless' )
+          )
          {
-            return "";
+            return EMPTY_STRING;
          }
          else {
              return $last_nonblank_token;
@@ -5442,7 +6018,7 @@ sub code_block_type {
  
          # check for syntax 'use MODULE LIST'
          # This fixes b1022 b1025 b1027 b1028 b1029 b1030 b1031
-        return "" if ( $statement_type eq 'use' );
+        return EMPTY_STRING if ( $statement_type eq 'use' );
  
          return decide_if_code_block( $i, $rtokens, $rtoken_type,
              $max_token_index );
@@ -5454,10 +6030,12 @@ sub code_block_type {
      # Check for a code block within a parenthesized function call
      elsif ( $last_nonblank_token eq '(' ) {
          my $paren_type = $paren_type[$paren_depth];
-        if ( $paren_type && $paren_type =~ /^(map|grep|sort)$/ ) {
+
+        #                   /^(map|grep|sort)$/
+        if ( $paren_type && $is_sort_map_grep{$paren_type} ) {
  
              # We will mark this as a code block but use type 't' instead
-            # of the name of the contining function.  This will allow for
+            # of the name of the containing function.  This will allow for
              # correct parsing but will usually produce better formatting.
              # Braces with block type 't' are not broken open automatically
              # in the formatter as are other code block types, and this usually
@@ -5465,7 +6043,7 @@ sub code_block_type {
              return 't';    # (Not $paren_type)
          }
          else {
-            return "";
+            return EMPTY_STRING;
          }
      }
  
@@ -5477,9 +6055,9 @@ sub code_block_type {
  
      # anything else must be anonymous hash reference
      else {
-        return "";
+        return EMPTY_STRING;
      }
-}
+} ## end sub code_block_type
  
  sub decide_if_code_block {
  
@@ -5500,7 +6078,7 @@ sub decide_if_code_block {
      # Check for the common case of an empty anonymous hash reference:
      # Maybe something like sub { { } }
      if ( $next_nonblank_token eq '}' ) {
-        $code_block_type = "";
+        $code_block_type = EMPTY_STRING;
      }
  
      else {
@@ -5582,17 +6160,18 @@ sub decide_if_code_block {
              # Patched for RT #95708
              if (
  
-                # it is a comma which is not a pattern delimeter except for qw
+                # it is a comma which is not a pattern delimiter except for qw
                  (
-                       $pre_types[$j] eq ','
-                    && $pre_tokens[$jbeg] !~ /^(s|m|y|tr|qr|q|qq|qx)$/
+                    $pre_types[$j] eq ','
+                    ## !~ /^(s|m|y|tr|qr|q|qq|qx)$/
+                    && !$is_q_qq_qx_qr_s_y_tr_m{ $pre_tokens[$jbeg] }
                  )
  
                  # or a =>
                  || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' )
                )
              {
-                $code_block_type = "";
+                $code_block_type = EMPTY_STRING;
              }
          }
  
@@ -5602,12 +6181,12 @@ sub decide_if_code_block {
              # If this brace follows a bareword, then append a space as a signal
              # to the formatter that this may not be a block brace.  To find the
              # corresponding code in Formatter.pm search for 'b1085'.
-            $code_block_type .= " " if ( $code_block_type =~ /^\w/ );
+            $code_block_type .= SPACE if ( $code_block_type =~ /^\w/ );
          }
      }
  
      return $code_block_type;
-}
+} ## end sub decide_if_code_block
  
  sub report_unexpected {
  
@@ -5626,7 +6205,7 @@ sub report_unexpected {
            make_numbered_line( $input_line_number, $input_line, $pos );
          $underline = write_on_underline( $underline, $pos - $offset, '^' );
  
-        my $trailer = "";
+        my $trailer = EMPTY_STRING;
          if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
              my $pos_prev = $rpretoken_map->[$last_nonblank_i];
              my $num;
@@ -5649,6 +6228,18 @@ sub report_unexpected {
          resume_logfile();
      }
      return;
+} ## end sub report_unexpected
+
+my %is_sigil_or_paren;
+my %is_R_closing_sb;
+
+BEGIN {
+
+    my @q = qw< $ & % * @ ) >;
+    @{is_sigil_or_paren}{@q} = (1) x scalar(@q);
+
+    @q = qw(R ]);
+    @{is_R_closing_sb}{@q} = (1) x scalar(@q);
  }
  
  sub is_non_structural_brace {
@@ -5666,28 +6257,33 @@ sub is_non_structural_brace {
      #    return 0;
      # }
  
-    ################################################################
+    #--------------------------------------------------------------
      # NOTE: braces after type characters start code blocks, but for
      # simplicity these are not identified as such.  See also
      # sub code_block_type
-    ################################################################
+    #--------------------------------------------------------------
  
      ##if ($last_nonblank_type eq 't') {return 0}
  
      # otherwise, it is non-structural if it is decorated
      # by type information.
      # For example, the '{' here is non-structural:   ${xxx}
+    # Removed '::' to fix c074
+    ## $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
      return (
-        $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
+        ## $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->)/
+        $is_sigil_or_paren{ substr( $last_nonblank_token, 0, 1 ) }
+          || substr( $last_nonblank_token, 0, 2 ) eq '->'
  
            # or if we follow a hash or array closing curly brace or bracket
            # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
            # because the first '}' would have been given type 'R'
-          || $last_nonblank_type =~ /^([R\]])$/
+          ##|| $last_nonblank_type =~ /^([R\]])$/
+          || $is_R_closing_sb{$last_nonblank_type}
      );
-}
+} ## end sub is_non_structural_brace
  
-#########i#############################################################
+#######################################################################
  # Tokenizer routines for tracking container nesting depths
  #######################################################################
  
@@ -5746,8 +6342,10 @@ sub increase_nesting_depth {
      # Sequence numbers increment by number of items.  This keeps
      # a unique set of numbers but still allows the relative location
      # of any type to be determined.
-    $nesting_sequence_number[$aa] += scalar(@closing_brace_names);
-    my $seqno = $nesting_sequence_number[$aa];
+
+    # make a new unique sequence number
+    my $seqno = $next_sequence_number++;
+
      $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno;
  
      $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] =
@@ -5772,10 +6370,13 @@ sub increase_nesting_depth {
              }
          }
      }
-    $nested_statement_type[$aa][ $current_depth[$aa] ] = $statement_type;
-    $statement_type = "";
+
+    # Fix part #1 for git82: save last token type for propagation of type 'Z'
+    $nested_statement_type[$aa][ $current_depth[$aa] ] =
+      [ $statement_type, $last_nonblank_type, $last_nonblank_token ];
+    $statement_type = EMPTY_STRING;
      return ( $seqno, $indent );
-}
+} ## end sub increase_nesting_depth
  
  sub is_balanced_closing_container {
  
@@ -5796,7 +6397,7 @@ sub is_balanced_closing_container {
  
      # OK, everything will be balanced
      return 1;
-}
+} ## end sub is_balanced_closing_container
  
  sub decrease_nesting_depth {
  
@@ -5818,7 +6419,19 @@ sub decrease_nesting_depth {
          if ( $aa == QUESTION_COLON ) {
              $outdent = $nested_ternary_flag[ $current_depth[$aa] ];
          }
-        $statement_type = $nested_statement_type[$aa][ $current_depth[$aa] ];
+
+        # Fix part #2 for git82: use saved type for propagation of type 'Z'
+        # through type L-R braces.  Perl seems to allow ${bareword}
+        # as an indirect object, but nothing much more complex than that.
+        ( $statement_type, my $saved_type, my $saved_token ) =
+          @{ $nested_statement_type[$aa][ $current_depth[$aa] ] };
+        if (   $aa == BRACE
+            && $saved_type eq 'Z'
+            && $last_nonblank_type eq 'w'
+            && $brace_structural_type[$brace_depth] eq 'L' )
+        {
+            $last_nonblank_type = $saved_type;
+        }
  
          # check that any brace types $bb contained within are balanced
          for my $bb ( 0 .. @closing_brace_names - 1 ) {
@@ -5851,7 +6464,7 @@ sub decrease_nesting_depth {
                      my ($ess);
  
                      if ( $diff == 1 || $diff == -1 ) {
-                        $ess = '';
+                        $ess = EMPTY_STRING;
                      }
                      else {
                          $ess = 's';
@@ -5899,7 +6512,7 @@ EOM
            if ( $closing_brace_names[$aa] ne "':'" );
      }
      return ( $seqno, $outdent );
-}
+} ## end sub decrease_nesting_depth
  
  sub check_final_nesting_depths {
  
@@ -5920,9 +6533,9 @@ EOM
          }
      }
      return;
-}
+} ## end sub check_final_nesting_depths
  
-#########i#############################################################
+#######################################################################
  # Tokenizer routines for looking ahead in input stream
  #######################################################################
  
@@ -5947,7 +6560,7 @@ sub peek_ahead_for_n_nonblank_pre_tokens {
          last;
      }
      return ( $rpre_tokens, $rpre_types );
-}
+} ## end sub peek_ahead_for_n_nonblank_pre_tokens
  
  # look ahead for next non-blank, non-comment line of code
  sub peek_ahead_for_nonblank_token {
@@ -5963,8 +6576,9 @@ sub peek_ahead_for_nonblank_token {
          $line =~ s/^\s*//;                 # trim leading blanks
          next if ( length($line) <= 0 );    # skip blank
          next if ( $line =~ /^#/ );         # skip comment
-        my ( $rtok, $rmap, $rtype ) =
-          pre_tokenize( $line, 2 );        # only need 2 pre-tokens
+
+        # Updated from 2 to 3 to get trigraphs, added for case b1175
+        my ( $rtok, $rmap, $rtype ) = pre_tokenize( $line, 3 );
          my $j = $max_token_index + 1;
  
          foreach my $tok ( @{$rtok} ) {
@@ -5974,9 +6588,9 @@ sub peek_ahead_for_nonblank_token {
          last;
      }
      return;
-}
+} ## end sub peek_ahead_for_nonblank_token
  
-#########i#############################################################
+#######################################################################
  # Tokenizer guessing routines for ambiguous situations
  #######################################################################
  
@@ -6006,15 +6620,29 @@ sub guess_if_pattern_or_conditional {
          # look for a possible ending ? on this line..
          my $in_quote        = 1;
          my $quote_depth     = 0;
-        my $quote_character = '';
+        my $quote_character = EMPTY_STRING;
          my $quote_pos       = 0;
          my $quoted_string;
          (
-            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
-            $quoted_string
-          )
-          = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
-            $quote_pos, $quote_depth, $max_token_index );
+
+            $i,
+            $in_quote,
+            $quote_character,
+            $quote_pos,
+            $quote_depth,
+            $quoted_string,
+
+        ) = follow_quoted_string(
+
+            $ibeg,
+            $in_quote,
+            $rtokens,
+            $quote_character,
+            $quote_pos,
+            $quote_depth,
+            $max_token_index,
+
+        );
  
          if ($in_quote) {
  
@@ -6052,7 +6680,7 @@ sub guess_if_pattern_or_conditional {
          }
      }
      return ( $is_pattern, $msg );
-}
+} ## end sub guess_if_pattern_or_conditional
  
  my %is_known_constant;
  my %is_known_function;
@@ -6094,7 +6722,7 @@ sub guess_if_pattern_or_division {
          if ( $divide_possible < 0 ) {
              $msg        = "pattern (division not possible here)\n";
              $is_pattern = 1;
-            goto RETURN;
+            return ( $is_pattern, $msg );
          }
  
          $i = $ibeg + 1;
@@ -6112,12 +6740,12 @@ sub guess_if_pattern_or_division {
          # usually indicates a pattern.  We can use this to break ties.
  
          my $is_pattern_by_spacing =
-          ( $i > 1 && $next_token ne ' ' && $rtokens->[ $i - 2 ] eq ' ' );
+          ( $i > 1 && $next_token !~ m/^\s/ && $rtokens->[ $i - 2 ] =~ m/^\s/ );
  
          # look for a possible ending / on this line..
          my $in_quote        = 1;
          my $quote_depth     = 0;
-        my $quote_character = '';
+        my $quote_character = EMPTY_STRING;
          my $quote_pos       = 0;
          my $quoted_string;
          (
@@ -6225,10 +6853,8 @@ sub guess_if_pattern_or_division {
              }
          }
      }
-
-  RETURN:
      return ( $is_pattern, $msg );
-}
+} ## end sub guess_if_pattern_or_division
  
  # try to resolve here-doc vs. shift by looking ahead for
  # non-code or the end token (currently only looks for end token)
@@ -6283,9 +6909,9 @@ sub guess_if_here_doc {
      }
      write_logfile_entry($msg);
      return $here_doc_expected;
-}
+} ## end sub guess_if_here_doc
  
-#########i#############################################################
+#######################################################################
  # Tokenizer Routines for scanning identifiers and related items
  #######################################################################
  
@@ -6325,7 +6951,7 @@ sub scan_bare_identifier_do {
          # ($,%,@,*) including something like abc::def::ghi
          $type = 'w';
  
-        my $sub_name = "";
+        my $sub_name = EMPTY_STRING;
          if ( defined($2) ) { $sub_name = $2; }
          if ( defined($1) ) {
              $package = $1;
@@ -6395,7 +7021,7 @@ sub scan_bare_identifier_do {
              #    $tok='eval'; # patch to do braces like eval  - doesn't work
              #    $type = 'k';
              #}
-            # FIXME: This could become a separate type to allow for different
+            # TODO: This could become a separate type to allow for different
              # future behavior:
              elsif ( $is_block_function{$package}{$sub_name} ) {
                  $type = 'G';
@@ -6491,7 +7117,7 @@ sub scan_bare_identifier_do {
          warning("didn't find identifier after leading ::\n");
      }
      return ( $i, $tok, $type, $prototype );
-}
+} ## end sub scan_bare_identifier_do
  
  sub scan_id_do {
  
@@ -6516,7 +7142,7 @@ sub scan_id_do {
          $max_token_index )
        = @_;
      use constant DEBUG_NSCAN => 0;
-    my $type = '';
+    my $type = EMPTY_STRING;
      my ( $i_beg, $pos_beg );
  
      #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
@@ -6526,7 +7152,7 @@ sub scan_id_do {
      # on re-entry, start scanning at first token on the line
      if ($id_scan_state) {
          $i_beg = $i;
-        $type  = '';
+        $type  = EMPTY_STRING;
      }
  
      # on initial entry, start scanning just after type token
@@ -6576,7 +7202,7 @@ sub scan_id_do {
                      rtokens         => $rtokens,
                      rtoken_map      => $rtoken_map,
                      id_scan_state   => $id_scan_state,
-                    max_token_index => $max_token_index
+                    max_token_index => $max_token_index,
                  }
              );
          }
@@ -6585,20 +7211,25 @@ sub scan_id_do {
              ( $i, $tok, $type ) =
                do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
                  $rtoken_map, $max_token_index );
-            $id_scan_state = '';
+            $id_scan_state = EMPTY_STRING;
          }
  
          else {
              warning("invalid token in scan_id: $tok\n");
-            $id_scan_state = '';
+            $id_scan_state = EMPTY_STRING;
          }
      }
  
      if ( $id_scan_state && ( !defined($type) || !$type ) ) {
  
          # shouldn't happen:
+        if (DEVEL_MODE) {
+            Fault(<<EOM);
+Program bug in scan_id: undefined type but scan_state=$id_scan_state
+EOM
+        }
          warning(
-"Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
+"Possible program bug in sub scan_id: undefined type but scan_state=$id_scan_state\n"
          );
          report_definite_bug();
      }
@@ -6608,7 +7239,7 @@ sub scan_id_do {
            "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
      };
      return ( $i, $tok, $type, $id_scan_state );
-}
+} ## end sub scan_id_do
  
  sub check_prototype {
      my ( $proto, $package, $subname ) = @_;
@@ -6644,7 +7275,7 @@ sub check_prototype {
          $is_user_function{$package}{$subname} = 1;
      }
      return;
-}
+} ## end sub check_prototype
  
  sub do_scan_package {
  
@@ -6727,631 +7358,869 @@ sub do_scan_package {
      }
  
      return ( $i, $tok, $type );
+} ## end sub do_scan_package
+
+my %is_special_variable_char;
+
+BEGIN {
+
+    # These are the only characters which can (currently) form special
+    # variables, like $^W: (issue c066).
+    my @q =
+      qw{ ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ };
+    @{is_special_variable_char}{@q} = (1) x scalar(@q);
  }
  
-sub scan_identifier_do {
+{    ## begin closure for sub scan_complex_identifier
+
+    use constant DEBUG_SCAN_ID => 0;
  
-    # This routine assembles tokens into identifiers.  It maintains a
-    # scan state, id_scan_state.  It updates id_scan_state based upon
-    # current id_scan_state and token, and returns an updated
-    # id_scan_state and the next index after the identifier.
+    # These are the possible states for this scanner:
+    my $scan_state_SIGIL     = '$';
+    my $scan_state_ALPHA     = 'A';
+    my $scan_state_COLON     = ':';
+    my $scan_state_LPAREN    = '(';
+    my $scan_state_RPAREN    = ')';
+    my $scan_state_AMPERSAND = '&';
+    my $scan_state_SPLIT     = '^';
+
+    # Only these non-blank states may be returned to caller:
+    my %is_returnable_scan_state = (
+        $scan_state_SIGIL     => 1,
+        $scan_state_AMPERSAND => 1,
+    );
  
-    # USES GLOBAL VARIABLES: $context, $last_nonblank_token,
-    # $last_nonblank_type
+    # USES GLOBAL VARIABLES:
+    #    $context, $last_nonblank_token, $last_nonblank_type
  
+    #-----------
+    # call args:
+    #-----------
      my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
-        $expecting, $container_type )
-      = @_;
-    use constant DEBUG_SCAN_ID => 0;
-    my $i_begin   = $i;
-    my $type      = '';
-    my $tok_begin = $rtokens->[$i_begin];
-    if ( $tok_begin eq ':' ) { $tok_begin = '::' }
-    my $id_scan_state_begin = $id_scan_state;
-    my $identifier_begin    = $identifier;
-    my $tok                 = $tok_begin;
-    my $message             = "";
-    my $tok_is_blank;    # a flag to speed things up
-
-    my $in_prototype_or_signature =
-      $container_type && $container_type =~ /^sub\b/;
-
-    # these flags will be used to help figure out the type:
+        $expecting, $container_type );
+
+    #-------------------------------------------
+    # my variables, re-initialized on each call:
+    #-------------------------------------------
+    my $i_begin;                # starting index $i
+    my $type;                   # returned identifier type
+    my $tok_begin;              # starting token
+    my $tok;                    # returned token
+    my $id_scan_state_begin;    # starting scan state
+    my $identifier_begin;       # starting identifier
+    my $i_save;                 # a last good index, in case of error
+    my $message;                # hold error message for log file
+    my $tok_is_blank;
+    my $last_tok_is_blank;
+    my $in_prototype_or_signature;
      my $saw_alpha;
      my $saw_type;
+    my $allow_tick;
  
-    # allow old package separator (') except in 'use' statement
-    my $allow_tick = ( $last_nonblank_token ne 'use' );
+    sub initialize_my_scan_id_vars {
  
-    #########################################################
-    # get started by defining a type and a state if necessary
-    #########################################################
+        # Initialize all 'my' vars on entry
+        $i_begin   = $i;
+        $type      = EMPTY_STRING;
+        $tok_begin = $rtokens->[$i_begin];
+        $tok       = $tok_begin;
+        if ( $tok_begin eq ':' ) { $tok_begin = '::' }
+        $id_scan_state_begin = $id_scan_state;
+        $identifier_begin    = $identifier;
+        $i_save              = undef;
  
-    if ( !$id_scan_state ) {
-        $context = UNKNOWN_CONTEXT;
+        $message           = EMPTY_STRING;
+        $tok_is_blank      = undef;          # a flag to speed things up
+        $last_tok_is_blank = undef;
  
-        # fixup for digraph
-        if ( $tok eq '>' ) {
-            $tok       = '->';
-            $tok_begin = $tok;
-        }
-        $identifier = $tok;
+        $in_prototype_or_signature =
+          $container_type && $container_type =~ /^sub\b/;
  
-        if ( $tok eq '$' || $tok eq '*' ) {
-            $id_scan_state = '$';
-            $context       = SCALAR_CONTEXT;
-        }
-        elsif ( $tok eq '%' || $tok eq '@' ) {
-            $id_scan_state = '$';
-            $context       = LIST_CONTEXT;
-        }
-        elsif ( $tok eq '&' ) {
-            $id_scan_state = '&';
+        # these flags will be used to help figure out the type:
+        $saw_alpha = undef;
+        $saw_type  = undef;
+
+        # allow old package separator (') except in 'use' statement
+        $allow_tick = ( $last_nonblank_token ne 'use' );
+        return;
+    } ## end sub initialize_my_scan_id_vars
+
+    #----------------------------------
+    # Routines for handling scan states
+    #----------------------------------
+    sub do_id_scan_state_dollar {
+
+        # We saw a sigil, now looking to start a variable name
+        if ( $tok eq '$' ) {
+
+            $identifier .= $tok;
+
+            # we've got a punctuation variable if end of line (punct.t)
+            if ( $i == $max_token_index ) {
+                $type          = 'i';
+                $id_scan_state = EMPTY_STRING;
+            }
          }
-        elsif ( $tok eq 'sub' or $tok eq 'package' ) {
-            $saw_alpha     = 0;     # 'sub' is considered type info here
-            $id_scan_state = '$';
-            $identifier .= ' ';     # need a space to separate sub from sub name
+        elsif ( $tok =~ /^\w/ ) {    # alphanumeric ..
+            $saw_alpha     = 1;
+            $id_scan_state = $scan_state_COLON;    # now need ::
+            $identifier .= $tok;
          }
          elsif ( $tok eq '::' ) {
-            $id_scan_state = 'A';
-        }
-        elsif ( $tok =~ /^\w/ ) {
-            $id_scan_state = ':';
-            $saw_alpha     = 1;
+            $id_scan_state = $scan_state_ALPHA;
+            $identifier .= $tok;
          }
-        elsif ( $tok eq '->' ) {
-            $id_scan_state = '$';
+
+        # POSTDEFREF ->@ ->% ->& ->*
+        elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) {
+            $identifier .= $tok;
          }
-        else {
+        elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
+            $saw_alpha     = 1;
+            $id_scan_state = $scan_state_COLON;    # now need ::
+            $identifier .= $tok;
  
-            # shouldn't happen
-            my ( $a, $b, $c ) = caller;
-            warning("Program Bug: scan_identifier given bad token = $tok \n");
-            warning("   called from sub $a  line: $c\n");
-            report_definite_bug();
+            # Perl will accept leading digits in identifiers,
+            # although they may not always produce useful results.
+            # Something like $main::0 is ok.  But this also works:
+            #
+            #  sub howdy::123::bubba{ print "bubba $54321!\n" }
+            #  howdy::123::bubba();
+            #
          }
-        $saw_type = !$saw_alpha;
-    }
-    else {
-        $i--;
-        $saw_alpha = ( $tok =~ /^\w/ );
-        $saw_type  = ( $tok =~ /([\$\%\@\*\&])/ );
-    }
+        elsif ( $tok eq '#' ) {
  
-    ###############################
-    # loop to gather the identifier
-    ###############################
+            my $is_punct_var = $identifier eq '$$';
  
-    my $i_save = $i;
+            # side comment or identifier?
+            if (
  
-    while ( $i < $max_token_index ) {
-        my $last_tok_is_blank = $tok_is_blank;
-        if   ($tok_is_blank) { $tok_is_blank = undef }
-        else                 { $i_save       = $i }
+                # A '#' starts a comment if it follows a space. For example,
+                # the following is equivalent to $ans=40.
+                #   my $ #
+                #     ans = 40;
+                !$last_tok_is_blank
  
-        $tok = $rtokens->[ ++$i ];
+                # a # inside a prototype or signature can only start a
+                # comment
+                && !$in_prototype_or_signature
  
-        # patch to make digraph :: if necessary
-        if ( ( $tok eq ':' ) && ( $rtokens->[ $i + 1 ] eq ':' ) ) {
-            $tok = '::';
-            $i++;
-        }
+                # these are valid punctuation vars: *# %# @# $#
+                # May also be '$#array' or POSTDEFREF ->$#
+                && (   $identifier =~ /^[\%\@\$\*]$/
+                    || $identifier =~ /\$$/ )
  
-        ########################
-        # Starting variable name
-        ########################
+                # but a '#' after '$$' is a side comment; see c147
+                && !$is_punct_var
  
-        if ( $id_scan_state eq '$' ) {
+              )
+            {
+                $identifier .= $tok;    # keep same state, a $ could follow
+            }
+            else {
  
-            if ( $tok eq '$' ) {
+                # otherwise it is a side comment
+                if    ( $identifier eq '->' )                 { }
+                elsif ($is_punct_var)                         { $type = 'i' }
+                elsif ( $id_scan_state eq $scan_state_SIGIL ) { $type = 't' }
+                else                                          { $type = 'i' }
+                $i             = $i_save;
+                $id_scan_state = EMPTY_STRING;
+            }
+        }
  
-                $identifier .= $tok;
+        elsif ( $tok eq '{' ) {
  
-                # we've got a punctuation variable if end of line (punct.t)
-                if ( $i == $max_token_index ) {
-                    $type          = 'i';
-                    $id_scan_state = '';
-                    last;
-                }
-            }
-            elsif ( $tok =~ /^\w/ ) {    # alphanumeric ..
-                $saw_alpha     = 1;
-                $id_scan_state = ':';    # now need ::
-                $identifier .= $tok;
-            }
-            elsif ( $tok eq '::' ) {
-                $id_scan_state = 'A';
-                $identifier .= $tok;
+            # check for something like ${#} or ${?}, where ? is a special char
+            if (
+                (
+                       $identifier eq '$'
+                    || $identifier eq '@'
+                    || $identifier eq '$#'
+                )
+                && $i + 2 <= $max_token_index
+                && $rtokens->[ $i + 2 ] eq '}'
+                && $rtokens->[ $i + 1 ] !~ /[\s\w]/
+              )
+            {
+                my $next2 = $rtokens->[ $i + 2 ];
+                my $next1 = $rtokens->[ $i + 1 ];
+                $identifier .= $tok . $next1 . $next2;
+                $i += 2;
+                $id_scan_state = EMPTY_STRING;
              }
+            else {
  
-            # POSTDEFREF ->@ ->% ->& ->*
-            elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) {
-                $identifier .= $tok;
-            }
-            elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
-                $saw_alpha     = 1;
-                $id_scan_state = ':';                 # now need ::
-                $identifier .= $tok;
+                # skip something like ${xxx} or ->{
+                $id_scan_state = EMPTY_STRING;
  
-                # Perl will accept leading digits in identifiers,
-                # although they may not always produce useful results.
-                # Something like $main::0 is ok.  But this also works:
-                #
-                #  sub howdy::123::bubba{ print "bubba $54321!\n" }
-                #  howdy::123::bubba();
-                #
+                # if this is the first token of a line, any tokens for this
+                # identifier have already been accumulated
+                if ( $identifier eq '$' || $i == 0 ) {
+                    $identifier = EMPTY_STRING;
+                }
+                $i = $i_save;
              }
-            elsif ( $tok eq '#' ) {
-
-                # side comment or identifier?
-                if (
+        }
  
-                    # A '#' starts a comment if it follows a space. For example,
-                    # the following is equivalent to $ans=40.
-                    #   my $ #
-                    #     ans = 40;
-                    !$last_tok_is_blank
+        # space ok after leading $ % * & @
+        elsif ( $tok =~ /^\s*$/ ) {
  
-                    # a # inside a prototype or signature can only start a
-                    # comment
-                    && !$in_prototype_or_signature
+            $tok_is_blank = 1;
  
-                    # these are valid punctuation vars: *# %# @# $#
-                    # May also be '$#array' or POSTDEFREF ->$#
-                    && ( $identifier =~ /^[\%\@\$\*]$/ || $identifier =~ /\$$/ )
+            # note: an id with a leading '&' does not actually come this way
+            if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
  
-                  )
-                {
-                    $identifier .= $tok;    # keep same state, a $ could follow
+                if ( length($identifier) > 1 ) {
+                    $id_scan_state = EMPTY_STRING;
+                    $i             = $i_save;
+                    $type          = 'i';    # probably punctuation variable
                  }
                  else {
  
-                    # otherwise it is a side comment
-                    if    ( $identifier eq '->' )   { }
-                    elsif ( $id_scan_state eq '$' ) { $type = 't' }
-                    else                            { $type = 'i' }
-                    $i             = $i_save;
-                    $id_scan_state = '';
-                    last;
+                    # fix c139: trim line-ending type 't'
+                    if ( $i == $max_token_index ) {
+                        $i    = $i_save;
+                        $type = 't';
+                    }
+
+                    # spaces after $'s are common, and space after @
+                    # is harmless, so only complain about space
+                    # after other type characters. Space after $ and
+                    # @ will be removed in formatting.  Report space
+                    # after % and * because they might indicate a
+                    # parsing error.  In other words '% ' might be a
+                    # modulo operator.  Delete this warning if it
+                    # gets annoying.
+                    elsif ( $identifier !~ /^[\@\$]$/ ) {
+                        $message =
+                          "Space in identifier, following $identifier\n";
+                    }
+                    else {
+                        ## ok: silently accept space after '$' and '@' sigils
+                    }
                  }
              }
  
-            elsif ( $tok eq '{' ) {
+            elsif ( $identifier eq '->' ) {
  
-                # check for something like ${#} or ${©}
-                if (
-                    (
-                           $identifier eq '$'
-                        || $identifier eq '@'
-                        || $identifier eq '$#'
-                    )
-                    && $i + 2 <= $max_token_index
-                    && $rtokens->[ $i + 2 ] eq '}'
-                    && $rtokens->[ $i + 1 ] !~ /[\s\w]/
-                  )
-                {
-                    my $next2 = $rtokens->[ $i + 2 ];
-                    my $next1 = $rtokens->[ $i + 1 ];
-                    $identifier .= $tok . $next1 . $next2;
-                    $i += 2;
-                    $id_scan_state = '';
-                    last;
+                # space after '->' is ok except at line end ..
+                # so trim line-ending in type '->' (fixes c139)
+                if ( $i == $max_token_index ) {
+                    $i    = $i_save;
+                    $type = '->';
                  }
-
-                # skip something like ${xxx} or ->{
-                $id_scan_state = '';
-
-                # if this is the first token of a line, any tokens for this
-                # identifier have already been accumulated
-                if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
-                $i = $i_save;
-                last;
              }
  
-            # space ok after leading $ % * & @
-            elsif ( $tok =~ /^\s*$/ ) {
+            # stop at space after something other than -> or sigil
+            # Example of what can arrive here:
+            #   eval { $MyClass->$$ };
+            else {
+                $id_scan_state = EMPTY_STRING;
+                $i             = $i_save;
+                $type          = 'i';
+            }
+        }
+        elsif ( $tok eq '^' ) {
  
-                $tok_is_blank = 1;
+            # check for some special variables like $^ $^W
+            if ( $identifier =~ /^[\$\*\@\%]$/ ) {
+                $identifier .= $tok;
+                $type = 'i';
  
-                if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
+                # There may be one more character, not a space, after the ^
+                my $next1 = $rtokens->[ $i + 1 ];
+                my $chr   = substr( $next1, 0, 1 );
+                if ( $is_special_variable_char{$chr} ) {
  
-                    if ( length($identifier) > 1 ) {
-                        $id_scan_state = '';
-                        $i             = $i_save;
-                        $type          = 'i';    # probably punctuation variable
-                        last;
-                    }
-                    else {
+                    # It is something like $^W
+                    # Test case (c066) : $^Oeq'linux'
+                    $i++;
+                    $identifier .= $next1;
  
-                        # spaces after $'s are common, and space after @
-                        # is harmless, so only complain about space
-                        # after other type characters. Space after $ and
-                        # @ will be removed in formatting.  Report space
-                        # after % and * because they might indicate a
-                        # parsing error.  In other words '% ' might be a
-                        # modulo operator.  Delete this warning if it
-                        # gets annoying.
-                        if ( $identifier !~ /^[\@\$]$/ ) {
-                            $message =
-                              "Space in identifier, following $identifier\n";
-                        }
-                    }
+                    # If pretoken $next1 is more than one character long,
+                    # set a flag indicating that it needs to be split.
+                    $id_scan_state =
+                      ( length($next1) > 1 ) ? $scan_state_SPLIT : EMPTY_STRING;
                  }
+                else {
  
-                # else:
-                # space after '->' is ok
+                    # it is just $^
+                    # Simple test case (c065): '$aa=$^if($bb)';
+                    $id_scan_state = EMPTY_STRING;
+                }
+            }
+            else {
+                $id_scan_state = EMPTY_STRING;
+                $i             = $i_save;
              }
-            elsif ( $tok eq '^' ) {
+        }
+        else {    # something else
  
-                # check for some special variables like $^W
-                if ( $identifier =~ /^[\$\*\@\%]$/ ) {
-                    $identifier .= $tok;
-                    $id_scan_state = 'A';
+            if ( $in_prototype_or_signature && $tok =~ /^[\),=#]/ ) {
  
-                    # Perl accepts '$^]' or '@^]', but
-                    # there must not be a space before the ']'.
-                    my $next1 = $rtokens->[ $i + 1 ];
-                    if ( $next1 eq ']' ) {
-                        $i++;
-                        $identifier .= $next1;
-                        $id_scan_state = "";
-                        last;
-                    }
+                # We might be in an extrusion of
+                #     sub foo2 ( $first, $, $third ) {
+                # looking at a line starting with a comma, like
+                #   $
+                #   ,
+                # in this case the comma ends the signature variable
+                # '$' which will have been previously marked type 't'
+                # rather than 'i'.
+                if ( $i == $i_begin ) {
+                    $identifier = EMPTY_STRING;
+                    $type       = EMPTY_STRING;
                  }
+
+                # At a '#' we have to mark as type 't' because more may
+                # follow; otherwise, in a signature, we can let '$' be an
+                # identifier here for better formatting.
+                # See 'mangle4.in' for a test case.
                  else {
-                    $id_scan_state = '';
+                    $type = 'i';
+                    if ( $id_scan_state eq $scan_state_SIGIL && $tok eq '#' ) {
+                        $type = 't';
+                    }
+                    $i = $i_save;
                  }
+                $id_scan_state = EMPTY_STRING;
              }
-            else {    # something else
-
-                if ( $in_prototype_or_signature && $tok =~ /^[\),=#]/ ) {
-
-                    # We might be in an extrusion of
-                    #     sub foo2 ( $first, $, $third ) {
-                    # looking at a line starting with a comma, like
-                    #   $
-                    #   ,
-                    # in this case the comma ends the signature variable
-                    # '$' which will have been previously marked type 't'
-                    # rather than 'i'.
-                    if ( $i == $i_begin ) {
-                        $identifier = "";
-                        $type       = "";
-                    }
  
-                    # at a # we have to mark as type 't' because more may
-                    # follow, otherwise, in a signature we can let '$' be an
-                    # identifier here for better formatting.
-                    # See 'mangle4.in' for a test case.
-                    else {
-                        $type = 'i';
-                        if ( $id_scan_state eq '$' && $tok eq '#' ) {
-                            $type = 't';
-                        }
-                        $i = $i_save;
-                    }
-                    $id_scan_state = '';
-                    last;
-                }
+            # check for various punctuation variables
+            elsif ( $identifier =~ /^[\$\*\@\%]$/ ) {
+                $identifier .= $tok;
+            }
+
+            # POSTDEFREF: Postfix reference ->$* ->%*  ->@* ->** ->&* ->$#*
+            elsif ($tok eq '*'
+                && $identifier =~ /\-\>([\@\%\$\*\&]|\$\#)$/ )
+            {
+                $identifier .= $tok;
+            }
+
+            elsif ( $identifier eq '$#' ) {
  
-                # check for various punctuation variables
-                if ( $identifier =~ /^[\$\*\@\%]$/ ) {
+                if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
+
+                # perl seems to allow just these: $#: $#- $#+
+                elsif ( $tok =~ /^[\:\-\+]$/ ) {
+                    $type = 'i';
                      $identifier .= $tok;
                  }
+                else {
+                    $i = $i_save;
+                    write_logfile_entry( 'Use of $# is deprecated' . "\n" );
+                }
+            }
+            elsif ( $identifier eq '$$' ) {
  
-                # POSTDEFREF: Postfix reference ->$* ->%*  ->@* ->** ->&* ->$#*
-                elsif ($tok eq '*'
-                    && $identifier =~ /\-\>([\@\%\$\*\&]|\$\#)$/ )
-                {
-                    $identifier .= $tok;
+                # perl does not allow references to punctuation
+                # variables without braces.  For example, this
+                # won't work:
+                #  $:=\4;
+                #  $a = $$:;
+                # You would have to use
+                #  $a = ${$:};
+
+                # '$$' alone is punctuation variable for PID
+                $i = $i_save;
+                if   ( $tok eq '{' ) { $type = 't' }
+                else                 { $type = 'i' }
+            }
+            elsif ( $identifier eq '->' ) {
+                $i = $i_save;
+            }
+            else {
+                $i = $i_save;
+                if ( length($identifier) == 1 ) {
+                    $identifier = EMPTY_STRING;
                  }
+            }
+            $id_scan_state = EMPTY_STRING;
+        }
+        return;
+    } ## end sub do_id_scan_state_dollar
  
-                elsif ( $identifier eq '$#' ) {
+    sub do_id_scan_state_alpha {
  
-                    if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
+        # looking for alphanumeric after ::
+        $tok_is_blank = $tok =~ /^\s*$/;
  
-                    # perl seems to allow just these: $#: $#- $#+
-                    elsif ( $tok =~ /^[\:\-\+]$/ ) {
-                        $type = 'i';
-                        $identifier .= $tok;
-                    }
-                    else {
-                        $i = $i_save;
-                        write_logfile_entry( 'Use of $# is deprecated' . "\n" );
-                    }
-                }
-                elsif ( $identifier eq '$$' ) {
+        if ( $tok =~ /^\w/ ) {    # found it
+            $identifier .= $tok;
+            $id_scan_state = $scan_state_COLON;    # now need ::
+            $saw_alpha     = 1;
+        }
+        elsif ( $tok eq "'" && $allow_tick ) {
+            $identifier .= $tok;
+            $id_scan_state = $scan_state_COLON;    # now need ::
+            $saw_alpha     = 1;
+        }
+        elsif ( $tok_is_blank && $identifier =~ /^sub / ) {
+            $id_scan_state = $scan_state_LPAREN;
+            $identifier .= $tok;
+        }
+        elsif ( $tok eq '(' && $identifier =~ /^sub / ) {
+            $id_scan_state = $scan_state_RPAREN;
+            $identifier .= $tok;
+        }
+        else {
+            $id_scan_state = EMPTY_STRING;
+            $i             = $i_save;
+        }
+        return;
+    } ## end sub do_id_scan_state_alpha
  
-                    # perl does not allow references to punctuation
-                    # variables without braces.  For example, this
-                    # won't work:
-                    #  $:=\4;
-                    #  $a = $$:;
-                    # You would have to use
-                    #  $a = ${$:};
+    sub do_id_scan_state_colon {
  
-                    # '$$' alone is punctuation variable for PID
-                    $i = $i_save;
-                    if   ( $tok eq '{' ) { $type = 't' }
-                    else                 { $type = 'i' }
-                }
-                elsif ( $identifier eq '->' ) {
-                    $i = $i_save;
-                }
-                else {
-                    $i = $i_save;
-                    if ( length($identifier) == 1 ) { $identifier = ''; }
-                }
-                $id_scan_state = '';
-                last;
+        # looking for possible :: after alphanumeric
+
+        $tok_is_blank = $tok =~ /^\s*$/;
+
+        if ( $tok eq '::' ) {    # got it
+            $identifier .= $tok;
+            $id_scan_state = $scan_state_ALPHA;    # now require alpha
+        }
+        elsif ( $tok =~ /^\w/ ) {    # more alphanumeric is ok here
+            $identifier .= $tok;
+            $id_scan_state = $scan_state_COLON;    # now need ::
+            $saw_alpha     = 1;
+        }
+        elsif ( $tok eq "'" && $allow_tick ) {     # tick
+
+            if ( $is_keyword{$identifier} ) {
+                $id_scan_state = EMPTY_STRING;     # that's all
+                $i             = $i_save;
+            }
+            else {
+                $identifier .= $tok;
              }
          }
+        elsif ( $tok_is_blank && $identifier =~ /^sub / ) {
+            $id_scan_state = $scan_state_LPAREN;
+            $identifier .= $tok;
+        }
+        elsif ( $tok eq '(' && $identifier =~ /^sub / ) {
+            $id_scan_state = $scan_state_RPAREN;
+            $identifier .= $tok;
+        }
+        else {
+            $id_scan_state = EMPTY_STRING;    # that's all
+            $i             = $i_save;
+        }
+        return;
+    } ## end sub do_id_scan_state_colon
+
+    sub do_id_scan_state_left_paren {
+
+        # looking for possible '(' of a prototype
+
+        if ( $tok eq '(' ) {    # got it
+            $identifier .= $tok;
+            $id_scan_state = $scan_state_RPAREN;    # now find the end of it
+        }
+        elsif ( $tok =~ /^\s*$/ ) {                 # blank - keep going
+            $identifier .= $tok;
+            $tok_is_blank = 1;
+        }
+        else {
+            $id_scan_state = EMPTY_STRING;          # that's all - no prototype
+            $i             = $i_save;
+        }
+        return;
+    } ## end sub do_id_scan_state_left_paren
  
-        ###################################
-        # looking for alphanumeric after ::
-        ###################################
+    sub do_id_scan_state_right_paren {
  
-        elsif ( $id_scan_state eq 'A' ) {
+        # looking for a ')' of prototype to close a '('
  
-            $tok_is_blank = $tok =~ /^\s*$/;
+        $tok_is_blank = $tok =~ /^\s*$/;
  
-            if ( $tok =~ /^\w/ ) {    # found it
-                $identifier .= $tok;
-                $id_scan_state = ':';    # now need ::
-                $saw_alpha     = 1;
-            }
-            elsif ( $tok eq "'" && $allow_tick ) {
-                $identifier .= $tok;
-                $id_scan_state = ':';    # now need ::
-                $saw_alpha     = 1;
-            }
-            elsif ( $tok_is_blank && $identifier =~ /^sub / ) {
-                $id_scan_state = '(';
-                $identifier .= $tok;
-            }
-            elsif ( $tok eq '(' && $identifier =~ /^sub / ) {
-                $id_scan_state = ')';
-                $identifier .= $tok;
-            }
-            else {
-                $id_scan_state = '';
-                $i             = $i_save;
-                last;
-            }
+        if ( $tok eq ')' ) {    # got it
+            $identifier .= $tok;
+            $id_scan_state = EMPTY_STRING;    # all done
          }
+        elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
+            $identifier .= $tok;
+        }
+        else {    # probable error in script, but keep going
+            warning("Unexpected '$tok' while seeking end of prototype\n");
+            $identifier .= $tok;
+        }
+        return;
+    } ## end sub do_id_scan_state_right_paren
  
-        ###################################
-        # looking for :: after alphanumeric
-        ###################################
+    sub do_id_scan_state_ampersand {
  
-        elsif ( $id_scan_state eq ':' ) {    # looking for :: after alpha
+        # Starting sub call after seeing an '&'
  
-            $tok_is_blank = $tok =~ /^\s*$/;
+        if ( $tok =~ /^[\$\w]/ ) {    # alphanumeric ..
+            $id_scan_state = $scan_state_COLON;    # now need ::
+            $saw_alpha     = 1;
+            $identifier .= $tok;
+        }
+        elsif ( $tok eq "'" && $allow_tick ) {     # alphanumeric ..
+            $id_scan_state = $scan_state_COLON;    # now need ::
+            $saw_alpha     = 1;
+            $identifier .= $tok;
+        }
+        elsif ( $tok =~ /^\s*$/ ) {                # allow space
+            $tok_is_blank = 1;
  
-            if ( $tok eq '::' ) {            # got it
-                $identifier .= $tok;
-                $id_scan_state = 'A';        # now require alpha
+            # fix c139: trim line-ending type 't'
+            if ( length($identifier) == 1 && $i == $max_token_index ) {
+                $i    = $i_save;
+                $type = 't';
              }
-            elsif ( $tok =~ /^\w/ ) {        # more alphanumeric is ok here
-                $identifier .= $tok;
-                $id_scan_state = ':';        # now need ::
-                $saw_alpha     = 1;
+        }
+        elsif ( $tok eq '::' ) {                   # leading ::
+            $id_scan_state = $scan_state_ALPHA;    # accept alpha next
+            $identifier .= $tok;
+        }
+        elsif ( $tok eq '{' ) {
+            if ( $identifier eq '&' || $i == 0 ) {
+                $identifier = EMPTY_STRING;
              }
-            elsif ( $tok eq "'" && $allow_tick ) {    # tick
+            $i             = $i_save;
+            $id_scan_state = EMPTY_STRING;
+        }
+        elsif ( $tok eq '^' ) {
+            if ( $identifier eq '&' ) {
  
-                if ( $is_keyword{$identifier} ) {
-                    $id_scan_state = '';              # that's all
-                    $i             = $i_save;
+                # Special variable (c066)
+                $identifier .= $tok;
+                $type = '&';
+
+                # There may be one more character, not a space, after the ^
+                my $next1 = $rtokens->[ $i + 1 ];
+                my $chr   = substr( $next1, 0, 1 );
+                if ( $is_special_variable_char{$chr} ) {
+
+                    # It is something like &^O
+                    $i++;
+                    $identifier .= $next1;
+
+                    # If pretoken $next1 is more than one character long,
+                    # set a flag indicating that it needs to be split.
+                    $id_scan_state =
+                      ( length($next1) > 1 ) ? $scan_state_SPLIT : EMPTY_STRING;
                  }
                  else {
-                    $identifier .= $tok;
+
+                    # it is &^
+                    $id_scan_state = EMPTY_STRING;
                  }
              }
-            elsif ( $tok_is_blank && $identifier =~ /^sub / ) {
-                $id_scan_state = '(';
-                $identifier .= $tok;
-            }
-            elsif ( $tok eq '(' && $identifier =~ /^sub / ) {
-                $id_scan_state = ')';
-                $identifier .= $tok;
-            }
              else {
-                $id_scan_state = '';        # that's all
-                $i             = $i_save;
-                last;
+                $identifier = EMPTY_STRING;
+                $i          = $i_save;
              }
          }
+        else {
  
-        ##############################
-        # looking for '(' of prototype
-        ##############################
-
-        elsif ( $id_scan_state eq '(' ) {
-
-            if ( $tok eq '(' ) {    # got it
-                $identifier .= $tok;
-                $id_scan_state = ')';    # now find the end of it
-            }
-            elsif ( $tok =~ /^\s*$/ ) {    # blank - keep going
+            # punctuation variable?
+            # testfile: cunningham4.pl
+            #
+            # We have to be careful here.  If we are in an unknown state,
+            # we will reject the punctuation variable.  In the following
+            # example the '&' is a binary operator but we are in an unknown
+            # state because there is no sigil on 'Prima', so we don't
+            # know what it is.  But it is a bad guess that
+            # '&~' is a function variable.
+            # $self->{text}->{colorMap}->[
+            #   Prima::PodView::COLOR_CODE_FOREGROUND
+            #   & ~tb::COLOR_INDEX ] =
+            #   $sec->{ColorCode}
+
+            # Fix for case c033: a '#' here starts a side comment
+            if ( $identifier eq '&' && $expecting && $tok ne '#' ) {
                  $identifier .= $tok;
-                $tok_is_blank = 1;
              }
              else {
-                $id_scan_state = '';        # that's all - no prototype
-                $i             = $i_save;
-                last;
+                $identifier = EMPTY_STRING;
+                $i          = $i_save;
+                $type       = '&';
              }
+            $id_scan_state = EMPTY_STRING;
          }
+        return;
+    } ## end sub do_id_scan_state_ampersand
+
+    #-------------------
+    # hash of scanner subs
+    #-------------------
+    my $scan_identifier_code = {
+        $scan_state_SIGIL     => \&do_id_scan_state_dollar,
+        $scan_state_ALPHA     => \&do_id_scan_state_alpha,
+        $scan_state_COLON     => \&do_id_scan_state_colon,
+        $scan_state_LPAREN    => \&do_id_scan_state_left_paren,
+        $scan_state_RPAREN    => \&do_id_scan_state_right_paren,
+        $scan_state_AMPERSAND => \&do_id_scan_state_ampersand,
+    };
  
-        ##############################
-        # looking for ')' of prototype
-        ##############################
+    sub scan_complex_identifier {
  
-        elsif ( $id_scan_state eq ')' ) {
+        # This routine assembles tokens into identifiers.  It maintains a
+        # scan state, id_scan_state.  It updates id_scan_state based upon
+        # current id_scan_state and token, and returns an updated
+        # id_scan_state and the next index after the identifier.
  
-            $tok_is_blank = $tok =~ /^\s*$/;
+        # This routine now serves as a backup for sub scan_simple_identifier
+        # which handles most identifiers.
  
-            if ( $tok eq ')' ) {    # got it
-                $identifier .= $tok;
-                $id_scan_state = '';    # all done
-                last;
-            }
-            elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
-                $identifier .= $tok;
-            }
-            else {    # probable error in script, but keep going
-                warning("Unexpected '$tok' while seeking end of prototype\n");
-                $identifier .= $tok;
-            }
-        }
+        (
+            $i,         $id_scan_state, $identifier, $rtokens, $max_token_index,
+            $expecting, $container_type
+        ) = @_;
  
-        ###################
-        # Starting sub call
-        ###################
+        # return flag telling caller to split the pretoken
+        my $split_pretoken_flag;
  
-        elsif ( $id_scan_state eq '&' ) {
+        #-------------------
+        # Initialize my vars
+        #-------------------
  
-            if ( $tok =~ /^[\$\w]/ ) {    # alphanumeric ..
-                $id_scan_state = ':';     # now need ::
-                $saw_alpha     = 1;
-                $identifier .= $tok;
+        initialize_my_scan_id_vars();
+
+        #--------------------------------------------------------
+        # get started by defining a type and a state if necessary
+        #--------------------------------------------------------
+
+        if ( !$id_scan_state ) {
+            $context = UNKNOWN_CONTEXT;
+
+            # fixup for digraph
+            if ( $tok eq '>' ) {
+                $tok       = '->';
+                $tok_begin = $tok;
              }
-            elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
-                $id_scan_state = ':';                 # now need ::
-                $saw_alpha     = 1;
-                $identifier .= $tok;
+            $identifier = $tok;
+
+            if ( $last_nonblank_token eq '->' ) {
+                $identifier    = '->' . $identifier;
+                $id_scan_state = $scan_state_SIGIL;
              }
-            elsif ( $tok =~ /^\s*$/ ) {               # allow space
-                $tok_is_blank = 1;
+            elsif ( $tok eq '$' || $tok eq '*' ) {
+                $id_scan_state = $scan_state_SIGIL;
+                $context       = SCALAR_CONTEXT;
              }
-            elsif ( $tok eq '::' ) {                  # leading ::
-                $id_scan_state = 'A';                 # accept alpha next
-                $identifier .= $tok;
+            elsif ( $tok eq '%' || $tok eq '@' ) {
+                $id_scan_state = $scan_state_SIGIL;
+                $context       = LIST_CONTEXT;
              }
-            elsif ( $tok eq '{' ) {
-                if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
-                $i             = $i_save;
-                $id_scan_state = '';
-                last;
+            elsif ( $tok eq '&' ) {
+                $id_scan_state = $scan_state_AMPERSAND;
+            }
+            elsif ( $tok eq 'sub' or $tok eq 'package' ) {
+                $saw_alpha     = 0;    # 'sub' is considered type info here
+                $id_scan_state = $scan_state_SIGIL;
+                $identifier .=
+                  SPACE;    # need a space to separate sub from sub name
+            }
+            elsif ( $tok eq '::' ) {
+                $id_scan_state = $scan_state_ALPHA;
+            }
+            elsif ( $tok =~ /^\w/ ) {
+                $id_scan_state = $scan_state_COLON;
+                $saw_alpha     = 1;
+            }
+            elsif ( $tok eq '->' ) {
+                $id_scan_state = $scan_state_SIGIL;
              }
              else {
  
-                # punctuation variable?
-                # testfile: cunningham4.pl
-                #
-                # We have to be careful here.  If we are in an unknown state,
-                # we will reject the punctuation variable.  In the following
-                # example the '&' is a binary operator but we are in an unknown
-                # state because there is no sigil on 'Prima', so we don't
-                # know what it is.  But it is a bad guess that
-                # '&~' is a function variable.
-                # $self->{text}->{colorMap}->[
-                #   Prima::PodView::COLOR_CODE_FOREGROUND
-                #   & ~tb::COLOR_INDEX ] =
-                #   $sec->{ColorCode}
-
-                # Fix for case c033: a '#' here starts a side comment
-                if ( $identifier eq '&' && $expecting && $tok ne '#' ) {
-                    $identifier .= $tok;
+                # shouldn't happen: bad call parameter
+                my $msg =
+"Program bug detected: scan_identifier received bad starting token = '$tok'\n";
+                if (DEVEL_MODE) { Fault($msg) }
+                if ( !$tokenizer_self->[_in_error_] ) {
+                    warning($msg);
+                    $tokenizer_self->[_in_error_] = 1;
+                }
+                $id_scan_state = EMPTY_STRING;
+
+                # emergency return
+                goto RETURN;
+            }
+            $saw_type = !$saw_alpha;
+        }
+        else {
+            $i--;
+            $saw_alpha = ( $tok =~ /^\w/ );
+            $saw_type  = ( $tok =~ /([\$\%\@\*\&])/ );
+
+            # check for a valid starting state
+            if ( DEVEL_MODE && !$is_returnable_scan_state{$id_scan_state} ) {
+                Fault(<<EOM);
+Unexpected starting scan state in sub scan_complex_identifier: '$id_scan_state'
+EOM
+            }
+        }
+
+        #------------------------------
+        # loop to gather the identifier
+        #------------------------------
+
+        $i_save = $i;
+
+        while ( $i < $max_token_index && $id_scan_state ) {
+
+            # Be sure we have code to handle this state before we proceed
+            my $code = $scan_identifier_code->{$id_scan_state};
+            if ( !$code ) {
+
+                if ( $id_scan_state eq $scan_state_SPLIT ) {
+                    ## OK: this is the signal to exit and split the pretoken
                  }
+
+                # unknown state - should not happen
                  else {
-                    $identifier = '';
-                    $i          = $i_save;
-                    $type       = '&';
+                    if (DEVEL_MODE) {
+                        Fault(<<EOM);
+Unknown scan state in sub scan_complex_identifier: '$id_scan_state'
+Scan state at sub entry was '$id_scan_state_begin'
+EOM
+                    }
+                    $id_scan_state = EMPTY_STRING;
+                    $i             = $i_save;
                  }
-                $id_scan_state = '';
                  last;
              }
-        }
  
-        ######################
-        # unknown state - quit
-        ######################
+            # Remember the starting index for progress check below
+            my $i_start_loop = $i;
  
-        else {    # can get here due to error in initialization
-            $id_scan_state = '';
-            $i             = $i_save;
-            last;
-        }
-    } ## end of main loop
+            $last_tok_is_blank = $tok_is_blank;
+            if   ($tok_is_blank) { $tok_is_blank = undef }
+            else                 { $i_save       = $i }
  
-    if ( $id_scan_state eq ')' ) {
-        warning("Hit end of line while seeking ) to end prototype\n");
-    }
+            $tok = $rtokens->[ ++$i ];
  
-    # once we enter the actual identifier, it may not extend beyond
-    # the end of the current line
-    if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
-        $id_scan_state = '';
-    }
+            # patch to make digraph :: if necessary
+            if ( ( $tok eq ':' ) && ( $rtokens->[ $i + 1 ] eq ':' ) ) {
+                $tok = '::';
+                $i++;
+            }
+
+            $code->();
  
-    # Patch: the deprecated variable $# does not combine with anything on the
-    # next line.
-    if ( $identifier eq '$#' ) { $id_scan_state = '' }
+            # check for forward progress: if the index $i did not advance
+            # then scanning has finished
+            last if ( $i <= $i_start_loop );
  
-    if ( $i < 0 ) { $i = 0 }
+        } ## end of main loop
  
-    # Be sure a token type is defined
-    if ( !$type ) {
+        #-------------
+        # Check result
+        #-------------
  
-        if ($saw_type) {
+        # Be sure a valid state is returned
+        if ($id_scan_state) {
  
-            if ($saw_alpha) {
-                if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
-                    $type = 'w';
+            if ( !$is_returnable_scan_state{$id_scan_state} ) {
+
+                if ( $id_scan_state eq $scan_state_SPLIT ) {
+                    $split_pretoken_flag = 1;
+                }
+
+                if ( $id_scan_state eq $scan_state_RPAREN ) {
+                    warning(
+                        "Hit end of line while seeking ) to end prototype\n");
                  }
-                else { $type = 'i' }
+
+                $id_scan_state = EMPTY_STRING;
              }
-            elsif ( $identifier eq '->' ) {
-                $type = '->';
+
+            # Patch: the deprecated variable $# does not combine with anything
+            # on the next line.
+            if ( $identifier eq '$#' ) { $id_scan_state = EMPTY_STRING }
+        }
+
+        # Be sure the token index is valid
+        if ( $i < 0 ) { $i = 0 }
+
+        # Be sure a token type is defined
+        if ( !$type ) {
+
+            if ($saw_type) {
+
+                if ($saw_alpha) {
+
+                  # The type without the -> should be the same as with the -> so
+                  # that if they get separated we get the same bond strengths,
+                  # etc.  See b1234
+                    if (   $identifier =~ /^->/
+                        && $last_nonblank_type eq 'w'
+                        && substr( $identifier, 2, 1 ) =~ /^\w/ )
+                    {
+                        $type = 'w';
+                    }
+                    else { $type = 'i' }
+                }
+                elsif ( $identifier eq '->' ) {
+                    $type = '->';
+                }
+                elsif (
+                    ( length($identifier) > 1 )
+
+                    # In something like '@$=' we have an identifier '@$'
+                    # In something like '$${' we have type '$$' (and only
+                    # part of an identifier)
+                    && !( $identifier =~ /\$$/ && $tok eq '{' )
+
+                    ## && ( $identifier !~ /^(sub |package )$/ )
+                    && $identifier ne 'sub '
+                    && $identifier ne 'package '
+                  )
+                {
+                    $type = 'i';
+                }
+                else { $type = 't' }
              }
-            elsif (
-                ( length($identifier) > 1 )
+            elsif ($saw_alpha) {
  
-                # In something like '@$=' we have an identifier '@$'
-                # In something like '$${' we have type '$$' (and only
-                # part of an identifier)
-                && !( $identifier =~ /\$$/ && $tok eq '{' )
-                && ( $identifier !~ /^(sub |package )$/ )
-              )
-            {
-                $type = 'i';
+                # type 'w' includes anything without leading type info
+                # ($,%,@,*) including something like abc::def::ghi
+                $type = 'w';
+
+                # Fix for b1337, if restarting scan after line break between
+                # '->' or sigil and identifier name, use type 'i'
+                if (   $id_scan_state_begin
+                    && $identifier =~ /^([\$\%\@\*\&]|->)/ )
+                {
+                    $type = 'i';
+                }
              }
-            else { $type = 't' }
+            else {
+                $type = EMPTY_STRING;
+            }    # this can happen on a restart
          }
-        elsif ($saw_alpha) {
  
-            # type 'w' includes anything without leading type info
-            # ($,%,@,*) including something like abc::def::ghi
-            $type = 'w';
+        # See if we formed an identifier...
+        if ($identifier) {
+            $tok = $identifier;
+            if ($message) { write_logfile_entry($message) }
          }
-        else {
-            $type = '';
-        }    # this can happen on a restart
-    }
  
-    # See if we formed an identifier...
-    if ($identifier) {
-        $tok = $identifier;
-        if ($message) { write_logfile_entry($message) }
-    }
+        # did not find an identifier, back  up
+        else {
+            $tok = $tok_begin;
+            $i   = $i_begin;
+        }
  
-    # did not find an identifier, back  up
-    else {
-        $tok = $tok_begin;
-        $i   = $i_begin;
-    }
+      RETURN:
  
-    DEBUG_SCAN_ID && do {
-        my ( $a, $b, $c ) = caller;
-        print STDOUT
+        DEBUG_SCAN_ID && do {
+            my ( $a, $b, $c ) = caller;
+            print STDOUT
  "SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
-        print STDOUT
+            print STDOUT
  "SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
-    };
-    return ( $i, $tok, $type, $id_scan_state, $identifier );
-}
+        };
+        return ( $i, $tok, $type, $id_scan_state, $identifier,
+            $split_pretoken_flag );
+    } ## end sub scan_complex_identifier
+} ## end closure for sub scan_complex_identifier
  
  {    ## closure for sub do_scan_sub
  
@@ -7369,8 +8238,8 @@ sub scan_identifier_do {
  
      # initialize subname each time a new 'sub' keyword is encountered
      sub initialize_subname {
-        $package_saved = "";
-        $subname_saved = "";
+        $package_saved = EMPTY_STRING;
+        $subname_saved = EMPTY_STRING;
          return;
      }
  
@@ -7445,7 +8314,7 @@ sub scan_identifier_do {
            : $tok eq '('         ? PAREN_CALL
            :                       SUB_CALL;
  
-        $id_scan_state = "";    # normally we get everything in one call
+        $id_scan_state = EMPTY_STRING;  # normally we get everything in one call
          my $subname = $subname_saved;
          my $package = $package_saved;
          my $proto   = undef;
@@ -7582,15 +8451,16 @@ sub scan_identifier_do {
                      $max_token_index );
                  if ($error) { warning("Possibly invalid sub\n") }
  
-            # Patch part #2 to fixes cases b994 and b1053:
-            # Do not let spaces be part of the token of an anonymous sub keyword
-            # which we marked as type 'k' above...i.e. for something like:
-            #    'sub : lvalue { ...'
-            # Back up and let it be parsed as a blank
+                # Patch part #2 to fix cases b994 and b1053:
+                # Do not let spaces be part of the token of an anonymous sub
+                # keyword which we marked as type 'k' above...i.e. for
+                # something like:
+                #    'sub : lvalue { ...'
+                # Back up and let it be parsed as a blank
                  if (   $type eq 'k'
                      && $attrs
                      && $i > $i_entry
-                    && substr( $rtokens->[$i], 0, 1 ) eq ' ' )
+                    && substr( $rtokens->[$i], 0, 1 ) =~ m/\s/ )
                  {
                      $i--;
                  }
@@ -7633,7 +8503,7 @@ sub scan_identifier_do {
                          else {
                              warning(
  "already saw definition of 'sub $subname' in package '$package' at line $lno\n"
-                            );
+                            ) unless (DEVEL_MODE);
                          }
                      }
                      $saw_function_definition{$subname}{$package} =
@@ -7674,7 +8544,7 @@ sub scan_identifier_do {
                  }
              }
              elsif ($next_nonblank_token) {    # EOF technically ok
-                $subname = "" unless defined($subname);
+                $subname = EMPTY_STRING unless defined($subname);
                  warning(
  "expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
                  );
@@ -7687,10 +8557,10 @@ sub scan_identifier_do {
  
          }
          return ( $i, $tok, $type, $id_scan_state );
-    }
+    } ## end sub do_scan_sub
  }
  
-#########i###############################################################
+#########################################################################
  # Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
  #########################################################################
  
@@ -7709,14 +8579,73 @@ sub find_next_nonblank_token {
      }
  
      my $next_nonblank_token = $rtokens->[ ++$i ];
-    return ( " ", $i ) unless defined($next_nonblank_token);
+    return ( SPACE, $i )
+      unless ( defined($next_nonblank_token) && length($next_nonblank_token) );
+
+    # Quick test for nonblank ascii char. Note that we just have to
+    # examine the first character here.
+    my $ord = ord( substr( $next_nonblank_token, 0, 1 ) );
+    if (   $ord >= ORD_PRINTABLE_MIN
+        && $ord <= ORD_PRINTABLE_MAX )
+    {
+        return ( $next_nonblank_token, $i );
+    }
+
+    # Quick test to skip over an ascii space or tab
+    elsif ( $ord == ORD_SPACE || $ord == ORD_TAB ) {
+        $next_nonblank_token = $rtokens->[ ++$i ];
+        return ( SPACE, $i ) unless defined($next_nonblank_token);
+    }
  
-    if ( $next_nonblank_token =~ /^\s*$/ ) {
+    # Slow test to skip over something else identified as whitespace
+    elsif ( $next_nonblank_token =~ /^\s*$/ ) {
          $next_nonblank_token = $rtokens->[ ++$i ];
-        return ( " ", $i ) unless defined($next_nonblank_token);
+        return ( SPACE, $i ) unless defined($next_nonblank_token);
      }
+
+    # We should be at a nonblank now
      return ( $next_nonblank_token, $i );
-}
+} ## end sub find_next_nonblank_token
+
+sub find_next_noncomment_type {
+    my ( $i, $rtokens, $max_token_index ) = @_;
+
+    # Given the current token index, look ahead past any comments
+    # and blank lines and return the next token, including digraphs and
+    # trigraphs.
+
+    my ( $next_nonblank_token, $i_next ) =
+      find_next_nonblank_token( $i, $rtokens, $max_token_index );
+
+    # skip past any side comment
+    if ( $next_nonblank_token eq '#' ) {
+        ( $next_nonblank_token, $i_next ) =
+          find_next_nonblank_token( $i_next, $rtokens, $max_token_index );
+    }
+
+    # check for a digraph
+    if (   $next_nonblank_token
+        && $next_nonblank_token ne SPACE
+        && defined( $rtokens->[ $i_next + 1 ] ) )
+    {
+        my $test2 = $next_nonblank_token . $rtokens->[ $i_next + 1 ];
+        if ( $is_digraph{$test2} ) {
+            $next_nonblank_token = $test2;
+            $i_next              = $i_next + 1;
+
+            # check for a trigraph
+            if ( defined( $rtokens->[ $i_next + 1 ] ) ) {
+                my $test3 = $next_nonblank_token . $rtokens->[ $i_next + 1 ];
+                if ( $is_trigraph{$test3} ) {
+                    $next_nonblank_token = $test3;
+                    $i_next              = $i_next + 1;
+                }
+            }
+        }
+    }
+
+    return ( $next_nonblank_token, $i_next );
+} ## end sub find_next_noncomment_type
  
  sub is_possible_numerator {
  
@@ -7751,7 +8680,7 @@ sub is_possible_numerator {
      }
  
      return $is_possible_numerator;
-}
+} ## end sub is_possible_numerator
  
  {    ## closure for sub pattern_expected
      my %pattern_test;
@@ -7803,7 +8732,7 @@ sub is_possible_numerator {
              }
          }
          return $is_pattern;
-    }
+    } ## end sub pattern_expected
  }
  
  sub find_next_nonblank_token_on_this_line {
@@ -7821,10 +8750,10 @@ sub find_next_nonblank_token_on_this_line {
          }
      }
      else {
-        $next_nonblank_token = "";
+        $next_nonblank_token = EMPTY_STRING;
      }
      return ( $next_nonblank_token, $i );
-}
+} ## end sub find_next_nonblank_token_on_this_line
  
  sub find_angle_operator_termination {
  
@@ -7846,7 +8775,14 @@ sub find_angle_operator_termination {
      elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
  
      # shouldn't happen - we shouldn't be here if operator is expected
-    else { warning("Program Bug in find_angle_operator_termination\n") }
+    else {
+        if (DEVEL_MODE) {
+            Fault(<<EOM);
+Bad call to find_angle_operator_termination
+EOM
+        }
+        return ( $i, $type );
+    }
  
      # To illustrate what we might be looking at, in case we are
      # guessing, here are some examples of valid angle operators
@@ -7886,6 +8822,22 @@ sub find_angle_operator_termination {
              my $pos_beg = $rtoken_map->[$i];
              my $str     = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
  
+            # Test for '<' after possible filehandle, issue c103
+            # print $fh <>;          # syntax error
+            # print $fh <DATA>;      # ok
+            # print $fh < DATA>;     # syntax error at '>'
+            # print STDERR < DATA>;  # ok, prints word 'DATA'
+            # print BLABLA <DATA>;   # ok; does nothing unless BLABLA is defined
+            if ( $last_nonblank_type eq 'Z' ) {
+
+                # $str includes brackets; something like '<DATA>'
+                if (   substr( $last_nonblank_token, 0, 1 ) !~ /[A-Za-z_]/
+                    && substr( $str, 1, 1 ) !~ /[A-Za-z_]/ )
+                {
+                    return ( $i, $type );
+                }
+            }
+
              # Reject if the closing '>' follows a '-' as in:
              # if ( VERSION < 5.009 && $op-> name eq 'assign' ) { }
              if ( $expecting eq UNKNOWN ) {
@@ -7907,9 +8859,13 @@ sub find_angle_operator_termination {
              # It may be possible that a quote ends midway in a pretoken.
              # If this happens, it may be necessary to split the pretoken.
              if ($error) {
+                if (DEVEL_MODE) {
+                    Fault(<<EOM);
+unexpected error condition returned by inverse_pretoken_map
+EOM
+                }
                  warning(
                      "Possible tokinization error..please check this line\n");
-                report_possible_bug();
              }
  
              # count blanks on inside of brackets
@@ -7978,7 +8934,7 @@ sub find_angle_operator_termination {
          }
      }
      return ( $i, $type );
-}
+} ## end sub find_angle_operator_termination
  
  sub scan_number_do {
  
@@ -8004,8 +8960,11 @@ sub scan_number_do {
  
      # Look for bad starting characters; Shouldn't happen..
      if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
-        warning("Program bug - scan_number given character $first_char\n");
-        report_definite_bug();
+        if (DEVEL_MODE) {
+            Fault(<<EOM);
+Program bug - scan_number given bad first character = '$first_char'
+EOM
+        }
          return ( $i, $type, $number );
      }
  
@@ -8041,10 +9000,10 @@ sub scan_number_do {
             [Pp][+-]?[0-9a-fA-F]          # REQUIRED exponent with digit
             [0-9a-fA-F_]*)                # optional Additional exponent digits
  
-          # or hex integer
+           # or hex integer
             |([xX][0-9a-fA-F_]+)        
  
-          # or octal fraction
+           # or octal fraction
             |([oO]?[0-7_]+          # string of octal digits
             (\.([0-7][0-7_]*)?)?    # optional decimal and fraction
             [Pp][+-]?[0-7]          # REQUIRED exponent, no underscore
@@ -8059,7 +9018,7 @@ sub scan_number_do {
             [Pp][+-]?[01]           # Required exponent indicator, no underscore
             [01_]*)                 # additional exponent bits
  
-          # or binary integer
+           # or binary integer
             |([bB][01_]+)           # 'b' with string of binary digits 
  
             )/gx
@@ -8113,7 +9072,7 @@ sub scan_number_do {
      if ($error) { warning("Possibly invalid number\n") }
  
      return ( $i, $type, $number );
-}
+} ## end sub scan_number_do
  
  sub inverse_pretoken_map {
  
@@ -8135,7 +9094,7 @@ sub inverse_pretoken_map {
          }
      }
      return ( $i, $error );
-}
+} ## end sub inverse_pretoken_map
  
  sub find_here_doc {
  
@@ -8152,8 +9111,8 @@ sub find_here_doc {
      my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
      my $ibeg                 = $i;
      my $found_target         = 0;
-    my $here_doc_target      = '';
-    my $here_quote_character = '';
+    my $here_doc_target      = EMPTY_STRING;
+    my $here_quote_character = EMPTY_STRING;
      my $saw_error            = 0;
      my ( $next_nonblank_token, $i_next_nonblank, $next_token );
      $next_token = $rtokens->[ $i + 1 ];
@@ -8248,7 +9207,7 @@ sub find_here_doc {
  
      return ( $found_target, $here_doc_target, $here_quote_character, $i,
          $saw_error );
-}
+} ## end sub find_here_doc
  
  sub do_quote {
  
@@ -8262,10 +9221,18 @@ sub do_quote {
      #  $quoted_string_1 = quoted string seen while in_quote=1
      #  $quoted_string_2 = quoted string seen while in_quote=2
      my (
-        $i,               $in_quote,    $quote_character,
-        $quote_pos,       $quote_depth, $quoted_string_1,
-        $quoted_string_2, $rtokens,     $rtoken_map,
-        $max_token_index
+
+        $i,
+        $in_quote,
+        $quote_character,
+        $quote_pos,
+        $quote_depth,
+        $quoted_string_1,
+        $quoted_string_2,
+        $rtokens,
+        $rtoken_map,
+        $max_token_index,
+
      ) = @_;
  
      my $in_quote_starting = $in_quote;
@@ -8282,7 +9249,7 @@ sub do_quote {
          $quoted_string_2 .= $quoted_string;
          if ( $in_quote == 1 ) {
              if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
-            $quote_character = '';
+            $quote_character = EMPTY_STRING;
          }
          else {
              $quoted_string_2 .= "\n";
@@ -8302,9 +9269,18 @@ sub do_quote {
              $quoted_string_1 .= "\n";
          }
      }
-    return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
-        $quoted_string_1, $quoted_string_2 );
-}
+    return (
+
+        $i,
+        $in_quote,
+        $quote_character,
+        $quote_pos,
+        $quote_depth,
+        $quoted_string_1,
+        $quoted_string_2,
+
+    );
+} ## end sub do_quote
  
  sub follow_quoted_string {
  
@@ -8323,12 +9299,21 @@ sub follow_quoted_string {
      #   $quote_pos = index to check next for alphanumeric delimiter
      #   $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
      #   $quoted_string = the text of the quote (without quotation tokens)
-    my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth,
-        $max_token_index )
-      = @_;
+    my (
+
+        $i_beg,
+        $in_quote,
+        $rtokens,
+        $beginning_tok,
+        $quote_pos,
+        $quote_depth,
+        $max_token_index,
+
+    ) = @_;
+
      my ( $tok, $end_tok );
      my $i             = $i_beg - 1;
-    my $quoted_string = "";
+    my $quoted_string = EMPTY_STRING;
  
      0 && do {
          print STDOUT
@@ -8380,10 +9365,10 @@ sub follow_quoted_string {
      # characters, whereas for a non-alphanumeric delimiter, only tokens of
      # length 1 can match.
  
-    ###################################################################
+    #----------------------------------------------------------------
      # Case 1 (rare): loop for case of alphanumeric quote delimiter..
      # "quote_pos" is the position the current word to begin searching
-    ###################################################################
+    #----------------------------------------------------------------
      if ( $beginning_tok =~ /\w/ ) {
  
          # Note this because it is not recommended practice except
@@ -8393,7 +9378,8 @@ sub follow_quoted_string {
                  "Note: alphanumeric quote delimiter ($beginning_tok) \n");
          }
  
-        while ( $i < $max_token_index ) {
+        # Note: changed < to <= here to fix c109. Relying on extra end blanks.
+        while ( $i <= $max_token_index ) {
  
              if ( $quote_pos == 0 || ( $i < 0 ) ) {
                  $tok = $rtokens->[ ++$i ];
@@ -8421,6 +9407,11 @@ sub follow_quoted_string {
                  $quoted_string .=
                    substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
  
+                # NOTE: any quote modifiers will be at the end of '$tok'. If we
+                # wanted to check them, this is the place to get them.  But
+                # this quote form is rarely used in practice, so it isn't
+                # worthwhile.
+
                  $quote_depth--;
  
                  if ( $quote_depth == 0 ) {
@@ -8436,9 +9427,9 @@ sub follow_quoted_string {
          }
      }
  
-    ########################################################################
+    #-----------------------------------------------------------------------
      # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
-    ########################################################################
+    #-----------------------------------------------------------------------
      else {
  
          while ( $i < $max_token_index ) {
@@ -8466,9 +9457,17 @@ sub follow_quoted_string {
          }
      }
      if ( $i > $max_token_index ) { $i = $max_token_index }
-    return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth,
-        $quoted_string );
-}
+    return (
+
+        $i,
+        $in_quote,
+        $beginning_tok,
+        $quote_pos,
+        $quote_depth,
+        $quoted_string,
+
+    );
+} ## end sub follow_quoted_string
  
  sub indicate_error {
      my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
@@ -8488,7 +9487,7 @@ sub write_error_indicator_pair {
      $underline =~ s/\s*$//;
      warning( $underline . "\n" );
      return;
-}
+} ## end sub write_error_indicator_pair
  
  sub make_numbered_line {
  
@@ -8545,9 +9544,9 @@ sub make_numbered_line {
      my $numbered_line = sprintf( "%d: ", $lineno );
      $offset -= length($numbered_line);
      $numbered_line .= $str;
-    my $underline = " " x length($numbered_line);
+    my $underline = SPACE x length($numbered_line);
      return ( $offset, $numbered_line, $underline );
-}
+} ## end sub make_numbered_line
  
  sub write_on_underline {
  
@@ -8581,10 +9580,16 @@ sub write_on_underline {
      }
      substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
      return ($underline);
-}
+} ## end sub write_on_underline
  
  sub pre_tokenize {
  
+    my ( $str, $max_tokens_wanted ) = @_;
+
+    # Input parameter:
+    #  $max_tokens_wanted > 0  to stop on reaching this many tokens.
+    #                     = 0 means get all tokens
+
      # Break a string, $str, into a sequence of preliminary tokens.  We
      # are interested in these types of tokens:
      #   words       (type='w'),            example: 'max_tokens_wanted'
@@ -8594,9 +9599,12 @@ sub pre_tokenize {
      # We cannot do better than this yet because we might be in a quoted
      # string or pattern.  Caller sets $max_tokens_wanted to 0 to get all
      # tokens.
-    my ( $str, $max_tokens_wanted ) = @_;
  
-    # we return references to these 3 arrays:
+    # An advantage of doing this pre-tokenization step is that it keeps almost
+    # all of the regex work highly localized.  A disadvantage is that in some
+    # very rare instances we will have to go back and split a pre-token.
+
+    # Return parameters:
      my @tokens    = ();     # array of the tokens themselves
      my @token_map = (0);    # string position of start of each token
      my @type      = ();     # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
@@ -8627,7 +9635,7 @@ sub pre_tokenize {
      } while ( --$max_tokens_wanted != 0 );
  
      return ( \@tokens, \@token_map, \@type );
-}
+} ## end sub pre_tokenize
  
  sub show_tokens {
  
@@ -8641,7 +9649,7 @@ sub show_tokens {
          print STDOUT "$i:$len:$rtoken_map->[$i]:$rtokens->[$i]:\n";
      }
      return;
-}
+} ## end sub show_tokens
  
  {    ## closure for sub matching end token
      my %matching_end_token;
@@ -8732,6 +9740,8 @@ The following additional token types are defined:
      HERE_END       - last line of here-doc (target word)
      FORMAT         - format section
      FORMAT_END     - last line of format section, '.'
+    SKIP           - code skipping section
+    SKIP_END       - last line of code skipping section, '#>>V'
      DATA_START     - __DATA__ line
      DATA           - unidentified text following __DATA__
      END_START      - __END__ line
@@ -8740,7 +9750,7 @@ The following additional token types are defined:
  END_OF_LIST
  
      return;
-}
+} ## end sub dump_token_types
  
  BEGIN {
  
@@ -8751,11 +9761,16 @@ BEGIN {
      my @q;
  
      my @digraphs = qw(
-      .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
+      .. :: << >> ** && || // -> => += -= .= %= &= |= ^= *= <>
        <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
      );
      @is_digraph{@digraphs} = (1) x scalar(@digraphs);
  
+    @q = qw(
+      . : < > * & | / - = + -  %  ^ !  x ~
+    );
+    @can_start_digraph{@q} = (1) x scalar(@q);
+
      my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~);
      @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
  
@@ -8800,6 +9815,16 @@ BEGIN {
        switch case given when default catch try finally);
      @is_code_block_token{@q} = (1) x scalar(@q);
  
+    # Note: this hash was formerly named '%is_not_zero_continuation_block_type'
+    # to contrast it with the block types in '%is_zero_continuation_block_type'
+    @q = qw( sort map grep eval do );
+    @is_sort_map_grep_eval_do{@q} = (1) x scalar(@q);
+
+    @q = qw( sort map grep );
+    @is_sort_map_grep{@q} = (1) x scalar(@q);
+
+    %is_grep_alias = ();
+
      # I'll build the list of keywords incrementally
      my @Keywords = ();
  
@@ -9127,7 +10152,10 @@ BEGIN {
      delete $really_want_term{'F'}; # file test works on $_ if no following term
      delete $really_want_term{'Y'}; # indirect object, too risky to check syntax;
                                     # let perl do it
+    @q = qw(q qq qx qr s y tr m);
+    @is_q_qq_qx_qr_s_y_tr_m{@q} = (1) x scalar(@q);
  
+    # Note added 'qw' here
      @q = qw(q qq qw qx qr s y tr m);
      @is_q_qq_qw_qx_qr_s_y_tr_m{@q} = (1) x scalar(@q);
  
@@ -9138,6 +10166,15 @@ BEGIN {
      push @q, ',';
      @is_comma_question_colon{@q} = (1) x scalar(@q);
  
+    @q = qw( if elsif unless );
+    @is_if_elsif_unless{@q} = (1) x scalar(@q);
+
+    @q = qw( ; t );
+    @is_semicolon_or_t{@q} = (1) x scalar(@q);
+
+    @q = qw( if elsif unless case when );
+    @is_if_elsif_unless_case_when{@q} = (1) x scalar(@q);
+
      # Hash of other possible line endings which may occur.
      # Keep these coordinated with the regex where this is used.
      # Note: chr(13) = chr(015)="\r".
@@ -9257,7 +10294,7 @@ BEGIN {
  
      # These are keywords for which an arg may optionally be omitted.  They are
      # currently only used to disambiguate a ? used as a ternary from one used
-    # as a (depricated) pattern delimiter.  In the future, they might be used
+    # as a (deprecated) pattern delimiter.  In the future, they might be used
      # to give a warning about ambiguous syntax before a /.
      # Note: split has been omitted (see note below).
      my @keywords_taking_optional_arg = qw(
@@ -9332,7 +10369,7 @@ BEGIN {
      @is_keyword_taking_optional_arg{@keywords_taking_optional_arg} =
        (1) x scalar(@keywords_taking_optional_arg);
  
-    # This list is used to decide if a pattern delmited by question marks,
+    # This list is used to decide if a pattern delimited by question marks,
      # ?pattern?, can follow one of these keywords.  Note that from perl 5.22
      # on, a ?pattern? is not recognized, so we can be much more strict than
      # with a /pattern/. Note that 'split' is not in this list. In current