From: don <don@8f7917da-ec0b-0410-a553-b9b0e350d17e>
Date: Sat, 29 Jul 2006 05:04:15 +0000 (+0000)
Subject:  Upgrade perltidy to the 20060719 release
X-Git-Tag: debian/20170521-1~41
X-Git-Url: https://git.donarmstrong.com/?p=perltidy.git;a=commitdiff_plain;h=7272c75ca797879bc1a6bb26ad0f82b89255104a

 Upgrade perltidy to the 20060719 release
---

diff --git a/BUGS b/BUGS
index edca9bc..5689aca 100644
--- a/BUGS
+++ b/BUGS
@@ -6,34 +6,12 @@ Perltidy open BUGS
      This file only lists open bugs.  For bugs which have been fixed, 
      see the ChangeLog.  
 
-  A here-doc invoked through an 'e' modifier on a pattern replacement text is not recognized
-    For example, the output of perltidy for this script has a syntax error:
-
-            my $text="Hello World!\n";
-            $text =~ s@Hello@<<'END'@e;
-            Goodbye 
-            Cruel
-            END
-            print "$text\n";
-
-    A workaround is to put the here-doc in a temporary string and then do
-    the substitution:
-
-            my $text="Hello World!\n";
-            my $str=<<'END';
-            Goodbye 
-            Cruel
-            END
-            $text =~ s@Hello@$str@e;
-            print "$text\n";
-
-  The --extrude option can occasionally produce code with syntax errors
+  The --extrude option can produce code with syntax errors
     The --extrude tries to put as many newlines in the formatted code as
-    possible. This option is of limited use for formatting, but it has been
-    helpful for debugging purposes. Occasionally it will produce code which
-    Perl considers to have a syntax error. These problems usually involve
-    code where Perl is having to guess the tokenization. For example,
-    --extrude will currently cause a syntax error in the following line:
-
-     utime $inc+0 ? ($mtime, $ntime) : ($atime, $atime), $file;
+    possible. This option is very useful for testing perltidy but not for
+    actual formatting. Occasionally it will produce code which Perl
+    considers to have a syntax error. These problems usually involve code
+    where Perl is having to guess the tokenization based on whitespace.
+    Since the --extrude option is typically only used for testing perltidy,
+    this type of error should not normally occur in practice.
 
diff --git a/CHANGES b/CHANGES
index b59e99b..b103a05 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,8 +1,98 @@
 Perltidy Change Log
-     You can help Perltidy evolve into a better program.  If you have hit a
-     bug, unusual behavior, annoyance, or have a suggested improvement,
-     please send a note to perltidy at users.sourceforge.net.  All
-     suggestions are welcome.
+  2006 07 19
+     -Eliminated bug where a here-doc invoked through an 'e' modifier on a pattern
+     replacement text was not recognized.  The tokenizer now recursively scans
+     replacement text (but does not reformat it).
+
+     -Improved vertical alignment of terminal else blocks and ternary statements.
+      Thanks to Chris for the suggestion. 
+
+      OLD:
+        if    ( IsBitmap() ) { return GetBitmap(); }
+        elsif ( IsFiles() )  { return GetFiles(); }
+        else { return GetText(); }
+
+      NEW:
+        if    ( IsBitmap() ) { return GetBitmap(); }
+        elsif ( IsFiles() )  { return GetFiles(); }
+        else                 { return GetText(); }
+
+      OLD:
+        $which_search =
+            $opts{"t"} ? 'title'
+          : $opts{"s"} ? 'subject'
+          : $opts{"a"} ? 'author'
+          : 'title';
+
+      NEW:
+        $which_search =
+            $opts{"t"} ? 'title'
+          : $opts{"s"} ? 'subject'
+          : $opts{"a"} ? 'author'
+          :              'title';
+
+     -improved indentation of try/catch blocks and other externally defined
+     functions accepting a block argument.  Thanks to jae.
+
+     -Added support for Perl 5.10 features say and smartmatch.
+
+     -Added flag -pbp (--perl-best-practices) as an abbreviation for parameters
+     suggested in Damian Conway's "Perl Best Practices".  -pbp is the same as:
+
+        -l=78 -i=4 -ci=4 -st -se -vt=2 -cti=0 -pt=1 -bt=1 -sbt=1 -bbt=1 -nsfs -nolq
+        -wbb="% + - * / x != == >= <= =~ !~ < > | & >= < = 
+              **= += *= &= <<= &&= -= /= |= >>= ||= .= %= ^= x="
+
+      Please note that the -st here restricts input to standard input; use
+      -nst if necessary to override.
+
+     -Eliminated some needless breaks at equals signs in -lp indentation.
+
+        OLD:
+            $c =
+              Math::Complex->make(LEFT + $x * (RIGHT - LEFT) / SIZE,
+                                  TOP + $y * (BOTTOM - TOP) / SIZE);
+        NEW:
+            $c = Math::Complex->make(LEFT + $x * (RIGHT - LEFT) / SIZE,
+                                     TOP + $y * (BOTTOM - TOP) / SIZE);
+
+     A break at an equals is sometimes useful for preventing complex statements 
+     from hitting the line length limit.  The decision to do this was 
+     over-eager in some cases and has been improved.  Thanks to Royce Reece.
+
+     -qw quotes contained in braces, square brackets, and parens are being
+     treated more like those containers as far as stacking of tokens.  Also
+     stack of closing tokens ending ');' will outdent to where the ');' would
+     have outdented if the closing stack is matched with a similar opening stack.
+
+      OLD: perltidy -soc -sct
+        __PACKAGE__->load_components(
+            qw(
+              PK::Auto
+              Core
+              )
+        );
+      NEW: perltidy -soc -sct
+        __PACKAGE__->load_components( qw(
+              PK::Auto
+              Core
+        ) );
+      Thanks to Aran Deltac
+
+     -Eliminated some undesirable or marginally desirable vertical alignments.
+     These include terminal colons, opening braces, and equals, and particularly
+     when just two lines would be aligned.
+
+     OLD:
+        my $accurate_timestamps = $Stamps{lnk};
+        my $has_link            = 
+            ...
+     NEW:
+        my $accurate_timestamps = $Stamps{lnk};
+        my $has_link =
+
+     -Corrected a problem with -mangle in which a space would be removed
+     between a keyword and variable beginning with ::.
 
   2006 06 14
      -Attribute argument lists are now correctly treated as quoted strings
diff --git a/META.yml b/META.yml
index 7718427..95ea74c 100644
--- a/META.yml
+++ b/META.yml
@@ -1,10 +1,10 @@
 # http://module-build.sourceforge.net/META-spec.html
 #XXXXXXX This is a prototype!!!  It will change in the future!!! XXXXX#
 name:         Perl-Tidy
-version:      20060614
+version:      20060719
 version_from: lib/Perl/Tidy.pm
 installdirs:  site
 requires:
 
 distribution_type: module
-generated_by: ExtUtils::MakeMaker version 6.17
+generated_by: ExtUtils::MakeMaker version 6.30
diff --git a/TODO b/TODO
index 463f148..8eaabd6 100644
--- a/TODO
+++ b/TODO
@@ -6,12 +6,8 @@ Perltidy TODO List
   Improved Vertical Alignment
     There are many opportunities for improving vertical alignment.
 
-  More options for controling placement of opening/closing tokens
-    Many have requested even more options to control opening and closing
-    token placement.
-
   improved ?: formatting
-    An indentation level should be associated with ?: statements. This will
+    An indentation level should be associated with ?: statements. This would
     make nested ?: statements more readable.
 
   improved internal if/unless formatting
diff --git a/bin/perltidy b/bin/perltidy
index 24870fd..0822508 100755
--- a/bin/perltidy
+++ b/bin/perltidy
@@ -1860,9 +1860,9 @@ return lists, such as C<sort> and <map>.  This allows chains of these
 operators to be displayed one per line.  Use B<-nbok> to prevent
 retaining these breakpoints.
 
-=item B<-bot>,  B<--break-at-old-trinary-breakpoints>
+=item B<-bot>,  B<--break-at-old-ternary-breakpoints>
 
-By default, if a conditional (trinary) operator is broken at a C<:>,
+By default, if a conditional (ternary) operator is broken at a C<:>,
 then it will remain broken.  To prevent this, and thereby
 form longer lines, use B<-nbot>.
 
@@ -1967,6 +1967,18 @@ style overrides the default style with the following parameters:
 
     -lp -bl -noll -pt=2 -bt=2 -sbt=2 -icp
 
+=item B<-pbp>, B<--perl-best-practices>
+
+B<-pbp> is an abbreviation for the parameters in the book B<Perl Best Practices>
+by Damian Conway:
+
+    -l=78 -i=4 -ci=4 -st -se -vt=2 -cti=0 -pt=1 -bt=1 -sbt=1 -bbt=1 -nsfs -nolq
+    -wbb="% + - * / x != == >= <= =~ !~ < > | & >= < = 
+          **= += *= &= <<= &&= -= /= |= >>= ||= .= %= ^= x="
+
+Note that the -st and -se flags make perltidy act as a filter on one file only.  
+These can be overridden with -nst and -nse if necessary.
+
 =back
 
 =head2 Other Controls
@@ -2607,7 +2619,7 @@ perlstyle(1), Perl::Tidy(3)
 
 =head1 VERSION
 
-This man page documents perltidy version 20060614.
+This man page documents perltidy version 20060719.
 
 =head1 CREDITS
 
diff --git a/debian/changelog b/debian/changelog
index 4b3542c..0a41b02 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+perltidy (20060719-1) unstable; urgency=low
+
+  * New upstream release
+
+ -- Don Armstrong <don@debian.org>  Fri, 28 Jul 2006 22:02:55 -0700
+
 perltidy (20060614-1) unstable; urgency=low
 
   * New upstream release
diff --git a/docs/perltidy.1 b/docs/perltidy.1
index fa71edd..2a37636 100644
--- a/docs/perltidy.1
+++ b/docs/perltidy.1
@@ -1,4 +1,4 @@
-.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.3
+.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32
 .\"
 .\" Standard preamble:
 .\" ========================================================================
@@ -25,11 +25,11 @@
 ..
 .\" Set up some character translations and predefined strings.  \*(-- will
 .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
-.\" double quote, and \*(R" will give a right double quote.  | will give a
-.\" real vertical bar.  \*(C+ will give a nicer C++.  Capital omega is used to
-.\" do unbreakable dashes and therefore won't be available.  \*(C` and \*(C'
-.\" expand to `' in nroff, nothing in troff, for use with C<>.
-.tr \(*W-|\(bv\*(Tr
+.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
+.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
+.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
+.\" nothing in troff, for use with C<>.
+.tr \(*W-
 .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
 .ie n \{\
 .    ds -- \(*W-
@@ -129,7 +129,7 @@
 .\" ========================================================================
 .\"
 .IX Title "PERLTIDY 1"
-.TH PERLTIDY 1 "2006-06-13" "perl v5.8.7" "User Contributed Perl Documentation"
+.TH PERLTIDY 1 "2006-07-19" "perl v5.8.8" "User Contributed Perl Documentation"
 .SH "NAME"
 perltidy \- a perl script indenter and reformatter
 .SH "SYNOPSIS"
@@ -1989,9 +1989,9 @@ By default, perltidy will retain a breakpoint before keywords which may
 return lists, such as \f(CW\*(C`sort\*(C'\fR and <map>.  This allows chains of these
 operators to be displayed one per line.  Use \fB\-nbok\fR to prevent
 retaining these breakpoints.
-.IP "\fB\-bot\fR,  \fB\-\-break\-at\-old\-trinary\-breakpoints\fR" 4
-.IX Item "-bot,  --break-at-old-trinary-breakpoints"
-By default, if a conditional (trinary) operator is broken at a \f(CW\*(C`:\*(C'\fR,
+.IP "\fB\-bot\fR,  \fB\-\-break\-at\-old\-ternary\-breakpoints\fR" 4
+.IX Item "-bot,  --break-at-old-ternary-breakpoints"
+By default, if a conditional (ternary) operator is broken at a \f(CW\*(C`:\*(C'\fR,
 then it will remain broken.  To prevent this, and thereby
 form longer lines, use \fB\-nbot\fR.
 .IP "\fB\-iob\fR,  \fB\-\-ignore\-old\-breakpoints\fR" 4
@@ -2072,6 +2072,19 @@ style overrides the default style with the following parameters:
 .Vb 1
 \&    \-lp \-bl \-noll \-pt=2 \-bt=2 \-sbt=2 \-icp
 .Ve
+.IP "\fB\-pbp\fR, \fB\-\-perl\-best\-practices\fR" 4
+.IX Item "-pbp, --perl-best-practices"
+\&\fB\-pbp\fR is an abbreviation for the parameters in the book \fBPerl Best Practices\fR
+by Damian Conway:
+.Sp
+.Vb 3
+\&    \-l=78 \-i=4 \-ci=4 \-st \-se \-vt=2 \-cti=0 \-pt=1 \-bt=1 \-sbt=1 \-bbt=1 \-nsfs \-nolq
+\&    \-wbb="% + \- * / x != == >= <= =~ !~ < > | & >= < = 
+\&          **= += *= &= <<= &&= \-= /= |= >>= ||= .= %= ^= x="
+.Ve
+.Sp
+Note that the \-st and \-se flags make perltidy act as a filter on one file only.  
+These can be overridden with \-nst and \-nse if necessary.
 .Sh "Other Controls"
 .IX Subsection "Other Controls"
 .IP "Deleting selected text" 4
@@ -2695,7 +2708,7 @@ purpose of this rule is to prevent generating confusing filenames such as
 \&\fIperlstyle\fR\|(1), \fIPerl::Tidy\fR\|(3)
 .SH "VERSION"
 .IX Header "VERSION"
-This man page documents perltidy version 20060614.
+This man page documents perltidy version 20060719.
 .SH "CREDITS"
 .IX Header "CREDITS"
 Michael Cartmell supplied code for adaptation to \s-1VMS\s0 and helped with
diff --git a/lib/Perl/Tidy.pm b/lib/Perl/Tidy.pm
index e69780c..ecef204 100644
--- a/lib/Perl/Tidy.pm
+++ b/lib/Perl/Tidy.pm
@@ -63,7 +63,7 @@ use IO::File;
 use File::Basename;
 
 BEGIN {
-    ( $VERSION = q($Id: Tidy.pm,v 1.49 2006/06/14 01:56:24 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
+    ( $VERSION = q($Id: Tidy.pm,v 1.56 2006/07/19 23:13:33 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
 }
 
 sub streamhandle {
@@ -308,8 +308,8 @@ sub make_temporary_filename {
         }
         if ($input_file) {
 
-            if ( ref $input_file ) { print STDERR " of reference to:" }
-            else { print STDERR " of file:" }
+            if   ( ref $input_file ) { print STDERR " of reference to:" }
+            else                     { print STDERR " of file:" }
             print STDERR " $input_file";
         }
         print STDERR "\n";
@@ -358,7 +358,7 @@ EOM
             my $hash_ref = $input_hash{$key};
             if ( defined($hash_ref) ) {
                 unless ( ref($hash_ref) eq 'HASH' ) {
-                    my $what   = ref($hash_ref);
+                    my $what = ref($hash_ref);
                     my $but_is =
                       $what ? "but is ref to $what" : "but is not a reference";
                     croak <<EOM;
@@ -1102,6 +1102,7 @@ sub generate_options {
     # chk --> check-multiline-quotes      # check for old bug; to be deleted
     # scl --> short-concatenation-item-length   # helps break at '.'
     # recombine                           # for debugging line breaks
+    # valign                              # for debugging vertical alignment
     # I   --> DIAGNOSTICS                 # for debugging
     ######################################################################
 
@@ -1157,6 +1158,7 @@ sub generate_options {
       no-profile
       npro
       recombine!
+      valign!
     );
 
     my $category = 13;    # Debugging
@@ -1336,7 +1338,7 @@ sub generate_options {
     ########################################
     $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
     $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
-    $add_option->( 'break-at-old-trinary-breakpoints', 'bot', '!' );
+    $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' );
     $add_option->( 'ignore-old-breakpoints',           'iob', '!' );
 
     ########################################
@@ -1469,7 +1471,7 @@ sub generate_options {
       brace-vertical-tightness-closing=0
       brace-vertical-tightness=0
       break-at-old-logical-breakpoints
-      break-at-old-trinary-breakpoints
+      break-at-old-ternary-breakpoints
       break-at-old-keyword-breakpoints
       comma-arrow-breakpoints=1
       nocheck-syntax
@@ -1512,6 +1514,7 @@ sub generate_options {
       paren-vertical-tightness=0
       pass-version-line
       recombine
+      valign
       short-concatenation-item-length=8
       space-for-semicolon
       square-bracket-tightness=1
@@ -1544,15 +1547,16 @@ sub generate_options {
         'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
         'nooutdent-long-lines' =>
           [qw(nooutdent-long-quotes nooutdent-long-comments)],
-        'noll'                => [qw(nooutdent-long-lines)],
-        'io'                  => [qw(indent-only)],
+        'noll' => [qw(nooutdent-long-lines)],
+        'io'   => [qw(indent-only)],
         'delete-all-comments' =>
           [qw(delete-block-comments delete-side-comments delete-pod)],
         'nodelete-all-comments' =>
           [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
-        'dac'              => [qw(delete-all-comments)],
-        'ndac'             => [qw(nodelete-all-comments)],
-        'gnu'              => [qw(gnu-style)],
+        'dac'  => [qw(delete-all-comments)],
+        'ndac' => [qw(nodelete-all-comments)],
+        'gnu'  => [qw(gnu-style)],
+        'pbp'  => [qw(perl-best-practices)],
         'tee-all-comments' =>
           [qw(tee-block-comments tee-side-comments tee-pod)],
         'notee-all-comments' =>
@@ -1568,6 +1572,8 @@ sub generate_options {
         'baa'                        => [qw(cab=0)],
         'nbaa'                       => [qw(cab=1)],
 
+        'break-at-old-trinary-breakpoints' => [qw(bot)],
+
         'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
         'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
         'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
@@ -1675,6 +1681,12 @@ sub generate_options {
               )
         ],
 
+        # Style suggested in Damian Conway's Perl Best Practices
+        'perl-best-practices' => [
+            qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq),
+q(wbb=% + - * / x != == >= <= =~ !~ < > | & >= < = **= += *= &= <<= &&= -= /= |= >>= ||= .= %= ^= x=)
+        ],
+
         # Additional styles can be added here
     );
 
@@ -1831,7 +1843,7 @@ EOM
         # look for a config file if we don't have one yet
         my $rconfig_file_chatter;
         $$rconfig_file_chatter = "";
-        $config_file           =
+        $config_file =
           find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
             $rpending_complaint )
           unless $config_file;
@@ -1917,6 +1929,7 @@ EOM
                     }
                   )
                 {
+
                     if ( defined( $Opts{$_} ) ) {
                         delete $Opts{$_};
                         warn "ignoring --$_ in config file: $config_file\n";
@@ -2867,7 +2880,7 @@ Following Old Breakpoints
  -boc    break at old comma breaks: turns off all automatic list formatting
  -bol    break at old logical breakpoints: or, and, ||, && (default)
  -bok    break at old list keyword breakpoints such as map, sort (default)
- -bot    break at old conditional (trinary ?:) operator breakpoints (default)
+ -bot    break at old conditional (ternary ?:) operator breakpoints (default)
  -cab=n  break at commas after a comma-arrow (=>):
          n=0 break at all commas after =>
          n=1 stable: break unless this breaks an existing one-line container
@@ -3664,10 +3677,10 @@ sub make_line_information_string {
     my $line_information_string = "";
     if ($input_line_number) {
 
-        my $output_line_number       = $self->{_output_line_number};
-        my $brace_depth              = $line_of_tokens->{_curly_brace_depth};
-        my $paren_depth              = $line_of_tokens->{_paren_depth};
-        my $square_bracket_depth     = $line_of_tokens->{_square_bracket_depth};
+        my $output_line_number   = $self->{_output_line_number};
+        my $brace_depth          = $line_of_tokens->{_curly_brace_depth};
+        my $paren_depth          = $line_of_tokens->{_paren_depth};
+        my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
         my $python_indentation_level =
           $line_of_tokens->{_python_indentation_level};
         my $rlevels         = $line_of_tokens->{_rlevels};
@@ -3683,13 +3696,13 @@ sub make_line_information_string {
         # for longer scripts it doesn't really matter
         my $extra_space = "";
         $extra_space .=
-            ( $input_line_number < 10 ) ? "  "
+            ( $input_line_number < 10 )  ? "  "
           : ( $input_line_number < 100 ) ? " "
-          : "";
+          :                                "";
         $extra_space .=
-            ( $output_line_number < 10 ) ? "  "
+            ( $output_line_number < 10 )  ? "  "
           : ( $output_line_number < 100 ) ? " "
-          : "";
+          :                                 "";
 
         # there are 2 possible nesting strings:
         # the original which looks like this:  (0 [1 {2
@@ -3863,14 +3876,14 @@ EOM
     elsif ( $saw_code_bug == 1 ) {
         if ( $self->{_saw_extrude} ) {
             $self->warning(<<EOM);
-You may have encountered a bug in perltidy.  However, since you are
-using the -extrude option, the problem may be with perl itself, which
-has occasional parsing problems with this type of file.  If you believe
-that the problem is with perltidy, and the problem is not listed in the
-BUGS file at http://perltidy.sourceforge.net, please report it so that
-it can be corrected.  Include the smallest possible script which has the
-problem, along with the .LOG file. See the manual pages for contact
-information.
+
+You may have encountered a bug in perltidy.  However, since you are using the
+-extrude option, the problem may be with perl or one of its modules, which have
+occasional problems with this type of file.  If you believe that the
+problem is with perltidy, and the problem is not listed in the BUGS file at
+http://perltidy.sourceforge.net, please report it so that it can be corrected.
+Include the smallest possible script which has the problem, along with the .LOG
+file. See the manual pages for contact information.
 Thank you!
 EOM
         }
@@ -4823,7 +4836,7 @@ sub make_frame {
     # 1. Make the table of contents panel, with appropriate changes
     # to the anchor names
     my $src_frame_name = 'SRC';
-    my $first_anchor   =
+    my $first_anchor =
       write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
         $src_frame_name );
 
@@ -5256,7 +5269,7 @@ sub write_line {
         elsif ( $line_type eq 'FORMAT' )     { $line_character = 'H' }
         elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
         elsif ( $line_type eq 'SYSTEM' )     { $line_character = 'c' }
-        elsif ( $line_type eq 'END_START' )  {
+        elsif ( $line_type eq 'END_START' ) {
             $line_character = 'k';
             $self->add_toc_item( '__END__', '__END__' );
         }
@@ -5317,10 +5330,10 @@ EOM
     # add the line number if requested
     if ( $rOpts->{'html-line-numbers'} ) {
         my $extra_space .=
-            ( $line_number < 10 ) ? "   "
+            ( $line_number < 10 )   ? "   "
           : ( $line_number < 100 )  ? "  "
           : ( $line_number < 1000 ) ? " "
-          : "";
+          :                           "";
         $html_line = $extra_space . $line_number . " " . $html_line;
     }
 
@@ -5429,6 +5442,7 @@ use vars qw{
   $last_last_nonblank_token_to_go
   @nonblank_lines_at_depth
   $starting_in_quote
+  $ending_in_quote
 
   $in_format_skipping_section
   $format_skipping_pattern_begin
@@ -5449,7 +5463,6 @@ use vars qw{
   $added_semicolon_count
   $first_added_semicolon_at
   $last_added_semicolon_at
-  $saw_negative_indentation
   $first_tabbing_disagreement
   $last_tabbing_disagreement
   $in_tabbing_disagreement
@@ -5499,6 +5512,7 @@ use vars qw{
   %is_assignment
   %is_chain_operator
   %is_if_unless_and_or_last_next_redo_return
+  %is_until_while_for_if_elsif_else
 
   @has_broken_sublist
   @dont_align
@@ -5539,7 +5553,7 @@ use vars qw{
   $rOpts_break_at_old_keyword_breakpoints
   $rOpts_break_at_old_comma_breakpoints
   $rOpts_break_at_old_logical_breakpoints
-  $rOpts_break_at_old_trinary_breakpoints
+  $rOpts_break_at_old_ternary_breakpoints
   $rOpts_closing_side_comment_else_flag
   $rOpts_closing_side_comment_maximum_text
   $rOpts_continuation_indentation
@@ -5615,6 +5629,10 @@ BEGIN {
     @_ = qw(is if unless and or err last next redo return);
     @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_);
 
+    # always break after a closing curly of these block types:
+    @_ = qw(until while for if elsif else);
+    @is_until_while_for_if_elsif_else{@_} = (1) x scalar(@_);
+
     @_ = qw(last next redo return);
     @is_last_next_redo_return{@_} = (1) x scalar(@_);
 
@@ -5808,7 +5826,6 @@ sub new {
     @want_comma_break   = ();
 
     @ci_stack                   = ("");
-    $saw_negative_indentation   = 0;
     $first_tabbing_disagreement = 0;
     $last_tabbing_disagreement  = 0;
     $tabbing_disagreement_count = 0;
@@ -6123,7 +6140,7 @@ sub set_leading_whitespace {
         my $space_count     = 0;
         my $available_space = 0;
         $level = -1;    # flag to prevent storing in item_list
-        $leading_spaces_to_go[$max_index_to_go]   =
+        $leading_spaces_to_go[$max_index_to_go] =
           $reduced_spaces_to_go[$max_index_to_go] =
           new_lp_indentation_item( $space_count, $level, $ci_level,
             $available_space, 0 );
@@ -6152,17 +6169,33 @@ sub set_leading_whitespace {
             # find the position if we break at the '='
             my $i_test = $last_equals;
             if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
+
+            # TESTING
+            ##my $too_close = ($i_test==$max_index_to_go-1);
+
             my $test_position = total_line_length( $i_test, $max_index_to_go );
 
             if (
 
+                # the equals is not just before an open paren (testing)
+                ##!$too_close &&
+
                 # if we are beyond the midpoint
                 $gnu_position_predictor > $half_maximum_line_length
 
-                # or if we can save some space by breaking at the '='
-                # without obscuring the second line by the first
-                || ( $test_position > 1 +
-                    total_line_length( $line_start_index_to_go, $last_equals ) )
+                # or we are beyont the 1/4 point and there was an old
+                # break at the equals
+                || (
+                    $gnu_position_predictor > $half_maximum_line_length / 2
+                    && (
+                        $old_breakpoint_to_go[$last_equals]
+                        || (   $last_equals > 0
+                            && $old_breakpoint_to_go[ $last_equals - 1 ] )
+                        || (   $last_equals > 1
+                            && $types_to_go[ $last_equals - 1 ] eq 'b'
+                            && $old_breakpoint_to_go[ $last_equals - 2 ] )
+                    )
+                )
               )
             {
 
@@ -6992,7 +7025,7 @@ EOM
     # Define here tokens which may follow the closing brace of a do statement
     # on the same line, as in:
     #   } while ( $something);
-    @_ = qw(until while unless if ; );
+    @_ = qw(until while unless if ; : );
     push @_, ',';
     @is_do_follower{@_} = (1) x scalar(@_);
 
@@ -7012,14 +7045,14 @@ EOM
     %is_else_brace_follower = ();
 
     # what can follow a multi-line anonymous sub definition closing curly:
-    @_ = qw# ; : => or and  && || ) #;
+    @_ = qw# ; : => or and  && || ~~ ) #;
     push @_, ',';
     @is_anon_sub_brace_follower{@_} = (1) x scalar(@_);
 
     # what can follow a one-line anonynomous sub closing curly:
     # one-line anonumous subs also have ']' here...
     # see tk3.t and PP.pm
-    @_ = qw#  ; : => or and  && || ) ] #;
+    @_ = qw#  ; : => or and  && || ) ] ~~ #;
     push @_, ',';
     @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);
 
@@ -7085,15 +7118,15 @@ EOM
     );
 
     # frequently used parameters
-    $rOpts_add_newlines                   = $rOpts->{'add-newlines'};
-    $rOpts_add_whitespace                 = $rOpts->{'add-whitespace'};
-    $rOpts_block_brace_tightness          = $rOpts->{'block-brace-tightness'};
+    $rOpts_add_newlines          = $rOpts->{'add-newlines'};
+    $rOpts_add_whitespace        = $rOpts->{'add-whitespace'};
+    $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
     $rOpts_block_brace_vertical_tightness =
       $rOpts->{'block-brace-vertical-tightness'};
     $rOpts_brace_left_and_indent   = $rOpts->{'brace-left-and-indent'};
     $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
-    $rOpts_break_at_old_trinary_breakpoints =
-      $rOpts->{'break-at-old-trinary-breakpoints'};
+    $rOpts_break_at_old_ternary_breakpoints =
+      $rOpts->{'break-at-old-ternary-breakpoints'};
     $rOpts_break_at_old_comma_breakpoints =
       $rOpts->{'break-at-old-comma-breakpoints'};
     $rOpts_break_at_old_keyword_breakpoints =
@@ -7425,8 +7458,18 @@ EOM
         # for avoiding syntax problems rather than for formatting.
         my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
 
-        # never combine two bare words or numbers
-        my $result = ( ( $tokenr =~ /^[\'\w]/ ) && ( $tokenl =~ /[\'\w]$/ ) )
+        my $result =
+
+          # never combine two bare words or numbers
+          # examples:  and ::ok(1)
+          #            return ::spw(...)
+          #            for bla::bla:: abc
+          # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
+          #            $input eq"quit" to make $inputeq"quit"
+          #            my $size=-s::SINK if $file;  <==OK but we won't do it
+          # don't join something like: for bla::bla:: abc
+          # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
+          ( ( $tokenl =~ /([\'\w]|\:\:)$/ ) && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
 
           # do not combine a number with a concatination dot
           # example: pom.caputo:
@@ -7479,7 +7522,11 @@ EOM
 
           # retain any space after possible filehandle
           # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
-          || ( $typel eq 'Z' || $typell eq 'Z' )
+          || ( $typel eq 'Z' )
+
+          # Perl is sensitive to whitespace after the + here:
+          #  $b = xvals $a + 0.1 * yvals $a;
+          || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )
 
           # keep paren separate in 'use Foo::Bar ()'
           || ( $tokenr eq '('
@@ -7526,9 +7573,6 @@ EOM
           #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
           || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
 
-          # don't join something like: for bla::bla:: abc
-          # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
-          || ( $tokenl =~ /\:\:$/ && ( $tokenr =~ /^[\'\w]/ ) )
           ;    # the value of this long logic sequence is the result we want
         return $result;
     }
@@ -7587,7 +7631,7 @@ sub set_white_space_flag {
 
         my @spaces_both_sides = qw"
           + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
-          .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>=
+          .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~
           &&= ||= //= <=> A k f w F n C Y U G v
           ";
 
@@ -7883,7 +7927,7 @@ sub set_white_space_flag {
 
         # patch for SWITCH/CASE: make space at ']{' optional
         # since the '{' might begin a case or when block
-        elsif ( $token eq '{' && $last_token eq ']' ) {
+        elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
             $ws = WS_OPTIONAL;
         }
 
@@ -7932,8 +7976,13 @@ sub set_white_space_flag {
         elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
 
         # always preserver whatever space was used after a possible
-        # filehandle or here doc operator
-        if ( $type ne '#' && ( $last_type eq 'Z' || $last_type eq 'h' ) ) {
+        # filehandle (except _) or here doc operator
+        if (
+            $type ne '#'
+            && ( ( $last_type eq 'Z' && $last_token ne '_' )
+                || $last_type eq 'h' )
+          )
+        {
             $ws = WS_OPTIONAL;
         }
 
@@ -8110,7 +8159,7 @@ sub set_white_space_flag {
         # If this becomes too much of a problem, we might give up and just clip
         # them at zero.
         ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
-        $levels_to_go[$max_index_to_go]        = $level;
+        $levels_to_go[$max_index_to_go] = $level;
         $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
         $lengths_to_go[ $max_index_to_go + 1 ] =
           $lengths_to_go[$max_index_to_go] + length($token);
@@ -8165,16 +8214,6 @@ sub set_white_space_flag {
         return;
     }
 
-    my %is_until_while_for_if_elsif_else;
-
-    BEGIN {
-
-        # always break after a closing curly of these block types:
-        @_ = qw(until while for if elsif else);
-        @is_until_while_for_if_elsif_else{@_} = (1) x scalar(@_);
-
-    }
-
     sub print_line_of_tokens {
 
         my $line_of_tokens = shift;
@@ -8214,7 +8253,8 @@ sub set_white_space_flag {
 
         $in_continued_quote = $starting_in_quote =
           $line_of_tokens->{_starting_in_quote};
-        $in_quote                 = $line_of_tokens->{_ending_in_quote};
+        $in_quote        = $line_of_tokens->{_ending_in_quote};
+        $ending_in_quote = $in_quote;
         $python_indentation_level =
           $line_of_tokens->{_python_indentation_level};
 
@@ -8316,7 +8356,7 @@ sub set_white_space_flag {
             && $rOpts->{'static-block-comments'}
             && $input_line =~ /$static_block_comment_pattern/o )
         {
-            $is_static_block_comment                       = 1;
+            $is_static_block_comment = 1;
             $is_static_block_comment_without_leading_space =
               substr( $input_line, 0, 1 ) eq '#';
         }
@@ -8423,7 +8463,7 @@ sub set_white_space_flag {
         #       /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
         #   Examples:
         #     *VERSION = \'1.01';
-        #     ( $VERSION ) = '$Revision: 1.49 $ ' =~ /\$Revision:\s+([^\s]+)/;
+        #     ( $VERSION ) = '$Revision: 1.56 $ ' =~ /\$Revision:\s+([^\s]+)/;
         #   We will pass such a line straight through without breaking
         #   it unless -npvl is used
 
@@ -8442,10 +8482,11 @@ sub set_white_space_flag {
         }
 
         # take care of indentation-only
-        # also write a line which is entirely a 'qw' list
-        if ( $rOpts->{'indent-only'}
-            || ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq 'q' ) ) )
-        {
+        # NOTE: In previous versions we sent all qw lines out immediately here.
+        # No longer doing this: also write a line which is entirely a 'qw' list
+        # to allow stacking of opening and closing tokens.  Note that interior
+        # qw lines will still go out at the end of this routine.
+        if ( $rOpts->{'indent-only'} ) {
             flush();
             $input_line =~ s/^\s*//;    # trim left end
             $input_line =~ s/\s*$//;    # trim right end
@@ -8875,7 +8916,14 @@ sub set_white_space_flag {
                     #
                     # But make a line break if the curly ends a
                     # significant block:
-                    if ( $is_until_while_for_if_elsif_else{$block_type} ) {
+                    ##if ( $is_until_while_for_if_elsif_else{$block_type} ) {
+                    if (
+                        $is_block_without_semicolon{$block_type}
+
+                        # if needless semicolon follows we handle it later
+                        && $next_nonblank_token ne ';'
+                      )
+                    {
                         output_line_to_go() unless ($no_internal_newlines);
                     }
                 }
@@ -8911,11 +8959,6 @@ sub set_white_space_flag {
                     }
                 }
 
-                # TESTING ONLY for SWITCH/CASE - this is where to start
-                # recoding to retain else's on the same line as a case,
-                # but there is a lot more that would need to be done.
-                ##elsif ($block_type eq 'case') {$rbrace_follower = {else=>1};}
-
                 # None of the above: specify what can follow a closing
                 # brace of a block which is not an
                 # if/elsif/else/do/sort/map/grep/eval
@@ -9087,7 +9130,9 @@ sub set_white_space_flag {
             # if there is a side comment
             ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
 
-            # if this line which ends in a quote
+            # if this line ends in a quote
+            # NOTE: This is critically important for insuring that quoted lines
+            # do not get processed by things like -sot and -sct
             || $in_quote
 
             # if this is a VERSION statement
@@ -9239,8 +9284,8 @@ sub starting_one_line_block {
     for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) {
 
         # old whitespace could be arbitrarily large, so don't use it
-        if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
-        else { $pos += length( $$rtokens[$i] ) }
+        if   ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
+        else                              { $pos += length( $$rtokens[$i] ) }
 
         # Return false result if we exceed the maximum line length,
         if ( $pos > $rOpts_maximum_line_length ) {
@@ -9463,7 +9508,7 @@ sub set_logical_padding {
                     # we might pad token $ibeg, so be sure that it
                     # is at the same depth as the next line.
                     next
-                      if ( $nesting_depth_to_go[ $ibeg + 1 ] !=
+                      if ( $nesting_depth_to_go[$ibeg] !=
                         $nesting_depth_to_go[$ibeg_next] );
 
                     # We can pad on line 1 of a statement if at least 3
@@ -9854,7 +9899,7 @@ sub correct_lp_indentation {
                 # then we are probably vertically aligned.  We could set
                 # an exact flag in sub scan_list, but this is good
                 # enough.
-                my $indentation_count     = keys %saw_indentation;
+                my $indentation_count = keys %saw_indentation;
                 my $is_vertically_aligned =
                   (      $i == $ibeg
                       && $first_line_comma_count > 1
@@ -9904,11 +9949,10 @@ sub flush {
     Perl::Tidy::VerticalAligner::flush();
 }
 
-# output_line_to_go sends one logical line of tokens on down the
+# sub output_line_to_go sends one logical line of tokens on down the
 # pipeline to the VerticalAligner package, breaking the line into continuation
 # lines as necessary.  The line of tokens is ready to go in the "to_go"
 # arrays.
-
 sub output_line_to_go {
 
     # debug stuff; this routine can be called from many points
@@ -9937,6 +9981,45 @@ sub output_line_to_go {
     # any unfinished items in its stack
     finish_lp_batch();
 
+    # If this line ends in a code block brace, set breaks at any
+    # previous closing code block braces to breakup a chain of code
+    # blocks on one line.  This is very rare but can happen for
+    # user-defined subs.  For example we might be looking at this:
+    #  BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
+    my $saw_good_break = 0;    # flag to force breaks even if short line
+    if (
+
+        # looking for opening or closing block brace
+        $block_type_to_go[$max_index_to_go]
+
+        # but not one of these which are never duplicated on a line:
+        ##&& !$is_until_while_for_if_elsif_else{ $block_type_to_go
+        ##      [$max_index_to_go] }
+        && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
+      )
+    {
+        my $lev = $nesting_depth_to_go[$max_index_to_go];
+
+        # Walk backwards from the end and
+        # set break at any closing block braces at the same level.
+        # But quit if we are not in a chain of blocks.
+        for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
+            last if ( $levels_to_go[$i] < $lev );    # stop at a lower level
+            next if ( $levels_to_go[$i] > $lev );    # skip past higher level
+
+            if ( $block_type_to_go[$i] ) {
+                if ( $tokens_to_go[$i] eq '}' ) {
+                    set_forced_breakpoint($i);
+                    $saw_good_break = 1;
+                }
+            }
+
+            # quit if we see anything besides words, function, blanks
+            # at this level
+            elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
+        }
+    }
+
     my $imin = 0;
     my $imax = $max_index_to_go;
 
@@ -9967,7 +10050,9 @@ sub output_line_to_go {
 
             # break before all package declarations
             # MCONVERSION LOCATION - for tokenizaton change
-            elsif ( $leading_token =~ /^(package\s)/ && $leading_type eq 'i' ) {
+            elsif ($leading_token =~ /^(package\s)/
+                && $leading_type eq 'i' )
+            {
                 $want_blank = ( $rOpts->{'blanks-before-subs'} );
             }
 
@@ -9980,8 +10065,9 @@ sub output_line_to_go {
                   );
             }
 
-            # Break before certain block types if we haven't had a break at this
-            # level for a while.  This is the difficult decision..
+            # Break before certain block types if we haven't had a
+            # break at this level for a while.  This is the
+            # difficult decision..
             elsif ($leading_token =~ /^(unless|if|while|until|for|foreach)$/
                 && $leading_type eq 'k' )
             {
@@ -10033,8 +10119,7 @@ sub output_line_to_go {
         pad_array_to_go();
 
         # set all forced breakpoints for good list formatting
-        my $saw_good_break = 0;
-        my $is_long_line   = excess_line_length( $imin, $max_index_to_go ) > 0;
+        my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
 
         if (
             $max_index_to_go > 0
@@ -10050,7 +10135,7 @@ sub output_line_to_go {
             )
           )
         {
-            $saw_good_break = scan_list();
+            $saw_good_break ||= scan_list();
         }
 
         # let $ri_first and $ri_last be references to lists of
@@ -10130,8 +10215,8 @@ sub set_block_text_accumulator {
     if ( $accumulating_text_for_block !~ /^els/ ) {
         $rleading_block_if_elsif_text = [];
     }
-    $leading_block_text             = "";
-    $leading_block_text_level       = $levels_to_go[$i];
+    $leading_block_text       = "";
+    $leading_block_text_level = $levels_to_go[$i];
     $leading_block_text_line_number =
       $vertical_aligner_object->get_output_line_number();
     $leading_block_text_length_exceeded = 0;
@@ -10473,6 +10558,13 @@ sub add_closing_side_comment {
         && $block_type_to_go[$i_terminal] =~
         /$closing_side_comment_list_pattern/o
 
+        # .. but not an anonymous sub
+        # These are not normally of interest, and their closing braces are
+        # often followed by commas or semicolons anyway.  This also avoids
+        # possible erratic output due to line numbering inconsistencies
+        # in the cases where their closing braces terminate a line.
+        && $block_type_to_go[$i_terminal] ne 'sub'
+
         # ..and the corresponding opening brace must is not in this batch
         # (because we do not need to tag one-line blocks, although this
         # should also be caught with a positive -csci value)
@@ -10589,9 +10681,9 @@ sub add_closing_side_comment {
         else {
 
             # insert the new side comment into the output token stream
-            my $type                  = '#';
-            my $block_type            = '';
-            my $type_sequence         = '';
+            my $type          = '#';
+            my $block_type    = '';
+            my $type_sequence = '';
             my $container_environment =
               $container_environment_to_go[$max_index_to_go];
             my $level                = $levels_to_go[$max_index_to_go];
@@ -10635,6 +10727,9 @@ sub send_lines_to_vertical_aligner {
 
     my $rindentation_list = [0];    # ref to indentations for each line
 
+    # define the array @matching_token_to_go for the output tokens
+    # which will be non-blank for each special token (such as =>)
+    # for which alignment is required.
     set_vertical_alignment_markers( $ri_first, $ri_last );
 
     # flush if necessary to avoid unwanted alignment
@@ -10751,7 +10846,7 @@ sub send_lines_to_vertical_aligner {
                 # Mark most things before arrows as a quote to
                 # get them to line up. Testfile: mixed.pl.
                 if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
-                    my $next_type       = $types_to_go[ $i + 1 ];
+                    my $next_type = $types_to_go[ $i + 1 ];
                     my $i_next_nonblank =
                       ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
 
@@ -10782,8 +10877,8 @@ sub send_lines_to_vertical_aligner {
         # done with this line .. join text of tokens to make the last field
         push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
 
-        my ( $indentation, $lev, $level_end, $is_semicolon_terminated,
-            $is_outdented_line )
+        my ( $indentation, $lev, $level_end, $terminal_type,
+            $is_semicolon_terminated, $is_outdented_line )
           = set_adjusted_indentation( $ibeg, $iend, \@fields, \@patterns,
             $ri_first, $ri_last, $rindentation_list );
 
@@ -10813,6 +10908,17 @@ sub send_lines_to_vertical_aligner {
         # flush an outdented line to avoid any unwanted vertical alignment
         Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
 
+        my $is_terminal_ternary = 0;
+        if (   $tokens_to_go[$ibeg] eq ':'
+            || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' )
+        {
+            if (   ( $terminal_type eq ';' && $level_end <= $lev )
+                || ( $level_end < $lev ) )
+            {
+                $is_terminal_ternary = 1;
+            }
+        }
+
         # send this new line down the pipe
         my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
         Perl::Tidy::VerticalAligner::append_line(
@@ -10824,6 +10930,7 @@ sub send_lines_to_vertical_aligner {
             \@patterns,
             $forced_breakpoint_to_go[$iend] || $in_comma_list,
             $outdent_long_lines,
+            $is_terminal_ternary,
             $is_semicolon_terminated,
             $do_not_pad,
             $rvertical_tightness_flags,
@@ -10980,6 +11087,13 @@ sub get_opening_indentation {
             if ( $saved_opening_indentation{$seqno} ) {
                 ( $indent, $offset ) = @{ $saved_opening_indentation{$seqno} };
             }
+
+            # some kind of serious error
+            # (example is badfile.t)
+            else {
+                $indent = 0;
+                $offset = 0;
+            }
         }
 
         # if no sequence number it must be an unbalanced container
@@ -11140,9 +11254,9 @@ sub lookup_opening_indentation {
             if (   $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
                 && $i_terminal == $ibeg )
             {
-                my $ci              = $ci_levels_to_go[$ibeg];
-                my $lev             = $levels_to_go[$ibeg];
-                my $next_type       = $types_to_go[ $ibeg + 1 ];
+                my $ci        = $ci_levels_to_go[$ibeg];
+                my $lev       = $levels_to_go[$ibeg];
+                my $next_type = $types_to_go[ $ibeg + 1 ];
                 my $i_next_nonblank =
                   ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
                 if (   $i_next_nonblank <= $max_index_to_go
@@ -11429,8 +11543,8 @@ sub lookup_opening_indentation {
             }
         }
 
-        return ( $indentation, $lev, $level_end, $is_semicolon_terminated,
-            $is_outdented_line );
+        return ( $indentation, $lev, $level_end, $terminal_type,
+            $is_semicolon_terminated, $is_outdented_line );
     }
 }
 
@@ -11454,7 +11568,7 @@ sub set_vertical_tightness_flags {
     # These flags are used by sub set_leading_whitespace in
     # the vertical aligner
 
-    my $rvertical_tightness_flags;
+    my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
 
     # For non-BLOCK tokens, we will need to examine the next line
     # too, so we won't consider the last line.
@@ -11599,7 +11713,7 @@ sub set_vertical_tightness_flags {
         # patch to make something like 'qw(' behave like an opening paren
         # (aran.t)
         if ( $types_to_go[$ibeg_next] eq 'q' ) {
-            if ( $token_beg_next =~ /^q.([\[\(\{])$/ ) {
+            if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
                 $token_beg_next = $1;
             }
         }
@@ -11661,9 +11775,34 @@ sub set_vertical_tightness_flags {
           ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
     }
 
+    # pack in the sequence numbers of the ends of this line
+    $rvertical_tightness_flags->[4] = get_seqno($ibeg);
+    $rvertical_tightness_flags->[5] = get_seqno($iend);
     return $rvertical_tightness_flags;
 }
 
+sub get_seqno {
+
+    # get opening and closing sequence numbers of a token for the vertical
+    # aligner.  Assign qw quotes a value to allow qw opening and closing tokens
+    # to be treated somewhat like opening and closing tokens for stacking
+    # tokens by the vertical aligner.
+    my ($ii) = @_;
+    my $seqno = $type_sequence_to_go[$ii];
+    if ( $types_to_go[$ii] eq 'q' ) {
+        my $SEQ_QW = -1;
+        if ( $ii > 0 ) {
+            $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
+        }
+        else {
+            if ( !$ending_in_quote ) {
+                $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
+            }
+        }
+    }
+    return ($seqno);
+}
+
 {
     my %is_vertical_alignment_type;
     my %is_vertical_alignment_keyword;
@@ -11672,7 +11811,7 @@ sub set_vertical_tightness_flags {
 
         @_ = qw#
           = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
-          { ? : => =~ && || //
+          { ? : => =~ && || // ~~
           #;
         @is_vertical_alignment_type{@_} = (1) x scalar(@_);
 
@@ -11682,8 +11821,12 @@ sub set_vertical_tightness_flags {
 
     sub set_vertical_alignment_markers {
 
-        # Look at the tokens in this output batch and define the array
-        # 'matching_token_to_go' which marks tokens at which we would
+        # This routine takes the first step toward vertical alignment of the
+        # lines of output text.  It looks for certain tokens which can serve as
+        # vertical alignment markers (such as an '=').
+        #
+        # Method: We look at each token $i in this output batch and set
+        # $matching_token_to_go[$i] equal to those tokens at which we would
         # accept vertical alignment.
 
         # nothing to do if we aren't allowed to change whitespace
@@ -11696,6 +11839,14 @@ sub set_vertical_tightness_flags {
 
         my ( $ri_first, $ri_last ) = @_;
 
+        # remember the index of last nonblank token before any sidecomment
+        my $i_terminal = $max_index_to_go;
+        if ( $types_to_go[$i_terminal] eq '#' ) {
+            if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
+                if ( $i_terminal > 0 ) { --$i_terminal }
+            }
+        }
+
         # look at each line of this batch..
         my $last_vertical_alignment_before_index;
         my $vert_last_nonblank_type;
@@ -11704,6 +11855,7 @@ sub set_vertical_tightness_flags {
         my $max_line = @$ri_first - 1;
         my ( $i, $type, $token, $block_type, $alignment_type );
         my ( $ibeg, $iend, $line );
+
         foreach $line ( 0 .. $max_line ) {
             $ibeg                                 = $$ri_first[$line];
             $iend                                 = $$ri_last[$line];
@@ -11735,12 +11887,10 @@ sub set_vertical_tightness_flags {
                 # align before the first token and 2) the second
                 # token must be a blank if we are to align before
                 # the third
-                if ( $i < $ibeg + 2 ) {
-                }
+                if ( $i < $ibeg + 2 ) { }
 
                 # must follow a blank token
-                elsif ( $types_to_go[ $i - 1 ] ne 'b' ) {
-                }
+                elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
 
                 # align a side comment --
                 elsif ( $type eq '#' ) {
@@ -11765,8 +11915,7 @@ sub set_vertical_tightness_flags {
 
                 # otherwise, do not align two in a row to create a
                 # blank field
-                elsif ( $last_vertical_alignment_before_index == $i - 2 ) {
-                }
+                elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
 
                 # align before one of these keywords
                 # (within a line, since $i>1)
@@ -11783,6 +11932,30 @@ sub set_vertical_tightness_flags {
                 elsif ( $is_vertical_alignment_type{$type} ) {
                     $alignment_type = $token;
 
+                    # Do not align a terminal token.  Although it might
+                    # occasionally look ok to do this, it has been found to be
+                    # a good general rule.  The main problems are:
+                    # (1) that the terminal token (such as an = or :) might get
+                    # moved far to the right where it is hard to see because
+                    # nothing follows it, and
+                    # (2) doing so may prevent other good alignments.
+                    if ( $i == $iend || $i >= $i_terminal ) {
+                        $alignment_type = "";
+                    }
+
+                    # Do not align leading ': ('.  This would prevent
+                    # alignment in something like the following:
+                    #   $extra_space .=
+                    #       ( $input_line_number < 10 )  ? "  "
+                    #     : ( $input_line_number < 100 ) ? " "
+                    #     :                                "";
+                    if (   $i == $ibeg + 2
+                        && $types_to_go[$ibeg]    eq ':'
+                        && $types_to_go[ $i - 1 ] eq 'b' )
+                    {
+                        $alignment_type = "";
+                    }
+
                     # For a paren after keyword, only align something like this:
                     #    if    ( $a ) { &a }
                     #    elsif ( $b ) { &b }
@@ -11797,12 +11970,10 @@ sub set_vertical_tightness_flags {
                     # if ($token ne $type) {$alignment_type .= $type}
                 }
 
-              # NOTE: This is deactivated until the new vertical aligner
-              # is finished because it causes the previous if/elsif alignment
-              # to fail
-              #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i]) {
-              #    $alignment_type = $type;
-              #}
+                # NOTE: This is deactivated because it causes the previous
+                # if/elsif alignment to fail
+                #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
+                #{ $alignment_type = $type; }
 
                 if ($alignment_type) {
                     $last_vertical_alignment_before_index = $i;
@@ -11977,14 +12148,14 @@ sub terminal_type {
 
             # make these a little weaker than nominal so that they get
             # favored for end-of-line characters
-            @_                       = qw"!= == =~ !~";
-            @left_bond_strength{@_}  = (STRONG) x scalar(@_);
+            @_ = qw"!= == =~ !~ ~~";
+            @left_bond_strength{@_} = (STRONG) x scalar(@_);
             @right_bond_strength{@_} =
               ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);
 
             # break AFTER these
-            @_                       = qw" < >  | & >= <=";
-            @left_bond_strength{@_}  = (VERY_STRONG) x scalar(@_);
+            @_ = qw" < >  | & >= <=";
+            @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_);
             @right_bond_strength{@_} =
               ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_);
 
@@ -12005,14 +12176,14 @@ sub terminal_type {
             $left_bond_strength{'G'}  = NOMINAL;
             $right_bond_strength{'G'} = STRONG;
 
-            # it is very good to break AFTER various assignment operators
+            # it is good to break AFTER various assignment operators
             @_ = qw(
               = **= += *= &= <<= &&=
               -= /= |= >>= ||= //=
               .= %= ^=
               x=
             );
-            @left_bond_strength{@_}  = (STRONG) x scalar(@_);
+            @left_bond_strength{@_} = (STRONG) x scalar(@_);
             @right_bond_strength{@_} =
               ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_);
 
@@ -12654,10 +12825,10 @@ sub pad_array_to_go {
 
     # to simplify coding in scan_list and set_bond_strengths, it helps
     # to create some extra blank tokens at the end of the arrays
-    $tokens_to_go[ $max_index_to_go + 1 ]        = '';
-    $tokens_to_go[ $max_index_to_go + 2 ]        = '';
-    $types_to_go[ $max_index_to_go + 1 ]         = 'b';
-    $types_to_go[ $max_index_to_go + 2 ]         = 'b';
+    $tokens_to_go[ $max_index_to_go + 1 ] = '';
+    $tokens_to_go[ $max_index_to_go + 2 ] = '';
+    $types_to_go[ $max_index_to_go + 1 ]  = 'b';
+    $types_to_go[ $max_index_to_go + 2 ]  = 'b';
     $nesting_depth_to_go[ $max_index_to_go + 1 ] =
       $nesting_depth_to_go[$max_index_to_go];
 
@@ -13028,7 +13199,7 @@ sub pad_array_to_go {
 
                         # TESTING: retain break at a ':' line break
                         if ( ( $i == $i_line_start || $i == $i_line_end )
-                            && $rOpts_break_at_old_trinary_breakpoints )
+                            && $rOpts_break_at_old_ternary_breakpoints )
                         {
 
                             # TESTING:
@@ -13105,7 +13276,7 @@ sub pad_array_to_go {
                 $rfor_semicolon_list[$depth]           = [];
                 $i_equals[$depth]                      = -1;
                 $want_comma_break[$depth]              = 0;
-                $container_type[$depth]                =
+                $container_type[$depth] =
                   ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
                   ? $last_nonblank_token
                   : "";
@@ -13412,6 +13583,11 @@ sub pad_array_to_go {
                     if ( $rOpts_line_up_parentheses && $saw_opening_structure )
                     {
                         my $item = $leading_spaces_to_go[ $i_opening + 1 ];
+                        if (   $i_opening + 1 < $max_index_to_go
+                            && $types_to_go[ $i_opening + 1 ] eq 'b' )
+                        {
+                            $item = $leading_spaces_to_go[ $i_opening + 2 ];
+                        }
                         if ( defined($item) ) {
                             my $i_start_2 = $item->get_STARTING_INDEX();
                             if (
@@ -13904,7 +14080,7 @@ sub find_token_starting_list {
         # Looks like a list of items.  We have to look at it and size it up.
         #---------------------------------------------------------------
 
-        my $opening_token       = $tokens_to_go[$i_opening_paren];
+        my $opening_token = $tokens_to_go[$i_opening_paren];
         my $opening_environment =
           $container_environment_to_go[$i_opening_paren];
 
@@ -14014,7 +14190,7 @@ sub find_token_starting_list {
 
         # Field width parameters
         my $pair_width = ( $max_length[0] + $max_length[1] );
-        my $max_width  =
+        my $max_width =
           ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
 
         # Number of free columns across the page width for laying out tables
@@ -14157,8 +14333,8 @@ sub find_token_starting_list {
 #           )
 #           if $style eq 'all';
 
-            my $i_last_comma    = $$rcomma_index[ $comma_count - 1 ];
-            my $long_last_term  = excess_line_length( 0, $i_last_comma ) <= 0;
+            my $i_last_comma = $$rcomma_index[ $comma_count - 1 ];
+            my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
             my $long_first_term =
               excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
 
@@ -14238,10 +14414,10 @@ sub find_token_starting_list {
         # align; high sparsity does not look good, especially with few lines
         my $sparsity = ($unused_columns) / ($formatted_columns);
         my $max_allowed_sparsity =
-            ( $item_count < 3 ) ? 0.1
+            ( $item_count < 3 )    ? 0.1
           : ( $packed_lines == 1 ) ? 0.15
           : ( $packed_lines == 2 ) ? 0.4
-          : 0.7;
+          :                          0.7;
 
         # Begin check for shortcut methods, which avoid treating a list
         # as a table for relatively small parenthesized lists.  These
@@ -14617,7 +14793,7 @@ sub get_maximum_fields_wanted {
 
 sub table_columns_available {
     my $i_first_comma = shift;
-    my $columns       =
+    my $columns =
       $rOpts_maximum_line_length - leading_spaces_to_go($i_first_comma);
 
     # Patch: the vertical formatter does not line up lines whose lengths
@@ -15086,7 +15262,7 @@ sub recombine_breakpoints {
                 my $ifff = $n + 2 <= $nmax ? $$ri_first[ $n + 2 ] : -1;
                 my $imm  = $n > 1          ? $$ri_first[ $n - 2 ] : -1;
                 my $seqno = $type_sequence_to_go[$imidr];
-                my $f_ok  =
+                my $f_ok =
                   (      $types_to_go[$if] eq ':'
                       && $type_sequence_to_go[$if] ==
                       $seqno - TYPE_SEQUENCE_INCREMENT );
@@ -15190,9 +15366,6 @@ sub recombine_breakpoints {
 
                         )
                       );
-
-                    # override breakpoint
-                    ##$forced_breakpoint_to_go[$imid] = 0;
                 }
 
                 # handle leading "if" and "unless"
@@ -15208,10 +15381,6 @@ sub recombine_breakpoints {
                         && $is_and_or{ $tokens_to_go[$if] }
 
                       );
-
-                    # override breakpoint
-                    ##$forced_breakpoint_to_go[$imid] = 0;
-
                 }
 
                 # handle all other leading keywords
@@ -15244,9 +15413,6 @@ sub recombine_breakpoints {
                     && $is_if_unless{ $tokens_to_go[$if] }
 
                   );
-
-                # override breakpoint
-                ##$forced_breakpoint_to_go[$imid] = 0;
             }
 
             #----------------------------------------------------------
@@ -15364,10 +15530,10 @@ sub set_continuation_breaks {
 
         # loop to find next break point
         for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
-            my $type            = $types_to_go[$i_test];
-            my $token           = $tokens_to_go[$i_test];
-            my $next_type       = $types_to_go[ $i_test + 1 ];
-            my $next_token      = $tokens_to_go[ $i_test + 1 ];
+            my $type       = $types_to_go[$i_test];
+            my $token      = $tokens_to_go[$i_test];
+            my $next_type  = $types_to_go[ $i_test + 1 ];
+            my $next_token = $tokens_to_go[ $i_test + 1 ];
             my $i_next_nonblank =
               ( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 );
             my $next_nonblank_type       = $types_to_go[$i_next_nonblank];
@@ -15409,7 +15575,6 @@ sub set_continuation_breaks {
 
                 # There is an implied forced break at a terminal opening brace
                 || ( ( $type eq '{' ) && ( $i_test == $imax ) )
-
               )
             {
 
@@ -15903,7 +16068,7 @@ sub permanently_decrease_AVAILABLE_SPACES {
 
     my ( $item, $spaces_needed ) = @_;
     my $available_spaces = $item->get_AVAILABLE_SPACES();
-    my $deleted_spaces   =
+    my $deleted_spaces =
       ( $available_spaces > $spaces_needed )
       ? $spaces_needed
       : $available_spaces;
@@ -15922,7 +16087,7 @@ sub tentatively_decrease_AVAILABLE_SPACES {
     # caller.
     my ( $item, $spaces_needed ) = @_;
     my $available_spaces = $item->get_AVAILABLE_SPACES();
-    my $deleted_spaces   =
+    my $deleted_spaces =
       ( $available_spaces > $spaces_needed )
       ? $spaces_needed
       : $available_spaces;
@@ -16373,6 +16538,7 @@ BEGIN {
 
     use constant VALIGN_DEBUG_FLAG_APPEND  => 0;
     use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
+    use constant VALIGN_DEBUG_FLAG_TERNARY => 0;
 
     my $debug_warning = sub {
         print "VALIGN_DEBUGGING with key $_[0]\n";
@@ -16421,6 +16587,10 @@ use vars qw(
   $cached_seqno
   $cached_line_valid
   $cached_line_leading_space_count
+  $cached_seqno_string
+
+  $seqno_string
+  $last_nonblank_seqno_string
 
   $rOpts
 
@@ -16429,6 +16599,7 @@ use vars qw(
   $rOpts_indent_columns
   $rOpts_tabs
   $rOpts_entab_leading_whitespace
+  $rOpts_valign
 
   $rOpts_minimum_space_to_comment
 
@@ -16474,6 +16645,11 @@ sub initialize {
     $cached_seqno                    = 0;
     $cached_line_valid               = 0;
     $cached_line_leading_space_count = 0;
+    $cached_seqno_string             = "";
+
+    # string of sequence numbers joined together
+    $seqno_string               = "";
+    $last_nonblank_seqno_string = "";
 
     # frequently used parameters
     $rOpts_indent_columns           = $rOpts->{'indent-columns'};
@@ -16481,6 +16657,7 @@ sub initialize {
     $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
     $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
     $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
+    $rOpts_valign                   = $rOpts->{'valign'};
 
     forget_side_comment();
 
@@ -16654,19 +16831,18 @@ sub append_line {
     # The log file warns the user if there are any such tabs.
 
     my (
-        $level,                     $level_end,
-        $indentation,               $rfields,
-        $rtokens,                   $rpatterns,
-        $is_forced_break,           $outdent_long_lines,
-        $is_terminal_statement,     $do_not_pad,
-        $rvertical_tightness_flags, $level_jump,
+        $level,               $level_end,
+        $indentation,         $rfields,
+        $rtokens,             $rpatterns,
+        $is_forced_break,     $outdent_long_lines,
+        $is_terminal_ternary, $is_terminal_statement,
+        $do_not_pad,          $rvertical_tightness_flags,
+        $level_jump,
     ) = @_;
 
     # number of fields is $jmax
     # number of tokens between fields is $jmax-1
     my $jmax = $#{$rfields};
-    $previous_minimum_jmax_seen = $minimum_jmax_seen;
-    $previous_maximum_jmax_seen = $maximum_jmax_seen;
 
     my $leading_space_count = get_SPACES($indentation);
 
@@ -16692,6 +16868,8 @@ sub append_line {
     if ($rvertical_tightness_flags) {
         if (   $maximum_line_index <= 0
             && $cached_line_type
+            && $cached_seqno
+            && $rvertical_tightness_flags->[2]
             && $rvertical_tightness_flags->[2] == $cached_seqno )
         {
             $rvertical_tightness_flags->[3] ||= 1;
@@ -16716,7 +16894,8 @@ sub append_line {
     if ( $level < 0 ) { $level = 0 }
 
     # do not align code across indentation level changes
-    if ( $level != $group_level || $is_outdented ) {
+    # or if vertical alignment is turned off for debugging
+    if ( $level != $group_level || $is_outdented || !$rOpts_valign ) {
 
         # we are allowed to shift a group of lines to the right if its
         # level is greater than the previous and next group
@@ -16763,6 +16942,25 @@ sub append_line {
         }
     }
 
+    # --------------------------------------------------------------------
+    # add dummy fields for terminal ternary
+    # --------------------------------------------------------------------
+    if ( $is_terminal_ternary && $current_line ) {
+        fix_terminal_ternary( $rfields, $rtokens, $rpatterns );
+        $jmax = @{$rfields} - 1;
+    }
+
+    # --------------------------------------------------------------------
+    # add dummy fields for else statement
+    # --------------------------------------------------------------------
+    if (   $rfields->[0] =~ /^else\s*$/
+        && $current_line
+        && $level_jump == 0 )
+    {
+        fix_terminal_else( $rfields, $rtokens, $rpatterns );
+        $jmax = @{$rfields} - 1;
+    }
+
     # --------------------------------------------------------------------
     # Step 1. Handle simple line of code with no fields to match.
     # --------------------------------------------------------------------
@@ -16911,6 +17109,8 @@ sub append_line {
     # Future update to allow this to vary:
     $current_line = $new_line if ( $maximum_line_index == 0 );
 
+    my_flush() if ( $group_type eq "TERMINAL" );
+
     # --------------------------------------------------------------------
     # Step 8. Some old debugging stuff
     # --------------------------------------------------------------------
@@ -17139,10 +17339,11 @@ sub eliminate_new_fields {
     my $old_line = shift;
     my $jmax     = $new_line->get_jmax();
 
-    my $old_rtokens   = $old_line->get_rtokens();
-    my $rtokens       = $new_line->get_rtokens();
+    my $old_rtokens = $old_line->get_rtokens();
+    my $rtokens     = $new_line->get_rtokens();
     my $is_assignment =
-      ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );
+      (      $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] )
+          || $group_type eq "TERMINAL" );
 
     # must be monotonic variation
     return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );
@@ -17166,19 +17367,20 @@ sub eliminate_new_fields {
     my $rpatterns     = $new_line->get_rpatterns();
     my $old_rpatterns = $old_line->get_rpatterns();
 
-    # loop over all old tokens except comment
+    # loop over all OLD tokens except comment and check match
     my $match = 1;
     my $k;
     for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) {
         if (   ( $$old_rtokens[$k] ne $$rtokens[$k] )
-            || ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) )
+            || ( $$old_rpatterns[$k] ne $$rpatterns[$k] )
+            && $group_type ne "TERMINAL" )
         {
             $match = 0;
             last;
         }
     }
 
-    # first tokens agree, so combine new tokens
+    # first tokens agree, so combine extra new tokens
     if ($match) {
         for $k ( $maximum_field_index .. $jmax - 1 ) {
 
@@ -17196,6 +17398,221 @@ sub eliminate_new_fields {
     $new_line->set_jmax($jmax);
 }
 
+sub fix_terminal_ternary {
+
+    # Add empty fields as necessary to align a ternary term
+    # like this:
+    #
+    #  my $leapyear =
+    #      $year % 4   ? 0
+    #    : $year % 100 ? 1
+    #    : $year % 400 ? 0
+    #    :               1;
+    #
+    my ( $rfields, $rtokens, $rpatterns ) = @_;
+
+    my $jmax        = @{$rfields} - 1;
+    my $old_line    = $group_lines[$maximum_line_index];
+    my $rfields_old = $old_line->get_rfields();
+
+    my $rpatterns_old       = $old_line->get_rpatterns();
+    my $rtokens_old         = $old_line->get_rtokens();
+    my $maximum_field_index = $old_line->get_jmax();
+
+    # look for the question mark after the :
+    my ($jquestion);
+    my $depth_question;
+    my $pad = "";
+    for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) {
+        my $tok = $rtokens_old->[$j];
+        if ( $tok =~ /^\?(\d+)$/ ) {
+            $depth_question = $1;
+
+            # depth must be correct
+            next unless ( $depth_question eq $group_level );
+
+            $jquestion = $j;
+            if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
+                $pad = " " x length($1);
+            }
+            else {
+                return;    # shouldn't happen
+            }
+            last;
+        }
+    }
+    return unless ( defined($jquestion) );    # shouldn't happen
+
+    # Now splice the tokens and patterns of the previous line
+    # into the else line to insure a match.  Add empty fields
+    # as necessary.
+    my $jadd = $jquestion;
+
+    # Work on copies of the actual arrays in case we have
+    # to return due to an error
+    my @fields   = @{$rfields};
+    my @patterns = @{$rpatterns};
+    my @tokens   = @{$rtokens};
+
+    VALIGN_DEBUG_FLAG_TERNARY && do {
+        local $" = '><';
+        print "CURRENT FIELDS=<@{$rfields_old}>\n";
+        print "CURRENT TOKENS=<@{$rtokens_old}>\n";
+        print "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
+        print "UNMODIFIED FIELDS=<@{$rfields}>\n";
+        print "UNMODIFIED TOKENS=<@{$rtokens}>\n";
+        print "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
+    };
+
+    # handle cases of leading colon on this line
+    if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
+
+        my ( $colon, $therest ) = ( $1, $2 );
+
+        # Handle sub-case of first field with leading colon plus additional code
+        # This is the usual situation as at the '1' below:
+        #  ...
+        #  : $year % 400 ? 0
+        #  :               1;
+        if ($therest) {
+
+            # Split the first field after the leading colon and insert padding.
+            # Note that this padding will remain even if the terminal value goes
+            # out on a separate line.  This does not seem to look to bad, so no
+            # mechanism has been included to undo it.
+            my $field1 = shift @fields;
+            unshift @fields, ( $colon, $pad . $therest );
+
+            # change the leading pattern from : to ?
+            return unless ( $patterns[0] =~ s/^\:/?/ );
+
+            # install leading tokens and patterns of existing line
+            unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
+            unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
+
+            # insert appropriate number of empty fields
+            splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
+        }
+
+        # handle sub-case of first field just equal to leading colon.
+        # This can happen for example in the example below where
+        # the leading '(' would create a new alignment token
+        # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
+        # :                        ( $mname = $name . '->' );
+        else {
+
+            return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
+
+            # prepend a leading ? onto the second pattern
+            $patterns[1] = "?b" . $patterns[1];
+
+            # pad the second field
+            $fields[1] = $pad . $fields[1];
+
+            # install leading tokens and patterns of existing line, replacing
+            # leading token and inserting appropriate number of empty fields
+            splice( @tokens,   0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
+            splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
+            splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
+        }
+    }
+
+    # Handle case of no leading colon on this line.  This will
+    # be the case when -wba=':' is used.  For example,
+    #  $year % 400 ? 0 :
+    #                1;
+    else {
+
+        # install leading tokens and patterns of existing line
+        $patterns[0] = '?' . 'b' . $patterns[0];
+        unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
+        unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
+
+        # insert appropriate number of empty fields
+        $jadd = $jquestion + 1;
+        $fields[0] = $pad . $fields[0];
+        splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
+    }
+
+    VALIGN_DEBUG_FLAG_TERNARY && do {
+        local $" = '><';
+        print "MODIFIED TOKENS=<@tokens>\n";
+        print "MODIFIED PATTERNS=<@patterns>\n";
+        print "MODIFIED FIELDS=<@fields>\n";
+    };
+
+    # all ok .. update the arrays
+    @{$rfields}   = @fields;
+    @{$rtokens}   = @tokens;
+    @{$rpatterns} = @patterns;
+
+    # force a flush after this line
+    $group_type = "TERMINAL";
+    return;
+}
+
+sub fix_terminal_else {
+
+    # Add empty fields as necessary to align a balanced terminal
+    # else block to a previous if/elsif/unless block,
+    # like this:
+    #
+    #  if   ( 1 || $x ) { print "ok 13\n"; }
+    #  else             { print "not ok 13\n"; }
+    #
+    my ( $rfields, $rtokens, $rpatterns ) = @_;
+    my $jmax = @{$rfields} - 1;
+    return unless ( $jmax > 0 );
+
+    # check for balanced else block following if/elsif/unless
+    my $rfields_old = $current_line->get_rfields();
+
+    # TBD: add handling for 'case'
+    return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
+
+    # look for the opening brace after the else, and extrace the depth
+    my $tok_brace = $rtokens->[0];
+    my $depth_brace;
+    if ( $tok_brace =~ /^\{(\d+)$/ ) { $depth_brace = $1; }
+
+    # probably:  "else # side_comment"
+    else { return }
+
+    my $rpatterns_old       = $current_line->get_rpatterns();
+    my $rtokens_old         = $current_line->get_rtokens();
+    my $maximum_field_index = $current_line->get_jmax();
+
+    # be sure the previous if/elsif is followed by an opening paren
+    my $jparen    = 0;
+    my $tok_paren = '(' . $depth_brace;
+    my $tok_test  = $rtokens_old->[$jparen];
+    return unless ( $tok_test eq $tok_paren );    # shouldn't happen
+
+    # Now find the opening block brace
+    my ($jbrace);
+    for ( my $j = 1 ; $j < $maximum_field_index ; $j++ ) {
+        my $tok = $rtokens_old->[$j];
+        if ( $tok eq $tok_brace ) {
+            $jbrace = $j;
+            last;
+        }
+    }
+    return unless ( defined($jbrace) );           # shouldn't happen
+
+    # Now splice the tokens and patterns of the previous line
+    # into the else line to insure a match.  Add empty fields
+    # as necessary.
+    my $jadd = $jbrace - $jparen;
+    splice( @{$rtokens},   0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
+    splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
+    splice( @{$rfields}, 1, 0, ('') x $jadd );
+
+    # force a flush after this line if it does not follow a case
+    $group_type = "TERMINAL"
+      unless ( $rfields_old->[0] =~ /^case\s*$/ );
+    return;
+}
+
 sub check_match {
 
     my $new_line = shift;
@@ -17253,12 +17670,16 @@ sub check_match {
             my $old_tok = $$old_rtokens[$j];
             my $new_tok = $$rtokens[$j];
 
-            # dumb down the match after an equals
+            # Dumb down the match AFTER an equals and
+            # also dumb down after seeing a ? ternary operator ...
+            # Everything after a + is the token which preceded the previous
+            # opening paren (container name).  We won't require them to match.
             if ( $saw_equals && $new_tok =~ /(.*)\+/ ) {
                 $new_tok = $1;
                 $old_tok =~ s/\+.*$//;
             }
-            if ( $new_tok =~ /^=\d*$/ ) { $saw_equals = 1 }
+
+            if ( $new_tok =~ /^[\?=]\d*$/ ) { $saw_equals = 1 }
 
             # we never match if the matching tokens differ
             if (   $j < $jlimit
@@ -17405,14 +17826,6 @@ sub check_fit {
     my $maximum_field_index = $old_line->get_jmax();
     for $j ( 0 .. $jmax ) {
 
-        ## testing patch to avoid excessive gaps in previous lines,
-        # due to a line of fewer fields.
-        #   return join( ".",
-        #       $self->{"dfi"},  $self->{"aa"}, $self->rsvd,     $self->{"rd"},
-        #       $self->{"area"}, $self->{"id"}, $self->{"sel"} );
-        ## MOVED BELOW AS A TEST
-        ##next if ($jmax < $maximum_field_index && $j==$jmax-1);
-
         $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j);
 
         if ( $j == 0 ) {
@@ -17455,7 +17868,11 @@ sub check_fit {
             last;
         }
 
-        # TESTING PATCH moved from above to be sure we fit
+        # patch to avoid excessive gaps in previous lines,
+        # due to a line of fewer fields.
+        #   return join( ".",
+        #       $self->{"dfi"},  $self->{"aa"}, $self->rsvd,     $self->{"rd"},
+        #       $self->{"area"}, $self->{"id"}, $self->{"sel"} );
         next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
 
         # looks ok, squeeze this field in
@@ -17471,6 +17888,8 @@ sub check_fit {
 
 sub accept_line {
 
+    # The current line either starts a new alignment group or is
+    # accepted into the current alignment group.
     my $new_line = shift;
     $group_lines[ ++$maximum_line_index ] = $new_line;
 
@@ -17503,6 +17922,10 @@ sub accept_line {
           $group_lines[ $maximum_line_index - 1 ]->get_alignments();
         $new_line->set_alignments(@new_alignments);
     }
+
+    # remember group jmax extremes for next call to append_line
+    $previous_minimum_jmax_seen = $minimum_jmax_seen;
+    $previous_maximum_jmax_seen = $maximum_jmax_seen;
 }
 
 sub dump_array {
@@ -17520,11 +17943,13 @@ sub flush {
 
     if ( $maximum_line_index < 0 ) {
         if ($cached_line_type) {
+            $seqno_string = $cached_seqno_string;
             entab_and_output( $cached_line_text,
                 $cached_line_leading_space_count,
                 $last_group_level_written );
-            $cached_line_type = 0;
-            $cached_line_text = "";
+            $cached_line_type    = 0;
+            $cached_line_text    = "";
+            $cached_seqno_string = "";
         }
     }
     else {
@@ -17552,7 +17977,7 @@ sub my_flush {
         # zero leading space count if any lines are too long
         my $max_excess = 0;
         for my $i ( 0 .. $maximum_line_index ) {
-            my $str    = $group_lines[$i];
+            my $str = $group_lines[$i];
             my $excess =
               length($str) + $leading_space_count - $rOpts_maximum_line_length;
             if ( $excess > $max_excess ) {
@@ -17622,6 +18047,7 @@ sub decide_if_aligned {
 
     # Do not try to align two lines which are not really similar
     return unless $maximum_line_index == 1;
+    return if ( $group_type eq "TERMINAL" );
 
     my $group_list_type = $group_lines[0]->get_list_type();
 
@@ -17639,6 +18065,8 @@ sub decide_if_aligned {
             || $group_maximum_gap > 12
 
             # or lines with differing number of alignment tokens
+            # TODO: this could be improved.  It occasionally rejects
+            # good matches.
             || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
           )
     );
@@ -17874,6 +18302,9 @@ sub write_vertically_aligned_line {
             $total_pad_count = 0;
             $str .= $$rfields[$j];
         }
+        else {
+            $total_pad_count = 0;
+        }
 
         # update side comment history buffer
         if ( $j == $maximum_field_index ) {
@@ -17994,7 +18425,7 @@ sub write_leader_and_string {
           length($str) - $side_comment_length + $leading_space_count -
           $rOpts_maximum_line_length;
         if ( $excess > 0 ) {
-            $leading_space_count    = 0;
+            $leading_space_count = 0;
             $last_outdented_line_at =
               $file_writer_object->get_output_line_number();
 
@@ -18020,12 +18451,17 @@ sub write_leader_and_string {
     #   [2] sequence number of container
     #   [3] valid flag: do not append if this flag is false
     #
-    my ( $open_or_close, $tightness_flag, $seqno, $valid );
+    my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
+        $seqno_end );
     if ($rvertical_tightness_flags) {
-        ( $open_or_close, $tightness_flag, $seqno, $valid ) =
-          @{$rvertical_tightness_flags};
+        (
+            $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
+            $seqno_end
+        ) = @{$rvertical_tightness_flags};
     }
 
+    $seqno_string = $seqno_end;
+
     # handle any cached line ..
     # either append this line to it or write it out
     if ( length($cached_line_text) ) {
@@ -18051,6 +18487,7 @@ sub write_leader_and_string {
             if ( $gap >= 0 ) {
                 $leading_string      = $cached_line_text . ' ' x $gap;
                 $leading_space_count = $cached_line_leading_space_count;
+                $seqno_string        = $cached_seqno_string . ':' . $seqno_beg;
             }
             else {
                 entab_and_output( $cached_line_text,
@@ -18064,6 +18501,87 @@ sub write_leader_and_string {
             my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
 
             if ( length($test_line) <= $rOpts_maximum_line_length ) {
+
+                $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
+
+                # Patch to outdent closing tokens ending # in ');'
+                # If we are joining a line like ');' to a previous stacked
+                # set of closing tokens, then decide if we may outdent the
+                # combined stack to the indentation of the ');'.  Since we
+                # should not normally outdent any of the other tokens more than
+                # the indentation of the lines that contained them, we will
+                # only do this if all of the corresponding opening
+                # tokens were on the same line.  This can happen with
+                # -sot and -sct.  For example, it is ok here:
+                #   __PACKAGE__->load_components( qw(
+                #         PK::Auto
+                #         Core
+                #   ));
+                #
+                #   But, for example, we do not outdent in this example because
+                #   that would put the closing sub brace out farther than the
+                #   opening sub brace:
+                #
+                #   perltidy -sot -sct
+                #   $c->Tk::bind(
+                #       '<Control-f>' => sub {
+                #           my ($c) = @_;
+                #           my $e = $c->XEvent;
+                #           itemsUnderArea $c;
+                #       } );
+                #
+                if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) {
+
+                    # The way to tell this is if the stacked sequence numbers
+                    # of this output line are the reverse of the stacked
+                    # sequence numbers of the previous non-blank line of
+                    # sequence numbers.  So we can join if the previous
+                    # nonblank string of tokens is the mirror image.  For
+                    # example if stack )}] is 13:8:6 then we are looking for a
+                    # leading stack like [{( which is 6:8:13 We only need to
+                    # check the two ends, because the intermediate tokens must
+                    # fall in order.  Note on speed: having to split on colons
+                    # and eliminate multiple colons might appear to be slow,
+                    # but it's not an issue because we almost never come
+                    # through here.  In a typical file we don't.
+                    $seqno_string               =~ s/^:+//;
+                    $last_nonblank_seqno_string =~ s/^:+//;
+                    $seqno_string               =~ s/:+/:/g;
+                    $last_nonblank_seqno_string =~ s/:+/:/g;
+
+                    # how many spaces can we outdent?
+                    my $diff =
+                      $cached_line_leading_space_count - $leading_space_count;
+                    if (   $diff > 0
+                        && length($seqno_string)
+                        && length($last_nonblank_seqno_string) ==
+                        length($seqno_string) )
+                    {
+                        my @seqno_last =
+                          ( split ':', $last_nonblank_seqno_string );
+                        my @seqno_now = ( split ':', $seqno_string );
+                        if (   $seqno_now[-1] == $seqno_last[0]
+                            && $seqno_now[0] == $seqno_last[-1] )
+                        {
+
+                            # OK to outdent ..
+                            # for absolute safety, be sure we only remove
+                            # whitespace
+                            my $ws = substr( $test_line, 0, $diff );
+                            if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
+
+                                $test_line = substr( $test_line, $diff );
+                                $cached_line_leading_space_count -= $diff;
+                            }
+
+                            # shouldn't happen, but not critical:
+                            ##else {
+                            ## ERROR transferring indentation here
+                            ##}
+                        }
+                    }
+                }
+
                 $str                 = $test_line;
                 $leading_string      = "";
                 $leading_space_count = $cached_line_leading_space_count;
@@ -18082,7 +18600,7 @@ sub write_leader_and_string {
     my $line = $leading_string . $str;
 
     # write or cache this line
-    if ( !$rvertical_tightness_flags || $side_comment_length > 0 ) {
+    if ( !$open_or_close || $side_comment_length > 0 ) {
         entab_and_output( $line, $leading_space_count, $group_level );
     }
     else {
@@ -18092,6 +18610,7 @@ sub write_leader_and_string {
         $cached_seqno                    = $seqno;
         $cached_line_valid               = $valid;
         $cached_line_leading_space_count = $leading_space_count;
+        $cached_seqno_string             = $seqno_string;
     }
 
     $last_group_level_written = $group_level;
@@ -18138,7 +18657,7 @@ sub entab_and_output {
         # Handle option of one tab per level
         else {
             my $leading_string = ( "\t" x $level );
-            my $space_count    =
+            my $space_count =
               $leading_space_count - $level * $rOpts_indent_columns;
 
             # shouldn't happen:
@@ -18166,6 +18685,9 @@ sub entab_and_output {
         }
     }
     $file_writer_object->write_code_line( $line . "\n" );
+    if ($seqno_string) {
+        $last_nonblank_seqno_string = $seqno_string;
+    }
 }
 
 {    # begin get_leading_string
@@ -18663,88 +19185,55 @@ BEGIN {
 }
 
 use Carp;
+
+# PACKAGE VARIABLES for for processing an entire FILE.
 use vars qw{
   $tokenizer_self
-  $level_in_tokenizer
-  $slevel_in_tokenizer
-  $nesting_token_string
-  $nesting_type_string
-  $nesting_block_string
-  $nesting_block_flag
-  $nesting_list_string
-  $nesting_list_flag
-  $saw_negative_indentation
-  $id_scan_state
+
   $last_nonblank_token
   $last_nonblank_type
   $last_nonblank_block_type
-  $last_nonblank_container_type
-  $last_nonblank_type_sequence
-  $last_last_nonblank_token
-  $last_last_nonblank_type
-  $last_last_nonblank_block_type
-  $last_last_nonblank_container_type
-  $last_last_nonblank_type_sequence
-  $last_nonblank_prototype
   $statement_type
-  $identifier
   $in_attribute_list
-  $in_quote
-  $quote_type
-  $quote_character
-  $quote_pos
-  $quote_depth
-  $allowed_quote_modifiers
+  $current_package
+  $context
+
+  %is_constant
+  %is_user_function
+  %user_function_prototype
+  %is_block_function
+  %is_block_list_function
+  %saw_function_definition
+
+  $brace_depth
   $paren_depth
+  $square_bracket_depth
+
+  @current_depth
+  @nesting_sequence_number
+  @current_sequence_number
   @paren_type
   @paren_semicolon_count
   @paren_structural_type
-  $brace_depth
   @brace_type
   @brace_structural_type
   @brace_statement_type
   @brace_context
   @brace_package
-  $square_bracket_depth
   @square_bracket_type
   @square_bracket_structural_type
   @depth_array
   @starting_line_of_current_depth
-  @current_depth
-  @current_sequence_number
-  @nesting_sequence_number
-  @lower_case_labels_at
-  $saw_v_string
-  %is_constant
-  %is_user_function
-  %user_function_prototype
-  %saw_function_definition
-  $max_token_index
-  $peeked_ahead
-  $current_package
-  $unexpected_error_count
-  $input_line
-  $input_line_number
-  $rpretokens
-  $rpretoken_map
-  $rpretoken_type
-  $want_paren
-  $context
-  @slevel_stack
-  $ci_string_in_tokenizer
-  $continuation_string_in_tokenizer
-  $in_statement_continuation
-  $started_looking_for_here_target_at
-  $nearly_matched_here_target_at
+};
 
+# GLOBAL CONSTANTS for routines in this package
+use vars qw{
   %is_indirect_object_taker
   %is_block_operator
   %expecting_operator_token
   %expecting_operator_types
   %expecting_term_types
   %expecting_term_token
-  %is_block_function
-  %is_block_list_function
   %is_digraph
   %is_file_test_operator
   %is_trigraph
@@ -18791,17 +19280,18 @@ sub new {
     # Note: 'tabs' and 'indent_columns' are temporary and should be
     # removed asap
     my %defaults = (
-        source_object       => undef,
-        debugger_object     => undef,
-        diagnostics_object  => undef,
-        logger_object       => undef,
-        starting_level      => undef,
-        indent_columns      => 4,
-        tabs                => 0,
-        look_for_hash_bang  => 0,
-        trim_qw             => 1,
-        look_for_autoloader => 1,
-        look_for_selfloader => 1,
+        source_object        => undef,
+        debugger_object      => undef,
+        diagnostics_object   => undef,
+        logger_object        => undef,
+        starting_level       => undef,
+        indent_columns       => 4,
+        tabs                 => 0,
+        look_for_hash_bang   => 0,
+        trim_qw              => 1,
+        look_for_autoloader  => 1,
+        look_for_selfloader  => 1,
+        starting_line_number => 1,
     );
     my %args = ( %defaults, @_ );
 
@@ -18831,45 +19321,53 @@ sub new {
     # _know_input_tabstr    flag indicating if we know _input_tabstr
     # _line_buffer_object   object with get_line() method to supply source code
     # _diagnostics_object   place to write debugging information
+    # _unexpected_error_count  error count used to limit output
+    # _lower_case_labels_at  line numbers where lower case labels seen
     $tokenizer_self = {
-        _rhere_target_list    => undef,
-        _in_here_doc          => 0,
-        _here_doc_target      => "",
-        _here_quote_character => "",
-        _in_data              => 0,
-        _in_end               => 0,
-        _in_format            => 0,
-        _in_error             => 0,
-        _in_pod               => 0,
-        _in_attribute_list    => 0,
-        _in_quote             => 0,
-        _quote_target         => "",
-        _line_start_quote     => -1,
-        _starting_level       => $args{starting_level},
-        _know_starting_level  => defined( $args{starting_level} ),
-        _tabs                 => $args{tabs},
-        _indent_columns       => $args{indent_columns},
-        _look_for_hash_bang   => $args{look_for_hash_bang},
-        _trim_qw              => $args{trim_qw},
-        _input_tabstr         => "",
-        _know_input_tabstr    => -1,
-        _last_line_number     => 0,
-        _saw_perl_dash_P      => 0,
-        _saw_perl_dash_w      => 0,
-        _saw_use_strict       => 0,
-        _look_for_autoloader  => $args{look_for_autoloader},
-        _look_for_selfloader  => $args{look_for_selfloader},
-        _saw_autoloader       => 0,
-        _saw_selfloader       => 0,
-        _saw_hash_bang        => 0,
-        _saw_end              => 0,
-        _saw_data             => 0,
-        _saw_lc_filehandle    => 0,
-        _started_tokenizing   => 0,
-        _line_buffer_object   => $line_buffer_object,
-        _debugger_object      => $args{debugger_object},
-        _diagnostics_object   => $args{diagnostics_object},
-        _logger_object        => $args{logger_object},
+        _rhere_target_list                  => [],
+        _in_here_doc                        => 0,
+        _here_doc_target                    => "",
+        _here_quote_character               => "",
+        _in_data                            => 0,
+        _in_end                             => 0,
+        _in_format                          => 0,
+        _in_error                           => 0,
+        _in_pod                             => 0,
+        _in_attribute_list                  => 0,
+        _in_quote                           => 0,
+        _quote_target                       => "",
+        _line_start_quote                   => -1,
+        _starting_level                     => $args{starting_level},
+        _know_starting_level                => defined( $args{starting_level} ),
+        _tabs                               => $args{tabs},
+        _indent_columns                     => $args{indent_columns},
+        _look_for_hash_bang                 => $args{look_for_hash_bang},
+        _trim_qw                            => $args{trim_qw},
+        _input_tabstr                       => "",
+        _know_input_tabstr                  => -1,
+        _last_line_number                   => $args{starting_line_number} - 1,
+        _saw_perl_dash_P                    => 0,
+        _saw_perl_dash_w                    => 0,
+        _saw_use_strict                     => 0,
+        _saw_v_string                       => 0,
+        _look_for_autoloader                => $args{look_for_autoloader},
+        _look_for_selfloader                => $args{look_for_selfloader},
+        _saw_autoloader                     => 0,
+        _saw_selfloader                     => 0,
+        _saw_hash_bang                      => 0,
+        _saw_end                            => 0,
+        _saw_data                           => 0,
+        _saw_negative_indentation           => 0,
+        _started_tokenizing                 => 0,
+        _line_buffer_object                 => $line_buffer_object,
+        _debugger_object                    => $args{debugger_object},
+        _diagnostics_object                 => $args{diagnostics_object},
+        _logger_object                      => $args{logger_object},
+        _unexpected_error_count             => 0,
+        _started_looking_for_here_target_at => 0,
+        _nearly_matched_here_target_at      => undef,
+        _line_text                          => "",
+        _rlower_case_labels_at              => undef,
     };
 
     prepare_for_a_new_file();
@@ -18985,38 +19483,6 @@ sub report_tokenization_errors {
         warning("hit EOF while in format description\n");
     }
 
-    # this check may be removed after a year or so
-    if ( $tokenizer_self->{_saw_lc_filehandle} ) {
-
-        warning( <<'EOM' );
-------------------------------------------------------------------------
-PLEASE NOTE: If you get this message, it is because perltidy noticed
-possible ambiguous syntax at one or more places in your script, as
-noted above.  The problem is with statements accepting indirect objects,
-such as print and printf statements of the form
-
-    print bareword ( $etc
-
-Perltidy needs your help in deciding if 'bareword' is a filehandle or a
-function call.  The problem is the space between 'bareword' and '('.  If
-'bareword' is a function call, you should remove the trailing space.  If
-'bareword' is a filehandle, you should avoid the opening paren or else
-globally capitalize 'bareword' to be BAREWORD.  So the above line
-would be: 
-
-    print bareword( $etc    # function
-or
-    print bareword @list    # filehandle
-or
-    print BAREWORD ( $etc   # filehandle
-
-If you want to keep the line as it is, and are sure it is correct,
-you can use -w=0 to prevent this message.
-------------------------------------------------------------------------
-EOM
-
-    }
-
     if ( $tokenizer_self->{_in_pod} ) {
 
         # Just write log entry if this is after __END__ or __DATA__
@@ -19038,6 +19504,8 @@ EOM
 
     if ( $tokenizer_self->{_in_here_doc} ) {
         my $here_doc_target = $tokenizer_self->{_here_doc_target};
+        my $started_looking_for_here_target_at =
+          $tokenizer_self->{_started_looking_for_here_target_at};
         if ($here_doc_target) {
             warning(
 "hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
@@ -19048,6 +19516,8 @@ EOM
 "hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n"
             );
         }
+        my $nearly_matched_here_target_at =
+          $tokenizer_self->{_nearly_matched_here_target_at};
         if ($nearly_matched_here_target_at) {
             warning(
 "NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
@@ -19058,7 +19528,7 @@ EOM
     if ( $tokenizer_self->{_in_quote} ) {
         my $line_start_quote = $tokenizer_self->{_line_start_quote};
         my $quote_target     = $tokenizer_self->{_quote_target};
-        my $what             =
+        my $what =
           ( $tokenizer_self->{_in_attribute_list} )
           ? "attribute list"
           : "quote/pattern";
@@ -19086,8 +19556,9 @@ EOM
 
     # it is suggested that lables have at least one upper case character
     # for legibility and to avoid code breakage as new keywords are introduced
-    if (@lower_case_labels_at) {
-        my $num = @lower_case_labels_at;
+    if ( $tokenizer_self->{_rlower_case_labels_at} ) {
+        my @lower_case_labels_at =
+          @{ $tokenizer_self->{_rlower_case_labels_at} };
         write_logfile_entry(
             "Suggest using upper case characters in label(s)\n");
         local $" = ')(';
@@ -19099,7 +19570,9 @@ sub report_v_string {
 
     # warn if this version can't handle v-strings
     my $tok = shift;
-    $saw_v_string = $input_line_number;
+    unless ( $tokenizer_self->{_saw_v_string} ) {
+        $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number};
+    }
     if ( $] < 5.006 ) {
         warning(
 "Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
@@ -19116,11 +19589,15 @@ sub get_line {
 
     my $self = shift;
 
+    # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth,
+    # $square_bracket_depth, $paren_depth
+
     my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
+    $tokenizer_self->{_line_text} = $input_line;
 
     return undef unless ($input_line);
 
-    $tokenizer_self->{_last_line_number}++;
+    my $input_line_number = ++$tokenizer_self->{_last_line_number};
 
     # Find and remove what characters terminate this line, including any
     # control r
@@ -19135,8 +19612,7 @@ sub get_line {
     # for backwards compatability we keep the line text terminated with
     # a newline character
     $input_line .= "\n";
-
-    my $input_line_number = $tokenizer_self->{_last_line_number};
+    $tokenizer_self->{_line_text} = $input_line;    # update
 
     # create a data structure describing this line which will be
     # returned to the caller.
@@ -19181,8 +19657,7 @@ sub get_line {
         _rci_levels               => undef,
         _rnesting_blocks          => undef,
         _python_indentation_level => -1,                   ## 0,
-        _starting_in_quote        =>
-          ( $tokenizer_self->{_in_quote} && ( $quote_type eq 'Q' ) ),
+        _starting_in_quote    => 0,                    # to be set by subroutine
         _ending_in_quote      => 0,
         _curly_brace_depth    => $brace_depth,
         _square_bracket_depth => $square_bracket_depth,
@@ -19199,21 +19674,22 @@ sub get_line {
         my $candidate_target     = $input_line;
         chomp $candidate_target;
         if ( $candidate_target eq $here_doc_target ) {
-            $nearly_matched_here_target_at = undef;
-            $line_of_tokens->{_line_type} = 'HERE_END';
+            $tokenizer_self->{_nearly_matched_here_target_at} = undef;
+            $line_of_tokens->{_line_type}                     = 'HERE_END';
             write_logfile_entry("Exiting HERE document $here_doc_target\n");
 
             my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
             if (@$rhere_target_list) {    # there can be multiple here targets
                 ( $here_doc_target, $here_quote_character ) =
                   @{ shift @$rhere_target_list };
-                $tokenizer_self->{_here_doc_target}      = $here_doc_target;
+                $tokenizer_self->{_here_doc_target} = $here_doc_target;
                 $tokenizer_self->{_here_quote_character} =
                   $here_quote_character;
                 write_logfile_entry(
                     "Entering HERE document $here_doc_target\n");
-                $nearly_matched_here_target_at      = undef;
-                $started_looking_for_here_target_at = $input_line_number;
+                $tokenizer_self->{_nearly_matched_here_target_at} = undef;
+                $tokenizer_self->{_started_looking_for_here_target_at} =
+                  $input_line_number;
             }
             else {
                 $tokenizer_self->{_in_here_doc}          = 0;
@@ -19228,7 +19704,8 @@ sub get_line {
             $candidate_target =~ s/\s*$//;
             $candidate_target =~ s/^\s*//;
             if ( $candidate_target eq $here_doc_target ) {
-                $nearly_matched_here_target_at = $input_line_number;
+                $tokenizer_self->{_nearly_matched_here_target_at} =
+                  $input_line_number;
             }
         }
         return $line_of_tokens;
@@ -19438,14 +19915,14 @@ sub get_line {
     my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
     if (@$rhere_target_list) {
 
-        #my $here_doc_target = shift @$rhere_target_list;
         my ( $here_doc_target, $here_quote_character ) =
           @{ shift @$rhere_target_list };
         $tokenizer_self->{_in_here_doc}          = 1;
         $tokenizer_self->{_here_doc_target}      = $here_doc_target;
         $tokenizer_self->{_here_quote_character} = $here_quote_character;
         write_logfile_entry("Entering HERE document $here_doc_target\n");
-        $started_looking_for_here_target_at = $input_line_number;
+        $tokenizer_self->{_started_looking_for_here_target_at} =
+          $input_line_number;
     }
 
     # NOTE: __END__ and __DATA__ statements are written unformatted
@@ -19505,9 +19982,11 @@ sub get_line {
         and ( $tokenizer_self->{_line_start_quote} < 0 ) )
     {
 
-        if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
+        #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
+        if (
+            ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ )
+        {
             $tokenizer_self->{_line_start_quote} = $input_line_number;
-            $tokenizer_self->{_quote_target}     = $quote_target;
             write_logfile_entry(
                 "Start multi-line quote or pattern ending in $quote_target\n");
         }
@@ -19525,6 +20004,7 @@ sub get_line {
 
 sub find_starting_indentation_level {
 
+    # USES GLOBAL VARIABLES: $tokenizer_self
     my $starting_level    = 0;
     my $know_input_tabstr = -1;    # flag for find_indentation_level
 
@@ -19595,6 +20075,8 @@ sub find_starting_indentation_level {
 
 sub find_indentation_level {
     my ( $line, $structural_indentation_level ) = @_;
+
+    # USES GLOBAL VARIABLES: $tokenizer_self
     my $level = 0;
     my $msg   = "";
 
@@ -19668,7 +20150,7 @@ sub find_indentation_level {
             }
             else {
                 $columns = int $columns;
-                $msg     =
+                $msg =
 "old indentation is unclear, using $columns $entabbed spaces\n";
             }
             $input_tabstr = " " x $columns;
@@ -19711,81 +20193,6 @@ sub find_indentation_level {
     return ( $level, $msg );
 }
 
-sub dump_token_types {
-    my $class = shift;
-    my $fh    = shift;
-
-    # This should be the latest list of token types in use
-    # adding NEW_TOKENS: add a comment here
-    print $fh <<'END_OF_LIST';
-
-Here is a list of the token types currently used for lines of type 'CODE'.  
-For the following tokens, the "type" of a token is just the token itself.  
-
-.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
-( ) <= >= == =~ !~ != ++ -- /= x=
-... **= <<= >>= &&= ||= //= <=> 
-, + - / * | % ! x ~ = \ ? : . < > ^ &
-
-The following additional token types are defined:
-
- type    meaning
-    b    blank (white space) 
-    {    indent: opening structural curly brace or square bracket or paren
-         (code block, anonymous hash reference, or anonymous array reference)
-    }    outdent: right structural curly brace or square bracket or paren
-    [    left non-structural square bracket (enclosing an array index)
-    ]    right non-structural square bracket
-    (    left non-structural paren (all but a list right of an =)
-    )    right non-structural parena
-    L    left non-structural curly brace (enclosing a key)
-    R    right non-structural curly brace 
-    ;    terminal semicolon
-    f    indicates a semicolon in a "for" statement
-    h    here_doc operator <<
-    #    a comment
-    Q    indicates a quote or pattern
-    q    indicates a qw quote block
-    k    a perl keyword
-    C    user-defined constant or constant function (with void prototype = ())
-    U    user-defined function taking parameters
-    G    user-defined function taking block parameter (like grep/map/eval)
-    M    (unused, but reserved for subroutine definition name)
-    P    (unused, but -html uses it to label pod text)
-    t    type indicater such as %,$,@,*,&,sub
-    w    bare word (perhaps a subroutine call)
-    i    identifier of some type (with leading %, $, @, *, &, sub, -> )
-    n    a number
-    v    a v-string
-    F    a file test operator (like -e)
-    Y    File handle
-    Z    identifier in indirect object slot: may be file handle, object
-    J    LABEL:  code block label
-    j    LABEL after next, last, redo, goto
-    p    unary +
-    m    unary -
-    pp   pre-increment operator ++
-    mm   pre-decrement operator -- 
-    A    : used as attribute separator
-    
-    Here are the '_line_type' codes used internally:
-    SYSTEM         - system-specific code before hash-bang line
-    CODE           - line of perl code (including comments)
-    POD_START      - line starting pod, such as '=head'
-    POD            - pod documentation text
-    POD_END        - last line of pod section, '=cut'
-    HERE           - text of here-document
-    HERE_END       - last line of here-doc (target word)
-    FORMAT         - format section
-    FORMAT_END     - last line of format section, '.'
-    DATA_START     - __DATA__ line
-    DATA           - unidentified text following __DATA__
-    END_START      - __END__ line
-    END            - unidentified text following __END__
-    ERROR          - we are in big trouble, probably not a perl script
-END_OF_LIST
-}
-
 # This is a currently unused debug routine
 sub dump_functions {
 
@@ -19817,142 +20224,392 @@ sub dump_functions {
 }
 
 sub prepare_for_a_new_file {
-    $saw_negative_indentation = 0;
-    $id_scan_state            = '';
-    $statement_type           = '';     # '' or 'use' or 'sub..' or 'case..'
+
+    # previous tokens needed to determine what to expect next
     $last_nonblank_token      = ';';    # the only possible starting state which
     $last_nonblank_type       = ';';    # will make a leading brace a code block
     $last_nonblank_block_type = '';
-    $last_nonblank_container_type      = '';
-    $last_nonblank_type_sequence       = '';
-    $last_last_nonblank_token          = ';';
-    $last_last_nonblank_type           = ';';
-    $last_last_nonblank_block_type     = '';
-    $last_last_nonblank_container_type = '';
-    $last_last_nonblank_type_sequence  = '';
-    $last_nonblank_prototype           = "";
-    $identifier                        = '';
-    $in_attribute_list                 = 0;     # ATTRS
-    $in_quote   = 0;     # flag telling if we are chasing a quote, and what kind
-    $quote_type = 'Q';
-    $quote_character = "";    # character we seek if chasing a quote
-    $quote_pos   = 0;  # next character index to check for case of alphanum char
-    $quote_depth = 0;
-    $allowed_quote_modifiers                     = "";
-    $paren_depth                                 = 0;
-    $brace_depth                                 = 0;
-    $square_bracket_depth                        = 0;
-    $current_package                             = "main";
+
+    # scalars for remembering statement types across multiple lines
+    $statement_type    = '';            # '' or 'use' or 'sub..' or 'case..'
+    $in_attribute_list = 0;
+
+    # scalars for remembering where we are in the file
+    $current_package = "main";
+    $context         = UNKNOWN_CONTEXT;
+
+    # hashes used to remember function information
+    %is_constant             = ();      # user-defined constants
+    %is_user_function        = ();      # user-defined functions
+    %user_function_prototype = ();      # their prototypes
+    %is_block_function       = ();
+    %is_block_list_function  = ();
+    %saw_function_definition = ();
+
+    # variables used to track depths of various containers
+    # and report nesting errors
+    $paren_depth          = 0;
+    $brace_depth          = 0;
+    $square_bracket_depth = 0;
     @current_depth[ 0 .. $#closing_brace_names ] =
       (0) x scalar @closing_brace_names;
     @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
       ( 0 .. $#closing_brace_names );
-    @current_sequence_number = ();
-
+    @current_sequence_number             = ();
     $paren_type[$paren_depth]            = '';
     $paren_semicolon_count[$paren_depth] = 0;
+    $paren_structural_type[$brace_depth] = '';
     $brace_type[$brace_depth] = ';';    # identify opening brace as code block
     $brace_structural_type[$brace_depth]                   = '';
     $brace_statement_type[$brace_depth]                    = "";
     $brace_context[$brace_depth]                           = UNKNOWN_CONTEXT;
-    $paren_structural_type[$brace_depth]                   = '';
+    $brace_package[$paren_depth]                           = $current_package;
     $square_bracket_type[$square_bracket_depth]            = '';
     $square_bracket_structural_type[$square_bracket_depth] = '';
-    $brace_package[$paren_depth]                           = $current_package;
-    %is_constant                      = ();             # user-defined constants
-    %is_user_function                 = ();             # user-defined functions
-    %user_function_prototype          = ();             # their prototypes
-    %is_block_function                = ();
-    %is_block_list_function           = ();
-    %saw_function_definition          = ();
-    $unexpected_error_count           = 0;
-    $want_paren                       = "";
-    $context                          = UNKNOWN_CONTEXT;
-    @slevel_stack                     = ();
-    $ci_string_in_tokenizer           = "";
-    $continuation_string_in_tokenizer = "0";
-    $in_statement_continuation        = 0;
-    @lower_case_labels_at             = ();
-    $saw_v_string         = 0;      # for warning of v-strings on older perl
-    $nesting_token_string = "";
-    $nesting_type_string  = "";
-    $nesting_block_string = '1';    # initially in a block
-    $nesting_block_flag   = 1;
-    $nesting_list_string  = '0';    # initially not in a list
-    $nesting_list_flag    = 0;      # initially not in a list
-    $nearly_matched_here_target_at = undef;
-}
-
-sub get_quote_target {
-    return matching_end_token($quote_character);
-}
-
-sub get_indentation_level {
-    return $level_in_tokenizer;
-}
-
-sub reset_indentation_level {
-    $level_in_tokenizer  = $_[0];
-    $slevel_in_tokenizer = $_[0];
-    push @slevel_stack, $slevel_in_tokenizer;
-}
-
-{    # begin tokenize_this_line
+
+    initialize_tokenizer_state();
+}
+
+{                                       # begin tokenize_this_line
 
     use constant BRACE          => 0;
     use constant SQUARE_BRACKET => 1;
     use constant PAREN          => 2;
     use constant QUESTION_COLON => 3;
 
+    # TV1: scalars for processing one LINE.
+    # Re-initialized on each entry to sub tokenize_this_line.
+    my (
+        $block_type,        $container_type,    $expecting,
+        $i,                 $i_tok,             $input_line,
+        $input_line_number, $last_nonblank_i,   $max_token_index,
+        $next_tok,          $next_type,         $peeked_ahead,
+        $prototype,         $rhere_target_list, $rtoken_map,
+        $rtoken_type,       $rtokens,           $tok,
+        $type,              $type_sequence,
+    );
+
+    # TV2: refs to ARRAYS for processing one LINE
+    # Re-initialized on each call.
+    my $routput_token_list     = [];    # stack of output token indexes
+    my $routput_token_type     = [];    # token types
+    my $routput_block_type     = [];    # types of code block
+    my $routput_container_type = [];    # paren types, such as if, elsif, ..
+    my $routput_type_sequence  = [];    # nesting sequential number
+
+    # TV3: SCALARS for quote variables.  These are initialized with a
+    # subroutine call and continually updated as lines are processed.
+    my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
+        $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, );
+
+    # TV4: SCALARS for multi-line identifiers and
+    # statements. These are initialized with a subroutine call
+    # and continually updated as lines are processed.
+    my ( $id_scan_state, $identifier, $want_paren, );
+
+    # TV5: SCALARS for tracking indentation level.
+    # Initialized once and continually updated as lines are
+    # processed.
     my (
-        $block_type,      $container_type,       $expecting,
-        $here_doc_target, $here_quote_character, $i,
-        $i_tok,           $last_nonblank_i,      $next_tok,
-        $next_type,       $prototype,            $rtoken_map,
-        $rtoken_type,     $rtokens,              $tok,
-        $type,            $type_sequence,
+        $nesting_token_string,      $nesting_type_string,
+        $nesting_block_string,      $nesting_block_flag,
+        $nesting_list_string,       $nesting_list_flag,
+        $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
+        $in_statement_continuation, $level_in_tokenizer,
+        $slevel_in_tokenizer,       $rslevel_stack,
     );
 
-    my @output_token_list     = ();    # stack of output token indexes
-    my @output_token_type     = ();    # token types
-    my @output_block_type     = ();    # types of code block
-    my @output_container_type = ();    # paren types, such as if, elsif, ..
-    my @output_type_sequence  = ();    # nesting sequential number
+    # TV6: SCALARS for remembering several previous
+    # tokens. Initialized once and continually updated as
+    # lines are processed.
+    my (
+        $last_nonblank_container_type,     $last_nonblank_type_sequence,
+        $last_last_nonblank_token,         $last_last_nonblank_type,
+        $last_last_nonblank_block_type,    $last_last_nonblank_container_type,
+        $last_last_nonblank_type_sequence, $last_nonblank_prototype,
+    );
+
+    # ----------------------------------------------------------------
+    # beginning of tokenizer variable access and manipulation routines
+    # ----------------------------------------------------------------
+
+    sub initialize_tokenizer_state {
+
+        # TV1: initialized on each call
+        # TV2: initialized on each call
+        # TV3:
+        $in_quote                = 0;
+        $quote_type              = 'Q';
+        $quote_character         = "";
+        $quote_pos               = 0;
+        $quote_depth             = 0;
+        $quoted_string_1         = "";
+        $quoted_string_2         = "";
+        $allowed_quote_modifiers = "";
+
+        # TV4:
+        $id_scan_state = '';
+        $identifier    = '';
+        $want_paren    = "";
+
+        # TV5:
+        $nesting_token_string             = "";
+        $nesting_type_string              = "";
+        $nesting_block_string             = '1';    # initially in a block
+        $nesting_block_flag               = 1;
+        $nesting_list_string              = '0';    # initially not in a list
+        $nesting_list_flag                = 0;      # initially not in a list
+        $ci_string_in_tokenizer           = "";
+        $continuation_string_in_tokenizer = "0";
+        $in_statement_continuation        = 0;
+        $level_in_tokenizer               = 0;
+        $slevel_in_tokenizer              = 0;
+        $rslevel_stack                    = [];
+
+        # TV6:
+        $last_nonblank_container_type      = '';
+        $last_nonblank_type_sequence       = '';
+        $last_last_nonblank_token          = ';';
+        $last_last_nonblank_type           = ';';
+        $last_last_nonblank_block_type     = '';
+        $last_last_nonblank_container_type = '';
+        $last_last_nonblank_type_sequence  = '';
+        $last_nonblank_prototype           = "";
+    }
+
+    sub save_tokenizer_state {
+
+        my $rTV1 = [
+            $block_type,        $container_type,    $expecting,
+            $i,                 $i_tok,             $input_line,
+            $input_line_number, $last_nonblank_i,   $max_token_index,
+            $next_tok,          $next_type,         $peeked_ahead,
+            $prototype,         $rhere_target_list, $rtoken_map,
+            $rtoken_type,       $rtokens,           $tok,
+            $type,              $type_sequence,
+        ];
+
+        my $rTV2 = [
+            $routput_token_list, $routput_token_type,
+            $routput_block_type, $routput_container_type,
+            $routput_type_sequence,
+        ];
+
+        my $rTV3 = [
+            $in_quote,        $quote_type,
+            $quote_character, $quote_pos,
+            $quote_depth,     $quoted_string_1,
+            $quoted_string_2, $allowed_quote_modifiers,
+        ];
+
+        my $rTV4 = [ $id_scan_state, $identifier, $want_paren, ];
+
+        my $rTV5 = [
+            $nesting_token_string,      $nesting_type_string,
+            $nesting_block_string,      $nesting_block_flag,
+            $nesting_list_string,       $nesting_list_flag,
+            $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
+            $in_statement_continuation, $level_in_tokenizer,
+            $slevel_in_tokenizer,       $rslevel_stack,
+        ];
+
+        my $rTV6 = [
+            $last_nonblank_container_type,
+            $last_nonblank_type_sequence,
+            $last_last_nonblank_token,
+            $last_last_nonblank_type,
+            $last_last_nonblank_block_type,
+            $last_last_nonblank_container_type,
+            $last_last_nonblank_type_sequence,
+            $last_nonblank_prototype,
+        ];
+        return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ];
+    }
+
+    sub restore_tokenizer_state {
+        my ($rstate) = @_;
+        my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};
+        (
+            $block_type,        $container_type,    $expecting,
+            $i,                 $i_tok,             $input_line,
+            $input_line_number, $last_nonblank_i,   $max_token_index,
+            $next_tok,          $next_type,         $peeked_ahead,
+            $prototype,         $rhere_target_list, $rtoken_map,
+            $rtoken_type,       $rtokens,           $tok,
+            $type,              $type_sequence,
+        ) = @{$rTV1};
+
+        (
+            $routput_token_list, $routput_token_type,
+            $routput_block_type, $routput_container_type,
+            $routput_type_sequence,
+        ) = @{$rTV2};
+
+        (
+            $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
+            $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
+        ) = @{$rTV3};
+
+        ( $id_scan_state, $identifier, $want_paren, ) = @{$rTV4};
+
+        (
+            $nesting_token_string,      $nesting_type_string,
+            $nesting_block_string,      $nesting_block_flag,
+            $nesting_list_string,       $nesting_list_flag,
+            $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
+            $in_statement_continuation, $level_in_tokenizer,
+            $slevel_in_tokenizer,       $rslevel_stack,
+        ) = @{$rTV5};
+
+        (
+            $last_nonblank_container_type,
+            $last_nonblank_type_sequence,
+            $last_last_nonblank_token,
+            $last_last_nonblank_type,
+            $last_last_nonblank_block_type,
+            $last_last_nonblank_container_type,
+            $last_last_nonblank_type_sequence,
+            $last_nonblank_prototype,
+        ) = @{$rTV6};
+    }
+
+    sub get_indentation_level {
+        return $level_in_tokenizer;
+    }
+
+    sub reset_indentation_level {
+        $level_in_tokenizer  = $_[0];
+        $slevel_in_tokenizer = $_[0];
+        push @{$rslevel_stack}, $slevel_in_tokenizer;
+    }
+
+    sub peeked_ahead {
+        $peeked_ahead = defined( $_[0] ) ? $_[0] : $peeked_ahead;
+    }
 
-    my @here_target_list = ();         # list of here-doc target strings
+    # ------------------------------------------------------------
+    # end of tokenizer variable access and manipulation routines
+    # ------------------------------------------------------------
 
     # ------------------------------------------------------------
-    # beginning of various scanner interfaces to simplify coding
+    # beginning of various scanner interface routines
     # ------------------------------------------------------------
+    sub scan_replacement_text {
+
+        # check for here-docs in replacement text invoked by
+        # a substitution operator with executable modifier 'e'.
+        #
+        # given:
+        #  $replacement_text
+        # return:
+        #  $rht = reference to any here-doc targets
+        my ($replacement_text) = @_;
+
+        # quick check
+        return undef unless ( $replacement_text =~ /<</ );
+
+        write_logfile_entry("scanning replacement text for here-doc targets\n");
+
+        # save the logger object for error messages
+        my $logger_object = $tokenizer_self->{_logger_object};
+
+        # localize all package variables
+        local (
+            $tokenizer_self,          $last_nonblank_token,
+            $last_nonblank_type,      $last_nonblank_block_type,
+            $statement_type,          $in_attribute_list,
+            $current_package,         $context,
+            %is_constant,             %is_user_function,
+            %user_function_prototype, %is_block_function,
+            %is_block_list_function,  %saw_function_definition,
+            $brace_depth,             $paren_depth,
+            $square_bracket_depth,    @current_depth,
+            @nesting_sequence_number, @current_sequence_number,
+            @paren_type,              @paren_semicolon_count,
+            @paren_structural_type,   @brace_type,
+            @brace_structural_type,   @brace_statement_type,
+            @brace_context,           @brace_package,
+            @square_bracket_type,     @square_bracket_structural_type,
+            @depth_array,             @starting_line_of_current_depth,
+        );
+
+        # save all lexical variables
+        my $rstate = save_tokenizer_state();
+        _decrement_count();    # avoid error check for multiple tokenizers
+
+        # make a new tokenizer
+        my $rOpts = {};
+        my $rpending_logfile_message;
+        my $source_object =
+          Perl::Tidy::LineSource->new( \$replacement_text, $rOpts,
+            $rpending_logfile_message );
+        my $tokenizer = Perl::Tidy::Tokenizer->new(
+            source_object        => $source_object,
+            logger_object        => $logger_object,
+            starting_line_number => $input_line_number,
+        );
+
+        # scan the replacement text
+        1 while ( $tokenizer->get_line() );
+
+        # remove any here doc targets
+        my $rht = undef;
+        if ( $tokenizer_self->{_in_here_doc} ) {
+            $rht = [];
+            push @{$rht},
+              [
+                $tokenizer_self->{_here_doc_target},
+                $tokenizer_self->{_here_quote_character}
+              ];
+            if ( $tokenizer_self->{_rhere_target_list} ) {
+                push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} };
+                $tokenizer_self->{_rhere_target_list} = undef;
+            }
+            $tokenizer_self->{_in_here_doc} = undef;
+        }
+
+        # now its safe to report errors
+        $tokenizer->report_tokenization_errors();
+
+        # restore all tokenizer lexical variables
+        restore_tokenizer_state($rstate);
+
+        # return the here doc targets
+        return $rht;
+    }
+
     sub scan_bare_identifier {
         ( $i, $tok, $type, $prototype ) =
           scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
-            $rtoken_map );
+            $rtoken_map, $max_token_index );
     }
 
     sub scan_identifier {
         ( $i, $tok, $type, $id_scan_state, $identifier ) =
-          scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens );
+          scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
+            $max_token_index );
     }
 
     sub scan_id {
         ( $i, $tok, $type, $id_scan_state ) =
           scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
-            $id_scan_state );
+            $id_scan_state, $max_token_index );
     }
 
-    my $number;
-
     sub scan_number {
+        my $number;
         ( $i, $type, $number ) =
-          scan_number_do( $input_line, $i, $rtoken_map, $type );
+          scan_number_do( $input_line, $i, $rtoken_map, $type,
+            $max_token_index );
+        return $number;
     }
 
     # a sub to warn if token found where term expected
     sub error_if_expecting_TERM {
         if ( $expecting == TERM ) {
             if ( $really_want_term{$last_nonblank_type} ) {
-                unexpected( $tok, "term", $i_tok, $last_nonblank_i );
+                unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map,
+                    $rtoken_type, $input_line );
                 1;
             }
         }
@@ -19962,7 +20619,8 @@ sub reset_indentation_level {
     sub error_if_expecting_OPERATOR {
         if ( $expecting == OPERATOR ) {
             my $thing = defined $_[0] ? $_[0] : $tok;
-            unexpected( $thing, "operator", $i_tok, $last_nonblank_i );
+            unexpected( $thing, "operator", $i_tok, $last_nonblank_i,
+                $rtoken_map, $rtoken_type, $input_line );
             if ( $i_tok == 0 ) {
                 interrupt_logfile();
                 warning("Missing ';' above?\n");
@@ -20028,6 +20686,7 @@ sub reset_indentation_level {
 ##      '||=' => undef,
 ##      '//=' => undef,
 ##      '~'   => undef,
+##      '~~'  => undef,
 
         '>' => sub {
             error_if_expecting_TERM()
@@ -20100,7 +20759,8 @@ sub reset_indentation_level {
                         # error; for example, we might have a constant pi and
                         # invoke it with pi() or just pi;
                         my ( $next_nonblank_token, $i_next ) =
-                          find_next_nonblank_token( $i, $rtokens );
+                          find_next_nonblank_token( $i, $rtokens,
+                            $max_token_index );
                         if ( $next_nonblank_token ne ')' ) {
                             my $hint;
                             error_if_expecting_OPERATOR('(');
@@ -20127,7 +20787,8 @@ sub reset_indentation_level {
                 } ## end if ( $expecting == OPERATOR...
             }
             $paren_type[$paren_depth] = $container_type;
-            $type_sequence = increase_nesting_depth( PAREN, $i_tok );
+            $type_sequence =
+              increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
 
             # propagate types down through nested parens
             # for example: the second paren in 'if ((' would be structural
@@ -20175,7 +20836,8 @@ sub reset_indentation_level {
 
         },
         ')' => sub {
-            $type_sequence = decrease_nesting_depth( PAREN, $i_tok );
+            $type_sequence =
+              decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
 
             if ( $paren_structural_type[$paren_depth] eq '{' ) {
                 $type = '}';
@@ -20256,7 +20918,8 @@ sub reset_indentation_level {
             if ( $expecting == UNKNOWN ) {    # indeterminte, must guess..
                 my $msg;
                 ( $is_pattern, $msg ) =
-                  guess_if_pattern_or_division( $i, $rtokens, $rtoken_map );
+                  guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
+                    $max_token_index );
 
                 if ($msg) {
                     write_diagnostics("DIVIDE:$msg\n");
@@ -20278,11 +20941,11 @@ sub reset_indentation_level {
                     $type = $tok;
                 }
 
-                #DEBUG - collecting info on what tokens follow a divide
-                # for development of guessing algorithm
-                #if ( numerator_expected( $i, $rtokens ) < 0 ) {
-                #    #write_diagnostics( "DIVIDE? $input_line\n" );
-                #}
+              #DEBUG - collecting info on what tokens follow a divide
+              # for development of guessing algorithm
+              #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) {
+              #    #write_diagnostics( "DIVIDE? $input_line\n" );
+              #}
             }
         },
         '{' => sub {
@@ -20371,15 +21034,17 @@ sub reset_indentation_level {
             # which will be blank for an anonymous hash
             else {
 
-                $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type );
+                $block_type =
+                  code_block_type( $i_tok, $rtokens, $rtoken_type,
+                    $max_token_index );
 
                 # patch to promote bareword type to function taking block
                 if (   $block_type
                     && $last_nonblank_type eq 'w'
                     && $last_nonblank_i >= 0 )
                 {
-                    if ( $output_token_type[$last_nonblank_i] eq 'w' ) {
-                        $output_token_type[$last_nonblank_i] = 'G';
+                    if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
+                        $routput_token_type->[$last_nonblank_i] = 'G';
                     }
                 }
 
@@ -20395,7 +21060,8 @@ sub reset_indentation_level {
             }
             $brace_type[ ++$brace_depth ] = $block_type;
             $brace_package[$brace_depth] = $current_package;
-            $type_sequence = increase_nesting_depth( BRACE, $i_tok );
+            $type_sequence =
+              increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
             $brace_structural_type[$brace_depth] = $type;
             $brace_context[$brace_depth]         = $context;
             $brace_statement_type[$brace_depth]  = $statement_type;
@@ -20410,7 +21076,8 @@ sub reset_indentation_level {
             # can happen on brace error (caught elsewhere)
             else {
             }
-            $type_sequence = decrease_nesting_depth( BRACE, $i_tok );
+            $type_sequence =
+              decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
 
             if ( $brace_structural_type[$brace_depth] eq 'L' ) {
                 $type = 'R';
@@ -20444,7 +21111,7 @@ sub reset_indentation_level {
             if ( $expecting != OPERATOR ) {
                 ( $i, $type ) =
                   find_angle_operator_termination( $input_line, $i, $rtoken_map,
-                    $expecting );
+                    $expecting, $max_token_index );
 
             }
             else {
@@ -20458,7 +21125,8 @@ sub reset_indentation_level {
 
                 my $msg;
                 ( $is_pattern, $msg ) =
-                  guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map );
+                  guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
+                    $max_token_index );
 
                 if ($msg) { write_logfile_entry($msg) }
             }
@@ -20470,9 +21138,9 @@ sub reset_indentation_level {
                 $allowed_quote_modifiers = '[cgimosx]';    # TBD:check this
             }
             else {
-
                 $type_sequence =
-                  increase_nesting_depth( QUESTION_COLON, $i_tok );
+                  increase_nesting_depth( QUESTION_COLON,
+                    $$rtoken_map[$i_tok] );
             }
         },
         '*' => sub {    # typeglob, or multiply?
@@ -20538,7 +21206,8 @@ sub reset_indentation_level {
             # otherwise, it should be part of a ?/: operator
             else {
                 $type_sequence =
-                  decrease_nesting_depth( QUESTION_COLON, $i_tok );
+                  decrease_nesting_depth( QUESTION_COLON,
+                    $$rtoken_map[$i_tok] );
                 if ( $last_nonblank_token eq '?' ) {
                     warning("Syntax error near ? :\n");
                 }
@@ -20547,7 +21216,7 @@ sub reset_indentation_level {
         '+' => sub {    # what kind of plus?
 
             if ( $expecting == TERM ) {
-                scan_number();
+                my $number = scan_number();
 
                 # unary plus is safest assumption if not a number
                 if ( !defined($number) ) { $type = 'p'; }
@@ -20577,7 +21246,8 @@ sub reset_indentation_level {
         '[' => sub {
             $square_bracket_type[ ++$square_bracket_depth ] =
               $last_nonblank_token;
-            $type_sequence = increase_nesting_depth( SQUARE_BRACKET, $i_tok );
+            $type_sequence =
+              increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
 
             # It may seem odd, but structural square brackets have
             # type '{' and '}'.  This simplifies the indentation logic.
@@ -20587,7 +21257,8 @@ sub reset_indentation_level {
             $square_bracket_structural_type[$square_bracket_depth] = $type;
         },
         ']' => sub {
-            $type_sequence = decrease_nesting_depth( SQUARE_BRACKET, $i_tok );
+            $type_sequence =
+              decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
 
             if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
             {
@@ -20605,7 +21276,7 @@ sub reset_indentation_level {
                 $type = 'F';
             }
             elsif ( $expecting == TERM ) {
-                scan_number();
+                my $number = scan_number();
 
                 # maybe part of bareword token? unary is safest
                 if ( !defined($number) ) { $type = 'm'; }
@@ -20661,12 +21332,17 @@ sub reset_indentation_level {
               ;          # here-doc not possible if end of line
 
             if ( $expecting != OPERATOR ) {
-                my ($found_target);
-                ( $found_target, $here_doc_target, $here_quote_character, $i ) =
-                  find_here_doc( $expecting, $i, $rtokens, $rtoken_map );
+                my ( $found_target, $here_doc_target, $here_quote_character,
+                    $saw_error );
+                (
+                    $found_target, $here_doc_target, $here_quote_character, $i,
+                    $saw_error
+                  )
+                  = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
+                    $max_token_index );
 
                 if ($found_target) {
-                    push @here_target_list,
+                    push @{$rhere_target_list},
                       [ $here_doc_target, $here_quote_character ];
                     $type = 'h';
                     if ( length($here_doc_target) > 80 ) {
@@ -20680,10 +21356,12 @@ sub reset_indentation_level {
                     }
                 }
                 elsif ( $expecting == TERM ) {
+                    unless ($saw_error) {
 
-                    # shouldn't happen..
-                    warning("Program bug; didn't find here doc target\n");
-                    report_definite_bug();
+                        # shouldn't happen..
+                        warning("Program bug; didn't find here doc target\n");
+                        report_definite_bug();
+                    }
                 }
             }
             else {
@@ -20701,7 +21379,7 @@ sub reset_indentation_level {
             if ( $expecting == TERM ) { $type = 'pp' }
             elsif ( $expecting == UNKNOWN ) {
                 my ( $next_nonblank_token, $i_next ) =
-                  find_next_nonblank_token( $i, $rtokens );
+                  find_next_nonblank_token( $i, $rtokens, $max_token_index );
                 if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
             }
         },
@@ -20722,7 +21400,7 @@ sub reset_indentation_level {
             if ( $expecting == TERM ) { $type = 'mm' }
             elsif ( $expecting == UNKNOWN ) {
                 my ( $next_nonblank_token, $i_next ) =
-                  find_next_nonblank_token( $i, $rtokens );
+                  find_next_nonblank_token( $i, $rtokens, $max_token_index );
                 if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
             }
         },
@@ -20921,6 +21599,9 @@ sub reset_indentation_level {
   # *, then run diff between the output of the previous version and the
   # current version.
   #
+  # *. For another example, search for the smartmatch operator '~~'
+  # with your editor to see where updates were made for it.
+  #
   # -----------------------------------------------------------------------
 
         my $line_of_tokens = shift;
@@ -20933,6 +21614,9 @@ sub reset_indentation_level {
         # extract line number for use in error messages
         $input_line_number = $line_of_tokens->{_line_number};
 
+        # reinitialize for multi-line quote
+        $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q';
+
         # check for pod documentation
         if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
 
@@ -20956,12 +21640,18 @@ sub reset_indentation_level {
             $input_line =~ s/^\s*//;    # trim left end
         }
 
+        # update the copy of the line for use in error messages
+        # This must be exactly what we give the pre_tokenizer
+        $tokenizer_self->{_line_text} = $input_line;
+
         # re-initialize for the main loop
-        @output_token_list     = ();    # stack of output token indexes
-        @output_token_type     = ();    # token types
-        @output_block_type     = ();    # types of code block
-        @output_container_type = ();    # paren types, such as if, elsif, ..
-        @output_type_sequence  = ();    # nesting sequential number
+        $routput_token_list     = [];    # stack of output token indexes
+        $routput_token_type     = [];    # token types
+        $routput_block_type     = [];    # types of code block
+        $routput_container_type = [];    # paren types, such as if, elsif, ..
+        $routput_type_sequence  = [];    # nesting sequential number
+
+        $rhere_target_list = [];
 
         $tok             = $last_nonblank_token;
         $type            = $last_nonblank_type;
@@ -20970,9 +21660,7 @@ sub reset_indentation_level {
         $block_type      = $last_nonblank_block_type;
         $container_type  = $last_nonblank_container_type;
         $type_sequence   = $last_nonblank_type_sequence;
-        @here_target_list = ();         # list of here-doc target strings
-
-        $peeked_ahead = 0;
+        $peeked_ahead    = 0;
 
         # tokenization is done in two stages..
         # stage 1 is a very simple pre-tokenization
@@ -20984,24 +21672,20 @@ sub reset_indentation_level {
         }
 
         # start by breaking the line into pre-tokens
-        ( $rpretokens, $rpretoken_map, $rpretoken_type ) =
+        ( $rtokens, $rtoken_map, $rtoken_type ) =
           pre_tokenize( $input_line, $max_tokens_wanted );
 
-        $max_token_index = scalar(@$rpretokens) - 1;
-        push( @$rpretokens, ' ', ' ', ' ' ); # extra whitespace simplifies logic
-        push( @$rpretoken_map,  0,   0,   0 );     # shouldn't be referenced
-        push( @$rpretoken_type, 'b', 'b', 'b' );
-
-        # temporary copies while coding change is underway
-        ( $rtokens, $rtoken_map, $rtoken_type ) =
-          ( $rpretokens, $rpretoken_map, $rpretoken_type );
+        $max_token_index = scalar(@$rtokens) - 1;
+        push( @$rtokens,    ' ', ' ', ' ' ); # extra whitespace simplifies logic
+        push( @$rtoken_map, 0,   0,   0 );   # shouldn't be referenced
+        push( @$rtoken_type, 'b', 'b', 'b' );
 
         # initialize for main loop
         for $i ( 0 .. $max_token_index + 3 ) {
-            $output_token_type[$i]     = "";
-            $output_block_type[$i]     = "";
-            $output_container_type[$i] = "";
-            $output_type_sequence[$i]  = "";
+            $routput_token_type->[$i]     = "";
+            $routput_block_type->[$i]     = "";
+            $routput_container_type->[$i] = "";
+            $routput_type_sequence->[$i]  = "";
         }
         $i     = -1;
         $i_tok = -1;
@@ -21017,25 +21701,39 @@ sub reset_indentation_level {
             if ($in_quote) {    # continue looking for end of a quote
                 $type = $quote_type;
 
-                unless (@output_token_list) {  # initialize if continuation line
-                    push( @output_token_list, $i );
-                    $output_token_type[$i] = $type;
+                unless ( @{$routput_token_list} )
+                {               # initialize if continuation line
+                    push( @{$routput_token_list}, $i );
+                    $routput_token_type->[$i] = $type;
 
                 }
                 $tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
 
                 # scan for the end of the quote or pattern
-                ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
-                  do_quote( $i, $in_quote, $quote_character, $quote_pos,
-                    $quote_depth, $rtokens, $rtoken_map );
+                (
+                    $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
+                    $quoted_string_1, $quoted_string_2
+                  )
+                  = do_quote(
+                    $i,               $in_quote,    $quote_character,
+                    $quote_pos,       $quote_depth, $quoted_string_1,
+                    $quoted_string_2, $rtokens,     $rtoken_map,
+                    $max_token_index
+                  );
 
                 # all done if we didn't find it
                 last if ($in_quote);
 
+                # save pattern and replacement text for rescanning
+                my $qs1 = $quoted_string_1;
+                my $qs2 = $quoted_string_2;
+
                 # re-initialize for next search
                 $quote_character = '';
                 $quote_pos       = 0;
                 $quote_type      = 'Q';
+                $quoted_string_1 = "";
+                $quoted_string_2 = "";
                 last if ( ++$i > $max_token_index );
 
                 # look for any modifiers
@@ -21044,7 +21742,32 @@ sub reset_indentation_level {
                     # check for exact quote modifiers
                     if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {
                         my $str = $$rtokens[$i];
-                        while ( $str =~ /\G$allowed_quote_modifiers/gc ) { }
+                        my $saw_modifier_e;
+                        while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
+                            my $pos = pos($str);
+                            my $char = substr( $str, $pos - 1, 1 );
+                            $saw_modifier_e ||= ( $char eq 'e' );
+                        }
+
+                        # For an 'e' quote modifier we must scan the replacement
+                        # text for here-doc targets.
+                        if ($saw_modifier_e) {
+
+                            my $rht = scan_replacement_text($qs1);
+
+                            # Change type from 'Q' to 'h' for quotes with
+                            # here-doc targets so that the formatter (see sub
+                            # print_line_of_tokens) will not make any line
+                            # breaks after this point.
+                            if ($rht) {
+                                push @{$rhere_target_list}, @{$rht};
+                                $type = 'h';
+                                if ( $i_tok < 0 ) {
+                                    my $ilast = $routput_token_list->[-1];
+                                    $routput_token_type->[$ilast] = $type;
+                                }
+                            }
+                        }
 
                         if ( defined( pos($str) ) ) {
 
@@ -21108,9 +21831,9 @@ EOM
                     }
                 }
 
-                $last_last_nonblank_token          = $last_nonblank_token;
-                $last_last_nonblank_type           = $last_nonblank_type;
-                $last_last_nonblank_block_type     = $last_nonblank_block_type;
+                $last_last_nonblank_token      = $last_nonblank_token;
+                $last_last_nonblank_type       = $last_nonblank_type;
+                $last_last_nonblank_block_type = $last_nonblank_block_type;
                 $last_last_nonblank_container_type =
                   $last_nonblank_container_type;
                 $last_last_nonblank_type_sequence =
@@ -21126,10 +21849,10 @@ EOM
 
             # store previous token type
             if ( $i_tok >= 0 ) {
-                $output_token_type[$i_tok]     = $type;
-                $output_block_type[$i_tok]     = $block_type;
-                $output_container_type[$i_tok] = $container_type;
-                $output_type_sequence[$i_tok]  = $type_sequence;
+                $routput_token_type->[$i_tok]     = $type;
+                $routput_block_type->[$i_tok]     = $block_type;
+                $routput_container_type->[$i_tok] = $container_type;
+                $routput_type_sequence->[$i_tok]  = $type_sequence;
             }
             my $pre_tok  = $$rtokens[$i];        # get the next pre-token
             my $pre_type = $$rtoken_type[$i];    # and type
@@ -21142,7 +21865,7 @@ EOM
             $i_tok     = $i;
 
             # this pre-token will start an output token
-            push( @output_token_list, $i_tok );
+            push( @{$routput_token_list}, $i_tok );
 
             # continue gathering identifier if necessary
             # but do not start on blanks and comments
@@ -21246,7 +21969,7 @@ EOM
             if ( $pre_type eq 'w' ) {
                 $expecting = operator_expected( $prev_type, $tok, $next_type );
                 my ( $next_nonblank_token, $i_next ) =
-                  find_next_nonblank_token( $i, $rtokens );
+                  find_next_nonblank_token( $i, $rtokens, $max_token_index );
 
                 # ATTRS: handle sub and variable attributes
                 if ($in_attribute_list) {
@@ -21275,13 +21998,13 @@ EOM
                             $type = 'C';
                         }
                         elsif ( $is_user_function{$current_package}{$tok} ) {
-                            $type      = 'U';
+                            $type = 'U';
                             $prototype =
                               $user_function_prototype{$current_package}{$tok};
                         }
                         elsif ( $tok =~ /^v\d+$/ ) {
                             $type = 'v';
-                            unless ($saw_v_string) { report_v_string($tok) }
+                            report_v_string($tok);
                         }
                         else { $type = 'w' }
 
@@ -21385,7 +22108,8 @@ EOM
                 {
                     scan_bare_identifier();
                     my ( $next_nonblank_token, $i_next ) =
-                      find_next_nonblank_token( $i, $rtokens );
+                      find_next_nonblank_token( $i, $rtokens,
+                        $max_token_index );
 
                     if ($next_nonblank_token) {
 
@@ -21442,7 +22166,8 @@ EOM
                   )
                 {
                     if ( $tok !~ /A-Z/ ) {
-                        push @lower_case_labels_at, $input_line_number;
+                        push @{ $tokenizer_self->{_rlower_case_labels_at} },
+                          $input_line_number;
                     }
                     $type = 'J';
                     $tok .= ':';
@@ -21583,12 +22308,9 @@ EOM
                             $type = 'U';
                         }
 
-                        # mark bare words following a file test operator as
-                        # something that will expect an operator next.
-                        # patch 072901: unless followed immediately by a paren,
-                        # in which case it must be a function call (pid.t)
-                        if ( $last_nonblank_type eq 'F' && $next_tok ne '(' ) {
-                            $type = 'C';
+                        # underscore after file test operator is file handle
+                        if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
+                            $type = 'Z';
                         }
 
                         # patch for SWITCH/CASE if 'case' and 'when are
@@ -21627,7 +22349,7 @@ EOM
                 $expecting = operator_expected( $prev_type, $tok, $next_type );
                 error_if_expecting_OPERATOR("Number")
                   if ( $expecting == OPERATOR );
-                scan_number();
+                my $number = scan_number();
                 if ( !defined($number) ) {
 
                     # shouldn't happen - we should always get a number
@@ -21657,10 +22379,10 @@ EOM
         # -----------------------------
 
         if ( $i_tok >= 0 ) {
-            $output_token_type[$i_tok]     = $type;
-            $output_block_type[$i_tok]     = $block_type;
-            $output_container_type[$i_tok] = $container_type;
-            $output_type_sequence[$i_tok]  = $type_sequence;
+            $routput_token_type->[$i_tok]     = $type;
+            $routput_block_type->[$i_tok]     = $block_type;
+            $routput_container_type->[$i_tok] = $container_type;
+            $routput_type_sequence->[$i_tok]  = $type_sequence;
         }
 
         unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
@@ -21763,7 +22485,7 @@ EOM
 #       indentation level, if it is is appropriate for list formatting.
 #       If so, continuation indentation is used to indent long list items.
 #     $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
-#     @slevel_stack = a stack of total nesting depths at each
+#     @{$rslevel_stack} = a stack of total nesting depths at each
 #       structural indentation level, where "total nesting depth" means
 #       the nesting depth that would occur if every nesting token -- '{', '[',
 #       and '(' -- , regardless of context, is used to compute a nesting
@@ -21776,10 +22498,11 @@ EOM
             $nesting_list_string_i, $nesting_token_string_i,
             $nesting_type_string_i, );
 
-        foreach $i (@output_token_list) {  # scan the list of pre-tokens indexes
+        foreach $i ( @{$routput_token_list} )
+        {    # scan the list of pre-tokens indexes
 
             # self-checking for valid token types
-            my $type = $output_token_type[$i];
+            my $type = $routput_token_type->[$i];
             my $tok = $$rtokens[$i];   # the token, but ONLY if same as pretoken
             $level_i = $level_in_tokenizer;
 
@@ -21820,15 +22543,15 @@ EOM
                 $container_environment =
                     $nesting_block_flag ? 'BLOCK'
                   : $nesting_list_flag  ? 'LIST'
-                  : "";
+                  :                       "";
 
                 # if the difference between total nesting levels is not 1,
                 # there are intervening non-structural nesting types between
                 # this '{' and the previous unclosed '{'
                 my $intervening_secondary_structure = 0;
-                if (@slevel_stack) {
+                if ( @{$rslevel_stack} ) {
                     $intervening_secondary_structure =
-                      $slevel_in_tokenizer - $slevel_stack[-1];
+                      $slevel_in_tokenizer - $rslevel_stack->[-1];
                 }
 
      # =head1 Continuation Indentation
@@ -21878,10 +22601,10 @@ EOM
      # variable.
 
                 # save the current states
-                push( @slevel_stack, 1 + $slevel_in_tokenizer );
+                push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
                 $level_in_tokenizer++;
 
-                if ( $output_block_type[$i] ) {
+                if ( $routput_block_type->[$i] ) {
                     $nesting_block_flag = 1;
                     $nesting_block_string .= '1';
                 }
@@ -21893,10 +22616,10 @@ EOM
                 # we will use continuation indentation within containers
                 # which are not blocks and not logical expressions
                 my $bit = 0;
-                if ( !$output_block_type[$i] ) {
+                if ( !$routput_block_type->[$i] ) {
 
                     # propagate flag down at nested open parens
-                    if ( $output_container_type[$i] eq '(' ) {
+                    if ( $routput_container_type->[$i] eq '(' ) {
                         $bit = 1 if $nesting_list_flag;
                     }
 
@@ -21905,7 +22628,8 @@ EOM
                     else {
                         $bit = 1
                           unless
-                          $is_logical_container{ $output_container_type[$i] };
+                          $is_logical_container{ $routput_container_type->[$i]
+                          };
                     }
                 }
                 $nesting_list_string .= $bit;
@@ -21936,7 +22660,7 @@ EOM
 
                 my $total_ci = $ci_string_sum;
                 if (
-                    !$output_block_type[$i]    # patch: skip for BLOCK
+                    !$routput_block_type->[$i]    # patch: skip for BLOCK
                     && ($in_statement_continuation)
                   )
                 {
@@ -21951,7 +22675,7 @@ EOM
             elsif ( $type eq '}' || $type eq 'R' ) {
 
                 # only a nesting error in the script would prevent popping here
-                if ( @slevel_stack > 1 ) { pop(@slevel_stack); }
+                if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
 
                 $level_i = --$level_in_tokenizer;
 
@@ -21972,15 +22696,16 @@ EOM
 
                     # zero continuation flag at terminal BLOCK '}' which
                     # ends a statement.
-                    if ( $output_block_type[$i] ) {
+                    if ( $routput_block_type->[$i] ) {
 
                         # ...These include non-anonymous subs
                         # note: could be sub ::abc { or sub 'abc
-                        if ( $output_block_type[$i] =~ m/^sub\s*/gc ) {
+                        if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) {
 
                          # note: older versions of perl require the /gc modifier
                          # here or else the \G does not work.
-                            if ( $output_block_type[$i] =~ /\G('|::|\w)/gc ) {
+                            if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc )
+                            {
                                 $in_statement_continuation = 0;
                             }
                         }
@@ -21989,8 +22714,8 @@ EOM
 # block prototypes and these: (sort|grep|map|do|eval)
 # /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
                         elsif (
-                            $is_zero_continuation_block_type{ $output_block_type
-                                  [$i] } )
+                            $is_zero_continuation_block_type{
+                                $routput_block_type->[$i] } )
                         {
                             $in_statement_continuation = 0;
                         }
@@ -21999,18 +22724,19 @@ EOM
                         #     /^(sort|grep|map|do|eval)$/ )
                         elsif (
                             $is_not_zero_continuation_block_type{
-                                $output_block_type[$i] } )
+                                $routput_block_type->[$i] } )
                         {
                         }
 
                         # ..and a block introduced by a label
                         # /^\w+\s*:$/gc ) {
-                        elsif ( $output_block_type[$i] =~ /:$/ ) {
+                        elsif ( $routput_block_type->[$i] =~ /:$/ ) {
                             $in_statement_continuation = 0;
                         }
 
-                        # ..nor user function with block prototype
+                        # user function with block prototype
                         else {
+                            $in_statement_continuation = 0;
                         }
                     }
 
@@ -22026,7 +22752,7 @@ EOM
                     #     );
                     elsif ( $tok eq ')' ) {
                         $in_statement_continuation = 1
-                          if $output_container_type[$i] =~ /^[;,\{\}]$/;
+                          if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
                     }
                 }
 
@@ -22034,7 +22760,7 @@ EOM
                 $container_environment =
                     $nesting_block_flag ? 'BLOCK'
                   : $nesting_list_flag  ? 'LIST'
-                  : "";
+                  :                       "";
                 $ci_string_i = $ci_string_sum + $in_statement_continuation;
                 $nesting_block_string_i = $nesting_block_string;
                 $nesting_list_string_i  = $nesting_list_string;
@@ -22046,7 +22772,7 @@ EOM
                 $container_environment =
                     $nesting_block_flag ? 'BLOCK'
                   : $nesting_list_flag  ? 'LIST'
-                  : "";
+                  :                       "";
 
                 # zero the continuation indentation at certain tokens so
                 # that they will be at the same level as its container.  For
@@ -22113,8 +22839,8 @@ EOM
             }
 
             if ( $level_in_tokenizer < 0 ) {
-                unless ($saw_negative_indentation) {
-                    $saw_negative_indentation = 1;
+                unless ( $tokenizer_self->{_saw_negative_indentation} ) {
+                    $tokenizer_self->{_saw_negative_indentation} = 1;
                     warning("Starting negative indentation\n");
                 }
             }
@@ -22146,16 +22872,16 @@ EOM
                 }
             }
 
-            push( @block_type,            $output_block_type[$i] );
+            push( @block_type,            $routput_block_type->[$i] );
             push( @ci_string,             $ci_string_i );
             push( @container_environment, $container_environment );
-            push( @container_type,        $output_container_type[$i] );
+            push( @container_type,        $routput_container_type->[$i] );
             push( @levels,                $level_i );
             push( @nesting_tokens,        $nesting_token_string_i );
             push( @nesting_types,         $nesting_type_string_i );
             push( @slevels,               $slevel_i );
             push( @token_type,            $fix_type );
-            push( @type_sequence,         $output_type_sequence[$i] );
+            push( @type_sequence,         $routput_type_sequence->[$i] );
             push( @nesting_blocks,        $nesting_block_string );
             push( @nesting_lists,         $nesting_list_string );
 
@@ -22179,7 +22905,9 @@ EOM
 
         $tokenizer_self->{_in_attribute_list} = $in_attribute_list;
         $tokenizer_self->{_in_quote}          = $in_quote;
-        $tokenizer_self->{_rhere_target_list} = \@here_target_list;
+        $tokenizer_self->{_quote_target} =
+          $in_quote ? matching_end_token($quote_character) : "";
+        $tokenizer_self->{_rhere_target_list} = $rhere_target_list;
 
         $line_of_tokens->{_rtoken_type}            = \@token_type;
         $line_of_tokens->{_rtokens}                = \@tokens;
@@ -22197,51 +22925,263 @@ EOM
     }
 }    # end tokenize_this_line
 
-sub new_statement_ok {
-
-    # return true if the current token can start a new statement
+#########i#############################################################
+# Tokenizer routines which assist in identifying token types
+#######################################################################
 
-    return label_ok()    # a label would be ok here
+sub operator_expected {
 
-      || $last_nonblank_type eq 'J';    # or we follow a label
+    # Many perl symbols have two or more meanings.  For example, '<<'
+    # can be a shift operator or a here-doc operator.  The
+    # interpretation of these symbols depends on the current state of
+    # the tokenizer, which may either be expecting a term or an
+    # operator.  For this example, a << would be a shift if an operator
+    # is expected, and a here-doc if a term is expected.  This routine
+    # is called to make this decision for any current token.  It returns
+    # one of three possible values:
+    #
+    #     OPERATOR - operator expected (or at least, not a term)
+    #     UNKNOWN  - can't tell
+    #     TERM     - a term is expected (or at least, not an operator)
+    #
+    # The decision is based on what has been seen so far.  This
+    # information is stored in the "$last_nonblank_type" and
+    # "$last_nonblank_token" variables.  For example, if the
+    # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
+    # if $last_nonblank_type is 'n' (numeric), we are expecting an
+    # OPERATOR.
+    #
+    # If a UNKNOWN is returned, the calling routine must guess. A major
+    # goal of this tokenizer is to minimize the possiblity of returning
+    # UNKNOWN, because a wrong guess can spoil the formatting of a
+    # script.
+    #
+    # adding NEW_TOKENS: it is critically important that this routine be
+    # updated to allow it to determine if an operator or term is to be
+    # expected after the new token.  Doing this simply involves adding
+    # the new token character to one of the regexes in this routine or
+    # to one of the hash lists
+    # that it uses, which are initialized in the BEGIN section.
+    # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
+    # $statement_type
 
-}
+    my ( $prev_type, $tok, $next_type ) = @_;
 
-sub label_ok {
+    my $op_expected = UNKNOWN;
 
-    # Decide if a bare word followed by a colon here is a label
+#print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";
 
-    # if it follows an opening or closing code block curly brace..
-    if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
-        && $last_nonblank_type eq $last_nonblank_token )
-    {
+# Note: function prototype is available for token type 'U' for future
+# program development.  It contains the leading and trailing parens,
+# and no blanks.  It might be used to eliminate token type 'C', for
+# example (prototype = '()'). Thus:
+# if ($last_nonblank_type eq 'U') {
+#     print "previous token=$last_nonblank_token  type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
+# }
 
-        # it is a label if and only if the curly encloses a code block
-        return $brace_type[$brace_depth];
-    }
+    # A possible filehandle (or object) requires some care...
+    if ( $last_nonblank_type eq 'Z' ) {
 
-    # otherwise, it is a label if and only if it follows a ';'
-    # (real or fake)
-    else {
-        return ( $last_nonblank_type eq ';' );
-    }
-}
+        # angle.t
+        if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
+            $op_expected = UNKNOWN;
+        }
 
-sub code_block_type {
+        # For possible file handle like "$a", Perl uses weird parsing rules.
+        # For example:
+        # print $a/2,"/hi";   - division
+        # print $a / 2,"/hi"; - division
+        # print $a/ 2,"/hi";  - division
+        # print $a /2,"/hi";  - pattern (and error)!
+        elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
+            $op_expected = TERM;
+        }
 
-    # Decide if this is a block of code, and its type.
-    # Must be called only when $type = $token = '{'
-    # The problem is to distinguish between the start of a block of code
-    # and the start of an anonymous hash reference
-    # Returns "" if not code block, otherwise returns 'last_nonblank_token'
-    # to indicate the type of code block.  (For example, 'last_nonblank_token'
-    # might be 'if' for an if block, 'else' for an else block, etc).
+        # Note when an operation is being done where a
+        # filehandle might be expected, since a change in whitespace
+        # could change the interpretation of the statement.
+        else {
+            if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
+                complain("operator in print statement not recommended\n");
+                $op_expected = OPERATOR;
+            }
+        }
+    }
 
-    # handle case of multiple '{'s
+    # handle something after 'do' and 'eval'
+    elsif ( $is_block_operator{$last_nonblank_token} ) {
+
+        # something like $a = eval "expression";
+        #                          ^
+        if ( $last_nonblank_type eq 'k' ) {
+            $op_expected = TERM;    # expression or list mode following keyword
+        }
+
+        # something like $a = do { BLOCK } / 2;
+        #                                  ^
+        else {
+            $op_expected = OPERATOR;    # block mode following }
+        }
+    }
+
+    # handle bare word..
+    elsif ( $last_nonblank_type eq 'w' ) {
+
+        # unfortunately, we can't tell what type of token to expect next
+        # after most bare words
+        $op_expected = UNKNOWN;
+    }
+
+    # operator, but not term possible after these types
+    # Note: moved ')' from type to token because parens in list context
+    # get marked as '{' '}' now.  This is a minor glitch in the following:
+    #    my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
+    #
+    elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
+        || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
+    {
+        $op_expected = OPERATOR;
+
+        # in a 'use' statement, numbers and v-strings are not true
+        # numbers, so to avoid incorrect error messages, we will
+        # mark them as unknown for now (use.t)
+        # TODO: it would be much nicer to create a new token V for VERSION
+        # number in a use statement.  Then this could be a check on type V
+        # and related patches which change $statement_type for '=>'
+        # and ',' could be removed.  Further, it would clean things up to
+        # scan the 'use' statement with a separate subroutine.
+        if (   ( $statement_type eq 'use' )
+            && ( $last_nonblank_type =~ /^[nv]$/ ) )
+        {
+            $op_expected = UNKNOWN;
+        }
+    }
+
+    # no operator after many keywords, such as "die", "warn", etc
+    elsif ( $expecting_term_token{$last_nonblank_token} ) {
+
+        # patch for dor.t (defined or).
+        # perl functions which may be unary operators
+        # TODO: This list is incomplete, and these should be put
+        # into a hash.
+        if (   $tok eq '/'
+            && $next_type          eq '/'
+            && $last_nonblank_type eq 'k'
+            && $last_nonblank_token =~ /^eof|undef|shift|pop$/ )
+        {
+            $op_expected = OPERATOR;
+        }
+        else {
+            $op_expected = TERM;
+        }
+    }
+
+    # no operator after things like + - **  (i.e., other operators)
+    elsif ( $expecting_term_types{$last_nonblank_type} ) {
+        $op_expected = TERM;
+    }
+
+    # a few operators, like "time", have an empty prototype () and so
+    # take no parameters but produce a value to operate on
+    elsif ( $expecting_operator_token{$last_nonblank_token} ) {
+        $op_expected = OPERATOR;
+    }
+
+    # post-increment and decrement produce values to be operated on
+    elsif ( $expecting_operator_types{$last_nonblank_type} ) {
+        $op_expected = OPERATOR;
+    }
+
+    # no value to operate on after sub block
+    elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
+
+    # a right brace here indicates the end of a simple block.
+    # all non-structural right braces have type 'R'
+    # all braces associated with block operator keywords have been given those
+    # keywords as "last_nonblank_token" and caught above.
+    # (This statement is order dependent, and must come after checking
+    # $last_nonblank_token).
+    elsif ( $last_nonblank_type eq '}' ) {
+
+        # patch for dor.t (defined or).
+        if (   $tok eq '/'
+            && $next_type           eq '/'
+            && $last_nonblank_token eq ']' )
+        {
+            $op_expected = OPERATOR;
+        }
+        else {
+            $op_expected = TERM;
+        }
+    }
+
+    # something else..what did I forget?
+    else {
+
+        # collecting diagnostics on unknown operator types..see what was missed
+        $op_expected = UNKNOWN;
+        write_diagnostics(
+"OP: unknown after type=$last_nonblank_type  token=$last_nonblank_token\n"
+        );
+    }
+
+    TOKENIZER_DEBUG_FLAG_EXPECT && do {
+        print
+"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
+    };
+    return $op_expected;
+}
+
+sub new_statement_ok {
+
+    # return true if the current token can start a new statement
+    # USES GLOBAL VARIABLES: $last_nonblank_type
+
+    return label_ok()    # a label would be ok here
+
+      || $last_nonblank_type eq 'J';    # or we follow a label
+
+}
+
+sub label_ok {
+
+    # Decide if a bare word followed by a colon here is a label
+    # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
+    # $brace_depth, @brace_type
+
+    # if it follows an opening or closing code block curly brace..
+    if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
+        && $last_nonblank_type eq $last_nonblank_token )
+    {
+
+        # it is a label if and only if the curly encloses a code block
+        return $brace_type[$brace_depth];
+    }
+
+    # otherwise, it is a label if and only if it follows a ';'
+    # (real or fake)
+    else {
+        return ( $last_nonblank_type eq ';' );
+    }
+}
+
+sub code_block_type {
+
+    # Decide if this is a block of code, and its type.
+    # Must be called only when $type = $token = '{'
+    # The problem is to distinguish between the start of a block of code
+    # and the start of an anonymous hash reference
+    # Returns "" if not code block, otherwise returns 'last_nonblank_token'
+    # to indicate the type of code block.  (For example, 'last_nonblank_token'
+    # might be 'if' for an if block, 'else' for an else block, etc).
+    # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
+    # $last_nonblank_block_type, $brace_depth, @brace_type
+
+    # handle case of multiple '{'s
 
 # print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
 
-    my ( $i, $rtokens, $rtoken_type ) = @_;
+    my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
     if (   $last_nonblank_token eq '{'
         && $last_nonblank_type eq $last_nonblank_token )
     {
@@ -22249,7 +23189,8 @@ sub code_block_type {
         # opening brace where a statement may appear is probably
         # a code block but might be and anonymous hash reference
         if ( $brace_type[$brace_depth] ) {
-            return decide_if_code_block( $i, $rtokens, $rtoken_type );
+            return decide_if_code_block( $i, $rtokens, $rtoken_type,
+                $max_token_index );
         }
 
         # cannot start a code block within an anonymous hash
@@ -22262,7 +23203,8 @@ sub code_block_type {
 
         # an opening brace where a statement may appear is probably
         # a code block but might be and anonymous hash reference
-        return decide_if_code_block( $i, $rtokens, $rtoken_type );
+        return decide_if_code_block( $i, $rtokens, $rtoken_type,
+            $max_token_index );
     }
 
     # handle case of '}{'
@@ -22273,7 +23215,8 @@ sub code_block_type {
         # a } { situation ...
         # could be hash reference after code block..(blktype1.t)
         if ($last_nonblank_block_type) {
-            return decide_if_code_block( $i, $rtokens, $rtoken_type );
+            return decide_if_code_block( $i, $rtokens, $rtoken_type,
+                $max_token_index );
         }
 
         # must be a block if it follows a closing hash reference
@@ -22315,7 +23258,8 @@ sub code_block_type {
 
     # check bareword
     elsif ( $last_nonblank_type eq 'w' ) {
-        return decide_if_code_block( $i, $rtokens, $rtoken_type );
+        return decide_if_code_block( $i, $rtokens, $rtoken_type,
+            $max_token_index );
     }
 
     # anything else must be anonymous hash reference
@@ -22326,9 +23270,10 @@ sub code_block_type {
 
 sub decide_if_code_block {
 
-    my ( $i, $rtokens, $rtoken_type ) = @_;
+    # USES GLOBAL VARIABLES: $last_nonblank_token
+    my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
     my ( $next_nonblank_token, $i_next ) =
-      find_next_nonblank_token( $i, $rtokens );
+      find_next_nonblank_token( $i, $rtokens, $max_token_index );
 
     # we are at a '{' where a statement may appear.
     # We must decide if this brace starts an anonymous hash or a code
@@ -22430,12 +23375,16 @@ sub decide_if_code_block {
 sub unexpected {
 
     # report unexpected token type and show where it is
-    my ( $found, $expecting, $i_tok, $last_nonblank_i ) = @_;
-    $unexpected_error_count++;
-    if ( $unexpected_error_count <= MAX_NAG_MESSAGES ) {
+    # USES GLOBAL VARIABLES: $tokenizer_self
+    my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map,
+        $rpretoken_type, $input_line )
+      = @_;
+
+    if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) {
         my $msg = "found $found where $expecting expected";
         my $pos = $$rpretoken_map[$i_tok];
         interrupt_logfile();
+        my $input_line_number = $tokenizer_self->{_last_line_number};
         my ( $offset, $numbered_line, $underline ) =
           make_numbered_line( $input_line_number, $input_line, $pos );
         $underline = write_on_underline( $underline, $pos - $offset, '^' );
@@ -22463,1606 +23412,1367 @@ sub unexpected {
     }
 }
 
-sub indicate_error {
-    my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
-    interrupt_logfile();
-    warning($msg);
-    write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
-    resume_logfile();
-}
+sub is_non_structural_brace {
 
-sub write_error_indicator_pair {
-    my ( $line_number, $input_line, $pos, $carrat ) = @_;
-    my ( $offset, $numbered_line, $underline ) =
-      make_numbered_line( $line_number, $input_line, $pos );
-    $underline = write_on_underline( $underline, $pos - $offset, $carrat );
-    warning( $numbered_line . "\n" );
-    $underline =~ s/\s*$//;
-    warning( $underline . "\n" );
-}
+    # Decide if a brace or bracket is structural or non-structural
+    # by looking at the previous token and type
+    # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token
 
-sub make_numbered_line {
+    # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
+    # Tentatively deactivated because it caused the wrong operator expectation
+    # for this code:
+    #      $user = @vars[1] / 100;
+    # Must update sub operator_expected before re-implementing.
+    # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
+    #    return 0;
+    # }
 
-    #  Given an input line, its line number, and a character position of
-    #  interest, create a string not longer than 80 characters of the form
-    #     $lineno: sub_string
-    #  such that the sub_string of $str contains the position of interest
-    #
-    #  Here is an example of what we want, in this case we add trailing
-    #  '...' because the line is long.
-    #
-    # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
-    #
-    #  Here is another example, this time in which we used leading '...'
-    #  because of excessive length:
-    #
-    # 2: ... er of the World Wide Web Consortium's
-    #
-    #  input parameters are:
-    #   $lineno = line number
-    #   $str = the text of the line
-    #   $pos = position of interest (the error) : 0 = first character
-    #
-    #   We return :
-    #     - $offset = an offset which corrects the position in case we only
-    #       display part of a line, such that $pos-$offset is the effective
-    #       position from the start of the displayed line.
-    #     - $numbered_line = the numbered line as above,
-    #     - $underline = a blank 'underline' which is all spaces with the same
-    #       number of characters as the numbered line.
+    # NOTE: braces after type characters start code blocks, but for
+    # simplicity these are not identified as such.  See also
+    # sub code_block_type
+    # if ($last_nonblank_type eq 't') {return 0}
 
-    my ( $lineno, $str, $pos ) = @_;
-    my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
-    my $excess = length($str) - $offset - 68;
-    my $numc   = ( $excess > 0 ) ? 68 : undef;
+    # otherwise, it is non-structural if it is decorated
+    # by type information.
+    # For example, the '{' here is non-structural:   ${xxx}
+    (
+        $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
 
-    if ( defined($numc) ) {
-        if ( $offset == 0 ) {
-            $str = substr( $str, $offset, $numc - 4 ) . " ...";
-        }
-        else {
-            $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
-        }
-    }
-    else {
+          # or if we follow a hash or array closing curly brace or bracket
+          # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
+          # because the first '}' would have been given type 'R'
+          || $last_nonblank_type =~ /^([R\]])$/
+    );
+}
 
-        if ( $offset == 0 ) {
-        }
-        else {
-            $str = "... " . substr( $str, $offset + 4 );
-        }
-    }
+#########i#############################################################
+# Tokenizer routines for tracking container nesting depths
+#######################################################################
 
-    my $numbered_line = sprintf( "%d: ", $lineno );
-    $offset -= length($numbered_line);
-    $numbered_line .= $str;
-    my $underline = " " x length($numbered_line);
-    return ( $offset, $numbered_line, $underline );
-}
+# The following routines keep track of nesting depths of the nesting
+# types, ( [ { and ?.  This is necessary for determining the indentation
+# level, and also for debugging programs.  Not only do they keep track of
+# nesting depths of the individual brace types, but they check that each
+# of the other brace types is balanced within matching pairs.  For
+# example, if the program sees this sequence:
+#
+#         {  ( ( ) }
+#
+# then it can determine that there is an extra left paren somewhere
+# between the { and the }.  And so on with every other possible
+# combination of outer and inner brace types.  For another
+# example:
+#
+#         ( [ ..... ]  ] )
+#
+# which has an extra ] within the parens.
+#
+# The brace types have indexes 0 .. 3 which are indexes into
+# the matrices.
+#
+# The pair ? : are treated as just another nesting type, with ? acting
+# as the opening brace and : acting as the closing brace.
+#
+# The matrix
+#
+#         $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
+#
+# saves the nesting depth of brace type $b (where $b is either of the other
+# nesting types) when brace type $a enters a new depth.  When this depth
+# decreases, a check is made that the current depth of brace types $b is
+# unchanged, or otherwise there must have been an error.  This can
+# be very useful for localizing errors, particularly when perl runs to
+# the end of a large file (such as this one) and announces that there
+# is a problem somewhere.
+#
+# A numerical sequence number is maintained for every nesting type,
+# so that each matching pair can be uniquely identified in a simple
+# way.
 
-sub write_on_underline {
+sub increase_nesting_depth {
+    my ( $a, $pos ) = @_;
 
-    # The "underline" is a string that shows where an error is; it starts
-    # out as a string of blanks with the same length as the numbered line of
-    # code above it, and we have to add marking to show where an error is.
-    # In the example below, we want to write the string '--^' just below
-    # the line of bad code:
-    #
-    # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
-    #                 ---^
-    # We are given the current underline string, plus a position and a
-    # string to write on it.
-    #
-    # In the above example, there will be 2 calls to do this:
-    # First call:  $pos=19, pos_chr=^
-    # Second call: $pos=16, pos_chr=---
-    #
-    # This is a trivial thing to do with substr, but there is some
-    # checking to do.
+    # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
+    # @current_sequence_number, @depth_array, @starting_line_of_current_depth
+    my $b;
+    $current_depth[$a]++;
+    my $input_line_number = $tokenizer_self->{_last_line_number};
+    my $input_line        = $tokenizer_self->{_line_text};
 
-    my ( $underline, $pos, $pos_chr ) = @_;
+    # Sequence numbers increment by number of items.  This keeps
+    # a unique set of numbers but still allows the relative location
+    # of any type to be determined.
+    $nesting_sequence_number[$a] += scalar(@closing_brace_names);
+    my $seqno = $nesting_sequence_number[$a];
+    $current_sequence_number[$a][ $current_depth[$a] ] = $seqno;
 
-    # check for error..shouldn't happen
-    unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
-        return $underline;
-    }
-    my $excess = length($pos_chr) + $pos - length($underline);
-    if ( $excess > 0 ) {
-        $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
+    $starting_line_of_current_depth[$a][ $current_depth[$a] ] =
+      [ $input_line_number, $input_line, $pos ];
+
+    for $b ( 0 .. $#closing_brace_names ) {
+        next if ( $b == $a );
+        $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
     }
-    substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
-    return ($underline);
+    return $seqno;
 }
 
-sub is_non_structural_brace {
-
-    # Decide if a brace or bracket is structural or non-structural
-    # by looking at the previous token and type
-
-    # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
-    # Tentatively deactivated because it caused the wrong operator expectation
-    # for this code:
-    #      $user = @vars[1] / 100;
-    # Must update sub operator_expected before re-implementing.
-    # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
-    #    return 0;
-    # }
-
-    # NOTE: braces after type characters start code blocks, but for
-    # simplicity these are not identified as such.  See also
-    # sub code_block_type
-    # if ($last_nonblank_type eq 't') {return 0}
+sub decrease_nesting_depth {
 
-    # otherwise, it is non-structural if it is decorated
-    # by type information.
-    # For example, the '{' here is non-structural:   ${xxx}
-    (
-        $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
+    my ( $a, $pos ) = @_;
 
-          # or if we follow a hash or array closing curly brace or bracket
-          # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
-          # because the first '}' would have been given type 'R'
-          || $last_nonblank_type =~ /^([R\]])$/
-    );
-}
+    # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
+    # @current_sequence_number, @depth_array, @starting_line_of_current_depth
+    my $b;
+    my $seqno             = 0;
+    my $input_line_number = $tokenizer_self->{_last_line_number};
+    my $input_line        = $tokenizer_self->{_line_text};
 
-sub operator_expected {
+    if ( $current_depth[$a] > 0 ) {
 
-    # Many perl symbols have two or more meanings.  For example, '<<'
-    # can be a shift operator or a here-doc operator.  The
-    # interpretation of these symbols depends on the current state of
-    # the tokenizer, which may either be expecting a term or an
-    # operator.  For this example, a << would be a shift if an operator
-    # is expected, and a here-doc if a term is expected.  This routine
-    # is called to make this decision for any current token.  It returns
-    # one of three possible values:
-    #
-    #     OPERATOR - operator expected (or at least, not a term)
-    #     UNKNOWN  - can't tell
-    #     TERM     - a term is expected (or at least, not an operator)
-    #
-    # The decision is based on what has been seen so far.  This
-    # information is stored in the "$last_nonblank_type" and
-    # "$last_nonblank_token" variables.  For example, if the
-    # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
-    # if $last_nonblank_type is 'n' (numeric), we are expecting an
-    # OPERATOR.
-    #
-    # If a UNKNOWN is returned, the calling routine must guess. A major
-    # goal of this tokenizer is to minimize the possiblity of returning
-    # UNKNOWN, because a wrong guess can spoil the formatting of a
-    # script.
-    #
-    # adding NEW_TOKENS: it is critically important that this routine be
-    # updated to allow it to determine if an operator or term is to be
-    # expected after the new token.  Doing this simply involves adding
-    # the new token character to one of the regexes in this routine or
-    # to one of the hash lists
-    # that it uses, which are initialized in the BEGIN section.
+        $seqno = $current_sequence_number[$a][ $current_depth[$a] ];
 
-    my ( $prev_type, $tok, $next_type ) = @_;
-    my $op_expected = UNKNOWN;
+        # check that any brace types $b contained within are balanced
+        for $b ( 0 .. $#closing_brace_names ) {
+            next if ( $b == $a );
 
-#print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";
+            unless ( $depth_array[$a][$b][ $current_depth[$a] ] ==
+                $current_depth[$b] )
+            {
+                my $diff = $current_depth[$b] -
+                  $depth_array[$a][$b][ $current_depth[$a] ];
 
-# Note: function prototype is available for token type 'U' for future
-# program development.  It contains the leading and trailing parens,
-# and no blanks.  It might be used to eliminate token type 'C', for
-# example (prototype = '()'). Thus:
-# if ($last_nonblank_type eq 'U') {
-#     print "previous token=$last_nonblank_token  type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
-# }
+                # don't whine too many times
+                my $saw_brace_error = get_saw_brace_error();
+                if (
+                    $saw_brace_error <= MAX_NAG_MESSAGES
 
-    # A possible filehandle (or object) requires some care...
-    if ( $last_nonblank_type eq 'Z' ) {
+                    # if too many closing types have occured, we probably
+                    # already caught this error
+                    && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
+                  )
+                {
+                    interrupt_logfile();
+                    my $rsl =
+                      $starting_line_of_current_depth[$a][ $current_depth[$a] ];
+                    my $sl  = $$rsl[0];
+                    my $rel = [ $input_line_number, $input_line, $pos ];
+                    my $el  = $$rel[0];
+                    my ($ess);
 
-        # angle.t
-        if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
-            $op_expected = UNKNOWN;
-        }
+                    if ( $diff == 1 || $diff == -1 ) {
+                        $ess = '';
+                    }
+                    else {
+                        $ess = 's';
+                    }
+                    my $bname =
+                      ( $diff > 0 )
+                      ? $opening_brace_names[$b]
+                      : $closing_brace_names[$b];
+                    write_error_indicator_pair( @$rsl, '^' );
+                    my $msg = <<"EOM";
+Found $diff extra $bname$ess between $opening_brace_names[$a] on line $sl and $closing_brace_names[$a] on line $el
+EOM
 
-        # For possible file handle like "$a", Perl uses weird parsing rules.
-        # For example:
-        # print $a/2,"/hi";   - division
-        # print $a / 2,"/hi"; - division
-        # print $a/ 2,"/hi";  - division
-        # print $a /2,"/hi";  - pattern (and error)!
-        elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
-            $op_expected = TERM;
+                    if ( $diff > 0 ) {
+                        my $rml =
+                          $starting_line_of_current_depth[$b]
+                          [ $current_depth[$b] ];
+                        my $ml = $$rml[0];
+                        $msg .=
+"    The most recent un-matched $bname is on line $ml\n";
+                        write_error_indicator_pair( @$rml, '^' );
+                    }
+                    write_error_indicator_pair( @$rel, '^' );
+                    warning($msg);
+                    resume_logfile();
+                }
+                increment_brace_error();
+            }
         }
+        $current_depth[$a]--;
+    }
+    else {
 
-        # Note when an operation is being done where a
-        # filehandle might be expected, since a change in whitespace
-        # could change the interpretation of the statement.
-        else {
-            if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
-                complain("operator in print statement not recommended\n");
-                $op_expected = OPERATOR;
-            }
+        my $saw_brace_error = get_saw_brace_error();
+        if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
+            my $msg = <<"EOM";
+There is no previous $opening_brace_names[$a] to match a $closing_brace_names[$a] on line $input_line_number
+EOM
+            indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
         }
+        increment_brace_error();
     }
+    return $seqno;
+}
 
-    # handle something after 'do' and 'eval'
-    elsif ( $is_block_operator{$last_nonblank_token} ) {
+sub check_final_nesting_depths {
+    my ($a);
 
-        # something like $a = eval "expression";
-        #                          ^
-        if ( $last_nonblank_type eq 'k' ) {
-            $op_expected = TERM;    # expression or list mode following keyword
-        }
+    # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth
 
-        # something like $a = do { BLOCK } / 2;
-        #                                  ^
-        else {
-            $op_expected = OPERATOR;    # block mode following }
+    for $a ( 0 .. $#closing_brace_names ) {
+
+        if ( $current_depth[$a] ) {
+            my $rsl = $starting_line_of_current_depth[$a][ $current_depth[$a] ];
+            my $sl  = $$rsl[0];
+            my $msg = <<"EOM";
+Final nesting depth of $opening_brace_names[$a]s is $current_depth[$a]
+The most recent un-matched $opening_brace_names[$a] is on line $sl
+EOM
+            indicate_error( $msg, @$rsl, '^' );
+            increment_brace_error();
         }
     }
+}
 
-    # handle bare word..
-    elsif ( $last_nonblank_type eq 'w' ) {
+#########i#############################################################
+# Tokenizer routines for looking ahead in input stream
+#######################################################################
 
-        # unfortunately, we can't tell what type of token to expect next
-        # after most bare words
-        $op_expected = UNKNOWN;
+sub peek_ahead_for_n_nonblank_pre_tokens {
+
+    # returns next n pretokens if they exist
+    # returns undef's if hits eof without seeing any pretokens
+    # USES GLOBAL VARIABLES: $tokenizer_self
+    my $max_pretokens = shift;
+    my $line;
+    my $i = 0;
+    my ( $rpre_tokens, $rmap, $rpre_types );
+
+    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
+    {
+        $line =~ s/^\s*//;    # trim leading blanks
+        next if ( length($line) <= 0 );    # skip blank
+        next if ( $line =~ /^#/ );         # skip comment
+        ( $rpre_tokens, $rmap, $rpre_types ) =
+          pre_tokenize( $line, $max_pretokens );
+        last;
     }
+    return ( $rpre_tokens, $rpre_types );
+}
 
-    # operator, but not term possible after these types
-    # Note: moved ')' from type to token because parens in list context
-    # get marked as '{' '}' now.  This is a minor glitch in the following:
-    #    my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
-    #
-    elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
-        || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
+# look ahead for next non-blank, non-comment line of code
+sub peek_ahead_for_nonblank_token {
+
+    # USES GLOBAL VARIABLES: $tokenizer_self
+    my ( $rtokens, $max_token_index ) = @_;
+    my $line;
+    my $i = 0;
+
+    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
     {
-        $op_expected = OPERATOR;
+        $line =~ s/^\s*//;    # trim leading blanks
+        next if ( length($line) <= 0 );    # skip blank
+        next if ( $line =~ /^#/ );         # skip comment
+        my ( $rtok, $rmap, $rtype ) =
+          pre_tokenize( $line, 2 );        # only need 2 pre-tokens
+        my $j = $max_token_index + 1;
+        my $tok;
 
-        # in a 'use' statement, numbers and v-strings are not true
-        # numbers, so to avoid incorrect error messages, we will
-        # mark them as unknown for now (use.t)
-        # TODO: it would be much nicer to create a new token V for VERSION
-        # number in a use statement.  Then this could be a check on type V
-        # and related patches which change $statement_type for '=>'
-        # and ',' could be removed.  Further, it would clean things up to
-        # scan the 'use' statement with a separate subroutine.
-        if (   ( $statement_type eq 'use' )
-            && ( $last_nonblank_type =~ /^[nv]$/ ) )
-        {
-            $op_expected = UNKNOWN;
+        foreach $tok (@$rtok) {
+            last if ( $tok =~ "\n" );
+            $$rtokens[ ++$j ] = $tok;
         }
+        last;
     }
+    return $rtokens;
+}
 
-    # no operator after many keywords, such as "die", "warn", etc
-    elsif ( $expecting_term_token{$last_nonblank_token} ) {
+#########i#############################################################
+# Tokenizer guessing routines for ambiguous situations
+#######################################################################
 
-        # patch for dor.t (defined or).
-        # perl functions which may be unary operators
-        # TODO: This list is incomplete, and these should be put
-        # into a hash.
-        if (   $tok eq '/'
-            && $next_type          eq '/'
-            && $last_nonblank_type eq 'k'
-            && $last_nonblank_token =~ /^eof|undef|shift|pop$/ )
-        {
-            $op_expected = OPERATOR;
+sub guess_if_pattern_or_conditional {
+
+    # this routine is called when we have encountered a ? following an
+    # unknown bareword, and we must decide if it starts a pattern or not
+    # input parameters:
+    #   $i - token index of the ? starting possible pattern
+    # output parameters:
+    #   $is_pattern = 0 if probably not pattern,  =1 if probably a pattern
+    #   msg = a warning or diagnostic message
+    # USES GLOBAL VARIABLES: $last_nonblank_token
+    my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
+    my $is_pattern = 0;
+    my $msg        = "guessing that ? after $last_nonblank_token starts a ";
+
+    if ( $i >= $max_token_index ) {
+        $msg .= "conditional (no end to pattern found on the line)\n";
+    }
+    else {
+        my $ibeg = $i;
+        $i = $ibeg + 1;
+        my $next_token = $$rtokens[$i];    # first token after ?
+
+        # look for a possible ending ? on this line..
+        my $in_quote        = 1;
+        my $quote_depth     = 0;
+        my $quote_character = '';
+        my $quote_pos       = 0;
+        my $quoted_string;
+        (
+            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
+            $quoted_string
+          )
+          = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
+            $quote_pos, $quote_depth, $max_token_index );
+
+        if ($in_quote) {
+
+            # we didn't find an ending ? on this line,
+            # so we bias towards conditional
+            $is_pattern = 0;
+            $msg .= "conditional (no ending ? on this line)\n";
+
+            # we found an ending ?, so we bias towards a pattern
         }
         else {
-            $op_expected = TERM;
+
+            if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
+                $is_pattern = 1;
+                $msg .= "pattern (found ending ? and pattern expected)\n";
+            }
+            else {
+                $msg .= "pattern (uncertain, but found ending ?)\n";
+            }
         }
     }
+    return ( $is_pattern, $msg );
+}
 
-    # no operator after things like + - **  (i.e., other operators)
-    elsif ( $expecting_term_types{$last_nonblank_type} ) {
-        $op_expected = TERM;
-    }
+sub guess_if_pattern_or_division {
 
-    # a few operators, like "time", have an empty prototype () and so
-    # take no parameters but produce a value to operate on
-    elsif ( $expecting_operator_token{$last_nonblank_token} ) {
-        $op_expected = OPERATOR;
-    }
+    # this routine is called when we have encountered a / following an
+    # unknown bareword, and we must decide if it starts a pattern or is a
+    # division
+    # input parameters:
+    #   $i - token index of the / starting possible pattern
+    # output parameters:
+    #   $is_pattern = 0 if probably division,  =1 if probably a pattern
+    #   msg = a warning or diagnostic message
+    # USES GLOBAL VARIABLES: $last_nonblank_token
+    my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
+    my $is_pattern = 0;
+    my $msg        = "guessing that / after $last_nonblank_token starts a ";
 
-    # post-increment and decrement produce values to be operated on
-    elsif ( $expecting_operator_types{$last_nonblank_type} ) {
-        $op_expected = OPERATOR;
+    if ( $i >= $max_token_index ) {
+        "division (no end to pattern found on the line)\n";
     }
+    else {
+        my $ibeg = $i;
+        my $divide_expected =
+          numerator_expected( $i, $rtokens, $max_token_index );
+        $i = $ibeg + 1;
+        my $next_token = $$rtokens[$i];    # first token after slash
 
-    # no value to operate on after sub block
-    elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
+        # look for a possible ending / on this line..
+        my $in_quote        = 1;
+        my $quote_depth     = 0;
+        my $quote_character = '';
+        my $quote_pos       = 0;
+        my $quoted_string;
+        (
+            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
+            $quoted_string
+          )
+          = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
+            $quote_pos, $quote_depth, $max_token_index );
 
-    # a right brace here indicates the end of a simple block.
-    # all non-structural right braces have type 'R'
-    # all braces associated with block operator keywords have been given those
-    # keywords as "last_nonblank_token" and caught above.
-    # (This statement is order dependent, and must come after checking
-    # $last_nonblank_token).
-    elsif ( $last_nonblank_type eq '}' ) {
+        if ($in_quote) {
+
+            # we didn't find an ending / on this line,
+            # so we bias towards division
+            if ( $divide_expected >= 0 ) {
+                $is_pattern = 0;
+                $msg .= "division (no ending / on this line)\n";
+            }
+            else {
+                $msg        = "multi-line pattern (division not possible)\n";
+                $is_pattern = 1;
+            }
 
-        # patch for dor.t (defined or).
-        if (   $tok eq '/'
-            && $next_type           eq '/'
-            && $last_nonblank_token eq ']' )
-        {
-            $op_expected = OPERATOR;
         }
+
+        # we found an ending /, so we bias towards a pattern
         else {
-            $op_expected = TERM;
-        }
-    }
 
-    # something else..what did I forget?
-    else {
+            if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
 
-        # collecting diagnostics on unknown operator types..see what was missed
-        $op_expected = UNKNOWN;
-        write_diagnostics(
-"OP: unknown after type=$last_nonblank_type  token=$last_nonblank_token\n"
-        );
-    }
+                if ( $divide_expected >= 0 ) {
 
-    TOKENIZER_DEBUG_FLAG_EXPECT && do {
-        print
-"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
-    };
-    return $op_expected;
+                    if ( $i - $ibeg > 60 ) {
+                        $msg .= "division (matching / too distant)\n";
+                        $is_pattern = 0;
+                    }
+                    else {
+                        $msg .= "pattern (but division possible too)\n";
+                        $is_pattern = 1;
+                    }
+                }
+                else {
+                    $is_pattern = 1;
+                    $msg .= "pattern (division not possible)\n";
+                }
+            }
+            else {
+
+                if ( $divide_expected >= 0 ) {
+                    $is_pattern = 0;
+                    $msg .= "division (pattern not possible)\n";
+                }
+                else {
+                    $is_pattern = 1;
+                    $msg .=
+                      "pattern (uncertain, but division would not work here)\n";
+                }
+            }
+        }
+    }
+    return ( $is_pattern, $msg );
 }
 
-# The following routines keep track of nesting depths of the nesting
-# types, ( [ { and ?.  This is necessary for determining the indentation
-# level, and also for debugging programs.  Not only do they keep track of
-# nesting depths of the individual brace types, but they check that each
-# of the other brace types is balanced within matching pairs.  For
-# example, if the program sees this sequence:
-#
-#         {  ( ( ) }
-#
-# then it can determine that there is an extra left paren somewhere
-# between the { and the }.  And so on with every other possible
-# combination of outer and inner brace types.  For another
-# example:
-#
-#         ( [ ..... ]  ] )
-#
-# which has an extra ] within the parens.
-#
-# The brace types have indexes 0 .. 3 which are indexes into
-# the matrices.
-#
-# The pair ? : are treated as just another nesting type, with ? acting
-# as the opening brace and : acting as the closing brace.
-#
-# The matrix
-#
-#         $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
-#
-# saves the nesting depth of brace type $b (where $b is either of the other
-# nesting types) when brace type $a enters a new depth.  When this depth
-# decreases, a check is made that the current depth of brace types $b is
-# unchanged, or otherwise there must have been an error.  This can
-# be very useful for localizing errors, particularly when perl runs to
-# the end of a large file (such as this one) and announces that there
-# is a problem somewhere.
-#
-# A numerical sequence number is maintained for every nesting type,
-# so that each matching pair can be uniquely identified in a simple
-# way.
+# try to resolve here-doc vs. shift by looking ahead for
+# non-code or the end token (currently only looks for end token)
+# returns 1 if it is probably a here doc, 0 if not
+sub guess_if_here_doc {
 
-sub increase_nesting_depth {
-    my ( $a, $i_tok ) = @_;
-    my $b;
-    $current_depth[$a]++;
+    # This is how many lines we will search for a target as part of the
+    # guessing strategy.  It is a constant because there is probably
+    # little reason to change it.
+    # USES GLOBAL VARIABLES: $tokenizer_self, $current_package
+    # %is_constant,
+    use constant HERE_DOC_WINDOW => 40;
 
-    # Sequence numbers increment by number of items.  This keeps
-    # a unique set of numbers but still allows the relative location
-    # of any type to be determined.
-    $nesting_sequence_number[$a] += scalar(@closing_brace_names);
-    my $seqno = $nesting_sequence_number[$a];
-    $current_sequence_number[$a][ $current_depth[$a] ] = $seqno;
+    my $next_token        = shift;
+    my $here_doc_expected = 0;
+    my $line;
+    my $k   = 0;
+    my $msg = "checking <<";
 
-    my $pos = $$rpretoken_map[$i_tok];
-    $starting_line_of_current_depth[$a][ $current_depth[$a] ] =
-      [ $input_line_number, $input_line, $pos ];
+    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
+    {
+        chomp $line;
 
-    for $b ( 0 .. $#closing_brace_names ) {
-        next if ( $b == $a );
-        $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
+        if ( $line =~ /^$next_token$/ ) {
+            $msg .= " -- found target $next_token ahead $k lines\n";
+            $here_doc_expected = 1;    # got it
+            last;
+        }
+        last if ( $k >= HERE_DOC_WINDOW );
     }
-    return $seqno;
-}
-
-sub decrease_nesting_depth {
 
-    my ( $a, $i_tok ) = @_;
-    my $pos = $$rpretoken_map[$i_tok];
-    my $b;
-    my $seqno = 0;
-
-    if ( $current_depth[$a] > 0 ) {
+    unless ($here_doc_expected) {
 
-        $seqno = $current_sequence_number[$a][ $current_depth[$a] ];
+        if ( !defined($line) ) {
+            $here_doc_expected = -1;    # hit eof without seeing target
+            $msg .= " -- must be shift; target $next_token not in file\n";
 
-        # check that any brace types $b contained within are balanced
-        for $b ( 0 .. $#closing_brace_names ) {
-            next if ( $b == $a );
+        }
+        else {                          # still unsure..taking a wild guess
 
-            unless ( $depth_array[$a][$b][ $current_depth[$a] ] ==
-                $current_depth[$b] )
-            {
-                my $diff = $current_depth[$b] -
-                  $depth_array[$a][$b][ $current_depth[$a] ];
+            if ( !$is_constant{$current_package}{$next_token} ) {
+                $here_doc_expected = 1;
+                $msg .=
+                  " -- guessing it's a here-doc ($next_token not a constant)\n";
+            }
+            else {
+                $msg .=
+                  " -- guessing it's a shift ($next_token is a constant)\n";
+            }
+        }
+    }
+    write_logfile_entry($msg);
+    return $here_doc_expected;
+}
 
-                # don't whine too many times
-                my $saw_brace_error = get_saw_brace_error();
-                if (
-                    $saw_brace_error <= MAX_NAG_MESSAGES
+#########i#############################################################
+# Tokenizer Routines for scanning identifiers and related items
+#######################################################################
 
-                    # if too many closing types have occured, we probably
-                    # already caught this error
-                    && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
-                  )
-                {
-                    interrupt_logfile();
-                    my $rsl =
-                      $starting_line_of_current_depth[$a][ $current_depth[$a] ];
-                    my $sl  = $$rsl[0];
-                    my $rel = [ $input_line_number, $input_line, $pos ];
-                    my $el  = $$rel[0];
-                    my ($ess);
+sub scan_bare_identifier_do {
 
-                    if ( $diff == 1 || $diff == -1 ) {
-                        $ess = '';
-                    }
-                    else {
-                        $ess = 's';
-                    }
-                    my $bname =
-                      ( $diff > 0 )
-                      ? $opening_brace_names[$b]
-                      : $closing_brace_names[$b];
-                    write_error_indicator_pair( @$rsl, '^' );
-                    my $msg = <<"EOM";
-Found $diff extra $bname$ess between $opening_brace_names[$a] on line $sl and $closing_brace_names[$a] on line $el
-EOM
+    # this routine is called to scan a token starting with an alphanumeric
+    # variable or package separator, :: or '.
+    # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
+    # $last_nonblank_type,@paren_type, $paren_depth
 
-                    if ( $diff > 0 ) {
-                        my $rml =
-                          $starting_line_of_current_depth[$b]
-                          [ $current_depth[$b] ];
-                        my $ml = $$rml[0];
-                        $msg .=
-"    The most recent un-matched $bname is on line $ml\n";
-                        write_error_indicator_pair( @$rml, '^' );
-                    }
-                    write_error_indicator_pair( @$rel, '^' );
-                    warning($msg);
-                    resume_logfile();
-                }
-                increment_brace_error();
-            }
-        }
-        $current_depth[$a]--;
-    }
-    else {
+    my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map,
+        $max_token_index )
+      = @_;
+    my $i_begin = $i;
+    my $package = undef;
 
-        my $saw_brace_error = get_saw_brace_error();
-        if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
-            my $msg = <<"EOM";
-There is no previous $opening_brace_names[$a] to match a $closing_brace_names[$a] on line $input_line_number
-EOM
-            indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
-        }
-        increment_brace_error();
-    }
-    return $seqno;
-}
+    my $i_beg = $i;
 
-sub check_final_nesting_depths {
-    my ($a);
+    # we have to back up one pretoken at a :: since each : is one pretoken
+    if ( $tok eq '::' ) { $i_beg-- }
+    if ( $tok eq '->' ) { $i_beg-- }
+    my $pos_beg = $$rtoken_map[$i_beg];
+    pos($input_line) = $pos_beg;
 
-    for $a ( 0 .. $#closing_brace_names ) {
+    #  Examples:
+    #   A::B::C
+    #   A::
+    #   ::A
+    #   A'B
+    if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {
 
-        if ( $current_depth[$a] ) {
-            my $rsl = $starting_line_of_current_depth[$a][ $current_depth[$a] ];
-            my $sl  = $$rsl[0];
-            my $msg = <<"EOM";
-Final nesting depth of $opening_brace_names[$a]s is $current_depth[$a]
-The most recent un-matched $opening_brace_names[$a] is on line $sl
-EOM
-            indicate_error( $msg, @$rsl, '^' );
-            increment_brace_error();
-        }
-    }
-}
+        my $pos  = pos($input_line);
+        my $numc = $pos - $pos_beg;
+        $tok = substr( $input_line, $pos_beg, $numc );
 
-sub numerator_expected {
+        # type 'w' includes anything without leading type info
+        # ($,%,@,*) including something like abc::def::ghi
+        $type = 'w';
 
-    # this is a filter for a possible numerator, in support of guessing
-    # for the / pattern delimiter token.
-    # returns -
-    #   1 - yes
-    #   0 - can't tell
-    #  -1 - no
-    # Note: I am using the convention that variables ending in
-    # _expected have these 3 possible values.
-    my ( $i, $rtokens ) = @_;
-    my $next_token = $$rtokens[ $i + 1 ];
-    if ( $next_token eq '=' ) { $i++; }    # handle /=
-    my ( $next_nonblank_token, $i_next ) =
-      find_next_nonblank_token( $i, $rtokens );
+        my $sub_name = "";
+        if ( defined($2) ) { $sub_name = $2; }
+        if ( defined($1) ) {
+            $package = $1;
 
-    if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
-        1;
-    }
-    else {
+            # patch: don't allow isolated package name which just ends
+            # in the old style package separator (single quote).  Example:
+            #   use CGI':all';
+            if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
+                $pos--;
+            }
 
-        if ( $next_nonblank_token =~ /^\s*$/ ) {
-            0;
+            $package =~ s/\'/::/g;
+            if ( $package =~ /^\:/ ) { $package = 'main' . $package }
+            $package =~ s/::$//;
         }
         else {
-            -1;
+            $package = $current_package;
+
+            if ( $is_keyword{$tok} ) {
+                $type = 'k';
+            }
         }
-    }
-}
 
-sub pattern_expected {
+        # if it is a bareword..
+        if ( $type eq 'w' ) {
 
-    # This is the start of a filter for a possible pattern.
-    # It looks at the token after a possbible pattern and tries to
-    # determine if that token could end a pattern.
-    # returns -
-    #   1 - yes
-    #   0 - can't tell
-    #  -1 - no
-    my ( $i, $rtokens ) = @_;
-    my $next_token = $$rtokens[ $i + 1 ];
-    if ( $next_token =~ /^[cgimosx]/ ) { $i++; }    # skip possible modifier
-    my ( $next_nonblank_token, $i_next ) =
-      find_next_nonblank_token( $i, $rtokens );
+            # check for v-string with leading 'v' type character
+            # (This seems to have presidence over filehandle, type 'Y')
+            if ( $tok =~ /^v\d[_\d]*$/ ) {
 
-    # list of tokens which may follow a pattern
-    # (can probably be expanded)
-    if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
-    {
-        1;
-    }
-    else {
+                # we only have the first part - something like 'v101' -
+                # look for more
+                if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
+                    $pos  = pos($input_line);
+                    $numc = $pos - $pos_beg;
+                    $tok  = substr( $input_line, $pos_beg, $numc );
+                }
+                $type = 'v';
 
-        if ( $next_nonblank_token =~ /^\s*$/ ) {
-            0;
-        }
-        else {
-            -1;
-        }
-    }
-}
+                # warn if this version can't handle v-strings
+                report_v_string($tok);
+            }
 
-sub find_next_nonblank_token_on_this_line {
-    my ( $i, $rtokens ) = @_;
-    my $next_nonblank_token;
+            elsif ( $is_constant{$package}{$sub_name} ) {
+                $type = 'C';
+            }
 
-    if ( $i < $max_token_index ) {
-        $next_nonblank_token = $$rtokens[ ++$i ];
+            # bareword after sort has implied empty prototype; for example:
+            # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
+            # This has priority over whatever the user has specified.
+            elsif ($last_nonblank_token eq 'sort'
+                && $last_nonblank_type eq 'k' )
+            {
+                $type = 'Z';
+            }
 
-        if ( $next_nonblank_token =~ /^\s*$/ ) {
+            # Note: strangely, perl does not seem to really let you create
+            # functions which act like eval and do, in the sense that eval
+            # and do may have operators following the final }, but any operators
+            # that you create with prototype (&) apparently do not allow
+            # trailing operators, only terms.  This seems strange.
+            # If this ever changes, here is the update
+            # to make perltidy behave accordingly:
 
-            if ( $i < $max_token_index ) {
-                $next_nonblank_token = $$rtokens[ ++$i ];
+            # elsif ( $is_block_function{$package}{$tok} ) {
+            #    $tok='eval'; # patch to do braces like eval  - doesn't work
+            #    $type = 'k';
+            #}
+            # FIXME: This could become a separate type to allow for different
+            # future behavior:
+            elsif ( $is_block_function{$package}{$sub_name} ) {
+                $type = 'G';
             }
-        }
-    }
-    else {
-        $next_nonblank_token = "";
-    }
-    return ( $next_nonblank_token, $i );
-}
-
-sub find_next_nonblank_token {
-    my ( $i, $rtokens ) = @_;
 
-    if ( $i >= $max_token_index ) {
+            elsif ( $is_block_list_function{$package}{$sub_name} ) {
+                $type = 'G';
+            }
+            elsif ( $is_user_function{$package}{$sub_name} ) {
+                $type      = 'U';
+                $prototype = $user_function_prototype{$package}{$sub_name};
+            }
 
-        if ( !$peeked_ahead ) {
-            $peeked_ahead = 1;
-            $rtokens      = peek_ahead_for_nonblank_token($rtokens);
-        }
-    }
-    my $next_nonblank_token = $$rtokens[ ++$i ];
+            # check for indirect object
+            elsif (
 
-    if ( $next_nonblank_token =~ /^\s*$/ ) {
-        $next_nonblank_token = $$rtokens[ ++$i ];
-    }
-    return ( $next_nonblank_token, $i );
-}
+                # added 2001-03-27: must not be followed immediately by '('
+                # see fhandle.t
+                ( $input_line !~ m/\G\(/gc )
 
-sub peek_ahead_for_n_nonblank_pre_tokens {
+                # and
+                && (
 
-    # returns next n pretokens if they exist
-    # returns undef's if hits eof without seeing any pretokens
-    my $max_pretokens = shift;
-    my $line;
-    my $i = 0;
-    my ( $rpre_tokens, $rmap, $rpre_types );
+                    # preceded by keyword like 'print', 'printf' and friends
+                    $is_indirect_object_taker{$last_nonblank_token}
 
-    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
-    {
-        $line =~ s/^\s*//;    # trim leading blanks
-        next if ( length($line) <= 0 );    # skip blank
-        next if ( $line =~ /^#/ );         # skip comment
-        ( $rpre_tokens, $rmap, $rpre_types ) =
-          pre_tokenize( $line, $max_pretokens );
-        last;
-    }
-    return ( $rpre_tokens, $rpre_types );
-}
+                    # or preceded by something like 'print(' or 'printf('
+                    || (
+                        ( $last_nonblank_token eq '(' )
+                        && $is_indirect_object_taker{ $paren_type[$paren_depth]
+                        }
 
-# look ahead for next non-blank, non-comment line of code
-sub peek_ahead_for_nonblank_token {
-    my $rtokens = shift;
-    my $line;
-    my $i = 0;
+                    )
+                )
+              )
+            {
 
-    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
-    {
-        $line =~ s/^\s*//;    # trim leading blanks
-        next if ( length($line) <= 0 );    # skip blank
-        next if ( $line =~ /^#/ );         # skip comment
-        my ( $rtok, $rmap, $rtype ) =
-          pre_tokenize( $line, 2 );        # only need 2 pre-tokens
-        my $j = $max_token_index + 1;
-        my $tok;
+                # may not be indirect object unless followed by a space
+                if ( $input_line =~ m/\G\s+/gc ) {
+                    $type = 'Y';
 
-        foreach $tok (@$rtok) {
-            last if ( $tok =~ "\n" );
-            $$rtokens[ ++$j ] = $tok;
-        }
-        last;
-    }
-    return $rtokens;
-}
+                    # Abandon Hope ...
+                    # Perl's indirect object notation is a very bad
+                    # thing and can cause subtle bugs, especially for
+                    # beginning programmers.  And I haven't even been
+                    # able to figure out a sane warning scheme which
+                    # doesn't get in the way of good scripts.
 
-sub pre_tokenize {
+                    # Complain if a filehandle has any lower case
+                    # letters.  This is suggested good practice, but the
+                    # main reason for this warning is that prior to
+                    # release 20010328, perltidy incorrectly parsed a
+                    # function call after a print/printf, with the
+                    # result that a space got added before the opening
+                    # paren, thereby converting the function name to a
+                    # filehandle according to perl's weird rules.  This
+                    # will not usually generate a syntax error, so this
+                    # is a potentially serious bug.  By warning
+                    # of filehandles with any lower case letters,
+                    # followed by opening parens, we will help the user
+                    # find almost all of these older errors.
+                    # use 'sub_name' because something like
+                    # main::MYHANDLE is ok for filehandle
+                    if ( $sub_name =~ /[a-z]/ ) {
 
-    # Break a string, $str, into a sequence of preliminary tokens.  We
-    # are interested in these types of tokens:
-    #   words       (type='w'),            example: 'max_tokens_wanted'
-    #   digits      (type = 'd'),          example: '0755'
-    #   whitespace  (type = 'b'),          example: '   '
-    #   any other single character (i.e. punct; type = the character itself).
-    # We cannot do better than this yet because we might be in a quoted
-    # string or pattern.  Caller sets $max_tokens_wanted to 0 to get all
-    # tokens.
-    my ( $str, $max_tokens_wanted ) = @_;
+                        # could be bug caused by older perltidy if
+                        # followed by '('
+                        if ( $input_line =~ m/\G\s*\(/gc ) {
+                            complain(
+"Caution: unknown word '$tok' in indirect object slot\n"
+                            );
+                        }
+                    }
+                }
 
-    # we return references to these 3 arrays:
-    my @tokens    = ();     # array of the tokens themselves
-    my @token_map = (0);    # string position of start of each token
-    my @type      = ();     # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
+                # bareword not followed by a space -- may not be filehandle
+                # (may be function call defined in a 'use' statement)
+                else {
+                    $type = 'Z';
+                }
+            }
+        }
 
-    do {
+        # Now we must convert back from character position
+        # to pre_token index.
+        # I don't think an error flag can occur here ..but who knows
+        my $error;
+        ( $i, $error ) =
+          inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
+        if ($error) {
+            warning("scan_bare_identifier: Possibly invalid tokenization\n");
+        }
+    }
 
-        # whitespace
-        if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
+    # no match but line not blank - could be syntax error
+    # perl will take '::' alone without complaint
+    else {
+        $type = 'w';
 
-        # numbers
-        # note that this must come before words!
-        elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
+        # change this warning to log message if it becomes annoying
+        warning("didn't find identifier after leading ::\n");
+    }
+    return ( $i, $tok, $type, $prototype );
+}
 
-        # words
-        elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
+sub scan_id_do {
 
-        # single-character punctuation
-        elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
+# This is the new scanner and will eventually replace scan_identifier.
+# Only type 'sub' and 'package' are implemented.
+# Token types $ * % @ & -> are not yet implemented.
+#
+# Scan identifier following a type token.
+# The type of call depends on $id_scan_state: $id_scan_state = ''
+# for starting call, in which case $tok must be the token defining
+# the type.
+#
+# If the type token is the last nonblank token on the line, a value
+# of $id_scan_state = $tok is returned, indicating that further
+# calls must be made to get the identifier.  If the type token is
+# not the last nonblank token on the line, the identifier is
+# scanned and handled and a value of '' is returned.
+# USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list,
+# $statement_type, $tokenizer_self
+
+    my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
+        $max_token_index )
+      = @_;
+    my $type = '';
+    my ( $i_beg, $pos_beg );
 
-        # that's all..
-        else {
-            return ( \@tokens, \@token_map, \@type );
-        }
+    #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
+    #my ($a,$b,$c) = caller;
+    #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
 
-        push @tokens,    $1;
-        push @token_map, pos($str);
+    # on re-entry, start scanning at first token on the line
+    if ($id_scan_state) {
+        $i_beg = $i;
+        $type  = '';
+    }
 
-    } while ( --$max_tokens_wanted != 0 );
+    # on initial entry, start scanning just after type token
+    else {
+        $i_beg         = $i + 1;
+        $id_scan_state = $tok;
+        $type          = 't';
+    }
 
-    return ( \@tokens, \@token_map, \@type );
-}
-
-sub show_tokens {
-
-    # this is an old debug routine
-    my ( $rtokens, $rtoken_map ) = @_;
-    my $num = scalar(@$rtokens);
-    my $i;
-
-    for ( $i = 0 ; $i < $num ; $i++ ) {
-        my $len = length( $$rtokens[$i] );
-        print "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n";
+    # find $i_beg = index of next nonblank token,
+    # and handle empty lines
+    my $blank_line          = 0;
+    my $next_nonblank_token = $$rtokens[$i_beg];
+    if ( $i_beg > $max_token_index ) {
+        $blank_line = 1;
     }
-}
-
-sub find_angle_operator_termination {
-
-    # We are looking at a '<' and want to know if it is an angle operator.
-    # We are to return:
-    #   $i = pretoken index of ending '>' if found, current $i otherwise
-    #   $type = 'Q' if found, '>' otherwise
-    my ( $input_line, $i_beg, $rtoken_map, $expecting ) = @_;
-    my $i    = $i_beg;
-    my $type = '<';
-    pos($input_line) = 1 + $$rtoken_map[$i];
-
-    my $filter;
-
-    # we just have to find the next '>' if a term is expected
-    if ( $expecting == TERM ) { $filter = '[\>]' }
-
-    # we have to guess if we don't know what is expected
-    elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
-
-    # shouldn't happen - we shouldn't be here if operator is expected
-    else { warning("Program Bug in find_angle_operator_termination\n") }
-
-    # To illustrate what we might be looking at, in case we are
-    # guessing, here are some examples of valid angle operators
-    # (or file globs):
-    #  <tmp_imp/*>
-    #  <FH>
-    #  <$fh>
-    #  <*.c *.h>
-    #  <_>
-    #  <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
-    #  <${PREFIX}*img*.$IMAGE_TYPE>
-    #  <img*.$IMAGE_TYPE>
-    #  <Timg*.$IMAGE_TYPE>
-    #  <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
-    #
-    # Here are some examples of lines which do not have angle operators:
-    #  return undef unless $self->[2]++ < $#{$self->[1]};
-    #  < 2  || @$t >
-    #
-    # the following line from dlister.pl caused trouble:
-    #  print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
-    #
-    # If the '<' starts an angle operator, it must end on this line and
-    # it must not have certain characters like ';' and '=' in it.  I use
-    # this to limit the testing.  This filter should be improved if
-    # possible.
+    else {
 
-    if ( $input_line =~ /($filter)/g ) {
+        # only a '#' immediately after a '$' is not a comment
+        if ( $next_nonblank_token eq '#' ) {
+            unless ( $tok eq '$' ) {
+                $blank_line = 1;
+            }
+        }
 
-        if ( $1 eq '>' ) {
+        if ( $next_nonblank_token =~ /^\s/ ) {
+            ( $next_nonblank_token, $i_beg ) =
+              find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
+                $max_token_index );
+            if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
+                $blank_line = 1;
+            }
+        }
+    }
 
-            # We MAY have found an angle operator termination if we get
-            # here, but we need to do more to be sure we haven't been
-            # fooled.
-            my $pos = pos($input_line);
+    # handle non-blank line; identifier, if any, must follow
+    unless ($blank_line) {
 
-            my $pos_beg = $$rtoken_map[$i];
-            my $str     = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
+        if ( $id_scan_state eq 'sub' ) {
+            ( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
+                $input_line, $i,             $i_beg,
+                $tok,        $type,          $rtokens,
+                $rtoken_map, $id_scan_state, $max_token_index
+            );
+        }
 
-            # Reject if the closing '>' follows a '-' as in:
-            # if ( VERSION < 5.009 && $op-> name eq 'aassign' ) { }
-            if ( $expecting eq UNKNOWN ) {
-                my $check = substr( $input_line, $pos - 2, 1 );
-                if ( $check eq '-' ) {
-                    return ( $i, $type );
-                }
-            }
+        elsif ( $id_scan_state eq 'package' ) {
+            ( $i, $tok, $type ) =
+              do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
+                $rtoken_map, $max_token_index );
+            $id_scan_state = '';
+        }
 
-            ######################################debug#####
-            #write_diagnostics( "ANGLE? :$str\n");
-            #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
-            ######################################debug#####
-            $type = 'Q';
-            my $error;
-            ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
+        else {
+            warning("invalid token in scan_id: $tok\n");
+            $id_scan_state = '';
+        }
+    }
 
-            # It may be possible that a quote ends midway in a pretoken.
-            # If this happens, it may be necessary to split the pretoken.
-            if ($error) {
-                warning(
-                    "Possible tokinization error..please check this line\n");
-                report_possible_bug();
-            }
+    if ( $id_scan_state && ( !defined($type) || !$type ) ) {
 
-            # Now let's see where we stand....
-            # OK if math op not possible
-            if ( $expecting == TERM ) {
-            }
+        # shouldn't happen:
+        warning(
+"Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
+        );
+        report_definite_bug();
+    }
 
-            # OK if there are no more than 2 pre-tokens inside
-            # (not possible to write 2 token math between < and >)
-            # This catches most common cases
-            elsif ( $i <= $i_beg + 3 ) {
-                write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
-            }
+    TOKENIZER_DEBUG_FLAG_NSCAN && do {
+        print
+          "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
+    };
+    return ( $i, $tok, $type, $id_scan_state );
+}
 
-            # Not sure..
-            else {
+sub check_prototype {
+    my ( $proto, $package, $subname ) = @_;
+    return unless ( defined($package) && defined($subname) );
+    if ( defined($proto) ) {
+        $proto =~ s/^\s*\(\s*//;
+        $proto =~ s/\s*\)$//;
+        if ($proto) {
+            $is_user_function{$package}{$subname}        = 1;
+            $user_function_prototype{$package}{$subname} = "($proto)";
 
-                # Let's try a Brace Test: any braces inside must balance
-                my $br = 0;
-                while ( $str =~ /\{/g ) { $br++ }
-                while ( $str =~ /\}/g ) { $br-- }
-                my $sb = 0;
-                while ( $str =~ /\[/g ) { $sb++ }
-                while ( $str =~ /\]/g ) { $sb-- }
-                my $pr = 0;
-                while ( $str =~ /\(/g ) { $pr++ }
-                while ( $str =~ /\)/g ) { $pr-- }
+            # prototypes containing '&' must be treated specially..
+            if ( $proto =~ /\&/ ) {
 
-                # if braces do not balance - not angle operator
-                if ( $br || $sb || $pr ) {
-                    $i    = $i_beg;
-                    $type = '<';
-                    write_diagnostics(
-                        "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
+                # right curly braces of prototypes ending in
+                # '&' may be followed by an operator
+                if ( $proto =~ /\&$/ ) {
+                    $is_block_function{$package}{$subname} = 1;
                 }
 
-                # we should keep doing more checks here...to be continued
-                # Tentatively accepting this as a valid angle operator.
-                # There are lots more things that can be checked.
-                else {
-                    write_diagnostics(
-                        "ANGLE-Guessing yes: $str expecting=$expecting\n");
-                    write_logfile_entry("Guessing angle operator here: $str\n");
+                # right curly braces of prototypes NOT ending in
+                # '&' may NOT be followed by an operator
+                elsif ( $proto !~ /\&$/ ) {
+                    $is_block_list_function{$package}{$subname} = 1;
                 }
             }
         }
-
-        # didn't find ending >
         else {
-            if ( $expecting == TERM ) {
-                warning("No ending > for angle operator\n");
-            }
+            $is_constant{$package}{$subname} = 1;
         }
     }
-    return ( $i, $type );
+    else {
+        $is_user_function{$package}{$subname} = 1;
+    }
 }
 
-sub inverse_pretoken_map {
+sub do_scan_package {
 
-    # Starting with the current pre_token index $i, scan forward until
-    # finding the index of the next pre_token whose position is $pos.
-    my ( $i, $pos, $rtoken_map ) = @_;
-    my $error = 0;
+    # do_scan_package parses a package name
+    # it is called with $i_beg equal to the index of the first nonblank
+    # token following a 'package' token.
+    # USES GLOBAL VARIABLES: $current_package,
 
-    while ( ++$i <= $max_token_index ) {
+    my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
+        $max_token_index )
+      = @_;
+    my $package = undef;
+    my $pos_beg = $$rtoken_map[$i_beg];
+    pos($input_line) = $pos_beg;
 
-        if ( $pos <= $$rtoken_map[$i] ) {
+    # handle non-blank line; package name, if any, must follow
+    if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
+        $package = $1;
+        $package = ( defined($1) && $1 ) ? $1 : 'main';
+        $package =~ s/\'/::/g;
+        if ( $package =~ /^\:/ ) { $package = 'main' . $package }
+        $package =~ s/::$//;
+        my $pos  = pos($input_line);
+        my $numc = $pos - $pos_beg;
+        $tok = 'package ' . substr( $input_line, $pos_beg, $numc );
+        $type = 'i';
 
-            # Let the calling routine handle errors in which we do not
-            # land on a pre-token boundary.  It can happen by running
-            # perltidy on some non-perl scripts, for example.
-            if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
-            $i--;
-            last;
+        # Now we must convert back from character position
+        # to pre_token index.
+        # I don't think an error flag can occur here ..but ?
+        my $error;
+        ( $i, $error ) =
+          inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
+        if ($error) { warning("Possibly invalid package\n") }
+        $current_package = $package;
+
+        # check for error
+        my ( $next_nonblank_token, $i_next ) =
+          find_next_nonblank_token( $i, $rtokens, $max_token_index );
+        if ( $next_nonblank_token !~ /^[;\}]$/ ) {
+            warning(
+                "Unexpected '$next_nonblank_token' after package name '$tok'\n"
+            );
         }
     }
-    return ( $i, $error );
+
+    # no match but line not blank --
+    # could be a label with name package, like package:  , for example.
+    else {
+        $type = 'k';
+    }
+
+    return ( $i, $tok, $type );
 }
 
-sub guess_if_pattern_or_conditional {
+sub scan_identifier_do {
 
-    # this routine is called when we have encountered a ? following an
-    # unknown bareword, and we must decide if it starts a pattern or not
-    # input parameters:
-    #   $i - token index of the ? starting possible pattern
-    # output parameters:
-    #   $is_pattern = 0 if probably not pattern,  =1 if probably a pattern
-    #   msg = a warning or diagnostic message
-    my ( $i, $rtokens, $rtoken_map ) = @_;
-    my $is_pattern = 0;
-    my $msg        = "guessing that ? after $last_nonblank_token starts a ";
+    # This routine assembles tokens into identifiers.  It maintains a
+    # scan state, id_scan_state.  It updates id_scan_state based upon
+    # current id_scan_state and token, and returns an updated
+    # id_scan_state and the next index after the identifier.
+    # USES GLOBAL VARIABLES: $context, $last_nonblank_token,
+    # $last_nonblank_type
 
-    if ( $i >= $max_token_index ) {
-        $msg .= "conditional (no end to pattern found on the line)\n";
-    }
-    else {
-        my $ibeg = $i;
-        $i = $ibeg + 1;
-        my $next_token = $$rtokens[$i];    # first token after ?
+    my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index ) = @_;
+    my $i_begin   = $i;
+    my $type      = '';
+    my $tok_begin = $$rtokens[$i_begin];
+    if ( $tok_begin eq ':' ) { $tok_begin = '::' }
+    my $id_scan_state_begin = $id_scan_state;
+    my $identifier_begin    = $identifier;
+    my $tok                 = $tok_begin;
+    my $message             = "";
 
-        # look for a possible ending ? on this line..
-        my $in_quote        = 1;
-        my $quote_depth     = 0;
-        my $quote_character = '';
-        my $quote_pos       = 0;
-        ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
-          follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
-            $quote_pos, $quote_depth );
+    # these flags will be used to help figure out the type:
+    my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
+    my $saw_type;
 
-        if ($in_quote) {
+    # allow old package separator (') except in 'use' statement
+    my $allow_tick = ( $last_nonblank_token ne 'use' );
 
-            # we didn't find an ending ? on this line,
-            # so we bias towards conditional
-            $is_pattern = 0;
-            $msg .= "conditional (no ending ? on this line)\n";
+    # get started by defining a type and a state if necessary
+    unless ($id_scan_state) {
+        $context = UNKNOWN_CONTEXT;
 
-            # we found an ending ?, so we bias towards a pattern
+        # fixup for digraph
+        if ( $tok eq '>' ) {
+            $tok       = '->';
+            $tok_begin = $tok;
+        }
+        $identifier = $tok;
+
+        if ( $tok eq '$' || $tok eq '*' ) {
+            $id_scan_state = '$';
+            $context       = SCALAR_CONTEXT;
+        }
+        elsif ( $tok eq '%' || $tok eq '@' ) {
+            $id_scan_state = '$';
+            $context       = LIST_CONTEXT;
+        }
+        elsif ( $tok eq '&' ) {
+            $id_scan_state = '&';
+        }
+        elsif ( $tok eq 'sub' or $tok eq 'package' ) {
+            $saw_alpha     = 0;     # 'sub' is considered type info here
+            $id_scan_state = '$';
+            $identifier .= ' ';     # need a space to separate sub from sub name
+        }
+        elsif ( $tok eq '::' ) {
+            $id_scan_state = 'A';
+        }
+        elsif ( $tok =~ /^[A-Za-z_]/ ) {
+            $id_scan_state = ':';
+        }
+        elsif ( $tok eq '->' ) {
+            $id_scan_state = '$';
         }
         else {
 
-            if ( pattern_expected( $i, $rtokens ) >= 0 ) {
-                $is_pattern = 1;
-                $msg .= "pattern (found ending ? and pattern expected)\n";
-            }
-            else {
-                $msg .= "pattern (uncertain, but found ending ?)\n";
-            }
+            # shouldn't happen
+            my ( $a, $b, $c ) = caller;
+            warning("Program Bug: scan_identifier given bad token = $tok \n");
+            warning("   called from sub $a  line: $c\n");
+            report_definite_bug();
         }
+        $saw_type = !$saw_alpha;
+    }
+    else {
+        $i--;
+        $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
     }
-    return ( $is_pattern, $msg );
-}
 
-sub guess_if_pattern_or_division {
+    # now loop to gather the identifier
+    my $i_save = $i;
 
-    # this routine is called when we have encountered a / following an
-    # unknown bareword, and we must decide if it starts a pattern or is a
-    # division
-    # input parameters:
-    #   $i - token index of the / starting possible pattern
-    # output parameters:
-    #   $is_pattern = 0 if probably division,  =1 if probably a pattern
-    #   msg = a warning or diagnostic message
-    my ( $i, $rtokens, $rtoken_map ) = @_;
-    my $is_pattern = 0;
-    my $msg        = "guessing that / after $last_nonblank_token starts a ";
+    while ( $i < $max_token_index ) {
+        $i_save = $i unless ( $tok =~ /^\s*$/ );
+        $tok = $$rtokens[ ++$i ];
 
-    if ( $i >= $max_token_index ) {
-        "division (no end to pattern found on the line)\n";
-    }
-    else {
-        my $ibeg = $i;
-        my $divide_expected = numerator_expected( $i, $rtokens );
-        $i = $ibeg + 1;
-        my $next_token = $$rtokens[$i];    # first token after slash
+        if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
+            $tok = '::';
+            $i++;
+        }
 
-        # look for a possible ending / on this line..
-        my $in_quote        = 1;
-        my $quote_depth     = 0;
-        my $quote_character = '';
-        my $quote_pos       = 0;
-        ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
-          follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
-            $quote_pos, $quote_depth );
+        if ( $id_scan_state eq '$' ) {    # starting variable name
 
-        if ($in_quote) {
+            if ( $tok eq '$' ) {
 
-            # we didn't find an ending / on this line,
-            # so we bias towards division
-            if ( $divide_expected >= 0 ) {
-                $is_pattern = 0;
-                $msg .= "division (no ending / on this line)\n";
+                $identifier .= $tok;
+
+                # we've got a punctuation variable if end of line (punct.t)
+                if ( $i == $max_token_index ) {
+                    $type          = 'i';
+                    $id_scan_state = '';
+                    last;
+                }
             }
-            else {
-                $msg        = "multi-line pattern (division not possible)\n";
-                $is_pattern = 1;
+            elsif ( $tok =~ /^[A-Za-z_]/ ) {    # alphanumeric ..
+                $saw_alpha     = 1;
+                $id_scan_state = ':';           # now need ::
+                $identifier .= $tok;
             }
+            elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
+                $saw_alpha     = 1;
+                $id_scan_state = ':';                 # now need ::
+                $identifier .= $tok;
 
-        }
+                # Perl will accept leading digits in identifiers,
+                # although they may not always produce useful results.
+                # Something like $main::0 is ok.  But this also works:
+                #
+                #  sub howdy::123::bubba{ print "bubba $54321!\n" }
+                #  howdy::123::bubba();
+                #
+            }
+            elsif ( $tok =~ /^[0-9]/ ) {              # numeric
+                $saw_alpha     = 1;
+                $id_scan_state = ':';                 # now need ::
+                $identifier .= $tok;
+            }
+            elsif ( $tok eq '::' ) {
+                $id_scan_state = 'A';
+                $identifier .= $tok;
+            }
+            elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) {    # $#array
+                $identifier .= $tok;    # keep same state, a $ could follow
+            }
+            elsif ( $tok eq '{' ) {
 
-        # we found an ending /, so we bias towards a pattern
-        else {
+                # check for something like ${#} or ${©}
+                if (   $identifier eq '$'
+                    && $i + 2 <= $max_token_index
+                    && $$rtokens[ $i + 2 ] eq '}'
+                    && $$rtokens[ $i + 1 ] !~ /[\s\w]/ )
+                {
+                    my $next2 = $$rtokens[ $i + 2 ];
+                    my $next1 = $$rtokens[ $i + 1 ];
+                    $identifier .= $tok . $next1 . $next2;
+                    $i += 2;
+                    $id_scan_state = '';
+                    last;
+                }
+
+                # skip something like ${xxx} or ->{
+                $id_scan_state = '';
 
-            if ( pattern_expected( $i, $rtokens ) >= 0 ) {
+                # if this is the first token of a line, any tokens for this
+                # identifier have already been accumulated
+                if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
+                $i = $i_save;
+                last;
+            }
 
-                if ( $divide_expected >= 0 ) {
+            # space ok after leading $ % * & @
+            elsif ( $tok =~ /^\s*$/ ) {
 
-                    if ( $i - $ibeg > 60 ) {
-                        $msg .= "division (matching / too distant)\n";
-                        $is_pattern = 0;
+                if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
+
+                    if ( length($identifier) > 1 ) {
+                        $id_scan_state = '';
+                        $i             = $i_save;
+                        $type          = 'i';    # probably punctuation variable
+                        last;
                     }
                     else {
-                        $msg .= "pattern (but division possible too)\n";
-                        $is_pattern = 1;
+
+                        # spaces after $'s are common, and space after @
+                        # is harmless, so only complain about space
+                        # after other type characters. Space after $ and
+                        # @ will be removed in formatting.  Report space
+                        # after % and * because they might indicate a
+                        # parsing error.  In other words '% ' might be a
+                        # modulo operator.  Delete this warning if it
+                        # gets annoying.
+                        if ( $identifier !~ /^[\@\$]$/ ) {
+                            $message =
+                              "Space in identifier, following $identifier\n";
+                        }
                     }
                 }
-                else {
-                    $is_pattern = 1;
-                    $msg .= "pattern (division not possible)\n";
-                }
+
+                # else:
+                # space after '->' is ok
             }
-            else {
+            elsif ( $tok eq '^' ) {
 
-                if ( $divide_expected >= 0 ) {
-                    $is_pattern = 0;
-                    $msg .= "division (pattern not possible)\n";
+                # check for some special variables like $^W
+                if ( $identifier =~ /^[\$\*\@\%]$/ ) {
+                    $identifier .= $tok;
+                    $id_scan_state = 'A';
+
+                    # Perl accepts '$^]' or '@^]', but
+                    # there must not be a space before the ']'.
+                    my $next1 = $$rtokens[ $i + 1 ];
+                    if ( $next1 eq ']' ) {
+                        $i++;
+                        $identifier .= $next1;
+                        $id_scan_state = "";
+                        last;
+                    }
                 }
                 else {
-                    $is_pattern = 1;
-                    $msg .=
-                      "pattern (uncertain, but division would not work here)\n";
+                    $id_scan_state = '';
                 }
             }
-        }
-    }
-    return ( $is_pattern, $msg );
-}
-
-sub find_here_doc {
-
-    # find the target of a here document, if any
-    # input parameters:
-    #   $i - token index of the second < of <<
-    #   ($i must be less than the last token index if this is called)
-    # output parameters:
-    #   $found_target = 0 didn't find target; =1 found target
-    #   HERE_TARGET - the target string (may be empty string)
-    #   $i - unchanged if not here doc,
-    #    or index of the last token of the here target
-    my ( $expecting, $i, $rtokens, $rtoken_map ) = @_;
-    my $ibeg                 = $i;
-    my $found_target         = 0;
-    my $here_doc_target      = '';
-    my $here_quote_character = '';
-    my ( $next_nonblank_token, $i_next_nonblank, $next_token );
-    $next_token = $$rtokens[ $i + 1 ];
-
-    # perl allows a backslash before the target string (heredoc.t)
-    my $backslash = 0;
-    if ( $next_token eq '\\' ) {
-        $backslash  = 1;
-        $next_token = $$rtokens[ $i + 2 ];
-    }
-
-    ( $next_nonblank_token, $i_next_nonblank ) =
-      find_next_nonblank_token_on_this_line( $i, $rtokens );
-
-    if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
-
-        my $in_quote    = 1;
-        my $quote_depth = 0;
-        my $quote_pos   = 0;
-
-        ( $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth ) =
-          follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
-            $here_quote_character, $quote_pos, $quote_depth );
-
-        if ($in_quote) {    # didn't find end of quote, so no target found
-            $i = $ibeg;
-        }
-        else {              # found ending quote
-            my $j;
-            $found_target = 1;
-
-            my $tokj;
-            for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
-                $tokj = $$rtokens[$j];
-
-                # we have to remove any backslash before the quote character
-                # so that the here-doc-target exactly matches this string
-                next
-                  if ( $tokj eq "\\"
-                    && $j < $i - 1
-                    && $$rtokens[ $j + 1 ] eq $here_quote_character );
-                $here_doc_target .= $tokj;
-            }
-        }
-    }
-
-    elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
-        $found_target = 1;
-        write_logfile_entry(
-            "found blank here-target after <<; suggest using \"\"\n");
-        $i = $ibeg;
-    }
-    elsif ( $next_token =~ /^\w/ ) {    # simple bareword or integer after <<
-
-        my $here_doc_expected;
-        if ( $expecting == UNKNOWN ) {
-            $here_doc_expected = guess_if_here_doc($next_token);
-        }
-        else {
-            $here_doc_expected = 1;
-        }
-
-        if ($here_doc_expected) {
-            $found_target    = 1;
-            $here_doc_target = $next_token;
-            $i               = $ibeg + 1;
-        }
-
-    }
-    else {
-
-        if ( $expecting == TERM ) {
-            $found_target = 1;
-            write_logfile_entry("Note: bare here-doc operator <<\n");
-        }
-        else {
-            $i = $ibeg;
-        }
-    }
-
-    # patch to neglect any prepended backslash
-    if ( $found_target && $backslash ) { $i++ }
-
-    return ( $found_target, $here_doc_target, $here_quote_character, $i );
-}
-
-# try to resolve here-doc vs. shift by looking ahead for
-# non-code or the end token (currently only looks for end token)
-# returns 1 if it is probably a here doc, 0 if not
-sub guess_if_here_doc {
-
-    # This is how many lines we will search for a target as part of the
-    # guessing strategy.  It is a constant because there is probably
-    # little reason to change it.
-    use constant HERE_DOC_WINDOW => 40;
-
-    my $next_token        = shift;
-    my $here_doc_expected = 0;
-    my $line;
-    my $k   = 0;
-    my $msg = "checking <<";
-
-    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
-    {
-        chomp $line;
-
-        if ( $line =~ /^$next_token$/ ) {
-            $msg .= " -- found target $next_token ahead $k lines\n";
-            $here_doc_expected = 1;    # got it
-            last;
-        }
-        last if ( $k >= HERE_DOC_WINDOW );
-    }
-
-    unless ($here_doc_expected) {
-
-        if ( !defined($line) ) {
-            $here_doc_expected = -1;    # hit eof without seeing target
-            $msg .= " -- must be shift; target $next_token not in file\n";
-
-        }
-        else {                          # still unsure..taking a wild guess
-
-            if ( !$is_constant{$current_package}{$next_token} ) {
-                $here_doc_expected = 1;
-                $msg .=
-                  " -- guessing it's a here-doc ($next_token not a constant)\n";
-            }
-            else {
-                $msg .=
-                  " -- guessing it's a shift ($next_token is a constant)\n";
-            }
-        }
-    }
-    write_logfile_entry($msg);
-    return $here_doc_expected;
-}
-
-sub do_quote {
-
-    # follow (or continue following) quoted string or pattern
-    # $in_quote return code:
-    #   0 - ok, found end
-    #   1 - still must find end of quote whose target is $quote_character
-    #   2 - still looking for end of first of two quotes
-    my ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth, $rtokens,
-        $rtoken_map )
-      = @_;
-
-    if ( $in_quote == 2 ) {    # two quotes/patterns to follow
-        my $ibeg = $i;
-        ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
-          follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
-            $quote_pos, $quote_depth );
-
-        if ( $in_quote == 1 ) {
-            if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
-            $quote_character = '';
-        }
-    }
-
-    if ( $in_quote == 1 ) {    # one (more) quote to follow
-        my $ibeg = $i;
-        ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
-          follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
-            $quote_pos, $quote_depth );
-    }
-    return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth );
-}
-
-sub scan_number_do {
-
-    #  scan a number in any of the formats that Perl accepts
-    #  Underbars (_) are allowed in decimal numbers.
-    #  input parameters -
-    #      $input_line  - the string to scan
-    #      $i           - pre_token index to start scanning
-    #    $rtoken_map    - reference to the pre_token map giving starting
-    #                    character position in $input_line of token $i
-    #  output parameters -
-    #    $i            - last pre_token index of the number just scanned
-    #    number        - the number (characters); or undef if not a number
-
-    my ( $input_line, $i, $rtoken_map, $input_type ) = @_;
-    my $pos_beg = $$rtoken_map[$i];
-    my $pos;
-    my $i_begin = $i;
-    my $number  = undef;
-    my $type    = $input_type;
-
-    my $first_char = substr( $input_line, $pos_beg, 1 );
-
-    # Look for bad starting characters; Shouldn't happen..
-    if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
-        warning("Program bug - scan_number given character $first_char\n");
-        report_definite_bug();
-        return ( $i, $type, $number );
-    }
-
-    # handle v-string without leading 'v' character ('Two Dot' rule)
-    # (vstring.t)
-    # TODO: v-strings may contain underscores
-    pos($input_line) = $pos_beg;
-    if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
-        $pos = pos($input_line);
-        my $numc = $pos - $pos_beg;
-        $number = substr( $input_line, $pos_beg, $numc );
-        $type = 'v';
-        unless ($saw_v_string) { report_v_string($number) }
-    }
-
-    # handle octal, hex, binary
-    if ( !defined($number) ) {
-        pos($input_line) = $pos_beg;
-        if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g )
-        {
-            $pos = pos($input_line);
-            my $numc = $pos - $pos_beg;
-            $number = substr( $input_line, $pos_beg, $numc );
-            $type = 'n';
-        }
-    }
-
-    # handle decimal
-    if ( !defined($number) ) {
-        pos($input_line) = $pos_beg;
-
-        if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
-            $pos = pos($input_line);
-
-            # watch out for things like 0..40 which would give 0. by this;
-            if (   ( substr( $input_line, $pos - 1, 1 ) eq '.' )
-                && ( substr( $input_line, $pos, 1 ) eq '.' ) )
-            {
-                $pos--;
-            }
-            my $numc = $pos - $pos_beg;
-            $number = substr( $input_line, $pos_beg, $numc );
-            $type = 'n';
-        }
-    }
-
-    # filter out non-numbers like e + - . e2  .e3 +e6
-    # the rule: at least one digit, and any 'e' must be preceded by a digit
-    if (
-        $number !~ /\d/    # no digits
-        || (   $number =~ /^(.*)[eE]/
-            && $1 !~ /\d/ )    # or no digits before the 'e'
-      )
-    {
-        $number = undef;
-        $type   = $input_type;
-        return ( $i, $type, $number );
-    }
-
-    # Found a number; now we must convert back from character position
-    # to pre_token index. An error here implies user syntax error.
-    # An example would be an invalid octal number like '009'.
-    my $error;
-    ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
-    if ($error) { warning("Possibly invalid number\n") }
-
-    return ( $i, $type, $number );
-}
-
-sub scan_bare_identifier_do {
-
-    # this routine is called to scan a token starting with an alphanumeric
-    # variable or package separator, :: or '.
-
-    my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map ) = @_;
-    my $i_begin = $i;
-    my $package = undef;
-
-    my $i_beg = $i;
-
-    # we have to back up one pretoken at a :: since each : is one pretoken
-    if ( $tok eq '::' ) { $i_beg-- }
-    if ( $tok eq '->' ) { $i_beg-- }
-    my $pos_beg = $$rtoken_map[$i_beg];
-    pos($input_line) = $pos_beg;
-
-    #  Examples:
-    #   A::B::C
-    #   A::
-    #   ::A
-    #   A'B
-    if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {
-
-        my $pos  = pos($input_line);
-        my $numc = $pos - $pos_beg;
-        $tok = substr( $input_line, $pos_beg, $numc );
-
-        # type 'w' includes anything without leading type info
-        # ($,%,@,*) including something like abc::def::ghi
-        $type = 'w';
-
-        my $sub_name = "";
-        if ( defined($2) ) { $sub_name = $2; }
-        if ( defined($1) ) {
-            $package = $1;
+            else {    # something else
 
-            # patch: don't allow isolated package name which just ends
-            # in the old style package separator (single quote).  Example:
-            #   use CGI':all';
-            if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
-                $pos--;
-            }
+                # check for various punctuation variables
+                if ( $identifier =~ /^[\$\*\@\%]$/ ) {
+                    $identifier .= $tok;
+                }
 
-            $package =~ s/\'/::/g;
-            if ( $package =~ /^\:/ ) { $package = 'main' . $package }
-            $package =~ s/::$//;
-        }
-        else {
-            $package = $current_package;
+                elsif ( $identifier eq '$#' ) {
 
-            if ( $is_keyword{$tok} ) {
-                $type = 'k';
-            }
-        }
+                    if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
 
-        # if it is a bareword..
-        if ( $type eq 'w' ) {
+                    # perl seems to allow just these: $#: $#- $#+
+                    elsif ( $tok =~ /^[\:\-\+]$/ ) {
+                        $type = 'i';
+                        $identifier .= $tok;
+                    }
+                    else {
+                        $i = $i_save;
+                        write_logfile_entry( 'Use of $# is deprecated' . "\n" );
+                    }
+                }
+                elsif ( $identifier eq '$$' ) {
 
-            # check for v-string with leading 'v' type character
-            # (This seems to have presidence over filehandle, type 'Y')
-            if ( $tok =~ /^v\d[_\d]*$/ ) {
+                    # perl does not allow references to punctuation
+                    # variables without braces.  For example, this
+                    # won't work:
+                    #  $:=\4;
+                    #  $a = $$:;
+                    # You would have to use
+                    #  $a = ${$:};
 
-                # we only have the first part - something like 'v101' -
-                # look for more
-                if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
-                    $pos  = pos($input_line);
-                    $numc = $pos - $pos_beg;
-                    $tok  = substr( $input_line, $pos_beg, $numc );
+                    $i = $i_save;
+                    if   ( $tok eq '{' ) { $type = 't' }
+                    else                 { $type = 'i' }
                 }
-                $type = 'v';
-
-                # warn if this version can't handle v-strings
-                unless ($saw_v_string) { report_v_string($tok) }
+                elsif ( $identifier eq '->' ) {
+                    $i = $i_save;
+                }
+                else {
+                    $i = $i_save;
+                    if ( length($identifier) == 1 ) { $identifier = ''; }
+                }
+                $id_scan_state = '';
+                last;
             }
+        }
+        elsif ( $id_scan_state eq '&' ) {    # starting sub call?
 
-            elsif ( $is_constant{$package}{$sub_name} ) {
-                $type = 'C';
+            if ( $tok =~ /^[\$A-Za-z_]/ ) {    # alphanumeric ..
+                $id_scan_state = ':';          # now need ::
+                $saw_alpha     = 1;
+                $identifier .= $tok;
             }
-
-            # bareword after sort has implied empty prototype; for example:
-            # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
-            # This has priority over whatever the user has specified.
-            elsif ($last_nonblank_token eq 'sort'
-                && $last_nonblank_type eq 'k' )
-            {
-                $type = 'Z';
+            elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
+                $id_scan_state = ':';                 # now need ::
+                $saw_alpha     = 1;
+                $identifier .= $tok;
             }
-
-            # Note: strangely, perl does not seem to really let you create
-            # functions which act like eval and do, in the sense that eval
-            # and do may have operators following the final }, but any operators
-            # that you create with prototype (&) apparently do not allow
-            # trailing operators, only terms.  This seems strange.
-            # If this ever changes, here is the update
-            # to make perltidy behave accordingly:
-
-            # elsif ( $is_block_function{$package}{$tok} ) {
-            #    $tok='eval'; # patch to do braces like eval  - doesn't work
-            #    $type = 'k';
-            #}
-            # FIXME: This could become a separate type to allow for different
-            # future behavior:
-            elsif ( $is_block_function{$package}{$sub_name} ) {
-                $type = 'G';
+            elsif ( $tok =~ /^[0-9]/ ) {    # numeric..see comments above
+                $id_scan_state = ':';       # now need ::
+                $saw_alpha     = 1;
+                $identifier .= $tok;
             }
-
-            elsif ( $is_block_list_function{$package}{$sub_name} ) {
-                $type = 'G';
+            elsif ( $tok =~ /^\s*$/ ) {     # allow space
             }
-            elsif ( $is_user_function{$package}{$sub_name} ) {
-                $type      = 'U';
-                $prototype = $user_function_prototype{$package}{$sub_name};
+            elsif ( $tok eq '::' ) {        # leading ::
+                $id_scan_state = 'A';       # accept alpha next
+                $identifier .= $tok;
             }
+            elsif ( $tok eq '{' ) {
+                if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
+                $i             = $i_save;
+                $id_scan_state = '';
+                last;
+            }
+            else {
 
-            # check for indirect object
-            elsif (
-
-                # added 2001-03-27: must not be followed immediately by '('
-                # see fhandle.t
-                ( $input_line !~ m/\G\(/gc )
-
-                # and
-                && (
-
-                    # preceded by keyword like 'print', 'printf' and friends
-                    $is_indirect_object_taker{$last_nonblank_token}
-
-                    # or preceded by something like 'print(' or 'printf('
-                    || (
-                        ( $last_nonblank_token eq '(' )
-                        && $is_indirect_object_taker{ $paren_type[$paren_depth]
-                        }
-
-                    )
-                )
-              )
-            {
-
-                # may not be indirect object unless followed by a space
-                if ( $input_line =~ m/\G\s+/gc ) {
-                    $type = 'Y';
+                # punctuation variable?
+                # testfile: cunningham4.pl
+                if ( $identifier eq '&' ) {
+                    $identifier .= $tok;
+                }
+                else {
+                    $identifier = '';
+                    $i          = $i_save;
+                    $type       = '&';
+                }
+                $id_scan_state = '';
+                last;
+            }
+        }
+        elsif ( $id_scan_state eq 'A' ) {    # looking for alpha (after ::)
 
-                    # Abandon Hope ...
-                    # Perl's indirect object notation is a very bad
-                    # thing and can cause subtle bugs, especially for
-                    # beginning programmers.  And I haven't even been
-                    # able to figure out a sane warning scheme which
-                    # doesn't get in the way of good scripts.
+            if ( $tok =~ /^[A-Za-z_]/ ) {    # found it
+                $identifier .= $tok;
+                $id_scan_state = ':';        # now need ::
+                $saw_alpha     = 1;
+            }
+            elsif ( $tok eq "'" && $allow_tick ) {
+                $identifier .= $tok;
+                $id_scan_state = ':';        # now need ::
+                $saw_alpha     = 1;
+            }
+            elsif ( $tok =~ /^[0-9]/ ) {     # numeric..see comments above
+                $identifier .= $tok;
+                $id_scan_state = ':';        # now need ::
+                $saw_alpha     = 1;
+            }
+            elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
+                $id_scan_state = '(';
+                $identifier .= $tok;
+            }
+            elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
+                $id_scan_state = ')';
+                $identifier .= $tok;
+            }
+            else {
+                $id_scan_state = '';
+                $i             = $i_save;
+                last;
+            }
+        }
+        elsif ( $id_scan_state eq ':' ) {    # looking for :: after alpha
 
-                    # Complain if a filehandle has any lower case
-                    # letters.  This is suggested good practice, but the
-                    # main reason for this warning is that prior to
-                    # release 20010328, perltidy incorrectly parsed a
-                    # function call after a print/printf, with the
-                    # result that a space got added before the opening
-                    # paren, thereby converting the function name to a
-                    # filehandle according to perl's weird rules.  This
-                    # will not usually generate a syntax error, so this
-                    # is a potentially serious bug.  By warning
-                    # of filehandles with any lower case letters,
-                    # followed by opening parens, we will help the user
-                    # find almost all of these older errors.
-                    # use 'sub_name' because something like
-                    # main::MYHANDLE is ok for filehandle
-                    if ( $sub_name =~ /[a-z]/ ) {
+            if ( $tok eq '::' ) {            # got it
+                $identifier .= $tok;
+                $id_scan_state = 'A';        # now require alpha
+            }
+            elsif ( $tok =~ /^[A-Za-z_]/ ) {    # more alphanumeric is ok here
+                $identifier .= $tok;
+                $id_scan_state = ':';           # now need ::
+                $saw_alpha     = 1;
+            }
+            elsif ( $tok =~ /^[0-9]/ ) {        # numeric..see comments above
+                $identifier .= $tok;
+                $id_scan_state = ':';           # now need ::
+                $saw_alpha     = 1;
+            }
+            elsif ( $tok eq "'" && $allow_tick ) {    # tick
 
-                        # could be bug caused by older perltidy if
-                        # followed by '('
-                        if ( $input_line =~ m/\G\s*\(/gc ) {
-                            complain(
-"Caution: unknown word '$tok' in indirect object slot\n"
-                            );
-                        }
-                    }
+                if ( $is_keyword{$identifier} ) {
+                    $id_scan_state = '';              # that's all
+                    $i             = $i_save;
+                }
+                else {
+                    $identifier .= $tok;
                 }
+            }
+            elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
+                $id_scan_state = '(';
+                $identifier .= $tok;
+            }
+            elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
+                $id_scan_state = ')';
+                $identifier .= $tok;
+            }
+            else {
+                $id_scan_state = '';        # that's all
+                $i             = $i_save;
+                last;
+            }
+        }
+        elsif ( $id_scan_state eq '(' ) {    # looking for ( of prototype
 
-                # bareword not followed by a space -- may not be filehandle
-                # (may be function call defined in a 'use' statement)
-                else {
-                    $type = 'Z';
-                }
+            if ( $tok eq '(' ) {             # got it
+                $identifier .= $tok;
+                $id_scan_state = ')';        # now find the end of it
+            }
+            elsif ( $tok =~ /^\s*$/ ) {      # blank - keep going
+                $identifier .= $tok;
+            }
+            else {
+                $id_scan_state = '';         # that's all - no prototype
+                $i             = $i_save;
+                last;
             }
         }
+        elsif ( $id_scan_state eq ')' ) {    # looking for ) to end
 
-        # Now we must convert back from character position
-        # to pre_token index.
-        # I don't think an error flag can occur here ..but who knows
-        my $error;
-        ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
-        if ($error) {
-            warning("scan_bare_identifier: Possibly invalid tokenization\n");
+            if ( $tok eq ')' ) {             # got it
+                $identifier .= $tok;
+                $id_scan_state = '';         # all done
+                last;
+            }
+            elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
+                $identifier .= $tok;
+            }
+            else {    # probable error in script, but keep going
+                warning("Unexpected '$tok' while seeking end of prototype\n");
+                $identifier .= $tok;
+            }
+        }
+        else {        # can get here due to error in initialization
+            $id_scan_state = '';
+            $i             = $i_save;
+            last;
         }
     }
 
-    # no match but line not blank - could be syntax error
-    # perl will take '::' alone without complaint
-    else {
-        $type = 'w';
-
-        # change this warning to log message if it becomes annoying
-        warning("didn't find identifier after leading ::\n");
+    if ( $id_scan_state eq ')' ) {
+        warning("Hit end of line while seeking ) to end prototype\n");
     }
-    return ( $i, $tok, $type, $prototype );
-}
-
-sub scan_id_do {
-
-    # This is the new scanner and will eventually replace scan_identifier.
-    # Only type 'sub' and 'package' are implemented.
-    # Token types $ * % @ & -> are not yet implemented.
-    #
-    # Scan identifier following a type token.
-    # The type of call depends on $id_scan_state: $id_scan_state = ''
-    # for starting call, in which case $tok must be the token defining
-    # the type.
-    #
-    # If the type token is the last nonblank token on the line, a value
-    # of $id_scan_state = $tok is returned, indicating that further
-    # calls must be made to get the identifier.  If the type token is
-    # not the last nonblank token on the line, the identifier is
-    # scanned and handled and a value of '' is returned.
-
-    my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state ) = @_;
-    my $type = '';
-    my ( $i_beg, $pos_beg );
-
-    #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
-    #my ($a,$b,$c) = caller;
-    #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
 
-    # on re-entry, start scanning at first token on the line
-    if ($id_scan_state) {
-        $i_beg = $i;
-        $type  = '';
+    # once we enter the actual identifier, it may not extend beyond
+    # the end of the current line
+    if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
+        $id_scan_state = '';
     }
+    if ( $i < 0 ) { $i = 0 }
 
-    # on initial entry, start scanning just after type token
-    else {
-        $i_beg         = $i + 1;
-        $id_scan_state = $tok;
-        $type          = 't';
-    }
+    unless ($type) {
 
-    # find $i_beg = index of next nonblank token,
-    # and handle empty lines
-    my $blank_line          = 0;
-    my $next_nonblank_token = $$rtokens[$i_beg];
-    if ( $i_beg > $max_token_index ) {
-        $blank_line = 1;
-    }
-    else {
+        if ($saw_type) {
 
-        # only a '#' immediately after a '$' is not a comment
-        if ( $next_nonblank_token eq '#' ) {
-            unless ( $tok eq '$' ) {
-                $blank_line = 1;
+            if ($saw_alpha) {
+                if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
+                    $type = 'w';
+                }
+                else { $type = 'i' }
             }
-        }
-
-        if ( $next_nonblank_token =~ /^\s/ ) {
-            ( $next_nonblank_token, $i_beg ) =
-              find_next_nonblank_token_on_this_line( $i_beg, $rtokens );
-            if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
-                $blank_line = 1;
+            elsif ( $identifier eq '->' ) {
+                $type = '->';
             }
-        }
-    }
-
-    # handle non-blank line; identifier, if any, must follow
-    unless ($blank_line) {
+            elsif (
+                ( length($identifier) > 1 )
 
-        if ( $id_scan_state eq 'sub' ) {
-            ( $i, $tok, $type, $id_scan_state ) =
-              do_scan_sub( $input_line, $i, $i_beg, $tok, $type, $rtokens,
-                $rtoken_map, $id_scan_state );
+                # In something like '@$=' we have an identifier '@$'
+                # In something like '$${' we have type '$$' (and only
+                # part of an identifier)
+                && !( $identifier =~ /\$$/ && $tok eq '{' )
+                && ( $identifier !~ /^(sub |package )$/ )
+              )
+            {
+                $type = 'i';
+            }
+            else { $type = 't' }
         }
+        elsif ($saw_alpha) {
 
-        elsif ( $id_scan_state eq 'package' ) {
-            ( $i, $tok, $type ) =
-              do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
-                $rtoken_map );
-            $id_scan_state = '';
+            # type 'w' includes anything without leading type info
+            # ($,%,@,*) including something like abc::def::ghi
+            $type = 'w';
         }
-
         else {
-            warning("invalid token in scan_id: $tok\n");
-            $id_scan_state = '';
-        }
+            $type = '';
+        }    # this can happen on a restart
     }
 
-    if ( $id_scan_state && ( !defined($type) || !$type ) ) {
-
-        # shouldn't happen:
-        warning(
-"Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
-        );
-        report_definite_bug();
+    if ($identifier) {
+        $tok = $identifier;
+        if ($message) { write_logfile_entry($message) }
+    }
+    else {
+        $tok = $tok_begin;
+        $i   = $i_begin;
     }
 
-    TOKENIZER_DEBUG_FLAG_NSCAN && do {
+    TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
+        my ( $a, $b, $c ) = caller;
         print
-          "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
+"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
+        print
+"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
     };
-    return ( $i, $tok, $type, $id_scan_state );
+    return ( $i, $tok, $type, $id_scan_state, $identifier );
 }
 
 {
@@ -24080,10 +24790,15 @@ sub scan_id_do {
         # sub name.  For example, 'sub &doit' is wrong.  Also, be sure
         # a name is given if and only if a non-anonymous sub is
         # appropriate.
+        # USES GLOBAL VARS: $current_package, $last_nonblank_token,
+        # $in_attribute_list, %saw_function_definition,
+        # $statement_type
 
-        my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
-            $id_scan_state )
-          = @_;
+        my (
+            $input_line, $i,             $i_beg,
+            $tok,        $type,          $rtokens,
+            $rtoken_map, $id_scan_state, $max_token_index
+        ) = @_;
         $id_scan_state = "";    # normally we get everything in one call
         my $subname = undef;
         my $package = undef;
@@ -24165,12 +24880,15 @@ sub scan_id_do {
 
                 # I don't think an error flag can occur here ..but ?
                 my $error;
-                ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
+                ( $i, $error ) =
+                  inverse_pretoken_map( $i, $pos, $rtoken_map,
+                    $max_token_index );
                 if ($error) { warning("Possibly invalid sub\n") }
 
                 # check for multiple definitions of a sub
                 ( $next_nonblank_token, my $i_next ) =
-                  find_next_nonblank_token_on_this_line( $i, $rtokens );
+                  find_next_nonblank_token_on_this_line( $i, $rtokens,
+                    $max_token_index );
             }
 
             if ( $next_nonblank_token =~ /^(\s*|#)$/ )
@@ -24195,7 +24913,7 @@ sub scan_id_do {
                         );
                     }
                     $saw_function_definition{$package}{$subname} =
-                      $input_line_number;
+                      $tokenizer_self->{_last_line_number};
                 }
             }
             elsif ( $next_nonblank_token eq ';' ) {
@@ -24240,554 +24958,537 @@ sub scan_id_do {
     }
 }
 
-sub check_prototype {
-    my ( $proto, $package, $subname ) = @_;
-    return unless ( defined($package) && defined($subname) );
-    if ( defined($proto) ) {
-        $proto =~ s/^\s*\(\s*//;
-        $proto =~ s/\s*\)$//;
-        if ($proto) {
-            $is_user_function{$package}{$subname}        = 1;
-            $user_function_prototype{$package}{$subname} = "($proto)";
-
-            # prototypes containing '&' must be treated specially..
-            if ( $proto =~ /\&/ ) {
-
-                # right curly braces of prototypes ending in
-                # '&' may be followed by an operator
-                if ( $proto =~ /\&$/ ) {
-                    $is_block_function{$package}{$subname} = 1;
-                }
-
-                # right curly braces of prototypes NOT ending in
-                # '&' may NOT be followed by an operator
-                elsif ( $proto !~ /\&$/ ) {
-                    $is_block_list_function{$package}{$subname} = 1;
-                }
-            }
-        }
-        else {
-            $is_constant{$package}{$subname} = 1;
-        }
-    }
-    else {
-        $is_user_function{$package}{$subname} = 1;
-    }
-}
-
-sub do_scan_package {
-
-    # do_scan_package parses a package name
-    # it is called with $i_beg equal to the index of the first nonblank
-    # token following a 'package' token.
-
-    my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map ) = @_;
-    my $package = undef;
-    my $pos_beg = $$rtoken_map[$i_beg];
-    pos($input_line) = $pos_beg;
-
-    # handle non-blank line; package name, if any, must follow
-    if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
-        $package = $1;
-        $package = ( defined($1) && $1 ) ? $1 : 'main';
-        $package =~ s/\'/::/g;
-        if ( $package =~ /^\:/ ) { $package = 'main' . $package }
-        $package =~ s/::$//;
-        my $pos  = pos($input_line);
-        my $numc = $pos - $pos_beg;
-        $tok  = 'package ' . substr( $input_line, $pos_beg, $numc );
-        $type = 'i';
-
-        # Now we must convert back from character position
-        # to pre_token index.
-        # I don't think an error flag can occur here ..but ?
-        my $error;
-        ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
-        if ($error) { warning("Possibly invalid package\n") }
-        $current_package = $package;
-
-        # check for error
-        my ( $next_nonblank_token, $i_next ) =
-          find_next_nonblank_token( $i, $rtokens );
-        if ( $next_nonblank_token !~ /^[;\}]$/ ) {
-            warning(
-                "Unexpected '$next_nonblank_token' after package name '$tok'\n"
-            );
-        }
-    }
-
-    # no match but line not blank --
-    # could be a label with name package, like package:  , for example.
-    else {
-        $type = 'k';
-    }
-
-    return ( $i, $tok, $type );
-}
-
-sub scan_identifier_do {
+#########i###############################################################
+# Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
+#########################################################################
 
-    # This routine assembles tokens into identifiers.  It maintains a
-    # scan state, id_scan_state.  It updates id_scan_state based upon
-    # current id_scan_state and token, and returns an updated
-    # id_scan_state and the next index after the identifier.
+sub find_next_nonblank_token {
+    my ( $i, $rtokens, $max_token_index ) = @_;
 
-    my ( $i, $id_scan_state, $identifier, $rtokens ) = @_;
-    my $i_begin   = $i;
-    my $type      = '';
-    my $tok_begin = $$rtokens[$i_begin];
-    if ( $tok_begin eq ':' ) { $tok_begin = '::' }
-    my $id_scan_state_begin = $id_scan_state;
-    my $identifier_begin    = $identifier;
-    my $tok                 = $tok_begin;
-    my $message             = "";
+    if ( $i >= $max_token_index ) {
+        if ( !peeked_ahead() ) {
+            peeked_ahead(1);
+            $rtokens =
+              peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
+        }
+    }
+    my $next_nonblank_token = $$rtokens[ ++$i ];
 
-    # these flags will be used to help figure out the type:
-    my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
-    my $saw_type;
+    if ( $next_nonblank_token =~ /^\s*$/ ) {
+        $next_nonblank_token = $$rtokens[ ++$i ];
+    }
+    return ( $next_nonblank_token, $i );
+}
 
-    # allow old package separator (') except in 'use' statement
-    my $allow_tick = ( $last_nonblank_token ne 'use' );
+sub numerator_expected {
 
-    # get started by defining a type and a state if necessary
-    unless ($id_scan_state) {
-        $context = UNKNOWN_CONTEXT;
+    # this is a filter for a possible numerator, in support of guessing
+    # for the / pattern delimiter token.
+    # returns -
+    #   1 - yes
+    #   0 - can't tell
+    #  -1 - no
+    # Note: I am using the convention that variables ending in
+    # _expected have these 3 possible values.
+    my ( $i, $rtokens, $max_token_index ) = @_;
+    my $next_token = $$rtokens[ $i + 1 ];
+    if ( $next_token eq '=' ) { $i++; }    # handle /=
+    my ( $next_nonblank_token, $i_next ) =
+      find_next_nonblank_token( $i, $rtokens, $max_token_index );
 
-        # fixup for digraph
-        if ( $tok eq '>' ) {
-            $tok       = '->';
-            $tok_begin = $tok;
-        }
-        $identifier = $tok;
+    if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
+        1;
+    }
+    else {
 
-        if ( $tok eq '$' || $tok eq '*' ) {
-            $id_scan_state = '$';
-            $context       = SCALAR_CONTEXT;
-        }
-        elsif ( $tok eq '%' || $tok eq '@' ) {
-            $id_scan_state = '$';
-            $context       = LIST_CONTEXT;
-        }
-        elsif ( $tok eq '&' ) {
-            $id_scan_state = '&';
-        }
-        elsif ( $tok eq 'sub' or $tok eq 'package' ) {
-            $saw_alpha     = 0;     # 'sub' is considered type info here
-            $id_scan_state = '$';
-            $identifier .= ' ';     # need a space to separate sub from sub name
-        }
-        elsif ( $tok eq '::' ) {
-            $id_scan_state = 'A';
-        }
-        elsif ( $tok =~ /^[A-Za-z_]/ ) {
-            $id_scan_state = ':';
-        }
-        elsif ( $tok eq '->' ) {
-            $id_scan_state = '$';
+        if ( $next_nonblank_token =~ /^\s*$/ ) {
+            0;
         }
         else {
-
-            # shouldn't happen
-            my ( $a, $b, $c ) = caller;
-            warning("Program Bug: scan_identifier given bad token = $tok \n");
-            warning("   called from sub $a  line: $c\n");
-            report_definite_bug();
+            -1;
         }
-        $saw_type = !$saw_alpha;
-    }
-    else {
-        $i--;
-        $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
     }
+}
 
-    # now loop to gather the identifier
-    my $i_save = $i;
+sub pattern_expected {
 
-    while ( $i < $max_token_index ) {
-        $i_save = $i unless ( $tok =~ /^\s*$/ );
-        $tok    = $$rtokens[ ++$i ];
+    # This is the start of a filter for a possible pattern.
+    # It looks at the token after a possbible pattern and tries to
+    # determine if that token could end a pattern.
+    # returns -
+    #   1 - yes
+    #   0 - can't tell
+    #  -1 - no
+    my ( $i, $rtokens, $max_token_index ) = @_;
+    my $next_token = $$rtokens[ $i + 1 ];
+    if ( $next_token =~ /^[cgimosx]/ ) { $i++; }    # skip possible modifier
+    my ( $next_nonblank_token, $i_next ) =
+      find_next_nonblank_token( $i, $rtokens, $max_token_index );
 
-        if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
-            $tok = '::';
-            $i++;
-        }
+    # list of tokens which may follow a pattern
+    # (can probably be expanded)
+    if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
+    {
+        1;
+    }
+    else {
 
-        if ( $id_scan_state eq '$' ) {    # starting variable name
+        if ( $next_nonblank_token =~ /^\s*$/ ) {
+            0;
+        }
+        else {
+            -1;
+        }
+    }
+}
 
-            if ( $tok eq '$' ) {
+sub find_next_nonblank_token_on_this_line {
+    my ( $i, $rtokens, $max_token_index ) = @_;
+    my $next_nonblank_token;
 
-                $identifier .= $tok;
+    if ( $i < $max_token_index ) {
+        $next_nonblank_token = $$rtokens[ ++$i ];
 
-                # we've got a punctuation variable if end of line (punct.t)
-                if ( $i == $max_token_index ) {
-                    $type          = 'i';
-                    $id_scan_state = '';
-                    last;
-                }
-            }
-            elsif ( $tok =~ /^[A-Za-z_]/ ) {    # alphanumeric ..
-                $saw_alpha     = 1;
-                $id_scan_state = ':';           # now need ::
-                $identifier .= $tok;
-            }
-            elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
-                $saw_alpha     = 1;
-                $id_scan_state = ':';                 # now need ::
-                $identifier .= $tok;
+        if ( $next_nonblank_token =~ /^\s*$/ ) {
 
-                # Perl will accept leading digits in identifiers,
-                # although they may not always produce useful results.
-                # Something like $main::0 is ok.  But this also works:
-                #
-                #  sub howdy::123::bubba{ print "bubba $54321!\n" }
-                #  howdy::123::bubba();
-                #
-            }
-            elsif ( $tok =~ /^[0-9]/ ) {              # numeric
-                $saw_alpha     = 1;
-                $id_scan_state = ':';                 # now need ::
-                $identifier .= $tok;
-            }
-            elsif ( $tok eq '::' ) {
-                $id_scan_state = 'A';
-                $identifier .= $tok;
-            }
-            elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) {    # $#array
-                $identifier .= $tok;    # keep same state, a $ could follow
+            if ( $i < $max_token_index ) {
+                $next_nonblank_token = $$rtokens[ ++$i ];
             }
-            elsif ( $tok eq '{' ) {
-
-                # check for something like ${#} or ${©}
-                if (   $identifier eq '$'
-                    && $i + 2 <= $max_token_index
-                    && $$rtokens[ $i + 2 ] eq '}'
-                    && $$rtokens[ $i + 1 ] !~ /[\s\w]/ )
-                {
-                    my $next2 = $$rtokens[ $i + 2 ];
-                    my $next1 = $$rtokens[ $i + 1 ];
-                    $identifier .= $tok . $next1 . $next2;
-                    $i += 2;
-                    $id_scan_state = '';
-                    last;
-                }
+        }
+    }
+    else {
+        $next_nonblank_token = "";
+    }
+    return ( $next_nonblank_token, $i );
+}
 
-                # skip something like ${xxx} or ->{
-                $id_scan_state = '';
+sub find_angle_operator_termination {
 
-                # if this is the first token of a line, any tokens for this
-                # identifier have already been accumulated
-                if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
-                $i = $i_save;
-                last;
-            }
+    # We are looking at a '<' and want to know if it is an angle operator.
+    # We are to return:
+    #   $i = pretoken index of ending '>' if found, current $i otherwise
+    #   $type = 'Q' if found, '>' otherwise
+    my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_;
+    my $i    = $i_beg;
+    my $type = '<';
+    pos($input_line) = 1 + $$rtoken_map[$i];
 
-            # space ok after leading $ % * & @
-            elsif ( $tok =~ /^\s*$/ ) {
+    my $filter;
 
-                if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
+    # we just have to find the next '>' if a term is expected
+    if ( $expecting == TERM ) { $filter = '[\>]' }
 
-                    if ( length($identifier) > 1 ) {
-                        $id_scan_state = '';
-                        $i             = $i_save;
-                        $type          = 'i';    # probably punctuation variable
-                        last;
-                    }
-                    else {
+    # we have to guess if we don't know what is expected
+    elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
 
-                        # spaces after $'s are common, and space after @
-                        # is harmless, so only complain about space
-                        # after other type characters. Space after $ and
-                        # @ will be removed in formatting.  Report space
-                        # after % and * because they might indicate a
-                        # parsing error.  In other words '% ' might be a
-                        # modulo operator.  Delete this warning if it
-                        # gets annoying.
-                        if ( $identifier !~ /^[\@\$]$/ ) {
-                            $message =
-                              "Space in identifier, following $identifier\n";
-                        }
-                    }
-                }
+    # shouldn't happen - we shouldn't be here if operator is expected
+    else { warning("Program Bug in find_angle_operator_termination\n") }
 
-                # else:
-                # space after '->' is ok
-            }
-            elsif ( $tok eq '^' ) {
+    # To illustrate what we might be looking at, in case we are
+    # guessing, here are some examples of valid angle operators
+    # (or file globs):
+    #  <tmp_imp/*>
+    #  <FH>
+    #  <$fh>
+    #  <*.c *.h>
+    #  <_>
+    #  <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
+    #  <${PREFIX}*img*.$IMAGE_TYPE>
+    #  <img*.$IMAGE_TYPE>
+    #  <Timg*.$IMAGE_TYPE>
+    #  <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
+    #
+    # Here are some examples of lines which do not have angle operators:
+    #  return undef unless $self->[2]++ < $#{$self->[1]};
+    #  < 2  || @$t >
+    #
+    # the following line from dlister.pl caused trouble:
+    #  print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
+    #
+    # If the '<' starts an angle operator, it must end on this line and
+    # it must not have certain characters like ';' and '=' in it.  I use
+    # this to limit the testing.  This filter should be improved if
+    # possible.
 
-                # check for some special variables like $^W
-                if ( $identifier =~ /^[\$\*\@\%]$/ ) {
-                    $identifier .= $tok;
-                    $id_scan_state = 'A';
-                }
-                else {
-                    $id_scan_state = '';
-                }
-            }
-            else {    # something else
+    if ( $input_line =~ /($filter)/g ) {
 
-                # check for various punctuation variables
-                if ( $identifier =~ /^[\$\*\@\%]$/ ) {
-                    $identifier .= $tok;
-                }
+        if ( $1 eq '>' ) {
 
-                elsif ( $identifier eq '$#' ) {
+            # We MAY have found an angle operator termination if we get
+            # here, but we need to do more to be sure we haven't been
+            # fooled.
+            my $pos = pos($input_line);
 
-                    if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
+            my $pos_beg = $$rtoken_map[$i];
+            my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
 
-                    # perl seems to allow just these: $#: $#- $#+
-                    elsif ( $tok =~ /^[\:\-\+]$/ ) {
-                        $type = 'i';
-                        $identifier .= $tok;
-                    }
-                    else {
-                        $i = $i_save;
-                        write_logfile_entry( 'Use of $# is deprecated' . "\n" );
-                    }
+            # Reject if the closing '>' follows a '-' as in:
+            # if ( VERSION < 5.009 && $op-> name eq 'aassign' ) { }
+            if ( $expecting eq UNKNOWN ) {
+                my $check = substr( $input_line, $pos - 2, 1 );
+                if ( $check eq '-' ) {
+                    return ( $i, $type );
                 }
-                elsif ( $identifier eq '$$' ) {
+            }
 
-                    # perl does not allow references to punctuation
-                    # variables without braces.  For example, this
-                    # won't work:
-                    #  $:=\4;
-                    #  $a = $$:;
-                    # You would have to use
-                    #  $a = ${$:};
+            ######################################debug#####
+            #write_diagnostics( "ANGLE? :$str\n");
+            #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
+            ######################################debug#####
+            $type = 'Q';
+            my $error;
+            ( $i, $error ) =
+              inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
 
-                    $i = $i_save;
-                    if ( $tok eq '{' ) { $type = 't' }
-                    else { $type = 'i' }
-                }
-                elsif ( $identifier eq '->' ) {
-                    $i = $i_save;
-                }
-                else {
-                    $i = $i_save;
-                    if ( length($identifier) == 1 ) { $identifier = ''; }
-                }
-                $id_scan_state = '';
-                last;
+            # It may be possible that a quote ends midway in a pretoken.
+            # If this happens, it may be necessary to split the pretoken.
+            if ($error) {
+                warning(
+                    "Possible tokinization error..please check this line\n");
+                report_possible_bug();
             }
-        }
-        elsif ( $id_scan_state eq '&' ) {    # starting sub call?
 
-            if ( $tok =~ /^[\$A-Za-z_]/ ) {    # alphanumeric ..
-                $id_scan_state = ':';          # now need ::
-                $saw_alpha     = 1;
-                $identifier .= $tok;
-            }
-            elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
-                $id_scan_state = ':';                 # now need ::
-                $saw_alpha     = 1;
-                $identifier .= $tok;
-            }
-            elsif ( $tok =~ /^[0-9]/ ) {    # numeric..see comments above
-                $id_scan_state = ':';       # now need ::
-                $saw_alpha     = 1;
-                $identifier .= $tok;
-            }
-            elsif ( $tok =~ /^\s*$/ ) {     # allow space
-            }
-            elsif ( $tok eq '::' ) {        # leading ::
-                $id_scan_state = 'A';       # accept alpha next
-                $identifier .= $tok;
+            # Now let's see where we stand....
+            # OK if math op not possible
+            if ( $expecting == TERM ) {
             }
-            elsif ( $tok eq '{' ) {
-                if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
-                $i             = $i_save;
-                $id_scan_state = '';
-                last;
+
+            # OK if there are no more than 2 pre-tokens inside
+            # (not possible to write 2 token math between < and >)
+            # This catches most common cases
+            elsif ( $i <= $i_beg + 3 ) {
+                write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
             }
+
+            # Not sure..
             else {
 
-                # punctuation variable?
-                # testfile: cunningham4.pl
-                if ( $identifier eq '&' ) {
-                    $identifier .= $tok;
+                # Let's try a Brace Test: any braces inside must balance
+                my $br = 0;
+                while ( $str =~ /\{/g ) { $br++ }
+                while ( $str =~ /\}/g ) { $br-- }
+                my $sb = 0;
+                while ( $str =~ /\[/g ) { $sb++ }
+                while ( $str =~ /\]/g ) { $sb-- }
+                my $pr = 0;
+                while ( $str =~ /\(/g ) { $pr++ }
+                while ( $str =~ /\)/g ) { $pr-- }
+
+                # if braces do not balance - not angle operator
+                if ( $br || $sb || $pr ) {
+                    $i    = $i_beg;
+                    $type = '<';
+                    write_diagnostics(
+                        "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
                 }
+
+                # we should keep doing more checks here...to be continued
+                # Tentatively accepting this as a valid angle operator.
+                # There are lots more things that can be checked.
                 else {
-                    $identifier = '';
-                    $i          = $i_save;
-                    $type       = '&';
+                    write_diagnostics(
+                        "ANGLE-Guessing yes: $str expecting=$expecting\n");
+                    write_logfile_entry("Guessing angle operator here: $str\n");
                 }
-                $id_scan_state = '';
-                last;
             }
         }
-        elsif ( $id_scan_state eq 'A' ) {    # looking for alpha (after ::)
 
-            if ( $tok =~ /^[A-Za-z_]/ ) {    # found it
-                $identifier .= $tok;
-                $id_scan_state = ':';        # now need ::
-                $saw_alpha     = 1;
-            }
-            elsif ( $tok eq "'" && $allow_tick ) {
-                $identifier .= $tok;
-                $id_scan_state = ':';        # now need ::
-                $saw_alpha     = 1;
-            }
-            elsif ( $tok =~ /^[0-9]/ ) {     # numeric..see comments above
-                $identifier .= $tok;
-                $id_scan_state = ':';        # now need ::
-                $saw_alpha     = 1;
-            }
-            elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
-                $id_scan_state = '(';
-                $identifier .= $tok;
-            }
-            elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
-                $id_scan_state = ')';
-                $identifier .= $tok;
-            }
-            else {
-                $id_scan_state = '';
-                $i             = $i_save;
-                last;
+        # didn't find ending >
+        else {
+            if ( $expecting == TERM ) {
+                warning("No ending > for angle operator\n");
             }
         }
-        elsif ( $id_scan_state eq ':' ) {    # looking for :: after alpha
+    }
+    return ( $i, $type );
+}
 
-            if ( $tok eq '::' ) {            # got it
-                $identifier .= $tok;
-                $id_scan_state = 'A';        # now require alpha
-            }
-            elsif ( $tok =~ /^[A-Za-z_]/ ) {    # more alphanumeric is ok here
-                $identifier .= $tok;
-                $id_scan_state = ':';           # now need ::
-                $saw_alpha     = 1;
-            }
-            elsif ( $tok =~ /^[0-9]/ ) {        # numeric..see comments above
-                $identifier .= $tok;
-                $id_scan_state = ':';           # now need ::
-                $saw_alpha     = 1;
-            }
-            elsif ( $tok eq "'" && $allow_tick ) {    # tick
+sub scan_number_do {
 
-                if ( $is_keyword{$identifier} ) {
-                    $id_scan_state = '';              # that's all
-                    $i             = $i_save;
-                }
-                else {
-                    $identifier .= $tok;
-                }
-            }
-            elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
-                $id_scan_state = '(';
-                $identifier .= $tok;
-            }
-            elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
-                $id_scan_state = ')';
-                $identifier .= $tok;
-            }
-            else {
-                $id_scan_state = '';        # that's all
-                $i             = $i_save;
-                last;
-            }
-        }
-        elsif ( $id_scan_state eq '(' ) {    # looking for ( of prototype
+    #  scan a number in any of the formats that Perl accepts
+    #  Underbars (_) are allowed in decimal numbers.
+    #  input parameters -
+    #      $input_line  - the string to scan
+    #      $i           - pre_token index to start scanning
+    #    $rtoken_map    - reference to the pre_token map giving starting
+    #                    character position in $input_line of token $i
+    #  output parameters -
+    #    $i            - last pre_token index of the number just scanned
+    #    number        - the number (characters); or undef if not a number
+
+    my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_;
+    my $pos_beg = $$rtoken_map[$i];
+    my $pos;
+    my $i_begin = $i;
+    my $number  = undef;
+    my $type    = $input_type;
+
+    my $first_char = substr( $input_line, $pos_beg, 1 );
+
+    # Look for bad starting characters; Shouldn't happen..
+    if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
+        warning("Program bug - scan_number given character $first_char\n");
+        report_definite_bug();
+        return ( $i, $type, $number );
+    }
+
+    # handle v-string without leading 'v' character ('Two Dot' rule)
+    # (vstring.t)
+    # TODO: v-strings may contain underscores
+    pos($input_line) = $pos_beg;
+    if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
+        $pos = pos($input_line);
+        my $numc = $pos - $pos_beg;
+        $number = substr( $input_line, $pos_beg, $numc );
+        $type = 'v';
+        report_v_string($number);
+    }
 
-            if ( $tok eq '(' ) {             # got it
-                $identifier .= $tok;
-                $id_scan_state = ')';        # now find the end of it
-            }
-            elsif ( $tok =~ /^\s*$/ ) {      # blank - keep going
-                $identifier .= $tok;
-            }
-            else {
-                $id_scan_state = '';         # that's all - no prototype
-                $i             = $i_save;
-                last;
-            }
+    # handle octal, hex, binary
+    if ( !defined($number) ) {
+        pos($input_line) = $pos_beg;
+        if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g )
+        {
+            $pos = pos($input_line);
+            my $numc = $pos - $pos_beg;
+            $number = substr( $input_line, $pos_beg, $numc );
+            $type = 'n';
         }
-        elsif ( $id_scan_state eq ')' ) {    # looking for ) to end
+    }
 
-            if ( $tok eq ')' ) {             # got it
-                $identifier .= $tok;
-                $id_scan_state = '';         # all done
-                last;
-            }
-            elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
-                $identifier .= $tok;
-            }
-            else {    # probable error in script, but keep going
-                warning("Unexpected '$tok' while seeking end of prototype\n");
-                $identifier .= $tok;
+    # handle decimal
+    if ( !defined($number) ) {
+        pos($input_line) = $pos_beg;
+
+        if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
+            $pos = pos($input_line);
+
+            # watch out for things like 0..40 which would give 0. by this;
+            if (   ( substr( $input_line, $pos - 1, 1 ) eq '.' )
+                && ( substr( $input_line, $pos, 1 ) eq '.' ) )
+            {
+                $pos--;
             }
+            my $numc = $pos - $pos_beg;
+            $number = substr( $input_line, $pos_beg, $numc );
+            $type = 'n';
         }
-        else {        # can get here due to error in initialization
-            $id_scan_state = '';
-            $i             = $i_save;
+    }
+
+    # filter out non-numbers like e + - . e2  .e3 +e6
+    # the rule: at least one digit, and any 'e' must be preceded by a digit
+    if (
+        $number !~ /\d/    # no digits
+        || (   $number =~ /^(.*)[eE]/
+            && $1 !~ /\d/ )    # or no digits before the 'e'
+      )
+    {
+        $number = undef;
+        $type   = $input_type;
+        return ( $i, $type, $number );
+    }
+
+    # Found a number; now we must convert back from character position
+    # to pre_token index. An error here implies user syntax error.
+    # An example would be an invalid octal number like '009'.
+    my $error;
+    ( $i, $error ) =
+      inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
+    if ($error) { warning("Possibly invalid number\n") }
+
+    return ( $i, $type, $number );
+}
+
+sub inverse_pretoken_map {
+
+    # Starting with the current pre_token index $i, scan forward until
+    # finding the index of the next pre_token whose position is $pos.
+    my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
+    my $error = 0;
+
+    while ( ++$i <= $max_token_index ) {
+
+        if ( $pos <= $$rtoken_map[$i] ) {
+
+            # Let the calling routine handle errors in which we do not
+            # land on a pre-token boundary.  It can happen by running
+            # perltidy on some non-perl scripts, for example.
+            if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
+            $i--;
             last;
         }
     }
+    return ( $i, $error );
+}
 
-    if ( $id_scan_state eq ')' ) {
-        warning("Hit end of line while seeking ) to end prototype\n");
-    }
+sub find_here_doc {
 
-    # once we enter the actual identifier, it may not extend beyond
-    # the end of the current line
-    if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
-        $id_scan_state = '';
+    # find the target of a here document, if any
+    # input parameters:
+    #   $i - token index of the second < of <<
+    #   ($i must be less than the last token index if this is called)
+    # output parameters:
+    #   $found_target = 0 didn't find target; =1 found target
+    #   HERE_TARGET - the target string (may be empty string)
+    #   $i - unchanged if not here doc,
+    #    or index of the last token of the here target
+    #   $saw_error - flag noting unbalanced quote on here target
+    my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
+    my $ibeg                 = $i;
+    my $found_target         = 0;
+    my $here_doc_target      = '';
+    my $here_quote_character = '';
+    my $saw_error            = 0;
+    my ( $next_nonblank_token, $i_next_nonblank, $next_token );
+    $next_token = $$rtokens[ $i + 1 ];
+
+    # perl allows a backslash before the target string (heredoc.t)
+    my $backslash = 0;
+    if ( $next_token eq '\\' ) {
+        $backslash  = 1;
+        $next_token = $$rtokens[ $i + 2 ];
     }
-    if ( $i < 0 ) { $i = 0 }
 
-    unless ($type) {
+    ( $next_nonblank_token, $i_next_nonblank ) =
+      find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
 
-        if ($saw_type) {
+    if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
 
-            if ($saw_alpha) {
-                if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
-                    $type = 'w';
-                }
-                else { $type = 'i' }
-            }
-            elsif ( $identifier eq '->' ) {
-                $type = '->';
+        my $in_quote    = 1;
+        my $quote_depth = 0;
+        my $quote_pos   = 0;
+        my $quoted_string;
+
+        (
+            $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
+            $quoted_string
+          )
+          = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
+            $here_quote_character, $quote_pos, $quote_depth, $max_token_index );
+
+        if ($in_quote) {    # didn't find end of quote, so no target found
+            $i = $ibeg;
+            if ( $expecting == TERM ) {
+                warning(
+"Did not find here-doc string terminator ($here_quote_character) before end of line \n"
+                );
+                $saw_error = 1;
             }
-            elsif (
-                ( length($identifier) > 1 )
+        }
+        else {              # found ending quote
+            my $j;
+            $found_target = 1;
 
-                # In something like '@$=' we have an identifier '@$'
-                # In something like '$${' we have type '$$' (and only
-                # part of an identifier)
-                && !( $identifier =~ /\$$/ && $tok eq '{' )
-                && ( $identifier !~ /^(sub |package )$/ )
-              )
-            {
-                $type = 'i';
+            my $tokj;
+            for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
+                $tokj = $$rtokens[$j];
+
+                # we have to remove any backslash before the quote character
+                # so that the here-doc-target exactly matches this string
+                next
+                  if ( $tokj eq "\\"
+                    && $j < $i - 1
+                    && $$rtokens[ $j + 1 ] eq $here_quote_character );
+                $here_doc_target .= $tokj;
             }
-            else { $type = 't' }
         }
-        elsif ($saw_alpha) {
+    }
 
-            # type 'w' includes anything without leading type info
-            # ($,%,@,*) including something like abc::def::ghi
-            $type = 'w';
+    elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
+        $found_target = 1;
+        write_logfile_entry(
+            "found blank here-target after <<; suggest using \"\"\n");
+        $i = $ibeg;
+    }
+    elsif ( $next_token =~ /^\w/ ) {    # simple bareword or integer after <<
+
+        my $here_doc_expected;
+        if ( $expecting == UNKNOWN ) {
+            $here_doc_expected = guess_if_here_doc($next_token);
         }
         else {
-            $type = '';
-        }    # this can happen on a restart
-    }
+            $here_doc_expected = 1;
+        }
+
+        if ($here_doc_expected) {
+            $found_target    = 1;
+            $here_doc_target = $next_token;
+            $i               = $ibeg + 1;
+        }
 
-    if ($identifier) {
-        $tok = $identifier;
-        if ($message) { write_logfile_entry($message) }
     }
     else {
-        $tok = $tok_begin;
-        $i   = $i_begin;
+
+        if ( $expecting == TERM ) {
+            $found_target = 1;
+            write_logfile_entry("Note: bare here-doc operator <<\n");
+        }
+        else {
+            $i = $ibeg;
+        }
     }
 
-    TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
-        my ( $a, $b, $c ) = caller;
-        print
-"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
-        print
-"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
-    };
-    return ( $i, $tok, $type, $id_scan_state, $identifier );
+    # patch to neglect any prepended backslash
+    if ( $found_target && $backslash ) { $i++ }
+
+    return ( $found_target, $here_doc_target, $here_quote_character, $i,
+        $saw_error );
+}
+
+sub do_quote {
+
+    # follow (or continue following) quoted string(s)
+    # $in_quote return code:
+    #   0 - ok, found end
+    #   1 - still must find end of quote whose target is $quote_character
+    #   2 - still looking for end of first of two quotes
+    #
+    # Returns updated strings:
+    #  $quoted_string_1 = quoted string seen while in_quote=1
+    #  $quoted_string_2 = quoted string seen while in_quote=2
+    my (
+        $i,               $in_quote,    $quote_character,
+        $quote_pos,       $quote_depth, $quoted_string_1,
+        $quoted_string_2, $rtokens,     $rtoken_map,
+        $max_token_index
+    ) = @_;
+
+    my $in_quote_starting = $in_quote;
+
+    my $quoted_string;
+    if ( $in_quote == 2 ) {    # two quotes/quoted_string_1s to follow
+        my $ibeg = $i;
+        (
+            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
+            $quoted_string
+          )
+          = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
+            $quote_pos, $quote_depth, $max_token_index );
+        $quoted_string_2 .= $quoted_string;
+        if ( $in_quote == 1 ) {
+            if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
+            $quote_character = '';
+        }
+        else {
+            $quoted_string_2 .= "\n";
+        }
+    }
+
+    if ( $in_quote == 1 ) {    # one (more) quote to follow
+        my $ibeg = $i;
+        (
+            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
+            $quoted_string
+          )
+          = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
+            $quote_pos, $quote_depth, $max_token_index );
+        $quoted_string_1 .= $quoted_string;
+        if ( $in_quote == 1 ) {
+            $quoted_string_1 .= "\n";
+        }
+    }
+    return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
+        $quoted_string_1, $quoted_string_2 );
 }
 
 sub follow_quoted_string {
@@ -24806,10 +25507,13 @@ sub follow_quoted_string {
     #   $beginning_tok = the starting quote character
     #   $quote_pos = index to check next for alphanumeric delimiter
     #   $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
-    my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth )
+    #   $quoted_string = the text of the quote (without quotation tokens)
+    my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth,
+        $max_token_index )
       = @_;
     my ( $tok, $end_tok );
-    my $i = $i_beg - 1;
+    my $i             = $i_beg - 1;
+    my $quoted_string = "";
 
     TOKENIZER_DEBUG_FLAG_QUOTE && do {
         print
@@ -24861,8 +25565,10 @@ sub follow_quoted_string {
     # characters, whereas for a non-alphanumeric delimiter, only tokens of
     # length 1 can match.
 
-    # loop for case of alphanumeric quote delimiter..
+    ###################################################################
+    # Case 1 (rare): loop for case of alphanumeric quote delimiter..
     # "quote_pos" is the position the current word to begin searching
+    ###################################################################
     if ( $beginning_tok =~ /\w/ ) {
 
         # Note this because it is not recommended practice except
@@ -24879,10 +25585,12 @@ sub follow_quoted_string {
 
                 if ( $tok eq '\\' ) {
 
+                    # retain backslash unless it hides the end token
+                    $quoted_string .= $tok
+                      unless $$rtokens[ $i + 1 ] eq $end_tok;
                     $quote_pos++;
                     last if ( $i >= $max_token_index );
                     $tok = $$rtokens[ ++$i ];
-
                 }
             }
             my $old_pos = $quote_pos;
@@ -24895,6 +25603,9 @@ sub follow_quoted_string {
 
             if ( $quote_pos > 0 ) {
 
+                $quoted_string .=
+                  substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
+
                 $quote_depth--;
 
                 if ( $quote_depth == 0 ) {
@@ -24902,10 +25613,15 @@ sub follow_quoted_string {
                     last;
                 }
             }
+            else {
+                $quoted_string .= substr( $tok, $old_pos );
+            }
         }
     }
 
-    # loop for case of a non-alphanumeric quote delimiter..
+    ########################################################################
+    # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
+    ########################################################################
     else {
 
         while ( $i < $max_token_index ) {
@@ -24923,12 +25639,188 @@ sub follow_quoted_string {
                 $quote_depth++;
             }
             elsif ( $tok eq '\\' ) {
-                $i++;
+
+                # retain backslash unless it hides the beginning or end token
+                $tok = $$rtokens[ ++$i ];
+                $quoted_string .= '\\'
+                  unless ( $tok eq $end_tok || $tok eq $beginning_tok );
             }
+            $quoted_string .= $tok;
         }
     }
     if ( $i > $max_token_index ) { $i = $max_token_index }
-    return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth );
+    return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth,
+        $quoted_string );
+}
+
+sub indicate_error {
+    my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
+    interrupt_logfile();
+    warning($msg);
+    write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
+    resume_logfile();
+}
+
+sub write_error_indicator_pair {
+    my ( $line_number, $input_line, $pos, $carrat ) = @_;
+    my ( $offset, $numbered_line, $underline ) =
+      make_numbered_line( $line_number, $input_line, $pos );
+    $underline = write_on_underline( $underline, $pos - $offset, $carrat );
+    warning( $numbered_line . "\n" );
+    $underline =~ s/\s*$//;
+    warning( $underline . "\n" );
+}
+
+sub make_numbered_line {
+
+    #  Given an input line, its line number, and a character position of
+    #  interest, create a string not longer than 80 characters of the form
+    #     $lineno: sub_string
+    #  such that the sub_string of $str contains the position of interest
+    #
+    #  Here is an example of what we want, in this case we add trailing
+    #  '...' because the line is long.
+    #
+    # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
+    #
+    #  Here is another example, this time in which we used leading '...'
+    #  because of excessive length:
+    #
+    # 2: ... er of the World Wide Web Consortium's
+    #
+    #  input parameters are:
+    #   $lineno = line number
+    #   $str = the text of the line
+    #   $pos = position of interest (the error) : 0 = first character
+    #
+    #   We return :
+    #     - $offset = an offset which corrects the position in case we only
+    #       display part of a line, such that $pos-$offset is the effective
+    #       position from the start of the displayed line.
+    #     - $numbered_line = the numbered line as above,
+    #     - $underline = a blank 'underline' which is all spaces with the same
+    #       number of characters as the numbered line.
+
+    my ( $lineno, $str, $pos ) = @_;
+    my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
+    my $excess = length($str) - $offset - 68;
+    my $numc   = ( $excess > 0 ) ? 68 : undef;
+
+    if ( defined($numc) ) {
+        if ( $offset == 0 ) {
+            $str = substr( $str, $offset, $numc - 4 ) . " ...";
+        }
+        else {
+            $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
+        }
+    }
+    else {
+
+        if ( $offset == 0 ) {
+        }
+        else {
+            $str = "... " . substr( $str, $offset + 4 );
+        }
+    }
+
+    my $numbered_line = sprintf( "%d: ", $lineno );
+    $offset -= length($numbered_line);
+    $numbered_line .= $str;
+    my $underline = " " x length($numbered_line);
+    return ( $offset, $numbered_line, $underline );
+}
+
+sub write_on_underline {
+
+    # The "underline" is a string that shows where an error is; it starts
+    # out as a string of blanks with the same length as the numbered line of
+    # code above it, and we have to add marking to show where an error is.
+    # In the example below, we want to write the string '--^' just below
+    # the line of bad code:
+    #
+    # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
+    #                 ---^
+    # We are given the current underline string, plus a position and a
+    # string to write on it.
+    #
+    # In the above example, there will be 2 calls to do this:
+    # First call:  $pos=19, pos_chr=^
+    # Second call: $pos=16, pos_chr=---
+    #
+    # This is a trivial thing to do with substr, but there is some
+    # checking to do.
+
+    my ( $underline, $pos, $pos_chr ) = @_;
+
+    # check for error..shouldn't happen
+    unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
+        return $underline;
+    }
+    my $excess = length($pos_chr) + $pos - length($underline);
+    if ( $excess > 0 ) {
+        $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
+    }
+    substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
+    return ($underline);
+}
+
+sub pre_tokenize {
+
+    # Break a string, $str, into a sequence of preliminary tokens.  We
+    # are interested in these types of tokens:
+    #   words       (type='w'),            example: 'max_tokens_wanted'
+    #   digits      (type = 'd'),          example: '0755'
+    #   whitespace  (type = 'b'),          example: '   '
+    #   any other single character (i.e. punct; type = the character itself).
+    # We cannot do better than this yet because we might be in a quoted
+    # string or pattern.  Caller sets $max_tokens_wanted to 0 to get all
+    # tokens.
+    my ( $str, $max_tokens_wanted ) = @_;
+
+    # we return references to these 3 arrays:
+    my @tokens    = ();     # array of the tokens themselves
+    my @token_map = (0);    # string position of start of each token
+    my @type      = ();     # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
+
+    do {
+
+        # whitespace
+        if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
+
+        # numbers
+        # note that this must come before words!
+        elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
+
+        # words
+        elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
+
+        # single-character punctuation
+        elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
+
+        # that's all..
+        else {
+            return ( \@tokens, \@token_map, \@type );
+        }
+
+        push @tokens,    $1;
+        push @token_map, pos($str);
+
+    } while ( --$max_tokens_wanted != 0 );
+
+    return ( \@tokens, \@token_map, \@type );
+}
+
+sub show_tokens {
+
+    # this is an old debug routine
+    my ( $rtokens, $rtoken_map ) = @_;
+    my $num = scalar(@$rtokens);
+    my $i;
+
+    for ( $i = 0 ; $i < $num ; $i++ ) {
+        my $len = length( $$rtokens[$i] );
+        print "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n";
+    }
 }
 
 sub matching_end_token {
@@ -24953,15 +25845,91 @@ sub matching_end_token {
     }
 }
 
+sub dump_token_types {
+    my $class = shift;
+    my $fh    = shift;
+
+    # This should be the latest list of token types in use
+    # adding NEW_TOKENS: add a comment here
+    print $fh <<'END_OF_LIST';
+
+Here is a list of the token types currently used for lines of type 'CODE'.  
+For the following tokens, the "type" of a token is just the token itself.  
+
+.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
+( ) <= >= == =~ !~ != ++ -- /= x=
+... **= <<= >>= &&= ||= //= <=> 
+, + - / * | % ! x ~ = \ ? : . < > ^ &
+
+The following additional token types are defined:
+
+ type    meaning
+    b    blank (white space) 
+    {    indent: opening structural curly brace or square bracket or paren
+         (code block, anonymous hash reference, or anonymous array reference)
+    }    outdent: right structural curly brace or square bracket or paren
+    [    left non-structural square bracket (enclosing an array index)
+    ]    right non-structural square bracket
+    (    left non-structural paren (all but a list right of an =)
+    )    right non-structural parena
+    L    left non-structural curly brace (enclosing a key)
+    R    right non-structural curly brace 
+    ;    terminal semicolon
+    f    indicates a semicolon in a "for" statement
+    h    here_doc operator <<
+    #    a comment
+    Q    indicates a quote or pattern
+    q    indicates a qw quote block
+    k    a perl keyword
+    C    user-defined constant or constant function (with void prototype = ())
+    U    user-defined function taking parameters
+    G    user-defined function taking block parameter (like grep/map/eval)
+    M    (unused, but reserved for subroutine definition name)
+    P    (unused, but -html uses it to label pod text)
+    t    type indicater such as %,$,@,*,&,sub
+    w    bare word (perhaps a subroutine call)
+    i    identifier of some type (with leading %, $, @, *, &, sub, -> )
+    n    a number
+    v    a v-string
+    F    a file test operator (like -e)
+    Y    File handle
+    Z    identifier in indirect object slot: may be file handle, object
+    J    LABEL:  code block label
+    j    LABEL after next, last, redo, goto
+    p    unary +
+    m    unary -
+    pp   pre-increment operator ++
+    mm   pre-decrement operator -- 
+    A    : used as attribute separator
+    
+    Here are the '_line_type' codes used internally:
+    SYSTEM         - system-specific code before hash-bang line
+    CODE           - line of perl code (including comments)
+    POD_START      - line starting pod, such as '=head'
+    POD            - pod documentation text
+    POD_END        - last line of pod section, '=cut'
+    HERE           - text of here-document
+    HERE_END       - last line of here-doc (target word)
+    FORMAT         - format section
+    FORMAT_END     - last line of format section, '.'
+    DATA_START     - __DATA__ line
+    DATA           - unidentified text following __DATA__
+    END_START      - __END__ line
+    END            - unidentified text following __END__
+    ERROR          - we are in big trouble, probably not a perl script
+END_OF_LIST
+}
+
 BEGIN {
 
     # These names are used in error messages
     @opening_brace_names = qw# '{' '[' '(' '?' #;
     @closing_brace_names = qw# '}' ']' ')' ':' #;
 
+    ## TESTING: added ~~
     my @digraphs = qw(
       .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
-      <= >= == =~ !~ != ++ -- /= x=
+      <= >= == =~ !~ != ++ -- /= x= ~~
     );
     @is_digraph{@digraphs} = (1) x scalar(@digraphs);
 
@@ -24992,7 +25960,7 @@ BEGIN {
     @is_block_operator{@_} = (1) x scalar(@_);
 
     # these functions allow an identifier in the indirect object slot
-    @_ = qw( print printf sort exec system );
+    @_ = qw( print printf sort exec system say);
     @is_indirect_object_taker{@_} = (1) x scalar(@_);
 
     # These tokens may precede a code block
@@ -25225,9 +26193,14 @@ BEGIN {
       given
       when
       err
+      say
     );
 
-    # patched above for SWITCH/CASE
+    # patched above for SWITCH/CASE given/when err say
+    # 'err' is a fairly safe addition.
+    # TODO: 'default' still needed if appropriate
+    # 'use feature' seen, but perltidy works ok without it.
+    # Concerned that 'default' could break code.
     push( @Keywords, @value_requestor );
 
     # These are treated the same but are not keywords:
@@ -25282,7 +26255,7 @@ BEGIN {
 
     # these token TYPES expect trailing operator but not a term
     # note: ++ and -- are post-increment and decrement, 'C' = constant
-    my @operator_requestor_types = qw( ++ -- C );
+    my @operator_requestor_types = qw( ++ -- C <> q );
     @expecting_operator_types{@operator_requestor_types} =
       (1) x scalar(@operator_requestor_types);
 
@@ -25292,14 +26265,19 @@ BEGIN {
     my @value_requestor_type = qw#
       L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
       **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
-      <= >= == != => \ > < % * / ? & | ** <=>
-      f F pp mm Y p m U J G
+      <= >= == != => \ > < % * / ? & | ** <=> ~~
+      f F pp mm Y p m U J G j >> << ^ t
       #;
     push( @value_requestor_type, ',' )
       ;    # (perl doesn't like a ',' in a qw block)
     @expecting_term_types{@value_requestor_type} =
       (1) x scalar(@value_requestor_type);
 
+    # Note: the following valid token types are not assigned here to
+    # hashes requesting to be followed by values or terms, but are
+    # instead currently hard-coded into sub operator_expected:
+    # ) -> :: Q R Z ] b h i k n v w } #
+
     # For simple syntax checking, it is nice to have a list of operators which
     # will really be unhappy if not followed by a term.  This includes most
     # of the above...
@@ -25745,7 +26723,7 @@ to perltidy.
 
 =head1 VERSION
 
-This man page documents Perl::Tidy version 20060614.
+This man page documents Perl::Tidy version 20060719.
 
 =head1 AUTHOR