Upgrade perltidy to the 20060719 release

author don <don@8f7917da-ec0b-0410-a553-b9b0e350d17e>

Sat, 29 Jul 2006 05:04:15 +0000 (05:04 +0000)

committer don <don@8f7917da-ec0b-0410-a553-b9b0e350d17e>

Sat, 29 Jul 2006 05:04:15 +0000 (05:04 +0000)
author don <don@8f7917da-ec0b-0410-a553-b9b0e350d17e>
Sat, 29 Jul 2006 05:04:15 +0000 (05:04 +0000)
committer don <don@8f7917da-ec0b-0410-a553-b9b0e350d17e>
Sat, 29 Jul 2006 05:04:15 +0000 (05:04 +0000)
diff --git a/BUGS b/BUGS

index edca9bc7fdb9770c1d2a2f2121276a5c5172081b..5689aca83634ae5785c0ef39dca52e815c1f94d1 100644 (file)
--- a/BUGS
+++ b/BUGS
@@ -6,34 +6,12 @@ Perltidy open BUGS
       This file only lists open bugs.  For bugs which have been fixed, 
       see the ChangeLog.  
  
       This file only lists open bugs.  For bugs which have been fixed, 
       see the ChangeLog.  
  
-  A here-doc invoked through an 'e' modifier on a pattern replacement text is not recognized
-    For example, the output of perltidy for this script has a syntax error:
-
-            my $text="Hello World!\n";
-            $text =~ s@Hello@<<'END'@e;
-            Goodbye 
-            Cruel
-            END
-            print "$text\n";
-
-    A workaround is to put the here-doc in a temporary string and then do
-    the substitution:
-
-            my $text="Hello World!\n";
-            my $str=<<'END';
-            Goodbye 
-            Cruel
-            END
-            $text =~ s@Hello@$str@e;
-            print "$text\n";
-
-  The --extrude option can occasionally produce code with syntax errors
+  The --extrude option can produce code with syntax errors
      The --extrude tries to put as many newlines in the formatted code as
      The --extrude tries to put as many newlines in the formatted code as
-    possible. This option is of limited use for formatting, but it has been
-    helpful for debugging purposes. Occasionally it will produce code which
-    Perl considers to have a syntax error. These problems usually involve
-    code where Perl is having to guess the tokenization. For example,
-    --extrude will currently cause a syntax error in the following line:
-
-     utime $inc+0 ? ($mtime, $ntime) : ($atime, $atime), $file;
+    possible. This option is very useful for testing perltidy but not for
+    actual formatting. Occasionally it will produce code which Perl
+    considers to have a syntax error. These problems usually involve code
+    where Perl is having to guess the tokenization based on whitespace.
+    Since the --extrude option is typically only used for testing perltidy,
+    this type of error should not normally occur in practice.
  
  
diff --git a/CHANGES b/CHANGES

index b59e99b2cba1ca44ad152d288178d4414a7d2053..b103a05e462103749847089008461ca135b249eb 100644 (file)
--- a/CHANGES
+++ b/CHANGES
@@ -1,8 +1,98 @@
  Perltidy Change Log
  Perltidy Change Log
-     You can help Perltidy evolve into a better program.  If you have hit a
-     bug, unusual behavior, annoyance, or have a suggested improvement,
-     please send a note to perltidy at users.sourceforge.net.  All
-     suggestions are welcome.
+  2006 07 19
+     -Eliminated bug where a here-doc invoked through an 'e' modifier on a pattern
+     replacement text was not recognized.  The tokenizer now recursively scans
+     replacement text (but does not reformat it).
+
+     -Improved vertical alignment of terminal else blocks and ternary statements.
+      Thanks to Chris for the suggestion. 
+
+      OLD:
+        if    ( IsBitmap() ) { return GetBitmap(); }
+        elsif ( IsFiles() )  { return GetFiles(); }
+        else { return GetText(); }
+
+      NEW:
+        if    ( IsBitmap() ) { return GetBitmap(); }
+        elsif ( IsFiles() )  { return GetFiles(); }
+        else                 { return GetText(); }
+
+      OLD:
+        $which_search =
+            $opts{"t"} ? 'title'
+          : $opts{"s"} ? 'subject'
+          : $opts{"a"} ? 'author'
+          : 'title';
+
+      NEW:
+        $which_search =
+            $opts{"t"} ? 'title'
+          : $opts{"s"} ? 'subject'
+          : $opts{"a"} ? 'author'
+          :              'title';
+
+     -improved indentation of try/catch blocks and other externally defined
+     functions accepting a block argument.  Thanks to jae.
+
+     -Added support for Perl 5.10 features say and smartmatch.
+
+     -Added flag -pbp (--perl-best-practices) as an abbreviation for parameters
+     suggested in Damian Conway's "Perl Best Practices".  -pbp is the same as:
+
+        -l=78 -i=4 -ci=4 -st -se -vt=2 -cti=0 -pt=1 -bt=1 -sbt=1 -bbt=1 -nsfs -nolq
+        -wbb="% + - * / x != == >= <= =~ !~ < > | & >= < = 
+              **= += *= &= <<= &&= -= /= |= >>= ||= .= %= ^= x="
+
+      Please note that the -st here restricts input to standard input; use
+      -nst if necessary to override.
+
+     -Eliminated some needless breaks at equals signs in -lp indentation.
+
+        OLD:
+            $c =
+              Math::Complex->make(LEFT + $x * (RIGHT - LEFT) / SIZE,
+                                  TOP + $y * (BOTTOM - TOP) / SIZE);
+        NEW:
+            $c = Math::Complex->make(LEFT + $x * (RIGHT - LEFT) / SIZE,
+                                     TOP + $y * (BOTTOM - TOP) / SIZE);
+
+     A break at an equals is sometimes useful for preventing complex statements 
+     from hitting the line length limit.  The decision to do this was 
+     over-eager in some cases and has been improved.  Thanks to Royce Reece.
+
+     -qw quotes contained in braces, square brackets, and parens are being
+     treated more like those containers as far as stacking of tokens.  Also
+     stack of closing tokens ending ');' will outdent to where the ');' would
+     have outdented if the closing stack is matched with a similar opening stack.
+
+      OLD: perltidy -soc -sct
+        __PACKAGE__->load_components(
+            qw(
+              PK::Auto
+              Core
+              )
+        );
+      NEW: perltidy -soc -sct
+        __PACKAGE__->load_components( qw(
+              PK::Auto
+              Core
+        ) );
+      Thanks to Aran Deltac
+
+     -Eliminated some undesirable or marginally desirable vertical alignments.
+     These include terminal colons, opening braces, and equals, and particularly
+     when just two lines would be aligned.
+
+     OLD:
+        my $accurate_timestamps = $Stamps{lnk};
+        my $has_link            = 
+            ...
+     NEW:
+        my $accurate_timestamps = $Stamps{lnk};
+        my $has_link =
+
+     -Corrected a problem with -mangle in which a space would be removed
+     between a keyword and variable beginning with ::.
  
    2006 06 14
       -Attribute argument lists are now correctly treated as quoted strings
  
    2006 06 14
       -Attribute argument lists are now correctly treated as quoted strings
diff --git a/META.yml b/META.yml

index 771842702d135e13e1104063525bc24bda399013..95ea74c1a3166d78ad166f929afc40f737403142 100644 (file)
--- a/META.yml
+++ b/META.yml
@@ -1,10 +1,10 @@
  # http://module-build.sourceforge.net/META-spec.html
  #XXXXXXX This is a prototype!!!  It will change in the future!!! XXXXX#
  name:         Perl-Tidy
  # http://module-build.sourceforge.net/META-spec.html
  #XXXXXXX This is a prototype!!!  It will change in the future!!! XXXXX#
  name:         Perl-Tidy
-version:      20060614
+version:      20060719
  version_from: lib/Perl/Tidy.pm
  installdirs:  site
  requires:
  
  distribution_type: module
  version_from: lib/Perl/Tidy.pm
  installdirs:  site
  requires:
  
  distribution_type: module
-generated_by: ExtUtils::MakeMaker version 6.17
+generated_by: ExtUtils::MakeMaker version 6.30
diff --git a/TODO b/TODO

index 463f148d392d54e51a073fba698b5a7de6992f84..8eaabd68ac248c0196261ad30995240f6ae16a8a 100644 (file)
--- a/TODO
+++ b/TODO
@@ -6,12 +6,8 @@ Perltidy TODO List
    Improved Vertical Alignment
      There are many opportunities for improving vertical alignment.
  
    Improved Vertical Alignment
      There are many opportunities for improving vertical alignment.
  
-  More options for controling placement of opening/closing tokens
-    Many have requested even more options to control opening and closing
-    token placement.
-
    improved ?: formatting
    improved ?: formatting
-    An indentation level should be associated with ?: statements. This will
+    An indentation level should be associated with ?: statements. This would
      make nested ?: statements more readable.
  
    improved internal if/unless formatting
      make nested ?: statements more readable.
  
    improved internal if/unless formatting
diff --git a/bin/perltidy b/bin/perltidy

index 24870fdc9d5d81f9a8195f1f7249a9aceb5764b8..0822508e06a5955aacc9b0fc4ae7b93d17055a4f 100755 (executable)
--- a/bin/perltidy
+++ b/bin/perltidy
@@ -1860,9 +1860,9 @@ return lists, such as C<sort> and <map>.  This allows chains of these
  operators to be displayed one per line.  Use B<-nbok> to prevent
  retaining these breakpoints.
  
  operators to be displayed one per line.  Use B<-nbok> to prevent
  retaining these breakpoints.
  
-=item B<-bot>,  B<--break-at-old-trinary-breakpoints>
+=item B<-bot>,  B<--break-at-old-ternary-breakpoints>
  
  
-By default, if a conditional (trinary) operator is broken at a C<:>,
+By default, if a conditional (ternary) operator is broken at a C<:>,
  then it will remain broken.  To prevent this, and thereby
  form longer lines, use B<-nbot>.
  
  then it will remain broken.  To prevent this, and thereby
  form longer lines, use B<-nbot>.
  
@@ -1967,6 +1967,18 @@ style overrides the default style with the following parameters:
  
      -lp -bl -noll -pt=2 -bt=2 -sbt=2 -icp
  
  
      -lp -bl -noll -pt=2 -bt=2 -sbt=2 -icp
  
+=item B<-pbp>, B<--perl-best-practices>
+
+B<-pbp> is an abbreviation for the parameters in the book B<Perl Best Practices>
+by Damian Conway:
+
+    -l=78 -i=4 -ci=4 -st -se -vt=2 -cti=0 -pt=1 -bt=1 -sbt=1 -bbt=1 -nsfs -nolq
+    -wbb="% + - * / x != == >= <= =~ !~ < > | & >= < = 
+          **= += *= &= <<= &&= -= /= |= >>= ||= .= %= ^= x="
+
+Note that the -st and -se flags make perltidy act as a filter on one file only.  
+These can be overridden with -nst and -nse if necessary.
+
  =back
  
  =head2 Other Controls
  =back
  
  =head2 Other Controls
@@ -2607,7 +2619,7 @@ perlstyle(1), Perl::Tidy(3)
  
  =head1 VERSION
  
  
  =head1 VERSION
  
-This man page documents perltidy version 20060614.
+This man page documents perltidy version 20060719.
  
  =head1 CREDITS
  
  
  =head1 CREDITS
  
diff --git a/debian/changelog b/debian/changelog

index 4b3542cc9d8e46ba1308397b7834a07b0a65b212..0a41b02cfb0088e6342b2ae0f106ffe0a00ad207 100644 (file)
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+perltidy (20060719-1) unstable; urgency=low
+
+  * New upstream release
+
+ -- Don Armstrong <don@debian.org>  Fri, 28 Jul 2006 22:02:55 -0700
+
  perltidy (20060614-1) unstable; urgency=low
  
    * New upstream release
  perltidy (20060614-1) unstable; urgency=low
  
    * New upstream release
diff --git a/docs/perltidy.1 b/docs/perltidy.1

index fa71edd6c1470578c10daeb4bea681e222c40a41..2a37636af7ce58d1a84904a9bf7a714ccc57ddaa 100644 (file)
--- a/docs/perltidy.1
+++ b/docs/perltidy.1
@@ -1,4 +1,4 @@
-.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.3
+.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32
  .\"
  .\" Standard preamble:
  .\" ========================================================================
  .\"
  .\" Standard preamble:
  .\" ========================================================================
@@ -25,11 +25,11 @@
  ..
  .\" Set up some character translations and predefined strings.  \*(-- will
  .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
  ..
  .\" Set up some character translations and predefined strings.  \*(-- will
  .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
-.\" double quote, and \*(R" will give a right double quote.  | will give a
-.\" real vertical bar.  \*(C+ will give a nicer C++.  Capital omega is used to
-.\" do unbreakable dashes and therefore won't be available.  \*(C` and \*(C'
-.\" expand to `' in nroff, nothing in troff, for use with C<>.
-.tr \(*W-|\(bv\*(Tr
+.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
+.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
+.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
+.\" nothing in troff, for use with C<>.
+.tr \(*W-
  .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
  .ie n \{\
  .    ds -- \(*W-
  .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
  .ie n \{\
  .    ds -- \(*W-
@@ -129,7 +129,7 @@
  .\" ========================================================================
  .\"
  .IX Title "PERLTIDY 1"
  .\" ========================================================================
  .\"
  .IX Title "PERLTIDY 1"
-.TH PERLTIDY 1 "2006-06-13" "perl v5.8.7" "User Contributed Perl Documentation"
+.TH PERLTIDY 1 "2006-07-19" "perl v5.8.8" "User Contributed Perl Documentation"
  .SH "NAME"
  perltidy \- a perl script indenter and reformatter
  .SH "SYNOPSIS"
  .SH "NAME"
  perltidy \- a perl script indenter and reformatter
  .SH "SYNOPSIS"
@@ -1989,9 +1989,9 @@ By default, perltidy will retain a breakpoint before keywords which may
  return lists, such as \f(CW\*(C`sort\*(C'\fR and <map>.  This allows chains of these
  operators to be displayed one per line.  Use \fB\-nbok\fR to prevent
  retaining these breakpoints.
  return lists, such as \f(CW\*(C`sort\*(C'\fR and <map>.  This allows chains of these
  operators to be displayed one per line.  Use \fB\-nbok\fR to prevent
  retaining these breakpoints.
-.IP "\fB\-bot\fR,  \fB\-\-break\-at\-old\-trinary\-breakpoints\fR" 4
-.IX Item "-bot,  --break-at-old-trinary-breakpoints"
-By default, if a conditional (trinary) operator is broken at a \f(CW\*(C`:\*(C'\fR,
+.IP "\fB\-bot\fR,  \fB\-\-break\-at\-old\-ternary\-breakpoints\fR" 4
+.IX Item "-bot,  --break-at-old-ternary-breakpoints"
+By default, if a conditional (ternary) operator is broken at a \f(CW\*(C`:\*(C'\fR,
  then it will remain broken.  To prevent this, and thereby
  form longer lines, use \fB\-nbot\fR.
  .IP "\fB\-iob\fR,  \fB\-\-ignore\-old\-breakpoints\fR" 4
  then it will remain broken.  To prevent this, and thereby
  form longer lines, use \fB\-nbot\fR.
  .IP "\fB\-iob\fR,  \fB\-\-ignore\-old\-breakpoints\fR" 4
@@ -2072,6 +2072,19 @@ style overrides the default style with the following parameters:
  .Vb 1
  \&    \-lp \-bl \-noll \-pt=2 \-bt=2 \-sbt=2 \-icp
  .Ve
  .Vb 1
  \&    \-lp \-bl \-noll \-pt=2 \-bt=2 \-sbt=2 \-icp
  .Ve
+.IP "\fB\-pbp\fR, \fB\-\-perl\-best\-practices\fR" 4
+.IX Item "-pbp, --perl-best-practices"
+\&\fB\-pbp\fR is an abbreviation for the parameters in the book \fBPerl Best Practices\fR
+by Damian Conway:
+.Sp
+.Vb 3
+\&    \-l=78 \-i=4 \-ci=4 \-st \-se \-vt=2 \-cti=0 \-pt=1 \-bt=1 \-sbt=1 \-bbt=1 \-nsfs \-nolq
+\&    \-wbb="% + \- * / x != == >= <= =~ !~ < > | & >= < = 
+\&          **= += *= &= <<= &&= \-= /= |= >>= ||= .= %= ^= x="
+.Ve
+.Sp
+Note that the \-st and \-se flags make perltidy act as a filter on one file only.  
+These can be overridden with \-nst and \-nse if necessary.
  .Sh "Other Controls"
  .IX Subsection "Other Controls"
  .IP "Deleting selected text" 4
  .Sh "Other Controls"
  .IX Subsection "Other Controls"
  .IP "Deleting selected text" 4
@@ -2695,7 +2708,7 @@ purpose of this rule is to prevent generating confusing filenames such as
  \&\fIperlstyle\fR\|(1), \fIPerl::Tidy\fR\|(3)
  .SH "VERSION"
  .IX Header "VERSION"
  \&\fIperlstyle\fR\|(1), \fIPerl::Tidy\fR\|(3)
  .SH "VERSION"
  .IX Header "VERSION"
-This man page documents perltidy version 20060614.
+This man page documents perltidy version 20060719.
  .SH "CREDITS"
  .IX Header "CREDITS"
  Michael Cartmell supplied code for adaptation to \s-1VMS\s0 and helped with
  .SH "CREDITS"
  .IX Header "CREDITS"
  Michael Cartmell supplied code for adaptation to \s-1VMS\s0 and helped with
diff --git a/lib/Perl/Tidy.pm b/lib/Perl/Tidy.pm

index e69780c75b3de53dccf003943ac5c5cfa5ad9be7..ecef204d8f8fff12b30df2641181056cf0b1d6f0 100644 (file)
--- a/lib/Perl/Tidy.pm
+++ b/lib/Perl/Tidy.pm
@@ -63,7 +63,7 @@ use IO::File;
  use File::Basename;
  
  BEGIN {
  use File::Basename;
  
  BEGIN {
-    ( $VERSION = q($Id: Tidy.pm,v 1.49 2006/06/14 01:56:24 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
+    ( $VERSION = q($Id: Tidy.pm,v 1.56 2006/07/19 23:13:33 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
  }
  
  sub streamhandle {
  }
  
  sub streamhandle {
@@ -308,8 +308,8 @@ sub make_temporary_filename {
          }
          if ($input_file) {
  
          }
          if ($input_file) {
  
-            if ( ref $input_file ) { print STDERR " of reference to:" }
-            else { print STDERR " of file:" }
+            if   ( ref $input_file ) { print STDERR " of reference to:" }
+            else                     { print STDERR " of file:" }
              print STDERR " $input_file";
          }
          print STDERR "\n";
              print STDERR " $input_file";
          }
          print STDERR "\n";
@@ -358,7 +358,7 @@ EOM
              my $hash_ref = $input_hash{$key};
              if ( defined($hash_ref) ) {
                  unless ( ref($hash_ref) eq 'HASH' ) {
              my $hash_ref = $input_hash{$key};
              if ( defined($hash_ref) ) {
                  unless ( ref($hash_ref) eq 'HASH' ) {
-                    my $what   = ref($hash_ref);
+                    my $what = ref($hash_ref);
                      my $but_is =
                        $what ? "but is ref to $what" : "but is not a reference";
                      croak <<EOM;
                      my $but_is =
                        $what ? "but is ref to $what" : "but is not a reference";
                      croak <<EOM;
@@ -1102,6 +1102,7 @@ sub generate_options {
      # chk --> check-multiline-quotes      # check for old bug; to be deleted
      # scl --> short-concatenation-item-length   # helps break at '.'
      # recombine                           # for debugging line breaks
      # chk --> check-multiline-quotes      # check for old bug; to be deleted
      # scl --> short-concatenation-item-length   # helps break at '.'
      # recombine                           # for debugging line breaks
+    # valign                              # for debugging vertical alignment
      # I   --> DIAGNOSTICS                 # for debugging
      ######################################################################
  
      # I   --> DIAGNOSTICS                 # for debugging
      ######################################################################
  
@@ -1157,6 +1158,7 @@ sub generate_options {
        no-profile
        npro
        recombine!
        no-profile
        npro
        recombine!
+      valign!
      );
  
      my $category = 13;    # Debugging
      );
  
      my $category = 13;    # Debugging
@@ -1336,7 +1338,7 @@ sub generate_options {
      ########################################
      $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
      $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
      ########################################
      $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
      $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
-    $add_option->( 'break-at-old-trinary-breakpoints', 'bot', '!' );
+    $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' );
      $add_option->( 'ignore-old-breakpoints',           'iob', '!' );
  
      ########################################
      $add_option->( 'ignore-old-breakpoints',           'iob', '!' );
  
      ########################################
@@ -1469,7 +1471,7 @@ sub generate_options {
        brace-vertical-tightness-closing=0
        brace-vertical-tightness=0
        break-at-old-logical-breakpoints
        brace-vertical-tightness-closing=0
        brace-vertical-tightness=0
        break-at-old-logical-breakpoints
-      break-at-old-trinary-breakpoints
+      break-at-old-ternary-breakpoints
        break-at-old-keyword-breakpoints
        comma-arrow-breakpoints=1
        nocheck-syntax
        break-at-old-keyword-breakpoints
        comma-arrow-breakpoints=1
        nocheck-syntax
@@ -1512,6 +1514,7 @@ sub generate_options {
        paren-vertical-tightness=0
        pass-version-line
        recombine
        paren-vertical-tightness=0
        pass-version-line
        recombine
+      valign
        short-concatenation-item-length=8
        space-for-semicolon
        square-bracket-tightness=1
        short-concatenation-item-length=8
        space-for-semicolon
        square-bracket-tightness=1
@@ -1544,15 +1547,16 @@ sub generate_options {
          'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
          'nooutdent-long-lines' =>
            [qw(nooutdent-long-quotes nooutdent-long-comments)],
          'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
          'nooutdent-long-lines' =>
            [qw(nooutdent-long-quotes nooutdent-long-comments)],
-        'noll'                => [qw(nooutdent-long-lines)],
-        'io'                  => [qw(indent-only)],
+        'noll' => [qw(nooutdent-long-lines)],
+        'io'   => [qw(indent-only)],
          'delete-all-comments' =>
            [qw(delete-block-comments delete-side-comments delete-pod)],
          'nodelete-all-comments' =>
            [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
          'delete-all-comments' =>
            [qw(delete-block-comments delete-side-comments delete-pod)],
          'nodelete-all-comments' =>
            [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
-        'dac'              => [qw(delete-all-comments)],
-        'ndac'             => [qw(nodelete-all-comments)],
-        'gnu'              => [qw(gnu-style)],
+        'dac'  => [qw(delete-all-comments)],
+        'ndac' => [qw(nodelete-all-comments)],
+        'gnu'  => [qw(gnu-style)],
+        'pbp'  => [qw(perl-best-practices)],
          'tee-all-comments' =>
            [qw(tee-block-comments tee-side-comments tee-pod)],
          'notee-all-comments' =>
          'tee-all-comments' =>
            [qw(tee-block-comments tee-side-comments tee-pod)],
          'notee-all-comments' =>
@@ -1568,6 +1572,8 @@ sub generate_options {
          'baa'                        => [qw(cab=0)],
          'nbaa'                       => [qw(cab=1)],
  
          'baa'                        => [qw(cab=0)],
          'nbaa'                       => [qw(cab=1)],
  
+        'break-at-old-trinary-breakpoints' => [qw(bot)],
+
          'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
          'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
          'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
          'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
          'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
          'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
@@ -1675,6 +1681,12 @@ sub generate_options {
                )
          ],
  
                )
          ],
  
+        # Style suggested in Damian Conway's Perl Best Practices
+        'perl-best-practices' => [
+            qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq),
+q(wbb=% + - * / x != == >= <= =~ !~ < > | & >= < = **= += *= &= <<= &&= -= /= |= >>= ||= .= %= ^= x=)
+        ],
+
          # Additional styles can be added here
      );
  
          # Additional styles can be added here
      );
  
@@ -1831,7 +1843,7 @@ EOM
          # look for a config file if we don't have one yet
          my $rconfig_file_chatter;
          $$rconfig_file_chatter = "";
          # look for a config file if we don't have one yet
          my $rconfig_file_chatter;
          $$rconfig_file_chatter = "";
-        $config_file           =
+        $config_file =
            find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
              $rpending_complaint )
            unless $config_file;
            find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
              $rpending_complaint )
            unless $config_file;
@@ -1917,6 +1929,7 @@ EOM
                      }
                    )
                  {
                      }
                    )
                  {
+
                      if ( defined( $Opts{$_} ) ) {
                          delete $Opts{$_};
                          warn "ignoring --$_ in config file: $config_file\n";
                      if ( defined( $Opts{$_} ) ) {
                          delete $Opts{$_};
                          warn "ignoring --$_ in config file: $config_file\n";
@@ -2867,7 +2880,7 @@ Following Old Breakpoints
   -boc    break at old comma breaks: turns off all automatic list formatting
   -bol    break at old logical breakpoints: or, and, ||, && (default)
   -bok    break at old list keyword breakpoints such as map, sort (default)
   -boc    break at old comma breaks: turns off all automatic list formatting
   -bol    break at old logical breakpoints: or, and, ||, && (default)
   -bok    break at old list keyword breakpoints such as map, sort (default)
- -bot    break at old conditional (trinary ?:) operator breakpoints (default)
+ -bot    break at old conditional (ternary ?:) operator breakpoints (default)
   -cab=n  break at commas after a comma-arrow (=>):
           n=0 break at all commas after =>
           n=1 stable: break unless this breaks an existing one-line container
   -cab=n  break at commas after a comma-arrow (=>):
           n=0 break at all commas after =>
           n=1 stable: break unless this breaks an existing one-line container
@@ -3664,10 +3677,10 @@ sub make_line_information_string {
      my $line_information_string = "";
      if ($input_line_number) {
  
      my $line_information_string = "";
      if ($input_line_number) {
  
-        my $output_line_number       = $self->{_output_line_number};
-        my $brace_depth              = $line_of_tokens->{_curly_brace_depth};
-        my $paren_depth              = $line_of_tokens->{_paren_depth};
-        my $square_bracket_depth     = $line_of_tokens->{_square_bracket_depth};
+        my $output_line_number   = $self->{_output_line_number};
+        my $brace_depth          = $line_of_tokens->{_curly_brace_depth};
+        my $paren_depth          = $line_of_tokens->{_paren_depth};
+        my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
          my $python_indentation_level =
            $line_of_tokens->{_python_indentation_level};
          my $rlevels         = $line_of_tokens->{_rlevels};
          my $python_indentation_level =
            $line_of_tokens->{_python_indentation_level};
          my $rlevels         = $line_of_tokens->{_rlevels};
@@ -3683,13 +3696,13 @@ sub make_line_information_string {
          # for longer scripts it doesn't really matter
          my $extra_space = "";
          $extra_space .=
          # for longer scripts it doesn't really matter
          my $extra_space = "";
          $extra_space .=
-            ( $input_line_number < 10 ) ? "  "
+            ( $input_line_number < 10 )  ? "  "
            : ( $input_line_number < 100 ) ? " "
            : ( $input_line_number < 100 ) ? " "
-          : "";
+          :                                "";
          $extra_space .=
          $extra_space .=
-            ( $output_line_number < 10 ) ? "  "
+            ( $output_line_number < 10 )  ? "  "
            : ( $output_line_number < 100 ) ? " "
            : ( $output_line_number < 100 ) ? " "
-          : "";
+          :                                 "";
  
          # there are 2 possible nesting strings:
          # the original which looks like this:  (0 [1 {2
  
          # there are 2 possible nesting strings:
          # the original which looks like this:  (0 [1 {2
@@ -3863,14 +3876,14 @@ EOM
      elsif ( $saw_code_bug == 1 ) {
          if ( $self->{_saw_extrude} ) {
              $self->warning(<<EOM);
      elsif ( $saw_code_bug == 1 ) {
          if ( $self->{_saw_extrude} ) {
              $self->warning(<<EOM);
-You may have encountered a bug in perltidy.  However, since you are
-using the -extrude option, the problem may be with perl itself, which
-has occasional parsing problems with this type of file.  If you believe
-that the problem is with perltidy, and the problem is not listed in the
-BUGS file at http://perltidy.sourceforge.net, please report it so that
-it can be corrected.  Include the smallest possible script which has the
-problem, along with the .LOG file. See the manual pages for contact
-information.
+
+You may have encountered a bug in perltidy.  However, since you are using the
+-extrude option, the problem may be with perl or one of its modules, which have
+occasional problems with this type of file.  If you believe that the
+problem is with perltidy, and the problem is not listed in the BUGS file at
+http://perltidy.sourceforge.net, please report it so that it can be corrected.
+Include the smallest possible script which has the problem, along with the .LOG
+file. See the manual pages for contact information.
  Thank you!
  EOM
          }
  Thank you!
  EOM
          }
@@ -4823,7 +4836,7 @@ sub make_frame {
      # 1. Make the table of contents panel, with appropriate changes
      # to the anchor names
      my $src_frame_name = 'SRC';
      # 1. Make the table of contents panel, with appropriate changes
      # to the anchor names
      my $src_frame_name = 'SRC';
-    my $first_anchor   =
+    my $first_anchor =
        write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
          $src_frame_name );
  
        write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
          $src_frame_name );
  
@@ -5256,7 +5269,7 @@ sub write_line {
          elsif ( $line_type eq 'FORMAT' )     { $line_character = 'H' }
          elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
          elsif ( $line_type eq 'SYSTEM' )     { $line_character = 'c' }
          elsif ( $line_type eq 'FORMAT' )     { $line_character = 'H' }
          elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
          elsif ( $line_type eq 'SYSTEM' )     { $line_character = 'c' }
-        elsif ( $line_type eq 'END_START' )  {
+        elsif ( $line_type eq 'END_START' ) {
              $line_character = 'k';
              $self->add_toc_item( '__END__', '__END__' );
          }
              $line_character = 'k';
              $self->add_toc_item( '__END__', '__END__' );
          }
@@ -5317,10 +5330,10 @@ EOM
      # add the line number if requested
      if ( $rOpts->{'html-line-numbers'} ) {
          my $extra_space .=
      # add the line number if requested
      if ( $rOpts->{'html-line-numbers'} ) {
          my $extra_space .=
-            ( $line_number < 10 ) ? "   "
+            ( $line_number < 10 )   ? "   "
            : ( $line_number < 100 )  ? "  "
            : ( $line_number < 1000 ) ? " "
            : ( $line_number < 100 )  ? "  "
            : ( $line_number < 1000 ) ? " "
-          : "";
+          :                           "";
          $html_line = $extra_space . $line_number . " " . $html_line;
      }
  
          $html_line = $extra_space . $line_number . " " . $html_line;
      }
  
@@ -5429,6 +5442,7 @@ use vars qw{
    $last_last_nonblank_token_to_go
    @nonblank_lines_at_depth
    $starting_in_quote
    $last_last_nonblank_token_to_go
    @nonblank_lines_at_depth
    $starting_in_quote
+  $ending_in_quote
  
    $in_format_skipping_section
    $format_skipping_pattern_begin
  
    $in_format_skipping_section
    $format_skipping_pattern_begin
@@ -5449,7 +5463,6 @@ use vars qw{
    $added_semicolon_count
    $first_added_semicolon_at
    $last_added_semicolon_at
    $added_semicolon_count
    $first_added_semicolon_at
    $last_added_semicolon_at
-  $saw_negative_indentation
    $first_tabbing_disagreement
    $last_tabbing_disagreement
    $in_tabbing_disagreement
    $first_tabbing_disagreement
    $last_tabbing_disagreement
    $in_tabbing_disagreement
@@ -5499,6 +5512,7 @@ use vars qw{
    %is_assignment
    %is_chain_operator
    %is_if_unless_and_or_last_next_redo_return
    %is_assignment
    %is_chain_operator
    %is_if_unless_and_or_last_next_redo_return
+  %is_until_while_for_if_elsif_else
  
    @has_broken_sublist
    @dont_align
  
    @has_broken_sublist
    @dont_align
@@ -5539,7 +5553,7 @@ use vars qw{
    $rOpts_break_at_old_keyword_breakpoints
    $rOpts_break_at_old_comma_breakpoints
    $rOpts_break_at_old_logical_breakpoints
    $rOpts_break_at_old_keyword_breakpoints
    $rOpts_break_at_old_comma_breakpoints
    $rOpts_break_at_old_logical_breakpoints
-  $rOpts_break_at_old_trinary_breakpoints
+  $rOpts_break_at_old_ternary_breakpoints
    $rOpts_closing_side_comment_else_flag
    $rOpts_closing_side_comment_maximum_text
    $rOpts_continuation_indentation
    $rOpts_closing_side_comment_else_flag
    $rOpts_closing_side_comment_maximum_text
    $rOpts_continuation_indentation
@@ -5615,6 +5629,10 @@ BEGIN {
      @_ = qw(is if unless and or err last next redo return);
      @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_);
  
      @_ = qw(is if unless and or err last next redo return);
      @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_);
  
+    # always break after a closing curly of these block types:
+    @_ = qw(until while for if elsif else);
+    @is_until_while_for_if_elsif_else{@_} = (1) x scalar(@_);
+
      @_ = qw(last next redo return);
      @is_last_next_redo_return{@_} = (1) x scalar(@_);
  
      @_ = qw(last next redo return);
      @is_last_next_redo_return{@_} = (1) x scalar(@_);
  
@@ -5808,7 +5826,6 @@ sub new {
      @want_comma_break   = ();
  
      @ci_stack                   = ("");
      @want_comma_break   = ();
  
      @ci_stack                   = ("");
-    $saw_negative_indentation   = 0;
      $first_tabbing_disagreement = 0;
      $last_tabbing_disagreement  = 0;
      $tabbing_disagreement_count = 0;
      $first_tabbing_disagreement = 0;
      $last_tabbing_disagreement  = 0;
      $tabbing_disagreement_count = 0;
@@ -6123,7 +6140,7 @@ sub set_leading_whitespace {
          my $space_count     = 0;
          my $available_space = 0;
          $level = -1;    # flag to prevent storing in item_list
          my $space_count     = 0;
          my $available_space = 0;
          $level = -1;    # flag to prevent storing in item_list
-        $leading_spaces_to_go[$max_index_to_go]   =
+        $leading_spaces_to_go[$max_index_to_go] =
            $reduced_spaces_to_go[$max_index_to_go] =
            new_lp_indentation_item( $space_count, $level, $ci_level,
              $available_space, 0 );
            $reduced_spaces_to_go[$max_index_to_go] =
            new_lp_indentation_item( $space_count, $level, $ci_level,
              $available_space, 0 );
@@ -6152,17 +6169,33 @@ sub set_leading_whitespace {
              # find the position if we break at the '='
              my $i_test = $last_equals;
              if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
              # find the position if we break at the '='
              my $i_test = $last_equals;
              if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
+
+            # TESTING
+            ##my $too_close = ($i_test==$max_index_to_go-1);
+
              my $test_position = total_line_length( $i_test, $max_index_to_go );
  
              if (
  
              my $test_position = total_line_length( $i_test, $max_index_to_go );
  
              if (
  
+                # the equals is not just before an open paren (testing)
+                ##!$too_close &&
+
                  # if we are beyond the midpoint
                  $gnu_position_predictor > $half_maximum_line_length
  
                  # if we are beyond the midpoint
                  $gnu_position_predictor > $half_maximum_line_length
  
-                # or if we can save some space by breaking at the '='
-                # without obscuring the second line by the first
-                || ( $test_position > 1 +
-                    total_line_length( $line_start_index_to_go, $last_equals ) )
+                # or we are beyont the 1/4 point and there was an old
+                # break at the equals
+                || (
+                    $gnu_position_predictor > $half_maximum_line_length / 2
+                    && (
+                        $old_breakpoint_to_go[$last_equals]
+                        || (   $last_equals > 0
+                            && $old_breakpoint_to_go[ $last_equals - 1 ] )
+                        || (   $last_equals > 1
+                            && $types_to_go[ $last_equals - 1 ] eq 'b'
+                            && $old_breakpoint_to_go[ $last_equals - 2 ] )
+                    )
+                )
                )
              {
  
                )
              {
  
@@ -6992,7 +7025,7 @@ EOM
      # Define here tokens which may follow the closing brace of a do statement
      # on the same line, as in:
      #   } while ( $something);
      # Define here tokens which may follow the closing brace of a do statement
      # on the same line, as in:
      #   } while ( $something);
-    @_ = qw(until while unless if ; );
+    @_ = qw(until while unless if ; : );
      push @_, ',';
      @is_do_follower{@_} = (1) x scalar(@_);
  
      push @_, ',';
      @is_do_follower{@_} = (1) x scalar(@_);
  
@@ -7012,14 +7045,14 @@ EOM
      %is_else_brace_follower = ();
  
      # what can follow a multi-line anonymous sub definition closing curly:
      %is_else_brace_follower = ();
  
      # what can follow a multi-line anonymous sub definition closing curly:
-    @_ = qw# ; : => or and  && || ) #;
+    @_ = qw# ; : => or and  && || ~~ ) #;
      push @_, ',';
      @is_anon_sub_brace_follower{@_} = (1) x scalar(@_);
  
      # what can follow a one-line anonynomous sub closing curly:
      # one-line anonumous subs also have ']' here...
      # see tk3.t and PP.pm
      push @_, ',';
      @is_anon_sub_brace_follower{@_} = (1) x scalar(@_);
  
      # what can follow a one-line anonynomous sub closing curly:
      # one-line anonumous subs also have ']' here...
      # see tk3.t and PP.pm
-    @_ = qw#  ; : => or and  && || ) ] #;
+    @_ = qw#  ; : => or and  && || ) ] ~~ #;
      push @_, ',';
      @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);
  
      push @_, ',';
      @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);
  
@@ -7085,15 +7118,15 @@ EOM
      );
  
      # frequently used parameters
      );
  
      # frequently used parameters
-    $rOpts_add_newlines                   = $rOpts->{'add-newlines'};
-    $rOpts_add_whitespace                 = $rOpts->{'add-whitespace'};
-    $rOpts_block_brace_tightness          = $rOpts->{'block-brace-tightness'};
+    $rOpts_add_newlines          = $rOpts->{'add-newlines'};
+    $rOpts_add_whitespace        = $rOpts->{'add-whitespace'};
+    $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
      $rOpts_block_brace_vertical_tightness =
        $rOpts->{'block-brace-vertical-tightness'};
      $rOpts_brace_left_and_indent   = $rOpts->{'brace-left-and-indent'};
      $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
      $rOpts_block_brace_vertical_tightness =
        $rOpts->{'block-brace-vertical-tightness'};
      $rOpts_brace_left_and_indent   = $rOpts->{'brace-left-and-indent'};
      $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
-    $rOpts_break_at_old_trinary_breakpoints =
-      $rOpts->{'break-at-old-trinary-breakpoints'};
+    $rOpts_break_at_old_ternary_breakpoints =
+      $rOpts->{'break-at-old-ternary-breakpoints'};
      $rOpts_break_at_old_comma_breakpoints =
        $rOpts->{'break-at-old-comma-breakpoints'};
      $rOpts_break_at_old_keyword_breakpoints =
      $rOpts_break_at_old_comma_breakpoints =
        $rOpts->{'break-at-old-comma-breakpoints'};
      $rOpts_break_at_old_keyword_breakpoints =
@@ -7425,8 +7458,18 @@ EOM
          # for avoiding syntax problems rather than for formatting.
          my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
  
          # for avoiding syntax problems rather than for formatting.
          my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
  
-        # never combine two bare words or numbers
-        my $result = ( ( $tokenr =~ /^[\'\w]/ ) && ( $tokenl =~ /[\'\w]$/ ) )
+        my $result =
+
+          # never combine two bare words or numbers
+          # examples:  and ::ok(1)
+          #            return ::spw(...)
+          #            for bla::bla:: abc
+          # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
+          #            $input eq"quit" to make $inputeq"quit"
+          #            my $size=-s::SINK if $file;  <==OK but we won't do it
+          # don't join something like: for bla::bla:: abc
+          # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
+          ( ( $tokenl =~ /([\'\w]|\:\:)$/ ) && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
  
            # do not combine a number with a concatination dot
            # example: pom.caputo:
  
            # do not combine a number with a concatination dot
            # example: pom.caputo:
@@ -7479,7 +7522,11 @@ EOM
  
            # retain any space after possible filehandle
            # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
  
            # retain any space after possible filehandle
            # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
-          || ( $typel eq 'Z' || $typell eq 'Z' )
+          || ( $typel eq 'Z' )
+
+          # Perl is sensitive to whitespace after the + here:
+          #  $b = xvals $a + 0.1 * yvals $a;
+          || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )
  
            # keep paren separate in 'use Foo::Bar ()'
            || ( $tokenr eq '('
  
            # keep paren separate in 'use Foo::Bar ()'
            || ( $tokenr eq '('
@@ -7526,9 +7573,6 @@ EOM
            #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
            || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
  
            #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
            || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
  
-          # don't join something like: for bla::bla:: abc
-          # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
-          || ( $tokenl =~ /\:\:$/ && ( $tokenr =~ /^[\'\w]/ ) )
            ;    # the value of this long logic sequence is the result we want
          return $result;
      }
            ;    # the value of this long logic sequence is the result we want
          return $result;
      }
@@ -7587,7 +7631,7 @@ sub set_white_space_flag {
  
          my @spaces_both_sides = qw"
            + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
  
          my @spaces_both_sides = qw"
            + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
-          .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>=
+          .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~
            &&= ||= //= <=> A k f w F n C Y U G v
            ";
  
            &&= ||= //= <=> A k f w F n C Y U G v
            ";
  
@@ -7883,7 +7927,7 @@ sub set_white_space_flag {
  
          # patch for SWITCH/CASE: make space at ']{' optional
          # since the '{' might begin a case or when block
  
          # patch for SWITCH/CASE: make space at ']{' optional
          # since the '{' might begin a case or when block
-        elsif ( $token eq '{' && $last_token eq ']' ) {
+        elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
              $ws = WS_OPTIONAL;
          }
  
              $ws = WS_OPTIONAL;
          }
  
@@ -7932,8 +7976,13 @@ sub set_white_space_flag {
          elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
  
          # always preserver whatever space was used after a possible
          elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
  
          # always preserver whatever space was used after a possible
-        # filehandle or here doc operator
-        if ( $type ne '#' && ( $last_type eq 'Z' || $last_type eq 'h' ) ) {
+        # filehandle (except _) or here doc operator
+        if (
+            $type ne '#'
+            && ( ( $last_type eq 'Z' && $last_token ne '_' )
+                || $last_type eq 'h' )
+          )
+        {
              $ws = WS_OPTIONAL;
          }
  
              $ws = WS_OPTIONAL;
          }
  
@@ -8110,7 +8159,7 @@ sub set_white_space_flag {
          # If this becomes too much of a problem, we might give up and just clip
          # them at zero.
          ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
          # If this becomes too much of a problem, we might give up and just clip
          # them at zero.
          ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
-        $levels_to_go[$max_index_to_go]        = $level;
+        $levels_to_go[$max_index_to_go] = $level;
          $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
          $lengths_to_go[ $max_index_to_go + 1 ] =
            $lengths_to_go[$max_index_to_go] + length($token);
          $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
          $lengths_to_go[ $max_index_to_go + 1 ] =
            $lengths_to_go[$max_index_to_go] + length($token);
@@ -8165,16 +8214,6 @@ sub set_white_space_flag {
          return;
      }
  
          return;
      }
  
-    my %is_until_while_for_if_elsif_else;
-
-    BEGIN {
-
-        # always break after a closing curly of these block types:
-        @_ = qw(until while for if elsif else);
-        @is_until_while_for_if_elsif_else{@_} = (1) x scalar(@_);
-
-    }
-
      sub print_line_of_tokens {
  
          my $line_of_tokens = shift;
      sub print_line_of_tokens {
  
          my $line_of_tokens = shift;
@@ -8214,7 +8253,8 @@ sub set_white_space_flag {
  
          $in_continued_quote = $starting_in_quote =
            $line_of_tokens->{_starting_in_quote};
  
          $in_continued_quote = $starting_in_quote =
            $line_of_tokens->{_starting_in_quote};
-        $in_quote                 = $line_of_tokens->{_ending_in_quote};
+        $in_quote        = $line_of_tokens->{_ending_in_quote};
+        $ending_in_quote = $in_quote;
          $python_indentation_level =
            $line_of_tokens->{_python_indentation_level};
  
          $python_indentation_level =
            $line_of_tokens->{_python_indentation_level};
  
@@ -8316,7 +8356,7 @@ sub set_white_space_flag {
              && $rOpts->{'static-block-comments'}
              && $input_line =~ /$static_block_comment_pattern/o )
          {
              && $rOpts->{'static-block-comments'}
              && $input_line =~ /$static_block_comment_pattern/o )
          {
-            $is_static_block_comment                       = 1;
+            $is_static_block_comment = 1;
              $is_static_block_comment_without_leading_space =
                substr( $input_line, 0, 1 ) eq '#';
          }
              $is_static_block_comment_without_leading_space =
                substr( $input_line, 0, 1 ) eq '#';
          }
@@ -8423,7 +8463,7 @@ sub set_white_space_flag {
          #       /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
          #   Examples:
          #     *VERSION = \'1.01';
          #       /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
          #   Examples:
          #     *VERSION = \'1.01';
-        #     ( $VERSION ) = '$Revision: 1.49 $ ' =~ /\$Revision:\s+([^\s]+)/;
+        #     ( $VERSION ) = '$Revision: 1.56 $ ' =~ /\$Revision:\s+([^\s]+)/;
          #   We will pass such a line straight through without breaking
          #   it unless -npvl is used
  
          #   We will pass such a line straight through without breaking
          #   it unless -npvl is used
  
@@ -8442,10 +8482,11 @@ sub set_white_space_flag {
          }
  
          # take care of indentation-only
          }
  
          # take care of indentation-only
-        # also write a line which is entirely a 'qw' list
-        if ( $rOpts->{'indent-only'}
-            || ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq 'q' ) ) )
-        {
+        # NOTE: In previous versions we sent all qw lines out immediately here.
+        # No longer doing this: also write a line which is entirely a 'qw' list
+        # to allow stacking of opening and closing tokens.  Note that interior
+        # qw lines will still go out at the end of this routine.
+        if ( $rOpts->{'indent-only'} ) {
              flush();
              $input_line =~ s/^\s*//;    # trim left end
              $input_line =~ s/\s*$//;    # trim right end
              flush();
              $input_line =~ s/^\s*//;    # trim left end
              $input_line =~ s/\s*$//;    # trim right end
@@ -8875,7 +8916,14 @@ sub set_white_space_flag {
                      #
                      # But make a line break if the curly ends a
                      # significant block:
                      #
                      # But make a line break if the curly ends a
                      # significant block:
-                    if ( $is_until_while_for_if_elsif_else{$block_type} ) {
+                    ##if ( $is_until_while_for_if_elsif_else{$block_type} ) {
+                    if (
+                        $is_block_without_semicolon{$block_type}
+
+                        # if needless semicolon follows we handle it later
+                        && $next_nonblank_token ne ';'
+                      )
+                    {
                          output_line_to_go() unless ($no_internal_newlines);
                      }
                  }
                          output_line_to_go() unless ($no_internal_newlines);
                      }
                  }
@@ -8911,11 +8959,6 @@ sub set_white_space_flag {
                      }
                  }
  
                      }
                  }
  
-                # TESTING ONLY for SWITCH/CASE - this is where to start
-                # recoding to retain else's on the same line as a case,
-                # but there is a lot more that would need to be done.
-                ##elsif ($block_type eq 'case') {$rbrace_follower = {else=>1};}
-
                  # None of the above: specify what can follow a closing
                  # brace of a block which is not an
                  # if/elsif/else/do/sort/map/grep/eval
                  # None of the above: specify what can follow a closing
                  # brace of a block which is not an
                  # if/elsif/else/do/sort/map/grep/eval
@@ -9087,7 +9130,9 @@ sub set_white_space_flag {
              # if there is a side comment
              ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
  
              # if there is a side comment
              ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
  
-            # if this line which ends in a quote
+            # if this line ends in a quote
+            # NOTE: This is critically important for insuring that quoted lines
+            # do not get processed by things like -sot and -sct
              || $in_quote
  
              # if this is a VERSION statement
              || $in_quote
  
              # if this is a VERSION statement
@@ -9239,8 +9284,8 @@ sub starting_one_line_block {
      for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) {
  
          # old whitespace could be arbitrarily large, so don't use it
      for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) {
  
          # old whitespace could be arbitrarily large, so don't use it
-        if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
-        else { $pos += length( $$rtokens[$i] ) }
+        if   ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
+        else                              { $pos += length( $$rtokens[$i] ) }
  
          # Return false result if we exceed the maximum line length,
          if ( $pos > $rOpts_maximum_line_length ) {
  
          # Return false result if we exceed the maximum line length,
          if ( $pos > $rOpts_maximum_line_length ) {
@@ -9463,7 +9508,7 @@ sub set_logical_padding {
                      # we might pad token $ibeg, so be sure that it
                      # is at the same depth as the next line.
                      next
                      # we might pad token $ibeg, so be sure that it
                      # is at the same depth as the next line.
                      next
-                      if ( $nesting_depth_to_go[ $ibeg + 1 ] !=
+                      if ( $nesting_depth_to_go[$ibeg] !=
                          $nesting_depth_to_go[$ibeg_next] );
  
                      # We can pad on line 1 of a statement if at least 3
                          $nesting_depth_to_go[$ibeg_next] );
  
                      # We can pad on line 1 of a statement if at least 3
@@ -9854,7 +9899,7 @@ sub correct_lp_indentation {
                  # then we are probably vertically aligned.  We could set
                  # an exact flag in sub scan_list, but this is good
                  # enough.
                  # then we are probably vertically aligned.  We could set
                  # an exact flag in sub scan_list, but this is good
                  # enough.
-                my $indentation_count     = keys %saw_indentation;
+                my $indentation_count = keys %saw_indentation;
                  my $is_vertically_aligned =
                    (      $i == $ibeg
                        && $first_line_comma_count > 1
                  my $is_vertically_aligned =
                    (      $i == $ibeg
                        && $first_line_comma_count > 1
@@ -9904,11 +9949,10 @@ sub flush {
      Perl::Tidy::VerticalAligner::flush();
  }
  
      Perl::Tidy::VerticalAligner::flush();
  }
  
-# output_line_to_go sends one logical line of tokens on down the
+# sub output_line_to_go sends one logical line of tokens on down the
  # pipeline to the VerticalAligner package, breaking the line into continuation
  # lines as necessary.  The line of tokens is ready to go in the "to_go"
  # arrays.
  # pipeline to the VerticalAligner package, breaking the line into continuation
  # lines as necessary.  The line of tokens is ready to go in the "to_go"
  # arrays.
-
  sub output_line_to_go {
  
      # debug stuff; this routine can be called from many points
  sub output_line_to_go {
  
      # debug stuff; this routine can be called from many points
@@ -9937,6 +9981,45 @@ sub output_line_to_go {
      # any unfinished items in its stack
      finish_lp_batch();
  
      # any unfinished items in its stack
      finish_lp_batch();
  
+    # If this line ends in a code block brace, set breaks at any
+    # previous closing code block braces to breakup a chain of code
+    # blocks on one line.  This is very rare but can happen for
+    # user-defined subs.  For example we might be looking at this:
+    #  BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
+    my $saw_good_break = 0;    # flag to force breaks even if short line
+    if (
+
+        # looking for opening or closing block brace
+        $block_type_to_go[$max_index_to_go]
+
+        # but not one of these which are never duplicated on a line:
+        ##&& !$is_until_while_for_if_elsif_else{ $block_type_to_go
+        ##      [$max_index_to_go] }
+        && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
+      )
+    {
+        my $lev = $nesting_depth_to_go[$max_index_to_go];
+
+        # Walk backwards from the end and
+        # set break at any closing block braces at the same level.
+        # But quit if we are not in a chain of blocks.
+        for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
+            last if ( $levels_to_go[$i] < $lev );    # stop at a lower level
+            next if ( $levels_to_go[$i] > $lev );    # skip past higher level
+
+            if ( $block_type_to_go[$i] ) {
+                if ( $tokens_to_go[$i] eq '}' ) {
+                    set_forced_breakpoint($i);
+                    $saw_good_break = 1;
+                }
+            }
+
+            # quit if we see anything besides words, function, blanks
+            # at this level
+            elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
+        }
+    }
+
      my $imin = 0;
      my $imax = $max_index_to_go;
  
      my $imin = 0;
      my $imax = $max_index_to_go;
  
@@ -9967,7 +10050,9 @@ sub output_line_to_go {
  
              # break before all package declarations
              # MCONVERSION LOCATION - for tokenizaton change
  
              # break before all package declarations
              # MCONVERSION LOCATION - for tokenizaton change
-            elsif ( $leading_token =~ /^(package\s)/ && $leading_type eq 'i' ) {
+            elsif ($leading_token =~ /^(package\s)/
+                && $leading_type eq 'i' )
+            {
                  $want_blank = ( $rOpts->{'blanks-before-subs'} );
              }
  
                  $want_blank = ( $rOpts->{'blanks-before-subs'} );
              }
  
@@ -9980,8 +10065,9 @@ sub output_line_to_go {
                    );
              }
  
                    );
              }
  
-            # Break before certain block types if we haven't had a break at this
-            # level for a while.  This is the difficult decision..
+            # Break before certain block types if we haven't had a
+            # break at this level for a while.  This is the
+            # difficult decision..
              elsif ($leading_token =~ /^(unless|if|while|until|for|foreach)$/
                  && $leading_type eq 'k' )
              {
              elsif ($leading_token =~ /^(unless|if|while|until|for|foreach)$/
                  && $leading_type eq 'k' )
              {
@@ -10033,8 +10119,7 @@ sub output_line_to_go {
          pad_array_to_go();
  
          # set all forced breakpoints for good list formatting
          pad_array_to_go();
  
          # set all forced breakpoints for good list formatting
-        my $saw_good_break = 0;
-        my $is_long_line   = excess_line_length( $imin, $max_index_to_go ) > 0;
+        my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
  
          if (
              $max_index_to_go > 0
  
          if (
              $max_index_to_go > 0
@@ -10050,7 +10135,7 @@ sub output_line_to_go {
              )
            )
          {
              )
            )
          {
-            $saw_good_break = scan_list();
+            $saw_good_break ||= scan_list();
          }
  
          # let $ri_first and $ri_last be references to lists of
          }
  
          # let $ri_first and $ri_last be references to lists of
@@ -10130,8 +10215,8 @@ sub set_block_text_accumulator {
      if ( $accumulating_text_for_block !~ /^els/ ) {
          $rleading_block_if_elsif_text = [];
      }
      if ( $accumulating_text_for_block !~ /^els/ ) {
          $rleading_block_if_elsif_text = [];
      }
-    $leading_block_text             = "";
-    $leading_block_text_level       = $levels_to_go[$i];
+    $leading_block_text       = "";
+    $leading_block_text_level = $levels_to_go[$i];
      $leading_block_text_line_number =
        $vertical_aligner_object->get_output_line_number();
      $leading_block_text_length_exceeded = 0;
      $leading_block_text_line_number =
        $vertical_aligner_object->get_output_line_number();
      $leading_block_text_length_exceeded = 0;
@@ -10473,6 +10558,13 @@ sub add_closing_side_comment {
          && $block_type_to_go[$i_terminal] =~
          /$closing_side_comment_list_pattern/o
  
          && $block_type_to_go[$i_terminal] =~
          /$closing_side_comment_list_pattern/o
  
+        # .. but not an anonymous sub
+        # These are not normally of interest, and their closing braces are
+        # often followed by commas or semicolons anyway.  This also avoids
+        # possible erratic output due to line numbering inconsistencies
+        # in the cases where their closing braces terminate a line.
+        && $block_type_to_go[$i_terminal] ne 'sub'
+
          # ..and the corresponding opening brace must is not in this batch
          # (because we do not need to tag one-line blocks, although this
          # should also be caught with a positive -csci value)
          # ..and the corresponding opening brace must is not in this batch
          # (because we do not need to tag one-line blocks, although this
          # should also be caught with a positive -csci value)
@@ -10589,9 +10681,9 @@ sub add_closing_side_comment {
          else {
  
              # insert the new side comment into the output token stream
          else {
  
              # insert the new side comment into the output token stream
-            my $type                  = '#';
-            my $block_type            = '';
-            my $type_sequence         = '';
+            my $type          = '#';
+            my $block_type    = '';
+            my $type_sequence = '';
              my $container_environment =
                $container_environment_to_go[$max_index_to_go];
              my $level                = $levels_to_go[$max_index_to_go];
              my $container_environment =
                $container_environment_to_go[$max_index_to_go];
              my $level                = $levels_to_go[$max_index_to_go];
@@ -10635,6 +10727,9 @@ sub send_lines_to_vertical_aligner {
  
      my $rindentation_list = [0];    # ref to indentations for each line
  
  
      my $rindentation_list = [0];    # ref to indentations for each line
  
+    # define the array @matching_token_to_go for the output tokens
+    # which will be non-blank for each special token (such as =>)
+    # for which alignment is required.
      set_vertical_alignment_markers( $ri_first, $ri_last );
  
      # flush if necessary to avoid unwanted alignment
      set_vertical_alignment_markers( $ri_first, $ri_last );
  
      # flush if necessary to avoid unwanted alignment
@@ -10751,7 +10846,7 @@ sub send_lines_to_vertical_aligner {
                  # Mark most things before arrows as a quote to
                  # get them to line up. Testfile: mixed.pl.
                  if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
                  # Mark most things before arrows as a quote to
                  # get them to line up. Testfile: mixed.pl.
                  if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
-                    my $next_type       = $types_to_go[ $i + 1 ];
+                    my $next_type = $types_to_go[ $i + 1 ];
                      my $i_next_nonblank =
                        ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
  
                      my $i_next_nonblank =
                        ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
  
@@ -10782,8 +10877,8 @@ sub send_lines_to_vertical_aligner {
          # done with this line .. join text of tokens to make the last field
          push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
  
          # done with this line .. join text of tokens to make the last field
          push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
  
-        my ( $indentation, $lev, $level_end, $is_semicolon_terminated,
-            $is_outdented_line )
+        my ( $indentation, $lev, $level_end, $terminal_type,
+            $is_semicolon_terminated, $is_outdented_line )
            = set_adjusted_indentation( $ibeg, $iend, \@fields, \@patterns,
              $ri_first, $ri_last, $rindentation_list );
  
            = set_adjusted_indentation( $ibeg, $iend, \@fields, \@patterns,
              $ri_first, $ri_last, $rindentation_list );
  
@@ -10813,6 +10908,17 @@ sub send_lines_to_vertical_aligner {
          # flush an outdented line to avoid any unwanted vertical alignment
          Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
  
          # flush an outdented line to avoid any unwanted vertical alignment
          Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
  
+        my $is_terminal_ternary = 0;
+        if (   $tokens_to_go[$ibeg] eq ':'
+            || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' )
+        {
+            if (   ( $terminal_type eq ';' && $level_end <= $lev )
+                || ( $level_end < $lev ) )
+            {
+                $is_terminal_ternary = 1;
+            }
+        }
+
          # send this new line down the pipe
          my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
          Perl::Tidy::VerticalAligner::append_line(
          # send this new line down the pipe
          my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
          Perl::Tidy::VerticalAligner::append_line(
@@ -10824,6 +10930,7 @@ sub send_lines_to_vertical_aligner {
              \@patterns,
              $forced_breakpoint_to_go[$iend] || $in_comma_list,
              $outdent_long_lines,
              \@patterns,
              $forced_breakpoint_to_go[$iend] || $in_comma_list,
              $outdent_long_lines,
+            $is_terminal_ternary,
              $is_semicolon_terminated,
              $do_not_pad,
              $rvertical_tightness_flags,
              $is_semicolon_terminated,
              $do_not_pad,
              $rvertical_tightness_flags,
@@ -10980,6 +11087,13 @@ sub get_opening_indentation {
              if ( $saved_opening_indentation{$seqno} ) {
                  ( $indent, $offset ) = @{ $saved_opening_indentation{$seqno} };
              }
              if ( $saved_opening_indentation{$seqno} ) {
                  ( $indent, $offset ) = @{ $saved_opening_indentation{$seqno} };
              }
+
+            # some kind of serious error
+            # (example is badfile.t)
+            else {
+                $indent = 0;
+                $offset = 0;
+            }
          }
  
          # if no sequence number it must be an unbalanced container
          }
  
          # if no sequence number it must be an unbalanced container
@@ -11140,9 +11254,9 @@ sub lookup_opening_indentation {
              if (   $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
                  && $i_terminal == $ibeg )
              {
              if (   $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
                  && $i_terminal == $ibeg )
              {
-                my $ci              = $ci_levels_to_go[$ibeg];
-                my $lev             = $levels_to_go[$ibeg];
-                my $next_type       = $types_to_go[ $ibeg + 1 ];
+                my $ci        = $ci_levels_to_go[$ibeg];
+                my $lev       = $levels_to_go[$ibeg];
+                my $next_type = $types_to_go[ $ibeg + 1 ];
                  my $i_next_nonblank =
                    ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
                  if (   $i_next_nonblank <= $max_index_to_go
                  my $i_next_nonblank =
                    ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
                  if (   $i_next_nonblank <= $max_index_to_go
@@ -11429,8 +11543,8 @@ sub lookup_opening_indentation {
              }
          }
  
              }
          }
  
-        return ( $indentation, $lev, $level_end, $is_semicolon_terminated,
-            $is_outdented_line );
+        return ( $indentation, $lev, $level_end, $terminal_type,
+            $is_semicolon_terminated, $is_outdented_line );
      }
  }
  
      }
  }
  
@@ -11454,7 +11568,7 @@ sub set_vertical_tightness_flags {
      # These flags are used by sub set_leading_whitespace in
      # the vertical aligner
  
      # These flags are used by sub set_leading_whitespace in
      # the vertical aligner
  
-    my $rvertical_tightness_flags;
+    my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
  
      # For non-BLOCK tokens, we will need to examine the next line
      # too, so we won't consider the last line.
  
      # For non-BLOCK tokens, we will need to examine the next line
      # too, so we won't consider the last line.
@@ -11599,7 +11713,7 @@ sub set_vertical_tightness_flags {
          # patch to make something like 'qw(' behave like an opening paren
          # (aran.t)
          if ( $types_to_go[$ibeg_next] eq 'q' ) {
          # patch to make something like 'qw(' behave like an opening paren
          # (aran.t)
          if ( $types_to_go[$ibeg_next] eq 'q' ) {
-            if ( $token_beg_next =~ /^q.([\[\(\{])$/ ) {
+            if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
                  $token_beg_next = $1;
              }
          }
                  $token_beg_next = $1;
              }
          }
@@ -11661,9 +11775,34 @@ sub set_vertical_tightness_flags {
            ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
      }
  
            ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
      }
  
+    # pack in the sequence numbers of the ends of this line
+    $rvertical_tightness_flags->[4] = get_seqno($ibeg);
+    $rvertical_tightness_flags->[5] = get_seqno($iend);
      return $rvertical_tightness_flags;
  }
  
      return $rvertical_tightness_flags;
  }
  
+sub get_seqno {
+
+    # get opening and closing sequence numbers of a token for the vertical
+    # aligner.  Assign qw quotes a value to allow qw opening and closing tokens
+    # to be treated somewhat like opening and closing tokens for stacking
+    # tokens by the vertical aligner.
+    my ($ii) = @_;
+    my $seqno = $type_sequence_to_go[$ii];
+    if ( $types_to_go[$ii] eq 'q' ) {
+        my $SEQ_QW = -1;
+        if ( $ii > 0 ) {
+            $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
+        }
+        else {
+            if ( !$ending_in_quote ) {
+                $seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
+            }
+        }
+    }
+    return ($seqno);
+}
+
  {
      my %is_vertical_alignment_type;
      my %is_vertical_alignment_keyword;
  {
      my %is_vertical_alignment_type;
      my %is_vertical_alignment_keyword;
@@ -11672,7 +11811,7 @@ sub set_vertical_tightness_flags {
  
          @_ = qw#
            = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
  
          @_ = qw#
            = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
-          { ? : => =~ && || //
+          { ? : => =~ && || // ~~
            #;
          @is_vertical_alignment_type{@_} = (1) x scalar(@_);
  
            #;
          @is_vertical_alignment_type{@_} = (1) x scalar(@_);
  
@@ -11682,8 +11821,12 @@ sub set_vertical_tightness_flags {
  
      sub set_vertical_alignment_markers {
  
  
      sub set_vertical_alignment_markers {
  
-        # Look at the tokens in this output batch and define the array
-        # 'matching_token_to_go' which marks tokens at which we would
+        # This routine takes the first step toward vertical alignment of the
+        # lines of output text.  It looks for certain tokens which can serve as
+        # vertical alignment markers (such as an '=').
+        #
+        # Method: We look at each token $i in this output batch and set
+        # $matching_token_to_go[$i] equal to those tokens at which we would
          # accept vertical alignment.
  
          # nothing to do if we aren't allowed to change whitespace
          # accept vertical alignment.
  
          # nothing to do if we aren't allowed to change whitespace
@@ -11696,6 +11839,14 @@ sub set_vertical_tightness_flags {
  
          my ( $ri_first, $ri_last ) = @_;
  
  
          my ( $ri_first, $ri_last ) = @_;
  
+        # remember the index of last nonblank token before any sidecomment
+        my $i_terminal = $max_index_to_go;
+        if ( $types_to_go[$i_terminal] eq '#' ) {
+            if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
+                if ( $i_terminal > 0 ) { --$i_terminal }
+            }
+        }
+
          # look at each line of this batch..
          my $last_vertical_alignment_before_index;
          my $vert_last_nonblank_type;
          # look at each line of this batch..
          my $last_vertical_alignment_before_index;
          my $vert_last_nonblank_type;
@@ -11704,6 +11855,7 @@ sub set_vertical_tightness_flags {
          my $max_line = @$ri_first - 1;
          my ( $i, $type, $token, $block_type, $alignment_type );
          my ( $ibeg, $iend, $line );
          my $max_line = @$ri_first - 1;
          my ( $i, $type, $token, $block_type, $alignment_type );
          my ( $ibeg, $iend, $line );
+
          foreach $line ( 0 .. $max_line ) {
              $ibeg                                 = $$ri_first[$line];
              $iend                                 = $$ri_last[$line];
          foreach $line ( 0 .. $max_line ) {
              $ibeg                                 = $$ri_first[$line];
              $iend                                 = $$ri_last[$line];
@@ -11735,12 +11887,10 @@ sub set_vertical_tightness_flags {
                  # align before the first token and 2) the second
                  # token must be a blank if we are to align before
                  # the third
                  # align before the first token and 2) the second
                  # token must be a blank if we are to align before
                  # the third
-                if ( $i < $ibeg + 2 ) {
-                }
+                if ( $i < $ibeg + 2 ) { }
  
                  # must follow a blank token
  
                  # must follow a blank token
-                elsif ( $types_to_go[ $i - 1 ] ne 'b' ) {
-                }
+                elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
  
                  # align a side comment --
                  elsif ( $type eq '#' ) {
  
                  # align a side comment --
                  elsif ( $type eq '#' ) {
@@ -11765,8 +11915,7 @@ sub set_vertical_tightness_flags {
  
                  # otherwise, do not align two in a row to create a
                  # blank field
  
                  # otherwise, do not align two in a row to create a
                  # blank field
-                elsif ( $last_vertical_alignment_before_index == $i - 2 ) {
-                }
+                elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
  
                  # align before one of these keywords
                  # (within a line, since $i>1)
  
                  # align before one of these keywords
                  # (within a line, since $i>1)
@@ -11783,6 +11932,30 @@ sub set_vertical_tightness_flags {
                  elsif ( $is_vertical_alignment_type{$type} ) {
                      $alignment_type = $token;
  
                  elsif ( $is_vertical_alignment_type{$type} ) {
                      $alignment_type = $token;
  
+                    # Do not align a terminal token.  Although it might
+                    # occasionally look ok to do this, it has been found to be
+                    # a good general rule.  The main problems are:
+                    # (1) that the terminal token (such as an = or :) might get
+                    # moved far to the right where it is hard to see because
+                    # nothing follows it, and
+                    # (2) doing so may prevent other good alignments.
+                    if ( $i == $iend || $i >= $i_terminal ) {
+                        $alignment_type = "";
+                    }
+
+                    # Do not align leading ': ('.  This would prevent
+                    # alignment in something like the following:
+                    #   $extra_space .=
+                    #       ( $input_line_number < 10 )  ? "  "
+                    #     : ( $input_line_number < 100 ) ? " "
+                    #     :                                "";
+                    if (   $i == $ibeg + 2
+                        && $types_to_go[$ibeg]    eq ':'
+                        && $types_to_go[ $i - 1 ] eq 'b' )
+                    {
+                        $alignment_type = "";
+                    }
+
                      # For a paren after keyword, only align something like this:
                      #    if    ( $a ) { &a }
                      #    elsif ( $b ) { &b }
                      # For a paren after keyword, only align something like this:
                      #    if    ( $a ) { &a }
                      #    elsif ( $b ) { &b }
@@ -11797,12 +11970,10 @@ sub set_vertical_tightness_flags {
                      # if ($token ne $type) {$alignment_type .= $type}
                  }
  
                      # if ($token ne $type) {$alignment_type .= $type}
                  }
  
-              # NOTE: This is deactivated until the new vertical aligner
-              # is finished because it causes the previous if/elsif alignment
-              # to fail
-              #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i]) {
-              #    $alignment_type = $type;
-              #}
+                # NOTE: This is deactivated because it causes the previous
+                # if/elsif alignment to fail
+                #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
+                #{ $alignment_type = $type; }
  
                  if ($alignment_type) {
                      $last_vertical_alignment_before_index = $i;
  
                  if ($alignment_type) {
                      $last_vertical_alignment_before_index = $i;
@@ -11977,14 +12148,14 @@ sub terminal_type {
  
              # make these a little weaker than nominal so that they get
              # favored for end-of-line characters
  
              # make these a little weaker than nominal so that they get
              # favored for end-of-line characters
-            @_                       = qw"!= == =~ !~";
-            @left_bond_strength{@_}  = (STRONG) x scalar(@_);
+            @_ = qw"!= == =~ !~ ~~";
+            @left_bond_strength{@_} = (STRONG) x scalar(@_);
              @right_bond_strength{@_} =
                ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);
  
              # break AFTER these
              @right_bond_strength{@_} =
                ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);
  
              # break AFTER these
-            @_                       = qw" < >  | & >= <=";
-            @left_bond_strength{@_}  = (VERY_STRONG) x scalar(@_);
+            @_ = qw" < >  | & >= <=";
+            @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_);
              @right_bond_strength{@_} =
                ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_);
  
              @right_bond_strength{@_} =
                ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_);
  
@@ -12005,14 +12176,14 @@ sub terminal_type {
              $left_bond_strength{'G'}  = NOMINAL;
              $right_bond_strength{'G'} = STRONG;
  
              $left_bond_strength{'G'}  = NOMINAL;
              $right_bond_strength{'G'} = STRONG;
  
-            # it is very good to break AFTER various assignment operators
+            # it is good to break AFTER various assignment operators
              @_ = qw(
                = **= += *= &= <<= &&=
                -= /= |= >>= ||= //=
                .= %= ^=
                x=
              );
              @_ = qw(
                = **= += *= &= <<= &&=
                -= /= |= >>= ||= //=
                .= %= ^=
                x=
              );
-            @left_bond_strength{@_}  = (STRONG) x scalar(@_);
+            @left_bond_strength{@_} = (STRONG) x scalar(@_);
              @right_bond_strength{@_} =
                ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_);
  
              @right_bond_strength{@_} =
                ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_);
  
@@ -12654,10 +12825,10 @@ sub pad_array_to_go {
  
      # to simplify coding in scan_list and set_bond_strengths, it helps
      # to create some extra blank tokens at the end of the arrays
  
      # to simplify coding in scan_list and set_bond_strengths, it helps
      # to create some extra blank tokens at the end of the arrays
-    $tokens_to_go[ $max_index_to_go + 1 ]        = '';
-    $tokens_to_go[ $max_index_to_go + 2 ]        = '';
-    $types_to_go[ $max_index_to_go + 1 ]         = 'b';
-    $types_to_go[ $max_index_to_go + 2 ]         = 'b';
+    $tokens_to_go[ $max_index_to_go + 1 ] = '';
+    $tokens_to_go[ $max_index_to_go + 2 ] = '';
+    $types_to_go[ $max_index_to_go + 1 ]  = 'b';
+    $types_to_go[ $max_index_to_go + 2 ]  = 'b';
      $nesting_depth_to_go[ $max_index_to_go + 1 ] =
        $nesting_depth_to_go[$max_index_to_go];
  
      $nesting_depth_to_go[ $max_index_to_go + 1 ] =
        $nesting_depth_to_go[$max_index_to_go];
  
@@ -13028,7 +13199,7 @@ sub pad_array_to_go {
  
                          # TESTING: retain break at a ':' line break
                          if ( ( $i == $i_line_start || $i == $i_line_end )
  
                          # TESTING: retain break at a ':' line break
                          if ( ( $i == $i_line_start || $i == $i_line_end )
-                            && $rOpts_break_at_old_trinary_breakpoints )
+                            && $rOpts_break_at_old_ternary_breakpoints )
                          {
  
                              # TESTING:
                          {
  
                              # TESTING:
@@ -13105,7 +13276,7 @@ sub pad_array_to_go {
                  $rfor_semicolon_list[$depth]           = [];
                  $i_equals[$depth]                      = -1;
                  $want_comma_break[$depth]              = 0;
                  $rfor_semicolon_list[$depth]           = [];
                  $i_equals[$depth]                      = -1;
                  $want_comma_break[$depth]              = 0;
-                $container_type[$depth]                =
+                $container_type[$depth] =
                    ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
                    ? $last_nonblank_token
                    : "";
                    ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
                    ? $last_nonblank_token
                    : "";
@@ -13412,6 +13583,11 @@ sub pad_array_to_go {
                      if ( $rOpts_line_up_parentheses && $saw_opening_structure )
                      {
                          my $item = $leading_spaces_to_go[ $i_opening + 1 ];
                      if ( $rOpts_line_up_parentheses && $saw_opening_structure )
                      {
                          my $item = $leading_spaces_to_go[ $i_opening + 1 ];
+                        if (   $i_opening + 1 < $max_index_to_go
+                            && $types_to_go[ $i_opening + 1 ] eq 'b' )
+                        {
+                            $item = $leading_spaces_to_go[ $i_opening + 2 ];
+                        }
                          if ( defined($item) ) {
                              my $i_start_2 = $item->get_STARTING_INDEX();
                              if (
                          if ( defined($item) ) {
                              my $i_start_2 = $item->get_STARTING_INDEX();
                              if (
@@ -13904,7 +14080,7 @@ sub find_token_starting_list {
          # Looks like a list of items.  We have to look at it and size it up.
          #---------------------------------------------------------------
  
          # Looks like a list of items.  We have to look at it and size it up.
          #---------------------------------------------------------------
  
-        my $opening_token       = $tokens_to_go[$i_opening_paren];
+        my $opening_token = $tokens_to_go[$i_opening_paren];
          my $opening_environment =
            $container_environment_to_go[$i_opening_paren];
  
          my $opening_environment =
            $container_environment_to_go[$i_opening_paren];
  
@@ -14014,7 +14190,7 @@ sub find_token_starting_list {
  
          # Field width parameters
          my $pair_width = ( $max_length[0] + $max_length[1] );
  
          # Field width parameters
          my $pair_width = ( $max_length[0] + $max_length[1] );
-        my $max_width  =
+        my $max_width =
            ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
  
          # Number of free columns across the page width for laying out tables
            ( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
  
          # Number of free columns across the page width for laying out tables
@@ -14157,8 +14333,8 @@ sub find_token_starting_list {
  #           )
  #           if $style eq 'all';
  
  #           )
  #           if $style eq 'all';
  
-            my $i_last_comma    = $$rcomma_index[ $comma_count - 1 ];
-            my $long_last_term  = excess_line_length( 0, $i_last_comma ) <= 0;
+            my $i_last_comma = $$rcomma_index[ $comma_count - 1 ];
+            my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
              my $long_first_term =
                excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
  
              my $long_first_term =
                excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
  
@@ -14238,10 +14414,10 @@ sub find_token_starting_list {
          # align; high sparsity does not look good, especially with few lines
          my $sparsity = ($unused_columns) / ($formatted_columns);
          my $max_allowed_sparsity =
          # align; high sparsity does not look good, especially with few lines
          my $sparsity = ($unused_columns) / ($formatted_columns);
          my $max_allowed_sparsity =
-            ( $item_count < 3 ) ? 0.1
+            ( $item_count < 3 )    ? 0.1
            : ( $packed_lines == 1 ) ? 0.15
            : ( $packed_lines == 2 ) ? 0.4
            : ( $packed_lines == 1 ) ? 0.15
            : ( $packed_lines == 2 ) ? 0.4
-          : 0.7;
+          :                          0.7;
  
          # Begin check for shortcut methods, which avoid treating a list
          # as a table for relatively small parenthesized lists.  These
  
          # Begin check for shortcut methods, which avoid treating a list
          # as a table for relatively small parenthesized lists.  These
@@ -14617,7 +14793,7 @@ sub get_maximum_fields_wanted {
  
  sub table_columns_available {
      my $i_first_comma = shift;
  
  sub table_columns_available {
      my $i_first_comma = shift;
-    my $columns       =
+    my $columns =
        $rOpts_maximum_line_length - leading_spaces_to_go($i_first_comma);
  
      # Patch: the vertical formatter does not line up lines whose lengths
        $rOpts_maximum_line_length - leading_spaces_to_go($i_first_comma);
  
      # Patch: the vertical formatter does not line up lines whose lengths
@@ -15086,7 +15262,7 @@ sub recombine_breakpoints {
                  my $ifff = $n + 2 <= $nmax ? $$ri_first[ $n + 2 ] : -1;
                  my $imm  = $n > 1          ? $$ri_first[ $n - 2 ] : -1;
                  my $seqno = $type_sequence_to_go[$imidr];
                  my $ifff = $n + 2 <= $nmax ? $$ri_first[ $n + 2 ] : -1;
                  my $imm  = $n > 1          ? $$ri_first[ $n - 2 ] : -1;
                  my $seqno = $type_sequence_to_go[$imidr];
-                my $f_ok  =
+                my $f_ok =
                    (      $types_to_go[$if] eq ':'
                        && $type_sequence_to_go[$if] ==
                        $seqno - TYPE_SEQUENCE_INCREMENT );
                    (      $types_to_go[$if] eq ':'
                        && $type_sequence_to_go[$if] ==
                        $seqno - TYPE_SEQUENCE_INCREMENT );
@@ -15190,9 +15366,6 @@ sub recombine_breakpoints {
  
                          )
                        );
  
                          )
                        );
-
-                    # override breakpoint
-                    ##$forced_breakpoint_to_go[$imid] = 0;
                  }
  
                  # handle leading "if" and "unless"
                  }
  
                  # handle leading "if" and "unless"
@@ -15208,10 +15381,6 @@ sub recombine_breakpoints {
                          && $is_and_or{ $tokens_to_go[$if] }
  
                        );
                          && $is_and_or{ $tokens_to_go[$if] }
  
                        );
-
-                    # override breakpoint
-                    ##$forced_breakpoint_to_go[$imid] = 0;
-
                  }
  
                  # handle all other leading keywords
                  }
  
                  # handle all other leading keywords
@@ -15244,9 +15413,6 @@ sub recombine_breakpoints {
                      && $is_if_unless{ $tokens_to_go[$if] }
  
                    );
                      && $is_if_unless{ $tokens_to_go[$if] }
  
                    );
-
-                # override breakpoint
-                ##$forced_breakpoint_to_go[$imid] = 0;
              }
  
              #----------------------------------------------------------
              }
  
              #----------------------------------------------------------
@@ -15364,10 +15530,10 @@ sub set_continuation_breaks {
  
          # loop to find next break point
          for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
  
          # loop to find next break point
          for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
-            my $type            = $types_to_go[$i_test];
-            my $token           = $tokens_to_go[$i_test];
-            my $next_type       = $types_to_go[ $i_test + 1 ];
-            my $next_token      = $tokens_to_go[ $i_test + 1 ];
+            my $type       = $types_to_go[$i_test];
+            my $token      = $tokens_to_go[$i_test];
+            my $next_type  = $types_to_go[ $i_test + 1 ];
+            my $next_token = $tokens_to_go[ $i_test + 1 ];
              my $i_next_nonblank =
                ( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 );
              my $next_nonblank_type       = $types_to_go[$i_next_nonblank];
              my $i_next_nonblank =
                ( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 );
              my $next_nonblank_type       = $types_to_go[$i_next_nonblank];
@@ -15409,7 +15575,6 @@ sub set_continuation_breaks {
  
                  # There is an implied forced break at a terminal opening brace
                  || ( ( $type eq '{' ) && ( $i_test == $imax ) )
  
                  # There is an implied forced break at a terminal opening brace
                  || ( ( $type eq '{' ) && ( $i_test == $imax ) )
-
                )
              {
  
                )
              {
  
@@ -15903,7 +16068,7 @@ sub permanently_decrease_AVAILABLE_SPACES {
  
      my ( $item, $spaces_needed ) = @_;
      my $available_spaces = $item->get_AVAILABLE_SPACES();
  
      my ( $item, $spaces_needed ) = @_;
      my $available_spaces = $item->get_AVAILABLE_SPACES();
-    my $deleted_spaces   =
+    my $deleted_spaces =
        ( $available_spaces > $spaces_needed )
        ? $spaces_needed
        : $available_spaces;
        ( $available_spaces > $spaces_needed )
        ? $spaces_needed
        : $available_spaces;
@@ -15922,7 +16087,7 @@ sub tentatively_decrease_AVAILABLE_SPACES {
      # caller.
      my ( $item, $spaces_needed ) = @_;
      my $available_spaces = $item->get_AVAILABLE_SPACES();
      # caller.
      my ( $item, $spaces_needed ) = @_;
      my $available_spaces = $item->get_AVAILABLE_SPACES();
-    my $deleted_spaces   =
+    my $deleted_spaces =
        ( $available_spaces > $spaces_needed )
        ? $spaces_needed
        : $available_spaces;
        ( $available_spaces > $spaces_needed )
        ? $spaces_needed
        : $available_spaces;
@@ -16373,6 +16538,7 @@ BEGIN {
  
      use constant VALIGN_DEBUG_FLAG_APPEND  => 0;
      use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
  
      use constant VALIGN_DEBUG_FLAG_APPEND  => 0;
      use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
+    use constant VALIGN_DEBUG_FLAG_TERNARY => 0;
  
      my $debug_warning = sub {
          print "VALIGN_DEBUGGING with key $_[0]\n";
  
      my $debug_warning = sub {
          print "VALIGN_DEBUGGING with key $_[0]\n";
@@ -16421,6 +16587,10 @@ use vars qw(
    $cached_seqno
    $cached_line_valid
    $cached_line_leading_space_count
    $cached_seqno
    $cached_line_valid
    $cached_line_leading_space_count
+  $cached_seqno_string
+
+  $seqno_string
+  $last_nonblank_seqno_string
  
    $rOpts
  
  
    $rOpts
  
@@ -16429,6 +16599,7 @@ use vars qw(
    $rOpts_indent_columns
    $rOpts_tabs
    $rOpts_entab_leading_whitespace
    $rOpts_indent_columns
    $rOpts_tabs
    $rOpts_entab_leading_whitespace
+  $rOpts_valign
  
    $rOpts_minimum_space_to_comment
  
  
    $rOpts_minimum_space_to_comment
  
@@ -16474,6 +16645,11 @@ sub initialize {
      $cached_seqno                    = 0;
      $cached_line_valid               = 0;
      $cached_line_leading_space_count = 0;
      $cached_seqno                    = 0;
      $cached_line_valid               = 0;
      $cached_line_leading_space_count = 0;
+    $cached_seqno_string             = "";
+
+    # string of sequence numbers joined together
+    $seqno_string               = "";
+    $last_nonblank_seqno_string = "";
  
      # frequently used parameters
      $rOpts_indent_columns           = $rOpts->{'indent-columns'};
  
      # frequently used parameters
      $rOpts_indent_columns           = $rOpts->{'indent-columns'};
@@ -16481,6 +16657,7 @@ sub initialize {
      $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
      $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
      $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
      $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
      $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
      $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
+    $rOpts_valign                   = $rOpts->{'valign'};
  
      forget_side_comment();
  
  
      forget_side_comment();
  
@@ -16654,19 +16831,18 @@ sub append_line {
      # The log file warns the user if there are any such tabs.
  
      my (
      # The log file warns the user if there are any such tabs.
  
      my (
-        $level,                     $level_end,
-        $indentation,               $rfields,
-        $rtokens,                   $rpatterns,
-        $is_forced_break,           $outdent_long_lines,
-        $is_terminal_statement,     $do_not_pad,
-        $rvertical_tightness_flags, $level_jump,
+        $level,               $level_end,
+        $indentation,         $rfields,
+        $rtokens,             $rpatterns,
+        $is_forced_break,     $outdent_long_lines,
+        $is_terminal_ternary, $is_terminal_statement,
+        $do_not_pad,          $rvertical_tightness_flags,
+        $level_jump,
      ) = @_;
  
      # number of fields is $jmax
      # number of tokens between fields is $jmax-1
      my $jmax = $#{$rfields};
      ) = @_;
  
      # number of fields is $jmax
      # number of tokens between fields is $jmax-1
      my $jmax = $#{$rfields};
-    $previous_minimum_jmax_seen = $minimum_jmax_seen;
-    $previous_maximum_jmax_seen = $maximum_jmax_seen;
  
      my $leading_space_count = get_SPACES($indentation);
  
  
      my $leading_space_count = get_SPACES($indentation);
  
@@ -16692,6 +16868,8 @@ sub append_line {
      if ($rvertical_tightness_flags) {
          if (   $maximum_line_index <= 0
              && $cached_line_type
      if ($rvertical_tightness_flags) {
          if (   $maximum_line_index <= 0
              && $cached_line_type
+            && $cached_seqno
+            && $rvertical_tightness_flags->[2]
              && $rvertical_tightness_flags->[2] == $cached_seqno )
          {
              $rvertical_tightness_flags->[3] ||= 1;
              && $rvertical_tightness_flags->[2] == $cached_seqno )
          {
              $rvertical_tightness_flags->[3] ||= 1;
@@ -16716,7 +16894,8 @@ sub append_line {
      if ( $level < 0 ) { $level = 0 }
  
      # do not align code across indentation level changes
      if ( $level < 0 ) { $level = 0 }
  
      # do not align code across indentation level changes
-    if ( $level != $group_level || $is_outdented ) {
+    # or if vertical alignment is turned off for debugging
+    if ( $level != $group_level || $is_outdented || !$rOpts_valign ) {
  
          # we are allowed to shift a group of lines to the right if its
          # level is greater than the previous and next group
  
          # we are allowed to shift a group of lines to the right if its
          # level is greater than the previous and next group
@@ -16763,6 +16942,25 @@ sub append_line {
          }
      }
  
          }
      }
  
+    # --------------------------------------------------------------------
+    # add dummy fields for terminal ternary
+    # --------------------------------------------------------------------
+    if ( $is_terminal_ternary && $current_line ) {
+        fix_terminal_ternary( $rfields, $rtokens, $rpatterns );
+        $jmax = @{$rfields} - 1;
+    }
+
+    # --------------------------------------------------------------------
+    # add dummy fields for else statement
+    # --------------------------------------------------------------------
+    if (   $rfields->[0] =~ /^else\s*$/
+        && $current_line
+        && $level_jump == 0 )
+    {
+        fix_terminal_else( $rfields, $rtokens, $rpatterns );
+        $jmax = @{$rfields} - 1;
+    }
+
      # --------------------------------------------------------------------
      # Step 1. Handle simple line of code with no fields to match.
      # --------------------------------------------------------------------
      # --------------------------------------------------------------------
      # Step 1. Handle simple line of code with no fields to match.
      # --------------------------------------------------------------------
@@ -16911,6 +17109,8 @@ sub append_line {
      # Future update to allow this to vary:
      $current_line = $new_line if ( $maximum_line_index == 0 );
  
      # Future update to allow this to vary:
      $current_line = $new_line if ( $maximum_line_index == 0 );
  
+    my_flush() if ( $group_type eq "TERMINAL" );
+
      # --------------------------------------------------------------------
      # Step 8. Some old debugging stuff
      # --------------------------------------------------------------------
      # --------------------------------------------------------------------
      # Step 8. Some old debugging stuff
      # --------------------------------------------------------------------
@@ -17139,10 +17339,11 @@ sub eliminate_new_fields {
      my $old_line = shift;
      my $jmax     = $new_line->get_jmax();
  
      my $old_line = shift;
      my $jmax     = $new_line->get_jmax();
  
-    my $old_rtokens   = $old_line->get_rtokens();
-    my $rtokens       = $new_line->get_rtokens();
+    my $old_rtokens = $old_line->get_rtokens();
+    my $rtokens     = $new_line->get_rtokens();
      my $is_assignment =
      my $is_assignment =
-      ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );
+      (      $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] )
+          || $group_type eq "TERMINAL" );
  
      # must be monotonic variation
      return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );
  
      # must be monotonic variation
      return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );
@@ -17166,19 +17367,20 @@ sub eliminate_new_fields {
      my $rpatterns     = $new_line->get_rpatterns();
      my $old_rpatterns = $old_line->get_rpatterns();
  
      my $rpatterns     = $new_line->get_rpatterns();
      my $old_rpatterns = $old_line->get_rpatterns();
  
-    # loop over all old tokens except comment
+    # loop over all OLD tokens except comment and check match
      my $match = 1;
      my $k;
      for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) {
          if (   ( $$old_rtokens[$k] ne $$rtokens[$k] )
      my $match = 1;
      my $k;
      for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) {
          if (   ( $$old_rtokens[$k] ne $$rtokens[$k] )
-            || ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) )
+            || ( $$old_rpatterns[$k] ne $$rpatterns[$k] )
+            && $group_type ne "TERMINAL" )
          {
              $match = 0;
              last;
          }
      }
  
          {
              $match = 0;
              last;
          }
      }
  
-    # first tokens agree, so combine new tokens
+    # first tokens agree, so combine extra new tokens
      if ($match) {
          for $k ( $maximum_field_index .. $jmax - 1 ) {
  
      if ($match) {
          for $k ( $maximum_field_index .. $jmax - 1 ) {
  
@@ -17196,6 +17398,221 @@ sub eliminate_new_fields {
      $new_line->set_jmax($jmax);
  }
  
      $new_line->set_jmax($jmax);
  }
  
+sub fix_terminal_ternary {
+
+    # Add empty fields as necessary to align a ternary term
+    # like this:
+    #
+    #  my $leapyear =
+    #      $year % 4   ? 0
+    #    : $year % 100 ? 1
+    #    : $year % 400 ? 0
+    #    :               1;
+    #
+    my ( $rfields, $rtokens, $rpatterns ) = @_;
+
+    my $jmax        = @{$rfields} - 1;
+    my $old_line    = $group_lines[$maximum_line_index];
+    my $rfields_old = $old_line->get_rfields();
+
+    my $rpatterns_old       = $old_line->get_rpatterns();
+    my $rtokens_old         = $old_line->get_rtokens();
+    my $maximum_field_index = $old_line->get_jmax();
+
+    # look for the question mark after the :
+    my ($jquestion);
+    my $depth_question;
+    my $pad = "";
+    for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) {
+        my $tok = $rtokens_old->[$j];
+        if ( $tok =~ /^\?(\d+)$/ ) {
+            $depth_question = $1;
+
+            # depth must be correct
+            next unless ( $depth_question eq $group_level );
+
+            $jquestion = $j;
+            if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
+                $pad = " " x length($1);
+            }
+            else {
+                return;    # shouldn't happen
+            }
+            last;
+        }
+    }
+    return unless ( defined($jquestion) );    # shouldn't happen
+
+    # Now splice the tokens and patterns of the previous line
+    # into the else line to insure a match.  Add empty fields
+    # as necessary.
+    my $jadd = $jquestion;
+
+    # Work on copies of the actual arrays in case we have
+    # to return due to an error
+    my @fields   = @{$rfields};
+    my @patterns = @{$rpatterns};
+    my @tokens   = @{$rtokens};
+
+    VALIGN_DEBUG_FLAG_TERNARY && do {
+        local $" = '><';
+        print "CURRENT FIELDS=<@{$rfields_old}>\n";
+        print "CURRENT TOKENS=<@{$rtokens_old}>\n";
+        print "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
+        print "UNMODIFIED FIELDS=<@{$rfields}>\n";
+        print "UNMODIFIED TOKENS=<@{$rtokens}>\n";
+        print "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
+    };
+
+    # handle cases of leading colon on this line
+    if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
+
+        my ( $colon, $therest ) = ( $1, $2 );
+
+        # Handle sub-case of first field with leading colon plus additional code
+        # This is the usual situation as at the '1' below:
+        #  ...
+        #  : $year % 400 ? 0
+        #  :               1;
+        if ($therest) {
+
+            # Split the first field after the leading colon and insert padding.
+            # Note that this padding will remain even if the terminal value goes
+            # out on a separate line.  This does not seem to look to bad, so no
+            # mechanism has been included to undo it.
+            my $field1 = shift @fields;
+            unshift @fields, ( $colon, $pad . $therest );
+
+            # change the leading pattern from : to ?
+            return unless ( $patterns[0] =~ s/^\:/?/ );
+
+            # install leading tokens and patterns of existing line
+            unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
+            unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
+
+            # insert appropriate number of empty fields
+            splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
+        }
+
+        # handle sub-case of first field just equal to leading colon.
+        # This can happen for example in the example below where
+        # the leading '(' would create a new alignment token
+        # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
+        # :                        ( $mname = $name . '->' );
+        else {
+
+            return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
+
+            # prepend a leading ? onto the second pattern
+            $patterns[1] = "?b" . $patterns[1];
+
+            # pad the second field
+            $fields[1] = $pad . $fields[1];
+
+            # install leading tokens and patterns of existing line, replacing
+            # leading token and inserting appropriate number of empty fields
+            splice( @tokens,   0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
+            splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
+            splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
+        }
+    }
+
+    # Handle case of no leading colon on this line.  This will
+    # be the case when -wba=':' is used.  For example,
+    #  $year % 400 ? 0 :
+    #                1;
+    else {
+
+        # install leading tokens and patterns of existing line
+        $patterns[0] = '?' . 'b' . $patterns[0];
+        unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
+        unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
+
+        # insert appropriate number of empty fields
+        $jadd = $jquestion + 1;
+        $fields[0] = $pad . $fields[0];
+        splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
+    }
+
+    VALIGN_DEBUG_FLAG_TERNARY && do {
+        local $" = '><';
+        print "MODIFIED TOKENS=<@tokens>\n";
+        print "MODIFIED PATTERNS=<@patterns>\n";
+        print "MODIFIED FIELDS=<@fields>\n";
+    };
+
+    # all ok .. update the arrays
+    @{$rfields}   = @fields;
+    @{$rtokens}   = @tokens;
+    @{$rpatterns} = @patterns;
+
+    # force a flush after this line
+    $group_type = "TERMINAL";
+    return;
+}
+
+sub fix_terminal_else {
+
+    # Add empty fields as necessary to align a balanced terminal
+    # else block to a previous if/elsif/unless block,
+    # like this:
+    #
+    #  if   ( 1 || $x ) { print "ok 13\n"; }
+    #  else             { print "not ok 13\n"; }
+    #
+    my ( $rfields, $rtokens, $rpatterns ) = @_;
+    my $jmax = @{$rfields} - 1;
+    return unless ( $jmax > 0 );
+
+    # check for balanced else block following if/elsif/unless
+    my $rfields_old = $current_line->get_rfields();
+
+    # TBD: add handling for 'case'
+    return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
+
+    # look for the opening brace after the else, and extrace the depth
+    my $tok_brace = $rtokens->[0];
+    my $depth_brace;
+    if ( $tok_brace =~ /^\{(\d+)$/ ) { $depth_brace = $1; }
+
+    # probably:  "else # side_comment"
+    else { return }
+
+    my $rpatterns_old       = $current_line->get_rpatterns();
+    my $rtokens_old         = $current_line->get_rtokens();
+    my $maximum_field_index = $current_line->get_jmax();
+
+    # be sure the previous if/elsif is followed by an opening paren
+    my $jparen    = 0;
+    my $tok_paren = '(' . $depth_brace;
+    my $tok_test  = $rtokens_old->[$jparen];
+    return unless ( $tok_test eq $tok_paren );    # shouldn't happen
+
+    # Now find the opening block brace
+    my ($jbrace);
+    for ( my $j = 1 ; $j < $maximum_field_index ; $j++ ) {
+        my $tok = $rtokens_old->[$j];
+        if ( $tok eq $tok_brace ) {
+            $jbrace = $j;
+            last;
+        }
+    }
+    return unless ( defined($jbrace) );           # shouldn't happen
+
+    # Now splice the tokens and patterns of the previous line
+    # into the else line to insure a match.  Add empty fields
+    # as necessary.
+    my $jadd = $jbrace - $jparen;
+    splice( @{$rtokens},   0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
+    splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
+    splice( @{$rfields}, 1, 0, ('') x $jadd );
+
+    # force a flush after this line if it does not follow a case
+    $group_type = "TERMINAL"
+      unless ( $rfields_old->[0] =~ /^case\s*$/ );
+    return;
+}
+
  sub check_match {
  
      my $new_line = shift;
  sub check_match {
  
      my $new_line = shift;
@@ -17253,12 +17670,16 @@ sub check_match {
              my $old_tok = $$old_rtokens[$j];
              my $new_tok = $$rtokens[$j];
  
              my $old_tok = $$old_rtokens[$j];
              my $new_tok = $$rtokens[$j];
  
-            # dumb down the match after an equals
+            # Dumb down the match AFTER an equals and
+            # also dumb down after seeing a ? ternary operator ...
+            # Everything after a + is the token which preceded the previous
+            # opening paren (container name).  We won't require them to match.
              if ( $saw_equals && $new_tok =~ /(.*)\+/ ) {
                  $new_tok = $1;
                  $old_tok =~ s/\+.*$//;
              }
              if ( $saw_equals && $new_tok =~ /(.*)\+/ ) {
                  $new_tok = $1;
                  $old_tok =~ s/\+.*$//;
              }
-            if ( $new_tok =~ /^=\d*$/ ) { $saw_equals = 1 }
+
+            if ( $new_tok =~ /^[\?=]\d*$/ ) { $saw_equals = 1 }
  
              # we never match if the matching tokens differ
              if (   $j < $jlimit
  
              # we never match if the matching tokens differ
              if (   $j < $jlimit
@@ -17405,14 +17826,6 @@ sub check_fit {
      my $maximum_field_index = $old_line->get_jmax();
      for $j ( 0 .. $jmax ) {
  
      my $maximum_field_index = $old_line->get_jmax();
      for $j ( 0 .. $jmax ) {
  
-        ## testing patch to avoid excessive gaps in previous lines,
-        # due to a line of fewer fields.
-        #   return join( ".",
-        #       $self->{"dfi"},  $self->{"aa"}, $self->rsvd,     $self->{"rd"},
-        #       $self->{"area"}, $self->{"id"}, $self->{"sel"} );
-        ## MOVED BELOW AS A TEST
-        ##next if ($jmax < $maximum_field_index && $j==$jmax-1);
-
          $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j);
  
          if ( $j == 0 ) {
          $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j);
  
          if ( $j == 0 ) {
@@ -17455,7 +17868,11 @@ sub check_fit {
              last;
          }
  
              last;
          }
  
-        # TESTING PATCH moved from above to be sure we fit
+        # patch to avoid excessive gaps in previous lines,
+        # due to a line of fewer fields.
+        #   return join( ".",
+        #       $self->{"dfi"},  $self->{"aa"}, $self->rsvd,     $self->{"rd"},
+        #       $self->{"area"}, $self->{"id"}, $self->{"sel"} );
          next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
  
          # looks ok, squeeze this field in
          next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
  
          # looks ok, squeeze this field in
@@ -17471,6 +17888,8 @@ sub check_fit {
  
  sub accept_line {
  
  
  sub accept_line {
  
+    # The current line either starts a new alignment group or is
+    # accepted into the current alignment group.
      my $new_line = shift;
      $group_lines[ ++$maximum_line_index ] = $new_line;
  
      my $new_line = shift;
      $group_lines[ ++$maximum_line_index ] = $new_line;
  
@@ -17503,6 +17922,10 @@ sub accept_line {
            $group_lines[ $maximum_line_index - 1 ]->get_alignments();
          $new_line->set_alignments(@new_alignments);
      }
            $group_lines[ $maximum_line_index - 1 ]->get_alignments();
          $new_line->set_alignments(@new_alignments);
      }
+
+    # remember group jmax extremes for next call to append_line
+    $previous_minimum_jmax_seen = $minimum_jmax_seen;
+    $previous_maximum_jmax_seen = $maximum_jmax_seen;
  }
  
  sub dump_array {
  }
  
  sub dump_array {
@@ -17520,11 +17943,13 @@ sub flush {
  
      if ( $maximum_line_index < 0 ) {
          if ($cached_line_type) {
  
      if ( $maximum_line_index < 0 ) {
          if ($cached_line_type) {
+            $seqno_string = $cached_seqno_string;
              entab_and_output( $cached_line_text,
                  $cached_line_leading_space_count,
                  $last_group_level_written );
              entab_and_output( $cached_line_text,
                  $cached_line_leading_space_count,
                  $last_group_level_written );
-            $cached_line_type = 0;
-            $cached_line_text = "";
+            $cached_line_type    = 0;
+            $cached_line_text    = "";
+            $cached_seqno_string = "";
          }
      }
      else {
          }
      }
      else {
@@ -17552,7 +17977,7 @@ sub my_flush {
          # zero leading space count if any lines are too long
          my $max_excess = 0;
          for my $i ( 0 .. $maximum_line_index ) {
          # zero leading space count if any lines are too long
          my $max_excess = 0;
          for my $i ( 0 .. $maximum_line_index ) {
-            my $str    = $group_lines[$i];
+            my $str = $group_lines[$i];
              my $excess =
                length($str) + $leading_space_count - $rOpts_maximum_line_length;
              if ( $excess > $max_excess ) {
              my $excess =
                length($str) + $leading_space_count - $rOpts_maximum_line_length;
              if ( $excess > $max_excess ) {
@@ -17622,6 +18047,7 @@ sub decide_if_aligned {
  
      # Do not try to align two lines which are not really similar
      return unless $maximum_line_index == 1;
  
      # Do not try to align two lines which are not really similar
      return unless $maximum_line_index == 1;
+    return if ( $group_type eq "TERMINAL" );
  
      my $group_list_type = $group_lines[0]->get_list_type();
  
  
      my $group_list_type = $group_lines[0]->get_list_type();
  
@@ -17639,6 +18065,8 @@ sub decide_if_aligned {
              || $group_maximum_gap > 12
  
              # or lines with differing number of alignment tokens
              || $group_maximum_gap > 12
  
              # or lines with differing number of alignment tokens
+            # TODO: this could be improved.  It occasionally rejects
+            # good matches.
              || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
            )
      );
              || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
            )
      );
@@ -17874,6 +18302,9 @@ sub write_vertically_aligned_line {
              $total_pad_count = 0;
              $str .= $$rfields[$j];
          }
              $total_pad_count = 0;
              $str .= $$rfields[$j];
          }
+        else {
+            $total_pad_count = 0;
+        }
  
          # update side comment history buffer
          if ( $j == $maximum_field_index ) {
  
          # update side comment history buffer
          if ( $j == $maximum_field_index ) {
@@ -17994,7 +18425,7 @@ sub write_leader_and_string {
            length($str) - $side_comment_length + $leading_space_count -
            $rOpts_maximum_line_length;
          if ( $excess > 0 ) {
            length($str) - $side_comment_length + $leading_space_count -
            $rOpts_maximum_line_length;
          if ( $excess > 0 ) {
-            $leading_space_count    = 0;
+            $leading_space_count = 0;
              $last_outdented_line_at =
                $file_writer_object->get_output_line_number();
  
              $last_outdented_line_at =
                $file_writer_object->get_output_line_number();
  
@@ -18020,12 +18451,17 @@ sub write_leader_and_string {
      #   [2] sequence number of container
      #   [3] valid flag: do not append if this flag is false
      #
      #   [2] sequence number of container
      #   [3] valid flag: do not append if this flag is false
      #
-    my ( $open_or_close, $tightness_flag, $seqno, $valid );
+    my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
+        $seqno_end );
      if ($rvertical_tightness_flags) {
      if ($rvertical_tightness_flags) {
-        ( $open_or_close, $tightness_flag, $seqno, $valid ) =
-          @{$rvertical_tightness_flags};
+        (
+            $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
+            $seqno_end
+        ) = @{$rvertical_tightness_flags};
      }
  
      }
  
+    $seqno_string = $seqno_end;
+
      # handle any cached line ..
      # either append this line to it or write it out
      if ( length($cached_line_text) ) {
      # handle any cached line ..
      # either append this line to it or write it out
      if ( length($cached_line_text) ) {
@@ -18051,6 +18487,7 @@ sub write_leader_and_string {
              if ( $gap >= 0 ) {
                  $leading_string      = $cached_line_text . ' ' x $gap;
                  $leading_space_count = $cached_line_leading_space_count;
              if ( $gap >= 0 ) {
                  $leading_string      = $cached_line_text . ' ' x $gap;
                  $leading_space_count = $cached_line_leading_space_count;
+                $seqno_string        = $cached_seqno_string . ':' . $seqno_beg;
              }
              else {
                  entab_and_output( $cached_line_text,
              }
              else {
                  entab_and_output( $cached_line_text,
@@ -18064,6 +18501,87 @@ sub write_leader_and_string {
              my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
  
              if ( length($test_line) <= $rOpts_maximum_line_length ) {
              my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
  
              if ( length($test_line) <= $rOpts_maximum_line_length ) {
+
+                $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
+
+                # Patch to outdent closing tokens ending # in ');'
+                # If we are joining a line like ');' to a previous stacked
+                # set of closing tokens, then decide if we may outdent the
+                # combined stack to the indentation of the ');'.  Since we
+                # should not normally outdent any of the other tokens more than
+                # the indentation of the lines that contained them, we will
+                # only do this if all of the corresponding opening
+                # tokens were on the same line.  This can happen with
+                # -sot and -sct.  For example, it is ok here:
+                #   __PACKAGE__->load_components( qw(
+                #         PK::Auto
+                #         Core
+                #   ));
+                #
+                #   But, for example, we do not outdent in this example because
+                #   that would put the closing sub brace out farther than the
+                #   opening sub brace:
+                #
+                #   perltidy -sot -sct
+                #   $c->Tk::bind(
+                #       '<Control-f>' => sub {
+                #           my ($c) = @_;
+                #           my $e = $c->XEvent;
+                #           itemsUnderArea $c;
+                #       } );
+                #
+                if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) {
+
+                    # The way to tell this is if the stacked sequence numbers
+                    # of this output line are the reverse of the stacked
+                    # sequence numbers of the previous non-blank line of
+                    # sequence numbers.  So we can join if the previous
+                    # nonblank string of tokens is the mirror image.  For
+                    # example if stack )}] is 13:8:6 then we are looking for a
+                    # leading stack like [{( which is 6:8:13 We only need to
+                    # check the two ends, because the intermediate tokens must
+                    # fall in order.  Note on speed: having to split on colons
+                    # and eliminate multiple colons might appear to be slow,
+                    # but it's not an issue because we almost never come
+                    # through here.  In a typical file we don't.
+                    $seqno_string               =~ s/^:+//;
+                    $last_nonblank_seqno_string =~ s/^:+//;
+                    $seqno_string               =~ s/:+/:/g;
+                    $last_nonblank_seqno_string =~ s/:+/:/g;
+
+                    # how many spaces can we outdent?
+                    my $diff =
+                      $cached_line_leading_space_count - $leading_space_count;
+                    if (   $diff > 0
+                        && length($seqno_string)
+                        && length($last_nonblank_seqno_string) ==
+                        length($seqno_string) )
+                    {
+                        my @seqno_last =
+                          ( split ':', $last_nonblank_seqno_string );
+                        my @seqno_now = ( split ':', $seqno_string );
+                        if (   $seqno_now[-1] == $seqno_last[0]
+                            && $seqno_now[0] == $seqno_last[-1] )
+                        {
+
+                            # OK to outdent ..
+                            # for absolute safety, be sure we only remove
+                            # whitespace
+                            my $ws = substr( $test_line, 0, $diff );
+                            if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
+
+                                $test_line = substr( $test_line, $diff );
+                                $cached_line_leading_space_count -= $diff;
+                            }
+
+                            # shouldn't happen, but not critical:
+                            ##else {
+                            ## ERROR transferring indentation here
+                            ##}
+                        }
+                    }
+                }
+
                  $str                 = $test_line;
                  $leading_string      = "";
                  $leading_space_count = $cached_line_leading_space_count;
                  $str                 = $test_line;
                  $leading_string      = "";
                  $leading_space_count = $cached_line_leading_space_count;
@@ -18082,7 +18600,7 @@ sub write_leader_and_string {
      my $line = $leading_string . $str;
  
      # write or cache this line
      my $line = $leading_string . $str;
  
      # write or cache this line
-    if ( !$rvertical_tightness_flags || $side_comment_length > 0 ) {
+    if ( !$open_or_close || $side_comment_length > 0 ) {
          entab_and_output( $line, $leading_space_count, $group_level );
      }
      else {
          entab_and_output( $line, $leading_space_count, $group_level );
      }
      else {
@@ -18092,6 +18610,7 @@ sub write_leader_and_string {
          $cached_seqno                    = $seqno;
          $cached_line_valid               = $valid;
          $cached_line_leading_space_count = $leading_space_count;
          $cached_seqno                    = $seqno;
          $cached_line_valid               = $valid;
          $cached_line_leading_space_count = $leading_space_count;
+        $cached_seqno_string             = $seqno_string;
      }
  
      $last_group_level_written = $group_level;
      }
  
      $last_group_level_written = $group_level;
@@ -18138,7 +18657,7 @@ sub entab_and_output {
          # Handle option of one tab per level
          else {
              my $leading_string = ( "\t" x $level );
          # Handle option of one tab per level
          else {
              my $leading_string = ( "\t" x $level );
-            my $space_count    =
+            my $space_count =
                $leading_space_count - $level * $rOpts_indent_columns;
  
              # shouldn't happen:
                $leading_space_count - $level * $rOpts_indent_columns;
  
              # shouldn't happen:
@@ -18166,6 +18685,9 @@ sub entab_and_output {
          }
      }
      $file_writer_object->write_code_line( $line . "\n" );
          }
      }
      $file_writer_object->write_code_line( $line . "\n" );
+    if ($seqno_string) {
+        $last_nonblank_seqno_string = $seqno_string;
+    }
  }
  
  {    # begin get_leading_string
  }
  
  {    # begin get_leading_string
@@ -18663,88 +19185,55 @@ BEGIN {
  }
  
  use Carp;
  }
  
  use Carp;
+
+# PACKAGE VARIABLES for for processing an entire FILE.
  use vars qw{
    $tokenizer_self
  use vars qw{
    $tokenizer_self
-  $level_in_tokenizer
-  $slevel_in_tokenizer
-  $nesting_token_string
-  $nesting_type_string
-  $nesting_block_string
-  $nesting_block_flag
-  $nesting_list_string
-  $nesting_list_flag
-  $saw_negative_indentation
-  $id_scan_state
+
    $last_nonblank_token
    $last_nonblank_type
    $last_nonblank_block_type
    $last_nonblank_token
    $last_nonblank_type
    $last_nonblank_block_type
-  $last_nonblank_container_type
-  $last_nonblank_type_sequence
-  $last_last_nonblank_token
-  $last_last_nonblank_type
-  $last_last_nonblank_block_type
-  $last_last_nonblank_container_type
-  $last_last_nonblank_type_sequence
-  $last_nonblank_prototype
    $statement_type
    $statement_type
-  $identifier
    $in_attribute_list
    $in_attribute_list
-  $in_quote
-  $quote_type
-  $quote_character
-  $quote_pos
-  $quote_depth
-  $allowed_quote_modifiers
+  $current_package
+  $context
+
+  %is_constant
+  %is_user_function
+  %user_function_prototype
+  %is_block_function
+  %is_block_list_function
+  %saw_function_definition
+
+  $brace_depth
    $paren_depth
    $paren_depth
+  $square_bracket_depth
+
+  @current_depth
+  @nesting_sequence_number
+  @current_sequence_number
    @paren_type
    @paren_semicolon_count
    @paren_structural_type
    @paren_type
    @paren_semicolon_count
    @paren_structural_type
-  $brace_depth
    @brace_type
    @brace_structural_type
    @brace_statement_type
    @brace_context
    @brace_package
    @brace_type
    @brace_structural_type
    @brace_statement_type
    @brace_context
    @brace_package
-  $square_bracket_depth
    @square_bracket_type
    @square_bracket_structural_type
    @depth_array
    @starting_line_of_current_depth
    @square_bracket_type
    @square_bracket_structural_type
    @depth_array
    @starting_line_of_current_depth
-  @current_depth
-  @current_sequence_number
-  @nesting_sequence_number
-  @lower_case_labels_at
-  $saw_v_string
-  %is_constant
-  %is_user_function
-  %user_function_prototype
-  %saw_function_definition
-  $max_token_index
-  $peeked_ahead
-  $current_package
-  $unexpected_error_count
-  $input_line
-  $input_line_number
-  $rpretokens
-  $rpretoken_map
-  $rpretoken_type
-  $want_paren
-  $context
-  @slevel_stack
-  $ci_string_in_tokenizer
-  $continuation_string_in_tokenizer
-  $in_statement_continuation
-  $started_looking_for_here_target_at
-  $nearly_matched_here_target_at
+};
  
  
+# GLOBAL CONSTANTS for routines in this package
+use vars qw{
    %is_indirect_object_taker
    %is_block_operator
    %expecting_operator_token
    %expecting_operator_types
    %expecting_term_types
    %expecting_term_token
    %is_indirect_object_taker
    %is_block_operator
    %expecting_operator_token
    %expecting_operator_types
    %expecting_term_types
    %expecting_term_token
-  %is_block_function
-  %is_block_list_function
    %is_digraph
    %is_file_test_operator
    %is_trigraph
    %is_digraph
    %is_file_test_operator
    %is_trigraph
@@ -18791,17 +19280,18 @@ sub new {
      # Note: 'tabs' and 'indent_columns' are temporary and should be
      # removed asap
      my %defaults = (
      # Note: 'tabs' and 'indent_columns' are temporary and should be
      # removed asap
      my %defaults = (
-        source_object       => undef,
-        debugger_object     => undef,
-        diagnostics_object  => undef,
-        logger_object       => undef,
-        starting_level      => undef,
-        indent_columns      => 4,
-        tabs                => 0,
-        look_for_hash_bang  => 0,
-        trim_qw             => 1,
-        look_for_autoloader => 1,
-        look_for_selfloader => 1,
+        source_object        => undef,
+        debugger_object      => undef,
+        diagnostics_object   => undef,
+        logger_object        => undef,
+        starting_level       => undef,
+        indent_columns       => 4,
+        tabs                 => 0,
+        look_for_hash_bang   => 0,
+        trim_qw              => 1,
+        look_for_autoloader  => 1,
+        look_for_selfloader  => 1,
+        starting_line_number => 1,
      );
      my %args = ( %defaults, @_ );
  
      );
      my %args = ( %defaults, @_ );
  
@@ -18831,45 +19321,53 @@ sub new {
      # _know_input_tabstr    flag indicating if we know _input_tabstr
      # _line_buffer_object   object with get_line() method to supply source code
      # _diagnostics_object   place to write debugging information
      # _know_input_tabstr    flag indicating if we know _input_tabstr
      # _line_buffer_object   object with get_line() method to supply source code
      # _diagnostics_object   place to write debugging information
+    # _unexpected_error_count  error count used to limit output
+    # _lower_case_labels_at  line numbers where lower case labels seen
      $tokenizer_self = {
      $tokenizer_self = {
-        _rhere_target_list    => undef,
-        _in_here_doc          => 0,
-        _here_doc_target      => "",
-        _here_quote_character => "",
-        _in_data              => 0,
-        _in_end               => 0,
-        _in_format            => 0,
-        _in_error             => 0,
-        _in_pod               => 0,
-        _in_attribute_list    => 0,
-        _in_quote             => 0,
-        _quote_target         => "",
-        _line_start_quote     => -1,
-        _starting_level       => $args{starting_level},
-        _know_starting_level  => defined( $args{starting_level} ),
-        _tabs                 => $args{tabs},
-        _indent_columns       => $args{indent_columns},
-        _look_for_hash_bang   => $args{look_for_hash_bang},
-        _trim_qw              => $args{trim_qw},
-        _input_tabstr         => "",
-        _know_input_tabstr    => -1,
-        _last_line_number     => 0,
-        _saw_perl_dash_P      => 0,
-        _saw_perl_dash_w      => 0,
-        _saw_use_strict       => 0,
-        _look_for_autoloader  => $args{look_for_autoloader},
-        _look_for_selfloader  => $args{look_for_selfloader},
-        _saw_autoloader       => 0,
-        _saw_selfloader       => 0,
-        _saw_hash_bang        => 0,
-        _saw_end              => 0,
-        _saw_data             => 0,
-        _saw_lc_filehandle    => 0,
-        _started_tokenizing   => 0,
-        _line_buffer_object   => $line_buffer_object,
-        _debugger_object      => $args{debugger_object},
-        _diagnostics_object   => $args{diagnostics_object},
-        _logger_object        => $args{logger_object},
+        _rhere_target_list                  => [],
+        _in_here_doc                        => 0,
+        _here_doc_target                    => "",
+        _here_quote_character               => "",
+        _in_data                            => 0,
+        _in_end                             => 0,
+        _in_format                          => 0,
+        _in_error                           => 0,
+        _in_pod                             => 0,
+        _in_attribute_list                  => 0,
+        _in_quote                           => 0,
+        _quote_target                       => "",
+        _line_start_quote                   => -1,
+        _starting_level                     => $args{starting_level},
+        _know_starting_level                => defined( $args{starting_level} ),
+        _tabs                               => $args{tabs},
+        _indent_columns                     => $args{indent_columns},
+        _look_for_hash_bang                 => $args{look_for_hash_bang},
+        _trim_qw                            => $args{trim_qw},
+        _input_tabstr                       => "",
+        _know_input_tabstr                  => -1,
+        _last_line_number                   => $args{starting_line_number} - 1,
+        _saw_perl_dash_P                    => 0,
+        _saw_perl_dash_w                    => 0,
+        _saw_use_strict                     => 0,
+        _saw_v_string                       => 0,
+        _look_for_autoloader                => $args{look_for_autoloader},
+        _look_for_selfloader                => $args{look_for_selfloader},
+        _saw_autoloader                     => 0,
+        _saw_selfloader                     => 0,
+        _saw_hash_bang                      => 0,
+        _saw_end                            => 0,
+        _saw_data                           => 0,
+        _saw_negative_indentation           => 0,
+        _started_tokenizing                 => 0,
+        _line_buffer_object                 => $line_buffer_object,
+        _debugger_object                    => $args{debugger_object},
+        _diagnostics_object                 => $args{diagnostics_object},
+        _logger_object                      => $args{logger_object},
+        _unexpected_error_count             => 0,
+        _started_looking_for_here_target_at => 0,
+        _nearly_matched_here_target_at      => undef,
+        _line_text                          => "",
+        _rlower_case_labels_at              => undef,
      };
  
      prepare_for_a_new_file();
      };
  
      prepare_for_a_new_file();
@@ -18985,38 +19483,6 @@ sub report_tokenization_errors {
          warning("hit EOF while in format description\n");
      }
  
          warning("hit EOF while in format description\n");
      }
  
-    # this check may be removed after a year or so
-    if ( $tokenizer_self->{_saw_lc_filehandle} ) {
-
-        warning( <<'EOM' );
-------------------------------------------------------------------------
-PLEASE NOTE: If you get this message, it is because perltidy noticed
-possible ambiguous syntax at one or more places in your script, as
-noted above.  The problem is with statements accepting indirect objects,
-such as print and printf statements of the form
-
-    print bareword ( $etc
-
-Perltidy needs your help in deciding if 'bareword' is a filehandle or a
-function call.  The problem is the space between 'bareword' and '('.  If
-'bareword' is a function call, you should remove the trailing space.  If
-'bareword' is a filehandle, you should avoid the opening paren or else
-globally capitalize 'bareword' to be BAREWORD.  So the above line
-would be: 
-
-    print bareword( $etc    # function
-or
-    print bareword @list    # filehandle
-or
-    print BAREWORD ( $etc   # filehandle
-
-If you want to keep the line as it is, and are sure it is correct,
-you can use -w=0 to prevent this message.
-------------------------------------------------------------------------
-EOM
-
-    }
-
      if ( $tokenizer_self->{_in_pod} ) {
  
          # Just write log entry if this is after __END__ or __DATA__
      if ( $tokenizer_self->{_in_pod} ) {
  
          # Just write log entry if this is after __END__ or __DATA__
@@ -19038,6 +19504,8 @@ EOM
  
      if ( $tokenizer_self->{_in_here_doc} ) {
          my $here_doc_target = $tokenizer_self->{_here_doc_target};
  
      if ( $tokenizer_self->{_in_here_doc} ) {
          my $here_doc_target = $tokenizer_self->{_here_doc_target};
+        my $started_looking_for_here_target_at =
+          $tokenizer_self->{_started_looking_for_here_target_at};
          if ($here_doc_target) {
              warning(
  "hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
          if ($here_doc_target) {
              warning(
  "hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
@@ -19048,6 +19516,8 @@ EOM
  "hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n"
              );
          }
  "hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n"
              );
          }
+        my $nearly_matched_here_target_at =
+          $tokenizer_self->{_nearly_matched_here_target_at};
          if ($nearly_matched_here_target_at) {
              warning(
  "NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
          if ($nearly_matched_here_target_at) {
              warning(
  "NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
@@ -19058,7 +19528,7 @@ EOM
      if ( $tokenizer_self->{_in_quote} ) {
          my $line_start_quote = $tokenizer_self->{_line_start_quote};
          my $quote_target     = $tokenizer_self->{_quote_target};
      if ( $tokenizer_self->{_in_quote} ) {
          my $line_start_quote = $tokenizer_self->{_line_start_quote};
          my $quote_target     = $tokenizer_self->{_quote_target};
-        my $what             =
+        my $what =
            ( $tokenizer_self->{_in_attribute_list} )
            ? "attribute list"
            : "quote/pattern";
            ( $tokenizer_self->{_in_attribute_list} )
            ? "attribute list"
            : "quote/pattern";
@@ -19086,8 +19556,9 @@ EOM
  
      # it is suggested that lables have at least one upper case character
      # for legibility and to avoid code breakage as new keywords are introduced
  
      # it is suggested that lables have at least one upper case character
      # for legibility and to avoid code breakage as new keywords are introduced
-    if (@lower_case_labels_at) {
-        my $num = @lower_case_labels_at;
+    if ( $tokenizer_self->{_rlower_case_labels_at} ) {
+        my @lower_case_labels_at =
+          @{ $tokenizer_self->{_rlower_case_labels_at} };
          write_logfile_entry(
              "Suggest using upper case characters in label(s)\n");
          local $" = ')(';
          write_logfile_entry(
              "Suggest using upper case characters in label(s)\n");
          local $" = ')(';
@@ -19099,7 +19570,9 @@ sub report_v_string {
  
      # warn if this version can't handle v-strings
      my $tok = shift;
  
      # warn if this version can't handle v-strings
      my $tok = shift;
-    $saw_v_string = $input_line_number;
+    unless ( $tokenizer_self->{_saw_v_string} ) {
+        $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number};
+    }
      if ( $] < 5.006 ) {
          warning(
  "Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
      if ( $] < 5.006 ) {
          warning(
  "Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
@@ -19116,11 +19589,15 @@ sub get_line {
  
      my $self = shift;
  
  
      my $self = shift;
  
+    # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth,
+    # $square_bracket_depth, $paren_depth
+
      my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
      my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
+    $tokenizer_self->{_line_text} = $input_line;
  
      return undef unless ($input_line);
  
  
      return undef unless ($input_line);
  
-    $tokenizer_self->{_last_line_number}++;
+    my $input_line_number = ++$tokenizer_self->{_last_line_number};
  
      # Find and remove what characters terminate this line, including any
      # control r
  
      # Find and remove what characters terminate this line, including any
      # control r
@@ -19135,8 +19612,7 @@ sub get_line {
      # for backwards compatability we keep the line text terminated with
      # a newline character
      $input_line .= "\n";
      # for backwards compatability we keep the line text terminated with
      # a newline character
      $input_line .= "\n";
-
-    my $input_line_number = $tokenizer_self->{_last_line_number};
+    $tokenizer_self->{_line_text} = $input_line;    # update
  
      # create a data structure describing this line which will be
      # returned to the caller.
  
      # create a data structure describing this line which will be
      # returned to the caller.
@@ -19181,8 +19657,7 @@ sub get_line {
          _rci_levels               => undef,
          _rnesting_blocks          => undef,
          _python_indentation_level => -1,                   ## 0,
          _rci_levels               => undef,
          _rnesting_blocks          => undef,
          _python_indentation_level => -1,                   ## 0,
-        _starting_in_quote        =>
-          ( $tokenizer_self->{_in_quote} && ( $quote_type eq 'Q' ) ),
+        _starting_in_quote    => 0,                    # to be set by subroutine
          _ending_in_quote      => 0,
          _curly_brace_depth    => $brace_depth,
          _square_bracket_depth => $square_bracket_depth,
          _ending_in_quote      => 0,
          _curly_brace_depth    => $brace_depth,
          _square_bracket_depth => $square_bracket_depth,
@@ -19199,21 +19674,22 @@ sub get_line {
          my $candidate_target     = $input_line;
          chomp $candidate_target;
          if ( $candidate_target eq $here_doc_target ) {
          my $candidate_target     = $input_line;
          chomp $candidate_target;
          if ( $candidate_target eq $here_doc_target ) {
-            $nearly_matched_here_target_at = undef;
-            $line_of_tokens->{_line_type} = 'HERE_END';
+            $tokenizer_self->{_nearly_matched_here_target_at} = undef;
+            $line_of_tokens->{_line_type}                     = 'HERE_END';
              write_logfile_entry("Exiting HERE document $here_doc_target\n");
  
              my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
              if (@$rhere_target_list) {    # there can be multiple here targets
                  ( $here_doc_target, $here_quote_character ) =
                    @{ shift @$rhere_target_list };
              write_logfile_entry("Exiting HERE document $here_doc_target\n");
  
              my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
              if (@$rhere_target_list) {    # there can be multiple here targets
                  ( $here_doc_target, $here_quote_character ) =
                    @{ shift @$rhere_target_list };
-                $tokenizer_self->{_here_doc_target}      = $here_doc_target;
+                $tokenizer_self->{_here_doc_target} = $here_doc_target;
                  $tokenizer_self->{_here_quote_character} =
                    $here_quote_character;
                  write_logfile_entry(
                      "Entering HERE document $here_doc_target\n");
                  $tokenizer_self->{_here_quote_character} =
                    $here_quote_character;
                  write_logfile_entry(
                      "Entering HERE document $here_doc_target\n");
-                $nearly_matched_here_target_at      = undef;
-                $started_looking_for_here_target_at = $input_line_number;
+                $tokenizer_self->{_nearly_matched_here_target_at} = undef;
+                $tokenizer_self->{_started_looking_for_here_target_at} =
+                  $input_line_number;
              }
              else {
                  $tokenizer_self->{_in_here_doc}          = 0;
              }
              else {
                  $tokenizer_self->{_in_here_doc}          = 0;
@@ -19228,7 +19704,8 @@ sub get_line {
              $candidate_target =~ s/\s*$//;
              $candidate_target =~ s/^\s*//;
              if ( $candidate_target eq $here_doc_target ) {
              $candidate_target =~ s/\s*$//;
              $candidate_target =~ s/^\s*//;
              if ( $candidate_target eq $here_doc_target ) {
-                $nearly_matched_here_target_at = $input_line_number;
+                $tokenizer_self->{_nearly_matched_here_target_at} =
+                  $input_line_number;
              }
          }
          return $line_of_tokens;
              }
          }
          return $line_of_tokens;
@@ -19438,14 +19915,14 @@ sub get_line {
      my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
      if (@$rhere_target_list) {
  
      my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
      if (@$rhere_target_list) {
  
-        #my $here_doc_target = shift @$rhere_target_list;
          my ( $here_doc_target, $here_quote_character ) =
            @{ shift @$rhere_target_list };
          $tokenizer_self->{_in_here_doc}          = 1;
          $tokenizer_self->{_here_doc_target}      = $here_doc_target;
          $tokenizer_self->{_here_quote_character} = $here_quote_character;
          write_logfile_entry("Entering HERE document $here_doc_target\n");
          my ( $here_doc_target, $here_quote_character ) =
            @{ shift @$rhere_target_list };
          $tokenizer_self->{_in_here_doc}          = 1;
          $tokenizer_self->{_here_doc_target}      = $here_doc_target;
          $tokenizer_self->{_here_quote_character} = $here_quote_character;
          write_logfile_entry("Entering HERE document $here_doc_target\n");
-        $started_looking_for_here_target_at = $input_line_number;
+        $tokenizer_self->{_started_looking_for_here_target_at} =
+          $input_line_number;
      }
  
      # NOTE: __END__ and __DATA__ statements are written unformatted
      }
  
      # NOTE: __END__ and __DATA__ statements are written unformatted
@@ -19505,9 +19982,11 @@ sub get_line {
          and ( $tokenizer_self->{_line_start_quote} < 0 ) )
      {
  
          and ( $tokenizer_self->{_line_start_quote} < 0 ) )
      {
  
-        if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
+        #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
+        if (
+            ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ )
+        {
              $tokenizer_self->{_line_start_quote} = $input_line_number;
              $tokenizer_self->{_line_start_quote} = $input_line_number;
-            $tokenizer_self->{_quote_target}     = $quote_target;
              write_logfile_entry(
                  "Start multi-line quote or pattern ending in $quote_target\n");
          }
              write_logfile_entry(
                  "Start multi-line quote or pattern ending in $quote_target\n");
          }
@@ -19525,6 +20004,7 @@ sub get_line {
  
  sub find_starting_indentation_level {
  
  
  sub find_starting_indentation_level {
  
+    # USES GLOBAL VARIABLES: $tokenizer_self
      my $starting_level    = 0;
      my $know_input_tabstr = -1;    # flag for find_indentation_level
  
      my $starting_level    = 0;
      my $know_input_tabstr = -1;    # flag for find_indentation_level
  
@@ -19595,6 +20075,8 @@ sub find_starting_indentation_level {
  
  sub find_indentation_level {
      my ( $line, $structural_indentation_level ) = @_;
  
  sub find_indentation_level {
      my ( $line, $structural_indentation_level ) = @_;
+
+    # USES GLOBAL VARIABLES: $tokenizer_self
      my $level = 0;
      my $msg   = "";
  
      my $level = 0;
      my $msg   = "";
  
@@ -19668,7 +20150,7 @@ sub find_indentation_level {
              }
              else {
                  $columns = int $columns;
              }
              else {
                  $columns = int $columns;
-                $msg     =
+                $msg =
  "old indentation is unclear, using $columns $entabbed spaces\n";
              }
              $input_tabstr = " " x $columns;
  "old indentation is unclear, using $columns $entabbed spaces\n";
              }
              $input_tabstr = " " x $columns;
@@ -19711,81 +20193,6 @@ sub find_indentation_level {
      return ( $level, $msg );
  }
  
      return ( $level, $msg );
  }
  
-sub dump_token_types {
-    my $class = shift;
-    my $fh    = shift;
-
-    # This should be the latest list of token types in use
-    # adding NEW_TOKENS: add a comment here
-    print $fh <<'END_OF_LIST';
-
-Here is a list of the token types currently used for lines of type 'CODE'.  
-For the following tokens, the "type" of a token is just the token itself.  
-
-.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
-( ) <= >= == =~ !~ != ++ -- /= x=
-... **= <<= >>= &&= ||= //= <=> 
-, + - / * | % ! x ~ = \ ? : . < > ^ &
-
-The following additional token types are defined:
-
- type    meaning
-    b    blank (white space) 
-    {    indent: opening structural curly brace or square bracket or paren
-         (code block, anonymous hash reference, or anonymous array reference)
-    }    outdent: right structural curly brace or square bracket or paren
-    [    left non-structural square bracket (enclosing an array index)
-    ]    right non-structural square bracket
-    (    left non-structural paren (all but a list right of an =)
-    )    right non-structural parena
-    L    left non-structural curly brace (enclosing a key)
-    R    right non-structural curly brace 
-    ;    terminal semicolon
-    f    indicates a semicolon in a "for" statement
-    h    here_doc operator <<
-    #    a comment
-    Q    indicates a quote or pattern
-    q    indicates a qw quote block
-    k    a perl keyword
-    C    user-defined constant or constant function (with void prototype = ())
-    U    user-defined function taking parameters
-    G    user-defined function taking block parameter (like grep/map/eval)
-    M    (unused, but reserved for subroutine definition name)
-    P    (unused, but -html uses it to label pod text)
-    t    type indicater such as %,$,@,*,&,sub
-    w    bare word (perhaps a subroutine call)
-    i    identifier of some type (with leading %, $, @, *, &, sub, -> )
-    n    a number
-    v    a v-string
-    F    a file test operator (like -e)
-    Y    File handle
-    Z    identifier in indirect object slot: may be file handle, object
-    J    LABEL:  code block label
-    j    LABEL after next, last, redo, goto
-    p    unary +
-    m    unary -
-    pp   pre-increment operator ++
-    mm   pre-decrement operator -- 
-    A    : used as attribute separator
-    
-    Here are the '_line_type' codes used internally:
-    SYSTEM         - system-specific code before hash-bang line
-    CODE           - line of perl code (including comments)
-    POD_START      - line starting pod, such as '=head'
-    POD            - pod documentation text
-    POD_END        - last line of pod section, '=cut'
-    HERE           - text of here-document
-    HERE_END       - last line of here-doc (target word)
-    FORMAT         - format section
-    FORMAT_END     - last line of format section, '.'
-    DATA_START     - __DATA__ line
-    DATA           - unidentified text following __DATA__
-    END_START      - __END__ line
-    END            - unidentified text following __END__
-    ERROR          - we are in big trouble, probably not a perl script
-END_OF_LIST
-}
-
  # This is a currently unused debug routine
  sub dump_functions {
  
  # This is a currently unused debug routine
  sub dump_functions {
  
@@ -19817,142 +20224,392 @@ sub dump_functions {
  }
  
  sub prepare_for_a_new_file {
  }
  
  sub prepare_for_a_new_file {
-    $saw_negative_indentation = 0;
-    $id_scan_state            = '';
-    $statement_type           = '';     # '' or 'use' or 'sub..' or 'case..'
+
+    # previous tokens needed to determine what to expect next
      $last_nonblank_token      = ';';    # the only possible starting state which
      $last_nonblank_type       = ';';    # will make a leading brace a code block
      $last_nonblank_block_type = '';
      $last_nonblank_token      = ';';    # the only possible starting state which
      $last_nonblank_type       = ';';    # will make a leading brace a code block
      $last_nonblank_block_type = '';
-    $last_nonblank_container_type      = '';
-    $last_nonblank_type_sequence       = '';
-    $last_last_nonblank_token          = ';';
-    $last_last_nonblank_type           = ';';
-    $last_last_nonblank_block_type     = '';
-    $last_last_nonblank_container_type = '';
-    $last_last_nonblank_type_sequence  = '';
-    $last_nonblank_prototype           = "";
-    $identifier                        = '';
-    $in_attribute_list                 = 0;     # ATTRS
-    $in_quote   = 0;     # flag telling if we are chasing a quote, and what kind
-    $quote_type = 'Q';
-    $quote_character = "";    # character we seek if chasing a quote
-    $quote_pos   = 0;  # next character index to check for case of alphanum char
-    $quote_depth = 0;
-    $allowed_quote_modifiers                     = "";
-    $paren_depth                                 = 0;
-    $brace_depth                                 = 0;
-    $square_bracket_depth                        = 0;
-    $current_package                             = "main";
+
+    # scalars for remembering statement types across multiple lines
+    $statement_type    = '';            # '' or 'use' or 'sub..' or 'case..'
+    $in_attribute_list = 0;
+
+    # scalars for remembering where we are in the file
+    $current_package = "main";
+    $context         = UNKNOWN_CONTEXT;
+
+    # hashes used to remember function information
+    %is_constant             = ();      # user-defined constants
+    %is_user_function        = ();      # user-defined functions
+    %user_function_prototype = ();      # their prototypes
+    %is_block_function       = ();
+    %is_block_list_function  = ();
+    %saw_function_definition = ();
+
+    # variables used to track depths of various containers
+    # and report nesting errors
+    $paren_depth          = 0;
+    $brace_depth          = 0;
+    $square_bracket_depth = 0;
      @current_depth[ 0 .. $#closing_brace_names ] =
        (0) x scalar @closing_brace_names;
      @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
        ( 0 .. $#closing_brace_names );
      @current_depth[ 0 .. $#closing_brace_names ] =
        (0) x scalar @closing_brace_names;
      @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
        ( 0 .. $#closing_brace_names );
-    @current_sequence_number = ();
-
+    @current_sequence_number             = ();
      $paren_type[$paren_depth]            = '';
      $paren_semicolon_count[$paren_depth] = 0;
      $paren_type[$paren_depth]            = '';
      $paren_semicolon_count[$paren_depth] = 0;
+    $paren_structural_type[$brace_depth] = '';
      $brace_type[$brace_depth] = ';';    # identify opening brace as code block
      $brace_structural_type[$brace_depth]                   = '';
      $brace_statement_type[$brace_depth]                    = "";
      $brace_context[$brace_depth]                           = UNKNOWN_CONTEXT;
      $brace_type[$brace_depth] = ';';    # identify opening brace as code block
      $brace_structural_type[$brace_depth]                   = '';
      $brace_statement_type[$brace_depth]                    = "";
      $brace_context[$brace_depth]                           = UNKNOWN_CONTEXT;
-    $paren_structural_type[$brace_depth]                   = '';
+    $brace_package[$paren_depth]                           = $current_package;
      $square_bracket_type[$square_bracket_depth]            = '';
      $square_bracket_structural_type[$square_bracket_depth] = '';
      $square_bracket_type[$square_bracket_depth]            = '';
      $square_bracket_structural_type[$square_bracket_depth] = '';
-    $brace_package[$paren_depth]                           = $current_package;
-    %is_constant                      = ();             # user-defined constants
-    %is_user_function                 = ();             # user-defined functions
-    %user_function_prototype          = ();             # their prototypes
-    %is_block_function                = ();
-    %is_block_list_function           = ();
-    %saw_function_definition          = ();
-    $unexpected_error_count           = 0;
-    $want_paren                       = "";
-    $context                          = UNKNOWN_CONTEXT;
-    @slevel_stack                     = ();
-    $ci_string_in_tokenizer           = "";
-    $continuation_string_in_tokenizer = "0";
-    $in_statement_continuation        = 0;
-    @lower_case_labels_at             = ();
-    $saw_v_string         = 0;      # for warning of v-strings on older perl
-    $nesting_token_string = "";
-    $nesting_type_string  = "";
-    $nesting_block_string = '1';    # initially in a block
-    $nesting_block_flag   = 1;
-    $nesting_list_string  = '0';    # initially not in a list
-    $nesting_list_flag    = 0;      # initially not in a list
-    $nearly_matched_here_target_at = undef;
-}
-
-sub get_quote_target {
-    return matching_end_token($quote_character);
-}
-
-sub get_indentation_level {
-    return $level_in_tokenizer;
-}
-
-sub reset_indentation_level {
-    $level_in_tokenizer  = $_[0];
-    $slevel_in_tokenizer = $_[0];
-    push @slevel_stack, $slevel_in_tokenizer;
-}
-
-{    # begin tokenize_this_line
+
+    initialize_tokenizer_state();
+}
+
+{                                       # begin tokenize_this_line
  
      use constant BRACE          => 0;
      use constant SQUARE_BRACKET => 1;
      use constant PAREN          => 2;
      use constant QUESTION_COLON => 3;
  
  
      use constant BRACE          => 0;
      use constant SQUARE_BRACKET => 1;
      use constant PAREN          => 2;
      use constant QUESTION_COLON => 3;
  
+    # TV1: scalars for processing one LINE.
+    # Re-initialized on each entry to sub tokenize_this_line.
+    my (
+        $block_type,        $container_type,    $expecting,
+        $i,                 $i_tok,             $input_line,
+        $input_line_number, $last_nonblank_i,   $max_token_index,
+        $next_tok,          $next_type,         $peeked_ahead,
+        $prototype,         $rhere_target_list, $rtoken_map,
+        $rtoken_type,       $rtokens,           $tok,
+        $type,              $type_sequence,
+    );
+
+    # TV2: refs to ARRAYS for processing one LINE
+    # Re-initialized on each call.
+    my $routput_token_list     = [];    # stack of output token indexes
+    my $routput_token_type     = [];    # token types
+    my $routput_block_type     = [];    # types of code block
+    my $routput_container_type = [];    # paren types, such as if, elsif, ..
+    my $routput_type_sequence  = [];    # nesting sequential number
+
+    # TV3: SCALARS for quote variables.  These are initialized with a
+    # subroutine call and continually updated as lines are processed.
+    my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
+        $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, );
+
+    # TV4: SCALARS for multi-line identifiers and
+    # statements. These are initialized with a subroutine call
+    # and continually updated as lines are processed.
+    my ( $id_scan_state, $identifier, $want_paren, );
+
+    # TV5: SCALARS for tracking indentation level.
+    # Initialized once and continually updated as lines are
+    # processed.
      my (
      my (
-        $block_type,      $container_type,       $expecting,
-        $here_doc_target, $here_quote_character, $i,
-        $i_tok,           $last_nonblank_i,      $next_tok,
-        $next_type,       $prototype,            $rtoken_map,
-        $rtoken_type,     $rtokens,              $tok,
-        $type,            $type_sequence,
+        $nesting_token_string,      $nesting_type_string,
+        $nesting_block_string,      $nesting_block_flag,
+        $nesting_list_string,       $nesting_list_flag,
+        $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
+        $in_statement_continuation, $level_in_tokenizer,
+        $slevel_in_tokenizer,       $rslevel_stack,
      );
  
      );
  
-    my @output_token_list     = ();    # stack of output token indexes
-    my @output_token_type     = ();    # token types
-    my @output_block_type     = ();    # types of code block
-    my @output_container_type = ();    # paren types, such as if, elsif, ..
-    my @output_type_sequence  = ();    # nesting sequential number
+    # TV6: SCALARS for remembering several previous
+    # tokens. Initialized once and continually updated as
+    # lines are processed.
+    my (
+        $last_nonblank_container_type,     $last_nonblank_type_sequence,
+        $last_last_nonblank_token,         $last_last_nonblank_type,
+        $last_last_nonblank_block_type,    $last_last_nonblank_container_type,
+        $last_last_nonblank_type_sequence, $last_nonblank_prototype,
+    );
+
+    # ----------------------------------------------------------------
+    # beginning of tokenizer variable access and manipulation routines
+    # ----------------------------------------------------------------
+
+    sub initialize_tokenizer_state {
+
+        # TV1: initialized on each call
+        # TV2: initialized on each call
+        # TV3:
+        $in_quote                = 0;
+        $quote_type              = 'Q';
+        $quote_character         = "";
+        $quote_pos               = 0;
+        $quote_depth             = 0;
+        $quoted_string_1         = "";
+        $quoted_string_2         = "";
+        $allowed_quote_modifiers = "";
+
+        # TV4:
+        $id_scan_state = '';
+        $identifier    = '';
+        $want_paren    = "";
+
+        # TV5:
+        $nesting_token_string             = "";
+        $nesting_type_string              = "";
+        $nesting_block_string             = '1';    # initially in a block
+        $nesting_block_flag               = 1;
+        $nesting_list_string              = '0';    # initially not in a list
+        $nesting_list_flag                = 0;      # initially not in a list
+        $ci_string_in_tokenizer           = "";
+        $continuation_string_in_tokenizer = "0";
+        $in_statement_continuation        = 0;
+        $level_in_tokenizer               = 0;
+        $slevel_in_tokenizer              = 0;
+        $rslevel_stack                    = [];
+
+        # TV6:
+        $last_nonblank_container_type      = '';
+        $last_nonblank_type_sequence       = '';
+        $last_last_nonblank_token          = ';';
+        $last_last_nonblank_type           = ';';
+        $last_last_nonblank_block_type     = '';
+        $last_last_nonblank_container_type = '';
+        $last_last_nonblank_type_sequence  = '';
+        $last_nonblank_prototype           = "";
+    }
+
+    sub save_tokenizer_state {
+
+        my $rTV1 = [
+            $block_type,        $container_type,    $expecting,
+            $i,                 $i_tok,             $input_line,
+            $input_line_number, $last_nonblank_i,   $max_token_index,
+            $next_tok,          $next_type,         $peeked_ahead,
+            $prototype,         $rhere_target_list, $rtoken_map,
+            $rtoken_type,       $rtokens,           $tok,
+            $type,              $type_sequence,
+        ];
+
+        my $rTV2 = [
+            $routput_token_list, $routput_token_type,
+            $routput_block_type, $routput_container_type,
+            $routput_type_sequence,
+        ];
+
+        my $rTV3 = [
+            $in_quote,        $quote_type,
+            $quote_character, $quote_pos,
+            $quote_depth,     $quoted_string_1,
+            $quoted_string_2, $allowed_quote_modifiers,
+        ];
+
+        my $rTV4 = [ $id_scan_state, $identifier, $want_paren, ];
+
+        my $rTV5 = [
+            $nesting_token_string,      $nesting_type_string,
+            $nesting_block_string,      $nesting_block_flag,
+            $nesting_list_string,       $nesting_list_flag,
+            $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
+            $in_statement_continuation, $level_in_tokenizer,
+            $slevel_in_tokenizer,       $rslevel_stack,
+        ];
+
+        my $rTV6 = [
+            $last_nonblank_container_type,
+            $last_nonblank_type_sequence,
+            $last_last_nonblank_token,
+            $last_last_nonblank_type,
+            $last_last_nonblank_block_type,
+            $last_last_nonblank_container_type,
+            $last_last_nonblank_type_sequence,
+            $last_nonblank_prototype,
+        ];
+        return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ];
+    }
+
+    sub restore_tokenizer_state {
+        my ($rstate) = @_;
+        my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};
+        (
+            $block_type,        $container_type,    $expecting,
+            $i,                 $i_tok,             $input_line,
+            $input_line_number, $last_nonblank_i,   $max_token_index,
+            $next_tok,          $next_type,         $peeked_ahead,
+            $prototype,         $rhere_target_list, $rtoken_map,
+            $rtoken_type,       $rtokens,           $tok,
+            $type,              $type_sequence,
+        ) = @{$rTV1};
+
+        (
+            $routput_token_list, $routput_token_type,
+            $routput_block_type, $routput_container_type,
+            $routput_type_sequence,
+        ) = @{$rTV2};
+
+        (
+            $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
+            $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
+        ) = @{$rTV3};
+
+        ( $id_scan_state, $identifier, $want_paren, ) = @{$rTV4};
+
+        (
+            $nesting_token_string,      $nesting_type_string,
+            $nesting_block_string,      $nesting_block_flag,
+            $nesting_list_string,       $nesting_list_flag,
+            $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
+            $in_statement_continuation, $level_in_tokenizer,
+            $slevel_in_tokenizer,       $rslevel_stack,
+        ) = @{$rTV5};
+
+        (
+            $last_nonblank_container_type,
+            $last_nonblank_type_sequence,
+            $last_last_nonblank_token,
+            $last_last_nonblank_type,
+            $last_last_nonblank_block_type,
+            $last_last_nonblank_container_type,
+            $last_last_nonblank_type_sequence,
+            $last_nonblank_prototype,
+        ) = @{$rTV6};
+    }
+
+    sub get_indentation_level {
+        return $level_in_tokenizer;
+    }
+
+    sub reset_indentation_level {
+        $level_in_tokenizer  = $_[0];
+        $slevel_in_tokenizer = $_[0];
+        push @{$rslevel_stack}, $slevel_in_tokenizer;
+    }
+
+    sub peeked_ahead {
+        $peeked_ahead = defined( $_[0] ) ? $_[0] : $peeked_ahead;
+    }
  
  
-    my @here_target_list = ();         # list of here-doc target strings
+    # ------------------------------------------------------------
+    # end of tokenizer variable access and manipulation routines
+    # ------------------------------------------------------------
  
      # ------------------------------------------------------------
  
      # ------------------------------------------------------------
-    # beginning of various scanner interfaces to simplify coding
+    # beginning of various scanner interface routines
      # ------------------------------------------------------------
      # ------------------------------------------------------------
+    sub scan_replacement_text {
+
+        # check for here-docs in replacement text invoked by
+        # a substitution operator with executable modifier 'e'.
+        #
+        # given:
+        #  $replacement_text
+        # return:
+        #  $rht = reference to any here-doc targets
+        my ($replacement_text) = @_;
+
+        # quick check
+        return undef unless ( $replacement_text =~ /<</ );
+
+        write_logfile_entry("scanning replacement text for here-doc targets\n");
+
+        # save the logger object for error messages
+        my $logger_object = $tokenizer_self->{_logger_object};
+
+        # localize all package variables
+        local (
+            $tokenizer_self,          $last_nonblank_token,
+            $last_nonblank_type,      $last_nonblank_block_type,
+            $statement_type,          $in_attribute_list,
+            $current_package,         $context,
+            %is_constant,             %is_user_function,
+            %user_function_prototype, %is_block_function,
+            %is_block_list_function,  %saw_function_definition,
+            $brace_depth,             $paren_depth,
+            $square_bracket_depth,    @current_depth,
+            @nesting_sequence_number, @current_sequence_number,
+            @paren_type,              @paren_semicolon_count,
+            @paren_structural_type,   @brace_type,
+            @brace_structural_type,   @brace_statement_type,
+            @brace_context,           @brace_package,
+            @square_bracket_type,     @square_bracket_structural_type,
+            @depth_array,             @starting_line_of_current_depth,
+        );
+
+        # save all lexical variables
+        my $rstate = save_tokenizer_state();
+        _decrement_count();    # avoid error check for multiple tokenizers
+
+        # make a new tokenizer
+        my $rOpts = {};
+        my $rpending_logfile_message;
+        my $source_object =
+          Perl::Tidy::LineSource->new( \$replacement_text, $rOpts,
+            $rpending_logfile_message );
+        my $tokenizer = Perl::Tidy::Tokenizer->new(
+            source_object        => $source_object,
+            logger_object        => $logger_object,
+            starting_line_number => $input_line_number,
+        );
+
+        # scan the replacement text
+        1 while ( $tokenizer->get_line() );
+
+        # remove any here doc targets
+        my $rht = undef;
+        if ( $tokenizer_self->{_in_here_doc} ) {
+            $rht = [];
+            push @{$rht},
+              [
+                $tokenizer_self->{_here_doc_target},
+                $tokenizer_self->{_here_quote_character}
+              ];
+            if ( $tokenizer_self->{_rhere_target_list} ) {
+                push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} };
+                $tokenizer_self->{_rhere_target_list} = undef;
+            }
+            $tokenizer_self->{_in_here_doc} = undef;
+        }
+
+        # now its safe to report errors
+        $tokenizer->report_tokenization_errors();
+
+        # restore all tokenizer lexical variables
+        restore_tokenizer_state($rstate);
+
+        # return the here doc targets
+        return $rht;
+    }
+
      sub scan_bare_identifier {
          ( $i, $tok, $type, $prototype ) =
            scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
      sub scan_bare_identifier {
          ( $i, $tok, $type, $prototype ) =
            scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
-            $rtoken_map );
+            $rtoken_map, $max_token_index );
      }
  
      sub scan_identifier {
          ( $i, $tok, $type, $id_scan_state, $identifier ) =
      }
  
      sub scan_identifier {
          ( $i, $tok, $type, $id_scan_state, $identifier ) =
-          scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens );
+          scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
+            $max_token_index );
      }
  
      sub scan_id {
          ( $i, $tok, $type, $id_scan_state ) =
            scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
      }
  
      sub scan_id {
          ( $i, $tok, $type, $id_scan_state ) =
            scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
-            $id_scan_state );
+            $id_scan_state, $max_token_index );
      }
  
      }
  
-    my $number;
-
      sub scan_number {
      sub scan_number {
+        my $number;
          ( $i, $type, $number ) =
          ( $i, $type, $number ) =
-          scan_number_do( $input_line, $i, $rtoken_map, $type );
+          scan_number_do( $input_line, $i, $rtoken_map, $type,
+            $max_token_index );
+        return $number;
      }
  
      # a sub to warn if token found where term expected
      sub error_if_expecting_TERM {
          if ( $expecting == TERM ) {
              if ( $really_want_term{$last_nonblank_type} ) {
      }
  
      # a sub to warn if token found where term expected
      sub error_if_expecting_TERM {
          if ( $expecting == TERM ) {
              if ( $really_want_term{$last_nonblank_type} ) {
-                unexpected( $tok, "term", $i_tok, $last_nonblank_i );
+                unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map,
+                    $rtoken_type, $input_line );
                  1;
              }
          }
                  1;
              }
          }
@@ -19962,7 +20619,8 @@ sub reset_indentation_level {
      sub error_if_expecting_OPERATOR {
          if ( $expecting == OPERATOR ) {
              my $thing = defined $_[0] ? $_[0] : $tok;
      sub error_if_expecting_OPERATOR {
          if ( $expecting == OPERATOR ) {
              my $thing = defined $_[0] ? $_[0] : $tok;
-            unexpected( $thing, "operator", $i_tok, $last_nonblank_i );
+            unexpected( $thing, "operator", $i_tok, $last_nonblank_i,
+                $rtoken_map, $rtoken_type, $input_line );
              if ( $i_tok == 0 ) {
                  interrupt_logfile();
                  warning("Missing ';' above?\n");
              if ( $i_tok == 0 ) {
                  interrupt_logfile();
                  warning("Missing ';' above?\n");
@@ -20028,6 +20686,7 @@ sub reset_indentation_level {
  ##      '||=' => undef,
  ##      '//=' => undef,
  ##      '~'   => undef,
  ##      '||=' => undef,
  ##      '//=' => undef,
  ##      '~'   => undef,
+##      '~~'  => undef,
  
          '>' => sub {
              error_if_expecting_TERM()
  
          '>' => sub {
              error_if_expecting_TERM()
@@ -20100,7 +20759,8 @@ sub reset_indentation_level {
                          # error; for example, we might have a constant pi and
                          # invoke it with pi() or just pi;
                          my ( $next_nonblank_token, $i_next ) =
                          # error; for example, we might have a constant pi and
                          # invoke it with pi() or just pi;
                          my ( $next_nonblank_token, $i_next ) =
-                          find_next_nonblank_token( $i, $rtokens );
+                          find_next_nonblank_token( $i, $rtokens,
+                            $max_token_index );
                          if ( $next_nonblank_token ne ')' ) {
                              my $hint;
                              error_if_expecting_OPERATOR('(');
                          if ( $next_nonblank_token ne ')' ) {
                              my $hint;
                              error_if_expecting_OPERATOR('(');
@@ -20127,7 +20787,8 @@ sub reset_indentation_level {
                  } ## end if ( $expecting == OPERATOR...
              }
              $paren_type[$paren_depth] = $container_type;
                  } ## end if ( $expecting == OPERATOR...
              }
              $paren_type[$paren_depth] = $container_type;
-            $type_sequence = increase_nesting_depth( PAREN, $i_tok );
+            $type_sequence =
+              increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
  
              # propagate types down through nested parens
              # for example: the second paren in 'if ((' would be structural
  
              # propagate types down through nested parens
              # for example: the second paren in 'if ((' would be structural
@@ -20175,7 +20836,8 @@ sub reset_indentation_level {
  
          },
          ')' => sub {
  
          },
          ')' => sub {
-            $type_sequence = decrease_nesting_depth( PAREN, $i_tok );
+            $type_sequence =
+              decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
  
              if ( $paren_structural_type[$paren_depth] eq '{' ) {
                  $type = '}';
  
              if ( $paren_structural_type[$paren_depth] eq '{' ) {
                  $type = '}';
@@ -20256,7 +20918,8 @@ sub reset_indentation_level {
              if ( $expecting == UNKNOWN ) {    # indeterminte, must guess..
                  my $msg;
                  ( $is_pattern, $msg ) =
              if ( $expecting == UNKNOWN ) {    # indeterminte, must guess..
                  my $msg;
                  ( $is_pattern, $msg ) =
-                  guess_if_pattern_or_division( $i, $rtokens, $rtoken_map );
+                  guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
+                    $max_token_index );
  
                  if ($msg) {
                      write_diagnostics("DIVIDE:$msg\n");
  
                  if ($msg) {
                      write_diagnostics("DIVIDE:$msg\n");
@@ -20278,11 +20941,11 @@ sub reset_indentation_level {
                      $type = $tok;
                  }
  
                      $type = $tok;
                  }
  
-                #DEBUG - collecting info on what tokens follow a divide
-                # for development of guessing algorithm
-                #if ( numerator_expected( $i, $rtokens ) < 0 ) {
-                #    #write_diagnostics( "DIVIDE? $input_line\n" );
-                #}
+              #DEBUG - collecting info on what tokens follow a divide
+              # for development of guessing algorithm
+              #if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) {
+              #    #write_diagnostics( "DIVIDE? $input_line\n" );
+              #}
              }
          },
          '{' => sub {
              }
          },
          '{' => sub {
@@ -20371,15 +21034,17 @@ sub reset_indentation_level {
              # which will be blank for an anonymous hash
              else {
  
              # which will be blank for an anonymous hash
              else {
  
-                $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type );
+                $block_type =
+                  code_block_type( $i_tok, $rtokens, $rtoken_type,
+                    $max_token_index );
  
                  # patch to promote bareword type to function taking block
                  if (   $block_type
                      && $last_nonblank_type eq 'w'
                      && $last_nonblank_i >= 0 )
                  {
  
                  # patch to promote bareword type to function taking block
                  if (   $block_type
                      && $last_nonblank_type eq 'w'
                      && $last_nonblank_i >= 0 )
                  {
-                    if ( $output_token_type[$last_nonblank_i] eq 'w' ) {
-                        $output_token_type[$last_nonblank_i] = 'G';
+                    if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
+                        $routput_token_type->[$last_nonblank_i] = 'G';
                      }
                  }
  
                      }
                  }
  
@@ -20395,7 +21060,8 @@ sub reset_indentation_level {
              }
              $brace_type[ ++$brace_depth ] = $block_type;
              $brace_package[$brace_depth] = $current_package;
              }
              $brace_type[ ++$brace_depth ] = $block_type;
              $brace_package[$brace_depth] = $current_package;
-            $type_sequence = increase_nesting_depth( BRACE, $i_tok );
+            $type_sequence =
+              increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
              $brace_structural_type[$brace_depth] = $type;
              $brace_context[$brace_depth]         = $context;
              $brace_statement_type[$brace_depth]  = $statement_type;
              $brace_structural_type[$brace_depth] = $type;
              $brace_context[$brace_depth]         = $context;
              $brace_statement_type[$brace_depth]  = $statement_type;
@@ -20410,7 +21076,8 @@ sub reset_indentation_level {
              # can happen on brace error (caught elsewhere)
              else {
              }
              # can happen on brace error (caught elsewhere)
              else {
              }
-            $type_sequence = decrease_nesting_depth( BRACE, $i_tok );
+            $type_sequence =
+              decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
  
              if ( $brace_structural_type[$brace_depth] eq 'L' ) {
                  $type = 'R';
  
              if ( $brace_structural_type[$brace_depth] eq 'L' ) {
                  $type = 'R';
@@ -20444,7 +21111,7 @@ sub reset_indentation_level {
              if ( $expecting != OPERATOR ) {
                  ( $i, $type ) =
                    find_angle_operator_termination( $input_line, $i, $rtoken_map,
              if ( $expecting != OPERATOR ) {
                  ( $i, $type ) =
                    find_angle_operator_termination( $input_line, $i, $rtoken_map,
-                    $expecting );
+                    $expecting, $max_token_index );
  
              }
              else {
  
              }
              else {
@@ -20458,7 +21125,8 @@ sub reset_indentation_level {
  
                  my $msg;
                  ( $is_pattern, $msg ) =
  
                  my $msg;
                  ( $is_pattern, $msg ) =
-                  guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map );
+                  guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
+                    $max_token_index );
  
                  if ($msg) { write_logfile_entry($msg) }
              }
  
                  if ($msg) { write_logfile_entry($msg) }
              }
@@ -20470,9 +21138,9 @@ sub reset_indentation_level {
                  $allowed_quote_modifiers = '[cgimosx]';    # TBD:check this
              }
              else {
                  $allowed_quote_modifiers = '[cgimosx]';    # TBD:check this
              }
              else {
-
                  $type_sequence =
                  $type_sequence =
-                  increase_nesting_depth( QUESTION_COLON, $i_tok );
+                  increase_nesting_depth( QUESTION_COLON,
+                    $$rtoken_map[$i_tok] );
              }
          },
          '*' => sub {    # typeglob, or multiply?
              }
          },
          '*' => sub {    # typeglob, or multiply?
@@ -20538,7 +21206,8 @@ sub reset_indentation_level {
              # otherwise, it should be part of a ?/: operator
              else {
                  $type_sequence =
              # otherwise, it should be part of a ?/: operator
              else {
                  $type_sequence =
-                  decrease_nesting_depth( QUESTION_COLON, $i_tok );
+                  decrease_nesting_depth( QUESTION_COLON,
+                    $$rtoken_map[$i_tok] );
                  if ( $last_nonblank_token eq '?' ) {
                      warning("Syntax error near ? :\n");
                  }
                  if ( $last_nonblank_token eq '?' ) {
                      warning("Syntax error near ? :\n");
                  }
@@ -20547,7 +21216,7 @@ sub reset_indentation_level {
          '+' => sub {    # what kind of plus?
  
              if ( $expecting == TERM ) {
          '+' => sub {    # what kind of plus?
  
              if ( $expecting == TERM ) {
-                scan_number();
+                my $number = scan_number();
  
                  # unary plus is safest assumption if not a number
                  if ( !defined($number) ) { $type = 'p'; }
  
                  # unary plus is safest assumption if not a number
                  if ( !defined($number) ) { $type = 'p'; }
@@ -20577,7 +21246,8 @@ sub reset_indentation_level {
          '[' => sub {
              $square_bracket_type[ ++$square_bracket_depth ] =
                $last_nonblank_token;
          '[' => sub {
              $square_bracket_type[ ++$square_bracket_depth ] =
                $last_nonblank_token;
-            $type_sequence = increase_nesting_depth( SQUARE_BRACKET, $i_tok );
+            $type_sequence =
+              increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
  
              # It may seem odd, but structural square brackets have
              # type '{' and '}'.  This simplifies the indentation logic.
  
              # It may seem odd, but structural square brackets have
              # type '{' and '}'.  This simplifies the indentation logic.
@@ -20587,7 +21257,8 @@ sub reset_indentation_level {
              $square_bracket_structural_type[$square_bracket_depth] = $type;
          },
          ']' => sub {
              $square_bracket_structural_type[$square_bracket_depth] = $type;
          },
          ']' => sub {
-            $type_sequence = decrease_nesting_depth( SQUARE_BRACKET, $i_tok );
+            $type_sequence =
+              decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
  
              if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
              {
  
              if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
              {
@@ -20605,7 +21276,7 @@ sub reset_indentation_level {
                  $type = 'F';
              }
              elsif ( $expecting == TERM ) {
                  $type = 'F';
              }
              elsif ( $expecting == TERM ) {
-                scan_number();
+                my $number = scan_number();
  
                  # maybe part of bareword token? unary is safest
                  if ( !defined($number) ) { $type = 'm'; }
  
                  # maybe part of bareword token? unary is safest
                  if ( !defined($number) ) { $type = 'm'; }
@@ -20661,12 +21332,17 @@ sub reset_indentation_level {
                ;          # here-doc not possible if end of line
  
              if ( $expecting != OPERATOR ) {
                ;          # here-doc not possible if end of line
  
              if ( $expecting != OPERATOR ) {
-                my ($found_target);
-                ( $found_target, $here_doc_target, $here_quote_character, $i ) =
-                  find_here_doc( $expecting, $i, $rtokens, $rtoken_map );
+                my ( $found_target, $here_doc_target, $here_quote_character,
+                    $saw_error );
+                (
+                    $found_target, $here_doc_target, $here_quote_character, $i,
+                    $saw_error
+                  )
+                  = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
+                    $max_token_index );
  
                  if ($found_target) {
  
                  if ($found_target) {
-                    push @here_target_list,
+                    push @{$rhere_target_list},
                        [ $here_doc_target, $here_quote_character ];
                      $type = 'h';
                      if ( length($here_doc_target) > 80 ) {
                        [ $here_doc_target, $here_quote_character ];
                      $type = 'h';
                      if ( length($here_doc_target) > 80 ) {
@@ -20680,10 +21356,12 @@ sub reset_indentation_level {
                      }
                  }
                  elsif ( $expecting == TERM ) {
                      }
                  }
                  elsif ( $expecting == TERM ) {
+                    unless ($saw_error) {
  
  
-                    # shouldn't happen..
-                    warning("Program bug; didn't find here doc target\n");
-                    report_definite_bug();
+                        # shouldn't happen..
+                        warning("Program bug; didn't find here doc target\n");
+                        report_definite_bug();
+                    }
                  }
              }
              else {
                  }
              }
              else {
@@ -20701,7 +21379,7 @@ sub reset_indentation_level {
              if ( $expecting == TERM ) { $type = 'pp' }
              elsif ( $expecting == UNKNOWN ) {
                  my ( $next_nonblank_token, $i_next ) =
              if ( $expecting == TERM ) { $type = 'pp' }
              elsif ( $expecting == UNKNOWN ) {
                  my ( $next_nonblank_token, $i_next ) =
-                  find_next_nonblank_token( $i, $rtokens );
+                  find_next_nonblank_token( $i, $rtokens, $max_token_index );
                  if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
              }
          },
                  if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
              }
          },
@@ -20722,7 +21400,7 @@ sub reset_indentation_level {
              if ( $expecting == TERM ) { $type = 'mm' }
              elsif ( $expecting == UNKNOWN ) {
                  my ( $next_nonblank_token, $i_next ) =
              if ( $expecting == TERM ) { $type = 'mm' }
              elsif ( $expecting == UNKNOWN ) {
                  my ( $next_nonblank_token, $i_next ) =
-                  find_next_nonblank_token( $i, $rtokens );
+                  find_next_nonblank_token( $i, $rtokens, $max_token_index );
                  if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
              }
          },
                  if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
              }
          },
@@ -20921,6 +21599,9 @@ sub reset_indentation_level {
    # *, then run diff between the output of the previous version and the
    # current version.
    #
    # *, then run diff between the output of the previous version and the
    # current version.
    #
+  # *. For another example, search for the smartmatch operator '~~'
+  # with your editor to see where updates were made for it.
+  #
    # -----------------------------------------------------------------------
  
          my $line_of_tokens = shift;
    # -----------------------------------------------------------------------
  
          my $line_of_tokens = shift;
@@ -20933,6 +21614,9 @@ sub reset_indentation_level {
          # extract line number for use in error messages
          $input_line_number = $line_of_tokens->{_line_number};
  
          # extract line number for use in error messages
          $input_line_number = $line_of_tokens->{_line_number};
  
+        # reinitialize for multi-line quote
+        $line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q';
+
          # check for pod documentation
          if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
  
          # check for pod documentation
          if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
  
@@ -20956,12 +21640,18 @@ sub reset_indentation_level {
              $input_line =~ s/^\s*//;    # trim left end
          }
  
              $input_line =~ s/^\s*//;    # trim left end
          }
  
+        # update the copy of the line for use in error messages
+        # This must be exactly what we give the pre_tokenizer
+        $tokenizer_self->{_line_text} = $input_line;
+
          # re-initialize for the main loop
          # re-initialize for the main loop
-        @output_token_list     = ();    # stack of output token indexes
-        @output_token_type     = ();    # token types
-        @output_block_type     = ();    # types of code block
-        @output_container_type = ();    # paren types, such as if, elsif, ..
-        @output_type_sequence  = ();    # nesting sequential number
+        $routput_token_list     = [];    # stack of output token indexes
+        $routput_token_type     = [];    # token types
+        $routput_block_type     = [];    # types of code block
+        $routput_container_type = [];    # paren types, such as if, elsif, ..
+        $routput_type_sequence  = [];    # nesting sequential number
+
+        $rhere_target_list = [];
  
          $tok             = $last_nonblank_token;
          $type            = $last_nonblank_type;
  
          $tok             = $last_nonblank_token;
          $type            = $last_nonblank_type;
@@ -20970,9 +21660,7 @@ sub reset_indentation_level {
          $block_type      = $last_nonblank_block_type;
          $container_type  = $last_nonblank_container_type;
          $type_sequence   = $last_nonblank_type_sequence;
          $block_type      = $last_nonblank_block_type;
          $container_type  = $last_nonblank_container_type;
          $type_sequence   = $last_nonblank_type_sequence;
-        @here_target_list = ();         # list of here-doc target strings
-
-        $peeked_ahead = 0;
+        $peeked_ahead    = 0;
  
          # tokenization is done in two stages..
          # stage 1 is a very simple pre-tokenization
  
          # tokenization is done in two stages..
          # stage 1 is a very simple pre-tokenization
@@ -20984,24 +21672,20 @@ sub reset_indentation_level {
          }
  
          # start by breaking the line into pre-tokens
          }
  
          # start by breaking the line into pre-tokens
-        ( $rpretokens, $rpretoken_map, $rpretoken_type ) =
+        ( $rtokens, $rtoken_map, $rtoken_type ) =
            pre_tokenize( $input_line, $max_tokens_wanted );
  
            pre_tokenize( $input_line, $max_tokens_wanted );
  
-        $max_token_index = scalar(@$rpretokens) - 1;
-        push( @$rpretokens, ' ', ' ', ' ' ); # extra whitespace simplifies logic
-        push( @$rpretoken_map,  0,   0,   0 );     # shouldn't be referenced
-        push( @$rpretoken_type, 'b', 'b', 'b' );
-
-        # temporary copies while coding change is underway
-        ( $rtokens, $rtoken_map, $rtoken_type ) =
-          ( $rpretokens, $rpretoken_map, $rpretoken_type );
+        $max_token_index = scalar(@$rtokens) - 1;
+        push( @$rtokens,    ' ', ' ', ' ' ); # extra whitespace simplifies logic
+        push( @$rtoken_map, 0,   0,   0 );   # shouldn't be referenced
+        push( @$rtoken_type, 'b', 'b', 'b' );
  
          # initialize for main loop
          for $i ( 0 .. $max_token_index + 3 ) {
  
          # initialize for main loop
          for $i ( 0 .. $max_token_index + 3 ) {
-            $output_token_type[$i]     = "";
-            $output_block_type[$i]     = "";
-            $output_container_type[$i] = "";
-            $output_type_sequence[$i]  = "";
+            $routput_token_type->[$i]     = "";
+            $routput_block_type->[$i]     = "";
+            $routput_container_type->[$i] = "";
+            $routput_type_sequence->[$i]  = "";
          }
          $i     = -1;
          $i_tok = -1;
          }
          $i     = -1;
          $i_tok = -1;
@@ -21017,25 +21701,39 @@ sub reset_indentation_level {
              if ($in_quote) {    # continue looking for end of a quote
                  $type = $quote_type;
  
              if ($in_quote) {    # continue looking for end of a quote
                  $type = $quote_type;
  
-                unless (@output_token_list) {  # initialize if continuation line
-                    push( @output_token_list, $i );
-                    $output_token_type[$i] = $type;
+                unless ( @{$routput_token_list} )
+                {               # initialize if continuation line
+                    push( @{$routput_token_list}, $i );
+                    $routput_token_type->[$i] = $type;
  
                  }
                  $tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
  
                  # scan for the end of the quote or pattern
  
                  }
                  $tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
  
                  # scan for the end of the quote or pattern
-                ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
-                  do_quote( $i, $in_quote, $quote_character, $quote_pos,
-                    $quote_depth, $rtokens, $rtoken_map );
+                (
+                    $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
+                    $quoted_string_1, $quoted_string_2
+                  )
+                  = do_quote(
+                    $i,               $in_quote,    $quote_character,
+                    $quote_pos,       $quote_depth, $quoted_string_1,
+                    $quoted_string_2, $rtokens,     $rtoken_map,
+                    $max_token_index
+                  );
  
                  # all done if we didn't find it
                  last if ($in_quote);
  
  
                  # all done if we didn't find it
                  last if ($in_quote);
  
+                # save pattern and replacement text for rescanning
+                my $qs1 = $quoted_string_1;
+                my $qs2 = $quoted_string_2;
+
                  # re-initialize for next search
                  $quote_character = '';
                  $quote_pos       = 0;
                  $quote_type      = 'Q';
                  # re-initialize for next search
                  $quote_character = '';
                  $quote_pos       = 0;
                  $quote_type      = 'Q';
+                $quoted_string_1 = "";
+                $quoted_string_2 = "";
                  last if ( ++$i > $max_token_index );
  
                  # look for any modifiers
                  last if ( ++$i > $max_token_index );
  
                  # look for any modifiers
@@ -21044,7 +21742,32 @@ sub reset_indentation_level {
                      # check for exact quote modifiers
                      if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {
                          my $str = $$rtokens[$i];
                      # check for exact quote modifiers
                      if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {
                          my $str = $$rtokens[$i];
-                        while ( $str =~ /\G$allowed_quote_modifiers/gc ) { }
+                        my $saw_modifier_e;
+                        while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
+                            my $pos = pos($str);
+                            my $char = substr( $str, $pos - 1, 1 );
+                            $saw_modifier_e ||= ( $char eq 'e' );
+                        }
+
+                        # For an 'e' quote modifier we must scan the replacement
+                        # text for here-doc targets.
+                        if ($saw_modifier_e) {
+
+                            my $rht = scan_replacement_text($qs1);
+
+                            # Change type from 'Q' to 'h' for quotes with
+                            # here-doc targets so that the formatter (see sub
+                            # print_line_of_tokens) will not make any line
+                            # breaks after this point.
+                            if ($rht) {
+                                push @{$rhere_target_list}, @{$rht};
+                                $type = 'h';
+                                if ( $i_tok < 0 ) {
+                                    my $ilast = $routput_token_list->[-1];
+                                    $routput_token_type->[$ilast] = $type;
+                                }
+                            }
+                        }
  
                          if ( defined( pos($str) ) ) {
  
  
                          if ( defined( pos($str) ) ) {
  
@@ -21108,9 +21831,9 @@ EOM
                      }
                  }
  
                      }
                  }
  
-                $last_last_nonblank_token          = $last_nonblank_token;
-                $last_last_nonblank_type           = $last_nonblank_type;
-                $last_last_nonblank_block_type     = $last_nonblank_block_type;
+                $last_last_nonblank_token      = $last_nonblank_token;
+                $last_last_nonblank_type       = $last_nonblank_type;
+                $last_last_nonblank_block_type = $last_nonblank_block_type;
                  $last_last_nonblank_container_type =
                    $last_nonblank_container_type;
                  $last_last_nonblank_type_sequence =
                  $last_last_nonblank_container_type =
                    $last_nonblank_container_type;
                  $last_last_nonblank_type_sequence =
@@ -21126,10 +21849,10 @@ EOM
  
              # store previous token type
              if ( $i_tok >= 0 ) {
  
              # store previous token type
              if ( $i_tok >= 0 ) {
-                $output_token_type[$i_tok]     = $type;
-                $output_block_type[$i_tok]     = $block_type;
-                $output_container_type[$i_tok] = $container_type;
-                $output_type_sequence[$i_tok]  = $type_sequence;
+                $routput_token_type->[$i_tok]     = $type;
+                $routput_block_type->[$i_tok]     = $block_type;
+                $routput_container_type->[$i_tok] = $container_type;
+                $routput_type_sequence->[$i_tok]  = $type_sequence;
              }
              my $pre_tok  = $$rtokens[$i];        # get the next pre-token
              my $pre_type = $$rtoken_type[$i];    # and type
              }
              my $pre_tok  = $$rtokens[$i];        # get the next pre-token
              my $pre_type = $$rtoken_type[$i];    # and type
@@ -21142,7 +21865,7 @@ EOM
              $i_tok     = $i;
  
              # this pre-token will start an output token
              $i_tok     = $i;
  
              # this pre-token will start an output token
-            push( @output_token_list, $i_tok );
+            push( @{$routput_token_list}, $i_tok );
  
              # continue gathering identifier if necessary
              # but do not start on blanks and comments
  
              # continue gathering identifier if necessary
              # but do not start on blanks and comments
@@ -21246,7 +21969,7 @@ EOM
              if ( $pre_type eq 'w' ) {
                  $expecting = operator_expected( $prev_type, $tok, $next_type );
                  my ( $next_nonblank_token, $i_next ) =
              if ( $pre_type eq 'w' ) {
                  $expecting = operator_expected( $prev_type, $tok, $next_type );
                  my ( $next_nonblank_token, $i_next ) =
-                  find_next_nonblank_token( $i, $rtokens );
+                  find_next_nonblank_token( $i, $rtokens, $max_token_index );
  
                  # ATTRS: handle sub and variable attributes
                  if ($in_attribute_list) {
  
                  # ATTRS: handle sub and variable attributes
                  if ($in_attribute_list) {
@@ -21275,13 +21998,13 @@ EOM
                              $type = 'C';
                          }
                          elsif ( $is_user_function{$current_package}{$tok} ) {
                              $type = 'C';
                          }
                          elsif ( $is_user_function{$current_package}{$tok} ) {
-                            $type      = 'U';
+                            $type = 'U';
                              $prototype =
                                $user_function_prototype{$current_package}{$tok};
                          }
                          elsif ( $tok =~ /^v\d+$/ ) {
                              $type = 'v';
                              $prototype =
                                $user_function_prototype{$current_package}{$tok};
                          }
                          elsif ( $tok =~ /^v\d+$/ ) {
                              $type = 'v';
-                            unless ($saw_v_string) { report_v_string($tok) }
+                            report_v_string($tok);
                          }
                          else { $type = 'w' }
  
                          }
                          else { $type = 'w' }
  
@@ -21385,7 +22108,8 @@ EOM
                  {
                      scan_bare_identifier();
                      my ( $next_nonblank_token, $i_next ) =
                  {
                      scan_bare_identifier();
                      my ( $next_nonblank_token, $i_next ) =
-                      find_next_nonblank_token( $i, $rtokens );
+                      find_next_nonblank_token( $i, $rtokens,
+                        $max_token_index );
  
                      if ($next_nonblank_token) {
  
  
                      if ($next_nonblank_token) {
  
@@ -21442,7 +22166,8 @@ EOM
                    )
                  {
                      if ( $tok !~ /A-Z/ ) {
                    )
                  {
                      if ( $tok !~ /A-Z/ ) {
-                        push @lower_case_labels_at, $input_line_number;
+                        push @{ $tokenizer_self->{_rlower_case_labels_at} },
+                          $input_line_number;
                      }
                      $type = 'J';
                      $tok .= ':';
                      }
                      $type = 'J';
                      $tok .= ':';
@@ -21583,12 +22308,9 @@ EOM
                              $type = 'U';
                          }
  
                              $type = 'U';
                          }
  
-                        # mark bare words following a file test operator as
-                        # something that will expect an operator next.
-                        # patch 072901: unless followed immediately by a paren,
-                        # in which case it must be a function call (pid.t)
-                        if ( $last_nonblank_type eq 'F' && $next_tok ne '(' ) {
-                            $type = 'C';
+                        # underscore after file test operator is file handle
+                        if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
+                            $type = 'Z';
                          }
  
                          # patch for SWITCH/CASE if 'case' and 'when are
                          }
  
                          # patch for SWITCH/CASE if 'case' and 'when are
@@ -21627,7 +22349,7 @@ EOM
                  $expecting = operator_expected( $prev_type, $tok, $next_type );
                  error_if_expecting_OPERATOR("Number")
                    if ( $expecting == OPERATOR );
                  $expecting = operator_expected( $prev_type, $tok, $next_type );
                  error_if_expecting_OPERATOR("Number")
                    if ( $expecting == OPERATOR );
-                scan_number();
+                my $number = scan_number();
                  if ( !defined($number) ) {
  
                      # shouldn't happen - we should always get a number
                  if ( !defined($number) ) {
  
                      # shouldn't happen - we should always get a number
@@ -21657,10 +22379,10 @@ EOM
          # -----------------------------
  
          if ( $i_tok >= 0 ) {
          # -----------------------------
  
          if ( $i_tok >= 0 ) {
-            $output_token_type[$i_tok]     = $type;
-            $output_block_type[$i_tok]     = $block_type;
-            $output_container_type[$i_tok] = $container_type;
-            $output_type_sequence[$i_tok]  = $type_sequence;
+            $routput_token_type->[$i_tok]     = $type;
+            $routput_block_type->[$i_tok]     = $block_type;
+            $routput_container_type->[$i_tok] = $container_type;
+            $routput_type_sequence->[$i_tok]  = $type_sequence;
          }
  
          unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
          }
  
          unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
@@ -21763,7 +22485,7 @@ EOM
  #       indentation level, if it is is appropriate for list formatting.
  #       If so, continuation indentation is used to indent long list items.
  #     $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
  #       indentation level, if it is is appropriate for list formatting.
  #       If so, continuation indentation is used to indent long list items.
  #     $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
-#     @slevel_stack = a stack of total nesting depths at each
+#     @{$rslevel_stack} = a stack of total nesting depths at each
  #       structural indentation level, where "total nesting depth" means
  #       the nesting depth that would occur if every nesting token -- '{', '[',
  #       and '(' -- , regardless of context, is used to compute a nesting
  #       structural indentation level, where "total nesting depth" means
  #       the nesting depth that would occur if every nesting token -- '{', '[',
  #       and '(' -- , regardless of context, is used to compute a nesting
@@ -21776,10 +22498,11 @@ EOM
              $nesting_list_string_i, $nesting_token_string_i,
              $nesting_type_string_i, );
  
              $nesting_list_string_i, $nesting_token_string_i,
              $nesting_type_string_i, );
  
-        foreach $i (@output_token_list) {  # scan the list of pre-tokens indexes
+        foreach $i ( @{$routput_token_list} )
+        {    # scan the list of pre-tokens indexes
  
              # self-checking for valid token types
  
              # self-checking for valid token types
-            my $type = $output_token_type[$i];
+            my $type = $routput_token_type->[$i];
              my $tok = $$rtokens[$i];   # the token, but ONLY if same as pretoken
              $level_i = $level_in_tokenizer;
  
              my $tok = $$rtokens[$i];   # the token, but ONLY if same as pretoken
              $level_i = $level_in_tokenizer;
  
@@ -21820,15 +22543,15 @@ EOM
                  $container_environment =
                      $nesting_block_flag ? 'BLOCK'
                    : $nesting_list_flag  ? 'LIST'
                  $container_environment =
                      $nesting_block_flag ? 'BLOCK'
                    : $nesting_list_flag  ? 'LIST'
-                  : "";
+                  :                       "";
  
                  # if the difference between total nesting levels is not 1,
                  # there are intervening non-structural nesting types between
                  # this '{' and the previous unclosed '{'
                  my $intervening_secondary_structure = 0;
  
                  # if the difference between total nesting levels is not 1,
                  # there are intervening non-structural nesting types between
                  # this '{' and the previous unclosed '{'
                  my $intervening_secondary_structure = 0;
-                if (@slevel_stack) {
+                if ( @{$rslevel_stack} ) {
                      $intervening_secondary_structure =
                      $intervening_secondary_structure =
-                      $slevel_in_tokenizer - $slevel_stack[-1];
+                      $slevel_in_tokenizer - $rslevel_stack->[-1];
                  }
  
       # =head1 Continuation Indentation
                  }
  
       # =head1 Continuation Indentation
@@ -21878,10 +22601,10 @@ EOM
       # variable.
  
                  # save the current states
       # variable.
  
                  # save the current states
-                push( @slevel_stack, 1 + $slevel_in_tokenizer );
+                push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
                  $level_in_tokenizer++;
  
                  $level_in_tokenizer++;
  
-                if ( $output_block_type[$i] ) {
+                if ( $routput_block_type->[$i] ) {
                      $nesting_block_flag = 1;
                      $nesting_block_string .= '1';
                  }
                      $nesting_block_flag = 1;
                      $nesting_block_string .= '1';
                  }
@@ -21893,10 +22616,10 @@ EOM
                  # we will use continuation indentation within containers
                  # which are not blocks and not logical expressions
                  my $bit = 0;
                  # we will use continuation indentation within containers
                  # which are not blocks and not logical expressions
                  my $bit = 0;
-                if ( !$output_block_type[$i] ) {
+                if ( !$routput_block_type->[$i] ) {
  
                      # propagate flag down at nested open parens
  
                      # propagate flag down at nested open parens
-                    if ( $output_container_type[$i] eq '(' ) {
+                    if ( $routput_container_type->[$i] eq '(' ) {
                          $bit = 1 if $nesting_list_flag;
                      }
  
                          $bit = 1 if $nesting_list_flag;
                      }
  
@@ -21905,7 +22628,8 @@ EOM
                      else {
                          $bit = 1
                            unless
                      else {
                          $bit = 1
                            unless
-                          $is_logical_container{ $output_container_type[$i] };
+                          $is_logical_container{ $routput_container_type->[$i]
+                          };
                      }
                  }
                  $nesting_list_string .= $bit;
                      }
                  }
                  $nesting_list_string .= $bit;
@@ -21936,7 +22660,7 @@ EOM
  
                  my $total_ci = $ci_string_sum;
                  if (
  
                  my $total_ci = $ci_string_sum;
                  if (
-                    !$output_block_type[$i]    # patch: skip for BLOCK
+                    !$routput_block_type->[$i]    # patch: skip for BLOCK
                      && ($in_statement_continuation)
                    )
                  {
                      && ($in_statement_continuation)
                    )
                  {
@@ -21951,7 +22675,7 @@ EOM
              elsif ( $type eq '}' || $type eq 'R' ) {
  
                  # only a nesting error in the script would prevent popping here
              elsif ( $type eq '}' || $type eq 'R' ) {
  
                  # only a nesting error in the script would prevent popping here
-                if ( @slevel_stack > 1 ) { pop(@slevel_stack); }
+                if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
  
                  $level_i = --$level_in_tokenizer;
  
  
                  $level_i = --$level_in_tokenizer;
  
@@ -21972,15 +22696,16 @@ EOM
  
                      # zero continuation flag at terminal BLOCK '}' which
                      # ends a statement.
  
                      # zero continuation flag at terminal BLOCK '}' which
                      # ends a statement.
-                    if ( $output_block_type[$i] ) {
+                    if ( $routput_block_type->[$i] ) {
  
                          # ...These include non-anonymous subs
                          # note: could be sub ::abc { or sub 'abc
  
                          # ...These include non-anonymous subs
                          # note: could be sub ::abc { or sub 'abc
-                        if ( $output_block_type[$i] =~ m/^sub\s*/gc ) {
+                        if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) {
  
                           # note: older versions of perl require the /gc modifier
                           # here or else the \G does not work.
  
                           # note: older versions of perl require the /gc modifier
                           # here or else the \G does not work.
-                            if ( $output_block_type[$i] =~ /\G('|::|\w)/gc ) {
+                            if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc )
+                            {
                                  $in_statement_continuation = 0;
                              }
                          }
                                  $in_statement_continuation = 0;
                              }
                          }
@@ -21989,8 +22714,8 @@ EOM
  # block prototypes and these: (sort|grep|map|do|eval)
  # /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
                          elsif (
  # block prototypes and these: (sort|grep|map|do|eval)
  # /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
                          elsif (
-                            $is_zero_continuation_block_type{ $output_block_type
-                                  [$i] } )
+                            $is_zero_continuation_block_type{
+                                $routput_block_type->[$i] } )
                          {
                              $in_statement_continuation = 0;
                          }
                          {
                              $in_statement_continuation = 0;
                          }
@@ -21999,18 +22724,19 @@ EOM
                          #     /^(sort|grep|map|do|eval)$/ )
                          elsif (
                              $is_not_zero_continuation_block_type{
                          #     /^(sort|grep|map|do|eval)$/ )
                          elsif (
                              $is_not_zero_continuation_block_type{
-                                $output_block_type[$i] } )
+                                $routput_block_type->[$i] } )
                          {
                          }
  
                          # ..and a block introduced by a label
                          # /^\w+\s*:$/gc ) {
                          {
                          }
  
                          # ..and a block introduced by a label
                          # /^\w+\s*:$/gc ) {
-                        elsif ( $output_block_type[$i] =~ /:$/ ) {
+                        elsif ( $routput_block_type->[$i] =~ /:$/ ) {
                              $in_statement_continuation = 0;
                          }
  
                              $in_statement_continuation = 0;
                          }
  
-                        # ..nor user function with block prototype
+                        # user function with block prototype
                          else {
                          else {
+                            $in_statement_continuation = 0;
                          }
                      }
  
                          }
                      }
  
@@ -22026,7 +22752,7 @@ EOM
                      #     );
                      elsif ( $tok eq ')' ) {
                          $in_statement_continuation = 1
                      #     );
                      elsif ( $tok eq ')' ) {
                          $in_statement_continuation = 1
-                          if $output_container_type[$i] =~ /^[;,\{\}]$/;
+                          if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
                      }
                  }
  
                      }
                  }
  
@@ -22034,7 +22760,7 @@ EOM
                  $container_environment =
                      $nesting_block_flag ? 'BLOCK'
                    : $nesting_list_flag  ? 'LIST'
                  $container_environment =
                      $nesting_block_flag ? 'BLOCK'
                    : $nesting_list_flag  ? 'LIST'
-                  : "";
+                  :                       "";
                  $ci_string_i = $ci_string_sum + $in_statement_continuation;
                  $nesting_block_string_i = $nesting_block_string;
                  $nesting_list_string_i  = $nesting_list_string;
                  $ci_string_i = $ci_string_sum + $in_statement_continuation;
                  $nesting_block_string_i = $nesting_block_string;
                  $nesting_list_string_i  = $nesting_list_string;
@@ -22046,7 +22772,7 @@ EOM
                  $container_environment =
                      $nesting_block_flag ? 'BLOCK'
                    : $nesting_list_flag  ? 'LIST'
                  $container_environment =
                      $nesting_block_flag ? 'BLOCK'
                    : $nesting_list_flag  ? 'LIST'
-                  : "";
+                  :                       "";
  
                  # zero the continuation indentation at certain tokens so
                  # that they will be at the same level as its container.  For
  
                  # zero the continuation indentation at certain tokens so
                  # that they will be at the same level as its container.  For
@@ -22113,8 +22839,8 @@ EOM
              }
  
              if ( $level_in_tokenizer < 0 ) {
              }
  
              if ( $level_in_tokenizer < 0 ) {
-                unless ($saw_negative_indentation) {
-                    $saw_negative_indentation = 1;
+                unless ( $tokenizer_self->{_saw_negative_indentation} ) {
+                    $tokenizer_self->{_saw_negative_indentation} = 1;
                      warning("Starting negative indentation\n");
                  }
              }
                      warning("Starting negative indentation\n");
                  }
              }
@@ -22146,16 +22872,16 @@ EOM
                  }
              }
  
                  }
              }
  
-            push( @block_type,            $output_block_type[$i] );
+            push( @block_type,            $routput_block_type->[$i] );
              push( @ci_string,             $ci_string_i );
              push( @container_environment, $container_environment );
              push( @ci_string,             $ci_string_i );
              push( @container_environment, $container_environment );
-            push( @container_type,        $output_container_type[$i] );
+            push( @container_type,        $routput_container_type->[$i] );
              push( @levels,                $level_i );
              push( @nesting_tokens,        $nesting_token_string_i );
              push( @nesting_types,         $nesting_type_string_i );
              push( @slevels,               $slevel_i );
              push( @token_type,            $fix_type );
              push( @levels,                $level_i );
              push( @nesting_tokens,        $nesting_token_string_i );
              push( @nesting_types,         $nesting_type_string_i );
              push( @slevels,               $slevel_i );
              push( @token_type,            $fix_type );
-            push( @type_sequence,         $output_type_sequence[$i] );
+            push( @type_sequence,         $routput_type_sequence->[$i] );
              push( @nesting_blocks,        $nesting_block_string );
              push( @nesting_lists,         $nesting_list_string );
  
              push( @nesting_blocks,        $nesting_block_string );
              push( @nesting_lists,         $nesting_list_string );
  
@@ -22179,7 +22905,9 @@ EOM
  
          $tokenizer_self->{_in_attribute_list} = $in_attribute_list;
          $tokenizer_self->{_in_quote}          = $in_quote;
  
          $tokenizer_self->{_in_attribute_list} = $in_attribute_list;
          $tokenizer_self->{_in_quote}          = $in_quote;
-        $tokenizer_self->{_rhere_target_list} = \@here_target_list;
+        $tokenizer_self->{_quote_target} =
+          $in_quote ? matching_end_token($quote_character) : "";
+        $tokenizer_self->{_rhere_target_list} = $rhere_target_list;
  
          $line_of_tokens->{_rtoken_type}            = \@token_type;
          $line_of_tokens->{_rtokens}                = \@tokens;
  
          $line_of_tokens->{_rtoken_type}            = \@token_type;
          $line_of_tokens->{_rtokens}                = \@tokens;
@@ -22197,51 +22925,263 @@ EOM
      }
  }    # end tokenize_this_line
  
      }
  }    # end tokenize_this_line
  
-sub new_statement_ok {
-
-    # return true if the current token can start a new statement
+#########i#############################################################
+# Tokenizer routines which assist in identifying token types
+#######################################################################
  
  
-    return label_ok()    # a label would be ok here
+sub operator_expected {
  
  
-      || $last_nonblank_type eq 'J';    # or we follow a label
+    # Many perl symbols have two or more meanings.  For example, '<<'
+    # can be a shift operator or a here-doc operator.  The
+    # interpretation of these symbols depends on the current state of
+    # the tokenizer, which may either be expecting a term or an
+    # operator.  For this example, a << would be a shift if an operator
+    # is expected, and a here-doc if a term is expected.  This routine
+    # is called to make this decision for any current token.  It returns
+    # one of three possible values:
+    #
+    #     OPERATOR - operator expected (or at least, not a term)
+    #     UNKNOWN  - can't tell
+    #     TERM     - a term is expected (or at least, not an operator)
+    #
+    # The decision is based on what has been seen so far.  This
+    # information is stored in the "$last_nonblank_type" and
+    # "$last_nonblank_token" variables.  For example, if the
+    # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
+    # if $last_nonblank_type is 'n' (numeric), we are expecting an
+    # OPERATOR.
+    #
+    # If a UNKNOWN is returned, the calling routine must guess. A major
+    # goal of this tokenizer is to minimize the possiblity of returning
+    # UNKNOWN, because a wrong guess can spoil the formatting of a
+    # script.
+    #
+    # adding NEW_TOKENS: it is critically important that this routine be
+    # updated to allow it to determine if an operator or term is to be
+    # expected after the new token.  Doing this simply involves adding
+    # the new token character to one of the regexes in this routine or
+    # to one of the hash lists
+    # that it uses, which are initialized in the BEGIN section.
+    # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
+    # $statement_type
  
  
-}
+    my ( $prev_type, $tok, $next_type ) = @_;
  
  
-sub label_ok {
+    my $op_expected = UNKNOWN;
  
  
-    # Decide if a bare word followed by a colon here is a label
+#print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";
  
  
-    # if it follows an opening or closing code block curly brace..
-    if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
-        && $last_nonblank_type eq $last_nonblank_token )
-    {
+# Note: function prototype is available for token type 'U' for future
+# program development.  It contains the leading and trailing parens,
+# and no blanks.  It might be used to eliminate token type 'C', for
+# example (prototype = '()'). Thus:
+# if ($last_nonblank_type eq 'U') {
+#     print "previous token=$last_nonblank_token  type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
+# }
  
  
-        # it is a label if and only if the curly encloses a code block
-        return $brace_type[$brace_depth];
-    }
+    # A possible filehandle (or object) requires some care...
+    if ( $last_nonblank_type eq 'Z' ) {
  
  
-    # otherwise, it is a label if and only if it follows a ';'
-    # (real or fake)
-    else {
-        return ( $last_nonblank_type eq ';' );
-    }
-}
+        # angle.t
+        if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
+            $op_expected = UNKNOWN;
+        }
  
  
-sub code_block_type {
+        # For possible file handle like "$a", Perl uses weird parsing rules.
+        # For example:
+        # print $a/2,"/hi";   - division
+        # print $a / 2,"/hi"; - division
+        # print $a/ 2,"/hi";  - division
+        # print $a /2,"/hi";  - pattern (and error)!
+        elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
+            $op_expected = TERM;
+        }
  
  
-    # Decide if this is a block of code, and its type.
-    # Must be called only when $type = $token = '{'
-    # The problem is to distinguish between the start of a block of code
-    # and the start of an anonymous hash reference
-    # Returns "" if not code block, otherwise returns 'last_nonblank_token'
-    # to indicate the type of code block.  (For example, 'last_nonblank_token'
-    # might be 'if' for an if block, 'else' for an else block, etc).
+        # Note when an operation is being done where a
+        # filehandle might be expected, since a change in whitespace
+        # could change the interpretation of the statement.
+        else {
+            if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
+                complain("operator in print statement not recommended\n");
+                $op_expected = OPERATOR;
+            }
+        }
+    }
  
  
-    # handle case of multiple '{'s
+    # handle something after 'do' and 'eval'
+    elsif ( $is_block_operator{$last_nonblank_token} ) {
+
+        # something like $a = eval "expression";
+        #                          ^
+        if ( $last_nonblank_type eq 'k' ) {
+            $op_expected = TERM;    # expression or list mode following keyword
+        }
+
+        # something like $a = do { BLOCK } / 2;
+        #                                  ^
+        else {
+            $op_expected = OPERATOR;    # block mode following }
+        }
+    }
+
+    # handle bare word..
+    elsif ( $last_nonblank_type eq 'w' ) {
+
+        # unfortunately, we can't tell what type of token to expect next
+        # after most bare words
+        $op_expected = UNKNOWN;
+    }
+
+    # operator, but not term possible after these types
+    # Note: moved ')' from type to token because parens in list context
+    # get marked as '{' '}' now.  This is a minor glitch in the following:
+    #    my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
+    #
+    elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
+        || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
+    {
+        $op_expected = OPERATOR;
+
+        # in a 'use' statement, numbers and v-strings are not true
+        # numbers, so to avoid incorrect error messages, we will
+        # mark them as unknown for now (use.t)
+        # TODO: it would be much nicer to create a new token V for VERSION
+        # number in a use statement.  Then this could be a check on type V
+        # and related patches which change $statement_type for '=>'
+        # and ',' could be removed.  Further, it would clean things up to
+        # scan the 'use' statement with a separate subroutine.
+        if (   ( $statement_type eq 'use' )
+            && ( $last_nonblank_type =~ /^[nv]$/ ) )
+        {
+            $op_expected = UNKNOWN;
+        }
+    }
+
+    # no operator after many keywords, such as "die", "warn", etc
+    elsif ( $expecting_term_token{$last_nonblank_token} ) {
+
+        # patch for dor.t (defined or).
+        # perl functions which may be unary operators
+        # TODO: This list is incomplete, and these should be put
+        # into a hash.
+        if (   $tok eq '/'
+            && $next_type          eq '/'
+            && $last_nonblank_type eq 'k'
+            && $last_nonblank_token =~ /^eof|undef|shift|pop$/ )
+        {
+            $op_expected = OPERATOR;
+        }
+        else {
+            $op_expected = TERM;
+        }
+    }
+
+    # no operator after things like + - **  (i.e., other operators)
+    elsif ( $expecting_term_types{$last_nonblank_type} ) {
+        $op_expected = TERM;
+    }
+
+    # a few operators, like "time", have an empty prototype () and so
+    # take no parameters but produce a value to operate on
+    elsif ( $expecting_operator_token{$last_nonblank_token} ) {
+        $op_expected = OPERATOR;
+    }
+
+    # post-increment and decrement produce values to be operated on
+    elsif ( $expecting_operator_types{$last_nonblank_type} ) {
+        $op_expected = OPERATOR;
+    }
+
+    # no value to operate on after sub block
+    elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
+
+    # a right brace here indicates the end of a simple block.
+    # all non-structural right braces have type 'R'
+    # all braces associated with block operator keywords have been given those
+    # keywords as "last_nonblank_token" and caught above.
+    # (This statement is order dependent, and must come after checking
+    # $last_nonblank_token).
+    elsif ( $last_nonblank_type eq '}' ) {
+
+        # patch for dor.t (defined or).
+        if (   $tok eq '/'
+            && $next_type           eq '/'
+            && $last_nonblank_token eq ']' )
+        {
+            $op_expected = OPERATOR;
+        }
+        else {
+            $op_expected = TERM;
+        }
+    }
+
+    # something else..what did I forget?
+    else {
+
+        # collecting diagnostics on unknown operator types..see what was missed
+        $op_expected = UNKNOWN;
+        write_diagnostics(
+"OP: unknown after type=$last_nonblank_type  token=$last_nonblank_token\n"
+        );
+    }
+
+    TOKENIZER_DEBUG_FLAG_EXPECT && do {
+        print
+"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
+    };
+    return $op_expected;
+}
+
+sub new_statement_ok {
+
+    # return true if the current token can start a new statement
+    # USES GLOBAL VARIABLES: $last_nonblank_type
+
+    return label_ok()    # a label would be ok here
+
+      || $last_nonblank_type eq 'J';    # or we follow a label
+
+}
+
+sub label_ok {
+
+    # Decide if a bare word followed by a colon here is a label
+    # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
+    # $brace_depth, @brace_type
+
+    # if it follows an opening or closing code block curly brace..
+    if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
+        && $last_nonblank_type eq $last_nonblank_token )
+    {
+
+        # it is a label if and only if the curly encloses a code block
+        return $brace_type[$brace_depth];
+    }
+
+    # otherwise, it is a label if and only if it follows a ';'
+    # (real or fake)
+    else {
+        return ( $last_nonblank_type eq ';' );
+    }
+}
+
+sub code_block_type {
+
+    # Decide if this is a block of code, and its type.
+    # Must be called only when $type = $token = '{'
+    # The problem is to distinguish between the start of a block of code
+    # and the start of an anonymous hash reference
+    # Returns "" if not code block, otherwise returns 'last_nonblank_token'
+    # to indicate the type of code block.  (For example, 'last_nonblank_token'
+    # might be 'if' for an if block, 'else' for an else block, etc).
+    # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
+    # $last_nonblank_block_type, $brace_depth, @brace_type
+
+    # handle case of multiple '{'s
  
  # print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
  
  
  # print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
  
-    my ( $i, $rtokens, $rtoken_type ) = @_;
+    my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
      if (   $last_nonblank_token eq '{'
          && $last_nonblank_type eq $last_nonblank_token )
      {
      if (   $last_nonblank_token eq '{'
          && $last_nonblank_type eq $last_nonblank_token )
      {
@@ -22249,7 +23189,8 @@ sub code_block_type {
          # opening brace where a statement may appear is probably
          # a code block but might be and anonymous hash reference
          if ( $brace_type[$brace_depth] ) {
          # opening brace where a statement may appear is probably
          # a code block but might be and anonymous hash reference
          if ( $brace_type[$brace_depth] ) {
-            return decide_if_code_block( $i, $rtokens, $rtoken_type );
+            return decide_if_code_block( $i, $rtokens, $rtoken_type,
+                $max_token_index );
          }
  
          # cannot start a code block within an anonymous hash
          }
  
          # cannot start a code block within an anonymous hash
@@ -22262,7 +23203,8 @@ sub code_block_type {
  
          # an opening brace where a statement may appear is probably
          # a code block but might be and anonymous hash reference
  
          # an opening brace where a statement may appear is probably
          # a code block but might be and anonymous hash reference
-        return decide_if_code_block( $i, $rtokens, $rtoken_type );
+        return decide_if_code_block( $i, $rtokens, $rtoken_type,
+            $max_token_index );
      }
  
      # handle case of '}{'
      }
  
      # handle case of '}{'
@@ -22273,7 +23215,8 @@ sub code_block_type {
          # a } { situation ...
          # could be hash reference after code block..(blktype1.t)
          if ($last_nonblank_block_type) {
          # a } { situation ...
          # could be hash reference after code block..(blktype1.t)
          if ($last_nonblank_block_type) {
-            return decide_if_code_block( $i, $rtokens, $rtoken_type );
+            return decide_if_code_block( $i, $rtokens, $rtoken_type,
+                $max_token_index );
          }
  
          # must be a block if it follows a closing hash reference
          }
  
          # must be a block if it follows a closing hash reference
@@ -22315,7 +23258,8 @@ sub code_block_type {
  
      # check bareword
      elsif ( $last_nonblank_type eq 'w' ) {
  
      # check bareword
      elsif ( $last_nonblank_type eq 'w' ) {
-        return decide_if_code_block( $i, $rtokens, $rtoken_type );
+        return decide_if_code_block( $i, $rtokens, $rtoken_type,
+            $max_token_index );
      }
  
      # anything else must be anonymous hash reference
      }
  
      # anything else must be anonymous hash reference
@@ -22326,9 +23270,10 @@ sub code_block_type {
  
  sub decide_if_code_block {
  
  
  sub decide_if_code_block {
  
-    my ( $i, $rtokens, $rtoken_type ) = @_;
+    # USES GLOBAL VARIABLES: $last_nonblank_token
+    my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
      my ( $next_nonblank_token, $i_next ) =
      my ( $next_nonblank_token, $i_next ) =
-      find_next_nonblank_token( $i, $rtokens );
+      find_next_nonblank_token( $i, $rtokens, $max_token_index );
  
      # we are at a '{' where a statement may appear.
      # We must decide if this brace starts an anonymous hash or a code
  
      # we are at a '{' where a statement may appear.
      # We must decide if this brace starts an anonymous hash or a code
@@ -22430,12 +23375,16 @@ sub decide_if_code_block {
  sub unexpected {
  
      # report unexpected token type and show where it is
  sub unexpected {
  
      # report unexpected token type and show where it is
-    my ( $found, $expecting, $i_tok, $last_nonblank_i ) = @_;
-    $unexpected_error_count++;
-    if ( $unexpected_error_count <= MAX_NAG_MESSAGES ) {
+    # USES GLOBAL VARIABLES: $tokenizer_self
+    my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map,
+        $rpretoken_type, $input_line )
+      = @_;
+
+    if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) {
          my $msg = "found $found where $expecting expected";
          my $pos = $$rpretoken_map[$i_tok];
          interrupt_logfile();
          my $msg = "found $found where $expecting expected";
          my $pos = $$rpretoken_map[$i_tok];
          interrupt_logfile();
+        my $input_line_number = $tokenizer_self->{_last_line_number};
          my ( $offset, $numbered_line, $underline ) =
            make_numbered_line( $input_line_number, $input_line, $pos );
          $underline = write_on_underline( $underline, $pos - $offset, '^' );
          my ( $offset, $numbered_line, $underline ) =
            make_numbered_line( $input_line_number, $input_line, $pos );
          $underline = write_on_underline( $underline, $pos - $offset, '^' );
@@ -22463,1606 +23412,1367 @@ sub unexpected {
      }
  }
  
      }
  }
  
-sub indicate_error {
-    my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
-    interrupt_logfile();
-    warning($msg);
-    write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
-    resume_logfile();
-}
+sub is_non_structural_brace {
  
  
-sub write_error_indicator_pair {
-    my ( $line_number, $input_line, $pos, $carrat ) = @_;
-    my ( $offset, $numbered_line, $underline ) =
-      make_numbered_line( $line_number, $input_line, $pos );
-    $underline = write_on_underline( $underline, $pos - $offset, $carrat );
-    warning( $numbered_line . "\n" );
-    $underline =~ s/\s*$//;
-    warning( $underline . "\n" );
-}
+    # Decide if a brace or bracket is structural or non-structural
+    # by looking at the previous token and type
+    # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token
  
  
-sub make_numbered_line {
+    # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
+    # Tentatively deactivated because it caused the wrong operator expectation
+    # for this code:
+    #      $user = @vars[1] / 100;
+    # Must update sub operator_expected before re-implementing.
+    # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
+    #    return 0;
+    # }
  
  
-    #  Given an input line, its line number, and a character position of
-    #  interest, create a string not longer than 80 characters of the form
-    #     $lineno: sub_string
-    #  such that the sub_string of $str contains the position of interest
-    #
-    #  Here is an example of what we want, in this case we add trailing
-    #  '...' because the line is long.
-    #
-    # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
-    #
-    #  Here is another example, this time in which we used leading '...'
-    #  because of excessive length:
-    #
-    # 2: ... er of the World Wide Web Consortium's
-    #
-    #  input parameters are:
-    #   $lineno = line number
-    #   $str = the text of the line
-    #   $pos = position of interest (the error) : 0 = first character
-    #
-    #   We return :
-    #     - $offset = an offset which corrects the position in case we only
-    #       display part of a line, such that $pos-$offset is the effective
-    #       position from the start of the displayed line.
-    #     - $numbered_line = the numbered line as above,
-    #     - $underline = a blank 'underline' which is all spaces with the same
-    #       number of characters as the numbered line.
+    # NOTE: braces after type characters start code blocks, but for
+    # simplicity these are not identified as such.  See also
+    # sub code_block_type
+    # if ($last_nonblank_type eq 't') {return 0}
  
  
-    my ( $lineno, $str, $pos ) = @_;
-    my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
-    my $excess = length($str) - $offset - 68;
-    my $numc   = ( $excess > 0 ) ? 68 : undef;
+    # otherwise, it is non-structural if it is decorated
+    # by type information.
+    # For example, the '{' here is non-structural:   ${xxx}
+    (
+        $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
  
  
-    if ( defined($numc) ) {
-        if ( $offset == 0 ) {
-            $str = substr( $str, $offset, $numc - 4 ) . " ...";
-        }
-        else {
-            $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
-        }
-    }
-    else {
+          # or if we follow a hash or array closing curly brace or bracket
+          # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
+          # because the first '}' would have been given type 'R'
+          || $last_nonblank_type =~ /^([R\]])$/
+    );
+}
  
  
-        if ( $offset == 0 ) {
-        }
-        else {
-            $str = "... " . substr( $str, $offset + 4 );
-        }
-    }
+#########i#############################################################
+# Tokenizer routines for tracking container nesting depths
+#######################################################################
  
  
-    my $numbered_line = sprintf( "%d: ", $lineno );
-    $offset -= length($numbered_line);
-    $numbered_line .= $str;
-    my $underline = " " x length($numbered_line);
-    return ( $offset, $numbered_line, $underline );
-}
+# The following routines keep track of nesting depths of the nesting
+# types, ( [ { and ?.  This is necessary for determining the indentation
+# level, and also for debugging programs.  Not only do they keep track of
+# nesting depths of the individual brace types, but they check that each
+# of the other brace types is balanced within matching pairs.  For
+# example, if the program sees this sequence:
+#
+#         {  ( ( ) }
+#
+# then it can determine that there is an extra left paren somewhere
+# between the { and the }.  And so on with every other possible
+# combination of outer and inner brace types.  For another
+# example:
+#
+#         ( [ ..... ]  ] )
+#
+# which has an extra ] within the parens.
+#
+# The brace types have indexes 0 .. 3 which are indexes into
+# the matrices.
+#
+# The pair ? : are treated as just another nesting type, with ? acting
+# as the opening brace and : acting as the closing brace.
+#
+# The matrix
+#
+#         $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
+#
+# saves the nesting depth of brace type $b (where $b is either of the other
+# nesting types) when brace type $a enters a new depth.  When this depth
+# decreases, a check is made that the current depth of brace types $b is
+# unchanged, or otherwise there must have been an error.  This can
+# be very useful for localizing errors, particularly when perl runs to
+# the end of a large file (such as this one) and announces that there
+# is a problem somewhere.
+#
+# A numerical sequence number is maintained for every nesting type,
+# so that each matching pair can be uniquely identified in a simple
+# way.
  
  
-sub write_on_underline {
+sub increase_nesting_depth {
+    my ( $a, $pos ) = @_;
  
  
-    # The "underline" is a string that shows where an error is; it starts
-    # out as a string of blanks with the same length as the numbered line of
-    # code above it, and we have to add marking to show where an error is.
-    # In the example below, we want to write the string '--^' just below
-    # the line of bad code:
-    #
-    # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
-    #                 ---^
-    # We are given the current underline string, plus a position and a
-    # string to write on it.
-    #
-    # In the above example, there will be 2 calls to do this:
-    # First call:  $pos=19, pos_chr=^
-    # Second call: $pos=16, pos_chr=---
-    #
-    # This is a trivial thing to do with substr, but there is some
-    # checking to do.
+    # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
+    # @current_sequence_number, @depth_array, @starting_line_of_current_depth
+    my $b;
+    $current_depth[$a]++;
+    my $input_line_number = $tokenizer_self->{_last_line_number};
+    my $input_line        = $tokenizer_self->{_line_text};
  
  
-    my ( $underline, $pos, $pos_chr ) = @_;
+    # Sequence numbers increment by number of items.  This keeps
+    # a unique set of numbers but still allows the relative location
+    # of any type to be determined.
+    $nesting_sequence_number[$a] += scalar(@closing_brace_names);
+    my $seqno = $nesting_sequence_number[$a];
+    $current_sequence_number[$a][ $current_depth[$a] ] = $seqno;
  
  
-    # check for error..shouldn't happen
-    unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
-        return $underline;
-    }
-    my $excess = length($pos_chr) + $pos - length($underline);
-    if ( $excess > 0 ) {
-        $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
+    $starting_line_of_current_depth[$a][ $current_depth[$a] ] =
+      [ $input_line_number, $input_line, $pos ];
+
+    for $b ( 0 .. $#closing_brace_names ) {
+        next if ( $b == $a );
+        $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
      }
      }
-    substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
-    return ($underline);
+    return $seqno;
  }
  
  }
  
-sub is_non_structural_brace {
-
-    # Decide if a brace or bracket is structural or non-structural
-    # by looking at the previous token and type
-
-    # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
-    # Tentatively deactivated because it caused the wrong operator expectation
-    # for this code:
-    #      $user = @vars[1] / 100;
-    # Must update sub operator_expected before re-implementing.
-    # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
-    #    return 0;
-    # }
-
-    # NOTE: braces after type characters start code blocks, but for
-    # simplicity these are not identified as such.  See also
-    # sub code_block_type
-    # if ($last_nonblank_type eq 't') {return 0}
+sub decrease_nesting_depth {
  
  
-    # otherwise, it is non-structural if it is decorated
-    # by type information.
-    # For example, the '{' here is non-structural:   ${xxx}
-    (
-        $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
+    my ( $a, $pos ) = @_;
  
  
-          # or if we follow a hash or array closing curly brace or bracket
-          # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
-          # because the first '}' would have been given type 'R'
-          || $last_nonblank_type =~ /^([R\]])$/
-    );
-}
+    # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
+    # @current_sequence_number, @depth_array, @starting_line_of_current_depth
+    my $b;
+    my $seqno             = 0;
+    my $input_line_number = $tokenizer_self->{_last_line_number};
+    my $input_line        = $tokenizer_self->{_line_text};
  
  
-sub operator_expected {
+    if ( $current_depth[$a] > 0 ) {
  
  
-    # Many perl symbols have two or more meanings.  For example, '<<'
-    # can be a shift operator or a here-doc operator.  The
-    # interpretation of these symbols depends on the current state of
-    # the tokenizer, which may either be expecting a term or an
-    # operator.  For this example, a << would be a shift if an operator
-    # is expected, and a here-doc if a term is expected.  This routine
-    # is called to make this decision for any current token.  It returns
-    # one of three possible values:
-    #
-    #     OPERATOR - operator expected (or at least, not a term)
-    #     UNKNOWN  - can't tell
-    #     TERM     - a term is expected (or at least, not an operator)
-    #
-    # The decision is based on what has been seen so far.  This
-    # information is stored in the "$last_nonblank_type" and
-    # "$last_nonblank_token" variables.  For example, if the
-    # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
-    # if $last_nonblank_type is 'n' (numeric), we are expecting an
-    # OPERATOR.
-    #
-    # If a UNKNOWN is returned, the calling routine must guess. A major
-    # goal of this tokenizer is to minimize the possiblity of returning
-    # UNKNOWN, because a wrong guess can spoil the formatting of a
-    # script.
-    #
-    # adding NEW_TOKENS: it is critically important that this routine be
-    # updated to allow it to determine if an operator or term is to be
-    # expected after the new token.  Doing this simply involves adding
-    # the new token character to one of the regexes in this routine or
-    # to one of the hash lists
-    # that it uses, which are initialized in the BEGIN section.
+        $seqno = $current_sequence_number[$a][ $current_depth[$a] ];
  
  
-    my ( $prev_type, $tok, $next_type ) = @_;
-    my $op_expected = UNKNOWN;
+        # check that any brace types $b contained within are balanced
+        for $b ( 0 .. $#closing_brace_names ) {
+            next if ( $b == $a );
  
  
-#print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";
+            unless ( $depth_array[$a][$b][ $current_depth[$a] ] ==
+                $current_depth[$b] )
+            {
+                my $diff = $current_depth[$b] -
+                  $depth_array[$a][$b][ $current_depth[$a] ];
  
  
-# Note: function prototype is available for token type 'U' for future
-# program development.  It contains the leading and trailing parens,
-# and no blanks.  It might be used to eliminate token type 'C', for
-# example (prototype = '()'). Thus:
-# if ($last_nonblank_type eq 'U') {
-#     print "previous token=$last_nonblank_token  type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
-# }
+                # don't whine too many times
+                my $saw_brace_error = get_saw_brace_error();
+                if (
+                    $saw_brace_error <= MAX_NAG_MESSAGES
  
  
-    # A possible filehandle (or object) requires some care...
-    if ( $last_nonblank_type eq 'Z' ) {
+                    # if too many closing types have occured, we probably
+                    # already caught this error
+                    && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
+                  )
+                {
+                    interrupt_logfile();
+                    my $rsl =
+                      $starting_line_of_current_depth[$a][ $current_depth[$a] ];
+                    my $sl  = $$rsl[0];
+                    my $rel = [ $input_line_number, $input_line, $pos ];
+                    my $el  = $$rel[0];
+                    my ($ess);
  
  
-        # angle.t
-        if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
-            $op_expected = UNKNOWN;
-        }
+                    if ( $diff == 1 || $diff == -1 ) {
+                        $ess = '';
+                    }
+                    else {
+                        $ess = 's';
+                    }
+                    my $bname =
+                      ( $diff > 0 )
+                      ? $opening_brace_names[$b]
+                      : $closing_brace_names[$b];
+                    write_error_indicator_pair( @$rsl, '^' );
+                    my $msg = <<"EOM";
+Found $diff extra $bname$ess between $opening_brace_names[$a] on line $sl and $closing_brace_names[$a] on line $el
+EOM
  
  
-        # For possible file handle like "$a", Perl uses weird parsing rules.
-        # For example:
-        # print $a/2,"/hi";   - division
-        # print $a / 2,"/hi"; - division
-        # print $a/ 2,"/hi";  - division
-        # print $a /2,"/hi";  - pattern (and error)!
-        elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
-            $op_expected = TERM;
+                    if ( $diff > 0 ) {
+                        my $rml =
+                          $starting_line_of_current_depth[$b]
+                          [ $current_depth[$b] ];
+                        my $ml = $$rml[0];
+                        $msg .=
+"    The most recent un-matched $bname is on line $ml\n";
+                        write_error_indicator_pair( @$rml, '^' );
+                    }
+                    write_error_indicator_pair( @$rel, '^' );
+                    warning($msg);
+                    resume_logfile();
+                }
+                increment_brace_error();
+            }
          }
          }
+        $current_depth[$a]--;
+    }
+    else {
  
  
-        # Note when an operation is being done where a
-        # filehandle might be expected, since a change in whitespace
-        # could change the interpretation of the statement.
-        else {
-            if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
-                complain("operator in print statement not recommended\n");
-                $op_expected = OPERATOR;
-            }
+        my $saw_brace_error = get_saw_brace_error();
+        if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
+            my $msg = <<"EOM";
+There is no previous $opening_brace_names[$a] to match a $closing_brace_names[$a] on line $input_line_number
+EOM
+            indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
          }
          }
+        increment_brace_error();
      }
      }
+    return $seqno;
+}
  
  
-    # handle something after 'do' and 'eval'
-    elsif ( $is_block_operator{$last_nonblank_token} ) {
+sub check_final_nesting_depths {
+    my ($a);
  
  
-        # something like $a = eval "expression";
-        #                          ^
-        if ( $last_nonblank_type eq 'k' ) {
-            $op_expected = TERM;    # expression or list mode following keyword
-        }
+    # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth
  
  
-        # something like $a = do { BLOCK } / 2;
-        #                                  ^
-        else {
-            $op_expected = OPERATOR;    # block mode following }
+    for $a ( 0 .. $#closing_brace_names ) {
+
+        if ( $current_depth[$a] ) {
+            my $rsl = $starting_line_of_current_depth[$a][ $current_depth[$a] ];
+            my $sl  = $$rsl[0];
+            my $msg = <<"EOM";
+Final nesting depth of $opening_brace_names[$a]s is $current_depth[$a]
+The most recent un-matched $opening_brace_names[$a] is on line $sl
+EOM
+            indicate_error( $msg, @$rsl, '^' );
+            increment_brace_error();
          }
      }
          }
      }
+}
  
  
-    # handle bare word..
-    elsif ( $last_nonblank_type eq 'w' ) {
+#########i#############################################################
+# Tokenizer routines for looking ahead in input stream
+#######################################################################
  
  
-        # unfortunately, we can't tell what type of token to expect next
-        # after most bare words
-        $op_expected = UNKNOWN;
+sub peek_ahead_for_n_nonblank_pre_tokens {
+
+    # returns next n pretokens if they exist
+    # returns undef's if hits eof without seeing any pretokens
+    # USES GLOBAL VARIABLES: $tokenizer_self
+    my $max_pretokens = shift;
+    my $line;
+    my $i = 0;
+    my ( $rpre_tokens, $rmap, $rpre_types );
+
+    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
+    {
+        $line =~ s/^\s*//;    # trim leading blanks
+        next if ( length($line) <= 0 );    # skip blank
+        next if ( $line =~ /^#/ );         # skip comment
+        ( $rpre_tokens, $rmap, $rpre_types ) =
+          pre_tokenize( $line, $max_pretokens );
+        last;
      }
      }
+    return ( $rpre_tokens, $rpre_types );
+}
  
  
-    # operator, but not term possible after these types
-    # Note: moved ')' from type to token because parens in list context
-    # get marked as '{' '}' now.  This is a minor glitch in the following:
-    #    my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
-    #
-    elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
-        || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
+# look ahead for next non-blank, non-comment line of code
+sub peek_ahead_for_nonblank_token {
+
+    # USES GLOBAL VARIABLES: $tokenizer_self
+    my ( $rtokens, $max_token_index ) = @_;
+    my $line;
+    my $i = 0;
+
+    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
      {
      {
-        $op_expected = OPERATOR;
+        $line =~ s/^\s*//;    # trim leading blanks
+        next if ( length($line) <= 0 );    # skip blank
+        next if ( $line =~ /^#/ );         # skip comment
+        my ( $rtok, $rmap, $rtype ) =
+          pre_tokenize( $line, 2 );        # only need 2 pre-tokens
+        my $j = $max_token_index + 1;
+        my $tok;
  
  
-        # in a 'use' statement, numbers and v-strings are not true
-        # numbers, so to avoid incorrect error messages, we will
-        # mark them as unknown for now (use.t)
-        # TODO: it would be much nicer to create a new token V for VERSION
-        # number in a use statement.  Then this could be a check on type V
-        # and related patches which change $statement_type for '=>'
-        # and ',' could be removed.  Further, it would clean things up to
-        # scan the 'use' statement with a separate subroutine.
-        if (   ( $statement_type eq 'use' )
-            && ( $last_nonblank_type =~ /^[nv]$/ ) )
-        {
-            $op_expected = UNKNOWN;
+        foreach $tok (@$rtok) {
+            last if ( $tok =~ "\n" );
+            $$rtokens[ ++$j ] = $tok;
          }
          }
+        last;
      }
      }
+    return $rtokens;
+}
  
  
-    # no operator after many keywords, such as "die", "warn", etc
-    elsif ( $expecting_term_token{$last_nonblank_token} ) {
+#########i#############################################################
+# Tokenizer guessing routines for ambiguous situations
+#######################################################################
  
  
-        # patch for dor.t (defined or).
-        # perl functions which may be unary operators
-        # TODO: This list is incomplete, and these should be put
-        # into a hash.
-        if (   $tok eq '/'
-            && $next_type          eq '/'
-            && $last_nonblank_type eq 'k'
-            && $last_nonblank_token =~ /^eof|undef|shift|pop$/ )
-        {
-            $op_expected = OPERATOR;
+sub guess_if_pattern_or_conditional {
+
+    # this routine is called when we have encountered a ? following an
+    # unknown bareword, and we must decide if it starts a pattern or not
+    # input parameters:
+    #   $i - token index of the ? starting possible pattern
+    # output parameters:
+    #   $is_pattern = 0 if probably not pattern,  =1 if probably a pattern
+    #   msg = a warning or diagnostic message
+    # USES GLOBAL VARIABLES: $last_nonblank_token
+    my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
+    my $is_pattern = 0;
+    my $msg        = "guessing that ? after $last_nonblank_token starts a ";
+
+    if ( $i >= $max_token_index ) {
+        $msg .= "conditional (no end to pattern found on the line)\n";
+    }
+    else {
+        my $ibeg = $i;
+        $i = $ibeg + 1;
+        my $next_token = $$rtokens[$i];    # first token after ?
+
+        # look for a possible ending ? on this line..
+        my $in_quote        = 1;
+        my $quote_depth     = 0;
+        my $quote_character = '';
+        my $quote_pos       = 0;
+        my $quoted_string;
+        (
+            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
+            $quoted_string
+          )
+          = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
+            $quote_pos, $quote_depth, $max_token_index );
+
+        if ($in_quote) {
+
+            # we didn't find an ending ? on this line,
+            # so we bias towards conditional
+            $is_pattern = 0;
+            $msg .= "conditional (no ending ? on this line)\n";
+
+            # we found an ending ?, so we bias towards a pattern
          }
          else {
          }
          else {
-            $op_expected = TERM;
+
+            if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
+                $is_pattern = 1;
+                $msg .= "pattern (found ending ? and pattern expected)\n";
+            }
+            else {
+                $msg .= "pattern (uncertain, but found ending ?)\n";
+            }
          }
      }
          }
      }
+    return ( $is_pattern, $msg );
+}
  
  
-    # no operator after things like + - **  (i.e., other operators)
-    elsif ( $expecting_term_types{$last_nonblank_type} ) {
-        $op_expected = TERM;
-    }
+sub guess_if_pattern_or_division {
  
  
-    # a few operators, like "time", have an empty prototype () and so
-    # take no parameters but produce a value to operate on
-    elsif ( $expecting_operator_token{$last_nonblank_token} ) {
-        $op_expected = OPERATOR;
-    }
+    # this routine is called when we have encountered a / following an
+    # unknown bareword, and we must decide if it starts a pattern or is a
+    # division
+    # input parameters:
+    #   $i - token index of the / starting possible pattern
+    # output parameters:
+    #   $is_pattern = 0 if probably division,  =1 if probably a pattern
+    #   msg = a warning or diagnostic message
+    # USES GLOBAL VARIABLES: $last_nonblank_token
+    my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
+    my $is_pattern = 0;
+    my $msg        = "guessing that / after $last_nonblank_token starts a ";
  
  
-    # post-increment and decrement produce values to be operated on
-    elsif ( $expecting_operator_types{$last_nonblank_type} ) {
-        $op_expected = OPERATOR;
+    if ( $i >= $max_token_index ) {
+        "division (no end to pattern found on the line)\n";
      }
      }
+    else {
+        my $ibeg = $i;
+        my $divide_expected =
+          numerator_expected( $i, $rtokens, $max_token_index );
+        $i = $ibeg + 1;
+        my $next_token = $$rtokens[$i];    # first token after slash
  
  
-    # no value to operate on after sub block
-    elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
+        # look for a possible ending / on this line..
+        my $in_quote        = 1;
+        my $quote_depth     = 0;
+        my $quote_character = '';
+        my $quote_pos       = 0;
+        my $quoted_string;
+        (
+            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
+            $quoted_string
+          )
+          = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
+            $quote_pos, $quote_depth, $max_token_index );
  
  
-    # a right brace here indicates the end of a simple block.
-    # all non-structural right braces have type 'R'
-    # all braces associated with block operator keywords have been given those
-    # keywords as "last_nonblank_token" and caught above.
-    # (This statement is order dependent, and must come after checking
-    # $last_nonblank_token).
-    elsif ( $last_nonblank_type eq '}' ) {
+        if ($in_quote) {
+
+            # we didn't find an ending / on this line,
+            # so we bias towards division
+            if ( $divide_expected >= 0 ) {
+                $is_pattern = 0;
+                $msg .= "division (no ending / on this line)\n";
+            }
+            else {
+                $msg        = "multi-line pattern (division not possible)\n";
+                $is_pattern = 1;
+            }
  
  
-        # patch for dor.t (defined or).
-        if (   $tok eq '/'
-            && $next_type           eq '/'
-            && $last_nonblank_token eq ']' )
-        {
-            $op_expected = OPERATOR;
          }
          }
+
+        # we found an ending /, so we bias towards a pattern
          else {
          else {
-            $op_expected = TERM;
-        }
-    }
  
  
-    # something else..what did I forget?
-    else {
+            if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
  
  
-        # collecting diagnostics on unknown operator types..see what was missed
-        $op_expected = UNKNOWN;
-        write_diagnostics(
-"OP: unknown after type=$last_nonblank_type  token=$last_nonblank_token\n"
-        );
-    }
+                if ( $divide_expected >= 0 ) {
  
  
-    TOKENIZER_DEBUG_FLAG_EXPECT && do {
-        print
-"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
-    };
-    return $op_expected;
+                    if ( $i - $ibeg > 60 ) {
+                        $msg .= "division (matching / too distant)\n";
+                        $is_pattern = 0;
+                    }
+                    else {
+                        $msg .= "pattern (but division possible too)\n";
+                        $is_pattern = 1;
+                    }
+                }
+                else {
+                    $is_pattern = 1;
+                    $msg .= "pattern (division not possible)\n";
+                }
+            }
+            else {
+
+                if ( $divide_expected >= 0 ) {
+                    $is_pattern = 0;
+                    $msg .= "division (pattern not possible)\n";
+                }
+                else {
+                    $is_pattern = 1;
+                    $msg .=
+                      "pattern (uncertain, but division would not work here)\n";
+                }
+            }
+        }
+    }
+    return ( $is_pattern, $msg );
  }
  
  }
  
-# The following routines keep track of nesting depths of the nesting
-# types, ( [ { and ?.  This is necessary for determining the indentation
-# level, and also for debugging programs.  Not only do they keep track of
-# nesting depths of the individual brace types, but they check that each
-# of the other brace types is balanced within matching pairs.  For
-# example, if the program sees this sequence:
-#
-#         {  ( ( ) }
-#
-# then it can determine that there is an extra left paren somewhere
-# between the { and the }.  And so on with every other possible
-# combination of outer and inner brace types.  For another
-# example:
-#
-#         ( [ ..... ]  ] )
-#
-# which has an extra ] within the parens.
-#
-# The brace types have indexes 0 .. 3 which are indexes into
-# the matrices.
-#
-# The pair ? : are treated as just another nesting type, with ? acting
-# as the opening brace and : acting as the closing brace.
-#
-# The matrix
-#
-#         $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
-#
-# saves the nesting depth of brace type $b (where $b is either of the other
-# nesting types) when brace type $a enters a new depth.  When this depth
-# decreases, a check is made that the current depth of brace types $b is
-# unchanged, or otherwise there must have been an error.  This can
-# be very useful for localizing errors, particularly when perl runs to
-# the end of a large file (such as this one) and announces that there
-# is a problem somewhere.
-#
-# A numerical sequence number is maintained for every nesting type,
-# so that each matching pair can be uniquely identified in a simple
-# way.
+# try to resolve here-doc vs. shift by looking ahead for
+# non-code or the end token (currently only looks for end token)
+# returns 1 if it is probably a here doc, 0 if not
+sub guess_if_here_doc {
  
  
-sub increase_nesting_depth {
-    my ( $a, $i_tok ) = @_;
-    my $b;
-    $current_depth[$a]++;
+    # This is how many lines we will search for a target as part of the
+    # guessing strategy.  It is a constant because there is probably
+    # little reason to change it.
+    # USES GLOBAL VARIABLES: $tokenizer_self, $current_package
+    # %is_constant,
+    use constant HERE_DOC_WINDOW => 40;
  
  
-    # Sequence numbers increment by number of items.  This keeps
-    # a unique set of numbers but still allows the relative location
-    # of any type to be determined.
-    $nesting_sequence_number[$a] += scalar(@closing_brace_names);
-    my $seqno = $nesting_sequence_number[$a];
-    $current_sequence_number[$a][ $current_depth[$a] ] = $seqno;
+    my $next_token        = shift;
+    my $here_doc_expected = 0;
+    my $line;
+    my $k   = 0;
+    my $msg = "checking <<";
  
  
-    my $pos = $$rpretoken_map[$i_tok];
-    $starting_line_of_current_depth[$a][ $current_depth[$a] ] =
-      [ $input_line_number, $input_line, $pos ];
+    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
+    {
+        chomp $line;
  
  
-    for $b ( 0 .. $#closing_brace_names ) {
-        next if ( $b == $a );
-        $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
+        if ( $line =~ /^$next_token$/ ) {
+            $msg .= " -- found target $next_token ahead $k lines\n";
+            $here_doc_expected = 1;    # got it
+            last;
+        }
+        last if ( $k >= HERE_DOC_WINDOW );
      }
      }
-    return $seqno;
-}
-
-sub decrease_nesting_depth {
  
  
-    my ( $a, $i_tok ) = @_;
-    my $pos = $$rpretoken_map[$i_tok];
-    my $b;
-    my $seqno = 0;
-
-    if ( $current_depth[$a] > 0 ) {
+    unless ($here_doc_expected) {
  
  
-        $seqno = $current_sequence_number[$a][ $current_depth[$a] ];
+        if ( !defined($line) ) {
+            $here_doc_expected = -1;    # hit eof without seeing target
+            $msg .= " -- must be shift; target $next_token not in file\n";
  
  
-        # check that any brace types $b contained within are balanced
-        for $b ( 0 .. $#closing_brace_names ) {
-            next if ( $b == $a );
+        }
+        else {                          # still unsure..taking a wild guess
  
  
-            unless ( $depth_array[$a][$b][ $current_depth[$a] ] ==
-                $current_depth[$b] )
-            {
-                my $diff = $current_depth[$b] -
-                  $depth_array[$a][$b][ $current_depth[$a] ];
+            if ( !$is_constant{$current_package}{$next_token} ) {
+                $here_doc_expected = 1;
+                $msg .=
+                  " -- guessing it's a here-doc ($next_token not a constant)\n";
+            }
+            else {
+                $msg .=
+                  " -- guessing it's a shift ($next_token is a constant)\n";
+            }
+        }
+    }
+    write_logfile_entry($msg);
+    return $here_doc_expected;
+}
  
  
-                # don't whine too many times
-                my $saw_brace_error = get_saw_brace_error();
-                if (
-                    $saw_brace_error <= MAX_NAG_MESSAGES
+#########i#############################################################
+# Tokenizer Routines for scanning identifiers and related items
+#######################################################################
  
  
-                    # if too many closing types have occured, we probably
-                    # already caught this error
-                    && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
-                  )
-                {
-                    interrupt_logfile();
-                    my $rsl =
-                      $starting_line_of_current_depth[$a][ $current_depth[$a] ];
-                    my $sl  = $$rsl[0];
-                    my $rel = [ $input_line_number, $input_line, $pos ];
-                    my $el  = $$rel[0];
-                    my ($ess);
+sub scan_bare_identifier_do {
  
  
-                    if ( $diff == 1 || $diff == -1 ) {
-                        $ess = '';
-                    }
-                    else {
-                        $ess = 's';
-                    }
-                    my $bname =
-                      ( $diff > 0 )
-                      ? $opening_brace_names[$b]
-                      : $closing_brace_names[$b];
-                    write_error_indicator_pair( @$rsl, '^' );
-                    my $msg = <<"EOM";
-Found $diff extra $bname$ess between $opening_brace_names[$a] on line $sl and $closing_brace_names[$a] on line $el
-EOM
+    # this routine is called to scan a token starting with an alphanumeric
+    # variable or package separator, :: or '.
+    # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
+    # $last_nonblank_type,@paren_type, $paren_depth
  
  
-                    if ( $diff > 0 ) {
-                        my $rml =
-                          $starting_line_of_current_depth[$b]
-                          [ $current_depth[$b] ];
-                        my $ml = $$rml[0];
-                        $msg .=
-"    The most recent un-matched $bname is on line $ml\n";
-                        write_error_indicator_pair( @$rml, '^' );
-                    }
-                    write_error_indicator_pair( @$rel, '^' );
-                    warning($msg);
-                    resume_logfile();
-                }
-                increment_brace_error();
-            }
-        }
-        $current_depth[$a]--;
-    }
-    else {
+    my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map,
+        $max_token_index )
+      = @_;
+    my $i_begin = $i;
+    my $package = undef;
  
  
-        my $saw_brace_error = get_saw_brace_error();
-        if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
-            my $msg = <<"EOM";
-There is no previous $opening_brace_names[$a] to match a $closing_brace_names[$a] on line $input_line_number
-EOM
-            indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
-        }
-        increment_brace_error();
-    }
-    return $seqno;
-}
+    my $i_beg = $i;
  
  
-sub check_final_nesting_depths {
-    my ($a);
+    # we have to back up one pretoken at a :: since each : is one pretoken
+    if ( $tok eq '::' ) { $i_beg-- }
+    if ( $tok eq '->' ) { $i_beg-- }
+    my $pos_beg = $$rtoken_map[$i_beg];
+    pos($input_line) = $pos_beg;
  
  
-    for $a ( 0 .. $#closing_brace_names ) {
+    #  Examples:
+    #   A::B::C
+    #   A::
+    #   ::A
+    #   A'B
+    if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {
  
  
-        if ( $current_depth[$a] ) {
-            my $rsl = $starting_line_of_current_depth[$a][ $current_depth[$a] ];
-            my $sl  = $$rsl[0];
-            my $msg = <<"EOM";
-Final nesting depth of $opening_brace_names[$a]s is $current_depth[$a]
-The most recent un-matched $opening_brace_names[$a] is on line $sl
-EOM
-            indicate_error( $msg, @$rsl, '^' );
-            increment_brace_error();
-        }
-    }
-}
+        my $pos  = pos($input_line);
+        my $numc = $pos - $pos_beg;
+        $tok = substr( $input_line, $pos_beg, $numc );
  
  
-sub numerator_expected {
+        # type 'w' includes anything without leading type info
+        # ($,%,@,*) including something like abc::def::ghi
+        $type = 'w';
  
  
-    # this is a filter for a possible numerator, in support of guessing
-    # for the / pattern delimiter token.
-    # returns -
-    #   1 - yes
-    #   0 - can't tell
-    #  -1 - no
-    # Note: I am using the convention that variables ending in
-    # _expected have these 3 possible values.
-    my ( $i, $rtokens ) = @_;
-    my $next_token = $$rtokens[ $i + 1 ];
-    if ( $next_token eq '=' ) { $i++; }    # handle /=
-    my ( $next_nonblank_token, $i_next ) =
-      find_next_nonblank_token( $i, $rtokens );
+        my $sub_name = "";
+        if ( defined($2) ) { $sub_name = $2; }
+        if ( defined($1) ) {
+            $package = $1;
  
  
-    if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
-        1;
-    }
-    else {
+            # patch: don't allow isolated package name which just ends
+            # in the old style package separator (single quote).  Example:
+            #   use CGI':all';
+            if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
+                $pos--;
+            }
  
  
-        if ( $next_nonblank_token =~ /^\s*$/ ) {
-            0;
+            $package =~ s/\'/::/g;
+            if ( $package =~ /^\:/ ) { $package = 'main' . $package }
+            $package =~ s/::$//;
          }
          else {
          }
          else {
-            -1;
+            $package = $current_package;
+
+            if ( $is_keyword{$tok} ) {
+                $type = 'k';
+            }
          }
          }
-    }
-}
  
  
-sub pattern_expected {
+        # if it is a bareword..
+        if ( $type eq 'w' ) {
  
  
-    # This is the start of a filter for a possible pattern.
-    # It looks at the token after a possbible pattern and tries to
-    # determine if that token could end a pattern.
-    # returns -
-    #   1 - yes
-    #   0 - can't tell
-    #  -1 - no
-    my ( $i, $rtokens ) = @_;
-    my $next_token = $$rtokens[ $i + 1 ];
-    if ( $next_token =~ /^[cgimosx]/ ) { $i++; }    # skip possible modifier
-    my ( $next_nonblank_token, $i_next ) =
-      find_next_nonblank_token( $i, $rtokens );
+            # check for v-string with leading 'v' type character
+            # (This seems to have presidence over filehandle, type 'Y')
+            if ( $tok =~ /^v\d[_\d]*$/ ) {
  
  
-    # list of tokens which may follow a pattern
-    # (can probably be expanded)
-    if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
-    {
-        1;
-    }
-    else {
+                # we only have the first part - something like 'v101' -
+                # look for more
+                if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
+                    $pos  = pos($input_line);
+                    $numc = $pos - $pos_beg;
+                    $tok  = substr( $input_line, $pos_beg, $numc );
+                }
+                $type = 'v';
  
  
-        if ( $next_nonblank_token =~ /^\s*$/ ) {
-            0;
-        }
-        else {
-            -1;
-        }
-    }
-}
+                # warn if this version can't handle v-strings
+                report_v_string($tok);
+            }
  
  
-sub find_next_nonblank_token_on_this_line {
-    my ( $i, $rtokens ) = @_;
-    my $next_nonblank_token;
+            elsif ( $is_constant{$package}{$sub_name} ) {
+                $type = 'C';
+            }
  
  
-    if ( $i < $max_token_index ) {
-        $next_nonblank_token = $$rtokens[ ++$i ];
+            # bareword after sort has implied empty prototype; for example:
+            # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
+            # This has priority over whatever the user has specified.
+            elsif ($last_nonblank_token eq 'sort'
+                && $last_nonblank_type eq 'k' )
+            {
+                $type = 'Z';
+            }
  
  
-        if ( $next_nonblank_token =~ /^\s*$/ ) {
+            # Note: strangely, perl does not seem to really let you create
+            # functions which act like eval and do, in the sense that eval
+            # and do may have operators following the final }, but any operators
+            # that you create with prototype (&) apparently do not allow
+            # trailing operators, only terms.  This seems strange.
+            # If this ever changes, here is the update
+            # to make perltidy behave accordingly:
  
  
-            if ( $i < $max_token_index ) {
-                $next_nonblank_token = $$rtokens[ ++$i ];
+            # elsif ( $is_block_function{$package}{$tok} ) {
+            #    $tok='eval'; # patch to do braces like eval  - doesn't work
+            #    $type = 'k';
+            #}
+            # FIXME: This could become a separate type to allow for different
+            # future behavior:
+            elsif ( $is_block_function{$package}{$sub_name} ) {
+                $type = 'G';
              }
              }
-        }
-    }
-    else {
-        $next_nonblank_token = "";
-    }
-    return ( $next_nonblank_token, $i );
-}
-
-sub find_next_nonblank_token {
-    my ( $i, $rtokens ) = @_;
  
  
-    if ( $i >= $max_token_index ) {
+            elsif ( $is_block_list_function{$package}{$sub_name} ) {
+                $type = 'G';
+            }
+            elsif ( $is_user_function{$package}{$sub_name} ) {
+                $type      = 'U';
+                $prototype = $user_function_prototype{$package}{$sub_name};
+            }
  
  
-        if ( !$peeked_ahead ) {
-            $peeked_ahead = 1;
-            $rtokens      = peek_ahead_for_nonblank_token($rtokens);
-        }
-    }
-    my $next_nonblank_token = $$rtokens[ ++$i ];
+            # check for indirect object
+            elsif (
  
  
-    if ( $next_nonblank_token =~ /^\s*$/ ) {
-        $next_nonblank_token = $$rtokens[ ++$i ];
-    }
-    return ( $next_nonblank_token, $i );
-}
+                # added 2001-03-27: must not be followed immediately by '('
+                # see fhandle.t
+                ( $input_line !~ m/\G\(/gc )
  
  
-sub peek_ahead_for_n_nonblank_pre_tokens {
+                # and
+                && (
  
  
-    # returns next n pretokens if they exist
-    # returns undef's if hits eof without seeing any pretokens
-    my $max_pretokens = shift;
-    my $line;
-    my $i = 0;
-    my ( $rpre_tokens, $rmap, $rpre_types );
+                    # preceded by keyword like 'print', 'printf' and friends
+                    $is_indirect_object_taker{$last_nonblank_token}
  
  
-    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
-    {
-        $line =~ s/^\s*//;    # trim leading blanks
-        next if ( length($line) <= 0 );    # skip blank
-        next if ( $line =~ /^#/ );         # skip comment
-        ( $rpre_tokens, $rmap, $rpre_types ) =
-          pre_tokenize( $line, $max_pretokens );
-        last;
-    }
-    return ( $rpre_tokens, $rpre_types );
-}
+                    # or preceded by something like 'print(' or 'printf('
+                    || (
+                        ( $last_nonblank_token eq '(' )
+                        && $is_indirect_object_taker{ $paren_type[$paren_depth]
+                        }
  
  
-# look ahead for next non-blank, non-comment line of code
-sub peek_ahead_for_nonblank_token {
-    my $rtokens = shift;
-    my $line;
-    my $i = 0;
+                    )
+                )
+              )
+            {
  
  
-    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
-    {
-        $line =~ s/^\s*//;    # trim leading blanks
-        next if ( length($line) <= 0 );    # skip blank
-        next if ( $line =~ /^#/ );         # skip comment
-        my ( $rtok, $rmap, $rtype ) =
-          pre_tokenize( $line, 2 );        # only need 2 pre-tokens
-        my $j = $max_token_index + 1;
-        my $tok;
+                # may not be indirect object unless followed by a space
+                if ( $input_line =~ m/\G\s+/gc ) {
+                    $type = 'Y';
  
  
-        foreach $tok (@$rtok) {
-            last if ( $tok =~ "\n" );
-            $$rtokens[ ++$j ] = $tok;
-        }
-        last;
-    }
-    return $rtokens;
-}
+                    # Abandon Hope ...
+                    # Perl's indirect object notation is a very bad
+                    # thing and can cause subtle bugs, especially for
+                    # beginning programmers.  And I haven't even been
+                    # able to figure out a sane warning scheme which
+                    # doesn't get in the way of good scripts.
  
  
-sub pre_tokenize {
+                    # Complain if a filehandle has any lower case
+                    # letters.  This is suggested good practice, but the
+                    # main reason for this warning is that prior to
+                    # release 20010328, perltidy incorrectly parsed a
+                    # function call after a print/printf, with the
+                    # result that a space got added before the opening
+                    # paren, thereby converting the function name to a
+                    # filehandle according to perl's weird rules.  This
+                    # will not usually generate a syntax error, so this
+                    # is a potentially serious bug.  By warning
+                    # of filehandles with any lower case letters,
+                    # followed by opening parens, we will help the user
+                    # find almost all of these older errors.
+                    # use 'sub_name' because something like
+                    # main::MYHANDLE is ok for filehandle
+                    if ( $sub_name =~ /[a-z]/ ) {
  
  
-    # Break a string, $str, into a sequence of preliminary tokens.  We
-    # are interested in these types of tokens:
-    #   words       (type='w'),            example: 'max_tokens_wanted'
-    #   digits      (type = 'd'),          example: '0755'
-    #   whitespace  (type = 'b'),          example: '   '
-    #   any other single character (i.e. punct; type = the character itself).
-    # We cannot do better than this yet because we might be in a quoted
-    # string or pattern.  Caller sets $max_tokens_wanted to 0 to get all
-    # tokens.
-    my ( $str, $max_tokens_wanted ) = @_;
+                        # could be bug caused by older perltidy if
+                        # followed by '('
+                        if ( $input_line =~ m/\G\s*\(/gc ) {
+                            complain(
+"Caution: unknown word '$tok' in indirect object slot\n"
+                            );
+                        }
+                    }
+                }
  
  
-    # we return references to these 3 arrays:
-    my @tokens    = ();     # array of the tokens themselves
-    my @token_map = (0);    # string position of start of each token
-    my @type      = ();     # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
+                # bareword not followed by a space -- may not be filehandle
+                # (may be function call defined in a 'use' statement)
+                else {
+                    $type = 'Z';
+                }
+            }
+        }
  
  
-    do {
+        # Now we must convert back from character position
+        # to pre_token index.
+        # I don't think an error flag can occur here ..but who knows
+        my $error;
+        ( $i, $error ) =
+          inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
+        if ($error) {
+            warning("scan_bare_identifier: Possibly invalid tokenization\n");
+        }
+    }
  
  
-        # whitespace
-        if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
+    # no match but line not blank - could be syntax error
+    # perl will take '::' alone without complaint
+    else {
+        $type = 'w';
  
  
-        # numbers
-        # note that this must come before words!
-        elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
+        # change this warning to log message if it becomes annoying
+        warning("didn't find identifier after leading ::\n");
+    }
+    return ( $i, $tok, $type, $prototype );
+}
  
  
-        # words
-        elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
+sub scan_id_do {
  
  
-        # single-character punctuation
-        elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
+# This is the new scanner and will eventually replace scan_identifier.
+# Only type 'sub' and 'package' are implemented.
+# Token types $ * % @ & -> are not yet implemented.
+#
+# Scan identifier following a type token.
+# The type of call depends on $id_scan_state: $id_scan_state = ''
+# for starting call, in which case $tok must be the token defining
+# the type.
+#
+# If the type token is the last nonblank token on the line, a value
+# of $id_scan_state = $tok is returned, indicating that further
+# calls must be made to get the identifier.  If the type token is
+# not the last nonblank token on the line, the identifier is
+# scanned and handled and a value of '' is returned.
+# USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list,
+# $statement_type, $tokenizer_self
+
+    my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
+        $max_token_index )
+      = @_;
+    my $type = '';
+    my ( $i_beg, $pos_beg );
  
  
-        # that's all..
-        else {
-            return ( \@tokens, \@token_map, \@type );
-        }
+    #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
+    #my ($a,$b,$c) = caller;
+    #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
  
  
-        push @tokens,    $1;
-        push @token_map, pos($str);
+    # on re-entry, start scanning at first token on the line
+    if ($id_scan_state) {
+        $i_beg = $i;
+        $type  = '';
+    }
  
  
-    } while ( --$max_tokens_wanted != 0 );
+    # on initial entry, start scanning just after type token
+    else {
+        $i_beg         = $i + 1;
+        $id_scan_state = $tok;
+        $type          = 't';
+    }
  
  
-    return ( \@tokens, \@token_map, \@type );
-}
-
-sub show_tokens {
-
-    # this is an old debug routine
-    my ( $rtokens, $rtoken_map ) = @_;
-    my $num = scalar(@$rtokens);
-    my $i;
-
-    for ( $i = 0 ; $i < $num ; $i++ ) {
-        my $len = length( $$rtokens[$i] );
-        print "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n";
+    # find $i_beg = index of next nonblank token,
+    # and handle empty lines
+    my $blank_line          = 0;
+    my $next_nonblank_token = $$rtokens[$i_beg];
+    if ( $i_beg > $max_token_index ) {
+        $blank_line = 1;
      }
      }
-}
-
-sub find_angle_operator_termination {
-
-    # We are looking at a '<' and want to know if it is an angle operator.
-    # We are to return:
-    #   $i = pretoken index of ending '>' if found, current $i otherwise
-    #   $type = 'Q' if found, '>' otherwise
-    my ( $input_line, $i_beg, $rtoken_map, $expecting ) = @_;
-    my $i    = $i_beg;
-    my $type = '<';
-    pos($input_line) = 1 + $$rtoken_map[$i];
-
-    my $filter;
-
-    # we just have to find the next '>' if a term is expected
-    if ( $expecting == TERM ) { $filter = '[\>]' }
-
-    # we have to guess if we don't know what is expected
-    elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
-
-    # shouldn't happen - we shouldn't be here if operator is expected
-    else { warning("Program Bug in find_angle_operator_termination\n") }
-
-    # To illustrate what we might be looking at, in case we are
-    # guessing, here are some examples of valid angle operators
-    # (or file globs):
-    #  <tmp_imp/*>
-    #  <FH>
-    #  <$fh>
-    #  <*.c *.h>
-    #  <_>
-    #  <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
-    #  <${PREFIX}*img*.$IMAGE_TYPE>
-    #  <img*.$IMAGE_TYPE>
-    #  <Timg*.$IMAGE_TYPE>
-    #  <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
-    #
-    # Here are some examples of lines which do not have angle operators:
-    #  return undef unless $self->[2]++ < $#{$self->[1]};
-    #  < 2  || @$t >
-    #
-    # the following line from dlister.pl caused trouble:
-    #  print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
-    #
-    # If the '<' starts an angle operator, it must end on this line and
-    # it must not have certain characters like ';' and '=' in it.  I use
-    # this to limit the testing.  This filter should be improved if
-    # possible.
+    else {
  
  
-    if ( $input_line =~ /($filter)/g ) {
+        # only a '#' immediately after a '$' is not a comment
+        if ( $next_nonblank_token eq '#' ) {
+            unless ( $tok eq '$' ) {
+                $blank_line = 1;
+            }
+        }
  
  
-        if ( $1 eq '>' ) {
+        if ( $next_nonblank_token =~ /^\s/ ) {
+            ( $next_nonblank_token, $i_beg ) =
+              find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
+                $max_token_index );
+            if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
+                $blank_line = 1;
+            }
+        }
+    }
  
  
-            # We MAY have found an angle operator termination if we get
-            # here, but we need to do more to be sure we haven't been
-            # fooled.
-            my $pos = pos($input_line);
+    # handle non-blank line; identifier, if any, must follow
+    unless ($blank_line) {
  
  
-            my $pos_beg = $$rtoken_map[$i];
-            my $str     = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
+        if ( $id_scan_state eq 'sub' ) {
+            ( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
+                $input_line, $i,             $i_beg,
+                $tok,        $type,          $rtokens,
+                $rtoken_map, $id_scan_state, $max_token_index
+            );
+        }
  
  
-            # Reject if the closing '>' follows a '-' as in:
-            # if ( VERSION < 5.009 && $op-> name eq 'aassign' ) { }
-            if ( $expecting eq UNKNOWN ) {
-                my $check = substr( $input_line, $pos - 2, 1 );
-                if ( $check eq '-' ) {
-                    return ( $i, $type );
-                }
-            }
+        elsif ( $id_scan_state eq 'package' ) {
+            ( $i, $tok, $type ) =
+              do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
+                $rtoken_map, $max_token_index );
+            $id_scan_state = '';
+        }
  
  
-            ######################################debug#####
-            #write_diagnostics( "ANGLE? :$str\n");
-            #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
-            ######################################debug#####
-            $type = 'Q';
-            my $error;
-            ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
+        else {
+            warning("invalid token in scan_id: $tok\n");
+            $id_scan_state = '';
+        }
+    }
  
  
-            # It may be possible that a quote ends midway in a pretoken.
-            # If this happens, it may be necessary to split the pretoken.
-            if ($error) {
-                warning(
-                    "Possible tokinization error..please check this line\n");
-                report_possible_bug();
-            }
+    if ( $id_scan_state && ( !defined($type) || !$type ) ) {
  
  
-            # Now let's see where we stand....
-            # OK if math op not possible
-            if ( $expecting == TERM ) {
-            }
+        # shouldn't happen:
+        warning(
+"Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
+        );
+        report_definite_bug();
+    }
  
  
-            # OK if there are no more than 2 pre-tokens inside
-            # (not possible to write 2 token math between < and >)
-            # This catches most common cases
-            elsif ( $i <= $i_beg + 3 ) {
-                write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
-            }
+    TOKENIZER_DEBUG_FLAG_NSCAN && do {
+        print
+          "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
+    };
+    return ( $i, $tok, $type, $id_scan_state );
+}
  
  
-            # Not sure..
-            else {
+sub check_prototype {
+    my ( $proto, $package, $subname ) = @_;
+    return unless ( defined($package) && defined($subname) );
+    if ( defined($proto) ) {
+        $proto =~ s/^\s*\(\s*//;
+        $proto =~ s/\s*\)$//;
+        if ($proto) {
+            $is_user_function{$package}{$subname}        = 1;
+            $user_function_prototype{$package}{$subname} = "($proto)";
  
  
-                # Let's try a Brace Test: any braces inside must balance
-                my $br = 0;
-                while ( $str =~ /\{/g ) { $br++ }
-                while ( $str =~ /\}/g ) { $br-- }
-                my $sb = 0;
-                while ( $str =~ /\[/g ) { $sb++ }
-                while ( $str =~ /\]/g ) { $sb-- }
-                my $pr = 0;
-                while ( $str =~ /\(/g ) { $pr++ }
-                while ( $str =~ /\)/g ) { $pr-- }
+            # prototypes containing '&' must be treated specially..
+            if ( $proto =~ /\&/ ) {
  
  
-                # if braces do not balance - not angle operator
-                if ( $br || $sb || $pr ) {
-                    $i    = $i_beg;
-                    $type = '<';
-                    write_diagnostics(
-                        "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
+                # right curly braces of prototypes ending in
+                # '&' may be followed by an operator
+                if ( $proto =~ /\&$/ ) {
+                    $is_block_function{$package}{$subname} = 1;
                  }
  
                  }
  
-                # we should keep doing more checks here...to be continued
-                # Tentatively accepting this as a valid angle operator.
-                # There are lots more things that can be checked.
-                else {
-                    write_diagnostics(
-                        "ANGLE-Guessing yes: $str expecting=$expecting\n");
-                    write_logfile_entry("Guessing angle operator here: $str\n");
+                # right curly braces of prototypes NOT ending in
+                # '&' may NOT be followed by an operator
+                elsif ( $proto !~ /\&$/ ) {
+                    $is_block_list_function{$package}{$subname} = 1;
                  }
              }
          }
                  }
              }
          }
-
-        # didn't find ending >
          else {
          else {
-            if ( $expecting == TERM ) {
-                warning("No ending > for angle operator\n");
-            }
+            $is_constant{$package}{$subname} = 1;
          }
      }
          }
      }
-    return ( $i, $type );
+    else {
+        $is_user_function{$package}{$subname} = 1;
+    }
  }
  
  }
  
-sub inverse_pretoken_map {
+sub do_scan_package {
  
  
-    # Starting with the current pre_token index $i, scan forward until
-    # finding the index of the next pre_token whose position is $pos.
-    my ( $i, $pos, $rtoken_map ) = @_;
-    my $error = 0;
+    # do_scan_package parses a package name
+    # it is called with $i_beg equal to the index of the first nonblank
+    # token following a 'package' token.
+    # USES GLOBAL VARIABLES: $current_package,
  
  
-    while ( ++$i <= $max_token_index ) {
+    my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
+        $max_token_index )
+      = @_;
+    my $package = undef;
+    my $pos_beg = $$rtoken_map[$i_beg];
+    pos($input_line) = $pos_beg;
  
  
-        if ( $pos <= $$rtoken_map[$i] ) {
+    # handle non-blank line; package name, if any, must follow
+    if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
+        $package = $1;
+        $package = ( defined($1) && $1 ) ? $1 : 'main';
+        $package =~ s/\'/::/g;
+        if ( $package =~ /^\:/ ) { $package = 'main' . $package }
+        $package =~ s/::$//;
+        my $pos  = pos($input_line);
+        my $numc = $pos - $pos_beg;
+        $tok = 'package ' . substr( $input_line, $pos_beg, $numc );
+        $type = 'i';
  
  
-            # Let the calling routine handle errors in which we do not
-            # land on a pre-token boundary.  It can happen by running
-            # perltidy on some non-perl scripts, for example.
-            if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
-            $i--;
-            last;
+        # Now we must convert back from character position
+        # to pre_token index.
+        # I don't think an error flag can occur here ..but ?
+        my $error;
+        ( $i, $error ) =
+          inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
+        if ($error) { warning("Possibly invalid package\n") }
+        $current_package = $package;
+
+        # check for error
+        my ( $next_nonblank_token, $i_next ) =
+          find_next_nonblank_token( $i, $rtokens, $max_token_index );
+        if ( $next_nonblank_token !~ /^[;\}]$/ ) {
+            warning(
+                "Unexpected '$next_nonblank_token' after package name '$tok'\n"
+            );
          }
      }
          }
      }
-    return ( $i, $error );
+
+    # no match but line not blank --
+    # could be a label with name package, like package:  , for example.
+    else {
+        $type = 'k';
+    }
+
+    return ( $i, $tok, $type );
  }
  
  }
  
-sub guess_if_pattern_or_conditional {
+sub scan_identifier_do {
  
  
-    # this routine is called when we have encountered a ? following an
-    # unknown bareword, and we must decide if it starts a pattern or not
-    # input parameters:
-    #   $i - token index of the ? starting possible pattern
-    # output parameters:
-    #   $is_pattern = 0 if probably not pattern,  =1 if probably a pattern
-    #   msg = a warning or diagnostic message
-    my ( $i, $rtokens, $rtoken_map ) = @_;
-    my $is_pattern = 0;
-    my $msg        = "guessing that ? after $last_nonblank_token starts a ";
+    # This routine assembles tokens into identifiers.  It maintains a
+    # scan state, id_scan_state.  It updates id_scan_state based upon
+    # current id_scan_state and token, and returns an updated
+    # id_scan_state and the next index after the identifier.
+    # USES GLOBAL VARIABLES: $context, $last_nonblank_token,
+    # $last_nonblank_type
  
  
-    if ( $i >= $max_token_index ) {
-        $msg .= "conditional (no end to pattern found on the line)\n";
-    }
-    else {
-        my $ibeg = $i;
-        $i = $ibeg + 1;
-        my $next_token = $$rtokens[$i];    # first token after ?
+    my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index ) = @_;
+    my $i_begin   = $i;
+    my $type      = '';
+    my $tok_begin = $$rtokens[$i_begin];
+    if ( $tok_begin eq ':' ) { $tok_begin = '::' }
+    my $id_scan_state_begin = $id_scan_state;
+    my $identifier_begin    = $identifier;
+    my $tok                 = $tok_begin;
+    my $message             = "";
  
  
-        # look for a possible ending ? on this line..
-        my $in_quote        = 1;
-        my $quote_depth     = 0;
-        my $quote_character = '';
-        my $quote_pos       = 0;
-        ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
-          follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
-            $quote_pos, $quote_depth );
+    # these flags will be used to help figure out the type:
+    my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
+    my $saw_type;
  
  
-        if ($in_quote) {
+    # allow old package separator (') except in 'use' statement
+    my $allow_tick = ( $last_nonblank_token ne 'use' );
  
  
-            # we didn't find an ending ? on this line,
-            # so we bias towards conditional
-            $is_pattern = 0;
-            $msg .= "conditional (no ending ? on this line)\n";
+    # get started by defining a type and a state if necessary
+    unless ($id_scan_state) {
+        $context = UNKNOWN_CONTEXT;
  
  
-            # we found an ending ?, so we bias towards a pattern
+        # fixup for digraph
+        if ( $tok eq '>' ) {
+            $tok       = '->';
+            $tok_begin = $tok;
+        }
+        $identifier = $tok;
+
+        if ( $tok eq '$' || $tok eq '*' ) {
+            $id_scan_state = '$';
+            $context       = SCALAR_CONTEXT;
+        }
+        elsif ( $tok eq '%' || $tok eq '@' ) {
+            $id_scan_state = '$';
+            $context       = LIST_CONTEXT;
+        }
+        elsif ( $tok eq '&' ) {
+            $id_scan_state = '&';
+        }
+        elsif ( $tok eq 'sub' or $tok eq 'package' ) {
+            $saw_alpha     = 0;     # 'sub' is considered type info here
+            $id_scan_state = '$';
+            $identifier .= ' ';     # need a space to separate sub from sub name
+        }
+        elsif ( $tok eq '::' ) {
+            $id_scan_state = 'A';
+        }
+        elsif ( $tok =~ /^[A-Za-z_]/ ) {
+            $id_scan_state = ':';
+        }
+        elsif ( $tok eq '->' ) {
+            $id_scan_state = '$';
          }
          else {
  
          }
          else {
  
-            if ( pattern_expected( $i, $rtokens ) >= 0 ) {
-                $is_pattern = 1;
-                $msg .= "pattern (found ending ? and pattern expected)\n";
-            }
-            else {
-                $msg .= "pattern (uncertain, but found ending ?)\n";
-            }
+            # shouldn't happen
+            my ( $a, $b, $c ) = caller;
+            warning("Program Bug: scan_identifier given bad token = $tok \n");
+            warning("   called from sub $a  line: $c\n");
+            report_definite_bug();
          }
          }
+        $saw_type = !$saw_alpha;
+    }
+    else {
+        $i--;
+        $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
      }
      }
-    return ( $is_pattern, $msg );
-}
  
  
-sub guess_if_pattern_or_division {
+    # now loop to gather the identifier
+    my $i_save = $i;
  
  
-    # this routine is called when we have encountered a / following an
-    # unknown bareword, and we must decide if it starts a pattern or is a
-    # division
-    # input parameters:
-    #   $i - token index of the / starting possible pattern
-    # output parameters:
-    #   $is_pattern = 0 if probably division,  =1 if probably a pattern
-    #   msg = a warning or diagnostic message
-    my ( $i, $rtokens, $rtoken_map ) = @_;
-    my $is_pattern = 0;
-    my $msg        = "guessing that / after $last_nonblank_token starts a ";
+    while ( $i < $max_token_index ) {
+        $i_save = $i unless ( $tok =~ /^\s*$/ );
+        $tok = $$rtokens[ ++$i ];
  
  
-    if ( $i >= $max_token_index ) {
-        "division (no end to pattern found on the line)\n";
-    }
-    else {
-        my $ibeg = $i;
-        my $divide_expected = numerator_expected( $i, $rtokens );
-        $i = $ibeg + 1;
-        my $next_token = $$rtokens[$i];    # first token after slash
+        if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
+            $tok = '::';
+            $i++;
+        }
  
  
-        # look for a possible ending / on this line..
-        my $in_quote        = 1;
-        my $quote_depth     = 0;
-        my $quote_character = '';
-        my $quote_pos       = 0;
-        ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
-          follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
-            $quote_pos, $quote_depth );
+        if ( $id_scan_state eq '$' ) {    # starting variable name
  
  
-        if ($in_quote) {
+            if ( $tok eq '$' ) {
  
  
-            # we didn't find an ending / on this line,
-            # so we bias towards division
-            if ( $divide_expected >= 0 ) {
-                $is_pattern = 0;
-                $msg .= "division (no ending / on this line)\n";
+                $identifier .= $tok;
+
+                # we've got a punctuation variable if end of line (punct.t)
+                if ( $i == $max_token_index ) {
+                    $type          = 'i';
+                    $id_scan_state = '';
+                    last;
+                }
              }
              }
-            else {
-                $msg        = "multi-line pattern (division not possible)\n";
-                $is_pattern = 1;
+            elsif ( $tok =~ /^[A-Za-z_]/ ) {    # alphanumeric ..
+                $saw_alpha     = 1;
+                $id_scan_state = ':';           # now need ::
+                $identifier .= $tok;
              }
              }
+            elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
+                $saw_alpha     = 1;
+                $id_scan_state = ':';                 # now need ::
+                $identifier .= $tok;
  
  
-        }
+                # Perl will accept leading digits in identifiers,
+                # although they may not always produce useful results.
+                # Something like $main::0 is ok.  But this also works:
+                #
+                #  sub howdy::123::bubba{ print "bubba $54321!\n" }
+                #  howdy::123::bubba();
+                #
+            }
+            elsif ( $tok =~ /^[0-9]/ ) {              # numeric
+                $saw_alpha     = 1;
+                $id_scan_state = ':';                 # now need ::
+                $identifier .= $tok;
+            }
+            elsif ( $tok eq '::' ) {
+                $id_scan_state = 'A';
+                $identifier .= $tok;
+            }
+            elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) {    # $#array
+                $identifier .= $tok;    # keep same state, a $ could follow
+            }
+            elsif ( $tok eq '{' ) {
  
  
-        # we found an ending /, so we bias towards a pattern
-        else {
+                # check for something like ${#} or ${©}
+                if (   $identifier eq '$'
+                    && $i + 2 <= $max_token_index
+                    && $$rtokens[ $i + 2 ] eq '}'
+                    && $$rtokens[ $i + 1 ] !~ /[\s\w]/ )
+                {
+                    my $next2 = $$rtokens[ $i + 2 ];
+                    my $next1 = $$rtokens[ $i + 1 ];
+                    $identifier .= $tok . $next1 . $next2;
+                    $i += 2;
+                    $id_scan_state = '';
+                    last;
+                }
+
+                # skip something like ${xxx} or ->{
+                $id_scan_state = '';
  
  
-            if ( pattern_expected( $i, $rtokens ) >= 0 ) {
+                # if this is the first token of a line, any tokens for this
+                # identifier have already been accumulated
+                if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
+                $i = $i_save;
+                last;
+            }
  
  
-                if ( $divide_expected >= 0 ) {
+            # space ok after leading $ % * & @
+            elsif ( $tok =~ /^\s*$/ ) {
  
  
-                    if ( $i - $ibeg > 60 ) {
-                        $msg .= "division (matching / too distant)\n";
-                        $is_pattern = 0;
+                if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
+
+                    if ( length($identifier) > 1 ) {
+                        $id_scan_state = '';
+                        $i             = $i_save;
+                        $type          = 'i';    # probably punctuation variable
+                        last;
                      }
                      else {
                      }
                      else {
-                        $msg .= "pattern (but division possible too)\n";
-                        $is_pattern = 1;
+
+                        # spaces after $'s are common, and space after @
+                        # is harmless, so only complain about space
+                        # after other type characters. Space after $ and
+                        # @ will be removed in formatting.  Report space
+                        # after % and * because they might indicate a
+                        # parsing error.  In other words '% ' might be a
+                        # modulo operator.  Delete this warning if it
+                        # gets annoying.
+                        if ( $identifier !~ /^[\@\$]$/ ) {
+                            $message =
+                              "Space in identifier, following $identifier\n";
+                        }
                      }
                  }
                      }
                  }
-                else {
-                    $is_pattern = 1;
-                    $msg .= "pattern (division not possible)\n";
-                }
+
+                # else:
+                # space after '->' is ok
              }
              }
-            else {
+            elsif ( $tok eq '^' ) {
  
  
-                if ( $divide_expected >= 0 ) {
-                    $is_pattern = 0;
-                    $msg .= "division (pattern not possible)\n";
+                # check for some special variables like $^W
+                if ( $identifier =~ /^[\$\*\@\%]$/ ) {
+                    $identifier .= $tok;
+                    $id_scan_state = 'A';
+
+                    # Perl accepts '$^]' or '@^]', but
+                    # there must not be a space before the ']'.
+                    my $next1 = $$rtokens[ $i + 1 ];
+                    if ( $next1 eq ']' ) {
+                        $i++;
+                        $identifier .= $next1;
+                        $id_scan_state = "";
+                        last;
+                    }
                  }
                  else {
                  }
                  else {
-                    $is_pattern = 1;
-                    $msg .=
-                      "pattern (uncertain, but division would not work here)\n";
+                    $id_scan_state = '';
                  }
              }
                  }
              }
-        }
-    }
-    return ( $is_pattern, $msg );
-}
-
-sub find_here_doc {
-
-    # find the target of a here document, if any
-    # input parameters:
-    #   $i - token index of the second < of <<
-    #   ($i must be less than the last token index if this is called)
-    # output parameters:
-    #   $found_target = 0 didn't find target; =1 found target
-    #   HERE_TARGET - the target string (may be empty string)
-    #   $i - unchanged if not here doc,
-    #    or index of the last token of the here target
-    my ( $expecting, $i, $rtokens, $rtoken_map ) = @_;
-    my $ibeg                 = $i;
-    my $found_target         = 0;
-    my $here_doc_target      = '';
-    my $here_quote_character = '';
-    my ( $next_nonblank_token, $i_next_nonblank, $next_token );
-    $next_token = $$rtokens[ $i + 1 ];
-
-    # perl allows a backslash before the target string (heredoc.t)
-    my $backslash = 0;
-    if ( $next_token eq '\\' ) {
-        $backslash  = 1;
-        $next_token = $$rtokens[ $i + 2 ];
-    }
-
-    ( $next_nonblank_token, $i_next_nonblank ) =
-      find_next_nonblank_token_on_this_line( $i, $rtokens );
-
-    if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
-
-        my $in_quote    = 1;
-        my $quote_depth = 0;
-        my $quote_pos   = 0;
-
-        ( $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth ) =
-          follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
-            $here_quote_character, $quote_pos, $quote_depth );
-
-        if ($in_quote) {    # didn't find end of quote, so no target found
-            $i = $ibeg;
-        }
-        else {              # found ending quote
-            my $j;
-            $found_target = 1;
-
-            my $tokj;
-            for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
-                $tokj = $$rtokens[$j];
-
-                # we have to remove any backslash before the quote character
-                # so that the here-doc-target exactly matches this string
-                next
-                  if ( $tokj eq "\\"
-                    && $j < $i - 1
-                    && $$rtokens[ $j + 1 ] eq $here_quote_character );
-                $here_doc_target .= $tokj;
-            }
-        }
-    }
-
-    elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
-        $found_target = 1;
-        write_logfile_entry(
-            "found blank here-target after <<; suggest using \"\"\n");
-        $i = $ibeg;
-    }
-    elsif ( $next_token =~ /^\w/ ) {    # simple bareword or integer after <<
-
-        my $here_doc_expected;
-        if ( $expecting == UNKNOWN ) {
-            $here_doc_expected = guess_if_here_doc($next_token);
-        }
-        else {
-            $here_doc_expected = 1;
-        }
-
-        if ($here_doc_expected) {
-            $found_target    = 1;
-            $here_doc_target = $next_token;
-            $i               = $ibeg + 1;
-        }
-
-    }
-    else {
-
-        if ( $expecting == TERM ) {
-            $found_target = 1;
-            write_logfile_entry("Note: bare here-doc operator <<\n");
-        }
-        else {
-            $i = $ibeg;
-        }
-    }
-
-    # patch to neglect any prepended backslash
-    if ( $found_target && $backslash ) { $i++ }
-
-    return ( $found_target, $here_doc_target, $here_quote_character, $i );
-}
-
-# try to resolve here-doc vs. shift by looking ahead for
-# non-code or the end token (currently only looks for end token)
-# returns 1 if it is probably a here doc, 0 if not
-sub guess_if_here_doc {
-
-    # This is how many lines we will search for a target as part of the
-    # guessing strategy.  It is a constant because there is probably
-    # little reason to change it.
-    use constant HERE_DOC_WINDOW => 40;
-
-    my $next_token        = shift;
-    my $here_doc_expected = 0;
-    my $line;
-    my $k   = 0;
-    my $msg = "checking <<";
-
-    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
-    {
-        chomp $line;
-
-        if ( $line =~ /^$next_token$/ ) {
-            $msg .= " -- found target $next_token ahead $k lines\n";
-            $here_doc_expected = 1;    # got it
-            last;
-        }
-        last if ( $k >= HERE_DOC_WINDOW );
-    }
-
-    unless ($here_doc_expected) {
-
-        if ( !defined($line) ) {
-            $here_doc_expected = -1;    # hit eof without seeing target
-            $msg .= " -- must be shift; target $next_token not in file\n";
-
-        }
-        else {                          # still unsure..taking a wild guess
-
-            if ( !$is_constant{$current_package}{$next_token} ) {
-                $here_doc_expected = 1;
-                $msg .=
-                  " -- guessing it's a here-doc ($next_token not a constant)\n";
-            }
-            else {
-                $msg .=
-                  " -- guessing it's a shift ($next_token is a constant)\n";
-            }
-        }
-    }
-    write_logfile_entry($msg);
-    return $here_doc_expected;
-}
-
-sub do_quote {
-
-    # follow (or continue following) quoted string or pattern
-    # $in_quote return code:
-    #   0 - ok, found end
-    #   1 - still must find end of quote whose target is $quote_character
-    #   2 - still looking for end of first of two quotes
-    my ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth, $rtokens,
-        $rtoken_map )
-      = @_;
-
-    if ( $in_quote == 2 ) {    # two quotes/patterns to follow
-        my $ibeg = $i;
-        ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
-          follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
-            $quote_pos, $quote_depth );
-
-        if ( $in_quote == 1 ) {
-            if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
-            $quote_character = '';
-        }
-    }
-
-    if ( $in_quote == 1 ) {    # one (more) quote to follow
-        my $ibeg = $i;
-        ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth ) =
-          follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
-            $quote_pos, $quote_depth );
-    }
-    return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth );
-}
-
-sub scan_number_do {
-
-    #  scan a number in any of the formats that Perl accepts
-    #  Underbars (_) are allowed in decimal numbers.
-    #  input parameters -
-    #      $input_line  - the string to scan
-    #      $i           - pre_token index to start scanning
-    #    $rtoken_map    - reference to the pre_token map giving starting
-    #                    character position in $input_line of token $i
-    #  output parameters -
-    #    $i            - last pre_token index of the number just scanned
-    #    number        - the number (characters); or undef if not a number
-
-    my ( $input_line, $i, $rtoken_map, $input_type ) = @_;
-    my $pos_beg = $$rtoken_map[$i];
-    my $pos;
-    my $i_begin = $i;
-    my $number  = undef;
-    my $type    = $input_type;
-
-    my $first_char = substr( $input_line, $pos_beg, 1 );
-
-    # Look for bad starting characters; Shouldn't happen..
-    if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
-        warning("Program bug - scan_number given character $first_char\n");
-        report_definite_bug();
-        return ( $i, $type, $number );
-    }
-
-    # handle v-string without leading 'v' character ('Two Dot' rule)
-    # (vstring.t)
-    # TODO: v-strings may contain underscores
-    pos($input_line) = $pos_beg;
-    if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
-        $pos = pos($input_line);
-        my $numc = $pos - $pos_beg;
-        $number = substr( $input_line, $pos_beg, $numc );
-        $type = 'v';
-        unless ($saw_v_string) { report_v_string($number) }
-    }
-
-    # handle octal, hex, binary
-    if ( !defined($number) ) {
-        pos($input_line) = $pos_beg;
-        if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g )
-        {
-            $pos = pos($input_line);
-            my $numc = $pos - $pos_beg;
-            $number = substr( $input_line, $pos_beg, $numc );
-            $type = 'n';
-        }
-    }
-
-    # handle decimal
-    if ( !defined($number) ) {
-        pos($input_line) = $pos_beg;
-
-        if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
-            $pos = pos($input_line);
-
-            # watch out for things like 0..40 which would give 0. by this;
-            if (   ( substr( $input_line, $pos - 1, 1 ) eq '.' )
-                && ( substr( $input_line, $pos, 1 ) eq '.' ) )
-            {
-                $pos--;
-            }
-            my $numc = $pos - $pos_beg;
-            $number = substr( $input_line, $pos_beg, $numc );
-            $type = 'n';
-        }
-    }
-
-    # filter out non-numbers like e + - . e2  .e3 +e6
-    # the rule: at least one digit, and any 'e' must be preceded by a digit
-    if (
-        $number !~ /\d/    # no digits
-        || (   $number =~ /^(.*)[eE]/
-            && $1 !~ /\d/ )    # or no digits before the 'e'
-      )
-    {
-        $number = undef;
-        $type   = $input_type;
-        return ( $i, $type, $number );
-    }
-
-    # Found a number; now we must convert back from character position
-    # to pre_token index. An error here implies user syntax error.
-    # An example would be an invalid octal number like '009'.
-    my $error;
-    ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
-    if ($error) { warning("Possibly invalid number\n") }
-
-    return ( $i, $type, $number );
-}
-
-sub scan_bare_identifier_do {
-
-    # this routine is called to scan a token starting with an alphanumeric
-    # variable or package separator, :: or '.
-
-    my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map ) = @_;
-    my $i_begin = $i;
-    my $package = undef;
-
-    my $i_beg = $i;
-
-    # we have to back up one pretoken at a :: since each : is one pretoken
-    if ( $tok eq '::' ) { $i_beg-- }
-    if ( $tok eq '->' ) { $i_beg-- }
-    my $pos_beg = $$rtoken_map[$i_beg];
-    pos($input_line) = $pos_beg;
-
-    #  Examples:
-    #   A::B::C
-    #   A::
-    #   ::A
-    #   A'B
-    if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {
-
-        my $pos  = pos($input_line);
-        my $numc = $pos - $pos_beg;
-        $tok = substr( $input_line, $pos_beg, $numc );
-
-        # type 'w' includes anything without leading type info
-        # ($,%,@,*) including something like abc::def::ghi
-        $type = 'w';
-
-        my $sub_name = "";
-        if ( defined($2) ) { $sub_name = $2; }
-        if ( defined($1) ) {
-            $package = $1;
+            else {    # something else
  
  
-            # patch: don't allow isolated package name which just ends
-            # in the old style package separator (single quote).  Example:
-            #   use CGI':all';
-            if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
-                $pos--;
-            }
+                # check for various punctuation variables
+                if ( $identifier =~ /^[\$\*\@\%]$/ ) {
+                    $identifier .= $tok;
+                }
  
  
-            $package =~ s/\'/::/g;
-            if ( $package =~ /^\:/ ) { $package = 'main' . $package }
-            $package =~ s/::$//;
-        }
-        else {
-            $package = $current_package;
+                elsif ( $identifier eq '$#' ) {
  
  
-            if ( $is_keyword{$tok} ) {
-                $type = 'k';
-            }
-        }
+                    if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
  
  
-        # if it is a bareword..
-        if ( $type eq 'w' ) {
+                    # perl seems to allow just these: $#: $#- $#+
+                    elsif ( $tok =~ /^[\:\-\+]$/ ) {
+                        $type = 'i';
+                        $identifier .= $tok;
+                    }
+                    else {
+                        $i = $i_save;
+                        write_logfile_entry( 'Use of $# is deprecated' . "\n" );
+                    }
+                }
+                elsif ( $identifier eq '$$' ) {
  
  
-            # check for v-string with leading 'v' type character
-            # (This seems to have presidence over filehandle, type 'Y')
-            if ( $tok =~ /^v\d[_\d]*$/ ) {
+                    # perl does not allow references to punctuation
+                    # variables without braces.  For example, this
+                    # won't work:
+                    #  $:=\4;
+                    #  $a = $$:;
+                    # You would have to use
+                    #  $a = ${$:};
  
  
-                # we only have the first part - something like 'v101' -
-                # look for more
-                if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
-                    $pos  = pos($input_line);
-                    $numc = $pos - $pos_beg;
-                    $tok  = substr( $input_line, $pos_beg, $numc );
+                    $i = $i_save;
+                    if   ( $tok eq '{' ) { $type = 't' }
+                    else                 { $type = 'i' }
                  }
                  }
-                $type = 'v';
-
-                # warn if this version can't handle v-strings
-                unless ($saw_v_string) { report_v_string($tok) }
+                elsif ( $identifier eq '->' ) {
+                    $i = $i_save;
+                }
+                else {
+                    $i = $i_save;
+                    if ( length($identifier) == 1 ) { $identifier = ''; }
+                }
+                $id_scan_state = '';
+                last;
              }
              }
+        }
+        elsif ( $id_scan_state eq '&' ) {    # starting sub call?
  
  
-            elsif ( $is_constant{$package}{$sub_name} ) {
-                $type = 'C';
+            if ( $tok =~ /^[\$A-Za-z_]/ ) {    # alphanumeric ..
+                $id_scan_state = ':';          # now need ::
+                $saw_alpha     = 1;
+                $identifier .= $tok;
              }
              }
-
-            # bareword after sort has implied empty prototype; for example:
-            # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
-            # This has priority over whatever the user has specified.
-            elsif ($last_nonblank_token eq 'sort'
-                && $last_nonblank_type eq 'k' )
-            {
-                $type = 'Z';
+            elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
+                $id_scan_state = ':';                 # now need ::
+                $saw_alpha     = 1;
+                $identifier .= $tok;
              }
              }
-
-            # Note: strangely, perl does not seem to really let you create
-            # functions which act like eval and do, in the sense that eval
-            # and do may have operators following the final }, but any operators
-            # that you create with prototype (&) apparently do not allow
-            # trailing operators, only terms.  This seems strange.
-            # If this ever changes, here is the update
-            # to make perltidy behave accordingly:
-
-            # elsif ( $is_block_function{$package}{$tok} ) {
-            #    $tok='eval'; # patch to do braces like eval  - doesn't work
-            #    $type = 'k';
-            #}
-            # FIXME: This could become a separate type to allow for different
-            # future behavior:
-            elsif ( $is_block_function{$package}{$sub_name} ) {
-                $type = 'G';
+            elsif ( $tok =~ /^[0-9]/ ) {    # numeric..see comments above
+                $id_scan_state = ':';       # now need ::
+                $saw_alpha     = 1;
+                $identifier .= $tok;
              }
              }
-
-            elsif ( $is_block_list_function{$package}{$sub_name} ) {
-                $type = 'G';
+            elsif ( $tok =~ /^\s*$/ ) {     # allow space
              }
              }
-            elsif ( $is_user_function{$package}{$sub_name} ) {
-                $type      = 'U';
-                $prototype = $user_function_prototype{$package}{$sub_name};
+            elsif ( $tok eq '::' ) {        # leading ::
+                $id_scan_state = 'A';       # accept alpha next
+                $identifier .= $tok;
              }
              }
+            elsif ( $tok eq '{' ) {
+                if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
+                $i             = $i_save;
+                $id_scan_state = '';
+                last;
+            }
+            else {
  
  
-            # check for indirect object
-            elsif (
-
-                # added 2001-03-27: must not be followed immediately by '('
-                # see fhandle.t
-                ( $input_line !~ m/\G\(/gc )
-
-                # and
-                && (
-
-                    # preceded by keyword like 'print', 'printf' and friends
-                    $is_indirect_object_taker{$last_nonblank_token}
-
-                    # or preceded by something like 'print(' or 'printf('
-                    || (
-                        ( $last_nonblank_token eq '(' )
-                        && $is_indirect_object_taker{ $paren_type[$paren_depth]
-                        }
-
-                    )
-                )
-              )
-            {
-
-                # may not be indirect object unless followed by a space
-                if ( $input_line =~ m/\G\s+/gc ) {
-                    $type = 'Y';
+                # punctuation variable?
+                # testfile: cunningham4.pl
+                if ( $identifier eq '&' ) {
+                    $identifier .= $tok;
+                }
+                else {
+                    $identifier = '';
+                    $i          = $i_save;
+                    $type       = '&';
+                }
+                $id_scan_state = '';
+                last;
+            }
+        }
+        elsif ( $id_scan_state eq 'A' ) {    # looking for alpha (after ::)
  
  
-                    # Abandon Hope ...
-                    # Perl's indirect object notation is a very bad
-                    # thing and can cause subtle bugs, especially for
-                    # beginning programmers.  And I haven't even been
-                    # able to figure out a sane warning scheme which
-                    # doesn't get in the way of good scripts.
+            if ( $tok =~ /^[A-Za-z_]/ ) {    # found it
+                $identifier .= $tok;
+                $id_scan_state = ':';        # now need ::
+                $saw_alpha     = 1;
+            }
+            elsif ( $tok eq "'" && $allow_tick ) {
+                $identifier .= $tok;
+                $id_scan_state = ':';        # now need ::
+                $saw_alpha     = 1;
+            }
+            elsif ( $tok =~ /^[0-9]/ ) {     # numeric..see comments above
+                $identifier .= $tok;
+                $id_scan_state = ':';        # now need ::
+                $saw_alpha     = 1;
+            }
+            elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
+                $id_scan_state = '(';
+                $identifier .= $tok;
+            }
+            elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
+                $id_scan_state = ')';
+                $identifier .= $tok;
+            }
+            else {
+                $id_scan_state = '';
+                $i             = $i_save;
+                last;
+            }
+        }
+        elsif ( $id_scan_state eq ':' ) {    # looking for :: after alpha
  
  
-                    # Complain if a filehandle has any lower case
-                    # letters.  This is suggested good practice, but the
-                    # main reason for this warning is that prior to
-                    # release 20010328, perltidy incorrectly parsed a
-                    # function call after a print/printf, with the
-                    # result that a space got added before the opening
-                    # paren, thereby converting the function name to a
-                    # filehandle according to perl's weird rules.  This
-                    # will not usually generate a syntax error, so this
-                    # is a potentially serious bug.  By warning
-                    # of filehandles with any lower case letters,
-                    # followed by opening parens, we will help the user
-                    # find almost all of these older errors.
-                    # use 'sub_name' because something like
-                    # main::MYHANDLE is ok for filehandle
-                    if ( $sub_name =~ /[a-z]/ ) {
+            if ( $tok eq '::' ) {            # got it
+                $identifier .= $tok;
+                $id_scan_state = 'A';        # now require alpha
+            }
+            elsif ( $tok =~ /^[A-Za-z_]/ ) {    # more alphanumeric is ok here
+                $identifier .= $tok;
+                $id_scan_state = ':';           # now need ::
+                $saw_alpha     = 1;
+            }
+            elsif ( $tok =~ /^[0-9]/ ) {        # numeric..see comments above
+                $identifier .= $tok;
+                $id_scan_state = ':';           # now need ::
+                $saw_alpha     = 1;
+            }
+            elsif ( $tok eq "'" && $allow_tick ) {    # tick
  
  
-                        # could be bug caused by older perltidy if
-                        # followed by '('
-                        if ( $input_line =~ m/\G\s*\(/gc ) {
-                            complain(
-"Caution: unknown word '$tok' in indirect object slot\n"
-                            );
-                        }
-                    }
+                if ( $is_keyword{$identifier} ) {
+                    $id_scan_state = '';              # that's all
+                    $i             = $i_save;
+                }
+                else {
+                    $identifier .= $tok;
                  }
                  }
+            }
+            elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
+                $id_scan_state = '(';
+                $identifier .= $tok;
+            }
+            elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
+                $id_scan_state = ')';
+                $identifier .= $tok;
+            }
+            else {
+                $id_scan_state = '';        # that's all
+                $i             = $i_save;
+                last;
+            }
+        }
+        elsif ( $id_scan_state eq '(' ) {    # looking for ( of prototype
  
  
-                # bareword not followed by a space -- may not be filehandle
-                # (may be function call defined in a 'use' statement)
-                else {
-                    $type = 'Z';
-                }
+            if ( $tok eq '(' ) {             # got it
+                $identifier .= $tok;
+                $id_scan_state = ')';        # now find the end of it
+            }
+            elsif ( $tok =~ /^\s*$/ ) {      # blank - keep going
+                $identifier .= $tok;
+            }
+            else {
+                $id_scan_state = '';         # that's all - no prototype
+                $i             = $i_save;
+                last;
              }
          }
              }
          }
+        elsif ( $id_scan_state eq ')' ) {    # looking for ) to end
  
  
-        # Now we must convert back from character position
-        # to pre_token index.
-        # I don't think an error flag can occur here ..but who knows
-        my $error;
-        ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
-        if ($error) {
-            warning("scan_bare_identifier: Possibly invalid tokenization\n");
+            if ( $tok eq ')' ) {             # got it
+                $identifier .= $tok;
+                $id_scan_state = '';         # all done
+                last;
+            }
+            elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
+                $identifier .= $tok;
+            }
+            else {    # probable error in script, but keep going
+                warning("Unexpected '$tok' while seeking end of prototype\n");
+                $identifier .= $tok;
+            }
+        }
+        else {        # can get here due to error in initialization
+            $id_scan_state = '';
+            $i             = $i_save;
+            last;
          }
      }
  
          }
      }
  
-    # no match but line not blank - could be syntax error
-    # perl will take '::' alone without complaint
-    else {
-        $type = 'w';
-
-        # change this warning to log message if it becomes annoying
-        warning("didn't find identifier after leading ::\n");
+    if ( $id_scan_state eq ')' ) {
+        warning("Hit end of line while seeking ) to end prototype\n");
      }
      }
-    return ( $i, $tok, $type, $prototype );
-}
-
-sub scan_id_do {
-
-    # This is the new scanner and will eventually replace scan_identifier.
-    # Only type 'sub' and 'package' are implemented.
-    # Token types $ * % @ & -> are not yet implemented.
-    #
-    # Scan identifier following a type token.
-    # The type of call depends on $id_scan_state: $id_scan_state = ''
-    # for starting call, in which case $tok must be the token defining
-    # the type.
-    #
-    # If the type token is the last nonblank token on the line, a value
-    # of $id_scan_state = $tok is returned, indicating that further
-    # calls must be made to get the identifier.  If the type token is
-    # not the last nonblank token on the line, the identifier is
-    # scanned and handled and a value of '' is returned.
-
-    my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state ) = @_;
-    my $type = '';
-    my ( $i_beg, $pos_beg );
-
-    #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
-    #my ($a,$b,$c) = caller;
-    #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
  
  
-    # on re-entry, start scanning at first token on the line
-    if ($id_scan_state) {
-        $i_beg = $i;
-        $type  = '';
+    # once we enter the actual identifier, it may not extend beyond
+    # the end of the current line
+    if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
+        $id_scan_state = '';
      }
      }
+    if ( $i < 0 ) { $i = 0 }
  
  
-    # on initial entry, start scanning just after type token
-    else {
-        $i_beg         = $i + 1;
-        $id_scan_state = $tok;
-        $type          = 't';
-    }
+    unless ($type) {
  
  
-    # find $i_beg = index of next nonblank token,
-    # and handle empty lines
-    my $blank_line          = 0;
-    my $next_nonblank_token = $$rtokens[$i_beg];
-    if ( $i_beg > $max_token_index ) {
-        $blank_line = 1;
-    }
-    else {
+        if ($saw_type) {
  
  
-        # only a '#' immediately after a '$' is not a comment
-        if ( $next_nonblank_token eq '#' ) {
-            unless ( $tok eq '$' ) {
-                $blank_line = 1;
+            if ($saw_alpha) {
+                if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
+                    $type = 'w';
+                }
+                else { $type = 'i' }
              }
              }
-        }
-
-        if ( $next_nonblank_token =~ /^\s/ ) {
-            ( $next_nonblank_token, $i_beg ) =
-              find_next_nonblank_token_on_this_line( $i_beg, $rtokens );
-            if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
-                $blank_line = 1;
+            elsif ( $identifier eq '->' ) {
+                $type = '->';
              }
              }
-        }
-    }
-
-    # handle non-blank line; identifier, if any, must follow
-    unless ($blank_line) {
+            elsif (
+                ( length($identifier) > 1 )
  
  
-        if ( $id_scan_state eq 'sub' ) {
-            ( $i, $tok, $type, $id_scan_state ) =
-              do_scan_sub( $input_line, $i, $i_beg, $tok, $type, $rtokens,
-                $rtoken_map, $id_scan_state );
+                # In something like '@$=' we have an identifier '@$'
+                # In something like '$${' we have type '$$' (and only
+                # part of an identifier)
+                && !( $identifier =~ /\$$/ && $tok eq '{' )
+                && ( $identifier !~ /^(sub |package )$/ )
+              )
+            {
+                $type = 'i';
+            }
+            else { $type = 't' }
          }
          }
+        elsif ($saw_alpha) {
  
  
-        elsif ( $id_scan_state eq 'package' ) {
-            ( $i, $tok, $type ) =
-              do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
-                $rtoken_map );
-            $id_scan_state = '';
+            # type 'w' includes anything without leading type info
+            # ($,%,@,*) including something like abc::def::ghi
+            $type = 'w';
          }
          }
-
          else {
          else {
-            warning("invalid token in scan_id: $tok\n");
-            $id_scan_state = '';
-        }
+            $type = '';
+        }    # this can happen on a restart
      }
  
      }
  
-    if ( $id_scan_state && ( !defined($type) || !$type ) ) {
-
-        # shouldn't happen:
-        warning(
-"Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
-        );
-        report_definite_bug();
+    if ($identifier) {
+        $tok = $identifier;
+        if ($message) { write_logfile_entry($message) }
+    }
+    else {
+        $tok = $tok_begin;
+        $i   = $i_begin;
      }
  
      }
  
-    TOKENIZER_DEBUG_FLAG_NSCAN && do {
+    TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
+        my ( $a, $b, $c ) = caller;
          print
          print
-          "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
+"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
+        print
+"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
      };
      };
-    return ( $i, $tok, $type, $id_scan_state );
+    return ( $i, $tok, $type, $id_scan_state, $identifier );
  }
  
  {
  }
  
  {
@@ -24080,10 +24790,15 @@ sub scan_id_do {
          # sub name.  For example, 'sub &doit' is wrong.  Also, be sure
          # a name is given if and only if a non-anonymous sub is
          # appropriate.
          # sub name.  For example, 'sub &doit' is wrong.  Also, be sure
          # a name is given if and only if a non-anonymous sub is
          # appropriate.
+        # USES GLOBAL VARS: $current_package, $last_nonblank_token,
+        # $in_attribute_list, %saw_function_definition,
+        # $statement_type
  
  
-        my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
-            $id_scan_state )
-          = @_;
+        my (
+            $input_line, $i,             $i_beg,
+            $tok,        $type,          $rtokens,
+            $rtoken_map, $id_scan_state, $max_token_index
+        ) = @_;
          $id_scan_state = "";    # normally we get everything in one call
          my $subname = undef;
          my $package = undef;
          $id_scan_state = "";    # normally we get everything in one call
          my $subname = undef;
          my $package = undef;
@@ -24165,12 +24880,15 @@ sub scan_id_do {
  
                  # I don't think an error flag can occur here ..but ?
                  my $error;
  
                  # I don't think an error flag can occur here ..but ?
                  my $error;
-                ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
+                ( $i, $error ) =
+                  inverse_pretoken_map( $i, $pos, $rtoken_map,
+                    $max_token_index );
                  if ($error) { warning("Possibly invalid sub\n") }
  
                  # check for multiple definitions of a sub
                  ( $next_nonblank_token, my $i_next ) =
                  if ($error) { warning("Possibly invalid sub\n") }
  
                  # check for multiple definitions of a sub
                  ( $next_nonblank_token, my $i_next ) =
-                  find_next_nonblank_token_on_this_line( $i, $rtokens );
+                  find_next_nonblank_token_on_this_line( $i, $rtokens,
+                    $max_token_index );
              }
  
              if ( $next_nonblank_token =~ /^(\s*|#)$/ )
              }
  
              if ( $next_nonblank_token =~ /^(\s*|#)$/ )
@@ -24195,7 +24913,7 @@ sub scan_id_do {
                          );
                      }
                      $saw_function_definition{$package}{$subname} =
                          );
                      }
                      $saw_function_definition{$package}{$subname} =
-                      $input_line_number;
+                      $tokenizer_self->{_last_line_number};
                  }
              }
              elsif ( $next_nonblank_token eq ';' ) {
                  }
              }
              elsif ( $next_nonblank_token eq ';' ) {
@@ -24240,554 +24958,537 @@ sub scan_id_do {
      }
  }
  
      }
  }
  
-sub check_prototype {
-    my ( $proto, $package, $subname ) = @_;
-    return unless ( defined($package) && defined($subname) );
-    if ( defined($proto) ) {
-        $proto =~ s/^\s*\(\s*//;
-        $proto =~ s/\s*\)$//;
-        if ($proto) {
-            $is_user_function{$package}{$subname}        = 1;
-            $user_function_prototype{$package}{$subname} = "($proto)";
-
-            # prototypes containing '&' must be treated specially..
-            if ( $proto =~ /\&/ ) {
-
-                # right curly braces of prototypes ending in
-                # '&' may be followed by an operator
-                if ( $proto =~ /\&$/ ) {
-                    $is_block_function{$package}{$subname} = 1;
-                }
-
-                # right curly braces of prototypes NOT ending in
-                # '&' may NOT be followed by an operator
-                elsif ( $proto !~ /\&$/ ) {
-                    $is_block_list_function{$package}{$subname} = 1;
-                }
-            }
-        }
-        else {
-            $is_constant{$package}{$subname} = 1;
-        }
-    }
-    else {
-        $is_user_function{$package}{$subname} = 1;
-    }
-}
-
-sub do_scan_package {
-
-    # do_scan_package parses a package name
-    # it is called with $i_beg equal to the index of the first nonblank
-    # token following a 'package' token.
-
-    my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map ) = @_;
-    my $package = undef;
-    my $pos_beg = $$rtoken_map[$i_beg];
-    pos($input_line) = $pos_beg;
-
-    # handle non-blank line; package name, if any, must follow
-    if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
-        $package = $1;
-        $package = ( defined($1) && $1 ) ? $1 : 'main';
-        $package =~ s/\'/::/g;
-        if ( $package =~ /^\:/ ) { $package = 'main' . $package }
-        $package =~ s/::$//;
-        my $pos  = pos($input_line);
-        my $numc = $pos - $pos_beg;
-        $tok  = 'package ' . substr( $input_line, $pos_beg, $numc );
-        $type = 'i';
-
-        # Now we must convert back from character position
-        # to pre_token index.
-        # I don't think an error flag can occur here ..but ?
-        my $error;
-        ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map );
-        if ($error) { warning("Possibly invalid package\n") }
-        $current_package = $package;
-
-        # check for error
-        my ( $next_nonblank_token, $i_next ) =
-          find_next_nonblank_token( $i, $rtokens );
-        if ( $next_nonblank_token !~ /^[;\}]$/ ) {
-            warning(
-                "Unexpected '$next_nonblank_token' after package name '$tok'\n"
-            );
-        }
-    }
-
-    # no match but line not blank --
-    # could be a label with name package, like package:  , for example.
-    else {
-        $type = 'k';
-    }
-
-    return ( $i, $tok, $type );
-}
-
-sub scan_identifier_do {
+#########i###############################################################
+# Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
+#########################################################################
  
  
-    # This routine assembles tokens into identifiers.  It maintains a
-    # scan state, id_scan_state.  It updates id_scan_state based upon
-    # current id_scan_state and token, and returns an updated
-    # id_scan_state and the next index after the identifier.
+sub find_next_nonblank_token {
+    my ( $i, $rtokens, $max_token_index ) = @_;
  
  
-    my ( $i, $id_scan_state, $identifier, $rtokens ) = @_;
-    my $i_begin   = $i;
-    my $type      = '';
-    my $tok_begin = $$rtokens[$i_begin];
-    if ( $tok_begin eq ':' ) { $tok_begin = '::' }
-    my $id_scan_state_begin = $id_scan_state;
-    my $identifier_begin    = $identifier;
-    my $tok                 = $tok_begin;
-    my $message             = "";
+    if ( $i >= $max_token_index ) {
+        if ( !peeked_ahead() ) {
+            peeked_ahead(1);
+            $rtokens =
+              peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
+        }
+    }
+    my $next_nonblank_token = $$rtokens[ ++$i ];
  
  
-    # these flags will be used to help figure out the type:
-    my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
-    my $saw_type;
+    if ( $next_nonblank_token =~ /^\s*$/ ) {
+        $next_nonblank_token = $$rtokens[ ++$i ];
+    }
+    return ( $next_nonblank_token, $i );
+}
  
  
-    # allow old package separator (') except in 'use' statement
-    my $allow_tick = ( $last_nonblank_token ne 'use' );
+sub numerator_expected {
  
  
-    # get started by defining a type and a state if necessary
-    unless ($id_scan_state) {
-        $context = UNKNOWN_CONTEXT;
+    # this is a filter for a possible numerator, in support of guessing
+    # for the / pattern delimiter token.
+    # returns -
+    #   1 - yes
+    #   0 - can't tell
+    #  -1 - no
+    # Note: I am using the convention that variables ending in
+    # _expected have these 3 possible values.
+    my ( $i, $rtokens, $max_token_index ) = @_;
+    my $next_token = $$rtokens[ $i + 1 ];
+    if ( $next_token eq '=' ) { $i++; }    # handle /=
+    my ( $next_nonblank_token, $i_next ) =
+      find_next_nonblank_token( $i, $rtokens, $max_token_index );
  
  
-        # fixup for digraph
-        if ( $tok eq '>' ) {
-            $tok       = '->';
-            $tok_begin = $tok;
-        }
-        $identifier = $tok;
+    if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
+        1;
+    }
+    else {
  
  
-        if ( $tok eq '$' || $tok eq '*' ) {
-            $id_scan_state = '$';
-            $context       = SCALAR_CONTEXT;
-        }
-        elsif ( $tok eq '%' || $tok eq '@' ) {
-            $id_scan_state = '$';
-            $context       = LIST_CONTEXT;
-        }
-        elsif ( $tok eq '&' ) {
-            $id_scan_state = '&';
-        }
-        elsif ( $tok eq 'sub' or $tok eq 'package' ) {
-            $saw_alpha     = 0;     # 'sub' is considered type info here
-            $id_scan_state = '$';
-            $identifier .= ' ';     # need a space to separate sub from sub name
-        }
-        elsif ( $tok eq '::' ) {
-            $id_scan_state = 'A';
-        }
-        elsif ( $tok =~ /^[A-Za-z_]/ ) {
-            $id_scan_state = ':';
-        }
-        elsif ( $tok eq '->' ) {
-            $id_scan_state = '$';
+        if ( $next_nonblank_token =~ /^\s*$/ ) {
+            0;
          }
          else {
          }
          else {
-
-            # shouldn't happen
-            my ( $a, $b, $c ) = caller;
-            warning("Program Bug: scan_identifier given bad token = $tok \n");
-            warning("   called from sub $a  line: $c\n");
-            report_definite_bug();
+            -1;
          }
          }
-        $saw_type = !$saw_alpha;
-    }
-    else {
-        $i--;
-        $saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
      }
      }
+}
  
  
-    # now loop to gather the identifier
-    my $i_save = $i;
+sub pattern_expected {
  
  
-    while ( $i < $max_token_index ) {
-        $i_save = $i unless ( $tok =~ /^\s*$/ );
-        $tok    = $$rtokens[ ++$i ];
+    # This is the start of a filter for a possible pattern.
+    # It looks at the token after a possbible pattern and tries to
+    # determine if that token could end a pattern.
+    # returns -
+    #   1 - yes
+    #   0 - can't tell
+    #  -1 - no
+    my ( $i, $rtokens, $max_token_index ) = @_;
+    my $next_token = $$rtokens[ $i + 1 ];
+    if ( $next_token =~ /^[cgimosx]/ ) { $i++; }    # skip possible modifier
+    my ( $next_nonblank_token, $i_next ) =
+      find_next_nonblank_token( $i, $rtokens, $max_token_index );
  
  
-        if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
-            $tok = '::';
-            $i++;
-        }
+    # list of tokens which may follow a pattern
+    # (can probably be expanded)
+    if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
+    {
+        1;
+    }
+    else {
  
  
-        if ( $id_scan_state eq '$' ) {    # starting variable name
+        if ( $next_nonblank_token =~ /^\s*$/ ) {
+            0;
+        }
+        else {
+            -1;
+        }
+    }
+}
  
  
-            if ( $tok eq '$' ) {
+sub find_next_nonblank_token_on_this_line {
+    my ( $i, $rtokens, $max_token_index ) = @_;
+    my $next_nonblank_token;
  
  
-                $identifier .= $tok;
+    if ( $i < $max_token_index ) {
+        $next_nonblank_token = $$rtokens[ ++$i ];
  
  
-                # we've got a punctuation variable if end of line (punct.t)
-                if ( $i == $max_token_index ) {
-                    $type          = 'i';
-                    $id_scan_state = '';
-                    last;
-                }
-            }
-            elsif ( $tok =~ /^[A-Za-z_]/ ) {    # alphanumeric ..
-                $saw_alpha     = 1;
-                $id_scan_state = ':';           # now need ::
-                $identifier .= $tok;
-            }
-            elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
-                $saw_alpha     = 1;
-                $id_scan_state = ':';                 # now need ::
-                $identifier .= $tok;
+        if ( $next_nonblank_token =~ /^\s*$/ ) {
  
  
-                # Perl will accept leading digits in identifiers,
-                # although they may not always produce useful results.
-                # Something like $main::0 is ok.  But this also works:
-                #
-                #  sub howdy::123::bubba{ print "bubba $54321!\n" }
-                #  howdy::123::bubba();
-                #
-            }
-            elsif ( $tok =~ /^[0-9]/ ) {              # numeric
-                $saw_alpha     = 1;
-                $id_scan_state = ':';                 # now need ::
-                $identifier .= $tok;
-            }
-            elsif ( $tok eq '::' ) {
-                $id_scan_state = 'A';
-                $identifier .= $tok;
-            }
-            elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) {    # $#array
-                $identifier .= $tok;    # keep same state, a $ could follow
+            if ( $i < $max_token_index ) {
+                $next_nonblank_token = $$rtokens[ ++$i ];
              }
              }
-            elsif ( $tok eq '{' ) {
-
-                # check for something like ${#} or ${©}
-                if (   $identifier eq '$'
-                    && $i + 2 <= $max_token_index
-                    && $$rtokens[ $i + 2 ] eq '}'
-                    && $$rtokens[ $i + 1 ] !~ /[\s\w]/ )
-                {
-                    my $next2 = $$rtokens[ $i + 2 ];
-                    my $next1 = $$rtokens[ $i + 1 ];
-                    $identifier .= $tok . $next1 . $next2;
-                    $i += 2;
-                    $id_scan_state = '';
-                    last;
-                }
+        }
+    }
+    else {
+        $next_nonblank_token = "";
+    }
+    return ( $next_nonblank_token, $i );
+}
  
  
-                # skip something like ${xxx} or ->{
-                $id_scan_state = '';
+sub find_angle_operator_termination {
  
  
-                # if this is the first token of a line, any tokens for this
-                # identifier have already been accumulated
-                if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
-                $i = $i_save;
-                last;
-            }
+    # We are looking at a '<' and want to know if it is an angle operator.
+    # We are to return:
+    #   $i = pretoken index of ending '>' if found, current $i otherwise
+    #   $type = 'Q' if found, '>' otherwise
+    my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_;
+    my $i    = $i_beg;
+    my $type = '<';
+    pos($input_line) = 1 + $$rtoken_map[$i];
  
  
-            # space ok after leading $ % * & @
-            elsif ( $tok =~ /^\s*$/ ) {
+    my $filter;
  
  
-                if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
+    # we just have to find the next '>' if a term is expected
+    if ( $expecting == TERM ) { $filter = '[\>]' }
  
  
-                    if ( length($identifier) > 1 ) {
-                        $id_scan_state = '';
-                        $i             = $i_save;
-                        $type          = 'i';    # probably punctuation variable
-                        last;
-                    }
-                    else {
+    # we have to guess if we don't know what is expected
+    elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
  
  
-                        # spaces after $'s are common, and space after @
-                        # is harmless, so only complain about space
-                        # after other type characters. Space after $ and
-                        # @ will be removed in formatting.  Report space
-                        # after % and * because they might indicate a
-                        # parsing error.  In other words '% ' might be a
-                        # modulo operator.  Delete this warning if it
-                        # gets annoying.
-                        if ( $identifier !~ /^[\@\$]$/ ) {
-                            $message =
-                              "Space in identifier, following $identifier\n";
-                        }
-                    }
-                }
+    # shouldn't happen - we shouldn't be here if operator is expected
+    else { warning("Program Bug in find_angle_operator_termination\n") }
  
  
-                # else:
-                # space after '->' is ok
-            }
-            elsif ( $tok eq '^' ) {
+    # To illustrate what we might be looking at, in case we are
+    # guessing, here are some examples of valid angle operators
+    # (or file globs):
+    #  <tmp_imp/*>
+    #  <FH>
+    #  <$fh>
+    #  <*.c *.h>
+    #  <_>
+    #  <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
+    #  <${PREFIX}*img*.$IMAGE_TYPE>
+    #  <img*.$IMAGE_TYPE>
+    #  <Timg*.$IMAGE_TYPE>
+    #  <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
+    #
+    # Here are some examples of lines which do not have angle operators:
+    #  return undef unless $self->[2]++ < $#{$self->[1]};
+    #  < 2  || @$t >
+    #
+    # the following line from dlister.pl caused trouble:
+    #  print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
+    #
+    # If the '<' starts an angle operator, it must end on this line and
+    # it must not have certain characters like ';' and '=' in it.  I use
+    # this to limit the testing.  This filter should be improved if
+    # possible.
  
  
-                # check for some special variables like $^W
-                if ( $identifier =~ /^[\$\*\@\%]$/ ) {
-                    $identifier .= $tok;
-                    $id_scan_state = 'A';
-                }
-                else {
-                    $id_scan_state = '';
-                }
-            }
-            else {    # something else
+    if ( $input_line =~ /($filter)/g ) {
  
  
-                # check for various punctuation variables
-                if ( $identifier =~ /^[\$\*\@\%]$/ ) {
-                    $identifier .= $tok;
-                }
+        if ( $1 eq '>' ) {
  
  
-                elsif ( $identifier eq '$#' ) {
+            # We MAY have found an angle operator termination if we get
+            # here, but we need to do more to be sure we haven't been
+            # fooled.
+            my $pos = pos($input_line);
  
  
-                    if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
+            my $pos_beg = $$rtoken_map[$i];
+            my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
  
  
-                    # perl seems to allow just these: $#: $#- $#+
-                    elsif ( $tok =~ /^[\:\-\+]$/ ) {
-                        $type = 'i';
-                        $identifier .= $tok;
-                    }
-                    else {
-                        $i = $i_save;
-                        write_logfile_entry( 'Use of $# is deprecated' . "\n" );
-                    }
+            # Reject if the closing '>' follows a '-' as in:
+            # if ( VERSION < 5.009 && $op-> name eq 'aassign' ) { }
+            if ( $expecting eq UNKNOWN ) {
+                my $check = substr( $input_line, $pos - 2, 1 );
+                if ( $check eq '-' ) {
+                    return ( $i, $type );
                  }
                  }
-                elsif ( $identifier eq '$$' ) {
+            }
  
  
-                    # perl does not allow references to punctuation
-                    # variables without braces.  For example, this
-                    # won't work:
-                    #  $:=\4;
-                    #  $a = $$:;
-                    # You would have to use
-                    #  $a = ${$:};
+            ######################################debug#####
+            #write_diagnostics( "ANGLE? :$str\n");
+            #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
+            ######################################debug#####
+            $type = 'Q';
+            my $error;
+            ( $i, $error ) =
+              inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
  
  
-                    $i = $i_save;
-                    if ( $tok eq '{' ) { $type = 't' }
-                    else { $type = 'i' }
-                }
-                elsif ( $identifier eq '->' ) {
-                    $i = $i_save;
-                }
-                else {
-                    $i = $i_save;
-                    if ( length($identifier) == 1 ) { $identifier = ''; }
-                }
-                $id_scan_state = '';
-                last;
+            # It may be possible that a quote ends midway in a pretoken.
+            # If this happens, it may be necessary to split the pretoken.
+            if ($error) {
+                warning(
+                    "Possible tokinization error..please check this line\n");
+                report_possible_bug();
              }
              }
-        }
-        elsif ( $id_scan_state eq '&' ) {    # starting sub call?
  
  
-            if ( $tok =~ /^[\$A-Za-z_]/ ) {    # alphanumeric ..
-                $id_scan_state = ':';          # now need ::
-                $saw_alpha     = 1;
-                $identifier .= $tok;
-            }
-            elsif ( $tok eq "'" && $allow_tick ) {    # alphanumeric ..
-                $id_scan_state = ':';                 # now need ::
-                $saw_alpha     = 1;
-                $identifier .= $tok;
-            }
-            elsif ( $tok =~ /^[0-9]/ ) {    # numeric..see comments above
-                $id_scan_state = ':';       # now need ::
-                $saw_alpha     = 1;
-                $identifier .= $tok;
-            }
-            elsif ( $tok =~ /^\s*$/ ) {     # allow space
-            }
-            elsif ( $tok eq '::' ) {        # leading ::
-                $id_scan_state = 'A';       # accept alpha next
-                $identifier .= $tok;
+            # Now let's see where we stand....
+            # OK if math op not possible
+            if ( $expecting == TERM ) {
              }
              }
-            elsif ( $tok eq '{' ) {
-                if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
-                $i             = $i_save;
-                $id_scan_state = '';
-                last;
+
+            # OK if there are no more than 2 pre-tokens inside
+            # (not possible to write 2 token math between < and >)
+            # This catches most common cases
+            elsif ( $i <= $i_beg + 3 ) {
+                write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
              }
              }
+
+            # Not sure..
              else {
  
              else {
  
-                # punctuation variable?
-                # testfile: cunningham4.pl
-                if ( $identifier eq '&' ) {
-                    $identifier .= $tok;
+                # Let's try a Brace Test: any braces inside must balance
+                my $br = 0;
+                while ( $str =~ /\{/g ) { $br++ }
+                while ( $str =~ /\}/g ) { $br-- }
+                my $sb = 0;
+                while ( $str =~ /\[/g ) { $sb++ }
+                while ( $str =~ /\]/g ) { $sb-- }
+                my $pr = 0;
+                while ( $str =~ /\(/g ) { $pr++ }
+                while ( $str =~ /\)/g ) { $pr-- }
+
+                # if braces do not balance - not angle operator
+                if ( $br || $sb || $pr ) {
+                    $i    = $i_beg;
+                    $type = '<';
+                    write_diagnostics(
+                        "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
                  }
                  }
+
+                # we should keep doing more checks here...to be continued
+                # Tentatively accepting this as a valid angle operator.
+                # There are lots more things that can be checked.
                  else {
                  else {
-                    $identifier = '';
-                    $i          = $i_save;
-                    $type       = '&';
+                    write_diagnostics(
+                        "ANGLE-Guessing yes: $str expecting=$expecting\n");
+                    write_logfile_entry("Guessing angle operator here: $str\n");
                  }
                  }
-                $id_scan_state = '';
-                last;
              }
          }
              }
          }
-        elsif ( $id_scan_state eq 'A' ) {    # looking for alpha (after ::)
  
  
-            if ( $tok =~ /^[A-Za-z_]/ ) {    # found it
-                $identifier .= $tok;
-                $id_scan_state = ':';        # now need ::
-                $saw_alpha     = 1;
-            }
-            elsif ( $tok eq "'" && $allow_tick ) {
-                $identifier .= $tok;
-                $id_scan_state = ':';        # now need ::
-                $saw_alpha     = 1;
-            }
-            elsif ( $tok =~ /^[0-9]/ ) {     # numeric..see comments above
-                $identifier .= $tok;
-                $id_scan_state = ':';        # now need ::
-                $saw_alpha     = 1;
-            }
-            elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
-                $id_scan_state = '(';
-                $identifier .= $tok;
-            }
-            elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
-                $id_scan_state = ')';
-                $identifier .= $tok;
-            }
-            else {
-                $id_scan_state = '';
-                $i             = $i_save;
-                last;
+        # didn't find ending >
+        else {
+            if ( $expecting == TERM ) {
+                warning("No ending > for angle operator\n");
              }
          }
              }
          }
-        elsif ( $id_scan_state eq ':' ) {    # looking for :: after alpha
+    }
+    return ( $i, $type );
+}
  
  
-            if ( $tok eq '::' ) {            # got it
-                $identifier .= $tok;
-                $id_scan_state = 'A';        # now require alpha
-            }
-            elsif ( $tok =~ /^[A-Za-z_]/ ) {    # more alphanumeric is ok here
-                $identifier .= $tok;
-                $id_scan_state = ':';           # now need ::
-                $saw_alpha     = 1;
-            }
-            elsif ( $tok =~ /^[0-9]/ ) {        # numeric..see comments above
-                $identifier .= $tok;
-                $id_scan_state = ':';           # now need ::
-                $saw_alpha     = 1;
-            }
-            elsif ( $tok eq "'" && $allow_tick ) {    # tick
+sub scan_number_do {
  
  
-                if ( $is_keyword{$identifier} ) {
-                    $id_scan_state = '';              # that's all
-                    $i             = $i_save;
-                }
-                else {
-                    $identifier .= $tok;
-                }
-            }
-            elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
-                $id_scan_state = '(';
-                $identifier .= $tok;
-            }
-            elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
-                $id_scan_state = ')';
-                $identifier .= $tok;
-            }
-            else {
-                $id_scan_state = '';        # that's all
-                $i             = $i_save;
-                last;
-            }
-        }
-        elsif ( $id_scan_state eq '(' ) {    # looking for ( of prototype
+    #  scan a number in any of the formats that Perl accepts
+    #  Underbars (_) are allowed in decimal numbers.
+    #  input parameters -
+    #      $input_line  - the string to scan
+    #      $i           - pre_token index to start scanning
+    #    $rtoken_map    - reference to the pre_token map giving starting
+    #                    character position in $input_line of token $i
+    #  output parameters -
+    #    $i            - last pre_token index of the number just scanned
+    #    number        - the number (characters); or undef if not a number
+
+    my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_;
+    my $pos_beg = $$rtoken_map[$i];
+    my $pos;
+    my $i_begin = $i;
+    my $number  = undef;
+    my $type    = $input_type;
+
+    my $first_char = substr( $input_line, $pos_beg, 1 );
+
+    # Look for bad starting characters; Shouldn't happen..
+    if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
+        warning("Program bug - scan_number given character $first_char\n");
+        report_definite_bug();
+        return ( $i, $type, $number );
+    }
+
+    # handle v-string without leading 'v' character ('Two Dot' rule)
+    # (vstring.t)
+    # TODO: v-strings may contain underscores
+    pos($input_line) = $pos_beg;
+    if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
+        $pos = pos($input_line);
+        my $numc = $pos - $pos_beg;
+        $number = substr( $input_line, $pos_beg, $numc );
+        $type = 'v';
+        report_v_string($number);
+    }
  
  
-            if ( $tok eq '(' ) {             # got it
-                $identifier .= $tok;
-                $id_scan_state = ')';        # now find the end of it
-            }
-            elsif ( $tok =~ /^\s*$/ ) {      # blank - keep going
-                $identifier .= $tok;
-            }
-            else {
-                $id_scan_state = '';         # that's all - no prototype
-                $i             = $i_save;
-                last;
-            }
+    # handle octal, hex, binary
+    if ( !defined($number) ) {
+        pos($input_line) = $pos_beg;
+        if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g )
+        {
+            $pos = pos($input_line);
+            my $numc = $pos - $pos_beg;
+            $number = substr( $input_line, $pos_beg, $numc );
+            $type = 'n';
          }
          }
-        elsif ( $id_scan_state eq ')' ) {    # looking for ) to end
+    }
  
  
-            if ( $tok eq ')' ) {             # got it
-                $identifier .= $tok;
-                $id_scan_state = '';         # all done
-                last;
-            }
-            elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
-                $identifier .= $tok;
-            }
-            else {    # probable error in script, but keep going
-                warning("Unexpected '$tok' while seeking end of prototype\n");
-                $identifier .= $tok;
+    # handle decimal
+    if ( !defined($number) ) {
+        pos($input_line) = $pos_beg;
+
+        if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
+            $pos = pos($input_line);
+
+            # watch out for things like 0..40 which would give 0. by this;
+            if (   ( substr( $input_line, $pos - 1, 1 ) eq '.' )
+                && ( substr( $input_line, $pos, 1 ) eq '.' ) )
+            {
+                $pos--;
              }
              }
+            my $numc = $pos - $pos_beg;
+            $number = substr( $input_line, $pos_beg, $numc );
+            $type = 'n';
          }
          }
-        else {        # can get here due to error in initialization
-            $id_scan_state = '';
-            $i             = $i_save;
+    }
+
+    # filter out non-numbers like e + - . e2  .e3 +e6
+    # the rule: at least one digit, and any 'e' must be preceded by a digit
+    if (
+        $number !~ /\d/    # no digits
+        || (   $number =~ /^(.*)[eE]/
+            && $1 !~ /\d/ )    # or no digits before the 'e'
+      )
+    {
+        $number = undef;
+        $type   = $input_type;
+        return ( $i, $type, $number );
+    }
+
+    # Found a number; now we must convert back from character position
+    # to pre_token index. An error here implies user syntax error.
+    # An example would be an invalid octal number like '009'.
+    my $error;
+    ( $i, $error ) =
+      inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
+    if ($error) { warning("Possibly invalid number\n") }
+
+    return ( $i, $type, $number );
+}
+
+sub inverse_pretoken_map {
+
+    # Starting with the current pre_token index $i, scan forward until
+    # finding the index of the next pre_token whose position is $pos.
+    my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
+    my $error = 0;
+
+    while ( ++$i <= $max_token_index ) {
+
+        if ( $pos <= $$rtoken_map[$i] ) {
+
+            # Let the calling routine handle errors in which we do not
+            # land on a pre-token boundary.  It can happen by running
+            # perltidy on some non-perl scripts, for example.
+            if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
+            $i--;
              last;
          }
      }
              last;
          }
      }
+    return ( $i, $error );
+}
  
  
-    if ( $id_scan_state eq ')' ) {
-        warning("Hit end of line while seeking ) to end prototype\n");
-    }
+sub find_here_doc {
  
  
-    # once we enter the actual identifier, it may not extend beyond
-    # the end of the current line
-    if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
-        $id_scan_state = '';
+    # find the target of a here document, if any
+    # input parameters:
+    #   $i - token index of the second < of <<
+    #   ($i must be less than the last token index if this is called)
+    # output parameters:
+    #   $found_target = 0 didn't find target; =1 found target
+    #   HERE_TARGET - the target string (may be empty string)
+    #   $i - unchanged if not here doc,
+    #    or index of the last token of the here target
+    #   $saw_error - flag noting unbalanced quote on here target
+    my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
+    my $ibeg                 = $i;
+    my $found_target         = 0;
+    my $here_doc_target      = '';
+    my $here_quote_character = '';
+    my $saw_error            = 0;
+    my ( $next_nonblank_token, $i_next_nonblank, $next_token );
+    $next_token = $$rtokens[ $i + 1 ];
+
+    # perl allows a backslash before the target string (heredoc.t)
+    my $backslash = 0;
+    if ( $next_token eq '\\' ) {
+        $backslash  = 1;
+        $next_token = $$rtokens[ $i + 2 ];
      }
      }
-    if ( $i < 0 ) { $i = 0 }
  
  
-    unless ($type) {
+    ( $next_nonblank_token, $i_next_nonblank ) =
+      find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
  
  
-        if ($saw_type) {
+    if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
  
  
-            if ($saw_alpha) {
-                if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
-                    $type = 'w';
-                }
-                else { $type = 'i' }
-            }
-            elsif ( $identifier eq '->' ) {
-                $type = '->';
+        my $in_quote    = 1;
+        my $quote_depth = 0;
+        my $quote_pos   = 0;
+        my $quoted_string;
+
+        (
+            $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
+            $quoted_string
+          )
+          = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
+            $here_quote_character, $quote_pos, $quote_depth, $max_token_index );
+
+        if ($in_quote) {    # didn't find end of quote, so no target found
+            $i = $ibeg;
+            if ( $expecting == TERM ) {
+                warning(
+"Did not find here-doc string terminator ($here_quote_character) before end of line \n"
+                );
+                $saw_error = 1;
              }
              }
-            elsif (
-                ( length($identifier) > 1 )
+        }
+        else {              # found ending quote
+            my $j;
+            $found_target = 1;
  
  
-                # In something like '@$=' we have an identifier '@$'
-                # In something like '$${' we have type '$$' (and only
-                # part of an identifier)
-                && !( $identifier =~ /\$$/ && $tok eq '{' )
-                && ( $identifier !~ /^(sub |package )$/ )
-              )
-            {
-                $type = 'i';
+            my $tokj;
+            for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
+                $tokj = $$rtokens[$j];
+
+                # we have to remove any backslash before the quote character
+                # so that the here-doc-target exactly matches this string
+                next
+                  if ( $tokj eq "\\"
+                    && $j < $i - 1
+                    && $$rtokens[ $j + 1 ] eq $here_quote_character );
+                $here_doc_target .= $tokj;
              }
              }
-            else { $type = 't' }
          }
          }
-        elsif ($saw_alpha) {
+    }
  
  
-            # type 'w' includes anything without leading type info
-            # ($,%,@,*) including something like abc::def::ghi
-            $type = 'w';
+    elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
+        $found_target = 1;
+        write_logfile_entry(
+            "found blank here-target after <<; suggest using \"\"\n");
+        $i = $ibeg;
+    }
+    elsif ( $next_token =~ /^\w/ ) {    # simple bareword or integer after <<
+
+        my $here_doc_expected;
+        if ( $expecting == UNKNOWN ) {
+            $here_doc_expected = guess_if_here_doc($next_token);
          }
          else {
          }
          else {
-            $type = '';
-        }    # this can happen on a restart
-    }
+            $here_doc_expected = 1;
+        }
+
+        if ($here_doc_expected) {
+            $found_target    = 1;
+            $here_doc_target = $next_token;
+            $i               = $ibeg + 1;
+        }
  
  
-    if ($identifier) {
-        $tok = $identifier;
-        if ($message) { write_logfile_entry($message) }
      }
      else {
      }
      else {
-        $tok = $tok_begin;
-        $i   = $i_begin;
+
+        if ( $expecting == TERM ) {
+            $found_target = 1;
+            write_logfile_entry("Note: bare here-doc operator <<\n");
+        }
+        else {
+            $i = $ibeg;
+        }
      }
  
      }
  
-    TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
-        my ( $a, $b, $c ) = caller;
-        print
-"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
-        print
-"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
-    };
-    return ( $i, $tok, $type, $id_scan_state, $identifier );
+    # patch to neglect any prepended backslash
+    if ( $found_target && $backslash ) { $i++ }
+
+    return ( $found_target, $here_doc_target, $here_quote_character, $i,
+        $saw_error );
+}
+
+sub do_quote {
+
+    # follow (or continue following) quoted string(s)
+    # $in_quote return code:
+    #   0 - ok, found end
+    #   1 - still must find end of quote whose target is $quote_character
+    #   2 - still looking for end of first of two quotes
+    #
+    # Returns updated strings:
+    #  $quoted_string_1 = quoted string seen while in_quote=1
+    #  $quoted_string_2 = quoted string seen while in_quote=2
+    my (
+        $i,               $in_quote,    $quote_character,
+        $quote_pos,       $quote_depth, $quoted_string_1,
+        $quoted_string_2, $rtokens,     $rtoken_map,
+        $max_token_index
+    ) = @_;
+
+    my $in_quote_starting = $in_quote;
+
+    my $quoted_string;
+    if ( $in_quote == 2 ) {    # two quotes/quoted_string_1s to follow
+        my $ibeg = $i;
+        (
+            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
+            $quoted_string
+          )
+          = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
+            $quote_pos, $quote_depth, $max_token_index );
+        $quoted_string_2 .= $quoted_string;
+        if ( $in_quote == 1 ) {
+            if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
+            $quote_character = '';
+        }
+        else {
+            $quoted_string_2 .= "\n";
+        }
+    }
+
+    if ( $in_quote == 1 ) {    # one (more) quote to follow
+        my $ibeg = $i;
+        (
+            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
+            $quoted_string
+          )
+          = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
+            $quote_pos, $quote_depth, $max_token_index );
+        $quoted_string_1 .= $quoted_string;
+        if ( $in_quote == 1 ) {
+            $quoted_string_1 .= "\n";
+        }
+    }
+    return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
+        $quoted_string_1, $quoted_string_2 );
  }
  
  sub follow_quoted_string {
  }
  
  sub follow_quoted_string {
@@ -24806,10 +25507,13 @@ sub follow_quoted_string {
      #   $beginning_tok = the starting quote character
      #   $quote_pos = index to check next for alphanumeric delimiter
      #   $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
      #   $beginning_tok = the starting quote character
      #   $quote_pos = index to check next for alphanumeric delimiter
      #   $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
-    my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth )
+    #   $quoted_string = the text of the quote (without quotation tokens)
+    my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth,
+        $max_token_index )
        = @_;
      my ( $tok, $end_tok );
        = @_;
      my ( $tok, $end_tok );
-    my $i = $i_beg - 1;
+    my $i             = $i_beg - 1;
+    my $quoted_string = "";
  
      TOKENIZER_DEBUG_FLAG_QUOTE && do {
          print
  
      TOKENIZER_DEBUG_FLAG_QUOTE && do {
          print
@@ -24861,8 +25565,10 @@ sub follow_quoted_string {
      # characters, whereas for a non-alphanumeric delimiter, only tokens of
      # length 1 can match.
  
      # characters, whereas for a non-alphanumeric delimiter, only tokens of
      # length 1 can match.
  
-    # loop for case of alphanumeric quote delimiter..
+    ###################################################################
+    # Case 1 (rare): loop for case of alphanumeric quote delimiter..
      # "quote_pos" is the position the current word to begin searching
      # "quote_pos" is the position the current word to begin searching
+    ###################################################################
      if ( $beginning_tok =~ /\w/ ) {
  
          # Note this because it is not recommended practice except
      if ( $beginning_tok =~ /\w/ ) {
  
          # Note this because it is not recommended practice except
@@ -24879,10 +25585,12 @@ sub follow_quoted_string {
  
                  if ( $tok eq '\\' ) {
  
  
                  if ( $tok eq '\\' ) {
  
+                    # retain backslash unless it hides the end token
+                    $quoted_string .= $tok
+                      unless $$rtokens[ $i + 1 ] eq $end_tok;
                      $quote_pos++;
                      last if ( $i >= $max_token_index );
                      $tok = $$rtokens[ ++$i ];
                      $quote_pos++;
                      last if ( $i >= $max_token_index );
                      $tok = $$rtokens[ ++$i ];
-
                  }
              }
              my $old_pos = $quote_pos;
                  }
              }
              my $old_pos = $quote_pos;
@@ -24895,6 +25603,9 @@ sub follow_quoted_string {
  
              if ( $quote_pos > 0 ) {
  
  
              if ( $quote_pos > 0 ) {
  
+                $quoted_string .=
+                  substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
+
                  $quote_depth--;
  
                  if ( $quote_depth == 0 ) {
                  $quote_depth--;
  
                  if ( $quote_depth == 0 ) {
@@ -24902,10 +25613,15 @@ sub follow_quoted_string {
                      last;
                  }
              }
                      last;
                  }
              }
+            else {
+                $quoted_string .= substr( $tok, $old_pos );
+            }
          }
      }
  
          }
      }
  
-    # loop for case of a non-alphanumeric quote delimiter..
+    ########################################################################
+    # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
+    ########################################################################
      else {
  
          while ( $i < $max_token_index ) {
      else {
  
          while ( $i < $max_token_index ) {
@@ -24923,12 +25639,188 @@ sub follow_quoted_string {
                  $quote_depth++;
              }
              elsif ( $tok eq '\\' ) {
                  $quote_depth++;
              }
              elsif ( $tok eq '\\' ) {
-                $i++;
+
+                # retain backslash unless it hides the beginning or end token
+                $tok = $$rtokens[ ++$i ];
+                $quoted_string .= '\\'
+                  unless ( $tok eq $end_tok || $tok eq $beginning_tok );
              }
              }
+            $quoted_string .= $tok;
          }
      }
      if ( $i > $max_token_index ) { $i = $max_token_index }
          }
      }
      if ( $i > $max_token_index ) { $i = $max_token_index }
-    return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth );
+    return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth,
+        $quoted_string );
+}
+
+sub indicate_error {
+    my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
+    interrupt_logfile();
+    warning($msg);
+    write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
+    resume_logfile();
+}
+
+sub write_error_indicator_pair {
+    my ( $line_number, $input_line, $pos, $carrat ) = @_;
+    my ( $offset, $numbered_line, $underline ) =
+      make_numbered_line( $line_number, $input_line, $pos );
+    $underline = write_on_underline( $underline, $pos - $offset, $carrat );
+    warning( $numbered_line . "\n" );
+    $underline =~ s/\s*$//;
+    warning( $underline . "\n" );
+}
+
+sub make_numbered_line {
+
+    #  Given an input line, its line number, and a character position of
+    #  interest, create a string not longer than 80 characters of the form
+    #     $lineno: sub_string
+    #  such that the sub_string of $str contains the position of interest
+    #
+    #  Here is an example of what we want, in this case we add trailing
+    #  '...' because the line is long.
+    #
+    # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
+    #
+    #  Here is another example, this time in which we used leading '...'
+    #  because of excessive length:
+    #
+    # 2: ... er of the World Wide Web Consortium's
+    #
+    #  input parameters are:
+    #   $lineno = line number
+    #   $str = the text of the line
+    #   $pos = position of interest (the error) : 0 = first character
+    #
+    #   We return :
+    #     - $offset = an offset which corrects the position in case we only
+    #       display part of a line, such that $pos-$offset is the effective
+    #       position from the start of the displayed line.
+    #     - $numbered_line = the numbered line as above,
+    #     - $underline = a blank 'underline' which is all spaces with the same
+    #       number of characters as the numbered line.
+
+    my ( $lineno, $str, $pos ) = @_;
+    my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
+    my $excess = length($str) - $offset - 68;
+    my $numc   = ( $excess > 0 ) ? 68 : undef;
+
+    if ( defined($numc) ) {
+        if ( $offset == 0 ) {
+            $str = substr( $str, $offset, $numc - 4 ) . " ...";
+        }
+        else {
+            $str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
+        }
+    }
+    else {
+
+        if ( $offset == 0 ) {
+        }
+        else {
+            $str = "... " . substr( $str, $offset + 4 );
+        }
+    }
+
+    my $numbered_line = sprintf( "%d: ", $lineno );
+    $offset -= length($numbered_line);
+    $numbered_line .= $str;
+    my $underline = " " x length($numbered_line);
+    return ( $offset, $numbered_line, $underline );
+}
+
+sub write_on_underline {
+
+    # The "underline" is a string that shows where an error is; it starts
+    # out as a string of blanks with the same length as the numbered line of
+    # code above it, and we have to add marking to show where an error is.
+    # In the example below, we want to write the string '--^' just below
+    # the line of bad code:
+    #
+    # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
+    #                 ---^
+    # We are given the current underline string, plus a position and a
+    # string to write on it.
+    #
+    # In the above example, there will be 2 calls to do this:
+    # First call:  $pos=19, pos_chr=^
+    # Second call: $pos=16, pos_chr=---
+    #
+    # This is a trivial thing to do with substr, but there is some
+    # checking to do.
+
+    my ( $underline, $pos, $pos_chr ) = @_;
+
+    # check for error..shouldn't happen
+    unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
+        return $underline;
+    }
+    my $excess = length($pos_chr) + $pos - length($underline);
+    if ( $excess > 0 ) {
+        $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
+    }
+    substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
+    return ($underline);
+}
+
+sub pre_tokenize {
+
+    # Break a string, $str, into a sequence of preliminary tokens.  We
+    # are interested in these types of tokens:
+    #   words       (type='w'),            example: 'max_tokens_wanted'
+    #   digits      (type = 'd'),          example: '0755'
+    #   whitespace  (type = 'b'),          example: '   '
+    #   any other single character (i.e. punct; type = the character itself).
+    # We cannot do better than this yet because we might be in a quoted
+    # string or pattern.  Caller sets $max_tokens_wanted to 0 to get all
+    # tokens.
+    my ( $str, $max_tokens_wanted ) = @_;
+
+    # we return references to these 3 arrays:
+    my @tokens    = ();     # array of the tokens themselves
+    my @token_map = (0);    # string position of start of each token
+    my @type      = ();     # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
+
+    do {
+
+        # whitespace
+        if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
+
+        # numbers
+        # note that this must come before words!
+        elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
+
+        # words
+        elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
+
+        # single-character punctuation
+        elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
+
+        # that's all..
+        else {
+            return ( \@tokens, \@token_map, \@type );
+        }
+
+        push @tokens,    $1;
+        push @token_map, pos($str);
+
+    } while ( --$max_tokens_wanted != 0 );
+
+    return ( \@tokens, \@token_map, \@type );
+}
+
+sub show_tokens {
+
+    # this is an old debug routine
+    my ( $rtokens, $rtoken_map ) = @_;
+    my $num = scalar(@$rtokens);
+    my $i;
+
+    for ( $i = 0 ; $i < $num ; $i++ ) {
+        my $len = length( $$rtokens[$i] );
+        print "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n";
+    }
  }
  
  sub matching_end_token {
  }
  
  sub matching_end_token {
@@ -24953,15 +25845,91 @@ sub matching_end_token {
      }
  }
  
      }
  }
  
+sub dump_token_types {
+    my $class = shift;
+    my $fh    = shift;
+
+    # This should be the latest list of token types in use
+    # adding NEW_TOKENS: add a comment here
+    print $fh <<'END_OF_LIST';
+
+Here is a list of the token types currently used for lines of type 'CODE'.  
+For the following tokens, the "type" of a token is just the token itself.  
+
+.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
+( ) <= >= == =~ !~ != ++ -- /= x=
+... **= <<= >>= &&= ||= //= <=> 
+, + - / * | % ! x ~ = \ ? : . < > ^ &
+
+The following additional token types are defined:
+
+ type    meaning
+    b    blank (white space) 
+    {    indent: opening structural curly brace or square bracket or paren
+         (code block, anonymous hash reference, or anonymous array reference)
+    }    outdent: right structural curly brace or square bracket or paren
+    [    left non-structural square bracket (enclosing an array index)
+    ]    right non-structural square bracket
+    (    left non-structural paren (all but a list right of an =)
+    )    right non-structural parena
+    L    left non-structural curly brace (enclosing a key)
+    R    right non-structural curly brace 
+    ;    terminal semicolon
+    f    indicates a semicolon in a "for" statement
+    h    here_doc operator <<
+    #    a comment
+    Q    indicates a quote or pattern
+    q    indicates a qw quote block
+    k    a perl keyword
+    C    user-defined constant or constant function (with void prototype = ())
+    U    user-defined function taking parameters
+    G    user-defined function taking block parameter (like grep/map/eval)
+    M    (unused, but reserved for subroutine definition name)
+    P    (unused, but -html uses it to label pod text)
+    t    type indicater such as %,$,@,*,&,sub
+    w    bare word (perhaps a subroutine call)
+    i    identifier of some type (with leading %, $, @, *, &, sub, -> )
+    n    a number
+    v    a v-string
+    F    a file test operator (like -e)
+    Y    File handle
+    Z    identifier in indirect object slot: may be file handle, object
+    J    LABEL:  code block label
+    j    LABEL after next, last, redo, goto
+    p    unary +
+    m    unary -
+    pp   pre-increment operator ++
+    mm   pre-decrement operator -- 
+    A    : used as attribute separator
+    
+    Here are the '_line_type' codes used internally:
+    SYSTEM         - system-specific code before hash-bang line
+    CODE           - line of perl code (including comments)
+    POD_START      - line starting pod, such as '=head'
+    POD            - pod documentation text
+    POD_END        - last line of pod section, '=cut'
+    HERE           - text of here-document
+    HERE_END       - last line of here-doc (target word)
+    FORMAT         - format section
+    FORMAT_END     - last line of format section, '.'
+    DATA_START     - __DATA__ line
+    DATA           - unidentified text following __DATA__
+    END_START      - __END__ line
+    END            - unidentified text following __END__
+    ERROR          - we are in big trouble, probably not a perl script
+END_OF_LIST
+}
+
  BEGIN {
  
      # These names are used in error messages
      @opening_brace_names = qw# '{' '[' '(' '?' #;
      @closing_brace_names = qw# '}' ']' ')' ':' #;
  
  BEGIN {
  
      # These names are used in error messages
      @opening_brace_names = qw# '{' '[' '(' '?' #;
      @closing_brace_names = qw# '}' ']' ')' ':' #;
  
+    ## TESTING: added ~~
      my @digraphs = qw(
        .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
      my @digraphs = qw(
        .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
-      <= >= == =~ !~ != ++ -- /= x=
+      <= >= == =~ !~ != ++ -- /= x= ~~
      );
      @is_digraph{@digraphs} = (1) x scalar(@digraphs);
  
      );
      @is_digraph{@digraphs} = (1) x scalar(@digraphs);
  
@@ -24992,7 +25960,7 @@ BEGIN {
      @is_block_operator{@_} = (1) x scalar(@_);
  
      # these functions allow an identifier in the indirect object slot
      @is_block_operator{@_} = (1) x scalar(@_);
  
      # these functions allow an identifier in the indirect object slot
-    @_ = qw( print printf sort exec system );
+    @_ = qw( print printf sort exec system say);
      @is_indirect_object_taker{@_} = (1) x scalar(@_);
  
      # These tokens may precede a code block
      @is_indirect_object_taker{@_} = (1) x scalar(@_);
  
      # These tokens may precede a code block
@@ -25225,9 +26193,14 @@ BEGIN {
        given
        when
        err
        given
        when
        err
+      say
      );
  
      );
  
-    # patched above for SWITCH/CASE
+    # patched above for SWITCH/CASE given/when err say
+    # 'err' is a fairly safe addition.
+    # TODO: 'default' still needed if appropriate
+    # 'use feature' seen, but perltidy works ok without it.
+    # Concerned that 'default' could break code.
      push( @Keywords, @value_requestor );
  
      # These are treated the same but are not keywords:
      push( @Keywords, @value_requestor );
  
      # These are treated the same but are not keywords:
@@ -25282,7 +26255,7 @@ BEGIN {
  
      # these token TYPES expect trailing operator but not a term
      # note: ++ and -- are post-increment and decrement, 'C' = constant
  
      # these token TYPES expect trailing operator but not a term
      # note: ++ and -- are post-increment and decrement, 'C' = constant
-    my @operator_requestor_types = qw( ++ -- C );
+    my @operator_requestor_types = qw( ++ -- C <> q );
      @expecting_operator_types{@operator_requestor_types} =
        (1) x scalar(@operator_requestor_types);
  
      @expecting_operator_types{@operator_requestor_types} =
        (1) x scalar(@operator_requestor_types);
  
@@ -25292,14 +26265,19 @@ BEGIN {
      my @value_requestor_type = qw#
        L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
        **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
      my @value_requestor_type = qw#
        L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
        **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
-      <= >= == != => \ > < % * / ? & | ** <=>
-      f F pp mm Y p m U J G
+      <= >= == != => \ > < % * / ? & | ** <=> ~~
+      f F pp mm Y p m U J G j >> << ^ t
        #;
      push( @value_requestor_type, ',' )
        ;    # (perl doesn't like a ',' in a qw block)
      @expecting_term_types{@value_requestor_type} =
        (1) x scalar(@value_requestor_type);
  
        #;
      push( @value_requestor_type, ',' )
        ;    # (perl doesn't like a ',' in a qw block)
      @expecting_term_types{@value_requestor_type} =
        (1) x scalar(@value_requestor_type);
  
+    # Note: the following valid token types are not assigned here to
+    # hashes requesting to be followed by values or terms, but are
+    # instead currently hard-coded into sub operator_expected:
+    # ) -> :: Q R Z ] b h i k n v w } #
+
      # For simple syntax checking, it is nice to have a list of operators which
      # will really be unhappy if not followed by a term.  This includes most
      # of the above...
      # For simple syntax checking, it is nice to have a list of operators which
      # will really be unhappy if not followed by a term.  This includes most
      # of the above...
@@ -25745,7 +26723,7 @@ to perltidy.
  
  =head1 VERSION
  
  
  =head1 VERSION
  
-This man page documents Perl::Tidy version 20060614.
+This man page documents Perl::Tidy version 20060719.
  
  =head1 AUTHOR
  
  
  =head1 AUTHOR
author	don <don@8f7917da-ec0b-0410-a553-b9b0e350d17e>
	Sat, 29 Jul 2006 05:04:15 +0000 (05:04 +0000)
committer	don <don@8f7917da-ec0b-0410-a553-b9b0e350d17e>
	Sat, 29 Jul 2006 05:04:15 +0000 (05:04 +0000)
BUGS		patch \| blob \| history
CHANGES		patch \| blob \| history
META.yml		patch \| blob \| history
TODO		patch \| blob \| history
bin/perltidy		patch \| blob \| history
debian/changelog		patch \| blob \| history
docs/perltidy.1		patch \| blob \| history
lib/Perl/Tidy.pm		patch \| blob \| history