New upstream version 20160302

[perltidy.git] / lib / Perl / Tidy.pm
diff --git a/lib/Perl/Tidy.pm b/lib/Perl/Tidy.pm

index c326a8f69718f15df1c444dc625708e87df6c73e..2b0df0ebb207c656ed6471bcbd07b3082c59de84 100644 (file)
--- a/lib/Perl/Tidy.pm
+++ b/lib/Perl/Tidy.pm
@@ -3,7 +3,7 @@
  #
  #    perltidy - a perl script indenter and formatter
  #
-#    Copyright (c) 2000-2014 by Steve Hancock
+#    Copyright (c) 2000-2016 by Steve Hancock
  #    Distributed under the GPL license agreement; see file COPYING
  #
  #    This program is free software; you can redistribute it and/or modify
@@ -53,9 +53,11 @@
  ############################################################
  
  package Perl::Tidy;
-use 5.004;    # need IO::File from 5.004 or later
-BEGIN { $^W = 1; }    # turn on warnings
  
+# Actually should use a version later than about 5.8.5 to use
+# wide characters.
+use 5.004;    # need IO::File from 5.004 or later
+use warnings;
  use strict;
  use Exporter;
  use Carp;
@@ -67,19 +69,21 @@ use vars qw{
    @EXPORT
    $missing_file_spec
    $fh_stderr
+  $rOpts_character_encoding
  };
  
  @ISA    = qw( Exporter );
  @EXPORT = qw( &perltidy );
  
  use Cwd;
+use Encode ();
  use IO::File;
  use File::Basename;
  use File::Copy;
  use File::Temp qw(tempfile);
  
  BEGIN {
-    ( $VERSION = q($Id: Tidy.pm,v 1.74 2014/03/28 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
+    ( $VERSION = q($Id: Tidy.pm,v 1.74 2016/03/02 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
  }
  
  sub streamhandle {
@@ -121,7 +125,10 @@ sub streamhandle {
              # skipped and we can just let it crash if there is no
              # getline.
              if ( $mode =~ /[rR]/ ) {
-                if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
+
+                # RT#97159; part 1 of 2: updated to use 'can'
+                ##if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
+                if ( $ref->can('getline') ) {
                      $New = sub { $filename };
                  }
                  else {
@@ -138,7 +145,10 @@ EOM
              # Accept an object with a print method for writing.
              # See note above about IO::File
              if ( $mode =~ /[wW]/ ) {
-                if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
+
+                # RT#97159; part 2 of 2: updated to use 'can'
+                ##if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
+                if ( $ref->can('print') ) {
                      $New = sub { $filename };
                  }
                  else {
@@ -165,6 +175,7 @@ EOM
      }
      $fh = $New->( $filename, $mode )
        or Warn("Couldn't open file:$filename in mode:$mode : $!\n");
+
      return $fh, ( $ref or $filename );
  }
  
@@ -459,16 +470,17 @@ EOM
      #---------------------------------------------------------------
      # get command line options
      #---------------------------------------------------------------
-    my (
-        $rOpts,       $config_file,      $rraw_options,
-        $saw_extrude, $saw_pbp,          $roption_string,
-        $rexpansion,  $roption_category, $roption_range
-      )
+    my ( $rOpts, $config_file, $rraw_options, $roption_string,
+        $rexpansion, $roption_category, $roption_range )
        = process_command_line(
          $perltidyrc_stream,  $is_Windows, $Windows_type,
          $rpending_complaint, $dump_options_type,
        );
  
+    my $saw_extrude = ( grep m/^-extrude$/, @$rraw_options ) ? 1 : 0;
+    my $saw_pbp =
+      ( grep m/^-(pbp|perl-best-practices)$/, @$rraw_options ) ? 1 : 0;
+
      #---------------------------------------------------------------
      # Handle requests to dump information
      #---------------------------------------------------------------
@@ -544,6 +556,8 @@ EOM
          user => '',
      );
  
+    $rOpts_character_encoding = $rOpts->{'character-encoding'};
+
      # be sure we have a valid output format
      unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
          my $formats = join ' ',
@@ -794,12 +808,33 @@ EOM
          # Prefilters and postfilters: The prefilter is a code reference
          # that will be applied to the source before tidying, and the
          # postfilter is a code reference to the result before outputting.
-        if ($prefilter) {
+        if (
+            $prefilter
+            || (   $rOpts_character_encoding
+                && $rOpts_character_encoding eq 'utf8' )
+          )
+        {
              my $buf = '';
              while ( my $line = $source_object->get_line() ) {
                  $buf .= $line;
              }
-            $buf = $prefilter->($buf);
+
+            $buf = $prefilter->($buf) if $prefilter;
+
+            if (   $rOpts_character_encoding
+                && $rOpts_character_encoding eq 'utf8'
+                && !utf8::is_utf8($buf) )
+            {
+                eval {
+                    $buf = Encode::decode( 'UTF-8', $buf,
+                        Encode::FB_CROAK | Encode::LEAVE_SRC );
+                };
+                if ($@) {
+                    Warn
+"skipping file: $input_file: Unable to decode source as UTF-8\n";
+                    next;
+                }
+            }
  
              $source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts,
                  $rpending_logfile_message );
@@ -898,9 +933,9 @@ EOM
          # Eventually all I/O may be done with binmode, but for now it is
          # only done when a user requests a particular line separator
          # through the -ple or -ole flags
-        my $binmode = 0;
-        if   ( defined($line_separator) ) { $binmode        = 1 }
-        else                              { $line_separator = "\n" }
+        my $binmode = defined($line_separator)
+          || defined($rOpts_character_encoding);
+        $line_separator = "\n" unless defined($line_separator);
  
          my ( $sink_object, $postfilter_buffer );
          if ($postfilter) {
@@ -1046,6 +1081,7 @@ EOM
                  look_for_autoloader => $rOpts->{'look-for-autoloader'},
                  look_for_selfloader => $rOpts->{'look-for-selfloader'},
                  trim_qw             => $rOpts->{'trim-qw'},
+                extended_syntax     => $rOpts->{'extended-syntax'},
  
                  continuation_indentation =>
                    $rOpts->{'continuation-indentation'},
@@ -1304,7 +1340,7 @@ sub get_stream_as_named_file {
              my ( $fh_stream, $fh_name ) =
                Perl::Tidy::streamhandle( $stream, 'r' );
              if ($fh_stream) {
-                my ( $fout, $tmpnam ) = tempfile();
+                my ( $fout, $tmpnam ) = File::Temp::tempfile();
                  if ($fout) {
                      $fname      = $tmpnam;
                      $is_tmpfile = 1;
@@ -1528,6 +1564,7 @@ sub generate_options {
      $add_option->( 'standard-error-output',      'se',    '!' );
      $add_option->( 'standard-output',            'st',    '!' );
      $add_option->( 'warning-output',             'w',     '!' );
+    $add_option->( 'character-encoding',         'enc',   '=s' );
  
      # options which are both toggle switches and values moved here
      # to hide from tidyview (which does not show category 0 flags):
@@ -1549,6 +1586,7 @@ sub generate_options {
      $add_option->( 'preserve-line-endings',        'ple',  '!' );
      $add_option->( 'tabs',                         't',    '!' );
      $add_option->( 'default-tabsize',              'dt',   '=i' );
+    $add_option->( 'extended-syntax',              'xs',   '!' );
  
      ########################################
      $category = 2;    # Code indentation control
@@ -1759,6 +1797,7 @@ sub generate_options {
      %option_range = (
          'format'             => [ 'tidy', 'html', 'user' ],
          'output-line-ending' => [ 'dos',  'win',  'mac', 'unix' ],
+        'character-encoding' => [ 'none', 'utf8' ],
  
          'block-brace-tightness'    => [ 0, 2 ],
          'brace-tightness'          => [ 0, 2 ],
@@ -1821,6 +1860,7 @@ sub generate_options {
        continuation-indentation=2
        delete-old-newlines
        delete-semicolons
+      extended-syntax
        fuzzy-line-length
        hanging-side-comments
        indent-block-comments
@@ -1845,6 +1885,7 @@ sub generate_options {
        nostatic-side-comments
        notabs
        nowarning-output
+      character-encoding=none
        outdent-labels
        outdent-long-quotes
        outdent-long-comments
@@ -1910,6 +1951,9 @@ sub generate_options {
          'nhtml' => [qw(format=tidy)],
          'tidy'  => [qw(format=tidy)],
  
+        'utf8' => [qw(character-encoding=utf8)],
+        'UTF8' => [qw(character-encoding=utf8)],
+
          'swallow-optional-blank-lines'   => [qw(kbl=0)],
          'noswallow-optional-blank-lines' => [qw(kbl=1)],
          'sob'                            => [qw(kbl=0)],
@@ -1968,7 +2012,7 @@ sub generate_options {
          'sct'                    => [qw(scp schb scsb)],
          'stack-closing-tokens'   => => [qw(scp schb scsb)],
          'nsct'                   => [qw(nscp nschb nscsb)],
-        'nostack-opening-tokens' => [qw(nscp nschb nscsb)],
+        'nostack-closing-tokens' => [qw(nscp nschb nscsb)],
  
          'sac'                    => [qw(sot sct)],
          'nsac'                   => [qw(nsot nsct)],
@@ -2164,8 +2208,6 @@ sub _process_command_line {
      my @raw_options        = ();
      my $config_file        = "";
      my $saw_ignore_profile = 0;
-    my $saw_extrude        = 0;
-    my $saw_pbp            = 0;
      my $saw_dump_profile   = 0;
      my $i;
  
@@ -2214,12 +2256,6 @@ sub _process_command_line {
          elsif ( $i =~ /^-(pro|profile)=?$/ ) {
              Die "usage: -pro=filename or --profile=filename, no spaces\n";
          }
-        elsif ( $i =~ /^-extrude$/ ) {
-            $saw_extrude = 1;
-        }
-        elsif ( $i =~ /^-(pbp|perl-best-practices)$/ ) {
-            $saw_pbp = 1;
-        }
          elsif ( $i =~ /^-(help|h|HELP|H|\?)$/ ) {
              usage();
              Exit 0;
@@ -2299,10 +2335,9 @@ EOM
  
          if ($fh_config) {
  
-            my ( $rconfig_list, $death_message, $_saw_pbp ) =
+            my ( $rconfig_list, $death_message ) =
                read_config_file( $fh_config, $config_file, $rexpansion );
              Die $death_message if ($death_message);
-            $saw_pbp ||= $_saw_pbp;
  
              # process any .perltidyrc parameters right now so we can
              # localize errors
@@ -2380,12 +2415,9 @@ EOM
          Die "Error on command line; for help try 'perltidy -h'\n";
      }
  
-    return (
-        \%Opts,       $config_file,      \@raw_options,
-        $saw_extrude, $saw_pbp,          $roption_string,
-        $rexpansion,  $roption_category, $roption_range
-    );
-}    # end of process_command_line
+    return ( \%Opts, $config_file, \@raw_options, $roption_string,
+        $rexpansion, $roption_category, $roption_range );
+}    # end of _process_command_line
  
  sub check_options {
  
@@ -3005,13 +3037,13 @@ sub read_config_file {
  
      my ( $fh, $config_file, $rexpansion ) = @_;
      my @config_list = ();
-    my $saw_pbp;
  
      # file is bad if non-empty $death_message is returned
      my $death_message = "";
  
      my $name = undef;
      my $line_no;
+    my $opening_brace_line;
      while ( my $line = $fh->getline() ) {
          $line_no++;
          chomp $line;
@@ -3022,69 +3054,86 @@ sub read_config_file {
          $line =~ s/^\s*(.*?)\s*$/$1/;    # trim both ends
          next unless $line;
  
-        # look for something of the general form
-        #    newname { body }
-        # or just
-        #    body
-
          my $body = $line;
-        my ($newname);
-        if ( $line =~ /^((\w+)\s*\{)(.*)\}$/ ) {
-            ( $newname, $body ) = ( $2, $3, );
-        }
-        if ($body) {
+        my $newname;
  
-            if ( !$saw_pbp && $body =~ /-(pbp|perl-best-practices)/ ) {
-                $saw_pbp = 1;
-            }
+        # Look for complete or partial abbreviation definition of the form
+        #     name { body }   or  name {   or    name { body
+        # See rules in perltidy's perldoc page
+        # Section: Other Controls - Creating a new abbreviation
+        if ( $line =~ /^((\w+)\s*\{)(.*)?$/ ) {
+            my $oldname = $name;
+            ( $name, $body ) = ( $2, $3 );
+
+            # Cannot start new abbreviation unless old abbreviation is complete
+            last if ($opening_brace_line);
+
+            $opening_brace_line = $line_no unless ( $body && $body =~ s/\}$// );
  
              # handle a new alias definition
-            if ($newname) {
-                if ($name) {
-                    $death_message =
-"No '}' seen after $name and before $newname in config file $config_file line $.\n";
-                    last;
-                }
-                $name = $newname;
+            if ( ${$rexpansion}{$name} ) {
+                local $" = ')(';
+                my @names = sort keys %$rexpansion;
+                $death_message =
+                    "Here is a list of all installed aliases\n(@names)\n"
+                  . "Attempting to redefine alias ($name) in config file $config_file line $.\n";
+                last;
+            }
+            ${$rexpansion}{$name} = [];
+        }
  
-                if ( ${$rexpansion}{$name} ) {
-                    local $" = ')(';
-                    my @names = sort keys %$rexpansion;
-                    $death_message =
-                        "Here is a list of all installed aliases\n(@names)\n"
-                      . "Attempting to redefine alias ($name) in config file $config_file line $.\n";
-                    last;
-                }
-                ${$rexpansion}{$name} = [];
+        # leading opening braces not allowed
+        elsif ( $line =~ /^{/ ) {
+            $opening_brace_line = undef;
+            $death_message =
+              "Unexpected '{' at line $line_no in config file '$config_file'\n";
+            last;
+        }
+
+        # Look for abbreviation closing:    body }   or    }
+        elsif ( $line =~ /^(.*)?\}$/ ) {
+            $body = $1;
+            if ($opening_brace_line) {
+                $opening_brace_line = undef;
              }
+            else {
+                $death_message =
+"Unexpected '}' at line $line_no in config file '$config_file'\n";
+                last;
+            }
+        }
  
-            # now do the body
-            if ($body) {
+        # Now store any parameters
+        if ($body) {
  
-                my ( $rbody_parts, $msg ) = parse_args($body);
-                if ($msg) {
-                    $death_message = <<EOM;
+            my ( $rbody_parts, $msg ) = parse_args($body);
+            if ($msg) {
+                $death_message = <<EOM;
  Error reading file '$config_file' at line number $line_no.
  $msg
  Please fix this line or use -npro to avoid reading this file
  EOM
-                    last;
-                }
+                last;
+            }
  
-                if ($name) {
+            if ($name) {
  
-                    # remove leading dashes if this is an alias
-                    foreach (@$rbody_parts) { s/^\-+//; }
-                    push @{ ${$rexpansion}{$name} }, @$rbody_parts;
-                }
-                else {
-                    push( @config_list, @$rbody_parts );
-                }
+                # remove leading dashes if this is an alias
+                foreach (@$rbody_parts) { s/^\-+//; }
+                push @{ ${$rexpansion}{$name} }, @$rbody_parts;
+            }
+            else {
+                push( @config_list, @$rbody_parts );
              }
          }
      }
+
+    if ($opening_brace_line) {
+        $death_message =
+"Didn't see a '}' to match the '{' at line $opening_brace_line in config file '$config_file'\n";
+    }
      eval { $fh->close() };
-    return ( \@config_list, $death_message, $saw_pbp );
+    return ( \@config_list, $death_message );
  }
  
  sub strip_comment {
@@ -3300,7 +3349,7 @@ sub show_version {
      print STDOUT <<"EOM";
  This is perltidy, v$VERSION 
  
-Copyright 2000-2014, Steve Hancock
+Copyright 2000-2016, Steve Hancock
  
  Perltidy is free software and may be copied under the terms of the GNU
  General Public License, which is included in the distribution files.
@@ -3907,7 +3956,12 @@ sub new {
          $output_file_open = 1;
          if ($binmode) {
              if ( ref($fh) eq 'IO::File' ) {
-                binmode $fh;
+                if (   $rOpts->{'character-encoding'}
+                    && $rOpts->{'character-encoding'} eq 'utf8' )
+                {
+                    binmode $fh, ":encoding(UTF-8)";
+                }
+                else { binmode $fh }
              }
              if ( $output_file eq '-' ) { binmode STDOUT }
          }
@@ -4351,6 +4405,7 @@ sub warning {
              Perl::Tidy::Warn "## Please see file $filename\n"
                unless ref($warning_file);
              $self->{_fh_warnings} = $fh_warnings;
+            $fh_warnings->print("Perltidy version is $Perl::Tidy::VERSION\n");
          }
  
          if ( $warning_count < WARNING_LIMIT ) {
@@ -5128,7 +5183,7 @@ sub pod_to_html {
      }
  
      # Pod::Html requires a real temporary filename
-    my ( $fh_tmp, $tmpfile ) = tempfile();
+    my ( $fh_tmp, $tmpfile ) = File::Temp::tempfile();
      unless ($fh_tmp) {
          Perl::Tidy::Warn
            "unable to open temporary file $tmpfile; cannot use pod2html\n";
@@ -6108,6 +6163,7 @@ use vars qw{
    %is_assignment
    %is_chain_operator
    %is_if_unless_and_or_last_next_redo_return
+  %ok_to_add_semicolon_for_block_type
  
    @has_broken_sublist
    @dont_align
@@ -6263,6 +6319,20 @@ BEGIN {
        unless while until for foreach given when default);
      @is_block_without_semicolon{@_} = (1) x scalar(@_);
  
+    # We will allow semicolons to be added within these block types
+    # as well as sub and package blocks.
+    # NOTES:
+    # 1. Note that these keywords are omitted:
+    #     switch case given when default sort map grep
+    # 2. It is also ok to add for sub and package blocks and a labeled block
+    # 3. But not okay for other perltidy types including:
+    #     { } ; G t
+    # 4. Test files: blktype.t, blktype1.t, semicolon.t
+    @_ =
+      qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
+      unless do while until eval for foreach );
+    @ok_to_add_semicolon_for_block_type{@_} = (1) x scalar(@_);
+
      # 'L' is token for opening { at hash key
      @_ = qw" L { ( [ ";
      @is_opening_type{@_} = (1) x scalar(@_);
@@ -7772,6 +7842,13 @@ EOM
          $rOpts->{'long-block-line-count'} = 1000000;
      }
  
+    my $enc = $rOpts->{'character-encoding'};
+    if ( $enc && $enc !~ /^(none|utf8)$/i ) {
+        Perl::Tidy::Die <<EOM;
+Unrecognized character-encoding '$enc'; expecting one of: (none, utf8)
+EOM
+    }
+
      my $ole = $rOpts->{'output-line-ending'};
      if ($ole) {
          my %endings = (
@@ -7780,16 +7857,38 @@ EOM
              mac  => "\015",
              unix => "\012",
          );
-        $ole = lc $ole;
-        unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
-            my $str = join " ", keys %endings;
-            Perl::Tidy::Die <<EOM;
+
+        # Patch for RT #99514, a memoization issue.
+        # Normally, the user enters one of 'dos', 'win', etc, and we change the
+        # value in the options parameter to be the corresponding line ending
+        # character.  But, if we are using memoization, on later passes through
+        # here the option parameter will already have the desired ending
+        # character rather than the keyword 'dos', 'win', etc.  So
+        # we must check to see if conversion has already been done and, if so,
+        # bypass the conversion step.
+        my %endings_inverted = (
+            "\015\012" => 'dos',
+            "\015\012" => 'win',
+            "\015"     => 'mac',
+            "\012"     => 'unix',
+        );
+
+        if ( defined( $endings_inverted{$ole} ) ) {
+
+            # we already have valid line ending, nothing more to do
+        }
+        else {
+            $ole = lc $ole;
+            unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
+                my $str = join " ", keys %endings;
+                Perl::Tidy::Die <<EOM;
  Unrecognized line ending '$ole'; expecting one of: $str
  EOM
-        }
-        if ( $rOpts->{'preserve-line-endings'} ) {
-            Perl::Tidy::Warn "Ignoring -ple; conflicts with -ole\n";
-            $rOpts->{'preserve-line-endings'} = undef;
+            }
+            if ( $rOpts->{'preserve-line-endings'} ) {
+                Perl::Tidy::Warn "Ignoring -ple; conflicts with -ole\n";
+                $rOpts->{'preserve-line-endings'} = undef;
+            }
          }
      }
  
@@ -9396,15 +9495,23 @@ sub set_white_space_flag {
          #     *VERSION = \'1.01';
          #     ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
          #   We will pass such a line straight through without breaking
-        #   it unless -npvl is used
+        #   it unless -npvl is used.
+
+        #   Patch for problem reported in RT #81866, where files
+        #   had been flattened into a single line and couldn't be
+        #   tidied without -npvl.  There are two parts to this patch:
+        #   First, it is not done for a really long line (80 tokens for now).
+        #   Second, we will only allow up to one semicolon
+        #   before the VERSION.  We need to allow at least one semicolon
+        #   for statements like this:
+        #      require Exporter;  our $VERSION = $Exporter::VERSION;
+        #   where both statements must be on a single line for MakeMaker
  
          my $is_VERSION_statement = 0;
-
-        if (
-              !$saw_VERSION_in_this_file
-            && $input_line =~ /VERSION/    # quick check to reject most lines
-            && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
-          )
+        if (  !$saw_VERSION_in_this_file
+            && $jmax < 80
+            && $input_line =~
+            /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
          {
              $saw_VERSION_in_this_file = 1;
              $is_VERSION_statement     = 1;
@@ -9419,10 +9526,20 @@ sub set_white_space_flag {
          # qw lines will still go out at the end of this routine.
          if ( $rOpts->{'indent-only'} ) {
              flush();
-            trim($input_line);
+            my $line = $input_line;
+
+            # delete side comments if requested with -io, but
+            # we will not allow deleting of closing side comments with -io
+            # because the coding would be more complex
+            if (   $rOpts->{'delete-side-comments'}
+                && $rtoken_type->[$jmax] eq '#' )
+            {
+                $line = join "", @{$rtokens}[ 0 .. $jmax - 1 ];
+            }
+            trim($line);
  
              extract_token(0);
-            $token                 = $input_line;
+            $token                 = $line;
              $type                  = 'q';
              $block_type            = "";
              $container_type        = "";
@@ -9473,11 +9590,22 @@ sub set_white_space_flag {
          }
  
          # This is a good place to kill incomplete one-line blocks
-        if (   ( $semicolons_before_block_self_destruct == 0 )
-            && ( $max_index_to_go >= 0 )
-            && ( $types_to_go[$max_index_to_go] eq ';' )
-            && ( $$rtokens[0] ne '}' ) )
+        if (
+            (
+                   ( $semicolons_before_block_self_destruct == 0 )
+                && ( $max_index_to_go >= 0 )
+                && ( $types_to_go[$max_index_to_go] eq ';' )
+                && ( $$rtokens[0] ne '}' )
+            )
+
+            # Patch for RT #98902. Honor request to break at old commas.
+            || (   $rOpts_break_at_old_comma_breakpoints
+                && $max_index_to_go >= 0
+                && $types_to_go[$max_index_to_go] eq ',' )
+          )
          {
+            $forced_breakpoint_to_go[$max_index_to_go] = 1
+              if ($rOpts_break_at_old_comma_breakpoints);
              destroy_one_line_block();
              output_line_to_go();
          }
@@ -9801,24 +9929,14 @@ sub set_white_space_flag {
                          # and we don't have one
                          && ( $last_nonblank_type ne ';' )
  
-                        # patch until some block type issues are fixed:
-                        # Do not add semi-colon for block types '{',
-                        # '}', and ';' because we cannot be sure yet
-                        # that this is a block and not an anonymous
-                        # hash (blktype.t, blktype1.t)
-                        && ( $block_type !~ /^[\{\};]$/ )
-
-                        # patch: and do not add semi-colons for recently
-                        # added block types (see tmp/semicolon.t)
-                        && ( $block_type !~
-                            /^(switch|case|given|when|default)$/ )
-
-                        # it seems best not to add semicolons in these
-                        # special block types: sort|map|grep
-                        && ( !$is_sort_map_grep{$block_type} )
-
                          # and we are allowed to do so.
                          && $rOpts->{'add-semicolons'}
+
+                        # and we are allowed to for this block type
+                        && (   $ok_to_add_semicolon_for_block_type{$block_type}
+                            || $block_type =~ /^(sub|package)/
+                            || $block_type =~ /^\w+\:$/ )
+
                        )
                      {
  
@@ -9888,7 +10006,13 @@ sub set_white_space_flag {
                      # But make a line break if the curly ends a
                      # significant block:
                      if (
-                        $is_block_without_semicolon{$block_type}
+                        (
+                            $is_block_without_semicolon{$block_type}
+
+                            # Follow users break point for
+                            # one line block types U & G, such as a 'try' block
+                            || $is_one_line_block =~ /^[UG]$/ && $j == $jmax
+                        )
  
                          # if needless semicolon follows we handle it later
                          && $next_nonblank_token ne ';'
@@ -10463,6 +10587,41 @@ sub starting_one_line_block {
          $i_start = $max_index_to_go;
      }
  
+    # the previous nonblank token should start these block types
+    elsif (( $last_last_nonblank_token_to_go eq $block_type )
+        || ( $block_type =~ /^sub/ )
+        || $block_type =~ /\(\)/ )
+    {
+        $i_start = $last_last_nonblank_index_to_go;
+
+        # Patch for signatures and extended syntax ...
+        # if the previous token was a closing paren we should walk back up to
+        # find the keyword (sub). Otherwise, we might form a one line block,
+        # which stays intact, and cause the parenthesized expression to break
+        # open.  That looks bad.
+        if ( $tokens_to_go[$i_start] eq ')' ) {
+
+            # walk back to find the first token with this level
+            # it should be the opening paren...
+            my $lev_want = $levels_to_go[$i_start];
+            for ( $i_start-- ; $i_start >= 0 ; $i_start-- ) {
+                if ( $i_start <= 0 ) { return 0 }
+                my $lev = $levels_to_go[$i_start];
+                if ( $lev <= $lev_want ) {
+
+                    # if not an opening paren then probably a syntax error
+                    if ( $tokens_to_go[$i_start] ne '(' ) { return 0 }
+
+                    # now step back to the opening keyword (sub)
+                    $i_start--;
+                    if ( $i_start > 0 && $types_to_go[$i_start] eq 'b' ) {
+                        $i_start--;
+                    }
+                }
+            }
+        }
+    }
+
      elsif ( $last_last_nonblank_token_to_go eq ')' ) {
  
          # For something like "if (xxx) {", the keyword "if" will be
@@ -10476,18 +10635,19 @@ sub starting_one_line_block {
              $i_start++;
          }
  
-        unless ( $tokens_to_go[$i_start] eq $block_type ) {
+        # Patch to avoid breaking short blocks defined with extended_syntax:
+        # Strip off any trailing () which was added in the parser to mark
+        # the opening keyword.  For example, in the following
+        #    create( TypeFoo $e) {$bubba}
+        # the blocktype would be marked as create()
+        my $stripped_block_type = $block_type;
+        $stripped_block_type =~ s/\(\)$//;
+
+        unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
              return 0;
          }
      }
  
-    # the previous nonblank token should start these block types
-    elsif (( $last_last_nonblank_token_to_go eq $block_type )
-        || ( $block_type =~ /^sub/ ) )
-    {
-        $i_start = $last_last_nonblank_index_to_go;
-    }
-
      # patch for SWITCH/CASE to retain one-line case/when blocks
      elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
  
@@ -10625,7 +10785,7 @@ sub unstore_token_to_go {
  
  sub want_blank_line {
      flush();
-    $file_writer_object->want_blank_line();
+    $file_writer_object->want_blank_line() unless $in_format_skipping_section;
  }
  
  sub write_unindented_line {
@@ -12917,6 +13077,31 @@ sub lookup_opening_indentation {
                  {
                      $adjust_indentation = 1;
                  }
+
+                # Patch for RT #96101, in which closing brace of anonymous subs
+                # was not outdented.  We should look ahead and see if there is
+                # a level decrease at the next token (i.e., a closing token),
+                # but right now we do not have that information.  For now
+                # we see if we are in a list, and this works well.
+                # See test files 'sub*.t' for good test cases.
+                if (   $block_type_to_go[$ibeg] =~ /^sub\s*\(?/
+                    && $container_environment_to_go[$i_terminal] eq 'LIST'
+                    && !$rOpts->{'indent-closing-brace'} )
+                {
+                    (
+                        $opening_indentation, $opening_offset,
+                        $is_leading,          $opening_exists
+                      )
+                      = get_opening_indentation( $ibeg, $ri_first, $ri_last,
+                        $rindentation_list );
+                    my $indentation = $leading_spaces_to_go[$ibeg];
+                    if ( defined($opening_indentation)
+                        && get_SPACES($indentation) >
+                        get_SPACES($opening_indentation) )
+                    {
+                        $adjust_indentation = 1;
+                    }
+                }
              }
  
              # YVES patch 1 of 2:
@@ -12935,7 +13120,8 @@ sub lookup_opening_indentation {
                      $rindentation_list );
                  my $indentation = $leading_spaces_to_go[$ibeg];
                  if ( defined($opening_indentation)
-                    && $indentation > $opening_indentation )
+                    && get_SPACES($indentation) >
+                    get_SPACES($opening_indentation) )
                  {
                      $adjust_indentation = 1;
                  }
@@ -17058,6 +17244,26 @@ sub undo_forced_breakpoint_stack {
          @is_mult_div{@_} = (1) x scalar(@_);
      }
  
+    sub DUMP_BREAKPOINTS {
+
+        # Debug routine to dump current breakpoints...not normally called
+        # We are given indexes to the current lines:
+        # $ri_beg = ref to array of BEGinning indexes of each line
+        # $ri_end = ref to array of ENDing indexes of each line
+        my ( $ri_beg, $ri_end, $msg ) = @_;
+        print STDERR "----Dumping breakpoints from: $msg----\n";
+        for my $n ( 0 .. @{$ri_end} - 1 ) {
+            my $ibeg = $$ri_beg[$n];
+            my $iend = $$ri_end[$n];
+            my $text = "";
+            foreach my $i ( $ibeg .. $iend ) {
+                $text .= $tokens_to_go[$i];
+            }
+            print STDERR "$n ($ibeg:$iend) $text\n";
+        }
+        print STDERR "----\n";
+    }
+
      sub recombine_breakpoints {
  
          # sub set_continuation_breaks is very liberal in setting line breaks
@@ -18564,12 +18770,24 @@ sub set_continuation_breaks {
                  #     }
                  # };
                  #
-                || (   $line_count
+                || (
+                       $line_count
                      && ( $token eq ')' )
                      && ( $next_nonblank_type eq '{' )
                      && ($next_nonblank_block_type)
                      && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
-                    && !$rOpts->{'opening-brace-always-on-right'} )
+
+                    # RT #104427: Dont break before opening sub brace because
+                    # sub block breaks handled at higher level, unless
+                    # it looks like the preceeding list is long and broken
+                    && !(
+                        $next_nonblank_block_type =~ /^sub/
+                        && ( $nesting_depth_to_go[$i_begin] ==
+                            $nesting_depth_to_go[$i_next_nonblank] )
+                    )
+
+                    && !$rOpts->{'opening-brace-always-on-right'}
+                )
  
                  # There is an implied forced break at a terminal opening brace
                  || ( ( $type eq '{' ) && ( $i_test == $imax ) )
@@ -21996,8 +22214,38 @@ sub valign_output_step_C {
  
          # Start storing lines when we see a line with multiple stacked opening
          # tokens.
-        if ( $args[0] =~ /[\{\(\[]\s*[\{\(\[]$/ ) {
+        # patch for RT #94354, requested by Colin Williams
+        if ( $seqno_string =~ /^\d+(\:+\d+)+$/ && $args[0] !~ /^[\}\)\]\:\?]/ )
+        {
+
+            # This test is efficient but a little subtle: The first test says
+            # that we have multiple sequence numbers and hence multiple opening
+            # or closing tokens in this line.  The second part of the test
+            # rejects stacked closing and ternary tokens.  So if we get here
+            # then we should have stacked unbalanced opening tokens.
+
+            # Here is a complex example:
+
+            # Foo($Bar[0], {  # (side comment)
+            #  baz => 1,
+            # });
+
+            # The first line has sequence 6::4.  It does not begin with
+            # a closing token or ternary, so it passes the test and must be
+            # stacked opening tokens.
+
+            # The last line has sequence 4:6 but is a stack of closing tokens,
+            # so it gets rejected.
+
+            # Note that the sequence number of an opening token for a qw quote
+            # is a negative number and will be rejected.
+            # For example, for the following line:
+            #    skip_symbols([qw(
+            # $seqno_string='10:5:-1'.  It would be okay to accept it but
+            # I decided not to do this after testing.
+
              $valign_buffer_filling = $seqno_string;
+
          }
      }
  }
@@ -22706,6 +22954,7 @@ sub new {
          look_for_autoloader  => 1,
          look_for_selfloader  => 1,
          starting_line_number => 1,
+        extended_syntax      => 0,
      );
      my %args = ( %defaults, @_ );
  
@@ -22780,6 +23029,7 @@ sub new {
          _nearly_matched_here_target_at      => undef,
          _line_text                          => "",
          _rlower_case_labels_at              => undef,
+        _extended_syntax                    => $args{extended_syntax},
      };
  
      prepare_for_a_new_file();
@@ -23913,7 +24163,7 @@ sub prepare_for_a_new_file {
      sub scan_identifier {
          ( $i, $tok, $type, $id_scan_state, $identifier ) =
            scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
-            $max_token_index, $expecting );
+            $max_token_index, $expecting, $paren_type[$paren_depth] );
      }
  
      sub scan_id {
@@ -23973,7 +24223,8 @@ sub prepare_for_a_new_file {
      # keyword ( .... ) { BLOCK }
      # patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
      my %is_blocktype_with_paren;
-    @_ = qw(if elsif unless while until for foreach switch case given when);
+    @_ =
+      qw(if elsif unless while until for foreach switch case given when catch);
      @is_blocktype_with_paren{@_} = (1) x scalar(@_);
  
      # ------------------------------------------------------------
@@ -24056,6 +24307,9 @@ sub prepare_for_a_new_file {
                  $container_type = $want_paren;
                  $want_paren     = "";
              }
+            elsif ( $statement_type =~ /^sub/ ) {
+                $container_type = $statement_type;
+            }
              else {
                  $container_type = $last_nonblank_token;
  
@@ -24259,7 +24513,7 @@ sub prepare_for_a_new_file {
              if ($is_pattern) {
                  $in_quote                = 1;
                  $type                    = 'Q';
-                $allowed_quote_modifiers = '[msixpodualgc]';
+                $allowed_quote_modifiers = '[msixpodualngc]';
              }
              else {    # not a pattern; check for a /= token
  
@@ -24314,9 +24568,21 @@ sub prepare_for_a_new_file {
  
                  # check for syntax error here;
                  unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
-                    my $list = join( ' ', sort keys %is_blocktype_with_paren );
-                    warning(
-                        "syntax error at ') {', didn't see one of: $list\n");
+                    if ( $tokenizer_self->{'_extended_syntax'} ) {
+
+                        # we append a trailing () to mark this as an unknown
+                        # block type.  This allows perltidy to format some
+                        # common extensions of perl syntax.
+                        # This is used by sub code_block_type
+                        $last_nonblank_token .= '()';
+                    }
+                    else {
+                        my $list =
+                          join( ' ', sort keys %is_blocktype_with_paren );
+                        warning(
+"syntax error at ') {', didn't see one of: <<$list>>; If this code is okay try using the -xs flag\n"
+                        );
+                    }
                  }
              }
  
@@ -24365,12 +24631,6 @@ sub prepare_for_a_new_file {
                  $block_type = code_block_type( $i_tok, $rtokens, $rtoken_type,
                      $max_token_index );
  
-                # remember a preceding smartmatch operator
-                ## SMARTMATCH
-                ##if ( $last_nonblank_type eq '~~' ) {
-                ##    $block_type = $last_nonblank_type;
-                ##}
-
                  # patch to promote bareword type to function taking block
                  if (   $block_type
                      && $last_nonblank_type eq 'w'
@@ -24391,6 +24651,7 @@ sub prepare_for_a_new_file {
                      }
                  }
              }
+
              $brace_type[ ++$brace_depth ]        = $block_type;
              $brace_package[$brace_depth]         = $current_package;
              $brace_structural_type[$brace_depth] = $type;
@@ -24418,8 +24679,6 @@ sub prepare_for_a_new_file {
              # propagate type information for 'do' and 'eval' blocks, and also
              # for smartmatch operator.  This is necessary to enable us to know
              # if an operator or term is expected next.
-            ## SMARTMATCH
-            ##if ( $is_block_operator{$block_type} || $block_type eq '~~' ) {
              if ( $is_block_operator{$block_type} ) {
                  $tok = $block_type;
              }
@@ -24481,7 +24740,7 @@ sub prepare_for_a_new_file {
              if ($is_pattern) {
                  $in_quote                = 1;
                  $type                    = 'Q';
-                $allowed_quote_modifiers = '[msixpodualgc]';
+                $allowed_quote_modifiers = '[msixpodualngc]';
              }
              else {
                  ( $type_sequence, $indent_flag ) =
@@ -24845,21 +25104,22 @@ sub prepare_for_a_new_file {
          '__DATA__' => '_in_data',
      );
  
-    # ref: camel 3 p 147,
+    # original ref: camel 3 p 147,
      # but perl may accept undocumented flags
      # perl 5.10 adds 'p' (preserve)
-    # Perl version 5.16, http://perldoc.perl.org/perlop.html,  has these:
-    # /PATTERN/msixpodualgc or m?PATTERN?msixpodualgc
-    # s/PATTERN/REPLACEMENT/msixpodualgcer
+    # Perl version 5.22 added 'n'
+    # From http://perldoc.perl.org/perlop.html we have
+    # /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc
+    # s/PATTERN/REPLACEMENT/msixpodualngcer
      # y/SEARCHLIST/REPLACEMENTLIST/cdsr
      # tr/SEARCHLIST/REPLACEMENTLIST/cdsr
-    # qr/STRING/msixpodual
+    # qr/STRING/msixpodualn
      my %quote_modifiers = (
-        's'  => '[msixpodualgcer]',
+        's'  => '[msixpodualngcer]',
          'y'  => '[cdsr]',
          'tr' => '[cdsr]',
-        'm'  => '[msixpodualgc]',
-        'qr' => '[msixpodual]',
+        'm'  => '[msixpodualngc]',
+        'qr' => '[msixpodualn]',
          'q'  => "",
          'qq' => "",
          'qw' => "",
@@ -25286,11 +25546,20 @@ EOM
                  # '//' must be defined_or operator if an operator is expected.
                  # TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
                  # could be migrated here for clarity
-                if ( $test_tok eq '//' ) {
+
+              # Patch for RT#102371, misparsing a // in the following snippet:
+              #     state $b //= ccc();
+              # The solution is to always accept the digraph (or trigraph) after
+              # token type 'Z' (possible file handle).  The reason is that
+              # sub operator_expected gives TERM expected here, which is
+              # wrong in this case.
+                if ( $test_tok eq '//' && $last_nonblank_type ne 'Z' ) {
                      my $next_type = $$rtokens[ $i + 1 ];
                      my $expecting =
                        operator_expected( $prev_type, $tok, $next_type );
-                    $combine_ok = 0 unless ( $expecting == OPERATOR );
+
+                    # Patched for RT#101547, was 'unless ($expecting==OPERATOR)'
+                    $combine_ok = 0 if ( $expecting == TERM );
                  }
              }
  
@@ -25645,9 +25914,17 @@ EOM
                      elsif ( $tok eq 'else' ) {
  
                          # patched for SWITCH/CASE
-                        if (   $last_nonblank_token ne ';'
+                        if (
+                               $last_nonblank_token ne ';'
                              && $last_nonblank_block_type !~
-                            /^(if|elsif|unless|case|when)$/ )
+                            /^(if|elsif|unless|case|when)$/
+
+                            # patch to avoid an unwanted error message for
+                            # the case of a parenless 'case' (RT 105484):
+                            # switch ( 1 ) { case x { 2 } else { } }
+                            && $statement_type !~
+                            /^(if|elsif|unless|case|when)$/
+                          )
                          {
                              warning(
  "expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
@@ -26386,8 +26663,15 @@ EOM
                              $in_statement_continuation = 0;
                          }
  
-                       # otherwise, the next token after a ',' starts a new term
-                        elsif ( $type eq ',' ) {
+                        # otherwise, the token after a ',' starts a new term
+
+                        # Patch FOR RT#99961; no continuation after a ';'
+                        # This is needed because perltidy currently marks
+                        # a block preceded by a type character like % or @
+                        # as a non block, to simplify formatting. But these
+                        # are actually blocks and can have semicolons.
+                        # See code_block_type() and is_non_structural_brace().
+                        elsif ( $type eq ',' || $type eq ';' ) {
                              $in_statement_continuation = 0;
                          }
  
@@ -26805,9 +27089,11 @@ sub code_block_type {
          }
      }
  
+    ################################################################
      # NOTE: braces after type characters start code blocks, but for
      # simplicity these are not identified as such.  See also
      # sub is_non_structural_brace.
+    ################################################################
  
  ##    elsif ( $last_nonblank_type eq 't' ) {
  ##       return $last_nonblank_token;
@@ -26860,6 +27146,33 @@ sub code_block_type {
              $max_token_index );
      }
  
+    # Patch for bug # RT #94338 reported by Daniel Trizen
+    # for-loop in a parenthesized block-map triggering an error message:
+    #    map( { foreach my $item ( '0', '1' ) { print $item} } qw(a b c) );
+    # Check for a code block within a parenthesized function call
+    elsif ( $last_nonblank_token eq '(' ) {
+        my $paren_type = $paren_type[$paren_depth];
+        if ( $paren_type && $paren_type =~ /^(map|grep|sort)$/ ) {
+
+            # We will mark this as a code block but use type 't' instead
+            # of the name of the contining function.  This will allow for
+            # correct parsing but will usually produce better formatting.
+            # Braces with block type 't' are not broken open automatically
+            # in the formatter as are other code block types, and this usually
+            # works best.
+            return 't';    # (Not $paren_type)
+        }
+        else {
+            return "";
+        }
+    }
+
+    # handle unknown syntax ') {'
+    # we previously appended a '()' to mark this case
+    elsif ( $last_nonblank_token =~ /\(\)$/ ) {
+        return $last_nonblank_token;
+    }
+
      # anything else must be anonymous hash reference
      else {
          return "";
@@ -26870,6 +27183,7 @@ sub decide_if_code_block {
  
      # USES GLOBAL VARIABLES: $last_nonblank_token
      my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
+
      my ( $next_nonblank_token, $i_next ) =
        find_next_nonblank_token( $i, $rtokens, $max_token_index );
  
@@ -26907,8 +27221,14 @@ sub decide_if_code_block {
  
          # We are only going to look ahead one more (nonblank/comment) line.
          # Strange formatting could cause a bad guess, but that's unlikely.
-        my @pre_types  = @$rtoken_type[ $i + 1 .. $max_token_index ];
-        my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
+        my @pre_types;
+        my @pre_tokens;
+
+        # Ignore the rest of this line if it is a side comment
+        if ( $next_nonblank_token ne '#' ) {
+            @pre_types  = @$rtoken_type[ $i + 1 .. $max_token_index ];
+            @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
+        }
          my ( $rpre_tokens, $rpre_types ) =
            peek_ahead_for_n_nonblank_pre_tokens(20);    # 20 is arbitrary but
                                                         # generous, and prevents
@@ -26921,6 +27241,7 @@ sub decide_if_code_block {
  
          # put a sentinel token to simplify stopping the search
          push @pre_types, '}';
+        push @pre_types, '}';
  
          my $jbeg = 0;
          $jbeg = 1 if $pre_types[0] eq 'b';
@@ -26947,9 +27268,7 @@ sub decide_if_code_block {
              $j++;
          }
          elsif ( $pre_types[$j] eq 'w' ) {
-            unless ( $is_keyword{ $pre_tokens[$j] } ) {
-                $j++;
-            }
+            $j++;
          }
          elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
              $j++;
@@ -26958,9 +27277,18 @@ sub decide_if_code_block {
  
              $j++ if $pre_types[$j] eq 'b';
  
-            # it's a hash ref if a comma or => follow next
-            if ( $pre_types[$j] eq ','
-                || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) )
+            # Patched for RT #95708
+            if (
+
+                # it is a comma which is not a pattern delimeter except for qw
+                (
+                       $pre_types[$j] eq ','
+                    && $pre_tokens[$jbeg] !~ /^(s|m|y|tr|qr|q|qq|qx)$/
+                )
+
+                # or a =>
+                || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' )
+              )
              {
                  $code_block_type = "";
              }
@@ -27025,9 +27353,11 @@ sub is_non_structural_brace {
      #    return 0;
      # }
  
+    ################################################################
      # NOTE: braces after type characters start code blocks, but for
      # simplicity these are not identified as such.  See also
      # sub code_block_type
+    ################################################################
  
      ##if ($last_nonblank_type eq 't') {return 0}
  
@@ -27955,7 +28285,7 @@ sub scan_identifier_do {
      # $last_nonblank_type
  
      my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
-        $expecting )
+        $expecting, $container_type )
        = @_;
      my $i_begin   = $i;
      my $type      = '';
@@ -27966,6 +28296,8 @@ sub scan_identifier_do {
      my $tok                 = $tok_begin;
      my $message             = "";
  
+    my $in_prototype_or_signature = $container_type =~ /^sub/;
+
      # these flags will be used to help figure out the type:
      my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
      my $saw_type;
@@ -28049,6 +28381,11 @@ sub scan_identifier_do {
                      last;
                  }
              }
+
+            # POSTDEFREF ->@ ->% ->& ->*
+            elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) {
+                $identifier .= $tok;
+            }
              elsif ( $tok =~ /^[A-Za-z_]/ ) {    # alphanumeric ..
                  $saw_alpha     = 1;
                  $id_scan_state = ':';           # now need ::
@@ -28076,7 +28413,9 @@ sub scan_identifier_do {
                  $id_scan_state = 'A';
                  $identifier .= $tok;
              }
-            elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) {    # $#array
+
+            # $# and POSTDEFREF ->$#
+            elsif ( ( $tok eq '#' ) && ( $identifier =~ /\$$/ ) ) {    # $#array
                  $identifier .= $tok;    # keep same state, a $ could follow
              }
              elsif ( $tok eq '{' ) {
@@ -28166,11 +28505,23 @@ sub scan_identifier_do {
              }
              else {    # something else
  
+                if ( $in_prototype_or_signature && $tok =~ /^[\),=]/ ) {
+                    $id_scan_state = '';
+                    $i             = $i_save;
+                    $type          = 'i';       # probably punctuation variable
+                    last;
+                }
+
                  # check for various punctuation variables
                  if ( $identifier =~ /^[\$\*\@\%]$/ ) {
                      $identifier .= $tok;
                  }
  
+                # POSTDEFREF: Postfix reference ->$* ->%*  ->@* ->** ->&* ->$#*
+                elsif ( $tok eq '*' && $identifier =~ /([\@\%\$\*\&]|\$\#)$/ ) {
+                    $identifier .= $tok;
+                }
+
                  elsif ( $identifier eq '$#' ) {
  
                      if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
@@ -28476,20 +28827,16 @@ sub scan_identifier_do {
          my $pos_beg = $$rtoken_map[$i_beg];
          pos($input_line) = $pos_beg;
  
-        # sub NAME PROTO ATTRS
+        # Look for the sub NAME
          if (
              $input_line =~ m/\G\s*
          ((?:\w*(?:'|::))*)  # package - something that ends in :: or '
          (\w+)               # NAME    - required
-        (\s*\([^){]*\))?    # PROTO   - something in parens
-        (\s*:)?             # ATTRS   - leading : of attribute list
          /gcx
            )
          {
              $match   = 1;
              $subname = $2;
-            $proto   = $3;
-            $attrs   = $4;
  
              $package = ( defined($1) && $1 ) ? $1 : $current_package;
              $package =~ s/\'/::/g;
@@ -28501,20 +28848,35 @@ sub scan_identifier_do {
              $type = 'i';
          }
  
-        # Look for prototype/attributes not preceded on this line by subname;
-        # This might be an anonymous sub with attributes,
+        # Now look for PROTO ATTRS
+        # Look for prototype/attributes which are usually on the same
+        # line as the sub name but which might be on a separate line.
+        # For example, we might have an anonymous sub with attributes,
          # or a prototype on a separate line from its sub name
-        elsif (
-            $input_line =~ m/\G(\s*\([^){]*\))?  # PROTO
+
+        # NOTE: We only want to parse PROTOTYPES here. If we see anything that
+        # does not look like a prototype, we assume it is a SIGNATURE and we
+        # will stop and let the the standard tokenizer handle it.  In
+        # particular, we stop if we see any nested parens, braces, or commas.
+        my $saw_opening_paren = $input_line =~ /\G\s*\(/;
+        if (
+            $input_line =~ m/\G(\s*\([^\)\(\}\{\,]*\))?  # PROTO
              (\s*:)?                              # ATTRS leading ':'
              /gcx
              && ( $1 || $2 )
            )
          {
-            $match = 1;
              $proto = $1;
              $attrs = $2;
  
+            # If we also found the sub name on this call then append PROTO.
+            # This is not necessary but for compatability with previous
+            # versions when the -csc flag is used:
+            if ( $match && $proto ) {
+                $tok .= $proto;
+            }
+            $match ||= 1;
+
              # Handle prototype on separate line from subname
              if ($subname_saved) {
                  $package = $package_saved;
@@ -28541,8 +28903,8 @@ sub scan_identifier_do {
                  $in_attribute_list = 1;
              }
  
-            # We must convert back from character position
-            # to pre_token index.
+            # Otherwise, if we found a match we must convert back from
+            # string position to the pre_token index for continued parsing.
              else {
  
                  # I don't think an error flag can occur here ..but ?
@@ -28570,6 +28932,8 @@ sub scan_identifier_do {
              }
              $package_saved = "";
              $subname_saved = "";
+
+            # See what's next...
              if ( $next_nonblank_token eq '{' ) {
                  if ($subname) {
  
@@ -28601,19 +28965,21 @@ sub scan_identifier_do {
                  $statement_type = $tok;
              }
  
-            # see if PROTO follows on another line:
+            # if we stopped before an open paren ...
              elsif ( $next_nonblank_token eq '(' ) {
-                if ( $attrs || $proto ) {
-                    warning(
-"unexpected '(' after definition or declaration of sub '$subname'\n"
-                    );
-                }
-                else {
-                    $id_scan_state  = 'sub';    # we must come back to get proto
-                    $statement_type = $tok;
-                    $package_saved  = $package;
-                    $subname_saved  = $subname;
+
+                # If we DID NOT see this paren above then it must be on the
+                # next line so we will set a flag to come back here and see if
+                # it is a PROTOTYPE
+
+                # Otherwise, we assume it is a SIGNATURE rather than a
+                # PROTOTYPE and let the normal tokenizer handle it as a list
+                if ( !$saw_opening_paren ) {
+                    $id_scan_state = 'sub';     # we must come back to get proto
+                    $package_saved = $package;
+                    $subname_saved = $subname;
                  }
+                $statement_type = $tok;
              }
              elsif ($next_nonblank_token) {      # EOF technically ok
                  warning(
@@ -29601,11 +29967,11 @@ BEGIN {
  
      my @digraphs = qw(
        .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
-      <= >= == =~ !~ != ++ -- /= x= ~~
+      <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
      );
      @is_digraph{@digraphs} = (1) x scalar(@digraphs);
  
-    my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ );
+    my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.=);
      @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
  
      # make a hash of all valid token types for self-checking the tokenizer
@@ -29636,11 +30002,12 @@ BEGIN {
      @is_indirect_object_taker{@_} = (1) x scalar(@_);
  
      # These tokens may precede a code block
-    # patched for SWITCH/CASE
+    # patched for SWITCH/CASE/CATCH.  Actually these could be removed
+    # now and we could let the extended-syntax coding handle them
      @_ =
        qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
        unless do while until eval for foreach map grep sort
-      switch case given when);
+      switch case given when catch);
      @is_code_block_token{@_} = (1) x scalar(@_);
  
      # I'll build the list of keywords incrementally
@@ -29942,6 +30309,7 @@ BEGIN {
        **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
        <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~
        f F pp mm Y p m U J G j >> << ^ t
+      ~. ^. |. &. ^.= |.= &.=
        #;
      push( @value_requestor_type, ',' )
        ;    # (perl doesn't like a ',' in a qw block)
@@ -30087,4 +30455,3 @@ BEGIN {
  }
  1;
  __END__
-