#
# perltidy - a perl script indenter and formatter
#
-# Copyright (c) 2000-2013 by Steve Hancock
+# Copyright (c) 2000-2017 by Steve Hancock
# Distributed under the GPL license agreement; see file COPYING
#
# This program is free software; you can redistribute it and/or modify
############################################################
package Perl::Tidy;
-use 5.004; # need IO::File from 5.004 or later
-BEGIN { $^W = 1; } # turn on warnings
+# Actually should use a version later than about 5.8.5 to use
+# wide characters.
+use 5.004; # need IO::File from 5.004 or later
+use warnings;
use strict;
use Exporter;
use Carp;
@EXPORT
$missing_file_spec
$fh_stderr
+ $rOpts_character_encoding
};
@ISA = qw( Exporter );
@EXPORT = qw( &perltidy );
use Cwd;
+use Encode ();
use IO::File;
use File::Basename;
use File::Copy;
+use File::Temp qw(tempfile);
BEGIN {
- ( $VERSION = q($Id: Tidy.pm,v 1.74 2013/09/22 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
+ ( $VERSION = q($Id: Tidy.pm,v 1.74 2017/05/21 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
}
sub streamhandle {
# skipped and we can just let it crash if there is no
# getline.
if ( $mode =~ /[rR]/ ) {
- if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
+
+ # RT#97159; part 1 of 2: updated to use 'can'
+ ##if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
+ if ( $ref->can('getline') ) {
$New = sub { $filename };
}
else {
# Accept an object with a print method for writing.
# See note above about IO::File
if ( $mode =~ /[wW]/ ) {
- if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
+
+ # RT#97159; part 2 of 2: updated to use 'can'
+ ##if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
+ if ( $ref->can('print') ) {
$New = sub { $filename };
}
else {
}
$fh = $New->( $filename, $mode )
or Warn("Couldn't open file:$filename in mode:$mode : $!\n");
+
return $fh, ( $ref or $filename );
}
return undef;
}
-sub make_temporary_filename {
-
- # Make a temporary filename.
- # The POSIX tmpnam() function has been unreliable for non-unix systems
- # (at least for the win32 systems that I've tested), so use a pre-defined
- # name for them. A disadvantage of this is that two perltidy
- # runs in the same working directory may conflict. However, the chance of
- # that is small and manageable by the user, especially on systems for which
- # the POSIX tmpnam function doesn't work.
- my $name = "perltidy.TMP";
- if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O eq 'MacOs' ) {
- return $name;
- }
- eval "use POSIX qw(tmpnam)";
- if ($@) { return $name }
- use IO::File;
-
- # just make a couple of tries before giving up and using the default
- for ( 0 .. 3 ) {
- my $tmpname = tmpnam();
- my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL );
- if ($fh) {
- $fh->close();
- return ($tmpname);
- last;
- }
- }
- return ($name);
-}
-
# Here is a map of the flow of data from the input source to the output
# line sink:
#
#---------------------------------------------------------------
# get command line options
#---------------------------------------------------------------
- my (
- $rOpts, $config_file, $rraw_options,
- $saw_extrude, $saw_pbp, $roption_string,
- $rexpansion, $roption_category, $roption_range
- )
+ my ( $rOpts, $config_file, $rraw_options, $roption_string,
+ $rexpansion, $roption_category, $roption_range )
= process_command_line(
$perltidyrc_stream, $is_Windows, $Windows_type,
$rpending_complaint, $dump_options_type,
);
+ my $saw_extrude = ( grep m/^-extrude$/, @$rraw_options ) ? 1 : 0;
+ my $saw_pbp =
+ ( grep m/^-(pbp|perl-best-practices)$/, @$rraw_options ) ? 1 : 0;
+
#---------------------------------------------------------------
# Handle requests to dump information
#---------------------------------------------------------------
user => '',
);
+ $rOpts_character_encoding = $rOpts->{'character-encoding'};
+
# be sure we have a valid output format
unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
my $formats = join ' ',
my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
&& $rOpts->{'format'} eq 'tidy';
- # turn off -b with warnings in case of conflicts with other options
+ # Turn off -b with warnings in case of conflicts with other options.
+ # NOTE: Do this silently, without warnings, if there is a source or
+ # destination stream, or standard output is used. This is because the -b
+ # flag may have been in a .perltidyrc file and warnings break
+ # Test::NoWarnings. See email discussion with Merijn Brand 26 Feb 2014.
if ($in_place_modify) {
if ( $rOpts->{'standard-output'} ) {
- my $msg = "Ignoring -b; you may not use -b and -st together";
- $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
- Warn "$msg\n";
+## my $msg = "Ignoring -b; you may not use -b and -st together";
+## $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
+## Warn "$msg\n";
$in_place_modify = 0;
}
if ($destination_stream) {
- Warn
-"Ignoring -b; you may not specify a destination stream and -b together\n";
+ ##Warn "Ignoring -b; you may not specify a destination stream and -b together\n";
$in_place_modify = 0;
}
if ( ref($source_stream) ) {
- Warn
-"Ignoring -b; you may not specify a source array and -b together\n";
+ ##Warn "Ignoring -b; you may not specify a source array and -b together\n";
$in_place_modify = 0;
}
if ( $rOpts->{'outfile'} ) {
- Warn "Ignoring -b; you may not use -b and -o together\n";
+ ##Warn "Ignoring -b; you may not use -b and -o together\n";
$in_place_modify = 0;
}
if ( defined( $rOpts->{'output-path'} ) ) {
- Warn "Ignoring -b; you may not use -b and -opath together\n";
+ ##Warn "Ignoring -b; you may not use -b and -opath together\n";
$in_place_modify = 0;
}
}
#---------------------------------------------------------------
if ($source_stream) {
$fileroot = "perltidy";
+
+ # If the source is from an array or string, then .LOG output
+ # is only possible if a logfile stream is specified. This prevents
+ # unexpected perltidy.LOG files.
+ if ( !defined($logfile_stream) ) {
+ $logfile_stream = Perl::Tidy::DevNull->new();
+ }
}
elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN
$fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc
# Prefilters and postfilters: The prefilter is a code reference
# that will be applied to the source before tidying, and the
# postfilter is a code reference to the result before outputting.
- if ($prefilter) {
+ if (
+ $prefilter
+ || ( $rOpts_character_encoding
+ && $rOpts_character_encoding eq 'utf8' )
+ )
+ {
my $buf = '';
while ( my $line = $source_object->get_line() ) {
$buf .= $line;
}
- $buf = $prefilter->($buf);
+
+ $buf = $prefilter->($buf) if $prefilter;
+
+ if ( $rOpts_character_encoding
+ && $rOpts_character_encoding eq 'utf8'
+ && !utf8::is_utf8($buf) )
+ {
+ eval {
+ $buf = Encode::decode( 'UTF-8', $buf,
+ Encode::FB_CROAK | Encode::LEAVE_SRC );
+ };
+ if ($@) {
+ Warn
+"skipping file: $input_file: Unable to decode source as UTF-8\n";
+ next;
+ }
+ }
$source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts,
$rpending_logfile_message );
# Eventually all I/O may be done with binmode, but for now it is
# only done when a user requests a particular line separator
# through the -ple or -ole flags
- my $binmode = 0;
- if ( defined($line_separator) ) { $binmode = 1 }
- else { $line_separator = "\n" }
+ my $binmode = defined($line_separator)
+ || defined($rOpts_character_encoding);
+ $line_separator = "\n" unless defined($line_separator);
my ( $sink_object, $postfilter_buffer );
if ($postfilter) {
look_for_autoloader => $rOpts->{'look-for-autoloader'},
look_for_selfloader => $rOpts->{'look-for-selfloader'},
trim_qw => $rOpts->{'trim-qw'},
+ extended_syntax => $rOpts->{'extended-syntax'},
continuation_indentation =>
$rOpts->{'continuation-indentation'},
my $fout = IO::File->new("> $input_file")
or Die
"problem re-opening $input_file for write for -b option; check file and directory permissions: $!\n";
- binmode $fout;
+ if ($binmode) {
+ if ( $rOpts->{'character-encoding'}
+ && $rOpts->{'character-encoding'} eq 'utf8' )
+ {
+ binmode $fout, ":encoding(UTF-8)";
+ }
+ else { binmode $fout }
+ }
my $line;
while ( $line = $output_file->getline() ) {
$fout->print($line);
my ( $fh_stream, $fh_name ) =
Perl::Tidy::streamhandle( $stream, 'r' );
if ($fh_stream) {
- my ( $fout, $tmpnam );
-
- # TODO: fix the tmpnam routine to return an open filehandle
- $tmpnam = Perl::Tidy::make_temporary_filename();
- $fout = IO::File->new( $tmpnam, 'w' );
-
+ my ( $fout, $tmpnam ) = File::Temp::tempfile();
if ($fout) {
$fname = $tmpnam;
$is_tmpfile = 1;
$add_option->( 'standard-error-output', 'se', '!' );
$add_option->( 'standard-output', 'st', '!' );
$add_option->( 'warning-output', 'w', '!' );
+ $add_option->( 'character-encoding', 'enc', '=s' );
# options which are both toggle switches and values moved here
# to hide from tidyview (which does not show category 0 flags):
$add_option->( 'preserve-line-endings', 'ple', '!' );
$add_option->( 'tabs', 't', '!' );
$add_option->( 'default-tabsize', 'dt', '=i' );
+ $add_option->( 'extended-syntax', 'xs', '!' );
########################################
$category = 2; # Code indentation control
$add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' );
$add_option->( 'keep-old-blank-lines', 'kbl', '=i' );
+ $add_option->( 'blank-lines-after-opening-block', 'blao', '=i' );
+ $add_option->( 'blank-lines-before-closing-block', 'blbc', '=i' );
+ $add_option->( 'blank-lines-after-opening-block-list', 'blaol', '=s' );
+ $add_option->( 'blank-lines-before-closing-block-list', 'blbcl', '=s' );
+
########################################
$category = 9; # Other controls
########################################
%option_range = (
'format' => [ 'tidy', 'html', 'user' ],
'output-line-ending' => [ 'dos', 'win', 'mac', 'unix' ],
+ 'character-encoding' => [ 'none', 'utf8' ],
'block-brace-tightness' => [ 0, 2 ],
'brace-tightness' => [ 0, 2 ],
continuation-indentation=2
delete-old-newlines
delete-semicolons
+ extended-syntax
fuzzy-line-length
hanging-side-comments
indent-block-comments
nostatic-side-comments
notabs
nowarning-output
+ character-encoding=none
outdent-labels
outdent-long-quotes
outdent-long-comments
'nhtml' => [qw(format=tidy)],
'tidy' => [qw(format=tidy)],
+ 'utf8' => [qw(character-encoding=utf8)],
+ 'UTF8' => [qw(character-encoding=utf8)],
+
'swallow-optional-blank-lines' => [qw(kbl=0)],
'noswallow-optional-blank-lines' => [qw(kbl=1)],
'sob' => [qw(kbl=0)],
'sct' => [qw(scp schb scsb)],
'stack-closing-tokens' => => [qw(scp schb scsb)],
'nsct' => [qw(nscp nschb nscsb)],
- 'nostack-opening-tokens' => [qw(nscp nschb nscsb)],
+ 'nostack-closing-tokens' => [qw(nscp nschb nscsb)],
'sac' => [qw(sot sct)],
'nsac' => [qw(nsot nsct)],
use Getopt::Long;
+ # Save any current Getopt::Long configuration
+ # and set to Getopt::Long defaults. Use eval to avoid
+ # breaking old versions of Perl without these routines.
+ # Previous configuration is reset at the exit of this routine.
+ my $glc;
+ eval { $glc = Getopt::Long::Configure() };
+ unless ($@) {
+ eval { Getopt::Long::ConfigDefaults() };
+ }
+ else { $glc = undef }
+
my (
$roption_string, $rdefaults, $rexpansion,
$roption_category, $roption_range
unless ( $dump_options_type eq 'perltidyrc' ) {
for $i (@$rdefaults) { push @ARGV, "--" . $i }
}
-
- # Patch to save users Getopt::Long configuration
- # and set to Getopt::Long defaults. Use eval to avoid
- # breaking old versions of Perl without these routines.
- my $glc;
- eval { $glc = Getopt::Long::Configure() };
- unless ($@) {
- eval { Getopt::Long::ConfigDefaults() };
- }
- else { $glc = undef }
-
if ( !GetOptions( \%Opts, @$roption_string ) ) {
Die "Programming Bug: error in setting default options";
}
-
- # Patch to put the previous Getopt::Long configuration back
- eval { Getopt::Long::Configure($glc) } if defined $glc;
}
my $word;
my @raw_options = ();
my $config_file = "";
my $saw_ignore_profile = 0;
- my $saw_extrude = 0;
- my $saw_pbp = 0;
my $saw_dump_profile = 0;
my $i;
elsif ( $i =~ /^-(pro|profile)=?$/ ) {
Die "usage: -pro=filename or --profile=filename, no spaces\n";
}
- elsif ( $i =~ /^-extrude$/ ) {
- $saw_extrude = 1;
- }
- elsif ( $i =~ /^-(pbp|perl-best-practices)$/ ) {
- $saw_pbp = 1;
- }
elsif ( $i =~ /^-(help|h|HELP|H|\?)$/ ) {
usage();
Exit 0;
if ($fh_config) {
- my ( $rconfig_list, $death_message, $_saw_pbp ) =
+ my ( $rconfig_list, $death_message ) =
read_config_file( $fh_config, $config_file, $rexpansion );
Die $death_message if ($death_message);
- $saw_pbp ||= $_saw_pbp;
# process any .perltidyrc parameters right now so we can
# localize errors
Die "Error on command line; for help try 'perltidy -h'\n";
}
- return (
- \%Opts, $config_file, \@raw_options,
- $saw_extrude, $saw_pbp, $roption_string,
- $rexpansion, $roption_category, $roption_range
- );
-} # end of process_command_line
+ # reset Getopt::Long configuration back to its previous value
+ eval { Getopt::Long::Configure($glc) } if defined $glc;
+
+ return ( \%Opts, $config_file, \@raw_options, $roption_string,
+ $rexpansion, $roption_category, $roption_range );
+} # end of _process_command_line
sub check_options {
$rOpts->{'iterations'} = 1;
}
- # check for reasonable number of blank lines and fix to avoid problems
- if ( $rOpts->{'blank-lines-before-subs'} ) {
- if ( $rOpts->{'blank-lines-before-subs'} < 0 ) {
- $rOpts->{'blank-lines-before-subs'} = 0;
- Warn "negative value of -blbs, setting 0\n";
- }
- if ( $rOpts->{'blank-lines-before-subs'} > 100 ) {
- Warn "unreasonably large value of -blbs, reducing\n";
- $rOpts->{'blank-lines-before-subs'} = 100;
- }
- }
- if ( $rOpts->{'blank-lines-before-packages'} ) {
- if ( $rOpts->{'blank-lines-before-packages'} < 0 ) {
- Warn "negative value of -blbp, setting 0\n";
- $rOpts->{'blank-lines-before-packages'} = 0;
- }
- if ( $rOpts->{'blank-lines-before-packages'} > 100 ) {
- Warn "unreasonably large value of -blbp, reducing\n";
- $rOpts->{'blank-lines-before-packages'} = 100;
+ my $check_blank_count = sub {
+ my ( $key, $abbrev ) = @_;
+ if ( $rOpts->{$key} ) {
+ if ( $rOpts->{$key} < 0 ) {
+ $rOpts->{$key} = 0;
+ Warn "negative value of $abbrev, setting 0\n";
+ }
+ if ( $rOpts->{$key} > 100 ) {
+ Warn "unreasonably large value of $abbrev, reducing\n";
+ $rOpts->{$key} = 100;
+ }
}
- }
-
- # see if user set a non-negative logfile-gap
- if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) {
+ };
- # a zero gap will be taken as a 1
- if ( $rOpts->{'logfile-gap'} == 0 ) {
- $rOpts->{'logfile-gap'} = 1;
- }
+ # check for reasonable number of blank lines and fix to avoid problems
+ $check_blank_count->( 'blank-lines-before-subs', '-blbs' );
+ $check_blank_count->( 'blank-lines-before-packages', '-blbp' );
+ $check_blank_count->( 'blank-lines-after-block-opening', '-blao' );
+ $check_blank_count->( 'blank-lines-before-block-closing', '-blbc' );
- # setting a non-negative logfile gap causes logfile to be saved
+ # setting a non-negative logfile gap causes logfile to be saved
+ if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) {
$rOpts->{'logfile'} = 1;
}
- # not setting logfile gap, or setting it negative, causes default of 50
- else {
- $rOpts->{'logfile-gap'} = 50;
- }
-
# set short-cut flag when only indentation is to be done.
# Note that the user may or may not have already set the
# indent-only flag.
my ( $fh, $config_file, $rexpansion ) = @_;
my @config_list = ();
- my $saw_pbp;
# file is bad if non-empty $death_message is returned
my $death_message = "";
my $name = undef;
my $line_no;
+ my $opening_brace_line;
while ( my $line = $fh->getline() ) {
$line_no++;
chomp $line;
$line =~ s/^\s*(.*?)\s*$/$1/; # trim both ends
next unless $line;
- # look for something of the general form
- # newname { body }
- # or just
- # body
-
my $body = $line;
- my ($newname);
- if ( $line =~ /^((\w+)\s*\{)(.*)\}$/ ) {
- ( $newname, $body ) = ( $2, $3, );
- }
- if ($body) {
+ my $newname;
- if ( !$saw_pbp && $body =~ /-(pbp|perl-best-practices)/ ) {
- $saw_pbp = 1;
- }
+ # Look for complete or partial abbreviation definition of the form
+ # name { body } or name { or name { body
+ # See rules in perltidy's perldoc page
+ # Section: Other Controls - Creating a new abbreviation
+ if ( $line =~ /^((\w+)\s*\{)(.*)?$/ ) {
+ my $oldname = $name;
+ ( $name, $body ) = ( $2, $3 );
+
+ # Cannot start new abbreviation unless old abbreviation is complete
+ last if ($opening_brace_line);
+
+ $opening_brace_line = $line_no unless ( $body && $body =~ s/\}$// );
# handle a new alias definition
- if ($newname) {
- if ($name) {
- $death_message =
-"No '}' seen after $name and before $newname in config file $config_file line $.\n";
- last;
- }
- $name = $newname;
+ if ( ${$rexpansion}{$name} ) {
+ local $" = ')(';
+ my @names = sort keys %$rexpansion;
+ $death_message =
+ "Here is a list of all installed aliases\n(@names)\n"
+ . "Attempting to redefine alias ($name) in config file $config_file line $.\n";
+ last;
+ }
+ ${$rexpansion}{$name} = [];
+ }
- if ( ${$rexpansion}{$name} ) {
- local $" = ')(';
- my @names = sort keys %$rexpansion;
- $death_message =
- "Here is a list of all installed aliases\n(@names)\n"
- . "Attempting to redefine alias ($name) in config file $config_file line $.\n";
- last;
- }
- ${$rexpansion}{$name} = [];
+ # leading opening braces not allowed
+ elsif ( $line =~ /^{/ ) {
+ $opening_brace_line = undef;
+ $death_message =
+ "Unexpected '{' at line $line_no in config file '$config_file'\n";
+ last;
+ }
+
+ # Look for abbreviation closing: body } or }
+ elsif ( $line =~ /^(.*)?\}$/ ) {
+ $body = $1;
+ if ($opening_brace_line) {
+ $opening_brace_line = undef;
+ }
+ else {
+ $death_message =
+"Unexpected '}' at line $line_no in config file '$config_file'\n";
+ last;
}
+ }
- # now do the body
- if ($body) {
+ # Now store any parameters
+ if ($body) {
- my ( $rbody_parts, $msg ) = parse_args($body);
- if ($msg) {
- $death_message = <<EOM;
+ my ( $rbody_parts, $msg ) = parse_args($body);
+ if ($msg) {
+ $death_message = <<EOM;
Error reading file '$config_file' at line number $line_no.
$msg
Please fix this line or use -npro to avoid reading this file
EOM
- last;
- }
+ last;
+ }
- if ($name) {
+ if ($name) {
- # remove leading dashes if this is an alias
- foreach (@$rbody_parts) { s/^\-+//; }
- push @{ ${$rexpansion}{$name} }, @$rbody_parts;
- }
- else {
- push( @config_list, @$rbody_parts );
- }
+ # remove leading dashes if this is an alias
+ foreach (@$rbody_parts) { s/^\-+//; }
+ push @{ ${$rexpansion}{$name} }, @$rbody_parts;
+ }
+ else {
+ push( @config_list, @$rbody_parts );
}
}
}
+
+ if ($opening_brace_line) {
+ $death_message =
+"Didn't see a '}' to match the '{' at line $opening_brace_line in config file '$config_file'\n";
+ }
eval { $fh->close() };
- return ( \@config_list, $death_message, $saw_pbp );
+ return ( \@config_list, $death_message );
}
sub strip_comment {
print STDOUT <<"EOM";
This is perltidy, v$VERSION
-Copyright 2000-2013, Steve Hancock
+Copyright 2000-2017, Steve Hancock
Perltidy is free software and may be copied under the terms of the GNU
General Public License, which is included in the distribution files.
# now wish for luck...
my $msg = qx/perl $flags $quoted_stream_filename $error_redirection/;
- unlink $stream_filename if ($is_tmpfile);
+ if ($is_tmpfile) {
+ unlink $stream_filename
+ or Perl::Tidy::Die("couldn't unlink stream $stream_filename: $!\n");
+ }
return $stream_filename, $msg;
}
unless ($fh) { Perl::Tidy::Die "Cannot write to output stream\n"; }
$output_file_open = 1;
if ($binmode) {
- if ( ref($fh) eq 'IO::File' ) {
- binmode $fh;
+ if ( $rOpts->{'character-encoding'}
+ && $rOpts->{'character-encoding'} eq 'utf8' )
+ {
+ if ( ref($fh) eq 'IO::File' ) {
+ $fh->binmode(":encoding(UTF-8)");
+ }
+ elsif ( $output_file eq '-' ) {
+ binmode STDOUT, ":encoding(UTF-8)";
+ }
}
- if ( $output_file eq '-' ) { binmode STDOUT }
+ elsif ( $output_file eq '-' ) { binmode STDOUT }
}
}
# remove any old error output file if we might write a new one
unless ( $fh_warnings || ref($warning_file) ) {
- if ( -e $warning_file ) { unlink($warning_file) }
+ if ( -e $warning_file ) {
+ unlink($warning_file)
+ or Perl::Tidy::Die(
+ "couldn't unlink warning file $warning_file: $!\n");
+ }
}
+ my $logfile_gap =
+ defined( $rOpts->{'logfile-gap'} )
+ ? $rOpts->{'logfile-gap'}
+ : 50;
+ if ( $logfile_gap == 0 ) { $logfile_gap = 1 }
+
bless {
_log_file => $log_file,
+ _logfile_gap => $logfile_gap,
_rOpts => $rOpts,
_fh_warnings => $fh_warnings,
_last_input_line_written => 0,
if (
(
( $input_line_number - $last_input_line_written ) >=
- $rOpts->{'logfile-gap'}
+ $self->{_logfile_gap}
)
|| ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
)
Perl::Tidy::Warn "## Please see file $filename\n"
unless ref($warning_file);
$self->{_fh_warnings} = $fh_warnings;
+ $fh_warnings->print("Perltidy version is $Perl::Tidy::VERSION\n");
}
if ( $warning_count < WARNING_LIMIT ) {
if ( $self->get_use_prefix() > 0 ) {
my $input_line_number =
Perl::Tidy::Tokenizer::get_input_line_number();
+ if ( !defined($input_line_number) ) { $input_line_number = -1 }
$fh_warnings->print("$input_line_number:\t@_");
$self->write_logfile_entry("WARNING: @_");
}
}
if ( $self->{_saw_brace_error}
- && ( $rOpts->{'logfile-gap'} > 1 || !$save_logfile ) )
+ && ( $self->{_logfile_gap} > 1 || !$save_logfile ) )
{
$self->warning("To save a full .LOG file rerun with -g\n");
}
}
# Pod::Html requires a real temporary filename
- # If we are making a frame, we have a name available
- # Otherwise, we have to fine one
- my $tmpfile;
- if ( $rOpts->{'frames'} ) {
- $tmpfile = $self->{_toc_filename};
- }
- else {
- $tmpfile = Perl::Tidy::make_temporary_filename();
- }
- my $fh_tmp = IO::File->new( $tmpfile, 'w' );
+ my ( $fh_tmp, $tmpfile ) = File::Temp::tempfile();
unless ($fh_tmp) {
Perl::Tidy::Warn
"unable to open temporary file $tmpfile; cannot use pod2html\n";
# note that we have to unlink tmpfile before making frames
# because the tmpfile may be one of the names used for frames
- unlink $tmpfile if -e $tmpfile;
+ if ( -e $tmpfile ) {
+ unless ( unlink($tmpfile) ) {
+ Perl::Tidy::Warn("couldn't unlink temporary file $tmpfile: $!\n");
+ $success_flag = 0;
+ }
+ }
+
if ( $success_flag && $rOpts->{'frames'} ) {
$self->make_frame( \@toc );
}
$closing_side_comment_prefix_pattern
$closing_side_comment_list_pattern
+ $blank_lines_after_opening_block_pattern
+ $blank_lines_before_closing_block_pattern
+
$last_nonblank_token
$last_nonblank_type
$last_last_nonblank_token
%is_assignment
%is_chain_operator
%is_if_unless_and_or_last_next_redo_return
+ %ok_to_add_semicolon_for_block_type
@has_broken_sublist
@dont_align
%is_opening_type
%is_closing_token
%is_opening_token
+
+ $SUB_PATTERN
+ $ASUB_PATTERN
};
BEGIN {
unless while until for foreach given when default);
@is_block_without_semicolon{@_} = (1) x scalar(@_);
+ # We will allow semicolons to be added within these block types
+ # as well as sub and package blocks.
+ # NOTES:
+ # 1. Note that these keywords are omitted:
+ # switch case given when default sort map grep
+ # 2. It is also ok to add for sub and package blocks and a labeled block
+ # 3. But not okay for other perltidy types including:
+ # { } ; G t
+ # 4. Test files: blktype.t, blktype1.t, semicolon.t
+ @_ =
+ qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
+ unless do while until eval for foreach );
+ @ok_to_add_semicolon_for_block_type{@_} = (1) x scalar(@_);
+
# 'L' is token for opening { at hash key
@_ = qw" L { ( [ ";
@is_opening_type{@_} = (1) x scalar(@_);
@_ = qw" } ) ] ";
@is_closing_token{@_} = (1) x scalar(@_);
+
+ # Patterns for standardizing matches to block types for regular subs and
+ # anonymous subs. Examples
+ # 'sub process' is a named sub
+ # 'sub ::m' is a named sub
+ # 'sub' is an anonymous sub
+ # 'sub:' is a label, not a sub
+ # 'substr' is a keyword
+ $SUB_PATTERN = '^sub\s+(::|\w)';
+ $ASUB_PATTERN = '^sub$';
}
# whitespace codes
make_bli_pattern();
make_block_brace_vertical_tightness_pattern();
+ make_blank_line_pattern();
if ( $rOpts->{'line-up-parentheses'} ) {
# default keywords for which space is introduced before an opening paren
# (at present, including them messes up vertical alignment)
@_ = qw(my local our and or err eq ne if else elsif until
- unless while for foreach return switch case given when);
+ unless while for foreach return switch case given when catch);
@space_after_keyword{@_} = (1) x scalar(@_);
# first remove any or all of these if desired
$rOpts->{'long-block-line-count'} = 1000000;
}
+ my $enc = $rOpts->{'character-encoding'};
+ if ( $enc && $enc !~ /^(none|utf8)$/i ) {
+ Perl::Tidy::Die <<EOM;
+Unrecognized character-encoding '$enc'; expecting one of: (none, utf8)
+EOM
+ }
+
my $ole = $rOpts->{'output-line-ending'};
if ($ole) {
my %endings = (
mac => "\015",
unix => "\012",
);
- $ole = lc $ole;
- unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
- my $str = join " ", keys %endings;
- Perl::Tidy::Die <<EOM;
+
+ # Patch for RT #99514, a memoization issue.
+ # Normally, the user enters one of 'dos', 'win', etc, and we change the
+ # value in the options parameter to be the corresponding line ending
+ # character. But, if we are using memoization, on later passes through
+ # here the option parameter will already have the desired ending
+ # character rather than the keyword 'dos', 'win', etc. So
+ # we must check to see if conversion has already been done and, if so,
+ # bypass the conversion step.
+ my %endings_inverted = (
+ "\015\012" => 'dos',
+ "\015\012" => 'win',
+ "\015" => 'mac',
+ "\012" => 'unix',
+ );
+
+ if ( defined( $endings_inverted{$ole} ) ) {
+
+ # we already have valid line ending, nothing more to do
+ }
+ else {
+ $ole = lc $ole;
+ unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
+ my $str = join " ", keys %endings;
+ Perl::Tidy::Die <<EOM;
Unrecognized line ending '$ole'; expecting one of: $str
EOM
- }
- if ( $rOpts->{'preserve-line-endings'} ) {
- Perl::Tidy::Warn "Ignoring -ple; conflicts with -ole\n";
- $rOpts->{'preserve-line-endings'} = undef;
+ }
+ if ( $rOpts->{'preserve-line-endings'} ) {
+ Perl::Tidy::Warn "Ignoring -ple; conflicts with -ole\n";
+ $rOpts->{'preserve-line-endings'} = undef;
+ }
}
}
}
}
+sub make_blank_line_pattern {
+
+ $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
+ my $key = 'blank-lines-before-closing-block-list';
+ if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
+ $blank_lines_before_closing_block_pattern =
+ make_block_pattern( '-blbcl', $rOpts->{$key} );
+ }
+
+ $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
+ $key = 'blank-lines-after-opening-block-list';
+ if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
+ $blank_lines_after_opening_block_pattern =
+ make_block_pattern( '-blaol', $rOpts->{$key} );
+ }
+}
+
sub make_block_pattern {
# given a string of block-type keywords, return a regex to match them
# input string: "if else elsif unless while for foreach do : sub";
# pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
+ # Minor Update:
+ #
+ # To distinguish between anonymous subs and named subs, use 'sub' to
+ # indicate a named sub, and 'asub' to indicate an anonymous sub
+
my ( $abbrev, $string ) = @_;
my @list = split_words($string);
my @words = ();
$seen{$i} = 1;
if ( $i eq 'sub' ) {
}
+ elsif ( $i eq 'asub' ) {
+ }
elsif ( $i eq ';' ) {
push @words, ';';
}
}
}
my $pattern = '(' . join( '|', @words ) . ')$';
+ my $sub_patterns = "";
if ( $seen{'sub'} ) {
- $pattern = '(' . $pattern . '|sub)';
+ $sub_patterns .= '|' . $SUB_PATTERN;
+ }
+ if ( $seen{'asub'} ) {
+ $sub_patterns .= '|' . $ASUB_PATTERN;
+ }
+ if ($sub_patterns) {
+ $pattern = '(' . $pattern . $sub_patterns . ')';
}
$pattern = '^' . $pattern;
return $pattern;
# but watch out for this: [ [ ] (misc.t)
&& $last_token ne $token
+
+ # double diamond is usually spaced
+ && $token ne '<<>>'
+
)
{
# *VERSION = \'1.01';
# ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
# We will pass such a line straight through without breaking
- # it unless -npvl is used
+ # it unless -npvl is used.
+
+ # Patch for problem reported in RT #81866, where files
+ # had been flattened into a single line and couldn't be
+ # tidied without -npvl. There are two parts to this patch:
+ # First, it is not done for a really long line (80 tokens for now).
+ # Second, we will only allow up to one semicolon
+ # before the VERSION. We need to allow at least one semicolon
+ # for statements like this:
+ # require Exporter; our $VERSION = $Exporter::VERSION;
+ # where both statements must be on a single line for MakeMaker
my $is_VERSION_statement = 0;
-
- if (
- !$saw_VERSION_in_this_file
- && $input_line =~ /VERSION/ # quick check to reject most lines
- && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
- )
+ if ( !$saw_VERSION_in_this_file
+ && $jmax < 80
+ && $input_line =~
+ /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
{
$saw_VERSION_in_this_file = 1;
$is_VERSION_statement = 1;
# qw lines will still go out at the end of this routine.
if ( $rOpts->{'indent-only'} ) {
flush();
- trim($input_line);
+ my $line = $input_line;
+
+ # delete side comments if requested with -io, but
+ # we will not allow deleting of closing side comments with -io
+ # because the coding would be more complex
+ if ( $rOpts->{'delete-side-comments'}
+ && $rtoken_type->[$jmax] eq '#' )
+ {
+ $line = join "", @{$rtokens}[ 0 .. $jmax - 1 ];
+ }
+ trim($line);
extract_token(0);
- $token = $input_line;
+ $token = $line;
$type = 'q';
$block_type = "";
$container_type = "";
}
# This is a good place to kill incomplete one-line blocks
- if ( ( $semicolons_before_block_self_destruct == 0 )
- && ( $max_index_to_go >= 0 )
- && ( $types_to_go[$max_index_to_go] eq ';' )
- && ( $$rtokens[0] ne '}' ) )
+ if (
+ (
+ ( $semicolons_before_block_self_destruct == 0 )
+ && ( $max_index_to_go >= 0 )
+ && ( $types_to_go[$max_index_to_go] eq ';' )
+ && ( $$rtokens[0] ne '}' )
+ )
+
+ # Patch for RT #98902. Honor request to break at old commas.
+ || ( $rOpts_break_at_old_comma_breakpoints
+ && $max_index_to_go >= 0
+ && $types_to_go[$max_index_to_go] eq ',' )
+ )
{
+ $forced_breakpoint_to_go[$max_index_to_go] = 1
+ if ($rOpts_break_at_old_comma_breakpoints);
destroy_one_line_block();
output_line_to_go();
}
$type = $type_save;
}
- if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g }
+ if ( $token =~ /$SUB_PATTERN/ ) { $token =~ s/\s+/ /g }
# trim identifiers of trailing blanks which can occur
# under some unusual circumstances, such as if the
my $want_break =
# use -bl flag if not a sub block of any type
- $block_type !~ /^sub/
+ #$block_type !~ /^sub/
+ $block_type !~ /^sub\b/
? $rOpts->{'opening-brace-on-new-line'}
# use -sbl flag for a named sub block
- : $block_type !~ /^sub\W*$/
+ : $block_type !~ /$ASUB_PATTERN/
? $rOpts->{'opening-sub-brace-on-new-line'}
# use -asbl flag for an anonymous sub block
# and we don't have one
&& ( $last_nonblank_type ne ';' )
- # patch until some block type issues are fixed:
- # Do not add semi-colon for block types '{',
- # '}', and ';' because we cannot be sure yet
- # that this is a block and not an anonymous
- # hash (blktype.t, blktype1.t)
- && ( $block_type !~ /^[\{\};]$/ )
-
- # patch: and do not add semi-colons for recently
- # added block types (see tmp/semicolon.t)
- && ( $block_type !~
- /^(switch|case|given|when|default)$/ )
-
- # it seems best not to add semicolons in these
- # special block types: sort|map|grep
- && ( !$is_sort_map_grep{$block_type} )
-
# and we are allowed to do so.
&& $rOpts->{'add-semicolons'}
+
+ # and we are allowed to for this block type
+ && ( $ok_to_add_semicolon_for_block_type{$block_type}
+ || $block_type =~ /^(sub|package)/
+ || $block_type =~ /^\w+\:$/ )
+
)
{
# But make a line break if the curly ends a
# significant block:
if (
- $is_block_without_semicolon{$block_type}
+ (
+ $is_block_without_semicolon{$block_type}
+
+ # Follow users break point for
+ # one line block types U & G, such as a 'try' block
+ || $is_one_line_block =~ /^[UG]$/ && $j == $jmax
+ )
# if needless semicolon follows we handle it later
&& $next_nonblank_token ne ';'
}
# anonymous sub
- elsif ( $block_type =~ /^sub\W*$/ ) {
+ elsif ( $block_type =~ /$ASUB_PATTERN/ ) {
if ($is_one_line_block) {
$rbrace_follower = \%is_anon_sub_1_brace_follower;
&& (
$is_block_without_semicolon{
$last_nonblank_block_type}
- || $last_nonblank_block_type =~ /^sub\s+\w/
+ || $last_nonblank_block_type =~ /$SUB_PATTERN/
|| $last_nonblank_block_type =~ /^\w+:$/ )
)
|| $last_nonblank_type eq ';'
);
}
+ # Check for blank lines wanted before a closing brace
+ if ( $leading_token eq '}' ) {
+ if ( $rOpts->{'blank-lines-before-closing-block'}
+ && $block_type_to_go[$imin]
+ && $block_type_to_go[$imin] =~
+ /$blank_lines_before_closing_block_pattern/ )
+ {
+ my $nblanks = $rOpts->{'blank-lines-before-closing-block'};
+ if ( $nblanks > $want_blank ) {
+ $want_blank = $nblanks;
+ }
+ }
+ }
+
if ($want_blank) {
# future: send blank line down normal path to VerticalAligner
$do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
}
send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
+
+ # Insert any requested blank lines after an opening brace. We have to
+ # skip back before any side comment to find the terminal token
+ my $iterm;
+ for ( $iterm = $imax ; $iterm >= $imin ; $iterm-- ) {
+ next if $types_to_go[$iterm] eq '#';
+ next if $types_to_go[$iterm] eq 'b';
+ last;
+ }
+
+ # write requested number of blank lines after an opening block brace
+ if ( $iterm >= $imin && $types_to_go[$iterm] eq '{' ) {
+ if ( $rOpts->{'blank-lines-after-opening-block'}
+ && $block_type_to_go[$iterm]
+ && $block_type_to_go[$iterm] =~
+ /$blank_lines_after_opening_block_pattern/ )
+ {
+ my $nblanks = $rOpts->{'blank-lines-after-opening-block'};
+ Perl::Tidy::VerticalAligner::flush();
+ $file_writer_object->require_blank_code_lines($nblanks);
+ }
+ }
}
+
prepare_for_new_input_lines();
# output any new -cscw block comment
$i_start = $max_index_to_go;
}
+ # the previous nonblank token should start these block types
+ elsif (( $last_last_nonblank_token_to_go eq $block_type )
+ || ( $block_type =~ /^sub\b/ )
+ || $block_type =~ /\(\)/ )
+ {
+ $i_start = $last_last_nonblank_index_to_go;
+
+ # For signatures and extended syntax ...
+ # If this brace follows a parenthesized list, we should look back to
+ # find the keyword before the opening paren because otherwise we might
+ # form a one line block which stays intack, and cause the parenthesized
+ # expression to break open. That looks bad. However, actually
+ # searching for the opening paren is slow and tedius.
+ # The actual keyword is often at the start of a line, but might not be.
+ # For example, we might have an anonymous sub with signature list
+ # following a =>. It is safe to mark the start anywhere before the
+ # opening paren, so we just go back to the prevoious break (or start of
+ # the line) if that is before the opening paren. The minor downside is
+ # that we may very occasionally break open a block unnecessarily.
+ if ( $tokens_to_go[$i_start] eq ')' ) {
+ $i_start = $index_max_forced_break + 1;
+ if ( $types_to_go[$i_start] eq 'b' ) { $i_start++; }
+ my $lev = $levels_to_go[$i_start];
+ if ( $lev > $level ) { return 0 }
+ }
+ }
+
elsif ( $last_last_nonblank_token_to_go eq ')' ) {
# For something like "if (xxx) {", the keyword "if" will be
$i_start++;
}
- unless ( $tokens_to_go[$i_start] eq $block_type ) {
+ # Patch to avoid breaking short blocks defined with extended_syntax:
+ # Strip off any trailing () which was added in the parser to mark
+ # the opening keyword. For example, in the following
+ # create( TypeFoo $e) {$bubba}
+ # the blocktype would be marked as create()
+ my $stripped_block_type = $block_type;
+ $stripped_block_type =~ s/\(\)$//;
+
+ unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
return 0;
}
}
- # the previous nonblank token should start these block types
- elsif (( $last_last_nonblank_token_to_go eq $block_type )
- || ( $block_type =~ /^sub/ ) )
- {
- $i_start = $last_last_nonblank_index_to_go;
- }
-
# patch for SWITCH/CASE to retain one-line case/when blocks
elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
sub want_blank_line {
flush();
- $file_writer_object->want_blank_line();
+ $file_writer_object->want_blank_line() unless $in_format_skipping_section;
}
sub write_unindented_line {
# curly. Note: 'else' does not, but must be included to allow trailing
# if/elsif text to be appended.
# patch for SWITCH/CASE: added 'case' and 'when'
- @_ = qw(if elsif else unless while until for foreach case when);
+ @_ = qw(if elsif else unless while until for foreach case when catch);
@is_if_elsif_else_unless_while_until_for_foreach{@_} =
(1) x scalar(@_);
}
# remove sub names to allow one-line sub braces to align
# regardless of name
- if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
+ #if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
+ if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
# allow all control-type blocks to align
if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
{
$adjust_indentation = 1;
}
+
+ # Patch for RT #96101, in which closing brace of anonymous subs
+ # was not outdented. We should look ahead and see if there is
+ # a level decrease at the next token (i.e., a closing token),
+ # but right now we do not have that information. For now
+ # we see if we are in a list, and this works well.
+ # See test files 'sub*.t' for good test cases.
+ if ( $block_type_to_go[$ibeg] =~ /$ASUB_PATTERN/
+ && $container_environment_to_go[$i_terminal] eq 'LIST'
+ && !$rOpts->{'indent-closing-brace'} )
+ {
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = get_opening_indentation( $ibeg, $ri_first, $ri_last,
+ $rindentation_list );
+ my $indentation = $leading_spaces_to_go[$ibeg];
+ if ( defined($opening_indentation)
+ && get_SPACES($indentation) >
+ get_SPACES($opening_indentation) )
+ {
+ $adjust_indentation = 1;
+ }
+ }
}
# YVES patch 1 of 2:
$rindentation_list );
my $indentation = $leading_spaces_to_go[$ibeg];
if ( defined($opening_indentation)
- && $indentation > $opening_indentation )
+ && get_SPACES($indentation) >
+ get_SPACES($opening_indentation) )
{
$adjust_indentation = 1;
}
# 3 - ignore =>
# 4 - always open up if vt=0
# 5 - stable: even for one line blocks if vt=0
- if (
- !$is_long_term
- ##BUBBA: TYPO && $tokens_to_go[$i_opening] =~ /^[\(\{\]L]$/
+ if ( !$is_long_term
&& $tokens_to_go[$i_opening] =~ /^[\(\{\[]$/
&& $index_before_arrow[ $depth + 1 ] > 0
&& !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
# don't break pointer calls, such as the following:
# File::Spec->curdir => 1,
# (This is tokenized as adjacent 'w' tokens)
- if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
+ ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
+
+ # And don't break before a comma, as in the following:
+ # ( LONGER_THAN,=> 1,
+ # EIGHTY_CHARACTERS,=> 2,
+ # CAUSES_FORMATTING,=> 3,
+ # LIKE_THIS,=> 4,
+ # );
+ # This example is for -tso but should be general rule
+ if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
+ && $tokens_to_go[ $ibreak + 1 ] ne ',' )
+ {
set_forced_breakpoint($ibreak);
}
} ## end if ( $types_to_go[$ibreak...])
@is_mult_div{@_} = (1) x scalar(@_);
}
+ sub DUMP_BREAKPOINTS {
+
+ # Debug routine to dump current breakpoints...not normally called
+ # We are given indexes to the current lines:
+ # $ri_beg = ref to array of BEGinning indexes of each line
+ # $ri_end = ref to array of ENDing indexes of each line
+ my ( $ri_beg, $ri_end, $msg ) = @_;
+ print STDERR "----Dumping breakpoints from: $msg----\n";
+ for my $n ( 0 .. @{$ri_end} - 1 ) {
+ my $ibeg = $$ri_beg[$n];
+ my $iend = $$ri_end[$n];
+ my $text = "";
+ foreach my $i ( $ibeg .. $iend ) {
+ $text .= $tokens_to_go[$i];
+ }
+ print STDERR "$n ($ibeg:$iend) $text\n";
+ }
+ print STDERR "----\n";
+ }
+
sub recombine_breakpoints {
# sub set_continuation_breaks is very liberal in setting line breaks
&& ( $iend_2 - $ibeg_2 <= 7 )
)
);
-##BUBBA: RT #81854
+##X: RT #81854
$forced_breakpoint_to_go[$iend_1] = 0
unless $old_breakpoint_to_go[$iend_1];
}
# }
# };
#
- || ( $line_count
+ || (
+ $line_count
&& ( $token eq ')' )
&& ( $next_nonblank_type eq '{' )
&& ($next_nonblank_block_type)
&& ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
- && !$rOpts->{'opening-brace-always-on-right'} )
+
+ # RT #104427: Dont break before opening sub brace because
+ # sub block breaks handled at higher level, unless
+ # it looks like the preceeding list is long and broken
+ && !(
+ $next_nonblank_block_type =~ /^sub\b/
+ && ( $nesting_depth_to_go[$i_begin] ==
+ $nesting_depth_to_go[$i_next_nonblank] )
+ )
+
+ && !$rOpts->{'opening-brace-always-on-right'}
+ )
# There is an implied forced break at a terminal opening brace
|| ( ( $type eq '{' ) && ( $i_test == $imax ) )
# Start storing lines when we see a line with multiple stacked opening
# tokens.
- if ( $args[0] =~ /[\{\(\[]\s*[\{\(\[]$/ ) {
+ # patch for RT #94354, requested by Colin Williams
+ if ( $seqno_string =~ /^\d+(\:+\d+)+$/ && $args[0] !~ /^[\}\)\]\:\?]/ )
+ {
+
+ # This test is efficient but a little subtle: The first test says
+ # that we have multiple sequence numbers and hence multiple opening
+ # or closing tokens in this line. The second part of the test
+ # rejects stacked closing and ternary tokens. So if we get here
+ # then we should have stacked unbalanced opening tokens.
+
+ # Here is a complex example:
+
+ # Foo($Bar[0], { # (side comment)
+ # baz => 1,
+ # });
+
+ # The first line has sequence 6::4. It does not begin with
+ # a closing token or ternary, so it passes the test and must be
+ # stacked opening tokens.
+
+ # The last line has sequence 4:6 but is a stack of closing tokens,
+ # so it gets rejected.
+
+ # Note that the sequence number of an opening token for a qw quote
+ # is a negative number and will be rejected.
+ # For example, for the following line:
+ # skip_symbols([qw(
+ # $seqno_string='10:5:-1'. It would be okay to accept it but
+ # I decided not to do this after testing.
+
$valign_buffer_filling = $seqno_string;
+
}
}
}
%is_digraph
%is_file_test_operator
%is_trigraph
+ %is_tetragraph
%is_valid_token_type
%is_keyword
%is_code_block_token
look_for_autoloader => 1,
look_for_selfloader => 1,
starting_line_number => 1,
+ extended_syntax => 0,
);
my %args = ( %defaults, @_ );
_nearly_matched_here_target_at => undef,
_line_text => "",
_rlower_case_labels_at => undef,
+ _extended_syntax => $args{extended_syntax},
};
prepare_for_a_new_file();
sub scan_identifier {
( $i, $tok, $type, $id_scan_state, $identifier ) =
scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
- $max_token_index, $expecting );
+ $max_token_index, $expecting, $paren_type[$paren_depth] );
}
sub scan_id {
# keyword ( .... ) { BLOCK }
# patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
my %is_blocktype_with_paren;
- @_ = qw(if elsif unless while until for foreach switch case given when);
+ @_ =
+ qw(if elsif unless while until for foreach switch case given when catch);
@is_blocktype_with_paren{@_} = (1) x scalar(@_);
# ------------------------------------------------------------
$container_type = $want_paren;
$want_paren = "";
}
+ elsif ( $statement_type =~ /^sub\b/ ) {
+ $container_type = $statement_type;
+ }
else {
$container_type = $last_nonblank_token;
$container_type = $paren_type[$paren_depth];
+ # restore statement type as 'sub' at closing paren of a signature
+ # so that a subsequent ':' is identified as an attribute
+ if ( $container_type =~ /^sub\b/ ) {
+ $statement_type = $container_type;
+ }
+
# /^(for|foreach)$/
if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {
my $num_sc = $paren_semicolon_count[$paren_depth];
';' => sub {
$context = UNKNOWN_CONTEXT;
$statement_type = '';
+ $want_paren = "";
# /^(for|foreach)$/
if ( $is_for_foreach{ $paren_type[$paren_depth] } )
if ($is_pattern) {
$in_quote = 1;
$type = 'Q';
- $allowed_quote_modifiers = '[msixpodualgc]';
+ $allowed_quote_modifiers = '[msixpodualngc]';
}
else { # not a pattern; check for a /= token
# check for syntax error here;
unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
- my $list = join( ' ', sort keys %is_blocktype_with_paren );
- warning(
- "syntax error at ') {', didn't see one of: $list\n");
+ if ( $tokenizer_self->{'_extended_syntax'} ) {
+
+ # we append a trailing () to mark this as an unknown
+ # block type. This allows perltidy to format some
+ # common extensions of perl syntax.
+ # This is used by sub code_block_type
+ $last_nonblank_token .= '()';
+ }
+ else {
+ my $list =
+ join( ' ', sort keys %is_blocktype_with_paren );
+ warning(
+"syntax error at ') {', didn't see one of: <<$list>>; If this code is okay try using the -xs flag\n"
+ );
+ }
}
}
$block_type = code_block_type( $i_tok, $rtokens, $rtoken_type,
$max_token_index );
- # remember a preceding smartmatch operator
- ## SMARTMATCH
- ##if ( $last_nonblank_type eq '~~' ) {
- ## $block_type = $last_nonblank_type;
- ##}
-
# patch to promote bareword type to function taking block
if ( $block_type
&& $last_nonblank_type eq 'w'
}
}
}
+
$brace_type[ ++$brace_depth ] = $block_type;
$brace_package[$brace_depth] = $current_package;
$brace_structural_type[$brace_depth] = $type;
# propagate type information for 'do' and 'eval' blocks, and also
# for smartmatch operator. This is necessary to enable us to know
# if an operator or term is expected next.
- ## SMARTMATCH
- ##if ( $is_block_operator{$block_type} || $block_type eq '~~' ) {
if ( $is_block_operator{$block_type} ) {
$tok = $block_type;
}
if ($is_pattern) {
$in_quote = 1;
$type = 'Q';
- $allowed_quote_modifiers = '[msixpodualgc]';
+ $allowed_quote_modifiers = '[msixpodualngc]';
}
else {
( $type_sequence, $indent_flag ) =
# ATTRS: check for a ':' which introduces an attribute list
# (this might eventually get its own token type)
- elsif ( $statement_type =~ /^sub/ ) {
+ elsif ( $statement_type =~ /^sub\b/ ) {
$type = 'A';
$in_attribute_list = 1;
}
'__DATA__' => '_in_data',
);
- # ref: camel 3 p 147,
+ # original ref: camel 3 p 147,
# but perl may accept undocumented flags
# perl 5.10 adds 'p' (preserve)
- # Perl version 5.16, http://perldoc.perl.org/perlop.html, has these:
- # /PATTERN/msixpodualgc or m?PATTERN?msixpodualgc
- # s/PATTERN/REPLACEMENT/msixpodualgcer
+ # Perl version 5.22 added 'n'
+ # From http://perldoc.perl.org/perlop.html we have
+ # /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc
+ # s/PATTERN/REPLACEMENT/msixpodualngcer
# y/SEARCHLIST/REPLACEMENTLIST/cdsr
# tr/SEARCHLIST/REPLACEMENTLIST/cdsr
- # qr/STRING/msixpodual
+ # qr/STRING/msixpodualn
my %quote_modifiers = (
- 's' => '[msixpodualgcer]',
+ 's' => '[msixpodualngcer]',
'y' => '[cdsr]',
'tr' => '[cdsr]',
- 'm' => '[msixpodualgc]',
- 'qr' => '[msixpodual]',
+ 'm' => '[msixpodualngc]',
+ 'qr' => '[msixpodualn]',
'q' => "",
'qq' => "",
'qw' => "",
$input_line =~ s/^\s*//; # trim left end
}
+ # Set a flag to indicate if we might be at an __END__ or __DATA__ line
+ # This will be used below to avoid quoting a bare word followed by
+ # a fat comma.
+ my $is_END_or_DATA = $input_line =~ /^\s*__(END|DATA)__\s*$/;
+
# update the copy of the line for use in error messages
# This must be exactly what we give the pre_tokenizer
$tokenizer_self->{_line_text} = $input_line;
# '//' must be defined_or operator if an operator is expected.
# TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
# could be migrated here for clarity
- if ( $test_tok eq '//' ) {
+
+ # Patch for RT#102371, misparsing a // in the following snippet:
+ # state $b //= ccc();
+ # The solution is to always accept the digraph (or trigraph) after
+ # token type 'Z' (possible file handle). The reason is that
+ # sub operator_expected gives TERM expected here, which is
+ # wrong in this case.
+ if ( $test_tok eq '//' && $last_nonblank_type ne 'Z' ) {
my $next_type = $$rtokens[ $i + 1 ];
my $expecting =
operator_expected( $prev_type, $tok, $next_type );
- $combine_ok = 0 unless ( $expecting == OPERATOR );
+
+ # Patched for RT#101547, was 'unless ($expecting==OPERATOR)'
+ $combine_ok = 0 if ( $expecting == TERM );
}
}
$tok = $test_tok;
$i++;
}
+
+ # The only current tetragraph is the double diamond operator
+ # and its first three characters are not a trigraph, so
+ # we do can do a special test for it
+ elsif ( $test_tok eq '<<>' ) {
+ $test_tok .= $$rtokens[ $i + 2 ];
+ if ( $is_tetragraph{$test_tok} ) {
+ $tok = $test_tok;
+ $i += 2;
+ }
+ }
}
$type = $tok;
}
# quote a word followed by => operator
- if ( $next_nonblank_token eq '=' ) {
+ # unless the word __END__ or __DATA__ and the only word on
+ # the line.
+ if ( !$is_END_or_DATA && $next_nonblank_token eq '=' ) {
if ( $$rtokens[ $i_next + 1 ] eq '>' ) {
if ( $is_constant{$current_package}{$tok} ) {
# various quote operators
elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
+##NICOL PATCH
if ( $expecting == OPERATOR ) {
- # patch for paren-less for/foreach glitch, part 1
- # perl will accept this construct as valid:
+ # Be careful not to call an error for a qw quote
+ # where a parenthesized list is allowed. For example,
+ # it could also be a for/foreach construct such as
#
# foreach my $key qw\Uno Due Tres Quadro\ {
# print "Set $key\n";
# }
- unless ( $tok eq 'qw' && $is_for_foreach{$want_paren} )
+ #
+
+ # Or it could be a function call.
+ # NOTE: Braces in something like &{ xxx } are not
+ # marked as a block, we might have a method call.
+ # &method(...), $method->(..), &{method}(...),
+ # $ref[2](list) is ok & short for $ref[2]->(list)
+ #
+ # See notes in 'sub code_block_type' and
+ # 'sub is_non_structural_brace'
+
+ unless (
+ $tok eq 'qw'
+ && ( $last_nonblank_token =~ /^([\]\}\&]|\-\>)/
+ || $is_for_foreach{$want_paren} )
+ )
{
error_if_expecting_OPERATOR();
}
elsif ( $tok eq 'else' ) {
# patched for SWITCH/CASE
- if ( $last_nonblank_token ne ';'
+ if (
+ $last_nonblank_token ne ';'
&& $last_nonblank_block_type !~
- /^(if|elsif|unless|case|when)$/ )
+ /^(if|elsif|unless|case|when)$/
+
+ # patch to avoid an unwanted error message for
+ # the case of a parenless 'case' (RT 105484):
+ # switch ( 1 ) { case x { 2 } else { } }
+ && $statement_type !~
+ /^(if|elsif|unless|case|when)$/
+ )
{
warning(
"expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
$in_statement_continuation = 0;
}
- # otherwise, the next token after a ',' starts a new term
- elsif ( $type eq ',' ) {
+ # otherwise, the token after a ',' starts a new term
+
+ # Patch FOR RT#99961; no continuation after a ';'
+ # This is needed because perltidy currently marks
+ # a block preceded by a type character like % or @
+ # as a non block, to simplify formatting. But these
+ # are actually blocks and can have semicolons.
+ # See code_block_type() and is_non_structural_brace().
+ elsif ( $type eq ',' || $type eq ';' ) {
$in_statement_continuation = 0;
}
{
$op_expected = OPERATOR;
}
+
+ # Patch for RT #116344: misparse a ternary operator after an anonymous
+ # hash, like this:
+ # return ref {} ? 1 : 0;
+ # The right brace should really be marked type 'R' in this case, and
+ # it is safest to return an UNKNOWN here. Expecting a TERM will
+ # cause the '?' to always be interpreted as a pattern delimiter
+ # rather than introducing a ternary operator.
+ elsif ( $tok eq '?' ) {
+ $op_expected = UNKNOWN;
+ }
else {
$op_expected = TERM;
}
}
}
+ ################################################################
# NOTE: braces after type characters start code blocks, but for
# simplicity these are not identified as such. See also
# sub is_non_structural_brace.
- # elsif ( $last_nonblank_type eq 't' ) {
- # return $last_nonblank_token;
- # }
+ ################################################################
+
+## elsif ( $last_nonblank_type eq 't' ) {
+## return $last_nonblank_token;
+## }
# brace after label:
elsif ( $last_nonblank_type eq 'J' ) {
$max_token_index );
}
+ # Patch for bug # RT #94338 reported by Daniel Trizen
+ # for-loop in a parenthesized block-map triggering an error message:
+ # map( { foreach my $item ( '0', '1' ) { print $item} } qw(a b c) );
+ # Check for a code block within a parenthesized function call
+ elsif ( $last_nonblank_token eq '(' ) {
+ my $paren_type = $paren_type[$paren_depth];
+ if ( $paren_type && $paren_type =~ /^(map|grep|sort)$/ ) {
+
+ # We will mark this as a code block but use type 't' instead
+ # of the name of the contining function. This will allow for
+ # correct parsing but will usually produce better formatting.
+ # Braces with block type 't' are not broken open automatically
+ # in the formatter as are other code block types, and this usually
+ # works best.
+ return 't'; # (Not $paren_type)
+ }
+ else {
+ return "";
+ }
+ }
+
+ # handle unknown syntax ') {'
+ # we previously appended a '()' to mark this case
+ elsif ( $last_nonblank_token =~ /\(\)$/ ) {
+ return $last_nonblank_token;
+ }
+
# anything else must be anonymous hash reference
else {
return "";
# USES GLOBAL VARIABLES: $last_nonblank_token
my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
+
my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens, $max_token_index );
# We are only going to look ahead one more (nonblank/comment) line.
# Strange formatting could cause a bad guess, but that's unlikely.
- my @pre_types = @$rtoken_type[ $i + 1 .. $max_token_index ];
- my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
+ my @pre_types;
+ my @pre_tokens;
+
+ # Ignore the rest of this line if it is a side comment
+ if ( $next_nonblank_token ne '#' ) {
+ @pre_types = @$rtoken_type[ $i + 1 .. $max_token_index ];
+ @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
+ }
my ( $rpre_tokens, $rpre_types ) =
peek_ahead_for_n_nonblank_pre_tokens(20); # 20 is arbitrary but
# generous, and prevents
# put a sentinel token to simplify stopping the search
push @pre_types, '}';
+ push @pre_types, '}';
my $jbeg = 0;
$jbeg = 1 if $pre_types[0] eq 'b';
$j++;
}
elsif ( $pre_types[$j] eq 'w' ) {
- unless ( $is_keyword{ $pre_tokens[$j] } ) {
- $j++;
- }
+ $j++;
}
elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
$j++;
$j++ if $pre_types[$j] eq 'b';
- # it's a hash ref if a comma or => follow next
- if ( $pre_types[$j] eq ','
- || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) )
+ # Patched for RT #95708
+ if (
+
+ # it is a comma which is not a pattern delimeter except for qw
+ (
+ $pre_types[$j] eq ','
+ && $pre_tokens[$jbeg] !~ /^(s|m|y|tr|qr|q|qq|qx)$/
+ )
+
+ # or a =>
+ || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' )
+ )
{
$code_block_type = "";
}
# return 0;
# }
+ ################################################################
# NOTE: braces after type characters start code blocks, but for
# simplicity these are not identified as such. See also
# sub code_block_type
- # if ($last_nonblank_type eq 't') {return 0}
+ ################################################################
+
+ ##if ($last_nonblank_type eq 't') {return 0}
# otherwise, it is non-structural if it is decorated
# by type information.
# $last_nonblank_type
my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
- $expecting )
+ $expecting, $container_type )
= @_;
my $i_begin = $i;
my $type = '';
my $tok = $tok_begin;
my $message = "";
+ my $in_prototype_or_signature = $container_type =~ /^sub/;
+
# these flags will be used to help figure out the type:
my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
my $saw_type;
last;
}
}
+
+ # POSTDEFREF ->@ ->% ->& ->*
+ elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) {
+ $identifier .= $tok;
+ }
elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric ..
$saw_alpha = 1;
$id_scan_state = ':'; # now need ::
$id_scan_state = 'A';
$identifier .= $tok;
}
- elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) { # $#array
+
+ # $# and POSTDEFREF ->$#
+ elsif ( ( $tok eq '#' ) && ( $identifier =~ /\$$/ ) ) { # $#array
$identifier .= $tok; # keep same state, a $ could follow
}
elsif ( $tok eq '{' ) {
}
else { # something else
+ if ( $in_prototype_or_signature && $tok =~ /^[\),=]/ ) {
+ $id_scan_state = '';
+ $i = $i_save;
+ $type = 'i'; # probably punctuation variable
+ last;
+ }
+
# check for various punctuation variables
if ( $identifier =~ /^[\$\*\@\%]$/ ) {
$identifier .= $tok;
}
+ # POSTDEFREF: Postfix reference ->$* ->%* ->@* ->** ->&* ->$#*
+ elsif ( $tok eq '*' && $identifier =~ /([\@\%\$\*\&]|\$\#)$/ ) {
+ $identifier .= $tok;
+ }
+
elsif ( $identifier eq '$#' ) {
if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
my $pos_beg = $$rtoken_map[$i_beg];
pos($input_line) = $pos_beg;
- # sub NAME PROTO ATTRS
+ # Look for the sub NAME
if (
$input_line =~ m/\G\s*
((?:\w*(?:'|::))*) # package - something that ends in :: or '
(\w+) # NAME - required
- (\s*\([^){]*\))? # PROTO - something in parens
- (\s*:)? # ATTRS - leading : of attribute list
/gcx
)
{
$match = 1;
$subname = $2;
- $proto = $3;
- $attrs = $4;
$package = ( defined($1) && $1 ) ? $1 : $current_package;
$package =~ s/\'/::/g;
$type = 'i';
}
- # Look for prototype/attributes not preceded on this line by subname;
- # This might be an anonymous sub with attributes,
+ # Now look for PROTO ATTRS
+ # Look for prototype/attributes which are usually on the same
+ # line as the sub name but which might be on a separate line.
+ # For example, we might have an anonymous sub with attributes,
# or a prototype on a separate line from its sub name
- elsif (
- $input_line =~ m/\G(\s*\([^){]*\))? # PROTO
+
+ # NOTE: We only want to parse PROTOTYPES here. If we see anything that
+ # does not look like a prototype, we assume it is a SIGNATURE and we
+ # will stop and let the the standard tokenizer handle it. In
+ # particular, we stop if we see any nested parens, braces, or commas.
+ my $saw_opening_paren = $input_line =~ /\G\s*\(/;
+ if (
+ $input_line =~ m/\G(\s*\([^\)\(\}\{\,]*\))? # PROTO
(\s*:)? # ATTRS leading ':'
/gcx
&& ( $1 || $2 )
)
{
- $match = 1;
$proto = $1;
$attrs = $2;
+ # If we also found the sub name on this call then append PROTO.
+ # This is not necessary but for compatability with previous
+ # versions when the -csc flag is used:
+ if ( $match && $proto ) {
+ $tok .= $proto;
+ }
+ $match ||= 1;
+
# Handle prototype on separate line from subname
if ($subname_saved) {
$package = $package_saved;
$in_attribute_list = 1;
}
- # We must convert back from character position
- # to pre_token index.
+ # Otherwise, if we found a match we must convert back from
+ # string position to the pre_token index for continued parsing.
else {
# I don't think an error flag can occur here ..but ?
}
$package_saved = "";
$subname_saved = "";
+
+ # See what's next...
if ( $next_nonblank_token eq '{' ) {
if ($subname) {
$statement_type = $tok;
}
- # see if PROTO follows on another line:
+ # if we stopped before an open paren ...
elsif ( $next_nonblank_token eq '(' ) {
- if ( $attrs || $proto ) {
- warning(
-"unexpected '(' after definition or declaration of sub '$subname'\n"
- );
- }
- else {
- $id_scan_state = 'sub'; # we must come back to get proto
- $statement_type = $tok;
- $package_saved = $package;
- $subname_saved = $subname;
+
+ # If we DID NOT see this paren above then it must be on the
+ # next line so we will set a flag to come back here and see if
+ # it is a PROTOTYPE
+
+ # Otherwise, we assume it is a SIGNATURE rather than a
+ # PROTOTYPE and let the normal tokenizer handle it as a list
+ if ( !$saw_opening_paren ) {
+ $id_scan_state = 'sub'; # we must come back to get proto
+ $package_saved = $package;
+ $subname_saved = $subname;
}
+ $statement_type = $tok;
}
elsif ($next_nonblank_token) { # EOF technically ok
warning(
my @digraphs = qw(
.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
- <= >= == =~ !~ != ++ -- /= x= ~~
+ <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
);
@is_digraph{@digraphs} = (1) x scalar(@digraphs);
- my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ );
+ my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.=);
@is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
+ my @tetragraphs = qw( <<>> );
+ @is_tetragraph{@tetragraphs} = (1) x scalar(@tetragraphs);
+
# make a hash of all valid token types for self-checking the tokenizer
# (adding NEW_TOKENS : select a new character and add to this list)
my @valid_token_types = qw#
#;
push( @valid_token_types, @digraphs );
push( @valid_token_types, @trigraphs );
+ push( @valid_token_types, @tetragraphs );
push( @valid_token_types, ( '#', ',', 'CORE::' ) );
@is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);
@is_indirect_object_taker{@_} = (1) x scalar(@_);
# These tokens may precede a code block
- # patched for SWITCH/CASE
+ # patched for SWITCH/CASE/CATCH. Actually these could be removed
+ # now and we could let the extended-syntax coding handle them
@_ =
qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
unless do while until eval for foreach map grep sort
- switch case given when);
+ switch case given when catch try finally);
@is_code_block_token{@_} = (1) x scalar(@_);
# I'll build the list of keywords incrementally
when
err
say
+
+ catch
);
# patched above for SWITCH/CASE given/when err say
**= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
<= >= == != => \ > < % * / ? & | ** <=> ~~ !~~
f F pp mm Y p m U J G j >> << ^ t
+ ~. ^. |. &. ^.= |.= &.=
#;
push( @value_requestor_type, ',' )
; # (perl doesn't like a ',' in a qw block)
@is_keyword{@Keywords} = (1) x scalar(@Keywords);
}
1;
-__END__
-