#
# perltidy - a perl script indenter and formatter
#
-# Copyright (c) 2000-2014 by Steve Hancock
+# Copyright (c) 2000-2016 by Steve Hancock
# Distributed under the GPL license agreement; see file COPYING
#
# This program is free software; you can redistribute it and/or modify
############################################################
package Perl::Tidy;
-use 5.004; # need IO::File from 5.004 or later
-BEGIN { $^W = 1; } # turn on warnings
+# Actually should use a version later than about 5.8.5 to use
+# wide characters.
+use 5.004; # need IO::File from 5.004 or later
+use warnings;
use strict;
use Exporter;
use Carp;
@EXPORT
$missing_file_spec
$fh_stderr
+ $rOpts_character_encoding
};
@ISA = qw( Exporter );
@EXPORT = qw( &perltidy );
use Cwd;
+use Encode ();
use IO::File;
use File::Basename;
use File::Copy;
use File::Temp qw(tempfile);
BEGIN {
- ( $VERSION = q($Id: Tidy.pm,v 1.74 2014/03/28 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
+ ( $VERSION = q($Id: Tidy.pm,v 1.74 2016/03/02 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
}
sub streamhandle {
# skipped and we can just let it crash if there is no
# getline.
if ( $mode =~ /[rR]/ ) {
- if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
+
+ # RT#97159; part 1 of 2: updated to use 'can'
+ ##if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
+ if ( $ref->can('getline') ) {
$New = sub { $filename };
}
else {
# Accept an object with a print method for writing.
# See note above about IO::File
if ( $mode =~ /[wW]/ ) {
- if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
+
+ # RT#97159; part 2 of 2: updated to use 'can'
+ ##if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
+ if ( $ref->can('print') ) {
$New = sub { $filename };
}
else {
}
$fh = $New->( $filename, $mode )
or Warn("Couldn't open file:$filename in mode:$mode : $!\n");
+
return $fh, ( $ref or $filename );
}
#---------------------------------------------------------------
# get command line options
#---------------------------------------------------------------
- my (
- $rOpts, $config_file, $rraw_options,
- $saw_extrude, $saw_pbp, $roption_string,
- $rexpansion, $roption_category, $roption_range
- )
+ my ( $rOpts, $config_file, $rraw_options, $roption_string,
+ $rexpansion, $roption_category, $roption_range )
= process_command_line(
$perltidyrc_stream, $is_Windows, $Windows_type,
$rpending_complaint, $dump_options_type,
);
+ my $saw_extrude = ( grep m/^-extrude$/, @$rraw_options ) ? 1 : 0;
+ my $saw_pbp =
+ ( grep m/^-(pbp|perl-best-practices)$/, @$rraw_options ) ? 1 : 0;
+
#---------------------------------------------------------------
# Handle requests to dump information
#---------------------------------------------------------------
user => '',
);
+ $rOpts_character_encoding = $rOpts->{'character-encoding'};
+
# be sure we have a valid output format
unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
my $formats = join ' ',
# Prefilters and postfilters: The prefilter is a code reference
# that will be applied to the source before tidying, and the
# postfilter is a code reference to the result before outputting.
- if ($prefilter) {
+ if (
+ $prefilter
+ || ( $rOpts_character_encoding
+ && $rOpts_character_encoding eq 'utf8' )
+ )
+ {
my $buf = '';
while ( my $line = $source_object->get_line() ) {
$buf .= $line;
}
- $buf = $prefilter->($buf);
+
+ $buf = $prefilter->($buf) if $prefilter;
+
+ if ( $rOpts_character_encoding
+ && $rOpts_character_encoding eq 'utf8'
+ && !utf8::is_utf8($buf) )
+ {
+ eval {
+ $buf = Encode::decode( 'UTF-8', $buf,
+ Encode::FB_CROAK | Encode::LEAVE_SRC );
+ };
+ if ($@) {
+ Warn
+"skipping file: $input_file: Unable to decode source as UTF-8\n";
+ next;
+ }
+ }
$source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts,
$rpending_logfile_message );
# Eventually all I/O may be done with binmode, but for now it is
# only done when a user requests a particular line separator
# through the -ple or -ole flags
- my $binmode = 0;
- if ( defined($line_separator) ) { $binmode = 1 }
- else { $line_separator = "\n" }
+ my $binmode = defined($line_separator)
+ || defined($rOpts_character_encoding);
+ $line_separator = "\n" unless defined($line_separator);
my ( $sink_object, $postfilter_buffer );
if ($postfilter) {
look_for_autoloader => $rOpts->{'look-for-autoloader'},
look_for_selfloader => $rOpts->{'look-for-selfloader'},
trim_qw => $rOpts->{'trim-qw'},
+ extended_syntax => $rOpts->{'extended-syntax'},
continuation_indentation =>
$rOpts->{'continuation-indentation'},
my ( $fh_stream, $fh_name ) =
Perl::Tidy::streamhandle( $stream, 'r' );
if ($fh_stream) {
- my ( $fout, $tmpnam ) = tempfile();
+ my ( $fout, $tmpnam ) = File::Temp::tempfile();
if ($fout) {
$fname = $tmpnam;
$is_tmpfile = 1;
$add_option->( 'standard-error-output', 'se', '!' );
$add_option->( 'standard-output', 'st', '!' );
$add_option->( 'warning-output', 'w', '!' );
+ $add_option->( 'character-encoding', 'enc', '=s' );
# options which are both toggle switches and values moved here
# to hide from tidyview (which does not show category 0 flags):
$add_option->( 'preserve-line-endings', 'ple', '!' );
$add_option->( 'tabs', 't', '!' );
$add_option->( 'default-tabsize', 'dt', '=i' );
+ $add_option->( 'extended-syntax', 'xs', '!' );
########################################
$category = 2; # Code indentation control
%option_range = (
'format' => [ 'tidy', 'html', 'user' ],
'output-line-ending' => [ 'dos', 'win', 'mac', 'unix' ],
+ 'character-encoding' => [ 'none', 'utf8' ],
'block-brace-tightness' => [ 0, 2 ],
'brace-tightness' => [ 0, 2 ],
continuation-indentation=2
delete-old-newlines
delete-semicolons
+ extended-syntax
fuzzy-line-length
hanging-side-comments
indent-block-comments
nostatic-side-comments
notabs
nowarning-output
+ character-encoding=none
outdent-labels
outdent-long-quotes
outdent-long-comments
'nhtml' => [qw(format=tidy)],
'tidy' => [qw(format=tidy)],
+ 'utf8' => [qw(character-encoding=utf8)],
+ 'UTF8' => [qw(character-encoding=utf8)],
+
'swallow-optional-blank-lines' => [qw(kbl=0)],
'noswallow-optional-blank-lines' => [qw(kbl=1)],
'sob' => [qw(kbl=0)],
'sct' => [qw(scp schb scsb)],
'stack-closing-tokens' => => [qw(scp schb scsb)],
'nsct' => [qw(nscp nschb nscsb)],
- 'nostack-opening-tokens' => [qw(nscp nschb nscsb)],
+ 'nostack-closing-tokens' => [qw(nscp nschb nscsb)],
'sac' => [qw(sot sct)],
'nsac' => [qw(nsot nsct)],
my @raw_options = ();
my $config_file = "";
my $saw_ignore_profile = 0;
- my $saw_extrude = 0;
- my $saw_pbp = 0;
my $saw_dump_profile = 0;
my $i;
elsif ( $i =~ /^-(pro|profile)=?$/ ) {
Die "usage: -pro=filename or --profile=filename, no spaces\n";
}
- elsif ( $i =~ /^-extrude$/ ) {
- $saw_extrude = 1;
- }
- elsif ( $i =~ /^-(pbp|perl-best-practices)$/ ) {
- $saw_pbp = 1;
- }
elsif ( $i =~ /^-(help|h|HELP|H|\?)$/ ) {
usage();
Exit 0;
if ($fh_config) {
- my ( $rconfig_list, $death_message, $_saw_pbp ) =
+ my ( $rconfig_list, $death_message ) =
read_config_file( $fh_config, $config_file, $rexpansion );
Die $death_message if ($death_message);
- $saw_pbp ||= $_saw_pbp;
# process any .perltidyrc parameters right now so we can
# localize errors
Die "Error on command line; for help try 'perltidy -h'\n";
}
- return (
- \%Opts, $config_file, \@raw_options,
- $saw_extrude, $saw_pbp, $roption_string,
- $rexpansion, $roption_category, $roption_range
- );
-} # end of process_command_line
+ return ( \%Opts, $config_file, \@raw_options, $roption_string,
+ $rexpansion, $roption_category, $roption_range );
+} # end of _process_command_line
sub check_options {
my ( $fh, $config_file, $rexpansion ) = @_;
my @config_list = ();
- my $saw_pbp;
# file is bad if non-empty $death_message is returned
my $death_message = "";
my $name = undef;
my $line_no;
+ my $opening_brace_line;
while ( my $line = $fh->getline() ) {
$line_no++;
chomp $line;
$line =~ s/^\s*(.*?)\s*$/$1/; # trim both ends
next unless $line;
- # look for something of the general form
- # newname { body }
- # or just
- # body
-
my $body = $line;
- my ($newname);
- if ( $line =~ /^((\w+)\s*\{)(.*)\}$/ ) {
- ( $newname, $body ) = ( $2, $3, );
- }
- if ($body) {
+ my $newname;
- if ( !$saw_pbp && $body =~ /-(pbp|perl-best-practices)/ ) {
- $saw_pbp = 1;
- }
+ # Look for complete or partial abbreviation definition of the form
+ # name { body } or name { or name { body
+ # See rules in perltidy's perldoc page
+ # Section: Other Controls - Creating a new abbreviation
+ if ( $line =~ /^((\w+)\s*\{)(.*)?$/ ) {
+ my $oldname = $name;
+ ( $name, $body ) = ( $2, $3 );
+
+ # Cannot start new abbreviation unless old abbreviation is complete
+ last if ($opening_brace_line);
+
+ $opening_brace_line = $line_no unless ( $body && $body =~ s/\}$// );
# handle a new alias definition
- if ($newname) {
- if ($name) {
- $death_message =
-"No '}' seen after $name and before $newname in config file $config_file line $.\n";
- last;
- }
- $name = $newname;
+ if ( ${$rexpansion}{$name} ) {
+ local $" = ')(';
+ my @names = sort keys %$rexpansion;
+ $death_message =
+ "Here is a list of all installed aliases\n(@names)\n"
+ . "Attempting to redefine alias ($name) in config file $config_file line $.\n";
+ last;
+ }
+ ${$rexpansion}{$name} = [];
+ }
- if ( ${$rexpansion}{$name} ) {
- local $" = ')(';
- my @names = sort keys %$rexpansion;
- $death_message =
- "Here is a list of all installed aliases\n(@names)\n"
- . "Attempting to redefine alias ($name) in config file $config_file line $.\n";
- last;
- }
- ${$rexpansion}{$name} = [];
+ # leading opening braces not allowed
+ elsif ( $line =~ /^{/ ) {
+ $opening_brace_line = undef;
+ $death_message =
+ "Unexpected '{' at line $line_no in config file '$config_file'\n";
+ last;
+ }
+
+ # Look for abbreviation closing: body } or }
+ elsif ( $line =~ /^(.*)?\}$/ ) {
+ $body = $1;
+ if ($opening_brace_line) {
+ $opening_brace_line = undef;
}
+ else {
+ $death_message =
+"Unexpected '}' at line $line_no in config file '$config_file'\n";
+ last;
+ }
+ }
- # now do the body
- if ($body) {
+ # Now store any parameters
+ if ($body) {
- my ( $rbody_parts, $msg ) = parse_args($body);
- if ($msg) {
- $death_message = <<EOM;
+ my ( $rbody_parts, $msg ) = parse_args($body);
+ if ($msg) {
+ $death_message = <<EOM;
Error reading file '$config_file' at line number $line_no.
$msg
Please fix this line or use -npro to avoid reading this file
EOM
- last;
- }
+ last;
+ }
- if ($name) {
+ if ($name) {
- # remove leading dashes if this is an alias
- foreach (@$rbody_parts) { s/^\-+//; }
- push @{ ${$rexpansion}{$name} }, @$rbody_parts;
- }
- else {
- push( @config_list, @$rbody_parts );
- }
+ # remove leading dashes if this is an alias
+ foreach (@$rbody_parts) { s/^\-+//; }
+ push @{ ${$rexpansion}{$name} }, @$rbody_parts;
+ }
+ else {
+ push( @config_list, @$rbody_parts );
}
}
}
+
+ if ($opening_brace_line) {
+ $death_message =
+"Didn't see a '}' to match the '{' at line $opening_brace_line in config file '$config_file'\n";
+ }
eval { $fh->close() };
- return ( \@config_list, $death_message, $saw_pbp );
+ return ( \@config_list, $death_message );
}
sub strip_comment {
print STDOUT <<"EOM";
This is perltidy, v$VERSION
-Copyright 2000-2014, Steve Hancock
+Copyright 2000-2016, Steve Hancock
Perltidy is free software and may be copied under the terms of the GNU
General Public License, which is included in the distribution files.
$output_file_open = 1;
if ($binmode) {
if ( ref($fh) eq 'IO::File' ) {
- binmode $fh;
+ if ( $rOpts->{'character-encoding'}
+ && $rOpts->{'character-encoding'} eq 'utf8' )
+ {
+ binmode $fh, ":encoding(UTF-8)";
+ }
+ else { binmode $fh }
}
if ( $output_file eq '-' ) { binmode STDOUT }
}
Perl::Tidy::Warn "## Please see file $filename\n"
unless ref($warning_file);
$self->{_fh_warnings} = $fh_warnings;
+ $fh_warnings->print("Perltidy version is $Perl::Tidy::VERSION\n");
}
if ( $warning_count < WARNING_LIMIT ) {
}
# Pod::Html requires a real temporary filename
- my ( $fh_tmp, $tmpfile ) = tempfile();
+ my ( $fh_tmp, $tmpfile ) = File::Temp::tempfile();
unless ($fh_tmp) {
Perl::Tidy::Warn
"unable to open temporary file $tmpfile; cannot use pod2html\n";
%is_assignment
%is_chain_operator
%is_if_unless_and_or_last_next_redo_return
+ %ok_to_add_semicolon_for_block_type
@has_broken_sublist
@dont_align
unless while until for foreach given when default);
@is_block_without_semicolon{@_} = (1) x scalar(@_);
+ # We will allow semicolons to be added within these block types
+ # as well as sub and package blocks.
+ # NOTES:
+ # 1. Note that these keywords are omitted:
+ # switch case given when default sort map grep
+ # 2. It is also ok to add for sub and package blocks and a labeled block
+ # 3. But not okay for other perltidy types including:
+ # { } ; G t
+ # 4. Test files: blktype.t, blktype1.t, semicolon.t
+ @_ =
+ qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
+ unless do while until eval for foreach );
+ @ok_to_add_semicolon_for_block_type{@_} = (1) x scalar(@_);
+
# 'L' is token for opening { at hash key
@_ = qw" L { ( [ ";
@is_opening_type{@_} = (1) x scalar(@_);
$rOpts->{'long-block-line-count'} = 1000000;
}
+ my $enc = $rOpts->{'character-encoding'};
+ if ( $enc && $enc !~ /^(none|utf8)$/i ) {
+ Perl::Tidy::Die <<EOM;
+Unrecognized character-encoding '$enc'; expecting one of: (none, utf8)
+EOM
+ }
+
my $ole = $rOpts->{'output-line-ending'};
if ($ole) {
my %endings = (
mac => "\015",
unix => "\012",
);
- $ole = lc $ole;
- unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
- my $str = join " ", keys %endings;
- Perl::Tidy::Die <<EOM;
+
+ # Patch for RT #99514, a memoization issue.
+ # Normally, the user enters one of 'dos', 'win', etc, and we change the
+ # value in the options parameter to be the corresponding line ending
+ # character. But, if we are using memoization, on later passes through
+ # here the option parameter will already have the desired ending
+ # character rather than the keyword 'dos', 'win', etc. So
+ # we must check to see if conversion has already been done and, if so,
+ # bypass the conversion step.
+ my %endings_inverted = (
+ "\015\012" => 'dos',
+ "\015\012" => 'win',
+ "\015" => 'mac',
+ "\012" => 'unix',
+ );
+
+ if ( defined( $endings_inverted{$ole} ) ) {
+
+ # we already have valid line ending, nothing more to do
+ }
+ else {
+ $ole = lc $ole;
+ unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
+ my $str = join " ", keys %endings;
+ Perl::Tidy::Die <<EOM;
Unrecognized line ending '$ole'; expecting one of: $str
EOM
- }
- if ( $rOpts->{'preserve-line-endings'} ) {
- Perl::Tidy::Warn "Ignoring -ple; conflicts with -ole\n";
- $rOpts->{'preserve-line-endings'} = undef;
+ }
+ if ( $rOpts->{'preserve-line-endings'} ) {
+ Perl::Tidy::Warn "Ignoring -ple; conflicts with -ole\n";
+ $rOpts->{'preserve-line-endings'} = undef;
+ }
}
}
# *VERSION = \'1.01';
# ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
# We will pass such a line straight through without breaking
- # it unless -npvl is used
+ # it unless -npvl is used.
+
+ # Patch for problem reported in RT #81866, where files
+ # had been flattened into a single line and couldn't be
+ # tidied without -npvl. There are two parts to this patch:
+ # First, it is not done for a really long line (80 tokens for now).
+ # Second, we will only allow up to one semicolon
+ # before the VERSION. We need to allow at least one semicolon
+ # for statements like this:
+ # require Exporter; our $VERSION = $Exporter::VERSION;
+ # where both statements must be on a single line for MakeMaker
my $is_VERSION_statement = 0;
-
- if (
- !$saw_VERSION_in_this_file
- && $input_line =~ /VERSION/ # quick check to reject most lines
- && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
- )
+ if ( !$saw_VERSION_in_this_file
+ && $jmax < 80
+ && $input_line =~
+ /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
{
$saw_VERSION_in_this_file = 1;
$is_VERSION_statement = 1;
# qw lines will still go out at the end of this routine.
if ( $rOpts->{'indent-only'} ) {
flush();
- trim($input_line);
+ my $line = $input_line;
+
+ # delete side comments if requested with -io, but
+ # we will not allow deleting of closing side comments with -io
+ # because the coding would be more complex
+ if ( $rOpts->{'delete-side-comments'}
+ && $rtoken_type->[$jmax] eq '#' )
+ {
+ $line = join "", @{$rtokens}[ 0 .. $jmax - 1 ];
+ }
+ trim($line);
extract_token(0);
- $token = $input_line;
+ $token = $line;
$type = 'q';
$block_type = "";
$container_type = "";
}
# This is a good place to kill incomplete one-line blocks
- if ( ( $semicolons_before_block_self_destruct == 0 )
- && ( $max_index_to_go >= 0 )
- && ( $types_to_go[$max_index_to_go] eq ';' )
- && ( $$rtokens[0] ne '}' ) )
+ if (
+ (
+ ( $semicolons_before_block_self_destruct == 0 )
+ && ( $max_index_to_go >= 0 )
+ && ( $types_to_go[$max_index_to_go] eq ';' )
+ && ( $$rtokens[0] ne '}' )
+ )
+
+ # Patch for RT #98902. Honor request to break at old commas.
+ || ( $rOpts_break_at_old_comma_breakpoints
+ && $max_index_to_go >= 0
+ && $types_to_go[$max_index_to_go] eq ',' )
+ )
{
+ $forced_breakpoint_to_go[$max_index_to_go] = 1
+ if ($rOpts_break_at_old_comma_breakpoints);
destroy_one_line_block();
output_line_to_go();
}
# and we don't have one
&& ( $last_nonblank_type ne ';' )
- # patch until some block type issues are fixed:
- # Do not add semi-colon for block types '{',
- # '}', and ';' because we cannot be sure yet
- # that this is a block and not an anonymous
- # hash (blktype.t, blktype1.t)
- && ( $block_type !~ /^[\{\};]$/ )
-
- # patch: and do not add semi-colons for recently
- # added block types (see tmp/semicolon.t)
- && ( $block_type !~
- /^(switch|case|given|when|default)$/ )
-
- # it seems best not to add semicolons in these
- # special block types: sort|map|grep
- && ( !$is_sort_map_grep{$block_type} )
-
# and we are allowed to do so.
&& $rOpts->{'add-semicolons'}
+
+ # and we are allowed to for this block type
+ && ( $ok_to_add_semicolon_for_block_type{$block_type}
+ || $block_type =~ /^(sub|package)/
+ || $block_type =~ /^\w+\:$/ )
+
)
{
# But make a line break if the curly ends a
# significant block:
if (
- $is_block_without_semicolon{$block_type}
+ (
+ $is_block_without_semicolon{$block_type}
+
+ # Follow users break point for
+ # one line block types U & G, such as a 'try' block
+ || $is_one_line_block =~ /^[UG]$/ && $j == $jmax
+ )
# if needless semicolon follows we handle it later
&& $next_nonblank_token ne ';'
$i_start = $max_index_to_go;
}
+ # the previous nonblank token should start these block types
+ elsif (( $last_last_nonblank_token_to_go eq $block_type )
+ || ( $block_type =~ /^sub/ )
+ || $block_type =~ /\(\)/ )
+ {
+ $i_start = $last_last_nonblank_index_to_go;
+
+ # Patch for signatures and extended syntax ...
+ # if the previous token was a closing paren we should walk back up to
+ # find the keyword (sub). Otherwise, we might form a one line block,
+ # which stays intact, and cause the parenthesized expression to break
+ # open. That looks bad.
+ if ( $tokens_to_go[$i_start] eq ')' ) {
+
+ # walk back to find the first token with this level
+ # it should be the opening paren...
+ my $lev_want = $levels_to_go[$i_start];
+ for ( $i_start-- ; $i_start >= 0 ; $i_start-- ) {
+ if ( $i_start <= 0 ) { return 0 }
+ my $lev = $levels_to_go[$i_start];
+ if ( $lev <= $lev_want ) {
+
+ # if not an opening paren then probably a syntax error
+ if ( $tokens_to_go[$i_start] ne '(' ) { return 0 }
+
+ # now step back to the opening keyword (sub)
+ $i_start--;
+ if ( $i_start > 0 && $types_to_go[$i_start] eq 'b' ) {
+ $i_start--;
+ }
+ }
+ }
+ }
+ }
+
elsif ( $last_last_nonblank_token_to_go eq ')' ) {
# For something like "if (xxx) {", the keyword "if" will be
$i_start++;
}
- unless ( $tokens_to_go[$i_start] eq $block_type ) {
+ # Patch to avoid breaking short blocks defined with extended_syntax:
+ # Strip off any trailing () which was added in the parser to mark
+ # the opening keyword. For example, in the following
+ # create( TypeFoo $e) {$bubba}
+ # the blocktype would be marked as create()
+ my $stripped_block_type = $block_type;
+ $stripped_block_type =~ s/\(\)$//;
+
+ unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
return 0;
}
}
- # the previous nonblank token should start these block types
- elsif (( $last_last_nonblank_token_to_go eq $block_type )
- || ( $block_type =~ /^sub/ ) )
- {
- $i_start = $last_last_nonblank_index_to_go;
- }
-
# patch for SWITCH/CASE to retain one-line case/when blocks
elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
sub want_blank_line {
flush();
- $file_writer_object->want_blank_line();
+ $file_writer_object->want_blank_line() unless $in_format_skipping_section;
}
sub write_unindented_line {
{
$adjust_indentation = 1;
}
+
+ # Patch for RT #96101, in which closing brace of anonymous subs
+ # was not outdented. We should look ahead and see if there is
+ # a level decrease at the next token (i.e., a closing token),
+ # but right now we do not have that information. For now
+ # we see if we are in a list, and this works well.
+ # See test files 'sub*.t' for good test cases.
+ if ( $block_type_to_go[$ibeg] =~ /^sub\s*\(?/
+ && $container_environment_to_go[$i_terminal] eq 'LIST'
+ && !$rOpts->{'indent-closing-brace'} )
+ {
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = get_opening_indentation( $ibeg, $ri_first, $ri_last,
+ $rindentation_list );
+ my $indentation = $leading_spaces_to_go[$ibeg];
+ if ( defined($opening_indentation)
+ && get_SPACES($indentation) >
+ get_SPACES($opening_indentation) )
+ {
+ $adjust_indentation = 1;
+ }
+ }
}
# YVES patch 1 of 2:
$rindentation_list );
my $indentation = $leading_spaces_to_go[$ibeg];
if ( defined($opening_indentation)
- && $indentation > $opening_indentation )
+ && get_SPACES($indentation) >
+ get_SPACES($opening_indentation) )
{
$adjust_indentation = 1;
}
@is_mult_div{@_} = (1) x scalar(@_);
}
+ sub DUMP_BREAKPOINTS {
+
+ # Debug routine to dump current breakpoints...not normally called
+ # We are given indexes to the current lines:
+ # $ri_beg = ref to array of BEGinning indexes of each line
+ # $ri_end = ref to array of ENDing indexes of each line
+ my ( $ri_beg, $ri_end, $msg ) = @_;
+ print STDERR "----Dumping breakpoints from: $msg----\n";
+ for my $n ( 0 .. @{$ri_end} - 1 ) {
+ my $ibeg = $$ri_beg[$n];
+ my $iend = $$ri_end[$n];
+ my $text = "";
+ foreach my $i ( $ibeg .. $iend ) {
+ $text .= $tokens_to_go[$i];
+ }
+ print STDERR "$n ($ibeg:$iend) $text\n";
+ }
+ print STDERR "----\n";
+ }
+
sub recombine_breakpoints {
# sub set_continuation_breaks is very liberal in setting line breaks
# }
# };
#
- || ( $line_count
+ || (
+ $line_count
&& ( $token eq ')' )
&& ( $next_nonblank_type eq '{' )
&& ($next_nonblank_block_type)
&& ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
- && !$rOpts->{'opening-brace-always-on-right'} )
+
+ # RT #104427: Dont break before opening sub brace because
+ # sub block breaks handled at higher level, unless
+ # it looks like the preceeding list is long and broken
+ && !(
+ $next_nonblank_block_type =~ /^sub/
+ && ( $nesting_depth_to_go[$i_begin] ==
+ $nesting_depth_to_go[$i_next_nonblank] )
+ )
+
+ && !$rOpts->{'opening-brace-always-on-right'}
+ )
# There is an implied forced break at a terminal opening brace
|| ( ( $type eq '{' ) && ( $i_test == $imax ) )
# Start storing lines when we see a line with multiple stacked opening
# tokens.
- if ( $args[0] =~ /[\{\(\[]\s*[\{\(\[]$/ ) {
+ # patch for RT #94354, requested by Colin Williams
+ if ( $seqno_string =~ /^\d+(\:+\d+)+$/ && $args[0] !~ /^[\}\)\]\:\?]/ )
+ {
+
+ # This test is efficient but a little subtle: The first test says
+ # that we have multiple sequence numbers and hence multiple opening
+ # or closing tokens in this line. The second part of the test
+ # rejects stacked closing and ternary tokens. So if we get here
+ # then we should have stacked unbalanced opening tokens.
+
+ # Here is a complex example:
+
+ # Foo($Bar[0], { # (side comment)
+ # baz => 1,
+ # });
+
+ # The first line has sequence 6::4. It does not begin with
+ # a closing token or ternary, so it passes the test and must be
+ # stacked opening tokens.
+
+ # The last line has sequence 4:6 but is a stack of closing tokens,
+ # so it gets rejected.
+
+ # Note that the sequence number of an opening token for a qw quote
+ # is a negative number and will be rejected.
+ # For example, for the following line:
+ # skip_symbols([qw(
+ # $seqno_string='10:5:-1'. It would be okay to accept it but
+ # I decided not to do this after testing.
+
$valign_buffer_filling = $seqno_string;
+
}
}
}
look_for_autoloader => 1,
look_for_selfloader => 1,
starting_line_number => 1,
+ extended_syntax => 0,
);
my %args = ( %defaults, @_ );
_nearly_matched_here_target_at => undef,
_line_text => "",
_rlower_case_labels_at => undef,
+ _extended_syntax => $args{extended_syntax},
};
prepare_for_a_new_file();
sub scan_identifier {
( $i, $tok, $type, $id_scan_state, $identifier ) =
scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
- $max_token_index, $expecting );
+ $max_token_index, $expecting, $paren_type[$paren_depth] );
}
sub scan_id {
# keyword ( .... ) { BLOCK }
# patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
my %is_blocktype_with_paren;
- @_ = qw(if elsif unless while until for foreach switch case given when);
+ @_ =
+ qw(if elsif unless while until for foreach switch case given when catch);
@is_blocktype_with_paren{@_} = (1) x scalar(@_);
# ------------------------------------------------------------
$container_type = $want_paren;
$want_paren = "";
}
+ elsif ( $statement_type =~ /^sub/ ) {
+ $container_type = $statement_type;
+ }
else {
$container_type = $last_nonblank_token;
if ($is_pattern) {
$in_quote = 1;
$type = 'Q';
- $allowed_quote_modifiers = '[msixpodualgc]';
+ $allowed_quote_modifiers = '[msixpodualngc]';
}
else { # not a pattern; check for a /= token
# check for syntax error here;
unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
- my $list = join( ' ', sort keys %is_blocktype_with_paren );
- warning(
- "syntax error at ') {', didn't see one of: $list\n");
+ if ( $tokenizer_self->{'_extended_syntax'} ) {
+
+ # we append a trailing () to mark this as an unknown
+ # block type. This allows perltidy to format some
+ # common extensions of perl syntax.
+ # This is used by sub code_block_type
+ $last_nonblank_token .= '()';
+ }
+ else {
+ my $list =
+ join( ' ', sort keys %is_blocktype_with_paren );
+ warning(
+"syntax error at ') {', didn't see one of: <<$list>>; If this code is okay try using the -xs flag\n"
+ );
+ }
}
}
$block_type = code_block_type( $i_tok, $rtokens, $rtoken_type,
$max_token_index );
- # remember a preceding smartmatch operator
- ## SMARTMATCH
- ##if ( $last_nonblank_type eq '~~' ) {
- ## $block_type = $last_nonblank_type;
- ##}
-
# patch to promote bareword type to function taking block
if ( $block_type
&& $last_nonblank_type eq 'w'
}
}
}
+
$brace_type[ ++$brace_depth ] = $block_type;
$brace_package[$brace_depth] = $current_package;
$brace_structural_type[$brace_depth] = $type;
# propagate type information for 'do' and 'eval' blocks, and also
# for smartmatch operator. This is necessary to enable us to know
# if an operator or term is expected next.
- ## SMARTMATCH
- ##if ( $is_block_operator{$block_type} || $block_type eq '~~' ) {
if ( $is_block_operator{$block_type} ) {
$tok = $block_type;
}
if ($is_pattern) {
$in_quote = 1;
$type = 'Q';
- $allowed_quote_modifiers = '[msixpodualgc]';
+ $allowed_quote_modifiers = '[msixpodualngc]';
}
else {
( $type_sequence, $indent_flag ) =
'__DATA__' => '_in_data',
);
- # ref: camel 3 p 147,
+ # original ref: camel 3 p 147,
# but perl may accept undocumented flags
# perl 5.10 adds 'p' (preserve)
- # Perl version 5.16, http://perldoc.perl.org/perlop.html, has these:
- # /PATTERN/msixpodualgc or m?PATTERN?msixpodualgc
- # s/PATTERN/REPLACEMENT/msixpodualgcer
+ # Perl version 5.22 added 'n'
+ # From http://perldoc.perl.org/perlop.html we have
+ # /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc
+ # s/PATTERN/REPLACEMENT/msixpodualngcer
# y/SEARCHLIST/REPLACEMENTLIST/cdsr
# tr/SEARCHLIST/REPLACEMENTLIST/cdsr
- # qr/STRING/msixpodual
+ # qr/STRING/msixpodualn
my %quote_modifiers = (
- 's' => '[msixpodualgcer]',
+ 's' => '[msixpodualngcer]',
'y' => '[cdsr]',
'tr' => '[cdsr]',
- 'm' => '[msixpodualgc]',
- 'qr' => '[msixpodual]',
+ 'm' => '[msixpodualngc]',
+ 'qr' => '[msixpodualn]',
'q' => "",
'qq' => "",
'qw' => "",
# '//' must be defined_or operator if an operator is expected.
# TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
# could be migrated here for clarity
- if ( $test_tok eq '//' ) {
+
+ # Patch for RT#102371, misparsing a // in the following snippet:
+ # state $b //= ccc();
+ # The solution is to always accept the digraph (or trigraph) after
+ # token type 'Z' (possible file handle). The reason is that
+ # sub operator_expected gives TERM expected here, which is
+ # wrong in this case.
+ if ( $test_tok eq '//' && $last_nonblank_type ne 'Z' ) {
my $next_type = $$rtokens[ $i + 1 ];
my $expecting =
operator_expected( $prev_type, $tok, $next_type );
- $combine_ok = 0 unless ( $expecting == OPERATOR );
+
+ # Patched for RT#101547, was 'unless ($expecting==OPERATOR)'
+ $combine_ok = 0 if ( $expecting == TERM );
}
}
elsif ( $tok eq 'else' ) {
# patched for SWITCH/CASE
- if ( $last_nonblank_token ne ';'
+ if (
+ $last_nonblank_token ne ';'
&& $last_nonblank_block_type !~
- /^(if|elsif|unless|case|when)$/ )
+ /^(if|elsif|unless|case|when)$/
+
+ # patch to avoid an unwanted error message for
+ # the case of a parenless 'case' (RT 105484):
+ # switch ( 1 ) { case x { 2 } else { } }
+ && $statement_type !~
+ /^(if|elsif|unless|case|when)$/
+ )
{
warning(
"expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
$in_statement_continuation = 0;
}
- # otherwise, the next token after a ',' starts a new term
- elsif ( $type eq ',' ) {
+ # otherwise, the token after a ',' starts a new term
+
+ # Patch FOR RT#99961; no continuation after a ';'
+ # This is needed because perltidy currently marks
+ # a block preceded by a type character like % or @
+ # as a non block, to simplify formatting. But these
+ # are actually blocks and can have semicolons.
+ # See code_block_type() and is_non_structural_brace().
+ elsif ( $type eq ',' || $type eq ';' ) {
$in_statement_continuation = 0;
}
}
}
+ ################################################################
# NOTE: braces after type characters start code blocks, but for
# simplicity these are not identified as such. See also
# sub is_non_structural_brace.
+ ################################################################
## elsif ( $last_nonblank_type eq 't' ) {
## return $last_nonblank_token;
$max_token_index );
}
+ # Patch for bug # RT #94338 reported by Daniel Trizen
+ # for-loop in a parenthesized block-map triggering an error message:
+ # map( { foreach my $item ( '0', '1' ) { print $item} } qw(a b c) );
+ # Check for a code block within a parenthesized function call
+ elsif ( $last_nonblank_token eq '(' ) {
+ my $paren_type = $paren_type[$paren_depth];
+ if ( $paren_type && $paren_type =~ /^(map|grep|sort)$/ ) {
+
+ # We will mark this as a code block but use type 't' instead
+ # of the name of the contining function. This will allow for
+ # correct parsing but will usually produce better formatting.
+ # Braces with block type 't' are not broken open automatically
+ # in the formatter as are other code block types, and this usually
+ # works best.
+ return 't'; # (Not $paren_type)
+ }
+ else {
+ return "";
+ }
+ }
+
+ # handle unknown syntax ') {'
+ # we previously appended a '()' to mark this case
+ elsif ( $last_nonblank_token =~ /\(\)$/ ) {
+ return $last_nonblank_token;
+ }
+
# anything else must be anonymous hash reference
else {
return "";
# USES GLOBAL VARIABLES: $last_nonblank_token
my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
+
my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens, $max_token_index );
# We are only going to look ahead one more (nonblank/comment) line.
# Strange formatting could cause a bad guess, but that's unlikely.
- my @pre_types = @$rtoken_type[ $i + 1 .. $max_token_index ];
- my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
+ my @pre_types;
+ my @pre_tokens;
+
+ # Ignore the rest of this line if it is a side comment
+ if ( $next_nonblank_token ne '#' ) {
+ @pre_types = @$rtoken_type[ $i + 1 .. $max_token_index ];
+ @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
+ }
my ( $rpre_tokens, $rpre_types ) =
peek_ahead_for_n_nonblank_pre_tokens(20); # 20 is arbitrary but
# generous, and prevents
# put a sentinel token to simplify stopping the search
push @pre_types, '}';
+ push @pre_types, '}';
my $jbeg = 0;
$jbeg = 1 if $pre_types[0] eq 'b';
$j++;
}
elsif ( $pre_types[$j] eq 'w' ) {
- unless ( $is_keyword{ $pre_tokens[$j] } ) {
- $j++;
- }
+ $j++;
}
elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
$j++;
$j++ if $pre_types[$j] eq 'b';
- # it's a hash ref if a comma or => follow next
- if ( $pre_types[$j] eq ','
- || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) )
+ # Patched for RT #95708
+ if (
+
+ # it is a comma which is not a pattern delimeter except for qw
+ (
+ $pre_types[$j] eq ','
+ && $pre_tokens[$jbeg] !~ /^(s|m|y|tr|qr|q|qq|qx)$/
+ )
+
+ # or a =>
+ || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' )
+ )
{
$code_block_type = "";
}
# return 0;
# }
+ ################################################################
# NOTE: braces after type characters start code blocks, but for
# simplicity these are not identified as such. See also
# sub code_block_type
+ ################################################################
##if ($last_nonblank_type eq 't') {return 0}
# $last_nonblank_type
my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
- $expecting )
+ $expecting, $container_type )
= @_;
my $i_begin = $i;
my $type = '';
my $tok = $tok_begin;
my $message = "";
+ my $in_prototype_or_signature = $container_type =~ /^sub/;
+
# these flags will be used to help figure out the type:
my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
my $saw_type;
last;
}
}
+
+ # POSTDEFREF ->@ ->% ->& ->*
+ elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) {
+ $identifier .= $tok;
+ }
elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric ..
$saw_alpha = 1;
$id_scan_state = ':'; # now need ::
$id_scan_state = 'A';
$identifier .= $tok;
}
- elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) { # $#array
+
+ # $# and POSTDEFREF ->$#
+ elsif ( ( $tok eq '#' ) && ( $identifier =~ /\$$/ ) ) { # $#array
$identifier .= $tok; # keep same state, a $ could follow
}
elsif ( $tok eq '{' ) {
}
else { # something else
+ if ( $in_prototype_or_signature && $tok =~ /^[\),=]/ ) {
+ $id_scan_state = '';
+ $i = $i_save;
+ $type = 'i'; # probably punctuation variable
+ last;
+ }
+
# check for various punctuation variables
if ( $identifier =~ /^[\$\*\@\%]$/ ) {
$identifier .= $tok;
}
+ # POSTDEFREF: Postfix reference ->$* ->%* ->@* ->** ->&* ->$#*
+ elsif ( $tok eq '*' && $identifier =~ /([\@\%\$\*\&]|\$\#)$/ ) {
+ $identifier .= $tok;
+ }
+
elsif ( $identifier eq '$#' ) {
if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
my $pos_beg = $$rtoken_map[$i_beg];
pos($input_line) = $pos_beg;
- # sub NAME PROTO ATTRS
+ # Look for the sub NAME
if (
$input_line =~ m/\G\s*
((?:\w*(?:'|::))*) # package - something that ends in :: or '
(\w+) # NAME - required
- (\s*\([^){]*\))? # PROTO - something in parens
- (\s*:)? # ATTRS - leading : of attribute list
/gcx
)
{
$match = 1;
$subname = $2;
- $proto = $3;
- $attrs = $4;
$package = ( defined($1) && $1 ) ? $1 : $current_package;
$package =~ s/\'/::/g;
$type = 'i';
}
- # Look for prototype/attributes not preceded on this line by subname;
- # This might be an anonymous sub with attributes,
+ # Now look for PROTO ATTRS
+ # Look for prototype/attributes which are usually on the same
+ # line as the sub name but which might be on a separate line.
+ # For example, we might have an anonymous sub with attributes,
# or a prototype on a separate line from its sub name
- elsif (
- $input_line =~ m/\G(\s*\([^){]*\))? # PROTO
+
+ # NOTE: We only want to parse PROTOTYPES here. If we see anything that
+ # does not look like a prototype, we assume it is a SIGNATURE and we
+ # will stop and let the the standard tokenizer handle it. In
+ # particular, we stop if we see any nested parens, braces, or commas.
+ my $saw_opening_paren = $input_line =~ /\G\s*\(/;
+ if (
+ $input_line =~ m/\G(\s*\([^\)\(\}\{\,]*\))? # PROTO
(\s*:)? # ATTRS leading ':'
/gcx
&& ( $1 || $2 )
)
{
- $match = 1;
$proto = $1;
$attrs = $2;
+ # If we also found the sub name on this call then append PROTO.
+ # This is not necessary but for compatability with previous
+ # versions when the -csc flag is used:
+ if ( $match && $proto ) {
+ $tok .= $proto;
+ }
+ $match ||= 1;
+
# Handle prototype on separate line from subname
if ($subname_saved) {
$package = $package_saved;
$in_attribute_list = 1;
}
- # We must convert back from character position
- # to pre_token index.
+ # Otherwise, if we found a match we must convert back from
+ # string position to the pre_token index for continued parsing.
else {
# I don't think an error flag can occur here ..but ?
}
$package_saved = "";
$subname_saved = "";
+
+ # See what's next...
if ( $next_nonblank_token eq '{' ) {
if ($subname) {
$statement_type = $tok;
}
- # see if PROTO follows on another line:
+ # if we stopped before an open paren ...
elsif ( $next_nonblank_token eq '(' ) {
- if ( $attrs || $proto ) {
- warning(
-"unexpected '(' after definition or declaration of sub '$subname'\n"
- );
- }
- else {
- $id_scan_state = 'sub'; # we must come back to get proto
- $statement_type = $tok;
- $package_saved = $package;
- $subname_saved = $subname;
+
+ # If we DID NOT see this paren above then it must be on the
+ # next line so we will set a flag to come back here and see if
+ # it is a PROTOTYPE
+
+ # Otherwise, we assume it is a SIGNATURE rather than a
+ # PROTOTYPE and let the normal tokenizer handle it as a list
+ if ( !$saw_opening_paren ) {
+ $id_scan_state = 'sub'; # we must come back to get proto
+ $package_saved = $package;
+ $subname_saved = $subname;
}
+ $statement_type = $tok;
}
elsif ($next_nonblank_token) { # EOF technically ok
warning(
my @digraphs = qw(
.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
- <= >= == =~ !~ != ++ -- /= x= ~~
+ <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
);
@is_digraph{@digraphs} = (1) x scalar(@digraphs);
- my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ );
+ my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.=);
@is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
# make a hash of all valid token types for self-checking the tokenizer
@is_indirect_object_taker{@_} = (1) x scalar(@_);
# These tokens may precede a code block
- # patched for SWITCH/CASE
+ # patched for SWITCH/CASE/CATCH. Actually these could be removed
+ # now and we could let the extended-syntax coding handle them
@_ =
qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
unless do while until eval for foreach map grep sort
- switch case given when);
+ switch case given when catch);
@is_code_block_token{@_} = (1) x scalar(@_);
# I'll build the list of keywords incrementally
**= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
<= >= == != => \ > < % * / ? & | ** <=> ~~ !~~
f F pp mm Y p m U J G j >> << ^ t
+ ~. ^. |. &. ^.= |.= &.=
#;
push( @value_requestor_type, ',' )
; # (perl doesn't like a ',' in a qw block)
}
1;
__END__
-