-############################################################
+#
+###########################################################-
#
# perltidy - a perl script indenter and formatter
#
-# Copyright (c) 2000-2007 by Steve Hancock
+# Copyright (c) 2000-2018 by Steve Hancock
# Distributed under the GPL license agreement; see file COPYING
#
# This program is free software; you can redistribute it and/or modify
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
-# For brief instructions instructions, try 'perltidy -h'.
+# For brief instructions, try 'perltidy -h'.
# For more complete documentation, try 'man perltidy'
# or visit http://perltidy.sourceforge.net
#
#
# perltidy Tidy.pm
#
-# Code Contributions:
+# Code Contributions: See ChangeLog.html for a complete history.
# Michael Cartmell supplied code for adaptation to VMS and helped with
# v-strings.
# Hugh S. Myers supplied sub streamhandle and the supporting code to
# Yves Orton supplied coding to help detect Windows versions.
# Axel Rose supplied a patch for MacPerl.
# Sebastien Aperghis-Tramoni supplied a patch for the defined or operator.
-# Dan Tyrell sent a patch for binary I/O.
+# Dan Tyrell contributed a patch for binary I/O.
+# Ueli Hugenschmidt contributed a patch for -fpsc
+# Sam Kington supplied a patch to identify the initial indentation of
+# entabbed code.
+# jonathan swartz supplied patches for:
+# * .../ pattern, which looks upwards from directory
+# * --notidy, to be used in directories where we want to avoid
+# accidentally tidying
+# * prefilter and postfilter
+# * iterations option
+#
# Many others have supplied key ideas, suggestions, and bug reports;
# see the CHANGES file.
#
############################################################
package Perl::Tidy;
-use 5.004; # need IO::File from 5.004 or later
-BEGIN { $^W = 1; } # turn on warnings
+# perlver reports minimum version needed is 5.8.0
+# 5.004 needed for IO::File
+# 5.008 needed for wide characters
+use 5.008;
+use warnings;
use strict;
use Exporter;
use Carp;
@ISA
@EXPORT
$missing_file_spec
+ $fh_stderr
+ $rOpts_character_encoding
};
@ISA = qw( Exporter );
@EXPORT = qw( &perltidy );
+use Cwd;
+use Encode ();
use IO::File;
use File::Basename;
+use File::Copy;
+use File::Temp qw(tempfile);
BEGIN {
- ( $VERSION = q($Id: Tidy.pm,v 1.61 2007/04/24 13:31:15 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
+ ( $VERSION = q($Id: Tidy.pm,v 1.74 2018/02/20 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
}
sub streamhandle {
# object object
# (check for 'print' method for 'w' mode)
# (check for 'getline' method for 'r' mode)
- my $ref = ref( my $filename = shift );
- my $mode = shift;
+ my ( $filename, $mode ) = @_;
+
+ my $ref = ref($filename);
my $New;
my $fh;
# skipped and we can just let it crash if there is no
# getline.
if ( $mode =~ /[rR]/ ) {
- if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
+
+ # RT#97159; part 1 of 2: updated to use 'can'
+ ##if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
+ if ( $ref->can('getline') ) {
$New = sub { $filename };
}
else {
# Accept an object with a print method for writing.
# See note above about IO::File
if ( $mode =~ /[wW]/ ) {
- if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
+
+ # RT#97159; part 2 of 2: updated to use 'can'
+ ##if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
+ if ( $ref->can('print') ) {
$New = sub { $filename };
}
else {
}
}
$fh = $New->( $filename, $mode )
- or warn "Couldn't open file:$filename in mode:$mode : $!\n";
+ or Warn("Couldn't open file:$filename in mode:$mode : $!\n");
+
return $fh, ( $ref or $filename );
}
if ( ref($input_file) || $input_file eq '-' ) {
return $ending;
}
- open( INFILE, $input_file ) || return $ending;
- binmode INFILE;
+ my $fh;
+ open( $fh, '<', $input_file ) || return $ending;
+
+ binmode $fh;
my $buf;
- read( INFILE, $buf, 1024 );
- close INFILE;
+ read( $fh, $buf, 1024 );
+ close $fh;
if ( $buf && $buf =~ /([\012\015]+)/ ) {
my $test = $1;
# concatenate a path and file basename
# returns undef in case of error
- BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }
+ my @parts = @_;
+
+ #BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }
+ BEGIN {
+ eval { require File::Spec };
+ $missing_file_spec = $@;
+ }
# use File::Spec if we can
unless ($missing_file_spec) {
- return File::Spec->catfile(@_);
+ return File::Spec->catfile(@parts);
}
# Perl 5.004 systems may not have File::Spec so we'll make
# a simple try. We assume File::Basename is available.
# return undef if not successful.
- my $name = pop @_;
- my $path = join '/', @_;
+ my $name = pop @parts;
+ my $path = join '/', @parts;
my $test_file = $path . $name;
my ( $test_name, $test_path ) = fileparse($test_file);
return $test_file if ( $test_name eq $name );
- return undef if ( $^O eq 'VMS' );
+ return if ( $^O eq 'VMS' );
# this should work at least for Windows and Unix:
$test_file = $path . '/' . $name;
( $test_name, $test_path ) = fileparse($test_file);
return $test_file if ( $test_name eq $name );
- return undef;
-}
-
-sub make_temporary_filename {
-
- # Make a temporary filename.
- #
- # The POSIX tmpnam() function tends to be unreliable for non-unix
- # systems (at least for the win32 systems that I've tested), so use
- # a pre-defined name. A slight disadvantage of this is that two
- # perltidy runs in the same working directory may conflict.
- # However, the chance of that is small and managable by the user.
- # An alternative would be to check for the file's existance and use,
- # say .TMP0, .TMP1, etc, but that scheme has its own problems. So,
- # keep it simple.
- my $name = "perltidy.TMP";
- if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O eq 'MacOs' ) {
- return $name;
- }
- eval "use POSIX qw(tmpnam)";
- if ($@) { return $name }
- use IO::File;
-
- # just make a couple of tries before giving up and using the default
- for ( 0 .. 1 ) {
- my $tmpname = tmpnam();
- my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL );
- if ($fh) {
- $fh->close();
- return ($tmpname);
- last;
- }
- }
- return ($name);
+ return;
}
# Here is a map of the flow of data from the input source to the output
# messages. It writes a .LOG file, which may be saved with a
# '-log' or a '-g' flag.
-{
+sub perltidy {
- # variables needed by interrupt handler:
- my $tokenizer;
- my $input_file;
-
- # this routine may be called to give a status report if interrupted. If a
- # parameter is given, it will call exit with that parameter. This is no
- # longer used because it works under Unix but not under Windows.
- sub interrupt_handler {
-
- my $exit_flag = shift;
- print STDERR "perltidy interrupted";
- if ($tokenizer) {
- my $input_line_number =
- Perl::Tidy::Tokenizer::get_input_line_number();
- print STDERR " at line $input_line_number";
- }
- if ($input_file) {
-
- if ( ref $input_file ) { print STDERR " of reference to:" }
- else { print STDERR " of file:" }
- print STDERR " $input_file";
- }
- print STDERR "\n";
- exit $exit_flag if defined($exit_flag);
- }
-
- sub perltidy {
-
- my %defaults = (
- argv => undef,
- destination => undef,
- formatter => undef,
- logfile => undef,
- errorfile => undef,
- perltidyrc => undef,
- source => undef,
- stderr => undef,
- dump_options => undef,
- dump_options_type => undef,
- dump_getopt_flags => undef,
- dump_options_category => undef,
- dump_options_range => undef,
- dump_abbreviations => undef,
- );
+ my %input_hash = @_;
- # don't overwrite callers ARGV
- local @ARGV = @ARGV;
+ my %defaults = (
+ argv => undef,
+ destination => undef,
+ formatter => undef,
+ logfile => undef,
+ errorfile => undef,
+ perltidyrc => undef,
+ source => undef,
+ stderr => undef,
+ dump_options => undef,
+ dump_options_type => undef,
+ dump_getopt_flags => undef,
+ dump_options_category => undef,
+ dump_options_range => undef,
+ dump_abbreviations => undef,
+ prefilter => undef,
+ postfilter => undef,
+ );
- my %input_hash = @_;
+ # don't overwrite callers ARGV
+ local @ARGV = @ARGV;
+ local *STDERR = *STDERR;
- if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
- local $" = ')(';
- my @good_keys = sort keys %defaults;
- @bad_keys = sort @bad_keys;
- confess <<EOM;
+ if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
+ local $" = ')(';
+ my @good_keys = sort keys %defaults;
+ @bad_keys = sort @bad_keys;
+ confess <<EOM;
------------------------------------------------------------------------
Unknown perltidy parameter : (@bad_keys)
perltidy only understands : (@good_keys)
------------------------------------------------------------------------
EOM
- }
+ }
- my $get_hash_ref = sub {
- my ($key) = @_;
- my $hash_ref = $input_hash{$key};
- if ( defined($hash_ref) ) {
- unless ( ref($hash_ref) eq 'HASH' ) {
- my $what = ref($hash_ref);
- my $but_is =
- $what ? "but is ref to $what" : "but is not a reference";
- croak <<EOM;
+ my $get_hash_ref = sub {
+ my ($key) = @_;
+ my $hash_ref = $input_hash{$key};
+ if ( defined($hash_ref) ) {
+ unless ( ref($hash_ref) eq 'HASH' ) {
+ my $what = ref($hash_ref);
+ my $but_is =
+ $what ? "but is ref to $what" : "but is not a reference";
+ croak <<EOM;
------------------------------------------------------------------------
error in call to perltidy:
-$key must be reference to HASH $but_is
------------------------------------------------------------------------
EOM
- }
}
- return $hash_ref;
- };
+ }
+ return $hash_ref;
+ };
- %input_hash = ( %defaults, %input_hash );
- my $argv = $input_hash{'argv'};
- my $destination_stream = $input_hash{'destination'};
- my $errorfile_stream = $input_hash{'errorfile'};
- my $logfile_stream = $input_hash{'logfile'};
- my $perltidyrc_stream = $input_hash{'perltidyrc'};
- my $source_stream = $input_hash{'source'};
- my $stderr_stream = $input_hash{'stderr'};
- my $user_formatter = $input_hash{'formatter'};
-
- # various dump parameters
- my $dump_options_type = $input_hash{'dump_options_type'};
- my $dump_options = $get_hash_ref->('dump_options');
- my $dump_getopt_flags = $get_hash_ref->('dump_getopt_flags');
- my $dump_options_category = $get_hash_ref->('dump_options_category');
- my $dump_abbreviations = $get_hash_ref->('dump_abbreviations');
- my $dump_options_range = $get_hash_ref->('dump_options_range');
-
- # validate dump_options_type
- if ( defined($dump_options) ) {
- unless ( defined($dump_options_type) ) {
- $dump_options_type = 'perltidyrc';
- }
- unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) {
- croak <<EOM;
+ %input_hash = ( %defaults, %input_hash );
+ my $argv = $input_hash{'argv'};
+ my $destination_stream = $input_hash{'destination'};
+ my $errorfile_stream = $input_hash{'errorfile'};
+ my $logfile_stream = $input_hash{'logfile'};
+ my $perltidyrc_stream = $input_hash{'perltidyrc'};
+ my $source_stream = $input_hash{'source'};
+ my $stderr_stream = $input_hash{'stderr'};
+ my $user_formatter = $input_hash{'formatter'};
+ my $prefilter = $input_hash{'prefilter'};
+ my $postfilter = $input_hash{'postfilter'};
+
+ if ($stderr_stream) {
+ ( $fh_stderr, my $stderr_file ) =
+ Perl::Tidy::streamhandle( $stderr_stream, 'w' );
+ if ( !$fh_stderr ) {
+ croak <<EOM;
+------------------------------------------------------------------------
+Unable to redirect STDERR to $stderr_stream
+Please check value of -stderr in call to perltidy
+------------------------------------------------------------------------
+EOM
+ }
+ }
+ else {
+ $fh_stderr = *STDERR;
+ }
+
+ sub Warn { my $msg = shift; $fh_stderr->print($msg); return }
+
+ sub Exit {
+ my $flag = shift;
+ if ($flag) { goto ERROR_EXIT }
+ else { goto NORMAL_EXIT }
+ }
+
+ sub Die { my $msg = shift; Warn($msg); Exit(1); }
+
+ # extract various dump parameters
+ my $dump_options_type = $input_hash{'dump_options_type'};
+ my $dump_options = $get_hash_ref->('dump_options');
+ my $dump_getopt_flags = $get_hash_ref->('dump_getopt_flags');
+ my $dump_options_category = $get_hash_ref->('dump_options_category');
+ my $dump_abbreviations = $get_hash_ref->('dump_abbreviations');
+ my $dump_options_range = $get_hash_ref->('dump_options_range');
+
+ # validate dump_options_type
+ if ( defined($dump_options) ) {
+ unless ( defined($dump_options_type) ) {
+ $dump_options_type = 'perltidyrc';
+ }
+ unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) {
+ croak <<EOM;
------------------------------------------------------------------------
Please check value of -dump_options_type in call to perltidy;
saw: '$dump_options_type'
------------------------------------------------------------------------
EOM
- }
- }
- else {
- $dump_options_type = "";
}
+ }
+ else {
+ $dump_options_type = "";
+ }
- if ($user_formatter) {
+ if ($user_formatter) {
- # if the user defines a formatter, there is no output stream,
- # but we need a null stream to keep coding simple
- $destination_stream = Perl::Tidy::DevNull->new();
- }
+ # if the user defines a formatter, there is no output stream,
+ # but we need a null stream to keep coding simple
+ $destination_stream = Perl::Tidy::DevNull->new();
+ }
- # see if ARGV is overridden
- if ( defined($argv) ) {
+ # see if ARGV is overridden
+ if ( defined($argv) ) {
- my $rargv = ref $argv;
- if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef }
+ my $rargv = ref $argv;
+ if ( $rargv eq 'SCALAR' ) { $argv = ${$argv}; $rargv = undef }
- # ref to ARRAY
- if ($rargv) {
- if ( $rargv eq 'ARRAY' ) {
- @ARGV = @$argv;
- }
- else {
- croak <<EOM;
+ # ref to ARRAY
+ if ($rargv) {
+ if ( $rargv eq 'ARRAY' ) {
+ @ARGV = @{$argv};
+ }
+ else {
+ croak <<EOM;
------------------------------------------------------------------------
Please check value of -argv in call to perltidy;
it must be a string or ref to ARRAY but is: $rargv
------------------------------------------------------------------------
EOM
- }
}
+ }
- # string
- else {
- my ( $rargv, $msg ) = parse_args($argv);
- if ($msg) {
- die <<EOM;
+ # string
+ else {
+ my ( $rargv, $msg ) = parse_args($argv);
+ if ($msg) {
+ Die <<EOM;
Error parsing this string passed to to perltidy with 'argv':
$msg
EOM
- }
- @ARGV = @{$rargv};
}
+ @ARGV = @{$rargv};
}
+ }
- # redirect STDERR if requested
- if ($stderr_stream) {
- my ( $fh_stderr, $stderr_file ) =
- Perl::Tidy::streamhandle( $stderr_stream, 'w' );
- if ($fh_stderr) { *STDERR = $fh_stderr }
- else {
- croak <<EOM;
-------------------------------------------------------------------------
-Unable to redirect STDERR to $stderr_stream
-Please check value of -stderr in call to perltidy
-------------------------------------------------------------------------
-EOM
- }
- }
+ my $rpending_complaint;
+ ${$rpending_complaint} = "";
+ my $rpending_logfile_message;
+ ${$rpending_logfile_message} = "";
- my $rpending_complaint;
- $$rpending_complaint = "";
- my $rpending_logfile_message;
- $$rpending_logfile_message = "";
+ my ( $is_Windows, $Windows_type ) = look_for_Windows($rpending_complaint);
- my ( $is_Windows, $Windows_type ) =
- look_for_Windows($rpending_complaint);
+ # VMS file names are restricted to a 40.40 format, so we append _tdy
+ # instead of .tdy, etc. (but see also sub check_vms_filename)
+ my $dot;
+ my $dot_pattern;
+ if ( $^O eq 'VMS' ) {
+ $dot = '_';
+ $dot_pattern = '_';
+ }
+ else {
+ $dot = '.';
+ $dot_pattern = '\.'; # must escape for use in regex
+ }
- # VMS file names are restricted to a 40.40 format, so we append _tdy
- # instead of .tdy, etc. (but see also sub check_vms_filename)
- my $dot;
- my $dot_pattern;
- if ( $^O eq 'VMS' ) {
- $dot = '_';
- $dot_pattern = '_';
- }
- else {
- $dot = '.';
- $dot_pattern = '\.'; # must escape for use in regex
- }
+ #---------------------------------------------------------------
+ # get command line options
+ #---------------------------------------------------------------
+ my ( $rOpts, $config_file, $rraw_options, $roption_string,
+ $rexpansion, $roption_category, $roption_range )
+ = process_command_line(
+ $perltidyrc_stream, $is_Windows, $Windows_type,
+ $rpending_complaint, $dump_options_type,
+ );
- # handle command line options
- my ( $rOpts, $config_file, $rraw_options, $saw_extrude, $roption_string,
- $rexpansion, $roption_category, $roption_range )
- = process_command_line(
- $perltidyrc_stream, $is_Windows, $Windows_type,
- $rpending_complaint, $dump_options_type,
- );
+ my $saw_extrude = ( grep m/^-extrude$/, @{$rraw_options} ) ? 1 : 0;
+ my $saw_pbp =
+ ( grep m/^-(pbp|perl-best-practices)$/, @{$rraw_options} ) ? 1 : 0;
- # return or exit immediately after all dumps
- my $quit_now = 0;
+ #---------------------------------------------------------------
+ # Handle requests to dump information
+ #---------------------------------------------------------------
- # Getopt parameters and their flags
- if ( defined($dump_getopt_flags) ) {
- $quit_now = 1;
- foreach my $op ( @{$roption_string} ) {
- my $opt = $op;
- my $flag = "";
+ # return or exit immediately after all dumps
+ my $quit_now = 0;
- # Examples:
- # some-option=s
- # some-option=i
- # some-option:i
- # some-option!
- if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) {
- $opt = $1;
- $flag = $2;
- }
- $dump_getopt_flags->{$opt} = $flag;
+ # Getopt parameters and their flags
+ if ( defined($dump_getopt_flags) ) {
+ $quit_now = 1;
+ foreach my $op ( @{$roption_string} ) {
+ my $opt = $op;
+ my $flag = "";
+
+ # Examples:
+ # some-option=s
+ # some-option=i
+ # some-option:i
+ # some-option!
+ if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) {
+ $opt = $1;
+ $flag = $2;
}
+ $dump_getopt_flags->{$opt} = $flag;
}
+ }
- if ( defined($dump_options_category) ) {
- $quit_now = 1;
- %{$dump_options_category} = %{$roption_category};
- }
+ if ( defined($dump_options_category) ) {
+ $quit_now = 1;
+ %{$dump_options_category} = %{$roption_category};
+ }
- if ( defined($dump_options_range) ) {
- $quit_now = 1;
- %{$dump_options_range} = %{$roption_range};
- }
+ if ( defined($dump_options_range) ) {
+ $quit_now = 1;
+ %{$dump_options_range} = %{$roption_range};
+ }
- if ( defined($dump_abbreviations) ) {
- $quit_now = 1;
- %{$dump_abbreviations} = %{$rexpansion};
- }
+ if ( defined($dump_abbreviations) ) {
+ $quit_now = 1;
+ %{$dump_abbreviations} = %{$rexpansion};
+ }
- if ( defined($dump_options) ) {
- $quit_now = 1;
- %{$dump_options} = %{$rOpts};
- }
+ if ( defined($dump_options) ) {
+ $quit_now = 1;
+ %{$dump_options} = %{$rOpts};
+ }
- return if ($quit_now);
+ Exit 0 if ($quit_now);
- # dump from command line
- if ( $rOpts->{'dump-options'} ) {
- dump_options( $rOpts, $roption_string );
- exit 1;
- }
+ # make printable string of options for this run as possible diagnostic
+ my $readable_options = readable_options( $rOpts, $roption_string );
- check_options( $rOpts, $is_Windows, $Windows_type,
- $rpending_complaint );
+ # dump from command line
+ if ( $rOpts->{'dump-options'} ) {
+ print STDOUT $readable_options;
+ Exit 0;
+ }
- if ($user_formatter) {
- $rOpts->{'format'} = 'user';
- }
+ #---------------------------------------------------------------
+ # check parameters and their interactions
+ #---------------------------------------------------------------
+ my $tabsize =
+ check_options( $rOpts, $is_Windows, $Windows_type, $rpending_complaint );
- # there must be one entry here for every possible format
- my %default_file_extension = (
- tidy => 'tdy',
- html => 'html',
- user => '',
- );
+ if ($user_formatter) {
+ $rOpts->{'format'} = 'user';
+ }
- # be sure we have a valid output format
- unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
- my $formats = join ' ',
- sort map { "'" . $_ . "'" } keys %default_file_extension;
- my $fmt = $rOpts->{'format'};
- die "-format='$fmt' but must be one of: $formats\n";
- }
+ # there must be one entry here for every possible format
+ my %default_file_extension = (
+ tidy => 'tdy',
+ html => 'html',
+ user => '',
+ );
- my $output_extension =
- make_extension( $rOpts->{'output-file-extension'},
- $default_file_extension{ $rOpts->{'format'} }, $dot );
+ $rOpts_character_encoding = $rOpts->{'character-encoding'};
- my $backup_extension =
- make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );
+ # be sure we have a valid output format
+ unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
+ my $formats = join ' ',
+ sort map { "'" . $_ . "'" } keys %default_file_extension;
+ my $fmt = $rOpts->{'format'};
+ Die "-format='$fmt' but must be one of: $formats\n";
+ }
- my $html_toc_extension =
- make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );
+ my $output_extension = make_extension( $rOpts->{'output-file-extension'},
+ $default_file_extension{ $rOpts->{'format'} }, $dot );
- my $html_src_extension =
- make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );
+ # If the backup extension contains a / character then the backup should
+ # be deleted when the -b option is used. On older versions of
+ # perltidy this will generate an error message due to an illegal
+ # file name.
+ #
+ # A backup file will still be generated but will be deleted
+ # at the end. If -bext='/' then this extension will be
+ # the default 'bak'. Otherwise it will be whatever characters
+ # remains after all '/' characters are removed. For example:
+ # -bext extension slashes
+ # '/' bak 1
+ # '/delete' delete 1
+ # 'delete/' delete 1
+ # '/dev/null' devnull 2 (Currently not allowed)
+ my $bext = $rOpts->{'backup-file-extension'};
+ my $delete_backup = ( $rOpts->{'backup-file-extension'} =~ s/\///g );
+
+ # At present only one forward slash is allowed. In the future multiple
+ # slashes may be allowed to allow for other options
+ if ( $delete_backup > 1 ) {
+ Die "-bext=$bext contains more than one '/'\n";
+ }
+
+ my $backup_extension =
+ make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );
+
+ my $html_toc_extension =
+ make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );
+
+ my $html_src_extension =
+ make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );
+
+ # check for -b option;
+ # silently ignore unless beautify mode
+ my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
+ && $rOpts->{'format'} eq 'tidy';
+
+ # Turn off -b with warnings in case of conflicts with other options.
+ # NOTE: Do this silently, without warnings, if there is a source or
+ # destination stream, or standard output is used. This is because the -b
+ # flag may have been in a .perltidyrc file and warnings break
+ # Test::NoWarnings. See email discussion with Merijn Brand 26 Feb 2014.
+ if ($in_place_modify) {
+ if ( $rOpts->{'standard-output'}
+ || $destination_stream
+ || ref $source_stream
+ || $rOpts->{'outfile'}
+ || defined( $rOpts->{'output-path'} ) )
+ {
+ $in_place_modify = 0;
+ }
+ }
- # check for -b option;
- my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
- && $rOpts->{'format'} eq 'tidy' # silently ignore unless beautify mode
- && @ARGV > 0; # silently ignore if standard input;
- # this allows -b to be in a .perltidyrc file
- # without error messages when running from an editor
+ Perl::Tidy::Formatter::check_options($rOpts);
+ if ( $rOpts->{'format'} eq 'html' ) {
+ Perl::Tidy::HtmlWriter->check_options($rOpts);
+ }
- # turn off -b with warnings in case of conflicts with other options
- if ($in_place_modify) {
- if ( $rOpts->{'standard-output'} ) {
- warn "Ignoring -b; you may not use -b and -st together\n";
- $in_place_modify = 0;
- }
- if ($destination_stream) {
- warn
-"Ignoring -b; you may not specify a destination array and -b together\n";
- $in_place_modify = 0;
- }
- if ($source_stream) {
- warn
-"Ignoring -b; you may not specify a source array and -b together\n";
- $in_place_modify = 0;
- }
- if ( $rOpts->{'outfile'} ) {
- warn "Ignoring -b; you may not use -b and -o together\n";
- $in_place_modify = 0;
- }
- if ( defined( $rOpts->{'output-path'} ) ) {
- warn "Ignoring -b; you may not use -b and -opath together\n";
- $in_place_modify = 0;
- }
- }
+ # make the pattern of file extensions that we shouldn't touch
+ my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
+ if ($output_extension) {
+ my $ext = quotemeta($output_extension);
+ $forbidden_file_extensions .= "|$ext";
+ }
+ if ( $in_place_modify && $backup_extension ) {
+ my $ext = quotemeta($backup_extension);
+ $forbidden_file_extensions .= "|$ext";
+ }
+ $forbidden_file_extensions .= ')$';
- Perl::Tidy::Formatter::check_options($rOpts);
- if ( $rOpts->{'format'} eq 'html' ) {
- Perl::Tidy::HtmlWriter->check_options($rOpts);
- }
+ # Create a diagnostics object if requested;
+ # This is only useful for code development
+ my $diagnostics_object = undef;
+ if ( $rOpts->{'DIAGNOSTICS'} ) {
+ $diagnostics_object = Perl::Tidy::Diagnostics->new();
+ }
- # make the pattern of file extensions that we shouldn't touch
- my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
- if ($output_extension) {
- my $ext = quotemeta($output_extension);
- $forbidden_file_extensions .= "|$ext";
- }
- if ( $in_place_modify && $backup_extension ) {
- my $ext = quotemeta($backup_extension);
- $forbidden_file_extensions .= "|$ext";
+ # no filenames should be given if input is from an array
+ if ($source_stream) {
+ if ( @ARGV > 0 ) {
+ Die
+"You may not specify any filenames when a source array is given\n";
}
- $forbidden_file_extensions .= ')$';
- # Create a diagnostics object if requested;
- # This is only useful for code development
- my $diagnostics_object = undef;
- if ( $rOpts->{'DIAGNOSTICS'} ) {
- $diagnostics_object = Perl::Tidy::Diagnostics->new();
- }
+ # we'll stuff the source array into ARGV
+ unshift( @ARGV, $source_stream );
- # no filenames should be given if input is from an array
- if ($source_stream) {
- if ( @ARGV > 0 ) {
- die
-"You may not specify any filenames when a source array is given\n";
- }
+ # No special treatment for source stream which is a filename.
+ # This will enable checks for binary files and other bad stuff.
+ $source_stream = undef unless ref($source_stream);
+ }
- # we'll stuff the source array into ARGV
- unshift( @ARGV, $source_stream );
+ # use stdin by default if no source array and no args
+ else {
+ unshift( @ARGV, '-' ) unless @ARGV;
+ }
- # No special treatment for source stream which is a filename.
- # This will enable checks for binary files and other bad stuff.
- $source_stream = undef unless ref($source_stream);
- }
+ #---------------------------------------------------------------
+ # Ready to go...
+ # main loop to process all files in argument list
+ #---------------------------------------------------------------
+ my $number_of_files = @ARGV;
+ my $formatter = undef;
+ my $tokenizer = undef;
- # use stdin by default if no source array and no args
- else {
- unshift( @ARGV, '-' ) unless @ARGV;
- }
+ # If requested, process in order of increasing file size
+ # This can significantly reduce perl's virtual memory usage during testing.
+ if ( $number_of_files > 1 && $rOpts->{'file-size-order'} ) {
+ @ARGV =
+ map { $_->[0] }
+ sort { $a->[1] <=> $b->[1] }
+ map { [ $_, -e $_ ? -s $_ : 0 ] } @ARGV;
+ }
- # loop to process all files in argument list
- my $number_of_files = @ARGV;
- my $formatter = undef;
- $tokenizer = undef;
- while ( $input_file = shift @ARGV ) {
- my $fileroot;
- my $input_file_permissions;
+ while ( my $input_file = shift @ARGV ) {
+ my $fileroot;
+ my $input_file_permissions;
- #---------------------------------------------------------------
- # determine the input file name
- #---------------------------------------------------------------
- if ($source_stream) {
- $fileroot = "perltidy";
- }
- elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN
- $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc
- $in_place_modify = 0;
+ #---------------------------------------------------------------
+ # prepare this input stream
+ #---------------------------------------------------------------
+ if ($source_stream) {
+ $fileroot = "perltidy";
+
+ # If the source is from an array or string, then .LOG output
+ # is only possible if a logfile stream is specified. This prevents
+ # unexpected perltidy.LOG files.
+ if ( !defined($logfile_stream) ) {
+ $logfile_stream = Perl::Tidy::DevNull->new();
}
- else {
- $fileroot = $input_file;
- unless ( -e $input_file ) {
-
- # file doesn't exist - check for a file glob
- if ( $input_file =~ /([\?\*\[\{])/ ) {
-
- # Windows shell may not remove quotes, so do it
- my $input_file = $input_file;
- if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
- if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
- my $pattern = fileglob_to_re($input_file);
- eval "/$pattern/";
- if ( !$@ && opendir( DIR, './' ) ) {
- my @files =
- grep { /$pattern/ && !-d $_ } readdir(DIR);
- closedir(DIR);
- if (@files) {
- unshift @ARGV, @files;
- next;
- }
+ }
+ elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN
+ $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc
+ $in_place_modify = 0;
+ }
+ else {
+ $fileroot = $input_file;
+ unless ( -e $input_file ) {
+
+ # file doesn't exist - check for a file glob
+ if ( $input_file =~ /([\?\*\[\{])/ ) {
+
+ # Windows shell may not remove quotes, so do it
+ my $input_file = $input_file;
+ if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
+ if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
+ my $pattern = fileglob_to_re($input_file);
+ ##eval "/$pattern/";
+ if ( !$@ && opendir( DIR, './' ) ) {
+ my @files =
+ grep { /$pattern/ && !-d $_ } readdir(DIR);
+ closedir(DIR);
+ if (@files) {
+ unshift @ARGV, @files;
+ next;
}
}
- print "skipping file: '$input_file': no matches found\n";
- next;
}
+ Warn "skipping file: '$input_file': no matches found\n";
+ next;
+ }
- unless ( -f $input_file ) {
- print "skipping file: $input_file: not a regular file\n";
- next;
- }
+ unless ( -f $input_file ) {
+ Warn "skipping file: $input_file: not a regular file\n";
+ next;
+ }
- unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
- print
-"skipping file: $input_file: Non-text (override with -f)\n";
- next;
- }
+ # As a safety precaution, skip zero length files.
+ # If for example a source file got clobbered somehow,
+ # the old .tdy or .bak files might still exist so we
+ # shouldn't overwrite them with zero length files.
+ unless ( -s $input_file ) {
+ Warn "skipping file: $input_file: Zero size\n";
+ next;
+ }
- # we should have a valid filename now
- $fileroot = $input_file;
- $input_file_permissions = ( stat $input_file )[2] & 07777;
+ unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
+ Warn
+ "skipping file: $input_file: Non-text (override with -f)\n";
+ next;
+ }
- if ( $^O eq 'VMS' ) {
- ( $fileroot, $dot ) = check_vms_filename($fileroot);
- }
+ # we should have a valid filename now
+ $fileroot = $input_file;
+ $input_file_permissions = ( stat $input_file )[2] & oct(7777);
+
+ if ( $^O eq 'VMS' ) {
+ ( $fileroot, $dot ) = check_vms_filename($fileroot);
+ }
- # add option to change path here
- if ( defined( $rOpts->{'output-path'} ) ) {
+ # add option to change path here
+ if ( defined( $rOpts->{'output-path'} ) ) {
- my ( $base, $old_path ) = fileparse($fileroot);
- my $new_path = $rOpts->{'output-path'};
- unless ( -d $new_path ) {
- unless ( mkdir $new_path, 0777 ) {
- die "unable to create directory $new_path: $!\n";
- }
+ my ( $base, $old_path ) = fileparse($fileroot);
+ my $new_path = $rOpts->{'output-path'};
+ unless ( -d $new_path ) {
+ unless ( mkdir $new_path, 0777 ) {
+ Die "unable to create directory $new_path: $!\n";
}
- my $path = $new_path;
- $fileroot = catfile( $path, $base );
- unless ($fileroot) {
- die <<EOM;
+ }
+ my $path = $new_path;
+ $fileroot = catfile( $path, $base );
+ unless ($fileroot) {
+ Die <<EOM;
------------------------------------------------------------------------
Problem combining $new_path and $base to make a filename; check -opath
------------------------------------------------------------------------
EOM
- }
}
}
+ }
- # Skip files with same extension as the output files because
- # this can lead to a messy situation with files like
- # script.tdy.tdy.tdy ... or worse problems ... when you
- # rerun perltidy over and over with wildcard input.
- if (
- !$source_stream
- && ( $input_file =~ /$forbidden_file_extensions/o
- || $input_file eq 'DIAGNOSTICS' )
- )
+ # Skip files with same extension as the output files because
+ # this can lead to a messy situation with files like
+ # script.tdy.tdy.tdy ... or worse problems ... when you
+ # rerun perltidy over and over with wildcard input.
+ if (
+ !$source_stream
+ && ( $input_file =~ /$forbidden_file_extensions/o
+ || $input_file eq 'DIAGNOSTICS' )
+ )
+ {
+ Warn "skipping file: $input_file: wrong extension\n";
+ next;
+ }
+
+ # the 'source_object' supplies a method to read the input file
+ my $source_object =
+ Perl::Tidy::LineSource->new( $input_file, $rOpts,
+ $rpending_logfile_message );
+ next unless ($source_object);
+
+ # Prefilters and postfilters: The prefilter is a code reference
+ # that will be applied to the source before tidying, and the
+ # postfilter is a code reference to the result before outputting.
+ if (
+ $prefilter
+ || ( $rOpts_character_encoding
+ && $rOpts_character_encoding eq 'utf8' )
+ )
+ {
+ my $buf = '';
+ while ( my $line = $source_object->get_line() ) {
+ $buf .= $line;
+ }
+
+ $buf = $prefilter->($buf) if $prefilter;
+
+ if ( $rOpts_character_encoding
+ && $rOpts_character_encoding eq 'utf8'
+ && !utf8::is_utf8($buf) )
{
- print "skipping file: $input_file: wrong extension\n";
- next;
+ eval {
+ $buf = Encode::decode( 'UTF-8', $buf,
+ Encode::FB_CROAK | Encode::LEAVE_SRC );
+ };
+ if ($@) {
+ Warn
+"skipping file: $input_file: Unable to decode source as UTF-8\n";
+ next;
+ }
}
- # the 'source_object' supplies a method to read the input file
- my $source_object =
- Perl::Tidy::LineSource->new( $input_file, $rOpts,
+ $source_object = Perl::Tidy::LineSource->new( \$buf, $rOpts,
$rpending_logfile_message );
- next unless ($source_object);
+ }
- # register this file name with the Diagnostics package
- $diagnostics_object->set_input_file($input_file)
- if $diagnostics_object;
+ # register this file name with the Diagnostics package
+ $diagnostics_object->set_input_file($input_file)
+ if $diagnostics_object;
- #---------------------------------------------------------------
- # determine the output file name
- #---------------------------------------------------------------
- my $output_file = undef;
- my $actual_output_extension;
+ #---------------------------------------------------------------
+ # prepare the output stream
+ #---------------------------------------------------------------
+ my $output_file = undef;
+ my $actual_output_extension;
- if ( $rOpts->{'outfile'} ) {
+ if ( $rOpts->{'outfile'} ) {
- if ( $number_of_files <= 1 ) {
+ if ( $number_of_files <= 1 ) {
- if ( $rOpts->{'standard-output'} ) {
- die "You may not use -o and -st together\n";
- }
- elsif ($destination_stream) {
- die
+ if ( $rOpts->{'standard-output'} ) {
+ my $msg = "You may not use -o and -st together";
+ $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
+ Die "$msg\n";
+ }
+ elsif ($destination_stream) {
+ Die
"You may not specify a destination array and -o together\n";
- }
- elsif ( defined( $rOpts->{'output-path'} ) ) {
- die "You may not specify -o and -opath together\n";
- }
- elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
- die "You may not specify -o and -oext together\n";
- }
- $output_file = $rOpts->{outfile};
-
- # make sure user gives a file name after -o
- if ( $output_file =~ /^-/ ) {
- die "You must specify a valid filename after -o\n";
- }
-
- # do not overwrite input file with -o
- if ( defined($input_file_permissions)
- && ( $output_file eq $input_file ) )
- {
- die
- "Use 'perltidy -b $input_file' to modify in-place\n";
- }
}
- else {
- die "You may not use -o with more than one input file\n";
+ elsif ( defined( $rOpts->{'output-path'} ) ) {
+ Die "You may not specify -o and -opath together\n";
}
- }
- elsif ( $rOpts->{'standard-output'} ) {
- if ($destination_stream) {
- die
-"You may not specify a destination array and -st together\n";
+ elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
+ Die "You may not specify -o and -oext together\n";
}
- $output_file = '-';
+ $output_file = $rOpts->{outfile};
- if ( $number_of_files <= 1 ) {
+ # make sure user gives a file name after -o
+ if ( $output_file =~ /^-/ ) {
+ Die "You must specify a valid filename after -o\n";
}
- else {
- die "You may not use -st with more than one input file\n";
+
+ # do not overwrite input file with -o
+ if ( defined($input_file_permissions)
+ && ( $output_file eq $input_file ) )
+ {
+ Die "Use 'perltidy -b $input_file' to modify in-place\n";
}
}
- elsif ($destination_stream) {
- $output_file = $destination_stream;
+ else {
+ Die "You may not use -o with more than one input file\n";
+ }
+ }
+ elsif ( $rOpts->{'standard-output'} ) {
+ if ($destination_stream) {
+ my $msg =
+ "You may not specify a destination array and -st together\n";
+ $msg .= " (-pbp contains -st; see manual)" if ($saw_pbp);
+ Die "$msg\n";
+ }
+ $output_file = '-';
+
+ if ( $number_of_files <= 1 ) {
}
- elsif ($source_stream) { # source but no destination goes to stdout
- $output_file = '-';
+ else {
+ Die "You may not use -st with more than one input file\n";
}
- elsif ( $input_file eq '-' ) {
- $output_file = '-';
+ }
+ elsif ($destination_stream) {
+ $output_file = $destination_stream;
+ }
+ elsif ($source_stream) { # source but no destination goes to stdout
+ $output_file = '-';
+ }
+ elsif ( $input_file eq '-' ) {
+ $output_file = '-';
+ }
+ else {
+ if ($in_place_modify) {
+ $output_file = IO::File->new_tmpfile()
+ or Die "cannot open temp file for -b option: $!\n";
}
else {
- if ($in_place_modify) {
- $output_file = IO::File->new_tmpfile()
- or die "cannot open temp file for -b option: $!\n";
- }
- else {
- $actual_output_extension = $output_extension;
- $output_file = $fileroot . $output_extension;
- }
+ $actual_output_extension = $output_extension;
+ $output_file = $fileroot . $output_extension;
}
+ }
- # the 'sink_object' knows how to write the output file
- my $tee_file = $fileroot . $dot . "TEE";
+ # the 'sink_object' knows how to write the output file
+ my $tee_file = $fileroot . $dot . "TEE";
- my $line_separator = $rOpts->{'output-line-ending'};
- if ( $rOpts->{'preserve-line-endings'} ) {
- $line_separator = find_input_line_ending($input_file);
- }
+ my $line_separator = $rOpts->{'output-line-ending'};
+ if ( $rOpts->{'preserve-line-endings'} ) {
+ $line_separator = find_input_line_ending($input_file);
+ }
- # Eventually all I/O may be done with binmode, but for now it is
- # only done when a user requests a particular line separator
- # through the -ple or -ole flags
- my $binmode = 0;
- if ( defined($line_separator) ) { $binmode = 1 }
- else { $line_separator = "\n" }
+ # Eventually all I/O may be done with binmode, but for now it is
+ # only done when a user requests a particular line separator
+ # through the -ple or -ole flags
+ my $binmode = defined($line_separator)
+ || defined($rOpts_character_encoding);
+ $line_separator = "\n" unless defined($line_separator);
- my $sink_object =
+ my ( $sink_object, $postfilter_buffer );
+ if ($postfilter) {
+ $sink_object =
+ Perl::Tidy::LineSink->new( \$postfilter_buffer, $tee_file,
+ $line_separator, $rOpts, $rpending_logfile_message, $binmode );
+ }
+ else {
+ $sink_object =
Perl::Tidy::LineSink->new( $output_file, $tee_file,
$line_separator, $rOpts, $rpending_logfile_message, $binmode );
+ }
- #---------------------------------------------------------------
- # initialize the error logger
- #---------------------------------------------------------------
- my $warning_file = $fileroot . $dot . "ERR";
- if ($errorfile_stream) { $warning_file = $errorfile_stream }
- my $log_file = $fileroot . $dot . "LOG";
- if ($logfile_stream) { $log_file = $logfile_stream }
-
- my $logger_object =
- Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
- $saw_extrude );
- write_logfile_header(
- $rOpts, $logger_object, $config_file,
- $rraw_options, $Windows_type
- );
- if ($$rpending_logfile_message) {
- $logger_object->write_logfile_entry($$rpending_logfile_message);
- }
- if ($$rpending_complaint) {
- $logger_object->complain($$rpending_complaint);
- }
-
- #---------------------------------------------------------------
- # initialize the debug object, if any
- #---------------------------------------------------------------
- my $debugger_object = undef;
- if ( $rOpts->{DEBUG} ) {
- $debugger_object =
- Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
- }
+ #---------------------------------------------------------------
+ # initialize the error logger for this file
+ #---------------------------------------------------------------
+ my $warning_file = $fileroot . $dot . "ERR";
+ if ($errorfile_stream) { $warning_file = $errorfile_stream }
+ my $log_file = $fileroot . $dot . "LOG";
+ if ($logfile_stream) { $log_file = $logfile_stream }
+
+ my $logger_object =
+ Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
+ $fh_stderr, $saw_extrude );
+ write_logfile_header(
+ $rOpts, $logger_object, $config_file,
+ $rraw_options, $Windows_type, $readable_options,
+ );
+ if ( ${$rpending_logfile_message} ) {
+ $logger_object->write_logfile_entry( ${$rpending_logfile_message} );
+ }
+ if ( ${$rpending_complaint} ) {
+ $logger_object->complain( ${$rpending_complaint} );
+ }
- #---------------------------------------------------------------
- # create a formatter for this file : html writer or pretty printer
- #---------------------------------------------------------------
+ #---------------------------------------------------------------
+ # initialize the debug object, if any
+ #---------------------------------------------------------------
+ my $debugger_object = undef;
+ if ( $rOpts->{DEBUG} ) {
+ $debugger_object =
+ Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
+ }
+
+ #---------------------------------------------------------------
+ # loop over iterations for one source stream
+ #---------------------------------------------------------------
+
+ # We will do a convergence test if 3 or more iterations are allowed.
+ # It would be pointless for fewer because we have to make at least
+ # two passes before we can see if we are converged, and the test
+ # would just slow things down.
+ my $max_iterations = $rOpts->{'iterations'};
+ my $convergence_log_message;
+ my %saw_md5;
+ my $do_convergence_test = $max_iterations > 2;
+ if ($do_convergence_test) {
+ eval "use Digest::MD5 qw(md5_hex)";
+ $do_convergence_test = !$@;
+
+ ### Trying to avoid problems with ancient versions of perl
+ ##eval { my $string = "perltidy"; utf8::encode($string) };
+ ##$do_convergence_test = $do_convergence_test && !$@;
+ }
+
+ # save objects to allow redirecting output during iterations
+ my $sink_object_final = $sink_object;
+ my $debugger_object_final = $debugger_object;
+ my $logger_object_final = $logger_object;
+
+ foreach my $iter ( 1 .. $max_iterations ) {
+
+ # send output stream to temp buffers until last iteration
+ my $sink_buffer;
+ if ( $iter < $max_iterations ) {
+ $sink_object =
+ Perl::Tidy::LineSink->new( \$sink_buffer, $tee_file,
+ $line_separator, $rOpts, $rpending_logfile_message,
+ $binmode );
+ }
+ else {
+ $sink_object = $sink_object_final;
+ }
+
+ # Save logger, debugger output only on pass 1 because:
+ # (1) line number references must be to the starting
+ # source, not an intermediate result, and
+ # (2) we need to know if there are errors so we can stop the
+ # iterations early if necessary.
+ if ( $iter > 1 ) {
+ $debugger_object = undef;
+ $logger_object = undef;
+ }
+
+ #------------------------------------------------------------
+ # create a formatter for this file : html writer or
+ # pretty printer
+ #------------------------------------------------------------
# we have to delete any old formatter because, for safety,
# the formatter will check to see that there is only one.
);
}
else {
- die "I don't know how to do -format=$rOpts->{'format'}\n";
+ Die "I don't know how to do -format=$rOpts->{'format'}\n";
}
unless ($formatter) {
- die "Unable to continue with $rOpts->{'format'} formatting\n";
+ Die "Unable to continue with $rOpts->{'format'} formatting\n";
}
#---------------------------------------------------------------
#---------------------------------------------------------------
$tokenizer = undef; # must destroy old tokenizer
$tokenizer = Perl::Tidy::Tokenizer->new(
- source_object => $source_object,
- logger_object => $logger_object,
- debugger_object => $debugger_object,
- diagnostics_object => $diagnostics_object,
+ source_object => $source_object,
+ logger_object => $logger_object,
+ debugger_object => $debugger_object,
+ diagnostics_object => $diagnostics_object,
+ tabsize => $tabsize,
+
starting_level => $rOpts->{'starting-indentation-level'},
- tabs => $rOpts->{'tabs'},
indent_columns => $rOpts->{'indent-columns'},
look_for_hash_bang => $rOpts->{'look-for-hash-bang'},
look_for_autoloader => $rOpts->{'look-for-autoloader'},
look_for_selfloader => $rOpts->{'look-for-selfloader'},
trim_qw => $rOpts->{'trim-qw'},
+ extended_syntax => $rOpts->{'extended-syntax'},
+
+ continuation_indentation =>
+ $rOpts->{'continuation-indentation'},
+ outdent_labels => $rOpts->{'outdent-labels'},
);
#---------------------------------------------------------------
#---------------------------------------------------------------
$source_object->close_input_file();
- # get file names to use for syntax check
- my $ifname = $source_object->get_input_file_copy_name();
- my $ofname = $sink_object->get_output_file_copy();
+ # line source for next iteration (if any) comes from the current
+ # temporary output buffer
+ if ( $iter < $max_iterations ) {
+
+ $sink_object->close_output_file();
+ $source_object =
+ Perl::Tidy::LineSource->new( \$sink_buffer, $rOpts,
+ $rpending_logfile_message );
+
+ # stop iterations if errors or converged
+ #my $stop_now = $logger_object->{_warning_count};
+ my $stop_now = $tokenizer->report_tokenization_errors();
+ if ($stop_now) {
+ $convergence_log_message = <<EOM;
+Stopping iterations because of severe errors.
+EOM
+ }
+ elsif ($do_convergence_test) {
+
+ # Patch for [rt.cpan.org #88020]
+ # Use utf8::encode since md5_hex() only operates on bytes.
+ # my $digest = md5_hex( utf8::encode($sink_buffer) );
+
+ # Note added 20180114: this patch did not work correctly.
+ # I'm not sure why. But switching to the method
+ # recommended in the Perl 5 documentation for Encode
+ # worked. According to this we can either use
+ # $octets = encode_utf8($string) or equivalently
+ # $octets = encode("utf8",$string)
+ # and then calculate the checksum. So:
+ my $octets = Encode::encode( "utf8", $sink_buffer );
+ my $digest = md5_hex($octets);
+ if ( !$saw_md5{$digest} ) {
+ $saw_md5{$digest} = $iter;
+ }
+ else {
- #---------------------------------------------------------------
- # handle the -b option (backup and modify in-place)
- #---------------------------------------------------------------
- if ($in_place_modify) {
- unless ( -f $input_file ) {
+ # Deja vu, stop iterating
+ $stop_now = 1;
+ my $iterm = $iter - 1;
+ if ( $saw_md5{$digest} != $iterm ) {
- # oh, oh, no real file to backup ..
- # shouldn't happen because of numerous preliminary checks
- die print
-"problem with -b backing up input file '$input_file': not a file\n";
+ # Blinking (oscillating) between two stable
+ # end states. This has happened in the past
+ # but at present there are no known instances.
+ $convergence_log_message = <<EOM;
+Blinking. Output for iteration $iter same as for $saw_md5{$digest}.
+EOM
+ $diagnostics_object->write_diagnostics(
+ $convergence_log_message)
+ if $diagnostics_object;
+ }
+ else {
+ $convergence_log_message = <<EOM;
+Converged. Output for iteration $iter same as for iter $iterm.
+EOM
+ $diagnostics_object->write_diagnostics(
+ $convergence_log_message)
+ if $diagnostics_object && $iterm > 2;
+ }
+ }
+ } ## end if ($do_convergence_test)
+
+ if ($stop_now) {
+
+ # we are stopping the iterations early;
+ # copy the output stream to its final destination
+ $sink_object = $sink_object_final;
+ while ( my $line = $source_object->get_line() ) {
+ $sink_object->write_line($line);
+ }
+ $source_object->close_input_file();
+ last;
}
- my $backup_name = $input_file . $backup_extension;
- if ( -f $backup_name ) {
- unlink($backup_name)
- or die
+ } ## end if ( $iter < $max_iterations)
+ } # end loop over iterations for one source file
+
+ # restore objects which have been temporarily undefined
+ # for second and higher iterations
+ $debugger_object = $debugger_object_final;
+ $logger_object = $logger_object_final;
+
+ $logger_object->write_logfile_entry($convergence_log_message)
+ if $convergence_log_message;
+
+ #---------------------------------------------------------------
+ # Perform any postfilter operation
+ #---------------------------------------------------------------
+ if ($postfilter) {
+ $sink_object->close_output_file();
+ $sink_object =
+ Perl::Tidy::LineSink->new( $output_file, $tee_file,
+ $line_separator, $rOpts, $rpending_logfile_message, $binmode );
+ my $buf = $postfilter->($postfilter_buffer);
+ $source_object =
+ Perl::Tidy::LineSource->new( \$buf, $rOpts,
+ $rpending_logfile_message );
+ while ( my $line = $source_object->get_line() ) {
+ $sink_object->write_line($line);
+ }
+ $source_object->close_input_file();
+ }
+
+ # Save names of the input and output files for syntax check
+ my $ifname = $input_file;
+ my $ofname = $output_file;
+
+ #---------------------------------------------------------------
+ # handle the -b option (backup and modify in-place)
+ #---------------------------------------------------------------
+ if ($in_place_modify) {
+ unless ( -f $input_file ) {
+
+ # oh, oh, no real file to backup ..
+ # shouldn't happen because of numerous preliminary checks
+ Die
+"problem with -b backing up input file '$input_file': not a file\n";
+ }
+ my $backup_name = $input_file . $backup_extension;
+ if ( -f $backup_name ) {
+ unlink($backup_name)
+ or Die
"unable to remove previous '$backup_name' for -b option; check permissions: $!\n";
- }
+ }
+
+ # backup the input file
+ # we use copy for symlinks, move for regular files
+ if ( -l $input_file ) {
+ File::Copy::copy( $input_file, $backup_name )
+ or Die "File::Copy failed trying to backup source: $!";
+ }
+ else {
rename( $input_file, $backup_name )
- or die
+ or Die
"problem renaming $input_file to $backup_name for -b option: $!\n";
- $ifname = $backup_name;
+ }
+ $ifname = $backup_name;
+
+ # copy the output to the original input file
+ # NOTE: it would be nice to just close $output_file and use
+ # File::Copy::copy here, but in this case $output_file is the
+ # handle of an open nameless temporary file so we would lose
+ # everything if we closed it.
+ seek( $output_file, 0, 0 )
+ or Die "unable to rewind a temporary file for -b option: $!\n";
+ my $fout = IO::File->new("> $input_file")
+ or Die
+"problem re-opening $input_file for write for -b option; check file and directory permissions: $!\n";
+ if ($binmode) {
+ if ( $rOpts->{'character-encoding'}
+ && $rOpts->{'character-encoding'} eq 'utf8' )
+ {
+ binmode $fout, ":encoding(UTF-8)";
+ }
+ else { binmode $fout }
+ }
+ my $line;
+ while ( $line = $output_file->getline() ) {
+ $fout->print($line);
+ }
+ $fout->close();
+ $output_file = $input_file;
+ $ofname = $input_file;
+ }
- seek( $output_file, 0, 0 )
- or die "unable to rewind tmp file for -b option: $!\n";
+ #---------------------------------------------------------------
+ # clean up and report errors
+ #---------------------------------------------------------------
+ $sink_object->close_output_file() if $sink_object;
+ $debugger_object->close_debug_file() if $debugger_object;
- my $fout = IO::File->new("> $input_file")
- or die
-"problem opening $input_file for write for -b option; check directory permissions: $!\n";
- binmode $fout;
- my $line;
- while ( $line = $output_file->getline() ) {
- $fout->print($line);
+ # set output file permissions
+ if ( $output_file && -f $output_file && !-l $output_file ) {
+ if ($input_file_permissions) {
+
+ # give output script same permissions as input script, but
+ # make it user-writable or else we can't run perltidy again.
+ # Thus we retain whatever executable flags were set.
+ if ( $rOpts->{'format'} eq 'tidy' ) {
+ chmod( $input_file_permissions | oct(600), $output_file );
}
- $fout->close();
- $output_file = $input_file;
- $ofname = $input_file;
+
+ # else use default permissions for html and any other format
}
+ }
- #---------------------------------------------------------------
- # clean up and report errors
- #---------------------------------------------------------------
- $sink_object->close_output_file() if $sink_object;
- $debugger_object->close_debug_file() if $debugger_object;
+ #---------------------------------------------------------------
+ # Do syntax check if requested and possible
+ #---------------------------------------------------------------
+ my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes
+ if ( $logger_object
+ && $rOpts->{'check-syntax'}
+ && $ifname
+ && $ofname )
+ {
+ $infile_syntax_ok =
+ check_syntax( $ifname, $ofname, $logger_object, $rOpts );
+ }
+
+ #---------------------------------------------------------------
+ # remove the original file for in-place modify as follows:
+ # $delete_backup=0 never
+ # $delete_backup=1 only if no errors
+ # $delete_backup>1 always : NOT ALLOWED, too risky, see above
+ #---------------------------------------------------------------
+ if ( $in_place_modify
+ && $delete_backup
+ && -f $ifname
+ && ( $delete_backup > 1 || !$logger_object->{_warning_count} ) )
+ {
- my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes
- if ($output_file) {
+ # As an added safety precaution, do not delete the source file
+ # if its size has dropped from positive to zero, since this
+ # could indicate a disaster of some kind, including a hardware
+ # failure. Actually, this could happen if you had a file of
+ # all comments (or pod) and deleted everything with -dac (-dap)
+ # for some reason.
+ if ( !-s $output_file && -s $ifname && $delete_backup == 1 ) {
+ Warn(
+"output file '$output_file' missing or zero length; original '$ifname' not deleted\n"
+ );
+ }
+ else {
+ unlink($ifname)
+ or Die
+"unable to remove previous '$ifname' for -b option; check permissions: $!\n";
+ }
+ }
- if ($input_file_permissions) {
+ $logger_object->finish( $infile_syntax_ok, $formatter )
+ if $logger_object;
+ } # end of main loop to process all files
- # give output script same permissions as input script, but
- # make it user-writable or else we can't run perltidy again.
- # Thus we retain whatever executable flags were set.
- if ( $rOpts->{'format'} eq 'tidy' ) {
- chmod( $input_file_permissions | 0600, $output_file );
- }
+ NORMAL_EXIT:
+ return 0;
- # else use default permissions for html and any other format
+ ERROR_EXIT:
+ return 1;
+} # end of main program perltidy
+sub get_stream_as_named_file {
+
+ # Return the name of a file containing a stream of data, creating
+ # a temporary file if necessary.
+ # Given:
+ # $stream - the name of a file or stream
+ # Returns:
+ # $fname = name of file if possible, or undef
+ # $if_tmpfile = true if temp file, undef if not temp file
+ #
+ # This routine is needed for passing actual files to Perl for
+ # a syntax check.
+ my ($stream) = @_;
+ my $is_tmpfile;
+ my $fname;
+ if ($stream) {
+ if ( ref($stream) ) {
+ my ( $fh_stream, $fh_name ) =
+ Perl::Tidy::streamhandle( $stream, 'r' );
+ if ($fh_stream) {
+ my ( $fout, $tmpnam ) = File::Temp::tempfile();
+ if ($fout) {
+ $fname = $tmpnam;
+ $is_tmpfile = 1;
+ binmode $fout;
+ while ( my $line = $fh_stream->getline() ) {
+ $fout->print($line);
+ }
+ $fout->close();
}
- if ( $logger_object && $rOpts->{'check-syntax'} ) {
- $infile_syntax_ok =
- check_syntax( $ifname, $ofname, $logger_object, $rOpts );
- }
+ $fh_stream->close();
}
-
- $logger_object->finish( $infile_syntax_ok, $formatter )
- if $logger_object;
- } # end of loop to process all files
- } # end of main program
+ }
+ elsif ( $stream ne '-' && -f $stream ) {
+ $fname = $stream;
+ }
+ }
+ return ( $fname, $is_tmpfile );
}
sub fileglob_to_re {
$x =~ s#([./^\$()])#\\$1#g; # escape special characters
$x =~ s#\*#.*#g; # '*' -> '.*'
$x =~ s#\?#.#g; # '?' -> '.'
- "^$x\\z"; # match whole word
+ return "^$x\\z"; # match whole word
}
sub make_extension {
}
sub write_logfile_header {
- my ( $rOpts, $logger_object, $config_file, $rraw_options, $Windows_type ) =
- @_;
+ my (
+ $rOpts, $logger_object, $config_file,
+ $rraw_options, $Windows_type, $readable_options
+ ) = @_;
$logger_object->write_logfile_entry(
"perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
);
if ($Windows_type) {
$logger_object->write_logfile_entry("Windows type is $Windows_type\n");
}
- my $options_string = join( ' ', @$rraw_options );
+ my $options_string = join( ' ', @{$rraw_options} );
if ($config_file) {
$logger_object->write_logfile_entry(
$logger_object->write_logfile_entry(
"------------------------------------\n");
- foreach ( keys %{$rOpts} ) {
- $logger_object->write_logfile_entry( '--' . "$_=$rOpts->{$_}\n" );
- }
+ $logger_object->write_logfile_entry($readable_options);
+
$logger_object->write_logfile_entry(
"------------------------------------\n");
}
$logger_object->write_logfile_entry(
"To find error messages search for 'WARNING' with your editor\n");
+ return;
}
sub generate_options {
# fll --> fuzzy-line-length # a trivial parameter which gets
# turned off for the extrude option
# which is mainly for debugging
- # chk --> check-multiline-quotes # check for old bug; to be deleted
# scl --> short-concatenation-item-length # helps break at '.'
# recombine # for debugging line breaks
# valign # for debugging vertical alignment
- # I --> DIAGNOSTICS # for debugging
+ # I --> DIAGNOSTICS # for debugging [**DEACTIVATED**]
######################################################################
# here is a summary of the Getopt codes:
npro
recombine!
valign!
+ notidy
);
my $category = 13; # Debugging
if ($short_name) {
if ( $expansion{$short_name} ) {
my $existing_name = $expansion{$short_name}[0];
- die
+ Die
"redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
}
$expansion{$short_name} = [$long_name];
my $nolong_name = 'no' . $long_name;
if ( $expansion{$nshort_name} ) {
my $existing_name = $expansion{$nshort_name}[0];
- die
+ Die
"attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
}
$expansion{$nshort_name} = [$nolong_name];
$add_option->( 'backup-file-extension', 'bext', '=s' );
$add_option->( 'force-read-binary', 'f', '!' );
$add_option->( 'format', 'fmt', '=s' );
+ $add_option->( 'iterations', 'it', '=i' );
$add_option->( 'logfile', 'log', '!' );
$add_option->( 'logfile-gap', 'g', ':i' );
$add_option->( 'outfile', 'o', '=s' );
$add_option->( 'standard-error-output', 'se', '!' );
$add_option->( 'standard-output', 'st', '!' );
$add_option->( 'warning-output', 'w', '!' );
+ $add_option->( 'character-encoding', 'enc', '=s' );
# options which are both toggle switches and values moved here
# to hide from tidyview (which does not show category 0 flags):
########################################
$category = 1; # Basic formatting options
########################################
- $add_option->( 'check-syntax', 'syn', '!' );
- $add_option->( 'entab-leading-whitespace', 'et', '=i' );
- $add_option->( 'indent-columns', 'i', '=i' );
- $add_option->( 'maximum-line-length', 'l', '=i' );
- $add_option->( 'perl-syntax-check-flags', 'pscf', '=s' );
- $add_option->( 'preserve-line-endings', 'ple', '!' );
- $add_option->( 'tabs', 't', '!' );
+ $add_option->( 'check-syntax', 'syn', '!' );
+ $add_option->( 'entab-leading-whitespace', 'et', '=i' );
+ $add_option->( 'indent-columns', 'i', '=i' );
+ $add_option->( 'maximum-line-length', 'l', '=i' );
+ $add_option->( 'variable-maximum-line-length', 'vmll', '!' );
+ $add_option->( 'whitespace-cycle', 'wc', '=i' );
+ $add_option->( 'perl-syntax-check-flags', 'pscf', '=s' );
+ $add_option->( 'preserve-line-endings', 'ple', '!' );
+ $add_option->( 'tabs', 't', '!' );
+ $add_option->( 'default-tabsize', 'dt', '=i' );
+ $add_option->( 'extended-syntax', 'xs', '!' );
########################################
$category = 2; # Code indentation control
$add_option->( 'square-bracket-tightness', 'sbt', '=i' );
$add_option->( 'square-bracket-vertical-tightness', 'sbvt', '=i' );
$add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' );
+ $add_option->( 'tight-secret-operators', 'tso', '!' );
$add_option->( 'trim-qw', 'tqw', '!' );
+ $add_option->( 'trim-pod', 'trp', '!' );
$add_option->( 'want-left-space', 'wls', '=s' );
$add_option->( 'want-right-space', 'wrs', '=s' );
$add_option->( 'closing-side-comment-prefix', 'cscp', '=s' );
$add_option->( 'closing-side-comment-warnings', 'cscw', '!' );
$add_option->( 'closing-side-comments', 'csc', '!' );
+ $add_option->( 'closing-side-comments-balanced', 'cscb', '!' );
$add_option->( 'format-skipping', 'fs', '!' );
$add_option->( 'format-skipping-begin', 'fsb', '=s' );
$add_option->( 'format-skipping-end', 'fse', '=s' );
$add_option->( 'hanging-side-comments', 'hsc', '!' );
$add_option->( 'indent-block-comments', 'ibc', '!' );
$add_option->( 'indent-spaced-block-comments', 'isbc', '!' );
+ $add_option->( 'fixed-position-side-comment', 'fpsc', '=i' );
$add_option->( 'minimum-space-to-comment', 'msc', '=i' );
$add_option->( 'outdent-long-comments', 'olc', '!' );
$add_option->( 'outdent-static-block-comments', 'osbc', '!' );
$add_option->( 'static-block-comments', 'sbc', '!' );
$add_option->( 'static-side-comment-prefix', 'sscp', '=s' );
$add_option->( 'static-side-comments', 'ssc', '!' );
+ $add_option->( 'ignore-side-comment-lengths', 'iscl', '!' );
########################################
$category = 5; # Linebreak controls
########################################
- $add_option->( 'add-newlines', 'anl', '!' );
- $add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' );
- $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' );
- $add_option->( 'brace-vertical-tightness', 'bvt', '=i' );
- $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' );
- $add_option->( 'cuddled-else', 'ce', '!' );
- $add_option->( 'delete-old-newlines', 'dnl', '!' );
- $add_option->( 'opening-brace-always-on-right', 'bar', '!' );
- $add_option->( 'opening-brace-on-new-line', 'bl', '!' );
- $add_option->( 'opening-hash-brace-right', 'ohbr', '!' );
- $add_option->( 'opening-paren-right', 'opr', '!' );
- $add_option->( 'opening-square-bracket-right', 'osbr', '!' );
- $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' );
- $add_option->( 'paren-vertical-tightness', 'pvt', '=i' );
- $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' );
- $add_option->( 'stack-closing-hash-brace', 'schb', '!' );
- $add_option->( 'stack-closing-paren', 'scp', '!' );
- $add_option->( 'stack-closing-square-bracket', 'scsb', '!' );
- $add_option->( 'stack-opening-hash-brace', 'sohb', '!' );
- $add_option->( 'stack-opening-paren', 'sop', '!' );
- $add_option->( 'stack-opening-square-bracket', 'sosb', '!' );
- $add_option->( 'vertical-tightness', 'vt', '=i' );
- $add_option->( 'vertical-tightness-closing', 'vtc', '=i' );
- $add_option->( 'want-break-after', 'wba', '=s' );
- $add_option->( 'want-break-before', 'wbb', '=s' );
+ $add_option->( 'add-newlines', 'anl', '!' );
+ $add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' );
+ $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' );
+ $add_option->( 'brace-vertical-tightness', 'bvt', '=i' );
+ $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' );
+ $add_option->( 'cuddled-else', 'ce', '!' );
+ $add_option->( 'cuddled-blocks', 'cb', '!' );
+ $add_option->( 'cuddled-block-list', 'cbl', '=s' );
+ $add_option->( 'cuddled-break-option', 'cbo', '=i' );
+ $add_option->( 'delete-old-newlines', 'dnl', '!' );
+ $add_option->( 'opening-brace-always-on-right', 'bar', '!' );
+ $add_option->( 'opening-brace-on-new-line', 'bl', '!' );
+ $add_option->( 'opening-hash-brace-right', 'ohbr', '!' );
+ $add_option->( 'opening-paren-right', 'opr', '!' );
+ $add_option->( 'opening-square-bracket-right', 'osbr', '!' );
+ $add_option->( 'opening-anonymous-sub-brace-on-new-line', 'asbl', '!' );
+ $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' );
+ $add_option->( 'paren-vertical-tightness', 'pvt', '=i' );
+ $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' );
+ $add_option->( 'weld-nested-containers', 'wn', '!' );
+ $add_option->( 'space-backslash-quote', 'sbq', '=i' );
+ $add_option->( 'stack-closing-block-brace', 'scbb', '!' );
+ $add_option->( 'stack-closing-hash-brace', 'schb', '!' );
+ $add_option->( 'stack-closing-paren', 'scp', '!' );
+ $add_option->( 'stack-closing-square-bracket', 'scsb', '!' );
+ $add_option->( 'stack-opening-block-brace', 'sobb', '!' );
+ $add_option->( 'stack-opening-hash-brace', 'sohb', '!' );
+ $add_option->( 'stack-opening-paren', 'sop', '!' );
+ $add_option->( 'stack-opening-square-bracket', 'sosb', '!' );
+ $add_option->( 'vertical-tightness', 'vt', '=i' );
+ $add_option->( 'vertical-tightness-closing', 'vtc', '=i' );
+ $add_option->( 'want-break-after', 'wba', '=s' );
+ $add_option->( 'want-break-before', 'wbb', '=s' );
+ $add_option->( 'break-after-all-operators', 'baao', '!' );
+ $add_option->( 'break-before-all-operators', 'bbao', '!' );
+ $add_option->( 'keep-interior-semicolons', 'kis', '!' );
########################################
$category = 6; # Controlling list formatting
########################################
$category = 7; # Retaining or ignoring existing line breaks
########################################
- $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
- $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
- $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' );
- $add_option->( 'ignore-old-breakpoints', 'iob', '!' );
+ $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
+ $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
+ $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' );
+ $add_option->( 'break-at-old-attribute-breakpoints', 'boa', '!' );
+ $add_option->( 'ignore-old-breakpoints', 'iob', '!' );
########################################
$category = 8; # Blank line control
########################################
- $add_option->( 'blanks-before-blocks', 'bbb', '!' );
- $add_option->( 'blanks-before-comments', 'bbc', '!' );
- $add_option->( 'blanks-before-subs', 'bbs', '!' );
- $add_option->( 'long-block-line-count', 'lbl', '=i' );
- $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' );
- $add_option->( 'swallow-optional-blank-lines', 'sob', '!' );
+ $add_option->( 'blanks-before-blocks', 'bbb', '!' );
+ $add_option->( 'blanks-before-comments', 'bbc', '!' );
+ $add_option->( 'blank-lines-before-subs', 'blbs', '=i' );
+ $add_option->( 'blank-lines-before-packages', 'blbp', '=i' );
+ $add_option->( 'long-block-line-count', 'lbl', '=i' );
+ $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' );
+ $add_option->( 'keep-old-blank-lines', 'kbl', '=i' );
+
+ $add_option->( 'blank-lines-after-opening-block', 'blao', '=i' );
+ $add_option->( 'blank-lines-before-closing-block', 'blbc', '=i' );
+ $add_option->( 'blank-lines-after-opening-block-list', 'blaol', '=s' );
+ $add_option->( 'blank-lines-before-closing-block-list', 'blbcl', '=s' );
########################################
$category = 9; # Other controls
########################################
$category = 13; # Debugging
########################################
+## $add_option->( 'DIAGNOSTICS', 'I', '!' );
$add_option->( 'DEBUG', 'D', '!' );
- $add_option->( 'DIAGNOSTICS', 'I', '!' );
- $add_option->( 'check-multiline-quotes', 'chk', '!' );
+ $add_option->( 'dump-cuddled-block-list', 'dcbl', '!' );
$add_option->( 'dump-defaults', 'ddf', '!' );
$add_option->( 'dump-long-names', 'dln', '!' );
$add_option->( 'dump-options', 'dop', '!' );
$add_option->( 'short-concatenation-item-length', 'scl', '=i' );
$add_option->( 'show-options', 'opt', '!' );
$add_option->( 'version', 'v', '' );
+ $add_option->( 'memoize', 'mem', '!' );
+ $add_option->( 'file-size-order', 'fso', '!' );
#---------------------------------------------------------------------
%option_range = (
'format' => [ 'tidy', 'html', 'user' ],
'output-line-ending' => [ 'dos', 'win', 'mac', 'unix' ],
+ 'character-encoding' => [ 'none', 'utf8' ],
+
+ 'space-backslash-quote' => [ 0, 2 ],
'block-brace-tightness' => [ 0, 2 ],
'brace-tightness' => [ 0, 2 ],
'closing-token-indentation' => [ 0, 3 ],
'closing-side-comment-else-flag' => [ 0, 2 ],
- 'comma-arrow-breakpoints' => [ 0, 3 ],
+ 'comma-arrow-breakpoints' => [ 0, 5 ],
);
# Note: we could actually allow negative ci if someone really wants it:
add-whitespace
blanks-before-blocks
blanks-before-comments
- blanks-before-subs
+ blank-lines-before-subs=1
+ blank-lines-before-packages=1
block-brace-tightness=0
block-brace-vertical-tightness=0
brace-tightness=1
brace-vertical-tightness=0
break-at-old-logical-breakpoints
break-at-old-ternary-breakpoints
+ break-at-old-attribute-breakpoints
break-at-old-keyword-breakpoints
- comma-arrow-breakpoints=1
+ comma-arrow-breakpoints=5
nocheck-syntax
closing-side-comment-interval=6
closing-side-comment-maximum-text=20
closing-side-comment-else-flag=0
+ closing-side-comments-balanced
closing-paren-indentation=0
closing-brace-indentation=0
closing-square-bracket-indentation=0
continuation-indentation=2
+ cuddled-break-option=1
delete-old-newlines
delete-semicolons
+ extended-syntax
fuzzy-line-length
hanging-side-comments
indent-block-comments
indent-columns=4
+ iterations=1
+ keep-old-blank-lines=1
long-block-line-count=8
look-for-autoloader
look-for-selfloader
maximum-consecutive-blank-lines=1
maximum-fields-per-table=0
maximum-line-length=80
+ memoize
minimum-space-to-comment=4
nobrace-left-and-indent
nocuddled-else
+ nocuddled-blocks
nodelete-old-whitespace
nohtml
nologfile
noquiet
noshow-options
nostatic-side-comments
- noswallow-optional-blank-lines
notabs
nowarning-output
+ character-encoding=none
outdent-labels
outdent-long-quotes
outdent-long-comments
paren-vertical-tightness-closing=0
paren-vertical-tightness=0
pass-version-line
+ noweld-nested-containers
recombine
valign
short-concatenation-item-length=8
space-for-semicolon
+ space-backslash-quote=1
square-bracket-tightness=1
square-bracket-vertical-tightness-closing=0
square-bracket-vertical-tightness=0
format=tidy
backup-file-extension=bak
format-skipping
+ default-tabsize=8
pod2html
html-table-of-contents
#---------------------------------------------------------------
%expansion = (
%expansion,
- 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)],
- 'fnl' => [qw(freeze-newlines)],
- 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)],
- 'fws' => [qw(freeze-whitespace)],
+ 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)],
+ 'fnl' => [qw(freeze-newlines)],
+ 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)],
+ 'fws' => [qw(freeze-whitespace)],
+ 'freeze-blank-lines' =>
+ [qw(maximum-consecutive-blank-lines=0 keep-old-blank-lines=2)],
+ 'fbl' => [qw(freeze-blank-lines)],
'indent-only' => [qw(freeze-newlines freeze-whitespace)],
'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
'nooutdent-long-lines' =>
'nhtml' => [qw(format=tidy)],
'tidy' => [qw(format=tidy)],
+ 'utf8' => [qw(character-encoding=utf8)],
+ 'UTF8' => [qw(character-encoding=utf8)],
+
+ 'swallow-optional-blank-lines' => [qw(kbl=0)],
+ 'noswallow-optional-blank-lines' => [qw(kbl=1)],
+ 'sob' => [qw(kbl=0)],
+ 'nsob' => [qw(kbl=1)],
+
'break-after-comma-arrows' => [qw(cab=0)],
'nobreak-after-comma-arrows' => [qw(cab=1)],
'baa' => [qw(cab=0)],
'nbaa' => [qw(cab=1)],
+ 'blanks-before-subs' => [qw(blbs=1 blbp=1)],
+ 'bbs' => [qw(blbs=1 blbp=1)],
+ 'noblanks-before-subs' => [qw(blbs=0 blbp=0)],
+ 'nbbs' => [qw(blbs=0 blbp=0)],
+
'break-at-old-trinary-breakpoints' => [qw(bot)],
'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
'sct' => [qw(scp schb scsb)],
'stack-closing-tokens' => => [qw(scp schb scsb)],
'nsct' => [qw(nscp nschb nscsb)],
- 'nostack-opening-tokens' => [qw(nscp nschb nscsb)],
+ 'nostack-closing-tokens' => [qw(nscp nschb nscsb)],
+
+ 'sac' => [qw(sot sct)],
+ 'nsac' => [qw(nsot nsct)],
+ 'stack-all-containers' => [qw(sot sct)],
+ 'nostack-all-containers' => [qw(nsot nsct)],
+
+ 'act=0' => [qw(pt=0 sbt=0 bt=0 bbt=0)],
+ 'act=1' => [qw(pt=1 sbt=1 bt=1 bbt=1)],
+ 'act=2' => [qw(pt=2 sbt=2 bt=2 bbt=2)],
+ 'all-containers-tightness=0' => [qw(pt=0 sbt=0 bt=0 bbt=0)],
+ 'all-containers-tightness=1' => [qw(pt=1 sbt=1 bt=1 bbt=1)],
+ 'all-containers-tightness=2' => [qw(pt=2 sbt=2 bt=2 bbt=2)],
+
+ 'stack-opening-block-brace' => [qw(bbvt=2 bbvtl=*)],
+ 'sobb' => [qw(bbvt=2 bbvtl=*)],
+ 'nostack-opening-block-brace' => [qw(bbvt=0)],
+ 'nsobb' => [qw(bbvt=0)],
+
+ 'converge' => [qw(it=4)],
+ 'noconverge' => [qw(it=1)],
+ 'conv' => [qw(it=4)],
+ 'nconv' => [qw(it=1)],
# 'mangle' originally deleted pod and comments, but to keep it
# reversible, it no longer does. But if you really want to
'mangle' => [
qw(
check-syntax
+ keep-old-blank-lines=0
delete-old-newlines
delete-old-whitespace
delete-semicolons
noadd-semicolons
noadd-whitespace
noblanks-before-blocks
- noblanks-before-subs
+ blank-lines-before-subs=0
+ blank-lines-before-packages=0
notabs
)
],
# An interesting use for 'extrude' is to do this:
# perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new
# which will break up all one-line blocks.
+ #
+ # Removed 'check-syntax' option, which is unsafe because it may execute
+ # code in BEGIN blocks. Example 'Moose/debugger-duck_type.t'.
'extrude' => [
qw(
- check-syntax
ci=0
delete-old-newlines
delete-old-whitespace
noadd-semicolons
noadd-whitespace
noblanks-before-blocks
- noblanks-before-subs
+ blank-lines-before-subs=0
+ blank-lines-before-packages=0
nofuzzy-line-length
notabs
norecombine
} # end of generate_options
+# Memoize process_command_line. Given same @ARGV passed in, return same
+# values and same @ARGV back.
+# This patch was supplied by Jonathan Swartz Nov 2012 and significantly speeds
+# up masontidy (https://metacpan.org/module/masontidy)
+
+my %process_command_line_cache;
+
sub process_command_line {
my (
$rpending_complaint, $dump_options_type
) = @_;
+ my $use_cache = !defined($perltidyrc_stream) && !$dump_options_type;
+ if ($use_cache) {
+ my $cache_key = join( chr(28), @ARGV );
+ if ( my $result = $process_command_line_cache{$cache_key} ) {
+ my ( $argv, @retvals ) = @{$result};
+ @ARGV = @{$argv};
+ return @retvals;
+ }
+ else {
+ my @retvals = _process_command_line(@_);
+ $process_command_line_cache{$cache_key} = [ \@ARGV, @retvals ]
+ if $retvals[0]->{'memoize'};
+ return @retvals;
+ }
+ }
+ else {
+ return _process_command_line(@_);
+ }
+}
+
+# (note the underscore here)
+sub _process_command_line {
+
+ my (
+ $perltidyrc_stream, $is_Windows, $Windows_type,
+ $rpending_complaint, $dump_options_type
+ ) = @_;
+
use Getopt::Long;
+ # Save any current Getopt::Long configuration
+ # and set to Getopt::Long defaults. Use eval to avoid
+ # breaking old versions of Perl without these routines.
+ # Previous configuration is reset at the exit of this routine.
+ my $glc;
+ eval { $glc = Getopt::Long::Configure() };
+ unless ($@) {
+ eval { Getopt::Long::ConfigDefaults() };
+ }
+ else { $glc = undef }
+
my (
$roption_string, $rdefaults, $rexpansion,
$roption_category, $roption_range
my %Opts = ();
{
local @ARGV;
- my $i;
# do not load the defaults if we are just dumping perltidyrc
unless ( $dump_options_type eq 'perltidyrc' ) {
- for $i (@$rdefaults) { push @ARGV, "--" . $i }
- }
-
- # Patch to save users Getopt::Long configuration
- # and set to Getopt::Long defaults. Use eval to avoid
- # breaking old versions of Perl without these routines.
- my $glc;
- eval { $glc = Getopt::Long::Configure() };
- unless ($@) {
- eval { Getopt::Long::ConfigDefaults() };
+ for my $i ( @{$rdefaults} ) { push @ARGV, "--" . $i }
}
- else { $glc = undef }
-
- if ( !GetOptions( \%Opts, @$roption_string ) ) {
- die "Programming Bug: error in setting default options";
+ if ( !GetOptions( \%Opts, @{$roption_string} ) ) {
+ Die
+"Programming Bug reported by 'GetOptions': error in setting default options";
}
-
- # Patch to put the previous Getopt::Long configuration back
- eval { Getopt::Long::Configure($glc) } if defined $glc;
}
my $word;
my @raw_options = ();
my $config_file = "";
my $saw_ignore_profile = 0;
- my $saw_extrude = 0;
my $saw_dump_profile = 0;
- my $i;
#---------------------------------------------------------------
# Take a first look at the command-line parameters. Do as many
# immediate dumps as possible, which can avoid confusion if the
# perltidyrc file has an error.
#---------------------------------------------------------------
- foreach $i (@ARGV) {
+ foreach my $i (@ARGV) {
$i =~ s/^--/-/;
if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) {
}
elsif ( $i =~ /^-(pro|profile)=(.+)/ ) {
if ($config_file) {
- warn
+ Warn
"Only one -pro=filename allowed, using '$2' instead of '$config_file'\n";
}
$config_file = $2;
+
+ # resolve <dir>/.../<file>, meaning look upwards from directory
+ if ( defined($config_file) ) {
+ if ( my ( $start_dir, $search_file ) =
+ ( $config_file =~ m{^(.*)\.\.\./(.*)$} ) )
+ {
+ $start_dir = '.' if !$start_dir;
+ $start_dir = Cwd::realpath($start_dir);
+ if ( my $found_file =
+ find_file_upwards( $start_dir, $search_file ) )
+ {
+ $config_file = $found_file;
+ }
+ }
+ }
unless ( -e $config_file ) {
- warn "cannot find file given with -pro=$config_file: $!\n";
+ Warn "cannot find file given with -pro=$config_file: $!\n";
$config_file = "";
}
}
elsif ( $i =~ /^-(pro|profile)=?$/ ) {
- die "usage: -pro=filename or --profile=filename, no spaces\n";
+ Die "usage: -pro=filename or --profile=filename, no spaces\n";
}
- elsif ( $i =~ /^-extrude$/ ) {
- $saw_extrude = 1;
- }
- elsif ( $i =~ /^-(help|h|HELP|H)$/ ) {
+ elsif ( $i =~ /^-(help|h|HELP|H|\?)$/ ) {
usage();
- exit 1;
+ Exit 0;
}
elsif ( $i =~ /^-(version|v)$/ ) {
show_version();
- exit 1;
+ Exit 0;
}
elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) {
- dump_defaults(@$rdefaults);
- exit 1;
+ dump_defaults( @{$rdefaults} );
+ Exit 0;
}
elsif ( $i =~ /^-(dump-long-names|dln)$/ ) {
- dump_long_names(@$roption_string);
- exit 1;
+ dump_long_names( @{$roption_string} );
+ Exit 0;
}
elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) {
dump_short_names($rexpansion);
- exit 1;
+ Exit 0;
}
elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) {
Perl::Tidy::Tokenizer->dump_token_types(*STDOUT);
- exit 1;
+ Exit 0;
}
}
if ( $saw_dump_profile && $saw_ignore_profile ) {
- warn "No profile to dump because of -npro\n";
- exit 1;
+ Warn "No profile to dump because of -npro\n";
+ Exit 1;
}
#---------------------------------------------------------------
# line.
if ($perltidyrc_stream) {
if ($config_file) {
- warn <<EOM;
+ Warn <<EOM;
Conflict: a perltidyrc configuration file was specified both as this
perltidy call parameter: $perltidyrc_stream
and with this -profile=$config_file.
# look for a config file if we don't have one yet
my $rconfig_file_chatter;
- $$rconfig_file_chatter = "";
+ ${$rconfig_file_chatter} = "";
$config_file =
find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
$rpending_complaint )
( $fh_config, $config_file ) =
Perl::Tidy::streamhandle( $config_file, 'r' );
unless ($fh_config) {
- $$rconfig_file_chatter .=
+ ${$rconfig_file_chatter} .=
"# $config_file exists but cannot be opened\n";
}
}
if ($saw_dump_profile) {
- if ($saw_dump_profile) {
- dump_config_file( $fh_config, $config_file,
- $rconfig_file_chatter );
- exit 1;
- }
+ dump_config_file( $fh_config, $config_file, $rconfig_file_chatter );
+ Exit 0;
}
if ($fh_config) {
my ( $rconfig_list, $death_message ) =
read_config_file( $fh_config, $config_file, $rexpansion );
- die $death_message if ($death_message);
+ Die $death_message if ($death_message);
# process any .perltidyrc parameters right now so we can
# localize errors
- if (@$rconfig_list) {
- local @ARGV = @$rconfig_list;
+ if ( @{$rconfig_list} ) {
+ local @ARGV = @{$rconfig_list};
expand_command_abbreviations( $rexpansion, \@raw_options,
$config_file );
- if ( !GetOptions( \%Opts, @$roption_string ) ) {
- die
+ if ( !GetOptions( \%Opts, @{$roption_string} ) ) {
+ Die
"Error in this config file: $config_file \nUse -npro to ignore this file, -h for help'\n";
}
last;
}
}
- die <<EOM;
+ Die <<EOM;
There are $count unrecognized values in the configuration file '$config_file':
$str
Use leading dashes for parameters. Use -npro to ignore this file.
# diagnose the cause of the premature exit.
foreach (
qw{
+ dump-cuddled-block-list
dump-defaults
dump-long-names
dump-options
if ( defined( $Opts{$_} ) ) {
delete $Opts{$_};
- warn "ignoring --$_ in config file: $config_file\n";
+ Warn "ignoring --$_ in config file: $config_file\n";
}
}
}
#---------------------------------------------------------------
expand_command_abbreviations( $rexpansion, \@raw_options, $config_file );
- if ( !GetOptions( \%Opts, @$roption_string ) ) {
- die "Error on command line; for help try 'perltidy -h'\n";
+ local $SIG{'__WARN__'} = sub { Warn $_[0] };
+ if ( !GetOptions( \%Opts, @{$roption_string} ) ) {
+ Die "Error on command line; for help try 'perltidy -h'\n";
}
- return ( \%Opts, $config_file, \@raw_options, $saw_extrude, $roption_string,
+ # reset Getopt::Long configuration back to its previous value
+ eval { Getopt::Long::Configure($glc) } if defined $glc;
+
+ return ( \%Opts, $config_file, \@raw_options, $roption_string,
$rexpansion, $roption_category, $roption_range );
-} # end of process_command_line
+} # end of _process_command_line
sub check_options {
$rOpts->{'check-syntax'} = 0;
}
+ # Added Dec 2017: Deactivating check-syntax for all systems for safety
+ # because unexpected results can occur when code in BEGIN blocks is
+ # executed. This flag was included to help check for perltidy mistakes,
+ # and may still be useful for debugging. To activate for testing comment
+ # out the next three lines.
+ else {
+ $rOpts->{'check-syntax'} = 0;
+ }
+
# It's really a bad idea to check syntax as root unless you wrote
# the script yourself. FIXME: not sure if this works with VMS
unless ($is_Windows) {
if ( $< == 0 && $rOpts->{'check-syntax'} ) {
$rOpts->{'check-syntax'} = 0;
- $$rpending_complaint .=
+ ${$rpending_complaint} .=
"Syntax check deactivated for safety; you shouldn't run this as root\n";
}
}
- # see if user set a non-negative logfile-gap
- if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) {
+ # check iteration count and quietly fix if necessary:
+ # - iterations option only applies to code beautification mode
+ # - the convergence check should stop most runs on iteration 2, and
+ # virtually all on iteration 3. But we'll allow up to 6.
+ if ( $rOpts->{'format'} ne 'tidy' ) {
+ $rOpts->{'iterations'} = 1;
+ }
+ elsif ( defined( $rOpts->{'iterations'} ) ) {
+ if ( $rOpts->{'iterations'} <= 0 ) { $rOpts->{'iterations'} = 1 }
+ elsif ( $rOpts->{'iterations'} > 6 ) { $rOpts->{'iterations'} = 6 }
+ }
+ else {
+ $rOpts->{'iterations'} = 1;
+ }
- # a zero gap will be taken as a 1
- if ( $rOpts->{'logfile-gap'} == 0 ) {
- $rOpts->{'logfile-gap'} = 1;
+ my $check_blank_count = sub {
+ my ( $key, $abbrev ) = @_;
+ if ( $rOpts->{$key} ) {
+ if ( $rOpts->{$key} < 0 ) {
+ $rOpts->{$key} = 0;
+ Warn "negative value of $abbrev, setting 0\n";
+ }
+ if ( $rOpts->{$key} > 100 ) {
+ Warn "unreasonably large value of $abbrev, reducing\n";
+ $rOpts->{$key} = 100;
+ }
}
+ };
- # setting a non-negative logfile gap causes logfile to be saved
- $rOpts->{'logfile'} = 1;
- }
+ # check for reasonable number of blank lines and fix to avoid problems
+ $check_blank_count->( 'blank-lines-before-subs', '-blbs' );
+ $check_blank_count->( 'blank-lines-before-packages', '-blbp' );
+ $check_blank_count->( 'blank-lines-after-block-opening', '-blao' );
+ $check_blank_count->( 'blank-lines-before-block-closing', '-blbc' );
- # not setting logfile gap, or setting it negative, causes default of 50
- else {
- $rOpts->{'logfile-gap'} = 50;
+ # setting a non-negative logfile gap causes logfile to be saved
+ if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) {
+ $rOpts->{'logfile'} = 1;
}
# set short-cut flag when only indentation is to be done.
if ( $rOpts->{'opening-brace-always-on-right'}
&& $rOpts->{'opening-brace-on-new-line'} )
{
- warn <<EOM;
+ Warn <<EOM;
Conflict: you specified both 'opening-brace-always-on-right' (-bar) and
'opening-brace-on-new-line' (-bl). Ignoring -bl.
EOM
$rOpts->{'opening-brace-on-new-line'};
}
- # set shortcut flag if no blanks to be written
- unless ( $rOpts->{'maximum-consecutive-blank-lines'} ) {
- $rOpts->{'swallow-optional-blank-lines'} = 1;
- }
-
if ( $rOpts->{'entab-leading-whitespace'} ) {
if ( $rOpts->{'entab-leading-whitespace'} < 0 ) {
- warn "-et=n must use a positive integer; ignoring -et\n";
+ Warn "-et=n must use a positive integer; ignoring -et\n";
$rOpts->{'entab-leading-whitespace'} = undef;
}
# entab leading whitespace has priority over the older 'tabs' option
if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; }
}
+
+ # set a default tabsize to be used in guessing the starting indentation
+ # level if and only if this run does not use tabs and the old code does
+ # use tabs
+ if ( $rOpts->{'default-tabsize'} ) {
+ if ( $rOpts->{'default-tabsize'} < 0 ) {
+ Warn "negative value of -dt, setting 0\n";
+ $rOpts->{'default-tabsize'} = 0;
+ }
+ if ( $rOpts->{'default-tabsize'} > 20 ) {
+ Warn "unreasonably large value of -dt, reducing\n";
+ $rOpts->{'default-tabsize'} = 20;
+ }
+ }
+ else {
+ $rOpts->{'default-tabsize'} = 8;
+ }
+
+ # Define $tabsize, the number of spaces per tab for use in
+ # guessing the indentation of source lines with leading tabs.
+ # Assume same as for this run if tabs are used , otherwise assume
+ # a default value, typically 8
+ my $tabsize =
+ $rOpts->{'entab-leading-whitespace'}
+ ? $rOpts->{'entab-leading-whitespace'}
+ : $rOpts->{'tabs'} ? $rOpts->{'indent-columns'}
+ : $rOpts->{'default-tabsize'};
+ return $tabsize;
+}
+
+sub find_file_upwards {
+ my ( $search_dir, $search_file ) = @_;
+
+ $search_dir =~ s{/+$}{};
+ $search_file =~ s{^/+}{};
+
+ while (1) {
+ my $try_path = "$search_dir/$search_file";
+ if ( -f $try_path ) {
+ return $try_path;
+ }
+ elsif ( $search_dir eq '/' ) {
+ return;
+ }
+ else {
+ $search_dir = dirname($search_dir);
+ }
+ }
}
sub expand_command_abbreviations {
# go through @ARGV and expand any abbreviations
my ( $rexpansion, $rraw_options, $config_file ) = @_;
- my ($word);
# set a pass limit to prevent an infinite loop;
# 10 should be plenty, but it may be increased to allow deeply
# keep looping until all expansions have been converted into actual
# dash parameters..
- for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) {
+ foreach my $pass_count ( 0 .. $max_passes ) {
my @new_argv = ();
my $abbrev_count = 0;
# loop over each item in @ARGV..
- foreach $word (@ARGV) {
+ foreach my $word (@ARGV) {
# convert any leading 'no-' to just 'no'
if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 }
# save the raw input for debug output in case of circular refs
if ( $pass_count == 0 ) {
- push( @$rraw_options, $word );
+ push( @{$rraw_options}, $word );
}
# recombine abbreviation and flag, if necessary,
# make sure we are not in an infinite loop
if ( $pass_count == $max_passes ) {
- print STDERR
-"I'm tired. We seem to be in an infinite loop trying to expand aliases.\n";
- print STDERR "Here are the raw options\n";
local $" = ')(';
- print STDERR "(@$rraw_options)\n";
+ Warn <<EOM;
+I'm tired. We seem to be in an infinite loop trying to expand aliases.
+Here are the raw options;
+(rraw_options)
+EOM
my $num = @new_argv;
-
if ( $num < 50 ) {
- print STDERR "After $max_passes passes here is ARGV\n";
- print STDERR "(@new_argv)\n";
+ Warn <<EOM;
+After $max_passes passes here is ARGV
+(@new_argv)
+EOM
}
else {
- print STDERR "After $max_passes passes ARGV has $num entries\n";
+ Warn <<EOM;
+After $max_passes passes ARGV has $num entries
+EOM
}
if ($config_file) {
- die <<"DIE";
+ Die <<"DIE";
Please check your configuration file $config_file for circular-references.
To deactivate it, use -npro.
DIE
}
else {
- die <<'DIE';
+ Die <<'DIE';
Program bug - circular-references in the %expansion hash, probably due to
a recent program change.
DIE
}
} # end of check for circular references
} # end of loop over all passes
+ return;
}
# Debug routine -- this will dump the expansion hash
--------------------------------------------------------------------------
EOM
foreach my $abbrev ( sort keys %$rexpansion ) {
- my @list = @{ $$rexpansion{$abbrev} };
+ my @list = @{ $rexpansion->{$abbrev} };
print STDOUT "$abbrev --> @list\n";
}
+ return;
}
sub check_vms_filename {
#
# Contributed by Michael Cartmell
#
- my ( $base, $path ) = fileparse( $_[0] );
+ my $filename = shift;
+ my ( $base, $path ) = fileparse($filename);
# remove explicit ; version
$base =~ s/;-?\d*$//
# normalise filename, if there are no unescaped dots then append one
$base .= '.' unless $base =~ /(?:^|[^^])\./;
- # if we don't already have an extension then we just append the extention
+ # if we don't already have an extension then we just append the extension
my $separator = ( $base =~ /\.$/ ) ? "" : "_";
return ( $path . $base, $separator );
}
# are welcome.
unless ( defined $os ) {
$os = "";
- $$rpending_complaint .= <<EOS;
+ ${$rpending_complaint} .= <<EOS;
Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record!
We won't be able to look for a system-wide config file.
EOS
}
- # Unfortunately the logic used for the various versions isnt so clever..
+ # Unfortunately the logic used for the various versions isn't so clever..
# so we have to handle an outside case.
return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os;
}
sub is_unix {
- return ( $^O !~ /win32|dos/i )
+ return
+ ( $^O !~ /win32|dos/i )
&& ( $^O ne 'VMS' )
&& ( $^O ne 'OS2' )
&& ( $^O ne 'MacOS' );
# system-wide configuration files
my $rpending_complaint = shift;
my $is_Windows = ( $^O =~ /win32|dos/i );
- my $Windows_type = Win_OS_Type($rpending_complaint) if $is_Windows;
+ my $Windows_type;
+ $Windows_type = Win_OS_Type($rpending_complaint) if $is_Windows;
return ( $is_Windows, $Windows_type );
}
sub find_config_file {
# look for a .perltidyrc configuration file
+ # For Windows also look for a file named perltidy.ini
my ( $is_Windows, $Windows_type, $rconfig_file_chatter,
$rpending_complaint ) = @_;
- $$rconfig_file_chatter .= "# Config file search...system reported as:";
+ ${$rconfig_file_chatter} .= "# Config file search...system reported as:";
if ($is_Windows) {
- $$rconfig_file_chatter .= "Windows $Windows_type\n";
+ ${$rconfig_file_chatter} .= "Windows $Windows_type\n";
}
else {
- $$rconfig_file_chatter .= " $^O\n";
+ ${$rconfig_file_chatter} .= " $^O\n";
}
- # sub to check file existance and record all tests
+ # sub to check file existence and record all tests
my $exists_config_file = sub {
my $config_file = shift;
return 0 unless $config_file;
- $$rconfig_file_chatter .= "# Testing: $config_file\n";
+ ${$rconfig_file_chatter} .= "# Testing: $config_file\n";
return -f $config_file;
};
+ # Sub to search upward for config file
+ my $resolve_config_file = sub {
+
+ # resolve <dir>/.../<file>, meaning look upwards from directory
+ my $config_file = shift;
+ if ($config_file) {
+ if ( my ( $start_dir, $search_file ) =
+ ( $config_file =~ m{^(.*)\.\.\./(.*)$} ) )
+ {
+ ${$rconfig_file_chatter} .=
+ "# Searching Upward: $config_file\n";
+ $start_dir = '.' if !$start_dir;
+ $start_dir = Cwd::realpath($start_dir);
+ if ( my $found_file =
+ find_file_upwards( $start_dir, $search_file ) )
+ {
+ $config_file = $found_file;
+ ${$rconfig_file_chatter} .= "# Found: $config_file\n";
+ }
+ }
+ }
+ return $config_file;
+ };
+
my $config_file;
# look in current directory first
$config_file = ".perltidyrc";
return $config_file if $exists_config_file->($config_file);
+ if ($is_Windows) {
+ $config_file = "perltidy.ini";
+ return $config_file if $exists_config_file->($config_file);
+ }
# Default environment vars.
my @envs = qw(PERLTIDY HOME);
# network def
push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i;
- # Now go through the enviornment ...
+ # Now go through the environment ...
foreach my $var (@envs) {
- $$rconfig_file_chatter .= "# Examining: \$ENV{$var}";
+ ${$rconfig_file_chatter} .= "# Examining: \$ENV{$var}";
if ( defined( $ENV{$var} ) ) {
- $$rconfig_file_chatter .= " = $ENV{$var}\n";
+ ${$rconfig_file_chatter} .= " = $ENV{$var}\n";
# test ENV{ PERLTIDY } as file:
if ( $var eq 'PERLTIDY' ) {
$config_file = "$ENV{$var}";
+ $config_file = $resolve_config_file->($config_file);
return $config_file if $exists_config_file->($config_file);
}
# test ENV as directory:
$config_file = catfile( $ENV{$var}, ".perltidyrc" );
+ $config_file = $resolve_config_file->($config_file);
return $config_file if $exists_config_file->($config_file);
+
+ if ($is_Windows) {
+ $config_file = catfile( $ENV{$var}, "perltidy.ini" );
+ $config_file = $resolve_config_file->($config_file);
+ return $config_file if $exists_config_file->($config_file);
+ }
}
else {
- $$rconfig_file_chatter .= "\n";
+ ${$rconfig_file_chatter} .= "\n";
}
}
Win_Config_Locs( $rpending_complaint, $Windows_type );
# Check All Users directory, if there is one.
+ # i.e. C:\Documents and Settings\User\perltidy.ini
if ($allusers) {
+
$config_file = catfile( $allusers, ".perltidyrc" );
return $config_file if $exists_config_file->($config_file);
+
+ $config_file = catfile( $allusers, "perltidy.ini" );
+ return $config_file if $exists_config_file->($config_file);
}
# Check system directory.
+ # retain old code in case someone has been able to create
+ # a file with a leading period.
$config_file = catfile( $system, ".perltidyrc" );
return $config_file if $exists_config_file->($config_file);
+
+ $config_file = catfile( $system, "perltidy.ini" );
+ return $config_file if $exists_config_file->($config_file);
}
}
# Directory, and All Users Directory. All Users will be empty on a
# 9x/Me box. Contributed by: Yves Orton.
+ # my ( $rpending_complaint, $os ) = @_;
+ # if ( !$os ) { $os = Win_OS_Type(); }
+
my $rpending_complaint = shift;
my $os = (@_) ? shift : Win_OS_Type();
return unless $os;
}
else {
- # This currently would only happen on a win32s computer. I dont have
+ # This currently would only happen on a win32s computer. I don't have
# one to test, so I am unsure how to proceed. Suggestions welcome!
- $$rpending_complaint .=
+ ${$rpending_complaint} .=
"I dont know a sensible place to look for config files on an $os system.\n";
return;
}
}
sub dump_config_file {
- my $fh = shift;
- my $config_file = shift;
- my $rconfig_file_chatter = shift;
+ my ( $fh, $config_file, $rconfig_file_chatter ) = @_;
print STDOUT "$$rconfig_file_chatter";
if ($fh) {
print STDOUT "# Dump of file: '$config_file'\n";
else {
print STDOUT "# ...no config file found\n";
}
+ return;
}
sub read_config_file {
my $name = undef;
my $line_no;
+ my $opening_brace_line;
while ( my $line = $fh->getline() ) {
$line_no++;
chomp $line;
- next if $line =~ /^\s*#/; # skip full-line comment
( $line, $death_message ) =
strip_comment( $line, $config_file, $line_no );
last if ($death_message);
+ next unless $line;
$line =~ s/^\s*(.*?)\s*$/$1/; # trim both ends
next unless $line;
- # look for something of the general form
- # newname { body }
- # or just
- # body
+ my $body = $line;
- if ( $line =~ /^((\w+)\s*\{)?([^}]*)(\})?$/ ) {
- my ( $newname, $body, $curly ) = ( $2, $3, $4 );
+ # Look for complete or partial abbreviation definition of the form
+ # name { body } or name { or name { body
+ # See rules in perltidy's perldoc page
+ # Section: Other Controls - Creating a new abbreviation
+ if ( $line =~ /^((\w+)\s*\{)(.*)?$/ ) {
+ my $oldname = $name;
+ ( $name, $body ) = ( $2, $3 );
+
+ # Cannot start new abbreviation unless old abbreviation is complete
+ last if ($opening_brace_line);
+
+ $opening_brace_line = $line_no unless ( $body && $body =~ s/\}$// );
# handle a new alias definition
- if ($newname) {
- if ($name) {
- $death_message =
-"No '}' seen after $name and before $newname in config file $config_file line $.\n";
- last;
- }
- $name = $newname;
+ if ( ${$rexpansion}{$name} ) {
+ local $" = ')(';
+ my @names = sort keys %$rexpansion;
+ $death_message =
+ "Here is a list of all installed aliases\n(@names)\n"
+ . "Attempting to redefine alias ($name) in config file $config_file line $.\n";
+ last;
+ }
+ ${$rexpansion}{$name} = [];
+ }
- if ( ${$rexpansion}{$name} ) {
- local $" = ')(';
- my @names = sort keys %$rexpansion;
- $death_message =
- "Here is a list of all installed aliases\n(@names)\n"
- . "Attempting to redefine alias ($name) in config file $config_file line $.\n";
- last;
- }
- ${$rexpansion}{$name} = [];
+ # leading opening braces not allowed
+ elsif ( $line =~ /^{/ ) {
+ $opening_brace_line = undef;
+ $death_message =
+ "Unexpected '{' at line $line_no in config file '$config_file'\n";
+ last;
+ }
+
+ # Look for abbreviation closing: body } or }
+ elsif ( $line =~ /^(.*)?\}$/ ) {
+ $body = $1;
+ if ($opening_brace_line) {
+ $opening_brace_line = undef;
+ }
+ else {
+ $death_message =
+"Unexpected '}' at line $line_no in config file '$config_file'\n";
+ last;
}
+ }
- # now do the body
- if ($body) {
+ # Now store any parameters
+ if ($body) {
- my ( $rbody_parts, $msg ) = parse_args($body);
- if ($msg) {
- $death_message = <<EOM;
+ my ( $rbody_parts, $msg ) = parse_args($body);
+ if ($msg) {
+ $death_message = <<EOM;
Error reading file '$config_file' at line number $line_no.
$msg
Please fix this line or use -npro to avoid reading this file
EOM
- last;
- }
+ last;
+ }
- if ($name) {
+ if ($name) {
- # remove leading dashes if this is an alias
- foreach (@$rbody_parts) { s/^\-+//; }
- push @{ ${$rexpansion}{$name} }, @$rbody_parts;
- }
- else {
- push( @config_list, @$rbody_parts );
- }
+ # remove leading dashes if this is an alias
+ foreach ( @{$rbody_parts} ) { s/^\-+//; }
+ push @{ ${$rexpansion}{$name} }, @{$rbody_parts};
}
-
- if ($curly) {
- unless ($name) {
- $death_message =
-"Unexpected '}' seen in config file $config_file line $.\n";
- last;
- }
- $name = undef;
+ else {
+ push( @config_list, @{$rbody_parts} );
}
}
}
+
+ if ($opening_brace_line) {
+ $death_message =
+"Didn't see a '}' to match the '{' at line $opening_brace_line in config file '$config_file'\n";
+ }
eval { $fh->close() };
return ( \@config_list, $death_message );
}
sub strip_comment {
+ # Strip any comment from a command line
my ( $instr, $config_file, $line_no ) = @_;
my $msg = "";
+ # check for full-line comment
+ if ( $instr =~ /^\s*#/ ) {
+ return ( "", $msg );
+ }
+
# nothing to do if no comments
if ( $instr !~ /#/ ) {
return ( $instr, $msg );
}
- # use simple method of no quotes
+ # handle case of no quotes
elsif ( $instr !~ /['"]/ ) {
- $instr =~ s/\s*\#.*$//; # simple trim
+
+ # We now require a space before the # of a side comment
+ # this allows something like:
+ # -sbcp=#
+ # Otherwise, it would have to be quoted:
+ # -sbcp='#'
+ $instr =~ s/\s+\#.*$//;
return ( $instr, $msg );
}
$outstr .= $1;
$quote_char = $1;
}
+
+ # Note: not yet enforcing the space-before-hash rule for side
+ # comments if the parameter is quoted.
elsif ( $instr =~ /\G#/gc ) {
last;
}
sub dump_long_names {
- my @names = sort @_;
+ my @names = @_;
print STDOUT <<EOM;
# Command line long names (passed to GetOptions)
#---------------------------------------------------------------
#---------------------------------------------------------------
EOM
- foreach (@names) { print STDOUT "$_\n" }
+ foreach my $name ( sort @names ) { print STDOUT "$name\n" }
+ return;
}
sub dump_defaults {
- my @defaults = sort @_;
+ my @defaults = @_;
print STDOUT "Default command line options:\n";
- foreach (@_) { print STDOUT "$_\n" }
+ foreach my $line ( sort @defaults ) { print STDOUT "$line\n" }
+ return;
}
-sub dump_options {
+sub readable_options {
- # write the options back out as a valid .perltidyrc file
+ # return options for this run as a string which could be
+ # put in a perltidyrc file
my ( $rOpts, $roption_string ) = @_;
my %Getopt_flags;
- my $rGetopt_flags = \%Getopt_flags;
+ my $rGetopt_flags = \%Getopt_flags;
+ my $readable_options = "# Final parameter set for this run.\n";
+ $readable_options .=
+ "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n";
foreach my $opt ( @{$roption_string} ) {
my $flag = "";
if ( $opt =~ /(.*)(!|=.*)$/ ) {
$rGetopt_flags->{$opt} = $flag;
}
}
- print STDOUT "# Final parameter set for this run:\n";
foreach my $key ( sort keys %{$rOpts} ) {
my $flag = $rGetopt_flags->{$key};
my $value = $rOpts->{$key};
else {
# shouldn't happen
- print
+ $readable_options .=
"# ERROR in dump_options: unrecognized flag $flag for $key\n";
}
}
- print STDOUT $prefix . $key . $suffix . "\n";
+ $readable_options .= $prefix . $key . $suffix . "\n";
}
+ return $readable_options;
}
sub show_version {
- print <<"EOM";
+ print STDOUT <<"EOM";
This is perltidy, v$VERSION
-Copyright 2000-2007, Steve Hancock
+Copyright 2000-2018, Steve Hancock
Perltidy is free software and may be copied under the terms of the GNU
General Public License, which is included in the distribution files.
Complete documentation for perltidy can be found using 'man perltidy'
or on the internet at http://perltidy.sourceforge.net.
EOM
+ return;
}
sub usage {
-npro ignore .perltidyrc configuration command file
-pro=file read configuration commands from file instead of .perltidyrc
-st send output to standard output, STDOUT
- -se send error output to standard error output, STDERR
+ -se send all error output to standard error output, STDERR
-v display version number to standard output and quit
Basic Options:
-bbs add blank line before subs and packages
-bbc add blank line before block comments
-bbb add blank line between major blocks
- -sob swallow optional blank lines
+ -kbl=n keep old blank lines? 0=no, 1=some, 2=all
+ -mbl=n maximum consecutive blank lines to output (default=1)
-ce cuddled else; use this style: '} else {'
+ -cb cuddled blocks (other than 'if-elsif-else')
+ -cbl=s list of blocks to cuddled, default 'try-catch-finally'
-dnl delete old newlines (default)
- -mbl=n maximum consecutive blank lines (default=1)
-l=n maximum line length; default n=80
-bl opening brace on new line
-sbl opening sub brace on new line. value of -bl is used if not given.
token starts new line: 0=always 1=not unless list 1=never
-wba=s want break after tokens in string; i.e. wba=': .'
-wbb=s want break before tokens in string
+ -wn weld nested: combines opening and closing tokens when both are adjacent
Following Old Breakpoints
+ -kis keep interior semicolons. Allows multiple statements per line.
-boc break at old comma breaks: turns off all automatic list formatting
-bol break at old logical breakpoints: or, and, ||, && (default)
-bok break at old list keyword breakpoints such as map, sort (default)
-bot break at old conditional (ternary ?:) operator breakpoints (default)
+ -boa break at old attribute breakpoints
-cab=n break at commas after a comma-arrow (=>):
n=0 break at all commas after =>
n=1 stable: break unless this breaks an existing one-line container
-ibc indent block comments (default)
-isbc indent spaced block comments; may indent unless no leading space
-msc=n minimum desired spaces to side comment, default 4
+ -fpsc=n fix position for side comments; default 0;
-csc add or update closing side comments after closing BLOCK brace
-dcsc delete closing side comments created by a -csc command
-cscp=s change closing side comment prefix to be other than '## end'
or go to the perltidy home page at http://perltidy.sourceforge.net
EOF
+ return;
}
sub process_this_file {
- my ( $truth, $beauty ) = @_;
+ my ( $tokenizer, $formatter ) = @_;
- # loop to process each line of this file
- while ( my $line_of_tokens = $truth->get_line() ) {
- $beauty->write_line($line_of_tokens);
+ while ( my $line = $tokenizer->get_line() ) {
+ $formatter->write_line($line);
}
+ my $severe_error = $tokenizer->report_tokenization_errors();
+ eval { $formatter->finish_formatting($severe_error) };
- # finish up
- eval { $beauty->finish_formatting() };
- $truth->report_tokenization_errors();
+ return;
}
sub check_syntax {
# Use 'perl -c' to make sure that we did not create bad syntax
# This is a very good independent check for programming errors
#
- # Given names of the input and output files, ($ifname, $ofname),
+ # Given names of the input and output files, ($istream, $ostream),
# we do the following:
# - check syntax of the input file
# - if bad, all done (could be an incomplete code snippet)
# - if outfile syntax bad, issue warning; this implies a code bug!
# - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good
- my ( $ifname, $ofname, $logger_object, $rOpts ) = @_;
+ my ( $istream, $ostream, $logger_object, $rOpts ) = @_;
my $infile_syntax_ok = 0;
my $line_of_dashes = '-' x 42 . "\n";
if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" }
}
- # this shouldn't happen unless a termporary file couldn't be made
- if ( $ifname eq '-' ) {
+ # this shouldn't happen unless a temporary file couldn't be made
+ if ( $istream eq '-' ) {
$logger_object->write_logfile_entry(
"Cannot run perl -c on STDIN and STDOUT\n");
return $infile_syntax_ok;
$logger_object->write_logfile_entry(
"checking input file syntax with perl $flags\n");
- $logger_object->write_logfile_entry($line_of_dashes);
# Not all operating systems/shells support redirection of the standard
# error output.
my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1';
- my $perl_output = do_syntax_check( $ifname, $flags, $error_redirection );
+ my ( $istream_filename, $perl_output ) =
+ do_syntax_check( $istream, $flags, $error_redirection );
+ $logger_object->write_logfile_entry(
+ "Input stream passed to Perl as file $istream_filename\n");
+ $logger_object->write_logfile_entry($line_of_dashes);
$logger_object->write_logfile_entry("$perl_output\n");
if ( $perl_output =~ /syntax\s*OK/ ) {
$logger_object->write_logfile_entry($line_of_dashes);
$logger_object->write_logfile_entry(
"checking output file syntax with perl $flags ...\n");
+ my ( $ostream_filename, $perl_output ) =
+ do_syntax_check( $ostream, $flags, $error_redirection );
+ $logger_object->write_logfile_entry(
+ "Output stream passed to Perl as file $ostream_filename\n");
$logger_object->write_logfile_entry($line_of_dashes);
-
- my $perl_output =
- do_syntax_check( $ofname, $flags, $error_redirection );
$logger_object->write_logfile_entry("$perl_output\n");
unless ( $perl_output =~ /syntax\s*OK/ ) {
$logger_object->write_logfile_entry($line_of_dashes);
$logger_object->warning(
-"The output file has a syntax error when tested with perl $flags $ofname !\n"
+"The output file has a syntax error when tested with perl $flags $ostream !\n"
);
$logger_object->warning(
- "This implies an error in perltidy; the file $ofname is bad\n");
+ "This implies an error in perltidy; the file $ostream is bad\n"
+ );
$logger_object->report_definite_bug();
# the perl version number will be helpful for diagnosing the problem
# Only warn of perl -c syntax errors. Other messages,
# such as missing modules, are too common. They can be
# seen by running with perltidy -w
- $logger_object->complain("A syntax check using perl $flags gives: \n");
+ $logger_object->complain("A syntax check using perl $flags\n");
+ $logger_object->complain(
+ "for the output in file $istream_filename gives:\n");
$logger_object->complain($line_of_dashes);
$logger_object->complain("$perl_output\n");
$logger_object->complain($line_of_dashes);
}
sub do_syntax_check {
- my ( $fname, $flags, $error_redirection ) = @_;
+ my ( $stream, $flags, $error_redirection ) = @_;
+
+ ############################################################
+ # This code is not reachable because syntax check is deactivated,
+ # but it is retained for reference.
+ ############################################################
+
+ # We need a named input file for executing perl
+ my ( $stream_filename, $is_tmpfile ) = get_stream_as_named_file($stream);
+
+ # TODO: Need to add name of file to log somewhere
+ # otherwise Perl output is hard to read
+ if ( !$stream_filename ) { return $stream_filename, "" }
# We have to quote the filename in case it has unusual characters
# or spaces. Example: this filename #CM11.pm# gives trouble.
- $fname = '"' . $fname . '"';
+ my $quoted_stream_filename = '"' . $stream_filename . '"';
# Under VMS something like -T will become -t (and an error) so we
# will put quotes around the flags. Double quotes seem to work on
$flags = '"' . $flags . '"';
# now wish for luck...
- return qx/perl $flags $fname $error_redirection/;
+ my $msg = qx/perl $flags $quoted_stream_filename $error_redirection/;
+
+ if ($is_tmpfile) {
+ unlink $stream_filename
+ or Perl::Tidy::Die("couldn't unlink stream $stream_filename: $!\n");
+ }
+ return $stream_filename, $msg;
}
#####################################################################
}
if ( $mode eq 'w' ) {
- $$rscalar = "";
+ ${$rscalar} = "";
return bless [ $rscalar, $mode ], $package;
}
elsif ( $mode eq 'r' ) {
# Convert a scalar to an array.
# This avoids looking for "\n" on each call to getline
- my @array = map { $_ .= "\n" } split /\n/, ${$rscalar};
+ #
+ # NOTES: The -1 count is needed to avoid loss of trailing blank lines
+ # (which might be important in a DATA section).
+ my @array;
+ if ( $rscalar && ${$rscalar} ) {
+ @array = map { $_ .= "\n" } split /\n/, ${$rscalar}, -1;
+
+ # remove possible extra blank line introduced with split
+ if ( @array && $array[-1] eq "\n" ) { pop @array }
+ }
my $i_next = 0;
return bless [ \@array, $mode, $i_next ], $package;
}
EOM
}
my $i = $self->[2]++;
- ##my $line = $self->[0]->[$i];
return $self->[0]->[$i];
}
sub print {
- my $self = shift;
+ my ( $self, $msg ) = @_;
my $mode = $self->[1];
if ( $mode ne 'w' ) {
confess <<EOM;
------------------------------------------------------------------------
EOM
}
- ${ $self->[0] } .= $_[0];
+ ${ $self->[0] } .= $msg;
}
sub close { return }
# a getline method which reads lines (mode='r'), or
# a print method which reads lines (mode='w')
#
-# NOTE: this routine assumes that that there aren't any embedded
+# NOTE: this routine assumes that there aren't any embedded
# newlines within any of the array elements. There are no checks
# for that.
#
}
if ( $mode eq 'w' ) {
- @$rarray = ();
+ @{$rarray} = ();
return bless [ $rarray, $mode ], $package;
}
elsif ( $mode eq 'r' ) {
EOM
}
my $i = $self->[2]++;
- ##my $line = $self->[0]->[$i];
return $self->[0]->[$i];
}
sub print {
- my $self = shift;
+ my ( $self, $msg ) = @_;
my $mode = $self->[1];
if ( $mode ne 'w' ) {
confess <<EOM;
------------------------------------------------------------------------
EOM
}
- push @{ $self->[0] }, $_[0];
+ push @{ $self->[0] }, $msg;
}
sub close { return }
sub new {
my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_;
- my $input_file_copy = undef;
- my $fh_copy;
my $input_line_ending;
if ( $rOpts->{'preserve-line-endings'} ) {
}
( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' );
- return undef unless $fh;
+ return unless $fh;
# in order to check output syntax when standard output is used,
# or when it is an object, we have to make a copy of the file
# The reason is that temporary files cause problems on
# on many systems.
$rOpts->{'check-syntax'} = 0;
- $input_file_copy = '-';
- $$rpending_logfile_message .= <<EOM;
+ ${$rpending_logfile_message} .= <<EOM;
Note: --syntax check will be skipped because standard input is used
EOM
return bless {
_fh => $fh,
- _fh_copy => $fh_copy,
_filename => $input_file,
- _input_file_copy => $input_file_copy,
_input_line_ending => $input_line_ending,
_rinput_buffer => [],
_started => 0,
}, $class;
}
-sub get_input_file_copy_name {
- my $self = shift;
- my $ifname = $self->{_input_file_copy};
- unless ($ifname) {
- $ifname = $self->{_filename};
- }
- return $ifname;
-}
-
sub close_input_file {
my $self = shift;
- eval { $self->{_fh}->close() };
- eval { $self->{_fh_copy}->close() } if $self->{_fh_copy};
+
+ # Only close physical files, not STDIN and other objects
+ my $filename = $self->{_filename};
+ if ( $filename ne '-' && !ref $filename ) {
+ eval { $self->{_fh}->close() };
+ }
+ return;
}
sub get_line {
my $self = shift;
my $line = undef;
my $fh = $self->{_fh};
- my $fh_copy = $self->{_fh_copy};
my $rinput_buffer = $self->{_rinput_buffer};
- if ( scalar(@$rinput_buffer) ) {
- $line = shift @$rinput_buffer;
+ if ( scalar( @{$rinput_buffer} ) ) {
+ $line = shift @{$rinput_buffer};
}
else {
$line = $fh->getline();
if ( $line =~ /[\015][^\015\012]/ ) {
# found one -- break the line up and store in a buffer
- @$rinput_buffer = map { $_ . "\n" } split /\015/, $line;
- my $count = @$rinput_buffer;
- $line = shift @$rinput_buffer;
+ @{$rinput_buffer} = map { $_ . "\n" } split /\015/, $line;
+ my $count = @{$rinput_buffer};
+ $line = shift @{$rinput_buffer};
}
$self->{_started}++;
}
}
- if ( $line && $fh_copy ) { $fh_copy->print($line); }
- return $line;
-}
-
-sub old_get_line {
- my $self = shift;
- my $line = undef;
- my $fh = $self->{_fh};
- my $fh_copy = $self->{_fh_copy};
- $line = $fh->getline();
- if ( $line && $fh_copy ) { $fh_copy->print($line); }
return $line;
}
my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
$rpending_logfile_message, $binmode )
= @_;
- my $fh = undef;
- my $fh_copy = undef;
- my $fh_tee = undef;
- my $output_file_copy = "";
+ my $fh = undef;
+ my $fh_tee = undef;
+
my $output_file_open = 0;
if ( $rOpts->{'format'} eq 'tidy' ) {
( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
- unless ($fh) { die "Cannot write to output stream\n"; }
+ unless ($fh) { Perl::Tidy::Die "Cannot write to output stream\n"; }
$output_file_open = 1;
if ($binmode) {
- if ( ref($fh) eq 'IO::File' ) {
- binmode $fh;
+ if ( $rOpts->{'character-encoding'}
+ && $rOpts->{'character-encoding'} eq 'utf8' )
+ {
+ if ( ref($fh) eq 'IO::File' ) {
+ $fh->binmode(":encoding(UTF-8)");
+ }
+ elsif ( $output_file eq '-' ) {
+ binmode STDOUT, ":encoding(UTF-8)";
+ }
}
- if ( $output_file eq '-' ) { binmode STDOUT }
+
+ # Patch for RT 122030
+ elsif ( ref($fh) eq 'IO::File' ) { $fh->binmode(); }
+
+ elsif ( $output_file eq '-' ) { binmode STDOUT }
}
}
# The reason is that temporary files cause problems on
# on many systems.
$rOpts->{'check-syntax'} = 0;
- $output_file_copy = '-';
- $$rpending_logfile_message .= <<EOM;
+ ${$rpending_logfile_message} .= <<EOM;
Note: --syntax check will be skipped because standard output is used
EOM
}
}
- bless {
+ return bless {
_fh => $fh,
- _fh_copy => $fh_copy,
_fh_tee => $fh_tee,
_output_file => $output_file,
_output_file_open => $output_file_open,
- _output_file_copy => $output_file_copy,
_tee_flag => 0,
_tee_file => $tee_file,
_tee_file_opened => 0,
sub write_line {
- my $self = shift;
- my $fh = $self->{_fh};
- my $fh_copy = $self->{_fh_copy};
+ my ( $self, $line ) = @_;
+ my $fh = $self->{_fh};
my $output_file_open = $self->{_output_file_open};
- chomp $_[0];
- $_[0] .= $self->{_line_separator};
+ chomp $line;
+ $line .= $self->{_line_separator};
- $fh->print( $_[0] ) if ( $self->{_output_file_open} );
- print $fh_copy $_[0] if ( $fh_copy && $self->{_output_file_copy} );
+ $fh->print($line) if ( $self->{_output_file_open} );
if ( $self->{_tee_flag} ) {
unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() }
my $fh_tee = $self->{_fh_tee};
- print $fh_tee $_[0];
- }
-}
-
-sub get_output_file_copy {
- my $self = shift;
- my $ofname = $self->{_output_file_copy};
- unless ($ofname) {
- $ofname = $self->{_output_file};
+ print $fh_tee $line;
}
- return $ofname;
+ return;
}
sub tee_on {
my $self = shift;
$self->{_tee_flag} = 1;
+ return;
}
sub tee_off {
my $self = shift;
$self->{_tee_flag} = 0;
+ return;
}
sub really_open_tee_file {
my $tee_file = $self->{_tee_file};
my $fh_tee;
$fh_tee = IO::File->new(">$tee_file")
- or die("couldn't open TEE file $tee_file: $!\n");
+ or Perl::Tidy::Die("couldn't open TEE file $tee_file: $!\n");
binmode $fh_tee if $self->{_binmode};
$self->{_tee_file_opened} = 1;
$self->{_fh_tee} = $fh_tee;
+ return;
}
sub close_output_file {
my $self = shift;
- eval { $self->{_fh}->close() } if $self->{_output_file_open};
- eval { $self->{_fh_copy}->close() } if ( $self->{_output_file_copy} );
+
+ # Only close physical files, not STDOUT and other objects
+ my $output_file = $self->{_output_file};
+ if ( $output_file ne '-' && !ref $output_file ) {
+ eval { $self->{_fh}->close() } if $self->{_output_file_open};
+ }
$self->close_tee_file();
+ return;
}
sub close_tee_file {
my $self = shift;
+ # Only close physical files, not STDOUT and other objects
if ( $self->{_tee_file_opened} ) {
- eval { $self->{_fh_tee}->close() };
- $self->{_tee_file_opened} = 0;
+ my $tee_file = $self->{_tee_file};
+ if ( $tee_file ne '-' && !ref $tee_file ) {
+ eval { $self->{_fh_tee}->close() };
+ $self->{_tee_file_opened} = 0;
+ }
}
+ return;
}
#####################################################################
# Only one such file is created regardless of the number of input
# files processed. This allows the results of processing many files
# to be summarized in a single file.
+
+# Output messages go to a file named DIAGNOSTICS, where
+# they are labeled by file and line. This allows many files to be
+# scanned at once for some particular condition of interest. It was
+# particularly useful for developing guessing strategies.
+#
+# NOTE: This feature is deactivated in final releases but can be
+# reactivated for debugging by un-commenting the 'I' options flag
#
#####################################################################
sub new {
my $class = shift;
- bless {
+ return bless {
_write_diagnostics_count => 0,
_last_diagnostic_file => "",
_input_file => "",
}
sub set_input_file {
- my $self = shift;
- $self->{_input_file} = $_[0];
+ my ( $self, $input_file ) = @_;
+ $self->{_input_file} = $input_file;
+ return;
}
-# This is a diagnostic routine which is useful for program development.
-# Output from debug messages go to a file named DIAGNOSTICS, where
-# they are labeled by file and line. This allows many files to be
-# scanned at once for some particular condition of interest.
sub write_diagnostics {
- my $self = shift;
+ my ( $self, $msg ) = @_;
unless ( $self->{_write_diagnostics_count} ) {
- open DIAGNOSTICS, ">DIAGNOSTICS"
- or death("couldn't open DIAGNOSTICS: $!\n");
+ open( $self->{_fh}, ">", "DIAGNOSTICS" )
+ or Perl::Tidy::Die("couldn't open DIAGNOSTICS: $!\n");
}
+ my $fh = $self->{_fh};
my $last_diagnostic_file = $self->{_last_diagnostic_file};
my $input_file = $self->{_input_file};
if ( $last_diagnostic_file ne $input_file ) {
- print DIAGNOSTICS "\nFILE:$input_file\n";
+ $fh->print("\nFILE:$input_file\n");
}
$self->{_last_diagnostic_file} = $input_file;
my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number();
- print DIAGNOSTICS "$input_line_number:\t@_";
+ $fh->print("$input_line_number:\t$msg");
$self->{_write_diagnostics_count}++;
+ return;
}
#####################################################################
package Perl::Tidy::Logger;
sub new {
- my $class = shift;
- my $fh;
- my ( $rOpts, $log_file, $warning_file, $saw_extrude ) = @_;
- # remove any old error output file
- unless ( ref($warning_file) ) {
- if ( -e $warning_file ) { unlink($warning_file) }
+ my ( $class, $rOpts, $log_file, $warning_file, $fh_stderr, $saw_extrude ) =
+ @_;
+
+ my $fh_warnings = $rOpts->{'standard-error-output'} ? $fh_stderr : undef;
+
+ # remove any old error output file if we might write a new one
+ unless ( $fh_warnings || ref($warning_file) ) {
+ if ( -e $warning_file ) {
+ unlink($warning_file)
+ or Perl::Tidy::Die(
+ "couldn't unlink warning file $warning_file: $!\n");
+ }
}
- bless {
+ my $logfile_gap =
+ defined( $rOpts->{'logfile-gap'} )
+ ? $rOpts->{'logfile-gap'}
+ : 50;
+ if ( $logfile_gap == 0 ) { $logfile_gap = 1 }
+
+ return bless {
_log_file => $log_file,
- _fh_warnings => undef,
+ _logfile_gap => $logfile_gap,
_rOpts => $rOpts,
- _fh_warnings => undef,
+ _fh_warnings => $fh_warnings,
_last_input_line_written => 0,
_at_end_of_file => 0,
_use_prefix => 1,
}, $class;
}
-sub close_log_file {
-
- my $self = shift;
- if ( $self->{_fh_warnings} ) {
- eval { $self->{_fh_warnings}->close() };
- $self->{_fh_warnings} = undef;
- }
-}
-
sub get_warning_count {
my $self = shift;
return $self->{_warning_count};
sub block_log_output {
my $self = shift;
$self->{_block_log_output} = 1;
+ return;
}
sub unblock_log_output {
my $self = shift;
$self->{_block_log_output} = 0;
+ return;
}
sub interrupt_logfile {
$self->{_use_prefix} = 0;
$self->warning("\n");
$self->write_logfile_entry( '#' x 24 . " WARNING " . '#' x 25 . "\n" );
+ return;
}
sub resume_logfile {
my $self = shift;
$self->write_logfile_entry( '#' x 60 . "\n" );
$self->{_use_prefix} = 1;
+ return;
}
sub we_are_at_the_last_line {
$self->write_logfile_entry("Last line\n\n");
}
$self->{_at_end_of_file} = 1;
+ return;
}
# record some stuff in case we go down in flames
sub black_box {
- my $self = shift;
- my ( $line_of_tokens, $output_line_number ) = @_;
+ my ( $self, $line_of_tokens, $output_line_number ) = @_;
my $input_line = $line_of_tokens->{_line_text};
my $input_line_number = $line_of_tokens->{_line_number};
if (
(
( $input_line_number - $last_input_line_written ) >=
- $rOpts->{'logfile-gap'}
+ $self->{_logfile_gap}
)
|| ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
)
{
- my $rlevels = $line_of_tokens->{_rlevels};
- my $structural_indentation_level = $$rlevels[0];
+ my $structural_indentation_level = $line_of_tokens->{_level_0};
+ $structural_indentation_level = 0
+ if ( $structural_indentation_level < 0 );
$self->{_last_input_line_written} = $input_line_number;
( my $out_str = $input_line ) =~ s/^\s*//;
chomp $out_str;
}
$self->logfile_output( "", "$out_str\n" );
}
+ return;
}
sub write_logfile_entry {
- my $self = shift;
- # add leading >>> to avoid confusing error mesages and code
- $self->logfile_output( ">>>", "@_" );
+ my ( $self, @msg ) = @_;
+
+ # add leading >>> to avoid confusing error messages and code
+ $self->logfile_output( ">>>", "@msg" );
+ return;
}
sub write_column_headings {
lines levels i k (code begins with one '.' per indent level)
------ ----- - - -------- -------------------------------------------
EOM
+ return;
}
sub make_line_information_string {
my $brace_depth = $line_of_tokens->{_curly_brace_depth};
my $paren_depth = $line_of_tokens->{_paren_depth};
my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
- my $python_indentation_level =
- $line_of_tokens->{_python_indentation_level};
- my $rlevels = $line_of_tokens->{_rlevels};
- my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
- my $rci_levels = $line_of_tokens->{_rci_levels};
- my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
+ my $guessed_indentation_level =
+ $line_of_tokens->{_guessed_indentation_level};
+ ##my $rtoken_array = $line_of_tokens->{_rtoken_array};
- my $structural_indentation_level = $$rlevels[0];
+ my $structural_indentation_level = $line_of_tokens->{_level_0};
$self->write_column_headings() unless $self->{_wrote_column_headings};
# could be arbitrarily long, so we use it unless it is too long
my $nesting_string =
"($paren_depth [$square_bracket_depth {$brace_depth";
- my $nesting_string_new = $$rnesting_tokens[0];
-
- my $ci_level = $$rci_levels[0];
+ my $nesting_string_new = $line_of_tokens->{_nesting_tokens_0};
+ my $ci_level = $line_of_tokens->{_ci_level_0};
if ( $ci_level > 9 ) { $ci_level = '*' }
- my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0';
+ my $bk = ( $line_of_tokens->{_nesting_blocks_0} =~ /1$/ ) ? '1' : '0';
if ( length($nesting_string_new) <= 8 ) {
$nesting_string =
$nesting_string_new . " " x ( 8 - length($nesting_string_new) );
}
- if ( $python_indentation_level < 0 ) { $python_indentation_level = 0 }
$line_information_string =
-"L$input_line_number:$output_line_number$extra_space i$python_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
+"L$input_line_number:$output_line_number$extra_space i$guessed_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
}
return $line_information_string;
}
sub logfile_output {
- my $self = shift;
- my ( $prompt, $msg ) = @_;
+ my ( $self, $prompt, $msg ) = @_;
return if ( $self->{_block_log_output} );
my $routput_array = $self->{_output_array};
push @{$routput_array}, "$msg";
}
}
+ return;
}
sub get_saw_brace_error {
sub increment_brace_error {
my $self = shift;
$self->{_saw_brace_error}++;
+ return;
}
sub brace_warning {
- my $self = shift;
- use constant BRACE_WARNING_LIMIT => 10;
- my $saw_brace_error = $self->{_saw_brace_error};
+ my ( $self, $msg ) = @_;
- if ( $saw_brace_error < BRACE_WARNING_LIMIT ) {
- $self->warning(@_);
+ #use constant BRACE_WARNING_LIMIT => 10;
+ my $BRACE_WARNING_LIMIT = 10;
+ my $saw_brace_error = $self->{_saw_brace_error};
+
+ if ( $saw_brace_error < $BRACE_WARNING_LIMIT ) {
+ $self->warning($msg);
}
$saw_brace_error++;
$self->{_saw_brace_error} = $saw_brace_error;
- if ( $saw_brace_error == BRACE_WARNING_LIMIT ) {
+ if ( $saw_brace_error == $BRACE_WARNING_LIMIT ) {
$self->warning("No further warnings of this type will be given\n");
}
+ return;
}
sub complain {
# handle non-critical warning messages based on input flag
- my $self = shift;
+ my ( $self, $msg ) = @_;
my $rOpts = $self->{_rOpts};
# these appear in .ERR output only if -w flag is used
if ( $rOpts->{'warning-output'} ) {
- $self->warning(@_);
+ $self->warning($msg);
}
# otherwise, they go to the .LOG file
else {
$self->{_complaint_count}++;
- $self->write_logfile_entry(@_);
+ $self->write_logfile_entry($msg);
}
+ return;
}
sub warning {
# report errors to .ERR file (or stdout)
- my $self = shift;
- use constant WARNING_LIMIT => 50;
+ my ( $self, $msg ) = @_;
+
+ #use constant WARNING_LIMIT => 50;
+ my $WARNING_LIMIT = 50;
my $rOpts = $self->{_rOpts};
unless ( $rOpts->{'quiet'} ) {
my $warning_count = $self->{_warning_count};
- unless ($warning_count) {
+ my $fh_warnings = $self->{_fh_warnings};
+ if ( !$fh_warnings ) {
my $warning_file = $self->{_warning_file};
- my $fh_warnings;
- if ( $rOpts->{'standard-error-output'} ) {
- $fh_warnings = *STDERR;
- }
- else {
- ( $fh_warnings, my $filename ) =
- Perl::Tidy::streamhandle( $warning_file, 'w' );
- $fh_warnings or die("couldn't open $filename $!\n");
- warn "## Please see file $filename\n";
- }
+ ( $fh_warnings, my $filename ) =
+ Perl::Tidy::streamhandle( $warning_file, 'w' );
+ $fh_warnings or Perl::Tidy::Die("couldn't open $filename $!\n");
+ Perl::Tidy::Warn "## Please see file $filename\n"
+ unless ref($warning_file);
$self->{_fh_warnings} = $fh_warnings;
+ $fh_warnings->print("Perltidy version is $Perl::Tidy::VERSION\n");
}
- my $fh_warnings = $self->{_fh_warnings};
- if ( $warning_count < WARNING_LIMIT ) {
+ if ( $warning_count < $WARNING_LIMIT ) {
if ( $self->get_use_prefix() > 0 ) {
my $input_line_number =
Perl::Tidy::Tokenizer::get_input_line_number();
- $fh_warnings->print("$input_line_number:\t@_");
- $self->write_logfile_entry("WARNING: @_");
+ if ( !defined($input_line_number) ) { $input_line_number = -1 }
+ $fh_warnings->print("$input_line_number:\t$msg");
+ $self->write_logfile_entry("WARNING: $msg");
}
else {
- $fh_warnings->print(@_);
- $self->write_logfile_entry(@_);
+ $fh_warnings->print($msg);
+ $self->write_logfile_entry($msg);
}
}
$warning_count++;
$self->{_warning_count} = $warning_count;
- if ( $warning_count == WARNING_LIMIT ) {
+ if ( $warning_count == $WARNING_LIMIT ) {
$fh_warnings->print("No further warnings will be given\n");
}
}
+ return;
}
# programming bug codes:
my $self = shift;
my $saw_code_bug = $self->{_saw_code_bug};
$self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug;
+ return;
}
sub report_definite_bug {
my $self = shift;
$self->{_saw_code_bug} = 1;
+ return;
}
sub ask_user_for_bug_report {
- my $self = shift;
- my ( $infile_syntax_ok, $formatter ) = @_;
+ my ( $self, $infile_syntax_ok, $formatter ) = @_;
my $saw_code_bug = $self->{_saw_code_bug};
if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) {
$self->warning(<<EOM);
}
}
}
+ return;
}
sub finish {
# called after all formatting to summarize errors
- my $self = shift;
- my ( $infile_syntax_ok, $formatter ) = @_;
+ my ( $self, $infile_syntax_ok, $formatter ) = @_;
my $rOpts = $self->{_rOpts};
my $warning_count = $self->{_warning_count};
my $saw_code_bug = $self->{_saw_code_bug};
- my $save_logfile = ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
+ my $save_logfile =
+ ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
|| $saw_code_bug == 1
|| $rOpts->{'logfile'};
my $log_file = $self->{_log_file};
}
if ( $self->{_saw_brace_error}
- && ( $rOpts->{'logfile-gap'} > 1 || !$save_logfile ) )
+ && ( $self->{_logfile_gap} > 1 || !$save_logfile ) )
{
$self->warning("To save a full .LOG file rerun with -g\n");
}
if ($fh) {
my $routput_array = $self->{_output_array};
foreach ( @{$routput_array} ) { $fh->print($_) }
- eval { $fh->close() };
+ if ( $log_file ne '-' && !ref $log_file ) {
+ eval { $fh->close() };
+ }
}
}
+ return;
}
#####################################################################
#####################################################################
package Perl::Tidy::DevNull;
-sub new { return bless {}, $_[0] }
+sub new { my $self = shift; return bless {}, $self }
sub print { return }
sub close { return }
( $html_fh, my $html_filename ) =
Perl::Tidy::streamhandle( $html_file, 'w' );
unless ($html_fh) {
- warn("can't open $html_file: $!\n");
- return undef;
+ Perl::Tidy::Warn("can't open $html_file: $!\n");
+ return;
}
$html_file_opened = 1;
else {
eval "use Pod::Html";
if ($@) {
- warn
+ Perl::Tidy::Warn
"unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
undef $rOpts->{'pod2html'};
}
my $src_filename;
if ( $rOpts->{'frames'} ) {
unless ($extension) {
- warn
+ Perl::Tidy::Warn
"cannot use frames without a specified output extension; ignoring -frm\n";
undef $rOpts->{'frames'};
}
my $toc_item_count = 0;
my $in_toc_package = "";
my $last_level = 0;
- bless {
+ return bless {
_input_file => $input_file, # name of input file
_title => $title, # title, unescaped
_html_file => $html_file, # name of .html output file
# We are given an anchor name and its type; types are:
# 'package', 'sub', '__END__', '__DATA__', 'EOF'
# There must be an 'EOF' call at the end to wrap things up.
- my $self = shift;
- my ( $name, $type ) = @_;
+ my ( $self, $name, $type ) = @_;
my $html_toc_fh = $self->{_html_toc_fh};
my $html_pre_fh = $self->{_html_pre_fh};
my $rtoc_name_count = $self->{_rtoc_name_count};
# packages contain sublists of subs, so to avoid errors all package
# items are written and finished with the following routines
my $end_package_list = sub {
- if ($$rin_toc_package) {
+ if ( ${$rin_toc_package} ) {
$html_toc_fh->print("</ul>\n</li>\n");
- $$rin_toc_package = "";
+ ${$rin_toc_package} = "";
}
};
my $start_package_list = sub {
my ( $unique_name, $package ) = @_;
- if ($$rin_toc_package) { $end_package_list->() }
+ if ( ${$rin_toc_package} ) { $end_package_list->() }
$html_toc_fh->print(<<EOM);
<li><a href=\"#$unique_name\">package $package</a>
<ul>
EOM
- $$rin_toc_package = $package;
+ ${$rin_toc_package} = $package;
};
# start the table of contents on the first item
- unless ($$rtoc_item_count) {
+ unless ( ${$rtoc_item_count} ) {
# but just quit if we hit EOF without any other entries
# in this case, there will be no toc
<ul>
TOC_END
}
- $$rtoc_item_count++;
+ ${$rtoc_item_count}++;
# make a unique anchor name for this location:
# - packages get a 'package-' prefix
# start/stop lists of subs
if ( $type eq 'sub' ) {
- my $package = $rpackage_stack->[$$rlast_level];
+ my $package = $rpackage_stack->[ ${$rlast_level} ];
unless ($package) { $package = 'main' }
# if we're already in a package/sub list, be sure its the right
# package or else close it
- if ( $$rin_toc_package && $$rin_toc_package ne $package ) {
+ if ( ${$rin_toc_package} && ${$rin_toc_package} ne $package ) {
$end_package_list->();
}
# start a package/sub list if necessary
- unless ($$rin_toc_package) {
+ unless ( ${$rin_toc_package} ) {
$start_package_list->( $unique_name, $package );
}
}
<!-- END CODE INDEX -->
TOC_END
}
+ return;
}
BEGIN {
);
# These token types will all be called identifiers for now
- # FIXME: need to separate user defined modules as separate type
- my @identifier = qw" i t U C Y Z G :: ";
+ # FIXME: could separate user defined modules as separate type
+ my @identifier = qw" i t U C Y Z G :: CORE::";
@token_short_names{@identifier} = ('i') x scalar(@identifier);
# These token types will be called 'structure'
}
sub make_getopt_long_names {
- my $class = shift;
- my ($rgetopt_names) = @_;
+ my ( $class, $rgetopt_names ) = @_;
while ( my ( $short_name, $name ) = each %short_to_long_names ) {
- push @$rgetopt_names, "html-color-$name=s";
- push @$rgetopt_names, "html-italic-$name!";
- push @$rgetopt_names, "html-bold-$name!";
- }
- push @$rgetopt_names, "html-color-background=s";
- push @$rgetopt_names, "html-linked-style-sheet=s";
- push @$rgetopt_names, "nohtml-style-sheets";
- push @$rgetopt_names, "html-pre-only";
- push @$rgetopt_names, "html-line-numbers";
- push @$rgetopt_names, "html-entities!";
- push @$rgetopt_names, "stylesheet";
- push @$rgetopt_names, "html-table-of-contents!";
- push @$rgetopt_names, "pod2html!";
- push @$rgetopt_names, "frames!";
- push @$rgetopt_names, "html-toc-extension=s";
- push @$rgetopt_names, "html-src-extension=s";
+ push @{$rgetopt_names}, "html-color-$name=s";
+ push @{$rgetopt_names}, "html-italic-$name!";
+ push @{$rgetopt_names}, "html-bold-$name!";
+ }
+ push @{$rgetopt_names}, "html-color-background=s";
+ push @{$rgetopt_names}, "html-linked-style-sheet=s";
+ push @{$rgetopt_names}, "nohtml-style-sheets";
+ push @{$rgetopt_names}, "html-pre-only";
+ push @{$rgetopt_names}, "html-line-numbers";
+ push @{$rgetopt_names}, "html-entities!";
+ push @{$rgetopt_names}, "stylesheet";
+ push @{$rgetopt_names}, "html-table-of-contents!";
+ push @{$rgetopt_names}, "pod2html!";
+ push @{$rgetopt_names}, "frames!";
+ push @{$rgetopt_names}, "html-toc-extension=s";
+ push @{$rgetopt_names}, "html-src-extension=s";
# Pod::Html parameters:
- push @$rgetopt_names, "backlink=s";
- push @$rgetopt_names, "cachedir=s";
- push @$rgetopt_names, "htmlroot=s";
- push @$rgetopt_names, "libpods=s";
- push @$rgetopt_names, "podpath=s";
- push @$rgetopt_names, "podroot=s";
- push @$rgetopt_names, "title=s";
+ push @{$rgetopt_names}, "backlink=s";
+ push @{$rgetopt_names}, "cachedir=s";
+ push @{$rgetopt_names}, "htmlroot=s";
+ push @{$rgetopt_names}, "libpods=s";
+ push @{$rgetopt_names}, "podpath=s";
+ push @{$rgetopt_names}, "podroot=s";
+ push @{$rgetopt_names}, "title=s";
# Pod::Html parameters with leading 'pod' which will be removed
# before the call to Pod::Html
- push @$rgetopt_names, "podquiet!";
- push @$rgetopt_names, "podverbose!";
- push @$rgetopt_names, "podrecurse!";
- push @$rgetopt_names, "podflush";
- push @$rgetopt_names, "podheader!";
- push @$rgetopt_names, "podindex!";
+ push @{$rgetopt_names}, "podquiet!";
+ push @{$rgetopt_names}, "podverbose!";
+ push @{$rgetopt_names}, "podrecurse!";
+ push @{$rgetopt_names}, "podflush";
+ push @{$rgetopt_names}, "podheader!";
+ push @{$rgetopt_names}, "podindex!";
+ return;
}
sub make_abbreviated_names {
# 'hcc' => [qw(html-color-comment)],
# 'hck' => [qw(html-color-keyword)],
# etc
- my $class = shift;
- my ($rexpansion) = @_;
+ my ( $class, $rexpansion ) = @_;
# abbreviations for color/bold/italic properties
while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
${$rexpansion}{"nfrm"} = ["noframes"];
${$rexpansion}{"text"} = ["html-toc-extension"];
${$rexpansion}{"sext"} = ["html-src-extension"];
+ return;
}
sub check_options {
# This will be called once after options have been parsed
- my $class = shift;
- $rOpts = shift;
+ my ( $class, $rOpts ) = @_;
# X11 color names for default settings that seemed to look ok
# (these color names are only used for programming clarity; the hex
# write style sheet to STDOUT and die if requested
if ( defined( $rOpts->{'stylesheet'} ) ) {
write_style_sheet_file('-');
- exit 1;
+ Perl::Tidy::Exit 0;
}
# make sure user gives a file name after -css
if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) {
$css_linkname = $rOpts->{'html-linked-style-sheet'};
if ( $css_linkname =~ /^-/ ) {
- die "You must specify a valid filename after -css\n";
+ Perl::Tidy::Die "You must specify a valid filename after -css\n";
}
}
}
}
$missing_html_entities = 1 unless $rOpts->{'html-entities'};
+ return;
}
sub write_style_sheet_file {
my $css_filename = shift;
my $fh;
unless ( $fh = IO::File->new("> $css_filename") ) {
- die "can't open $css_filename: $!\n";
+ Perl::Tidy::Die "can't open $css_filename: $!\n";
}
write_style_sheet_data($fh);
eval { $fh->close };
+ return;
}
sub write_style_sheet_data {
}
$fh->print("} /* $long_name */\n");
}
+ return;
}
sub set_default_color {
my ( $key, $color ) = @_;
if ( $rOpts->{$key} ) { $color = $rOpts->{$key} }
$rOpts->{$key} = check_RGB($color);
+ return;
}
sub check_RGB {
$rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold;
$key = "html-italic-$short_to_long_names{$short_name}";
$rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic;
+ return;
}
sub pod_to_html {
# Use Pod::Html to process the pod and make the page
# then merge the perltidy code sections into it.
# return 1 if success, 0 otherwise
- my $self = shift;
- my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_;
+ my ( $self, $pod_string, $css_string, $toc_string, $rpre_string_stack ) =
+ @_;
my $input_file = $self->{_input_file};
my $title = $self->{_title};
my $success_flag = 0;
}
# Pod::Html requires a real temporary filename
- # If we are making a frame, we have a name available
- # Otherwise, we have to fine one
- my $tmpfile;
- if ( $rOpts->{'frames'} ) {
- $tmpfile = $self->{_toc_filename};
- }
- else {
- $tmpfile = Perl::Tidy::make_temporary_filename();
- }
- my $fh_tmp = IO::File->new( $tmpfile, 'w' );
+ my ( $fh_tmp, $tmpfile ) = File::Temp::tempfile();
unless ($fh_tmp) {
- warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
+ Perl::Tidy::Warn
+ "unable to open temporary file $tmpfile; cannot use pod2html\n";
return $success_flag;
}
my @args;
push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title";
- my $kw;
# Flags with string args:
# "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s",
# "podpath=s", "podroot=s"
# Note: -css=s is handled by perltidy itself
- foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) {
+ foreach my $kw (qw(backlink cachedir htmlroot libpods podpath podroot))
+ {
if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" }
}
# Toggle switches; these have extra leading 'pod'
# "header!", "index!", "recurse!", "quiet!", "verbose!"
- foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) {
+ foreach my $kw (qw(podheader podindex podrecurse podquiet podverbose)) {
my $kwd = $kw; # allows us to strip 'pod'
if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" }
elsif ( defined( $rOpts->{$kw} ) ) {
}
# "flush",
- $kw = 'podflush';
+ my $kw = 'podflush';
if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" }
# Must clean up if pod2html dies (it can);
# Be careful not to overwrite callers __DIE__ routine
local $SIG{__DIE__} = sub {
- print $_[0];
unlink $tmpfile if -e $tmpfile;
- exit 1;
+ Perl::Tidy::Die $_[0];
};
pod2html(@args);
unless ($fh_tmp) {
# this error shouldn't happen ... we just used this filename
- warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
+ Perl::Tidy::Warn
+ "unable to open temporary file $tmpfile; cannot use pod2html\n";
goto RETURN;
}
my $html_fh = $self->{_html_fh};
my @toc;
my $in_toc;
+ my $ul_level = 0;
my $no_print;
# This routine will write the html selectively and store the toc
$title = escape_html($title);
$html_print->("<h1>$title</h1>\n");
}
+
+ # check for start of index, old pod2html
+ # before Pod::Html VERSION 1.15_02 it is delimited by comments as:
+ # <!-- INDEX BEGIN -->
+ # <ul>
+ # ...
+ # </ul>
+ # <!-- INDEX END -->
+ #
elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) {
- $in_toc = 1;
+ $in_toc = 'INDEX';
+
+ # when frames are used, an extra table of contents in the
+ # contents panel is confusing, so don't print it
+ $no_print = $rOpts->{'frames'}
+ || !$rOpts->{'html-table-of-contents'};
+ $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
+ $html_print->($line);
+ }
+
+ # check for start of index, new pod2html
+ # After Pod::Html VERSION 1.15_02 it is delimited as:
+ # <ul id="index">
+ # ...
+ # </ul>
+ elsif ( $line =~ /^\s*<ul\s+id="index">/i ) {
+ $in_toc = 'UL';
+ $ul_level = 1;
# when frames are used, an extra table of contents in the
# contents panel is confusing, so don't print it
$html_print->($line);
}
- # Copy the perltidy toc, if any, after the Pod::Html toc
+ # Check for end of index, old pod2html
elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) {
$saw_index = 1;
$html_print->($line);
+
+ # Copy the perltidy toc, if any, after the Pod::Html toc
if ($toc_string) {
$html_print->("<hr />\n") if $rOpts->{'frames'};
$html_print->("<h2>Code Index:</h2>\n");
my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
$html_print->(@toc);
}
- $in_toc = 0;
+ $in_toc = "";
$no_print = 0;
}
+ # must track <ul> depth level for new pod2html
+ elsif ( $line =~ /\s*<ul>\s*$/i && $in_toc eq 'UL' ) {
+ $ul_level++;
+ $html_print->($line);
+ }
+
+ # Check for end of index, for new pod2html
+ elsif ( $line =~ /\s*<\/ul>/i && $in_toc eq 'UL' ) {
+ $ul_level--;
+ $html_print->($line);
+
+ # Copy the perltidy toc, if any, after the Pod::Html toc
+ if ( $ul_level <= 0 ) {
+ $saw_index = 1;
+ if ($toc_string) {
+ $html_print->("<hr />\n") if $rOpts->{'frames'};
+ $html_print->("<h2>Code Index:</h2>\n");
+ my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
+ $html_print->(@toc);
+ }
+ $in_toc = "";
+ $ul_level = 0;
+ $no_print = 0;
+ }
+ }
+
# Copy one perltidy section after each marker
elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) {
$line = $2;
# Intermingle code and pod sections if we saw multiple =cut's.
if ( $self->{_pod_cut_count} > 1 ) {
- my $rpre_string = shift(@$rpre_string_stack);
- if ($$rpre_string) {
+ my $rpre_string = shift( @{$rpre_string_stack} );
+ if ( ${$rpre_string} ) {
$html_print->('<pre>');
- $html_print->($$rpre_string);
+ $html_print->( ${$rpre_string} );
$html_print->('</pre>');
}
else {
# shouldn't happen: we stored a string before writing
# each marker.
- warn
+ Perl::Tidy::Warn
"Problem merging html stream with pod2html; order may be wrong\n";
}
$html_print->($line);
# Copy any remaining code section before the </body> tag
elsif ( $line =~ /^\s*<\/body>\s*$/i ) {
$saw_body_end = 1;
- if (@$rpre_string_stack) {
+ if ( @{$rpre_string_stack} ) {
unless ( $self->{_pod_cut_count} > 1 ) {
$html_print->('<hr />');
}
- while ( my $rpre_string = shift(@$rpre_string_stack) ) {
+ while ( my $rpre_string = shift( @{$rpre_string_stack} ) ) {
$html_print->('<pre>');
- $html_print->($$rpre_string);
+ $html_print->( ${$rpre_string} );
$html_print->('</pre>');
}
}
$success_flag = 1;
unless ($saw_body) {
- warn "Did not see <body> in pod2html output\n";
+ Perl::Tidy::Warn "Did not see <body> in pod2html output\n";
$success_flag = 0;
}
unless ($saw_body_end) {
- warn "Did not see </body> in pod2html output\n";
+ Perl::Tidy::Warn "Did not see </body> in pod2html output\n";
$success_flag = 0;
}
unless ($saw_index) {
- warn "Did not find INDEX END in pod2html output\n";
+ Perl::Tidy::Warn "Did not find INDEX END in pod2html output\n";
$success_flag = 0;
}
# note that we have to unlink tmpfile before making frames
# because the tmpfile may be one of the names used for frames
- unlink $tmpfile if -e $tmpfile;
+ if ( -e $tmpfile ) {
+ unless ( unlink($tmpfile) ) {
+ Perl::Tidy::Warn("couldn't unlink temporary file $tmpfile: $!\n");
+ $success_flag = 0;
+ }
+ }
+
if ( $success_flag && $rOpts->{'frames'} ) {
$self->make_frame( \@toc );
}
# On entry:
# $html_filename contains the no-frames html output
# $rtoc is a reference to an array with the table of contents
- my $self = shift;
- my ($rtoc) = @_;
+ my ( $self, $rtoc ) = @_;
my $input_file = $self->{_input_file};
my $html_filename = $self->{_html_file};
my $toc_filename = $self->{_toc_filename};
# 2. The current .html filename is renamed to be the contents panel
rename( $html_filename, $src_filename )
- or die "Cannot rename $html_filename to $src_filename:$!\n";
+ or Perl::Tidy::Die "Cannot rename $html_filename to $src_filename:$!\n";
# 3. Then use the original html filename for the frame
write_frame_html(
$title, $html_filename, $top_basename,
$toc_basename, $src_basename, $src_frame_name
);
+ return;
}
sub write_toc_html {
# write a separate html table of contents file for frames
my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_;
my $fh = IO::File->new( $toc_filename, 'w' )
- or die "Cannot open $toc_filename:$!\n";
+ or Perl::Tidy::Die "Cannot open $toc_filename:$!\n";
$fh->print(<<EOM);
<html>
<head>
my $first_anchor =
change_anchor_names( $rtoc, $src_basename, "$src_frame_name" );
- $fh->print( join "", @$rtoc );
+ $fh->print( join "", @{$rtoc} );
$fh->print(<<EOM);
</body>
</html>
EOM
+ return;
}
sub write_frame_html {
) = @_;
my $fh = IO::File->new( $frame_filename, 'w' )
- or die "Cannot open $toc_basename:$!\n";
+ or Perl::Tidy::Die "Cannot open $toc_basename:$!\n";
$fh->print(<<EOM);
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
</frameset>
</html>
EOM
+ return;
}
sub change_anchor_names {
# also return the first anchor
my ( $rlines, $filename, $target ) = @_;
my $first_anchor;
- foreach my $line (@$rlines) {
+ foreach my $line ( @{$rlines} ) {
# We're looking for lines like this:
# <LI><A HREF="#synopsis">SYNOPSIS</A></LI>
# use css linked to another file
if ( $rOpts->{'html-linked-style-sheet'} ) {
$fh_css->print(
- qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />)
- );
+ qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />));
}
# use css embedded in this file
# -----------------------------------------------------------
if ( $rOpts->{'pod2html'} ) {
my $rpod_string = $self->{_rpod_string};
- $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string,
- $rpre_string_stack )
- && return;
+ $self->pod_to_html(
+ ${$rpod_string}, $css_string,
+ ${$rtoc_string}, $rpre_string_stack
+ ) && return;
}
# --------------------------------------------------
EOM
# copy the table of contents
- if ( $$rtoc_string
+ if ( ${$rtoc_string}
&& !$rOpts->{'frames'}
&& $rOpts->{'html-table-of-contents'} )
{
- $html_fh->print($$rtoc_string);
+ $html_fh->print( ${$rtoc_string} );
}
# copy the pre section(s)
<pre>
END_PRE
- foreach my $rpre_string (@$rpre_string_stack) {
- $html_fh->print($$rpre_string);
+ foreach my $rpre_string ( @{$rpre_string_stack} ) {
+ $html_fh->print( ${$rpre_string} );
}
# and finish the html page
eval { $html_fh->close() }; # could be object without close method
if ( $rOpts->{'frames'} ) {
- my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string;
+ my @toc = map { $_ .= "\n" } split /\n/, ${$rtoc_string};
$self->make_frame( \@toc );
}
+ return;
}
sub markup_tokens {
- my $self = shift;
- my ( $rtokens, $rtoken_type, $rlevels ) = @_;
- my ( @colored_tokens, $j, $string, $type, $token, $level );
+ my ( $self, $rtokens, $rtoken_type, $rlevels ) = @_;
+ my ( @colored_tokens, $string, $type, $token, $level );
my $rlast_level = $self->{_rlast_level};
my $rpackage_stack = $self->{_rpackage_stack};
- for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
- $type = $$rtoken_type[$j];
- $token = $$rtokens[$j];
- $level = $$rlevels[$j];
+ for ( my $j = 0 ; $j < @{$rtoken_type} ; $j++ ) {
+ $type = $rtoken_type->[$j];
+ $token = $rtokens->[$j];
+ $level = $rlevels->[$j];
$level = 0 if ( $level < 0 );
#-------------------------------------------------------
# the toc correct because some packages may be declared within
# blocks and go out of scope when we leave the block.
#-------------------------------------------------------
- if ( $level > $$rlast_level ) {
+ if ( $level > ${$rlast_level} ) {
unless ( $rpackage_stack->[ $level - 1 ] ) {
$rpackage_stack->[ $level - 1 ] = 'main';
}
$rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ];
}
- elsif ( $level < $$rlast_level ) {
+ elsif ( $level < ${$rlast_level} ) {
my $package = $rpackage_stack->[$level];
unless ($package) { $package = 'main' }
$self->add_toc_item( $package, 'package' );
}
}
- $$rlast_level = $level;
+ ${$rlast_level} = $level;
#-------------------------------------------------------
# Intercept a sub name here; split it
# but don't include sub declarations in the toc;
# these wlll have leading token types 'i;'
- my $signature = join "", @$rtoken_type;
+ my $signature = join "", @{$rtoken_type};
unless ( $signature =~ /^i;/ ) {
my $subname = $token;
$subname =~ s/[\s\(].*$//; # remove any attributes and prototype
}
sub markup_html_element {
- my $self = shift;
- my ( $token, $type ) = @_;
+ my ( $self, $token, $type ) = @_;
- return $token if ( $type eq 'b' ); # skip a blank token
+ return $token if ( $type eq 'b' ); # skip a blank token
return $token if ( $token =~ /^\s*$/ ); # skip a blank line
$token = escape_html($token);
sub write_line {
- my $self = shift;
+ my ( $self, $line_of_tokens ) = @_;
return unless $self->{_html_file_opened};
- my $html_pre_fh = $self->{_html_pre_fh};
- my ($line_of_tokens) = @_;
- my $line_type = $line_of_tokens->{_line_type};
- my $input_line = $line_of_tokens->{_line_text};
- my $line_number = $line_of_tokens->{_line_number};
+ my $html_pre_fh = $self->{_html_pre_fh};
+ my $line_type = $line_of_tokens->{_line_type};
+ my $input_line = $line_of_tokens->{_line_text};
+ my $line_number = $line_of_tokens->{_line_number};
chomp $input_line;
# markup line of code..
}
my ($rcolored_tokens) =
$self->markup_tokens( $rtokens, $rtoken_type, $rlevels );
- $html_line .= join '', @$rcolored_tokens;
+ $html_line .= join '', @{$rcolored_tokens};
}
# markup line of non-code..
# if we have written any non-blank lines to the
# current pre section, start writing to a new output
# string
- if ( $$rpre_string =~ /\S/ ) {
+ if ( ${$rpre_string} =~ /\S/ ) {
my $pre_string;
$html_pre_fh =
Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
$self->{_html_pre_fh} = $html_pre_fh;
- push @$rpre_string_stack, \$pre_string;
+ push @{$rpre_string_stack}, \$pre_string;
# leave a marker in the pod stream so we know
# where to put the pre section we just
# otherwise, just clear the current string and start
# over
else {
- $$rpre_string = "";
+ ${$rpre_string} = "";
$html_pod_fh->print("\n");
}
}
# add the line number if requested
if ( $rOpts->{'html-line-numbers'} ) {
- my $extra_space .=
+ my $extra_space =
( $line_number < 10 ) ? " "
: ( $line_number < 100 ) ? " "
: ( $line_number < 1000 ) ? " "
# write the line
$html_pre_fh->print("$html_line\n");
+ return;
}
#####################################################################
# Caution: these debug flags produce a lot of output
# They should all be 0 except when debugging small scripts
- use constant FORMATTER_DEBUG_FLAG_BOND => 0;
- use constant FORMATTER_DEBUG_FLAG_BREAK => 0;
- use constant FORMATTER_DEBUG_FLAG_CI => 0;
- use constant FORMATTER_DEBUG_FLAG_FLUSH => 0;
- use constant FORMATTER_DEBUG_FLAG_FORCE => 0;
- use constant FORMATTER_DEBUG_FLAG_LIST => 0;
- use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
- use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0;
- use constant FORMATTER_DEBUG_FLAG_SPARSE => 0;
- use constant FORMATTER_DEBUG_FLAG_STORE => 0;
- use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0;
- use constant FORMATTER_DEBUG_FLAG_WHITE => 0;
+ use constant FORMATTER_DEBUG_FLAG_RECOMBINE => 0;
+ use constant FORMATTER_DEBUG_FLAG_BOND_TABLES => 0;
+ use constant FORMATTER_DEBUG_FLAG_BOND => 0;
+ use constant FORMATTER_DEBUG_FLAG_BREAK => 0;
+ use constant FORMATTER_DEBUG_FLAG_CI => 0;
+ use constant FORMATTER_DEBUG_FLAG_FLUSH => 0;
+ use constant FORMATTER_DEBUG_FLAG_FORCE => 0;
+ use constant FORMATTER_DEBUG_FLAG_LIST => 0;
+ use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
+ use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0;
+ use constant FORMATTER_DEBUG_FLAG_SPARSE => 0;
+ use constant FORMATTER_DEBUG_FLAG_STORE => 0;
+ use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0;
+ use constant FORMATTER_DEBUG_FLAG_WHITE => 0;
my $debug_warning = sub {
- print "FORMATTER_DEBUGGING with key $_[0]\n";
+ print STDOUT "FORMATTER_DEBUGGING with key $_[0]\n";
};
- FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND');
- FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK');
- FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI');
- FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH');
- FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE');
- FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST');
- FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
- FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT');
- FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE');
- FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE');
- FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP');
- FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE');
+ FORMATTER_DEBUG_FLAG_RECOMBINE && $debug_warning->('RECOMBINE');
+ FORMATTER_DEBUG_FLAG_BOND_TABLES && $debug_warning->('BOND_TABLES');
+ FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND');
+ FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK');
+ FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI');
+ FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH');
+ FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE');
+ FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST');
+ FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
+ FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT');
+ FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE');
+ FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE');
+ FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP');
+ FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE');
}
use Carp;
$last_indentation_written
$last_unadjusted_indentation
$last_leading_token
+ $last_output_short_opening_token
+ $peak_batch_size
$saw_VERSION_in_this_file
$saw_END_or_DATA_
@container_environment_to_go
@bond_strength_to_go
@forced_breakpoint_to_go
- @lengths_to_go
+ @token_lengths_to_go
+ @summed_lengths_to_go
@levels_to_go
@leading_spaces_to_go
@reduced_spaces_to_go
@matching_token_to_go
@mate_index_to_go
- @nesting_blocks_to_go
@ci_levels_to_go
@nesting_depth_to_go
@nobreak_to_go
@old_breakpoint_to_go
@tokens_to_go
+ @rtoken_vars_to_go
+ @K_to_go
@types_to_go
+ @inext_to_go
+ @iprev_to_go
%saved_opening_indentation
$max_index_to_go
$comma_count_in_batch
- $old_line_count_in_batch
$last_nonblank_index_to_go
$last_nonblank_type_to_go
$last_nonblank_token_to_go
@nonblank_lines_at_depth
$starting_in_quote
$ending_in_quote
+ @whitespace_level_stack
+ $whitespace_last_level
- $in_format_skipping_section
$format_skipping_pattern_begin
$format_skipping_pattern_end
$tabbing_disagreement_count
$input_line_tabbing
- $last_line_type
$last_line_leading_type
$last_line_leading_level
$last_last_line_leading_level
%block_leading_text
%block_opening_line_number
$csc_new_statement_ok
+ $csc_last_label
+ %csc_block_label
$accumulating_text_for_block
$leading_block_text
$rleading_block_if_elsif_text
$closing_side_comment_prefix_pattern
$closing_side_comment_list_pattern
+ $blank_lines_after_opening_block_pattern
+ $blank_lines_before_closing_block_pattern
+
$last_nonblank_token
$last_nonblank_type
$last_last_nonblank_token
%is_assignment
%is_chain_operator
%is_if_unless_and_or_last_next_redo_return
- %is_until_while_for_if_elsif_else
+ %ok_to_add_semicolon_for_block_type
@has_broken_sublist
@dont_align
$file_writer_object
$formatter_self
@ci_stack
- $last_line_had_side_comment
%want_break_before
%outdent_keyword
$static_block_comment_pattern
%opening_vertical_tightness
%closing_vertical_tightness
%closing_token_indentation
+ $some_closing_token_indentation
%opening_token_right
%stack_opening_token
$rOpts_break_at_old_comma_breakpoints
$rOpts_break_at_old_logical_breakpoints
$rOpts_break_at_old_ternary_breakpoints
+ $rOpts_break_at_old_attribute_breakpoints
$rOpts_closing_side_comment_else_flag
$rOpts_closing_side_comment_maximum_text
$rOpts_continuation_indentation
$rOpts_line_up_parentheses
$rOpts_maximum_fields_per_table
$rOpts_maximum_line_length
+ $rOpts_variable_maximum_line_length
$rOpts_short_concatenation_item_length
- $rOpts_swallow_optional_blank_lines
+ $rOpts_keep_old_blank_lines
$rOpts_ignore_old_breakpoints
$rOpts_format_skipping
$rOpts_space_function_paren
$rOpts_space_keyword_paren
-
- $half_maximum_line_length
+ $rOpts_keep_interior_semicolons
+ $rOpts_ignore_side_comment_lengths
+ $rOpts_stack_closing_block_brace
+ $rOpts_space_backslash_quote
+ $rOpts_whitespace_cycle
%is_opening_type
%is_closing_type
%is_opening_type
%is_closing_token
%is_opening_token
+
+ %weld_len_left_closing
+ %weld_len_right_closing
+ %weld_len_left_opening
+ %weld_len_right_opening
+
+ $rcuddled_block_types
+
+ $SUB_PATTERN
+ $ASUB_PATTERN
+
+ $NVARS
+
};
BEGIN {
+ # Array index names for token vars
+ my $i = 0;
+ use constant {
+ _BLOCK_TYPE_ => $i++,
+ _CI_LEVEL_ => $i++,
+ _CONTAINER_ENVIRONMENT_ => $i++,
+ _CONTAINER_TYPE_ => $i++,
+ _CUMULATIVE_LENGTH_ => $i++,
+ _LINE_INDEX_ => $i++,
+ _KNEXT_SEQ_ITEM_ => $i++,
+ _LEVEL_ => $i++,
+ _LEVEL_TRUE_ => $i++,
+ _SLEVEL_ => $i++,
+ _TOKEN_ => $i++,
+ _TYPE_ => $i++,
+ _TYPE_SEQUENCE_ => $i++,
+ };
+ $NVARS = 1 + _TYPE_SEQUENCE_;
+
# default list of block types for which -bli would apply
$bli_list_string = 'if else elsif unless while for foreach do : sub';
- @_ = qw(
+ my @q;
+
+ @q = qw(
.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
<= >= == =~ !~ != ++ -- /= x=
);
- @is_digraph{@_} = (1) x scalar(@_);
+ @is_digraph{@q} = (1) x scalar(@q);
- @_ = qw( ... **= <<= >>= &&= ||= //= <=> );
- @is_trigraph{@_} = (1) x scalar(@_);
+ @q = qw( ... **= <<= >>= &&= ||= //= <=> <<~ );
+ @is_trigraph{@q} = (1) x scalar(@q);
- @_ = qw(
+ @q = qw(
= **= += *= &= <<= &&=
-= /= |= >>= ||= //=
.= %= ^=
x=
);
- @is_assignment{@_} = (1) x scalar(@_);
+ @is_assignment{@q} = (1) x scalar(@q);
- @_ = qw(
+ @q = qw(
grep
keys
map
sort
split
);
- @is_keyword_returning_list{@_} = (1) x scalar(@_);
-
- @_ = qw(is if unless and or err last next redo return);
- @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_);
+ @is_keyword_returning_list{@q} = (1) x scalar(@q);
- # always break after a closing curly of these block types:
- @_ = qw(until while for if elsif else);
- @is_until_while_for_if_elsif_else{@_} = (1) x scalar(@_);
+ @q = qw(is if unless and or err last next redo return);
+ @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
- @_ = qw(last next redo return);
- @is_last_next_redo_return{@_} = (1) x scalar(@_);
+ @q = qw(last next redo return);
+ @is_last_next_redo_return{@q} = (1) x scalar(@q);
- @_ = qw(sort map grep);
- @is_sort_map_grep{@_} = (1) x scalar(@_);
+ @q = qw(sort map grep);
+ @is_sort_map_grep{@q} = (1) x scalar(@q);
- @_ = qw(sort map grep eval);
- @is_sort_map_grep_eval{@_} = (1) x scalar(@_);
+ @q = qw(sort map grep eval);
+ @is_sort_map_grep_eval{@q} = (1) x scalar(@q);
- @_ = qw(sort map grep eval do);
- @is_sort_map_grep_eval_do{@_} = (1) x scalar(@_);
+ @q = qw(sort map grep eval do);
+ @is_sort_map_grep_eval_do{@q} = (1) x scalar(@q);
- @_ = qw(if unless);
- @is_if_unless{@_} = (1) x scalar(@_);
+ @q = qw(if unless);
+ @is_if_unless{@q} = (1) x scalar(@q);
- @_ = qw(and or err);
- @is_and_or{@_} = (1) x scalar(@_);
+ @q = qw(and or err);
+ @is_and_or{@q} = (1) x scalar(@q);
# Identify certain operators which often occur in chains.
# Note: the minus (-) causes a side effect of padding of the first line in
# Checkbutton => 'Transmission checked',
# -variable => \$TRANS
# This usually improves appearance so it seems ok.
- @_ = qw(&& || and or : ? . + - * /);
- @is_chain_operator{@_} = (1) x scalar(@_);
+ @q = qw(&& || and or : ? . + - * /);
+ @is_chain_operator{@q} = (1) x scalar(@q);
# We can remove semicolons after blocks preceded by these keywords
- @_ = qw(BEGIN END CHECK INIT AUTOLOAD DESTROY continue if elsif else
- unless while until for foreach);
- @is_block_without_semicolon{@_} = (1) x scalar(@_);
+ @q =
+ qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
+ unless while until for foreach given when default);
+ @is_block_without_semicolon{@q} = (1) x scalar(@q);
+
+ # We will allow semicolons to be added within these block types
+ # as well as sub and package blocks.
+ # NOTES:
+ # 1. Note that these keywords are omitted:
+ # switch case given when default sort map grep
+ # 2. It is also ok to add for sub and package blocks and a labeled block
+ # 3. But not okay for other perltidy types including:
+ # { } ; G t
+ # 4. Test files: blktype.t, blktype1.t, semicolon.t
+ @q =
+ qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
+ unless do while until eval for foreach );
+ @ok_to_add_semicolon_for_block_type{@q} = (1) x scalar(@q);
# 'L' is token for opening { at hash key
- @_ = qw" L { ( [ ";
- @is_opening_type{@_} = (1) x scalar(@_);
+ @q = qw" L { ( [ ";
+ @is_opening_type{@q} = (1) x scalar(@q);
# 'R' is token for closing } at hash key
- @_ = qw" R } ) ] ";
- @is_closing_type{@_} = (1) x scalar(@_);
+ @q = qw" R } ) ] ";
+ @is_closing_type{@q} = (1) x scalar(@q);
- @_ = qw" { ( [ ";
- @is_opening_token{@_} = (1) x scalar(@_);
+ @q = qw" { ( [ ";
+ @is_opening_token{@q} = (1) x scalar(@q);
- @_ = qw" } ) ] ";
- @is_closing_token{@_} = (1) x scalar(@_);
+ @q = qw" } ) ] ";
+ @is_closing_token{@q} = (1) x scalar(@q);
+
+ # Patterns for standardizing matches to block types for regular subs and
+ # anonymous subs. Examples
+ # 'sub process' is a named sub
+ # 'sub ::m' is a named sub
+ # 'sub' is an anonymous sub
+ # 'sub:' is a label, not a sub
+ # 'substr' is a keyword
+ $SUB_PATTERN = '^sub\s+(::|\w)';
+ $ASUB_PATTERN = '^sub$';
}
# whitespace codes
# methods to count instances
my $_count = 0;
- sub get_count { $_count; }
- sub _increment_count { ++$_count }
- sub _decrement_count { --$_count }
+ sub get_count { return $_count; }
+ sub _increment_count { return ++$_count }
+ sub _decrement_count { return --$_count }
}
sub trim {
# trim leading and trailing whitespace from a string
- $_[0] =~ s/\s+$//;
- $_[0] =~ s/^\s+//;
- return $_[0];
+ my $str = shift;
+ $str =~ s/\s+$//;
+ $str =~ s/^\s+//;
+ return $str;
+}
+
+sub max {
+ my @vals = @_;
+ my $max = shift @vals;
+ foreach my $val (@vals) {
+ $max = ( $max < $val ) ? $val : $max;
+ }
+ return $max;
+}
+
+sub min {
+ my @vals = @_;
+ my $min = shift @vals;
+ foreach my $val (@vals) {
+ $min = ( $min > $val ) ? $val : $min;
+ }
+ return $min;
}
sub split_words {
return split( /\s+/, $str );
}
+sub check_keys {
+ my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
+
+ # Check the keys of a hash:
+ # $rtest = ref to hash to test
+ # $rexpected = ref to has with valid keys
+
+ # $msg = a message to write in case of error
+ # $exact_match defines the type of check:
+ # = false: test hash must not have unknown key
+ # = true: test hash must have exactly same keys as known hash
+ my @unknown_keys =
+ grep { !exists $rvalid->{$_} } keys %{$rtest};
+ my @missing_keys =
+ grep { !exists $rtest->{$_} } keys %{$rvalid};
+ my $error = @unknown_keys;
+ if ($exact_match) { $error ||= @missing_keys }
+ if ($error) {
+ local $" = ')(';
+ my @expected_keys = sort keys %{$rvalid};
+ @unknown_keys = sort @unknown_keys;
+ Perl::Tidy::Die <<EOM;
+------------------------------------------------------------------------
+Program error detected checking hash keys
+Message is: '$msg'
+Expected keys: (@expected_keys)
+Unknown key(s): (@unknown_keys)
+Missing key(s): (@missing_keys)
+------------------------------------------------------------------------
+EOM
+ }
+}
+
# interface to Perl::Tidy::Logger routines
sub warning {
- if ($logger_object) {
- $logger_object->warning(@_);
- }
+ my ($msg) = @_;
+ if ($logger_object) { $logger_object->warning($msg); }
+ return;
}
sub complain {
+ my ($msg) = @_;
if ($logger_object) {
- $logger_object->complain(@_);
+ $logger_object->complain($msg);
}
+ return;
}
sub write_logfile_entry {
+ my @msg = @_;
if ($logger_object) {
- $logger_object->write_logfile_entry(@_);
+ $logger_object->write_logfile_entry(@msg);
}
+ return;
}
sub black_box {
- if ($logger_object) {
- $logger_object->black_box(@_);
- }
+ my @msg = @_;
+ if ($logger_object) { $logger_object->black_box(@msg); }
+ return;
}
sub report_definite_bug {
if ($logger_object) {
$logger_object->report_definite_bug();
}
+ return;
}
sub get_saw_brace_error {
if ($logger_object) {
$logger_object->get_saw_brace_error();
}
+ return;
}
sub we_are_at_the_last_line {
if ($logger_object) {
$logger_object->we_are_at_the_last_line();
}
+ return;
}
# interface to Perl::Tidy::Diagnostics routine
sub write_diagnostics {
-
- if ($diagnostics_object) {
- $diagnostics_object->write_diagnostics(@_);
- }
+ my $msg = shift;
+ if ($diagnostics_object) { $diagnostics_object->write_diagnostics($msg); }
+ return;
}
sub get_added_semicolon_count {
}
sub DESTROY {
- $_[0]->_decrement_count();
+ my $self = shift;
+ $self->_decrement_count();
+ return;
+}
+
+sub get_output_line_number {
+ return $vertical_aligner_object->get_output_line_number();
}
sub new {
# initialize the leading whitespace stack to negative levels
# so that we can never run off the end of the stack
+ $peak_batch_size = 0; # flag to determine if we have output code
$gnu_position_predictor = 0; # where the current token is predicted to be
$max_gnu_stack_index = 0;
$max_gnu_item_index = -1;
$gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
- @gnu_item_list = ();
- $last_output_indentation = 0;
- $last_indentation_written = 0;
- $last_unadjusted_indentation = 0;
- $last_leading_token = "";
+ @gnu_item_list = ();
+ $last_output_indentation = 0;
+ $last_indentation_written = 0;
+ $last_unadjusted_indentation = 0;
+ $last_leading_token = "";
+ $last_output_short_opening_token = 0;
$saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
$saw_END_or_DATA_ = 0;
@container_environment_to_go = ();
@bond_strength_to_go = ();
@forced_breakpoint_to_go = ();
- @lengths_to_go = (); # line length to start of ith token
+ @summed_lengths_to_go = (); # line length to start of ith token
+ @token_lengths_to_go = ();
@levels_to_go = ();
@matching_token_to_go = ();
@mate_index_to_go = ();
- @nesting_blocks_to_go = ();
@ci_levels_to_go = ();
@nesting_depth_to_go = (0);
@nobreak_to_go = ();
@old_breakpoint_to_go = ();
@tokens_to_go = ();
+ @rtoken_vars_to_go = ();
+ @K_to_go = ();
@types_to_go = ();
@leading_spaces_to_go = ();
@reduced_spaces_to_go = ();
+ @inext_to_go = ();
+ @iprev_to_go = ();
+
+ @whitespace_level_stack = ();
+ $whitespace_last_level = -1;
@dont_align = ();
@has_broken_sublist = ();
$in_tabbing_disagreement = 0;
$input_line_tabbing = undef;
- $last_line_type = "";
$last_last_line_leading_level = 0;
$last_line_leading_level = 0;
$last_line_leading_type = '#';
$added_semicolon_count = 0;
$first_added_semicolon_at = 0;
$last_added_semicolon_at = 0;
- $last_line_had_side_comment = 0;
$is_static_block_comment = 0;
%postponed_breakpoint = ();
%block_leading_text = ();
%block_opening_line_number = ();
$csc_new_statement_ok = 1;
+ %csc_block_label = ();
- %saved_opening_indentation = ();
- $in_format_skipping_section = 0;
+ %saved_opening_indentation = ();
reset_block_text_accumulator();
"Indentation will be with $rOpts->{'indent-columns'} spaces\n");
}
- # This was the start of a formatter referent, but object-oriented
- # coding has turned out to be too slow here.
- $formatter_self = {};
+ # This hash holds the main data structures for formatting
+ # All hash keys must be defined here.
+ $formatter_self = {
+ rlines => [], # = ref to array of lines of the file
+ rLL => [], # = ref to array with all tokens
+ # in the file. LL originally meant
+ # 'Linked List'. Linked lists were a
+ # bad idea but LL is easy to type.
+ Klimit => undef, # = maximum K index for rLL. This is
+ # needed to catch any autovivification
+ # problems.
+ rnested_pairs => [], # for welding decisions
+ K_opening_container => {}, # for quickly traversing structure
+ K_closing_container => {}, # for quickly traversing structure
+ K_opening_ternary => {}, # for quickly traversing structure
+ K_closing_ternary => {}, # for quickly traversing structure
+ rK_phantom_semicolons =>
+ undef, # for undoing phantom semicolons if iterating
+ rpaired_to_inner_container => {},
+ rbreak_container => {}, # prevent one-line blocks
+ rvalid_self_keys => [], # for checking
+ };
+ my @valid_keys = keys %{$formatter_self};
+ $formatter_self->{rvalid_self_keys} = \@valid_keys;
bless $formatter_self, $class;
return $formatter_self;
}
+sub Fault {
+ my ($msg) = @_;
+
+ # This routine is called for errors that really should not occur
+ # except if there has been a bug introduced by a recent program change
+ my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
+ my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
+ my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
+
+ Perl::Tidy::Die(<<EOM);
+==============================================================================
+Fault detected at line $line0 of sub '$subroutine1'
+in file '$filename1'
+which was called from line $line1 of sub '$subroutine2'
+Message: '$msg'
+This is probably an error introduced by a recent programming change.
+==============================================================================
+EOM
+}
+
+sub check_self_hash {
+ my $self = shift;
+ my @valid_self_keys = @{ $self->{rvalid_self_keys} };
+ my %valid_self_hash;
+ @valid_self_hash{@valid_self_keys} = (1) x scalar(@valid_self_keys);
+ check_keys( $self, \%valid_self_hash, "Checkpoint: self error", 1 );
+ return;
+}
+
+sub check_token_array {
+ my $self = shift;
+
+ # Check for errors in the array of tokens
+ # Uses package variable $NVARS
+ $self->check_self_hash();
+ my $rLL = $self->{rLL};
+ for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
+ my $nvars = @{ $rLL->[$KK] };
+ if ( $nvars != $NVARS ) {
+ my $type = $rLL->[$KK]->[_TYPE_];
+ $type = '*' unless defined($type);
+ Fault(
+"number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
+ );
+ }
+ foreach my $var ( _TOKEN_, _TYPE_ ) {
+ if ( !defined( $rLL->[$KK]->[$var] ) ) {
+ my $iline = $rLL->[$KK]->[_LINE_INDEX_];
+ Fault("Undefined variable $var for K=$KK, line=$iline\n");
+ }
+ }
+ return;
+ }
+}
+
+sub set_rLL_max_index {
+ my $self = shift;
+
+ # Set the limit of the rLL array, assuming that it is correct.
+ # This should only be called by routines after they make changes
+ # to tokenization
+ my $rLL = $self->{rLL};
+ if ( !defined($rLL) ) {
+
+ # Shouldn't happen because rLL was initialized to be an array ref
+ Fault("Undefined Memory rLL");
+ }
+ my $Klimit_old = $self->{Klimit};
+ my $num = @{$rLL};
+ my $Klimit;
+ if ( $num > 0 ) { $Klimit = $num - 1 }
+ $self->{Klimit} = $Klimit;
+ return ($Klimit);
+}
+
+sub get_rLL_max_index {
+ my $self = shift;
+
+ # the memory location $rLL and number of tokens should be obtained
+ # from this routine so that any autovivication can be immediately caught.
+ my $rLL = $self->{rLL};
+ my $Klimit = $self->{Klimit};
+ if ( !defined($rLL) ) {
+
+ # Shouldn't happen because rLL was initialized to be an array ref
+ Fault("Undefined Memory rLL");
+ }
+ my $num = @{$rLL};
+ if ( $num == 0 && defined($Klimit)
+ || $num > 0 && !defined($Klimit)
+ || $num > 0 && $Klimit != $num - 1 )
+ {
+
+ # Possible autovivification problem...
+ if ( !defined($Klimit) ) { $Klimit = '*' }
+ Fault("Error getting rLL: Memory items=$num and Klimit=$Klimit");
+ }
+ return ($Klimit);
+}
+
sub prepare_for_new_input_lines {
+ # Remember the largest batch size processed. This is needed
+ # by the pad routine to avoid padding the first nonblank token
+ if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) {
+ $peak_batch_size = $max_index_to_go;
+ }
+
$gnu_sequence_number++; # increment output batch counter
%last_gnu_equals = ();
%gnu_comma_count = ();
$forced_breakpoint_count = 0;
$forced_breakpoint_undo_count = 0;
$rbrace_follower = undef;
- $lengths_to_go[0] = 0;
- $old_line_count_in_batch = 1;
+ $summed_lengths_to_go[0] = 0;
$comma_count_in_batch = 0;
$starting_in_quote = 0;
destroy_one_line_block();
+ return;
}
-sub write_line {
+sub break_lines {
- my $self = shift;
- my ($line_of_tokens) = @_;
+ # Loop over old lines to set new line break points
- my $line_type = $line_of_tokens->{_line_type};
- my $input_line = $line_of_tokens->{_line_text};
+ my $self = shift;
+ my $rlines = $self->{rlines};
+
+ # Flag to prevent blank lines when POD occurs in a format skipping sect.
+ my $in_format_skipping_section;
+
+ my $line_type = "";
+ foreach my $line_of_tokens ( @{$rlines} ) {
+
+ my $last_line_type = $line_type;
+ $line_type = $line_of_tokens->{_line_type};
+ my $input_line = $line_of_tokens->{_line_text};
+
+ # _line_type codes are:
+ # SYSTEM - system-specific code before hash-bang line
+ # CODE - line of perl code (including comments)
+ # POD_START - line starting pod, such as '=head'
+ # POD - pod documentation text
+ # POD_END - last line of pod section, '=cut'
+ # HERE - text of here-document
+ # HERE_END - last line of here-doc (target word)
+ # FORMAT - format section
+ # FORMAT_END - last line of format section, '.'
+ # DATA_START - __DATA__ line
+ # DATA - unidentified text following __DATA__
+ # END_START - __END__ line
+ # END - unidentified text following __END__
+ # ERROR - we are in big trouble, probably not a perl script
+
+ # put a blank line after an =cut which comes before __END__ and __DATA__
+ # (required by podchecker)
+ if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
+ $file_writer_object->reset_consecutive_blank_lines();
+ if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
+ $self->want_blank_line();
+ }
+ }
- my $want_blank_line_next = 0;
+ # handle line of code..
+ if ( $line_type eq 'CODE' ) {
- # _line_type codes are:
- # SYSTEM - system-specific code before hash-bang line
- # CODE - line of perl code (including comments)
- # POD_START - line starting pod, such as '=head'
- # POD - pod documentation text
- # POD_END - last line of pod section, '=cut'
- # HERE - text of here-document
- # HERE_END - last line of here-doc (target word)
- # FORMAT - format section
- # FORMAT_END - last line of format section, '.'
- # DATA_START - __DATA__ line
- # DATA - unidentified text following __DATA__
- # END_START - __END__ line
- # END - unidentified text following __END__
- # ERROR - we are in big trouble, probably not a perl script
- #
- # handle line of code..
+ my $CODE_type = $line_of_tokens->{_code_type};
+ $in_format_skipping_section = $CODE_type eq 'FS';
+
+ # Handle blank lines
+ if ( $CODE_type eq 'BL' ) {
+
+ # If keep-old-blank-lines is zero, we delete all
+ # old blank lines and let the blank line rules generate any
+ # needed blanks.
+ if ($rOpts_keep_old_blank_lines) {
+ $self->flush();
+ $file_writer_object->write_blank_code_line(
+ $rOpts_keep_old_blank_lines == 2 );
+ $last_line_leading_type = 'b';
+ }
+ next;
+ }
+ else {
+
+ # let logger see all non-blank lines of code
+ my $output_line_number = get_output_line_number();
+ ##$vertical_aligner_object->get_output_line_number();
+ black_box( $line_of_tokens, $output_line_number );
+ }
+
+ # Handle Format Skipping (FS) and Verbatim (VB) Lines
+ if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
+ $self->write_unindented_line("$input_line");
+ $file_writer_object->reset_consecutive_blank_lines();
+ next;
+ }
+
+ # Handle all other lines of code
+ $self->print_line_of_tokens($line_of_tokens);
+ }
+
+ # handle line of non-code..
+ else {
+
+ # set special flags
+ my $skip_line = 0;
+ my $tee_line = 0;
+ if ( $line_type =~ /^POD/ ) {
+
+ # Pod docs should have a preceding blank line. But stay
+ # out of __END__ and __DATA__ sections, because
+ # the user may be using this section for any purpose whatsoever
+ if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
+ if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; }
+ if ( $rOpts->{'trim-pod'} ) { $input_line =~ s/\s+$// }
+ if ( !$skip_line
+ && !$in_format_skipping_section
+ && $line_type eq 'POD_START'
+ && !$saw_END_or_DATA_ )
+ {
+ $self->want_blank_line();
+ }
+ }
+
+ # leave the blank counters in a predictable state
+ # after __END__ or __DATA__
+ elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
+ $file_writer_object->reset_consecutive_blank_lines();
+ $saw_END_or_DATA_ = 1;
+ }
+
+ # write unindented non-code line
+ if ( !$skip_line ) {
+ if ($tee_line) { $file_writer_object->tee_on() }
+ $self->write_unindented_line($input_line);
+ if ($tee_line) { $file_writer_object->tee_off() }
+ }
+ }
+ }
+ return;
+}
+
+{ ## Beginning of routine to check line hashes
+
+ my %valid_line_hash;
+
+ BEGIN {
+
+ # These keys are defined for each line in the formatter
+ # Each line must have exactly these quantities
+ my @valid_line_keys = qw(
+ _curly_brace_depth
+ _ending_in_quote
+ _guessed_indentation_level
+ _line_number
+ _line_text
+ _line_type
+ _paren_depth
+ _quote_character
+ _rK_range
+ _square_bracket_depth
+ _starting_in_quote
+ _ended_in_blank_token
+ _code_type
+
+ _ci_level_0
+ _level_0
+ _nesting_blocks_0
+ _nesting_tokens_0
+ );
+
+ @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys);
+ }
+
+ sub check_line_hashes {
+ my $self = shift;
+ $self->check_self_hash();
+ my $rlines = $self->{rlines};
+ foreach my $rline ( @{$rlines} ) {
+ my $iline = $rline->{_line_number};
+ my $line_type = $rline->{_line_type};
+ check_keys( $rline, \%valid_line_hash,
+ "Checkpoint: line number =$iline, line_type=$line_type", 1 );
+ }
+ return;
+ }
+
+} ## End check line hashes
+
+sub write_line {
+
+ # We are caching tokenized lines as they arrive and converting them to the
+ # format needed for the final formatting.
+ my ( $self, $line_of_tokens_old ) = @_;
+ my $rLL = $self->{rLL};
+ my $Klimit = $self->{Klimit};
+ my $rlines_new = $self->{rlines};
+
+ my $Kfirst;
+ my $line_of_tokens = {};
+ foreach my $key (
+ qw(
+ _curly_brace_depth
+ _ending_in_quote
+ _guessed_indentation_level
+ _line_number
+ _line_text
+ _line_type
+ _paren_depth
+ _quote_character
+ _square_bracket_depth
+ _starting_in_quote
+ )
+ )
+ {
+ $line_of_tokens->{$key} = $line_of_tokens_old->{$key};
+ }
+
+ # Data needed by Logger
+ $line_of_tokens->{_level_0} = 0;
+ $line_of_tokens->{_ci_level_0} = 0;
+ $line_of_tokens->{_nesting_blocks_0} = "";
+ $line_of_tokens->{_nesting_tokens_0} = "";
+
+ # Needed to avoid trimming quotes
+ $line_of_tokens->{_ended_in_blank_token} = undef;
+
+ my $line_type = $line_of_tokens_old->{_line_type};
+ my $input_line_no = $line_of_tokens_old->{_line_number} - 1;
if ( $line_type eq 'CODE' ) {
- # let logger see all non-blank lines of code
- if ( $input_line !~ /^\s*$/ ) {
- my $output_line_number =
- $vertical_aligner_object->get_output_line_number();
- black_box( $line_of_tokens, $output_line_number );
+ my $rtokens = $line_of_tokens_old->{_rtokens};
+ my $rtoken_type = $line_of_tokens_old->{_rtoken_type};
+ my $rblock_type = $line_of_tokens_old->{_rblock_type};
+ my $rcontainer_type = $line_of_tokens_old->{_rcontainer_type};
+ my $rcontainer_environment =
+ $line_of_tokens_old->{_rcontainer_environment};
+ my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence};
+ my $rlevels = $line_of_tokens_old->{_rlevels};
+ my $rslevels = $line_of_tokens_old->{_rslevels};
+ my $rci_levels = $line_of_tokens_old->{_rci_levels};
+ my $rnesting_blocks = $line_of_tokens_old->{_rnesting_blocks};
+ my $rnesting_tokens = $line_of_tokens_old->{_rnesting_tokens};
+
+ my $jmax = @{$rtokens} - 1;
+ if ( $jmax >= 0 ) {
+ $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
+ foreach my $j ( 0 .. $jmax ) {
+ my @tokary;
+ @tokary[
+ _TOKEN_, _TYPE_,
+ _BLOCK_TYPE_, _CONTAINER_TYPE_,
+ _CONTAINER_ENVIRONMENT_, _TYPE_SEQUENCE_,
+ _LEVEL_, _LEVEL_TRUE_,
+ _SLEVEL_, _CI_LEVEL_,
+ _LINE_INDEX_,
+ ]
+ = (
+ $rtokens->[$j], $rtoken_type->[$j],
+ $rblock_type->[$j], $rcontainer_type->[$j],
+ $rcontainer_environment->[$j], $rtype_sequence->[$j],
+ $rlevels->[$j], $rlevels->[$j],
+ $rslevels->[$j], $rci_levels->[$j],
+ $input_line_no,
+ );
+ ##push @token_array, \@tokary;
+ push @{$rLL}, \@tokary;
+ }
+
+ #$Klast=@{$rLL}-1;
+ $Klimit = @{$rLL} - 1;
+
+ # Need to remember if we can trim the input line
+ $line_of_tokens->{_ended_in_blank_token} =
+ $rtoken_type->[$jmax] eq 'b';
+
+ $line_of_tokens->{_level_0} = $rlevels->[0];
+ $line_of_tokens->{_ci_level_0} = $rci_levels->[0];
+ $line_of_tokens->{_nesting_blocks_0} = $rnesting_blocks->[0];
+ $line_of_tokens->{_nesting_tokens_0} = $rnesting_tokens->[0];
+ }
+ }
+
+ $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ];
+ $line_of_tokens->{_code_type} = "";
+ $self->{Klimit} = $Klimit;
+
+ push @{$rlines_new}, $line_of_tokens;
+ return;
+}
+
+BEGIN {
+
+ # initialize these global hashes, which control the use of
+ # whitespace around tokens:
+ #
+ # %binary_ws_rules
+ # %want_left_space
+ # %want_right_space
+ # %space_after_keyword
+ #
+ # Many token types are identical to the tokens themselves.
+ # See the tokenizer for a complete list. Here are some special types:
+ # k = perl keyword
+ # f = semicolon in for statement
+ # m = unary minus
+ # p = unary plus
+ # Note that :: is excluded since it should be contained in an identifier
+ # Note that '->' is excluded because it never gets space
+ # parentheses and brackets are excluded since they are handled specially
+ # curly braces are included but may be overridden by logic, such as
+ # newline logic.
+
+ # NEW_TOKENS: create a whitespace rule here. This can be as
+ # simple as adding your new letter to @spaces_both_sides, for
+ # example.
+
+ my @q;
+
+ @q = qw" L { ( [ ";
+ @is_opening_type{@q} = (1) x scalar(@q);
+
+ @q = qw" R } ) ] ";
+ @is_closing_type{@q} = (1) x scalar(@q);
+
+ my @spaces_both_sides = qw"
+ + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
+ .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
+ &&= ||= //= <=> A k f w F n C Y U G v
+ ";
+
+ my @spaces_left_side = qw"
+ t ! ~ m p { \ h pp mm Z j
+ ";
+ push( @spaces_left_side, '#' ); # avoids warning message
+
+ my @spaces_right_side = qw"
+ ; } ) ] R J ++ -- **=
+ ";
+ push( @spaces_right_side, ',' ); # avoids warning message
+
+ # Note that we are in a BEGIN block here. Later in processing
+ # the values of %want_left_space and %want_right_space
+ # may be overridden by any user settings specified by the
+ # -wls and -wrs parameters. However the binary_whitespace_rules
+ # are hardwired and have priority.
+ @want_left_space{@spaces_both_sides} =
+ (1) x scalar(@spaces_both_sides);
+ @want_right_space{@spaces_both_sides} =
+ (1) x scalar(@spaces_both_sides);
+ @want_left_space{@spaces_left_side} =
+ (1) x scalar(@spaces_left_side);
+ @want_right_space{@spaces_left_side} =
+ (-1) x scalar(@spaces_left_side);
+ @want_left_space{@spaces_right_side} =
+ (-1) x scalar(@spaces_right_side);
+ @want_right_space{@spaces_right_side} =
+ (1) x scalar(@spaces_right_side);
+ $want_left_space{'->'} = WS_NO;
+ $want_right_space{'->'} = WS_NO;
+ $want_left_space{'**'} = WS_NO;
+ $want_right_space{'**'} = WS_NO;
+ $want_right_space{'CORE::'} = WS_NO;
+
+ # These binary_ws_rules are hardwired and have priority over the above
+ # settings. It would be nice to allow adjustment by the user,
+ # but it would be complicated to specify.
+ #
+ # hash type information must stay tightly bound
+ # as in : ${xxxx}
+ $binary_ws_rules{'i'}{'L'} = WS_NO;
+ $binary_ws_rules{'i'}{'{'} = WS_YES;
+ $binary_ws_rules{'k'}{'{'} = WS_YES;
+ $binary_ws_rules{'U'}{'{'} = WS_YES;
+ $binary_ws_rules{'i'}{'['} = WS_NO;
+ $binary_ws_rules{'R'}{'L'} = WS_NO;
+ $binary_ws_rules{'R'}{'{'} = WS_NO;
+ $binary_ws_rules{'t'}{'L'} = WS_NO;
+ $binary_ws_rules{'t'}{'{'} = WS_NO;
+ $binary_ws_rules{'}'}{'L'} = WS_NO;
+ $binary_ws_rules{'}'}{'{'} = WS_NO;
+ $binary_ws_rules{'$'}{'L'} = WS_NO;
+ $binary_ws_rules{'$'}{'{'} = WS_NO;
+ $binary_ws_rules{'@'}{'L'} = WS_NO;
+ $binary_ws_rules{'@'}{'{'} = WS_NO;
+ $binary_ws_rules{'='}{'L'} = WS_YES;
+ $binary_ws_rules{'J'}{'J'} = WS_YES;
+
+ # the following includes ') {'
+ # as in : if ( xxx ) { yyy }
+ $binary_ws_rules{']'}{'L'} = WS_NO;
+ $binary_ws_rules{']'}{'{'} = WS_NO;
+ $binary_ws_rules{')'}{'{'} = WS_YES;
+ $binary_ws_rules{')'}{'['} = WS_NO;
+ $binary_ws_rules{']'}{'['} = WS_NO;
+ $binary_ws_rules{']'}{'{'} = WS_NO;
+ $binary_ws_rules{'}'}{'['} = WS_NO;
+ $binary_ws_rules{'R'}{'['} = WS_NO;
+
+ $binary_ws_rules{']'}{'++'} = WS_NO;
+ $binary_ws_rules{']'}{'--'} = WS_NO;
+ $binary_ws_rules{')'}{'++'} = WS_NO;
+ $binary_ws_rules{')'}{'--'} = WS_NO;
+
+ $binary_ws_rules{'R'}{'++'} = WS_NO;
+ $binary_ws_rules{'R'}{'--'} = WS_NO;
+
+ $binary_ws_rules{'i'}{'Q'} = WS_YES;
+ $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
+
+ # FIXME: we could to split 'i' into variables and functions
+ # and have no space for functions but space for variables. For now,
+ # I have a special patch in the special rules below
+ $binary_ws_rules{'i'}{'('} = WS_NO;
+
+ $binary_ws_rules{'w'}{'('} = WS_NO;
+ $binary_ws_rules{'w'}{'{'} = WS_YES;
+} ## end BEGIN block
+
+sub set_whitespace_flags {
+
+ # This routine examines each pair of nonblank tokens and
+ # sets a flag indicating if white space is needed.
+ #
+ # $rwhitespace_flags->[$j] is a flag indicating whether a white space
+ # BEFORE token $j is needed, with the following values:
+ #
+ # WS_NO = -1 do not want a space before token $j
+ # WS_OPTIONAL= 0 optional space or $j is a whitespace
+ # WS_YES = 1 want a space before token $j
+ #
+
+ my $self = shift;
+ my $rLL = $self->{rLL};
+
+ my $rwhitespace_flags = [];
+
+ my ( $last_token, $last_type, $last_block_type, $last_input_line_no,
+ $token, $type, $block_type, $input_line_no );
+ my $j_tight_closing_paren = -1;
+
+ $token = ' ';
+ $type = 'b';
+ $block_type = '';
+ $input_line_no = 0;
+ $last_token = ' ';
+ $last_type = 'b';
+ $last_block_type = '';
+ $last_input_line_no = 0;
+
+ my $jmax = @{$rLL} - 1;
+
+ my ($ws);
+
+ # This is some logic moved to a sub to avoid deep nesting of if stmts
+ my $ws_in_container = sub {
+
+ my ($j) = @_;
+ my $ws = WS_YES;
+ if ( $j + 1 > $jmax ) { return (WS_NO) }
+
+ # Patch to count '-foo' as single token so that
+ # each of $a{-foo} and $a{foo} and $a{'foo'} do
+ # not get spaces with default formatting.
+ my $j_here = $j;
+ ++$j_here
+ if ( $token eq '-'
+ && $last_token eq '{'
+ && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
+
+ # $j_next is where a closing token should be if
+ # the container has a single token
+ if ( $j_here + 1 > $jmax ) { return (WS_NO) }
+ my $j_next =
+ ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
+ ? $j_here + 2
+ : $j_here + 1;
+
+ if ( $j_next > $jmax ) { return WS_NO }
+ my $tok_next = $rLL->[$j_next]->[_TOKEN_];
+ my $type_next = $rLL->[$j_next]->[_TYPE_];
+
+ # for tightness = 1, if there is just one token
+ # within the matching pair, we will keep it tight
+ if (
+ $tok_next eq $matching_token{$last_token}
+
+ # but watch out for this: [ [ ] (misc.t)
+ && $last_token ne $token
+
+ # double diamond is usually spaced
+ && $token ne '<<>>'
+
+ )
+ {
+
+ # remember where to put the space for the closing paren
+ $j_tight_closing_paren = $j_next;
+ return (WS_NO);
+ }
+ return (WS_YES);
+ };
+
+ # main loop over all tokens to define the whitespace flags
+ for ( my $j = 0 ; $j <= $jmax ; $j++ ) {
+
+ my $rtokh = $rLL->[$j];
+
+ # Set a default
+ $rwhitespace_flags->[$j] = WS_OPTIONAL;
+
+ if ( $rtokh->[_TYPE_] eq 'b' ) {
+ next;
+ }
+
+ # set a default value, to be changed as needed
+ $ws = undef;
+ $last_token = $token;
+ $last_type = $type;
+ $last_block_type = $block_type;
+ $last_input_line_no = $input_line_no;
+ $token = $rtokh->[_TOKEN_];
+ $type = $rtokh->[_TYPE_];
+ $block_type = $rtokh->[_BLOCK_TYPE_];
+ $input_line_no = $rtokh->[_LINE_INDEX_];
+
+ #---------------------------------------------------------------
+ # Whitespace Rules Section 1:
+ # Handle space on the inside of opening braces.
+ #---------------------------------------------------------------
+
+ # /^[L\{\(\[]$/
+ if ( $is_opening_type{$last_type} ) {
+
+ $j_tight_closing_paren = -1;
+
+ # let us keep empty matched braces together: () {} []
+ # except for BLOCKS
+ if ( $token eq $matching_token{$last_token} ) {
+ if ($block_type) {
+ $ws = WS_YES;
+ }
+ else {
+ $ws = WS_NO;
+ }
+ }
+ else {
+
+ # we're considering the right of an opening brace
+ # tightness = 0 means always pad inside with space
+ # tightness = 1 means pad inside if "complex"
+ # tightness = 2 means never pad inside with space
+
+ my $tightness;
+ if ( $last_type eq '{'
+ && $last_token eq '{'
+ && $last_block_type )
+ {
+ $tightness = $rOpts_block_brace_tightness;
+ }
+ else { $tightness = $tightness{$last_token} }
+
+ #=============================================================
+ # Patch for test problem fabrice_bug.pl
+ # We must always avoid spaces around a bare word beginning
+ # with ^ as in:
+ # my $before = ${^PREMATCH};
+ # Because all of the following cause an error in perl:
+ # my $before = ${ ^PREMATCH };
+ # my $before = ${ ^PREMATCH};
+ # my $before = ${^PREMATCH };
+ # So if brace tightness flag is -bt=0 we must temporarily reset
+ # to bt=1. Note that here we must set tightness=1 and not 2 so
+ # that the closing space
+ # is also avoided (via the $j_tight_closing_paren flag in coding)
+ if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
+
+ #=============================================================
+
+ if ( $tightness <= 0 ) {
+ $ws = WS_YES;
+ }
+ elsif ( $tightness > 1 ) {
+ $ws = WS_NO;
+ }
+ else {
+ $ws = $ws_in_container->($j);
+ }
+ }
+ } # end setting space flag inside opening tokens
+ my $ws_1;
+ $ws_1 = $ws
+ if FORMATTER_DEBUG_FLAG_WHITE;
+
+ #---------------------------------------------------------------
+ # Whitespace Rules Section 2:
+ # Handle space on inside of closing brace pairs.
+ #---------------------------------------------------------------
+
+ # /[\}\)\]R]/
+ if ( $is_closing_type{$type} ) {
+
+ if ( $j == $j_tight_closing_paren ) {
+
+ $j_tight_closing_paren = -1;
+ $ws = WS_NO;
+ }
+ else {
+
+ if ( !defined($ws) ) {
+
+ my $tightness;
+ if ( $type eq '}' && $token eq '}' && $block_type ) {
+ $tightness = $rOpts_block_brace_tightness;
+ }
+ else { $tightness = $tightness{$token} }
+
+ $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
+ }
+ }
+ } # end setting space flag inside closing tokens
+
+ my $ws_2;
+ $ws_2 = $ws
+ if FORMATTER_DEBUG_FLAG_WHITE;
+
+ #---------------------------------------------------------------
+ # Whitespace Rules Section 3:
+ # Use the binary rule table.
+ #---------------------------------------------------------------
+ if ( !defined($ws) ) {
+ $ws = $binary_ws_rules{$last_type}{$type};
+ }
+ my $ws_3;
+ $ws_3 = $ws
+ if FORMATTER_DEBUG_FLAG_WHITE;
+
+ #---------------------------------------------------------------
+ # Whitespace Rules Section 4:
+ # Handle some special cases.
+ #---------------------------------------------------------------
+ if ( $token eq '(' ) {
+
+ # This will have to be tweaked as tokenization changes.
+ # We usually want a space at '} (', for example:
+ # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
+ #
+ # But not others:
+ # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
+ # At present, the above & block is marked as type L/R so this case
+ # won't go through here.
+ if ( $last_type eq '}' ) { $ws = WS_YES }
+
+ # NOTE: some older versions of Perl had occasional problems if
+ # spaces are introduced between keywords or functions and opening
+ # parens. So the default is not to do this except is certain
+ # cases. The current Perl seems to tolerate spaces.
+
+ # Space between keyword and '('
+ elsif ( $last_type eq 'k' ) {
+ $ws = WS_NO
+ unless ( $rOpts_space_keyword_paren
+ || $space_after_keyword{$last_token} );
+ }
+
+ # Space between function and '('
+ # -----------------------------------------------------
+ # 'w' and 'i' checks for something like:
+ # myfun( &myfun( ->myfun(
+ # -----------------------------------------------------
+ elsif (( $last_type =~ /^[wUG]$/ )
+ || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
+ {
+ $ws = WS_NO unless ($rOpts_space_function_paren);
+ }
+
+ # space between something like $i and ( in
+ # for $i ( 0 .. 20 ) {
+ # FIXME: eventually, type 'i' needs to be split into multiple
+ # token types so this can be a hardwired rule.
+ elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
+ $ws = WS_YES;
+ }
+
+ # allow constant function followed by '()' to retain no space
+ elsif ($last_type eq 'C'
+ && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
+ {
+ $ws = WS_NO;
+ }
+ }
+
+ # patch for SWITCH/CASE: make space at ']{' optional
+ # since the '{' might begin a case or when block
+ elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
+ $ws = WS_OPTIONAL;
+ }
+
+ # keep space between 'sub' and '{' for anonymous sub definition
+ if ( $type eq '{' ) {
+ if ( $last_token eq 'sub' ) {
+ $ws = WS_YES;
+ }
+
+ # this is needed to avoid no space in '){'
+ if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
+
+ # avoid any space before the brace or bracket in something like
+ # @opts{'a','b',...}
+ if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
+ $ws = WS_NO;
+ }
+ }
+
+ elsif ( $type eq 'i' ) {
+
+ # never a space before ->
+ if ( $token =~ /^\-\>/ ) {
+ $ws = WS_NO;
+ }
+ }
+
+ # retain any space between '-' and bare word
+ elsif ( $type eq 'w' || $type eq 'C' ) {
+ $ws = WS_OPTIONAL if $last_type eq '-';
+
+ # never a space before ->
+ if ( $token =~ /^\-\>/ ) {
+ $ws = WS_NO;
+ }
+ }
+
+ # retain any space between '-' and bare word
+ # example: avoid space between 'USER' and '-' here:
+ # $myhash{USER-NAME}='steve';
+ elsif ( $type eq 'm' || $type eq '-' ) {
+ $ws = WS_OPTIONAL if ( $last_type eq 'w' );
+ }
+
+ # always space before side comment
+ elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
+
+ # always preserver whatever space was used after a possible
+ # filehandle (except _) or here doc operator
+ if (
+ $type ne '#'
+ && ( ( $last_type eq 'Z' && $last_token ne '_' )
+ || $last_type eq 'h' )
+ )
+ {
+ $ws = WS_OPTIONAL;
+ }
+
+ # space_backslash_quote; RT #123774
+ # allow a space between a backslash and single or double quote
+ # to avoid fooling html formatters
+ elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ ) {
+ if ($rOpts_space_backslash_quote) {
+ if ( $rOpts_space_backslash_quote == 1 ) {
+ $ws = WS_OPTIONAL;
+ }
+ elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES }
+ else { } # shouldnt happen
+ }
+ else {
+ $ws = WS_NO;
+ }
+ }
+
+ my $ws_4;
+ $ws_4 = $ws
+ if FORMATTER_DEBUG_FLAG_WHITE;
+
+ #---------------------------------------------------------------
+ # Whitespace Rules Section 5:
+ # Apply default rules not covered above.
+ #---------------------------------------------------------------
+
+ # If we fall through to here, look at the pre-defined hash tables for
+ # the two tokens, and:
+ # if (they are equal) use the common value
+ # if (either is zero or undef) use the other
+ # if (either is -1) use it
+ # That is,
+ # left vs right
+ # 1 vs 1 --> 1
+ # 0 vs 0 --> 0
+ # -1 vs -1 --> -1
+ #
+ # 0 vs -1 --> -1
+ # 0 vs 1 --> 1
+ # 1 vs 0 --> 1
+ # -1 vs 0 --> -1
+ #
+ # -1 vs 1 --> -1
+ # 1 vs -1 --> -1
+ if ( !defined($ws) ) {
+ my $wl = $want_left_space{$type};
+ my $wr = $want_right_space{$last_type};
+ if ( !defined($wl) ) { $wl = 0 }
+ if ( !defined($wr) ) { $wr = 0 }
+ $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
+ }
+
+ if ( !defined($ws) ) {
+ $ws = 0;
+ write_diagnostics(
+ "WS flag is undefined for tokens $last_token $token\n");
+ }
+
+ # Treat newline as a whitespace. Otherwise, we might combine
+ # 'Send' and '-recipients' here according to the above rules:
+ # my $msg = new Fax::Send
+ # -recipients => $to,
+ # -data => $data;
+ if ( $ws == 0 && $input_line_no != $last_input_line_no ) { $ws = 1 }
+
+ if ( ( $ws == 0 )
+ && $j > 0
+ && $j < $jmax
+ && ( $last_type !~ /^[Zh]$/ ) )
+ {
+
+ # If this happens, we have a non-fatal but undesirable
+ # hole in the above rules which should be patched.
+ write_diagnostics(
+ "WS flag is zero for tokens $last_token $token\n");
+ }
+
+ $rwhitespace_flags->[$j] = $ws;
+
+ FORMATTER_DEBUG_FLAG_WHITE && do {
+ my $str = substr( $last_token, 0, 15 );
+ $str .= ' ' x ( 16 - length($str) );
+ if ( !defined($ws_1) ) { $ws_1 = "*" }
+ if ( !defined($ws_2) ) { $ws_2 = "*" }
+ if ( !defined($ws_3) ) { $ws_3 = "*" }
+ if ( !defined($ws_4) ) { $ws_4 = "*" }
+ print STDOUT
+"NEW WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
+ };
+ } ## end main loop
+
+ if ( $rOpts->{'tight-secret-operators'} ) {
+ new_secret_operator_whitespace( $rLL, $rwhitespace_flags );
+ }
+ return $rwhitespace_flags;
+} ## end sub set_whitespace_flags
+
+sub respace_tokens {
+
+ my $self = shift;
+ return if $rOpts->{'indent-only'};
+
+ # This routine makes all necessary changes to the tokenization after the
+ # file has been read. This consists mostly of inserting and deleting spaces
+ # according to the selected parameters. In a few cases non-space characters
+ # are added, deleted or modified.
+
+ # The old tokens are copied one-by-one, with changes, from the old
+ # linear storage array to a new array.
+
+ my $rLL = $self->{rLL};
+ my $Klimit_old = $self->{Klimit};
+ my $rlines = $self->{rlines};
+ my $rpaired_to_inner_container = $self->{rpaired_to_inner_container};
+
+ my $rLL_new = []; # This is the new array
+ my $KK = 0;
+ my $rtoken_vars;
+ my $Kmax = @{$rLL} - 1;
+
+ # Set the whitespace flags, which indicate the token spacing preference.
+ my $rwhitespace_flags = $self->set_whitespace_flags();
+
+ # we will be setting token lengths as we go
+ my $cumulative_length = 0;
+
+ # We also define these hash indexes giving container token array indexes
+ # as a function of the container sequence numbers. For example,
+ my $K_opening_container = {}; # opening [ { or (
+ my $K_closing_container = {}; # closing ] } or )
+ my $K_opening_ternary = {}; # opening ? of ternary
+ my $K_closing_ternary = {}; # closing : of ternary
+
+ # List of new K indexes of phantom semicolons
+ # This will be needed if we want to undo them for iterations
+ my $rK_phantom_semicolons = [];
+
+ # Temporary hashes for adding semicolons
+ ##my $rKfirst_new = {};
+
+ # a sub to link preceding nodes forward to a new node type
+ my $link_back = sub {
+ my ( $Ktop, $key ) = @_;
+
+ my $Kprev = $Ktop - 1;
+ while ( $Kprev >= 0
+ && !defined( $rLL_new->[$Kprev]->[$key] ) )
+ {
+ $rLL_new->[$Kprev]->[$key] = $Ktop;
+ $Kprev -= 1;
+ }
+ };
+
+ # A sub to store each token in the new array
+ # All new tokens must be stored by this sub so that it can update
+ # all data structures on the fly.
+ my $last_nonblank_type = ';';
+ my $store_token = sub {
+ my ($item) = @_;
+
+ # This will be the index of this item in the new array
+ my $KK_new = @{$rLL_new};
+
+ # check for a sequenced item (i.e., container or ?/:)
+ my $type_sequence = $item->[_TYPE_SEQUENCE_];
+ if ($type_sequence) {
+
+ $link_back->( $KK_new, _KNEXT_SEQ_ITEM_ );
+
+ my $token = $item->[_TOKEN_];
+ if ( $is_opening_token{$token} ) {
+
+ $K_opening_container->{$type_sequence} = $KK_new;
+ }
+ elsif ( $is_closing_token{$token} ) {
+
+ $K_closing_container->{$type_sequence} = $KK_new;
+ }
+
+ # These are not yet used but could be useful
+ else {
+ if ( $token eq '?' ) {
+ $K_opening_ternary->{$type_sequence} = $KK;
+ }
+ elsif ( $token eq ':' ) {
+ $K_closing_ternary->{$type_sequence} = $KK;
+ }
+ else {
+ # shouldn't happen
+ print STDERR "Ugh: shouldn't happen\n";
+ }
+ }
+ }
+
+ # Save the length sum to just BEFORE this token
+ $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
+
+ # now set the length of this token
+ my $token_length = length( $item->[_TOKEN_] );
+
+ # and update the cumulative length
+ $cumulative_length += $token_length;
+
+ my $type = $item->[_TYPE_];
+ if ( $type ne 'b' ) { $last_nonblank_type = $type }
+
+ # and finally, add this item to the new array
+ push @{$rLL_new}, $item;
+ };
+
+ my $add_phantom_semicolon = sub {
+
+ my ($KK) = @_;
+
+ my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
+ return unless ( defined($Kp) );
+
+ # we are only adding semicolons for certain block types
+ my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
+ return
+ unless ( $ok_to_add_semicolon_for_block_type{$block_type}
+ || $block_type =~ /^(sub|package)/
+ || $block_type =~ /^\w+\:$/ );
+
+ my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+
+ my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
+ my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
+
+ # Do not add a semicolon if...
+ return
+ if (
+
+ # it would follow a comment (and be isolated)
+ $previous_nonblank_type eq '#'
+
+ # it follows a code block ( because they are not always wanted
+ # there and may add clutter)
+ || $rLL_new->[$Kp]->[_BLOCK_TYPE_]
+
+ # it would follow a label
+ || $previous_nonblank_type eq 'J'
+
+ # it would be inside a 'format' statement (and cause syntax error)
+ || ( $previous_nonblank_type eq 'k'
+ && $previous_nonblank_token =~ /format/ )
+
+ # if it would prevent welding two containers
+ || $rpaired_to_inner_container->{$type_sequence}
+
+ );
+
+ # We will insert an empty semicolon here as a placeholder.
+ # Later, if it becomes the last token on a line, we will bring it to life.
+ # The advantage of doing this is that (1) we just have to check line endings,
+ # and (2) the phantom semicolon has zero width and therefore won't cause
+ # needless breaks of one-line blocks.
+ my $Ktop = -1;
+ if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
+ && $want_left_space{';'} == WS_NO )
+ {
+
+ # convert the blank into a semicolon..
+ # be careful: we are working on the new stack top
+ # on a token which has been stored.
+ my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', ' ' );
+
+ # Convert the existing blank to a semicolon
+ $rLL_new->[$Ktop]->[_TOKEN_] = ''; # zero length
+ $rLL_new->[$Ktop]->[_TYPE_] = ';';
+ $rLL_new->[$Ktop]->[_SLEVEL_] =
+ $rLL->[$KK]->[_SLEVEL_];
+
+ push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
+
+ # Then store a new blank
+ $store_token->($rcopy);
+ }
+ else {
+
+ # insert a new token
+ my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', '' );
+ $rcopy->[_SLEVEL_] = $rLL->[$KK]->[_SLEVEL_];
+ $store_token->($rcopy);
+ push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
+ }
+ };
+
+ my $check_Q = sub {
+
+ # Check that a quote looks okay
+ # This works but needs to by sync'd with the log file output
+ my ( $KK, $Kfirst ) = @_;
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ note_embedded_tab() if ( $token =~ "\t" );
+
+ my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
+ return unless ( defined($Kp) );
+ my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
+ my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
+
+ my $previous_nonblank_type_2 = 'b';
+ my $previous_nonblank_token_2 = "";
+ my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
+ if ( defined($Kpp) ) {
+ $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_];
+ $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
+ }
+
+ my $Kn = $self->K_next_nonblank($KK);
+ my $next_nonblank_token = "";
+ if ( defined($Kn) ) {
+ $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
+ }
+
+ my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
+ my $type_0 = $rLL->[$Kfirst]->[_TYPE_];
+
+ # make note of something like '$var = s/xxx/yyy/;'
+ # in case it should have been '$var =~ s/xxx/yyy/;'
+ if (
+ $token =~ /^(s|tr|y|m|\/)/
+ && $previous_nonblank_token =~ /^(=|==|!=)$/
+
+ # preceded by simple scalar
+ && $previous_nonblank_type_2 eq 'i'
+ && $previous_nonblank_token_2 =~ /^\$/
+
+ # followed by some kind of termination
+ # (but give complaint if we can not see far enough ahead)
+ && $next_nonblank_token =~ /^[; \)\}]$/
+
+ # scalar is not declared
+ && !( $type_0 eq 'k' && $token_0 =~ /^(my|our|local)$/ )
+ )
+ {
+ my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
+ complain(
+"Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
+ );
+ }
+ };
+
+ # Main loop over all lines of the file
+ my $last_K_out;
+ my $CODE_type = "";
+ my $line_type = "";
+ foreach my $line_of_tokens ( @{$rlines} ) {
+
+ $input_line_number = $line_of_tokens->{_line_number};
+ my $last_line_type = $line_type;
+ $line_type = $line_of_tokens->{_line_type};
+ next unless ( $line_type eq 'CODE' );
+ my $last_CODE_type = $CODE_type;
+ $CODE_type = $line_of_tokens->{_code_type};
+ my $rK_range = $line_of_tokens->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ next unless defined($Kfirst);
+
+ # Check for correct sequence of token indexes...
+ # An error here means that sub write_line() did not correctly
+ # package the tokenized lines as it received them.
+ if ( defined($last_K_out) ) {
+ if ( $Kfirst != $last_K_out + 1 ) {
+ Fault(
+ "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
+ );
+ }
+ }
+ else {
+ if ( $Kfirst != 0 ) {
+ Fault("Program Bug: first K is $Kfirst but should be 0");
+ }
+ }
+ $last_K_out = $Klast;
+
+ # Handle special lines of code
+ if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
+
+ # CODE_types are as follows.
+ # 'BL' = Blank Line
+ # 'VB' = Verbatim - line goes out verbatim
+ # 'FS' = Format Skipping - line goes out verbatim, no blanks
+ # 'IO' = Indent Only - only indentation may be changed
+ # 'NIN' = No Internal Newlines - line does not get broken
+ # 'HSC'=Hanging Side Comment - fix this hanging side comment
+ # 'BC'=Block Comment - an ordinary full line comment
+ # 'SBC'=Static Block Comment - a block comment which does not get
+ # indented
+ # 'SBCX'=Static Block Comment Without Leading Space
+ # 'DEL'=Delete this line
+ # 'VER'=VERSION statement
+ # '' or (undefined) - no restructions
+
+ # For a hanging side comment we insert an empty quote before
+ # the comment so that it becomes a normal side comment and
+ # will be aligned by the vertical aligner
+ if ( $CODE_type eq 'HSC' ) {
+
+ # Safety Check: This must be a line with one token (a comment)
+ my $rtoken_vars = $rLL->[$Kfirst];
+ if ( $Kfirst == $Klast && $rtoken_vars->[_TYPE_] eq '#' ) {
+
+ # Note that even if the flag 'noadd-whitespace' is set, we will
+ # make an exception here and allow a blank to be inserted to push the comment
+ # to the right. We can think of this as an adjustment of indentation
+ # rather than whitespace between tokens. This will also prevent the hanging
+ # side comment from getting converted to a block comment if whitespace
+ # gets deleted, as for example with the -extrude and -mangle options.
+ my $rcopy = copy_token_as_type( $rtoken_vars, 'q', '' );
+ $store_token->($rcopy);
+ $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' );
+ $store_token->($rcopy);
+ $store_token->($rtoken_vars);
+ next;
+ }
+ else {
+
+ # This line was mis-marked by sub scan_comment
+ Fault(
+ "Program bug. A hanging side comment has been mismarked"
+ );
+ }
+ }
+
+ # Copy tokens unchanged
+ foreach my $KK ( $Kfirst .. $Klast ) {
+ $store_token->( $rLL->[$KK] );
+ }
+ next;
+ }
+
+ # Handle normal line..
+
+ # Insert any essential whitespace between lines
+ # if last line was normal CODE
+ my $type_next = $rLL->[$Kfirst]->[_TYPE_];
+ my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
+ my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
+ if ( $last_line_type eq 'CODE'
+ && $type_next ne 'b'
+ && defined($Kp) )
+ {
+ my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
+ my $type_p = $rLL_new->[$Kp]->[_TYPE_];
+
+ my ( $token_pp, $type_pp );
+ my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
+ if ( defined($Kpp) ) {
+ $token_pp = $rLL_new->[$Kpp]->[_TOKEN_];
+ $type_pp = $rLL_new->[$Kpp]->[_TYPE_];
+ }
+ else {
+ $token_pp = ";";
+ $type_pp = ';';
+ }
+
+ if (
+ is_essential_whitespace(
+ $token_pp, $type_pp, $token_p,
+ $type_p, $token_next, $type_next,
+ )
+ )
+ {
+
+ # Copy this first token as blank, but use previous line number
+ my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', ' ' );
+ $rcopy->[_LINE_INDEX_] =
+ $rLL_new->[-1]->[_LINE_INDEX_];
+ $store_token->($rcopy);
+ }
+ }
+
+ # loop to copy all tokens on this line, with any changes
+ my $type_sequence;
+ for ( my $KK = $Kfirst ; $KK <= $Klast ; $KK++ ) {
+ $rtoken_vars = $rLL->[$KK];
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $type = $rtoken_vars->[_TYPE_];
+ my $last_type_sequence = $type_sequence;
+ $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+
+ # Handle a blank space ...
+ if ( $type eq 'b' ) {
+
+ # Delete it if not wanted by whitespace rules
+ # or we are deleting all whitespace
+ # Note that whitespace flag is a flag indicating whether a
+ # white space BEFORE the token is needed
+ next if ( $KK >= $Kmax ); # skip terminal blank
+ my $Knext = $KK + 1;
+ my $ws = $rwhitespace_flags->[$Knext];
+ if ( $ws == -1
+ || $rOpts_delete_old_whitespace )
+ {
+
+ # FIXME: maybe switch to using _new
+ my $Kp = $self->K_previous_nonblank($KK);
+ next unless defined($Kp);
+ my $token_p = $rLL->[$Kp]->[_TOKEN_];
+ my $type_p = $rLL->[$Kp]->[_TYPE_];
+
+ my ( $token_pp, $type_pp );
+
+ #my $Kpp = $K_previous_nonblank->($Kp);
+ my $Kpp = $self->K_previous_nonblank($Kp);
+ if ( defined($Kpp) ) {
+ $token_pp = $rLL->[$Kpp]->[_TOKEN_];
+ $type_pp = $rLL->[$Kpp]->[_TYPE_];
+ }
+ else {
+ $token_pp = ";";
+ $type_pp = ';';
+ }
+ my $token_next = $rLL->[$Knext]->[_TOKEN_];
+ my $type_next = $rLL->[$Knext]->[_TYPE_];
+
+ my $do_not_delete = is_essential_whitespace(
+ $token_pp, $type_pp, $token_p,
+ $type_p, $token_next, $type_next,
+ );
+
+ next unless ($do_not_delete);
+ }
+
+ # make it just one character if allowed
+ if ($rOpts_add_whitespace) {
+ $rtoken_vars->[_TOKEN_] = ' ';
+ }
+ $store_token->($rtoken_vars);
+ next;
+ }
+
+ # Handle a nonblank token...
+
+ # Modify certain tokens here for whitespace
+ # The following is not yet done, but could be:
+ # sub (x x x)
+ if ( $type =~ /^[wit]$/ ) {
+
+ # Examples:
+ # change '$ var' to '$var' etc
+ # '-> new' to '->new'
+ if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
+ $token =~ s/\s*//g;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
+
+ # Split identifiers with leading arrows, inserting blanks if
+ # necessary. It is easier and safer here than in the
+ # tokenizer. For example '->new' becomes two tokens, '->' and
+ # 'new' with a possible blank between.
+ #
+ # Note: there is a related patch in sub set_whitespace_flags
+ if ( $token =~ /^\-\>(.*)$/ && $1 ) {
+ my $token_save = $1;
+ my $type_save = $type;
+
+ # store a blank to left of arrow if necessary
+ my $Kprev = $self->K_previous_nonblank($KK);
+ if ( defined($Kprev)
+ && $rLL->[$Kprev]->[_TYPE_] ne 'b'
+ && $rOpts_add_whitespace
+ && $want_left_space{'->'} == WS_YES )
+ {
+ my $rcopy =
+ copy_token_as_type( $rtoken_vars, 'b', ' ' );
+ $store_token->($rcopy);
+ }
+
+ # then store the arrow
+ my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' );
+ $store_token->($rcopy);
+
+ # then reset the current token to be the remainder,
+ # and reset the whitespace flag according to the arrow
+ $token = $rtoken_vars->[_TOKEN_] = $token_save;
+ $type = $rtoken_vars->[_TYPE_] = $type_save;
+ $store_token->($rtoken_vars);
+ next;
+ }
+
+ if ( $token =~ /$SUB_PATTERN/ ) {
+ $token =~ s/\s+/ /g;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
+
+ # trim identifiers of trailing blanks which can occur
+ # under some unusual circumstances, such as if the
+ # identifier 'witch' has trailing blanks on input here:
+ #
+ # sub
+ # witch
+ # () # prototype may be on new line ...
+ # ...
+ if ( $type eq 'i' ) {
+ $token =~ s/\s+$//g;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
+ }
+
+ # change 'LABEL :' to 'LABEL:'
+ elsif ( $type eq 'J' ) {
+ $token =~ s/\s+//g;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
+
+ # patch to add space to something like "x10"
+ # This avoids having to split this token in the pre-tokenizer
+ elsif ( $type eq 'n' ) {
+ if ( $token =~ /^x\d+/ ) {
+ $token =~ s/x/x /;
+ $rtoken_vars->[_TOKEN_] = $token;
+ }
+ }
+
+ # check a quote for problems
+ elsif ( $type eq 'Q' ) {
+
+ # This is ready to go but is commented out because there is
+ # still identical logic in sub break_lines.
+ # $check_Q->($KK, $Kfirst);
+ }
+
+ # trim blanks from right of qw quotes
+ # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this)
+ elsif ( $type eq 'q' ) {
+ $token =~ s/\s*$//;
+ $rtoken_vars->[_TOKEN_] = $token;
+ note_embedded_tab() if ( $token =~ "\t" );
+ }
+
+ elsif ($type_sequence) {
+
+ # if ( $is_opening_token{$token} ) {
+ # }
+
+ if ( $is_closing_token{$token} ) {
+
+ # Insert a tentative missing semicolon if the next token is
+ # a closing block brace
+ if (
+ $type eq '}'
+ && $token eq '}'
+
+ # not preceded by a ';'
+ && $last_nonblank_type ne ';'
+
+ # and this is not a VERSION stmt (is all one line, we are not
+ # inserting semicolons on one-line blocks)
+ && $CODE_type ne 'VER'
+
+ # and we are allowed to add semicolons
+ && $rOpts->{'add-semicolons'}
+ )
+ {
+ $add_phantom_semicolon->($KK);
+ }
+ }
+ }
+
+ # Insert any needed whitespace
+ if ( @{$rLL_new}
+ && $rLL_new->[-1]->[_TYPE_] ne 'b'
+ && $rOpts_add_whitespace )
+ {
+ my $ws = $rwhitespace_flags->[$KK];
+ if ( $ws == 1 ) {
+
+ my $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' );
+ $rcopy->[_LINE_INDEX_] =
+ $rLL_new->[-1]->[_LINE_INDEX_];
+ $store_token->($rcopy);
+ }
+ }
+ $store_token->($rtoken_vars);
+ } # End token loop
+ } # End line loop
+
+ # Reset memory to be the new array
+ $self->{rLL} = $rLL_new;
+ $self->set_rLL_max_index();
+ $self->{K_opening_container} = $K_opening_container;
+ $self->{K_closing_container} = $K_closing_container;
+ $self->{K_opening_ternary} = $K_opening_ternary;
+ $self->{K_closing_ternary} = $K_closing_ternary;
+ $self->{rK_phantom_semicolons} = $rK_phantom_semicolons;
+
+ # make sure the new array looks okay
+ $self->check_token_array();
+
+ # reset the token limits of each line
+ $self->resync_lines_and_tokens();
+
+ return;
+}
+
+{ # scan_comments
+
+ my $Last_line_had_side_comment;
+ my $In_format_skipping_section;
+ my $Saw_VERSION_in_this_file;
+
+ sub scan_comments {
+ my $self = shift;
+ my $rlines = $self->{rlines};
+
+ $Last_line_had_side_comment = undef;
+ $In_format_skipping_section = undef;
+ $Saw_VERSION_in_this_file = undef;
+
+ # Loop over all lines
+ foreach my $line_of_tokens ( @{$rlines} ) {
+ my $line_type = $line_of_tokens->{_line_type};
+ next unless ( $line_type eq 'CODE' );
+ my $CODE_type = $self->get_CODE_type($line_of_tokens);
+ $line_of_tokens->{_code_type} = $CODE_type;
+ }
+ return;
+ }
+
+ sub get_CODE_type {
+ my ( $self, $line_of_tokens ) = @_;
+
+ # We are looking at a line of code and setting a flag to
+ # describe any special processing that it requires
+
+ # Possible CODE_types are as follows.
+ # 'BL' = Blank Line
+ # 'VB' = Verbatim - line goes out verbatim
+ # 'IO' = Indent Only - line goes out unchanged except for indentation
+ # 'NIN' = No Internal Newlines - line does not get broken
+ # 'HSC'=Hanging Side Comment - fix this hanging side comment
+ # 'BC'=Block Comment - an ordinary full line comment
+ # 'SBC'=Static Block Comment - a block comment which does not get
+ # indented
+ # 'SBCX'=Static Block Comment Without Leading Space
+ # 'DEL'=Delete this line
+ # 'VER'=VERSION statement
+ # '' or (undefined) - no restructions
+
+ my $rLL = $self->{rLL};
+ my $Klimit = $self->{Klimit};
+
+ my $CODE_type = $rOpts->{'indent-only'} ? 'IO' : "";
+ my $no_internal_newlines = 1 - $rOpts_add_newlines;
+ if ( !$CODE_type && $no_internal_newlines ) { $CODE_type = 'NIN' }
+
+ # extract what we need for this line..
+
+ # Global value for error messages:
+ $input_line_number = $line_of_tokens->{_line_number};
+
+ my $rK_range = $line_of_tokens->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ my $jmax = -1;
+ if ( defined($Kfirst) ) { $jmax = $Klast - $Kfirst }
+ my $input_line = $line_of_tokens->{_line_text};
+ my $in_continued_quote = my $starting_in_quote =
+ $line_of_tokens->{_starting_in_quote};
+ my $in_quote = $line_of_tokens->{_ending_in_quote};
+ my $ending_in_quote = $in_quote;
+ my $guessed_indentation_level =
+ $line_of_tokens->{_guessed_indentation_level};
+
+ my $is_static_block_comment = 0;
+
+ # Handle a continued quote..
+ if ($in_continued_quote) {
+
+ # A line which is entirely a quote or pattern must go out
+ # verbatim. Note: the \n is contained in $input_line.
+ if ( $jmax <= 0 ) {
+ if ( ( $input_line =~ "\t" ) ) {
+ note_embedded_tab();
+ }
+ $Last_line_had_side_comment = 0;
+ return 'VB';
+ }
+ }
+
+ my $is_block_comment =
+ ( $jmax == 0 && $rLL->[$Kfirst]->[_TYPE_] eq '#' );
+
+ # Write line verbatim if we are in a formatting skip section
+ if ($In_format_skipping_section) {
+ $Last_line_had_side_comment = 0;
+
+ # Note: extra space appended to comment simplifies pattern matching
+ if ( $is_block_comment
+ && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
+ /$format_skipping_pattern_end/o )
+ {
+ $In_format_skipping_section = 0;
+ write_logfile_entry("Exiting formatting skip section\n");
+ }
+ return 'FS';
+ }
+
+ # See if we are entering a formatting skip section
+ if ( $rOpts_format_skipping
+ && $is_block_comment
+ && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
+ /$format_skipping_pattern_begin/o )
+ {
+ $In_format_skipping_section = 1;
+ write_logfile_entry("Entering formatting skip section\n");
+ $Last_line_had_side_comment = 0;
+ return 'FS';
+ }
+
+ # ignore trailing blank tokens (they will get deleted later)
+ if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
+ $jmax--;
+ }
+
+ # Handle a blank line..
+ if ( $jmax < 0 ) {
+ $Last_line_had_side_comment = 0;
+ return 'BL';
+ }
+
+ # see if this is a static block comment (starts with ## by default)
+ my $is_static_block_comment_without_leading_space = 0;
+ if ( $is_block_comment
+ && $rOpts->{'static-block-comments'}
+ && $input_line =~ /$static_block_comment_pattern/o )
+ {
+ $is_static_block_comment = 1;
+ $is_static_block_comment_without_leading_space =
+ substr( $input_line, 0, 1 ) eq '#';
+ }
+
+ # Check for comments which are line directives
+ # Treat exactly as static block comments without leading space
+ # reference: perlsyn, near end, section Plain Old Comments (Not!)
+ # example: '# line 42 "new_filename.plx"'
+ if (
+ $is_block_comment
+ && $input_line =~ /^\# \s*
+ line \s+ (\d+) \s*
+ (?:\s("?)([^"]+)\2)? \s*
+ $/x
+ )
+ {
+ $is_static_block_comment = 1;
+ $is_static_block_comment_without_leading_space = 1;
+ }
+
+ # look for hanging side comment
+ if (
+ $is_block_comment
+ && $Last_line_had_side_comment # last line had side comment
+ && $input_line =~ /^\s/ # there is some leading space
+ && !$is_static_block_comment # do not make static comment hanging
+ && $rOpts->{'hanging-side-comments'} # user is allowing
+ # hanging side comments
+ # like this
+ )
+ {
+ $Last_line_had_side_comment = 1;
+ return 'HSC';
+ }
+
+ # remember if this line has a side comment
+ $Last_line_had_side_comment =
+ ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq '#' );
+
+ # Handle a block (full-line) comment..
+ if ($is_block_comment) {
+
+ if ( $rOpts->{'delete-block-comments'} ) { return 'DEL' }
+
+ # TRIM COMMENTS -- This could be turned off as a option
+ $rLL->[$Kfirst]->[_TOKEN_] =~ s/\s*$//; # trim right end
+
+ if ($is_static_block_comment_without_leading_space) {
+ return 'SBCX';
+ }
+ elsif ($is_static_block_comment) {
+ return 'SBC';
+ }
+ else {
+ return 'BC';
+ }
+ }
+
+=pod
+ # NOTE: This does not work yet. Version in print-line-of-tokens
+ # is Still used until fixed
+
+ # compare input/output indentation except for continuation lines
+ # (because they have an unknown amount of initial blank space)
+ # and lines which are quotes (because they may have been outdented)
+ # Note: this test is placed here because we know the continuation flag
+ # at this point, which allows us to avoid non-meaningful checks.
+ my $structural_indentation_level = $rLL->[$Kfirst]->[_LEVEL_];
+ compare_indentation_levels( $guessed_indentation_level,
+ $structural_indentation_level )
+ unless ( $rLL->[$Kfirst]->[_CI_LEVEL_] > 0
+ || $guessed_indentation_level == 0
+ && $rLL->[$Kfirst]->[_TYPE_] eq 'Q' );
+=cut
+
+ # Patch needed for MakeMaker. Do not break a statement
+ # in which $VERSION may be calculated. See MakeMaker.pm;
+ # this is based on the coding in it.
+ # The first line of a file that matches this will be eval'd:
+ # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
+ # Examples:
+ # *VERSION = \'1.01';
+ # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
+ # We will pass such a line straight through without breaking
+ # it unless -npvl is used.
+
+ # Patch for problem reported in RT #81866, where files
+ # had been flattened into a single line and couldn't be
+ # tidied without -npvl. There are two parts to this patch:
+ # First, it is not done for a really long line (80 tokens for now).
+ # Second, we will only allow up to one semicolon
+ # before the VERSION. We need to allow at least one semicolon
+ # for statements like this:
+ # require Exporter; our $VERSION = $Exporter::VERSION;
+ # where both statements must be on a single line for MakeMaker
+
+ my $is_VERSION_statement = 0;
+ if ( !$Saw_VERSION_in_this_file
+ && $jmax < 80
+ && $input_line =~
+ /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
+ {
+ $Saw_VERSION_in_this_file = 1;
+ write_logfile_entry("passing VERSION line; -npvl deactivates\n");
+ $CODE_type = 'VER';
+ }
+ return $CODE_type;
+ }
+}
+
+sub find_nested_pairs {
+ my $self = shift;
+
+ my $rLL = $self->{rLL};
+ return unless ( defined($rLL) && @{$rLL} );
+
+ # We define an array of pairs of nested containers
+ my @nested_pairs;
+
+ # We also set the following hash values to identify container pairs for
+ # which the opening and closing tokens are adjacent in the token stream:
+ # $rpaired_to_inner_container->{$seqno_out}=$seqno_in where $seqno_out and
+ # $seqno_in are the seqence numbers of the outer and inner containers of
+ # the pair We need these later to decide if we can insert a missing
+ # semicolon
+ my $rpaired_to_inner_container = {};
+
+ # This local hash remembers if an outer container has a close following
+ # inner container;
+ # The key is the outer sequence number
+ # The value is the token_hash of the inner container
+
+ my %has_close_following_opening;
+
+ # Names of calling routines can either be marked as 'i' or 'w',
+ # and they may invoke a sub call with an '->'. We will consider
+ # any consecutive string of such types as a single unit when making
+ # weld decisions. We also allow a leading !
+ my $is_name_type = {
+ 'i' => 1,
+ 'w' => 1,
+ 'U' => 1,
+ '->' => 1,
+ '!' => 1,
+ };
+
+ my $is_name = sub {
+ my $type = shift;
+ return $type && $is_name_type->{$type};
+ };
+
+ my $last_container;
+ my $last_last_container;
+ my $last_nonblank_token_vars;
+ my $last_count;
+
+ my $nonblank_token_count = 0;
+
+ # loop over all tokens
+ foreach my $rtoken_vars ( @{$rLL} ) {
+
+ my $type = $rtoken_vars->[_TYPE_];
+
+ next if ( $type eq 'b' );
+
+ # long identifier-like items are counted as a single item
+ $nonblank_token_count++
+ unless ( $is_name->($type)
+ && $is_name->( $last_nonblank_token_vars->[_TYPE_] ) );
+
+ my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+ if ($type_sequence) {
+
+ my $token = $rtoken_vars->[_TOKEN_];
+
+ if ( $is_opening_token{$token} ) {
+
+ # following previous opening token ...
+ if ( $last_container
+ && $is_opening_token{ $last_container->[_TOKEN_] } )
+ {
+
+ # adjacent to this one
+ my $tok_diff = $nonblank_token_count - $last_count;
+
+ my $last_tok = $last_nonblank_token_vars->[_TOKEN_];
+
+ if ( $tok_diff == 1
+ || $tok_diff == 2 && $last_container->[_TOKEN_] eq '(' )
+ {
+
+ # remember this pair...
+ my $outer_seqno = $last_container->[_TYPE_SEQUENCE_];
+ my $inner_seqno = $type_sequence;
+ $has_close_following_opening{$outer_seqno} =
+ $rtoken_vars;
+ }
+ }
+ }
+
+ elsif ( $is_closing_token{$token} ) {
+
+ # if the corresponding opening token had an adjacent opening
+ if ( $has_close_following_opening{$type_sequence}
+ && $is_closing_token{ $last_container->[_TOKEN_] }
+ && $has_close_following_opening{$type_sequence}
+ ->[_TYPE_SEQUENCE_] == $last_container->[_TYPE_SEQUENCE_] )
+ {
+
+ # The closing weld tokens must be adjacent
+ # NOTE: so intermediate commas and semicolons
+ # can currently block a weld. This is something
+ # that could be fixed in the future by including
+ # a flag to delete un-necessary commas and semicolons.
+ my $tok_diff = $nonblank_token_count - $last_count;
+
+ if ( $tok_diff == 1 ) {
+
+ # This is a closely nested pair ..
+ my $inner_seqno = $last_container->[_TYPE_SEQUENCE_];
+ my $outer_seqno = $type_sequence;
+ $rpaired_to_inner_container->{$outer_seqno} =
+ $inner_seqno;
+
+ push @nested_pairs, [ $inner_seqno, $outer_seqno ];
+ }
+ }
+ }
+
+ $last_last_container = $last_container;
+ $last_container = $rtoken_vars;
+ $last_count = $nonblank_token_count;
+ }
+ $last_nonblank_token_vars = $rtoken_vars;
+ }
+ $self->{rnested_pairs} = \@nested_pairs;
+ $self->{rpaired_to_inner_container} = $rpaired_to_inner_container;
+ return;
+}
+
+sub dump_tokens {
+
+ # a debug routine, not normally used
+ my ( $self, $msg ) = @_;
+ my $rLL = $self->{rLL};
+ my $nvars = @{$rLL};
+ print STDERR "$msg\n";
+ print STDERR "ntokens=$nvars\n";
+ print STDERR "K\t_TOKEN_\t_TYPE_\n";
+ my $K = 0;
+ foreach my $item ( @{$rLL} ) {
+ print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n";
+ $K++;
+ }
+}
+
+sub K_next_nonblank {
+ my ( $self, $KK, $rLL ) = @_;
+
+ # return the index K of the next nonblank token
+ return unless ( defined($KK) && $KK >= 0 );
+ $rLL = $self->{rLL} unless ( defined($rLL) );
+ my $Num = @{$rLL};
+ my $Knnb = $KK + 1;
+ while ( $Knnb < $Num ) {
+ if ( !defined( $rLL->[$Knnb] ) ) {
+ Fault("Undefined entry for k=$Knnb");
+ }
+ if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
+ $Knnb++;
+ }
+ return;
+}
+
+sub K_previous_nonblank {
+
+ # return index of previous nonblank token before item K
+ # Call with $KK=undef to start search at the top of the array
+ my ( $self, $KK, $rLL ) = @_;
+ $rLL = $self->{rLL} unless ( defined($rLL) );
+ my $Num = @{$rLL};
+ if ( !defined($KK) ) { $KK = $Num }
+ elsif ( $KK > $Num ) {
+
+ # The caller should make the first call with KK_new=undef to
+ # avoid this error
+ Fault(
+"Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
+ );
+ }
+ my $Kpnb = $KK - 1;
+ while ( $Kpnb >= 0 ) {
+ if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
+ $Kpnb--;
+ }
+ return;
+}
+
+sub weld_containers {
+
+ # do any welding operations
+ my $self = shift;
+
+ # initialize weld length hashes needed later for checking line lengths
+ # TODO: These should eventually be stored in $self rather than be package vars
+ %weld_len_left_closing = ();
+ %weld_len_right_closing = ();
+ %weld_len_left_opening = ();
+ %weld_len_right_opening = ();
+
+ return if ( $rOpts->{'indent-only'} );
+ return unless ($rOpts_add_newlines);
+
+ $self->weld_nested_containers()
+ if $rOpts->{'weld-nested-containers'};
+
+ # Note that these two calls are order-dependent.
+ # sub weld_nested_containers() must be called before sub
+ # weld_cuddled_blocks(). This is because it is more complex and could
+ # overwrite the %weld_len_... hash values written by weld_cuddled_blocks().
+ # sub weld_cuddled_blocks(), on the other hand, is much simpler and will
+ # not overwrite the values written by weld_nested_containers. But
+ # note that weld_nested_containers() changes the _LEVEL_ values, so
+ # weld_cuddled_blocks must use the _TRUE_LEVEL_ values instead.
+
+ # Here is a good test case to Be sure that both cuddling and welding
+ # are working and not interfering with each other:
+
+ # perltidy -wn -cb -cbl='if-elsif-else'
+
+ # if ($BOLD_MATH) { (
+ # $labels, $comment,
+ # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
+ # ) } else { (
+ # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
+ # $after
+ # ) }
+
+ $self->weld_cuddled_blocks()
+ if $rOpts->{'cuddled-blocks'};
+
+ return;
+}
+
+sub weld_cuddled_blocks {
+ my $self = shift;
+
+ # This routine implements the -cb flag by finding the appropriate
+ # closing and opening block braces and welding them together.
+
+ my $rLL = $self->{rLL};
+ return unless ( defined($rLL) && @{$rLL} );
+ my $rbreak_container = $self->{rbreak_container};
+
+ my $K_opening_container = $self->{K_opening_container};
+ my $K_closing_container = $self->{K_closing_container};
+
+ my $length_to_opening_seqno = sub {
+ my ($seqno) = @_;
+ my $KK = $K_opening_container->{$seqno};
+ my $lentot = $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
+ return $lentot;
+ };
+ my $length_to_closing_seqno = sub {
+ my ($seqno) = @_;
+ my $KK = $K_closing_container->{$seqno};
+ my $lentot = $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
+ return $lentot;
+ };
+
+ my $is_broken_block = sub {
+
+ # a block is broken if the input line numbers of the braces differ
+ # we can only cuddle between broken blocks
+ my ($seqno) = @_;
+ my $K_opening = $K_opening_container->{$seqno};
+ return unless ( defined($K_opening) );
+ my $K_closing = $K_closing_container->{$seqno};
+ return unless ( defined($K_closing) );
+ return $rbreak_container->{$seqno}
+ || $rLL->[$K_closing]->[_LINE_INDEX_] !=
+ $rLL->[$K_opening]->[_LINE_INDEX_];
+ };
+
+ # A stack to remember open chains at all levels:
+ # $in_chain[$level] = [$chain_type, $type_sequence];
+ my @in_chain;
+ my $CBO = $rOpts->{'cuddled-break-option'};
+
+ # loop over structure items to find cuddled pairs
+ my $level = 0;
+ my $KK = 0;
+ while ( defined( $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] ) ) {
+ my $rtoken_vars = $rLL->[$KK];
+ my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+ if ( !$type_sequence ) {
+ Fault("sequence = $type_sequence not defined");
+ }
+
+ # We use the original levels because they get changed by sub
+ # 'weld_nested_containers'. So if this were to be called before that
+ # routine, the levels would be wrong and things would go bad.
+ my $last_level = $level;
+ $level = $rtoken_vars->[_LEVEL_TRUE_];
+
+ if ( $level < $last_level ) { $in_chain[$last_level] = undef }
+ elsif ( $level > $last_level ) { $in_chain[$level] = undef }
+
+ # We are only looking at code blocks
+ my $token = $rtoken_vars->[_TOKEN_];
+ my $type = $rtoken_vars->[_TYPE_];
+ next unless ( $type eq $token );
+
+ if ( $token eq '{' ) {
+
+ my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
+ if ( !$block_type ) {
+
+ # patch for unrecognized block types which may not be labeled
+ my $Kp = $self->K_previous_nonblank($KK);
+ while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
+ $Kp = $self->K_previous_nonblank($Kp);
+ }
+ next unless $Kp;
+ $block_type = $rLL->[$Kp]->[_TOKEN_];
+ }
+ if ( $in_chain[$level] ) {
+
+ # we are in a chain and are at an opening block brace.
+ # See if we are welding this opening brace with the previous
+ # block brace. Get their identification numbers:
+ my $closing_seqno = $in_chain[$level]->[1];
+ my $opening_seqno = $type_sequence;
+
+ # The preceding block must be on multiple lines so that its
+ # closing brace will start a new line.
+ if ( !$is_broken_block->($closing_seqno) ) {
+ next unless ( $CBO == 2 );
+ $rbreak_container->{$closing_seqno} = 1;
+ }
+
+ # we will let the trailing block be either broken or intact
+ ## && $is_broken_block->($opening_seqno);
+
+ # We can weld the closing brace to its following word ..
+ my $Ko = $K_closing_container->{$closing_seqno};
+ my $Kon = $self->K_next_nonblank($Ko);
+
+ # ..unless it is a comment
+ if ( $rLL->[$Kon]->[_TYPE_] ne '#' ) {
+ my $dlen =
+ $rLL->[ $Kon + 1 ]->[_CUMULATIVE_LENGTH_] -
+ $rLL->[$Ko]->[_CUMULATIVE_LENGTH_];
+ $weld_len_right_closing{$closing_seqno} = $dlen;
+
+ # Set flag that we want to break the next container
+ # so that the cuddled line is balanced.
+ $rbreak_container->{$opening_seqno} = 1
+ if ($CBO);
+ }
+
+ }
+ else {
+
+ # We are not in a chain. Start a new chain if we see the
+ # starting block type.
+ if ( $rcuddled_block_types->{$block_type} ) {
+ $in_chain[$level] = [ $block_type, $type_sequence ];
+ }
+ else {
+ $block_type = '*';
+ $in_chain[$level] = [ $block_type, $type_sequence ];
+ }
+ }
+ }
+ elsif ( $token eq '}' ) {
+ if ( $in_chain[$level] ) {
+
+ # We are in a chain at a closing brace. See if this chain
+ # continues..
+ my $Knn = $self->K_next_nonblank($KK);
+
+ # skip past comments
+ while ( $Knn && $rLL->[$Knn]->[_TYPE_] eq '#' ) {
+ $Knn = $self->K_next_nonblank($Knn);
+ }
+ next unless $Knn;
+
+ my $chain_type = $in_chain[$level]->[0];
+ my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
+ if (
+ $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
+ )
+ {
+
+ # Note that we do not weld yet because we must wait until
+ # we we are sure that an opening brace for this follows.
+ $in_chain[$level]->[1] = $type_sequence;
+ }
+ else { $in_chain[$level] = undef }
+ }
+ }
+ }
+
+ return;
+}
+
+sub weld_nested_containers {
+ my $self = shift;
+
+ # This routine implements the -wn flag by "welding together"
+ # the nested closing and opening tokens which were previously
+ # identified by sub 'find_nested_pairs'. "welding" simply
+ # involves setting certain hash values which will be checked
+ # later during formatting.
+
+ my $rLL = $self->{rLL};
+ my $Klimit = $self->get_rLL_max_index();
+ my $rnested_pairs = $self->{rnested_pairs};
+ my $rlines = $self->{rlines};
+ my $K_opening_container = $self->{K_opening_container};
+ my $K_closing_container = $self->{K_closing_container};
+
+ # Return unless there are nested pairs to weld
+ return unless defined($rnested_pairs) && @{$rnested_pairs};
+
+ # This array will hold the sequence numbers of the tokens to be welded.
+ my @welds;
+
+ # Variables needed for estimating line lengths
+ my $starting_indent;
+ my $starting_lentot;
+
+ # A tolerance to the length for length estimates. In some rare cases
+ # this can avoid problems where a final weld slightly exceeds the
+ # line length and gets broken in a bad spot.
+ my $length_tol = 1;
+
+ my $excess_length_to = sub {
+ my ($rtoken_hash) = @_;
+
+ # Estimate the length from the line start to a given token
+ my $length = $rtoken_hash->[_CUMULATIVE_LENGTH_] - $starting_lentot;
+
+ my $excess_length =
+ $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
+ return ($excess_length);
+ };
+ my $length_to_opening_seqno = sub {
+ my ($seqno) = @_;
+ my $KK = $K_opening_container->{$seqno};
+ my $lentot = $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
+ return $lentot;
+ };
+ my $length_to_closing_seqno = sub {
+ my ($seqno) = @_;
+ my $KK = $K_closing_container->{$seqno};
+ my $lentot = $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
+ return $lentot;
+ };
+
+ # Abbreviations:
+ # _oo=outer opening, i.e. first of { {
+ # _io=inner opening, i.e. second of { {
+ # _oc=outer closing, i.e. second of } {
+ # _ic=inner closing, i.e. first of } }
+
+ my $previous_pair;
+
+ # We are working from outermost to innermost pairs so that
+ # level changes will be complete when we arrive at the inner pairs.
+
+ while ( my $item = pop( @{$rnested_pairs} ) ) {
+ my ( $inner_seqno, $outer_seqno ) = @{$item};
+
+ my $Kouter_opening = $K_opening_container->{$outer_seqno};
+ my $Kinner_opening = $K_opening_container->{$inner_seqno};
+ my $Kouter_closing = $K_closing_container->{$outer_seqno};
+ my $Kinner_closing = $K_closing_container->{$inner_seqno};
+
+ my $outer_opening = $rLL->[$Kouter_opening];
+ my $inner_opening = $rLL->[$Kinner_opening];
+ my $outer_closing = $rLL->[$Kouter_closing];
+ my $inner_closing = $rLL->[$Kinner_closing];
+
+ my $iline_oo = $outer_opening->[_LINE_INDEX_];
+ my $iline_io = $inner_opening->[_LINE_INDEX_];
+
+ # Set flag saying if this pair starts a new weld
+ my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
+
+ # Set flag saying if this pair is adjacent to the previous nesting pair
+ # (even if previous pair was rejected as a weld)
+ my $touch_previous_pair =
+ defined($previous_pair) && $outer_seqno == $previous_pair->[0];
+ $previous_pair = $item;
+
+ # Set a flag if we should not weld. It sometimes looks best not to weld
+ # when the opening and closing tokens are very close. However, there
+ # is a danger that we will create a "blinker", which oscillates between
+ # two semi-stable states, if we do not weld. So the rules for
+ # not welding have to be carefully defined and tested.
+ my $do_not_weld;
+ if ( !$touch_previous_pair ) {
+
+ # If this pair is not adjacent to the previous pair (skipped or
+ # not), then measure lengths from the start of line of oo
+
+ my $rK_range = $rlines->[$iline_oo]->{_rK_range};
+ my ( $Kfirst, $Klast ) = @{$rK_range};
+ $starting_lentot = $rLL->[$Kfirst]->[_CUMULATIVE_LENGTH_];
+ $starting_indent = 0;
+ if ( !$rOpts_variable_maximum_line_length ) {
+ my $level = $rLL->[$Kfirst]->[_LEVEL_];
+ $starting_indent = $rOpts_indent_columns * $level;
+ }
+
+ # DO-NOT-WELD RULE 1:
+ # Do not weld something that looks like the start of a two-line
+ # function call, like this:
+ # $trans->add_transformation(
+ # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
+ # We will look for a semicolon after the closing paren.
+
+ # We want to weld something complex, like this though
+ # my $compass = uc( opposite_direction( line_to_canvas_direction(
+ # @{ $coords[0] }, @{ $coords[1] } ) ) );
+ # Otherwise we will get a 'blinker'
+
+ my $iline_oc = $outer_closing->[_LINE_INDEX_];
+ if ( $iline_oc <= $iline_oo + 1 ) {
+
+ # Look for following semicolon...
+ my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
+ my $next_nonblank_type =
+ defined($Knext_nonblank)
+ ? $rLL->[$Knext_nonblank]->[_TYPE_]
+ : 'b';
+ if ( $next_nonblank_type eq ';' ) {
+
+ # Then do not weld if no other containers between inner
+ # opening and closing.
+ my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
+ if ( $Knext_seq_item == $Kinner_closing ) {
+ $do_not_weld ||= 1;
+ }
+ }
+ }
+ }
+
+ my $iline_ic = $inner_closing->[_LINE_INDEX_];
+
+ # DO-NOT-WELD RULE 2:
+ # Do not weld an opening paren to an inner one line brace block
+ # We will just use old line numbers for this test and require
+ # iterations if necessary for convergence
+
+ # For example, otherwise we could cause the opening paren
+ # in the following example to separate from the caller name
+ # as here:
+
+ # $_[0]->code_handler
+ # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
+
+ # Here is another example where we do not want to weld:
+ # $wrapped->add_around_modifier(
+ # sub { push @tracelog => 'around 1'; $_[0]->(); } );
+
+ # If the one line sub block gets broken due to length or by the
+ # user, then we can weld. The result will then be:
+ # $wrapped->add_around_modifier( sub {
+ # push @tracelog => 'around 1';
+ # $_[0]->();
+ # } );
+
+ if ( $iline_ic == $iline_io ) {
+
+ my $token_oo = $outer_opening->[_TOKEN_];
+ my $block_type_io = $inner_opening->[_BLOCK_TYPE_];
+ my $token_io = $inner_opening->[_TOKEN_];
+ $do_not_weld ||= $token_oo eq '(' && $token_io eq '{';
+ }
+
+ # DO-NOT-WELD RULE 3:
+ # Do not weld if this makes our line too long
+ $do_not_weld ||= $excess_length_to->($inner_opening) > 0;
+
+ if ($do_not_weld) {
+
+ # After neglecting a pair, we start measuring from start of point io
+ $starting_lentot = $inner_opening->[_CUMULATIVE_LENGTH_];
+ $starting_indent = 0;
+ if ( !$rOpts_variable_maximum_line_length ) {
+ my $level = $inner_opening->[_LEVEL_];
+ $starting_indent = $rOpts_indent_columns * $level;
+ }
+
+ # Normally, a broken pair should not decrease indentation of
+ # intermediate tokens:
+ ## if ( $last_pair_broken ) { next }
+ # However, for long strings of welded tokens, such as '{{{{{{...'
+ # we will allow broken pairs to also remove indentation.
+ # This will keep very long strings of opening and closing
+ # braces from marching off to the right. We will do this if the
+ # number of tokens in a weld before the broken weld is 4 or more.
+ # This rule will mainly be needed for test scripts, since typical
+ # welds have fewer than about 4 welded tokens.
+ if ( !@welds || @{ $welds[-1] } < 4 ) { next }
+ }
+
+ # otherwise start new weld ...
+ elsif ($starting_new_weld) {
+ push @welds, $item;
+ }
+
+ # ... or extend current weld
+ else {
+ unshift @{ $welds[-1] }, $inner_seqno;
+ }
+
+ ########################################################################
+ # After welding, reduce the indentation level if all intermediate tokens
+ ########################################################################
+
+ my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
+ if ( $dlevel != 0 ) {
+ my $Kstart = $Kinner_opening;
+ my $Kstop = $Kinner_closing;
+ for ( my $KK = $Kstart ; $KK <= $Kstop ; $KK++ ) {
+ $rLL->[$KK]->[_LEVEL_] += $dlevel;
+ }
+ }
+ }
+
+ #####################################################
+ # Define weld lengths needed later to set line breaks
+ #####################################################
+ foreach my $item (@welds) {
+
+ # sweep from inner to outer
+
+ my $inner_seqno;
+ my $len_close = 0;
+ my $len_open = 0;
+ foreach my $outer_seqno ( @{$item} ) {
+ if ($inner_seqno) {
+
+ my $dlen_opening =
+ $length_to_opening_seqno->($inner_seqno) -
+ $length_to_opening_seqno->($outer_seqno);
+
+ my $dlen_closing =
+ $length_to_closing_seqno->($outer_seqno) -
+ $length_to_closing_seqno->($inner_seqno);
+
+ $len_open += $dlen_opening;
+ $len_close += $dlen_closing;
+
+ }
+
+ $weld_len_left_closing{$outer_seqno} = $len_close;
+ $weld_len_right_opening{$outer_seqno} = $len_open;
+
+ $inner_seqno = $outer_seqno;
+ }
+
+ # sweep from outer to inner
+ foreach my $seqno ( reverse @{$item} ) {
+ $weld_len_right_closing{$seqno} =
+ $len_close - $weld_len_left_closing{$seqno};
+ $weld_len_left_opening{$seqno} =
+ $len_open - $weld_len_right_opening{$seqno};
+ }
+ }
+
+ #####################################
+ # DEBUG
+ #####################################
+ if (0) {
+ my $count = 0;
+ local $" = ')(';
+ foreach my $weld (@welds) {
+ print "\nWeld number $count has seq: (@{$weld})\n";
+ foreach my $seq ( @{$weld} ) {
+ print <<EOM;
+ seq=$seq
+ left_opening=$weld_len_left_opening{$seq};
+ right_opening=$weld_len_right_opening{$seq};
+ left_closing=$weld_len_left_closing{$seq};
+ right_closing=$weld_len_right_closing{$seq};
+EOM
+ }
+
+ $count++;
}
- print_line_of_tokens($line_of_tokens);
}
+ return;
+}
- # handle line of non-code..
- else {
+sub weld_len_left {
- # set special flags
- my $skip_line = 0;
- my $tee_line = 0;
- if ( $line_type =~ /^POD/ ) {
-
- # Pod docs should have a preceding blank line. But be
- # very careful in __END__ and __DATA__ sections, because:
- # 1. the user may be using this section for any purpose whatsoever
- # 2. the blank counters are not active there
- # It should be safe to request a blank line between an
- # __END__ or __DATA__ and an immediately following '=head'
- # type line, (types END_START and DATA_START), but not for
- # any other lines of type END or DATA.
- if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
- if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; }
- if ( !$skip_line
- && $line_type eq 'POD_START'
- && $last_line_type !~ /^(END|DATA)$/ )
- {
- want_blank_line();
+ my ( $seqno, $type_or_tok ) = @_;
+
+ # Given the sequence number of a token, and the token or its type,
+ # return the length of any weld to its left
+
+ my $weld_len;
+ if ($seqno) {
+ if ( $is_closing_type{$type_or_tok} ) {
+ $weld_len = $weld_len_left_closing{$seqno};
+ }
+ elsif ( $is_opening_type{$type_or_tok} ) {
+ $weld_len = $weld_len_left_opening{$seqno};
+ }
+ }
+ if ( !defined($weld_len) ) { $weld_len = 0 }
+ return $weld_len;
+}
+
+sub weld_len_right {
+
+ my ( $seqno, $type_or_tok ) = @_;
+
+ # Given the sequence number of a token, and the token or its type,
+ # return the length of any weld to its right
+
+ my $weld_len;
+ if ($seqno) {
+ if ( $is_closing_type{$type_or_tok} ) {
+ $weld_len = $weld_len_right_closing{$seqno};
+ }
+ elsif ( $is_opening_type{$type_or_tok} ) {
+ $weld_len = $weld_len_right_opening{$seqno};
+ }
+ }
+ if ( !defined($weld_len) ) { $weld_len = 0 }
+ return $weld_len;
+}
+
+sub weld_len_left_to_go {
+ my ($i) = @_;
+
+ # Given the index of a token in the 'to_go' array
+ # return the length of any weld to its left
+ return if ( $i < 0 );
+ my $weld_len =
+ weld_len_left( $type_sequence_to_go[$i], $types_to_go[$i] );
+ return $weld_len;
+}
+
+sub weld_len_right_to_go {
+ my ($i) = @_;
+
+ # Given the index of a token in the 'to_go' array
+ # return the length of any weld to its right
+ return if ( $i < 0 );
+ if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
+ my $weld_len =
+ weld_len_right( $type_sequence_to_go[$i], $types_to_go[$i] );
+ return $weld_len;
+}
+
+sub link_sequence_items {
+
+ # This has been merged into 'respace_tokens' but retained for reference
+ my $self = shift;
+ my $rlines = $self->{rlines};
+ my $rLL = $self->{rLL};
+
+ # We walk the token list and make links to the next sequence item.
+ # We also define these hashes to container tokens using sequence number as
+ # the key:
+ my $K_opening_container = {}; # opening [ { or (
+ my $K_closing_container = {}; # closing ] } or )
+ my $K_opening_ternary = {}; # opening ? of ternary
+ my $K_closing_ternary = {}; # closing : of ternary
+
+ # sub to link preceding nodes forward to a new node type
+ my $link_back = sub {
+ my ( $Ktop, $key ) = @_;
+
+ my $Kprev = $Ktop - 1;
+ while ( $Kprev >= 0
+ && !defined( $rLL->[$Kprev]->[$key] ) )
+ {
+ $rLL->[$Kprev]->[$key] = $Ktop;
+ $Kprev -= 1;
+ }
+ };
+
+ for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
+
+ $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] = undef;
+
+ my $type = $rLL->[$KK]->[_TYPE_];
+
+ next if ( $type eq 'b' );
+
+ my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
+ if ($type_sequence) {
+
+ $link_back->( $KK, _KNEXT_SEQ_ITEM_ );
+
+ my $token = $rLL->[$KK]->[_TOKEN_];
+ if ( $is_opening_token{$token} ) {
+
+ $K_opening_container->{$type_sequence} = $KK;
}
+ elsif ( $is_closing_token{$token} ) {
- # patch to put a blank line after =cut
- # (required by podchecker)
- if ( $line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
- $file_writer_object->reset_consecutive_blank_lines();
- $want_blank_line_next = 1;
+ $K_closing_container->{$type_sequence} = $KK;
+ }
+
+ # These are not yet used but could be useful
+ else {
+ if ( $token eq '?' ) {
+ $K_opening_ternary->{$type_sequence} = $KK;
+ }
+ elsif ( $token eq ':' ) {
+ $K_closing_ternary->{$type_sequence} = $KK;
+ }
+ else {
+ Fault(<<EOM);
+Unknown sequenced token type '$type'. Expecting one of '{[(?:)]}'
+EOM
+ }
}
}
+ }
- # leave the blank counters in a predictable state
- # after __END__ or __DATA__
- elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
- $file_writer_object->reset_consecutive_blank_lines();
- $saw_END_or_DATA_ = 1;
+ $self->{K_opening_container} = $K_opening_container;
+ $self->{K_closing_container} = $K_closing_container;
+ $self->{K_opening_ternary} = $K_opening_ternary;
+ $self->{K_closing_ternary} = $K_closing_ternary;
+ return;
+}
+
+sub sum_token_lengths {
+ my $self = shift;
+
+ # This has been merged into 'respace_tokens' but retained for reference
+ my $rLL = $self->{rLL};
+ my $cumulative_length = 0;
+ for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
+
+ # Save the length sum to just BEFORE this token
+ $rLL->[$KK]->[_CUMULATIVE_LENGTH_] = $cumulative_length;
+
+ # now set the length of this token
+ my $token_length = length( $rLL->[$KK]->[_TOKEN_] );
+
+ $cumulative_length += $token_length;
+ }
+ return;
+}
+
+sub resync_lines_and_tokens {
+
+ my $self = shift;
+ my $rLL = $self->{rLL};
+ my $Klimit = $self->{Klimit};
+ my $rlines = $self->{rlines};
+
+ # Re-construct the arrays of tokens associated with the original input lines
+ # since they have probably changed due to inserting and deleting blanks
+ # and a few other tokens.
+
+ my $Kmax = -1;
+
+ # This is the next token and its line index:
+ my $Knext = 0;
+ my $inext;
+ if ( defined($rLL) && @{$rLL} ) {
+ $Kmax = @{$rLL} - 1;
+ $inext = $rLL->[$Knext]->[_LINE_INDEX_];
+ }
+
+ my $get_inext = sub {
+ if ( $Knext < 0 || $Knext > $Kmax ) { $inext = undef }
+ else {
+ $inext = $rLL->[$Knext]->[_LINE_INDEX_];
}
+ return $inext;
+ };
+
+ # Remember the most recently output token index
+ my $Klast_out;
- # write unindented non-code line
- if ( !$skip_line ) {
- if ($tee_line) { $file_writer_object->tee_on() }
- write_unindented_line($input_line);
- if ($tee_line) { $file_writer_object->tee_off() }
- if ($want_blank_line_next) { want_blank_line(); }
+ my $iline = -1;
+ foreach my $line_of_tokens ( @{$rlines} ) {
+ $iline++;
+ my $line_type = $line_of_tokens->{_line_type};
+ if ( $line_type eq 'CODE' ) {
+
+ my @K_array;
+ my $rK_range;
+ $inext = $get_inext->();
+ while ( defined($inext) && $inext <= $iline ) {
+ push @{K_array}, $Knext;
+ $Knext += 1;
+ $inext = $get_inext->();
+ }
+
+ # Delete any terminal blank token
+ if (@K_array) {
+ if ( $rLL->[ $K_array[-1] ]->[_TYPE_] eq 'b' ) {
+ pop @K_array;
+ }
+ }
+
+ # Define the range of K indexes for the line:
+ # $Kfirst = index of first token on line
+ # $Klast_out = index of last token on line
+ my ( $Kfirst, $Klast );
+ if (@K_array) {
+ $Kfirst = $K_array[0];
+ $Klast = $K_array[-1];
+ $Klast_out = $Klast;
+ }
+
+ # It is only safe to trim the actual line text if the input
+ # line had a terminal blank token. Otherwise, we may be
+ # in a quote.
+ if ( $line_of_tokens->{_ended_in_blank_token} ) {
+ $line_of_tokens->{_line_text} =~ s/\s+$//;
+ }
+ $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
}
}
- $last_line_type = $line_type;
+
+ # There shouldn't be any nodes beyond the last one unless we start
+ # allowing 'link_after' calls
+ if ( defined($inext) ) {
+
+ Fault("unexpected tokens at end of file when reconstructing lines");
+ }
+
+ return;
+}
+
+sub dump_verbatim {
+ my $self = shift;
+ my $rlines = $self->{rlines};
+ foreach my $line ( @{$rlines} ) {
+ my $input_line = $line->{_line_text};
+ $self->write_unindented_line($input_line);
+ }
+ return;
+}
+
+sub finish_formatting {
+
+ my ( $self, $severe_error ) = @_;
+
+ # The file has been tokenized and is ready to be formatted.
+ # All of the relevant data is stored in $self, ready to go.
+
+ # output file verbatim if severe error or no formatting requested
+ if ( $severe_error || $rOpts->{notidy} ) {
+ $self->dump_verbatim();
+ $self->wrapup();
+ return;
+ }
+
+ # Make a pass through the lines, looking at lines of CODE and identifying
+ # special processing needs, such format skipping sections marked by
+ # special comments
+ $self->scan_comments();
+
+ # Find nested pairs of container tokens for any welding. This information
+ # is also needed for adding semicolons, so it is split apart from the
+ # welding step.
+ $self->find_nested_pairs();
+
+ # Make sure everything looks good
+ $self->check_line_hashes();
+
+ # Future: Place to Begin future Iteration Loop
+ # foreach my $it_count(1..$maxit) {
+
+ # Future: We must reset some things after the first iteration.
+ # This includes:
+ # - resetting levels if there was any welding
+ # - resetting any phantom semicolons
+ # - dealing with any line numbering issues so we can relate final lines
+ # line numbers with input line numbers.
+ #
+ # If ($it_count>1) {
+ # Copy {level_raw} to [_LEVEL_] if ($it_count>1)
+ # Renumber lines
+ # }
+
+ # Make a pass through all tokens, adding or deleting any whitespace as
+ # required. Also make any other changes, such as adding semicolons.
+ # All token changes must be made here so that the token data structure
+ # remains fixed for the rest of this iteration.
+ $self->respace_tokens();
+
+ # Implement any welding needed for the -wn or -cb options
+ $self->weld_containers();
+
+ # Finishes formatting and write the result to the line sink.
+ # Eventually this call should just change the 'rlines' data according to the
+ # new line breaks and then return so that we can do an internal iteration
+ # before continuing with the next stages of formatting.
+ $self->break_lines();
+
+ ############################################################
+ # A possible future decomposition of 'break_lines()' follows.
+ # Benefits:
+ # - allow perltidy to do an internal iteration which eliminates
+ # many unnecessary steps, such as re-parsing and vertical alignment.
+ # This will allow iterations to be automatic.
+ # - consolidate all length calculations to allow utf8 alignment
+ ############################################################
+
+ # Future: Check for convergence of beginning tokens on CODE lines
+
+ # Future: End of Iteration Loop
+
+ # Future: add_padding($rargs);
+
+ # Future: add_closing_side_comments($rargs);
+
+ # Future: vertical_alignment($rargs);
+
+ # Future: output results
+
+ # A final routine to tie up any loose ends
+ $self->wrapup();
+ return;
}
sub create_one_line_block {
- $index_start_one_line_block = $_[0];
- $semicolons_before_block_self_destruct = $_[1];
+ ( $index_start_one_line_block, $semicolons_before_block_self_destruct ) =
+ @_;
+ return;
}
sub destroy_one_line_block {
$index_start_one_line_block = UNDEFINED_INDEX;
$semicolons_before_block_self_destruct = 0;
+ return;
}
sub leading_spaces_to_go {
# return the number of indentation spaces for a token in the output stream;
# these were previously stored by 'set_leading_whitespace'.
- return get_SPACES( $leading_spaces_to_go[ $_[0] ] );
+ my $ii = shift;
+ if ( $ii < 0 ) { $ii = 0 }
+ return get_spaces( $leading_spaces_to_go[$ii] );
}
-sub get_SPACES {
+sub get_spaces {
# return the number of leading spaces associated with an indentation
# variable $indentation is either a constant number of spaces or an object
- # with a get_SPACES method.
+ # with a get_spaces method.
my $indentation = shift;
- return ref($indentation) ? $indentation->get_SPACES() : $indentation;
+ return ref($indentation) ? $indentation->get_spaces() : $indentation;
}
-sub get_RECOVERABLE_SPACES {
+sub get_recoverable_spaces {
# return the number of spaces (+ means shift right, - means shift left)
# that we would like to shift a group of lines with the same indentation
# to get them to line up with their opening parens
my $indentation = shift;
- return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
+ return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
}
-sub get_AVAILABLE_SPACES_to_go {
+sub get_available_spaces_to_go {
- my $item = $leading_spaces_to_go[ $_[0] ];
+ my $ii = shift;
+ my $item = $leading_spaces_to_go[$ii];
# return the number of available leading spaces associated with an
# indentation variable. $indentation is either a constant number of
- # spaces or an object with a get_AVAILABLE_SPACES method.
- return ref($item) ? $item->get_AVAILABLE_SPACES() : 0;
+ # spaces or an object with a get_available_spaces method.
+ return ref($item) ? $item->get_available_spaces() : 0;
}
sub new_lp_indentation_item {
# define: space count of leading string which would apply if it
# were the first token of a new line.
- my ( $level, $ci_level, $in_continued_quote ) = @_;
+ my ( $level_abs, $ci_level, $in_continued_quote ) = @_;
+
+ # Adjust levels if necessary to recycle whitespace:
+ # given $level_abs, the absolute level
+ # define $level, a possibly reduced level for whitespace
+ my $level = $level_abs;
+ if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) {
+ if ( $level_abs < $whitespace_last_level ) {
+ pop(@whitespace_level_stack);
+ }
+ if ( !@whitespace_level_stack ) {
+ push @whitespace_level_stack, $level_abs;
+ }
+ elsif ( $level_abs > $whitespace_last_level ) {
+ $level = $whitespace_level_stack[-1] +
+ ( $level_abs - $whitespace_last_level );
+
+ if (
+ # 1 Try to break at a block brace
+ (
+ $level > $rOpts_whitespace_cycle
+ && $last_nonblank_type eq '{'
+ && $last_nonblank_token eq '{'
+ )
+
+ # 2 Then either a brace or bracket
+ || ( $level > $rOpts_whitespace_cycle + 1
+ && $last_nonblank_token =~ /^[\{\[]$/ )
+
+ # 3 Then a paren too
+ || $level > $rOpts_whitespace_cycle + 2
+ )
+ {
+ $level = 1;
+ }
+ push @whitespace_level_stack, $level;
+ }
+ $level = $whitespace_level_stack[-1];
+ }
+ $whitespace_last_level = $level_abs;
# modify for -bli, which adds one continuation indentation for
# opening braces
}
# get the top state from the stack
- my $space_count = $gnu_stack[$max_gnu_stack_index]->get_SPACES();
- my $current_level = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
- my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
+ my $space_count = $gnu_stack[$max_gnu_stack_index]->get_spaces();
+ my $current_level = $gnu_stack[$max_gnu_stack_index]->get_level();
+ my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
my $type = $types_to_go[$max_index_to_go];
my $token = $tokens_to_go[$max_index_to_go];
##my $too_close = ($i_test==$max_index_to_go-1);
my $test_position = total_line_length( $i_test, $max_index_to_go );
+ my $mll = maximum_line_length($i_test);
if (
##!$too_close &&
# if we are beyond the midpoint
- $gnu_position_predictor > $half_maximum_line_length
+ $gnu_position_predictor > $mll - $rOpts_maximum_line_length / 2
- # or we are beyont the 1/4 point and there was an old
+ # or we are beyond the 1/4 point and there was an old
# break at the equals
|| (
- $gnu_position_predictor > $half_maximum_line_length / 2
+ $gnu_position_predictor >
+ $mll - $rOpts_maximum_line_length * 3 / 4
&& (
$old_breakpoint_to_go[$last_equals]
|| ( $last_equals > 0
}
}
+ my $halfway =
+ maximum_line_length_for_level($level) - $rOpts_maximum_line_length / 2;
+
# Check for decreasing depth ..
# Note that one token may have both decreasing and then increasing
# depth. For example, (level, ci) can go from (1,1) to (2,0). So,
if ($max_gnu_stack_index) {
# save index of token which closes this level
- $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go);
+ $gnu_stack[$max_gnu_stack_index]->set_closed($max_index_to_go);
# Undo any extra indentation if we saw no commas
my $available_spaces =
- $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES();
+ $gnu_stack[$max_gnu_stack_index]->get_available_spaces();
my $comma_count = 0;
my $arrow_count = 0;
$comma_count = 0 unless $comma_count;
$arrow_count = 0 unless $arrow_count;
}
- $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count);
- $gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count);
+ $gnu_stack[$max_gnu_stack_index]->set_comma_count($comma_count);
+ $gnu_stack[$max_gnu_stack_index]->set_arrow_count($arrow_count);
if ( $available_spaces > 0 ) {
if ( $comma_count <= 0 || $arrow_count > 0 ) {
- my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX();
+ my $i = $gnu_stack[$max_gnu_stack_index]->get_index();
my $seqno =
$gnu_stack[$max_gnu_stack_index]
- ->get_SEQUENCE_NUMBER();
+ ->get_sequence_number();
# Be sure this item was created in this batch. This
# should be true because we delete any available
else {
if ( $arrow_count == 0 ) {
$gnu_item_list[$i]
- ->permanently_decrease_AVAILABLE_SPACES(
+ ->permanently_decrease_available_spaces(
$available_spaces);
}
else {
$gnu_item_list[$i]
- ->tentatively_decrease_AVAILABLE_SPACES(
+ ->tentatively_decrease_available_spaces(
$available_spaces);
}
-
- my $j;
- for (
- $j = $i + 1 ;
- $j <= $max_gnu_item_index ;
- $j++
- )
- {
+ foreach my $j ( $i + 1 .. $max_gnu_item_index ) {
$gnu_item_list[$j]
->decrease_SPACES($available_spaces);
}
# go down one level
--$max_gnu_stack_index;
- $lev = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
- $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
+ $lev = $gnu_stack[$max_gnu_stack_index]->get_level();
+ $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
# stop when we reach a level at or below the current level
if ( $lev <= $level && $ci_lev <= $ci_level ) {
$space_count =
- $gnu_stack[$max_gnu_stack_index]->get_SPACES();
+ $gnu_stack[$max_gnu_stack_index]->get_spaces();
$current_level = $lev;
$current_ci_level = $ci_lev;
last;
$space_count = $gnu_position_predictor;
my $min_gnu_indentation =
- $gnu_stack[$max_gnu_stack_index]->get_SPACES();
+ $gnu_stack[$max_gnu_stack_index]->get_spaces();
$available_space = $space_count - $min_gnu_indentation;
if ( $available_space >= $standard_increment ) {
# update state, but not on a blank token
if ( $types_to_go[$max_index_to_go] ne 'b' ) {
- $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1);
+ $gnu_stack[$max_gnu_stack_index]->set_have_child(1);
++$max_gnu_stack_index;
$gnu_stack[$max_gnu_stack_index] =
# to this minimum standard indentation. But the most deeply
# nested container will still probably be able to shift its
# parameters to the right for proper alignment, so in most
- # cases this will not be noticable.
- if ( $available_space > 0
- && $space_count > $half_maximum_line_length )
- {
+ # cases this will not be noticeable.
+ if ( $available_space > 0 && $space_count > $halfway ) {
$gnu_stack[$max_gnu_stack_index]
- ->tentatively_decrease_AVAILABLE_SPACES($available_space);
+ ->tentatively_decrease_available_spaces($available_space);
}
}
}
$last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
# and it is significantly to the right
- || $gnu_position_predictor > $half_maximum_line_length
+ || $gnu_position_predictor > $halfway
)
)
)
total_line_length( $line_start_index_to_go, $max_index_to_go );
}
else {
- $gnu_position_predictor = $space_count +
- token_sequence_length( $max_index_to_go, $max_index_to_go );
+ $gnu_position_predictor =
+ $space_count + $token_lengths_to_go[$max_index_to_go];
}
# store the indentation object for this token
# keep 2 extra free because they are needed in some cases
# (result of trial-and-error testing)
my $spaces_needed =
- $gnu_position_predictor - $rOpts_maximum_line_length + 2;
+ $gnu_position_predictor - maximum_line_length($max_index_to_go) + 2;
- return if ( $spaces_needed < 0 );
+ return if ( $spaces_needed <= 0 );
# We are over the limit, so try to remove a requested number of
# spaces from leading whitespace. We are only allowed to remove
# item must still be open to be a candidate (otherwise it
# cannot influence the current token)
- next if ( $item->get_CLOSED() >= 0 );
+ next if ( $item->get_closed() >= 0 );
- my $available_spaces = $item->get_AVAILABLE_SPACES();
+ my $available_spaces = $item->get_available_spaces();
if ( $available_spaces > 0 ) {
push( @candidates, [ $i, $available_spaces ] );
@candidates = sort { $b->[1] <=> $a->[1] } @candidates;
# keep removing whitespace until we are done or have no more
- my $candidate;
- foreach $candidate (@candidates) {
+ foreach my $candidate (@candidates) {
my ( $i, $available_spaces ) = @{$candidate};
my $deleted_spaces =
( $available_spaces > $spaces_needed )
: $available_spaces;
# remove the incremental space from this item
- $gnu_item_list[$i]->decrease_AVAILABLE_SPACES($deleted_spaces);
+ $gnu_item_list[$i]->decrease_available_spaces($deleted_spaces);
my $i_debug = $i;
# that came after it
for ( ; $i <= $max_gnu_item_index ; $i++ ) {
- my $old_spaces = $gnu_item_list[$i]->get_SPACES();
- if ( $old_spaces > $deleted_spaces ) {
+ my $old_spaces = $gnu_item_list[$i]->get_spaces();
+ if ( $old_spaces >= $deleted_spaces ) {
$gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
}
# shouldn't happen except for code bug:
else {
- my $level = $gnu_item_list[$i_debug]->get_LEVEL();
- my $ci_level = $gnu_item_list[$i_debug]->get_CI_LEVEL();
- my $old_level = $gnu_item_list[$i]->get_LEVEL();
- my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL();
+ my $level = $gnu_item_list[$i_debug]->get_level();
+ my $ci_level = $gnu_item_list[$i_debug]->get_ci_level();
+ my $old_level = $gnu_item_list[$i]->get_level();
+ my $old_ci_level = $gnu_item_list[$i]->get_ci_level();
warning(
"program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n"
);
$spaces_needed -= $deleted_spaces;
last unless ( $spaces_needed > 0 );
}
+ return;
}
sub finish_lp_batch {
- # This routine is called once after each each output stream batch is
+ # This routine is called once after each output stream batch is
# finished to undo indentation for all incomplete -lp
# indentation levels. It is too risky to leave a level open,
# because then we can't backtrack in case of a long line to follow.
return if ( $max_gnu_item_index == UNDEFINED_INDEX );
# loop over all whitespace items created for the current batch
- my $i;
- for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
+ foreach my $i ( 0 .. $max_gnu_item_index ) {
my $item = $gnu_item_list[$i];
# only look for open items
- next if ( $item->get_CLOSED() >= 0 );
+ next if ( $item->get_closed() >= 0 );
# Tentatively remove all of the available space
# (The vertical aligner will try to get it back later)
- my $available_spaces = $item->get_AVAILABLE_SPACES();
+ my $available_spaces = $item->get_available_spaces();
if ( $available_spaces > 0 ) {
# delete incremental space for this item
$gnu_item_list[$i]
- ->tentatively_decrease_AVAILABLE_SPACES($available_spaces);
+ ->tentatively_decrease_available_spaces($available_spaces);
# Reduce the total indentation space of any nodes that follow
# Note that any such nodes must necessarily be dependents
my $deleted_spaces = 0;
my $item = $leading_spaces_to_go[$i];
- my $available_spaces = $item->get_AVAILABLE_SPACES();
+ my $available_spaces = $item->get_available_spaces();
if (
$available_spaces > 0
&& ( ( $spaces_wanted <= $available_spaces )
- || !$item->get_HAVE_CHILD() )
+ || !$item->get_have_child() )
)
{
# we'll remove these spaces, but mark them as recoverable
$deleted_spaces =
- $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted);
+ $item->tentatively_decrease_available_spaces($spaces_wanted);
}
return $deleted_spaces;
sub token_sequence_length {
- # return length of tokens ($ifirst .. $ilast) including first & last
- # returns 0 if $ifirst > $ilast
- my $ifirst = shift;
- my $ilast = shift;
- return 0 if ( $ilast < 0 || $ifirst > $ilast );
- return $lengths_to_go[ $ilast + 1 ] if ( $ifirst < 0 );
- return $lengths_to_go[ $ilast + 1 ] - $lengths_to_go[$ifirst];
+ # return length of tokens ($ibeg .. $iend) including $ibeg & $iend
+ # returns 0 if $ibeg > $iend (shouldn't happen)
+ my ( $ibeg, $iend ) = @_;
+ return 0 if ( $iend < 0 || $ibeg > $iend );
+ return $summed_lengths_to_go[ $iend + 1 ] if ( $ibeg < 0 );
+ return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
}
sub total_line_length {
- # return length of a line of tokens ($ifirst .. $ilast)
- my $ifirst = shift;
- my $ilast = shift;
- if ( $ifirst < 0 ) { $ifirst = 0 }
+ # return length of a line of tokens ($ibeg .. $iend)
+ my ( $ibeg, $iend ) = @_;
+ return leading_spaces_to_go($ibeg) + token_sequence_length( $ibeg, $iend );
+}
+
+sub maximum_line_length_for_level {
+
+ # return maximum line length for line starting with a given level
+ my $maximum_line_length = $rOpts_maximum_line_length;
+
+ # Modify if -vmll option is selected
+ if ($rOpts_variable_maximum_line_length) {
+ my $level = shift;
+ if ( $level < 0 ) { $level = 0 }
+ $maximum_line_length += $level * $rOpts_indent_columns;
+ }
+ return $maximum_line_length;
+}
- return leading_spaces_to_go($ifirst) +
- token_sequence_length( $ifirst, $ilast );
+sub maximum_line_length {
+
+ # return maximum line length for line starting with the token at given index
+ my $ii = shift;
+ return maximum_line_length_for_level( $levels_to_go[$ii] );
}
sub excess_line_length {
- # return number of characters by which a line of tokens ($ifirst..$ilast)
+ # return number of characters by which a line of tokens ($ibeg..$iend)
# exceeds the allowable line length.
- my $ifirst = shift;
- my $ilast = shift;
- if ( $ifirst < 0 ) { $ifirst = 0 }
- return leading_spaces_to_go($ifirst) +
- token_sequence_length( $ifirst, $ilast ) - $rOpts_maximum_line_length;
+ my ( $ibeg, $iend, $ignore_left_weld, $ignore_right_weld ) = @_;
+
+ # Include left and right weld lengths unless requested not to
+ my $wl = $ignore_left_weld ? 0 : weld_len_left_to_go($iend);
+ my $wr = $ignore_right_weld ? 0 : weld_len_right_to_go($iend);
+
+ return total_line_length( $ibeg, $iend ) + $wl + $wr -
+ maximum_line_length($ibeg);
}
-sub finish_formatting {
+sub wrapup {
# flush buffer and write any informative messages
my $self = shift;
- flush();
+ $self->flush();
$file_writer_object->decrement_output_line_number()
; # fix up line number since it was incremented
we_are_at_the_last_line();
write_logfile_entry("No indentation disagreement seen\n");
}
}
+ if ($first_tabbing_disagreement) {
+ write_logfile_entry(
+"Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
+ );
+ }
write_logfile_entry("\n");
$vertical_aligner_object->report_anything_unusual();
$file_writer_object->report_line_length_errors();
+
+ return;
}
sub check_options {
# This routine is called to check the Opts hash after it is defined
-
- ($rOpts) = @_;
- my ( $tabbing_string, $tab_msg );
+ $rOpts = shift;
make_static_block_comment_pattern();
make_static_side_comment_pattern();
make_bli_pattern();
make_block_brace_vertical_tightness_pattern();
+ make_blank_line_pattern();
+
+ prepare_cuddled_block_types();
+ if ( $rOpts->{'dump-cuddled-block-list'} ) {
+ dump_cuddled_block_list(*STDOUT);
+ Perl::Tidy::Exit 0;
+ }
if ( $rOpts->{'line-up-parentheses'} ) {
|| !$rOpts->{'add-newlines'}
|| !$rOpts->{'delete-old-newlines'} )
{
- warn <<EOM;
+ Perl::Tidy::Warn <<EOM;
-----------------------------------------------------------------------
Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
}
}
- # At present, tabs are not compatable with the line-up-parentheses style
+ # At present, tabs are not compatible with the line-up-parentheses style
# (it would be possible to entab the total leading whitespace
# just prior to writing the line, if desired).
if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
- warn <<EOM;
+ Perl::Tidy::Warn <<EOM;
Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
EOM
$rOpts->{'tabs'} = 0;
}
- # Likewise, tabs are not compatable with outdenting..
+ # Likewise, tabs are not compatible with outdenting..
if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
- warn <<EOM;
+ Perl::Tidy::Warn <<EOM;
Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
EOM
$rOpts->{'tabs'} = 0;
}
if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
- warn <<EOM;
+ Perl::Tidy::Warn <<EOM;
Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
EOM
$rOpts->{'tabs'} = 0;
$outdent_keyword{$_} = 1;
}
else {
- warn "ignoring '$_' in -okwl list; not a perl keyword";
+ Perl::Tidy::Warn "ignoring '$_' in -okwl list; not a perl keyword";
}
}
}
if ( $rOpts->{'dump-want-left-space'} ) {
dump_want_left_space(*STDOUT);
- exit 1;
+ Perl::Tidy::Exit 0;
}
if ( $rOpts->{'dump-want-right-space'} ) {
dump_want_right_space(*STDOUT);
- exit 1;
+ Perl::Tidy::Exit 0;
}
# default keywords for which space is introduced before an opening paren
# (at present, including them messes up vertical alignment)
@_ = qw(my local our and or err eq ne if else elsif until
- unless while for foreach return switch case given when);
+ unless while for foreach return switch case given when catch);
@space_after_keyword{@_} = (1) x scalar(@_);
- # allow user to modify these defaults
- if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) {
- @space_after_keyword{@_} = (1) x scalar(@_);
- }
-
+ # first remove any or all of these if desired
if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
+
+ # -nsak='*' selects all the above keywords
+ if ( @_ == 1 && $_[0] eq '*' ) { @_ = keys(%space_after_keyword) }
@space_after_keyword{@_} = (0) x scalar(@_);
}
+ # then allow user to add to these defaults
+ if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) {
+ @space_after_keyword{@_} = (1) x scalar(@_);
+ }
+
# implement user break preferences
- foreach my $tok ( split_words( $rOpts->{'want-break-after'} ) ) {
- if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
- my $lbs = $left_bond_strength{$tok};
- my $rbs = $right_bond_strength{$tok};
- if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
- ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
- ( $lbs, $rbs );
+ my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
+ = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
+ . : ? && || and or err xor
+ );
+
+ my $break_after = sub {
+ foreach my $tok (@_) {
+ if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
+ my $lbs = $left_bond_strength{$tok};
+ my $rbs = $right_bond_strength{$tok};
+ if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
+ ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
+ ( $lbs, $rbs );
+ }
}
- }
+ };
- foreach my $tok ( split_words( $rOpts->{'want-break-before'} ) ) {
- my $lbs = $left_bond_strength{$tok};
- my $rbs = $right_bond_strength{$tok};
- if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
- ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
- ( $lbs, $rbs );
+ my $break_before = sub {
+ foreach my $tok (@_) {
+ my $lbs = $left_bond_strength{$tok};
+ my $rbs = $right_bond_strength{$tok};
+ if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
+ ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
+ ( $lbs, $rbs );
+ }
}
- }
+ };
+
+ $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
+ $break_before->(@all_operators)
+ if ( $rOpts->{'break-before-all-operators'} );
+
+ $break_after->( split_words( $rOpts->{'want-break-after'} ) );
+ $break_before->( split_words( $rOpts->{'want-break-before'} ) );
# make note if breaks are before certain key types
%want_break_before = ();
- foreach my $tok (
- '=', '.', ',', ':', '?', '&&', '||', 'and',
- 'or', 'err', 'xor', '+', '-', '*', '/',
- )
- {
+ foreach my $tok ( @all_operators, ',' ) {
$want_break_before{$tok} =
$left_bond_strength{$tok} < $right_bond_strength{$tok};
}
push @_, ',';
@is_anon_sub_brace_follower{@_} = (1) x scalar(@_);
- # what can follow a one-line anonynomous sub closing curly:
- # one-line anonumous subs also have ']' here...
+ # what can follow a one-line anonymous sub closing curly:
+ # one-line anonymous subs also have ']' here...
# see tk3.t and PP.pm
@_ = qw# ; : => or and && || ) ] ~~ !~~ #;
push @_, ',';
$rOpts->{'long-block-line-count'} = 1000000;
}
+ my $enc = $rOpts->{'character-encoding'};
+ if ( $enc && $enc !~ /^(none|utf8)$/i ) {
+ Perl::Tidy::Die <<EOM;
+Unrecognized character-encoding '$enc'; expecting one of: (none, utf8)
+EOM
+ }
+
my $ole = $rOpts->{'output-line-ending'};
if ($ole) {
my %endings = (
mac => "\015",
unix => "\012",
);
- $ole = lc $ole;
- unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
- my $str = join " ", keys %endings;
- die <<EOM;
+
+ # Patch for RT #99514, a memoization issue.
+ # Normally, the user enters one of 'dos', 'win', etc, and we change the
+ # value in the options parameter to be the corresponding line ending
+ # character. But, if we are using memoization, on later passes through
+ # here the option parameter will already have the desired ending
+ # character rather than the keyword 'dos', 'win', etc. So
+ # we must check to see if conversion has already been done and, if so,
+ # bypass the conversion step.
+ my %endings_inverted = (
+ "\015\012" => 'dos',
+ "\015\012" => 'win',
+ "\015" => 'mac',
+ "\012" => 'unix',
+ );
+
+ if ( defined( $endings_inverted{$ole} ) ) {
+
+ # we already have valid line ending, nothing more to do
+ }
+ else {
+ $ole = lc $ole;
+ unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
+ my $str = join " ", keys %endings;
+ Perl::Tidy::Die <<EOM;
Unrecognized line ending '$ole'; expecting one of: $str
EOM
- }
- if ( $rOpts->{'preserve-line-endings'} ) {
- warn "Ignoring -ple; conflicts with -ole\n";
- $rOpts->{'preserve-line-endings'} = undef;
+ }
+ if ( $rOpts->{'preserve-line-endings'} ) {
+ Perl::Tidy::Warn "Ignoring -ple; conflicts with -ole\n";
+ $rOpts->{'preserve-line-endings'} = undef;
+ }
}
}
$rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
$rOpts_break_at_old_ternary_breakpoints =
$rOpts->{'break-at-old-ternary-breakpoints'};
+ $rOpts_break_at_old_attribute_breakpoints =
+ $rOpts->{'break-at-old-attribute-breakpoints'};
$rOpts_break_at_old_comma_breakpoints =
$rOpts->{'break-at-old-comma-breakpoints'};
$rOpts_break_at_old_keyword_breakpoints =
$rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
$rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
$rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
+ $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
+
+ $rOpts_variable_maximum_line_length =
+ $rOpts->{'variable-maximum-line-length'};
$rOpts_short_concatenation_item_length =
$rOpts->{'short-concatenation-item-length'};
- $rOpts_swallow_optional_blank_lines =
- $rOpts->{'swallow-optional-blank-lines'};
- $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
- $rOpts_format_skipping = $rOpts->{'format-skipping'};
- $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
- $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
- $half_maximum_line_length = $rOpts_maximum_line_length / 2;
+
+ $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
+ $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
+ $rOpts_format_skipping = $rOpts->{'format-skipping'};
+ $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
+ $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
+ $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
+ $rOpts_ignore_side_comment_lengths =
+ $rOpts->{'ignore-side-comment-lengths'};
# Note that both opening and closing tokens can access the opening
# and closing flags of their container types.
']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
);
- # assume flag for '>' same as ')' for closing qw quotes
- %closing_token_indentation = (
- ')' => $rOpts->{'closing-paren-indentation'},
- '}' => $rOpts->{'closing-brace-indentation'},
- ']' => $rOpts->{'closing-square-bracket-indentation'},
- '>' => $rOpts->{'closing-paren-indentation'},
- );
+ # assume flag for '>' same as ')' for closing qw quotes
+ %closing_token_indentation = (
+ ')' => $rOpts->{'closing-paren-indentation'},
+ '}' => $rOpts->{'closing-brace-indentation'},
+ ']' => $rOpts->{'closing-square-bracket-indentation'},
+ '>' => $rOpts->{'closing-paren-indentation'},
+ );
+
+ # flag indicating if any closing tokens are indented
+ $some_closing_token_indentation =
+ $rOpts->{'closing-paren-indentation'}
+ || $rOpts->{'closing-brace-indentation'}
+ || $rOpts->{'closing-square-bracket-indentation'}
+ || $rOpts->{'indent-closing-brace'};
+
+ %opening_token_right = (
+ '(' => $rOpts->{'opening-paren-right'},
+ '{' => $rOpts->{'opening-hash-brace-right'},
+ '[' => $rOpts->{'opening-square-bracket-right'},
+ );
+
+ %stack_opening_token = (
+ '(' => $rOpts->{'stack-opening-paren'},
+ '{' => $rOpts->{'stack-opening-hash-brace'},
+ '[' => $rOpts->{'stack-opening-square-bracket'},
+ );
+
+ %stack_closing_token = (
+ ')' => $rOpts->{'stack-closing-paren'},
+ '}' => $rOpts->{'stack-closing-hash-brace'},
+ ']' => $rOpts->{'stack-closing-square-bracket'},
+ );
+ $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
+ $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
+ return;
+}
+
+sub bad_pattern {
+
+ # See if a pattern will compile. We have to use a string eval here,
+ # but it should be safe because the pattern has been constructed
+ # by this program.
+ my ($pattern) = @_;
+ eval "'##'=~/$pattern/";
+ return $@;
+}
+
+sub prepare_cuddled_block_types {
+
+ my $cuddled_string = $rOpts->{'cuddled-block-list'};
+ $cuddled_string = "try-catch-finally" unless defined($cuddled_string);
+
+ # we have a cuddled string of the form
+ # 'try-catch-finally'
+
+ # we want to prepare a hash of the form
+
+ # $rcuddled_block_types = {
+ # 'try' => {
+ # 'catch' => 1,
+ # 'finally' => 1
+ # },
+ # };
+
+ # use -dcbl to dump this hash
+
+ # Multiple such strings are input as a space or comma separated list
+
+ # If we get two lists with the same leading type, such as
+ # -cbl = "-try-catch-finally -try-catch-otherwise"
+ # then they will get merged as follows:
+ # $rcuddled_block_types = {
+ # 'try' => {
+ # 'catch' => 1,
+ # 'finally' => 2,
+ # 'otherwise' => 1,
+ # },
+ # };
+ # This will allow either type of chain to be followed.
+
+ $cuddled_string =~ s/,/ /g; # allow space or comma separated lists
+ my @cuddled_strings = split /\s+/, $cuddled_string;
+
+ $rcuddled_block_types = {};
+
+ # process each dash-separated string...
+ my $string_count = 0;
+ foreach my $string (@cuddled_strings) {
+ next unless $string;
+ my @words = split /-+/, $string; # allow multiple dashes
+
+ # we could look for and report possible errors here...
+ next unless ( @words && @words > 0 );
+ my $start = shift @words;
+
+ # allow either '-continue' or *-continue' for arbitrary starting type
+ $start = '*' unless $start;
+
+ # always make an entry for the leading word. If none follow, this
+ # will still prevent a wildcard from matching this word.
+ if ( !defined( $rcuddled_block_types->{$start} ) ) {
+ $rcuddled_block_types->{$start} = {};
+ }
+
+ # The count gives the original word order in case we ever want it.
+ $string_count++;
+ my $word_count = 0;
+ foreach my $word (@words) {
+ next unless $word;
+ $word_count++;
+ $rcuddled_block_types->{$start}->{$word} =
+ 1; #"$string_count.$word_count";
+ }
+ }
- %opening_token_right = (
- '(' => $rOpts->{'opening-paren-right'},
- '{' => $rOpts->{'opening-hash-brace-right'},
- '[' => $rOpts->{'opening-square-bracket-right'},
- );
+ return;
+}
- %stack_opening_token = (
- '(' => $rOpts->{'stack-opening-paren'},
- '{' => $rOpts->{'stack-opening-hash-brace'},
- '[' => $rOpts->{'stack-opening-square-bracket'},
- );
+sub dump_cuddled_block_list {
+ my ($fh) = @_;
+
+ # Here is the format of the cuddled block type hash
+ # which controls this routine
+ # my $rcuddled_block_types = {
+ # 'if' => {
+ # 'else' => 1,
+ # 'elsif' => 1
+ # },
+ # 'try' => {
+ # 'catch' => 1,
+ # 'finally' => 1
+ # },
+ # };
+ #The numerical values are string.word,
+ #where string = string number and word = word number in that string
+
+ my $cuddled_string = $rOpts->{'cuddled-block-list'};
+ $cuddled_string = '' unless $cuddled_string;
+ $fh->print(<<EOM);
+------------------------------------------------------------------------
+Hash of cuddled block types created from
+ -cbl='$cuddled_string'
+------------------------------------------------------------------------
+EOM
- %stack_closing_token = (
- ')' => $rOpts->{'stack-closing-paren'},
- '}' => $rOpts->{'stack-closing-hash-brace'},
- ']' => $rOpts->{'stack-closing-square-bracket'},
- );
+ use Data::Dumper;
+ $fh->print( Dumper($rcuddled_block_types) );
+
+ $fh->print(<<EOM);
+------------------------------------------------------------------------
+EOM
+ return;
}
sub make_static_block_comment_pattern {
# user may give leading caret to force matching left comments only
if ( $prefix !~ /^\^#/ ) {
if ( $prefix !~ /^#/ ) {
- die
+ Perl::Tidy::Die
"ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n";
}
$pattern = '^\s*' . $prefix;
}
- eval "'##'=~/$pattern/";
- if ($@) {
- die
+ if ( bad_pattern($pattern) ) {
+ Perl::Tidy::Die
"ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n";
}
$static_block_comment_pattern = $pattern;
}
+ return;
}
sub make_format_skipping_pattern {
unless ($param) { $param = $default }
$param =~ s/^\s*//;
if ( $param !~ /^#/ ) {
- die "ERROR: the $opt_name parameter '$param' must begin with '#'\n";
+ Perl::Tidy::Die
+ "ERROR: the $opt_name parameter '$param' must begin with '#'\n";
}
my $pattern = '^' . $param . '\s';
- eval "'#'=~/$pattern/";
- if ($@) {
- die
+ if ( bad_pattern($pattern) ) {
+ Perl::Tidy::Die
"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n";
}
return $pattern;
$closing_side_comment_list_pattern =
make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
}
+ return;
}
sub make_bli_pattern {
}
$bli_pattern = make_block_pattern( '-blil', $bli_list_string );
+ return;
}
sub make_block_brace_vertical_tightness_pattern {
# turn any input list into a regex for recognizing selected block types
$block_brace_vertical_tightness_pattern =
'^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
-
if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
&& $rOpts->{'block-brace-vertical-tightness-list'} )
{
make_block_pattern( '-bbvtl',
$rOpts->{'block-brace-vertical-tightness-list'} );
}
+ return;
+}
+
+sub make_blank_line_pattern {
+
+ $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
+ my $key = 'blank-lines-before-closing-block-list';
+ if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
+ $blank_lines_before_closing_block_pattern =
+ make_block_pattern( '-blbcl', $rOpts->{$key} );
+ }
+
+ $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
+ $key = 'blank-lines-after-opening-block-list';
+ if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
+ $blank_lines_after_opening_block_pattern =
+ make_block_pattern( '-blaol', $rOpts->{$key} );
+ }
+ return;
}
sub make_block_pattern {
# input string: "if else elsif unless while for foreach do : sub";
# pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
+ # Minor Update:
+ #
+ # To distinguish between anonymous subs and named subs, use 'sub' to
+ # indicate a named sub, and 'asub' to indicate an anonymous sub
+
my ( $abbrev, $string ) = @_;
my @list = split_words($string);
my @words = ();
my %seen;
for my $i (@list) {
+ if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
next if $seen{$i};
$seen{$i} = 1;
if ( $i eq 'sub' ) {
}
+ elsif ( $i eq 'asub' ) {
+ }
+ elsif ( $i eq ';' ) {
+ push @words, ';';
+ }
+ elsif ( $i eq '{' ) {
+ push @words, '\{';
+ }
elsif ( $i eq ':' ) {
push @words, '\w+:';
}
push @words, $i;
}
else {
- warn "unrecognized block type $i after $abbrev, ignoring\n";
+ Perl::Tidy::Warn
+ "unrecognized block type $i after $abbrev, ignoring\n";
}
}
my $pattern = '(' . join( '|', @words ) . ')$';
+ my $sub_patterns = "";
if ( $seen{'sub'} ) {
- $pattern = '(' . $pattern . '|sub)';
+ $sub_patterns .= '|' . $SUB_PATTERN;
+ }
+ if ( $seen{'asub'} ) {
+ $sub_patterns .= '|' . $ASUB_PATTERN;
+ }
+ if ($sub_patterns) {
+ $pattern = '(' . $pattern . $sub_patterns . ')';
}
$pattern = '^' . $pattern;
return $pattern;
my $prefix = $rOpts->{'static-side-comment-prefix'};
$prefix =~ s/^\s*//;
my $pattern = '^' . $prefix;
- eval "'##'=~/$pattern/";
- if ($@) {
- die
+ if ( bad_pattern($pattern) ) {
+ Perl::Tidy::Die
"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
}
$static_side_comment_pattern = $pattern;
}
+ return;
}
sub make_closing_side_comment_prefix {
# make sure we have a good pattern
# if we fail this we probably have an error in escaping
# characters.
- eval "'##'=~/$test_csc_prefix_pattern/";
- if ($@) {
+
+ if ( bad_pattern($test_csc_prefix_pattern) ) {
# shouldn't happen..must have screwed up escaping, above
report_definite_bug();
- warn
+ Perl::Tidy::Warn
"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n";
# just warn and keep going with defaults
- warn "Please consider using a simpler -cscp prefix\n";
- warn "Using default -cscp instead; please check output\n";
+ Perl::Tidy::Warn "Please consider using a simpler -cscp prefix\n";
+ Perl::Tidy::Warn
+ "Using default -cscp instead; please check output\n";
}
else {
$csc_prefix = $test_csc_prefix;
}
$rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
$closing_side_comment_prefix_pattern = $csc_prefix_pattern;
+ return;
}
sub dump_want_left_space {
-1 means the token does not want a space to its left
------------------------------------------------------------------------
EOM
- foreach ( sort keys %want_left_space ) {
- print $fh "$_\t$want_left_space{$_}\n";
+ foreach my $key ( sort keys %want_left_space ) {
+ print $fh "$key\t$want_left_space{$key}\n";
}
+ return;
}
sub dump_want_right_space {
-1 means the token does not want a space to its right
------------------------------------------------------------------------
EOM
- foreach ( sort keys %want_right_space ) {
- print $fh "$_\t$want_right_space{$_}\n";
+ foreach my $key ( sort keys %want_right_space ) {
+ print $fh "$key\t$want_right_space{$key}\n";
}
+ return;
}
{ # begin is_essential_whitespace
BEGIN {
- @_ = qw(sort grep map);
- @is_sort_grep_map{@_} = (1) x scalar(@_);
+ my @q;
+ @q = qw(sort grep map);
+ @is_sort_grep_map{@q} = (1) x scalar(@q);
- @_ = qw(for foreach);
- @is_for_foreach{@_} = (1) x scalar(@_);
+ @q = qw(for foreach);
+ @is_for_foreach{@q} = (1) x scalar(@q);
}
# my $size=-s::SINK if $file; <==OK but we won't do it
# don't join something like: for bla::bla:: abc
# example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
- ( ( $tokenl =~ /([\'\w]|\:\:)$/ ) && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
+ ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
+ && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
- # do not combine a number with a concatination dot
+ # do not combine a number with a concatenation dot
# example: pom.caputo:
# $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
|| ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
# || ($tokenr eq '-')
# keep a space between a quote and a bareword to prevent the
- # bareword from becomming a quote modifier.
+ # bareword from becoming a quote modifier.
|| ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
# keep a space between a token ending in '$' and any word;
# keep paren separate in 'use Foo::Bar ()'
|| ( $tokenr eq '('
- && $typel eq 'w'
- && $typell eq 'k'
- && $tokenll eq 'use' )
-
- # keep any space between filehandle and paren:
- # file mangle.t with --mangle:
- || ( $typel eq 'Y' && $tokenr eq '(' )
-
- # retain any space after here doc operator ( hereerr.t)
- || ( $typel eq 'h' )
-
- # FIXME: this needs some further work; extrude.t has test cases
- # it is safest to retain any space after start of ? : operator
- # because of perl's quirky parser.
- # ie, this line will fail if you remove the space after the '?':
- # $b=join $comma ? ',' : ':', @_; # ok
- # $b=join $comma ?',' : ':', @_; # error!
- # but this is ok :)
- # $b=join $comma?',' : ':', @_; # not a problem!
- ## || ($typel eq '?')
-
- # be careful with a space around ++ and --, to avoid ambiguity as to
- # which token it applies
- || ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) )
- || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
-
- # need space after foreach my; for example, this will fail in
- # older versions of Perl:
- # foreach my$ft(@filetypes)...
- || (
- $tokenl eq 'my'
-
- # /^(for|foreach)$/
- && $is_for_foreach{$tokenll}
- && $tokenr =~ /^\$/
- )
-
- # must have space between grep and left paren; "grep(" will fail
- || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
-
- # don't stick numbers next to left parens, as in:
- #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
- || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
-
- ; # the value of this long logic sequence is the result we want
- return $result;
- }
-}
-
-sub set_white_space_flag {
-
- # This routine examines each pair of nonblank tokens and
- # sets values for array @white_space_flag.
- #
- # $white_space_flag[$j] is a flag indicating whether a white space
- # BEFORE token $j is needed, with the following values:
- #
- # -1 do not want a space before token $j
- # 0 optional space or $j is a whitespace
- # 1 want a space before token $j
- #
- #
- # The values for the first token will be defined based
- # upon the contents of the "to_go" output array.
- #
- # Note: retain debug print statements because they are usually
- # required after adding new token types.
-
- BEGIN {
-
- # initialize these global hashes, which control the use of
- # whitespace around tokens:
- #
- # %binary_ws_rules
- # %want_left_space
- # %want_right_space
- # %space_after_keyword
- #
- # Many token types are identical to the tokens themselves.
- # See the tokenizer for a complete list. Here are some special types:
- # k = perl keyword
- # f = semicolon in for statement
- # m = unary minus
- # p = unary plus
- # Note that :: is excluded since it should be contained in an identifier
- # Note that '->' is excluded because it never gets space
- # parentheses and brackets are excluded since they are handled specially
- # curly braces are included but may be overridden by logic, such as
- # newline logic.
-
- # NEW_TOKENS: create a whitespace rule here. This can be as
- # simple as adding your new letter to @spaces_both_sides, for
- # example.
-
- @_ = qw" L { ( [ ";
- @is_opening_type{@_} = (1) x scalar(@_);
-
- @_ = qw" R } ) ] ";
- @is_closing_type{@_} = (1) x scalar(@_);
-
- my @spaces_both_sides = qw"
- + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
- .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
- &&= ||= //= <=> A k f w F n C Y U G v
- ";
-
- my @spaces_left_side = qw"
- t ! ~ m p { \ h pp mm Z j
- ";
- push( @spaces_left_side, '#' ); # avoids warning message
-
- my @spaces_right_side = qw"
- ; } ) ] R J ++ -- **=
- ";
- push( @spaces_right_side, ',' ); # avoids warning message
- @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides);
- @want_right_space{@spaces_both_sides} =
- (1) x scalar(@spaces_both_sides);
- @want_left_space{@spaces_left_side} = (1) x scalar(@spaces_left_side);
- @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side);
- @want_left_space{@spaces_right_side} =
- (-1) x scalar(@spaces_right_side);
- @want_right_space{@spaces_right_side} =
- (1) x scalar(@spaces_right_side);
- $want_left_space{'L'} = WS_NO;
- $want_left_space{'->'} = WS_NO;
- $want_right_space{'->'} = WS_NO;
- $want_left_space{'**'} = WS_NO;
- $want_right_space{'**'} = WS_NO;
-
- # hash type information must stay tightly bound
- # as in : ${xxxx}
- $binary_ws_rules{'i'}{'L'} = WS_NO;
- $binary_ws_rules{'i'}{'{'} = WS_YES;
- $binary_ws_rules{'k'}{'{'} = WS_YES;
- $binary_ws_rules{'U'}{'{'} = WS_YES;
- $binary_ws_rules{'i'}{'['} = WS_NO;
- $binary_ws_rules{'R'}{'L'} = WS_NO;
- $binary_ws_rules{'R'}{'{'} = WS_NO;
- $binary_ws_rules{'t'}{'L'} = WS_NO;
- $binary_ws_rules{'t'}{'{'} = WS_NO;
- $binary_ws_rules{'}'}{'L'} = WS_NO;
- $binary_ws_rules{'}'}{'{'} = WS_NO;
- $binary_ws_rules{'$'}{'L'} = WS_NO;
- $binary_ws_rules{'$'}{'{'} = WS_NO;
- $binary_ws_rules{'@'}{'L'} = WS_NO;
- $binary_ws_rules{'@'}{'{'} = WS_NO;
- $binary_ws_rules{'='}{'L'} = WS_YES;
-
- # the following includes ') {'
- # as in : if ( xxx ) { yyy }
- $binary_ws_rules{']'}{'L'} = WS_NO;
- $binary_ws_rules{']'}{'{'} = WS_NO;
- $binary_ws_rules{')'}{'{'} = WS_YES;
- $binary_ws_rules{')'}{'['} = WS_NO;
- $binary_ws_rules{']'}{'['} = WS_NO;
- $binary_ws_rules{']'}{'{'} = WS_NO;
- $binary_ws_rules{'}'}{'['} = WS_NO;
- $binary_ws_rules{'R'}{'['} = WS_NO;
-
- $binary_ws_rules{']'}{'++'} = WS_NO;
- $binary_ws_rules{']'}{'--'} = WS_NO;
- $binary_ws_rules{')'}{'++'} = WS_NO;
- $binary_ws_rules{')'}{'--'} = WS_NO;
-
- $binary_ws_rules{'R'}{'++'} = WS_NO;
- $binary_ws_rules{'R'}{'--'} = WS_NO;
-
- ########################################################
- # should no longer be necessary (see niek.pl)
- ##$binary_ws_rules{'k'}{':'} = WS_NO; # keep colon with label
- ##$binary_ws_rules{'w'}{':'} = WS_NO;
- ########################################################
- $binary_ws_rules{'i'}{'Q'} = WS_YES;
- $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
-
- # FIXME: we need to split 'i' into variables and functions
- # and have no space for functions but space for variables. For now,
- # I have a special patch in the special rules below
- $binary_ws_rules{'i'}{'('} = WS_NO;
-
- $binary_ws_rules{'w'}{'('} = WS_NO;
- $binary_ws_rules{'w'}{'{'} = WS_YES;
- }
- my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_;
- my ( $last_token, $last_type, $last_block_type, $token, $type,
- $block_type );
- my (@white_space_flag);
- my $j_tight_closing_paren = -1;
-
- if ( $max_index_to_go >= 0 ) {
- $token = $tokens_to_go[$max_index_to_go];
- $type = $types_to_go[$max_index_to_go];
- $block_type = $block_type_to_go[$max_index_to_go];
- }
- else {
- $token = ' ';
- $type = 'b';
- $block_type = '';
- }
-
- # loop over all tokens
- my ( $j, $ws );
-
- for ( $j = 0 ; $j <= $jmax ; $j++ ) {
-
- if ( $$rtoken_type[$j] eq 'b' ) {
- $white_space_flag[$j] = WS_OPTIONAL;
- next;
- }
-
- # set a default value, to be changed as needed
- $ws = undef;
- $last_token = $token;
- $last_type = $type;
- $last_block_type = $block_type;
- $token = $$rtokens[$j];
- $type = $$rtoken_type[$j];
- $block_type = $$rblock_type[$j];
-
- #---------------------------------------------------------------
- # section 1:
- # handle space on the inside of opening braces
- #---------------------------------------------------------------
-
- # /^[L\{\(\[]$/
- if ( $is_opening_type{$last_type} ) {
-
- $j_tight_closing_paren = -1;
-
- # let's keep empty matched braces together: () {} []
- # except for BLOCKS
- if ( $token eq $matching_token{$last_token} ) {
- if ($block_type) {
- $ws = WS_YES;
- }
- else {
- $ws = WS_NO;
- }
- }
- else {
-
- # we're considering the right of an opening brace
- # tightness = 0 means always pad inside with space
- # tightness = 1 means pad inside if "complex"
- # tightness = 2 means never pad inside with space
-
- my $tightness;
- if ( $last_type eq '{'
- && $last_token eq '{'
- && $last_block_type )
- {
- $tightness = $rOpts_block_brace_tightness;
- }
- else { $tightness = $tightness{$last_token} }
-
- if ( $tightness <= 0 ) {
- $ws = WS_YES;
- }
- elsif ( $tightness > 1 ) {
- $ws = WS_NO;
- }
- else {
-
- # Patch to count '-foo' as single token so that
- # each of $a{-foo} and $a{foo} and $a{'foo'} do
- # not get spaces with default formatting.
- my $j_here = $j;
- ++$j_here
- if ( $token eq '-'
- && $last_token eq '{'
- && $$rtoken_type[ $j + 1 ] eq 'w' );
-
- # $j_next is where a closing token should be if
- # the container has a single token
- my $j_next =
- ( $$rtoken_type[ $j_here + 1 ] eq 'b' )
- ? $j_here + 2
- : $j_here + 1;
- my $tok_next = $$rtokens[$j_next];
- my $type_next = $$rtoken_type[$j_next];
-
- # for tightness = 1, if there is just one token
- # within the matching pair, we will keep it tight
- if (
- $tok_next eq $matching_token{$last_token}
-
- # but watch out for this: [ [ ] (misc.t)
- && $last_token ne $token
- )
- {
-
- # remember where to put the space for the closing paren
- $j_tight_closing_paren = $j_next;
- $ws = WS_NO;
- }
- else {
- $ws = WS_YES;
- }
- }
- }
- } # done with opening braces and brackets
- my $ws_1 = $ws
- if FORMATTER_DEBUG_FLAG_WHITE;
-
- #---------------------------------------------------------------
- # section 2:
- # handle space on inside of closing brace pairs
- #---------------------------------------------------------------
-
- # /[\}\)\]R]/
- if ( $is_closing_type{$type} ) {
-
- if ( $j == $j_tight_closing_paren ) {
-
- $j_tight_closing_paren = -1;
- $ws = WS_NO;
- }
- else {
-
- if ( !defined($ws) ) {
-
- my $tightness;
- if ( $type eq '}' && $token eq '}' && $block_type ) {
- $tightness = $rOpts_block_brace_tightness;
- }
- else { $tightness = $tightness{$token} }
-
- $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
- }
- }
- }
-
- my $ws_2 = $ws
- if FORMATTER_DEBUG_FLAG_WHITE;
-
- #---------------------------------------------------------------
- # section 3:
- # use the binary table
- #---------------------------------------------------------------
- if ( !defined($ws) ) {
- $ws = $binary_ws_rules{$last_type}{$type};
- }
- my $ws_3 = $ws
- if FORMATTER_DEBUG_FLAG_WHITE;
-
- #---------------------------------------------------------------
- # section 4:
- # some special cases
- #---------------------------------------------------------------
- if ( $token eq '(' ) {
-
- # This will have to be tweaked as tokenization changes.
- # We usually want a space at '} (', for example:
- # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
- #
- # But not others:
- # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
- # At present, the above & block is marked as type L/R so this case
- # won't go through here.
- if ( $last_type eq '}' ) { $ws = WS_YES }
-
- # NOTE: some older versions of Perl had occasional problems if
- # spaces are introduced between keywords or functions and opening
- # parens. So the default is not to do this except is certain
- # cases. The current Perl seems to tolerate spaces.
-
- # Space between keyword and '('
- elsif ( $last_type eq 'k' ) {
- $ws = WS_NO
- unless ( $rOpts_space_keyword_paren
- || $space_after_keyword{$last_token} );
- }
-
- # Space between function and '('
- # -----------------------------------------------------
- # 'w' and 'i' checks for something like:
- # myfun( &myfun( ->myfun(
- # -----------------------------------------------------
- elsif (( $last_type =~ /^[wU]$/ )
- || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
- {
- $ws = WS_NO unless ($rOpts_space_function_paren);
- }
-
- # space between something like $i and ( in
- # for $i ( 0 .. 20 ) {
- # FIXME: eventually, type 'i' needs to be split into multiple
- # token types so this can be a hardwired rule.
- elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
- $ws = WS_YES;
- }
+ && $typel eq 'w'
+ && $typell eq 'k'
+ && $tokenll eq 'use' )
- # allow constant function followed by '()' to retain no space
- elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) {
- $ws = WS_NO;
- }
- }
+ # keep any space between filehandle and paren:
+ # file mangle.t with --mangle:
+ || ( $typel eq 'Y' && $tokenr eq '(' )
- # patch for SWITCH/CASE: make space at ']{' optional
- # since the '{' might begin a case or when block
- elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
- $ws = WS_OPTIONAL;
- }
+ # retain any space after here doc operator ( hereerr.t)
+ || ( $typel eq 'h' )
- # keep space between 'sub' and '{' for anonymous sub definition
- if ( $type eq '{' ) {
- if ( $last_token eq 'sub' ) {
- $ws = WS_YES;
- }
+ # be careful with a space around ++ and --, to avoid ambiguity as to
+ # which token it applies
+ || ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) )
+ || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
- # this is needed to avoid no space in '){'
- if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
+ # need space after foreach my; for example, this will fail in
+ # older versions of Perl:
+ # foreach my$ft(@filetypes)...
+ || (
+ $tokenl eq 'my'
- # avoid any space before the brace or bracket in something like
- # @opts{'a','b',...}
- if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
- $ws = WS_NO;
- }
- }
+ # /^(for|foreach)$/
+ && $is_for_foreach{$tokenll}
+ && $tokenr =~ /^\$/
+ )
- elsif ( $type eq 'i' ) {
+ # must have space between grep and left paren; "grep(" will fail
+ || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
- # never a space before ->
- if ( $token =~ /^\-\>/ ) {
- $ws = WS_NO;
- }
- }
+ # don't stick numbers next to left parens, as in:
+ #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
+ || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
- # retain any space between '-' and bare word
- elsif ( $type eq 'w' || $type eq 'C' ) {
- $ws = WS_OPTIONAL if $last_type eq '-';
+ # We must be sure that a space between a ? and a quoted string
+ # remains if the space before the ? remains. [Loca.pm, lockarea]
+ # ie,
+ # $b=join $comma ? ',' : ':', @_; # ok
+ # $b=join $comma?',' : ':', @_; # ok!
+ # $b=join $comma ?',' : ':', @_; # error!
+ # Not really required:
+ ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
- # never a space before ->
- if ( $token =~ /^\-\>/ ) {
- $ws = WS_NO;
- }
- }
+ # do not remove space between an '&' and a bare word because
+ # it may turn into a function evaluation, like here
+ # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
+ # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
+ || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
- # retain any space between '-' and bare word
- # example: avoid space between 'USER' and '-' here:
- # $myhash{USER-NAME}='steve';
- elsif ( $type eq 'm' || $type eq '-' ) {
- $ws = WS_OPTIONAL if ( $last_type eq 'w' );
- }
+ # space stacked labels (TODO: check if really necessary)
+ || ( $typel eq 'J' && $typer eq 'J' )
- # always space before side comment
- elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
+ ; # the value of this long logic sequence is the result we want
+##if ($typel eq 'j') {print STDERR "typel=$typel typer=$typer result='$result'\n"}
+ return $result;
+ }
+}
- # always preserver whatever space was used after a possible
- # filehandle (except _) or here doc operator
- if (
- $type ne '#'
- && ( ( $last_type eq 'Z' && $last_token ne '_' )
- || $last_type eq 'h' )
- )
- {
- $ws = WS_OPTIONAL;
- }
+{
+ my %secret_operators;
+ my %is_leading_secret_token;
- my $ws_4 = $ws
- if FORMATTER_DEBUG_FLAG_WHITE;
+ BEGIN {
- #---------------------------------------------------------------
- # section 5:
- # default rules not covered above
- #---------------------------------------------------------------
- # if we fall through to here,
- # look at the pre-defined hash tables for the two tokens, and
- # if (they are equal) use the common value
- # if (either is zero or undef) use the other
- # if (either is -1) use it
- # That is,
- # left vs right
- # 1 vs 1 --> 1
- # 0 vs 0 --> 0
- # -1 vs -1 --> -1
- #
- # 0 vs -1 --> -1
- # 0 vs 1 --> 1
- # 1 vs 0 --> 1
- # -1 vs 0 --> -1
+ # token lists for perl secret operators as compiled by Philippe Bruhat
+ # at: https://metacpan.org/module/perlsecret
+ %secret_operators = (
+ 'Goatse' => [qw#= ( ) =#], #=( )=
+ 'Venus1' => [qw#0 +#], # 0+
+ 'Venus2' => [qw#+ 0#], # +0
+ 'Enterprise' => [qw#) x ! !#], # ()x!!
+ 'Kite1' => [qw#~ ~ <>#], # ~~<>
+ 'Kite2' => [qw#~~ <>#], # ~~<>
+ 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
+ 'Bang bang ' => [qw#! !#], # !!
+ );
+
+ # The following operators and constants are not included because they
+ # are normally kept tight by perltidy:
+ # ~~ <~>
#
- # -1 vs 1 --> -1
- # 1 vs -1 --> -1
- if ( !defined($ws) ) {
- my $wl = $want_left_space{$type};
- my $wr = $want_right_space{$last_type};
- if ( !defined($wl) ) { $wl = 0 }
- if ( !defined($wr) ) { $wr = 0 }
- $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
- }
- if ( !defined($ws) ) {
- $ws = 0;
- write_diagnostics(
- "WS flag is undefined for tokens $last_token $token\n");
+ # Make a lookup table indexed by the first token of each operator:
+ # first token => [list, list, ...]
+ foreach my $value ( values(%secret_operators) ) {
+ my $tok = $value->[0];
+ push @{ $is_leading_secret_token{$tok} }, $value;
}
+ }
- # Treat newline as a whitespace. Otherwise, we might combine
- # 'Send' and '-recipients' here according to the above rules:
- # my $msg = new Fax::Send
- # -recipients => $to,
- # -data => $data;
- if ( $ws == 0 && $j == 0 ) { $ws = 1 }
+ sub new_secret_operator_whitespace {
- if ( ( $ws == 0 )
- && $j > 0
- && $j < $jmax
- && ( $last_type !~ /^[Zh]$/ ) )
- {
+ my ( $rlong_array, $rwhitespace_flags ) = @_;
- # If this happens, we have a non-fatal but undesirable
- # hole in the above rules which should be patched.
- write_diagnostics(
- "WS flag is zero for tokens $last_token $token\n");
- }
- $white_space_flag[$j] = $ws;
+ # Loop over all tokens in this line
+ my ( $token, $type );
+ my $jmax = @{$rlong_array} - 1;
+ foreach my $j ( 0 .. $jmax ) {
- FORMATTER_DEBUG_FLAG_WHITE && do {
- my $str = substr( $last_token, 0, 15 );
- $str .= ' ' x ( 16 - length($str) );
- if ( !defined($ws_1) ) { $ws_1 = "*" }
- if ( !defined($ws_2) ) { $ws_2 = "*" }
- if ( !defined($ws_3) ) { $ws_3 = "*" }
- if ( !defined($ws_4) ) { $ws_4 = "*" }
- print
-"WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
- };
- }
- return \@white_space_flag;
+ $token = $rlong_array->[$j]->[_TOKEN_];
+ $type = $rlong_array->[$j]->[_TYPE_];
+
+ # Skip unless this token might start a secret operator
+ next if ( $type eq 'b' );
+ next unless ( $is_leading_secret_token{$token} );
+
+ # Loop over all secret operators with this leading token
+ foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
+ my $jend = $j - 1;
+ foreach my $tok ( @{$rpattern} ) {
+ $jend++;
+ $jend++
+
+ if ( $jend <= $jmax
+ && $rlong_array->[$jend]->[_TYPE_] eq 'b' );
+ if ( $jend > $jmax
+ || $tok ne $rlong_array->[$jend]->[_TOKEN_] )
+ {
+ $jend = undef;
+ last;
+ }
+ }
+
+ if ($jend) {
+
+ # set flags to prevent spaces within this operator
+ foreach my $jj ( $j + 1 .. $jend ) {
+ $rwhitespace_flags->[$jj] = WS_NO;
+ }
+ $j = $jend;
+ last;
+ }
+ } ## End Loop over all operators
+ } ## End loop over all tokens
+ return;
+ } # End sub
}
-{ # begin print_line_of_tokens
+{ # begin print_line_of_tokens
- my $rtoken_type;
- my $rtokens;
- my $rlevels;
- my $rslevels;
- my $rblock_type;
- my $rcontainer_type;
- my $rcontainer_environment;
- my $rtype_sequence;
- my $input_line;
- my $rnesting_tokens;
- my $rci_levels;
- my $rnesting_blocks;
+ my $rinput_token_array; # Current working array
+ my $rinput_K_array; # Future working array
my $in_quote;
- my $python_indentation_level;
+ my $guessed_indentation_level;
+ # This should be a return variable from extract_token
# These local token variables are stored by store_token_to_go:
+ my $rtoken_vars;
+ my $Ktoken_vars;
my $block_type;
my $ci_level;
my $container_environment;
my $container_type;
my $in_continued_quote;
my $level;
- my $nesting_blocks;
my $no_internal_newlines;
my $slevel;
my $token;
# routine to pull the jth token from the line of tokens
sub extract_token {
- my $j = shift;
- $token = $$rtokens[$j];
- $type = $$rtoken_type[$j];
- $block_type = $$rblock_type[$j];
- $container_type = $$rcontainer_type[$j];
- $container_environment = $$rcontainer_environment[$j];
- $type_sequence = $$rtype_sequence[$j];
- $level = $$rlevels[$j];
- $slevel = $$rslevels[$j];
- $nesting_blocks = $$rnesting_blocks[$j];
- $ci_level = $$rci_levels[$j];
+ my ( $self, $j ) = @_;
+
+ my $rLL = $self->{rLL};
+ $Ktoken_vars = $rinput_K_array->[$j];
+ if ( !defined($Ktoken_vars) ) {
+
+ # Shouldn't happen: an error here would be due to a recent program change
+ Fault("undefined index K for j=$j");
+ }
+ $rtoken_vars = $rLL->[$Ktoken_vars];
+
+ if ( $rtoken_vars->[_TOKEN_] ne $rLL->[$Ktoken_vars]->[_TOKEN_] ) {
+
+ # Shouldn't happen: an error here would be due to a recent program change
+ Fault(<<EOM);
+ j=$j, K=$Ktoken_vars, '$rtoken_vars->[_TOKEN_]' ne '$rLL->[$Ktoken_vars]'
+EOM
+ }
+
+ #########################################################
+ # these are now redundant and can eventually be eliminated
+
+ $token = $rtoken_vars->[_TOKEN_];
+ $type = $rtoken_vars->[_TYPE_];
+ $block_type = $rtoken_vars->[_BLOCK_TYPE_];
+ $container_type = $rtoken_vars->[_CONTAINER_TYPE_];
+ $container_environment = $rtoken_vars->[_CONTAINER_ENVIRONMENT_];
+ $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
+ $level = $rtoken_vars->[_LEVEL_];
+ $slevel = $rtoken_vars->[_SLEVEL_];
+ $ci_level = $rtoken_vars->[_CI_LEVEL_];
+ #########################################################
+
+ return;
}
{
$block_type, $ci_level,
$container_environment, $container_type,
$in_continued_quote, $level,
- $nesting_blocks, $no_internal_newlines,
- $slevel, $token,
- $type, $type_sequence,
+ $no_internal_newlines, $slevel,
+ $token, $type,
+ $type_sequence, $rtoken_vars,
+ $Ktoken_vars,
);
+ return;
}
sub restore_current_token {
$block_type, $ci_level,
$container_environment, $container_type,
$in_continued_quote, $level,
- $nesting_blocks, $no_internal_newlines,
- $slevel, $token,
- $type, $type_sequence,
+ $no_internal_newlines, $slevel,
+ $token, $type,
+ $type_sequence, $rtoken_vars,
+ $Ktoken_vars,
) = @saved_token;
+ return;
}
}
+ sub token_length {
+
+ # Returns the length of a token, given:
+ # $token=text of the token
+ # $type = type
+ # $not_first_token = should be TRUE if this is not the first token of
+ # the line. It might the index of this token in an array. It is
+ # used to test for a side comment vs a block comment.
+ # Note: Eventually this should be the only routine determining the
+ # length of a token in this package.
+ my ( $token, $type, $not_first_token ) = @_;
+ my $token_length = length($token);
+
+ # We mark lengths of side comments as just 1 if we are
+ # ignoring their lengths when setting line breaks.
+ $token_length = 1
+ if ( $rOpts_ignore_side_comment_lengths
+ && $not_first_token
+ && $type eq '#' );
+ return $token_length;
+ }
+
+ sub rtoken_length {
+
+ # return length of ith token in @{$rtokens}
+ my ($i) = @_;
+ return token_length( $rinput_token_array->[$i]->[_TOKEN_],
+ $rinput_token_array->[$i]->[_TYPE_], $i );
+ }
+
# Routine to place the current token into the output stream.
# Called once per output token.
sub store_token_to_go {
- my $flag = $no_internal_newlines;
- if ( $_[0] ) { $flag = 1 }
+ my ( $self, $side_comment_follows ) = @_;
+
+ my $flag = $side_comment_follows ? 1 : $no_internal_newlines;
- $tokens_to_go[ ++$max_index_to_go ] = $token;
+ ++$max_index_to_go;
+ $K_to_go[$max_index_to_go] = $Ktoken_vars;
+ $rtoken_vars_to_go[$max_index_to_go] = $rtoken_vars;
+ $tokens_to_go[$max_index_to_go] = $token;
$types_to_go[$max_index_to_go] = $type;
$nobreak_to_go[$max_index_to_go] = $flag;
$old_breakpoint_to_go[$max_index_to_go] = 0;
$block_type_to_go[$max_index_to_go] = $block_type;
$type_sequence_to_go[$max_index_to_go] = $type_sequence;
$container_environment_to_go[$max_index_to_go] = $container_environment;
- $nesting_blocks_to_go[$max_index_to_go] = $nesting_blocks;
$ci_levels_to_go[$max_index_to_go] = $ci_level;
$mate_index_to_go[$max_index_to_go] = -1;
$matching_token_to_go[$max_index_to_go] = '';
+ $bond_strength_to_go[$max_index_to_go] = 0;
# Note: negative levels are currently retained as a diagnostic so that
# the 'final indentation level' is correctly reported for bad scripts.
## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
$levels_to_go[$max_index_to_go] = $level;
$nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
- $lengths_to_go[ $max_index_to_go + 1 ] =
- $lengths_to_go[$max_index_to_go] + length($token);
+
+ # link the non-blank tokens
+ my $iprev = $max_index_to_go - 1;
+ $iprev-- if ( $iprev >= 0 && $types_to_go[$iprev] eq 'b' );
+ $iprev_to_go[$max_index_to_go] = $iprev;
+ $inext_to_go[$iprev] = $max_index_to_go
+ if ( $iprev >= 0 && $type ne 'b' );
+ $inext_to_go[$max_index_to_go] = $max_index_to_go + 1;
+
+ $token_lengths_to_go[$max_index_to_go] =
+ token_length( $token, $type, $max_index_to_go );
+
+ # We keep a running sum of token lengths from the start of this batch:
+ # summed_lengths_to_go[$i] = total length to just before token $i
+ # summed_lengths_to_go[$i+1] = total length to just after token $i
+ $summed_lengths_to_go[ $max_index_to_go + 1 ] =
+ $summed_lengths_to_go[$max_index_to_go] +
+ $token_lengths_to_go[$max_index_to_go];
# Define the indentation that this token would have if it started
# a new line. We have to do this now because we need to know this
# when considering one-line blocks.
set_leading_whitespace( $level, $ci_level, $in_continued_quote );
+ # remember previous nonblank tokens seen
if ( $type ne 'b' ) {
$last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
$last_last_nonblank_type_to_go = $last_nonblank_type_to_go;
FORMATTER_DEBUG_FLAG_STORE && do {
my ( $a, $b, $c ) = caller();
- print
+ print STDOUT
"STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
};
+ return;
}
sub insert_new_token_to_go {
# insert a new token into the output stream. use same level as
# previous token; assumes a character at max_index_to_go.
+ my $self = shift;
+ my @args = @_;
save_current_token();
- ( $token, $type, $slevel, $no_internal_newlines ) = @_;
+ ( $token, $type, $slevel, $no_internal_newlines ) = @args;
if ( $max_index_to_go == UNDEFINED_INDEX ) {
warning("code bug: bad call to insert_new_token_to_go\n");
# FIXME: it seems to be necessary to use the next, rather than
# previous, value of this variable when creating a new blank (align.t)
#my $slevel = $nesting_depth_to_go[$max_index_to_go];
- $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go];
$ci_level = $ci_levels_to_go[$max_index_to_go];
$container_environment = $container_environment_to_go[$max_index_to_go];
$in_continued_quote = 0;
$block_type = "";
$type_sequence = "";
- store_token_to_go();
+ $self->store_token_to_go();
restore_current_token();
return;
}
+ sub copy_hash {
+ my ($rold_token_hash) = @_;
+ my %new_token_hash =
+ map { $_, $rold_token_hash->{$_} } keys %{$rold_token_hash};
+ return \%new_token_hash;
+ }
+
+ sub copy_array {
+ my ($rold) = @_;
+ my @new = map { $_ } @{$rold};
+ return \@new;
+ }
+
+ sub copy_token_as_type {
+ my ( $rold_token, $type, $token ) = @_;
+ if ( $type eq 'b' ) {
+ $token = " " unless defined($token);
+ }
+ elsif ( $type eq 'q' ) {
+ $token = '' unless defined($token);
+ }
+ elsif ( $type eq '->' ) {
+ $token = '->' unless defined($token);
+ }
+ elsif ( $type eq ';' ) {
+ $token = ';' unless defined($token);
+ }
+ else {
+ Fault(
+"Programming error: copy_token_as has type $type but should be 'b' or 'q'"
+ );
+ }
+ my $rnew_token = copy_array($rold_token);
+ $rnew_token->[_TYPE_] = $type;
+ $rnew_token->[_TOKEN_] = $token;
+ $rnew_token->[_BLOCK_TYPE_] = '';
+ $rnew_token->[_CONTAINER_TYPE_] = '';
+ $rnew_token->[_CONTAINER_ENVIRONMENT_] = '';
+ $rnew_token->[_TYPE_SEQUENCE_] = '';
+ return $rnew_token;
+ }
+
+ sub boolean_equals {
+ my ( $val1, $val2 ) = @_;
+ return ( $val1 && $val2 || !$val1 && !$val2 );
+ }
+
sub print_line_of_tokens {
- my $line_of_tokens = shift;
+ my ( $self, $line_of_tokens ) = @_;
# This routine is called once per input line to process all of
# the tokens on that line. This is the first stage of
# processing. This routine decides if there should be
# whitespace between each pair of non-white tokens, so later
# routines only need to decide on any additional line breaks.
- # Any whitespace is initally a single space character. Later,
+ # Any whitespace is initially a single space character. Later,
# the vertical aligner may expand that to be multiple space
# characters if necessary for alignment.
- # extract input line number for error messages
$input_line_number = $line_of_tokens->{_line_number};
+ my $input_line = $line_of_tokens->{_line_text};
+ my $CODE_type = $line_of_tokens->{_code_type};
- $rtoken_type = $line_of_tokens->{_rtoken_type};
- $rtokens = $line_of_tokens->{_rtokens};
- $rlevels = $line_of_tokens->{_rlevels};
- $rslevels = $line_of_tokens->{_rslevels};
- $rblock_type = $line_of_tokens->{_rblock_type};
- $rcontainer_type = $line_of_tokens->{_rcontainer_type};
- $rcontainer_environment = $line_of_tokens->{_rcontainer_environment};
- $rtype_sequence = $line_of_tokens->{_rtype_sequence};
- $input_line = $line_of_tokens->{_line_text};
- $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
- $rci_levels = $line_of_tokens->{_rci_levels};
- $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
-
- $in_continued_quote = $starting_in_quote =
- $line_of_tokens->{_starting_in_quote};
- $in_quote = $line_of_tokens->{_ending_in_quote};
- $ending_in_quote = $in_quote;
- $python_indentation_level =
- $line_of_tokens->{_python_indentation_level};
-
- my $j;
- my $j_next;
- my $jmax;
- my $next_nonblank_token;
- my $next_nonblank_token_type;
- my $rwhite_space_flag;
-
- $jmax = @$rtokens - 1;
- $block_type = "";
- $container_type = "";
- $container_environment = "";
- $type_sequence = "";
- $no_internal_newlines = 1 - $rOpts_add_newlines;
- $is_static_block_comment = 0;
-
- # Handle a continued quote..
- if ($in_continued_quote) {
-
- # A line which is entirely a quote or pattern must go out
- # verbatim. Note: the \n is contained in $input_line.
- if ( $jmax <= 0 ) {
- if ( ( $input_line =~ "\t" ) ) {
- note_embedded_tab();
- }
- write_unindented_line("$input_line");
- $last_line_had_side_comment = 0;
- return;
- }
-
- # prior to version 20010406, perltidy had a bug which placed
- # continuation indentation before the last line of some multiline
- # quotes and patterns -- exactly the lines passing this way.
- # To help find affected lines in scripts run with these
- # versions, run with '-chk', and it will warn of any quotes or
- # patterns which might have been modified by these early
- # versions.
- if ( $rOpts->{'check-multiline-quotes'} && $input_line =~ /^ / ) {
- warning(
-"-chk: please check this line for extra leading whitespace\n"
- );
- }
- }
-
- # Write line verbatim if we are in a formatting skip section
- if ($in_format_skipping_section) {
- write_unindented_line("$input_line");
- $last_line_had_side_comment = 0;
-
- # Note: extra space appended to comment simplifies pattern matching
- if ( $jmax == 0
- && $$rtoken_type[0] eq '#'
- && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o )
- {
- $in_format_skipping_section = 0;
- write_logfile_entry("Exiting formatting skip section\n");
- }
- return;
- }
-
- # See if we are entering a formatting skip section
- if ( $rOpts_format_skipping
- && $jmax == 0
- && $$rtoken_type[0] eq '#'
- && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o )
- {
- flush();
- $in_format_skipping_section = 1;
- write_logfile_entry("Entering formatting skip section\n");
- write_unindented_line("$input_line");
- $last_line_had_side_comment = 0;
- return;
- }
+ my $rK_range = $line_of_tokens->{_rK_range};
+ my ( $K_first, $K_last ) = @{$rK_range};
- # delete trailing blank tokens
- if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- }
+ my $rLL = $self->{rLL};
+ my $rbreak_container = $self->{rbreak_container};
- # Handle a blank line..
- if ( $jmax < 0 ) {
+ if ( !defined($K_first) ) {
- # For the 'swallow-optional-blank-lines' option, we delete all
- # old blank lines and let the blank line rules generate any
- # needed blanks.
- if ( !$rOpts_swallow_optional_blank_lines ) {
- flush();
- $file_writer_object->write_blank_code_line();
- $last_line_leading_type = 'b';
- }
- $last_line_had_side_comment = 0;
+ # Unexpected blank line..
+ # Calling routine was supposed to handle this
+ Perl::Tidy::Warn(
+"Programming Error: Unexpected Blank Line in print_line_of_tokens. Ignoring"
+ );
return;
}
- # see if this is a static block comment (starts with ## by default)
- my $is_static_block_comment_without_leading_space = 0;
- if ( $jmax == 0
- && $$rtoken_type[0] eq '#'
- && $rOpts->{'static-block-comments'}
- && $input_line =~ /$static_block_comment_pattern/o )
- {
- $is_static_block_comment = 1;
- $is_static_block_comment_without_leading_space =
- substr( $input_line, 0, 1 ) eq '#';
+ $no_internal_newlines = 1 - $rOpts_add_newlines;
+ my $is_comment =
+ ( $K_first == $K_last && $rLL->[$K_first]->[_TYPE_] eq '#' );
+ my $is_static_block_comment_without_leading_space =
+ $CODE_type eq 'SBCX';
+ $is_static_block_comment =
+ $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space;
+ my $is_hanging_side_comment = $CODE_type eq 'HSC';
+ my $is_VERSION_statement = $CODE_type eq 'VER';
+ if ($is_VERSION_statement) {
+ $saw_VERSION_in_this_file = 1;
+ $no_internal_newlines = 1;
}
- # create a hanging side comment if appropriate
- if (
- $jmax == 0
- && $$rtoken_type[0] eq '#' # only token is a comment
- && $last_line_had_side_comment # last line had side comment
- && $input_line =~ /^\s/ # there is some leading space
- && !$is_static_block_comment # do not make static comment hanging
- && $rOpts->{'hanging-side-comments'} # user is allowing this
- )
- {
-
- # We will insert an empty qw string at the start of the token list
- # to force this comment to be a side comment. The vertical aligner
- # should then line it up with the previous side comment.
- unshift @$rtoken_type, 'q';
- unshift @$rtokens, '';
- unshift @$rlevels, $$rlevels[0];
- unshift @$rslevels, $$rslevels[0];
- unshift @$rblock_type, '';
- unshift @$rcontainer_type, '';
- unshift @$rcontainer_environment, '';
- unshift @$rtype_sequence, '';
- unshift @$rnesting_tokens, $$rnesting_tokens[0];
- unshift @$rci_levels, $$rci_levels[0];
- unshift @$rnesting_blocks, $$rnesting_blocks[0];
- $jmax = 1;
+ # Add interline blank if any
+ my $last_old_nonblank_type = "b";
+ my $first_new_nonblank_type = "b";
+ my $first_new_nonblank_token = " ";
+ if ( $max_index_to_go >= 0 ) {
+ $last_old_nonblank_type = $types_to_go[$max_index_to_go];
+ $first_new_nonblank_type = $rLL->[$K_first]->[_TYPE_];
+ $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
+ if ( !$is_comment
+ && $types_to_go[$max_index_to_go] ne 'b'
+ && $K_first > 0
+ && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' )
+ {
+ $K_first -= 1;
+ }
}
- # remember if this line has a side comment
- $last_line_had_side_comment =
- ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' );
+ # Copy the tokens into local arrays
+ $rinput_token_array = [];
+ $rinput_K_array = [];
+ $rinput_K_array = [ ( $K_first .. $K_last ) ];
+ $rinput_token_array = [ map { $rLL->[$_] } @{$rinput_K_array} ];
+ my $jmax = @{$rinput_K_array} - 1;
+
+ $in_continued_quote = $starting_in_quote =
+ $line_of_tokens->{_starting_in_quote};
+ $in_quote = $line_of_tokens->{_ending_in_quote};
+ $ending_in_quote = $in_quote;
+ $guessed_indentation_level =
+ $line_of_tokens->{_guessed_indentation_level};
+
+ my $j_next;
+ my $next_nonblank_token;
+ my $next_nonblank_token_type;
+
+ $block_type = "";
+ $container_type = "";
+ $container_environment = "";
+ $type_sequence = "";
+ ######################################
# Handle a block (full-line) comment..
- if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) {
+ ######################################
+ if ($is_comment) {
if ( $rOpts->{'delete-block-comments'} ) { return }
}
destroy_one_line_block();
- output_line_to_go();
+ $self->output_line_to_go();
# output a blank line before block comments
if (
- $last_line_leading_type !~ /^[#b]$/
- && $rOpts->{'blanks-before-comments'} # only if allowed
- && !
- $is_static_block_comment # never before static block comments
+ # unless we follow a blank or comment line
+ $last_line_leading_type !~ /^[#b]$/
+
+ # only if allowed
+ && $rOpts->{'blanks-before-comments'}
+
+ # if this is NOT an empty comment line
+ && $rinput_token_array->[0]->[_TOKEN_] ne '#'
+
+ # not after a short line ending in an opening token
+ # because we already have space above this comment.
+ # Note that the first comment in this if block, after
+ # the 'if (', does not get a blank line because of this.
+ && !$last_output_short_opening_token
+
+ # never before static block comments
+ && !$is_static_block_comment
)
{
- flush(); # switching to new output stream
+ $self->flush(); # switching to new output stream
$file_writer_object->write_blank_code_line();
$last_line_leading_type = 'b';
}
# TRIM COMMENTS -- This could be turned off as a option
- $$rtokens[0] =~ s/\s*$//; # trim right end
+ $rinput_token_array->[0]->[_TOKEN_] =~ s/\s*$//; # trim right end
if (
$rOpts->{'indent-block-comments'}
- && ( !$rOpts->{'indent-spaced-block-comments'}
+ && ( !$rOpts->{'indent-spaced-block-comments'}
|| $input_line =~ /^\s+/ )
&& !$is_static_block_comment_without_leading_space
)
{
- extract_token(0);
- store_token_to_go();
- output_line_to_go();
+ $self->extract_token(0);
+ $self->store_token_to_go();
+ $self->output_line_to_go();
}
else {
- flush(); # switching to new output stream
- $file_writer_object->write_code_line( $$rtokens[0] . "\n" );
+ $self->flush(); # switching to new output stream
+ $file_writer_object->write_code_line(
+ $rinput_token_array->[0]->[_TOKEN_] . "\n" );
$last_line_leading_type = '#';
}
if ( $rOpts->{'tee-block-comments'} ) {
return;
}
+ # TODO: Move to sub scan_comments
# compare input/output indentation except for continuation lines
# (because they have an unknown amount of initial blank space)
# and lines which are quotes (because they may have been outdented)
# Note: this test is placed here because we know the continuation flag
# at this point, which allows us to avoid non-meaningful checks.
- my $structural_indentation_level = $$rlevels[0];
- compare_indentation_levels( $python_indentation_level,
+ my $structural_indentation_level = $rinput_token_array->[0]->[_LEVEL_];
+ compare_indentation_levels( $guessed_indentation_level,
$structural_indentation_level )
- unless ( $python_indentation_level < 0
- || ( $$rci_levels[0] > 0 )
- || ( ( $python_indentation_level == 0 ) && $$rtoken_type[0] eq 'Q' )
- );
-
- # Patch needed for MakeMaker. Do not break a statement
- # in which $VERSION may be calculated. See MakeMaker.pm;
- # this is based on the coding in it.
- # The first line of a file that matches this will be eval'd:
- # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
- # Examples:
- # *VERSION = \'1.01';
- # ( $VERSION ) = '$Revision: 1.61 $ ' =~ /\$Revision:\s+([^\s]+)/;
- # We will pass such a line straight through without breaking
- # it unless -npvl is used
-
- my $is_VERSION_statement = 0;
+ unless ( $is_hanging_side_comment
+ || $rinput_token_array->[0]->[_CI_LEVEL_] > 0
+ || $guessed_indentation_level == 0
+ && $rinput_token_array->[0]->[_TYPE_] eq 'Q' );
- if (
- !$saw_VERSION_in_this_file
- && $input_line =~ /VERSION/ # quick check to reject most lines
- && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
- )
- {
- $saw_VERSION_in_this_file = 1;
- $is_VERSION_statement = 1;
- write_logfile_entry("passing VERSION line; -npvl deactivates\n");
- $no_internal_newlines = 1;
- }
+ ##########################
+ # Handle indentation-only
+ ##########################
- # take care of indentation-only
# NOTE: In previous versions we sent all qw lines out immediately here.
# No longer doing this: also write a line which is entirely a 'qw' list
# to allow stacking of opening and closing tokens. Note that interior
# qw lines will still go out at the end of this routine.
- if ( $rOpts->{'indent-only'} ) {
- flush();
- trim($input_line);
+ ##if ( $rOpts->{'indent-only'} ) {
+ if ( $CODE_type eq 'IO' ) {
+ $self->flush();
+ my $line = $input_line;
+
+ # delete side comments if requested with -io, but
+ # we will not allow deleting of closing side comments with -io
+ # because the coding would be more complex
+ if ( $rOpts->{'delete-side-comments'}
+ && $rinput_token_array->[$jmax]->[_TYPE_] eq '#' )
+ {
+
+ $line = "";
+ foreach my $jj ( 0 .. $jmax - 1 ) {
+ $line .= $rinput_token_array->[$jj]->[_TOKEN_];
+ }
+ }
+ $line = trim($line);
- extract_token(0);
- $token = $input_line;
+ $self->extract_token(0);
+ $token = $line;
$type = 'q';
$block_type = "";
$container_type = "";
$container_environment = "";
$type_sequence = "";
- store_token_to_go();
- output_line_to_go();
+ $self->store_token_to_go();
+ $self->output_line_to_go();
return;
}
- push( @$rtokens, ' ', ' ' ); # making $j+2 valid simplifies coding
- push( @$rtoken_type, 'b', 'b' );
- ($rwhite_space_flag) =
- set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type );
-
- # find input tabbing to allow checks for tabbing disagreement
- ## not used for now
- ##$input_line_tabbing = "";
- ##if ( $input_line =~ /^(\s*)/ ) { $input_line_tabbing = $1; }
-
- # if the buffer hasn't been flushed, add a leading space if
- # necessary to keep essential whitespace. This is really only
- # necessary if we are squeezing out all ws.
- if ( $max_index_to_go >= 0 ) {
-
- $old_line_count_in_batch++;
+ ############################
+ # Handle all other lines ...
+ ############################
- if (
- is_essential_whitespace(
- $last_last_nonblank_token,
- $last_last_nonblank_type,
- $tokens_to_go[$max_index_to_go],
- $types_to_go[$max_index_to_go],
- $$rtokens[0],
- $$rtoken_type[0]
- )
- )
- {
- my $slevel = $$rslevels[0];
- insert_new_token_to_go( ' ', 'b', $slevel,
- $no_internal_newlines );
- }
- }
+ #######################################################
+ # FIXME: this should become unnecessary
+ # making $j+2 valid simplifies coding
+ my $rnew_blank =
+ copy_token_as_type( $rinput_token_array->[$jmax], 'b' );
+ push @{$rinput_token_array}, $rnew_blank;
+ push @{$rinput_token_array}, $rnew_blank;
+ #######################################################
# If we just saw the end of an elsif block, write nag message
# if we do not see another elseif or an else.
if ($looking_for_else) {
- unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) {
+ unless ( $rinput_token_array->[0]->[_TOKEN_] =~ /^(elsif|else)$/ ) {
write_logfile_entry("(No else block)\n");
}
$looking_for_else = 0;
}
# This is a good place to kill incomplete one-line blocks
- if ( ( $semicolons_before_block_self_destruct == 0 )
- && ( $max_index_to_go >= 0 )
- && ( $types_to_go[$max_index_to_go] eq ';' )
- && ( $$rtokens[0] ne '}' ) )
+ if (
+ (
+ ( $semicolons_before_block_self_destruct == 0 )
+ && ( $max_index_to_go >= 0 )
+ && ( $last_old_nonblank_type eq ';' )
+ && ( $first_new_nonblank_token ne '}' )
+ )
+
+ # Patch for RT #98902. Honor request to break at old commas.
+ || ( $rOpts_break_at_old_comma_breakpoints
+ && $max_index_to_go >= 0
+ && $last_old_nonblank_type eq ',' )
+ )
{
+ $forced_breakpoint_to_go[$max_index_to_go] = 1
+ if ($rOpts_break_at_old_comma_breakpoints);
destroy_one_line_block();
- output_line_to_go();
+ $self->output_line_to_go();
}
# loop to process the tokens one-by-one
$type = 'b';
$token = "";
- foreach $j ( 0 .. $jmax ) {
+ # We do not want a leading blank if the previous batch just got output
+ my $jmin = 0;
+ if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
+ $jmin = 1;
+ }
+
+ foreach my $j ( $jmin .. $jmax ) {
# pull out the local values for this token
- extract_token($j);
+ $self->extract_token($j);
if ( $type eq '#' ) {
if ( $rbrace_follower && $type ne 'b' ) {
unless ( $rbrace_follower->{$token} ) {
- output_line_to_go();
+ $self->output_line_to_go();
}
$rbrace_follower = undef;
}
- $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1;
- $next_nonblank_token = $$rtokens[$j_next];
- $next_nonblank_token_type = $$rtoken_type[$j_next];
-
- #--------------------------------------------------------
- # Start of section to patch token text
- #--------------------------------------------------------
-
- # Modify certain tokens here for whitespace
- # The following is not yet done, but could be:
- # sub (x x x)
- if ( $type =~ /^[wit]$/ ) {
-
- # Examples:
- # change '$ var' to '$var' etc
- # '-> new' to '->new'
- if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
- $token =~ s/\s*//g;
- }
-
- if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g }
- }
-
- # change 'LABEL :' to 'LABEL:'
- elsif ( $type eq 'J' ) { $token =~ s/\s+//g }
-
- # patch to add space to something like "x10"
- # This avoids having to split this token in the pre-tokenizer
- elsif ( $type eq 'n' ) {
- if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / }
- }
+ $j_next =
+ ( $rinput_token_array->[ $j + 1 ]->[_TYPE_] eq 'b' )
+ ? $j + 2
+ : $j + 1;
+ $next_nonblank_token = $rinput_token_array->[$j_next]->[_TOKEN_];
+ $next_nonblank_token_type =
+ $rinput_token_array->[$j_next]->[_TYPE_];
- elsif ( $type eq 'Q' ) {
+ ######################
+ # MAYBE MOVE ELSEWHERE?
+ ######################
+ if ( $type eq 'Q' ) {
note_embedded_tab() if ( $token =~ "\t" );
# make note of something like '$var = s/xxx/yyy/;'
# in case it should have been '$var =~ s/xxx/yyy/;'
if (
- $token =~ /^(s|tr|y|m|\/)/
+ $token =~ /^(s|tr|y|m|\/)/
&& $last_nonblank_token =~ /^(=|==|!=)$/
- # precededed by simple scalar
+ # preceded by simple scalar
&& $last_last_nonblank_type eq 'i'
&& $last_last_nonblank_token =~ /^\$/
# (but give complaint if we can's see far enough ahead)
&& $next_nonblank_token =~ /^[; \)\}]$/
- # scalar is not decleared
+ # scalar is not declared
&& !(
$types_to_go[0] eq 'k'
&& $tokens_to_go[0] =~ /^(my|our|local)$/
}
}
- # trim blanks from right of qw quotes
- # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this)
- elsif ( $type eq 'q' ) {
- $token =~ s/\s*$//;
- note_embedded_tab() if ( $token =~ "\t" );
- }
-
- #--------------------------------------------------------
- # End of section to patch token text
- #--------------------------------------------------------
-
- # insert any needed whitespace
- if ( ( $type ne 'b' )
- && ( $max_index_to_go >= 0 )
- && ( $types_to_go[$max_index_to_go] ne 'b' )
- && $rOpts_add_whitespace )
- {
- my $ws = $$rwhite_space_flag[$j];
-
- if ( $ws == 1 ) {
- insert_new_token_to_go( ' ', 'b', $slevel,
- $no_internal_newlines );
- }
- }
-
# Do not allow breaks which would promote a side comment to a
# block comment. In order to allow a break before an opening
# or closing BLOCK, followed by a side comment, those sections
# Tentatively output this token. This is required before
# calling starting_one_line_block. We may have to unstore
# it, though, if we have to break before it.
- store_token_to_go($side_comment_follows);
+ $self->store_token_to_go($side_comment_follows);
+
+ # Look ahead to see if we might form a one-line block..
+ my $too_long = 0;
- # Look ahead to see if we might form a one-line block
- my $too_long =
- starting_one_line_block( $j, $jmax, $level, $slevel,
- $ci_level, $rtokens, $rtoken_type, $rblock_type );
+ # But obey any flag set for cuddled blocks
+ if ( $rbreak_container->{$type_sequence} ) {
+ destroy_one_line_block();
+ }
+ else {
+ $too_long =
+ starting_one_line_block( $j, $jmax, $level, $slevel,
+ $ci_level, $rinput_token_array );
+ }
clear_breakpoint_undo_stack();
# to simplify the logic below, set a flag to indicate if
my $want_break =
# use -bl flag if not a sub block of any type
- $block_type !~ /^sub/
+ $block_type !~ /^sub\b/
? $rOpts->{'opening-brace-on-new-line'}
- # use -sbl flag unless this is an anonymous sub block
- : $block_type !~ /^sub\W*$/
+ # use -sbl flag for a named sub block
+ : $block_type !~ /$ASUB_PATTERN/
? $rOpts->{'opening-sub-brace-on-new-line'}
- # do not break for anonymous subs
- : 0;
+ # use -asbl flag for an anonymous sub block
+ : $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
+
+ # Do not break if this token is welded to the left
+ if ( weld_len_left( $type_sequence, $token ) ) {
+ $want_break = 0;
+ }
# Break before an opening '{' ...
if (
unless ($no_internal_newlines) {
# since we already stored this token, we must unstore it
- unstore_token_to_go();
+ $self->unstore_token_to_go();
# then output the line
- output_line_to_go();
+ $self->output_line_to_go();
# and now store this token at the start of a new line
- store_token_to_go($side_comment_follows);
+ $self->store_token_to_go($side_comment_follows);
}
}
# now output this line
unless ($no_internal_newlines) {
- output_line_to_go();
+ $self->output_line_to_go();
}
}
# we have to terminate it if..
if (
- # it is too long (final length may be different from
- # initial estimate). note: must allow 1 space for this token
+ # it is too long (final length may be different from
+ # initial estimate). note: must allow 1 space for this
+ # token
excess_line_length( $index_start_one_line_block,
$max_index_to_go ) >= 0
|| $index_start_one_line_block != UNDEFINED_INDEX )
{
- # add missing semicolon if ...
- # there are some tokens
- if (
- ( $max_index_to_go > 0 )
-
- # and we don't have one
- && ( $last_nonblank_type ne ';' )
-
- # patch until some block type issues are fixed:
- # Do not add semi-colon for block types '{',
- # '}', and ';' because we cannot be sure yet
- # that this is a block and not an anonomyous
- # hash (blktype.t, blktype1.t)
- && ( $block_type !~ /^[\{\};]$/ )
-
- # it seems best not to add semicolons in these
- # special block types: sort|map|grep
- && ( !$is_sort_map_grep{$block_type} )
-
- # and we are allowed to do so.
- && $rOpts->{'add-semicolons'}
- )
- {
-
- save_current_token();
- $token = ';';
- $type = ';';
- $level = $levels_to_go[$max_index_to_go];
- $slevel = $nesting_depth_to_go[$max_index_to_go];
- $nesting_blocks =
- $nesting_blocks_to_go[$max_index_to_go];
- $ci_level = $ci_levels_to_go[$max_index_to_go];
- $block_type = "";
- $container_type = "";
- $container_environment = "";
- $type_sequence = "";
-
- # Note - we remove any blank AFTER extracting its
- # parameters such as level, etc, above
- if ( $types_to_go[$max_index_to_go] eq 'b' ) {
- unstore_token_to_go();
- }
- store_token_to_go();
-
- note_added_semicolon();
- restore_current_token();
- }
-
- # then write out everything before this closing curly brace
- output_line_to_go();
-
+ # write out everything before this closing curly brace
+ $self->output_line_to_go();
}
# Now update for side comment
if ($side_comment_follows) { $no_internal_newlines = 1 }
# store the closing curly brace
- store_token_to_go();
+ $self->store_token_to_go();
# ok, we just stored a closing curly brace. Often, but
# not always, we want to end the line immediately.
# But make a line break if the curly ends a
# significant block:
if (
- $is_block_without_semicolon{$block_type}
+ (
+ $is_block_without_semicolon{$block_type}
+
+ # Follow users break point for
+ # one line block types U & G, such as a 'try' block
+ || $is_one_line_block =~ /^[UG]$/ && $j == $jmax
+ )
# if needless semicolon follows we handle it later
&& $next_nonblank_token ne ';'
)
{
- output_line_to_go() unless ($no_internal_newlines);
+ $self->output_line_to_go()
+ unless ($no_internal_newlines);
}
}
}
# anonymous sub
- elsif ( $block_type =~ /^sub\W*$/ ) {
+ elsif ( $block_type =~ /$ASUB_PATTERN/ ) {
if ($is_one_line_block) {
$rbrace_follower = \%is_anon_sub_1_brace_follower;
&& $rOpts_add_newlines )
{
unless ($rbrace_follower) {
- output_line_to_go() unless ($no_internal_newlines);
+ $self->output_line_to_go()
+ unless ($no_internal_newlines);
}
}
elsif ($rbrace_follower) {
unless ( $rbrace_follower->{$next_nonblank_token} ) {
- output_line_to_go() unless ($no_internal_newlines);
+ $self->output_line_to_go()
+ unless ($no_internal_newlines);
}
$rbrace_follower = undef;
}
else {
- output_line_to_go() unless ($no_internal_newlines);
+ $self->output_line_to_go() unless ($no_internal_newlines);
}
} # end treatment of closing block token
&& (
$is_block_without_semicolon{
$last_nonblank_block_type}
- || $last_nonblank_block_type =~ /^sub\s+\w/
+ || $last_nonblank_block_type =~ /$SUB_PATTERN/
|| $last_nonblank_block_type =~ /^\w+:$/ )
)
|| $last_nonblank_type eq ';'
)
{
note_deleted_semicolon();
- output_line_to_go()
+ $self->output_line_to_go()
unless ( $no_internal_newlines
|| $index_start_one_line_block != UNDEFINED_INDEX );
next;
write_logfile_entry("Extra ';'\n");
}
}
- store_token_to_go();
+ $self->store_token_to_go();
- output_line_to_go()
+ $self->output_line_to_go()
unless ( $no_internal_newlines
+ || ( $rOpts_keep_interior_semicolons && $j < $jmax )
|| ( $next_nonblank_token eq '}' ) );
}
# handle here_doc target string
elsif ( $type eq 'h' ) {
- $no_internal_newlines =
- 1; # no newlines after seeing here-target
+
+ # no newlines after seeing here-target
+ $no_internal_newlines = 1;
destroy_one_line_block();
- store_token_to_go();
+ $self->store_token_to_go();
}
# handle all other token types
else {
- # if this is a blank...
- if ( $type eq 'b' ) {
-
- # make it just one character
- $token = ' ' if $rOpts_add_whitespace;
-
- # delete it if unwanted by whitespace rules
- # or we are deleting all whitespace
- my $ws = $$rwhite_space_flag[ $j + 1 ];
- if ( ( defined($ws) && $ws == -1 )
- || $rOpts_delete_old_whitespace )
- {
-
- # unless it might make a syntax error
- next
- unless is_essential_whitespace(
- $last_last_nonblank_token,
- $last_last_nonblank_type,
- $tokens_to_go[$max_index_to_go],
- $types_to_go[$max_index_to_go],
- $$rtokens[ $j + 1 ],
- $$rtoken_type[ $j + 1 ]
- );
- }
- }
- store_token_to_go();
+ $self->store_token_to_go();
}
# remember two previous nonblank OUTPUT tokens
# if this is a VERSION statement
|| $is_VERSION_statement
- # to keep a label on one line if that is how it is now
- || ( ( $type eq 'J' ) && ( $max_index_to_go == 0 ) )
+ # to keep a label at the end of a line
+ || $type eq 'J'
# if we are instructed to keep all old line breaks
|| !$rOpts->{'delete-old-newlines'}
)
{
destroy_one_line_block();
- output_line_to_go();
+ $self->output_line_to_go();
}
# mark old line breakpoints in current output stream
if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
- $old_breakpoint_to_go[$max_index_to_go] = 1;
+ my $jobp = $max_index_to_go;
+ if ( $types_to_go[$max_index_to_go] eq 'b' && $max_index_to_go > 0 )
+ {
+ $jobp--;
+ }
+ $old_breakpoint_to_go[$jobp] = 1;
}
- } # end sub print_line_of_tokens
-} # end print_line_of_tokens
+ return;
+ } ## end sub print_line_of_tokens
+} ## end block print_line_of_tokens
# sub output_line_to_go sends one logical line of tokens on down the
# pipeline to the VerticalAligner package, breaking the line into continuation
# arrays.
sub output_line_to_go {
+ my $self = shift;
+
# debug stuff; this routine can be called from many points
FORMATTER_DEBUG_FLAG_OUTPUT && do {
my ( $a, $b, $c ) = caller;
write_diagnostics("$output_str\n");
};
+ # Do not end line in a weld
+ # TODO: Move this fix into the routine?
+ #my $jnb = $max_index_to_go;
+ #if ( $jnb > 0 && $types_to_go[$jnb] eq 'b' ) { $jnb-- }
+ return if ( weld_len_right_to_go($max_index_to_go) );
+
# just set a tentative breakpoint if we might be in a one-line block
if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
set_forced_breakpoint($max_index_to_go);
}
my $cscw_block_comment;
- $cscw_block_comment = add_closing_side_comment()
+ $cscw_block_comment = $self->add_closing_side_comment()
if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
- match_opening_and_closing_tokens();
+ my $comma_arrow_count_contained = match_opening_and_closing_tokens();
# tell the -lp option we are outputting a batch so it can close
# any unfinished items in its stack
# anything left to write?
if ( $imin <= $imax ) {
- # add a blank line before certain key types
- if ( $last_line_leading_type !~ /^[#b]/ ) {
+ # add a blank line before certain key types but not after a comment
+ if ( $last_line_leading_type !~ /^[#]/ ) {
my $want_blank = 0;
my $leading_token = $tokens_to_go[$imin];
my $leading_type = $types_to_go[$imin];
# blank lines before subs except declarations and one-liners
# MCONVERSION LOCATION - for sub tokenization change
if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
- $want_blank = ( $rOpts->{'blanks-before-subs'} )
- && (
+ $want_blank = $rOpts->{'blank-lines-before-subs'}
+ if (
terminal_type( \@types_to_go, \@block_type_to_go, $imin,
$imax ) !~ /^[\;\}]$/
);
elsif ($leading_token =~ /^(package\s)/
&& $leading_type eq 'i' )
{
- $want_blank = ( $rOpts->{'blanks-before-subs'} );
+ $want_blank = $rOpts->{'blank-lines-before-packages'};
}
# break before certain key blocks except one-liners
if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
- $want_blank = ( $rOpts->{'blanks-before-subs'} )
- && (
+ $want_blank = $rOpts->{'blank-lines-before-subs'}
+ if (
terminal_type( \@types_to_go, \@block_type_to_go, $imin,
$imax ) ne '}'
);
# Break before certain block types if we haven't had a
# break at this level for a while. This is the
# difficult decision..
- elsif ($leading_token =~ /^(unless|if|while|until|for|foreach)$/
- && $leading_type eq 'k' )
+ elsif ($leading_type eq 'k'
+ && $last_line_leading_type ne 'b'
+ && $leading_token =~ /^(unless|if|while|until|for|foreach)$/ )
{
my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
if ( !defined($lc) ) { $lc = 0 }
- $want_blank = $rOpts->{'blanks-before-blocks'}
+ $want_blank =
+ $rOpts->{'blanks-before-blocks'}
&& $lc >= $rOpts->{'long-block-line-count'}
&& $file_writer_object->get_consecutive_nonblank_lines() >=
$rOpts->{'long-block-line-count'}
);
}
+ # Check for blank lines wanted before a closing brace
+ if ( $leading_token eq '}' ) {
+ if ( $rOpts->{'blank-lines-before-closing-block'}
+ && $block_type_to_go[$imin]
+ && $block_type_to_go[$imin] =~
+ /$blank_lines_before_closing_block_pattern/ )
+ {
+ my $nblanks = $rOpts->{'blank-lines-before-closing-block'};
+ if ( $nblanks > $want_blank ) {
+ $want_blank = $nblanks;
+ }
+ }
+ }
+
if ($want_blank) {
# future: send blank line down normal path to VerticalAligner
Perl::Tidy::VerticalAligner::flush();
- $file_writer_object->write_blank_code_line();
+ $file_writer_object->require_blank_code_lines($want_blank);
}
}
FORMATTER_DEBUG_FLAG_FLUSH && do {
my ( $package, $file, $line ) = caller;
- print
+ print STDOUT
"FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
};
# set all forced breakpoints for good list formatting
my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
+ my $old_line_count_in_batch =
+ $rtoken_vars_to_go[$max_index_to_go]->[_LINE_INDEX_] -
+ $rtoken_vars_to_go[0]->[_LINE_INDEX_] + 1;
+
if (
- $max_index_to_go > 0
- && (
- $is_long_line
- || $old_line_count_in_batch > 1
- || is_unbalanced_batch()
- || (
- $comma_count_in_batch
- && ( $rOpts_maximum_fields_per_table > 0
- || $rOpts_comma_arrow_breakpoints == 0 )
- )
+ $is_long_line
+ || $old_line_count_in_batch > 1
+
+ # must always call scan_list() with unbalanced batches because it
+ # is maintaining some stacks
+ || is_unbalanced_batch()
+
+ # call scan_list if we might want to break at commas
+ || (
+ $comma_count_in_batch
+ && ( $rOpts_maximum_fields_per_table > 0
+ || $rOpts_comma_arrow_breakpoints == 0 )
)
+
+ # call scan_list if user may want to break open some one-line
+ # hash references
+ || ( $comma_arrow_count_contained
+ && $rOpts_comma_arrow_breakpoints != 3 )
)
{
- $saw_good_break ||= scan_list();
+ ## This caused problems in one version of perl for unknown reasons:
+ ## $saw_good_break ||= scan_list();
+ my $sgb = scan_list();
+ $saw_good_break ||= $sgb;
}
# let $ri_first and $ri_last be references to lists of
)
)
{
- @$ri_first = ($imin);
- @$ri_last = ($imax);
+ @{$ri_first} = ($imin);
+ @{$ri_last} = ($imax);
}
# otherwise use multiple lines
else {
- ( $ri_first, $ri_last ) = set_continuation_breaks($saw_good_break);
+ ( $ri_first, $ri_last, my $colon_count ) =
+ set_continuation_breaks($saw_good_break);
break_all_chain_tokens( $ri_first, $ri_last );
+ break_equals( $ri_first, $ri_last );
+
# now we do a correction step to clean this up a bit
# (The only time we would not do this is for debugging)
if ( $rOpts->{'recombine'} ) {
( $ri_first, $ri_last ) =
recombine_breakpoints( $ri_first, $ri_last );
}
+
+ insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
}
# do corrector step if -lp option is used
if ($rOpts_line_up_parentheses) {
$do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
}
- send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
+ $self->unmask_phantom_semicolons( $ri_first, $ri_last );
+ $self->send_lines_to_vertical_aligner( $ri_first, $ri_last,
+ $do_not_pad );
+
+ # Insert any requested blank lines after an opening brace. We have to
+ # skip back before any side comment to find the terminal token
+ my $iterm;
+ for ( $iterm = $imax ; $iterm >= $imin ; $iterm-- ) {
+ next if $types_to_go[$iterm] eq '#';
+ next if $types_to_go[$iterm] eq 'b';
+ last;
+ }
+
+ # write requested number of blank lines after an opening block brace
+ if ( $iterm >= $imin && $types_to_go[$iterm] eq '{' ) {
+ if ( $rOpts->{'blank-lines-after-opening-block'}
+ && $block_type_to_go[$iterm]
+ && $block_type_to_go[$iterm] =~
+ /$blank_lines_after_opening_block_pattern/ )
+ {
+ my $nblanks = $rOpts->{'blank-lines-after-opening-block'};
+ Perl::Tidy::VerticalAligner::flush();
+ $file_writer_object->require_blank_code_lines($nblanks);
+ }
+ }
}
+
prepare_for_new_input_lines();
# output any new -cscw block comment
if ($cscw_block_comment) {
- flush();
+ $self->flush();
$file_writer_object->write_code_line( $cscw_block_comment . "\n" );
}
+ return;
}
sub note_added_semicolon {
- $last_added_semicolon_at = $input_line_number;
+ my ($line_number) = @_;
+ $last_added_semicolon_at = $line_number;
if ( $added_semicolon_count == 0 ) {
$first_added_semicolon_at = $last_added_semicolon_at;
}
$added_semicolon_count++;
write_logfile_entry("Added ';' here\n");
+ return;
}
sub note_deleted_semicolon {
}
$deleted_semicolon_count++;
write_logfile_entry("Deleted unnecessary ';'\n"); # i hope ;)
+ return;
}
sub note_embedded_tab {
if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
write_logfile_entry("Embedded tabs in quote or pattern\n");
}
+ return;
}
sub starting_one_line_block {
# though, because otherwise we would always break at a semicolon
# within a one-line block if the block contains multiple statements.
- my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
- $rblock_type )
- = @_;
+ my ( $j, $jmax, $level, $slevel, $ci_level, $rtoken_array ) = @_;
+
+ my $jmax_check = @{$rtoken_array};
+ if ( $jmax_check < $jmax ) {
+ print STDERR "jmax=$jmax > $jmax_check\n";
+ }
# kill any current block - we can only go 1 deep
destroy_one_line_block();
}
else {
- # cannot use one-line blocks with cuddled else else/elsif lines
+ # cannot use one-line blocks with cuddled else/elsif lines
if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
return 0;
}
}
- my $block_type = $$rblock_type[$j];
+ my $block_type = $rtoken_array->[$j]->[_BLOCK_TYPE_];
# find the starting keyword for this block (such as 'if', 'else', ...)
- if ( $block_type =~ /^[\{\}\;\:]$/ ) {
+ if ( $block_type =~ /^[\{\}\;\:]$/ || $block_type =~ /^package/ ) {
$i_start = $max_index_to_go;
}
+ # the previous nonblank token should start these block types
+ elsif (( $last_last_nonblank_token_to_go eq $block_type )
+ || ( $block_type =~ /^sub\b/ )
+ || $block_type =~ /\(\)/ )
+ {
+ $i_start = $last_last_nonblank_index_to_go;
+
+ # For signatures and extended syntax ...
+ # If this brace follows a parenthesized list, we should look back to
+ # find the keyword before the opening paren because otherwise we might
+ # form a one line block which stays intack, and cause the parenthesized
+ # expression to break open. That looks bad. However, actually
+ # searching for the opening paren is slow and tedius.
+ # The actual keyword is often at the start of a line, but might not be.
+ # For example, we might have an anonymous sub with signature list
+ # following a =>. It is safe to mark the start anywhere before the
+ # opening paren, so we just go back to the prevoious break (or start of
+ # the line) if that is before the opening paren. The minor downside is
+ # that we may very occasionally break open a block unnecessarily.
+ if ( $tokens_to_go[$i_start] eq ')' ) {
+ $i_start = $index_max_forced_break + 1;
+ if ( $types_to_go[$i_start] eq 'b' ) { $i_start++; }
+ my $lev = $levels_to_go[$i_start];
+ if ( $lev > $level ) { return 0 }
+ }
+ }
+
elsif ( $last_last_nonblank_token_to_go eq ')' ) {
# For something like "if (xxx) {", the keyword "if" will be
# just after the most recent break. This will be 0 unless
# we have just killed a one-line block and are starting another.
# (doif.t)
+ # Note: cannot use inext_index_to_go[] here because that array
+ # is still being constructed.
$i_start = $index_max_forced_break + 1;
if ( $types_to_go[$i_start] eq 'b' ) {
$i_start++;
}
- unless ( $tokens_to_go[$i_start] eq $block_type ) {
+ # Patch to avoid breaking short blocks defined with extended_syntax:
+ # Strip off any trailing () which was added in the parser to mark
+ # the opening keyword. For example, in the following
+ # create( TypeFoo $e) {$bubba}
+ # the blocktype would be marked as create()
+ my $stripped_block_type = $block_type;
+ $stripped_block_type =~ s/\(\)$//;
+
+ unless ( $tokens_to_go[$i_start] eq $stripped_block_type ) {
return 0;
}
}
- # the previous nonblank token should start these block types
- elsif (
- ( $last_last_nonblank_token_to_go eq $block_type )
- || ( $block_type =~ /^sub/
- && $last_last_nonblank_token_to_go =~ /^sub/ )
- )
- {
- $i_start = $last_last_nonblank_index_to_go;
- }
-
# patch for SWITCH/CASE to retain one-line case/when blocks
elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
+
+ # Note: cannot use inext_index_to_go[] here because that array
+ # is still being constructed.
$i_start = $index_max_forced_break + 1;
if ( $types_to_go[$i_start] eq 'b' ) {
$i_start++;
my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
- my $i;
-
# see if length is too long to even start
- if ( $pos > $rOpts_maximum_line_length ) {
+ if ( $pos > maximum_line_length($i_start) ) {
return 1;
}
- for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) {
+ foreach my $i ( $j + 1 .. $jmax ) {
# old whitespace could be arbitrarily large, so don't use it
- if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
- else { $pos += length( $$rtokens[$i] ) }
+ if ( $rtoken_array->[$i]->[_TYPE_] eq 'b' ) { $pos += 1 }
+ else { $pos += rtoken_length($i) }
# Return false result if we exceed the maximum line length,
- if ( $pos > $rOpts_maximum_line_length ) {
+ if ( $pos > maximum_line_length($i_start) ) {
return 0;
}
# or encounter another opening brace before finding the closing brace.
- elsif ($$rtokens[$i] eq '{'
- && $$rtoken_type[$i] eq '{'
- && $$rblock_type[$i] )
+ elsif ($rtoken_array->[$i]->[_TOKEN_] eq '{'
+ && $rtoken_array->[$i]->[_TYPE_] eq '{'
+ && $rtoken_array->[$i]->[_BLOCK_TYPE_] )
{
return 0;
}
# if we find our closing brace..
- elsif ($$rtokens[$i] eq '}'
- && $$rtoken_type[$i] eq '}'
- && $$rblock_type[$i] )
+ elsif ($rtoken_array->[$i]->[_TOKEN_] eq '}'
+ && $rtoken_array->[$i]->[_TYPE_] eq '}'
+ && $rtoken_array->[$i]->[_BLOCK_TYPE_] )
{
# be sure any trailing comment also fits on the line
my $i_nonblank =
- ( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1;
+ ( $rtoken_array->[ $i + 1 ]->[_TYPE_] eq 'b' ) ? $i + 2 : $i + 1;
+
+ # Patch for one-line sort/map/grep/eval blocks with side comments:
+ # We will ignore the side comment length for sort/map/grep/eval
+ # because this can lead to statements which change every time
+ # perltidy is run. Here is an example from Denis Moskowitz which
+ # oscillates between these two states without this patch:
- if ( $$rtoken_type[$i_nonblank] eq '#' ) {
- $pos += length( $$rtokens[$i_nonblank] );
+## --------
+## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
+## @baz;
+##
+## grep {
+## $_->foo ne 'bar'
+## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
+## @baz;
+## --------
+
+ # When the first line is input it gets broken apart by the main
+ # line break logic in sub print_line_of_tokens.
+ # When the second line is input it gets recombined by
+ # print_line_of_tokens and passed to the output routines. The
+ # output routines (set_continuation_breaks) do not break it apart
+ # because the bond strengths are set to the highest possible value
+ # for grep/map/eval/sort blocks, so the first version gets output.
+ # It would be possible to fix this by changing bond strengths,
+ # but they are high to prevent errors in older versions of perl.
+
+ if ( $rtoken_array->[$i_nonblank]->[_TYPE_] eq '#'
+ && !$is_sort_map_grep{$block_type} )
+ {
+
+ $pos += rtoken_length($i_nonblank);
if ( $i_nonblank > $i + 1 ) {
- $pos += length( $$rtokens[ $i + 1 ] );
+
+ # source whitespace could be anything, assume
+ # at least one space before the hash on output
+ if ( $rtoken_array->[ $i + 1 ]->[_TYPE_] eq 'b' ) {
+ $pos += 1;
+ }
+ else { $pos += rtoken_length( $i + 1 ) }
}
- if ( $pos > $rOpts_maximum_line_length ) {
+ if ( $pos >= maximum_line_length($i_start) ) {
return 0;
}
}
if ( $is_sort_map_grep_eval{$block_type} ) {
create_one_line_block( $i_start, 1 );
}
-
return 0;
}
sub unstore_token_to_go {
# remove most recent token from output stream
+ my $self = shift;
if ( $max_index_to_go > 0 ) {
$max_index_to_go--;
}
else {
$max_index_to_go = UNDEFINED_INDEX;
}
-
+ return;
}
sub want_blank_line {
- flush();
+ my $self = shift;
+ $self->flush();
$file_writer_object->want_blank_line();
+ return;
}
sub write_unindented_line {
- flush();
- $file_writer_object->write_line( $_[0] );
+ my ( $self, $line ) = @_;
+ $self->flush();
+ $file_writer_object->write_line($line);
+ return;
+}
+
+sub undo_ci {
+
+ # Undo continuation indentation in certain sequences
+ # For example, we can undo continuation indentation in sort/map/grep chains
+ # my $dat1 = pack( "n*",
+ # map { $_, $lookup->{$_} }
+ # sort { $a <=> $b }
+ # grep { $lookup->{$_} ne $default } keys %$lookup );
+ # To align the map/sort/grep keywords like this:
+ # my $dat1 = pack( "n*",
+ # map { $_, $lookup->{$_} }
+ # sort { $a <=> $b }
+ # grep { $lookup->{$_} ne $default } keys %$lookup );
+ my ( $ri_first, $ri_last ) = @_;
+ my ( $line_1, $line_2, $lev_last );
+ my $this_line_is_semicolon_terminated;
+ my $max_line = @{$ri_first} - 1;
+
+ # looking at each line of this batch..
+ # We are looking at leading tokens and looking for a sequence
+ # all at the same level and higher level than enclosing lines.
+ foreach my $line ( 0 .. $max_line ) {
+
+ my $ibeg = $ri_first->[$line];
+ my $lev = $levels_to_go[$ibeg];
+ if ( $line > 0 ) {
+
+ # if we have started a chain..
+ if ($line_1) {
+
+ # see if it continues..
+ if ( $lev == $lev_last ) {
+ if ( $types_to_go[$ibeg] eq 'k'
+ && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
+ {
+
+ # chain continues...
+ # check for chain ending at end of a statement
+ if ( $line == $max_line ) {
+
+ # see of this line ends a statement
+ my $iend = $ri_last->[$line];
+ $this_line_is_semicolon_terminated =
+ $types_to_go[$iend] eq ';'
+
+ # with possible side comment
+ || ( $types_to_go[$iend] eq '#'
+ && $iend - $ibeg >= 2
+ && $types_to_go[ $iend - 2 ] eq ';'
+ && $types_to_go[ $iend - 1 ] eq 'b' );
+ }
+ $line_2 = $line if ($this_line_is_semicolon_terminated);
+ }
+ else {
+
+ # kill chain
+ $line_1 = undef;
+ }
+ }
+ elsif ( $lev < $lev_last ) {
+
+ # chain ends with previous line
+ $line_2 = $line - 1;
+ }
+ elsif ( $lev > $lev_last ) {
+
+ # kill chain
+ $line_1 = undef;
+ }
+
+ # undo the continuation indentation if a chain ends
+ if ( defined($line_2) && defined($line_1) ) {
+ my $continuation_line_count = $line_2 - $line_1 + 1;
+ @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ] =
+ (0) x ($continuation_line_count)
+ if ( $continuation_line_count >= 0 );
+ @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ]
+ = @reduced_spaces_to_go[ @{$ri_first}
+ [ $line_1 .. $line_2 ] ];
+ $line_1 = undef;
+ }
+ }
+
+ # not in a chain yet..
+ else {
+
+ # look for start of a new sort/map/grep chain
+ if ( $lev > $lev_last ) {
+ if ( $types_to_go[$ibeg] eq 'k'
+ && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
+ {
+ $line_1 = $line;
+ }
+ }
+ }
+ }
+ $lev_last = $lev;
+ }
+ return;
}
sub undo_lp_ci {
# . $1 * $1 . " ?");
my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
- my $max_line = @$ri_first - 1;
+ my $max_line = @{$ri_first} - 1;
# must be multiple lines
return unless $max_line > $line_open;
my $n;
my $line_1 = 1 + $line_open;
for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
- my $ibeg = $$ri_first[$n];
- my $iend = $$ri_last[$n];
+ my $ibeg = $ri_first->[$n];
+ my $iend = $ri_last->[$n];
if ( $ibeg eq $closing_index ) { $n--; last }
return if ( $lev_start != $levels_to_go[$ibeg] );
return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
last if ( $closing_index <= $iend );
}
- # we can reduce the indentation of all continuation lines
- my $continuation_line_count = $n - $line_open;
- @ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] =
- (0) x ($continuation_line_count);
- @leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] =
- @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ];
+ # we can reduce the indentation of all continuation lines
+ my $continuation_line_count = $n - $line_open;
+ @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
+ (0) x ($continuation_line_count);
+ @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
+ @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
+ return;
+}
+
+sub pad_token {
+
+ # insert $pad_spaces before token number $ipad
+ my ( $ipad, $pad_spaces ) = @_;
+ if ( $pad_spaces > 0 ) {
+ $tokens_to_go[$ipad] = ' ' x $pad_spaces . $tokens_to_go[$ipad];
+ }
+ elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq ' ' ) {
+ $tokens_to_go[$ipad] = "";
+ }
+ else {
+
+ # shouldn't happen
+ return;
+ }
+
+ $token_lengths_to_go[$ipad] += $pad_spaces;
+ foreach my $i ( $ipad .. $max_index_to_go ) {
+ $summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
+ }
+ return;
}
-sub set_logical_padding {
+{
+ my %is_math_op;
- # Look at a batch of lines and see if extra padding can improve the
- # alignment when there are certain leading operators. Here is an
- # example, in which some extra space is introduced before
- # '( $year' to make it line up with the subsequent lines:
- #
- # if ( ( $Year < 1601 )
- # || ( $Year > 2899 )
- # || ( $EndYear < 1601 )
- # || ( $EndYear > 2899 ) )
- # {
- # &Error_OutOfRange;
- # }
- #
- my ( $ri_first, $ri_last ) = @_;
- my $max_line = @$ri_first - 1;
+ BEGIN {
- my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, $pad_spaces,
- $tok_next, $has_leading_op_next, $has_leading_op );
+ my @q = qw( + - * / );
+ @is_math_op{@q} = (1) x scalar(@q);
+ }
- # looking at each line of this batch..
- foreach $line ( 0 .. $max_line - 1 ) {
+ sub set_logical_padding {
- # see if the next line begins with a logical operator
- $ibeg = $$ri_first[$line];
- $iend = $$ri_last[$line];
- $ibeg_next = $$ri_first[ $line + 1 ];
- $tok_next = $tokens_to_go[$ibeg_next];
- $has_leading_op_next = $is_chain_operator{$tok_next};
- next unless ($has_leading_op_next);
+ # Look at a batch of lines and see if extra padding can improve the
+ # alignment when there are certain leading operators. Here is an
+ # example, in which some extra space is introduced before
+ # '( $year' to make it line up with the subsequent lines:
+ #
+ # if ( ( $Year < 1601 )
+ # || ( $Year > 2899 )
+ # || ( $EndYear < 1601 )
+ # || ( $EndYear > 2899 ) )
+ # {
+ # &Error_OutOfRange;
+ # }
+ #
+ my ( $ri_first, $ri_last ) = @_;
+ my $max_line = @{$ri_first} - 1;
- # next line must not be at lesser depth
- next
- if ( $nesting_depth_to_go[$ibeg] > $nesting_depth_to_go[$ibeg_next] );
+ # FIXME: move these declarations below
+ my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces,
+ $tok_next, $type_next, $has_leading_op_next, $has_leading_op );
- # identify the token in this line to be padded on the left
- $ipad = undef;
+ # looking at each line of this batch..
+ foreach my $line ( 0 .. $max_line - 1 ) {
- # handle lines at same depth...
- if ( $nesting_depth_to_go[$ibeg] == $nesting_depth_to_go[$ibeg_next] ) {
+ # see if the next line begins with a logical operator
+ $ibeg = $ri_first->[$line];
+ $iend = $ri_last->[$line];
+ $ibeg_next = $ri_first->[ $line + 1 ];
+ $tok_next = $tokens_to_go[$ibeg_next];
+ $type_next = $types_to_go[$ibeg_next];
- # if this is not first line of the batch ...
- if ( $line > 0 ) {
+ $has_leading_op_next = ( $tok_next =~ /^\w/ )
+ ? $is_chain_operator{$tok_next} # + - * / : ? && ||
+ : $is_chain_operator{$type_next}; # and, or
- # and we have leading operator
- next if $has_leading_op;
+ next unless ($has_leading_op_next);
- # and ..
- # 1. the previous line is at lesser depth, or
- # 2. the previous line ends in an assignment
- #
- # Example 1: previous line at lesser depth
- # if ( ( $Year < 1601 ) # <- we are here but
- # || ( $Year > 2899 ) # list has not yet
- # || ( $EndYear < 1601 ) # collapsed vertically
- # || ( $EndYear > 2899 ) )
- # {
- #
- # Example 2: previous line ending in assignment:
- # $leapyear =
- # $year % 4 ? 0 # <- We are here
- # : $year % 100 ? 1
- # : $year % 400 ? 0
- # : 1;
- next
- unless (
- $is_assignment{ $types_to_go[$iendm] }
- || ( $nesting_depth_to_go[$ibegm] <
- $nesting_depth_to_go[$ibeg] )
- );
+ # next line must not be at lesser depth
+ next
+ if ( $nesting_depth_to_go[$ibeg] >
+ $nesting_depth_to_go[$ibeg_next] );
- # we will add padding before the first token
- $ipad = $ibeg;
- }
+ # identify the token in this line to be padded on the left
+ $ipad = undef;
- # for first line of the batch..
- else {
+ # handle lines at same depth...
+ if ( $nesting_depth_to_go[$ibeg] ==
+ $nesting_depth_to_go[$ibeg_next] )
+ {
- # WARNING: Never indent if first line is starting in a
- # continued quote, which would change the quote.
- next if $starting_in_quote;
+ # if this is not first line of the batch ...
+ if ( $line > 0 ) {
+
+ # and we have leading operator..
+ next if $has_leading_op;
+
+ # Introduce padding if..
+ # 1. the previous line is at lesser depth, or
+ # 2. the previous line ends in an assignment
+ # 3. the previous line ends in a 'return'
+ # 4. the previous line ends in a comma
+ # Example 1: previous line at lesser depth
+ # if ( ( $Year < 1601 ) # <- we are here but
+ # || ( $Year > 2899 ) # list has not yet
+ # || ( $EndYear < 1601 ) # collapsed vertically
+ # || ( $EndYear > 2899 ) )
+ # {
+ #
+ # Example 2: previous line ending in assignment:
+ # $leapyear =
+ # $year % 4 ? 0 # <- We are here
+ # : $year % 100 ? 1
+ # : $year % 400 ? 0
+ # : 1;
+ #
+ # Example 3: previous line ending in comma:
+ # push @expr,
+ # /test/ ? undef
+ # : eval($_) ? 1
+ # : eval($_) ? 1
+ # : 0;
+
+ # be sure levels agree (do not indent after an indented 'if')
+ next
+ if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
+
+ # allow padding on first line after a comma but only if:
+ # (1) this is line 2 and
+ # (2) there are at more than three lines and
+ # (3) lines 3 and 4 have the same leading operator
+ # These rules try to prevent padding within a long
+ # comma-separated list.
+ my $ok_comma;
+ if ( $types_to_go[$iendm] eq ','
+ && $line == 1
+ && $max_line > 2 )
+ {
+ my $ibeg_next_next = $ri_first->[ $line + 2 ];
+ my $tok_next_next = $tokens_to_go[$ibeg_next_next];
+ $ok_comma = $tok_next_next eq $tok_next;
+ }
- # if this is text after closing '}'
- # then look for an interior token to pad
- if ( $types_to_go[$ibeg] eq '}' ) {
+ next
+ unless (
+ $is_assignment{ $types_to_go[$iendm] }
+ || $ok_comma
+ || ( $nesting_depth_to_go[$ibegm] <
+ $nesting_depth_to_go[$ibeg] )
+ || ( $types_to_go[$iendm] eq 'k'
+ && $tokens_to_go[$iendm] eq 'return' )
+ );
+ # we will add padding before the first token
+ $ipad = $ibeg;
}
- # otherwise, we might pad if it looks really good
+ # for first line of the batch..
else {
- # we might pad token $ibeg, so be sure that it
- # is at the same depth as the next line.
- next
- if ( $nesting_depth_to_go[$ibeg] !=
- $nesting_depth_to_go[$ibeg_next] );
+ # WARNING: Never indent if first line is starting in a
+ # continued quote, which would change the quote.
+ next if $starting_in_quote;
+
+ # if this is text after closing '}'
+ # then look for an interior token to pad
+ if ( $types_to_go[$ibeg] eq '}' ) {
+
+ }
+
+ # otherwise, we might pad if it looks really good
+ else {
- # We can pad on line 1 of a statement if at least 3
- # lines will be aligned. Otherwise, it
- # can look very confusing.
+ # we might pad token $ibeg, so be sure that it
+ # is at the same depth as the next line.
+ next
+ if ( $nesting_depth_to_go[$ibeg] !=
+ $nesting_depth_to_go[$ibeg_next] );
+
+ # We can pad on line 1 of a statement if at least 3
+ # lines will be aligned. Otherwise, it
+ # can look very confusing.
# We have to be careful not to pad if there are too few
# lines. The current rule is:
# : $i == 2 ? ( "Then", "Rarity" )
# : ( "Then", "Name" );
- if ( $max_line > 1 ) {
- my $leading_token = $tokens_to_go[$ibeg_next];
- my $tokens_differ;
-
- # never indent line 1 of a '.' series because
- # previous line is most likely at same level.
- # TODO: we should also look at the leasing_spaces
- # of the last output line and skip if it is same
- # as this line.
- next if ( $leading_token eq '.' );
-
- my $count = 1;
- foreach my $l ( 2 .. 3 ) {
- last if ( $line + $l > $max_line );
- my $ibeg_next_next = $$ri_first[ $line + $l ];
- if ( $tokens_to_go[$ibeg_next_next] ne
- $leading_token )
- {
- $tokens_differ = 1;
- last;
+ if ( $max_line > 1 ) {
+ my $leading_token = $tokens_to_go[$ibeg_next];
+ my $tokens_differ;
+
+ # never indent line 1 of a '.' series because
+ # previous line is most likely at same level.
+ # TODO: we should also look at the leasing_spaces
+ # of the last output line and skip if it is same
+ # as this line.
+ next if ( $leading_token eq '.' );
+
+ my $count = 1;
+ foreach my $l ( 2 .. 3 ) {
+ last if ( $line + $l > $max_line );
+ my $ibeg_next_next = $ri_first->[ $line + $l ];
+ if ( $tokens_to_go[$ibeg_next_next] ne
+ $leading_token )
+ {
+ $tokens_differ = 1;
+ last;
+ }
+ $count++;
}
- $count++;
+ next if ($tokens_differ);
+ next if ( $count < 3 && $leading_token ne ':' );
+ $ipad = $ibeg;
+ }
+ else {
+ next;
}
- next if ($tokens_differ);
- next if ( $count < 3 && $leading_token ne ':' );
- $ipad = $ibeg;
- }
- else {
- next;
}
}
}
- }
- # find interior token to pad if necessary
- if ( !defined($ipad) ) {
+ # find interior token to pad if necessary
+ if ( !defined($ipad) ) {
- for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
+ for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
- # find any unclosed container
- next
- unless ( $type_sequence_to_go[$i]
- && $mate_index_to_go[$i] > $iend );
+ # find any unclosed container
+ next
+ unless ( $type_sequence_to_go[$i]
+ && $mate_index_to_go[$i] > $iend );
- # find next nonblank token to pad
- $ipad = $i + 1;
- if ( $types_to_go[$ipad] eq 'b' ) {
- $ipad++;
+ # find next nonblank token to pad
+ $ipad = $inext_to_go[$i];
last if ( $ipad > $iend );
}
+ last unless $ipad;
+ }
+
+ # We cannot pad the first leading token of a file because
+ # it could cause a bug in which the starting indentation
+ # level is guessed incorrectly each time the code is run
+ # though perltidy, thus causing the code to march off to
+ # the right. For example, the following snippet would have
+ # this problem:
+
+## ov_method mycan( $package, '(""' ), $package
+## or ov_method mycan( $package, '(0+' ), $package
+## or ov_method mycan( $package, '(bool' ), $package
+## or ov_method mycan( $package, '(nomethod' ), $package;
+
+ # If this snippet is within a block this won't happen
+ # unless the user just processes the snippet alone within
+ # an editor. In that case either the user will see and
+ # fix the problem or it will be corrected next time the
+ # entire file is processed with perltidy.
+ ##next if ( $ipad == 0 && $levels_to_go[$ipad] == 0 );
+ next if ( $ipad == 0 && $peak_batch_size <= 1 );
+
+## THIS PATCH REMOVES THE FOLLOWING POOR PADDING (math.t) with -pbp, BUT
+## IT DID MORE HARM THAN GOOD
+## ceil(
+## $font->{'loca'}->{'glyphs'}[$x]->read->{'xMin'} * 1000
+## / $upem
+## ),
+##? # do not put leading padding for just 2 lines of math
+##? if ( $ipad == $ibeg
+##? && $line > 0
+##? && $levels_to_go[$ipad] > $levels_to_go[ $ipad - 1 ]
+##? && $is_math_op{$type_next}
+##? && $line + 2 <= $max_line )
+##? {
+##? my $ibeg_next_next = $ri_first->[ $line + 2 ];
+##? my $type_next_next = $types_to_go[$ibeg_next_next];
+##? next if !$is_math_op{$type_next_next};
+##? }
+
+ # next line must not be at greater depth
+ my $iend_next = $ri_last->[ $line + 1 ];
+ next
+ if ( $nesting_depth_to_go[ $iend_next + 1 ] >
+ $nesting_depth_to_go[$ipad] );
+
+ # lines must be somewhat similar to be padded..
+ my $inext_next = $inext_to_go[$ibeg_next];
+ my $type = $types_to_go[$ipad];
+ my $type_next = $types_to_go[ $ipad + 1 ];
+
+ # see if there are multiple continuation lines
+ my $logical_continuation_lines = 1;
+ if ( $line + 2 <= $max_line ) {
+ my $leading_token = $tokens_to_go[$ibeg_next];
+ my $ibeg_next_next = $ri_first->[ $line + 2 ];
+ if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
+ && $nesting_depth_to_go[$ibeg_next] eq
+ $nesting_depth_to_go[$ibeg_next_next] )
+ {
+ $logical_continuation_lines++;
+ }
}
- last unless $ipad;
- }
- # next line must not be at greater depth
- my $iend_next = $$ri_last[ $line + 1 ];
- next
- if ( $nesting_depth_to_go[ $iend_next + 1 ] >
- $nesting_depth_to_go[$ipad] );
-
- # lines must be somewhat similar to be padded..
- my $inext_next = $ibeg_next + 1;
- if ( $types_to_go[$inext_next] eq 'b' ) {
- $inext_next++;
- }
- my $type = $types_to_go[$ipad];
+ # see if leading types match
+ my $types_match = $types_to_go[$inext_next] eq $type;
+ my $matches_without_bang;
- # see if there are multiple continuation lines
- my $logical_continuation_lines = 1;
- if ( $line + 2 <= $max_line ) {
- my $leading_token = $tokens_to_go[$ibeg_next];
- my $ibeg_next_next = $$ri_first[ $line + 2 ];
- if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
- && $nesting_depth_to_go[$ibeg_next] eq
- $nesting_depth_to_go[$ibeg_next_next] )
- {
- $logical_continuation_lines++;
+ # if first line has leading ! then compare the following token
+ if ( !$types_match && $type eq '!' ) {
+ $types_match = $matches_without_bang =
+ $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
}
- }
- if (
-
- # either we have multiple continuation lines to follow
- # and we are not padding the first token
- ( $logical_continuation_lines > 1 && $ipad > 0 )
- # or..
- || (
-
- # types must match
- $types_to_go[$inext_next] eq $type
-
- # and keywords must match if keyword
- && !(
- $type eq 'k'
- && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
- )
- )
- )
- {
-
- #----------------------begin special checks--------------
- #
- # SPECIAL CHECK 1:
- # A check is needed before we can make the pad.
- # If we are in a list with some long items, we want each
- # item to stand out. So in the following example, the
- # first line begining with '$casefold->' would look good
- # padded to align with the next line, but then it
- # would be indented more than the last line, so we
- # won't do it.
- #
- # ok(
- # $casefold->{code} eq '0041'
- # && $casefold->{status} eq 'C'
- # && $casefold->{mapping} eq '0061',
- # 'casefold 0x41'
- # );
- #
- # Note:
- # It would be faster, and almost as good, to use a comma
- # count, and not pad if comma_count > 1 and the previous
- # line did not end with a comma.
- #
- my $ok_to_pad = 1;
+ if (
- my $ibg = $$ri_first[ $line + 1 ];
- my $depth = $nesting_depth_to_go[ $ibg + 1 ];
+ # either we have multiple continuation lines to follow
+ # and we are not padding the first token
+ ( $logical_continuation_lines > 1 && $ipad > 0 )
- # just use simplified formula for leading spaces to avoid
- # needless sub calls
- my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
+ # or..
+ || (
- # look at each line beyond the next ..
- my $l = $line + 1;
- foreach $l ( $line + 2 .. $max_line ) {
- my $ibg = $$ri_first[$l];
+ # types must match
+ $types_match
- # quit looking at the end of this container
- last
- if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
- || ( $nesting_depth_to_go[$ibg] < $depth );
+ # and keywords must match if keyword
+ && !(
+ $type eq 'k'
+ && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
+ )
+ )
+ )
+ {
- # cannot do the pad if a later line would be
- # outdented more
- if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
- $ok_to_pad = 0;
- last;
+ #----------------------begin special checks--------------
+ #
+ # SPECIAL CHECK 1:
+ # A check is needed before we can make the pad.
+ # If we are in a list with some long items, we want each
+ # item to stand out. So in the following example, the
+ # first line beginning with '$casefold->' would look good
+ # padded to align with the next line, but then it
+ # would be indented more than the last line, so we
+ # won't do it.
+ #
+ # ok(
+ # $casefold->{code} eq '0041'
+ # && $casefold->{status} eq 'C'
+ # && $casefold->{mapping} eq '0061',
+ # 'casefold 0x41'
+ # );
+ #
+ # Note:
+ # It would be faster, and almost as good, to use a comma
+ # count, and not pad if comma_count > 1 and the previous
+ # line did not end with a comma.
+ #
+ my $ok_to_pad = 1;
+
+ my $ibg = $ri_first->[ $line + 1 ];
+ my $depth = $nesting_depth_to_go[ $ibg + 1 ];
+
+ # just use simplified formula for leading spaces to avoid
+ # needless sub calls
+ my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
+
+ # look at each line beyond the next ..
+ my $l = $line + 1;
+ foreach my $ltest ( $line + 2 .. $max_line ) {
+ $l = $ltest;
+ my $ibg = $ri_first->[$l];
+
+ # quit looking at the end of this container
+ last
+ if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
+ || ( $nesting_depth_to_go[$ibg] < $depth );
+
+ # cannot do the pad if a later line would be
+ # outdented more
+ if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
+ $ok_to_pad = 0;
+ last;
+ }
}
- }
- # don't pad if we end in a broken list
- if ( $l == $max_line ) {
- my $i2 = $$ri_last[$l];
- if ( $types_to_go[$i2] eq '#' ) {
- my $i1 = $$ri_first[$l];
- next
- if (
- terminal_type( \@types_to_go, \@block_type_to_go, $i1,
- $i2 ) eq ','
- );
+ # don't pad if we end in a broken list
+ if ( $l == $max_line ) {
+ my $i2 = $ri_last->[$l];
+ if ( $types_to_go[$i2] eq '#' ) {
+ my $i1 = $ri_first->[$l];
+ next
+ if (
+ terminal_type( \@types_to_go, \@block_type_to_go,
+ $i1, $i2 ) eq ','
+ );
+ }
}
- }
- # SPECIAL CHECK 2:
- # a minus may introduce a quoted variable, and we will
- # add the pad only if this line begins with a bare word,
- # such as for the word 'Button' here:
- # [
- # Button => "Print letter \"~$_\"",
- # -command => [ sub { print "$_[0]\n" }, $_ ],
- # -accelerator => "Meta+$_"
- # ];
- #
- # On the other hand, if 'Button' is quoted, it looks best
- # not to pad:
- # [
- # 'Button' => "Print letter \"~$_\"",
- # -command => [ sub { print "$_[0]\n" }, $_ ],
- # -accelerator => "Meta+$_"
- # ];
- if ( $types_to_go[$ibeg_next] eq 'm' ) {
- $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
- }
-
- next unless $ok_to_pad;
-
- #----------------------end special check---------------
-
- my $length_1 = total_line_length( $ibeg, $ipad - 1 );
- my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
- $pad_spaces = $length_2 - $length_1;
-
- # make sure this won't change if -lp is used
- my $indentation_1 = $leading_spaces_to_go[$ibeg];
- if ( ref($indentation_1) ) {
- if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) {
- my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
- unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 ) {
- $pad_spaces = 0;
+ # SPECIAL CHECK 2:
+ # a minus may introduce a quoted variable, and we will
+ # add the pad only if this line begins with a bare word,
+ # such as for the word 'Button' here:
+ # [
+ # Button => "Print letter \"~$_\"",
+ # -command => [ sub { print "$_[0]\n" }, $_ ],
+ # -accelerator => "Meta+$_"
+ # ];
+ #
+ # On the other hand, if 'Button' is quoted, it looks best
+ # not to pad:
+ # [
+ # 'Button' => "Print letter \"~$_\"",
+ # -command => [ sub { print "$_[0]\n" }, $_ ],
+ # -accelerator => "Meta+$_"
+ # ];
+ if ( $types_to_go[$ibeg_next] eq 'm' ) {
+ $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
+ }
+
+ next unless $ok_to_pad;
+
+ #----------------------end special check---------------
+
+ my $length_1 = total_line_length( $ibeg, $ipad - 1 );
+ my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
+ $pad_spaces = $length_2 - $length_1;
+
+ # If the first line has a leading ! and the second does
+ # not, then remove one space to try to align the next
+ # leading characters, which are often the same. For example:
+ # if ( !$ts
+ # || $ts == $self->Holder
+ # || $self->Holder->Type eq "Arena" )
+ #
+ # This usually helps readability, but if there are subsequent
+ # ! operators things will still get messed up. For example:
+ #
+ # if ( !exists $Net::DNS::typesbyname{$qtype}
+ # && exists $Net::DNS::classesbyname{$qtype}
+ # && !exists $Net::DNS::classesbyname{$qclass}
+ # && exists $Net::DNS::typesbyname{$qclass} )
+ # We can't fix that.
+ if ($matches_without_bang) { $pad_spaces-- }
+
+ # make sure this won't change if -lp is used
+ my $indentation_1 = $leading_spaces_to_go[$ibeg];
+ if ( ref($indentation_1) ) {
+ if ( $indentation_1->get_recoverable_spaces() == 0 ) {
+ my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
+ unless ( $indentation_2->get_recoverable_spaces() == 0 )
+ {
+ $pad_spaces = 0;
+ }
}
}
- }
- # we might be able to handle a pad of -1 by removing a blank
- # token
- if ( $pad_spaces < 0 ) {
- if ( $pad_spaces == -1 ) {
- if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) {
- $tokens_to_go[ $ipad - 1 ] = '';
+ # we might be able to handle a pad of -1 by removing a blank
+ # token
+ if ( $pad_spaces < 0 ) {
+
+ if ( $pad_spaces == -1 ) {
+ if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' )
+ {
+ pad_token( $ipad - 1, $pad_spaces );
+ }
}
+ $pad_spaces = 0;
}
- $pad_spaces = 0;
- }
- # now apply any padding for alignment
- if ( $ipad >= 0 && $pad_spaces ) {
- my $length_t = total_line_length( $ibeg, $iend );
- if ( $pad_spaces + $length_t <= $rOpts_maximum_line_length ) {
- $tokens_to_go[$ipad] =
- ' ' x $pad_spaces . $tokens_to_go[$ipad];
+ # now apply any padding for alignment
+ if ( $ipad >= 0 && $pad_spaces ) {
+
+ my $length_t = total_line_length( $ibeg, $iend );
+ if ( $pad_spaces + $length_t <= maximum_line_length($ibeg) )
+ {
+ pad_token( $ipad, $pad_spaces );
+ }
}
}
}
+ continue {
+ $iendm = $iend;
+ $ibegm = $ibeg;
+ $has_leading_op = $has_leading_op_next;
+ } # end of loop over lines
+ return;
}
- continue {
- $iendm = $iend;
- $ibegm = $ibeg;
- $has_leading_op = $has_leading_op_next;
- } # end of loop over lines
- return;
}
sub correct_lp_indentation {
# We leave it to the aligner to decide how to do this.
# first remove continuation indentation if appropriate
- my $max_line = @$ri_first - 1;
+ my $max_line = @{$ri_first} - 1;
# looking at each line of this batch..
my ( $ibeg, $iend );
- my $line;
- foreach $line ( 0 .. $max_line ) {
- $ibeg = $$ri_first[$line];
- $iend = $$ri_last[$line];
+ foreach my $line ( 0 .. $max_line ) {
+ $ibeg = $ri_first->[$line];
+ $iend = $ri_last->[$line];
# looking at each token in this output line..
- my $i;
- foreach $i ( $ibeg .. $iend ) {
+ foreach my $i ( $ibeg .. $iend ) {
# How many space characters to place before this token
# for special alignment. Actual padding is done in the
# looking for next unvisited indentation item
my $indentation = $leading_spaces_to_go[$i];
- if ( !$indentation->get_MARKED() ) {
- $indentation->set_MARKED(1);
+ if ( !$indentation->get_marked() ) {
+ $indentation->set_marked(1);
# looking for indentation item for which we are aligning
# with parens, braces, and brackets
- next unless ( $indentation->get_ALIGN_PAREN() );
+ next unless ( $indentation->get_align_paren() );
# skip closed container on this line
if ( $i > $ibeg ) {
- my $im = $i - 1;
- if ( $types_to_go[$im] eq 'b' && $im > $ibeg ) { $im-- }
+ my $im = max( $ibeg, $iprev_to_go[$i] );
if ( $type_sequence_to_go[$im]
&& $mate_index_to_go[$im] <= $iend )
{
# Ok, let's see what the error is and try to fix it
my $actual_pos;
- my $predicted_pos = $indentation->get_SPACES();
+ my $predicted_pos = $indentation->get_spaces();
if ( $i > $ibeg ) {
# token is mid-line - use length to previous token
# additional lines have continuation indentation,
# and remove it if so. Otherwise, we do not get
# good alignment.
- my $closing_index = $indentation->get_CLOSED();
+ my $closing_index = $indentation->get_closed();
if ( $closing_index > $iend ) {
- my $ibeg_next = $$ri_first[ $line + 1 ];
+ my $ibeg_next = $ri_first->[ $line + 1 ];
if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
undo_lp_ci( $line, $i, $closing_index, $ri_first,
$ri_last );
# handle case where token starts a new line;
# use length of previous line
- my $ibegm = $$ri_first[ $line - 1 ];
- my $iendm = $$ri_last[ $line - 1 ];
+ my $ibegm = $ri_first->[ $line - 1 ];
+ my $iendm = $ri_last->[ $line - 1 ];
$actual_pos = total_line_length( $ibegm, $iendm );
# follow -pt style
# done if no error to correct (gnu2.t)
if ( $move_right == 0 ) {
- $indentation->set_RECOVERABLE_SPACES($move_right);
+ $indentation->set_recoverable_spaces($move_right);
next;
}
# if we have not seen closure for this indentation in
# this batch, we can only pass on a request to the
# vertical aligner
- my $closing_index = $indentation->get_CLOSED();
+ my $closing_index = $indentation->get_closed();
if ( $closing_index < 0 ) {
- $indentation->set_RECOVERABLE_SPACES($move_right);
+ $indentation->set_recoverable_spaces($move_right);
next;
}
# dependent nodes or need to move right.
my $right_margin = 0;
- my $have_child = $indentation->get_HAVE_CHILD();
+ my $have_child = $indentation->get_have_child();
my %saw_indentation;
my $line_count = 1;
}
# look ahead at the rest of the lines of this batch..
- my $line_t;
- foreach $line_t ( $line + 1 .. $max_line ) {
- my $ibeg_t = $$ri_first[$line_t];
- my $iend_t = $$ri_last[$line_t];
+ foreach my $line_t ( $line + 1 .. $max_line ) {
+ my $ibeg_t = $ri_first->[$line_t];
+ my $iend_t = $ri_last->[$line_t];
last if ( $closing_index <= $ibeg_t );
# remember all different indentation objects
$max_length = $length_t;
}
}
- $right_margin = $rOpts_maximum_line_length - $max_length;
+ $right_margin = maximum_line_length($ibeg) - $max_length;
if ( $right_margin < 0 ) { $right_margin = 0 }
}
my $first_line_comma_count =
grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
- my $comma_count = $indentation->get_COMMA_COUNT();
- my $arrow_count = $indentation->get_ARROW_COUNT();
+ my $comma_count = $indentation->get_comma_count();
+ my $arrow_count = $indentation->get_arrow_count();
# This is a simple approximate test for vertical alignment:
# if we broke just after an opening paren, brace, bracket,
foreach ( keys %saw_indentation ) {
$saw_indentation{$_}
- ->permanently_decrease_AVAILABLE_SPACES( -$move );
+ ->permanently_decrease_available_spaces( -$move );
}
}
# Otherwise, record what we want and the vertical aligner
# will try to recover it.
else {
- $indentation->set_RECOVERABLE_SPACES($move_right);
+ $indentation->set_recoverable_spaces($move_right);
}
}
}
# an alternate source of lines can be written in the correct order
sub flush {
+ my $self = shift;
destroy_one_line_block();
- output_line_to_go();
+ $self->output_line_to_go();
Perl::Tidy::VerticalAligner::flush();
+ return;
}
sub reset_block_text_accumulator {
$leading_block_text_length_exceeded = 0;
$leading_block_text_line_number = 0;
$leading_block_text_line_length = 0;
+ return;
}
sub set_block_text_accumulator {
if ( $accumulating_text_for_block !~ /^els/ ) {
$rleading_block_if_elsif_text = [];
}
- $leading_block_text = "";
- $leading_block_text_level = $levels_to_go[$i];
- $leading_block_text_line_number =
- $vertical_aligner_object->get_output_line_number();
+ $leading_block_text = "";
+ $leading_block_text_level = $levels_to_go[$i];
+ $leading_block_text_line_number = get_output_line_number();
+ ##$vertical_aligner_object->get_output_line_number();
$leading_block_text_length_exceeded = 0;
# this will contain the column number of the last character
# of the closing side comment
$leading_block_text_line_length =
+ length($csc_last_label) +
length($accumulating_text_for_block) +
length( $rOpts->{'closing-side-comment-prefix'} ) +
$leading_block_text_level * $rOpts_indent_columns + 3;
+ return;
}
sub accumulate_block_text {
&& $types_to_go[$i] ne '#' )
{
- my $added_length = length( $tokens_to_go[$i] );
+ my $added_length = $token_lengths_to_go[$i];
$added_length += 1 if $i == 0;
my $new_line_length = $leading_block_text_line_length + $added_length;
# the new total line length must be below the line length limit
# or the new length must be below the text length limit
# (ie, we may allow one token to exceed the text length limit)
- && ( $new_line_length < $rOpts_maximum_line_length
+ && (
+ $new_line_length <
+ maximum_line_length_for_level($leading_block_text_level)
+
|| length($leading_block_text) + $added_length <
- $rOpts_closing_side_comment_maximum_text )
+ $rOpts_closing_side_comment_maximum_text
+ )
# UNLESS: we are adding a closing paren before the brace we seek.
# This is an attempt to avoid situations where the ... to be
$leading_block_text .= '...';
}
}
+ return;
}
{
# curly. Note: 'else' does not, but must be included to allow trailing
# if/elsif text to be appended.
# patch for SWITCH/CASE: added 'case' and 'when'
- @_ = qw(if elsif else unless while until for foreach case when);
- @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
+ my @q =
+ qw(if elsif else unless while until for foreach case when catch);
+ @is_if_elsif_else_unless_while_until_for_foreach{@q} =
+ (1) x scalar(@q);
}
sub accumulate_csc_text {
my $i_terminal = 0; # index of last nonblank token
my $terminal_block_type = "";
+ # update most recent statement label
+ $csc_last_label = "" unless ($csc_last_label);
+ if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
+ my $block_label = $csc_last_label;
+
+ # Loop over all tokens of this batch
for my $i ( 0 .. $max_index_to_go ) {
my $type = $types_to_go[$i];
my $block_type = $block_type_to_go[$i];
# restore any leading text saved when we entered this block
if ( defined( $block_leading_text{$type_sequence} ) ) {
- ( $block_leading_text, $rblock_leading_if_elsif_text ) =
- @{ $block_leading_text{$type_sequence} };
+ ( $block_leading_text, $rblock_leading_if_elsif_text )
+ = @{ $block_leading_text{$type_sequence} };
$i_block_leading_text = $i;
delete $block_leading_text{$type_sequence};
$rleading_block_if_elsif_text =
$rblock_leading_if_elsif_text;
}
+ if ( defined( $csc_block_label{$type_sequence} ) ) {
+ $block_label = $csc_block_label{$type_sequence};
+ delete $csc_block_label{$type_sequence};
+ }
+
# if we run into a '}' then we probably started accumulating
# at something like a trailing 'if' clause..no harm done.
if ( $accumulating_text_for_block
if ( defined( $block_opening_line_number{$type_sequence} ) )
{
- my $output_line_number =
- $vertical_aligner_object->get_output_line_number();
+ my $output_line_number = get_output_line_number();
+ ##$vertical_aligner_object->get_output_line_number();
$block_line_count =
$output_line_number -
$block_opening_line_number{$type_sequence} + 1;
elsif ( $token eq '{' ) {
- my $line_number =
- $vertical_aligner_object->get_output_line_number();
+ my $line_number = get_output_line_number();
+ ##$vertical_aligner_object->get_output_line_number();
$block_opening_line_number{$type_sequence} = $line_number;
+ # set a label for this block, except for
+ # a bare block which already has the label
+ # A label can only be used on the next {
+ if ( $block_type =~ /:$/ ) { $csc_last_label = "" }
+ $csc_block_label{$type_sequence} = $csc_last_label;
+ $csc_last_label = "";
+
if ( $accumulating_text_for_block
&& $levels_to_go[$i] == $leading_block_text_level )
{
$block_leading_text, $rblock_leading_if_elsif_text );
}
+ # if this line ends in a label then remember it for the next pass
+ $csc_last_label = "";
+ if ( $terminal_type eq 'J' ) {
+ $csc_last_label = $tokens_to_go[$i_terminal];
+ }
+
return ( $terminal_type, $i_terminal, $i_block_leading_text,
- $block_leading_text, $block_line_count );
+ $block_leading_text, $block_line_count, $block_label );
}
}
my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
my $csc_text = $block_leading_text;
- if ( $block_type eq 'elsif' && $rOpts_closing_side_comment_else_flag == 0 )
+ if ( $block_type eq 'elsif'
+ && $rOpts_closing_side_comment_else_flag == 0 )
{
return $csc_text;
}
length($block_type) +
length( $rOpts->{'closing-side-comment-prefix'} ) +
$levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
- if ( $length > $rOpts_maximum_line_length ) {
+ if ( $length > maximum_line_length_for_level($leading_block_text_level) ) {
$csc_text = $saved_text;
}
return $csc_text;
}
+{ # sub balance_csc_text
+
+ my %matching_char;
+
+ BEGIN {
+ %matching_char = (
+ '{' => '}',
+ '(' => ')',
+ '[' => ']',
+ '}' => '{',
+ ')' => '(',
+ ']' => '[',
+ );
+ }
+
+ sub balance_csc_text {
+
+ # Append characters to balance a closing side comment so that editors
+ # such as vim can correctly jump through code.
+ # Simple Example:
+ # input = ## end foreach my $foo ( sort { $b ...
+ # output = ## end foreach my $foo ( sort { $b ...})
+
+ # NOTE: This routine does not currently filter out structures within
+ # quoted text because the bounce algorithms in text editors do not
+ # necessarily do this either (a version of vim was checked and
+ # did not do this).
+
+ # Some complex examples which will cause trouble for some editors:
+ # while ( $mask_string =~ /\{[^{]*?\}/g ) {
+ # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
+ # if ( $1 eq '{' ) {
+ # test file test1/braces.pl has many such examples.
+
+ my ($csc) = @_;
+
+ # loop to examine characters one-by-one, RIGHT to LEFT and
+ # build a balancing ending, LEFT to RIGHT.
+ for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) {
+
+ my $char = substr( $csc, $pos, 1 );
+
+ # ignore everything except structural characters
+ next unless ( $matching_char{$char} );
+
+ # pop most recently appended character
+ my $top = chop($csc);
+
+ # push it back plus the mate to the newest character
+ # unless they balance each other.
+ $csc = $csc . $top . $matching_char{$char} unless $top eq $char;
+ }
+
+ # return the balanced string
+ return $csc;
+ }
+}
+
sub add_closing_side_comment {
+ my $self = shift;
+
# add closing side comments after closing block braces if -csc used
my $cscw_block_comment;
#---------------------------------------------------------------
my ( $terminal_type, $i_terminal, $i_block_leading_text,
- $block_leading_text, $block_line_count )
+ $block_leading_text, $block_line_count, $block_label )
= accumulate_csc_text();
#---------------------------------------------------------------
# Step 2: make the closing side comment if this ends a block
#---------------------------------------------------------------
- my $have_side_comment = $i_terminal != $max_index_to_go;
+ ##my $have_side_comment = $i_terminal != $max_index_to_go;
+ my $have_side_comment = $types_to_go[$max_index_to_go] eq '#';
# if this line might end in a block closure..
if (
# ..and either
&& (
- # this is the last token (line doesnt have a side comment)
+ # this is the last token (line doesn't have a side comment)
!$have_side_comment
# or the old side comment is a closing side comment
{
# then make the closing side comment text
+ if ($block_label) { $block_label .= " " }
my $token =
-"$rOpts->{'closing-side-comment-prefix'} $block_type_to_go[$i_terminal]";
+"$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
# append any extra descriptive text collected above
if ( $i_block_leading_text == $i_terminal ) {
$token .= $block_leading_text;
}
+
+ $token = balance_csc_text($token)
+ if $rOpts->{'closing-side-comments-balanced'};
+
$token =~ s/\s*$//; # trim any trailing whitespace
# handle case of existing closing side comment
if ( $rOpts->{'closing-side-comment-warnings'} ) {
my $old_csc = $tokens_to_go[$max_index_to_go];
my $new_csc = $token;
- $new_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
- my $new_trailing_dots = $1;
- $old_csc =~ s/\.\.\.\s*$//;
$new_csc =~ s/\s+//g; # trim all whitespace
- $old_csc =~ s/\s+//g;
+ $old_csc =~ s/\s+//g; # trim all whitespace
+ $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
+ $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
+ $new_csc =~ s/(\.\.\.)$//; # trim trailing '...'
+ my $new_trailing_dots = $1;
+ $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
# Patch to handle multiple closing side comments at
# else and elsif's. These have become too complicated
# if the new comment is shorter and has been limited,
# only compare the common part.
- if ( length($new_csc) < length($old_csc) && $new_trailing_dots )
+ if ( length($new_csc) < length($old_csc)
+ && $new_trailing_dots )
{
$old_csc = substr( $old_csc, 0, length($new_csc) );
}
$rOpts->{'closing-side-comment-interval'} )
{
$token = undef;
- unstore_token_to_go()
+ $self->unstore_token_to_go()
if ( $types_to_go[$max_index_to_go] eq '#' );
- unstore_token_to_go()
+ $self->unstore_token_to_go()
if ( $types_to_go[$max_index_to_go] eq 'b' );
}
}
# handle case of NO existing closing side comment
else {
+ # Remove any existing blank and add another below.
+ # This is a tricky point. A side comment needs to have the same level
+ # as the preceding closing brace or else the line will not get the right
+ # indentation. So even if we have a blank, we are going to replace it.
+ if ( $types_to_go[$max_index_to_go] eq 'b' ) {
+ unstore_token_to_go();
+ }
+
# insert the new side comment into the output token stream
my $type = '#';
my $block_type = '';
my $slevel = $nesting_depth_to_go[$max_index_to_go];
my $no_internal_newlines = 0;
- my $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go];
my $ci_level = $ci_levels_to_go[$max_index_to_go];
my $in_continued_quote = 0;
- # first insert a blank token
- insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines );
+ # insert a blank token
+ $self->insert_new_token_to_go( ' ', 'b', $slevel,
+ $no_internal_newlines );
# then the side comment
- insert_new_token_to_go( $token, $type, $slevel,
+ $self->insert_new_token_to_go( $token, $type, $slevel,
$no_internal_newlines );
}
}
}
sub previous_nonblank_token {
- my ($i) = @_;
- if ( $i <= 0 ) {
- return "";
- }
- elsif ( $types_to_go[ $i - 1 ] ne 'b' ) {
- return $tokens_to_go[ $i - 1 ];
- }
- elsif ( $i > 1 ) {
- return $tokens_to_go[ $i - 2 ];
- }
- else {
- return "";
+ my ($i) = @_;
+ my $name = "";
+ my $im = $i - 1;
+ return "" if ( $im < 0 );
+ if ( $types_to_go[$im] eq 'b' ) { $im--; }
+ return "" if ( $im < 0 );
+ $name = $tokens_to_go[$im];
+
+ # prepend any sub name to an isolated -> to avoid unwanted alignments
+ # [test case is test8/penco.pl]
+ if ( $name eq '->' ) {
+ $im--;
+ if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
+ $name = $tokens_to_go[$im] . $name;
+ }
}
+ return $name;
}
sub send_lines_to_vertical_aligner {
- my ( $ri_first, $ri_last, $do_not_pad ) = @_;
+ my ( $self, $ri_first, $ri_last, $do_not_pad ) = @_;
my $rindentation_list = [0]; # ref to indentations for each line
# flush if necessary to avoid unwanted alignment
my $must_flush = 0;
- if ( @$ri_first > 1 ) {
+ if ( @{$ri_first} > 1 ) {
# flush before a long if statement
if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
Perl::Tidy::VerticalAligner::flush();
}
- set_logical_padding( $ri_first, $ri_last );
+ undo_ci( $ri_first, $ri_last );
+
+ set_logical_padding( $ri_first, $ri_last );
+
+ # loop to prepare each line for shipment
+ my $n_last_line = @{$ri_first} - 1;
+ my $in_comma_list;
+ for my $n ( 0 .. $n_last_line ) {
+ my $ibeg = $ri_first->[$n];
+ my $iend = $ri_last->[$n];
+
+ my ( $rtokens, $rfields, $rpatterns ) =
+ make_alignment_patterns( $ibeg, $iend );
+
+ # Set flag to show how much level changes between this line
+ # and the next line, if we have it.
+ my $ljump = 0;
+ if ( $n < $n_last_line ) {
+ my $ibegp = $ri_first->[ $n + 1 ];
+ $ljump = $levels_to_go[$ibegp] - $levels_to_go[$iend];
+ }
+
+ my ( $indentation, $lev, $level_end, $terminal_type,
+ $is_semicolon_terminated, $is_outdented_line )
+ = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
+ $ri_first, $ri_last, $rindentation_list, $ljump );
+
+ # we will allow outdenting of long lines..
+ my $outdent_long_lines = (
+
+ # which are long quotes, if allowed
+ ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
+
+ # which are long block comments, if allowed
+ || (
+ $types_to_go[$ibeg] eq '#'
+ && $rOpts->{'outdent-long-comments'}
+
+ # but not if this is a static block comment
+ && !$is_static_block_comment
+ )
+ );
+
+ my $level_jump =
+ $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
+
+ my $rvertical_tightness_flags =
+ set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
+ $ri_first, $ri_last );
+
+ # flush an outdented line to avoid any unwanted vertical alignment
+ Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
+
+ # Set a flag at the final ':' of a ternary chain to request
+ # vertical alignment of the final term. Here is a
+ # slightly complex example:
+ #
+ # $self->{_text} = (
+ # !$section ? ''
+ # : $type eq 'item' ? "the $section entry"
+ # : "the section on $section"
+ # )
+ # . (
+ # $page
+ # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
+ # : ' elsewhere in this document'
+ # );
+ #
+ my $is_terminal_ternary = 0;
+ if ( $tokens_to_go[$ibeg] eq ':'
+ || $n > 0 && $tokens_to_go[ $ri_last->[ $n - 1 ] ] eq ':' )
+ {
+ my $last_leading_type = ":";
+ if ( $n > 0 ) {
+ my $iprev = $ri_first->[ $n - 1 ];
+ $last_leading_type = $types_to_go[$iprev];
+ }
+ if ( $terminal_type ne ';'
+ && $n_last_line > $n
+ && $level_end == $lev )
+ {
+ my $inext = $ri_first->[ $n + 1 ];
+ $level_end = $levels_to_go[$inext];
+ $terminal_type = $types_to_go[$inext];
+ }
+
+ $is_terminal_ternary = $last_leading_type eq ':'
+ && ( ( $terminal_type eq ';' && $level_end <= $lev )
+ || ( $terminal_type ne ':' && $level_end < $lev ) )
+
+ # the terminal term must not contain any ternary terms, as in
+ # my $ECHO = (
+ # $Is_MSWin32 ? ".\\echo$$"
+ # : $Is_MacOS ? ":echo$$"
+ # : ( $Is_NetWare ? "echo$$" : "./echo$$" )
+ # );
+ && !grep /^[\?\:]$/, @types_to_go[ $ibeg + 1 .. $iend ];
+ }
+
+ # send this new line down the pipe
+ my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
+ Perl::Tidy::VerticalAligner::valign_input(
+ $lev,
+ $level_end,
+ $indentation,
+ $rfields,
+ $rtokens,
+ $rpatterns,
+ $forced_breakpoint_to_go[$iend] || $in_comma_list,
+ $outdent_long_lines,
+ $is_terminal_ternary,
+ $is_semicolon_terminated,
+ $do_not_pad,
+ $rvertical_tightness_flags,
+ $level_jump,
+ );
+ $in_comma_list =
+ $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
+
+ # flush an outdented line to avoid any unwanted vertical alignment
+ Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
+
+ $do_not_pad = 0;
+
+ # Set flag indicating if this line ends in an opening
+ # token and is very short, so that a blank line is not
+ # needed if the subsequent line is a comment.
+ # Examples of what we are looking for:
+ # {
+ # && (
+ # BEGIN {
+ # default {
+ # sub {
+ $last_output_short_opening_token
+
+ # line ends in opening token
+ = $types_to_go[$iend] =~ /^[\{\(\[L]$/
+
+ # and either
+ && (
+ # line has either single opening token
+ $iend == $ibeg
+
+ # or is a single token followed by opening token.
+ # Note that sub identifiers have blanks like 'sub doit'
+ || ( $iend - $ibeg <= 2 && $tokens_to_go[$ibeg] !~ /\s+/ )
+ )
+
+ # and limit total to 10 character widths
+ && token_sequence_length( $ibeg, $iend ) <= 10;
+
+ } # end of loop to output each line
+
+ # remember indentation of lines containing opening containers for
+ # later use by sub set_adjusted_indentation
+ save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
+ return;
+}
+
+{ # begin make_alignment_patterns
+
+ my %block_type_map;
+ my %keyword_map;
+
+ BEGIN {
+
+ # map related block names into a common name to
+ # allow alignment
+ %block_type_map = (
+ 'unless' => 'if',
+ 'else' => 'if',
+ 'elsif' => 'if',
+ 'when' => 'if',
+ 'default' => 'if',
+ 'case' => 'if',
+ 'sort' => 'map',
+ 'grep' => 'map',
+ );
- # loop to prepare each line for shipment
- my $n_last_line = @$ri_first - 1;
- my $in_comma_list;
- for my $n ( 0 .. $n_last_line ) {
- my $ibeg = $$ri_first[$n];
- my $iend = $$ri_last[$n];
+ # map certain keywords to the same 'if' class to align
+ # long if/elsif sequences. [elsif.pl]
+ %keyword_map = (
+ 'unless' => 'if',
+ 'else' => 'if',
+ 'elsif' => 'if',
+ 'when' => 'given',
+ 'default' => 'given',
+ 'case' => 'switch',
+
+ # treat an 'undef' similar to numbers and quotes
+ 'undef' => 'Q',
+ );
+ }
- my @patterns = ();
+ sub make_alignment_patterns {
+
+ # Here we do some important preliminary work for the
+ # vertical aligner. We create three arrays for one
+ # output line. These arrays contain strings that can
+ # be tested by the vertical aligner to see if
+ # consecutive lines can be aligned vertically.
+ #
+ # The three arrays are indexed on the vertical
+ # alignment fields and are:
+ # @tokens - a list of any vertical alignment tokens for this line.
+ # These are tokens, such as '=' '&&' '#' etc which
+ # we want to might align vertically. These are
+ # decorated with various information such as
+ # nesting depth to prevent unwanted vertical
+ # alignment matches.
+ # @fields - the actual text of the line between the vertical alignment
+ # tokens.
+ # @patterns - a modified list of token types, one for each alignment
+ # field. These should normally each match before alignment is
+ # allowed, even when the alignment tokens match.
+ my ( $ibeg, $iend ) = @_;
my @tokens = ();
my @fields = ();
+ my @patterns = ();
my $i_start = $ibeg;
- my $i;
my $depth = 0;
my @container_name = ("");
my $j = 0; # field index
$patterns[0] = "";
- for $i ( $ibeg .. $iend ) {
+ for my $i ( $ibeg .. $iend ) {
# Keep track of containers balanced on this line only.
# These are used below to prevent unwanted cross-line alignments.
# Unbalanced containers already avoid aligning across
# container boundaries.
if ( $tokens_to_go[$i] eq '(' ) {
+
+ # if container is balanced on this line...
my $i_mate = $mate_index_to_go[$i];
if ( $i_mate > $i && $i_mate <= $iend ) {
$depth++;
my $seqno = $type_sequence_to_go[$i];
my $count = comma_arrow_count($seqno);
$multiple_comma_arrows[$depth] = $count && $count > 1;
+
+ # Append the previous token name to make the container name
+ # more unique. This name will also be given to any commas
+ # within this container, and it helps avoid undesirable
+ # alignments of different types of containers.
my $name = previous_nonblank_token($i);
$name =~ s/^->//;
$container_name[$depth] = "+" . $name;
+
+ # Make the container name even more unique if necessary.
+ # If we are not vertically aligning this opening paren,
+ # append a character count to avoid bad alignment because
+ # it usually looks bad to align commas within containers
+ # for which the opening parens do not align. Here
+ # is an example very BAD alignment of commas (because
+ # the atan2 functions are not all aligned):
+ # $XY =
+ # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
+ # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
+ # $X * atan2( $X, 1 ) -
+ # $Y * atan2( $Y, 1 );
+ #
+ # On the other hand, it is usually okay to align commas if
+ # opening parens align, such as:
+ # glVertex3d( $cx + $s * $xs, $cy, $z );
+ # glVertex3d( $cx, $cy + $s * $ys, $z );
+ # glVertex3d( $cx - $s * $xs, $cy, $z );
+ # glVertex3d( $cx, $cy - $s * $ys, $z );
+ #
+ # To distinguish between these situations, we will
+ # append the length of the line from the previous matching
+ # token, or beginning of line, to the function name. This
+ # will allow the vertical aligner to reject undesirable
+ # matches.
+
+ # if we are not aligning on this paren...
+ if ( $matching_token_to_go[$i] eq '' ) {
+
+ # Sum length from previous alignment, or start of line.
+ my $len =
+ ( $i_start == $ibeg )
+ ? total_line_length( $i_start, $i - 1 )
+ : token_sequence_length( $i_start, $i - 1 );
+
+ # tack length onto the container name to make unique
+ $container_name[$depth] .= "-" . $len;
+ }
}
}
elsif ( $tokens_to_go[$i] eq ')' ) {
$tok .= "$nesting_depth_to_go[$i]";
}
- # do any special decorations for commas to avoid unwanted
- # cross-line alignments.
- if ( $raw_tok eq ',' ) {
+ # also decorate commas with any container name to avoid
+ # unwanted cross-line alignments.
+ if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
if ( $container_name[$depth] ) {
$tok .= $container_name[$depth];
}
}
- # decorate '=>' with:
- # - Nothing if this container is unbalanced on this line.
- # - The previous token if it is balanced and multiple '=>'s
- # - The container name if it is bananced and no other '=>'s
- elsif ( $raw_tok eq '=>' ) {
- if ( $container_name[$depth] ) {
- if ( $multiple_comma_arrows[$depth] ) {
- $tok .= "+" . previous_nonblank_token($i);
- }
- else {
- $tok .= $container_name[$depth];
- }
+ # Patch to avoid aligning leading and trailing if, unless.
+ # Mark trailing if, unless statements with container names.
+ # This makes them different from leading if, unless which
+ # are not so marked at present. If we ever need to name
+ # them too, we could use ci to distinguish them.
+ # Example problem to avoid:
+ # return ( 2, "DBERROR" )
+ # if ( $retval == 2 );
+ # if ( scalar @_ ) {
+ # my ( $a, $b, $c, $d, $e, $f ) = @_;
+ # }
+ if ( $raw_tok eq '(' ) {
+ my $ci = $ci_levels_to_go[$ibeg];
+ if ( $container_name[$depth] =~ /^\+(if|unless)/
+ && $ci )
+ {
+ $tok .= $container_name[$depth];
}
}
+ # Decorate block braces with block types to avoid
+ # unwanted alignments such as the following:
+ # foreach ( @{$routput_array} ) { $fh->print($_) }
+ # eval { $fh->close() };
+ if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
+ my $block_type = $block_type_to_go[$i];
+
+ # map certain related block types to allow
+ # else blocks to align
+ $block_type = $block_type_map{$block_type}
+ if ( defined( $block_type_map{$block_type} ) );
+
+ # remove sub names to allow one-line sub braces to align
+ # regardless of name
+ #if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
+ if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
+
+ # allow all control-type blocks to align
+ if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
+
+ $tok .= $block_type;
+ }
+
# concatenate the text of the consecutive tokens to form
# the field
push( @fields,
if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
$type = 'Q';
+
+ # Patch to ignore leading minus before words,
+ # by changing pattern 'mQ' into just 'Q',
+ # so that we can align things like this:
+ # Button => "Print letter \"~$_\"",
+ # -command => [ sub { print "$_[0]\n" }, $_ ],
+ if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
}
}
- # minor patch to make numbers and quotes align
+ # patch to make numbers and quotes align
if ( $type eq 'n' ) { $type = 'Q' }
+ # patch to ignore any ! in patterns
+ if ( $type eq '!' ) { $type = '' }
+
$patterns[$j] .= $type;
}
# for keywords we have to use the actual text
else {
- # map certain keywords to the same 'if' class to align
- # long if/elsif sequences. my testfile: elsif.pl
my $tok = $tokens_to_go[$i];
- if ( $n == 0 && $tok =~ /^(elsif|else|unless)$/ ) {
- $tok = 'if';
- }
+
+ # but map certain keywords to a common string to allow
+ # alignment.
+ $tok = $keyword_map{$tok}
+ if ( defined( $keyword_map{$tok} ) );
$patterns[$j] .= $tok;
}
}
# done with this line .. join text of tokens to make the last field
push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
+ return ( \@tokens, \@fields, \@patterns );
+ }
- my ( $indentation, $lev, $level_end, $terminal_type,
- $is_semicolon_terminated, $is_outdented_line )
- = set_adjusted_indentation( $ibeg, $iend, \@fields, \@patterns,
- $ri_first, $ri_last, $rindentation_list );
-
- # we will allow outdenting of long lines..
- my $outdent_long_lines = (
-
- # which are long quotes, if allowed
- ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
-
- # which are long block comments, if allowed
- || (
- $types_to_go[$ibeg] eq '#'
- && $rOpts->{'outdent-long-comments'}
-
- # but not if this is a static block comment
- && !$is_static_block_comment
- )
- );
-
- my $level_jump =
- $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
-
- my $rvertical_tightness_flags =
- set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
- $ri_first, $ri_last );
-
- # flush an outdented line to avoid any unwanted vertical alignment
- Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
-
- my $is_terminal_ternary = 0;
- if ( $tokens_to_go[$ibeg] eq ':'
- || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' )
- {
- if ( ( $terminal_type eq ';' && $level_end <= $lev )
- || ( $level_end < $lev ) )
- {
- $is_terminal_ternary = 1;
- }
- }
-
- # send this new line down the pipe
- my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
- Perl::Tidy::VerticalAligner::append_line(
- $lev,
- $level_end,
- $indentation,
- \@fields,
- \@tokens,
- \@patterns,
- $forced_breakpoint_to_go[$iend] || $in_comma_list,
- $outdent_long_lines,
- $is_terminal_ternary,
- $is_semicolon_terminated,
- $do_not_pad,
- $rvertical_tightness_flags,
- $level_jump,
- );
- $in_comma_list =
- $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
-
- # flush an outdented line to avoid any unwanted vertical alignment
- Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
-
- $do_not_pad = 0;
-
- } # end of loop to output each line
-
- # remember indentation of lines containing opening containers for
- # later use by sub set_adjusted_indentation
- save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
-}
+} # end make_alignment_patterns
-{ # begin unmatched_indexes
+{ # begin unmatched_indexes
# closure to keep track of unbalanced containers.
# arrays shared by the routines in this block:
my %comma_arrow_count;
sub is_unbalanced_batch {
- @unmatched_opening_indexes_in_this_batch +
+ return @unmatched_opening_indexes_in_this_batch +
@unmatched_closing_indexes_in_this_batch;
}
sub comma_arrow_count {
- my $seqno = $_[0];
+ my $seqno = shift;
return $comma_arrow_count{$seqno};
}
@unmatched_opening_indexes_in_this_batch = ();
@unmatched_closing_indexes_in_this_batch = ();
%comma_arrow_count = ();
+ my $comma_arrow_count_contained = 0;
- my ( $i, $i_mate, $token );
- foreach $i ( 0 .. $max_index_to_go ) {
+ foreach my $i ( 0 .. $max_index_to_go ) {
if ( $type_sequence_to_go[$i] ) {
- $token = $tokens_to_go[$i];
+ my $token = $tokens_to_go[$i];
if ( $token =~ /^[\(\[\{\?]$/ ) {
push @unmatched_opening_indexes_in_this_batch, $i;
}
elsif ( $token =~ /^[\)\]\}\:]$/ ) {
- $i_mate = pop @unmatched_opening_indexes_in_this_batch;
+ my $i_mate = pop @unmatched_opening_indexes_in_this_batch;
if ( defined($i_mate) && $i_mate >= 0 ) {
if ( $type_sequence_to_go[$i_mate] ==
$type_sequence_to_go[$i] )
{
$mate_index_to_go[$i] = $i_mate;
$mate_index_to_go[$i_mate] = $i;
+ my $seqno = $type_sequence_to_go[$i];
+ if ( $comma_arrow_count{$seqno} ) {
+ $comma_arrow_count_contained +=
+ $comma_arrow_count{$seqno};
+ }
}
else {
push @unmatched_opening_indexes_in_this_batch,
}
}
}
+ return $comma_arrow_count_contained;
}
sub save_opening_indentation {
)
];
}
+ return;
}
} # end unmatched_indexes
# first, see if the opening token is in the current batch
my $i_opening = $mate_index_to_go[$i_closing];
- my ( $indent, $offset );
+ my ( $indent, $offset, $is_leading, $exists );
+ $exists = 1;
if ( $i_opening >= 0 ) {
# it is..look up the indentation
- ( $indent, $offset ) =
+ ( $indent, $offset, $is_leading ) =
lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
$rindentation_list );
}
my $seqno = $type_sequence_to_go[$i_closing];
if ($seqno) {
if ( $saved_opening_indentation{$seqno} ) {
- ( $indent, $offset ) = @{ $saved_opening_indentation{$seqno} };
+ ( $indent, $offset, $is_leading ) =
+ @{ $saved_opening_indentation{$seqno} };
}
# some kind of serious error
# (example is badfile.t)
else {
- $indent = 0;
- $offset = 0;
+ $indent = 0;
+ $offset = 0;
+ $is_leading = 0;
+ $exists = 0;
}
}
# if no sequence number it must be an unbalanced container
else {
- $indent = 0;
- $offset = 0;
+ $indent = 0;
+ $offset = 0;
+ $is_leading = 0;
+ $exists = 0;
}
}
- return ( $indent, $offset );
+ return ( $indent, $offset, $is_leading, $exists );
}
sub lookup_opening_indentation {
$rindentation_list->[0] =
$nline; # save line number to start looking next call
- my $ibeg = $ri_start->[$nline];
- my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
- return ( $rindentation_list->[ $nline + 1 ], $offset );
+ my $ibeg = $ri_start->[$nline];
+ my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
+ my $is_leading = ( $ibeg == $i_opening );
+ return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
}
{
# curly. Note: 'else' does not, but must be included to allow trailing
# if/elsif text to be appended.
# patch for SWITCH/CASE: added 'case' and 'when'
- @_ = qw(if elsif else unless while until for foreach case when);
- @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
+ my @q = qw(if elsif else unless while until for foreach case when);
+ @is_if_elsif_else_unless_while_until_for_foreach{@q} =
+ (1) x scalar(@q);
}
sub set_adjusted_indentation {
# outdenting.
my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
- $rindentation_list )
+ $rindentation_list, $level_jump )
= @_;
# we need to know the last token of this line
my $is_semicolon_terminated = $terminal_type eq ';'
&& $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
+ # NOTE: A future improvement would be to make it semicolon terminated
+ # even if it does not have a semicolon but is followed by a closing
+ # block brace. This would undo ci even for something like the
+ # following, in which the final paren does not have a semicolon because
+ # it is a possible weld location:
+
+ # if ($BOLD_MATH) {
+ # (
+ # $labels, $comment,
+ # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
+ # )
+ # }
+ #
+
+ # MOJO: Set a flag if this lines begins with ')->'
+ my $leading_paren_arrow = (
+ $types_to_go[$ibeg] eq '}'
+ && $tokens_to_go[$ibeg] eq ')'
+ && (
+ ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' )
+ || ( $ibeg < $i_terminal - 1
+ && $types_to_go[ $ibeg + 1 ] eq 'b'
+ && $types_to_go[ $ibeg + 2 ] eq '->' )
+ )
+ );
+
##########################################################
# Section 1: set a flag and a default indentation
#
my $adjust_indentation = 0;
my $default_adjust_indentation = $adjust_indentation;
- my ( $opening_indentation, $opening_offset );
+ my (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ );
# if we are at a closing token of some type..
- if ( $types_to_go[$ibeg] =~ /^[\)\}\]]$/ ) {
+ if ( $types_to_go[$ibeg] =~ /^[\)\}\]R]$/ ) {
# get the indentation of the line containing the corresponding
# opening token
- ( $opening_indentation, $opening_offset ) =
- get_opening_indentation( $ibeg, $ri_first, $ri_last,
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = get_opening_indentation( $ibeg, $ri_first, $ri_last,
$rindentation_list );
# First set the default behavior:
- # default behavior is to outdent closing lines
- # of the form: "); }; ]; )->xxx;"
if (
+
+ # default behavior is to outdent closing lines
+ # of the form: "); }; ]; )->xxx;"
$is_semicolon_terminated
# and 'cuddled parens' of the form: ")->pack("
+ # Bug fix for RT #123749]: the types here were
+ # incorrectly '(' and ')'. Corrected to be '{' and '}'
|| (
- $terminal_type eq '('
- && $types_to_go[$ibeg] eq ')'
+ $terminal_type eq '{'
+ && $types_to_go[$ibeg] eq '}'
&& ( $nesting_depth_to_go[$iend] + 1 ==
$nesting_depth_to_go[$ibeg] )
)
+
+ # remove continuation indentation for any line like
+ # } ... {
+ # or without ending '{' and unbalanced, such as
+ # such as '}->{$operator}'
+ || (
+ $types_to_go[$ibeg] eq '}'
+
+ && ( $types_to_go[$iend] eq '{'
+ || $levels_to_go[$iend] < $levels_to_go[$ibeg] )
+ )
+
+ # and when the next line is at a lower indentation level
+ # PATCH: and only if the style allows undoing continuation
+ # for all closing token types. We should really wait until
+ # the indentation of the next line is known and then make
+ # a decision, but that would require another pass.
+ || ( $level_jump < 0 && !$some_closing_token_indentation )
+
+ # Patch for -wn=2, multiple welded closing tokens
+ || ( $i_terminal > $ibeg
+ && $types_to_go[$iend] =~ /^[\)\}\]R]$/ )
+
)
{
$adjust_indentation = 1;
}
- # TESTING: outdent something like '),'
+ # outdent something like '),'
if (
$terminal_type eq ','
- # allow just one character before the comma
- && $i_terminal == $ibeg + 1
+ # Removed this constraint for -wn
+ # OLD: allow just one character before the comma
+ # && $i_terminal == $ibeg + 1
- # requre LIST environment; otherwise, we may outdent too much --
+ # require LIST environment; otherwise, we may outdent too much -
# this can happen in calls without parentheses (overload.t);
&& $container_environment_to_go[$i_terminal] eq 'LIST'
)
{
$adjust_indentation = 1;
}
+
+ # Patch for RT #96101, in which closing brace of anonymous subs
+ # was not outdented. We should look ahead and see if there is
+ # a level decrease at the next token (i.e., a closing token),
+ # but right now we do not have that information. For now
+ # we see if we are in a list, and this works well.
+ # See test files 'sub*.t' for good test cases.
+ if ( $block_type_to_go[$ibeg] =~ /$ASUB_PATTERN/
+ && $container_environment_to_go[$i_terminal] eq 'LIST'
+ && !$rOpts->{'indent-closing-brace'} )
+ {
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = get_opening_indentation( $ibeg, $ri_first, $ri_last,
+ $rindentation_list );
+ my $indentation = $leading_spaces_to_go[$ibeg];
+ if ( defined($opening_indentation)
+ && get_spaces($indentation) >
+ get_spaces($opening_indentation) )
+ {
+ $adjust_indentation = 1;
+ }
+ }
+ }
+
+ # YVES patch 1 of 2:
+ # Undo ci of line with leading closing eval brace,
+ # but not beyond the indention of the line with
+ # the opening brace.
+ if ( $block_type_to_go[$ibeg] eq 'eval'
+ && !$rOpts->{'line-up-parentheses'}
+ && !$rOpts->{'indent-closing-brace'} )
+ {
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = get_opening_indentation( $ibeg, $ri_first, $ri_last,
+ $rindentation_list );
+ my $indentation = $leading_spaces_to_go[$ibeg];
+ if ( defined($opening_indentation)
+ && get_spaces($indentation) >
+ get_spaces($opening_indentation) )
+ {
+ $adjust_indentation = 1;
+ }
}
$default_adjust_indentation = $adjust_indentation;
}
# if at ');', '};', '>;', and '];' of a terminal qw quote
- elsif ($$rpatterns[0] =~ /^qb*;$/
- && $$rfields[0] =~ /^([\)\}\]\>]);$/ )
+ elsif ($rpatterns->[0] =~ /^qb*;$/
+ && $rfields->[0] =~ /^([\)\}\]\>]);$/ )
{
if ( $closing_token_indentation{$1} == 0 ) {
$adjust_indentation = 1;
}
}
+ # if line begins with a ':', align it with any
+ # previous line leading with corresponding ?
+ elsif ( $types_to_go[$ibeg] eq ':' ) {
+ (
+ $opening_indentation, $opening_offset,
+ $is_leading, $opening_exists
+ )
+ = get_opening_indentation( $ibeg, $ri_first, $ri_last,
+ $rindentation_list );
+ if ($is_leading) { $adjust_indentation = 2; }
+ }
+
##########################################################
# Section 2: set indentation according to flag set above
#
$lev = $levels_to_go[$ibeg];
}
elsif ( $adjust_indentation == 1 ) {
- $indentation = $reduced_spaces_to_go[$i_terminal];
- $lev = $levels_to_go[$i_terminal];
+
+ # Change the indentation to be that of a different token on the line
+ # Previously, the indentation of the terminal token was used:
+ # OLD CODING:
+ # $indentation = $reduced_spaces_to_go[$i_terminal];
+ # $lev = $levels_to_go[$i_terminal];
+
+ # Generalization for MOJO:
+ # Use the lowest level indentation of the tokens on the line.
+ # For example, here we can use the indentation of the ending ';':
+ # } until ($selection > 0 and $selection < 10); # ok to use ';'
+ # But this will not outdent if we use the terminal indentation:
+ # )->then( sub { # use indentation of the ->, not the {
+ # Warning: reduced_spaces_to_go[] may be a reference, do not
+ # do numerical checks with it
+
+ my $i_ind = $ibeg;
+ $indentation = $reduced_spaces_to_go[$i_ind];
+ $lev = $levels_to_go[$i_ind];
+ while ( $i_ind < $i_terminal ) {
+ $i_ind++;
+ if ( $levels_to_go[$i_ind] < $lev ) {
+ $indentation = $reduced_spaces_to_go[$i_ind];
+ $lev = $levels_to_go[$i_ind];
+ }
+ }
}
# handle indented closing token which aligns with opening token
# calculate spaces needed to align with opening token
my $space_count =
- get_SPACES($opening_indentation) + $opening_offset;
+ get_spaces($opening_indentation) + $opening_offset;
# Indent less than the previous line.
#
# tokens, and in a worst case will leave a closing paren too far
# indented, but this is better than frequently leaving it not
# indented enough.
- my $last_spaces = get_SPACES($last_indentation_written);
+ my $last_spaces = get_spaces($last_indentation_written);
if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
$last_spaces +=
- get_RECOVERABLE_SPACES($last_indentation_written);
+ get_recoverable_spaces($last_indentation_written);
}
# reset the indentation to the new space count if it works
}
}
- # revert to default if it doesnt work
+ # revert to default if it doesn't work
else {
$space_count = leading_spaces_to_go($ibeg);
if ( $default_adjust_indentation == 0 ) {
if ( $block_type_to_go[$ibeg]
&& $ci_levels_to_go[$i_terminal] == 0 )
{
- my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] );
+ my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
$indentation = $spaces + $rOpts_indent_columns;
# NOTE: for -lp we could create a new indentation object, but
# Current method: use the minimum of the two. This avoids
# inconsistent indentation.
- if ( get_SPACES($last_indentation_written) <
- get_SPACES($indentation) )
+ if ( get_spaces($last_indentation_written) <
+ get_spaces($indentation) )
{
$indentation = $last_indentation_written;
}
# we must treat something like '} else {' as if it were
# an isolated brace my $is_isolated_block_brace = (
# $iend == $ibeg ) && $block_type_to_go[$ibeg];
+ #############################################################
my $is_isolated_block_brace = $block_type_to_go[$ibeg]
&& ( $iend == $ibeg
|| $is_if_elsif_else_unless_while_until_for_foreach{
- $block_type_to_go[$ibeg] } );
- #############################################################
- if ( !$is_isolated_block_brace && defined($opening_indentation) ) {
- if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
+ $block_type_to_go[$ibeg]
+ } );
+
+ # only do this for a ':; which is aligned with its leading '?'
+ my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
+
+ if (
+ defined($opening_indentation)
+ && !$leading_paren_arrow # MOJO
+ && !$is_isolated_block_brace
+ && !$is_unaligned_colon
+ )
+ {
+ if ( get_spaces($opening_indentation) > get_spaces($indentation) ) {
$indentation = $opening_indentation;
}
}
# if we should combine this line with the next line to achieve the
# desired vertical tightness. The array of parameters contains:
#
- # [0] type: 1=is opening tok 2=is closing tok 3=is opening block brace
+ # [0] type: 1=opening non-block 2=closing non-block
+ # 3=opening block brace 4=closing block brace
+ #
# [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
# if closing: spaces of padding to use
# [2] sequence number of container
my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 1:
+ # Handle Lines 1 .. n-1 but not the last line
# For non-BLOCK tokens, we will need to examine the next line
# too, so we won't consider the last line.
+ #--------------------------------------------------------------
if ( $n < $n_last_line ) {
- # see if last token is an opening token...not a BLOCK...
- my $ibeg_next = $$ri_first[ $n + 1 ];
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 1a:
+ # Look for Type 1, last token of this line is a non-block opening token
+ #--------------------------------------------------------------
+ my $ibeg_next = $ri_first->[ $n + 1 ];
my $token_end = $tokens_to_go[$iend];
- my $iend_next = $$ri_last[ $n + 1 ];
+ my $iend_next = $ri_last->[ $n + 1 ];
if (
$type_sequence_to_go[$iend]
&& !$block_type_to_go[$iend]
# avoid multiple jumps in nesting depth in one line if
# requested
my $ovt = $opening_vertical_tightness{$token_end};
- my $iend_next = $$ri_last[ $n + 1 ];
+ my $iend_next = $ri_last->[ $n + 1 ];
unless (
$ovt < 2
&& ( $nesting_depth_to_go[ $iend_next + 1 ] !=
}
}
- # see if first token of next line is a closing token...
- # ..and be sure this line does not have a side comment
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 1b:
+ # Look for Type 2, first token of next line is a non-block closing
+ # token .. and be sure this line does not have a side comment
+ #--------------------------------------------------------------
my $token_next = $tokens_to_go[$ibeg_next];
if ( $type_sequence_to_go[$ibeg_next]
&& !$block_type_to_go[$ibeg_next]
}
}
- # Opening Token Right
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 1c:
+ # Implement the Opening Token Right flag (Type 2)..
# If requested, move an isolated trailing opening token to the end of
# the previous line which ended in a comma. We could do this
# in sub recombine_breakpoints but that would cause problems
# quickly move far to the right in nested expressions. By
# doing it after indentation has been set, we avoid changes
# to the indentation. Actual movement of the token takes place
- # in sub write_leader_and_string.
+ # in sub valign_output_step_B.
+ #--------------------------------------------------------------
if (
$opening_token_right{ $tokens_to_go[$ibeg_next] }
# previous line ended in one of these
# (add other cases if necessary; '=>' and '.' are not necessary
- ##&& ($is_opening_token{$token_end} || $token_end eq ',')
&& !$block_type_to_go[$ibeg_next]
# this is a line with just an opening token
( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
}
- # Stacking of opening and closing tokens
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 1d:
+ # Stacking of opening and closing tokens (Type 2)
+ #--------------------------------------------------------------
my $stackable;
my $token_beg_next = $tokens_to_go[$ibeg_next];
}
}
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 2:
+ # Handle type 3, opening block braces on last line of the batch
# Check for a last line with isolated opening BLOCK curly
+ #--------------------------------------------------------------
elsif ($rOpts_block_brace_vertical_tightness
- && $ibeg eq $iend
+ && $ibeg eq $iend
&& $types_to_go[$iend] eq '{'
&& $block_type_to_go[$iend] =~
/$block_brace_vertical_tightness_pattern/o )
( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
}
+ #--------------------------------------------------------------
+ # Vertical Tightness Flags Section 3:
+ # Handle type 4, a closing block brace on the last line of the batch Check
+ # for a last line with isolated closing BLOCK curly
+ #--------------------------------------------------------------
+ elsif ($rOpts_stack_closing_block_brace
+ && $ibeg eq $iend
+ && $block_type_to_go[$iend]
+ && $types_to_go[$iend] eq '}' )
+ {
+ my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
+ @{$rvertical_tightness_flags} =
+ ( 4, $spaces, $type_sequence_to_go[$iend], 1 );
+ }
+
# pack in the sequence numbers of the ends of this line
$rvertical_tightness_flags->[4] = get_seqno($ibeg);
$rvertical_tightness_flags->[5] = get_seqno($iend);
{
my %is_vertical_alignment_type;
my %is_vertical_alignment_keyword;
+ my %is_terminal_alignment_type;
BEGIN {
- @_ = qw#
+ my @q;
+
+ # Removed =~ from list to improve chances of alignment
+ # Removed // from list to improve chances of alignment (RT# 119588)
+ @q = qw#
= **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
- { ? : => =~ && || // ~~ !~~
+ { ? : => && || ~~ !~~
#;
- @is_vertical_alignment_type{@_} = (1) x scalar(@_);
+ @is_vertical_alignment_type{@q} = (1) x scalar(@q);
+
+ # only align these at end of line
+ @q = qw(&& ||);
+ @is_terminal_alignment_type{@q} = (1) x scalar(@q);
- @_ = qw(if unless and or err eq ne for foreach while until);
- @is_vertical_alignment_keyword{@_} = (1) x scalar(@_);
+ # eq and ne were removed from this list to improve alignment chances
+ @q = qw(if unless and or err for foreach while until);
+ @is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
}
sub set_vertical_alignment_markers {
# $matching_token_to_go[$i] equal to those tokens at which we would
# accept vertical alignment.
+ my ( $ri_first, $ri_last ) = @_;
+
# nothing to do if we aren't allowed to change whitespace
if ( !$rOpts_add_whitespace ) {
for my $i ( 0 .. $max_index_to_go ) {
return;
}
- my ( $ri_first, $ri_last ) = @_;
-
# remember the index of last nonblank token before any sidecomment
my $i_terminal = $max_index_to_go;
if ( $types_to_go[$i_terminal] eq '#' ) {
my $vert_last_nonblank_type;
my $vert_last_nonblank_token;
my $vert_last_nonblank_block_type;
- my $max_line = @$ri_first - 1;
- my ( $i, $type, $token, $block_type, $alignment_type );
- my ( $ibeg, $iend, $line );
+ my $max_line = @{$ri_first} - 1;
- foreach $line ( 0 .. $max_line ) {
- $ibeg = $$ri_first[$line];
- $iend = $$ri_last[$line];
+ foreach my $line ( 0 .. $max_line ) {
+ my $ibeg = $ri_first->[$line];
+ my $iend = $ri_last->[$line];
$last_vertical_alignment_before_index = -1;
$vert_last_nonblank_type = '';
$vert_last_nonblank_token = '';
$vert_last_nonblank_block_type = '';
# look at each token in this output line..
- foreach $i ( $ibeg .. $iend ) {
- $alignment_type = '';
- $type = $types_to_go[$i];
- $block_type = $block_type_to_go[$i];
- $token = $tokens_to_go[$i];
+ foreach my $i ( $ibeg .. $iend ) {
+ my $alignment_type = '';
+ my $type = $types_to_go[$i];
+ my $block_type = $block_type_to_go[$i];
+ my $token = $tokens_to_go[$i];
# check for flag indicating that we should not align
# this token
$alignment_type = $token;
# Do not align a terminal token. Although it might
- # occasionally look ok to do this, it has been found to be
+ # occasionally look ok to do this, this has been found to be
# a good general rule. The main problems are:
# (1) that the terminal token (such as an = or :) might get
# moved far to the right where it is hard to see because
# nothing follows it, and
# (2) doing so may prevent other good alignments.
+ # Current exceptions are && and ||
if ( $i == $iend || $i >= $i_terminal ) {
- $alignment_type = "";
+ $alignment_type = ""
+ unless ( $is_terminal_alignment_type{$type} );
}
# Do not align leading ': (' or '. ('. This would prevent
}
}
}
+ return;
}
}
my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;
# check for full-line comment..
- if ( $$rtype[$ibeg] eq '#' ) {
- return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg];
+ if ( $rtype->[$ibeg] eq '#' ) {
+ return wantarray ? ( $rtype->[$ibeg], $ibeg ) : $rtype->[$ibeg];
}
else {
- # start at end and walk bakwards..
+ # start at end and walk backwards..
for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {
# skip past any side comment and blanks
- next if ( $$rtype[$i] eq 'b' );
- next if ( $$rtype[$i] eq '#' );
+ next if ( $rtype->[$i] eq 'b' );
+ next if ( $rtype->[$i] eq '#' );
# found it..make sure it is a BLOCK termination,
# but hide a terminal } after sort/grep/map because it is not
# necessarily the end of the line. (terminal.t)
- my $terminal_type = $$rtype[$i];
+ my $terminal_type = $rtype->[$i];
if (
$terminal_type eq '}'
- && ( !$$rblock_type[$i]
- || ( $is_sort_map_grep_eval_do{ $$rblock_type[$i] } ) )
+ && ( !$rblock_type->[$i]
+ || ( $is_sort_map_grep_eval_do{ $rblock_type->[$i] } ) )
)
{
$terminal_type = 'b';
}
}
-{
+{ # set_bond_strengths
+
my %is_good_keyword_breakpoint;
my %is_lt_gt_le_ge;
+ my %binary_bond_strength;
+ my %nobreak_lhs;
+ my %nobreak_rhs;
+
+ my @bias_tokens;
+ my $delta_bias;
+
+ sub bias_table_key {
+ my ( $type, $token ) = @_;
+ my $bias_table_key = $type;
+ if ( $type eq 'k' ) {
+ $bias_table_key = $token;
+ if ( $token eq 'err' ) { $bias_table_key = 'or' }
+ }
+ return $bias_table_key;
+ }
+
sub set_bond_strengths {
BEGIN {
- @_ = qw(if unless while until for foreach);
- @is_good_keyword_breakpoint{@_} = (1) x scalar(@_);
+ my @q;
+ @q = qw(if unless while until for foreach);
+ @is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
- @_ = qw(lt gt le ge);
- @is_lt_gt_le_ge{@_} = (1) x scalar(@_);
+ @q = qw(lt gt le ge);
+ @is_lt_gt_le_ge{@q} = (1) x scalar(@q);
+ #
+ # The decision about where to break a line depends upon a "bond
+ # strength" between tokens. The LOWER the bond strength, the MORE
+ # likely a break. A bond strength may be any value but to simplify
+ # things there are several pre-defined strength levels:
+
+ # NO_BREAK => 10000;
+ # VERY_STRONG => 100;
+ # STRONG => 2.1;
+ # NOMINAL => 1.1;
+ # WEAK => 0.8;
+ # VERY_WEAK => 0.55;
+
+ # The strength values are based on trial-and-error, and need to be
+ # tweaked occasionally to get desired results. Some comments:
+ #
+ # 1. Only relative strengths are important. small differences
+ # in strengths can make big formatting differences.
+ # 2. Each indentation level adds one unit of bond strength.
+ # 3. A value of NO_BREAK makes an unbreakable bond
+ # 4. A value of VERY_WEAK is the strength of a ','
+ # 5. Values below NOMINAL are considered ok break points.
+ # 6. Values above NOMINAL are considered poor break points.
+ #
+ # The bond strengths should roughly follow precedence order where
+ # possible. If you make changes, please check the results very
+ # carefully on a variety of scripts. Testing with the -extrude
+ # options is particularly helpful in exercising all of the rules.
- ###############################################################
- # NOTE: NO_BREAK's set here are HINTS which may not be honored;
- # essential NO_BREAKS's must be enforced in section 2, below.
- ###############################################################
+ # Wherever possible, bond strengths are defined in the following
+ # tables. There are two main stages to setting bond strengths and
+ # two types of tables:
+ #
+ # The first stage involves looking at each token individually and
+ # defining left and right bond strengths, according to if we want
+ # to break to the left or right side, and how good a break point it
+ # is. For example tokens like =, ||, && make good break points and
+ # will have low strengths, but one might want to break on either
+ # side to put them at the end of one line or beginning of the next.
+ #
+ # The second stage involves looking at certain pairs of tokens and
+ # defining a bond strength for that particular pair. This second
+ # stage has priority.
+
+ #---------------------------------------------------------------
+ # Bond Strength BEGIN Section 1.
+ # Set left and right bond strengths of individual tokens.
+ #---------------------------------------------------------------
- # adding NEW_TOKENS: add a left and right bond strength by
- # mimmicking what is done for an existing token type. You
- # can skip this step at first and take the default, then
- # tweak later to get desired results.
+ # NOTE: NO_BREAK's set in this section first are HINTS which will
+ # probably not be honored. Essential NO_BREAKS's should be set in
+ # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
+ # of this subroutine.
- # The bond strengths should roughly follow precenence order where
- # possible. If you make changes, please check the results very
- # carefully on a variety of scripts.
+ # Note that we are setting defaults in this section. The user
+ # cannot change bond strengths but can cause the left and right
+ # bond strengths of any token type to be swapped through the use of
+ # the -wba and -wbb flags. In this way the user can determine if a
+ # breakpoint token should appear at the end of one line or the
+ # beginning of the next line.
+
+ # The hash keys in this section are token types, plus the text of
+ # certain keywords like 'or', 'and'.
# no break around possible filehandle
$left_bond_strength{'Z'} = NO_BREAK;
# example print (STDERR, "bla"); will fail with break after (
$left_bond_strength{'w'} = NO_BREAK;
- # blanks always have infinite strength to force breaks after real tokens
+ # blanks always have infinite strength to force breaks after
+ # real tokens
$right_bond_strength{'b'} = NO_BREAK;
# try not to break on exponentation
- @_ = qw" ** .. ... <=> ";
- @left_bond_strength{@_} = (STRONG) x scalar(@_);
- @right_bond_strength{@_} = (STRONG) x scalar(@_);
+ @q = qw" ** .. ... <=> ";
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} = (STRONG) x scalar(@q);
# The comma-arrow has very low precedence but not a good break point
$left_bond_strength{'=>'} = NO_BREAK;
$left_bond_strength{'->'} = STRONG;
$right_bond_strength{'->'} = VERY_STRONG;
+ $left_bond_strength{'CORE::'} = NOMINAL;
+ $right_bond_strength{'CORE::'} = NO_BREAK;
+
# breaking AFTER modulus operator is ok:
- @_ = qw" % ";
- @left_bond_strength{@_} = (STRONG) x scalar(@_);
- @right_bond_strength{@_} =
- ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@_);
+ @q = qw" % ";
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} =
+ ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
# Break AFTER math operators * and /
- @_ = qw" * / x ";
- @left_bond_strength{@_} = (STRONG) x scalar(@_);
- @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
+ @q = qw" * / x ";
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
# Break AFTER weakest math operators + and -
# Make them weaker than * but a bit stronger than '.'
- @_ = qw" + - ";
- @left_bond_strength{@_} = (STRONG) x scalar(@_);
- @right_bond_strength{@_} =
- ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@_);
+ @q = qw" + - ";
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} =
+ ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
# breaking BEFORE these is just ok:
- @_ = qw" >> << ";
- @right_bond_strength{@_} = (STRONG) x scalar(@_);
- @left_bond_strength{@_} = (NOMINAL) x scalar(@_);
+ @q = qw" >> << ";
+ @right_bond_strength{@q} = (STRONG) x scalar(@q);
+ @left_bond_strength{@q} = (NOMINAL) x scalar(@q);
# breaking before the string concatenation operator seems best
# because it can be hard to see at the end of a line
$right_bond_strength{'.'} = STRONG;
$left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
- @_ = qw"} ] ) ";
- @left_bond_strength{@_} = (STRONG) x scalar(@_);
- @right_bond_strength{@_} = (NOMINAL) x scalar(@_);
+ @q = qw"} ] ) R";
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
# make these a little weaker than nominal so that they get
# favored for end-of-line characters
- @_ = qw"!= == =~ !~ ~~ !~~";
- @left_bond_strength{@_} = (STRONG) x scalar(@_);
- @right_bond_strength{@_} =
- ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);
+ @q = qw"!= == =~ !~ ~~ !~~";
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} =
+ ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
# break AFTER these
- @_ = qw" < > | & >= <=";
- @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_);
- @right_bond_strength{@_} =
- ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_);
+ @q = qw" < > | & >= <=";
+ @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
+ @right_bond_strength{@q} =
+ ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
# breaking either before or after a quote is ok
# but bias for breaking before a quote
$left_bond_strength{'G'} = NOMINAL;
$right_bond_strength{'G'} = STRONG;
- # it is good to break AFTER various assignment operators
- @_ = qw(
+ # assignment operators
+ @q = qw(
= **= += *= &= <<= &&=
-= /= |= >>= ||= //=
.= %= ^=
x=
);
- @left_bond_strength{@_} = (STRONG) x scalar(@_);
- @right_bond_strength{@_} =
- ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_);
- # break BEFORE '&&' and '||' and '//'
+ # Default is to break AFTER various assignment operators
+ @left_bond_strength{@q} = (STRONG) x scalar(@q);
+ @right_bond_strength{@q} =
+ ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
+
+ # Default is to break BEFORE '&&' and '||' and '//'
# set strength of '||' to same as '=' so that chains like
# $a = $b || $c || $d will break before the first '||'
$right_bond_strength{'||'} = NOMINAL;
$left_bond_strength{','} = VERY_STRONG;
$right_bond_strength{','} = VERY_WEAK;
+ # remaining digraphs and trigraphs not defined above
+ @q = qw( :: <> ++ --);
+ @left_bond_strength{@q} = (WEAK) x scalar(@q);
+ @right_bond_strength{@q} = (STRONG) x scalar(@q);
+
# Set bond strengths of certain keywords
# make 'or', 'err', 'and' slightly weaker than a ','
$left_bond_strength{'and'} = VERY_WEAK - 0.01;
$right_bond_strength{'or'} = NOMINAL;
$right_bond_strength{'err'} = NOMINAL;
$right_bond_strength{'xor'} = STRONG;
- }
+
+ #---------------------------------------------------------------
+ # Bond Strength BEGIN Section 2.
+ # Set binary rules for bond strengths between certain token types.
+ #---------------------------------------------------------------
+
+ # We have a little problem making tables which apply to the
+ # container tokens. Here is a list of container tokens and
+ # their types:
+ #
+ # type tokens // meaning
+ # { {, [, ( // indent
+ # } }, ], ) // outdent
+ # [ [ // left non-structural [ (enclosing an array index)
+ # ] ] // right non-structural square bracket
+ # ( ( // left non-structural paren
+ # ) ) // right non-structural paren
+ # L { // left non-structural curly brace (enclosing a key)
+ # R } // right non-structural curly brace
+ #
+ # Some rules apply to token types and some to just the token
+ # itself. We solve the problem by combining type and token into a
+ # new hash key for the container types.
+ #
+ # If a rule applies to a token 'type' then we need to make rules
+ # for each of these 'type.token' combinations:
+ # Type Type.Token
+ # { {{, {[, {(
+ # [ [[
+ # ( ((
+ # L L{
+ # } }}, }], })
+ # ] ]]
+ # ) ))
+ # R R}
+ #
+ # If a rule applies to a token then we need to make rules for
+ # these 'type.token' combinations:
+ # Token Type.Token
+ # { {{, L{
+ # [ {[, [[
+ # ( {(, ((
+ # } }}, R}
+ # ] }], ]]
+ # ) }), ))
+
+ # allow long lines before final { in an if statement, as in:
+ # if (..........
+ # ..........)
+ # {
+ #
+ # Otherwise, the line before the { tends to be too short.
+
+ $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
+ $binary_bond_strength{'(('}{'{{'} = NOMINAL;
+
+ # break on something like '} (', but keep this stronger than a ','
+ # example is in 'howe.pl'
+ $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
+ $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
+
+ # keep matrix and hash indices together
+ # but make them a little below STRONG to allow breaking open
+ # something like {'some-word'}{'some-very-long-word'} at the }{
+ # (bracebrk.t)
+ $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
+ $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
+ $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
+ $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
+
+ # increase strength to the point where a break in the following
+ # will be after the opening paren rather than at the arrow:
+ # $a->$b($c);
+ $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
+
+ $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+ $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
+
+ $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
+ $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
+ $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
+ $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
+
+ #---------------------------------------------------------------
+ # Binary NO_BREAK rules
+ #---------------------------------------------------------------
+
+ # use strict requires that bare word and => not be separated
+ $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
+ $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
+
+ # Never break between a bareword and a following paren because
+ # perl may give an error. For example, if a break is placed
+ # between 'to_filehandle' and its '(' the following line will
+ # give a syntax error [Carp.pm]: my( $no) =fileno(
+ # to_filehandle( $in)) ;
+ $binary_bond_strength{'C'}{'(('} = NO_BREAK;
+ $binary_bond_strength{'C'}{'{('} = NO_BREAK;
+ $binary_bond_strength{'U'}{'(('} = NO_BREAK;
+ $binary_bond_strength{'U'}{'{('} = NO_BREAK;
+
+ # use strict requires that bare word within braces not start new
+ # line
+ $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
+
+ $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
+
+ # use strict requires that bare word and => not be separated
+ $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
+
+ # use strict does not allow separating type info from trailing { }
+ # testfile is readmail.pl
+ $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
+ $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
+
+ # As a defensive measure, do not break between a '(' and a
+ # filehandle. In some cases, this can cause an error. For
+ # example, the following program works:
+ # my $msg="hi!\n";
+ # print
+ # ( STDOUT
+ # $msg
+ # );
+ #
+ # But this program fails:
+ # my $msg="hi!\n";
+ # print
+ # (
+ # STDOUT
+ # $msg
+ # );
+ #
+ # This is normally only a problem with the 'extrude' option
+ $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
+ $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
+
+ # never break between sub name and opening paren
+ $binary_bond_strength{'w'}{'(('} = NO_BREAK;
+ $binary_bond_strength{'w'}{'{('} = NO_BREAK;
+
+ # keep '}' together with ';'
+ $binary_bond_strength{'}}'}{';'} = NO_BREAK;
+
+ # Breaking before a ++ can cause perl to guess wrong. For
+ # example the following line will cause a syntax error
+ # with -extrude if we break between '$i' and '++' [fixstyle2]
+ # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
+ $nobreak_lhs{'++'} = NO_BREAK;
+
+ # Do not break before a possible file handle
+ $nobreak_lhs{'Z'} = NO_BREAK;
+
+ # use strict hates bare words on any new line. For
+ # example, a break before the underscore here provokes the
+ # wrath of use strict:
+ # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
+ $nobreak_rhs{'F'} = NO_BREAK;
+ $nobreak_rhs{'CORE::'} = NO_BREAK;
+
+ #---------------------------------------------------------------
+ # Bond Strength BEGIN Section 3.
+ # Define tables and values for applying a small bias to the above
+ # values.
+ #---------------------------------------------------------------
+ # Adding a small 'bias' to strengths is a simple way to make a line
+ # break at the first of a sequence of identical terms. For
+ # example, to force long string of conditional operators to break
+ # with each line ending in a ':', we can add a small number to the
+ # bond strength of each ':' (colon.t)
+ @bias_tokens = qw( : && || f and or . ); # tokens which get bias
+ $delta_bias = 0.0001; # a very small strength level
+
+ } ## end BEGIN
# patch-its always ok to break at end of line
$nobreak_to_go[$max_index_to_go] = 0;
- # adding a small 'bias' to strengths is a simple way to make a line
- # break at the first of a sequence of identical terms. For example,
- # to force long string of conditional operators to break with
- # each line ending in a ':', we can add a small number to the bond
- # strength of each ':'
- my $colon_bias = 0;
- my $amp_bias = 0;
- my $bar_bias = 0;
- my $and_bias = 0;
- my $or_bias = 0;
- my $dot_bias = 0;
- my $f_bias = 0;
- my $code_bias = -.01;
- my $type = 'b';
- my $token = ' ';
+ # we start a new set of bias values for each line
+ my %bias;
+ @bias{@bias_tokens} = (0) x scalar(@bias_tokens);
+ my $code_bias = -.01; # bias for closing block braces
+
+ my $type = 'b';
+ my $token = ' ';
my $last_type;
my $last_nonblank_type = $type;
my $last_nonblank_token = $token;
- my $delta_bias = 0.0001;
my $list_str = $left_bond_strength{'?'};
my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
$next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
);
- # preliminary loop to compute bond strengths
- for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
+ # main loop to compute bond strengths between each pair of tokens
+ foreach my $i ( 0 .. $max_index_to_go ) {
$last_type = $type;
if ( $type ne 'b' ) {
$last_nonblank_type = $type;
$next_nonblank_type = $types_to_go[$i_next_nonblank];
$next_nonblank_token = $tokens_to_go[$i_next_nonblank];
- # Some token chemistry... The decision about where to break a
- # line depends upon a "bond strength" between tokens. The LOWER
- # the bond strength, the MORE likely a break. The strength
- # values are based on trial-and-error, and need to be tweaked
- # occasionally to get desired results. Things to keep in mind
- # are:
- # 1. relative strengths are important. small differences
- # in strengths can make big formatting differences.
- # 2. each indentation level adds one unit of bond strength
- # 3. a value of NO_BREAK makes an unbreakable bond
- # 4. a value of VERY_WEAK is the strength of a ','
- # 5. values below NOMINAL are considered ok break points
- # 6. values above NOMINAL are considered poor break points
# We are computing the strength of the bond between the current
# token and the NEXT token.
- my $bond_str = VERY_STRONG; # a default, high strength
#---------------------------------------------------------------
- # section 1:
- # use minimum of left and right bond strengths if defined;
- # digraphs and trigraphs like to break on their left
+ # Bond Strength Section 1:
+ # First Approximation.
+ # Use minimum of individual left and right tabulated bond
+ # strengths.
#---------------------------------------------------------------
my $bsr = $right_bond_strength{$type};
-
- if ( !defined($bsr) ) {
-
- if ( $is_digraph{$type} || $is_trigraph{$type} ) {
- $bsr = STRONG;
- }
- else {
- $bsr = VERY_STRONG;
- }
- }
+ my $bsl = $left_bond_strength{$next_nonblank_type};
# define right bond strengths of certain keywords
if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
elsif ( $token eq 'ne' or $token eq 'eq' ) {
$bsr = NOMINAL;
}
- my $bsl = $left_bond_strength{$next_nonblank_type};
# set terminal bond strength to the nominal value
# this will cause good preceding breaks to be retained
$bsl = NOMINAL;
}
- if ( !defined($bsl) ) {
-
- if ( $is_digraph{$next_nonblank_type}
- || $is_trigraph{$next_nonblank_type} )
- {
- $bsl = WEAK;
- }
- else {
- $bsl = VERY_STRONG;
- }
- }
-
# define right bond strengths of certain keywords
if ( $next_nonblank_type eq 'k'
&& defined( $left_bond_strength{$next_nonblank_token} ) )
$bsl = 0.9 * NOMINAL + 0.1 * STRONG;
}
- # Note: it might seem that we would want to keep a NO_BREAK if
- # either token has this value. This didn't work, because in an
- # arrow list, it prevents the comma from separating from the
- # following bare word (which is probably quoted by its arrow).
- # So necessary NO_BREAK's have to be handled as special cases
- # in the final section.
- $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
+ # Use the minimum of the left and right strengths. Note: it might
+ # seem that we would want to keep a NO_BREAK if either token has
+ # this value. This didn't work, for example because in an arrow
+ # list, it prevents the comma from separating from the following
+ # bare word (which is probably quoted by its arrow). So necessary
+ # NO_BREAK's have to be handled as special cases in the final
+ # section.
+ if ( !defined($bsr) ) { $bsr = VERY_STRONG }
+ if ( !defined($bsl) ) { $bsl = VERY_STRONG }
+ my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
my $bond_str_1 = $bond_str;
#---------------------------------------------------------------
- # section 2:
- # special cases
+ # Bond Strength Section 2:
+ # Apply hardwired rules..
#---------------------------------------------------------------
- # allow long lines before final { in an if statement, as in:
- # if (..........
- # ..........)
- # {
+ # Patch to put terminal or clauses on a new line: Weaken the bond
+ # at an || followed by die or similar keyword to make the terminal
+ # or clause fall on a new line, like this:
#
- # Otherwise, the line before the { tends to be too short.
- if ( $type eq ')' ) {
- if ( $next_nonblank_type eq '{' ) {
- $bond_str = VERY_WEAK + 0.03;
- }
- }
-
- elsif ( $type eq '(' ) {
- if ( $next_nonblank_type eq '{' ) {
- $bond_str = NOMINAL;
- }
- }
-
- # break on something like '} (', but keep this stronger than a ','
- # example is in 'howe.pl'
- elsif ( $type eq 'R' or $type eq '}' ) {
- if ( $next_nonblank_type eq '(' ) {
- $bond_str = 0.8 * VERY_WEAK + 0.2 * WEAK;
- }
- }
-
- #-----------------------------------------------------------------
- # adjust bond strength bias
- #-----------------------------------------------------------------
-
- elsif ( $type eq 'f' ) {
- $bond_str += $f_bias;
- $f_bias += $delta_bias;
- }
-
- # in long ?: conditionals, bias toward just one set per line (colon.t)
- elsif ( $type eq ':' ) {
- if ( !$want_break_before{$type} ) {
- $bond_str += $colon_bias;
- $colon_bias += $delta_bias;
- }
- }
-
- if ( $next_nonblank_type eq ':'
- && $want_break_before{$next_nonblank_type} )
- {
- $bond_str += $colon_bias;
- $colon_bias += $delta_bias;
- }
-
- # if leading '.' is used, align all but 'short' quotes;
- # the idea is to not place something like "\n" on a single line.
- elsif ( $next_nonblank_type eq '.' ) {
- if ( $want_break_before{'.'} ) {
- unless (
- $last_nonblank_type eq '.'
- && (
- length($token) <=
- $rOpts_short_concatenation_item_length )
- && ( $token !~ /^[\)\]\}]$/ )
- )
- {
- $dot_bias += $delta_bias;
+ # my $class = shift
+ # || die "Cannot add broadcast: No class identifier found";
+ #
+ # Otherwise the break will be at the previous '=' since the || and
+ # = have the same starting strength and the or is biased, like
+ # this:
+ #
+ # my $class =
+ # shift || die "Cannot add broadcast: No class identifier found";
+ #
+ # In any case if the user places a break at either the = or the ||
+ # it should remain there.
+ if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
+ if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) {
+ if ( $want_break_before{$token} && $i > 0 ) {
+ $bond_strength_to_go[ $i - 1 ] -= $delta_bias;
}
- $bond_str += $dot_bias;
- }
- }
- elsif ($next_nonblank_type eq '&&'
- && $want_break_before{$next_nonblank_type} )
- {
- $bond_str += $amp_bias;
- $amp_bias += $delta_bias;
- }
- elsif ($next_nonblank_type eq '||'
- && $want_break_before{$next_nonblank_type} )
- {
- $bond_str += $bar_bias;
- $bar_bias += $delta_bias;
- }
- elsif ( $next_nonblank_type eq 'k' ) {
-
- if ( $next_nonblank_token eq 'and'
- && $want_break_before{$next_nonblank_token} )
- {
- $bond_str += $and_bias;
- $and_bias += $delta_bias;
- }
- elsif ($next_nonblank_token =~ /^(or|err)$/
- && $want_break_before{$next_nonblank_token} )
- {
- $bond_str += $or_bias;
- $or_bias += $delta_bias;
- }
-
- # FIXME: needs more testing
- elsif ( $is_keyword_returning_list{$next_nonblank_token} ) {
- $bond_str = $list_str if ( $bond_str > $list_str );
- }
- elsif ( $token eq 'err'
- && !$want_break_before{$token} )
- {
- $bond_str += $or_bias;
- $or_bias += $delta_bias;
- }
- }
-
- if ( $type eq ':'
- && !$want_break_before{$type} )
- {
- $bond_str += $colon_bias;
- $colon_bias += $delta_bias;
- }
- elsif ( $type eq '&&'
- && !$want_break_before{$type} )
- {
- $bond_str += $amp_bias;
- $amp_bias += $delta_bias;
- }
- elsif ( $type eq '||'
- && !$want_break_before{$type} )
- {
- $bond_str += $bar_bias;
- $bar_bias += $delta_bias;
- }
- elsif ( $type eq 'k' ) {
-
- if ( $token eq 'and'
- && !$want_break_before{$token} )
- {
- $bond_str += $and_bias;
- $and_bias += $delta_bias;
- }
- elsif ( $token eq 'or'
- && !$want_break_before{$token} )
- {
- $bond_str += $or_bias;
- $or_bias += $delta_bias;
- }
- }
-
- # keep matrix and hash indices together
- # but make them a little below STRONG to allow breaking open
- # something like {'some-word'}{'some-very-long-word'} at the }{
- # (bracebrk.t)
- if ( ( $type eq ']' or $type eq 'R' )
- && ( $next_nonblank_type eq '[' or $next_nonblank_type eq 'L' )
- )
- {
- $bond_str = 0.9 * STRONG + 0.1 * NOMINAL;
- }
-
- if ( $next_nonblank_token =~ /^->/ ) {
-
- # increase strength to the point where a break in the following
- # will be after the opening paren rather than at the arrow:
- # $a->$b($c);
- if ( $type eq 'i' ) {
- $bond_str = 1.45 * STRONG;
- }
-
- elsif ( $type =~ /^[\)\]\}R]$/ ) {
- $bond_str = 0.1 * STRONG + 0.9 * NOMINAL;
- }
-
- # otherwise make strength before an '->' a little over a '+'
- else {
- if ( $bond_str <= NOMINAL ) {
- $bond_str = NOMINAL + 0.01;
+ else {
+ $bond_str -= $delta_bias;
}
}
}
- if ( $token eq ')' && $next_nonblank_token eq '[' ) {
- $bond_str = 0.2 * STRONG + 0.8 * NOMINAL;
- }
-
- # map1.t -- correct for a quirk in perl
- if ( $token eq '('
- && $next_nonblank_type eq 'i'
- && $last_nonblank_type eq 'k'
- && $is_sort_map_grep{$last_nonblank_token} )
-
- # /^(sort|map|grep)$/ )
- {
- $bond_str = NO_BREAK;
- }
-
- # extrude.t: do not break before paren at:
- # -l pid_filename(
- if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
- $bond_str = NO_BREAK;
- }
-
# good to break after end of code blocks
- if ( $type eq '}' && $block_type ) {
+ if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
$bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
$code_bias += $delta_bias;
$bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
}
-# Don't break after keyword my. This is a quick fix for a
-# rare problem with perl. An example is this line from file
-# Container.pm:
-# foreach my $question( Debian::DebConf::ConfigDb::gettree( $this->{'question'} ) )
+ # Don't break after keyword my. This is a quick fix for a
+ # rare problem with perl. An example is this line from file
+ # Container.pm:
+
+ # foreach my $question( Debian::DebConf::ConfigDb::gettree(
+ # $this->{'question'} ) )
if ( $token eq 'my' ) {
$bond_str = NO_BREAK;
$bond_str = VERY_WEAK;
}
- if ( $next_nonblank_type eq 'k' ) {
+ if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
+
+ # FIXME: needs more testing
+ if ( $is_keyword_returning_list{$next_nonblank_token} ) {
+ $bond_str = $list_str if ( $bond_str > $list_str );
+ }
# keywords like 'unless', 'if', etc, within statements
# make good breaks
if ( $bond_str < STRONG ) { $bond_str = STRONG }
}
- #----------------------------------------------------------------------
- # only set NO_BREAK's from here on
- #----------------------------------------------------------------------
- if ( $type eq 'C' or $type eq 'U' ) {
+ #---------------------------------------------------------------
+ # Additional hardwired NOBREAK rules
+ #---------------------------------------------------------------
- # use strict requires that bare word and => not be separated
- if ( $next_nonblank_type eq '=>' ) {
- $bond_str = NO_BREAK;
- }
+ # map1.t -- correct for a quirk in perl
+ if ( $token eq '('
+ && $next_nonblank_type eq 'i'
+ && $last_nonblank_type eq 'k'
+ && $is_sort_map_grep{$last_nonblank_token} )
+ # /^(sort|map|grep)$/ )
+ {
+ $bond_str = NO_BREAK;
}
- # use strict requires that bare word within braces not start new line
- elsif ( $type eq 'L' ) {
-
- if ( $next_nonblank_type eq 'w' ) {
- $bond_str = NO_BREAK;
- }
+ # extrude.t: do not break before paren at:
+ # -l pid_filename(
+ if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
+ $bond_str = NO_BREAK;
}
# in older version of perl, use strict can cause problems with
# breaks before bare words following opening parens. For example,
# this will fail under older versions if a break is made between
- # '(' and 'MAIL':
- # use strict;
- # open( MAIL, "a long filename or command");
- # close MAIL;
- elsif ( $type eq '{' ) {
+ # '(' and 'MAIL': use strict; open( MAIL, "a long filename or
+ # command"); close MAIL;
+ if ( $type eq '{' ) {
if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
$next_next_type = $types_to_go[$i_next_next_nonblank];
}
- ##if ( $next_next_type ne '=>' ) {
- # these are ok: '->xxx', '=>', '('
-
# We'll check for an old breakpoint and keep a leading
# bareword if it was that way in the input file.
# Presumably it was ok that way. For example, the
# );
#
# This should be sufficient:
- if ( !$old_breakpoint_to_go[$i]
- && ( $next_next_type eq ',' || $next_next_type eq '}' )
+ if (
+ !$old_breakpoint_to_go[$i]
+ && ( $next_next_type eq ','
+ || $next_next_type eq '}' )
)
{
$bond_str = NO_BREAK;
}
}
- elsif ( $type eq 'w' ) {
-
- if ( $next_nonblank_type eq 'R' ) {
- $bond_str = NO_BREAK;
- }
-
- # use strict requires that bare word and => not be separated
- if ( $next_nonblank_type eq '=>' ) {
- $bond_str = NO_BREAK;
- }
- }
-
- # in fact, use strict hates bare words on any new line. For
- # example, a break before the underscore here provokes the
- # wrath of use strict:
- # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
- elsif ( $type eq 'F' ) {
- $bond_str = NO_BREAK;
- }
-
- # use strict does not allow separating type info from trailing { }
- # testfile is readmail.pl
- elsif ( $type eq 't' or $type eq 'i' ) {
-
- if ( $next_nonblank_type eq 'L' ) {
- $bond_str = NO_BREAK;
- }
- }
-
# Do not break between a possible filehandle and a ? or / and do
# not introduce a break after it if there is no blank
# (extrude.t)
elsif ( $type eq 'Z' ) {
- # dont break..
+ # don't break..
if (
# if there is no blank and we do not want one. Examples:
}
}
- # Do not break before a possible file handle
- if ( $next_nonblank_type eq 'Z' ) {
- $bond_str = NO_BREAK;
+ # Breaking before a ? before a quote can cause trouble if
+ # they are not separated by a blank.
+ # Example: a syntax error occurs if you break before the ? here
+ # my$logic=join$all?' && ':' || ',@regexps;
+ # From: Professional_Perl_Programming_Code/multifind.pl
+ if ( $next_nonblank_type eq '?' ) {
+ $bond_str = NO_BREAK
+ if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
}
- # As a defensive measure, do not break between a '(' and a
- # filehandle. In some cases, this can cause an error. For
- # example, the following program works:
- # my $msg="hi!\n";
- # print
- # ( STDOUT
- # $msg
- # );
- #
- # But this program fails:
- # my $msg="hi!\n";
- # print
- # (
- # STDOUT
- # $msg
- # );
- #
- # This is normally only a problem with the 'extrude' option
- if ( $next_nonblank_type eq 'Y' && $token eq '(' ) {
- $bond_str = NO_BREAK;
+ # Breaking before a . followed by a number
+ # can cause trouble if there is no intervening space
+ # Example: a syntax error occurs if you break before the .2 here
+ # $str .= pack($endian.2, ensurrogate($ord));
+ # From: perl58/Unicode.pm
+ elsif ( $next_nonblank_type eq '.' ) {
+ $bond_str = NO_BREAK
+ if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
}
# patch to put cuddled elses back together when on multiple
$bond_str = NO_BREAK;
}
}
+ my $bond_str_2 = $bond_str;
- # keep '}' together with ';'
- if ( ( $token eq '}' ) && ( $next_nonblank_type eq ';' ) ) {
- $bond_str = NO_BREAK;
- }
+ #---------------------------------------------------------------
+ # End of hardwired rules
+ #---------------------------------------------------------------
- # never break between sub name and opening paren
- if ( ( $type eq 'w' ) && ( $next_nonblank_token eq '(' ) ) {
- $bond_str = NO_BREAK;
+ #---------------------------------------------------------------
+ # Bond Strength Section 3:
+ # Apply table rules. These have priority over the above
+ # hardwired rules.
+ #---------------------------------------------------------------
+
+ my $tabulated_bond_str;
+ my $ltype = $type;
+ my $rtype = $next_nonblank_type;
+ if ( $token =~ /^[\(\[\{\)\]\}]/ ) { $ltype = $type . $token }
+ if ( $next_nonblank_token =~ /^[\(\[\{\)\]\}]/ ) {
+ $rtype = $next_nonblank_type . $next_nonblank_token;
+ }
+
+ if ( $binary_bond_strength{$ltype}{$rtype} ) {
+ $bond_str = $binary_bond_strength{$ltype}{$rtype};
+ $tabulated_bond_str = $bond_str;
+ }
+
+ if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
+ $bond_str = NO_BREAK;
+ $tabulated_bond_str = $bond_str;
+ }
+ my $bond_str_3 = $bond_str;
+
+ # If the hardwired rules conflict with the tabulated bond
+ # strength then there is an inconsistency that should be fixed
+ FORMATTER_DEBUG_FLAG_BOND_TABLES
+ && $tabulated_bond_str
+ && $bond_str_1
+ && $bond_str_1 != $bond_str_2
+ && $bond_str_2 != $tabulated_bond_str
+ && do {
+ print STDERR
+"BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
+ };
+
+ #-----------------------------------------------------------------
+ # Bond Strength Section 4:
+ # Modify strengths of certain tokens which often occur in sequence
+ # by adding a small bias to each one in turn so that the breaks
+ # occur from left to right.
+ #
+ # Note that we only changing strengths by small amounts here,
+ # and usually increasing, so we should not be altering any NO_BREAKs.
+ # Other routines which check for NO_BREAKs will use a tolerance
+ # of one to avoid any problem.
+ #-----------------------------------------------------------------
+
+ # The bias tables use special keys
+ my $left_key = bias_table_key( $type, $token );
+ my $right_key =
+ bias_table_key( $next_nonblank_type, $next_nonblank_token );
+
+ # add any bias set by sub scan_list at old comma break points.
+ if ( $type eq ',' ) { $bond_str += $bond_strength_to_go[$i] }
+
+ # bias left token
+ elsif ( defined( $bias{$left_key} ) ) {
+ if ( !$want_break_before{$left_key} ) {
+ $bias{$left_key} += $delta_bias;
+ $bond_str += $bias{$left_key};
+ }
+ }
+
+ # bias right token
+ if ( defined( $bias{$right_key} ) ) {
+ if ( $want_break_before{$right_key} ) {
+
+ # for leading '.' align all but 'short' quotes; the idea
+ # is to not place something like "\n" on a single line.
+ if ( $right_key eq '.' ) {
+ unless (
+ $last_nonblank_type eq '.'
+ && (
+ length($token) <=
+ $rOpts_short_concatenation_item_length )
+ && ( !$is_closing_token{$token} )
+ )
+ {
+ $bias{$right_key} += $delta_bias;
+ }
+ }
+ else {
+ $bias{$right_key} += $delta_bias;
+ }
+ $bond_str += $bias{$right_key};
+ }
}
+ my $bond_str_4 = $bond_str;
#---------------------------------------------------------------
- # section 3:
- # now take nesting depth into account
+ # Bond Strength Section 5:
+ # Fifth Approximation.
+ # Take nesting depth into account by adding the nesting depth
+ # to the bond strength.
#---------------------------------------------------------------
- # final strength incorporates the bond strength and nesting depth
my $strength;
if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
$strength = NO_BREAK;
}
+ #---------------------------------------------------------------
+ # Bond Strength Section 6:
+ # Sixth Approximation. Welds.
+ #---------------------------------------------------------------
+
+ # Do not allow a break within welds,
+ if ( weld_len_right_to_go($i) ) { $strength = NO_BREAK }
+
+ # But encourage breaking after opening welded tokens
+ elsif ( weld_len_left_to_go($i) && $is_opening_token{$token} ) {
+ $strength -= 1;
+ }
+
+## # TESTING: weaken before first weld closing token
+## # This did not help
+## elsif ($i_next_nonblank <= $max_index_to_go
+## && weld_len_right_to_go($i_next_nonblank)
+## && $next_nonblank_token =~ /^[\}\]\)]$/ )
+## {
+## $strength -= 0.9;
+## }
+
# always break after side comment
if ( $type eq '#' ) { $strength = 0 }
FORMATTER_DEBUG_FLAG_BOND && do {
my $str = substr( $token, 0, 15 );
$str .= ' ' x ( 16 - length($str) );
- print
-"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str -> $strength \n";
+ print STDOUT
+"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
};
- }
- }
-
+ } ## end main loop
+ return;
+ } ## end sub set_bond_strengths
}
sub pad_array_to_go {
elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
$nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
}
+ return;
}
{ # begin scan_list
my (
- $block_type, $current_depth,
- $depth, $i,
- $i_last_nonblank_token, $last_colon_sequence_number,
- $last_nonblank_token, $last_nonblank_type,
- $last_old_breakpoint_count, $minimum_depth,
- $next_nonblank_block_type, $next_nonblank_token,
- $next_nonblank_type, $old_breakpoint_count,
- $starting_breakpoint_count, $starting_depth,
- $token, $type,
- $type_sequence,
+ $block_type, $current_depth,
+ $depth, $i,
+ $i_last_nonblank_token, $last_colon_sequence_number,
+ $last_nonblank_token, $last_nonblank_type,
+ $last_nonblank_block_type, $last_old_breakpoint_count,
+ $minimum_depth, $next_nonblank_block_type,
+ $next_nonblank_token, $next_nonblank_type,
+ $old_breakpoint_count, $starting_breakpoint_count,
+ $starting_depth, $token,
+ $type, $type_sequence,
);
my (
$want_comma_break[$depth] = 0;
}
}
+ return;
}
# routine to decide which commas to break at within a container;
my $dd = shift;
my $bp_count = 0;
my $do_not_break_apart = 0;
- if ( $item_count_stack[$dd] && !$dont_align[$dd] ) {
-
- my $fbc = $forced_breakpoint_count;
-
- # always open comma lists not preceded by keywords,
- # barewords, identifiers (that is, anything that doesn't
- # look like a function call)
- my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
-
- set_comma_breakpoints_do(
- $dd,
- $opening_structure_index_stack[$dd],
- $i,
- $item_count_stack[$dd],
- $identifier_count_stack[$dd],
- $comma_index[$dd],
- $next_nonblank_type,
- $container_type[$dd],
- $interrupted_list[$dd],
- \$do_not_break_apart,
- $must_break_open,
- );
- $bp_count = $forced_breakpoint_count - $fbc;
- $do_not_break_apart = 0 if $must_break_open;
+
+ # anything to do?
+ if ( $item_count_stack[$dd] ) {
+
+ # handle commas not in containers...
+ if ( $dont_align[$dd] ) {
+ do_uncontained_comma_breaks($dd);
+ }
+
+ # handle commas within containers...
+ else {
+ my $fbc = $forced_breakpoint_count;
+
+ # always open comma lists not preceded by keywords,
+ # barewords, identifiers (that is, anything that doesn't
+ # look like a function call)
+ my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
+
+ set_comma_breakpoints_do(
+ $dd,
+ $opening_structure_index_stack[$dd],
+ $i,
+ $item_count_stack[$dd],
+ $identifier_count_stack[$dd],
+ $comma_index[$dd],
+ $next_nonblank_type,
+ $container_type[$dd],
+ $interrupted_list[$dd],
+ \$do_not_break_apart,
+ $must_break_open,
+ );
+ $bp_count = $forced_breakpoint_count - $fbc;
+ $do_not_break_apart = 0 if $must_break_open;
+ }
}
return ( $bp_count, $do_not_break_apart );
}
+ sub do_uncontained_comma_breaks {
+
+ # Handle commas not in containers...
+ # This is a catch-all routine for commas that we
+ # don't know what to do with because the don't fall
+ # within containers. We will bias the bond strength
+ # to break at commas which ended lines in the input
+ # file. This usually works better than just trying
+ # to put as many items on a line as possible. A
+ # downside is that if the input file is garbage it
+ # won't work very well. However, the user can always
+ # prevent following the old breakpoints with the
+ # -iob flag.
+ my $dd = shift;
+ my $bias = -.01;
+ my $old_comma_break_count = 0;
+ foreach my $ii ( @{ $comma_index[$dd] } ) {
+ if ( $old_breakpoint_to_go[$ii] ) {
+ $old_comma_break_count++;
+ $bond_strength_to_go[$ii] = $bias;
+
+ # reduce bias magnitude to force breaks in order
+ $bias *= 0.99;
+ }
+ }
+
+ # Also put a break before the first comma if
+ # (1) there was a break there in the input, and
+ # (2) there was exactly one old break before the first comma break
+ # (3) OLD: there are multiple old comma breaks
+ # (3) NEW: there are one or more old comma breaks (see return example)
+ #
+ # For example, we will follow the user and break after
+ # 'print' in this snippet:
+ # print
+ # "conformability (Not the same dimension)\n",
+ # "\t", $have, " is ", text_unit($hu), "\n",
+ # "\t", $want, " is ", text_unit($wu), "\n",
+ # ;
+ #
+ # Another example, just one comma, where we will break after
+ # the return:
+ # return
+ # $x * cos($a) - $y * sin($a),
+ # $x * sin($a) + $y * cos($a);
+
+ # Breaking a print statement:
+ # print SAVEOUT
+ # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
+ # ( $? & 128 ) ? " -- core dumped" : "", "\n";
+ #
+ # But we will not force a break after the opening paren here
+ # (causes a blinker):
+ # $heap->{stream}->set_output_filter(
+ # poe::filter::reference->new('myotherfreezer') ),
+ # ;
+ #
+ my $i_first_comma = $comma_index[$dd]->[0];
+ if ( $old_breakpoint_to_go[$i_first_comma] ) {
+ my $level_comma = $levels_to_go[$i_first_comma];
+ my $ibreak = -1;
+ my $obp_count = 0;
+ for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
+ if ( $old_breakpoint_to_go[$ii] ) {
+ $obp_count++;
+ last if ( $obp_count > 1 );
+ $ibreak = $ii
+ if ( $levels_to_go[$ii] == $level_comma );
+ }
+ }
+
+ # Changed rule from multiple old commas to just one here:
+ if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
+ {
+ # Do not to break before an opening token because
+ # it can lead to "blinkers".
+ my $ibreakm = $ibreak;
+ $ibreakm-- if ( $types_to_go[$ibreakm] eq 'b' );
+ if ( $ibreakm >= 0 && $types_to_go[$ibreakm] !~ /^[\(\{\[L]$/ )
+ {
+ set_forced_breakpoint($ibreak);
+ }
+ }
+ }
+ return;
+ }
+
my %is_logical_container;
BEGIN {
- @_ = qw# if elsif unless while and or err not && | || ? : ! #;
- @is_logical_container{@_} = (1) x scalar(@_);
+ my @q = qw# if elsif unless while and or err not && | || ? : ! #;
+ @is_logical_container{@q} = (1) x scalar(@q);
}
sub set_for_semicolon_breakpoints {
foreach ( @{ $rfor_semicolon_list[$dd] } ) {
set_forced_breakpoint($_);
}
+ return;
}
sub set_logical_breakpoints {
$item_count_stack[$dd] == 0
&& $is_logical_container{ $container_type[$dd] }
- # TESTING:
|| $has_old_logical_breakpoints[$dd]
)
{
}
}
}
+ return;
}
sub is_unbreakable_container {
# never break a container of one of these types
# because bad things can happen (map1.t)
my $dd = shift;
- $is_sort_map_grep{ $container_type[$dd] };
+ return $is_sort_map_grep{ $container_type[$dd] };
}
sub scan_list {
$type = ';';
$type_sequence = '';
+ my $total_depth_variation = 0;
+ my $i_old_assignment_break;
+ my $depth_last = $starting_depth;
+
check_for_new_minimum_depth($current_depth);
my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
$last_nonblank_type = $type;
$last_nonblank_token = $token;
$last_nonblank_block_type = $block_type;
- }
+ } ## end if ( $type ne 'b' )
$type = $types_to_go[$i];
$block_type = $block_type_to_go[$i];
$token = $tokens_to_go[$i];
# as '}') which forms a one-line block, this break might
# get undone.
$want_previous_breakpoint = $i;
+ } ## end if ( $next_nonblank_type...)
+ } ## end if ($rOpts_break_at_old_keyword_breakpoints)
+
+ # Break before attributes if user broke there
+ if ($rOpts_break_at_old_attribute_breakpoints) {
+ if ( $next_nonblank_type eq 'A' ) {
+ $want_previous_breakpoint = $i;
}
}
- }
+
+ # remember an = break as possible good break point
+ if ( $is_assignment{$type} ) {
+ $i_old_assignment_break = $i;
+ }
+ elsif ( $is_assignment{$next_nonblank_type} ) {
+ $i_old_assignment_break = $i_next_nonblank;
+ }
+ } ## end if ( $old_breakpoint_to_go...)
+
next if ( $type eq 'b' );
$depth = $nesting_depth_to_go[ $i + 1 ];
+ $total_depth_variation += abs( $depth - $depth_last );
+ $depth_last = $depth;
+
# safety check - be sure we always break after a comment
# Shouldn't happen .. an error here probably means that the
# nobreak flag did not get turned off correctly during
report_definite_bug();
$nobreak_to_go[$i] = 0;
set_forced_breakpoint($i);
- }
- }
+ } ## end if ( $i != $max_index_to_go)
+ } ## end if ( $type eq '#' )
# Force breakpoints at certain tokens in long lines.
# Note that such breakpoints will be undone later if these tokens
)
{
set_forced_breakpoint( $i - 1 );
- }
+ } ## end if ( $type eq 'k' && $i...)
# remember locations of '||' and '&&' for possible breaks if we
# decide this is a long logical expression.
++$has_old_logical_breakpoints[$depth]
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints );
- }
+ } ## end if ( $type eq '||' )
elsif ( $type eq '&&' ) {
push @{ $rand_or_list[$depth][3] }, $i;
++$has_old_logical_breakpoints[$depth]
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints );
- }
+ } ## end elsif ( $type eq '&&' )
elsif ( $type eq 'f' ) {
push @{ $rfor_semicolon_list[$depth] }, $i;
}
++$has_old_logical_breakpoints[$depth]
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints );
- }
+ } ## end if ( $token eq 'and' )
# break immediately at 'or's which are probably not in a logical
# block -- but we will break in logical breaks below so that
{
$saw_good_breakpoint = 1;
}
- }
- }
+ } ## end else [ if ( $is_logical_container...)]
+ } ## end elsif ( $token eq 'or' )
elsif ( $token eq 'if' || $token eq 'unless' ) {
push @{ $rand_or_list[$depth][4] }, $i;
if ( ( $i == $i_line_start || $i == $i_line_end )
{
set_forced_breakpoint($i);
}
- }
- }
+ } ## end elsif ( $token eq 'if' ||...)
+ } ## end elsif ( $type eq 'k' )
elsif ( $is_assignment{$type} ) {
$i_equals[$depth] = $i;
}
if ( $type eq ':' ) {
$last_colon_sequence_number = $type_sequence;
- # TESTING: retain break at a ':' line break
+ # retain break at a ':' line break
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_ternary_breakpoints )
{
- # TESTING:
set_forced_breakpoint($i);
# break at previous '='
set_forced_breakpoint( $i_equals[$depth] );
$i_equals[$depth] = -1;
}
- }
- }
+ } ## end if ( ( $i == $i_line_start...))
+ } ## end if ( $type eq ':' )
if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
my $inc = ( $type eq ':' ) ? 0 : 1;
set_forced_breakpoint( $i - $inc );
delete $postponed_breakpoint{$type_sequence};
}
- }
+ } ## end if ( $token =~ /^[\)\]\}\:]$/[{[(])
# set breaks at ?/: if they will get separated (and are
# not a ?/: chain), or if the '?' is at the end of the
|| $tokens_to_go[$max_index_to_go] eq '#'
);
set_closing_breakpoint($i);
- }
- }
- }
+ } ## end if ( $i_colon <= 0 ||...)
+ } ## end elsif ( $token eq '?' )
+ } ## end if ($type_sequence)
#print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
# and user wants brace to left
&& !$rOpts->{'opening-brace-always-on-right'}
- && ( $type eq '{' ) # should be true
+ && ( $type eq '{' ) # should be true
&& ( $token eq '{' ) # should be true
)
{
set_forced_breakpoint( $i - 1 );
- }
- }
+ } ## end if ( $block_type && ( ...))
+ } ## end if ( $depth > $current_depth)
#------------------------------------------------------------
# Handle Decreasing Depth..
&& !$rOpts->{'opening-brace-always-on-right'} )
{
set_forced_breakpoint($i);
- }
+ } ## end if ( $token eq ')' && ...
#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
# this term is long if we had to break at interior commas..
my $is_long_term = $bp_count > 0;
- # ..or if the length between opening and closing parens exceeds
- # allowed line length
+ # If this is a short container with one or more comma arrows,
+ # then we will mark it as a long term to open it if requested.
+ # $rOpts_comma_arrow_breakpoints =
+ # 0 - open only if comma precedes closing brace
+ # 1 - stable: except for one line blocks
+ # 2 - try to form 1 line blocks
+ # 3 - ignore =>
+ # 4 - always open up if vt=0
+ # 5 - stable: even for one line blocks if vt=0
+ if ( !$is_long_term
+ && $tokens_to_go[$i_opening] =~ /^[\(\{\[]$/
+ && $index_before_arrow[ $depth + 1 ] > 0
+ && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] }
+ )
+ {
+ $is_long_term = $rOpts_comma_arrow_breakpoints == 4
+ || ( $rOpts_comma_arrow_breakpoints == 0
+ && $last_nonblank_token eq ',' )
+ || ( $rOpts_comma_arrow_breakpoints == 5
+ && $old_breakpoint_to_go[$i_opening] );
+ } ## end if ( !$is_long_term &&...)
+
+ # mark term as long if the length between opening and closing
+ # parens exceeds allowed line length
if ( !$is_long_term && $saw_opening_structure ) {
my $i_opening_minus = find_token_starting_list($i_opening);
# Note: we have to allow for one extra space after a
# closing token so that we do not strand a comma or
# semicolon, hence the '>=' here (oneline.t)
+ # Note: we ignore left weld lengths here for best results
$is_long_term =
- excess_line_length( $i_opening_minus, $i ) >= 0;
- }
+ excess_line_length( $i_opening_minus, $i, 1 ) >= 0;
+ } ## end if ( !$is_long_term &&...)
# We've set breaks after all comma-arrows. Now we have to
# undo them if this can be a one-line block
# user doesn't require breaking after all comma-arrows
( $rOpts_comma_arrow_breakpoints != 0 )
+ && ( $rOpts_comma_arrow_breakpoints != 4 )
# and if the opening structure is in this batch
&& $saw_opening_structure
{
undo_forced_breakpoint_stack(
$breakpoint_undo_stack[$current_depth] );
- }
+ } ## end if ( ( $rOpts_comma_arrow_breakpoints...))
# now see if we have any comma breakpoints left
my $has_comma_breakpoints =
$forced_breakpoint_count );
# update broken-sublist flag of the outer container
- $has_broken_sublist[$depth] = $has_broken_sublist[$depth]
+ $has_broken_sublist[$depth] =
+ $has_broken_sublist[$depth]
|| $has_broken_sublist[$current_depth]
|| $is_long_term
|| $has_comma_breakpoints;
else {
set_logical_breakpoints($current_depth);
}
- }
+ } ## end if ( $item_count_stack...)
if ( $is_long_term
&& @{ $rfor_semicolon_list[$current_depth] } )
# leading term alignment unless -lp is used.
$has_comma_breakpoints = 1
unless $rOpts_line_up_parentheses;
- }
+ } ## end if ( $is_long_term && ...)
if (
$item = $leading_spaces_to_go[ $i_opening + 2 ];
}
if ( defined($item) ) {
- my $i_start_2 = $item->get_STARTING_INDEX();
+ my $i_start_2 = $item->get_starting_index();
if (
defined($i_start_2)
if ( $test2 == $test1 ) {
set_forced_breakpoint( $i_start_2 - 1 );
}
- }
- }
- }
+ } ## end if ( defined($i_start_2...))
+ } ## end if ( defined($item) )
+ } ## end if ( $rOpts_line_up_parentheses...)
# break after opening structure.
# note: break before closing structure will be automatic
unless ( $do_not_break_apart
|| is_unbreakable_container($current_depth) );
+ # break at ',' of lower depth level before opening token
+ if ( $last_comma_index[$depth] ) {
+ set_forced_breakpoint( $last_comma_index[$depth] );
+ }
+
# break at '.' of lower depth level before opening token
if ( $last_dot_index[$depth] ) {
set_forced_breakpoint( $last_dot_index[$depth] );
}
- # break before opening structure if preeced by another
+ # break before opening structure if preceded by another
# closing structure and a comma. This is normally
# done by the previous closing brace, but not
# if it was a one-line block.
if ( $want_break_before{$token_prev} ) {
set_forced_breakpoint($i_prev);
}
- }
- }
- }
+ } ## end elsif ( $types_to_go[$i_prev...])
+ } ## end if ( $i_opening > 2 )
+ } ## end if ( $minimum_depth <=...)
# break after comma following closing structure
if ( $next_type eq ',' ) {
)
{
set_forced_breakpoint($i);
- }
+ } ## end if ( $is_assignment{$next_nonblank_type...})
# break at any comma before the opening structure Added
# for -lp, but seems to be good in general. It isn't
# must set fake breakpoint to alert outer containers that
# they are complex
set_fake_breakpoint();
- }
- }
+ } ## end elsif ($is_long_term)
+
+ } ## end elsif ( $depth < $current_depth)
#------------------------------------------------------------
# Handle this token
$want_comma_break[$depth] = 1;
$index_before_arrow[$depth] = $i_last_nonblank_token;
next;
- }
+ } ## end if ( $type eq '=>' )
elsif ( $type eq '.' ) {
$last_dot_index[$depth] = $i;
$dont_align[$depth] = 1;
$want_comma_break[$depth] = 0;
$index_before_arrow[$depth] = -1;
- }
+ } ## end elsif ( ( $type =~ /^[\;\<\>\~]$/...))
# now just handle any commas
next unless ( $type eq ',' );
if ( $want_comma_break[$depth] ) {
if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
- $want_comma_break[$depth] = 0;
- $index_before_arrow[$depth] = -1;
- next;
+ if ($rOpts_comma_arrow_breakpoints) {
+ $want_comma_break[$depth] = 0;
+ ##$index_before_arrow[$depth] = -1;
+ next;
+ }
}
set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
&& $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
{
if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
- if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
+ if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
# don't break pointer calls, such as the following:
# File::Spec->curdir => 1,
# (This is tokenized as adjacent 'w' tokens)
- if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
+ ##if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
+
+ # And don't break before a comma, as in the following:
+ # ( LONGER_THAN,=> 1,
+ # EIGHTY_CHARACTERS,=> 2,
+ # CAUSES_FORMATTING,=> 3,
+ # LIKE_THIS,=> 4,
+ # );
+ # This example is for -tso but should be general rule
+ if ( $tokens_to_go[ $ibreak + 1 ] ne '->'
+ && $tokens_to_go[ $ibreak + 1 ] ne ',' )
+ {
set_forced_breakpoint($ibreak);
}
- }
- }
+ } ## end if ( $types_to_go[$ibreak...])
+ } ## end if ( $ibreak > 0 && $tokens_to_go...)
$want_comma_break[$depth] = 0;
$index_before_arrow[$depth] = -1;
# treat any list items so far as an interrupted list
$interrupted_list[$depth] = 1;
next;
- }
-
- # skip past these commas if we are not supposed to format them
- next if ( $dont_align[$depth] );
+ } ## end if ( $want_comma_break...)
# break after all commas above starting depth
- if ( $depth < $starting_depth ) {
+ if ( $depth < $starting_depth && !$dont_align[$depth] ) {
set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
next;
}
&& $container_environment_to_go[$i] eq 'BLOCK' )
{
$dont_align[$depth] = 1;
- next;
}
- }
+ } ## end if ( $item_count == 0 )
$comma_index[$depth][$item_count] = $i;
++$item_count_stack[$depth];
if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
$identifier_count_stack[$depth]++;
}
- }
+ } ## end while ( ++$i <= $max_index_to_go)
#-------------------------------------------
# end of loop over all tokens in this batch
&& $i_opening >= $max_index_to_go - 2
&& $token =~ /^['"]$/ )
);
- }
+ } ## end for ( my $dd = $current_depth...)
# Return a flag indicating if the input file had some good breakpoints.
# This flag will be used to force a break in a line shorter than the
if ( $has_old_logical_breakpoints[$current_depth] ) {
$saw_good_breakpoint = 1;
}
+
+ # A complex line with one break at an = has a good breakpoint.
+ # This is not complex ($total_depth_variation=0):
+ # $res1
+ # = 10;
+ #
+ # This is complex ($total_depth_variation=6):
+ # $res2 =
+ # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
+ elsif ($i_old_assignment_break
+ && $total_depth_variation > 4
+ && $old_breakpoint_count == 1 )
+ {
+ $saw_good_breakpoint = 1;
+ } ## end elsif ( $i_old_assignment_break...)
+
return $saw_good_breakpoint;
- }
+ } ## end sub scan_list
} # end scan_list
sub find_token_starting_list {
# These keywords have prototypes which allow a special leading item
# followed by a list
- @_ =
+ my @q =
qw(formline grep kill map printf sprintf push chmod join pack unshift);
- @is_keyword_with_special_leading_term{@_} = (1) x scalar(@_);
+ @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
}
sub set_comma_breakpoints_do {
# nothing to do if no commas seen
return if ( $item_count < 1 );
- my $i_first_comma = $$rcomma_index[0];
- my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ];
+ my $i_first_comma = $rcomma_index->[0];
+ my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
my $i_last_comma = $i_true_last_comma;
if ( $i_last_comma >= $max_index_to_go ) {
- $i_last_comma = $$rcomma_index[ --$item_count - 1 ];
+ $i_last_comma = $rcomma_index->[ --$item_count - 1 ];
return if ( $item_count < 1 );
}
my $i = $i_opening_paren;
my $is_odd = 1;
- for ( my $j = 0 ; $j < $comma_count ; $j++ ) {
+ foreach my $j ( 0 .. $comma_count - 1 ) {
$is_odd = 1 - $is_odd;
$i_prev_plus = $i + 1;
- $i = $$rcomma_index[$j];
+ $i = $rcomma_index->[$j];
my $i_term_end =
( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
}
#my ( $a, $b, $c ) = caller();
-#print "LISTX: in set_list $a $c interupt=$interrupted count=$item_count
+#print "LISTX: in set_list $a $c interrupt=$interrupted count=$item_count
#i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
#print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
#---------------------------------------------------------------
# Interrupted List Rule:
- # A list is is forced to use old breakpoints if it was interrupted
+ # A list is forced to use old breakpoints if it was interrupted
# by side comments or blank lines, or requested by user.
#---------------------------------------------------------------
if ( $rOpts_break_at_old_comma_breakpoints
# exceeds the available space after the '('.
my $need_lp_break_open = $must_break_open;
if ( $rOpts_line_up_parentheses && !$must_break_open ) {
- my $columns_if_unbroken = $rOpts_maximum_line_length -
+ my $columns_if_unbroken =
+ maximum_line_length($i_opening_minus) -
total_line_length( $i_opening_minus, $i_opening_paren );
- $need_lp_break_open = ( $max_length[0] > $columns_if_unbroken )
+ $need_lp_break_open =
+ ( $max_length[0] > $columns_if_unbroken )
|| ( $max_length[1] > $columns_if_unbroken )
|| ( $first_term_length > $columns_if_unbroken );
}
# should the container be broken open?
if ( $item_count < 3 ) {
if ( $i_first_comma - $i_opening_paren < 4 ) {
- $$rdo_not_break_apart = 1;
+ ${$rdo_not_break_apart} = 1;
}
}
elsif ($first_term_length < 20
{
my $columns = table_columns_available($i_first_comma);
if ( $first_term_length < $columns ) {
- $$rdo_not_break_apart = 1;
+ ${$rdo_not_break_apart} = 1;
}
}
}
$use_separate_first_term = 1;
set_forced_breakpoint($i_first_comma);
$i_opening_paren = $i_first_comma;
- $i_first_comma = $$rcomma_index[1];
+ $i_first_comma = $rcomma_index->[1];
$item_count--;
return if $comma_count == 1;
shift @item_lengths;
)
)
{
- my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma);
+ my $available_spaces = get_available_spaces_to_go($i_first_comma);
if ( $available_spaces > 0 ) {
my $spaces_wanted = $max_width - $columns; # for 1 field
# )
# if $style eq 'all';
- my $i_last_comma = $$rcomma_index[ $comma_count - 1 ];
+ my $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
my $long_first_term =
excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
# or if this is a sublist of a larger list
|| $in_hierarchical_list
- # or if multiple commas and we dont have a long first or last
+ # or if multiple commas and we don't have a long first or last
# term
|| ( $comma_count > 1
&& !( $long_last_term || $long_first_term ) )
)
{
foreach ( 0 .. $comma_count - 1 ) {
- set_forced_breakpoint( $$rcomma_index[$_] );
+ set_forced_breakpoint( $rcomma_index->[$_] );
}
}
elsif ($long_last_term) {
set_forced_breakpoint($i_last_comma);
- $$rdo_not_break_apart = 1 unless $must_break_open;
+ ${$rdo_not_break_apart} = 1 unless $must_break_open;
}
elsif ($long_first_term) {
# as a table for relatively small parenthesized lists. These
# are usually easier to read if not formatted as tables.
if (
- $packed_lines <= 2 # probably can fit in 2 lines
- && $item_count < 9 # doesn't have too many items
+ $packed_lines <= 2 # probably can fit in 2 lines
+ && $item_count < 9 # doesn't have too many items
&& $opening_environment eq 'BLOCK' # not a sub-container
- && $opening_token eq '(' # is paren list
+ && $opening_token eq '(' # is paren list
)
{
# Shortcut method 1: for -lp and just one comma:
# This is a no-brainer, just break at the comma.
if (
- $rOpts_line_up_parentheses # -lp
- && $item_count == 2 # two items, one comma
+ $rOpts_line_up_parentheses # -lp
+ && $item_count == 2 # two items, one comma
&& !$must_break_open
)
{
- my $i_break = $$rcomma_index[0];
+ my $i_break = $rcomma_index->[0];
set_forced_breakpoint($i_break);
- $$rdo_not_break_apart = 1;
+ ${$rdo_not_break_apart} = 1;
set_non_alignment_flags( $comma_count, $rcomma_index );
return;
unless ($must_break_open) {
if ( $break_count <= 1 ) {
- $$rdo_not_break_apart = 1;
+ ${$rdo_not_break_apart} = 1;
}
elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
{
- $$rdo_not_break_apart = 1;
+ ${$rdo_not_break_apart} = 1;
}
}
set_non_alignment_flags( $comma_count, $rcomma_index );
# debug stuff
FORMATTER_DEBUG_FLAG_SPARSE && do {
- print
+ print STDOUT
"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
};
unless ($must_break_open_container) {
if ( $break_count <= 1 ) {
- $$rdo_not_break_apart = 1;
+ ${$rdo_not_break_apart} = 1;
}
elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
{
- $$rdo_not_break_apart = 1;
+ ${$rdo_not_break_apart} = 1;
}
}
set_non_alignment_flags( $comma_count, $rcomma_index );
$j += $number_of_fields
)
{
- my $i = $$rcomma_index[$j];
+ my $i = $rcomma_index->[$j];
set_forced_breakpoint($i);
}
return;
# aligned
my ( $comma_count, $rcomma_index ) = @_;
foreach ( 0 .. $comma_count - 1 ) {
- $matching_token_to_go[ $$rcomma_index[$_] ] = 1;
+ $matching_token_to_go[ $rcomma_index->[$_] ] = 1;
}
+ return;
}
sub study_list_complexity {
my $total_variation_1 = 0;
my $total_variation_2 = 0;
my @total_variation_2 = ( 0, 0 );
- for ( my $j = 0 ; $j < $item_count ; $j++ ) {
+ foreach my $j ( 0 .. $item_count - 1 ) {
$is_odd = 1 - $is_odd;
my $length = $ritem_lengths->[$j];
sub table_columns_available {
my $i_first_comma = shift;
my $columns =
- $rOpts_maximum_line_length - leading_spaces_to_go($i_first_comma);
+ maximum_line_length($i_first_comma) -
+ leading_spaces_to_go($i_first_comma);
# Patch: the vertical formatter does not line up lines whose lengths
# exactly equal the available line length because of allowances
my ( $ri_term_comma, $ri_ragged_break_list ) = @_;
my $break_count = 0;
- foreach (@$ri_ragged_break_list) {
+ foreach ( @{$ri_ragged_break_list} ) {
my $j = $ri_term_comma->[$_];
if ($j) {
set_forced_breakpoint($j);
set_forced_breakpoint($i);
}
}
+ return;
}
sub set_nobreaks {
FORMATTER_DEBUG_FLAG_NOBREAK && do {
my ( $a, $b, $c ) = caller();
- print(
-"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"
- );
+ print STDOUT
+"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
};
@nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
else {
FORMATTER_DEBUG_FLAG_NOBREAK && do {
my ( $a, $b, $c ) = caller();
- print(
-"NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"
- );
+ print STDOUT
+ "NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n";
};
}
+ return;
}
sub set_fake_breakpoint {
# This is useful if we have breaks but may want to postpone deciding where
# to make them.
$forced_breakpoint_count++;
+ return;
}
sub set_forced_breakpoint {
return unless defined $i && $i >= 0;
+ # no breaks between welded tokens
+ return if ( weld_len_right_to_go($i) );
+
# when called with certain tokens, use bond strengths to decide
# if we break before or after it
my $token = $tokens_to_go[$i];
FORMATTER_DEBUG_FLAG_FORCE && do {
my ( $a, $b, $c ) = caller();
- print
-"FORCE forced_breakpoint $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
+ print STDOUT
+"FORCE $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
};
if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
}
}
}
+ return;
}
sub clear_breakpoint_undo_stack {
$forced_breakpoint_undo_count = 0;
+ return;
}
sub undo_forced_breakpoint_stack {
FORMATTER_DEBUG_FLAG_UNDOBP && do {
my ( $a, $b, $c ) = caller();
- print(
-"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"
- );
+ print STDOUT
+"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n";
};
}
else {
FORMATTER_DEBUG_FLAG_UNDOBP && do {
my ( $a, $b, $c ) = caller();
- print(
-"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go"
- );
+ print STDOUT
+"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go";
};
}
}
+ return;
}
-sub recombine_breakpoints {
+{ # begin recombine_breakpoints
- # sub set_continuation_breaks is very liberal in setting line breaks
- # for long lines, always setting breaks at good breakpoints, even
- # when that creates small lines. Occasionally small line fragments
- # are produced which would look better if they were combined.
- # That's the task of this routine, recombine_breakpoints.
- my ( $ri_first, $ri_last ) = @_;
- my $more_to_do = 1;
-
- # We keep looping over all of the lines of this batch
- # until there are no more possible recombinations
- my $nmax_last = @$ri_last;
- while ($more_to_do) {
- my $n_best = 0;
- my $bs_best;
- my $n;
- my $nmax = @$ri_last - 1;
-
- # safety check for infinite loop
- unless ( $nmax < $nmax_last ) {
-
- # shouldn't happen because splice below decreases nmax on each pass:
- # but i get paranoid sometimes
- die "Program bug-infinite loop in recombine breakpoints\n";
- }
- $nmax_last = $nmax;
- $more_to_do = 0;
- my $previous_outdentable_closing_paren;
- my $leading_amp_count = 0;
- my $this_line_is_semicolon_terminated;
-
- # loop over all remaining lines in this batch
- for $n ( 1 .. $nmax ) {
-
- #----------------------------------------------------------
- # If we join the current pair of lines,
- # line $n-1 will become the left part of the joined line
- # line $n will become the right part of the joined line
- #
- # Here are Indexes of the endpoint tokens of the two lines:
- #
- # ---left---- | ---right---
- # $if $imid | $imidr $il
- #
- # We want to decide if we should join tokens $imid to $imidr
- #
- # We will apply a number of ad-hoc tests to see if joining
- # here will look ok. The code will just issue a 'next'
- # command if the join doesn't look good. If we get through
- # the gauntlet of tests, the lines will be recombined.
- #----------------------------------------------------------
- my $if = $$ri_first[ $n - 1 ];
- my $il = $$ri_last[$n];
- my $imid = $$ri_last[ $n - 1 ];
- my $imidr = $$ri_first[$n];
-
- #my $depth_increase=( $nesting_depth_to_go[$imidr] -
- # $nesting_depth_to_go[$if] );
-
-##print "RECOMBINE: n=$n imid=$imid if=$if type=$types_to_go[$if] =$tokens_to_go[$if] next_type=$types_to_go[$imidr] next_tok=$tokens_to_go[$imidr]\n";
-
- # If line $n is the last line, we set some flags and
- # do any special checks for it
- if ( $n == $nmax ) {
-
- # a terminal '{' should stay where it is
- next if $types_to_go[$imidr] eq '{';
-
- # set flag if statement $n ends in ';'
- $this_line_is_semicolon_terminated = $types_to_go[$il] eq ';'
-
- # with possible side comment
- || ( $types_to_go[$il] eq '#'
- && $il - $imidr >= 2
- && $types_to_go[ $il - 2 ] eq ';'
- && $types_to_go[ $il - 1 ] eq 'b' );
- }
-
- #----------------------------------------------------------
- # Section 1: examine token at $imid (right end of first line
- # of pair)
- #----------------------------------------------------------
-
- # an isolated '}' may join with a ';' terminated segment
- if ( $types_to_go[$imid] eq '}' ) {
-
- # Check for cases where combining a semicolon terminated
- # statement with a previous isolated closing paren will
- # allow the combined line to be outdented. This is
- # generally a good move. For example, we can join up
- # the last two lines here:
- # (
- # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
- # $size, $atime, $mtime, $ctime, $blksize, $blocks
- # )
- # = stat($file);
- #
- # to get:
- # (
- # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
- # $size, $atime, $mtime, $ctime, $blksize, $blocks
- # ) = stat($file);
- #
- # which makes the parens line up.
- #
- # Another example, from Joe Matarazzo, probably looks best
- # with the 'or' clause appended to the trailing paren:
- # $self->some_method(
- # PARAM1 => 'foo',
- # PARAM2 => 'bar'
- # ) or die "Some_method didn't work";
- #
- $previous_outdentable_closing_paren =
- $this_line_is_semicolon_terminated # ends in ';'
- && $if == $imid # only one token on last line
- && $tokens_to_go[$imid] eq ')' # must be structural paren
-
- # only &&, ||, and : if no others seen
- # (but note: our count made below could be wrong
- # due to intervening comments)
- && ( $leading_amp_count == 0
- || $types_to_go[$imidr] !~ /^(:|\&\&|\|\|)$/ )
-
- # but leading colons probably line up with with a
- # previous colon or question (count could be wrong).
- && $types_to_go[$imidr] ne ':'
-
- # only one step in depth allowed. this line must not
- # begin with a ')' itself.
- && ( $nesting_depth_to_go[$imid] ==
- $nesting_depth_to_go[$il] + 1 );
+ my %is_amp_amp;
+ my %is_ternary;
+ my %is_math_op;
+ my %is_plus_minus;
+ my %is_mult_div;
- next
- unless (
- $previous_outdentable_closing_paren
+ BEGIN {
- # handle '.' and '?' specially below
- || ( $types_to_go[$imidr] =~ /^[\.\?]$/ )
- );
- }
+ my @q;
+ @q = qw( && || );
+ @is_amp_amp{@q} = (1) x scalar(@q);
- # do not recombine lines with ending &&, ||, or :
- elsif ( $types_to_go[$imid] =~ /^(|:|\&\&|\|\|)$/ ) {
- next unless $want_break_before{ $types_to_go[$imid] };
- }
+ @q = qw( ? : );
+ @is_ternary{@q} = (1) x scalar(@q);
- # for lines ending in a comma...
- elsif ( $types_to_go[$imid] eq ',' ) {
+ @q = qw( + - * / );
+ @is_math_op{@q} = (1) x scalar(@q);
- # an isolated '},' may join with an identifier + ';'
- # this is useful for the class of a 'bless' statement (bless.t)
- if ( $types_to_go[$if] eq '}'
- && $types_to_go[$imidr] eq 'i' )
- {
- next
- unless ( ( $if == ( $imid - 1 ) )
- && ( $il == ( $imidr + 1 ) )
- && $this_line_is_semicolon_terminated );
+ @q = qw( + - );
+ @is_plus_minus{@q} = (1) x scalar(@q);
- # override breakpoint
- $forced_breakpoint_to_go[$imid] = 0;
- }
+ @q = qw( * / );
+ @is_mult_div{@q} = (1) x scalar(@q);
+ }
- # but otherwise, do not recombine unless this will leave
- # just 1 more line
- else {
- next unless ( $n + 1 >= $nmax );
- }
+ sub DUMP_BREAKPOINTS {
+
+ # Debug routine to dump current breakpoints...not normally called
+ # We are given indexes to the current lines:
+ # $ri_beg = ref to array of BEGinning indexes of each line
+ # $ri_end = ref to array of ENDing indexes of each line
+ my ( $ri_beg, $ri_end, $msg ) = @_;
+ print STDERR "----Dumping breakpoints from: $msg----\n";
+ for my $n ( 0 .. @{$ri_end} - 1 ) {
+ my $ibeg = $ri_beg->[$n];
+ my $iend = $ri_end->[$n];
+ my $text = "";
+ foreach my $i ( $ibeg .. $iend ) {
+ $text .= $tokens_to_go[$i];
}
+ print STDERR "$n ($ibeg:$iend) $text\n";
+ }
+ print STDERR "----\n";
+ return;
+ }
- # opening paren..
- elsif ( $types_to_go[$imid] eq '(' ) {
+ sub unmask_phantom_semicolons {
- # No longer doing this
- }
+ my ( $self, $ri_beg, $ri_end ) = @_;
+
+ # Walk down the lines of this batch and unmask any invisible line-ending
+ # semicolons. They were placed by sub respace_tokens but we only now
+ # know if we actually need them.
+
+ my $nmax = @{$ri_end} - 1;
+ foreach my $n ( 0 .. $nmax ) {
- elsif ( $types_to_go[$imid] eq ')' ) {
+ my $i = $ri_end->[$n];
+ if ( $types_to_go[$i] eq ';' && $tokens_to_go[$i] eq '' ) {
- # No longer doing this
+ $tokens_to_go[$i] = $rtoken_vars_to_go[$i]->[_TOKEN_] =
+ $want_left_space{';'} == WS_NO ? ';' : ' ;';
+ my $line_number = $rtoken_vars_to_go[$i]->[_LINE_INDEX_] + 1;
+ note_added_semicolon($line_number);
}
+ }
+ return;
+ }
- # keep a terminal colon
- elsif ( $types_to_go[$imid] eq ':' ) {
- next;
+ sub recombine_breakpoints {
+
+ # sub set_continuation_breaks is very liberal in setting line breaks
+ # for long lines, always setting breaks at good breakpoints, even
+ # when that creates small lines. Sometimes small line fragments
+ # are produced which would look better if they were combined.
+ # That's the task of this routine.
+ #
+ # We are given indexes to the current lines:
+ # $ri_beg = ref to array of BEGinning indexes of each line
+ # $ri_end = ref to array of ENDing indexes of each line
+ my ( $ri_beg, $ri_end ) = @_;
+
+ # Make a list of all good joining tokens between the lines
+ # n-1 and n.
+ my @joint;
+ my $nmax = @{$ri_end} - 1;
+ for my $n ( 1 .. $nmax ) {
+ my $ibeg_1 = $ri_beg->[ $n - 1 ];
+ my $iend_1 = $ri_end->[ $n - 1 ];
+ my $iend_2 = $ri_end->[$n];
+ my $ibeg_2 = $ri_beg->[$n];
+
+ my ( $itok, $itokp, $itokm );
+
+ foreach my $itest ( $iend_1, $ibeg_2 ) {
+ my $type = $types_to_go[$itest];
+ if ( $is_math_op{$type}
+ || $is_amp_amp{$type}
+ || $is_assignment{$type}
+ || $type eq ':' )
+ {
+ $itok = $itest;
+ }
}
+ $joint[$n] = [$itok];
+ }
- # keep a terminal for-semicolon
- elsif ( $types_to_go[$imid] eq 'f' ) {
- next;
+ my $more_to_do = 1;
+
+ # We keep looping over all of the lines of this batch
+ # until there are no more possible recombinations
+ my $nmax_last = @{$ri_end};
+ my $reverse = 0;
+ while ($more_to_do) {
+ my $n_best = 0;
+ my $bs_best;
+ my $nmax = @{$ri_end} - 1;
+
+ # Safety check for infinite loop
+ unless ( $nmax < $nmax_last ) {
+
+ # Shouldn't happen because splice below decreases nmax on each
+ # pass.
+ Fault("Program bug-infinite loop in recombine breakpoints\n");
}
+ $nmax_last = $nmax;
+ $more_to_do = 0;
+ my $skip_Section_3;
+ my $leading_amp_count = 0;
+ my $this_line_is_semicolon_terminated;
+
+ # loop over all remaining lines in this batch
+ for my $iter ( 1 .. $nmax ) {
+
+ # alternating sweep direction gives symmetric results
+ # for recombining lines which exceed the line length
+ # such as eval {{{{.... }}}}
+ my $n;
+ if ($reverse) { $n = 1 + $nmax - $iter; }
+ else { $n = $iter }
+
+ #----------------------------------------------------------
+ # If we join the current pair of lines,
+ # line $n-1 will become the left part of the joined line
+ # line $n will become the right part of the joined line
+ #
+ # Here are Indexes of the endpoint tokens of the two lines:
+ #
+ # -----line $n-1--- | -----line $n-----
+ # $ibeg_1 $iend_1 | $ibeg_2 $iend_2
+ # ^
+ # |
+ # We want to decide if we should remove the line break
+ # between the tokens at $iend_1 and $ibeg_2
+ #
+ # We will apply a number of ad-hoc tests to see if joining
+ # here will look ok. The code will just issue a 'next'
+ # command if the join doesn't look good. If we get through
+ # the gauntlet of tests, the lines will be recombined.
+ #----------------------------------------------------------
+ #
+ # beginning and ending tokens of the lines we are working on
+ my $ibeg_1 = $ri_beg->[ $n - 1 ];
+ my $iend_1 = $ri_end->[ $n - 1 ];
+ my $iend_2 = $ri_end->[$n];
+ my $ibeg_2 = $ri_beg->[$n];
+ my $ibeg_nmax = $ri_beg->[$nmax];
+
+ # combined line cannot be too long
+ my $excess = excess_line_length( $ibeg_1, $iend_2, 1, 1 );
+ next if ( $excess > 0 );
+
+ my $type_iend_1 = $types_to_go[$iend_1];
+ my $type_iend_2 = $types_to_go[$iend_2];
+ my $type_ibeg_1 = $types_to_go[$ibeg_1];
+ my $type_ibeg_2 = $types_to_go[$ibeg_2];
+
+ # terminal token of line 2 if any side comment is ignored:
+ my $iend_2t = $iend_2;
+ my $type_iend_2t = $type_iend_2;
+
+ # some beginning indexes of other lines, which may not exist
+ my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1;
+ my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
+ my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
+
+ my $bs_tweak = 0;
+
+ #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
+ # $nesting_depth_to_go[$ibeg_1] );
+
+ FORMATTER_DEBUG_FLAG_RECOMBINE && do {
+ print STDERR
+"RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
+ };
+
+ # If line $n is the last line, we set some flags and
+ # do any special checks for it
+ if ( $n == $nmax ) {
+
+ # a terminal '{' should stay where it is
+ next if $type_ibeg_2 eq '{';
+
+ if ( $type_iend_2 eq '#'
+ && $iend_2 - $ibeg_2 >= 2
+ && $types_to_go[ $iend_2 - 1 ] eq 'b' )
+ {
+ $iend_2t = $iend_2 - 2;
+ $type_iend_2t = $types_to_go[$iend_2t];
+ }
+
+ $this_line_is_semicolon_terminated = $type_iend_2t eq ';';
+ }
+
+ #----------------------------------------------------------
+ # Recombine Section 0:
+ # Examine the special token joining this line pair, if any.
+ # Put as many tests in this section to avoid duplicate code and
+ # to make formatting independent of whether breaks are to the
+ # left or right of an operator.
+ #----------------------------------------------------------
+
+ my ($itok) = @{ $joint[$n] };
+ if ($itok) {
+
+ # FIXME: Patch - may not be necessary
+ my $iend_1 =
+ $type_iend_1 eq 'b'
+ ? $iend_1 - 1
+ : $iend_1;
+
+ my $iend_2 =
+ $type_iend_2 eq 'b'
+ ? $iend_2 - 1
+ : $iend_2;
+ ## END PATCH
+
+ my $type = $types_to_go[$itok];
+
+ if ( $type eq ':' ) {
+
+ # do not join at a colon unless it disobeys the break request
+ if ( $itok eq $iend_1 ) {
+ next unless $want_break_before{$type};
+ }
+ else {
+ $leading_amp_count++;
+ next if $want_break_before{$type};
+ }
+ } ## end if ':'
+
+ # handle math operators + - * /
+ elsif ( $is_math_op{$type} ) {
- # if '=' at end of line ...
- elsif ( $is_assignment{ $types_to_go[$imid] } ) {
-
- my $is_short_quote =
- ( $types_to_go[$imidr] eq 'Q'
- && $imidr == $il
- && length( $tokens_to_go[$imidr] ) <
- $rOpts_short_concatenation_item_length );
- my $ifnmax = $$ri_first[$nmax];
- my $ifnp = ( $nmax > $n ) ? $$ri_first[ $n + 1 ] : $ifnmax;
- my $is_qk =
- ( $types_to_go[$if] eq '?' && $types_to_go[$ifnp] eq ':' );
-
- # always join an isolated '=', a short quote, or if this
- # will put ?/: at start of adjacent lines
- if ( $if != $imid
- && !$is_short_quote
- && !$is_qk )
+ # Combine these lines if this line is a single
+ # number, or if it is a short term with same
+ # operator as the previous line. For example, in
+ # the following code we will combine all of the
+ # short terms $A, $B, $C, $D, $E, $F, together
+ # instead of leaving them one per line:
+ # my $time =
+ # $A * $B * $C * $D * $E * $F *
+ # ( 2. * $eps * $sigma * $area ) *
+ # ( 1. / $tcold**3 - 1. / $thot**3 );
+
+ # This can be important in math-intensive code.
+
+ my $good_combo;
+
+ my $itokp = min( $inext_to_go[$itok], $iend_2 );
+ my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
+ my $itokm = max( $iprev_to_go[$itok], $ibeg_1 );
+ my $itokmm = max( $iprev_to_go[$itokm], $ibeg_1 );
+
+ # check for a number on the right
+ if ( $types_to_go[$itokp] eq 'n' ) {
+
+ # ok if nothing else on right
+ if ( $itokp == $iend_2 ) {
+ $good_combo = 1;
+ }
+ else {
+
+ # look one more token to right..
+ # okay if math operator or some termination
+ $good_combo =
+ ( ( $itokpp == $iend_2 )
+ && $is_math_op{ $types_to_go[$itokpp] } )
+ || $types_to_go[$itokpp] =~ /^[#,;]$/;
+ }
+ }
+
+ # check for a number on the left
+ if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
+
+ # okay if nothing else to left
+ if ( $itokm == $ibeg_1 ) {
+ $good_combo = 1;
+ }
+
+ # otherwise look one more token to left
+ else {
+
+ # okay if math operator, comma, or assignment
+ $good_combo = ( $itokmm == $ibeg_1 )
+ && ( $is_math_op{ $types_to_go[$itokmm] }
+ || $types_to_go[$itokmm] =~ /^[,]$/
+ || $is_assignment{ $types_to_go[$itokmm] }
+ );
+ }
+ }
+
+ # look for a single short token either side of the
+ # operator
+ if ( !$good_combo ) {
+
+ # Slight adjustment factor to make results
+ # independent of break before or after operator in
+ # long summed lists. (An operator and a space make
+ # two spaces).
+ my $two = ( $itok eq $iend_1 ) ? 2 : 0;
+
+ $good_combo =
+
+ # numbers or id's on both sides of this joint
+ $types_to_go[$itokp] =~ /^[in]$/
+ && $types_to_go[$itokm] =~ /^[in]$/
+
+ # one of the two lines must be short:
+ && (
+ (
+ # no more than 2 nonblank tokens right of
+ # joint
+ $itokpp == $iend_2
+
+ # short
+ && token_sequence_length( $itokp, $iend_2 )
+ < $two +
+ $rOpts_short_concatenation_item_length
+ )
+ || (
+ # no more than 2 nonblank tokens left of
+ # joint
+ $itokmm == $ibeg_1
+
+ # short
+ && token_sequence_length( $ibeg_1, $itokm )
+ < 2 - $two +
+ $rOpts_short_concatenation_item_length
+ )
+
+ )
+
+ # keep pure terms; don't mix +- with */
+ && !(
+ $is_plus_minus{$type}
+ && ( $is_mult_div{ $types_to_go[$itokmm] }
+ || $is_mult_div{ $types_to_go[$itokpp] } )
+ )
+ && !(
+ $is_mult_div{$type}
+ && ( $is_plus_minus{ $types_to_go[$itokmm] }
+ || $is_plus_minus{ $types_to_go[$itokpp] } )
+ )
+
+ ;
+ }
+
+ # it is also good to combine if we can reduce to 2 lines
+ if ( !$good_combo ) {
+
+ # index on other line where same token would be in a
+ # long chain.
+ my $iother =
+ ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
+
+ $good_combo =
+ $n == 2
+ && $n == $nmax
+ && $types_to_go[$iother] ne $type;
+ }
+
+ next unless ($good_combo);
+
+ } ## end math
+
+ elsif ( $is_amp_amp{$type} ) {
+ ##TBD
+ } ## end &&, ||
+
+ elsif ( $is_assignment{$type} ) {
+ ##TBD
+ } ## end assignment
+ }
+
+ #----------------------------------------------------------
+ # Recombine Section 1:
+ # Join welded nested containers immediately
+ # use alternating sweep direction until all are welds
+ # are done. This produces more symmetric opening and
+ # closing joins when lines exceed line length.
+ #----------------------------------------------------------
+ if ( weld_len_right_to_go($iend_1)
+ || weld_len_left_to_go($ibeg_2) )
{
+ $n_best = $n;
+ $reverse = 1 - $reverse;
+ last;
+ }
+ $reverse = 0;
+
+ #----------------------------------------------------------
+ # Recombine Section 2:
+ # Examine token at $iend_1 (right end of first line of pair)
+ #----------------------------------------------------------
+
+ # an isolated '}' may join with a ';' terminated segment
+ if ( $type_iend_1 eq '}' ) {
+
+ # Check for cases where combining a semicolon terminated
+ # statement with a previous isolated closing paren will
+ # allow the combined line to be outdented. This is
+ # generally a good move. For example, we can join up
+ # the last two lines here:
+ # (
+ # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
+ # $size, $atime, $mtime, $ctime, $blksize, $blocks
+ # )
+ # = stat($file);
+ #
+ # to get:
+ # (
+ # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
+ # $size, $atime, $mtime, $ctime, $blksize, $blocks
+ # ) = stat($file);
+ #
+ # which makes the parens line up.
+ #
+ # Another example, from Joe Matarazzo, probably looks best
+ # with the 'or' clause appended to the trailing paren:
+ # $self->some_method(
+ # PARAM1 => 'foo',
+ # PARAM2 => 'bar'
+ # ) or die "Some_method didn't work";
+ #
+ # But we do not want to do this for something like the -lp
+ # option where the paren is not outdentable because the
+ # trailing clause will be far to the right.
+ #
+ # The logic here is synchronized with the logic in sub
+ # sub set_adjusted_indentation, which actually does
+ # the outdenting.
+ #
+ $skip_Section_3 ||= $this_line_is_semicolon_terminated
+
+ # only one token on last line
+ && $ibeg_1 == $iend_1
+
+ # must be structural paren
+ && $tokens_to_go[$iend_1] eq ')'
+
+ # style must allow outdenting,
+ && !$closing_token_indentation{')'}
+
+ # only leading '&&', '||', and ':' if no others seen
+ # (but note: our count made below could be wrong
+ # due to intervening comments)
+ && ( $leading_amp_count == 0
+ || $type_ibeg_2 !~ /^(:|\&\&|\|\|)$/ )
+
+ # but leading colons probably line up with a
+ # previous colon or question (count could be wrong).
+ && $type_ibeg_2 ne ':'
+
+ # only one step in depth allowed. this line must not
+ # begin with a ')' itself.
+ && ( $nesting_depth_to_go[$iend_1] ==
+ $nesting_depth_to_go[$iend_2] + 1 );
+
+ # YVES patch 2 of 2:
+ # Allow cuddled eval chains, like this:
+ # eval {
+ # #STUFF;
+ # 1; # return true
+ # } or do {
+ # #handle error
+ # };
+ # This patch works together with a patch in
+ # setting adjusted indentation (where the closing eval
+ # brace is outdented if possible).
+ # The problem is that an 'eval' block has continuation
+ # indentation and it looks better to undo it in some
+ # cases. If we do not use this patch we would get:
+ # eval {
+ # #STUFF;
+ # 1; # return true
+ # }
+ # or do {
+ # #handle error
+ # };
+ # The alternative, for uncuddled style, is to create
+ # a patch in set_adjusted_indentation which undoes
+ # the indentation of a leading line like 'or do {'.
+ # This doesn't work well with -icb through
+ if (
+ $block_type_to_go[$iend_1] eq 'eval'
+ && !$rOpts->{'line-up-parentheses'}
+ && !$rOpts->{'indent-closing-brace'}
+ && $tokens_to_go[$iend_2] eq '{'
+ && (
+ ( $type_ibeg_2 =~ /^(|\&\&|\|\|)$/ )
+ || ( $type_ibeg_2 eq 'k'
+ && $is_and_or{ $tokens_to_go[$ibeg_2] } )
+ || $is_if_unless{ $tokens_to_go[$ibeg_2] }
+ )
+ )
+ {
+ $skip_Section_3 ||= 1;
+ }
+
next
unless (
- (
+ $skip_Section_3
- # unless we can reduce this to two lines
- $nmax < $n + 2
+ # handle '.' and '?' specially below
+ || ( $type_ibeg_2 =~ /^[\.\?]$/ )
+ );
+ }
- # or three lines, the last with a leading semicolon
- || ( $nmax == $n + 2
- && $types_to_go[$ifnmax] eq ';' )
+ elsif ( $type_iend_1 eq '{' ) {
- # or the next line ends with a here doc
- || $types_to_go[$il] eq 'h'
- )
+ # YVES
+ # honor breaks at opening brace
+ # Added to prevent recombining something like this:
+ # } || eval { package main;
+ next if $forced_breakpoint_to_go[$iend_1];
+ }
- # do not recombine if the two lines might align well
- # this is a very approximate test for this
- && $types_to_go[$imidr] ne $types_to_go[$ifnp]
- );
+ # do not recombine lines with ending &&, ||,
+ elsif ( $is_amp_amp{$type_iend_1} ) {
+ next unless $want_break_before{$type_iend_1};
+ }
+
+ # Identify and recombine a broken ?/: chain
+ elsif ( $type_iend_1 eq '?' ) {
+
+ # Do not recombine different levels
+ next
+ if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
+
+ # do not recombine unless next line ends in :
+ next unless $type_iend_2 eq ':';
+ }
- # -lp users often prefer this:
- # my $title = function($env, $env, $sysarea,
- # "bubba Borrower Entry");
- # so we will recombine if -lp is used we have ending comma
- if ( !$rOpts_line_up_parentheses
- || $types_to_go[$il] ne ',' )
+ # for lines ending in a comma...
+ elsif ( $type_iend_1 eq ',' ) {
+
+ # Do not recombine at comma which is following the
+ # input bias.
+ # TODO: might be best to make a special flag
+ next if ( $old_breakpoint_to_go[$iend_1] );
+
+ # an isolated '},' may join with an identifier + ';'
+ # this is useful for the class of a 'bless' statement (bless.t)
+ if ( $type_ibeg_1 eq '}'
+ && $type_ibeg_2 eq 'i' )
{
+ next
+ unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
+ && ( $iend_2 == ( $ibeg_2 + 1 ) )
+ && $this_line_is_semicolon_terminated );
+
+ # override breakpoint
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
+
+ # but otherwise ..
+ else {
+
+ # do not recombine after a comma unless this will leave
+ # just 1 more line
+ next unless ( $n + 1 >= $nmax );
- # otherwise, scan the rhs line up to last token for
- # complexity. Note that we are not counting the last
- # token in case it is an opening paren.
- my $tv = 0;
- my $depth = $nesting_depth_to_go[$imidr];
- for ( my $i = $imidr + 1 ; $i < $il ; $i++ ) {
- if ( $nesting_depth_to_go[$i] != $depth ) {
- $tv++;
- last if ( $tv > 1 );
+ # do not recombine if there is a change in indentation depth
+ next
+ if (
+ $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
+
+ # do not recombine a "complex expression" after a
+ # comma. "complex" means no parens.
+ my $saw_paren;
+ foreach my $ii ( $ibeg_2 .. $iend_2 ) {
+ if ( $tokens_to_go[$ii] eq '(' ) {
+ $saw_paren = 1;
+ last;
}
- $depth = $nesting_depth_to_go[$i];
}
+ next if $saw_paren;
+ }
+ }
+
+ # opening paren..
+ elsif ( $type_iend_1 eq '(' ) {
+
+ # No longer doing this
+ }
+
+ elsif ( $type_iend_1 eq ')' ) {
+
+ # No longer doing this
+ }
+
+ # keep a terminal for-semicolon
+ elsif ( $type_iend_1 eq 'f' ) {
+ next;
+ }
+
+ # if '=' at end of line ...
+ elsif ( $is_assignment{$type_iend_1} ) {
+
+ # keep break after = if it was in input stream
+ # this helps prevent 'blinkers'
+ next if $old_breakpoint_to_go[$iend_1]
+
+ # don't strand an isolated '='
+ && $iend_1 != $ibeg_1;
+
+ my $is_short_quote =
+ ( $type_ibeg_2 eq 'Q'
+ && $ibeg_2 == $iend_2
+ && token_sequence_length( $ibeg_2, $ibeg_2 ) <
+ $rOpts_short_concatenation_item_length );
+ my $is_ternary =
+ ( $type_ibeg_1 eq '?'
+ && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
+
+ # always join an isolated '=', a short quote, or if this
+ # will put ?/: at start of adjacent lines
+ if ( $ibeg_1 != $iend_1
+ && !$is_short_quote
+ && !$is_ternary )
+ {
+ next
+ unless (
+ (
+
+ # unless we can reduce this to two lines
+ $nmax < $n + 2
+
+ # or three lines, the last with a leading semicolon
+ || ( $nmax == $n + 2
+ && $types_to_go[$ibeg_nmax] eq ';' )
- # ok to recombine if no level changes before last token
- if ( $tv > 0 ) {
+ # or the next line ends with a here doc
+ || $type_iend_2 eq 'h'
- # otherwise, do not recombine if more than two
- # level changes.
- next if ( $tv > 1 );
+ # or the next line ends in an open paren or brace
+ # and the break hasn't been forced [dima.t]
+ || ( !$forced_breakpoint_to_go[$iend_1]
+ && $type_iend_2 eq '{' )
+ )
+
+ # do not recombine if the two lines might align well
+ # this is a very approximate test for this
+ && ( $ibeg_3 >= 0
+ && $type_ibeg_2 ne $types_to_go[$ibeg_3] )
+ );
+
+ if (
+
+ # Recombine if we can make two lines
+ $nmax >= $n + 2
+
+ # -lp users often prefer this:
+ # my $title = function($env, $env, $sysarea,
+ # "bubba Borrower Entry");
+ # so we will recombine if -lp is used we have
+ # ending comma
+ && ( !$rOpts_line_up_parentheses
+ || $type_iend_2 ne ',' )
+ )
+ {
- # check total complexity of the two adjacent lines
- # that will occur if we do this join
- my $istop =
- ( $n < $nmax ) ? $$ri_last[ $n + 1 ] : $il;
- for ( my $i = $il ; $i <= $istop ; $i++ ) {
+ # otherwise, scan the rhs line up to last token for
+ # complexity. Note that we are not counting the last
+ # token in case it is an opening paren.
+ my $tv = 0;
+ my $depth = $nesting_depth_to_go[$ibeg_2];
+ foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
if ( $nesting_depth_to_go[$i] != $depth ) {
$tv++;
- last if ( $tv > 2 );
+ last if ( $tv > 1 );
}
$depth = $nesting_depth_to_go[$i];
}
+ # ok to recombine if no level changes before last token
+ if ( $tv > 0 ) {
+
+ # otherwise, do not recombine if more than two
+ # level changes.
+ next if ( $tv > 1 );
+
+ # check total complexity of the two adjacent lines
+ # that will occur if we do this join
+ my $istop =
+ ( $n < $nmax )
+ ? $ri_end->[ $n + 1 ]
+ : $iend_2;
+ foreach my $i ( $iend_2 .. $istop ) {
+ if ( $nesting_depth_to_go[$i] != $depth ) {
+ $tv++;
+ last if ( $tv > 2 );
+ }
+ $depth = $nesting_depth_to_go[$i];
+ }
+
# do not recombine if total is more than 2 level changes
- next if ( $tv > 2 );
+ next if ( $tv > 2 );
+ }
}
}
- }
- unless ( $tokens_to_go[$imidr] =~ /^[\{\(\[]$/ ) {
- $forced_breakpoint_to_go[$imid] = 0;
+ unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
}
- }
- # for keywords..
- elsif ( $types_to_go[$imid] eq 'k' ) {
+ # for keywords..
+ elsif ( $type_iend_1 eq 'k' ) {
- # make major control keywords stand out
- # (recombine.t)
- next
- if (
+ # make major control keywords stand out
+ # (recombine.t)
+ next
+ if (
- #/^(last|next|redo|return)$/
- $is_last_next_redo_return{ $tokens_to_go[$imid] }
+ #/^(last|next|redo|return)$/
+ $is_last_next_redo_return{ $tokens_to_go[$iend_1] }
- # but only if followed by multiple lines
- && $n < $nmax
- );
+ # but only if followed by multiple lines
+ && $n < $nmax
+ );
- if ( $is_and_or{ $tokens_to_go[$imid] } ) {
- next unless $want_break_before{ $tokens_to_go[$imid] };
+ if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
+ next
+ unless $want_break_before{ $tokens_to_go[$iend_1] };
+ }
}
- }
- # handle trailing + - * /
- elsif ( $types_to_go[$imid] =~ /^[\+\-\*\/]$/ ) {
- my $i_next_nonblank = $imidr;
- my $i_next_next = $i_next_nonblank + 1;
- $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
+ #----------------------------------------------------------
+ # Recombine Section 3:
+ # Examine token at $ibeg_2 (left end of second line of pair)
+ #----------------------------------------------------------
- # do not strand numbers
- next
- unless (
- $types_to_go[$i_next_nonblank] eq 'n'
- && (
- $i_next_nonblank == $il
- || ( $i_next_next == $il
- && $types_to_go[$i_next_next] =~ /^[\+\-\*\/]$/ )
- || $types_to_go[$i_next_next] eq ';'
- )
- );
- }
+ # join lines identified above as capable of
+ # causing an outdented line with leading closing paren
+ # Note that we are skipping the rest of this section
+ # and the rest of the loop to do the join
+ if ($skip_Section_3) {
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ $n_best = $n;
+ last;
+ }
- #----------------------------------------------------------
- # Section 2: Now examine token at $imidr (left end of second
- # line of pair)
- #----------------------------------------------------------
-
- # join lines identified above as capable of
- # causing an outdented line with leading closing paren
- if ($previous_outdentable_closing_paren) {
- $forced_breakpoint_to_go[$imid] = 0;
- }
-
- # do not recombine lines with leading &&, ||, or :
- elsif ( $types_to_go[$imidr] =~ /^(:|\&\&|\|\|)$/ ) {
- $leading_amp_count++;
- next if $want_break_before{ $types_to_go[$imidr] };
- }
-
- # Identify and recombine a broken ?/: chain
- elsif ( $types_to_go[$imidr] eq '?' ) {
-
- # indexes of line first tokens --
- # mm - line before previous line
- # f - previous line
- # <-- this line
- # ff - next line
- # fff - line after next
- my $iff = $n < $nmax ? $$ri_first[ $n + 1 ] : -1;
- my $ifff = $n + 2 <= $nmax ? $$ri_first[ $n + 2 ] : -1;
- my $imm = $n > 1 ? $$ri_first[ $n - 2 ] : -1;
- my $seqno = $type_sequence_to_go[$imidr];
- my $f_ok =
- ( $types_to_go[$if] eq ':'
- && $type_sequence_to_go[$if] ==
- $seqno - TYPE_SEQUENCE_INCREMENT );
- my $mm_ok =
- ( $imm >= 0
- && $types_to_go[$imm] eq ':'
- && $type_sequence_to_go[$imm] ==
- $seqno - 2 * TYPE_SEQUENCE_INCREMENT );
-
- my $ff_ok =
- ( $iff > 0
- && $types_to_go[$iff] eq ':'
- && $type_sequence_to_go[$iff] == $seqno );
- my $fff_ok =
- ( $ifff > 0
- && $types_to_go[$ifff] eq ':'
- && $type_sequence_to_go[$ifff] ==
- $seqno + TYPE_SEQUENCE_INCREMENT );
-
- # we require that this '?' be part of a correct sequence
- # of 3 in a row or else no recombination is done.
- next
- unless ( ( $ff_ok || $mm_ok ) && ( $f_ok || $fff_ok ) );
- $forced_breakpoint_to_go[$imid] = 0;
- }
+ # handle lines with leading &&, ||
+ elsif ( $is_amp_amp{$type_ibeg_2} ) {
+
+ $leading_amp_count++;
+
+ # ok to recombine if it follows a ? or :
+ # and is followed by an open paren..
+ my $ok =
+ ( $is_ternary{$type_ibeg_1}
+ && $tokens_to_go[$iend_2] eq '(' )
- # do not recombine lines with leading '.'
- elsif ( $types_to_go[$imidr] =~ /^(\.)$/ ) {
- my $i_next_nonblank = $imidr + 1;
- if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
- $i_next_nonblank++;
+ # or is followed by a ? or : at same depth
+ #
+ # We are looking for something like this. We can
+ # recombine the && line with the line above to make the
+ # structure more clear:
+ # return
+ # exists $G->{Attr}->{V}
+ # && exists $G->{Attr}->{V}->{$u}
+ # ? %{ $G->{Attr}->{V}->{$u} }
+ # : ();
+ #
+ # We should probably leave something like this alone:
+ # return
+ # exists $G->{Attr}->{E}
+ # && exists $G->{Attr}->{E}->{$u}
+ # && exists $G->{Attr}->{E}->{$u}->{$v}
+ # ? %{ $G->{Attr}->{E}->{$u}->{$v} }
+ # : ();
+ # so that we either have all of the &&'s (or ||'s)
+ # on one line, as in the first example, or break at
+ # each one as in the second example. However, it
+ # sometimes makes things worse to check for this because
+ # it prevents multiple recombinations. So this is not done.
+ || ( $ibeg_3 >= 0
+ && $is_ternary{ $types_to_go[$ibeg_3] }
+ && $nesting_depth_to_go[$ibeg_3] ==
+ $nesting_depth_to_go[$ibeg_2] );
+
+ next if !$ok && $want_break_before{$type_ibeg_2};
+ $forced_breakpoint_to_go[$iend_1] = 0;
+
+ # tweak the bond strength to give this joint priority
+ # over ? and :
+ $bs_tweak = 0.25;
+ }
+
+ # Identify and recombine a broken ?/: chain
+ elsif ( $type_ibeg_2 eq '?' ) {
+
+ # Do not recombine different levels
+ my $lev = $levels_to_go[$ibeg_2];
+ next if ( $lev ne $levels_to_go[$ibeg_1] );
+
+ # Do not recombine a '?' if either next line or
+ # previous line does not start with a ':'. The reasons
+ # are that (1) no alignment of the ? will be possible
+ # and (2) the expression is somewhat complex, so the
+ # '?' is harder to see in the interior of the line.
+ my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
+ my $precedes_colon =
+ $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
+ next unless ( $follows_colon || $precedes_colon );
+
+ # we will always combining a ? line following a : line
+ if ( !$follows_colon ) {
+
+ # ...otherwise recombine only if it looks like a chain.
+ # we will just look at a few nearby lines to see if
+ # this looks like a chain.
+ my $local_count = 0;
+ foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
+ $local_count++
+ if $ii >= 0
+ && $types_to_go[$ii] eq ':'
+ && $levels_to_go[$ii] == $lev;
+ }
+ next unless ( $local_count > 1 );
+ }
+ $forced_breakpoint_to_go[$iend_1] = 0;
}
- next
- unless (
+ # do not recombine lines with leading '.'
+ elsif ( $type_ibeg_2 eq '.' ) {
+ my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
+ next
+ unless (
# ... unless there is just one and we can reduce
# this to two lines if we do. For example, this
# $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
# . '$args .= $pat;'
- (
- $n == 2
- && $n == $nmax
- && $types_to_go[$if] ne $types_to_go[$imidr]
- )
-
- # ... or this would strand a short quote , like this
- # . "some long qoute"
- # . "\n";
-
- || ( $types_to_go[$i_next_nonblank] eq 'Q'
- && $i_next_nonblank >= $il - 1
- && length( $tokens_to_go[$i_next_nonblank] ) <
- $rOpts_short_concatenation_item_length )
- );
- }
-
- # handle leading keyword..
- elsif ( $types_to_go[$imidr] eq 'k' ) {
-
- # handle leading "and" and "or"
- if ( $is_and_or{ $tokens_to_go[$imidr] } ) {
-
- # Decide if we will combine a single terminal 'and' and
- # 'or' after an 'if' or 'unless'. We should consider the
- # possible vertical alignment, and visual clutter.
-
- # This looks best with the 'and' on the same
- # line as the 'if':
- #
- # $a = 1
- # if $seconds and $nu < 2;
- #
- # But this looks better as shown:
- #
- # $a = 1
- # if !$this->{Parents}{$_}
- # or $this->{Parents}{$_} eq $_;
- #
- # Eventually, it would be nice to look for
- # similarities (such as 'this' or 'Parents'), but
- # for now I'm using a simple rule that says that
- # the resulting line length must not be more than
- # half the maximum line length (making it 80/2 =
- # 40 characters by default).
- next
- unless (
- $this_line_is_semicolon_terminated
- && (
+ (
+ $n == 2
+ && $n == $nmax
+ && $type_ibeg_1 ne $type_ibeg_2
+ )
- # following 'if' or 'unless'
- $types_to_go[$if] eq 'k'
- && $is_if_unless{ $tokens_to_go[$if] }
+ # ... or this would strand a short quote , like this
+ # . "some long quote"
+ # . "\n";
- )
+ || ( $types_to_go[$i_next_nonblank] eq 'Q'
+ && $i_next_nonblank >= $iend_2 - 1
+ && $token_lengths_to_go[$i_next_nonblank] <
+ $rOpts_short_concatenation_item_length )
);
}
- # handle leading "if" and "unless"
- elsif ( $is_if_unless{ $tokens_to_go[$imidr] } ) {
+ # handle leading keyword..
+ elsif ( $type_ibeg_2 eq 'k' ) {
- # FIXME: This is still experimental..may not be too useful
- next
- unless (
- $this_line_is_semicolon_terminated
+ # handle leading "or"
+ if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
+ next
+ unless (
+ $this_line_is_semicolon_terminated
+ && (
+
+ # following 'if' or 'unless' or 'or'
+ $type_ibeg_1 eq 'k'
+ && $is_if_unless{ $tokens_to_go[$ibeg_1] }
+
+ # important: only combine a very simple or
+ # statement because the step below may have
+ # combined a trailing 'and' with this or,
+ # and we do not want to then combine
+ # everything together
+ && ( $iend_2 - $ibeg_2 <= 7 )
+ )
+ );
- # previous line begins with 'and' or 'or'
- && $types_to_go[$if] eq 'k'
- && $is_and_or{ $tokens_to_go[$if] }
+ #X: RT #81854
+ $forced_breakpoint_to_go[$iend_1] = 0
+ unless $old_breakpoint_to_go[$iend_1];
+ }
- );
- }
+ # handle leading 'and'
+ elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) {
- # handle all other leading keywords
- else {
+ # Decide if we will combine a single terminal 'and'
+ # after an 'if' or 'unless'.
- # keywords look best at start of lines,
- # but combine things like "1 while"
- unless ( $is_assignment{ $types_to_go[$imid] } ) {
+ # This looks best with the 'and' on the same
+ # line as the 'if':
+ #
+ # $a = 1
+ # if $seconds and $nu < 2;
+ #
+ # But this looks better as shown:
+ #
+ # $a = 1
+ # if !$this->{Parents}{$_}
+ # or $this->{Parents}{$_} eq $_;
+ #
next
- if ( ( $types_to_go[$imid] ne 'k' )
- && ( $tokens_to_go[$imidr] ne 'while' ) );
+ unless (
+ $this_line_is_semicolon_terminated
+ && (
+
+ # following 'if' or 'unless' or 'or'
+ $type_ibeg_1 eq 'k'
+ && ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
+ || $tokens_to_go[$ibeg_1] eq 'or' )
+ )
+ );
}
- }
- }
- # similar treatment of && and || as above for 'and' and 'or':
- # NOTE: This block of code is currently bypassed because
- # of a previous block but is retained for possible future use.
- elsif ( $types_to_go[$imidr] =~ /^(&&|\|\|)$/ ) {
+ # handle leading "if" and "unless"
+ elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
- # maybe looking at something like:
- # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
+ # FIXME: This is still experimental..may not be too useful
+ next
+ unless (
+ $this_line_is_semicolon_terminated
- next
- unless (
- $this_line_is_semicolon_terminated
+ # previous line begins with 'and' or 'or'
+ && $type_ibeg_1 eq 'k'
+ && $is_and_or{ $tokens_to_go[$ibeg_1] }
- # previous line begins with an 'if' or 'unless' keyword
- && $types_to_go[$if] eq 'k'
- && $is_if_unless{ $tokens_to_go[$if] }
+ );
+ }
- );
- }
+ # handle all other leading keywords
+ else {
- # handle leading + - * /
- elsif ( $types_to_go[$imidr] =~ /^[\+\-\*\/]$/ ) {
- my $i_next_nonblank = $imidr + 1;
- if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
- $i_next_nonblank++;
+ # keywords look best at start of lines,
+ # but combine things like "1 while"
+ unless ( $is_assignment{$type_iend_1} ) {
+ next
+ if ( ( $type_iend_1 ne 'k' )
+ && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
+ }
+ }
}
- my $i_next_next = $i_next_nonblank + 1;
- $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
+ # similar treatment of && and || as above for 'and' and 'or':
+ # NOTE: This block of code is currently bypassed because
+ # of a previous block but is retained for possible future use.
+ elsif ( $is_amp_amp{$type_ibeg_2} ) {
- next
- unless (
+ # maybe looking at something like:
+ # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
- # unless there is just one and we can reduce
- # this to two lines if we do. For example, this
- (
- $n == 2
- && $n == $nmax
- && $types_to_go[$if] ne $types_to_go[$imidr]
- )
+ next
+ unless (
+ $this_line_is_semicolon_terminated
- # do not strand numbers
- || (
- $types_to_go[$i_next_nonblank] eq 'n'
- && ( $i_next_nonblank >= $il - 1
- || $types_to_go[$i_next_next] eq ';' )
- )
- );
- }
+ # previous line begins with an 'if' or 'unless' keyword
+ && $type_ibeg_1 eq 'k'
+ && $is_if_unless{ $tokens_to_go[$ibeg_1] }
- # handle line with leading = or similar
- elsif ( $is_assignment{ $types_to_go[$imidr] } ) {
- next unless $n == 1;
- my $ifnmax = $$ri_first[$nmax];
- next
- unless (
+ );
+ }
- # unless we can reduce this to two lines
- $nmax == 2
+ # handle line with leading = or similar
+ elsif ( $is_assignment{$type_ibeg_2} ) {
+ next unless ( $n == 1 || $n == $nmax );
+ next if $old_breakpoint_to_go[$iend_1];
+ next
+ unless (
- # or three lines, the last with a leading semicolon
- || ( $nmax == 3 && $types_to_go[$ifnmax] eq ';' )
+ # unless we can reduce this to two lines
+ $nmax == 2
- # or the next line ends with a here doc
- || $types_to_go[$il] eq 'h'
- );
- }
+ # or three lines, the last with a leading semicolon
+ || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
- #----------------------------------------------------------
- # Section 3:
- # Combine the lines if we arrive here and it is possible
- #----------------------------------------------------------
+ # or the next line ends with a here doc
+ || $type_iend_2 eq 'h'
- # honor hard breakpoints
- next if ( $forced_breakpoint_to_go[$imid] > 0 );
+ # or this is a short line ending in ;
+ || ( $n == $nmax && $this_line_is_semicolon_terminated )
+ );
+ $forced_breakpoint_to_go[$iend_1] = 0;
+ }
- my $bs = $bond_strength_to_go[$imid];
+ #----------------------------------------------------------
+ # Recombine Section 4:
+ # Combine the lines if we arrive here and it is possible
+ #----------------------------------------------------------
- # combined line cannot be too long
- next
- if excess_line_length( $if, $il ) > 0;
+ # honor hard breakpoints
+ next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
- # do not recombine if we would skip in indentation levels
- if ( $n < $nmax ) {
- my $if_next = $$ri_first[ $n + 1 ];
- next
- if (
- $levels_to_go[$if] < $levels_to_go[$imidr]
- && $levels_to_go[$imidr] < $levels_to_go[$if_next]
+ my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
- # but an isolated 'if (' is undesirable
- && !(
- $n == 1
- && $imid - $if <= 2
- && $types_to_go[$if] eq 'k'
- && $tokens_to_go[$if] eq 'if'
- && $tokens_to_go[$imid] ne '('
- )
- );
- }
+ # Require a few extra spaces before recombining lines if we are
+ # at an old breakpoint unless this is a simple list or terminal
+ # line. The goal is to avoid oscillating between two
+ # quasi-stable end states. For example this snippet caused
+ # problems:
+## my $this =
+## bless {
+## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
+## },
+## $type;
+ next
+ if ( $old_breakpoint_to_go[$iend_1]
+ && !$this_line_is_semicolon_terminated
+ && $n < $nmax
+ && $excess + 4 > 0
+ && $type_iend_2 ne ',' );
- # honor no-break's
- next if ( $bs == NO_BREAK );
+ # do not recombine if we would skip in indentation levels
+ if ( $n < $nmax ) {
+ my $if_next = $ri_beg->[ $n + 1 ];
+ next
+ if (
+ $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
+ && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
+
+ # but an isolated 'if (' is undesirable
+ && !(
+ $n == 1
+ && $iend_1 - $ibeg_1 <= 2
+ && $type_ibeg_1 eq 'k'
+ && $tokens_to_go[$ibeg_1] eq 'if'
+ && $tokens_to_go[$iend_1] ne '('
+ )
+ );
+ }
- # remember the pair with the greatest bond strength
- if ( !$n_best ) {
- $n_best = $n;
- $bs_best = $bs;
- }
- else {
+ # honor no-break's
+ next if ( $bs >= NO_BREAK - 1 );
- if ( $bs > $bs_best ) {
+ # remember the pair with the greatest bond strength
+ if ( !$n_best ) {
$n_best = $n;
$bs_best = $bs;
}
+ else {
- # we have 2 or more candidates, so need another pass
- $more_to_do++;
+ if ( $bs > $bs_best ) {
+ $n_best = $n;
+ $bs_best = $bs;
+ }
+ }
}
- }
- # recombine the pair with the greatest bond strength
- if ($n_best) {
- splice @$ri_first, $n_best, 1;
- splice @$ri_last, $n_best - 1, 1;
+ # recombine the pair with the greatest bond strength
+ if ($n_best) {
+ splice @{$ri_beg}, $n_best, 1;
+ splice @{$ri_end}, $n_best - 1, 1;
+ splice @joint, $n_best, 1;
+
+ # keep going if we are still making progress
+ $more_to_do++;
+ }
}
+ return ( $ri_beg, $ri_end );
}
- return ( $ri_first, $ri_last );
-}
+} # end recombine_breakpoints
sub break_all_chain_tokens {
# statement. If we see a break at any one, break at all similar tokens
# within the same container.
#
- # TODO:
- # does not handle nested ?: operators correctly
- # coordinate better with ?: logic in set_continuation_breaks
my ( $ri_left, $ri_right ) = @_;
my %saw_chain_type;
my %left_chain_type;
my %right_chain_type;
my %interior_chain_type;
- my $nmax = @$ri_right - 1;
+ my $nmax = @{$ri_right} - 1;
# scan the left and right end tokens of all lines
my $count = 0;
for my $n ( 0 .. $nmax ) {
- my $il = $$ri_left[$n];
- my $ir = $$ri_right[$n];
+ my $il = $ri_left->[$n];
+ my $ir = $ri_right->[$n];
my $typel = $types_to_go[$il];
my $typer = $types_to_go[$ir];
$typel = '+' if ( $typel eq '-' ); # treat + and - the same
# now look for any interior tokens of the same types
$count = 0;
for my $n ( 0 .. $nmax ) {
- my $il = $$ri_left[$n];
- my $ir = $$ri_right[$n];
- for ( my $i = $il + 1 ; $i < $ir ; $i++ ) {
+ my $il = $ri_left->[$n];
+ my $ir = $ri_right->[$n];
+ foreach my $i ( $il + 1 .. $ir - 1 ) {
my $type = $types_to_go[$i];
$type = '+' if ( $type eq '-' );
$type = '*' if ( $type eq '/' );
foreach my $i ( @{ $left_chain_type{$type} } ) {
next unless in_same_container( $i, $itest );
push @insert_list, $itest - 1;
+
+ # Break at matching ? if this : is at a different level.
+ # For example, the ? before $THRf_DEAD in the following
+ # should get a break if its : gets a break.
+ #
+ # my $flags =
+ # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
+ # : ( $_ & 4 ) ? $THRf_R_DETACHED
+ # : $THRf_R_JOINABLE;
+ if ( $type eq ':'
+ && $levels_to_go[$i] != $levels_to_go[$itest] )
+ {
+ my $i_question = $mate_index_to_go[$itest];
+ if ( $i_question > 0 ) {
+ push @insert_list, $i_question - 1;
+ }
+ }
last;
}
}
foreach my $i ( @{ $right_chain_type{$type} } ) {
next unless in_same_container( $i, $itest );
push @insert_list, $itest;
+
+ # break at matching ? if this : is at a different level
+ if ( $type eq ':'
+ && $levels_to_go[$i] != $levels_to_go[$itest] )
+ {
+ my $i_question = $mate_index_to_go[$itest];
+ if ( $i_question >= 0 ) {
+ push @insert_list, $i_question;
+ }
+ }
last;
}
}
if (@insert_list) {
insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
}
+ return;
+}
+
+sub break_equals {
+
+ # Look for assignment operators that could use a breakpoint.
+ # For example, in the following snippet
+ #
+ # $HOME = $ENV{HOME}
+ # || $ENV{LOGDIR}
+ # || $pw[7]
+ # || die "no home directory for user $<";
+ #
+ # we could break at the = to get this, which is a little nicer:
+ # $HOME =
+ # $ENV{HOME}
+ # || $ENV{LOGDIR}
+ # || $pw[7]
+ # || die "no home directory for user $<";
+ #
+ # The logic here follows the logic in set_logical_padding, which
+ # will add the padding in the second line to improve alignment.
+ #
+ my ( $ri_left, $ri_right ) = @_;
+ my $nmax = @{$ri_right} - 1;
+ return unless ( $nmax >= 2 );
+
+ # scan the left ends of first two lines
+ my $tokbeg = "";
+ my $depth_beg;
+ for my $n ( 1 .. 2 ) {
+ my $il = $ri_left->[$n];
+ my $typel = $types_to_go[$il];
+ my $tokenl = $tokens_to_go[$il];
+
+ my $has_leading_op = ( $tokenl =~ /^\w/ )
+ ? $is_chain_operator{$tokenl} # + - * / : ? && ||
+ : $is_chain_operator{$typel}; # and, or
+ return unless ($has_leading_op);
+ if ( $n > 1 ) {
+ return
+ unless ( $tokenl eq $tokbeg
+ && $nesting_depth_to_go[$il] eq $depth_beg );
+ }
+ $tokbeg = $tokenl;
+ $depth_beg = $nesting_depth_to_go[$il];
+ }
+
+ # now look for any interior tokens of the same types
+ my $il = $ri_left->[0];
+ my $ir = $ri_right->[0];
+
+ # now make a list of all new break points
+ my @insert_list;
+ for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
+ my $type = $types_to_go[$i];
+ if ( $is_assignment{$type}
+ && $nesting_depth_to_go[$i] eq $depth_beg )
+ {
+ if ( $want_break_before{$type} ) {
+ push @insert_list, $i - 1;
+ }
+ else {
+ push @insert_list, $i;
+ }
+ }
+ }
+
+ # Break after a 'return' followed by a chain of operators
+ # return ( $^O !~ /win32|dos/i )
+ # && ( $^O ne 'VMS' )
+ # && ( $^O ne 'OS2' )
+ # && ( $^O ne 'MacOS' );
+ # To give:
+ # return
+ # ( $^O !~ /win32|dos/i )
+ # && ( $^O ne 'VMS' )
+ # && ( $^O ne 'OS2' )
+ # && ( $^O ne 'MacOS' );
+ my $i = 0;
+ if ( $types_to_go[$i] eq 'k'
+ && $tokens_to_go[$i] eq 'return'
+ && $ir > $il
+ && $nesting_depth_to_go[$i] eq $depth_beg )
+ {
+ push @insert_list, $i;
+ }
+
+ return unless (@insert_list);
+
+ # One final check...
+ # scan second and third lines and be sure there are no assignments
+ # we want to avoid breaking at an = to make something like this:
+ # unless ( $icon =
+ # $html_icons{"$type-$state"}
+ # or $icon = $html_icons{$type}
+ # or $icon = $html_icons{$state} )
+ for my $n ( 1 .. 2 ) {
+ my $il = $ri_left->[$n];
+ my $ir = $ri_right->[$n];
+ foreach my $i ( $il + 1 .. $ir ) {
+ my $type = $types_to_go[$i];
+ return
+ if ( $is_assignment{$type}
+ && $nesting_depth_to_go[$i] eq $depth_beg );
+ }
+ }
+
+ # ok, insert any new break point
+ if (@insert_list) {
+ insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
+ }
+ return;
+}
+
+sub insert_final_breaks {
+
+ my ( $ri_left, $ri_right ) = @_;
+
+ my $nmax = @{$ri_right} - 1;
+
+ # scan the left and right end tokens of all lines
+ my $count = 0;
+ my $i_first_colon = -1;
+ for my $n ( 0 .. $nmax ) {
+ my $il = $ri_left->[$n];
+ my $ir = $ri_right->[$n];
+ my $typel = $types_to_go[$il];
+ my $typer = $types_to_go[$ir];
+ return if ( $typel eq '?' );
+ return if ( $typer eq '?' );
+ if ( $typel eq ':' ) { $i_first_colon = $il; last; }
+ elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
+ }
+
+ # For long ternary chains,
+ # if the first : we see has its # ? is in the interior
+ # of a preceding line, then see if there are any good
+ # breakpoints before the ?.
+ if ( $i_first_colon > 0 ) {
+ my $i_question = $mate_index_to_go[$i_first_colon];
+ if ( $i_question > 0 ) {
+ my @insert_list;
+ for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
+ my $token = $tokens_to_go[$ii];
+ my $type = $types_to_go[$ii];
+
+ # For now, a good break is either a comma or a 'return'.
+ if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' )
+ && in_same_container( $ii, $i_question ) )
+ {
+ push @insert_list, $ii;
+ last;
+ }
+ }
+
+ # insert any new break points
+ if (@insert_list) {
+ insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
+ }
+ }
+ }
+ return;
}
sub in_same_container {
my $depth = $nesting_depth_to_go[$i1];
return unless ( $nesting_depth_to_go[$i2] == $depth );
if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
- for ( my $i = $i1 + 1 ; $i < $i2 ; $i++ ) {
+
+ ###########################################################
+ # This is potentially a very slow routine and not critical.
+ # For safety just give up for large differences.
+ # See test file 'infinite_loop.txt'
+ # TODO: replace this loop with a data structure
+ ###########################################################
+ return if ( $i2 - $i1 > 200 );
+
+ foreach my $i ( $i1 + 1 .. $i2 - 1 ) {
next if ( $nesting_depth_to_go[$i] > $depth );
return if ( $nesting_depth_to_go[$i] < $depth );
# see if any ?/:'s are in order
my $colons_in_order = 1;
my $last_tok = "";
- my @colon_list = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ];
+ my @colon_list = grep /^[\?\:]$/, @types_to_go[ 0 .. $max_index_to_go ];
+ my $colon_count = @colon_list;
foreach (@colon_list) {
if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
$last_tok = $_;
#-------------------------------------------------------
while ( $i_begin <= $imax ) {
my $lowest_strength = NO_BREAK;
- my $starting_sum = $lengths_to_go[$i_begin];
+ my $starting_sum = $summed_lengths_to_go[$i_begin];
my $i_lowest = -1;
my $i_test = -1;
my $lowest_next_token = '';
# BEGINNING of inner loop to find the best next breakpoint
#-------------------------------------------------------
for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
- my $type = $types_to_go[$i_test];
- my $token = $tokens_to_go[$i_test];
- my $next_type = $types_to_go[ $i_test + 1 ];
- my $next_token = $tokens_to_go[ $i_test + 1 ];
- my $i_next_nonblank =
- ( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 );
+ my $type = $types_to_go[$i_test];
+ my $token = $tokens_to_go[$i_test];
+ my $next_type = $types_to_go[ $i_test + 1 ];
+ my $next_token = $tokens_to_go[ $i_test + 1 ];
+ my $i_next_nonblank = $inext_to_go[$i_test];
my $next_nonblank_type = $types_to_go[$i_next_nonblank];
my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
my $strength = $bond_strength_to_go[$i_test];
- my $must_break = 0;
+ my $maximum_line_length = maximum_line_length($i_begin);
+
+ # use old breaks as a tie-breaker. For example to
+ # prevent blinkers with -pbp in this code:
- # FIXME: TESTING: Might want to be able to break after these
- # force an immediate break at certain operators
- # with lower level than the start of the line
+##@keywords{
+## qw/ARG OUTPUT PROTO CONSTRUCTOR RETURNS DESC PARAMS SEEALSO EXAMPLE/}
+## = ();
+
+ # At the same time try to prevent a leading * in this code
+ # with the default formatting:
+ #
+## return
+## factorial( $a + $b - 1 ) / factorial( $a - 1 ) / factorial( $b - 1 )
+## * ( $x**( $a - 1 ) )
+## * ( ( 1 - $x )**( $b - 1 ) );
+
+ # reduce strength a bit to break ties at an old breakpoint ...
+ if (
+ $old_breakpoint_to_go[$i_test]
+
+ # which is a 'good' breakpoint, meaning ...
+ # we don't want to break before it
+ && !$want_break_before{$type}
+
+ # and either we want to break before the next token
+ # or the next token is not short (i.e. not a '*', '/' etc.)
+ && $i_next_nonblank <= $imax
+ && ( $want_break_before{$next_nonblank_type}
+ || $token_lengths_to_go[$i_next_nonblank] > 2
+ || $next_nonblank_type =~ /^[\,\(\[\{L]$/ )
+ )
+ {
+ $strength -= $tiny_bias;
+ }
+
+ # otherwise increase strength a bit if this token would be at the
+ # maximum line length. This is necessary to avoid blinking
+ # in the above example when the -iob flag is added.
+ else {
+ my $len =
+ $leading_spaces +
+ $summed_lengths_to_go[ $i_test + 1 ] -
+ $starting_sum;
+ if ( $len >= $maximum_line_length ) {
+ $strength += $tiny_bias;
+ }
+ }
+
+ my $must_break = 0;
+
+ # Force an immediate break at certain operators
+ # with lower level than the start of the line,
+ # unless we've already seen a better break.
+ #
+ ##############################################
+ # Note on an issue with a preceding ?
+ ##############################################
+ # We don't include a ? in the above list, but there may
+ # be a break at a previous ? if the line is long.
+ # Because of this we do not want to force a break if
+ # there is a previous ? on this line. For now the best way
+ # to do this is to not break if we have seen a lower strength
+ # point, which is probably a ?.
+ #
+ # Example of unwanted breaks we are avoiding at a '.' following a ?
+ # from pod2html using perltidy -gnu:
+ # )
+ # ? "\n<A NAME=\""
+ # . $value
+ # . "\">\n$text</A>\n"
+ # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
if (
(
$next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
)
&& ( $nesting_depth_to_go[$i_begin] >
$nesting_depth_to_go[$i_next_nonblank] )
+ && ( $strength <= $lowest_strength )
)
{
set_forced_breakpoint($i_next_nonblank);
# break between ) { in a continued line so that the '{' can
# be outdented
# See similar logic in scan_list which catches instances
- # where a line is just something like ') {'
- || ( $line_count
+ # where a line is just something like ') {'. We have to
+ # be careful because the corresponding block keyword might
+ # not be on the first line, such as 'for' here:
+ #
+ # eval {
+ # for ("a") {
+ # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
+ # }
+ # };
+ #
+ || (
+ $line_count
&& ( $token eq ')' )
&& ( $next_nonblank_type eq '{' )
&& ($next_nonblank_block_type)
- && !$rOpts->{'opening-brace-always-on-right'} )
+ && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
+
+ # RT #104427: Dont break before opening sub brace because
+ # sub block breaks handled at higher level, unless
+ # it looks like the preceeding list is long and broken
+ && !(
+ $next_nonblank_block_type =~ /^sub\b/
+ && ( $nesting_depth_to_go[$i_begin] ==
+ $nesting_depth_to_go[$i_next_nonblank] )
+ )
+
+ && !$rOpts->{'opening-brace-always-on-right'}
+ )
# There is an implied forced break at a terminal opening brace
|| ( ( $type eq '{' ) && ( $i_test == $imax ) )
# Forced breakpoints must sometimes be overridden, for example
# because of a side comment causing a NO_BREAK. It is easier
# to catch this here than when they are set.
- if ( $strength < NO_BREAK ) {
+ if ( $strength < NO_BREAK - 1 ) {
$strength = $lowest_strength - $tiny_bias;
$must_break = 1;
}
&& (
(
$leading_spaces +
- $lengths_to_go[ $i_next_nonblank + 1 ] -
+ $summed_lengths_to_go[ $i_next_nonblank + 1 ] -
$starting_sum
- ) > $rOpts_maximum_line_length
+ ) > $maximum_line_length
)
)
{
# Avoid a break which would strand a single punctuation
# token. For example, we do not want to strand a leading
# '.' which is followed by a long quoted string.
+ # But note that we do want to do this with -extrude (l=1)
+ # so please test any changes to this code on -extrude.
if (
!$must_break
&& ( $i_test == $i_begin )
&& (
(
$leading_spaces +
- $lengths_to_go[ $i_test + 1 ] -
+ $summed_lengths_to_go[ $i_test + 1 ] -
$starting_sum
- ) <= $rOpts_maximum_line_length
+ ) < $maximum_line_length
)
)
{
- $i_test++;
-
- if ( ( $i_test < $imax ) && ( $next_type eq 'b' ) ) {
- $i_test++;
- }
+ $i_test = min( $imax, $inext_to_go[$i_test] );
redo;
}
# break It is only called if a breakpoint is required or
# desired. This will probably need some adjustments
# over time. A goal is to try to be sure that, if a new
- # side comment is introduced into formated text, then
+ # side comment is introduced into formatted text, then
# the same breakpoints will occur. scbreak.t
last
if (
- $i_test == $imax # we are at the end
- && !$forced_breakpoint_count #
- && $saw_good_break # old line had good break
- && $type =~ /^[#;\{]$/ # and this line ends in
- # ';' or side comment
- && $i_last_break < 0 # and we haven't made a break
- && $i_lowest > 0 # and we saw a possible break
- && $i_lowest < $imax - 1 # (but not just before this ;)
+ $i_test == $imax # we are at the end
+ && !$forced_breakpoint_count #
+ && $saw_good_break # old line had good break
+ && $type =~ /^[#;\{]$/ # and this line ends in
+ # ';' or side comment
+ && $i_last_break < 0 # and we haven't made a break
+ && $i_lowest >= 0 # and we saw a possible break
+ && $i_lowest < $imax - 1 # (but not just before this ;)
&& $strength - $lowest_strength < 0.5 * WEAK # and it's good
);
+ # Do not skip past an important break point in a short final
+ # segment. For example, without this check we would miss the
+ # break at the final / in the following code:
+ #
+ # $depth_stop =
+ # ( $tau * $mass_pellet * $q_0 *
+ # ( 1. - exp( -$t_stop / $tau ) ) -
+ # 4. * $pi * $factor * $k_ice *
+ # ( $t_melt - $t_ice ) *
+ # $r_pellet *
+ # $t_stop ) /
+ # ( $rho_ice * $Qs * $pi * $r_pellet**2 );
+ #
+ if ( $line_count > 2
+ && $i_lowest < $i_test
+ && $i_test > $imax - 2
+ && $nesting_depth_to_go[$i_begin] >
+ $nesting_depth_to_go[$i_lowest]
+ && $lowest_strength < $last_break_strength - .5 * WEAK )
+ {
+ # Make this break for math operators for now
+ my $ir = $inext_to_go[$i_lowest];
+ my $il = $iprev_to_go[$ir];
+ last
+ if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
+ || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ );
+ }
+
+ # Update the minimum bond strength location
$lowest_strength = $strength;
$i_lowest = $i_test;
$lowest_next_token = $next_nonblank_token;
&& ( $lowest_strength - $last_break_strength <= $max_bias )
)
{
- my $i_last_end = $i_begin - 1;
- if ( $types_to_go[$i_last_end] eq 'b' ) { $i_last_end -= 1 }
- my $tok_beg = $tokens_to_go[$i_begin];
- my $type_beg = $types_to_go[$i_begin];
+ my $i_last_end = $iprev_to_go[$i_begin];
+ my $tok_beg = $tokens_to_go[$i_begin];
+ my $type_beg = $types_to_go[$i_begin];
if (
# check for leading alignment of certain tokens
}
}
- my $too_long =
- ( $i_test >= $imax )
- ? 1
- : (
- (
- $leading_spaces +
- $lengths_to_go[ $i_test + 2 ] -
- $starting_sum
- ) > $rOpts_maximum_line_length
- );
+ my $too_long = ( $i_test >= $imax );
+ if ( !$too_long ) {
+ my $next_length =
+ $leading_spaces +
+ $summed_lengths_to_go[ $i_test + 2 ] -
+ $starting_sum;
+ $too_long = $next_length > $maximum_line_length;
+
+ # To prevent blinkers we will avoid leaving a token exactly at
+ # the line length limit unless it is the last token or one of
+ # several "good" types.
+ #
+ # The following code was a blinker with -pbp before this
+ # modification:
+## $last_nonblank_token eq '('
+## && $is_indirect_object_taker{ $paren_type
+## [$paren_depth] }
+ # The issue causing the problem is that if the
+ # term [$paren_depth] gets broken across a line then
+ # the whitespace routine doesn't see both opening and closing
+ # brackets and will format like '[ $paren_depth ]'. This
+ # leads to an oscillation in length depending if we break
+ # before the closing bracket or not.
+ if ( !$too_long
+ && $i_test + 1 < $imax
+ && $next_nonblank_type !~ /^[,\}\]\)R]$/ )
+ {
+ $too_long = $next_length >= $maximum_line_length;
+ }
+ }
FORMATTER_DEBUG_FLAG_BREAK
- && print
-"BREAK: testing i = $i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type leading sp=($leading_spaces) next length = $lengths_to_go[$i_test+2] too_long=$too_long str=$strength\n";
+ && do {
+ my $ltok = $token;
+ my $rtok = $next_nonblank_token ? $next_nonblank_token : "";
+ my $i_testp2 = $i_test + 2;
+ if ( $i_testp2 > $max_index_to_go + 1 ) {
+ $i_testp2 = $max_index_to_go + 1;
+ }
+ if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
+ if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
+ print STDOUT
+"BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] 2long=$too_long str=$strength $ltok $rtok\n";
+ };
# allow one extra terminal token after exceeding line length
# if it would strand this token.
if ( $rOpts_fuzzy_line_length
&& $too_long
- && ( $i_lowest == $i_test )
- && ( length($token) > 1 )
- && ( $next_nonblank_type =~ /^[\;\,]$/ ) )
+ && $i_lowest == $i_test
+ && $token_lengths_to_go[$i_test] > 1
+ && $next_nonblank_type =~ /^[\;\,]$/ )
{
$too_long = 0;
}
if ( $i_lowest < 0 ) { $i_lowest = $imax }
# semi-final index calculation
- my $i_next_nonblank = (
- ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
- ? $i_lowest + 2
- : $i_lowest + 1
- );
+ my $i_next_nonblank = $inext_to_go[$i_lowest];
my $next_nonblank_type = $types_to_go[$i_next_nonblank];
my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
# ?/: rule 1 : if a break here will separate a '?' on this
# line from its closing ':', then break at the '?' instead.
#-------------------------------------------------------
- my $i;
- foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) {
+ foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
next unless ( $tokens_to_go[$i] eq '?' );
# do not break if probable sequence of ?/: statements
#-------------------------------------------------------
# final index calculation
- $i_next_nonblank = (
- ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
- ? $i_lowest + 2
- : $i_lowest + 1
- );
+ $i_next_nonblank = $inext_to_go[$i_lowest];
$next_nonblank_type = $types_to_go[$i_next_nonblank];
$next_nonblank_token = $tokens_to_go[$i_next_nonblank];
FORMATTER_DEBUG_FLAG_BREAK
- && print "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
+ && print STDOUT
+ "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
#-------------------------------------------------------
# ?/: rule 2 : if we break at a '?', then break at its ':'
my $i_question = $mate_index_to_go[$_];
if ( $i_question >= 0 ) {
if ( $want_break_before{'?'} ) {
- $i_question--;
- if ( $i_question > 0
- && $types_to_go[$i_question] eq 'b' )
- {
- $i_question--;
- }
+ $i_question = $iprev_to_go[$i_question];
}
if ( $i_question >= 0 ) {
}
}
}
- return \@i_first, \@i_last;
+ return ( \@i_first, \@i_last, $colon_count );
}
sub insert_additional_breaks {
my $i_f;
my $i_l;
my $line_number = 0;
- my $i_break_left;
- foreach $i_break_left ( sort { $a <=> $b } @$ri_break_list ) {
+ foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
- $i_f = $$ri_first[$line_number];
- $i_l = $$ri_last[$line_number];
+ $i_f = $ri_first->[$line_number];
+ $i_l = $ri_last->[$line_number];
while ( $i_break_left >= $i_l ) {
$line_number++;
# shouldn't happen unless caller passes bad indexes
- if ( $line_number >= @$ri_last ) {
+ if ( $line_number >= @{$ri_last} ) {
warning(
"Non-fatal program bug: couldn't set break at $i_break_left\n"
);
report_definite_bug();
return;
}
- $i_f = $$ri_first[$line_number];
- $i_l = $$ri_last[$line_number];
+ $i_f = $ri_first->[$line_number];
+ $i_l = $ri_last->[$line_number];
}
- my $i_break_right = $i_break_left + 1;
- if ( $types_to_go[$i_break_right] eq 'b' ) { $i_break_right++ }
+ # Do not leave a blank at the end of a line; back up if necessary
+ if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
+ my $i_break_right = $inext_to_go[$i_break_left];
if ( $i_break_left >= $i_f
&& $i_break_left < $i_l
&& $i_break_right > $i_f
&& $i_break_right <= $i_l )
{
- splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) );
- splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) );
+ splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
+ splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
}
}
+ return;
}
sub set_closing_breakpoint {
$postponed_breakpoint{$type_sequence} = 1;
}
}
+ return;
}
-# check to see if output line tabbing agrees with input line
-# this can be very useful for debugging a script which has an extra
-# or missing brace
sub compare_indentation_levels {
- my ( $python_indentation_level, $structural_indentation_level ) = @_;
- if ( ( $python_indentation_level ne $structural_indentation_level ) ) {
+ # check to see if output line tabbing agrees with input line
+ # this can be very useful for debugging a script which has an extra
+ # or missing brace
+ my ( $guessed_indentation_level, $structural_indentation_level ) = @_;
+ if ( $guessed_indentation_level ne $structural_indentation_level ) {
$last_tabbing_disagreement = $input_line_number;
if ($in_tabbing_disagreement) {
if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
write_logfile_entry(
-"Start indentation disagreement: input=$python_indentation_level; output=$structural_indentation_level\n"
+"Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
);
}
$in_tabbing_disagreement = $input_line_number;
$in_tabbing_disagreement = 0;
}
}
+ return;
}
#####################################################################
package Perl::Tidy::IndentationItem;
-# Indexes for indentation items
-use constant SPACES => 0; # total leading white spaces
-use constant LEVEL => 1; # the indentation 'level'
-use constant CI_LEVEL => 2; # the 'continuation level'
-use constant AVAILABLE_SPACES => 3; # how many left spaces available
- # for this level
-use constant CLOSED => 4; # index where we saw closing '}'
-use constant COMMA_COUNT => 5; # how many commas at this level?
-use constant SEQUENCE_NUMBER => 6; # output batch number
-use constant INDEX => 7; # index in output batch list
-use constant HAVE_CHILD => 8; # any dependents?
-use constant RECOVERABLE_SPACES => 9; # how many spaces to the right
- # we would like to move to get
- # alignment (negative if left)
-use constant ALIGN_PAREN => 10; # do we want to try to align
- # with an opening structure?
-use constant MARKED => 11; # if visited by corrector logic
-use constant STACK_DEPTH => 12; # indentation nesting depth
-use constant STARTING_INDEX => 13; # first token index of this level
-use constant ARROW_COUNT => 14; # how many =>'s
-
sub new {
# Create an 'indentation_item' which describes one level of leading
- # whitespace when the '-lp' indentation is used. We return
- # a reference to an anonymous array of associated variables.
- # See above constants for storage scheme.
+ # whitespace when the '-lp' indentation is used.
my (
$class, $spaces, $level,
$ci_level, $available_spaces, $index,
$gnu_sequence_number, $align_paren, $stack_depth,
$starting_index,
) = @_;
+
my $closed = -1;
my $arrow_count = 0;
my $comma_count = 0;
my $have_child = 0;
my $want_right_spaces = 0;
my $marked = 0;
- bless [
- $spaces, $level, $ci_level,
- $available_spaces, $closed, $comma_count,
- $gnu_sequence_number, $index, $have_child,
- $want_right_spaces, $align_paren, $marked,
- $stack_depth, $starting_index, $arrow_count,
- ], $class;
+
+ # DEFINITIONS:
+ # spaces => # total leading white spaces
+ # level => # the indentation 'level'
+ # ci_level => # the 'continuation level'
+ # available_spaces => # how many left spaces available
+ # # for this level
+ # closed => # index where we saw closing '}'
+ # comma_count => # how many commas at this level?
+ # sequence_number => # output batch number
+ # index => # index in output batch list
+ # have_child => # any dependents?
+ # recoverable_spaces => # how many spaces to the right
+ # # we would like to move to get
+ # # alignment (negative if left)
+ # align_paren => # do we want to try to align
+ # # with an opening structure?
+ # marked => # if visited by corrector logic
+ # stack_depth => # indentation nesting depth
+ # starting_index => # first token index of this level
+ # arrow_count => # how many =>'s
+
+ return bless {
+ _spaces => $spaces,
+ _level => $level,
+ _ci_level => $ci_level,
+ _available_spaces => $available_spaces,
+ _closed => $closed,
+ _comma_count => $comma_count,
+ _sequence_number => $gnu_sequence_number,
+ _index => $index,
+ _have_child => $have_child,
+ _recoverable_spaces => $want_right_spaces,
+ _align_paren => $align_paren,
+ _marked => $marked,
+ _stack_depth => $stack_depth,
+ _starting_index => $starting_index,
+ _arrow_count => $arrow_count,
+ }, $class;
}
-sub permanently_decrease_AVAILABLE_SPACES {
+sub permanently_decrease_available_spaces {
# make a permanent reduction in the available indentation spaces
# at one indentation item. NOTE: if there are child nodes, their
# total SPACES must be reduced by the caller.
my ( $item, $spaces_needed ) = @_;
- my $available_spaces = $item->get_AVAILABLE_SPACES();
+ my $available_spaces = $item->get_available_spaces();
my $deleted_spaces =
( $available_spaces > $spaces_needed )
? $spaces_needed
: $available_spaces;
- $item->decrease_AVAILABLE_SPACES($deleted_spaces);
+ $item->decrease_available_spaces($deleted_spaces);
$item->decrease_SPACES($deleted_spaces);
- $item->set_RECOVERABLE_SPACES(0);
+ $item->set_recoverable_spaces(0);
return $deleted_spaces;
}
-sub tentatively_decrease_AVAILABLE_SPACES {
+sub tentatively_decrease_available_spaces {
# We are asked to tentatively delete $spaces_needed of indentation
# for a indentation item. We may want to undo this later. NOTE: if
# there are child nodes, their total SPACES must be reduced by the
# caller.
my ( $item, $spaces_needed ) = @_;
- my $available_spaces = $item->get_AVAILABLE_SPACES();
+ my $available_spaces = $item->get_available_spaces();
my $deleted_spaces =
( $available_spaces > $spaces_needed )
? $spaces_needed
: $available_spaces;
- $item->decrease_AVAILABLE_SPACES($deleted_spaces);
+ $item->decrease_available_spaces($deleted_spaces);
$item->decrease_SPACES($deleted_spaces);
- $item->increase_RECOVERABLE_SPACES($deleted_spaces);
+ $item->increase_recoverable_spaces($deleted_spaces);
return $deleted_spaces;
}
-sub get_STACK_DEPTH {
+sub get_stack_depth {
my $self = shift;
- return $self->[STACK_DEPTH];
+ return $self->{_stack_depth};
}
-sub get_SPACES {
+sub get_spaces {
my $self = shift;
- return $self->[SPACES];
+ return $self->{_spaces};
}
-sub get_MARKED {
+sub get_marked {
my $self = shift;
- return $self->[MARKED];
+ return $self->{_marked};
}
-sub set_MARKED {
+sub set_marked {
my ( $self, $value ) = @_;
if ( defined($value) ) {
- $self->[MARKED] = $value;
+ $self->{_marked} = $value;
}
- return $self->[MARKED];
+ return $self->{_marked};
}
-sub get_AVAILABLE_SPACES {
+sub get_available_spaces {
my $self = shift;
- return $self->[AVAILABLE_SPACES];
+ return $self->{_available_spaces};
}
sub decrease_SPACES {
my ( $self, $value ) = @_;
if ( defined($value) ) {
- $self->[SPACES] -= $value;
+ $self->{_spaces} -= $value;
}
- return $self->[SPACES];
+ return $self->{_spaces};
}
-sub decrease_AVAILABLE_SPACES {
+sub decrease_available_spaces {
my ( $self, $value ) = @_;
if ( defined($value) ) {
- $self->[AVAILABLE_SPACES] -= $value;
+ $self->{_available_spaces} -= $value;
}
- return $self->[AVAILABLE_SPACES];
+ return $self->{_available_spaces};
}
-sub get_ALIGN_PAREN {
+sub get_align_paren {
my $self = shift;
- return $self->[ALIGN_PAREN];
+ return $self->{_align_paren};
}
-sub get_RECOVERABLE_SPACES {
+sub get_recoverable_spaces {
my $self = shift;
- return $self->[RECOVERABLE_SPACES];
+ return $self->{_recoverable_spaces};
}
-sub set_RECOVERABLE_SPACES {
+sub set_recoverable_spaces {
my ( $self, $value ) = @_;
if ( defined($value) ) {
- $self->[RECOVERABLE_SPACES] = $value;
+ $self->{_recoverable_spaces} = $value;
}
- return $self->[RECOVERABLE_SPACES];
+ return $self->{_recoverable_spaces};
}
-sub increase_RECOVERABLE_SPACES {
+sub increase_recoverable_spaces {
my ( $self, $value ) = @_;
if ( defined($value) ) {
- $self->[RECOVERABLE_SPACES] += $value;
+ $self->{_recoverable_spaces} += $value;
}
- return $self->[RECOVERABLE_SPACES];
+ return $self->{_recoverable_spaces};
}
-sub get_CI_LEVEL {
+sub get_ci_level {
my $self = shift;
- return $self->[CI_LEVEL];
+ return $self->{_ci_level};
}
-sub get_LEVEL {
+sub get_level {
my $self = shift;
- return $self->[LEVEL];
+ return $self->{_level};
}
-sub get_SEQUENCE_NUMBER {
+sub get_sequence_number {
my $self = shift;
- return $self->[SEQUENCE_NUMBER];
+ return $self->{_sequence_number};
}
-sub get_INDEX {
+sub get_index {
my $self = shift;
- return $self->[INDEX];
+ return $self->{_index};
}
-sub get_STARTING_INDEX {
+sub get_starting_index {
my $self = shift;
- return $self->[STARTING_INDEX];
+ return $self->{_starting_index};
}
-sub set_HAVE_CHILD {
+sub set_have_child {
my ( $self, $value ) = @_;
if ( defined($value) ) {
- $self->[HAVE_CHILD] = $value;
+ $self->{_have_child} = $value;
}
- return $self->[HAVE_CHILD];
+ return $self->{_have_child};
}
-sub get_HAVE_CHILD {
+sub get_have_child {
my $self = shift;
- return $self->[HAVE_CHILD];
+ return $self->{_have_child};
}
-sub set_ARROW_COUNT {
+sub set_arrow_count {
my ( $self, $value ) = @_;
if ( defined($value) ) {
- $self->[ARROW_COUNT] = $value;
+ $self->{_arrow_count} = $value;
}
- return $self->[ARROW_COUNT];
+ return $self->{_arrow_count};
}
-sub get_ARROW_COUNT {
+sub get_arrow_count {
my $self = shift;
- return $self->[ARROW_COUNT];
+ return $self->{_arrow_count};
}
-sub set_COMMA_COUNT {
+sub set_comma_count {
my ( $self, $value ) = @_;
if ( defined($value) ) {
- $self->[COMMA_COUNT] = $value;
+ $self->{_comma_count} = $value;
}
- return $self->[COMMA_COUNT];
+ return $self->{_comma_count};
}
-sub get_COMMA_COUNT {
+sub get_comma_count {
my $self = shift;
- return $self->[COMMA_COUNT];
+ return $self->{_comma_count};
}
-sub set_CLOSED {
+sub set_closed {
my ( $self, $value ) = @_;
if ( defined($value) ) {
- $self->[CLOSED] = $value;
+ $self->{_closed} = $value;
}
- return $self->[CLOSED];
+ return $self->{_closed};
}
-sub get_CLOSED {
+sub get_closed {
my $self = shift;
- return $self->[CLOSED];
+ return $self->{_closed};
}
#####################################################################
{
use strict;
- use Carp;
-
- use constant JMAX => 0;
- use constant JMAX_ORIGINAL_LINE => 1;
- use constant RTOKENS => 2;
- use constant RFIELDS => 3;
- use constant RPATTERNS => 4;
- use constant INDENTATION => 5;
- use constant LEADING_SPACE_COUNT => 6;
- use constant OUTDENT_LONG_LINES => 7;
- use constant LIST_TYPE => 8;
- use constant IS_HANGING_SIDE_COMMENT => 9;
- use constant RALIGNMENTS => 10;
- use constant MAXIMUM_LINE_LENGTH => 11;
- use constant RVERTICAL_TIGHTNESS_FLAGS => 12;
-
- my %_index_map;
- $_index_map{jmax} = JMAX;
- $_index_map{jmax_original_line} = JMAX_ORIGINAL_LINE;
- $_index_map{rtokens} = RTOKENS;
- $_index_map{rfields} = RFIELDS;
- $_index_map{rpatterns} = RPATTERNS;
- $_index_map{indentation} = INDENTATION;
- $_index_map{leading_space_count} = LEADING_SPACE_COUNT;
- $_index_map{outdent_long_lines} = OUTDENT_LONG_LINES;
- $_index_map{list_type} = LIST_TYPE;
- $_index_map{is_hanging_side_comment} = IS_HANGING_SIDE_COMMENT;
- $_index_map{ralignments} = RALIGNMENTS;
- $_index_map{maximum_line_length} = MAXIMUM_LINE_LENGTH;
- $_index_map{rvertical_tightness_flags} = RVERTICAL_TIGHTNESS_FLAGS;
-
- my @_default_data = ();
- $_default_data[JMAX] = undef;
- $_default_data[JMAX_ORIGINAL_LINE] = undef;
- $_default_data[RTOKENS] = undef;
- $_default_data[RFIELDS] = undef;
- $_default_data[RPATTERNS] = undef;
- $_default_data[INDENTATION] = undef;
- $_default_data[LEADING_SPACE_COUNT] = undef;
- $_default_data[OUTDENT_LONG_LINES] = undef;
- $_default_data[LIST_TYPE] = undef;
- $_default_data[IS_HANGING_SIDE_COMMENT] = undef;
- $_default_data[RALIGNMENTS] = [];
- $_default_data[MAXIMUM_LINE_LENGTH] = undef;
- $_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef;
-
+ ##use Carp;
+
+ my %default_data = (
+ jmax => undef,
+ jmax_original_line => undef,
+ rtokens => undef,
+ rfields => undef,
+ rpatterns => undef,
+ indentation => undef,
+ leading_space_count => undef,
+ outdent_long_lines => undef,
+ list_type => undef,
+ is_hanging_side_comment => undef,
+ ralignments => [],
+ maximum_line_length => undef,
+ rvertical_tightness_flags => undef
+ );
{
# methods to count object population
my $_count = 0;
- sub get_count { $_count; }
- sub _increment_count { ++$_count }
- sub _decrement_count { --$_count }
+ sub get_count { return $_count; }
+ sub _increment_count { return ++$_count }
+ sub _decrement_count { return --$_count }
}
# Constructor may be called as a class method
my ( $caller, %arg ) = @_;
my $caller_is_obj = ref($caller);
my $class = $caller_is_obj || $caller;
- no strict "refs";
- my $self = bless [], $class;
+ ##no strict "refs";
+ my $self = bless {}, $class;
- $self->[RALIGNMENTS] = [];
+ $self->{_ralignments} = [];
- my $index;
- foreach ( keys %_index_map ) {
- $index = $_index_map{$_};
- if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
- elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] }
- else { $self->[$index] = $_default_data[$index] }
+ foreach my $key ( keys %default_data ) {
+ my $_key = '_' . $key;
+
+ # Caller keys do not have an underscore
+ if ( exists $arg{$key} ) { $self->{$_key} = $arg{$key} }
+ elsif ($caller_is_obj) { $self->{$_key} = $caller->{$_key} }
+ else { $self->{$_key} = $default_data{$_key} }
}
$self->_increment_count();
return $self;
}
- sub DESTROY {
- $_[0]->_decrement_count();
- }
-
- sub get_jmax { $_[0]->[JMAX] }
- sub get_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] }
- sub get_rtokens { $_[0]->[RTOKENS] }
- sub get_rfields { $_[0]->[RFIELDS] }
- sub get_rpatterns { $_[0]->[RPATTERNS] }
- sub get_indentation { $_[0]->[INDENTATION] }
- sub get_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] }
- sub get_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] }
- sub get_list_type { $_[0]->[LIST_TYPE] }
- sub get_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] }
- sub get_rvertical_tightness_flags { $_[0]->[RVERTICAL_TIGHTNESS_FLAGS] }
-
- sub set_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) }
- sub get_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] }
- sub get_alignments { @{ $_[0]->[RALIGNMENTS] } }
- sub get_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() }
+ sub DESTROY {
+ my $self = shift;
+ $self->_decrement_count();
+ return;
+ }
+
+ sub get_jmax { my $self = shift; return $self->{_jmax} }
+
+ sub get_jmax_original_line {
+ my $self = shift;
+ return $self->{_jmax_original_line};
+ }
+ sub get_rtokens { my $self = shift; return $self->{_rtokens} }
+ sub get_rfields { my $self = shift; return $self->{_rfields} }
+ sub get_rpatterns { my $self = shift; return $self->{_rpatterns} }
+ sub get_indentation { my $self = shift; return $self->{_indentation} }
+
+ sub get_leading_space_count {
+ my $self = shift;
+ return $self->{_leading_space_count};
+ }
+
+ sub get_outdent_long_lines {
+ my $self = shift;
+ return $self->{_outdent_long_lines};
+ }
+ sub get_list_type { my $self = shift; return $self->{_list_type} }
+
+ sub get_is_hanging_side_comment {
+ my $self = shift;
+ return $self->{_is_hanging_side_comment};
+ }
+
+ sub get_rvertical_tightness_flags {
+ my $self = shift;
+ return $self->{_rvertical_tightness_flags};
+ }
+
+ sub set_column {
+ ## FIXME: does caller ever supply $val??
+ my ( $self, $j, $val ) = @_;
+ return $self->{_ralignments}->[$j]->set_column($val);
+ }
+
+ sub get_alignment {
+ my ( $self, $j ) = @_;
+ return $self->{_ralignments}->[$j];
+ }
+ sub get_alignments { my $self = shift; return @{ $self->{_ralignments} } }
+
+ sub get_column {
+ my ( $self, $j ) = @_;
+ return $self->{_ralignments}->[$j]->get_column();
+ }
sub get_starting_column {
- $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column();
+ my ( $self, $j ) = @_;
+ return $self->{_ralignments}->[$j]->get_starting_column();
}
sub increment_column {
- $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] );
+ my ( $self, $k, $pad ) = @_;
+ $self->{_ralignments}->[$k]->increment_column($pad);
+ return;
}
- sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; }
- sub current_field_width {
+ sub set_alignments {
my $self = shift;
- my ($j) = @_;
+ @{ $self->{_ralignments} } = @_;
+ return;
+ }
+
+ sub current_field_width {
+ my ( $self, $j ) = @_;
if ( $j == 0 ) {
return $self->get_column($j);
}
}
sub field_width_growth {
- my $self = shift;
- my $j = shift;
+ my ( $self, $j ) = @_;
return $self->get_column($j) - $self->get_starting_column($j);
}
sub starting_field_width {
- my $self = shift;
- my $j = shift;
+ my ( $self, $j ) = @_;
if ( $j == 0 ) {
return $self->get_starting_column($j);
}
sub increase_field_width {
- my $self = shift;
- my ( $j, $pad ) = @_;
+ my ( $self, $j, $pad ) = @_;
my $jmax = $self->get_jmax();
for my $k ( $j .. $jmax ) {
$self->increment_column( $k, $pad );
}
+ return;
}
sub get_available_space_on_right {
my $self = shift;
my $jmax = $self->get_jmax();
- return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax);
+ return $self->{_maximum_line_length} - $self->get_column($jmax);
+ }
+
+ sub set_jmax { my ( $self, $val ) = @_; $self->{_jmax} = $val; return }
+
+ sub set_jmax_original_line {
+ my ( $self, $val ) = @_;
+ $self->{_jmax_original_line} = $val;
+ return;
+ }
+
+ sub set_rtokens {
+ my ( $self, $val ) = @_;
+ $self->{_rtokens} = $val;
+ return;
+ }
+
+ sub set_rfields {
+ my ( $self, $val ) = @_;
+ $self->{_rfields} = $val;
+ return;
+ }
+
+ sub set_rpatterns {
+ my ( $self, $val ) = @_;
+ $self->{_rpatterns} = $val;
+ return;
+ }
+
+ sub set_indentation {
+ my ( $self, $val ) = @_;
+ $self->{_indentation} = $val;
+ return;
+ }
+
+ sub set_leading_space_count {
+ my ( $self, $val ) = @_;
+ $self->{_leading_space_count} = $val;
+ return;
}
- sub set_jmax { $_[0]->[JMAX] = $_[1] }
- sub set_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] = $_[1] }
- sub set_rtokens { $_[0]->[RTOKENS] = $_[1] }
- sub set_rfields { $_[0]->[RFIELDS] = $_[1] }
- sub set_rpatterns { $_[0]->[RPATTERNS] = $_[1] }
- sub set_indentation { $_[0]->[INDENTATION] = $_[1] }
- sub set_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] = $_[1] }
- sub set_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] = $_[1] }
- sub set_list_type { $_[0]->[LIST_TYPE] = $_[1] }
- sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] }
- sub set_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] = $_[2] }
+ sub set_outdent_long_lines {
+ my ( $self, $val ) = @_;
+ $self->{_outdent_long_lines} = $val;
+ return;
+ }
+
+ sub set_list_type {
+ my ( $self, $val ) = @_;
+ $self->{_list_type} = $val;
+ return;
+ }
+
+ sub set_is_hanging_side_comment {
+ my ( $self, $val ) = @_;
+ $self->{_is_hanging_side_comment} = $val;
+ return;
+ }
+
+ sub set_alignment {
+ my ( $self, $j, $val ) = @_;
+ $self->{_ralignments}->[$j] = $val;
+ return;
+ }
}
#use Carp;
- # Symbolic array indexes
- use constant COLUMN => 0; # the current column number
- use constant STARTING_COLUMN => 1; # column number when created
- use constant MATCHING_TOKEN => 2; # what token we are matching
- use constant STARTING_LINE => 3; # the line index of creation
- use constant ENDING_LINE => 4; # the most recent line to use it
- use constant SAVED_COLUMN => 5; # the most recent line to use it
- use constant SERIAL_NUMBER => 6; # unique number for this alignment
- # (just its index in an array)
-
- # Correspondence between variables and array indexes
- my %_index_map;
- $_index_map{column} = COLUMN;
- $_index_map{starting_column} = STARTING_COLUMN;
- $_index_map{matching_token} = MATCHING_TOKEN;
- $_index_map{starting_line} = STARTING_LINE;
- $_index_map{ending_line} = ENDING_LINE;
- $_index_map{saved_column} = SAVED_COLUMN;
- $_index_map{serial_number} = SERIAL_NUMBER;
-
- my @_default_data = ();
- $_default_data[COLUMN] = undef;
- $_default_data[STARTING_COLUMN] = undef;
- $_default_data[MATCHING_TOKEN] = undef;
- $_default_data[STARTING_LINE] = undef;
- $_default_data[ENDING_LINE] = undef;
- $_default_data[SAVED_COLUMN] = undef;
- $_default_data[SERIAL_NUMBER] = undef;
+ # _column # the current column number
+ # _starting_column # column number when created
+ # _matching_token # what token we are matching
+ # _starting_line # the line index of creation
+ # _ending_line
+ # the most recent line to use it
+ # _saved_column
+ # _serial_number # unique number for this alignment
+
+ my %default_data = (
+ column => undef,
+ starting_column => undef,
+ matching_token => undef,
+ starting_line => undef,
+ ending_line => undef,
+ saved_column => undef,
+ serial_number => undef,
+ );
# class population count
{
my $_count = 0;
- sub get_count { $_count; }
- sub _increment_count { ++$_count }
- sub _decrement_count { --$_count }
+ sub get_count { return $_count }
+ sub _increment_count { return ++$_count }
+ sub _decrement_count { return --$_count }
}
# constructor
my ( $caller, %arg ) = @_;
my $caller_is_obj = ref($caller);
my $class = $caller_is_obj || $caller;
- no strict "refs";
- my $self = bless [], $class;
+ ##no strict "refs";
+ my $self = bless {}, $class;
- foreach ( keys %_index_map ) {
- my $index = $_index_map{$_};
- if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
- elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] }
- else { $self->[$index] = $_default_data[$index] }
+ foreach my $key ( keys %default_data ) {
+ my $_key = '_' . $key;
+ if ( exists $arg{$key} ) { $self->{$_key} = $arg{$key} }
+ elsif ($caller_is_obj) { $self->{$_key} = $caller->{$_key} }
+ else { $self->{$_key} = $default_data{$_key} }
}
$self->_increment_count();
return $self;
}
sub DESTROY {
- $_[0]->_decrement_count();
+ my $self = shift;
+ $self->_decrement_count();
+ return;
+ }
+
+ sub get_column { my $self = shift; return $self->{_column} }
+
+ sub get_starting_column {
+ my $self = shift;
+ return $self->{_starting_column};
+ }
+ sub get_matching_token { my $self = shift; return $self->{_matching_token} }
+ sub get_starting_line { my $self = shift; return $self->{_starting_line} }
+ sub get_ending_line { my $self = shift; return $self->{_ending_line} }
+ sub get_serial_number { my $self = shift; return $self->{_serial_number} }
+
+ sub set_column { my ( $self, $val ) = @_; $self->{_column} = $val; return }
+
+ sub set_starting_column {
+ my ( $self, $val ) = @_;
+ $self->{_starting_column} = $val;
+ return;
+ }
+
+ sub set_matching_token {
+ my ( $self, $val ) = @_;
+ $self->{_matching_token} = $val;
+ return;
+ }
+
+ sub set_starting_line {
+ my ( $self, $val ) = @_;
+ $self->{_starting_line} = $val;
+ return;
}
- sub get_column { return $_[0]->[COLUMN] }
- sub get_starting_column { return $_[0]->[STARTING_COLUMN] }
- sub get_matching_token { return $_[0]->[MATCHING_TOKEN] }
- sub get_starting_line { return $_[0]->[STARTING_LINE] }
- sub get_ending_line { return $_[0]->[ENDING_LINE] }
- sub get_serial_number { return $_[0]->[SERIAL_NUMBER] }
+ sub set_ending_line {
+ my ( $self, $val ) = @_;
+ $self->{_ending_line} = $val;
+ return;
+ }
- sub set_column { $_[0]->[COLUMN] = $_[1] }
- sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] }
- sub set_matching_token { $_[0]->[MATCHING_TOKEN] = $_[1] }
- sub set_starting_line { $_[0]->[STARTING_LINE] = $_[1] }
- sub set_ending_line { $_[0]->[ENDING_LINE] = $_[1] }
- sub increment_column { $_[0]->[COLUMN] += $_[1] }
+ sub increment_column {
+ my ( $self, $val ) = @_;
+ $self->{_column} += $val;
+ return;
+ }
- sub save_column { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] }
- sub restore_column { $_[0]->[COLUMN] = $_[0]->[SAVED_COLUMN] }
+ sub save_column {
+ my $self = shift;
+ $self->{_saved_column} = $self->{_column};
+ return;
+ }
+ sub restore_column {
+ my $self = shift;
+ $self->{_column} = $self->{_saved_column};
+ return;
+ }
}
package Perl::Tidy::VerticalAligner;
# attempts to line up certain common tokens, such as => and #, which are
# identified by the calling routine.
#
-# There are two main routines: append_line and flush. Append acts as a
+# There are two main routines: valign_input and flush. Append acts as a
# storage buffer, collecting lines into a group which can be vertically
# aligned. When alignment is no longer possible or desirable, it dumps
# the group to flush.
#
-# append_line -----> flush
+# valign_input -----> flush
#
# collects writes
# vertical one
use constant VALIGN_DEBUG_FLAG_APPEND => 0;
use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
use constant VALIGN_DEBUG_FLAG_TERNARY => 0;
+ use constant VALIGN_DEBUG_FLAG_TABS => 0;
my $debug_warning = sub {
- print "VALIGN_DEBUGGING with key $_[0]\n";
+ print STDOUT "VALIGN_DEBUGGING with key $_[0]\n";
+ return;
};
VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND');
VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
+ VALIGN_DEBUG_FLAG_TERNARY && $debug_warning->('TERNARY');
+ VALIGN_DEBUG_FLAG_TABS && $debug_warning->('TABS');
}
$group_type
$group_maximum_gap
$marginal_match
- $last_group_level_written
+ $last_level_written
$last_leading_space_count
$extra_indent_ok
$zero_count
@side_comment_history
$comment_leading_space_count
$is_matching_terminal_line
+ $consecutive_block_comments
$cached_line_text
$cached_line_type
$cached_line_leading_space_count
$cached_seqno_string
+ $valign_buffer_filling
+ @valign_buffer
+
$seqno_string
$last_nonblank_seqno_string
$rOpts
$rOpts_maximum_line_length
+ $rOpts_variable_maximum_line_length
$rOpts_continuation_indentation
$rOpts_indent_columns
$rOpts_tabs
$rOpts_entab_leading_whitespace
$rOpts_valign
+ $rOpts_fixed_position_side_comment
$rOpts_minimum_space_to_comment
);
sub initialize {
- my $class;
-
- ( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object )
- = @_;
+ (
+ my $class, $rOpts, $file_writer_object, $logger_object,
+ $diagnostics_object
+ ) = @_;
# variables describing the entire space group:
$ralignment_list = [];
$group_level = 0;
- $last_group_level_written = -1;
+ $last_level_written = -1;
$extra_indent_ok = 0; # can we move all lines to the right?
$last_side_comment_length = 0;
$maximum_jmax_seen = 0;
$side_comment_history[1] = [ -200, 0 ];
$side_comment_history[2] = [ -100, 0 ];
- # write_leader_and_string cache:
+ # valign_output_step_B cache:
$cached_line_text = "";
$cached_line_type = 0;
$cached_line_flag = 0;
$rOpts_indent_columns = $rOpts->{'indent-columns'};
$rOpts_tabs = $rOpts->{'tabs'};
$rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
+ $rOpts_fixed_position_side_comment =
+ $rOpts->{'fixed-position-side-comment'};
$rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
$rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
- $rOpts_valign = $rOpts->{'valign'};
+ $rOpts_variable_maximum_line_length =
+ $rOpts->{'variable-maximum-line-length'};
+ $rOpts_valign = $rOpts->{'valign'};
+ $consecutive_block_comments = 0;
forget_side_comment();
initialize_for_new_group();
$marginal_match = 0;
$comment_leading_space_count = 0;
$last_leading_space_count = 0;
+ return;
}
# interface to Perl::Tidy::Diagnostics routines
sub write_diagnostics {
+ my $msg = shift;
if ($diagnostics_object) {
- $diagnostics_object->write_diagnostics(@_);
+ $diagnostics_object->write_diagnostics($msg);
}
+ return;
}
# interface to Perl::Tidy::Logger routines
sub warning {
+ my ($msg) = @_;
if ($logger_object) {
- $logger_object->warning(@_);
+ $logger_object->warning($msg);
}
+ return;
}
sub write_logfile_entry {
+ my ($msg) = @_;
if ($logger_object) {
- $logger_object->write_logfile_entry(@_);
+ $logger_object->write_logfile_entry($msg);
}
+ return;
}
sub report_definite_bug {
if ($logger_object) {
$logger_object->report_definite_bug();
}
+ return;
}
-sub get_SPACES {
+sub get_spaces {
# return the number of leading spaces associated with an indentation
# variable $indentation is either a constant number of spaces or an
- # object with a get_SPACES method.
+ # object with a get_spaces method.
my $indentation = shift;
- return ref($indentation) ? $indentation->get_SPACES() : $indentation;
+ return ref($indentation) ? $indentation->get_spaces() : $indentation;
}
-sub get_RECOVERABLE_SPACES {
+sub get_recoverable_spaces {
# return the number of spaces (+ means shift right, - means shift left)
# that we would like to shift a group of lines with the same indentation
# to get them to line up with their opening parens
my $indentation = shift;
- return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
+ return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
}
-sub get_STACK_DEPTH {
+sub get_stack_depth {
my $indentation = shift;
- return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0;
+ return ref($indentation) ? $indentation->get_stack_depth() : 0;
}
sub make_alignment {
}
sub dump_alignments {
- print
+ print STDOUT
"Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
for my $i ( 0 .. $maximum_alignment_index ) {
my $column = $ralignment_list->[$i]->get_column();
my $matching_token = $ralignment_list->[$i]->get_matching_token();
my $starting_line = $ralignment_list->[$i]->get_starting_line();
my $ending_line = $ralignment_list->[$i]->get_ending_line();
- print
+ print STDOUT
"$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
}
+ return;
}
sub save_alignment_columns {
for my $i ( 0 .. $maximum_alignment_index ) {
$ralignment_list->[$i]->save_column();
}
+ return;
}
sub restore_alignment_columns {
for my $i ( 0 .. $maximum_alignment_index ) {
$ralignment_list->[$i]->restore_column();
}
+ return;
}
sub forget_side_comment {
$last_comment_column = 0;
+ return;
+}
+
+sub maximum_line_length_for_level {
+
+ # return maximum line length for line starting with a given level
+ my $maximum_line_length = $rOpts_maximum_line_length;
+ if ($rOpts_variable_maximum_line_length) {
+ my $level = shift;
+ if ( $level < 0 ) { $level = 0 }
+ $maximum_line_length += $level * $rOpts_indent_columns;
+ }
+ return $maximum_line_length;
}
-sub append_line {
+sub valign_input {
- # sub append is called to place one line in the current vertical group.
+ # Place one line in the current vertical group.
#
# The input parameters are:
# $level = indentation level of this line
# first one is always at zero. The interior columns are at the start of
# the matching tokens, and the last one tracks the maximum line length.
#
- # Basically, each time a new line comes in, it joins the current vertical
+ # Each time a new line comes in, it joins the current vertical
# group if possible. Otherwise it causes the current group to be dumped
# and a new group is started.
#
# number of tokens between fields is $jmax-1
my $jmax = $#{$rfields};
- my $leading_space_count = get_SPACES($indentation);
+ my $leading_space_count = get_spaces($indentation);
# set outdented flag to be sure we either align within statements or
# across statement boundaries, but not both.
( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
$is_outdented = 0 if $is_hanging_side_comment;
+ # Forget side comment alignment after seeing 2 or more block comments
+ my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
+ if ($is_block_comment) {
+ $consecutive_block_comments++;
+ }
+ else {
+ if ( $consecutive_block_comments > 1 ) { forget_side_comment() }
+ $consecutive_block_comments = 0;
+ }
+
VALIGN_DEBUG_FLAG_APPEND0 && do {
- print
+ print STDOUT
"APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n";
};
&& $rvertical_tightness_flags->[2] == $cached_seqno )
{
$rvertical_tightness_flags->[3] ||= 1;
- $cached_line_valid ||= 1;
+ $cached_line_valid ||= 1;
}
}
# we are allowed to shift a group of lines to the right if its
# level is greater than the previous and next group
$extra_indent_ok =
- ( $level < $group_level && $last_group_level_written < $group_level );
+ ( $level < $group_level && $last_level_written < $group_level );
my_flush();
# wait until after the above flush to get the leading space
# count because it may have been changed if the -icp flag is in
# effect
- $leading_space_count = get_SPACES($indentation);
+ $leading_space_count = get_spaces($indentation);
}
# Patch to collect outdentable block COMMENTS
# --------------------------------------------------------------------
my $is_blank_line = "";
- my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
if ( $group_type eq 'COMMENT' ) {
if (
(
$zero_count++;
if ( $maximum_line_index >= 0
- && !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) )
+ && !get_recoverable_spaces( $group_lines[0]->get_indentation() ) )
{
# flush the current group if it has some aligned columns..
( $zero_count > 3 )
# ..or if this new line doesn't fit to the left of the comments
- || ( ( $leading_space_count + length( $$rfields[0] ) ) >
+ || ( ( $leading_space_count + length( $rfields->[0] ) ) >
$group_lines[0]->get_column(0) )
)
{
# just write this line directly if no current group, no side comment,
# and no space recovery is needed.
- if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) )
+ if ( $maximum_line_index < 0 && !get_recoverable_spaces($indentation) )
{
- write_leader_and_string( $leading_space_count, $$rfields[0], 0,
- $outdent_long_lines, $rvertical_tightness_flags );
+ valign_output_step_B( $leading_space_count, $rfields->[0], 0,
+ $outdent_long_lines, $rvertical_tightness_flags, $level );
return;
}
}
outdent_long_lines => $outdent_long_lines,
list_type => "",
is_hanging_side_comment => $is_hanging_side_comment,
- maximum_line_length => $rOpts->{'maximum-line-length'},
+ maximum_line_length => maximum_line_length_for_level($level),
rvertical_tightness_flags => $rvertical_tightness_flags,
);
# --------------------------------------------------------------------
# Append this line to the current group (or start new group)
# --------------------------------------------------------------------
- accept_line($new_line);
+ add_to_group($new_line);
# Future update to allow this to vary:
$current_line = $new_line if ( $maximum_line_index == 0 );
# Step 8. Some old debugging stuff
# --------------------------------------------------------------------
VALIGN_DEBUG_FLAG_APPEND && do {
- print "APPEND fields:";
- dump_array(@$rfields);
- print "APPEND tokens:";
- dump_array(@$rtokens);
- print "APPEND patterns:";
- dump_array(@$rpatterns);
+ print STDOUT "APPEND fields:";
+ dump_array( @{$rfields} );
+ print STDOUT "APPEND tokens:";
+ dump_array( @{$rtokens} );
+ print STDOUT "APPEND patterns:";
+ dump_array( @{$rpatterns} );
dump_alignments();
};
my $jmax = $line->get_jmax();
return 0 unless $jmax == 1; # must be 2 fields
my $rtokens = $line->get_rtokens();
- return 0 unless $$rtokens[0] eq '#'; # the second field is a comment..
+ return 0 unless $rtokens->[0] eq '#'; # the second field is a comment..
my $rfields = $line->get_rfields();
- return 0 unless $$rfields[0] =~ /^\s*$/; # the first field is empty...
+ return 0 unless $rfields->[0] =~ /^\s*$/; # the first field is empty...
my $old_line = shift;
my $maximum_field_index = $old_line->get_jmax();
return 0
$line->set_is_hanging_side_comment(1);
$jmax = $maximum_field_index;
$line->set_jmax($jmax);
- $$rfields[$jmax] = $$rfields[1];
- $$rtokens[ $jmax - 1 ] = $$rtokens[0];
- $$rpatterns[ $jmax - 1 ] = $$rpatterns[0];
- for ( my $j = 1 ; $j < $jmax ; $j++ ) {
- $$rfields[$j] = " "; # NOTE: caused glitch unless 1 blank, why?
- $$rtokens[ $j - 1 ] = "";
- $$rpatterns[ $j - 1 ] = "";
+ $rfields->[$jmax] = $rfields->[1];
+ $rtokens->[ $jmax - 1 ] = $rtokens->[0];
+ $rpatterns->[ $jmax - 1 ] = $rpatterns->[0];
+ foreach my $j ( 1 .. $jmax - 1 ) {
+ $rfields->[$j] = " "; # NOTE: caused glitch unless 1 blank, why?
+ $rtokens->[ $j - 1 ] = "";
+ $rpatterns->[ $j - 1 ] = "";
}
return 1;
}
my $old_line = shift;
my $maximum_field_index = $old_line->get_jmax();
- # this line must have fewer fields
- return unless $maximum_field_index > $jmax;
+ ###############################################
+ # Moved below to allow new coding for => matches
+ # return unless $maximum_field_index > $jmax;
+ ###############################################
# Identify specific cases where field elimination is allowed:
# case=1: both lines have comma-separated lists, and the first
my $case = 1;
# See if case 2: both lines have leading '='
- # We'll require smiliar leading patterns in this case
+ # We'll require similar leading patterns in this case
my $old_rtokens = $old_line->get_rtokens();
my $rtokens = $new_line->get_rtokens();
my $rpatterns = $new_line->get_rpatterns();
my $old_rpatterns = $old_line->get_rpatterns();
- if ( $rtokens->[0] =~ /^=\d*$/
- && $old_rtokens->[0] eq $rtokens->[0]
+ if ( $rtokens->[0] =~ /^=>?\d*$/
+ && $old_rtokens->[0] eq $rtokens->[0]
&& $old_rpatterns->[0] eq $rpatterns->[0] )
{
$case = 2;
my $old_rfields = $old_line->get_rfields();
return
if ( $case == 1
- && length( $$old_rfields[$maximum_field_index] ) == 0 );
+ && length( $old_rfields->[$maximum_field_index] ) == 0 );
my $rfields = $new_line->get_rfields();
my @new_matching_patterns = ();
my @new_matching_tokens = ();
- my $j = 0;
- my $k;
+ my $j = 0;
my $current_field = '';
my $current_pattern = '';
# loop over all old tokens
my $in_match = 0;
- for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) {
- $current_field .= $$old_rfields[$k];
- $current_pattern .= $$old_rpatterns[$k];
+ foreach my $k ( 0 .. $maximum_field_index - 1 ) {
+ $current_field .= $old_rfields->[$k];
+ $current_pattern .= $old_rpatterns->[$k];
last if ( $j > $jmax - 1 );
- if ( $$old_rtokens[$k] eq $$rtokens[$j] ) {
+ if ( $old_rtokens->[$k] eq $rtokens->[$j] ) {
$in_match = 1;
$new_fields[$j] = $current_field;
$new_matching_patterns[$j] = $current_pattern;
$current_field = '';
$current_pattern = '';
- $new_matching_tokens[$j] = $$old_rtokens[$k];
+ $new_matching_tokens[$j] = $old_rtokens->[$k];
$new_alignments[$j] = $old_line->get_alignment($k);
$j++;
}
else {
- if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) {
+ if ( $old_rtokens->[$k] =~ /^\=\d*$/ ) {
last if ( $case == 2 ); # avoid problems with stuff
# like: $a=$b=$c=$d;
$hid_equals = 1;
}
# Modify the current state if we are successful.
- # We must exactly reach the ends of both lists for success.
+ # We must exactly reach the ends of the new list for success, and the old
+ # pattern must have more fields. Here is an example where the first and
+ # second lines have the same number, and we should not align:
+ # my @a = map chr, 0 .. 255;
+ # my @b = grep /\W/, @a;
+ # my @c = grep /[^\w]/, @a;
+
+ # Otherwise, we would get all of the commas aligned, which doesn't work as
+ # well:
+ # my @a = map chr, 0 .. 255;
+ # my @b = grep /\W/, @a;
+ # my @c = grep /[^\w]/, @a;
+
if ( ( $j == $jmax )
&& ( $current_field eq '' )
- && ( $case != 1 || $hid_equals ) )
+ && ( $case != 1 || $hid_equals )
+ && ( $maximum_field_index > $jmax ) )
{
- $k = $maximum_field_index;
- $current_field .= $$old_rfields[$k];
- $current_pattern .= $$old_rpatterns[$k];
+ my $k = $maximum_field_index;
+ $current_field .= $old_rfields->[$k];
+ $current_pattern .= $old_rpatterns->[$k];
$new_fields[$j] = $current_field;
$new_matching_patterns[$j] = $current_pattern;
$old_line->set_jmax($jmax);
$old_line->set_rtokens( \@new_matching_tokens );
$old_line->set_rfields( \@new_fields );
- $old_line->set_rpatterns( \@$rpatterns );
+ $old_line->set_rpatterns( \@{$rpatterns} );
+ }
+
+ # Dumb Down starting match if necessary:
+ #
+ # Consider the following two lines:
+ #
+ # {
+ # $a => 20 > 3 ? 1 : 0,
+ # $xyz => 5,
+ # }
+
+# We would like to get alignment regardless of the order of the two lines.
+# If the lines come in in this order, then we will simplify the patterns of the first line
+# in sub eliminate_new_fields.
+# If the lines come in reverse order, then we achieve this with eliminate_new_fields.
+
+ # This update is currently restricted to leading '=>' matches. Although we
+ # could do this for both '=' and '=>', overall the results for '=' come out
+ # better without this step because this step can eliminate some other good
+ # matches. For example, with the '=' we get:
+
+# my @disilva = ( "di Silva", "diSilva", "di Si\x{301}lva", "diSi\x{301}lva" );
+# my @dsf = map "$_\x{FFFE}Fred", @disilva;
+# my @dsj = map "$_\x{FFFE}John", @disilva;
+# my @dsJ = map "$_ John", @disilva;
+
+ # without including '=' we get:
+
+# my @disilva = ( "di Silva", "diSilva", "di Si\x{301}lva", "diSi\x{301}lva" );
+# my @dsf = map "$_\x{FFFE}Fred", @disilva;
+# my @dsj = map "$_\x{FFFE}John", @disilva;
+# my @dsJ = map "$_ John", @disilva;
+ elsif (
+ $case == 2
+
+ && @new_matching_tokens == 1
+ ##&& $new_matching_tokens[0] =~ /^=/ # see note above
+ && $new_matching_tokens[0] =~ /^=>/
+ && $maximum_field_index > 2
+ )
+ {
+ my $jmaxm = $jmax - 1;
+ my $kmaxm = $maximum_field_index - 1;
+ my $have_side_comment = $old_rtokens->[$kmaxm] eq '#';
+
+ # We need to reduce the group pattern to be just two tokens,
+ # the leading equality or => and the final side comment
+
+ my $mid_field = join "",
+ @{$old_rfields}[ 1 .. $maximum_field_index - 1 ];
+ my $mid_patterns = join "",
+ @{$old_rpatterns}[ 1 .. $maximum_field_index - 1 ];
+ my @new_alignments = (
+ $old_line->get_alignment(0),
+ $old_line->get_alignment( $maximum_field_index - 1 )
+ );
+ my @new_tokens =
+ ( $old_rtokens->[0], $old_rtokens->[ $maximum_field_index - 1 ] );
+ my @new_fields = (
+ $old_rfields->[0], $mid_field, $old_rfields->[$maximum_field_index]
+ );
+ my @new_patterns = (
+ $old_rpatterns->[0], $mid_patterns,
+ $old_rpatterns->[$maximum_field_index]
+ );
+
+ $maximum_field_index = 2;
+ $old_line->set_jmax($maximum_field_index);
+ $old_line->set_rtokens( \@new_tokens );
+ $old_line->set_rfields( \@new_fields );
+ $old_line->set_rpatterns( \@new_patterns );
+
+ initialize_for_new_group();
+ add_to_group($old_line);
+ $current_line = $old_line;
}
+ return;
}
# create an empty side comment if none exists
sub make_side_comment {
- my $new_line = shift;
- my $level_end = shift;
- my $jmax = $new_line->get_jmax();
- my $rtokens = $new_line->get_rtokens();
+ my ( $new_line, $level_end ) = @_;
+ my $jmax = $new_line->get_jmax();
+ my $rtokens = $new_line->get_rtokens();
# if line does not have a side comment...
- if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) {
+ if ( ( $jmax == 0 ) || ( $rtokens->[ $jmax - 1 ] ne '#' ) ) {
my $rfields = $new_line->get_rfields();
my $rpatterns = $new_line->get_rpatterns();
- $$rtokens[$jmax] = '#';
- $$rfields[ ++$jmax ] = '';
- $$rpatterns[$jmax] = '#';
+ $rtokens->[$jmax] = '#';
+ $rfields->[ ++$jmax ] = '';
+ $rpatterns->[$jmax] = '#';
$new_line->set_jmax($jmax);
$new_line->set_jmax_original_line($jmax);
}
$line_number - $last_side_comment_line_number > 12
# and don't remember comment location across block level changes
- || ( $level_end < $last_side_comment_level && $$rfields[0] =~ /^}/ )
+ || ( $level_end < $last_side_comment_level
+ && $rfields->[0] =~ /^}/ )
)
{
forget_side_comment();
$last_side_comment_line_number = $line_number;
$last_side_comment_level = $level_end;
}
+ return;
}
sub decide_if_list {
# where the trailing digit is the nesting depth. Allow braces
# to allow nested list items.
my $rtokens = $line->get_rtokens();
- my $test_token = $$rtokens[0];
+ my $test_token = $rtokens->[0];
if ( $test_token =~ /^(\,|=>)/ ) {
my $list_type = $test_token;
my $jmax = $line->get_jmax();
foreach ( 1 .. $jmax - 2 ) {
- if ( $$rtokens[$_] !~ /^(\,|=>|\{)/ ) {
+ if ( $rtokens->[$_] !~ /^(\,|=>|\{)/ ) {
$list_type = "";
last;
}
}
$line->set_list_type($list_type);
}
+ return;
}
sub eliminate_new_fields {
- return unless ( $maximum_line_index >= 0 );
my ( $new_line, $old_line ) = @_;
+ return unless ( $maximum_line_index >= 0 );
my $jmax = $new_line->get_jmax();
my $old_rtokens = $old_line->get_rtokens();
my $rtokens = $new_line->get_rtokens();
my $is_assignment =
- ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );
+ ( $rtokens->[0] =~ /^=>?\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );
# must be monotonic variation
return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );
# loop over all OLD tokens except comment and check match
my $match = 1;
- my $k;
- for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) {
- if ( ( $$old_rtokens[$k] ne $$rtokens[$k] )
- || ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) )
+ foreach my $k ( 0 .. $maximum_field_index - 2 ) {
+ if ( ( $old_rtokens->[$k] ne $rtokens->[$k] )
+ || ( $old_rpatterns->[$k] ne $rpatterns->[$k] ) )
{
$match = 0;
last;
# first tokens agree, so combine extra new tokens
if ($match) {
- for $k ( $maximum_field_index .. $jmax - 1 ) {
+ ##for my $k ( $maximum_field_index .. $jmax - 1 ) {
+ foreach my $k ( $maximum_field_index .. $jmax - 1 ) {
- $$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k];
- $$rfields[$k] = "";
- $$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k];
- $$rpatterns[$k] = "";
+ $rfields->[ $maximum_field_index - 1 ] .= $rfields->[$k];
+ $rfields->[$k] = "";
+ $rpatterns->[ $maximum_field_index - 1 ] .= $rpatterns->[$k];
+ $rpatterns->[$k] = "";
}
- $$rtokens[ $maximum_field_index - 1 ] = '#';
- $$rfields[$maximum_field_index] = $$rfields[$jmax];
- $$rpatterns[$maximum_field_index] = $$rpatterns[$jmax];
- $jmax = $maximum_field_index;
+ $rtokens->[ $maximum_field_index - 1 ] = '#';
+ $rfields->[$maximum_field_index] = $rfields->[$jmax];
+ $rpatterns->[$maximum_field_index] = $rpatterns->[$jmax];
+ $jmax = $maximum_field_index;
}
$new_line->set_jmax($jmax);
+ return;
}
sub fix_terminal_ternary {
my ($jquestion);
my $depth_question;
my $pad = "";
- for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) {
+ foreach my $j ( 0 .. $maximum_field_index - 1 ) {
my $tok = $rtokens_old->[$j];
if ( $tok =~ /^\?(\d+)$/ ) {
$depth_question = $1;
VALIGN_DEBUG_FLAG_TERNARY && do {
local $" = '><';
- print "CURRENT FIELDS=<@{$rfields_old}>\n";
- print "CURRENT TOKENS=<@{$rtokens_old}>\n";
- print "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
- print "UNMODIFIED FIELDS=<@{$rfields}>\n";
- print "UNMODIFIED TOKENS=<@{$rtokens}>\n";
- print "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
+ print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n";
+ print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n";
+ print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
+ print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n";
+ print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n";
+ print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
};
# handle cases of leading colon on this line
VALIGN_DEBUG_FLAG_TERNARY && do {
local $" = '><';
- print "MODIFIED TOKENS=<@tokens>\n";
- print "MODIFIED PATTERNS=<@patterns>\n";
- print "MODIFIED FIELDS=<@fields>\n";
+ print STDOUT "MODIFIED TOKENS=<@tokens>\n";
+ print STDOUT "MODIFIED PATTERNS=<@patterns>\n";
+ print STDOUT "MODIFIED FIELDS=<@fields>\n";
};
# all ok .. update the arrays
# TBD: add handling for 'case'
return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
- # look for the opening brace after the else, and extrace the depth
+ # look for the opening brace after the else, and extract the depth
my $tok_brace = $rtokens->[0];
my $depth_brace;
- if ( $tok_brace =~ /^\{(\d+)$/ ) { $depth_brace = $1; }
+ if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
# probably: "else # side_comment"
else { return }
# Now find the opening block brace
my ($jbrace);
- for ( my $j = 1 ; $j < $maximum_field_index ; $j++ ) {
+ foreach my $j ( 1 .. $maximum_field_index - 1 ) {
my $tok = $rtokens_old->[$j];
if ( $tok eq $tok_brace ) {
$jbrace = $j;
splice( @{$rfields}, 1, 0, ('') x $jadd );
# force a flush after this line if it does not follow a case
- return $jbrace
- unless ( $rfields_old->[0] =~ /^case\s*$/ );
+ if ( $rfields_old->[0] =~ /^case\s*$/ ) { return }
+ else { return $jbrace }
}
-sub check_match {
-
- my $new_line = shift;
- my $old_line = shift;
-
- # uses global variables:
- # $previous_minimum_jmax_seen
- # $maximum_jmax_seen
- # $maximum_line_index
- # $marginal_match
- my $jmax = $new_line->get_jmax();
- my $maximum_field_index = $old_line->get_jmax();
-
- # flush if this line has too many fields
- if ( $jmax > $maximum_field_index ) { my_flush(); return }
-
- # flush if adding this line would make a non-monotonic field count
- if (
- ( $maximum_field_index > $jmax ) # this has too few fields
- && (
- ( $previous_minimum_jmax_seen < $jmax ) # and wouldn't be monotonic
- || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
- )
- )
- {
- my_flush();
- return;
- }
-
- # otherwise append this line if everything matches
- my $jmax_original_line = $new_line->get_jmax_original_line();
- my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
- my $rtokens = $new_line->get_rtokens();
- my $rfields = $new_line->get_rfields();
- my $rpatterns = $new_line->get_rpatterns();
- my $list_type = $new_line->get_list_type();
+{ # sub check_match
+ my %is_good_alignment;
- my $group_list_type = $old_line->get_list_type();
- my $old_rpatterns = $old_line->get_rpatterns();
- my $old_rtokens = $old_line->get_rtokens();
+ BEGIN {
- my $jlimit = $jmax - 1;
- if ( $maximum_field_index > $jmax ) {
- $jlimit = $jmax_original_line;
- --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
+ # Vertically aligning on certain "good" tokens is usually okay
+ # so we can be less restrictive in marginal cases.
+ my @q = qw( { ? => = );
+ push @q, (',');
+ @is_good_alignment{@q} = (1) x scalar(@q);
}
- my $everything_matches = 1;
-
- # common list types always match
- unless ( ( $group_list_type && ( $list_type eq $group_list_type ) )
- || $is_hanging_side_comment )
- {
-
- my $leading_space_count = $new_line->get_leading_space_count();
- my $saw_equals = 0;
- for my $j ( 0 .. $jlimit ) {
- my $match = 1;
-
- my $old_tok = $$old_rtokens[$j];
- my $new_tok = $$rtokens[$j];
-
- # Dumb down the match AFTER an equals and
- # also dumb down after seeing a ? ternary operator ...
- # Everything after a + is the token which preceded the previous
- # opening paren (container name). We won't require them to match.
- if ( $saw_equals && $new_tok =~ /(.*)\+/ ) {
- $new_tok = $1;
- $old_tok =~ s/\+.*$//;
- }
+ sub check_match {
- if ( $new_tok =~ /^[\?=]\d*$/ ) { $saw_equals = 1 }
+ # See if the current line matches the current vertical alignment group.
+ # If not, flush the current group.
+ my ( $new_line, $old_line ) = @_;
- # we never match if the matching tokens differ
- if ( $j < $jlimit
- && $old_tok ne $new_tok )
- {
- $match = 0;
- }
+ # uses global variables:
+ # $previous_minimum_jmax_seen
+ # $maximum_jmax_seen
+ # $maximum_line_index
+ # $marginal_match
+ my $jmax = $new_line->get_jmax();
+ my $maximum_field_index = $old_line->get_jmax();
- # otherwise, if patterns match, we always have a match.
- # However, if patterns don't match, we have to be careful...
- elsif ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) {
+ # flush if this line has too many fields
+ if ( $jmax > $maximum_field_index ) { goto NO_MATCH }
- # We have to be very careful about aligning commas when the
- # pattern's don't match, because it can be worse to create an
- # alignment where none is needed than to omit one. The current
- # rule: if we are within a matching sub call (indicated by '+'
- # in the matching token), we'll allow a marginal match, but
- # otherwise not.
- #
- # Here's an example where we'd like to align the '='
- # my $cfile = File::Spec->catfile( 't', 'callext.c' );
- # my $inc = File::Spec->catdir( 'Basic', 'Core' );
- # because the function names differ.
- # Future alignment logic should make this unnecessary.
- #
- # Here's an example where the ','s are not contained in a call.
- # The first line below should probably not match the next two:
- # ( $a, $b ) = ( $b, $r );
- # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
- # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
- if ( $new_tok =~ /^,/ ) {
- if ( $$rtokens[$j] =~ /[A-Za-z]/ ) {
- $marginal_match = 1;
- }
- else {
- $match = 0;
+ # flush if adding this line would make a non-monotonic field count
+ if (
+ ( $maximum_field_index > $jmax ) # this has too few fields
+ && (
+ ( $previous_minimum_jmax_seen <
+ $jmax ) # and wouldn't be monotonic
+ || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
+ )
+ )
+ {
+ goto NO_MATCH;
+ }
+
+ # otherwise see if this line matches the current group
+ my $jmax_original_line = $new_line->get_jmax_original_line();
+ my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
+ my $rtokens = $new_line->get_rtokens();
+ my $rfields = $new_line->get_rfields();
+ my $rpatterns = $new_line->get_rpatterns();
+ my $list_type = $new_line->get_list_type();
+
+ my $group_list_type = $old_line->get_list_type();
+ my $old_rpatterns = $old_line->get_rpatterns();
+ my $old_rtokens = $old_line->get_rtokens();
+
+ my $jlimit = $jmax - 1;
+ if ( $maximum_field_index > $jmax ) {
+ $jlimit = $jmax_original_line;
+ --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
+ }
+
+ # handle comma-separated lists ..
+ if ( $group_list_type && ( $list_type eq $group_list_type ) ) {
+ for my $j ( 0 .. $jlimit ) {
+ my $old_tok = $old_rtokens->[$j];
+ next unless $old_tok;
+ my $new_tok = $rtokens->[$j];
+ next unless $new_tok;
+
+ # lists always match ...
+ # unless they would align any '=>'s with ','s
+ goto NO_MATCH
+ if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/
+ || $new_tok =~ /^=>/ && $old_tok =~ /^,/ );
+ }
+ }
+
+ # do detailed check for everything else except hanging side comments
+ elsif ( !$is_hanging_side_comment ) {
+
+ my $leading_space_count = $new_line->get_leading_space_count();
+
+ my $max_pad = 0;
+ my $min_pad = 0;
+ my $saw_good_alignment;
+
+ for my $j ( 0 .. $jlimit ) {
+
+ my $old_tok = $old_rtokens->[$j];
+ my $new_tok = $rtokens->[$j];
+
+ # Note on encoding used for alignment tokens:
+ # -------------------------------------------
+ # Tokens are "decorated" with information which can help
+ # prevent unwanted alignments. Consider for example the
+ # following two lines:
+ # local ( $xn, $xd ) = split( '/', &'rnorm(@_) );
+ # local ( $i, $f ) = &'bdiv( $xn, $xd );
+ # There are three alignment tokens in each line, a comma,
+ # an =, and a comma. In the first line these three tokens
+ # are encoded as:
+ # ,4+local-18 =3 ,4+split-7
+ # and in the second line they are encoded as
+ # ,4+local-18 =3 ,4+&'bdiv-8
+ # Tokens always at least have token name and nesting
+ # depth. So in this example the ='s are at depth 3 and
+ # the ,'s are at depth 4. This prevents aligning tokens
+ # of different depths. Commas contain additional
+ # information, as follows:
+ # , {depth} + {container name} - {spaces to opening paren}
+ # This allows us to reject matching the rightmost commas
+ # in the above two lines, since they are for different
+ # function calls. This encoding is done in
+ # 'sub send_lines_to_vertical_aligner'.
+
+ # Pick off actual token.
+ # Everything up to the first digit is the actual token.
+ my $alignment_token = $new_tok;
+ if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 }
+
+ # see if the decorated tokens match
+ my $tokens_match = $new_tok eq $old_tok
+
+ # Exception for matching terminal : of ternary statement..
+ # consider containers prefixed by ? and : a match
+ || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ );
+
+ # No match if the alignment tokens differ...
+ if ( !$tokens_match ) {
+
+ # ...Unless this is a side comment
+ if (
+ $j == $jlimit
+
+ # and there is either at least one alignment token
+ # or this is a single item following a list. This
+ # latter rule is required for 'December' to join
+ # the following list:
+ # my (@months) = (
+ # '', 'January', 'February', 'March',
+ # 'April', 'May', 'June', 'July',
+ # 'August', 'September', 'October', 'November',
+ # 'December'
+ # );
+ # If it doesn't then the -lp formatting will fail.
+ && ( $j > 0 || $old_tok =~ /^,/ )
+ )
+ {
+ $marginal_match = 1
+ if ( $marginal_match == 0
+ && $maximum_line_index == 0 );
+ last;
}
- }
- # parens don't align well unless patterns match
- elsif ( $new_tok =~ /^\(/ ) {
- $match = 0;
+ goto NO_MATCH;
}
- # Handle an '=' alignment with different patterns to
- # the left.
- elsif ( $new_tok =~ /^=\d*$/ ) {
+ # Calculate amount of padding required to fit this in.
+ # $pad is the number of spaces by which we must increase
+ # the current field to squeeze in this field.
+ my $pad =
+ length( $rfields->[$j] ) - $old_line->current_field_width($j);
+ if ( $j == 0 ) { $pad += $leading_space_count; }
+
+ # remember max pads to limit marginal cases
+ if ( $alignment_token ne '#' ) {
+ if ( $pad > $max_pad ) { $max_pad = $pad }
+ if ( $pad < $min_pad ) { $min_pad = $pad }
+ }
+ if ( $is_good_alignment{$alignment_token} ) {
+ $saw_good_alignment = 1;
+ }
+
+ # If patterns don't match, we have to be careful...
+ if ( $old_rpatterns->[$j] ne $rpatterns->[$j] ) {
+
+ # flag this as a marginal match since patterns differ
+ $marginal_match = 1
+ if ( $marginal_match == 0 && $maximum_line_index == 0 );
+
+ # We have to be very careful about aligning commas
+ # when the pattern's don't match, because it can be
+ # worse to create an alignment where none is needed
+ # than to omit one. Here's an example where the ','s
+ # are not in named containers. The first line below
+ # should not match the next two:
+ # ( $a, $b ) = ( $b, $r );
+ # ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
+ # ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
+ if ( $alignment_token eq ',' ) {
+
+ # do not align commas unless they are in named containers
+ goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ );
+ }
- $saw_equals = 1;
+ # do not align parens unless patterns match;
+ # large ugly spaces can occur in math expressions.
+ elsif ( $alignment_token eq '(' ) {
- # It is best to be a little restrictive when
- # aligning '=' tokens. Here is an example of
- # two lines that we will not align:
- # my $variable=6;
- # $bb=4;
- # The problem is that one is a 'my' declaration,
- # and the other isn't, so they're not very similar.
- # We will filter these out by comparing the first
- # letter of the pattern. This is crude, but works
- # well enough.
- if (
- substr( $$old_rpatterns[$j], 0, 1 ) ne
- substr( $$rpatterns[$j], 0, 1 ) )
- {
- $match = 0;
+ # But we can allow a match if the parens don't
+ # require any padding.
+ if ( $pad != 0 ) { goto NO_MATCH }
}
- # If we pass that test, we'll call it a marginal match.
- # Here is an example of a marginal match:
- # $done{$$op} = 1;
- # $op = compile_bblock($op);
- # The left tokens are both identifiers, but
- # one accesses a hash and the other doesn't.
- # We'll let this be a tentative match and undo
- # it later if we don't find more than 2 lines
- # in the group.
- elsif ( $maximum_line_index == 0 ) {
- $marginal_match = 1;
+ # Handle an '=' alignment with different patterns to
+ # the left.
+ elsif ( $alignment_token eq '=' ) {
+
+ # It is best to be a little restrictive when
+ # aligning '=' tokens. Here is an example of
+ # two lines that we will not align:
+ # my $variable=6;
+ # $bb=4;
+ # The problem is that one is a 'my' declaration,
+ # and the other isn't, so they're not very similar.
+ # We will filter these out by comparing the first
+ # letter of the pattern. This is crude, but works
+ # well enough.
+ if (
+ substr( $old_rpatterns->[$j], 0, 1 ) ne
+ substr( $rpatterns->[$j], 0, 1 ) )
+ {
+ goto NO_MATCH;
+ }
+
+ # If we pass that test, we'll call it a marginal match.
+ # Here is an example of a marginal match:
+ # $done{$$op} = 1;
+ # $op = compile_bblock($op);
+ # The left tokens are both identifiers, but
+ # one accesses a hash and the other doesn't.
+ # We'll let this be a tentative match and undo
+ # it later if we don't find more than 2 lines
+ # in the group.
+ elsif ( $maximum_line_index == 0 ) {
+ $marginal_match =
+ 2; # =2 prevents being undone below
+ }
}
}
+
+ # Don't let line with fewer fields increase column widths
+ # ( align3.t )
+ if ( $maximum_field_index > $jmax ) {
+
+ # Exception: suspend this rule to allow last lines to join
+ if ( $pad > 0 ) { goto NO_MATCH; }
+ }
+ } ## end for my $j ( 0 .. $jlimit)
+
+ # Turn off the "marginal match" flag in some cases...
+ # A "marginal match" occurs when the alignment tokens agree
+ # but there are differences in the other tokens (patterns).
+ # If we leave the marginal match flag set, then the rule is that we
+ # will align only if there are more than two lines in the group.
+ # We will turn of the flag if we almost have a match
+ # and either we have seen a good alignment token or we
+ # just need a small pad (2 spaces) to fit. These rules are
+ # the result of experimentation. Tokens which misaligned by just
+ # one or two characters are annoying. On the other hand,
+ # large gaps to less important alignment tokens are also annoying.
+ if ( $marginal_match == 1
+ && $jmax == $maximum_field_index
+ && ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) )
+ )
+ {
+ $marginal_match = 0;
}
+ ##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n";
+ }
- # Don't let line with fewer fields increase column widths
- # ( align3.t )
- if ( $maximum_field_index > $jmax ) {
- my $pad =
- length( $$rfields[$j] ) - $old_line->current_field_width($j);
+ # We have a match (even if marginal).
+ # If the current line has fewer fields than the current group
+ # but otherwise matches, copy the remaining group fields to
+ # make it a perfect match.
+ if ( $maximum_field_index > $jmax ) {
- if ( $j == 0 ) {
- $pad += $leading_space_count;
- }
+ ##########################################################
+ # FIXME: The previous version had a bug which made side comments
+ # become regular fields, so for now the program does not allow a
+ # line with side comment to match. This should eventually be done.
+ # The best test file for experimenting is 'lista.t'
+ ##########################################################
- # TESTING: suspend this rule to allow last lines to join
- if ( $pad > 0 ) { $match = 0; }
- }
+ my $comment = $rfields->[$jmax];
+ goto NO_MATCH if ($comment);
- unless ($match) {
- $everything_matches = 0;
- last;
+ # Corrected loop
+ for my $jj ( $jlimit .. $maximum_field_index ) {
+ $rtokens->[$jj] = $old_rtokens->[$jj];
+ $rfields->[ $jj + 1 ] = '';
+ $rpatterns->[ $jj + 1 ] = $old_rpatterns->[ $jj + 1 ];
}
- }
- }
- if ( $maximum_field_index > $jmax ) {
+## THESE DO NOT GIVE CORRECT RESULTS
+## $rfields->[$jmax] = $comment;
+## $new_line->set_jmax($jmax);
- if ($everything_matches) {
-
- my $comment = $$rfields[$jmax];
- for $jmax ( $jlimit .. $maximum_field_index ) {
- $$rtokens[$jmax] = $$old_rtokens[$jmax];
- $$rfields[ ++$jmax ] = '';
- $$rpatterns[$jmax] = $$old_rpatterns[$jmax];
- }
- $$rfields[$jmax] = $comment;
- $new_line->set_jmax($jmax);
}
- }
+ return;
- my_flush() unless ($everything_matches);
+ NO_MATCH:
+ ##print "no match jmax=$jmax max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$old_rtokens->[0]\n";
+ my_flush();
+ return;
+ }
}
sub check_fit {
+ my ( $new_line, $old_line ) = @_;
return unless ( $maximum_line_index >= 0 );
- my $new_line = shift;
- my $old_line = shift;
my $jmax = $new_line->get_jmax();
my $leading_space_count = $new_line->get_leading_space_count();
# save current columns in case this doesn't work
save_alignment_columns();
- my ( $j, $pad, $eight );
my $maximum_field_index = $old_line->get_jmax();
- for $j ( 0 .. $jmax ) {
+ for my $j ( 0 .. $jmax ) {
- $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j);
+ my $pad = length( $rfields->[$j] ) - $old_line->current_field_width($j);
if ( $j == 0 ) {
$pad += $leading_space_count;
next if $pad < 0;
+ ## OLD NOTES:
## This patch helps sometimes, but it doesn't check to see if
## the line is too long even without the side comment. It needs
## to be reworked.
##don't let a long token with no trailing side comment push
##side comments out, or end a group. (sidecmt1.t)
- ##next if ($j==$jmax-1 && length($$rfields[$jmax])==0);
+ ##next if ($j==$jmax-1 && length($rfields->[$jmax])==0);
+
+ # BEGIN PATCH for keith1.txt.
+ # If the group began matching multiple tokens but later this got
+ # reduced to a fewer number of matching tokens, then the fields
+ # of the later lines will still have to fit into their corresponding
+ # fields. So a large later field will "push" the other fields to
+ # the right, including previous side comments, and if there is no room
+ # then there is no match.
+ # For example, look at the last line in the following snippet:
+
+ # my $b_prod_db = ( $ENV{ORACLE_SID} =~ m/p$/ && !$testing ) ? true : false;
+ # my $env = ($b_prod_db) ? "prd" : "val";
+ # my $plant = ( $OPT{p} ) ? $OPT{p} : "STL";
+ # my $task = $OPT{t};
+ # my $fnam = "longggggggggggggggg.$record_created.$env.$plant.idash";
+
+ # The long term will push the '?' to the right to fit in, and in this
+ # case there is not enough room so it will not match the equals unless
+ # we do something special.
+
+ # Usually it looks good to keep an initial alignment of '=' going, and
+ # we can do this if the long term can fit in the space taken up by the
+ # remaining fields (the ? : fields here).
+
+ # Allowing any matching token for now, but it could be restricted
+ # to an '='-like token if necessary.
+
+ if (
+ $pad > $padding_available
+ && $jmax == 2 # matching one thing (plus #)
+ && $j == $jmax - 1 # at last field
+ && $maximum_line_index > 0 # more than 1 line in group now
+ && $jmax < $maximum_field_index # other lines have more fields
+ && length( $rfields->[$jmax] ) == 0 # no side comment
+
+ # Uncomment to match only equals (but this does not seem necessary)
+ # && $rtokens->[0] =~ /^=\d/ # matching an equals
+ )
+ {
+ my $extra_padding = 0;
+ foreach my $jj ( $j + 1 .. $maximum_field_index - 1 ) {
+ $extra_padding += $old_line->current_field_width($jj);
+ }
+
+ next if ( $pad <= $padding_available + $extra_padding );
+ }
+
+ # END PATCH for keith1.pl
# This line will need space; lets see if we want to accept it..
if (
$group_maximum_gap = $pad;
}
}
+ return;
}
-sub accept_line {
+sub add_to_group {
# The current line either starts a new alignment group or is
# accepted into the current alignment group.
my $jmax = $new_line->get_jmax();
my $rfields = $new_line->get_rfields();
my $rtokens = $new_line->get_rtokens();
- my $j;
- my $col = $new_line->get_leading_space_count();
+ my $col = $new_line->get_leading_space_count();
- for $j ( 0 .. $jmax ) {
- $col += length( $$rfields[$j] );
+ for my $j ( 0 .. $jmax ) {
+ $col += length( $rfields->[$j] );
# create initial alignments for the new group
my $token = "";
- if ( $j < $jmax ) { $token = $$rtokens[$j] }
+ if ( $j < $jmax ) { $token = $rtokens->[$j] }
my $alignment = make_alignment( $col, $token );
$new_line->set_alignment( $j, $alignment );
}
$new_line->set_alignments(@new_alignments);
}
- # remember group jmax extremes for next call to append_line
+ # remember group jmax extremes for next call to valign_input
$previous_minimum_jmax_seen = $minimum_jmax_seen;
$previous_maximum_jmax_seen = $maximum_jmax_seen;
+ return;
}
sub dump_array {
# debug routine to dump array contents
local $" = ')(';
- print "(@_)\n";
+ print STDOUT "(@_)\n";
+ return;
}
# flush() sends the current Perl::Tidy::VerticalAligner group down the
# pipeline to Perl::Tidy::FileWriter.
-# This is the external flush, which also empties the cache
+# This is the external flush, which also empties the buffer and cache
sub flush {
+ # the buffer must be emptied first, then any cached text
+ dump_valign_buffer();
+
if ( $maximum_line_index < 0 ) {
if ($cached_line_type) {
$seqno_string = $cached_seqno_string;
- entab_and_output( $cached_line_text,
+ valign_output_step_C( $cached_line_text,
$cached_line_leading_space_count,
- $last_group_level_written );
+ $last_level_written );
$cached_line_type = 0;
$cached_line_text = "";
$cached_seqno_string = "";
else {
my_flush();
}
+ return;
+}
+
+sub reduce_valign_buffer_indentation {
+
+ my ($diff) = @_;
+ if ( $valign_buffer_filling && $diff ) {
+ my $max_valign_buffer = @valign_buffer;
+ foreach my $i ( 0 .. $max_valign_buffer - 1 ) {
+ my ( $line, $leading_space_count, $level ) =
+ @{ $valign_buffer[$i] };
+ my $ws = substr( $line, 0, $diff );
+ if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
+ $line = substr( $line, $diff );
+ }
+ if ( $leading_space_count >= $diff ) {
+ $leading_space_count -= $diff;
+ $level = level_change( $leading_space_count, $diff, $level );
+ }
+ $valign_buffer[$i] = [ $line, $leading_space_count, $level ];
+ }
+ }
+ return;
+}
+
+sub level_change {
+
+ # compute decrease in level when we remove $diff spaces from the
+ # leading spaces
+ my ( $leading_space_count, $diff, $level ) = @_;
+ if ($rOpts_indent_columns) {
+ my $olev =
+ int( ( $leading_space_count + $diff ) / $rOpts_indent_columns );
+ my $nlev = int( $leading_space_count / $rOpts_indent_columns );
+ $level -= ( $olev - $nlev );
+ if ( $level < 0 ) { $level = 0 }
+ }
+ return $level;
+}
+
+sub dump_valign_buffer {
+ if (@valign_buffer) {
+ foreach (@valign_buffer) {
+ valign_output_step_D( @{$_} );
+ }
+ @valign_buffer = ();
+ }
+ $valign_buffer_filling = "";
+ return;
}
# This is the internal flush, which leaves the cache intact
VALIGN_DEBUG_FLAG_APPEND0 && do {
my ( $a, $b, $c ) = caller();
- print
+ print STDOUT
"APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n";
};
for my $i ( 0 .. $maximum_line_index ) {
my $str = $group_lines[$i];
my $excess =
- length($str) + $leading_space_count - $rOpts_maximum_line_length;
+ length($str) +
+ $leading_space_count -
+ maximum_line_length_for_level($group_level);
if ( $excess > $max_excess ) {
$max_excess = $excess;
}
# write the group of lines
my $outdent_long_lines = 0;
for my $i ( 0 .. $maximum_line_index ) {
- write_leader_and_string( $leading_space_count, $group_lines[$i], 0,
- $outdent_long_lines, "" );
+ valign_output_step_B( $leading_space_count, $group_lines[$i], 0,
+ $outdent_long_lines, "", $group_level );
}
}
my $group_list_type = $group_lines[0]->get_list_type();
my ( $a, $b, $c ) = caller();
my $maximum_field_index = $group_lines[0]->get_jmax();
- print
+ print STDOUT
"APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";
};
my $group_leader_length = $group_lines[0]->get_leading_space_count();
# add extra leading spaces if helpful
- my $min_ci_gap = improve_continuation_indentation( $do_not_align,
- $group_leader_length );
+ # NOTE: Use zero; this did not work well
+ my $min_ci_gap = 0;
# loop to output all lines
for my $i ( 0 .. $maximum_line_index ) {
my $line = $group_lines[$i];
- write_vertically_aligned_line( $line, $min_ci_gap, $do_not_align,
+ valign_output_step_A( $line, $min_ci_gap, $do_not_align,
$group_leader_length, $extra_leading_spaces );
}
}
initialize_for_new_group();
+ return;
}
sub decide_if_aligned {
my $maximum_field_index = $group_lines[0]->get_jmax();
if ( $do_not_align
&& ( $maximum_line_index > 0 )
- && ( length( $$rfields[$maximum_field_index] ) > 0 ) )
+ && ( length( $rfields->[$maximum_field_index] ) > 0 ) )
{
combine_fields();
$do_not_align = 0;
if ( $move >= 0
&& $last_side_comment_length > 0
&& ( $first_side_comment_line == 0 )
- && $group_level == $last_group_level_written )
+ && $group_level == $last_level_written )
{
$min_move = 0;
}
$move = $min_move;
}
- # prevously, an upper bound was placed on $move here,
+ # previously, an upper bound was placed on $move here,
# (maximum_space_to_comment), but it was not helpful
# don't exceed the available space
return $do_not_align;
}
-sub improve_continuation_indentation {
- my ( $do_not_align, $group_leader_length ) = @_;
-
- # See if we can increase the continuation indentation
- # to move all continuation lines closer to the next field
- # (unless it is a comment).
- #
- # '$min_ci_gap'is the extra indentation that we may need to introduce.
- # We will only introduce this to fields which already have some ci.
- # Without this variable, we would occasionally get something like this
- # (Complex.pm):
- #
- # use overload '+' => \&plus,
- # '-' => \&minus,
- # '*' => \&multiply,
- # ...
- # 'tan' => \&tan,
- # 'atan2' => \&atan2,
- #
- # Whereas with this variable, we can shift variables over to get this:
- #
- # use overload '+' => \&plus,
- # '-' => \&minus,
- # '*' => \&multiply,
- # ...
- # 'tan' => \&tan,
- # 'atan2' => \&atan2,
-
- ## BUB: Deactivated####################
- # The trouble with this patch is that it may, for example,
- # move in some 'or's or ':'s, and leave some out, so that the
- # left edge alignment suffers.
- return 0;
- ###########################################
-
- my $maximum_field_index = $group_lines[0]->get_jmax();
-
- my $min_ci_gap = $rOpts_maximum_line_length;
- if ( $maximum_field_index > 1 && !$do_not_align ) {
-
- for my $i ( 0 .. $maximum_line_index ) {
- my $line = $group_lines[$i];
- my $leading_space_count = $line->get_leading_space_count();
- my $rfields = $line->get_rfields();
-
- my $gap =
- $line->get_column(0) -
- $leading_space_count -
- length( $$rfields[0] );
-
- if ( $leading_space_count > $group_leader_length ) {
- if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap }
- }
- }
-
- if ( $min_ci_gap >= $rOpts_maximum_line_length ) {
- $min_ci_gap = 0;
- }
- }
- else {
- $min_ci_gap = 0;
- }
- return $min_ci_gap;
-}
+sub valign_output_step_A {
-sub write_vertically_aligned_line {
+ ###############################################################
+ # This is Step A in writing vertically aligned lines.
+ # The line is prepared according to the alignments which have
+ # been found and shipped to the next step.
+ ###############################################################
my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
$extra_leading_spaces )
$leading_space_count += $min_ci_gap;
}
- my $str = $$rfields[0];
+ my $str = $rfields->[0];
# loop to concatenate all fields of this line and needed padding
my $total_pad_count = 0;
- my ( $j, $pad );
- for $j ( 1 .. $maximum_field_index ) {
+ for my $j ( 1 .. $maximum_field_index ) {
# skip zero-length side comments
last
- if ( ( $j == $maximum_field_index )
- && ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) )
+ if (
+ ( $j == $maximum_field_index )
+ && ( !defined( $rfields->[$j] )
+ || ( length( $rfields->[$j] ) == 0 ) )
);
# compute spaces of padding before this field
my $col = $line->get_column( $j - 1 );
- $pad = $col - ( length($str) + $leading_space_count );
+ my $pad = $col - ( length($str) + $leading_space_count );
if ($do_not_align) {
$pad =
: $rOpts_minimum_space_to_comment - 1;
}
+ # if the -fpsc flag is set, move the side comment to the selected
+ # column if and only if it is possible, ignoring constraints on
+ # line length and minimum space to comment
+ if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index )
+ {
+ my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1;
+ if ( $newpad >= 0 ) { $pad = $newpad; }
+ }
+
# accumulate the padding
if ( $pad > 0 ) { $total_pad_count += $pad; }
# add this field
- if ( !defined $$rfields[$j] ) {
+ if ( !defined $rfields->[$j] ) {
write_diagnostics("UNDEFined field at j=$j\n");
}
# only add padding when we have a finite field;
# this avoids extra terminal spaces if we have empty fields
- if ( length( $$rfields[$j] ) > 0 ) {
+ if ( length( $rfields->[$j] ) > 0 ) {
$str .= ' ' x $total_pad_count;
$total_pad_count = 0;
- $str .= $$rfields[$j];
+ $str .= $rfields->[$j];
}
else {
$total_pad_count = 0;
}
}
- my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) );
+ my $side_comment_length = ( length( $rfields->[$maximum_field_index] ) );
# ship this line off
- write_leader_and_string( $leading_space_count + $extra_leading_spaces,
+ valign_output_step_B( $leading_space_count + $extra_leading_spaces,
$str, $side_comment_length, $outdent_long_lines,
- $rvertical_tightness_flags );
+ $rvertical_tightness_flags, $group_level );
+ return;
}
sub get_extra_leading_spaces {
# list before it sees everything. When this happens, it sets
# the indentation to the standard scheme, but notes how
# many spaces it would have liked to use. We may be able
- # to recover that space here in the event that that all of the
+ # to recover that space here in the event that all of the
# lines of a list are back together again.
#----------------------------------------------------------
my $object = $group_lines[0]->get_indentation();
if ( ref($object) ) {
my $extra_indentation_spaces_wanted =
- get_RECOVERABLE_SPACES($object);
+ get_recoverable_spaces($object);
# all indentation objects must be the same
- my $i;
- for $i ( 1 .. $maximum_line_index ) {
+ for my $i ( 1 .. $maximum_line_index ) {
if ( $object != $group_lines[$i]->get_indentation() ) {
$extra_indentation_spaces_wanted = 0;
last;
# update the indentation object because with -icp the terminal
# ');' will use the same adjustment.
- $object->permanently_decrease_AVAILABLE_SPACES(
+ $object->permanently_decrease_available_spaces(
-$extra_leading_spaces );
}
}
# Uses global variables:
# @group_lines
# $maximum_line_index
- my ( $j, $k );
my $maximum_field_index = $group_lines[0]->get_jmax();
- for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) {
+ foreach my $j ( 0 .. $maximum_line_index ) {
my $line = $group_lines[$j];
my $rfields = $line->get_rfields();
foreach ( 1 .. $maximum_field_index - 1 ) {
- $$rfields[0] .= $$rfields[$_];
+ $rfields->[0] .= $rfields->[$_];
}
- $$rfields[1] = $$rfields[$maximum_field_index];
+ $rfields->[1] = $rfields->[$maximum_field_index];
$line->set_jmax(1);
$line->set_column( 0, 0 );
}
$maximum_field_index = 1;
- for $j ( 0 .. $maximum_line_index ) {
+ for my $j ( 0 .. $maximum_line_index ) {
my $line = $group_lines[$j];
my $rfields = $line->get_rfields();
- for $k ( 0 .. $maximum_field_index ) {
- my $pad = length( $$rfields[$k] ) - $line->current_field_width($k);
+ for my $k ( 0 .. $maximum_field_index ) {
+ my $pad = length( $rfields->[$k] ) - $line->current_field_width($k);
if ( $k == 0 ) {
$pad += $group_lines[$j]->get_leading_space_count();
}
}
}
+ return;
}
sub get_output_line_number {
# the output line number reported to a caller is the number of items
# written plus the number of items in the buffer
my $self = shift;
- 1 + $maximum_line_index + $file_writer_object->get_output_line_number();
+ return 1 + $maximum_line_index +
+ $file_writer_object->get_output_line_number();
}
-sub write_leader_and_string {
+sub valign_output_step_B {
+
+ ###############################################################
+ # This is Step B in writing vertically aligned lines.
+ # Vertical tightness is applied according to preset flags.
+ # In particular this routine handles stacking of opening
+ # and closing tokens.
+ ###############################################################
my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
- $rvertical_tightness_flags )
+ $rvertical_tightness_flags, $level )
= @_;
# handle outdenting of long lines:
length($str) -
$side_comment_length +
$leading_space_count -
- $rOpts_maximum_line_length;
+ maximum_line_length_for_level($level);
if ( $excess > 0 ) {
$leading_space_count = 0;
$last_outdented_line_at =
# Unpack any recombination data; it was packed by
# sub send_lines_to_vertical_aligner. Contents:
#
- # [0] type: 1=opening 2=closing 3=opening block brace
+ # [0] type: 1=opening non-block 2=closing non-block
+ # 3=opening block brace 4=closing block brace
# [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
# if closing: spaces of padding to use
# [2] sequence number of container
# either append this line to it or write it out
if ( length($cached_line_text) ) {
+ # Dump an invalid cached line
if ( !$cached_line_valid ) {
- entab_and_output( $cached_line_text,
+ valign_output_step_C( $cached_line_text,
$cached_line_leading_space_count,
- $last_group_level_written );
+ $last_level_written );
}
- # handle cached line with opening container token
+ # Handle cached line ending in OPENING tokens
elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
my $gap = $leading_space_count - length($cached_line_text);
}
}
- if ( $gap >= 0 ) {
+ if ( $gap >= 0 && defined($seqno_beg) ) {
$leading_string = $cached_line_text . ' ' x $gap;
$leading_space_count = $cached_line_leading_space_count;
$seqno_string = $cached_seqno_string . ':' . $seqno_beg;
+ $level = $last_level_written;
}
else {
- entab_and_output( $cached_line_text,
+ valign_output_step_C( $cached_line_text,
$cached_line_leading_space_count,
- $last_group_level_written );
+ $last_level_written );
}
}
- # handle cached line to place before this closing container token
+ # Handle cached line ending in CLOSING tokens
else {
my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
+ if (
+
+ # The new line must start with container
+ $seqno_beg
+
+ # The container combination must be okay..
+ && (
+
+ # okay to combine like types
+ ( $open_or_close == $cached_line_type )
+
+ # closing block brace may append to non-block
+ || ( $cached_line_type == 2 && $open_or_close == 4 )
+
+ # something like ');'
+ || ( !$open_or_close && $cached_line_type == 2 )
- if ( length($test_line) <= $rOpts_maximum_line_length ) {
+ )
+
+ # The combined line must fit
+ && (
+ length($test_line) <=
+ maximum_line_length_for_level($last_level_written) )
+ )
+ {
$seqno_string = $cached_seqno_string . ':' . $seqno_beg;
# and eliminate multiple colons might appear to be slow,
# but it's not an issue because we almost never come
# through here. In a typical file we don't.
- $seqno_string =~ s/^:+//;
+ $seqno_string =~ s/^:+//;
$last_nonblank_seqno_string =~ s/^:+//;
- $seqno_string =~ s/:+/:/g;
+ $seqno_string =~ s/:+/:/g;
$last_nonblank_seqno_string =~ s/:+/:/g;
# how many spaces can we outdent?
$test_line = substr( $test_line, $diff );
$cached_line_leading_space_count -= $diff;
+ $last_level_written =
+ level_change(
+ $cached_line_leading_space_count,
+ $diff, $last_level_written );
+ reduce_valign_buffer_indentation($diff);
}
# shouldn't happen, but not critical:
$str = $test_line;
$leading_string = "";
$leading_space_count = $cached_line_leading_space_count;
+ $level = $last_level_written;
}
else {
- entab_and_output( $cached_line_text,
+ valign_output_step_C( $cached_line_text,
$cached_line_leading_space_count,
- $last_group_level_written );
+ $last_level_written );
}
}
}
# write or cache this line
if ( !$open_or_close || $side_comment_length > 0 ) {
- entab_and_output( $line, $leading_space_count, $group_level );
+ valign_output_step_C( $line, $leading_space_count, $level );
}
else {
$cached_line_text = $line;
$cached_seqno_string = $seqno_string;
}
- $last_group_level_written = $group_level;
+ $last_level_written = $level;
$last_side_comment_length = $side_comment_length;
$extra_indent_ok = 0;
+ return;
+}
+
+sub valign_output_step_C {
+
+ ###############################################################
+ # This is Step C in writing vertically aligned lines.
+ # Lines are either stored in a buffer or passed along to the next step.
+ # The reason for storing lines is that we may later want to reduce their
+ # indentation when -sot and -sct are both used.
+ ###############################################################
+ my @args = @_;
+
+ # Dump any saved lines if we see a line with an unbalanced opening or
+ # closing token.
+ dump_valign_buffer() if ( $seqno_string && $valign_buffer_filling );
+
+ # Either store or write this line
+ if ($valign_buffer_filling) {
+ push @valign_buffer, [@args];
+ }
+ else {
+ valign_output_step_D(@args);
+ }
+
+ # For lines starting or ending with opening or closing tokens..
+ if ($seqno_string) {
+ $last_nonblank_seqno_string = $seqno_string;
+
+ # Start storing lines when we see a line with multiple stacked opening
+ # tokens.
+ # patch for RT #94354, requested by Colin Williams
+ if ( $seqno_string =~ /^\d+(\:+\d+)+$/ && $args[0] !~ /^[\}\)\]\:\?]/ )
+ {
+
+ # This test is efficient but a little subtle: The first test says
+ # that we have multiple sequence numbers and hence multiple opening
+ # or closing tokens in this line. The second part of the test
+ # rejects stacked closing and ternary tokens. So if we get here
+ # then we should have stacked unbalanced opening tokens.
+
+ # Here is a complex example:
+
+ # Foo($Bar[0], { # (side comment)
+ # baz => 1,
+ # });
+
+ # The first line has sequence 6::4. It does not begin with
+ # a closing token or ternary, so it passes the test and must be
+ # stacked opening tokens.
+
+ # The last line has sequence 4:6 but is a stack of closing tokens,
+ # so it gets rejected.
+
+ # Note that the sequence number of an opening token for a qw quote
+ # is a negative number and will be rejected.
+ # For example, for the following line:
+ # skip_symbols([qw(
+ # $seqno_string='10:5:-1'. It would be okay to accept it but
+ # I decided not to do this after testing.
+
+ $valign_buffer_filling = $seqno_string;
+
+ }
+ }
+ return;
}
-sub entab_and_output {
+sub valign_output_step_D {
+
+ ###############################################################
+ # This is Step D in writing vertically aligned lines.
+ # Write one vertically aligned line of code to the output object.
+ ###############################################################
+
my ( $line, $leading_space_count, $level ) = @_;
# The line is currently correct if there is no tabbing (recommended!)
}
else {
- # REMOVE AFTER TESTING
# shouldn't happen - program error counting whitespace
- # we'll skip entabbing
- warning(
-"Error entabbing in entab_and_output: expected count=$leading_space_count\n"
- );
+ # - skip entabbing
+ VALIGN_DEBUG_FLAG_TABS
+ && warning(
+"Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
+ );
}
}
# shouldn't happen:
if ( $space_count < 0 ) {
- warning(
-"Error entabbing in append_line: for level=$group_level count=$leading_space_count\n"
- );
+
+ # But it could be an outdented comment
+ if ( $line !~ /^\s*#/ ) {
+ VALIGN_DEBUG_FLAG_TABS
+ && warning(
+"Error entabbing in valign_output_step_D: for level=$group_level count=$leading_space_count\n"
+ );
+ }
$leading_string = ( ' ' x $leading_space_count );
}
else {
}
else {
- # REMOVE AFTER TESTING
# shouldn't happen - program error counting whitespace
# we'll skip entabbing
- warning(
-"Error entabbing in entab_and_output: expected count=$leading_space_count\n"
- );
+ VALIGN_DEBUG_FLAG_TABS
+ && warning(
+"Error entabbing in valign_output_step_D: expected count=$leading_space_count\n"
+ );
}
}
}
$file_writer_object->write_code_line( $line . "\n" );
- if ($seqno_string) {
- $last_nonblank_seqno_string = $seqno_string;
- }
+ return;
}
{ # begin get_leading_string
# shouldn't happen:
if ( $space_count < 0 ) {
- warning(
-"Error in append_line: for level=$group_level count=$leading_whitespace_count\n"
- );
+ VALIGN_DEBUG_FLAG_TABS
+ && warning(
+"Error in get_leading_string: for level=$group_level count=$leading_whitespace_count\n"
+ );
+
+ # -- skip entabbing
$leading_string = ( ' ' x $leading_whitespace_count );
}
else {
);
write_logfile_entry("\n");
}
+ return;
}
#####################################################################
use constant MAX_NAG_MESSAGES => 6;
sub write_logfile_entry {
- my $self = shift;
+ my ( $self, $msg ) = @_;
my $logger_object = $self->{_logger_object};
if ($logger_object) {
- $logger_object->write_logfile_entry(@_);
+ $logger_object->write_logfile_entry($msg);
}
+ return;
}
sub new {
- my $class = shift;
- my ( $line_sink_object, $rOpts, $logger_object ) = @_;
+ my ( $class, $line_sink_object, $rOpts, $logger_object ) = @_;
- bless {
+ return bless {
_line_sink_object => $line_sink_object,
_logger_object => $logger_object,
_rOpts => $rOpts,
sub tee_on {
my $self = shift;
$self->{_line_sink_object}->tee_on();
+ return;
}
sub tee_off {
my $self = shift;
$self->{_line_sink_object}->tee_off();
+ return;
}
sub get_output_line_number {
sub decrement_output_line_number {
my $self = shift;
$self->{_output_line_number}--;
+ return;
}
sub get_consecutive_nonblank_lines {
sub reset_consecutive_blank_lines {
my $self = shift;
$self->{_consecutive_blank_lines} = 0;
+ return;
}
sub want_blank_line {
unless ( $self->{_consecutive_blank_lines} ) {
$self->write_blank_code_line();
}
+ return;
+}
+
+sub require_blank_code_lines {
+
+ # write out the requested number of blanks regardless of the value of -mbl
+ # unless -mbl=0. This allows extra blank lines to be written for subs and
+ # packages even with the default -mbl=1
+ my ( $self, $count ) = @_;
+ my $need = $count - $self->{_consecutive_blank_lines};
+ my $rOpts = $self->{_rOpts};
+ my $forced = $rOpts->{'maximum-consecutive-blank-lines'} > 0;
+ foreach my $i ( 0 .. $need - 1 ) {
+ $self->write_blank_code_line($forced);
+ }
+ return;
}
sub write_blank_code_line {
- my $self = shift;
- my $rOpts = $self->{_rOpts};
+ my $self = shift;
+ my $forced = shift;
+ my $rOpts = $self->{_rOpts};
return
- if ( $self->{_consecutive_blank_lines} >=
+ if (!$forced
+ && $self->{_consecutive_blank_lines} >=
$rOpts->{'maximum-consecutive-blank-lines'} );
$self->{_consecutive_blank_lines}++;
$self->{_consecutive_nonblank_lines} = 0;
$self->write_line("\n");
+ return;
}
sub write_code_line {
$self->{_consecutive_nonblank_lines}++;
}
$self->write_line($a);
+ return;
}
sub write_line {
- my $self = shift;
- my $a = shift;
+ my ( $self, $a ) = @_;
# TODO: go through and see if the test is necessary here
if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; }
}
$self->{_line_length_error_count}++;
}
-
+ return;
}
sub report_line_length_errors {
);
}
}
+ return;
}
#####################################################################
my ( $class, $filename ) = @_;
- bless {
+ return bless {
_debug_file => $filename,
_debug_file_opened => 0,
_fh => undef,
my $debug_file = $self->{_debug_file};
my $fh;
unless ( $fh = IO::File->new("> $debug_file") ) {
- warn("can't open $debug_file: $!\n");
+ Perl::Tidy::Warn("can't open $debug_file: $!\n");
}
$self->{_debug_file_opened} = 1;
$self->{_fh} = $fh;
print $fh
"Use -dump-token-types (-dtt) to get a list of token type codes\n";
+ return;
}
sub close_debug_file {
my $self = shift;
my $fh = $self->{_fh};
if ( $self->{_debug_file_opened} ) {
-
eval { $self->{_fh}->close() };
}
+ return;
}
sub write_debug_entry {
# This is a debug dump routine which may be modified as necessary
# to dump tokens on a line-by-line basis. The output will be written
# to the .DEBUG file when the -D flag is entered.
- my $self = shift;
- my $line_of_tokens = shift;
+ my ( $self, $line_of_tokens ) = @_;
+
+ my $input_line = $line_of_tokens->{_line_text};
+
+ my $rtoken_type = $line_of_tokens->{_rtoken_type};
+ my $rtokens = $line_of_tokens->{_rtokens};
+ my $rlevels = $line_of_tokens->{_rlevels};
+ my $rslevels = $line_of_tokens->{_rslevels};
+ my $rblock_type = $line_of_tokens->{_rblock_type};
- my $input_line = $line_of_tokens->{_line_text};
- my $rtoken_type = $line_of_tokens->{_rtoken_type};
- my $rtokens = $line_of_tokens->{_rtokens};
- my $rlevels = $line_of_tokens->{_rlevels};
- my $rslevels = $line_of_tokens->{_rslevels};
- my $rblock_type = $line_of_tokens->{_rblock_type};
my $input_line_number = $line_of_tokens->{_line_number};
my $line_type = $line_of_tokens->{_line_type};
+ ##my $rtoken_array = $line_of_tokens->{_token_array};
my ( $j, $num );
unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
my $fh = $self->{_fh};
- for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
+ # FIXME: could convert to use of token_array instead
+ foreach my $j ( 0 .. @{$rtoken_type} - 1 ) {
# testing patterns
- if ( $$rtoken_type[$j] eq 'k' ) {
- $pattern .= $$rtokens[$j];
+ if ( $rtoken_type->[$j] eq 'k' ) {
+ $pattern .= $rtokens->[$j];
}
else {
- $pattern .= $$rtoken_type[$j];
+ $pattern .= $rtoken_type->[$j];
}
- $reconstructed_original .= $$rtokens[$j];
- $block_str .= "($$rblock_type[$j])";
- $num = length( $$rtokens[$j] );
- my $type_str = $$rtoken_type[$j];
+ $reconstructed_original .= $rtokens->[$j];
+ $block_str .= "($rblock_type->[$j])";
+ $num = length( $rtokens->[$j] );
+ my $type_str = $rtoken_type->[$j];
# be sure there are no blank tokens (shouldn't happen)
# This can only happen if a programming error has been made
print $fh "$token_str\n";
#print $fh "$block_str\n";
+ return;
}
#####################################################################
sub new {
- my $class = shift;
- my $line_source_object = shift;
+ my ( $class, $line_source_object ) = @_;
return bless {
_line_source_object => $line_source_object,
}
sub peek_ahead {
- my $self = shift;
- my $buffer_index = shift;
+ my ( $self, $buffer_index ) = @_;
my $line = undef;
my $line_source_object = $self->{_line_source_object};
my $rlookahead_buffer = $self->{_rlookahead_buffer};
- if ( $buffer_index < scalar(@$rlookahead_buffer) ) {
- $line = $$rlookahead_buffer[$buffer_index];
+ if ( $buffer_index < scalar( @{$rlookahead_buffer} ) ) {
+ $line = $rlookahead_buffer->[$buffer_index];
}
else {
$line = $line_source_object->get_line();
- push( @$rlookahead_buffer, $line );
+ push( @{$rlookahead_buffer}, $line );
}
return $line;
}
my $line_source_object = $self->{_line_source_object};
my $rlookahead_buffer = $self->{_rlookahead_buffer};
- if ( scalar(@$rlookahead_buffer) ) {
- $line = shift @$rlookahead_buffer;
+ if ( scalar( @{$rlookahead_buffer} ) ) {
+ $line = shift @{$rlookahead_buffer};
}
else {
$line = $line_source_object->get_line();
use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;
my $debug_warning = sub {
- print "TOKENIZER_DEBUGGING with key $_[0]\n";
+ print STDOUT "TOKENIZER_DEBUGGING with key $_[0]\n";
};
TOKENIZER_DEBUG_FLAG_EXPECT && $debug_warning->('EXPECT');
use Carp;
-# PACKAGE VARIABLES for for processing an entire FILE.
+# PACKAGE VARIABLES for processing an entire FILE.
use vars qw{
$tokenizer_self
$square_bracket_depth
@current_depth
+ @total_depth
+ $total_depth
@nesting_sequence_number
@current_sequence_number
@paren_type
@paren_structural_type
@brace_type
@brace_structural_type
- @brace_statement_type
@brace_context
@brace_package
@square_bracket_type
@square_bracket_structural_type
@depth_array
+ @nested_ternary_flag
+ @nested_statement_type
@starting_line_of_current_depth
};
%is_digraph
%is_file_test_operator
%is_trigraph
+ %is_tetragraph
%is_valid_token_type
%is_keyword
%is_code_block_token
# methods to count instances
my $_count = 0;
- sub get_count { $_count; }
- sub _increment_count { ++$_count }
- sub _decrement_count { --$_count }
+ sub get_count { return $_count; }
+ sub _increment_count { return ++$_count }
+ sub _decrement_count { return --$_count }
}
sub DESTROY {
- $_[0]->_decrement_count();
+ my $self = shift;
+ $self->_decrement_count();
+ return;
}
sub new {
logger_object => undef,
starting_level => undef,
indent_columns => 4,
- tabs => 0,
+ tabsize => 8,
look_for_hash_bang => 0,
trim_qw => 1,
look_for_autoloader => 1,
look_for_selfloader => 1,
starting_line_number => 1,
+ extended_syntax => 0,
);
my %args = ( %defaults, @_ );
# _in_attribute_list flag telling if we are looking for attributes
# _in_quote flag telling if we are chasing a quote
# _starting_level indentation level of first line
- # _input_tabstr string denoting one indentation level of input file
- # _know_input_tabstr flag indicating if we know _input_tabstr
# _line_buffer_object object with get_line() method to supply source code
# _diagnostics_object place to write debugging information
# _unexpected_error_count error count used to limit output
# _lower_case_labels_at line numbers where lower case labels seen
+ # _hit_bug program bug detected
$tokenizer_self = {
_rhere_target_list => [],
_in_here_doc => 0,
_line_start_quote => -1,
_starting_level => $args{starting_level},
_know_starting_level => defined( $args{starting_level} ),
- _tabs => $args{tabs},
+ _tabsize => $args{tabsize},
_indent_columns => $args{indent_columns},
_look_for_hash_bang => $args{look_for_hash_bang},
_trim_qw => $args{trim_qw},
- _input_tabstr => "",
- _know_input_tabstr => -1,
+ _continuation_indentation => $args{continuation_indentation},
+ _outdent_labels => $args{outdent_labels},
_last_line_number => $args{starting_line_number} - 1,
_saw_perl_dash_P => 0,
_saw_perl_dash_w => 0,
_saw_use_strict => 0,
_saw_v_string => 0,
+ _hit_bug => 0,
_look_for_autoloader => $args{look_for_autoloader},
_look_for_selfloader => $args{look_for_selfloader},
_saw_autoloader => 0,
_nearly_matched_here_target_at => undef,
_line_text => "",
_rlower_case_labels_at => undef,
+ _extended_syntax => $args{extended_syntax},
};
prepare_for_a_new_file();
# interface to Perl::Tidy::Logger routines
sub warning {
+ my $msg = shift;
my $logger_object = $tokenizer_self->{_logger_object};
if ($logger_object) {
- $logger_object->warning(@_);
+ $logger_object->warning($msg);
}
+ return;
}
sub complain {
+ my $msg = shift;
my $logger_object = $tokenizer_self->{_logger_object};
if ($logger_object) {
- $logger_object->complain(@_);
+ $logger_object->complain($msg);
}
+ return;
}
sub write_logfile_entry {
+ my $msg = shift;
my $logger_object = $tokenizer_self->{_logger_object};
if ($logger_object) {
- $logger_object->write_logfile_entry(@_);
+ $logger_object->write_logfile_entry($msg);
}
+ return;
}
sub interrupt_logfile {
if ($logger_object) {
$logger_object->interrupt_logfile();
}
+ return;
}
sub resume_logfile {
if ($logger_object) {
$logger_object->resume_logfile();
}
+ return;
}
sub increment_brace_error {
if ($logger_object) {
$logger_object->increment_brace_error();
}
+ return;
}
sub report_definite_bug {
+ $tokenizer_self->{_hit_bug} = 1;
my $logger_object = $tokenizer_self->{_logger_object};
if ($logger_object) {
$logger_object->report_definite_bug();
}
+ return;
}
sub brace_warning {
+ my $msg = shift;
my $logger_object = $tokenizer_self->{_logger_object};
if ($logger_object) {
- $logger_object->brace_warning(@_);
+ $logger_object->brace_warning($msg);
}
+ return;
}
sub get_saw_brace_error {
my $logger_object = $tokenizer_self->{_logger_object};
if ($logger_object) {
- $logger_object->get_saw_brace_error();
+ return $logger_object->get_saw_brace_error();
}
else {
- 0;
+ return 0;
}
}
# interface to Perl::Tidy::Diagnostics routines
sub write_diagnostics {
+ my $msg = shift;
if ( $tokenizer_self->{_diagnostics_object} ) {
- $tokenizer_self->{_diagnostics_object}->write_diagnostics(@_);
+ $tokenizer_self->{_diagnostics_object}->write_diagnostics($msg);
}
+ return;
}
sub report_tokenization_errors {
- my $self = shift;
+ my $self = shift;
+ my $severe_error = $self->{_in_error};
my $level = get_indentation_level();
if ( $level != $tokenizer_self->{_starting_level} ) {
}
if ( $tokenizer_self->{_in_here_doc} ) {
+ $severe_error = 1;
my $here_doc_target = $tokenizer_self->{_here_doc_target};
my $started_looking_for_here_target_at =
$tokenizer_self->{_started_looking_for_here_target_at};
}
if ( $tokenizer_self->{_in_quote} ) {
+ $severe_error = 1;
my $line_start_quote = $tokenizer_self->{_line_start_quote};
my $quote_target = $tokenizer_self->{_quote_target};
my $what =
);
}
+ if ( $tokenizer_self->{_hit_bug} ) {
+ $severe_error = 1;
+ }
+
+ my $logger_object = $tokenizer_self->{_logger_object};
+
+# TODO: eventually may want to activate this to cause file to be output verbatim
+ if (0) {
+
+ # Set the severe error for a fairly high warning count because
+ # some of the warnings do not harm formatting, such as duplicate
+ # sub names.
+ my $warning_count = $logger_object->{_warning_count};
+ if ( $warning_count > 50 ) {
+ $severe_error = 1;
+ }
+
+ # Brace errors are significant, so set the severe error flag at
+ # a low number.
+ my $saw_brace_error = $logger_object->{_saw_brace_error};
+ if ( $saw_brace_error > 2 ) {
+ $severe_error = 1;
+ }
+ }
+
unless ( $tokenizer_self->{_saw_perl_dash_w} ) {
if ( $] < 5.006 ) {
write_logfile_entry("Suggest including '-w parameter'\n");
write_logfile_entry("Suggest including 'use strict;'\n");
}
- # it is suggested that lables have at least one upper case character
+ # it is suggested that labels have at least one upper case character
# for legibility and to avoid code breakage as new keywords are introduced
if ( $tokenizer_self->{_rlower_case_labels_at} ) {
my @lower_case_labels_at =
local $" = ')(';
write_logfile_entry(" defined at line(s): (@lower_case_labels_at)\n");
}
+ return $severe_error;
}
sub report_v_string {
"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
);
}
+ return;
}
sub get_input_line_number {
my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
$tokenizer_self->{_line_text} = $input_line;
- return undef unless ($input_line);
+ return unless ($input_line);
my $input_line_number = ++$tokenizer_self->{_last_line_number};
$input_line_separator = $2 . $input_line_separator;
}
- # for backwards compatability we keep the line text terminated with
+ # for backwards compatibility we keep the line text terminated with
# a newline character
$input_line .= "\n";
$tokenizer_self->{_line_text} = $input_line; # update
# _ending_in_quote - this line ends in a multi-line quote
# (so don't trim trailing blanks!)
my $line_of_tokens = {
- _line_type => 'EOF',
- _line_text => $input_line,
- _line_number => $input_line_number,
- _rtoken_type => undef,
- _rtokens => undef,
- _rlevels => undef,
- _rslevels => undef,
- _rblock_type => undef,
- _rcontainer_type => undef,
- _rcontainer_environment => undef,
- _rtype_sequence => undef,
- _rnesting_tokens => undef,
- _rci_levels => undef,
- _rnesting_blocks => undef,
- _python_indentation_level => -1, ## 0,
+ _line_type => 'EOF',
+ _line_text => $input_line,
+ _line_number => $input_line_number,
+ _rtoken_type => undef,
+ _rtokens => undef,
+ _rlevels => undef,
+ _rslevels => undef,
+ _rblock_type => undef,
+ _rcontainer_type => undef,
+ _rcontainer_environment => undef,
+ _rtype_sequence => undef,
+ _rnesting_tokens => undef,
+ _rci_levels => undef,
+ _rnesting_blocks => undef,
+ _guessed_indentation_level => 0,
_starting_in_quote => 0, # to be set by subroutine
_ending_in_quote => 0,
_curly_brace_depth => $brace_depth,
my $here_quote_character = $tokenizer_self->{_here_quote_character};
my $candidate_target = $input_line;
chomp $candidate_target;
+
+ # Handle <<~ targets, which are indicated here by a leading space on
+ # the here quote character
+ if ( $here_quote_character =~ /^\s/ ) {
+ $candidate_target =~ s/^\s*//;
+ }
if ( $candidate_target eq $here_doc_target ) {
$tokenizer_self->{_nearly_matched_here_target_at} = undef;
$line_of_tokens->{_line_type} = 'HERE_END';
write_logfile_entry("Exiting HERE document $here_doc_target\n");
my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
- if (@$rhere_target_list) { # there can be multiple here targets
+ if ( @{$rhere_target_list} ) { # there can be multiple here targets
( $here_doc_target, $here_quote_character ) =
- @{ shift @$rhere_target_list };
+ @{ shift @{$rhere_target_list} };
$tokenizer_self->{_here_doc_target} = $here_doc_target;
$tokenizer_self->{_here_quote_character} =
$here_quote_character;
}
# must print line unchanged if we have seen a severe error (i.e., we
- # are seeing illegal tokens and connot continue. Syntax errors do
+ # are seeing illegal tokens and cannot continue. Syntax errors do
# not pass this route). Calling routine can decide what to do, but
# the default can be to just pass all lines as if they were after __END__
elsif ( $tokenizer_self->{_in_error} ) {
$tokenizer_self->{_saw_perl_dash_w} = 1;
}
- if ( ( $input_line_number > 1 )
- && ( !$tokenizer_self->{_look_for_hash_bang} ) )
+ if (
+ ( $input_line_number > 1 )
+
+ # leave any hash bang in a BEGIN block alone
+ # i.e. see 'debugger-duck_type.t'
+ && !(
+ $last_nonblank_block_type
+ && $last_nonblank_block_type eq 'BEGIN'
+ )
+ && ( !$tokenizer_self->{_look_for_hash_bang} )
+ )
{
# this is helpful for VMS systems; we may have accidentally
if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
complain("=cut while not in pod ignored\n");
$tokenizer_self->{_in_pod} = 0;
- $line_of_tokens->{_line_type} = 'POD_STOP';
+ $line_of_tokens->{_line_type} = 'POD_END';
}
else {
- $line_of_tokens->{_line_type} = 'POD_END';
+ $line_of_tokens->{_line_type} = 'POD_START';
complain(
"=cut starts a pod section .. this can fool pod utilities.\n"
);
# update indentation levels for log messages
if ( $input_line !~ /^\s*$/ ) {
- my $rlevels = $line_of_tokens->{_rlevels};
- my $structural_indentation_level = $$rlevels[0];
- my ( $python_indentation_level, $msg ) =
- find_indentation_level( $input_line, $structural_indentation_level );
- if ($msg) { write_logfile_entry("$msg") }
- if ( $tokenizer_self->{_know_input_tabstr} == 1 ) {
- $line_of_tokens->{_python_indentation_level} =
- $python_indentation_level;
- }
+ my $rlevels = $line_of_tokens->{_rlevels};
+ $line_of_tokens->{_guessed_indentation_level} =
+ guess_old_indentation_level($input_line);
}
# see if this line contains here doc targets
my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
- if (@$rhere_target_list) {
+ if ( @{$rhere_target_list} ) {
my ( $here_doc_target, $here_quote_character ) =
- @{ shift @$rhere_target_list };
+ @{ shift @{$rhere_target_list} };
$tokenizer_self->{_in_here_doc} = 1;
$tokenizer_self->{_here_doc_target} = $here_doc_target;
$tokenizer_self->{_here_quote_character} = $here_quote_character;
$line_of_tokens->{_line_type} = 'CODE';
# remember if we have seen any real code
- if ( !$tokenizer_self->{_started_tokenizing}
+ if ( !$tokenizer_self->{_started_tokenizing}
&& $input_line !~ /^\s*$/
&& $input_line !~ /^\s*#/ )
{
}
}
elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
- and !$tokenizer_self->{_in_quote} )
+ && !$tokenizer_self->{_in_quote} )
{
$tokenizer_self->{_line_start_quote} = -1;
write_logfile_entry("End of multi-line quote or pattern\n");
sub find_starting_indentation_level {
+ # We need to find the indentation level of the first line of the
+ # script being formatted. Often it will be zero for an entire file,
+ # but if we are formatting a local block of code (within an editor for
+ # example) it may not be zero. The user may specify this with the
+ # -sil=n parameter but normally doesn't so we have to guess.
+ #
# USES GLOBAL VARIABLES: $tokenizer_self
- my $starting_level = 0;
- my $know_input_tabstr = -1; # flag for find_indentation_level
+ my $starting_level = 0;
# use value if given as parameter
if ( $tokenizer_self->{_know_starting_level} ) {
# otherwise figure it out from the input file
else {
my $line;
- my $i = 0;
- my $structural_indentation_level = -1; # flag for find_indentation_level
+ my $i = 0;
+ # keep looking at lines until we find a hash bang or piece of code
my $msg = "";
while ( $line =
$tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
$starting_level = 0;
last;
}
- next if ( $line =~ /^\s*#/ ); # must not be comment
- next if ( $line =~ /^\s*$/ ); # must not be blank
- ( $starting_level, $msg ) =
- find_indentation_level( $line, $structural_indentation_level );
- if ($msg) { write_logfile_entry("$msg") }
+ next if ( $line =~ /^\s*#/ ); # skip past comments
+ next if ( $line =~ /^\s*$/ ); # skip past blank lines
+ $starting_level = guess_old_indentation_level($line);
last;
}
$msg = "Line $i implies starting-indentation-level = $starting_level\n";
-
- if ( $starting_level > 0 ) {
-
- my $input_tabstr = $tokenizer_self->{_input_tabstr};
- if ( $input_tabstr eq "\t" ) {
- $msg .= "by guessing input tabbing uses 1 tab per level\n";
- }
- else {
- my $cols = length($input_tabstr);
- $msg .=
- "by guessing input tabbing uses $cols blanks per level\n";
- }
- }
write_logfile_entry("$msg");
}
$tokenizer_self->{_starting_level} = $starting_level;
reset_indentation_level($starting_level);
+ return;
}
-# Find indentation level given a input line. At the same time, try to
-# figure out the input tabbing scheme.
-#
-# There are two types of calls:
-#
-# Type 1: $structural_indentation_level < 0
-# In this case we have to guess $input_tabstr to figure out the level.
-#
-# Type 2: $structural_indentation_level >= 0
-# In this case the level of this line is known, and this routine can
-# update the tabbing string, if still unknown, to make the level correct.
-
-sub find_indentation_level {
- my ( $line, $structural_indentation_level ) = @_;
+sub guess_old_indentation_level {
+ my ($line) = @_;
+ # Guess the indentation level of an input line.
+ #
+ # For the first line of code this result will define the starting
+ # indentation level. It will mainly be non-zero when perltidy is applied
+ # within an editor to a local block of code.
+ #
+ # This is an impossible task in general because we can't know what tabs
+ # meant for the old script and how many spaces were used for one
+ # indentation level in the given input script. For example it may have
+ # been previously formatted with -i=7 -et=3. But we can at least try to
+ # make sure that perltidy guesses correctly if it is applied repeatedly to
+ # a block of code within an editor, so that the block stays at the same
+ # level when perltidy is applied repeatedly.
+ #
# USES GLOBAL VARIABLES: $tokenizer_self
my $level = 0;
- my $msg = "";
-
- my $know_input_tabstr = $tokenizer_self->{_know_input_tabstr};
- my $input_tabstr = $tokenizer_self->{_input_tabstr};
-
- # find leading whitespace
- my $leading_whitespace = ( $line =~ /^(\s*)/ ) ? $1 : "";
-
- # make first guess at input tabbing scheme if necessary
- if ( $know_input_tabstr < 0 ) {
-
- $know_input_tabstr = 0;
-
- if ( $tokenizer_self->{_tabs} ) {
- $input_tabstr = "\t";
- if ( length($leading_whitespace) > 0 ) {
- if ( $leading_whitespace !~ /\t/ ) {
-
- my $cols = $tokenizer_self->{_indent_columns};
-
- if ( length($leading_whitespace) < $cols ) {
- $cols = length($leading_whitespace);
- }
- $input_tabstr = " " x $cols;
- }
- }
- }
- else {
- $input_tabstr = " " x $tokenizer_self->{_indent_columns};
-
- if ( length($leading_whitespace) > 0 ) {
- if ( $leading_whitespace =~ /^\t/ ) {
- $input_tabstr = "\t";
- }
- }
- }
- $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
- $tokenizer_self->{_input_tabstr} = $input_tabstr;
- }
-
- # determine the input tabbing scheme if possible
- if ( ( $know_input_tabstr == 0 )
- && ( length($leading_whitespace) > 0 )
- && ( $structural_indentation_level > 0 ) )
- {
- my $saved_input_tabstr = $input_tabstr;
-
- # check for common case of one tab per indentation level
- if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
- if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
- $input_tabstr = "\t";
- $msg = "Guessing old indentation was tab character\n";
- }
- }
-
- else {
- # detab any tabs based on 8 blanks per tab
- my $entabbed = "";
- if ( $leading_whitespace =~ s/^\t+/ /g ) {
- $entabbed = "entabbed";
- }
+ # find leading tabs, spaces, and any statement label
+ my $spaces = 0;
+ if ( $line =~ /^(\t+)?(\s+)?(\w+:[^:])?/ ) {
- # now compute tabbing from number of spaces
- my $columns =
- length($leading_whitespace) / $structural_indentation_level;
- if ( $columns == int $columns ) {
- $msg =
- "Guessing old indentation was $columns $entabbed spaces\n";
- }
- else {
- $columns = int $columns;
- $msg =
-"old indentation is unclear, using $columns $entabbed spaces\n";
- }
- $input_tabstr = " " x $columns;
- }
- $know_input_tabstr = 1;
- $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
- $tokenizer_self->{_input_tabstr} = $input_tabstr;
+ # If there are leading tabs, we use the tab scheme for this run, if
+ # any, so that the code will remain stable when editing.
+ if ($1) { $spaces += length($1) * $tokenizer_self->{_tabsize} }
- # see if mistakes were made
- if ( ( $tokenizer_self->{_starting_level} > 0 )
- && !$tokenizer_self->{_know_starting_level} )
- {
+ if ($2) { $spaces += length($2) }
- if ( $input_tabstr ne $saved_input_tabstr ) {
- complain(
-"I made a bad starting level guess; rerun with a value for -sil \n"
- );
- }
+ # correct for outdented labels
+ if ( $3 && $tokenizer_self->{'_outdent_labels'} ) {
+ $spaces += $tokenizer_self->{_continuation_indentation};
}
}
- # use current guess at input tabbing to get input indentation level
- #
- # Patch to handle a common case of entabbed leading whitespace
- # If the leading whitespace equals 4 spaces and we also have
- # tabs, detab the input whitespace assuming 8 spaces per tab.
- if ( length($input_tabstr) == 4 ) {
- $leading_whitespace =~ s/^\t+/ /g;
- }
-
- if ( ( my $len_tab = length($input_tabstr) ) > 0 ) {
- my $pos = 0;
-
- while ( substr( $leading_whitespace, $pos, $len_tab ) eq $input_tabstr )
- {
- $pos += $len_tab;
- $level++;
- }
- }
- return ( $level, $msg );
+ # compute indentation using the value of -i for this run.
+ # If -i=0 is used for this run (which is possible) it doesn't matter
+ # what we do here but we'll guess that the old run used 4 spaces per level.
+ my $indent_columns = $tokenizer_self->{_indent_columns};
+ $indent_columns = 4 if ( !$indent_columns );
+ $level = int( $spaces / $indent_columns );
+ return ($level);
}
# This is a currently unused debug routine
sub dump_functions {
my $fh = *STDOUT;
- my ( $pkg, $sub );
- foreach $pkg ( keys %is_user_function ) {
+ foreach my $pkg ( keys %is_user_function ) {
print $fh "\nnon-constant subs in package $pkg\n";
- foreach $sub ( keys %{ $is_user_function{$pkg} } ) {
+ foreach my $sub ( keys %{ $is_user_function{$pkg} } ) {
my $msg = "";
if ( $is_block_list_function{$pkg}{$sub} ) {
$msg = 'block_list';
}
}
- foreach $pkg ( keys %is_constant ) {
+ foreach my $pkg ( keys %is_constant ) {
print $fh "\nconstants and constant subs in package $pkg\n";
- foreach $sub ( keys %{ $is_constant{$pkg} } ) {
+ foreach my $sub ( keys %{ $is_constant{$pkg} } ) {
print $fh "$sub\n";
}
}
+ return;
}
sub ones_count {
# count number of 1's in a string of 1's and 0's
# example: ones_count("010101010101") gives 6
- return ( my $cis = $_[0] ) =~ tr/1/0/;
+ my $str = shift;
+ return $str =~ tr/1/0/;
}
sub prepare_for_a_new_file {
$square_bracket_depth = 0;
@current_depth[ 0 .. $#closing_brace_names ] =
(0) x scalar @closing_brace_names;
+ $total_depth = 0;
+ @total_depth = ();
@nesting_sequence_number[ 0 .. $#closing_brace_names ] =
( 0 .. $#closing_brace_names );
@current_sequence_number = ();
$paren_structural_type[$brace_depth] = '';
$brace_type[$brace_depth] = ';'; # identify opening brace as code block
$brace_structural_type[$brace_depth] = '';
- $brace_statement_type[$brace_depth] = "";
$brace_context[$brace_depth] = UNKNOWN_CONTEXT;
$brace_package[$paren_depth] = $current_package;
$square_bracket_type[$square_bracket_depth] = '';
$square_bracket_structural_type[$square_bracket_depth] = '';
initialize_tokenizer_state();
+ return;
}
{ # begin tokenize_this_line
$next_tok, $next_type, $peeked_ahead,
$prototype, $rhere_target_list, $rtoken_map,
$rtoken_type, $rtokens, $tok,
- $type, $type_sequence,
+ $type, $type_sequence, $indent_flag,
);
# TV2: refs to ARRAYS for processing one LINE
my $routput_block_type = []; # types of code block
my $routput_container_type = []; # paren types, such as if, elsif, ..
my $routput_type_sequence = []; # nesting sequential number
+ my $routput_indent_flag = []; #
# TV3: SCALARS for quote variables. These are initialized with a
# subroutine call and continually updated as lines are processed.
# TV4: SCALARS for multi-line identifiers and
# statements. These are initialized with a subroutine call
# and continually updated as lines are processed.
- my ( $id_scan_state, $identifier, $want_paren, );
+ my ( $id_scan_state, $identifier, $want_paren, $indented_if_level );
# TV5: SCALARS for tracking indentation level.
# Initialized once and continually updated as lines are
$allowed_quote_modifiers = "";
# TV4:
- $id_scan_state = '';
- $identifier = '';
- $want_paren = "";
+ $id_scan_state = '';
+ $identifier = '';
+ $want_paren = "";
+ $indented_if_level = 0;
# TV5:
$nesting_token_string = "";
$last_last_nonblank_container_type = '';
$last_last_nonblank_type_sequence = '';
$last_nonblank_prototype = "";
+ return;
}
sub save_tokenizer_state {
$next_tok, $next_type, $peeked_ahead,
$prototype, $rhere_target_list, $rtoken_map,
$rtoken_type, $rtokens, $tok,
- $type, $type_sequence,
+ $type, $type_sequence, $indent_flag,
];
my $rTV2 = [
- $routput_token_list, $routput_token_type,
- $routput_block_type, $routput_container_type,
- $routput_type_sequence,
+ $routput_token_list, $routput_token_type,
+ $routput_block_type, $routput_container_type,
+ $routput_type_sequence, $routput_indent_flag,
];
my $rTV3 = [
$quoted_string_2, $allowed_quote_modifiers,
];
- my $rTV4 = [ $id_scan_state, $identifier, $want_paren, ];
+ my $rTV4 =
+ [ $id_scan_state, $identifier, $want_paren, $indented_if_level ];
my $rTV5 = [
$nesting_token_string, $nesting_type_string,
$next_tok, $next_type, $peeked_ahead,
$prototype, $rhere_target_list, $rtoken_map,
$rtoken_type, $rtokens, $tok,
- $type, $type_sequence,
+ $type, $type_sequence, $indent_flag,
) = @{$rTV1};
(
- $routput_token_list, $routput_token_type,
- $routput_block_type, $routput_container_type,
- $routput_type_sequence,
+ $routput_token_list, $routput_token_type,
+ $routput_block_type, $routput_container_type,
+ $routput_type_sequence, $routput_type_sequence,
) = @{$rTV2};
(
$quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
) = @{$rTV3};
- ( $id_scan_state, $identifier, $want_paren, ) = @{$rTV4};
+ ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) =
+ @{$rTV4};
(
$nesting_token_string, $nesting_type_string,
$last_last_nonblank_type_sequence,
$last_nonblank_prototype,
) = @{$rTV6};
+ return;
}
sub get_indentation_level {
+
+ # patch to avoid reporting error if indented if is not terminated
+ if ($indented_if_level) { return $level_in_tokenizer - 1 }
return $level_in_tokenizer;
}
sub reset_indentation_level {
- $level_in_tokenizer = $_[0];
- $slevel_in_tokenizer = $_[0];
+ $level_in_tokenizer = $slevel_in_tokenizer = shift;
push @{$rslevel_stack}, $slevel_in_tokenizer;
+ return;
}
sub peeked_ahead {
- $peeked_ahead = defined( $_[0] ) ? $_[0] : $peeked_ahead;
+ my $flag = shift;
+ $peeked_ahead = defined($flag) ? $flag : $peeked_ahead;
+ return $peeked_ahead;
}
# ------------------------------------------------------------
my ($replacement_text) = @_;
# quick check
- return undef unless ( $replacement_text =~ /<</ );
+ return unless ( $replacement_text =~ /<</ );
write_logfile_entry("scanning replacement text for here-doc targets\n");
# localize all package variables
local (
- $tokenizer_self, $last_nonblank_token,
- $last_nonblank_type, $last_nonblank_block_type,
- $statement_type, $in_attribute_list,
- $current_package, $context,
- %is_constant, %is_user_function,
- %user_function_prototype, %is_block_function,
- %is_block_list_function, %saw_function_definition,
- $brace_depth, $paren_depth,
- $square_bracket_depth, @current_depth,
- @nesting_sequence_number, @current_sequence_number,
- @paren_type, @paren_semicolon_count,
- @paren_structural_type, @brace_type,
- @brace_structural_type, @brace_statement_type,
- @brace_context, @brace_package,
- @square_bracket_type, @square_bracket_structural_type,
- @depth_array, @starting_line_of_current_depth,
+ $tokenizer_self, $last_nonblank_token,
+ $last_nonblank_type, $last_nonblank_block_type,
+ $statement_type, $in_attribute_list,
+ $current_package, $context,
+ %is_constant, %is_user_function,
+ %user_function_prototype, %is_block_function,
+ %is_block_list_function, %saw_function_definition,
+ $brace_depth, $paren_depth,
+ $square_bracket_depth, @current_depth,
+ @total_depth, $total_depth,
+ @nesting_sequence_number, @current_sequence_number,
+ @paren_type, @paren_semicolon_count,
+ @paren_structural_type, @brace_type,
+ @brace_structural_type, @brace_context,
+ @brace_package, @square_bracket_type,
+ @square_bracket_structural_type, @depth_array,
+ @starting_line_of_current_depth, @nested_ternary_flag,
+ @nested_statement_type,
);
# save all lexical variables
}
# now its safe to report errors
- $tokenizer->report_tokenization_errors();
+ my $severe_error = $tokenizer->report_tokenization_errors();
+
+ # TODO: Could propagate a severe error up
# restore all tokenizer lexical variables
restore_tokenizer_state($rstate);
( $i, $tok, $type, $prototype ) =
scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
$rtoken_map, $max_token_index );
+ return;
}
sub scan_identifier {
( $i, $tok, $type, $id_scan_state, $identifier ) =
scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
- $max_token_index );
+ $max_token_index, $expecting, $paren_type[$paren_depth] );
+ return;
}
sub scan_id {
( $i, $tok, $type, $id_scan_state ) =
scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
$id_scan_state, $max_token_index );
+ return;
}
sub scan_number {
sub error_if_expecting_TERM {
if ( $expecting == TERM ) {
if ( $really_want_term{$last_nonblank_type} ) {
- unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map,
- $rtoken_type, $input_line );
- 1;
+ report_unexpected( $tok, "term", $i_tok, $last_nonblank_i,
+ $rtoken_map, $rtoken_type, $input_line );
+ return 1;
}
}
+ return;
}
# a sub to warn if token found where operator expected
sub error_if_expecting_OPERATOR {
+ my $thing = shift;
if ( $expecting == OPERATOR ) {
- my $thing = defined $_[0] ? $_[0] : $tok;
- unexpected( $thing, "operator", $i_tok, $last_nonblank_i,
+ if ( !defined($thing) ) { $thing = $tok }
+ report_unexpected( $thing, "operator", $i_tok, $last_nonblank_i,
$rtoken_map, $rtoken_type, $input_line );
if ( $i_tok == 0 ) {
interrupt_logfile();
warning("Missing ';' above?\n");
resume_logfile();
}
- 1;
+ return 1;
}
+ return;
}
# ------------------------------------------------------------
# keyword ( .... ) { BLOCK }
# patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
my %is_blocktype_with_paren;
- @_ = qw(if elsif unless while until for foreach switch case given when);
+ @_ =
+ qw(if elsif unless while until for foreach switch case given when catch);
@is_blocktype_with_paren{@_} = (1) x scalar(@_);
# ------------------------------------------------------------
$tokenizer_self->{_saw_perl_dash_w} = 1;
}
- # Check for indentifier in indirect object slot
+ # Check for identifier in indirect object slot
# (vorboard.pl, sort.t). Something like:
# /^(print|printf|sort|exec|system)$/
if (
$container_type = $want_paren;
$want_paren = "";
}
+ elsif ( $statement_type =~ /^sub\b/ ) {
+ $container_type = $statement_type;
+ }
else {
$container_type = $last_nonblank_token;
} ## end if ( $expecting == OPERATOR...
}
$paren_type[$paren_depth] = $container_type;
- $type_sequence =
- increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
+ ( $type_sequence, $indent_flag ) =
+ increase_nesting_depth( PAREN, $rtoken_map->[$i_tok] );
# propagate types down through nested parens
# for example: the second paren in 'if ((' would be structural
},
')' => sub {
- $type_sequence =
- decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
+ ( $type_sequence, $indent_flag ) =
+ decrease_nesting_depth( PAREN, $rtoken_map->[$i_tok] );
if ( $paren_structural_type[$paren_depth] eq '{' ) {
$type = '}';
$container_type = $paren_type[$paren_depth];
+ # restore statement type as 'sub' at closing paren of a signature
+ # so that a subsequent ':' is identified as an attribute
+ if ( $container_type =~ /^sub\b/ ) {
+ $statement_type = $container_type;
+ }
+
# /^(for|foreach)$/
if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {
my $num_sc = $paren_semicolon_count[$paren_depth];
';' => sub {
$context = UNKNOWN_CONTEXT;
$statement_type = '';
+ $want_paren = "";
# /^(for|foreach)$/
if ( $is_for_foreach{ $paren_type[$paren_depth] } )
'/' => sub {
my $is_pattern;
- if ( $expecting == UNKNOWN ) { # indeterminte, must guess..
+ if ( $expecting == UNKNOWN ) { # indeterminate, must guess..
my $msg;
( $is_pattern, $msg ) =
guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
if ($is_pattern) {
$in_quote = 1;
$type = 'Q';
- $allowed_quote_modifiers = '[cgimosx]';
+ $allowed_quote_modifiers = '[msixpodualngc]';
}
else { # not a pattern; check for a /= token
- if ( $$rtokens[ $i + 1 ] eq '=' ) { # form token /=
+ if ( $rtokens->[ $i + 1 ] eq '=' ) { # form token /=
$i++;
$tok = '/=';
$type = $tok;
# check for syntax error here;
unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
- my $list = join( ' ', sort keys %is_blocktype_with_paren );
- warning(
- "syntax error at ') {', didn't see one of: $list\n");
+ if ( $tokenizer_self->{'_extended_syntax'} ) {
+
+ # we append a trailing () to mark this as an unknown
+ # block type. This allows perltidy to format some
+ # common extensions of perl syntax.
+ # This is used by sub code_block_type
+ $last_nonblank_token .= '()';
+ }
+ else {
+ my $list =
+ join( ' ', sort keys %is_blocktype_with_paren );
+ warning(
+"syntax error at ') {', didn't see one of: <<$list>>; If this code is okay try using the -xs flag\n"
+ );
+ }
}
}
# allow paren-less identifier after 'when'
# if the brace is preceded by a space
if ( $statement_type eq 'when'
- && $last_nonblank_type eq 'i'
+ && $last_nonblank_type eq 'i'
&& $last_last_nonblank_type eq 'k'
&& ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
{
}
}
}
- $brace_type[ ++$brace_depth ] = $block_type;
- $brace_package[$brace_depth] = $current_package;
- $type_sequence =
- increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
+
+ $brace_type[ ++$brace_depth ] = $block_type;
+ $brace_package[$brace_depth] = $current_package;
$brace_structural_type[$brace_depth] = $type;
$brace_context[$brace_depth] = $context;
- $brace_statement_type[$brace_depth] = $statement_type;
+ ( $type_sequence, $indent_flag ) =
+ increase_nesting_depth( BRACE, $rtoken_map->[$i_tok] );
},
'}' => sub {
$block_type = $brace_type[$brace_depth];
# can happen on brace error (caught elsewhere)
else {
}
- $type_sequence =
- decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
+ ( $type_sequence, $indent_flag ) =
+ decrease_nesting_depth( BRACE, $rtoken_map->[$i_tok] );
if ( $brace_structural_type[$brace_depth] eq 'L' ) {
$type = 'R';
}
- # propagate type information for 'do' and 'eval' blocks.
- # This is necessary to enable us to know if an operator
- # or term is expected next
- if ( $is_block_operator{ $brace_type[$brace_depth] } ) {
- $tok = $brace_type[$brace_depth];
+ # propagate type information for 'do' and 'eval' blocks, and also
+ # for smartmatch operator. This is necessary to enable us to know
+ # if an operator or term is expected next.
+ if ( $is_block_operator{$block_type} ) {
+ $tok = $block_type;
}
- $context = $brace_context[$brace_depth];
- $statement_type = $brace_statement_type[$brace_depth];
+ $context = $brace_context[$brace_depth];
if ( $brace_depth > 0 ) { $brace_depth--; }
},
'&' => sub { # maybe sub call? start looking
# got mistaken as a q operator in an early version:
# print BODY &q(<<'EOT');
if ( $expecting != OPERATOR ) {
- scan_identifier();
+
+ # But only look for a sub call if we are expecting a term or
+ # if there is no existing space after the &.
+ # For example we probably don't want & as sub call here:
+ # Fcntl::S_IRUSR & $mode;
+ if ( $expecting == TERM || $next_type ne 'b' ) {
+ scan_identifier();
+ }
}
else {
}
find_angle_operator_termination( $input_line, $i, $rtoken_map,
$expecting, $max_token_index );
+ if ( $type eq '<' && $expecting == TERM ) {
+ error_if_expecting_TERM();
+ interrupt_logfile();
+ warning("Unterminated <> operator?\n");
+ resume_logfile();
+ }
}
else {
}
if ($is_pattern) {
$in_quote = 1;
$type = 'Q';
- $allowed_quote_modifiers = '[cgimosx]'; # TBD:check this
+ $allowed_quote_modifiers = '[msixpodualngc]';
}
else {
- $type_sequence =
+ ( $type_sequence, $indent_flag ) =
increase_nesting_depth( QUESTION_COLON,
- $$rtoken_map[$i_tok] );
+ $rtoken_map->[$i_tok] );
}
},
'*' => sub { # typeglob, or multiply?
}
else {
- if ( $$rtokens[ $i + 1 ] eq '=' ) {
+ if ( $rtokens->[ $i + 1 ] eq '=' ) {
$tok = '*=';
$type = $tok;
$i++;
}
- elsif ( $$rtokens[ $i + 1 ] eq '*' ) {
+ elsif ( $rtokens->[ $i + 1 ] eq '*' ) {
$tok = '**';
$type = $tok;
$i++;
- if ( $$rtokens[ $i + 1 ] eq '=' ) {
+ if ( $rtokens->[ $i + 1 ] eq '=' ) {
$tok = '**=';
$type = $tok;
$i++;
# ATTRS: check for a ':' which introduces an attribute list
# (this might eventually get its own token type)
- elsif ( $statement_type =~ /^sub/ ) {
+ elsif ( $statement_type =~ /^sub\b/ ) {
$type = 'A';
$in_attribute_list = 1;
}
# otherwise, it should be part of a ?/: operator
else {
- $type_sequence =
+ ( $type_sequence, $indent_flag ) =
decrease_nesting_depth( QUESTION_COLON,
- $$rtoken_map[$i_tok] );
+ $rtoken_map->[$i_tok] );
if ( $last_nonblank_token eq '?' ) {
warning("Syntax error near ? :\n");
}
'[' => sub {
$square_bracket_type[ ++$square_bracket_depth ] =
$last_nonblank_token;
- $type_sequence =
- increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
+ ( $type_sequence, $indent_flag ) =
+ increase_nesting_depth( SQUARE_BRACKET, $rtoken_map->[$i_tok] );
# It may seem odd, but structural square brackets have
# type '{' and '}'. This simplifies the indentation logic.
$square_bracket_structural_type[$square_bracket_depth] = $type;
},
']' => sub {
- $type_sequence =
- decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
+ ( $type_sequence, $indent_flag ) =
+ decrease_nesting_depth( SQUARE_BRACKET, $rtoken_map->[$i_tok] );
if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
{
$type = '}';
}
+
+ # propagate type information for smartmatch operator. This is
+ # necessary to enable us to know if an operator or term is expected
+ # next.
+ if ( $square_bracket_type[$square_bracket_depth] eq '~~' ) {
+ $tok = $square_bracket_type[$square_bracket_depth];
+ }
+
if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
},
'-' => sub { # what kind of minus?
if ( ( $expecting != OPERATOR )
&& $is_file_test_operator{$next_tok} )
{
- $i++;
- $tok .= $next_tok;
- $type = 'F';
+ my ( $next_nonblank_token, $i_next ) =
+ find_next_nonblank_token( $i + 1, $rtokens,
+ $max_token_index );
+
+ # check for a quoted word like "-w=>xx";
+ # it is sufficient to just check for a following '='
+ if ( $next_nonblank_token eq '=' ) {
+ $type = 'm';
+ }
+ else {
+ $i++;
+ $tok .= $next_tok;
+ $type = 'F';
+ }
}
elsif ( $expecting == TERM ) {
my $number = scan_number();
}
elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
complain(
- "Unconventional here-target: '$here_doc_target'\n"
- );
+ "Unconventional here-target: '$here_doc_target'\n");
+ }
+ }
+ elsif ( $expecting == TERM ) {
+ unless ($saw_error) {
+
+ # shouldn't happen..
+ warning("Program bug; didn't find here doc target\n");
+ report_definite_bug();
+ }
+ }
+ }
+ else {
+ }
+ },
+ '<<~' => sub { # a here-doc, new type added in v26
+ return
+ unless ( $i < $max_token_index )
+ ; # here-doc not possible if end of line
+ if ( $expecting != OPERATOR ) {
+ my ( $found_target, $here_doc_target, $here_quote_character,
+ $saw_error );
+ (
+ $found_target, $here_doc_target, $here_quote_character, $i,
+ $saw_error
+ )
+ = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
+ $max_token_index );
+
+ if ($found_target) {
+
+ if ( length($here_doc_target) > 80 ) {
+ my $truncated = substr( $here_doc_target, 0, 80 );
+ complain("Long here-target: '$truncated' ...\n");
+ }
+ elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
+ complain(
+ "Unconventional here-target: '$here_doc_target'\n");
}
+
+ # Note that we put a leading space on the here quote
+ # character indicate that it may be preceded by spaces
+ $here_quote_character = " " . $here_quote_character;
+ push @{$rhere_target_list},
+ [ $here_doc_target, $here_quote_character ];
+ $type = 'h';
}
elsif ( $expecting == TERM ) {
unless ($saw_error) {
# These block types terminate statements and do not need a trailing
# semicolon
- # patched for SWITCH/CASE:
+ # patched for SWITCH/CASE/
my %is_zero_continuation_block_type;
- @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY continue ;
+ @_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
if elsif else unless while until for foreach switch case given when);
@is_zero_continuation_block_type{@_} = (1) x scalar(@_);
'__DATA__' => '_in_data',
);
- # ref: camel 3 p 147,
+ # original ref: camel 3 p 147,
# but perl may accept undocumented flags
+ # perl 5.10 adds 'p' (preserve)
+ # Perl version 5.22 added 'n'
+ # From http://perldoc.perl.org/perlop.html we have
+ # /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc
+ # s/PATTERN/REPLACEMENT/msixpodualngcer
+ # y/SEARCHLIST/REPLACEMENTLIST/cdsr
+ # tr/SEARCHLIST/REPLACEMENTLIST/cdsr
+ # qr/STRING/msixpodualn
my %quote_modifiers = (
- 's' => '[cegimosx]',
- 'y' => '[cds]',
- 'tr' => '[cds]',
- 'm' => '[cgimosx]',
- 'qr' => '[imosx]',
+ 's' => '[msixpodualngcer]',
+ 'y' => '[cdsr]',
+ 'tr' => '[cdsr]',
+ 'm' => '[msixpodualngc]',
+ 'qr' => '[msixpodualn]',
'q' => "",
'qq' => "",
'qw' => "",
# For example, I used 'v' for v-strings.
#
# *. Implement coding to recognize the $type of the token in this routine.
- # This is the hardest part, and is best done by immitating or modifying
+ # This is the hardest part, and is best done by imitating or modifying
# some of the existing coding. For example, to recognize v-strings, I
# patched 'sub scan_bare_identifier' to recognize v-strings beginning with
# 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
# anything. I may tune it up someday if I don't like the way line
# breaks with v-strings look.
#
- # *. Implement a 'whitespace' rule in sub set_white_space_flag in
+ # *. Implement a 'whitespace' rule in sub set_whitespace_flags in
# Perl::Tidy::Formatter. For adding type 'v', I looked at this routine
# and saw that type 'n' used spaces on both sides, so I just added 'v'
# to the array @spaces_both_sides.
if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
# must not be in multi-line quote
- # and must not be in an eqn
- if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) )
+ # and must not be in an equation
+ if ( !$in_quote && ( operator_expected( 'b', '=', 'b' ) == TERM ) )
{
$tokenizer_self->{_in_pod} = 1;
return;
$input_line =~ s/^\s*//; # trim left end
}
+ # Set a flag to indicate if we might be at an __END__ or __DATA__ line
+ # This will be used below to avoid quoting a bare word followed by
+ # a fat comma.
+ my $is_END_or_DATA = $input_line =~ /^\s*__(END|DATA)__\s*$/;
+
# update the copy of the line for use in error messages
# This must be exactly what we give the pre_tokenizer
$tokenizer_self->{_line_text} = $input_line;
$block_type = $last_nonblank_block_type;
$container_type = $last_nonblank_container_type;
$type_sequence = $last_nonblank_type_sequence;
+ $indent_flag = 0;
$peeked_ahead = 0;
# tokenization is done in two stages..
( $rtokens, $rtoken_map, $rtoken_type ) =
pre_tokenize( $input_line, $max_tokens_wanted );
- $max_token_index = scalar(@$rtokens) - 1;
- push( @$rtokens, ' ', ' ', ' ' ); # extra whitespace simplifies logic
- push( @$rtoken_map, 0, 0, 0 ); # shouldn't be referenced
- push( @$rtoken_type, 'b', 'b', 'b' );
+ $max_token_index = scalar( @{$rtokens} ) - 1;
+ push( @{$rtokens}, ' ', ' ', ' ' ); # extra whitespace simplifies logic
+ push( @{$rtoken_map}, 0, 0, 0 ); # shouldn't be referenced
+ push( @{$rtoken_type}, 'b', 'b', 'b' );
# initialize for main loop
- for $i ( 0 .. $max_token_index + 3 ) {
- $routput_token_type->[$i] = "";
- $routput_block_type->[$i] = "";
- $routput_container_type->[$i] = "";
- $routput_type_sequence->[$i] = "";
+ foreach my $ii ( 0 .. $max_token_index + 3 ) {
+ $routput_token_type->[$ii] = "";
+ $routput_block_type->[$ii] = "";
+ $routput_container_type->[$ii] = "";
+ $routput_type_sequence->[$ii] = "";
+ $routput_indent_flag->[$ii] = 0;
}
$i = -1;
$i_tok = -1;
if ($allowed_quote_modifiers) {
# check for exact quote modifiers
- if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {
- my $str = $$rtokens[$i];
+ if ( $rtokens->[$i] =~ /^[A-Za-z_]/ ) {
+ my $str = $rtokens->[$i];
my $saw_modifier_e;
while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
my $pos = pos($str);
Please put a space between quote modifiers and trailing keywords.
EOM
- # print "token $$rtokens[$i]\n";
- # my $num = length($str) - pos($str);
- # $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num);
- # print "continuing with new token $$rtokens[$i]\n";
+ # print "token $rtokens->[$i]\n";
+ # my $num = length($str) - pos($str);
+ # $rtokens->[$i]=substr($rtokens->[$i],pos($str),$num);
+ # print "continuing with new token $rtokens->[$i]\n";
# skipping past this token does least damage
last if ( ++$i > $max_token_index );
}
}
- unless ( $tok =~ /^\s*$/ ) {
+ unless ( $tok =~ /^\s*$/ || $tok eq 'CORE::' ) {
# try to catch some common errors
if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
$routput_block_type->[$i_tok] = $block_type;
$routput_container_type->[$i_tok] = $container_type;
$routput_type_sequence->[$i_tok] = $type_sequence;
+ $routput_indent_flag->[$i_tok] = $indent_flag;
}
- my $pre_tok = $$rtokens[$i]; # get the next pre-token
- my $pre_type = $$rtoken_type[$i]; # and type
+ my $pre_tok = $rtokens->[$i]; # get the next pre-token
+ my $pre_type = $rtoken_type->[$i]; # and type
$tok = $pre_tok;
- $type = $pre_type; # to be modified as necessary
+ $type = $pre_type; # to be modified as necessary
$block_type = ""; # blank for all tokens except code block braces
$container_type = ""; # blank for all tokens except some parens
$type_sequence = ""; # blank for all tokens except ?/:
+ $indent_flag = 0;
$prototype = ""; # blank for all tokens except user defined subs
$i_tok = $i;
# handle whitespace tokens..
next if ( $type eq 'b' );
- my $prev_tok = $i > 0 ? $$rtokens[ $i - 1 ] : ' ';
- my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b';
+ my $prev_tok = $i > 0 ? $rtokens->[ $i - 1 ] : ' ';
+ my $prev_type = $i > 0 ? $rtoken_type->[ $i - 1 ] : 'b';
# Build larger tokens where possible, since we are not in a quote.
#
# I have allowed tokens starting with <, such as <=,
# because I don't think these could be valid angle operators.
# test file: storrs4.pl
- my $test_tok = $tok . $$rtokens[ $i + 1 ];
+ my $test_tok = $tok . $rtokens->[ $i + 1 ];
my $combine_ok = $is_digraph{$test_tok};
# check for special cases which cannot be combined
# '//' must be defined_or operator if an operator is expected.
# TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
# could be migrated here for clarity
- if ( $test_tok eq '//' ) {
- my $next_type = $$rtokens[ $i + 1 ];
+
+ # Patch for RT#102371, misparsing a // in the following snippet:
+ # state $b //= ccc();
+ # The solution is to always accept the digraph (or trigraph) after
+ # token type 'Z' (possible file handle). The reason is that
+ # sub operator_expected gives TERM expected here, which is
+ # wrong in this case.
+ if ( $test_tok eq '//' && $last_nonblank_type ne 'Z' ) {
+ my $next_type = $rtokens->[ $i + 1 ];
my $expecting =
operator_expected( $prev_type, $tok, $next_type );
- $combine_ok = 0 unless ( $expecting == OPERATOR );
+
+ # Patched for RT#101547, was 'unless ($expecting==OPERATOR)'
+ $combine_ok = 0 if ( $expecting == TERM );
+ }
+
+ # Patch for RT #114359: Missparsing of "print $x ** 0.5;
+ # Accept the digraphs '**' only after type 'Z'
+ # Otherwise postpone the decision.
+ if ( $test_tok eq '**' ) {
+ if ( $last_nonblank_type ne 'Z' ) { $combine_ok = 0 }
}
}
if (
$combine_ok
+
&& ( $test_tok ne '/=' ) # might be pattern
&& ( $test_tok ne 'x=' ) # might be $x
- && ( $test_tok ne '**' ) # typeglob?
&& ( $test_tok ne '*=' ) # typeglob?
+
+ # Moved above as part of fix for
+ # RT #114359: Missparsing of "print $x ** 0.5;
+ # && ( $test_tok ne '**' ) # typeglob?
)
{
$tok = $test_tok;
# Now try to assemble trigraphs. Note that all possible
# perl trigraphs can be constructed by appending a character
# to a digraph.
- $test_tok = $tok . $$rtokens[ $i + 1 ];
+ $test_tok = $tok . $rtokens->[ $i + 1 ];
if ( $is_trigraph{$test_tok} ) {
$tok = $test_tok;
$i++;
}
+
+ # The only current tetragraph is the double diamond operator
+ # and its first three characters are not a trigraph, so
+ # we do can do a special test for it
+ elsif ( $test_tok eq '<<>' ) {
+ $test_tok .= $rtokens->[ $i + 2 ];
+ if ( $is_tetragraph{$test_tok} ) {
+ $tok = $test_tok;
+ $i += 2;
+ }
+ }
}
$type = $tok;
- $next_tok = $$rtokens[ $i + 1 ];
- $next_type = $$rtoken_type[ $i + 1 ];
+ $next_tok = $rtokens->[ $i + 1 ];
+ $next_type = $rtoken_type->[ $i + 1 ];
TOKENIZER_DEBUG_FLAG_TOKENIZE && do {
local $" = ')(';
$brace_type[$brace_depth], $paren_depth,
$paren_type[$paren_depth]
);
- print "TOKENIZE:(@debug_list)\n";
+ print STDOUT "TOKENIZE:(@debug_list)\n";
};
# turn off attribute list on first non-blank, non-bareword
}
# quote a word followed by => operator
- if ( $next_nonblank_token eq '=' ) {
+ # unless the word __END__ or __DATA__ and the only word on
+ # the line.
+ if ( !$is_END_or_DATA && $next_nonblank_token eq '=' ) {
- if ( $$rtokens[ $i_next + 1 ] eq '>' ) {
+ if ( $rtokens->[ $i_next + 1 ] eq '>' ) {
if ( $is_constant{$current_package}{$tok} ) {
$type = 'C';
}
}
}
- # quote a bare word within braces..like xxx->{s}; note that we
- # must be sure this is not a structural brace, to avoid
- # mistaking {s} in the following for a quoted bare word:
- # for(@[){s}bla}BLA}
- if ( ( $last_nonblank_type eq 'L' )
- && ( $next_nonblank_token eq '}' ) )
+ # quote a bare word within braces..like xxx->{s}; note that we
+ # must be sure this is not a structural brace, to avoid
+ # mistaking {s} in the following for a quoted bare word:
+ # for(@[){s}bla}BLA}
+ # Also treat q in something like var{-q} as a bare word, not qoute operator
+ if (
+ $next_nonblank_token eq '}'
+ && (
+ $last_nonblank_type eq 'L'
+ || ( $last_nonblank_type eq 'm'
+ && $last_last_nonblank_type eq 'L' )
+ )
+ )
{
$type = 'w';
next;
# a bare word immediately followed by :: is not a keyword;
# use $tok_kw when testing for keywords to avoid a mistake
my $tok_kw = $tok;
- if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' )
+ if ( $rtokens->[ $i + 1 ] eq ':'
+ && $rtokens->[ $i + 2 ] eq ':' )
{
$tok_kw .= '::';
}
if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {
if ( $tok eq 'x' ) {
- if ( $$rtokens[ $i + 1 ] eq '=' ) { # x=
+ if ( $rtokens->[ $i + 1 ] eq '=' ) { # x=
$tok = 'x=';
$type = $tok;
$i++;
$type = 'n';
}
}
-
+ elsif ( $tok_kw eq 'CORE::' ) {
+ $type = $tok = $tok_kw;
+ $i += 2;
+ }
elsif ( ( $tok eq 'strict' )
and ( $last_nonblank_token eq 'use' ) )
{
if ($next_nonblank_token) {
if ( $is_keyword{$next_nonblank_token} ) {
- warning(
+
+ # Assume qw is used as a quote and okay, as in:
+ # use constant qw{ DEBUG 0 };
+ # Not worth trying to parse for just a warning
+
+ # NOTE: This warning is deactivated because recent
+ # versions of perl do not complain here, but
+ # the coding is retained for reference.
+ if ( 0 && $next_nonblank_token ne 'qw' ) {
+ warning(
"Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
- );
+ );
+ }
}
# FIXME: could check for error in which next token is
# not a word (number, punctuation, ..)
else {
- $is_constant{$current_package}
- {$next_nonblank_token} = 1;
+ $is_constant{$current_package}{$next_nonblank_token}
+ = 1;
}
}
}
# various quote operators
elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
+##NICOL PATCH
if ( $expecting == OPERATOR ) {
- # patch for paren-less for/foreach glitch, part 1
- # perl will accept this construct as valid:
+ # Be careful not to call an error for a qw quote
+ # where a parenthesized list is allowed. For example,
+ # it could also be a for/foreach construct such as
#
# foreach my $key qw\Uno Due Tres Quadro\ {
# print "Set $key\n";
# }
- unless ( $tok eq 'qw' && $is_for_foreach{$want_paren} )
+ #
+
+ # Or it could be a function call.
+ # NOTE: Braces in something like &{ xxx } are not
+ # marked as a block, we might have a method call.
+ # &method(...), $method->(..), &{method}(...),
+ # $ref[2](list) is ok & short for $ref[2]->(list)
+ #
+ # See notes in 'sub code_block_type' and
+ # 'sub is_non_structural_brace'
+
+ unless (
+ $tok eq 'qw'
+ && ( $last_nonblank_token =~ /^([\]\}\&]|\-\>)/
+ || $is_for_foreach{$want_paren} )
+ )
{
error_if_expecting_OPERATOR();
}
# check for a statement label
elsif (
( $next_nonblank_token eq ':' )
- && ( $$rtokens[ $i_next + 1 ] ne ':' )
- && ( $i_next <= $max_token_index ) # colon on same line
+ && ( $rtokens->[ $i_next + 1 ] ne ':' )
+ && ( $i_next <= $max_token_index ) # colon on same line
&& label_ok()
)
{
- if ( $tok !~ /A-Z/ ) {
+ if ( $tok !~ /[A-Z]/ ) {
push @{ $tokenizer_self->{_rlower_case_labels_at} },
$input_line_number;
}
elsif ( $tok eq 'else' ) {
# patched for SWITCH/CASE
- if ( $last_nonblank_token ne ';'
+ if (
+ $last_nonblank_token ne ';'
&& $last_nonblank_block_type !~
- /^(if|elsif|unless|case|when)$/ )
+ /^(if|elsif|unless|case|when)$/
+
+ # patch to avoid an unwanted error message for
+ # the case of a parenless 'case' (RT 105484):
+ # switch ( 1 ) { case x { 2 } else { } }
+ && $statement_type !~
+ /^(if|elsif|unless|case|when)$/
+ )
{
warning(
"expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
# note: ';' '{' and '}' in list above
# because continues can follow bare blocks;
# ':' is labeled block
- warning("'$tok' should follow a block\n");
+ #
+ ############################################
+ # NOTE: This check has been deactivated because
+ # continue has an alternative usage for given/when
+ # blocks in perl 5.10
+ ## warning("'$tok' should follow a block\n");
+ ############################################
}
}
elsif ( $tok eq 'when' || $tok eq 'case' ) {
$statement_type = $tok; # next '{' is block
}
+
+ #
+ # indent trailing if/unless/while/until
+ # outdenting will be handled by later indentation loop
+## DEACTIVATED: unfortunately this can cause some unwanted indentation like:
+##$opt_o = 1
+## if !(
+## $opt_b
+## || $opt_c
+## || $opt_d
+## || $opt_f
+## || $opt_i
+## || $opt_l
+## || $opt_o
+## || $opt_x
+## );
+## if ( $tok =~ /^(if|unless|while|until)$/
+## && $next_nonblank_token ne '(' )
+## {
+## $indent_flag = 1;
+## }
}
# check for inline label following
# mark bare words immediately followed by a paren as
# functions
- $next_tok = $$rtokens[ $i + 1 ];
+ $next_tok = $rtokens->[ $i + 1 ];
if ( $next_tok eq '(' ) {
$type = 'U';
}
# not treated as keywords:
if (
(
- $tok eq 'case'
+ $tok eq 'case'
&& $brace_type[$brace_depth] eq 'switch'
)
|| ( $tok eq 'when'
$routput_block_type->[$i_tok] = $block_type;
$routput_container_type->[$i_tok] = $container_type;
$routput_type_sequence->[$i_tok] = $type_sequence;
+ $routput_indent_flag->[$i_tok] = $indent_flag;
}
unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
# running value of this variable is $level_in_tokenizer.
#
# The total continuation is much more difficult to compute, and requires
-# several variables. These veriables are:
+# several variables. These variables are:
#
# $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
# each indentation level, if there are intervening open secondary
# indentation level, if the level is of type BLOCK or not.
# $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
# $nesting_list_string = a string of 1's and 0's indicating, for each
-# indentation level, if it is is appropriate for list formatting.
+# indentation level, if it is appropriate for list formatting.
# If so, continuation indentation is used to indent long list items.
# $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
# @{$rslevel_stack} = a stack of total nesting depths at each
$nesting_list_string_i, $nesting_token_string_i,
$nesting_type_string_i, );
- foreach $i ( @{$routput_token_list} )
+ foreach my $i ( @{$routput_token_list} )
{ # scan the list of pre-tokens indexes
# self-checking for valid token types
- my $type = $routput_token_type->[$i];
- my $tok = $$rtokens[$i]; # the token, but ONLY if same as pretoken
+ my $type = $routput_token_type->[$i];
+ my $forced_indentation_flag = $routput_indent_flag->[$i];
+
+ # See if we should undo the $forced_indentation_flag.
+ # Forced indentation after 'if', 'unless', 'while' and 'until'
+ # expressions without trailing parens is optional and doesn't
+ # always look good. It is usually okay for a trailing logical
+ # expression, but if the expression is a function call, code block,
+ # or some kind of list it puts in an unwanted extra indentation
+ # level which is hard to remove.
+ #
+ # Example where extra indentation looks ok:
+ # return 1
+ # if $det_a < 0 and $det_b > 0
+ # or $det_a > 0 and $det_b < 0;
+ #
+ # Example where extra indentation is not needed because
+ # the eval brace also provides indentation:
+ # print "not " if defined eval {
+ # reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4;
+ # };
+ #
+ # The following rule works fairly well:
+ # Undo the flag if the end of this line, or start of the next
+ # line, is an opening container token or a comma.
+ # This almost always works, but if not after another pass it will
+ # be stable.
+ if ( $forced_indentation_flag && $type eq 'k' ) {
+ my $ixlast = -1;
+ my $ilast = $routput_token_list->[$ixlast];
+ my $toklast = $routput_token_type->[$ilast];
+ if ( $toklast eq '#' ) {
+ $ixlast--;
+ $ilast = $routput_token_list->[$ixlast];
+ $toklast = $routput_token_type->[$ilast];
+ }
+ if ( $toklast eq 'b' ) {
+ $ixlast--;
+ $ilast = $routput_token_list->[$ixlast];
+ $toklast = $routput_token_type->[$ilast];
+ }
+ if ( $toklast =~ /^[\{,]$/ ) {
+ $forced_indentation_flag = 0;
+ }
+ else {
+ ( $toklast, my $i_next ) =
+ find_next_nonblank_token( $max_token_index, $rtokens,
+ $max_token_index );
+ if ( $toklast =~ /^[\{,]$/ ) {
+ $forced_indentation_flag = 0;
+ }
+ }
+ }
+
+ # if we are already in an indented if, see if we should outdent
+ if ($indented_if_level) {
+
+ # don't try to nest trailing if's - shouldn't happen
+ if ( $type eq 'k' ) {
+ $forced_indentation_flag = 0;
+ }
+
+ # check for the normal case - outdenting at next ';'
+ elsif ( $type eq ';' ) {
+ if ( $level_in_tokenizer == $indented_if_level ) {
+ $forced_indentation_flag = -1;
+ $indented_if_level = 0;
+ }
+ }
+
+ # handle case of missing semicolon
+ elsif ( $type eq '}' ) {
+ if ( $level_in_tokenizer == $indented_if_level ) {
+ $indented_if_level = 0;
+
+ # TBD: This could be a subroutine call
+ $level_in_tokenizer--;
+ if ( @{$rslevel_stack} > 1 ) {
+ pop( @{$rslevel_stack} );
+ }
+ if ( length($nesting_block_string) > 1 )
+ { # true for valid script
+ chop $nesting_block_string;
+ chop $nesting_list_string;
+ }
+
+ }
+ }
+ }
+
+ my $tok = $rtokens->[$i]; # the token, but ONLY if same as pretoken
$level_i = $level_in_tokenizer;
# This can happen by running perltidy on non-scripts
# Note: these are set so that the leading braces have a HIGHER
# level than their CONTENTS, which is convenient for indentation
# Also, define continuation indentation for each token.
- if ( $type eq '{' || $type eq 'L' ) {
+ if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 )
+ {
# use environment before updating
$container_environment =
push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
$level_in_tokenizer++;
- if ( $routput_block_type->[$i] ) {
- $nesting_block_flag = 1;
- $nesting_block_string .= '1';
+ if ($forced_indentation_flag) {
+
+ # break BEFORE '?' when there is forced indentation
+ if ( $type eq '?' ) { $level_i = $level_in_tokenizer; }
+ if ( $type eq 'k' ) {
+ $indented_if_level = $level_in_tokenizer;
+ }
+
+ # do not change container environment here if we are not
+ # at a real list. Adding this check prevents "blinkers"
+ # often near 'unless" clauses, such as in the following
+ # code:
+## next
+## unless -e (
+## $archive =
+## File::Spec->catdir( $_, "auto", $root, "$sub$lib_ext" )
+## );
+
+ $nesting_block_string .= "$nesting_block_flag";
}
else {
- $nesting_block_flag = 0;
- $nesting_block_string .= '0';
+
+ if ( $routput_block_type->[$i] ) {
+ $nesting_block_flag = 1;
+ $nesting_block_string .= '1';
+ }
+ else {
+ $nesting_block_flag = 0;
+ $nesting_block_string .= '0';
+ }
}
# we will use continuation indentation within containers
if (
!$routput_block_type->[$i] # patch: skip for BLOCK
&& ($in_statement_continuation)
+ && !( $forced_indentation_flag && $type eq ':' )
)
{
$total_ci += $in_statement_continuation
$in_statement_continuation = 0;
}
- elsif ( $type eq '}' || $type eq 'R' ) {
+ elsif ($type eq '}'
+ || $type eq 'R'
+ || $forced_indentation_flag < 0 )
+ {
# only a nesting error in the script would prevent popping here
if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
# ...and include all block types except user subs with
# block prototypes and these: (sort|grep|map|do|eval)
-# /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
+# /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
elsif (
$is_zero_continuation_block_type{
- $routput_block_type->[$i] } )
+ $routput_block_type->[$i]
+ } )
{
$in_statement_continuation = 0;
}
# /^(sort|grep|map|do|eval)$/ )
elsif (
$is_not_zero_continuation_block_type{
- $routput_block_type->[$i] } )
+ $routput_block_type->[$i]
+ } )
{
}
}
# If we are in a list, then
- # we must set continuatoin indentation at the closing
+ # we must set continuation indentation at the closing
# paren of something like this (paren after $check):
# assert(
# __LINE__,
$in_statement_continuation = 1
if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
}
+
+ elsif ( $tok eq ';' ) { $in_statement_continuation = 0 }
}
# use environment after updating
$in_statement_continuation = 0;
}
- # otherwise, the next token after a ',' starts a new term
- elsif ( $type eq ',' ) {
+ # otherwise, the token after a ',' starts a new term
+
+ # Patch FOR RT#99961; no continuation after a ';'
+ # This is needed because perltidy currently marks
+ # a block preceded by a type character like % or @
+ # as a non block, to simplify formatting. But these
+ # are actually blocks and can have semicolons.
+ # See code_block_type() and is_non_structural_brace().
+ elsif ( $type eq ',' || $type eq ';' ) {
$in_statement_continuation = 0;
}
}
}
- # set secondary nesting levels based on all continment token types
+ # set secondary nesting levels based on all containment token types
# Note: these are set so that the nesting depth is the depth
# of the PREVIOUS TOKEN, which is convenient for setting
- # the stength of token bonds
+ # the strength of token bonds
my $slevel_i = $slevel_in_tokenizer;
# /^[L\{\(\[]$/
# now form the previous token
if ( $im >= 0 ) {
$num =
- $$rtoken_map[$i] - $$rtoken_map[$im]; # how many characters
+ $rtoken_map->[$i] - $rtoken_map->[$im]; # how many characters
if ( $num > 0 ) {
push( @tokens,
- substr( $input_line, $$rtoken_map[$im], $num ) );
+ substr( $input_line, $rtoken_map->[$im], $num ) );
}
}
$im = $i;
}
- $num = length($input_line) - $$rtoken_map[$im]; # make the last token
+ $num = length($input_line) - $rtoken_map->[$im]; # make the last token
if ( $num > 0 ) {
- push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) );
+ push( @tokens, substr( $input_line, $rtoken_map->[$im], $num ) );
}
$tokenizer_self->{_in_attribute_list} = $in_attribute_list;
# OPERATOR.
#
# If a UNKNOWN is returned, the calling routine must guess. A major
- # goal of this tokenizer is to minimize the possiblity of returning
+ # goal of this tokenizer is to minimize the possibility of returning
# UNKNOWN, because a wrong guess can spoil the formatting of a
# script.
#
my $op_expected = UNKNOWN;
-#print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";
+##print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";
# Note: function prototype is available for token type 'U' for future
# program development. It contains the leading and trailing parens,
}
}
+ # Check for smartmatch operator before preceding brace or square bracket.
+ # For example, at the ? after the ] in the following expressions we are
+ # expecting an operator:
+ #
+ # qr/3/ ~~ ['1234'] ? 1 : 0;
+ # map { $_ ~~ [ '0', '1' ] ? 'x' : 'o' } @a;
+ elsif ( $last_nonblank_type eq '}' && $last_nonblank_token eq '~~' ) {
+ $op_expected = OPERATOR;
+ }
+
# handle something after 'do' and 'eval'
elsif ( $is_block_operator{$last_nonblank_token} ) {
}
# something like $a = do { BLOCK } / 2;
+ # or this ? after a smartmatch anonynmous hash or array reference:
+ # qr/3/ ~~ ['1234'] ? 1 : 0;
# ^
else {
$op_expected = OPERATOR; # block mode following }
{
$op_expected = UNKNOWN;
}
+
+ # expecting VERSION or {} after package NAMESPACE
+ elsif ($statement_type =~ /^package\b/
+ && $last_nonblank_token =~ /^package\b/ )
+ {
+ $op_expected = TERM;
+ }
}
# no operator after many keywords, such as "die", "warn", etc
# TODO: This list is incomplete, and these should be put
# into a hash.
if ( $tok eq '/'
- && $next_type eq '/'
+ && $next_type eq '/'
&& $last_nonblank_type eq 'k'
&& $last_nonblank_token =~ /^eof|undef|shift|pop$/ )
{
# patch for dor.t (defined or).
if ( $tok eq '/'
- && $next_type eq '/'
+ && $next_type eq '/'
&& $last_nonblank_token eq ']' )
{
$op_expected = OPERATOR;
}
+
+ # Patch for RT #116344: misparse a ternary operator after an anonymous
+ # hash, like this:
+ # return ref {} ? 1 : 0;
+ # The right brace should really be marked type 'R' in this case, and
+ # it is safest to return an UNKNOWN here. Expecting a TERM will
+ # cause the '?' to always be interpreted as a pattern delimiter
+ # rather than introducing a ternary operator.
+ elsif ( $tok eq '?' ) {
+ $op_expected = UNKNOWN;
+ }
else {
$op_expected = TERM;
}
}
TOKENIZER_DEBUG_FLAG_EXPECT && do {
- print
+ print STDOUT
"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
};
return $op_expected;
return $brace_type[$brace_depth];
}
- # otherwise, it is a label if and only if it follows a ';'
- # (real or fake)
+ # otherwise, it is a label if and only if it follows a ';' (real or fake)
+ # or another label
else {
- return ( $last_nonblank_type eq ';' );
+ return ( $last_nonblank_type eq ';' || $last_nonblank_type eq 'J' );
}
}
}
}
+ ################################################################
# NOTE: braces after type characters start code blocks, but for
# simplicity these are not identified as such. See also
# sub is_non_structural_brace.
- # elsif ( $last_nonblank_type eq 't' ) {
- # return $last_nonblank_token;
- # }
+ ################################################################
+
+## elsif ( $last_nonblank_type eq 't' ) {
+## return $last_nonblank_token;
+## }
# brace after label:
elsif ( $last_nonblank_type eq 'J' ) {
# otherwise, look at previous token. This must be a code block if
# it follows any of these:
-# /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
+# /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
elsif ( $is_code_block_token{$last_nonblank_token} ) {
- return $last_nonblank_token;
+
+ # Bug Patch: Note that the opening brace after the 'if' in the following
+ # snippet is an anonymous hash ref and not a code block!
+ # print 'hi' if { x => 1, }->{x};
+ # We can identify this situation because the last nonblank type
+ # will be a keyword (instead of a closing peren)
+ if ( $last_nonblank_token =~ /^(if|unless)$/
+ && $last_nonblank_type eq 'k' )
+ {
+ return "";
+ }
+ else {
+ return $last_nonblank_token;
+ }
}
- # or a sub definition
+ # or a sub or package BLOCK
elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
- && $last_nonblank_token =~ /^sub\b/ )
+ && $last_nonblank_token =~ /^(sub|package)\b/ )
{
return $last_nonblank_token;
}
+ elsif ( $statement_type =~ /^(sub|package)\b/ ) {
+ return $statement_type;
+ }
+
# user-defined subs with block parameters (like grep/map/eval)
elsif ( $last_nonblank_type eq 'G' ) {
return $last_nonblank_token;
$max_token_index );
}
+ # Patch for bug # RT #94338 reported by Daniel Trizen
+ # for-loop in a parenthesized block-map triggering an error message:
+ # map( { foreach my $item ( '0', '1' ) { print $item} } qw(a b c) );
+ # Check for a code block within a parenthesized function call
+ elsif ( $last_nonblank_token eq '(' ) {
+ my $paren_type = $paren_type[$paren_depth];
+ if ( $paren_type && $paren_type =~ /^(map|grep|sort)$/ ) {
+
+ # We will mark this as a code block but use type 't' instead
+ # of the name of the contining function. This will allow for
+ # correct parsing but will usually produce better formatting.
+ # Braces with block type 't' are not broken open automatically
+ # in the formatter as are other code block types, and this usually
+ # works best.
+ return 't'; # (Not $paren_type)
+ }
+ else {
+ return "";
+ }
+ }
+
+ # handle unknown syntax ') {'
+ # we previously appended a '()' to mark this case
+ elsif ( $last_nonblank_token =~ /\(\)$/ ) {
+ return $last_nonblank_token;
+ }
+
# anything else must be anonymous hash reference
else {
return "";
# USES GLOBAL VARIABLES: $last_nonblank_token
my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
+
my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens, $max_token_index );
# We are only going to look ahead one more (nonblank/comment) line.
# Strange formatting could cause a bad guess, but that's unlikely.
- my @pre_types = @$rtoken_type[ $i + 1 .. $max_token_index ];
- my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
+ my @pre_types;
+ my @pre_tokens;
+
+ # Ignore the rest of this line if it is a side comment
+ if ( $next_nonblank_token ne '#' ) {
+ @pre_types = @{$rtoken_type}[ $i + 1 .. $max_token_index ];
+ @pre_tokens = @{$rtokens}[ $i + 1 .. $max_token_index ];
+ }
my ( $rpre_tokens, $rpre_types ) =
peek_ahead_for_n_nonblank_pre_tokens(20); # 20 is arbitrary but
# generous, and prevents
# wasting lots of
# time in mangled files
- if ( defined($rpre_types) && @$rpre_types ) {
- push @pre_types, @$rpre_types;
- push @pre_tokens, @$rpre_tokens;
+ if ( defined($rpre_types) && @{$rpre_types} ) {
+ push @pre_types, @{$rpre_types};
+ push @pre_tokens, @{$rpre_tokens};
}
- # put a sentinal token to simplify stopping the search
+ # put a sentinel token to simplify stopping the search
+ push @pre_types, '}';
push @pre_types, '}';
my $jbeg = 0;
# find the closing quote; don't worry about escapes
my $quote_mark = $pre_types[$j];
- for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) {
+ foreach my $k ( $j + 1 .. $#pre_types - 1 ) {
if ( $pre_types[$k] eq $quote_mark ) {
$j = $k + 1;
my $next = $pre_types[$j];
$j++;
}
elsif ( $pre_types[$j] eq 'w' ) {
- unless ( $is_keyword{ $pre_tokens[$j] } ) {
- $j++;
- }
+ $j++;
}
elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
$j++;
$j++ if $pre_types[$j] eq 'b';
- # it's a hash ref if a comma or => follow next
- if ( $pre_types[$j] eq ','
- || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) )
+ # Patched for RT #95708
+ if (
+
+ # it is a comma which is not a pattern delimeter except for qw
+ (
+ $pre_types[$j] eq ','
+ && $pre_tokens[$jbeg] !~ /^(s|m|y|tr|qr|q|qq|qx)$/
+ )
+
+ # or a =>
+ || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' )
+ )
{
$code_block_type = "";
}
return $code_block_type;
}
-sub unexpected {
+sub report_unexpected {
# report unexpected token type and show where it is
# USES GLOBAL VARIABLES: $tokenizer_self
if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) {
my $msg = "found $found where $expecting expected";
- my $pos = $$rpretoken_map[$i_tok];
+ my $pos = $rpretoken_map->[$i_tok];
interrupt_logfile();
my $input_line_number = $tokenizer_self->{_last_line_number};
my ( $offset, $numbered_line, $underline ) =
my $trailer = "";
if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
- my $pos_prev = $$rpretoken_map[$last_nonblank_i];
+ my $pos_prev = $rpretoken_map->[$last_nonblank_i];
my $num;
- if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) {
- $num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev;
+ if ( $rpretoken_type->[ $i_tok - 1 ] eq 'b' ) {
+ $num = $rpretoken_map->[ $i_tok - 1 ] - $pos_prev;
}
else {
$num = $pos - $pos_prev;
warning( $msg . $trailer . "\n" );
resume_logfile();
}
+ return;
}
sub is_non_structural_brace {
# return 0;
# }
+ ################################################################
# NOTE: braces after type characters start code blocks, but for
# simplicity these are not identified as such. See also
# sub code_block_type
- # if ($last_nonblank_type eq 't') {return 0}
+ ################################################################
+
+ ##if ($last_nonblank_type eq 't') {return 0}
# otherwise, it is non-structural if it is decorated
# by type information.
# For example, the '{' here is non-structural: ${xxx}
- (
+ return (
$last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
# or if we follow a hash or array closing curly brace or bracket
# way.
sub increase_nesting_depth {
- my ( $a, $pos ) = @_;
+ my ( $aa, $pos ) = @_;
# USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
- # @current_sequence_number, @depth_array, @starting_line_of_current_depth
- my $b;
- $current_depth[$a]++;
+ # @current_sequence_number, @depth_array, @starting_line_of_current_depth,
+ # $statement_type
+ $current_depth[$aa]++;
+ $total_depth++;
+ $total_depth[$aa][ $current_depth[$aa] ] = $total_depth;
my $input_line_number = $tokenizer_self->{_last_line_number};
my $input_line = $tokenizer_self->{_line_text};
# Sequence numbers increment by number of items. This keeps
# a unique set of numbers but still allows the relative location
# of any type to be determined.
- $nesting_sequence_number[$a] += scalar(@closing_brace_names);
- my $seqno = $nesting_sequence_number[$a];
- $current_sequence_number[$a][ $current_depth[$a] ] = $seqno;
+ $nesting_sequence_number[$aa] += scalar(@closing_brace_names);
+ my $seqno = $nesting_sequence_number[$aa];
+ $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno;
- $starting_line_of_current_depth[$a][ $current_depth[$a] ] =
+ $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] =
[ $input_line_number, $input_line, $pos ];
- for $b ( 0 .. $#closing_brace_names ) {
- next if ( $b == $a );
- $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
+ for my $bb ( 0 .. $#closing_brace_names ) {
+ next if ( $bb == $aa );
+ $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb];
+ }
+
+ # set a flag for indenting a nested ternary statement
+ my $indent = 0;
+ if ( $aa == QUESTION_COLON ) {
+ $nested_ternary_flag[ $current_depth[$aa] ] = 0;
+ if ( $current_depth[$aa] > 1 ) {
+ if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) {
+ my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ];
+ if ( $pdepth == $total_depth - 1 ) {
+ $indent = 1;
+ $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1;
+ }
+ }
+ }
}
- return $seqno;
+ $nested_statement_type[$aa][ $current_depth[$aa] ] = $statement_type;
+ $statement_type = "";
+ return ( $seqno, $indent );
}
sub decrease_nesting_depth {
- my ( $a, $pos ) = @_;
+ my ( $aa, $pos ) = @_;
# USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
# @current_sequence_number, @depth_array, @starting_line_of_current_depth
- my $b;
+ # $statement_type
my $seqno = 0;
my $input_line_number = $tokenizer_self->{_last_line_number};
my $input_line = $tokenizer_self->{_line_text};
- if ( $current_depth[$a] > 0 ) {
+ my $outdent = 0;
+ $total_depth--;
+ if ( $current_depth[$aa] > 0 ) {
- $seqno = $current_sequence_number[$a][ $current_depth[$a] ];
+ # set a flag for un-indenting after seeing a nested ternary statement
+ $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ];
+ if ( $aa == QUESTION_COLON ) {
+ $outdent = $nested_ternary_flag[ $current_depth[$aa] ];
+ }
+ $statement_type = $nested_statement_type[$aa][ $current_depth[$aa] ];
- # check that any brace types $b contained within are balanced
- for $b ( 0 .. $#closing_brace_names ) {
- next if ( $b == $a );
+ # check that any brace types $bb contained within are balanced
+ for my $bb ( 0 .. $#closing_brace_names ) {
+ next if ( $bb == $aa );
- unless ( $depth_array[$a][$b][ $current_depth[$a] ] ==
- $current_depth[$b] )
+ unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] ==
+ $current_depth[$bb] )
{
my $diff =
- $current_depth[$b] -
- $depth_array[$a][$b][ $current_depth[$a] ];
+ $current_depth[$bb] -
+ $depth_array[$aa][$bb][ $current_depth[$aa] ];
# don't whine too many times
my $saw_brace_error = get_saw_brace_error();
if (
$saw_brace_error <= MAX_NAG_MESSAGES
- # if too many closing types have occured, we probably
+ # if too many closing types have occurred, we probably
# already caught this error
&& ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
)
{
interrupt_logfile();
my $rsl =
- $starting_line_of_current_depth[$a][ $current_depth[$a] ];
- my $sl = $$rsl[0];
+ $starting_line_of_current_depth[$aa]
+ [ $current_depth[$aa] ];
+ my $sl = $rsl->[0];
my $rel = [ $input_line_number, $input_line, $pos ];
- my $el = $$rel[0];
+ my $el = $rel->[0];
my ($ess);
if ( $diff == 1 || $diff == -1 ) {
}
my $bname =
( $diff > 0 )
- ? $opening_brace_names[$b]
- : $closing_brace_names[$b];
- write_error_indicator_pair( @$rsl, '^' );
+ ? $opening_brace_names[$bb]
+ : $closing_brace_names[$bb];
+ write_error_indicator_pair( @{$rsl}, '^' );
my $msg = <<"EOM";
-Found $diff extra $bname$ess between $opening_brace_names[$a] on line $sl and $closing_brace_names[$a] on line $el
+Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
EOM
if ( $diff > 0 ) {
my $rml =
- $starting_line_of_current_depth[$b]
- [ $current_depth[$b] ];
- my $ml = $$rml[0];
+ $starting_line_of_current_depth[$bb]
+ [ $current_depth[$bb] ];
+ my $ml = $rml->[0];
$msg .=
" The most recent un-matched $bname is on line $ml\n";
- write_error_indicator_pair( @$rml, '^' );
+ write_error_indicator_pair( @{$rml}, '^' );
}
- write_error_indicator_pair( @$rel, '^' );
+ write_error_indicator_pair( @{$rel}, '^' );
warning($msg);
resume_logfile();
}
increment_brace_error();
}
}
- $current_depth[$a]--;
+ $current_depth[$aa]--;
}
else {
my $saw_brace_error = get_saw_brace_error();
if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
my $msg = <<"EOM";
-There is no previous $opening_brace_names[$a] to match a $closing_brace_names[$a] on line $input_line_number
+There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
EOM
indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
}
increment_brace_error();
}
- return $seqno;
+ return ( $seqno, $outdent );
}
sub check_final_nesting_depths {
- my ($a);
# USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth
- for $a ( 0 .. $#closing_brace_names ) {
+ for my $aa ( 0 .. $#closing_brace_names ) {
- if ( $current_depth[$a] ) {
- my $rsl = $starting_line_of_current_depth[$a][ $current_depth[$a] ];
- my $sl = $$rsl[0];
+ if ( $current_depth[$aa] ) {
+ my $rsl =
+ $starting_line_of_current_depth[$aa][ $current_depth[$aa] ];
+ my $sl = $rsl->[0];
my $msg = <<"EOM";
-Final nesting depth of $opening_brace_names[$a]s is $current_depth[$a]
-The most recent un-matched $opening_brace_names[$a] is on line $sl
+Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa]
+The most recent un-matched $opening_brace_names[$aa] is on line $sl
EOM
- indicate_error( $msg, @$rsl, '^' );
+ indicate_error( $msg, @{$rsl}, '^' );
increment_brace_error();
}
}
+ return;
}
#########i#############################################################
my ( $rtok, $rmap, $rtype ) =
pre_tokenize( $line, 2 ); # only need 2 pre-tokens
my $j = $max_token_index + 1;
- my $tok;
- foreach $tok (@$rtok) {
+ foreach my $tok ( @{$rtok} ) {
last if ( $tok =~ "\n" );
- $$rtokens[ ++$j ] = $tok;
+ $rtokens->[ ++$j ] = $tok;
}
last;
}
# $is_pattern = 0 if probably not pattern, =1 if probably a pattern
# msg = a warning or diagnostic message
# USES GLOBAL VARIABLES: $last_nonblank_token
+
+ # FIXME: this needs to be rewritten
+
my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
my $is_pattern = 0;
my $msg = "guessing that ? after $last_nonblank_token starts a ";
else {
my $ibeg = $i;
$i = $ibeg + 1;
- my $next_token = $$rtokens[$i]; # first token after ?
+ my $next_token = $rtokens->[$i]; # first token after ?
# look for a possible ending ? on this line..
my $in_quote = 1;
}
else {
- if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
+ # Watch out for an ending ? in quotes, like this
+ # my $case_flag = File::Spec->case_tolerant ? '(?i)' : '';
+ my $s_quote = 0;
+ my $d_quote = 0;
+ my $colons = 0;
+ foreach my $ii ( $ibeg + 1 .. $i - 1 ) {
+ my $tok = $rtokens->[$ii];
+ if ( $tok eq ":" ) { $colons++ }
+ if ( $tok eq "'" ) { $s_quote++ }
+ if ( $tok eq '"' ) { $d_quote++ }
+ }
+ if ( $s_quote % 2 || $d_quote % 2 || $colons ) {
+ $is_pattern = 0;
+ $msg .= "found ending ? but unbalanced quote chars\n";
+ }
+ elsif ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
$is_pattern = 1;
$msg .= "pattern (found ending ? and pattern expected)\n";
}
my $msg = "guessing that / after $last_nonblank_token starts a ";
if ( $i >= $max_token_index ) {
- "division (no end to pattern found on the line)\n";
+ $msg .= "division (no end to pattern found on the line)\n";
}
else {
my $ibeg = $i;
my $divide_expected =
numerator_expected( $i, $rtokens, $max_token_index );
$i = $ibeg + 1;
- my $next_token = $$rtokens[$i]; # first token after slash
+ my $next_token = $rtokens->[$i]; # first token after slash
# look for a possible ending / on this line..
my $in_quote = 1;
# little reason to change it.
# USES GLOBAL VARIABLES: $tokenizer_self, $current_package
# %is_constant,
- use constant HERE_DOC_WINDOW => 40;
+ my $HERE_DOC_WINDOW = 40;
my $next_token = shift;
my $here_doc_expected = 0;
$here_doc_expected = 1; # got it
last;
}
- last if ( $k >= HERE_DOC_WINDOW );
+ last if ( $k >= $HERE_DOC_WINDOW );
}
unless ($here_doc_expected) {
# we have to back up one pretoken at a :: since each : is one pretoken
if ( $tok eq '::' ) { $i_beg-- }
if ( $tok eq '->' ) { $i_beg-- }
- my $pos_beg = $$rtoken_map[$i_beg];
+ my $pos_beg = $rtoken_map->[$i_beg];
pos($input_line) = $pos_beg;
# Examples:
if ( $type eq 'w' ) {
# check for v-string with leading 'v' type character
- # (This seems to have presidence over filehandle, type 'Y')
+ # (This seems to have precedence over filehandle, type 'Y')
if ( $tok =~ /^v\d[_\d]*$/ ) {
# we only have the first part - something like 'v101' -
# doesn't get in the way of good scripts.
# Complain if a filehandle has any lower case
- # letters. This is suggested good practice, but the
- # main reason for this warning is that prior to
- # release 20010328, perltidy incorrectly parsed a
- # function call after a print/printf, with the
- # result that a space got added before the opening
- # paren, thereby converting the function name to a
- # filehandle according to perl's weird rules. This
- # will not usually generate a syntax error, so this
- # is a potentially serious bug. By warning
- # of filehandles with any lower case letters,
- # followed by opening parens, we will help the user
- # find almost all of these older errors.
- # use 'sub_name' because something like
+ # letters. This is suggested good practice.
+ # Use 'sub_name' because something like
# main::MYHANDLE is ok for filehandle
if ( $sub_name =~ /[a-z]/ ) {
# find $i_beg = index of next nonblank token,
# and handle empty lines
my $blank_line = 0;
- my $next_nonblank_token = $$rtokens[$i_beg];
+ my $next_nonblank_token = $rtokens->[$i_beg];
if ( $i_beg > $max_token_index ) {
$blank_line = 1;
}
}
TOKENIZER_DEBUG_FLAG_NSCAN && do {
- print
+ print STDOUT
"NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
};
return ( $i, $tok, $type, $id_scan_state );
else {
$is_user_function{$package}{$subname} = 1;
}
+ return;
}
sub do_scan_package {
# token following a 'package' token.
# USES GLOBAL VARIABLES: $current_package,
+ # package NAMESPACE
+ # package NAMESPACE VERSION
+ # package NAMESPACE BLOCK
+ # package NAMESPACE VERSION BLOCK
+ #
+ # If VERSION is provided, package sets the $VERSION variable in the given
+ # namespace to a version object with the VERSION provided. VERSION must be
+ # a "strict" style version number as defined by the version module: a
+ # positive decimal number (integer or decimal-fraction) without
+ # exponentiation or else a dotted-decimal v-string with a leading 'v'
+ # character and at least three components.
+ # reference http://perldoc.perl.org/functions/package.html
+
my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
$max_token_index )
= @_;
my $package = undef;
- my $pos_beg = $$rtoken_map[$i_beg];
+ my $pos_beg = $rtoken_map->[$i_beg];
pos($input_line) = $pos_beg;
# handle non-blank line; package name, if any, must follow
if ($error) { warning("Possibly invalid package\n") }
$current_package = $package;
- # check for error
+ # we should now have package NAMESPACE
+ # now expecting VERSION, BLOCK, or ; to follow ...
+ # package NAMESPACE VERSION
+ # package NAMESPACE BLOCK
+ # package NAMESPACE VERSION BLOCK
my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens, $max_token_index );
- if ( $next_nonblank_token !~ /^[;\}]$/ ) {
+
+ # check that something recognizable follows, but do not parse.
+ # A VERSION number will be parsed later as a number or v-string in the
+ # normal way. What is important is to set the statement type if
+ # everything looks okay so that the operator_expected() routine
+ # knows that the number is in a package statement.
+ # Examples of valid primitive tokens that might follow are:
+ # 1235 . ; { } v3 v
+ if ( $next_nonblank_token =~ /^([v\.\d;\{\}])|v\d|\d+$/ ) {
+ $statement_type = $tok;
+ }
+ else {
warning(
"Unexpected '$next_nonblank_token' after package name '$tok'\n"
);
# USES GLOBAL VARIABLES: $context, $last_nonblank_token,
# $last_nonblank_type
- my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index ) = @_;
+ my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
+ $expecting, $container_type )
+ = @_;
my $i_begin = $i;
my $type = '';
- my $tok_begin = $$rtokens[$i_begin];
+ my $tok_begin = $rtokens->[$i_begin];
if ( $tok_begin eq ':' ) { $tok_begin = '::' }
my $id_scan_state_begin = $id_scan_state;
my $identifier_begin = $identifier;
my $tok = $tok_begin;
my $message = "";
+ my $in_prototype_or_signature = $container_type =~ /^sub/;
+
# these flags will be used to help figure out the type:
my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
my $saw_type;
while ( $i < $max_token_index ) {
$i_save = $i unless ( $tok =~ /^\s*$/ );
- $tok = $$rtokens[ ++$i ];
+ $tok = $rtokens->[ ++$i ];
- if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
+ if ( ( $tok eq ':' ) && ( $rtokens->[ $i + 1 ] eq ':' ) ) {
$tok = '::';
$i++;
}
last;
}
}
+
+ # POSTDEFREF ->@ ->% ->& ->*
+ elsif ( ( $tok =~ /^[\@\%\&\*]$/ ) && $identifier =~ /\-\>$/ ) {
+ $identifier .= $tok;
+ }
elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric ..
$saw_alpha = 1;
$id_scan_state = ':'; # now need ::
# howdy::123::bubba();
#
}
- elsif ( $tok =~ /^[0-9]/ ) { # numeric
+ elsif ( $tok =~ /^[0-9]/ ) { # numeric
$saw_alpha = 1;
- $id_scan_state = ':'; # now need ::
+ $id_scan_state = ':'; # now need ::
$identifier .= $tok;
}
elsif ( $tok eq '::' ) {
$id_scan_state = 'A';
$identifier .= $tok;
}
- elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) { # $#array
+
+ # $# and POSTDEFREF ->$#
+ elsif ( ( $tok eq '#' ) && ( $identifier =~ /\$$/ ) ) { # $#array
$identifier .= $tok; # keep same state, a $ could follow
}
elsif ( $tok eq '{' ) {
# check for something like ${#} or ${©}
- if ( $identifier eq '$'
+ if (
+ (
+ $identifier eq '$'
+ || $identifier eq '@'
+ || $identifier eq '$#'
+ )
&& $i + 2 <= $max_token_index
- && $$rtokens[ $i + 2 ] eq '}'
- && $$rtokens[ $i + 1 ] !~ /[\s\w]/ )
+ && $rtokens->[ $i + 2 ] eq '}'
+ && $rtokens->[ $i + 1 ] !~ /[\s\w]/
+ )
{
- my $next2 = $$rtokens[ $i + 2 ];
- my $next1 = $$rtokens[ $i + 1 ];
+ my $next2 = $rtokens->[ $i + 2 ];
+ my $next1 = $rtokens->[ $i + 1 ];
$identifier .= $tok . $next1 . $next2;
$i += 2;
$id_scan_state = '';
# Perl accepts '$^]' or '@^]', but
# there must not be a space before the ']'.
- my $next1 = $$rtokens[ $i + 1 ];
+ my $next1 = $rtokens->[ $i + 1 ];
if ( $next1 eq ']' ) {
$i++;
$identifier .= $next1;
}
else { # something else
+ if ( $in_prototype_or_signature && $tok =~ /^[\),=]/ ) {
+ $id_scan_state = '';
+ $i = $i_save;
+ $type = 'i'; # probably punctuation variable
+ last;
+ }
+
# check for various punctuation variables
if ( $identifier =~ /^[\$\*\@\%]$/ ) {
$identifier .= $tok;
}
+ # POSTDEFREF: Postfix reference ->$* ->%* ->@* ->** ->&* ->$#*
+ elsif ( $tok eq '*' && $identifier =~ /([\@\%\$\*\&]|\$\#)$/ ) {
+ $identifier .= $tok;
+ }
+
elsif ( $identifier eq '$#' ) {
if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
# punctuation variable?
# testfile: cunningham4.pl
- if ( $identifier eq '&' ) {
+ #
+ # We have to be careful here. If we are in an unknown state,
+ # we will reject the punctuation variable. In the following
+ # example the '&' is a binary operator but we are in an unknown
+ # state because there is no sigil on 'Prima', so we don't
+ # know what it is. But it is a bad guess that
+ # '&~' is a function variable.
+ # $self->{text}->{colorMap}->[
+ # Prima::PodView::COLOR_CODE_FOREGROUND
+ # & ~tb::COLOR_INDEX ] =
+ # $sec->{ColorCode}
+ if ( $identifier eq '&' && $expecting ) {
$identifier .= $tok;
}
else {
TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
my ( $a, $b, $c ) = caller;
- print
+ print STDOUT
"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
- print
+ print STDOUT
"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
};
return ( $i, $tok, $type, $id_scan_state, $identifier );
my $attrs = undef;
my $match;
- my $pos_beg = $$rtoken_map[$i_beg];
+ my $pos_beg = $rtoken_map->[$i_beg];
pos($input_line) = $pos_beg;
- # sub NAME PROTO ATTRS
+ # Look for the sub NAME
if (
$input_line =~ m/\G\s*
((?:\w*(?:'|::))*) # package - something that ends in :: or '
(\w+) # NAME - required
- (\s*\([^){]*\))? # PROTO - something in parens
- (\s*:)? # ATTRS - leading : of attribute list
/gcx
)
{
$match = 1;
$subname = $2;
- $proto = $3;
- $attrs = $4;
$package = ( defined($1) && $1 ) ? $1 : $current_package;
$package =~ s/\'/::/g;
$type = 'i';
}
- # Look for prototype/attributes not preceded on this line by subname;
- # This might be an anonymous sub with attributes,
+ # Now look for PROTO ATTRS
+ # Look for prototype/attributes which are usually on the same
+ # line as the sub name but which might be on a separate line.
+ # For example, we might have an anonymous sub with attributes,
# or a prototype on a separate line from its sub name
- elsif (
- $input_line =~ m/\G(\s*\([^){]*\))? # PROTO
+
+ # NOTE: We only want to parse PROTOTYPES here. If we see anything that
+ # does not look like a prototype, we assume it is a SIGNATURE and we
+ # will stop and let the the standard tokenizer handle it. In
+ # particular, we stop if we see any nested parens, braces, or commas.
+ my $saw_opening_paren = $input_line =~ /\G\s*\(/;
+ if (
+ $input_line =~ m/\G(\s*\([^\)\(\}\{\,]*\))? # PROTO
(\s*:)? # ATTRS leading ':'
/gcx
&& ( $1 || $2 )
)
{
- $match = 1;
$proto = $1;
$attrs = $2;
+ # If we also found the sub name on this call then append PROTO.
+ # This is not necessary but for compatability with previous
+ # versions when the -csc flag is used:
+ if ( $match && $proto ) {
+ $tok .= $proto;
+ }
+ $match ||= 1;
+
# Handle prototype on separate line from subname
if ($subname_saved) {
$package = $package_saved;
$in_attribute_list = 1;
}
- # We must convert back from character position
- # to pre_token index.
+ # Otherwise, if we found a match we must convert back from
+ # string position to the pre_token index for continued parsing.
else {
# I don't think an error flag can occur here ..but ?
{ # skip blank or side comment
my ( $rpre_tokens, $rpre_types ) =
peek_ahead_for_n_nonblank_pre_tokens(1);
- if ( defined($rpre_tokens) && @$rpre_tokens ) {
+ if ( defined($rpre_tokens) && @{$rpre_tokens} ) {
$next_nonblank_token = $rpre_tokens->[0];
}
else {
}
$package_saved = "";
$subname_saved = "";
+
+ # See what's next...
if ( $next_nonblank_token eq '{' ) {
if ($subname) {
$statement_type = $tok;
}
- # see if PROTO follows on another line:
+ # if we stopped before an open paren ...
elsif ( $next_nonblank_token eq '(' ) {
- if ( $attrs || $proto ) {
- warning(
-"unexpected '(' after definition or declaration of sub '$subname'\n"
- );
- }
- else {
- $id_scan_state = 'sub'; # we must come back to get proto
- $statement_type = $tok;
- $package_saved = $package;
- $subname_saved = $subname;
+
+ # If we DID NOT see this paren above then it must be on the
+ # next line so we will set a flag to come back here and see if
+ # it is a PROTOTYPE
+
+ # Otherwise, we assume it is a SIGNATURE rather than a
+ # PROTOTYPE and let the normal tokenizer handle it as a list
+ if ( !$saw_opening_paren ) {
+ $id_scan_state = 'sub'; # we must come back to get proto
+ $package_saved = $package;
+ $subname_saved = $subname;
}
+ $statement_type = $tok;
}
elsif ($next_nonblank_token) { # EOF technically ok
warning(
peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
}
}
- my $next_nonblank_token = $$rtokens[ ++$i ];
+ my $next_nonblank_token = $rtokens->[ ++$i ];
if ( $next_nonblank_token =~ /^\s*$/ ) {
- $next_nonblank_token = $$rtokens[ ++$i ];
+ $next_nonblank_token = $rtokens->[ ++$i ];
}
return ( $next_nonblank_token, $i );
}
# Note: I am using the convention that variables ending in
# _expected have these 3 possible values.
my ( $i, $rtokens, $max_token_index ) = @_;
- my $next_token = $$rtokens[ $i + 1 ];
+ my $numerator_expected = 0;
+
+ my $next_token = $rtokens->[ $i + 1 ];
if ( $next_token eq '=' ) { $i++; } # handle /=
my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens, $max_token_index );
if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
- 1;
+ $numerator_expected = 1;
}
else {
if ( $next_nonblank_token =~ /^\s*$/ ) {
- 0;
+ $numerator_expected = 0;
}
else {
- -1;
+ $numerator_expected = -1;
}
}
+ return $numerator_expected;
}
sub pattern_expected {
# This is the start of a filter for a possible pattern.
- # It looks at the token after a possbible pattern and tries to
+ # It looks at the token after a possible pattern and tries to
# determine if that token could end a pattern.
# returns -
# 1 - yes
# 0 - can't tell
# -1 - no
my ( $i, $rtokens, $max_token_index ) = @_;
- my $next_token = $$rtokens[ $i + 1 ];
- if ( $next_token =~ /^[cgimosx]/ ) { $i++; } # skip possible modifier
+ my $is_pattern = 0;
+
+ my $next_token = $rtokens->[ $i + 1 ];
+ if ( $next_token =~ /^[msixpodualgc]/ ) { $i++; } # skip possible modifier
my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens, $max_token_index );
# (can probably be expanded)
if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
{
- 1;
+ $is_pattern = 1;
}
else {
if ( $next_nonblank_token =~ /^\s*$/ ) {
- 0;
+ $is_pattern = 0;
}
else {
- -1;
+ $is_pattern = -1;
}
}
+ return $is_pattern;
}
sub find_next_nonblank_token_on_this_line {
my $next_nonblank_token;
if ( $i < $max_token_index ) {
- $next_nonblank_token = $$rtokens[ ++$i ];
+ $next_nonblank_token = $rtokens->[ ++$i ];
if ( $next_nonblank_token =~ /^\s*$/ ) {
if ( $i < $max_token_index ) {
- $next_nonblank_token = $$rtokens[ ++$i ];
+ $next_nonblank_token = $rtokens->[ ++$i ];
}
}
}
my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_;
my $i = $i_beg;
my $type = '<';
- pos($input_line) = 1 + $$rtoken_map[$i];
+ pos($input_line) = 1 + $rtoken_map->[$i];
my $filter;
# fooled.
my $pos = pos($input_line);
- my $pos_beg = $$rtoken_map[$i];
+ my $pos_beg = $rtoken_map->[$i];
my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
# Reject if the closing '>' follows a '-' as in:
- # if ( VERSION < 5.009 && $op-> name eq 'aassign' ) { }
+ # if ( VERSION < 5.009 && $op-> name eq 'assign' ) { }
if ( $expecting eq UNKNOWN ) {
my $check = substr( $input_line, $pos - 2, 1 );
if ( $check eq '-' ) {
# number - the number (characters); or undef if not a number
my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_;
- my $pos_beg = $$rtoken_map[$i];
+ my $pos_beg = $rtoken_map->[$i];
my $pos;
my $i_begin = $i;
my $number = undef;
# handle octal, hex, binary
if ( !defined($number) ) {
pos($input_line) = $pos_beg;
- if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g )
+ if ( $input_line =~
+ /\G[+-]?0(([xX][0-9a-fA-F_]+)|([0-7_]+)|([bB][01_]+))/g )
{
$pos = pos($input_line);
my $numc = $pos - $pos_beg;
while ( ++$i <= $max_token_index ) {
- if ( $pos <= $$rtoken_map[$i] ) {
+ if ( $pos <= $rtoken_map->[$i] ) {
# Let the calling routine handle errors in which we do not
# land on a pre-token boundary. It can happen by running
# perltidy on some non-perl scripts, for example.
- if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
+ if ( $pos < $rtoken_map->[$i] ) { $error = 1 }
$i--;
last;
}
my $here_quote_character = '';
my $saw_error = 0;
my ( $next_nonblank_token, $i_next_nonblank, $next_token );
- $next_token = $$rtokens[ $i + 1 ];
+ $next_token = $rtokens->[ $i + 1 ];
# perl allows a backslash before the target string (heredoc.t)
my $backslash = 0;
if ( $next_token eq '\\' ) {
$backslash = 1;
- $next_token = $$rtokens[ $i + 2 ];
+ $next_token = $rtokens->[ $i + 2 ];
}
( $next_nonblank_token, $i_next_nonblank ) =
}
}
else { # found ending quote
- my $j;
+ ##my $j;
$found_target = 1;
my $tokj;
- for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
- $tokj = $$rtokens[$j];
+ foreach my $j ( $i_next_nonblank + 1 .. $i - 1 ) {
+ $tokj = $rtokens->[$j];
# we have to remove any backslash before the quote character
# so that the here-doc-target exactly matches this string
next
if ( $tokj eq "\\"
&& $j < $i - 1
- && $$rtokens[ $j + 1 ] eq $here_quote_character );
+ && $rtokens->[ $j + 1 ] eq $here_quote_character );
$here_doc_target .= $tokj;
}
}
my $quoted_string = "";
TOKENIZER_DEBUG_FLAG_QUOTE && do {
- print
+ print STDOUT
"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
};
my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
while ( $i < $max_token_index ) {
- $tok = $$rtokens[ ++$i ];
+ $tok = $rtokens->[ ++$i ];
if ( $tok !~ /^\s*$/ ) {
while ( $i < $max_token_index ) {
if ( $quote_pos == 0 || ( $i < 0 ) ) {
- $tok = $$rtokens[ ++$i ];
+ $tok = $rtokens->[ ++$i ];
if ( $tok eq '\\' ) {
# retain backslash unless it hides the end token
$quoted_string .= $tok
- unless $$rtokens[ $i + 1 ] eq $end_tok;
+ unless $rtokens->[ $i + 1 ] eq $end_tok;
$quote_pos++;
last if ( $i >= $max_token_index );
- $tok = $$rtokens[ ++$i ];
+ $tok = $rtokens->[ ++$i ];
}
}
my $old_pos = $quote_pos;
else {
while ( $i < $max_token_index ) {
- $tok = $$rtokens[ ++$i ];
+ $tok = $rtokens->[ ++$i ];
if ( $tok eq $end_tok ) {
$quote_depth--;
elsif ( $tok eq '\\' ) {
# retain backslash unless it hides the beginning or end token
- $tok = $$rtokens[ ++$i ];
+ $tok = $rtokens->[ ++$i ];
$quoted_string .= '\\'
unless ( $tok eq $end_tok || $tok eq $beginning_tok );
}
warning($msg);
write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
resume_logfile();
+ return;
}
sub write_error_indicator_pair {
warning( $numbered_line . "\n" );
$underline =~ s/\s*$//;
warning( $underline . "\n" );
+ return;
}
sub make_numbered_line {
sub show_tokens {
# this is an old debug routine
+ # not called, but saved for reference
my ( $rtokens, $rtoken_map ) = @_;
- my $num = scalar(@$rtokens);
- my $i;
+ my $num = scalar( @{$rtokens} );
- for ( $i = 0 ; $i < $num ; $i++ ) {
- my $len = length( $$rtokens[$i] );
- print "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n";
+ foreach my $i ( 0 .. $num - 1 ) {
+ my $len = length( $rtokens->[$i] );
+ print STDOUT "$i:$len:$rtoken_map->[$i]:$rtokens->[$i]:\n";
}
+ return;
}
-sub matching_end_token {
-
- # find closing character for a pattern
- my $beginning_token = shift;
+{
+ my %matching_end_token;
- if ( $beginning_token eq '{' ) {
- '}';
- }
- elsif ( $beginning_token eq '[' ) {
- ']';
- }
- elsif ( $beginning_token eq '<' ) {
- '>';
- }
- elsif ( $beginning_token eq '(' ) {
- ')';
+ BEGIN {
+ %matching_end_token = (
+ '{' => '}',
+ '(' => ')',
+ '[' => ']',
+ '<' => '>',
+ );
}
- else {
- $beginning_token;
+
+ sub matching_end_token {
+
+ # return closing character for a pattern
+ my $beginning_token = shift;
+ if ( $matching_end_token{$beginning_token} ) {
+ return $matching_end_token{$beginning_token};
+ }
+ return ($beginning_token);
}
}
sub dump_token_types {
- my $class = shift;
- my $fh = shift;
+ my ( $class, $fh ) = @_;
# This should be the latest list of token types in use
# adding NEW_TOKENS: add a comment here
[ left non-structural square bracket (enclosing an array index)
] right non-structural square bracket
( left non-structural paren (all but a list right of an =)
- ) right non-structural parena
+ ) right non-structural paren
L left non-structural curly brace (enclosing a key)
R right non-structural curly brace
; terminal semicolon
END - unidentified text following __END__
ERROR - we are in big trouble, probably not a perl script
END_OF_LIST
+
+ return;
}
BEGIN {
@opening_brace_names = qw# '{' '[' '(' '?' #;
@closing_brace_names = qw# '}' ']' ')' ':' #;
+ my @q;
+
my @digraphs = qw(
.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
- <= >= == =~ !~ != ++ -- /= x= ~~
+ <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
);
@is_digraph{@digraphs} = (1) x scalar(@digraphs);
- my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ );
+ my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~);
@is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
+ my @tetragraphs = qw( <<>> );
+ @is_tetragraph{@tetragraphs} = (1) x scalar(@tetragraphs);
+
# make a hash of all valid token types for self-checking the tokenizer
# (adding NEW_TOKENS : select a new character and add to this list)
my @valid_token_types = qw#
#;
push( @valid_token_types, @digraphs );
push( @valid_token_types, @trigraphs );
- push( @valid_token_types, '#' );
- push( @valid_token_types, ',' );
+ push( @valid_token_types, @tetragraphs );
+ push( @valid_token_types, ( '#', ',', 'CORE::' ) );
@is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);
# a list of file test letters, as in -e (Table 3-4 of 'camel 3')
# these functions have prototypes of the form (&), so when they are
# followed by a block, that block MAY BE followed by an operator.
- @_ = qw( do eval );
- @is_block_operator{@_} = (1) x scalar(@_);
+ # Smartmatch operator ~~ may be followed by anonymous hash or array ref
+ @q = qw( do eval );
+ @is_block_operator{@q} = (1) x scalar(@q);
# these functions allow an identifier in the indirect object slot
- @_ = qw( print printf sort exec system say);
- @is_indirect_object_taker{@_} = (1) x scalar(@_);
+ @q = qw( print printf sort exec system say);
+ @is_indirect_object_taker{@q} = (1) x scalar(@q);
# These tokens may precede a code block
- # patched for SWITCH/CASE
- @_ = qw( BEGIN END CHECK INIT AUTOLOAD DESTROY continue if elsif else
+ # patched for SWITCH/CASE/CATCH. Actually these could be removed
+ # now and we could let the extended-syntax coding handle them
+ @q =
+ qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
unless do while until eval for foreach map grep sort
- switch case given when);
- @is_code_block_token{@_} = (1) x scalar(@_);
+ switch case given when catch try finally);
+ @is_code_block_token{@q} = (1) x scalar(@q);
# I'll build the list of keywords incrementally
my @Keywords = ();
LE
LT
NE
+ UNITCHECK
abs
accept
alarm
bind
binmode
bless
+ break
caller
chdir
chmod
when
err
say
+
+ catch
);
# patched above for SWITCH/CASE given/when err say
**= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
<= >= == != => \ > < % * / ? & | ** <=> ~~ !~~
f F pp mm Y p m U J G j >> << ^ t
+ ~. ^. |. &. ^.= |.= &.=
#;
push( @value_requestor_type, ',' )
; # (perl doesn't like a ',' in a qw block)
delete $really_want_term{'Y'}; # indirect object, too risky to check syntax;
# let perl do it
- @_ = qw(q qq qw qx qr s y tr m);
- @is_q_qq_qw_qx_qr_s_y_tr_m{@_} = (1) x scalar(@_);
+ @q = qw(q qq qw qx qr s y tr m);
+ @is_q_qq_qw_qx_qr_s_y_tr_m{@q} = (1) x scalar(@q);
# These keywords are handled specially in the tokenizer code:
my @special_keywords = qw(
vec
warn
while
+ given
+ when
);
@is_keyword_taking_list{@keyword_taking_list} =
(1) x scalar(@keyword_taking_list);
# These are not used in any way yet
# my @unused_keywords = qw(
- # CORE
# __FILE__
# __LINE__
# __PACKAGE__
# );
- # The list of keywords was extracted from function 'keyword' in
+ # The list of keywords was originally extracted from function 'keyword' in
# perl file toke.c version 5.005.03, using this utility, plus a
# little editing: (file getkwd.pl):
# while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
@is_keyword{@Keywords} = (1) x scalar(@Keywords);
}
1;
-__END__
-
-=head1 NAME
-
-Perl::Tidy - Parses and beautifies perl source
-
-=head1 SYNOPSIS
-
- use Perl::Tidy;
-
- Perl::Tidy::perltidy(
- source => $source,
- destination => $destination,
- stderr => $stderr,
- argv => $argv,
- perltidyrc => $perltidyrc,
- logfile => $logfile,
- errorfile => $errorfile,
- formatter => $formatter, # callback object (see below)
- dump_options => $dump_options,
- dump_options_type => $dump_options_type,
- );
-
-=head1 DESCRIPTION
-
-This module makes the functionality of the perltidy utility available to perl
-scripts. Any or all of the input parameters may be omitted, in which case the
-@ARGV array will be used to provide input parameters as described
-in the perltidy(1) man page.
-
-For example, the perltidy script is basically just this:
-
- use Perl::Tidy;
- Perl::Tidy::perltidy();
-
-The module accepts input and output streams by a variety of methods.
-The following list of parameters may be any of a the following: a
-filename, an ARRAY reference, a SCALAR reference, or an object with
-either a B<getline> or B<print> method, as appropriate.
-
- source - the source of the script to be formatted
- destination - the destination of the formatted output
- stderr - standard error output
- perltidyrc - the .perltidyrc file
- logfile - the .LOG file stream, if any
- errorfile - the .ERR file stream, if any
- dump_options - ref to a hash to receive parameters (see below),
- dump_options_type - controls contents of dump_options
- dump_getopt_flags - ref to a hash to receive Getopt flags
- dump_options_category - ref to a hash giving category of options
- dump_abbreviations - ref to a hash giving all abbreviations
-
-The following chart illustrates the logic used to decide how to
-treat a parameter.
-
- ref($param) $param is assumed to be:
- ----------- ---------------------
- undef a filename
- SCALAR ref to string
- ARRAY ref to array
- (other) object with getline (if source) or print method
-
-If the parameter is an object, and the object has a B<close> method, that
-close method will be called at the end of the stream.
-
-=over 4
-
-=item source
-
-If the B<source> parameter is given, it defines the source of the
-input stream.
-
-=item destination
-
-If the B<destination> parameter is given, it will be used to define the
-file or memory location to receive output of perltidy.
-
-=item stderr
-
-The B<stderr> parameter allows the calling program to capture the output
-to what would otherwise go to the standard error output device.
-
-=item perltidyrc
-
-If the B<perltidyrc> file is given, it will be used instead of any
-F<.perltidyrc> configuration file that would otherwise be used.
-
-=item argv
-
-If the B<argv> parameter is given, it will be used instead of the
-B<@ARGV> array. The B<argv> parameter may be a string, a reference to a
-string, or a reference to an array. If it is a string or reference to a
-string, it will be parsed into an array of items just as if it were a
-command line string.
-
-=item dump_options
-
-If the B<dump_options> parameter is given, it must be the reference to a hash.
-In this case, the parameters contained in any perltidyrc configuration file
-will be placed in this hash and perltidy will return immediately. This is
-equivalent to running perltidy with --dump-options, except that the perameters
-are returned in a hash rather than dumped to standard output. Also, by default
-only the parameters in the perltidyrc file are returned, but this can be
-changed (see the next parameter). This parameter provides a convenient method
-for external programs to read a perltidyrc file. An example program using
-this feature, F<perltidyrc_dump.pl>, is included in the distribution.
-
-Any combination of the B<dump_> parameters may be used together.
-
-=item dump_options_type
-
-This parameter is a string which can be used to control the parameters placed
-in the hash reference supplied by B<dump_options>. The possible values are
-'perltidyrc' (default) and 'full'. The 'full' parameter causes both the
-default options plus any options found in a perltidyrc file to be returned.
-
-=item dump_getopt_flags
-
-If the B<dump_getopt_flags> parameter is given, it must be the reference to a
-hash. This hash will receive all of the parameters that perltidy understands
-and flags that are passed to Getopt::Long. This parameter may be
-used alone or with the B<dump_options> flag. Perltidy will
-exit immediately after filling this hash. See the demo program
-F<perltidyrc_dump.pl> for example usage.
-
-=item dump_options_category
-
-If the B<dump_options_category> parameter is given, it must be the reference to a
-hash. This hash will receive a hash with keys equal to all long parameter names
-and values equal to the title of the corresponding section of the perltidy manual.
-See the demo program F<perltidyrc_dump.pl> for example usage.
-
-=item dump_abbreviations
-
-If the B<dump_abbreviations> parameter is given, it must be the reference to a
-hash. This hash will receive all abbreviations used by Perl::Tidy. See the
-demo program F<perltidyrc_dump.pl> for example usage.
-
-=back
-
-=head1 EXAMPLE
-
-The following example passes perltidy a snippet as a reference
-to a string and receives the result back in a reference to
-an array.
-
- use Perl::Tidy;
-
- # some messy source code to format
- my $source = <<'EOM';
- use strict;
- my @editors=('Emacs', 'Vi '); my $rand = rand();
- print "A poll of 10 random programmers gave these results:\n";
- foreach(0..10) {
- my $i=int ($rand+rand());
- print " $editors[$i] users are from Venus" . ", " .
- "$editors[1-$i] users are from Mars" .
- "\n";
- }
- EOM
-
- # We'll pass it as ref to SCALAR and receive it in a ref to ARRAY
- my @dest;
- perltidy( source => \$source, destination => \@dest );
- foreach (@dest) {print}
-
-=head1 Using the B<formatter> Callback Object
-
-The B<formatter> parameter is an optional callback object which allows
-the calling program to receive tokenized lines directly from perltidy for
-further specialized processing. When this parameter is used, the two
-formatting options which are built into perltidy (beautification or
-html) are ignored. The following diagram illustrates the logical flow:
-
- |-- (normal route) -> code beautification
- caller->perltidy->|-- (-html flag ) -> create html
- |-- (formatter given)-> callback to write_line
-
-This can be useful for processing perl scripts in some way. The
-parameter C<$formatter> in the perltidy call,
-
- formatter => $formatter,
-
-is an object created by the caller with a C<write_line> method which
-will accept and process tokenized lines, one line per call. Here is
-a simple example of a C<write_line> which merely prints the line number,
-the line type (as determined by perltidy), and the text of the line:
-
- sub write_line {
-
- # This is called from perltidy line-by-line
- my $self = shift;
- my $line_of_tokens = shift;
- my $line_type = $line_of_tokens->{_line_type};
- my $input_line_number = $line_of_tokens->{_line_number};
- my $input_line = $line_of_tokens->{_line_text};
- print "$input_line_number:$line_type:$input_line";
- }
-
-The complete program, B<perllinetype>, is contained in the examples section of
-the source distribution. As this example shows, the callback method
-receives a parameter B<$line_of_tokens>, which is a reference to a hash
-of other useful information. This example uses these hash entries:
-
- $line_of_tokens->{_line_number} - the line number (1,2,...)
- $line_of_tokens->{_line_text} - the text of the line
- $line_of_tokens->{_line_type} - the type of the line, one of:
-
- SYSTEM - system-specific code before hash-bang line
- CODE - line of perl code (including comments)
- POD_START - line starting pod, such as '=head'
- POD - pod documentation text
- POD_END - last line of pod section, '=cut'
- HERE - text of here-document
- HERE_END - last line of here-doc (target word)
- FORMAT - format section
- FORMAT_END - last line of format section, '.'
- DATA_START - __DATA__ line
- DATA - unidentified text following __DATA__
- END_START - __END__ line
- END - unidentified text following __END__
- ERROR - we are in big trouble, probably not a perl script
-
-Most applications will be only interested in lines of type B<CODE>. For
-another example, let's write a program which checks for one of the
-so-called I<naughty matching variables> C<&`>, C<$&>, and C<$'>, which
-can slow down processing. Here is a B<write_line>, from the example
-program B<find_naughty.pl>, which does that:
-
- sub write_line {
-
- # This is called back from perltidy line-by-line
- # We're looking for $`, $&, and $'
- my ( $self, $line_of_tokens ) = @_;
-
- # pull out some stuff we might need
- my $line_type = $line_of_tokens->{_line_type};
- my $input_line_number = $line_of_tokens->{_line_number};
- my $input_line = $line_of_tokens->{_line_text};
- my $rtoken_type = $line_of_tokens->{_rtoken_type};
- my $rtokens = $line_of_tokens->{_rtokens};
- chomp $input_line;
-
- # skip comments, pod, etc
- return if ( $line_type ne 'CODE' );
-
- # loop over tokens looking for $`, $&, and $'
- for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) {
-
- # we only want to examine token types 'i' (identifier)
- next unless $$rtoken_type[$j] eq 'i';
-
- # pull out the actual token text
- my $token = $$rtokens[$j];
-
- # and check it
- if ( $token =~ /^\$[\`\&\']$/ ) {
- print STDERR
- "$input_line_number: $token\n";
- }
- }
- }
-
-This example pulls out these tokenization variables from the $line_of_tokens
-hash reference:
-
- $rtoken_type = $line_of_tokens->{_rtoken_type};
- $rtokens = $line_of_tokens->{_rtokens};
-
-The variable C<$rtoken_type> is a reference to an array of token type codes,
-and C<$rtokens> is a reference to a corresponding array of token text.
-These are obviously only defined for lines of type B<CODE>.
-Perltidy classifies tokens into types, and has a brief code for each type.
-You can get a complete list at any time by running perltidy from the
-command line with
-
- perltidy --dump-token-types
-
-In the present example, we are only looking for tokens of type B<i>
-(identifiers), so the for loop skips past all other types. When an
-identifier is found, its actual text is checked to see if it is one
-being sought. If so, the above write_line prints the token and its
-line number.
-
-The B<formatter> feature is relatively new in perltidy, and further
-documentation needs to be written to complete its description. However,
-several example programs have been written and can be found in the
-B<examples> section of the source distribution. Probably the best way
-to get started is to find one of the examples which most closely matches
-your application and start modifying it.
-
-For help with perltidy's pecular way of breaking lines into tokens, you
-might run, from the command line,
-
- perltidy -D filename
-
-where F<filename> is a short script of interest. This will produce
-F<filename.DEBUG> with interleaved lines of text and their token types.
-The B<-D> flag has been in perltidy from the beginning for this purpose.
-If you want to see the code which creates this file, it is
-C<write_debug_entry> in Tidy.pm.
-
-=head1 EXPORT
-
- &perltidy
-
-=head1 CREDITS
-
-Thanks to Hugh Myers who developed the initial modular interface
-to perltidy.
-
-=head1 VERSION
-
-This man page documents Perl::Tidy version 20070424.
-
-=head1 AUTHOR
-
- Steve Hancock
- perltidy at users.sourceforge.net
-
-=head1 SEE ALSO
-
-The perltidy(1) man page describes all of the features of perltidy. It
-can be found at http://perltidy.sourceforge.net.
-
-=cut